Compare commits

..

10 Commits

6 changed files with 40 additions and 12 deletions

View File

@ -5,6 +5,12 @@ from pathlib import Path
from typing import List from typing import List
def _mergeNoDuplicate(first: List[Path], second: List[Path]) -> List[Path]:
    """
    Combine two lists of paths into a single list without duplicates.

    The result is ordered by the string form of each path's resolved
    location, so the outcome does not depend on set iteration order.
    """
    unique_paths = set(first + second)
    return sorted(unique_paths, key=lambda entry: str(entry.resolve()))
class DownloadSummary: class DownloadSummary:
""" """
Keeps track of all new, modified or deleted files and provides a summary. Keeps track of all new, modified or deleted files and provides a summary.
@ -40,9 +46,9 @@ class DownloadSummary:
""" """
Merges ourselves with the passed summary. Modifies this object, but not the passed one. Merges ourselves with the passed summary. Modifies this object, but not the passed one.
""" """
self._new_files = list(set(self._new_files + summary.new_files)) self._new_files = _mergeNoDuplicate(self._new_files, summary.new_files)
self._modified_files = list(set(self._modified_files + summary.modified_files)) self._modified_files = _mergeNoDuplicate(self._modified_files, summary.modified_files)
self._deleted_files = list(set(self._deleted_files + summary.deleted_files)) self._deleted_files = _mergeNoDuplicate(self._deleted_files, summary.deleted_files)
def add_deleted_file(self, path: Path) -> None: def add_deleted_file(self, path: Path) -> None:
""" """

View File

@ -26,6 +26,10 @@ LOGGER = logging.getLogger(__name__)
PRETTY = PrettyLogger(LOGGER) PRETTY = PrettyLogger(LOGGER)
def _sanitize_path_name(name: str) -> str:
    """
    Return the name with every "/" replaced by "-".
    """
    # Splitting on the slash and re-joining with a dash swaps every occurrence.
    pieces = name.split("/")
    return "-".join(pieces)
class IliasElementType(Enum): class IliasElementType(Enum):
""" """
The type of an ilias element. The type of an ilias element.
@ -260,7 +264,7 @@ class IliasCrawler:
links: List[bs4.Tag] = soup.select("a.il_ContainerItemTitle") links: List[bs4.Tag] = soup.select("a.il_ContainerItemTitle")
for link in links: for link in links:
abs_url = self._abs_url_from_link(link) abs_url = self._abs_url_from_link(link)
element_path = Path(folder_path, link.getText().strip()) element_path = Path(folder_path, _sanitize_path_name(link.getText().strip()))
element_type = self._find_type_from_link(element_path, link, abs_url) element_type = self._find_type_from_link(element_path, link, abs_url)
if element_type == IliasElementType.REGULAR_FILE: if element_type == IliasElementType.REGULAR_FILE:
@ -377,7 +381,7 @@ class IliasCrawler:
modification_date = demangle_date(modification_date_str) modification_date = demangle_date(modification_date_str)
# Grab the name from the link text # Grab the name from the link text
name = link_element.getText() name = _sanitize_path_name(link_element.getText())
full_path = Path(path, name + "." + file_type) full_path = Path(path, name + "." + file_type)
return [ return [
@ -508,7 +512,7 @@ class IliasCrawler:
).getText().strip() ).getText().strip()
title += ".mp4" title += ".mp4"
video_path: Path = Path(parent_path, title) video_path: Path = Path(parent_path, _sanitize_path_name(title))
video_url = self._abs_url_from_link(link) video_url = self._abs_url_from_link(link)
@ -580,6 +584,7 @@ class IliasCrawler:
# Two divs, side by side. Left is the name, right is the link ==> get left # Two divs, side by side. Left is the name, right is the link ==> get left
# sibling # sibling
file_name = file_link.parent.findPrevious(name="div").getText().strip() file_name = file_link.parent.findPrevious(name="div").getText().strip()
file_name = _sanitize_path_name(file_name)
url = self._abs_url_from_link(file_link) url = self._abs_url_from_link(file_link)
LOGGER.debug("Found file %r at %r", file_name, url) LOGGER.debug("Found file %r at %r", file_name, url)

View File

@ -5,6 +5,8 @@ only files whose names match a regex, or renaming files from one numbering
scheme to another. scheme to another.
""" """
import os
import re
from dataclasses import dataclass from dataclasses import dataclass
from pathlib import PurePath from pathlib import PurePath
from typing import Callable, List, Optional, TypeVar from typing import Callable, List, Optional, TypeVar
@ -45,7 +47,8 @@ def apply_transform(
# Transform combinators # Transform combinators
keep = lambda path: path def keep(path: PurePath) -> Optional[PurePath]:
return path
def attempt(*args: Transform) -> Transform: def attempt(*args: Transform) -> Transform:
def inner(path: PurePath) -> Optional[PurePath]: def inner(path: PurePath) -> Optional[PurePath]:
@ -125,3 +128,15 @@ def re_rename(regex: Regex, target: str) -> Transform:
return path.with_name(target.format(*groups)) return path.with_name(target.format(*groups))
return None return None
return inner return inner
def sanitize_windows_path(path: PurePath) -> Optional[PurePath]:
    """
    Replace characters that are forbidden in Windows file names with "_".

    Every component of the path is rewritten except its anchor (drive and/or
    root), which is kept verbatim so that the colon of a drive letter is not
    mangled. This function is a no-op on other operating systems.

    Returns the sanitized path on Windows and the unchanged path elsewhere.
    """
    if os.name != 'nt':
        return path

    # Characters Windows reserves in file names: < > : " / | ? *
    # The backslash needs no handling: on Windows it is a path separator and
    # therefore never appears inside a non-anchor component.
    forbidden = re.compile(r'[<>:"/|?*]')

    parts = list(path.parts)
    # When the path has an anchor it is always the first component.
    first_name = 1 if path.anchor else 0
    sanitized_parts = parts[:first_name] + [
        forbidden.sub("_", part) for part in parts[first_name:]
    ]
    return PurePath(*sanitized_parts)

View File

@ -37,7 +37,7 @@ Ensure that you have at least Python 3.8 installed.
To install PFERD or update your installation to the latest version, run this To install PFERD or update your installation to the latest version, run this
wherever you want to install or have already installed PFERD: wherever you want to install or have already installed PFERD:
``` ```
$ pip install git+https://github.com/Garmelon/PFERD@v2.4.2 $ pip install git+https://github.com/Garmelon/PFERD@v2.4.5
``` ```
The use of [venv] is recommended. The use of [venv] is recommended.
@ -60,8 +60,8 @@ $ mkdir Vorlesungen
$ cd Vorlesungen $ cd Vorlesungen
$ python3 -m venv .venv $ python3 -m venv .venv
$ .venv/bin/activate $ .venv/bin/activate
$ pip install git+https://github.com/Garmelon/PFERD@v2.4.2 $ pip install git+https://github.com/Garmelon/PFERD@v2.4.5
$ curl -O https://raw.githubusercontent.com/Garmelon/PFERD/v2.4.2/example_config.py $ curl -O https://raw.githubusercontent.com/Garmelon/PFERD/v2.4.5/example_config.py
$ python3 example_config.py $ python3 example_config.py
$ deactivate $ deactivate
``` ```

View File

@ -2,7 +2,7 @@ from setuptools import find_packages, setup
setup( setup(
name="PFERD", name="PFERD",
version="2.4.2", version="2.4.5",
packages=find_packages(), packages=find_packages(),
install_requires=[ install_requires=[
"requests>=2.21.0", "requests>=2.21.0",

View File

@ -12,6 +12,7 @@ from PFERD import Pferd
from PFERD.cookie_jar import CookieJar from PFERD.cookie_jar import CookieJar
from PFERD.ilias import (IliasCrawler, IliasElementType, from PFERD.ilias import (IliasCrawler, IliasElementType,
KitShibbolethAuthenticator) KitShibbolethAuthenticator)
from PFERD.transform import sanitize_windows_path
from PFERD.utils import to_path from PFERD.utils import to_path
@ -59,7 +60,8 @@ def main() -> None:
target=folder, target=folder,
full_url=args.url, full_url=args.url,
cookies=args.cookies, cookies=args.cookies,
dir_filter=dir_filter dir_filter=dir_filter,
transform=sanitize_windows_path
) )