Compare commits

..

5 Commits

Author SHA1 Message Date
ba9215ebe8 Bump version 2020-11-18 10:09:45 +01:00
8ebf0eab16 Sort download summary 2020-11-17 21:36:04 +01:00
cd90a60dee Move "sanitize_windows_path" to PFERD.transform 2020-11-12 20:52:46 +01:00
98834c9c95 Bump version 2020-11-12 20:23:36 +01:00
55e9e719ad Sanitize "/" in ilias path names 2020-11-12 20:21:24 +01:00
6 changed files with 40 additions and 24 deletions

View File

@ -5,6 +5,12 @@ from pathlib import Path
from typing import List
def _mergeNoDuplicate(first: List[Path], second: List[Path]) -> List[Path]:
    """
    Combine two path lists into a single list without duplicates.

    The result is ordered by the string form of each resolved path, so the
    output is stable regardless of the order of the inputs.
    """
    unique_paths = set(first + second)
    return sorted(unique_paths, key=lambda entry: str(entry.resolve()))
class DownloadSummary:
"""
Keeps track of all new, modified or deleted files and provides a summary.
@ -40,9 +46,9 @@ class DownloadSummary:
"""
Merges ourselves with the passed summary. Modifies this object, but not the passed one.
"""
self._new_files = list(set(self._new_files + summary.new_files))
self._modified_files = list(set(self._modified_files + summary.modified_files))
self._deleted_files = list(set(self._deleted_files + summary.deleted_files))
self._new_files = _mergeNoDuplicate(self._new_files, summary.new_files)
self._modified_files = _mergeNoDuplicate(self._modified_files, summary.modified_files)
self._deleted_files = _mergeNoDuplicate(self._deleted_files, summary.deleted_files)
def add_deleted_file(self, path: Path) -> None:
"""

View File

@ -26,6 +26,10 @@ LOGGER = logging.getLogger(__name__)
PRETTY = PrettyLogger(LOGGER)
def _sanitize_path_name(name: str) -> str:
    """
    Replace every "/" in the given name with "-".

    This keeps a single element name from being split into several path
    components when it is later used to build a path.
    """
    pieces = name.split("/")
    return "-".join(pieces)
class IliasElementType(Enum):
"""
The type of an ilias element.
@ -260,7 +264,7 @@ class IliasCrawler:
links: List[bs4.Tag] = soup.select("a.il_ContainerItemTitle")
for link in links:
abs_url = self._abs_url_from_link(link)
element_path = Path(folder_path, link.getText().strip())
element_path = Path(folder_path, _sanitize_path_name(link.getText().strip()))
element_type = self._find_type_from_link(element_path, link, abs_url)
if element_type == IliasElementType.REGULAR_FILE:
@ -377,7 +381,7 @@ class IliasCrawler:
modification_date = demangle_date(modification_date_str)
# Grab the name from the link text
name = link_element.getText()
name = _sanitize_path_name(link_element.getText())
full_path = Path(path, name + "." + file_type)
return [
@ -508,7 +512,7 @@ class IliasCrawler:
).getText().strip()
title += ".mp4"
video_path: Path = Path(parent_path, title)
video_path: Path = Path(parent_path, _sanitize_path_name(title))
video_url = self._abs_url_from_link(link)
@ -580,6 +584,7 @@ class IliasCrawler:
# Two divs, side by side. Left is the name, right is the link ==> get left
# sibling
file_name = file_link.parent.findPrevious(name="div").getText().strip()
file_name = _sanitize_path_name(file_name)
url = self._abs_url_from_link(file_link)
LOGGER.debug("Found file %r at %r", file_name, url)

View File

@ -5,6 +5,8 @@ only files whose names match a regex, or renaming files from one numbering
scheme to another.
"""
import os
import re
from dataclasses import dataclass
from pathlib import PurePath
from typing import Callable, List, Optional, TypeVar
@ -45,7 +47,8 @@ def apply_transform(
# Transform combinators
keep = lambda path: path
def keep(path: PurePath) -> Optional[PurePath]:
    """
    Identity transform: hand the given path back unchanged.
    """
    return path
def attempt(*args: Transform) -> Transform:
def inner(path: PurePath) -> Optional[PurePath]:
@ -125,3 +128,15 @@ def re_rename(regex: Regex, target: str) -> Transform:
return path.with_name(target.format(*groups))
return None
return inner
def sanitize_windows_path(path: PurePath) -> Optional[PurePath]:
    """
    Replace characters that are forbidden in Windows file names with "_".

    Every component of the path is sanitized on its own. The anchor of the
    path (drive and/or root, if present) is passed through untouched, because
    the colon of a drive letter is legal there and replacing it would turn an
    absolute path into a relative one.

    This function is a no-op on other operating systems.

    :param path: the path to sanitize
    :return: the sanitized path on Windows, the unmodified path elsewhere
    """
    if os.name != 'nt':
        return path

    parts = list(path.parts)
    # When the path has an anchor, it is always the first entry of "parts".
    first_name = 1 if path.anchor else 0

    # Path separators can not occur inside a single component, so only the
    # remaining reserved characters need to be replaced. "*" is reserved on
    # Windows just like the other wildcard character "?".
    sanitized_parts = [re.sub(r'[<>:"/|?*]', "_", part) for part in parts[first_name:]]
    return PurePath(*parts[:first_name], *sanitized_parts)

View File

@ -37,7 +37,7 @@ Ensure that you have at least Python 3.8 installed.
To install PFERD or update your installation to the latest version, run this
wherever you want to install or have already installed PFERD:
```
$ pip install git+https://github.com/Garmelon/PFERD@v2.4.3
$ pip install git+https://github.com/Garmelon/PFERD@v2.4.5
```
The use of [venv] is recommended.
@ -60,8 +60,8 @@ $ mkdir Vorlesungen
$ cd Vorlesungen
$ python3 -m venv .venv
$ . .venv/bin/activate
$ pip install git+https://github.com/Garmelon/PFERD@v2.4.3
$ curl -O https://raw.githubusercontent.com/Garmelon/PFERD/v2.4.3/example_config.py
$ pip install git+https://github.com/Garmelon/PFERD@v2.4.5
$ curl -O https://raw.githubusercontent.com/Garmelon/PFERD/v2.4.5/example_config.py
$ python3 example_config.py
$ deactivate
```

View File

@ -2,7 +2,7 @@ from setuptools import find_packages, setup
setup(
name="PFERD",
version="2.4.3",
version="2.4.5",
packages=find_packages(),
install_requires=[
"requests>=2.21.0",

View File

@ -5,27 +5,17 @@ A simple script to download a course by name from ILIAS.
"""
import argparse
import os
import re
from pathlib import Path, PurePath
from typing import Optional
from pathlib import Path
from urllib.parse import urlparse
from PFERD import Pferd
from PFERD.cookie_jar import CookieJar
from PFERD.ilias import (IliasCrawler, IliasElementType,
KitShibbolethAuthenticator)
from PFERD.transform import sanitize_windows_path
from PFERD.utils import to_path
def sanitize_path(path: PurePath) -> Optional[PurePath]:
    """
    Replace characters that are forbidden in Windows file names with "_".

    Every component of the path is sanitized on its own. The anchor of the
    path (drive and/or root, if present) is passed through untouched, because
    the colon of a drive letter is legal there and replacing it would turn an
    absolute path into a relative one.

    This function is a no-op on other operating systems.

    :param path: the path to sanitize
    :return: the sanitized path on Windows, the unmodified path elsewhere
    """
    if os.name != 'nt':
        return path

    parts = list(path.parts)
    # When the path has an anchor, it is always the first entry of "parts".
    first_name = 1 if path.anchor else 0

    # Path separators can not occur inside a single component, so only the
    # remaining reserved characters need to be replaced. "*" is reserved on
    # Windows just like the other wildcard character "?".
    sanitized_parts = [re.sub(r'[<>:"/|?*]', "_", part) for part in parts[first_name:]]
    return PurePath(*parts[:first_name], *sanitized_parts)
def main() -> None:
parser = argparse.ArgumentParser()
parser.add_argument("--test-run", action="store_true")
@ -71,7 +61,7 @@ def main() -> None:
full_url=args.url,
cookies=args.cookies,
dir_filter=dir_filter,
transform=sanitize_path
transform=sanitize_windows_path
)