Compare commits

...

7 Commits

Author SHA1 Message Date
I-Al-Istannen
98834c9c95 Bump version 2020-11-12 20:23:36 +01:00
I-Al-Istannen
55e9e719ad Sanitize "/" in ilias path names 2020-11-12 20:21:24 +01:00
I-Al-Istannen
a0ae9aee27 Sanitize individual path parts 2020-11-11 09:36:20 +01:00
I-Al-Istannen
1486a63854 Do not collapse directory structure when sanitizing 2020-11-10 22:53:47 +01:00
I-Al-Istannen
733e1ae136 Bump version 2020-11-10 20:50:31 +01:00
I-Al-Istannen
4ac51048c1 Use "_" as a replacement for illegal characters 2020-11-10 20:49:14 +01:00
I-Al-Istannen
f2aba970fd [sync_url] Sanitize path names on windows 2020-11-10 17:16:14 +01:00
4 changed files with 26 additions and 9 deletions

View File

@@ -26,6 +26,10 @@ LOGGER = logging.getLogger(__name__)
PRETTY = PrettyLogger(LOGGER)
def _sanitize_path_name(name: str) -> str:
return name.replace("/", "-")
class IliasElementType(Enum):
"""
The type of an ilias element.
@@ -260,7 +264,7 @@ class IliasCrawler:
links: List[bs4.Tag] = soup.select("a.il_ContainerItemTitle")
for link in links:
abs_url = self._abs_url_from_link(link)
element_path = Path(folder_path, link.getText().strip())
element_path = Path(folder_path, _sanitize_path_name(link.getText().strip()))
element_type = self._find_type_from_link(element_path, link, abs_url)
if element_type == IliasElementType.REGULAR_FILE:
@@ -377,7 +381,7 @@ class IliasCrawler:
modification_date = demangle_date(modification_date_str)
# Grab the name from the link text
name = link_element.getText()
name = _sanitize_path_name(link_element.getText())
full_path = Path(path, name + "." + file_type)
return [
@@ -508,7 +512,7 @@ class IliasCrawler:
).getText().strip()
title += ".mp4"
video_path: Path = Path(parent_path, title)
video_path: Path = Path(parent_path, _sanitize_path_name(title))
video_url = self._abs_url_from_link(link)
@@ -580,6 +584,7 @@ class IliasCrawler:
# Two divs, side by side. Left is the name, right is the link ==> get left
# sibling
file_name = file_link.parent.findPrevious(name="div").getText().strip()
file_name = _sanitize_path_name(file_name)
url = self._abs_url_from_link(file_link)
LOGGER.debug("Found file %r at %r", file_name, url)

View File

@@ -37,7 +37,7 @@ Ensure that you have at least Python 3.8 installed.
To install PFERD or update your installation to the latest version, run this
wherever you want to install or have already installed PFERD:
```
$ pip install git+https://github.com/Garmelon/PFERD@v2.4.2
$ pip install git+https://github.com/Garmelon/PFERD@v2.4.4
```
The use of [venv] is recommended.
@@ -60,8 +60,8 @@ $ mkdir Vorlesungen
$ cd Vorlesungen
$ python3 -m venv .venv
$ . .venv/bin/activate
$ pip install git+https://github.com/Garmelon/PFERD@v2.4.2
$ curl -O https://raw.githubusercontent.com/Garmelon/PFERD/v2.4.2/example_config.py
$ pip install git+https://github.com/Garmelon/PFERD@v2.4.4
$ curl -O https://raw.githubusercontent.com/Garmelon/PFERD/v2.4.4/example_config.py
$ python3 example_config.py
$ deactivate
```

View File

@@ -2,7 +2,7 @@ from setuptools import find_packages, setup
setup(
name="PFERD",
version="2.4.2",
version="2.4.4",
packages=find_packages(),
install_requires=[
"requests>=2.21.0",

View File

@@ -5,7 +5,10 @@ A simple script to download a course by name from ILIAS.
"""
import argparse
from pathlib import Path
import os
import re
from pathlib import Path, PurePath
from typing import Optional
from urllib.parse import urlparse
from PFERD import Pferd
@@ -15,6 +18,14 @@ from PFERD.ilias import (IliasCrawler, IliasElementType,
from PFERD.utils import to_path
def sanitize_path(path: PurePath) -> Optional[PurePath]:
    """
    Replace characters that Windows forbids in file names with "_".

    Each part of the path is sanitized on its own, so the directory structure
    of the path is kept. On any platform other than Windows the path is
    returned unchanged.

    The return type stays Optional for compatibility with callers; this
    function itself always returns a path.
    """
    # Only Windows rejects these characters, leave paths alone everywhere else
    if os.name != 'nt':
        return path

    # "*" is reserved on Windows as well, so it is replaced like the others
    sanitized_parts = [re.sub(r'[<>:"/|?*]', "_", part) for part in path.parts]
    return PurePath(*sanitized_parts)
def main() -> None:
parser = argparse.ArgumentParser()
parser.add_argument("--test-run", action="store_true")
@@ -59,7 +70,8 @@ def main() -> None:
target=folder,
full_url=args.url,
cookies=args.cookies,
dir_filter=dir_filter
dir_filter=dir_filter,
transform=sanitize_path
)