mirror of
https://github.com/Garmelon/PFERD.git
synced 2023-12-21 10:23:01 +01:00
Compare commits
5 Commits
Author | SHA1 | Date | |
---|---|---|---|
ba9215ebe8 | |||
8ebf0eab16 | |||
cd90a60dee | |||
98834c9c95 | |||
55e9e719ad |
@ -5,6 +5,12 @@ from pathlib import Path
|
||||
from typing import List
|
||||
|
||||
|
||||
def _mergeNoDuplicate(first: List[Path], second: List[Path]) -> List[Path]:
    """
    Combine two lists of paths into a single list without duplicate entries.

    The result is ordered by the string form of each path's resolved location.
    """
    return sorted(set(first + second), key=lambda entry: str(entry.resolve()))
|
||||
|
||||
|
||||
class DownloadSummary:
|
||||
"""
|
||||
Keeps track of all new, modified or deleted files and provides a summary.
|
||||
@ -40,9 +46,9 @@ class DownloadSummary:
|
||||
"""
|
||||
Merges ourselves with the passed summary. Modifies this object, but not the passed one.
|
||||
"""
|
||||
self._new_files = list(set(self._new_files + summary.new_files))
|
||||
self._modified_files = list(set(self._modified_files + summary.modified_files))
|
||||
self._deleted_files = list(set(self._deleted_files + summary.deleted_files))
|
||||
self._new_files = _mergeNoDuplicate(self._new_files, summary.new_files)
|
||||
self._modified_files = _mergeNoDuplicate(self._modified_files, summary.modified_files)
|
||||
self._deleted_files = _mergeNoDuplicate(self._deleted_files, summary.deleted_files)
|
||||
|
||||
def add_deleted_file(self, path: Path) -> None:
|
||||
"""
|
||||
|
@ -26,6 +26,10 @@ LOGGER = logging.getLogger(__name__)
|
||||
PRETTY = PrettyLogger(LOGGER)
|
||||
|
||||
|
||||
def _sanitize_path_name(name: str) -> str:
    """
    Return the given name with every "/" exchanged for "-".
    """
    return "-".join(name.split("/"))
|
||||
|
||||
|
||||
class IliasElementType(Enum):
|
||||
"""
|
||||
The type of an ilias element.
|
||||
@ -260,7 +264,7 @@ class IliasCrawler:
|
||||
links: List[bs4.Tag] = soup.select("a.il_ContainerItemTitle")
|
||||
for link in links:
|
||||
abs_url = self._abs_url_from_link(link)
|
||||
element_path = Path(folder_path, link.getText().strip())
|
||||
element_path = Path(folder_path, _sanitize_path_name(link.getText().strip()))
|
||||
element_type = self._find_type_from_link(element_path, link, abs_url)
|
||||
|
||||
if element_type == IliasElementType.REGULAR_FILE:
|
||||
@ -377,7 +381,7 @@ class IliasCrawler:
|
||||
modification_date = demangle_date(modification_date_str)
|
||||
|
||||
# Grab the name from the link text
|
||||
name = link_element.getText()
|
||||
name = _sanitize_path_name(link_element.getText())
|
||||
full_path = Path(path, name + "." + file_type)
|
||||
|
||||
return [
|
||||
@ -508,7 +512,7 @@ class IliasCrawler:
|
||||
).getText().strip()
|
||||
title += ".mp4"
|
||||
|
||||
video_path: Path = Path(parent_path, title)
|
||||
video_path: Path = Path(parent_path, _sanitize_path_name(title))
|
||||
|
||||
video_url = self._abs_url_from_link(link)
|
||||
|
||||
@ -580,6 +584,7 @@ class IliasCrawler:
|
||||
# Two divs, side by side. Left is the name, right is the link ==> get left
|
||||
# sibling
|
||||
file_name = file_link.parent.findPrevious(name="div").getText().strip()
|
||||
file_name = _sanitize_path_name(file_name)
|
||||
url = self._abs_url_from_link(file_link)
|
||||
|
||||
LOGGER.debug("Found file %r at %r", file_name, url)
|
||||
|
@ -5,6 +5,8 @@ only files whose names match a regex, or renaming files from one numbering
|
||||
scheme to another.
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from pathlib import PurePath
|
||||
from typing import Callable, List, Optional, TypeVar
|
||||
@ -45,7 +47,8 @@ def apply_transform(
|
||||
|
||||
# Transform combinators
|
||||
|
||||
keep = lambda path: path
|
||||
def keep(path: PurePath) -> Optional[PurePath]:
    """
    Transform that hands the given path back unchanged.
    """
    return path
|
||||
|
||||
def attempt(*args: Transform) -> Transform:
|
||||
def inner(path: PurePath) -> Optional[PurePath]:
|
||||
@ -125,3 +128,15 @@ def re_rename(regex: Regex, target: str) -> Transform:
|
||||
return path.with_name(target.format(*groups))
|
||||
return None
|
||||
return inner
|
||||
|
||||
|
||||
def sanitize_windows_path(path: PurePath) -> Optional[PurePath]:
    """
    A small function to escape characters that are forbidden in windows path names.
    This method is a no-op on other operating systems.

    On Windows (os.name == 'nt') every path component has the characters
    < > : " / | ? * and the backslash replaced by "_". The anchor of the path
    (drive and/or root), if any, is kept as it is.

    Returns the sanitized path, or the unchanged path on other systems.
    """
    if os.name != 'nt':
        return path

    # The anchor (e.g. "C:\") legitimately contains ":" and a backslash, so it
    # must not be run through the substitution. When an anchor is present it is
    # always the first entry of path.parts.
    anchor = path.anchor
    names = path.parts[1:] if anchor else path.parts

    # Escape windows illegal path characters
    sanitized_parts = [re.sub(r'[<>:"/\\|?*]', "_", part) for part in names]
    if anchor:
        return PurePath(anchor, *sanitized_parts)
    return PurePath(*sanitized_parts)
|
||||
|
@ -37,7 +37,7 @@ Ensure that you have at least Python 3.8 installed.
|
||||
To install PFERD or update your installation to the latest version, run this
|
||||
wherever you want to install or have already installed PFERD:
|
||||
```
|
||||
$ pip install git+https://github.com/Garmelon/PFERD@v2.4.3
|
||||
$ pip install git+https://github.com/Garmelon/PFERD@v2.4.5
|
||||
```
|
||||
|
||||
The use of [venv] is recommended.
|
||||
@ -60,8 +60,8 @@ $ mkdir Vorlesungen
|
||||
$ cd Vorlesungen
|
||||
$ python3 -m venv .venv
|
||||
$ . .venv/bin/activate
|
||||
$ pip install git+https://github.com/Garmelon/PFERD@v2.4.3
|
||||
$ curl -O https://raw.githubusercontent.com/Garmelon/PFERD/v2.4.3/example_config.py
|
||||
$ pip install git+https://github.com/Garmelon/PFERD@v2.4.5
|
||||
$ curl -O https://raw.githubusercontent.com/Garmelon/PFERD/v2.4.5/example_config.py
|
||||
$ python3 example_config.py
|
||||
$ deactivate
|
||||
```
|
||||
|
2
setup.py
2
setup.py
@ -2,7 +2,7 @@ from setuptools import find_packages, setup
|
||||
|
||||
setup(
|
||||
name="PFERD",
|
||||
version="2.4.3",
|
||||
version="2.4.5",
|
||||
packages=find_packages(),
|
||||
install_requires=[
|
||||
"requests>=2.21.0",
|
||||
|
16
sync_url.py
16
sync_url.py
@ -5,27 +5,17 @@ A simple script to download a course by name from ILIAS.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import re
|
||||
from pathlib import Path, PurePath
|
||||
from typing import Optional
|
||||
from pathlib import Path
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from PFERD import Pferd
|
||||
from PFERD.cookie_jar import CookieJar
|
||||
from PFERD.ilias import (IliasCrawler, IliasElementType,
|
||||
KitShibbolethAuthenticator)
|
||||
from PFERD.transform import sanitize_windows_path
|
||||
from PFERD.utils import to_path
|
||||
|
||||
|
||||
def sanitize_path(path: PurePath) -> Optional[PurePath]:
    """
    Escape characters that are forbidden in windows path names.

    On Windows (os.name == 'nt') every path component has the characters
    < > : " / | ? * and the backslash replaced by "_". The anchor of the path
    (drive and/or root), if any, is kept as it is. On other operating systems
    the path is returned unchanged.
    """
    if os.name != 'nt':
        return path

    # The anchor (e.g. "C:\") legitimately contains ":" and a backslash, so it
    # must not be run through the substitution. When an anchor is present it is
    # always the first entry of path.parts.
    anchor = path.anchor
    names = path.parts[1:] if anchor else path.parts

    # Escape windows illegal path characters
    sanitized_parts = [re.sub(r'[<>:"/\\|?*]', "_", part) for part in names]
    if anchor:
        return PurePath(anchor, *sanitized_parts)
    return PurePath(*sanitized_parts)
|
||||
|
||||
|
||||
def main() -> None:
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--test-run", action="store_true")
|
||||
@ -71,7 +61,7 @@ def main() -> None:
|
||||
full_url=args.url,
|
||||
cookies=args.cookies,
|
||||
dir_filter=dir_filter,
|
||||
transform=sanitize_path
|
||||
transform=sanitize_windows_path
|
||||
)
|
||||
|
||||
|
||||
|
Reference in New Issue
Block a user