mirror of
https://github.com/Garmelon/PFERD.git
synced 2023-12-21 10:23:01 +01:00
Compare commits
10 Commits
Author | SHA1 | Date | |
---|---|---|---|
ba9215ebe8 | |||
8ebf0eab16 | |||
cd90a60dee | |||
98834c9c95 | |||
55e9e719ad | |||
a0ae9aee27 | |||
1486a63854 | |||
733e1ae136 | |||
4ac51048c1 | |||
f2aba970fd |
@ -5,6 +5,12 @@ from pathlib import Path
|
|||||||
from typing import List
|
from typing import List
|
||||||
|
|
||||||
|
|
||||||
|
def _mergeNoDuplicate(first: List[Path], second: List[Path]) -> List[Path]:
    """
    Merges two lists of paths into one sorted list without duplicates.

    Duplicates are detected by path equality; the paths are not resolved for
    that comparison. The result is ordered by the string form of each resolved
    path. Paths that resolve to the same location are ordered by their own
    string form, so the output never depends on set iteration order.

    Neither of the passed lists is modified.
    """
    unique = set(first)
    unique.update(second)
    # The second key element is a tie-breaker for distinct spellings of the
    # same resolved location (e.g. "a" and "x/../a").
    return sorted(unique, key=lambda path: (str(path.resolve()), str(path)))
|
||||||
|
|
||||||
|
|
||||||
class DownloadSummary:
|
class DownloadSummary:
|
||||||
"""
|
"""
|
||||||
Keeps track of all new, modified or deleted files and provides a summary.
|
Keeps track of all new, modified or deleted files and provides a summary.
|
||||||
@ -40,9 +46,9 @@ class DownloadSummary:
|
|||||||
"""
|
"""
|
||||||
Merges ourselves with the passed summary. Modifies this object, but not the passed one.
|
Merges ourselves with the passed summary. Modifies this object, but not the passed one.
|
||||||
"""
|
"""
|
||||||
self._new_files = list(set(self._new_files + summary.new_files))
|
self._new_files = _mergeNoDuplicate(self._new_files, summary.new_files)
|
||||||
self._modified_files = list(set(self._modified_files + summary.modified_files))
|
self._modified_files = _mergeNoDuplicate(self._modified_files, summary.modified_files)
|
||||||
self._deleted_files = list(set(self._deleted_files + summary.deleted_files))
|
self._deleted_files = _mergeNoDuplicate(self._deleted_files, summary.deleted_files)
|
||||||
|
|
||||||
def add_deleted_file(self, path: Path) -> None:
|
def add_deleted_file(self, path: Path) -> None:
|
||||||
"""
|
"""
|
||||||
|
@ -26,6 +26,10 @@ LOGGER = logging.getLogger(__name__)
|
|||||||
PRETTY = PrettyLogger(LOGGER)
|
PRETTY = PrettyLogger(LOGGER)
|
||||||
|
|
||||||
|
|
||||||
|
def _sanitize_path_name(name: str) -> str:
    """
    Makes a name usable as a single path component.

    Every forward slash is replaced with a dash, so the name is not split
    into several components when it is handed to Path.
    """
    return name.replace("/", "-")
|
||||||
|
|
||||||
|
|
||||||
class IliasElementType(Enum):
|
class IliasElementType(Enum):
|
||||||
"""
|
"""
|
||||||
The type of an ilias element.
|
The type of an ilias element.
|
||||||
@ -260,7 +264,7 @@ class IliasCrawler:
|
|||||||
links: List[bs4.Tag] = soup.select("a.il_ContainerItemTitle")
|
links: List[bs4.Tag] = soup.select("a.il_ContainerItemTitle")
|
||||||
for link in links:
|
for link in links:
|
||||||
abs_url = self._abs_url_from_link(link)
|
abs_url = self._abs_url_from_link(link)
|
||||||
element_path = Path(folder_path, link.getText().strip())
|
element_path = Path(folder_path, _sanitize_path_name(link.getText().strip()))
|
||||||
element_type = self._find_type_from_link(element_path, link, abs_url)
|
element_type = self._find_type_from_link(element_path, link, abs_url)
|
||||||
|
|
||||||
if element_type == IliasElementType.REGULAR_FILE:
|
if element_type == IliasElementType.REGULAR_FILE:
|
||||||
@ -377,7 +381,7 @@ class IliasCrawler:
|
|||||||
modification_date = demangle_date(modification_date_str)
|
modification_date = demangle_date(modification_date_str)
|
||||||
|
|
||||||
# Grab the name from the link text
|
# Grab the name from the link text
|
||||||
name = link_element.getText()
|
name = _sanitize_path_name(link_element.getText())
|
||||||
full_path = Path(path, name + "." + file_type)
|
full_path = Path(path, name + "." + file_type)
|
||||||
|
|
||||||
return [
|
return [
|
||||||
@ -508,7 +512,7 @@ class IliasCrawler:
|
|||||||
).getText().strip()
|
).getText().strip()
|
||||||
title += ".mp4"
|
title += ".mp4"
|
||||||
|
|
||||||
video_path: Path = Path(parent_path, title)
|
video_path: Path = Path(parent_path, _sanitize_path_name(title))
|
||||||
|
|
||||||
video_url = self._abs_url_from_link(link)
|
video_url = self._abs_url_from_link(link)
|
||||||
|
|
||||||
@ -580,6 +584,7 @@ class IliasCrawler:
|
|||||||
# Two divs, side by side. Left is the name, right is the link ==> get left
|
# Two divs, side by side. Left is the name, right is the link ==> get left
|
||||||
# sibling
|
# sibling
|
||||||
file_name = file_link.parent.findPrevious(name="div").getText().strip()
|
file_name = file_link.parent.findPrevious(name="div").getText().strip()
|
||||||
|
file_name = _sanitize_path_name(file_name)
|
||||||
url = self._abs_url_from_link(file_link)
|
url = self._abs_url_from_link(file_link)
|
||||||
|
|
||||||
LOGGER.debug("Found file %r at %r", file_name, url)
|
LOGGER.debug("Found file %r at %r", file_name, url)
|
||||||
|
@ -5,6 +5,8 @@ only files whose names match a regex, or renaming files from one numbering
|
|||||||
scheme to another.
|
scheme to another.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import re
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from pathlib import PurePath
|
from pathlib import PurePath
|
||||||
from typing import Callable, List, Optional, TypeVar
|
from typing import Callable, List, Optional, TypeVar
|
||||||
@ -45,7 +47,8 @@ def apply_transform(
|
|||||||
|
|
||||||
# Transform combinators
|
# Transform combinators
|
||||||
|
|
||||||
keep = lambda path: path
|
def keep(path: PurePath) -> Optional[PurePath]:
    """
    A transform that accepts every path and returns it unchanged.
    """
    return path
|
||||||
|
|
||||||
def attempt(*args: Transform) -> Transform:
|
def attempt(*args: Transform) -> Transform:
|
||||||
def inner(path: PurePath) -> Optional[PurePath]:
|
def inner(path: PurePath) -> Optional[PurePath]:
|
||||||
@ -125,3 +128,15 @@ def re_rename(regex: Regex, target: str) -> Transform:
|
|||||||
return path.with_name(target.format(*groups))
|
return path.with_name(target.format(*groups))
|
||||||
return None
|
return None
|
||||||
return inner
|
return inner
|
||||||
|
|
||||||
|
|
||||||
|
def _sanitize_windows_parts(path: PurePath) -> PurePath:
    """
    Replaces characters that windows reserves in file names with underscores.

    Each component of the path is sanitized on its own. If the path is rooted,
    its anchor (e.g. "C:\\") is kept as it is, so an absolute path stays
    absolute instead of losing its drive. The result is always a windows
    flavoured pure path.
    """
    # Local import: only PurePath is imported at the top of this module.
    from pathlib import PureWindowsPath

    parts = list(path.parts)
    # For a rooted path the first part is the anchor, which must not be changed.
    untouched = 1 if path.root and parts else 0
    sanitized = parts[:untouched] + [
        re.sub(r'[<>:"/\\|?*]', "_", part) for part in parts[untouched:]
    ]
    return PureWindowsPath(*sanitized)


def sanitize_windows_path(path: PurePath) -> Optional[PurePath]:
    """
    A small function to escape characters that are forbidden in windows path names.

    The reserved characters < > : " / \\ | ? * are replaced with underscores in
    every path component. This method is a no-op on other operating systems,
    where the passed path is returned unchanged.
    """
    if os.name != 'nt':
        return path
    return _sanitize_windows_parts(path)
|
||||||
|
@ -37,7 +37,7 @@ Ensure that you have at least Python 3.8 installed.
|
|||||||
To install PFERD or update your installation to the latest version, run this
|
To install PFERD or update your installation to the latest version, run this
|
||||||
wherever you want to install or have already installed PFERD:
|
wherever you want to install or have already installed PFERD:
|
||||||
```
|
```
|
||||||
$ pip install git+https://github.com/Garmelon/PFERD@v2.4.2
|
$ pip install git+https://github.com/Garmelon/PFERD@v2.4.5
|
||||||
```
|
```
|
||||||
|
|
||||||
The use of [venv] is recommended.
|
The use of [venv] is recommended.
|
||||||
@ -60,8 +60,8 @@ $ mkdir Vorlesungen
|
|||||||
$ cd Vorlesungen
|
$ cd Vorlesungen
|
||||||
$ python3 -m venv .venv
|
$ python3 -m venv .venv
|
||||||
$ . .venv/bin/activate
|
$ . .venv/bin/activate
|
||||||
$ pip install git+https://github.com/Garmelon/PFERD@v2.4.2
|
$ pip install git+https://github.com/Garmelon/PFERD@v2.4.5
|
||||||
$ curl -O https://raw.githubusercontent.com/Garmelon/PFERD/v2.4.2/example_config.py
|
$ curl -O https://raw.githubusercontent.com/Garmelon/PFERD/v2.4.5/example_config.py
|
||||||
$ python3 example_config.py
|
$ python3 example_config.py
|
||||||
$ deactivate
|
$ deactivate
|
||||||
```
|
```
|
||||||
|
2
setup.py
2
setup.py
@ -2,7 +2,7 @@ from setuptools import find_packages, setup
|
|||||||
|
|
||||||
setup(
|
setup(
|
||||||
name="PFERD",
|
name="PFERD",
|
||||||
version="2.4.2",
|
version="2.4.5",
|
||||||
packages=find_packages(),
|
packages=find_packages(),
|
||||||
install_requires=[
|
install_requires=[
|
||||||
"requests>=2.21.0",
|
"requests>=2.21.0",
|
||||||
|
@ -12,6 +12,7 @@ from PFERD import Pferd
|
|||||||
from PFERD.cookie_jar import CookieJar
|
from PFERD.cookie_jar import CookieJar
|
||||||
from PFERD.ilias import (IliasCrawler, IliasElementType,
|
from PFERD.ilias import (IliasCrawler, IliasElementType,
|
||||||
KitShibbolethAuthenticator)
|
KitShibbolethAuthenticator)
|
||||||
|
from PFERD.transform import sanitize_windows_path
|
||||||
from PFERD.utils import to_path
|
from PFERD.utils import to_path
|
||||||
|
|
||||||
|
|
||||||
@ -59,7 +60,8 @@ def main() -> None:
|
|||||||
target=folder,
|
target=folder,
|
||||||
full_url=args.url,
|
full_url=args.url,
|
||||||
cookies=args.cookies,
|
cookies=args.cookies,
|
||||||
dir_filter=dir_filter
|
dir_filter=dir_filter,
|
||||||
|
transform=sanitize_windows_path
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user