Compare commits

..

44 Commits

Author SHA1 Message Date
c1ab7485e2 Bump version to 2.6.1 2021-04-19 11:21:56 +02:00
29cd5d1a3c Reflect totality of sanitize_windows_path in return type 2021-04-19 11:10:02 +02:00
6d5d9333ad Force folder to be file-system path 2021-04-19 11:07:25 +02:00
7cc40595dc Allow synchronizing to directory "." 2021-04-14 20:25:25 +02:00
80ae5ddfaa Bump version to v2.6.0 2021-04-14 19:47:41 +02:00
4f480d117e Install keyring in CI 2021-04-14 19:24:05 +02:00
1f2af3a290 Retry on more I/O Errors 2021-04-13 11:43:22 +02:00
14cdfb6a69 Fix typo in date demangler doc 2021-04-13 11:19:51 +02:00
e2bf84392b [sync_url] Properly declare "no-videos" as flag 2021-04-08 18:12:27 +02:00
946b7a7931 Also crawl .c/.java/.zip from IPD page 2021-02-09 12:30:59 +01:00
9a9018751e Bump version 2021-02-06 22:54:05 +01:00
83b75e8254 syncurl: Sanitize element name on windows if it is used as folder name
Otherwise the name of the course might not be a valid file name.
2021-02-06 22:53:26 +01:00
35c3fa205d Fixed description of activating venv (#22)
Add 'source' to the venv activate command in the readme

`source` was picked over `.` to conform to the python recommendation
(https://docs.python.org/3/library/venv.html#module-venv).

This patch also adds the `egg-info` you get when building to the
gitignore.
2021-01-28 21:24:09 +01:00
0b606f02fa Bump version 2021-01-17 10:33:10 +01:00
fb78a6e98e Retry ILIAS downloads a few times and only fail that file 2021-01-06 13:08:10 +01:00
5de68a0400 Bump version 2020-12-30 17:20:30 +01:00
f0562049b6 Remove Python 3.9 method in crawler 2020-12-30 17:18:04 +01:00
0e1077bb50 Bump version 2020-12-30 14:50:49 +01:00
c978e9edf4 Resolve a few pylint warnings 2020-12-30 14:45:46 +01:00
2714ac6be6 Send CSRF token to Shibboleth 2020-12-30 14:34:11 +01:00
9b048a9cfc Canonize meeting names to a properly formatted date 2020-12-30 14:32:59 +01:00
1c2b6bf994 Bump version 2020-12-13 19:57:29 +01:00
ee39aaf08b Fix merge marker in LICENSE 2020-12-07 22:55:28 +01:00
93e6329901 Use the least destructive conflict resolver if there are multiple 2020-12-06 13:28:08 +01:00
f47b137b59 Fix ILIAS init.py and Pferd.py authenticators 2020-12-06 13:15:32 +01:00
83ea15ee83 Use system keyring service for password auth 2020-12-06 13:15:30 +01:00
75471c46d1 Use credential file 2020-12-05 23:44:09 +01:00
1e0343bba6 sync_url: Add username and password args 2020-12-05 23:30:09 +01:00
0f5e55648b Tell user when the conflict resolver kept existing files 2020-12-05 14:12:45 +01:00
57259e21f4 Print download summary in sync_url 2020-12-05 14:09:09 +01:00
4ce385b262 Treat file overwrite and marked file overwrite differently 2020-12-05 14:03:43 +01:00
2d64409542 Fix handling of empty args.folder 2020-12-05 13:50:46 +01:00
fcb3884a8f Add --remote-first, --local-first and --no-delete flags 2020-12-05 13:49:05 +01:00
9f6dc56a7b Use a strategy to decide conflict resolution 2020-12-02 19:32:57 +01:00
56ab473611 Merge pull request #17 from TheChristophe/master
Add flag to make sync_url use defaults instead of prompting
2020-12-02 19:04:46 +01:00
6426060804 Fix relative paths bug
Introduced in 74ea039458
2020-12-02 18:40:45 +01:00
49a0ca7a7c Add myself to LICENSE
This should've been done back when I added a PR for adding sync_url but people are lazy smh.
2020-12-02 18:24:07 +01:00
f3a4663491 Add passive/no_prompt flag 2020-12-02 18:24:07 +01:00
ecdbca8fb6 Make sync_url work relative to cwd like sane programs 2020-12-02 18:24:04 +01:00
9cbea5fe06 Add requirements.txt 2020-11-23 10:16:40 +01:00
ba3c7f85fa Replace "\" in ILIAS paths as well
I am not sure whether anybody really uses a backslash in their names,
but I guess it can't hurt to do this for windows users.
2020-11-19 19:37:28 +01:00
ba9215ebe8 Bump version 2020-11-18 10:09:45 +01:00
8ebf0eab16 Sort download summary 2020-11-17 21:36:04 +01:00
cd90a60dee Move "sanitize_windows_path" to PFERD.transform 2020-11-12 20:52:46 +01:00
20 changed files with 433 additions and 70 deletions

View File

@ -23,7 +23,7 @@ jobs:
python-version: '3.x'
- name: "Install dependencies"
run: "pip install setuptools pyinstaller rich requests beautifulsoup4 -f --upgrade"
run: "pip install setuptools keyring pyinstaller rich requests beautifulsoup4 -f --upgrade"
- name: "Install sync_url.py"
run: "pyinstaller sync_url.py -F"

1
.gitignore vendored
View File

@ -8,6 +8,7 @@ build/
.env
.vscode
ilias_cookies.txt
PFERD.egg-info/
# PyInstaller
sync_url.spec

View File

@ -1,4 +1,4 @@
Copyright 2019-2020 Garmelon, I-Al-Istannen, danstooamerican, pavelzw
Copyright 2019-2020 Garmelon, I-Al-Istannen, danstooamerican, pavelzw, TheChristophe, Scriptim
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in

View File

@ -3,8 +3,19 @@ General authenticators useful in many situations
"""
import getpass
import logging
from typing import Optional, Tuple
from .logging import PrettyLogger
LOGGER = logging.getLogger(__name__)
PRETTY = PrettyLogger(LOGGER)
try:
import keyring
except ImportError:
pass
class TfaAuthenticator:
# pylint: disable=too-few-public-methods
@ -123,3 +134,81 @@ class UserPassAuthenticator:
if self._given_username is not None and self._given_password is not None:
self._given_username = None
self._given_password = None
class KeyringAuthenticator(UserPassAuthenticator):
    """
    An authenticator for username-password combinations that stores the
    password using the system keyring service and prompts the user for missing
    information.

    The keyring package is imported at module level inside a try/except
    ImportError that silently passes, so using this class without keyring
    installed fails with a NameError on the first keyring call.
    """

    # Service name under which all passwords are filed in the system keyring.
    _SERVICE_NAME = "pferd-ilias"

    def get_credentials(self) -> Tuple[str, str]:
        """
        Returns a tuple (username, password). Prompts user for username or
        password when necessary.

        Values handed to the constructor are used first. A missing password is
        then looked up in the system keyring, and only if that yields nothing
        is the user prompted. A password entered at the prompt is saved to the
        keyring.
        """

        if self._username is None and self._given_username is not None:
            self._username = self._given_username

        if self._password is None and self._given_password is not None:
            self._password = self._given_password

        # A known username is enough to try the keyring before prompting.
        if self._username is not None and self._password is None:
            self._load_password()

        if self._username is None or self._password is None:
            print(f"Enter credentials ({self._reason})")

        username: str
        if self._username is None:
            username = input("Username: ")
            self._username = username
        else:
            username = self._username

        # The username may only just have been entered, so the keyring has to
        # be consulted (again) before asking for a password.
        if self._password is None:
            self._load_password()

        password: str
        if self._password is None:
            password = getpass.getpass(prompt="Password: ")
            self._password = password
            self._save_password()
        else:
            password = self._password

        return (username, password)

    def _load_password(self) -> None:
        """
        Loads the saved password associated with self._username from the system
        keyring service (or None if no password has been saved yet) and stores
        it in self._password.
        """
        self._password = keyring.get_password(self._SERVICE_NAME, self._username)

    def _save_password(self) -> None:
        """
        Saves self._password to the system keyring service and associates it
        with self._username.
        """
        keyring.set_password(self._SERVICE_NAME, self._username, self._password)

    def invalidate_credentials(self) -> None:
        """
        Marks the credentials as invalid and forgets the stored password.

        The password saved for self._username is removed from the system
        keyring; a missing entry is not treated as an error. Afterwards the
        in-memory state is reset by UserPassAuthenticator.invalidate_credentials().
        """
        # Without a username this class never stored anything, and handing
        # None to the keyring backend has backend-dependent behaviour.
        if self._username is not None:
            try:
                keyring.delete_password(self._SERVICE_NAME, self._username)
            except keyring.errors.PasswordDeleteError:
                # Nothing was stored (or it is already gone): nothing to undo.
                pass

        super().invalidate_credentials()

View File

@ -5,6 +5,12 @@ from pathlib import Path
from typing import List
def _mergeNoDuplicate(first: List[Path], second: List[Path]) -> List[Path]:
tmp = list(set(first + second))
tmp.sort(key=lambda x: str(x.resolve()))
return tmp
class DownloadSummary:
"""
Keeps track of all new, modified or deleted files and provides a summary.
@ -40,9 +46,9 @@ class DownloadSummary:
"""
Merges ourselves with the passed summary. Modifies this object, but not the passed one.
"""
self._new_files = list(set(self._new_files + summary.new_files))
self._modified_files = list(set(self._modified_files + summary.modified_files))
self._deleted_files = list(set(self._deleted_files + summary.deleted_files))
self._new_files = _mergeNoDuplicate(self._new_files, summary.new_files)
self._modified_files = _mergeNoDuplicate(self._modified_files, summary.modified_files)
self._deleted_files = _mergeNoDuplicate(self._deleted_files, summary.deleted_files)
def add_deleted_file(self, path: Path) -> None:
"""

View File

@ -37,3 +37,21 @@ def swallow_and_print_errors(function: TFun) -> TFun:
Console().print_exception()
return None
return cast(TFun, inner)
def retry_on_io_exception(max_retries: int, message: str) -> Callable[[TFun], TFun]:
    """
    Decorates a function so that it is attempted up to max_retries times as
    long as it raises an IOError.

    Any other exception propagates immediately. Every IOError is logged as a
    warning that names the operation via message. If all attempts fail, the
    decorated function returns None instead of raising, so callers must be
    prepared for a None result.
    """
    # Imported locally so the block does not depend on the module's import list.
    import functools

    def retry(function: TFun) -> TFun:
        @functools.wraps(function)
        def inner(*args: Any, **kwargs: Any) -> Any:
            for attempt in range(max_retries):
                try:
                    return function(*args, **kwargs)
                except IOError as error:
                    remaining = max_retries - 1 - attempt
                    PRETTY.warning(f"Error during operation '{message}': {error}")
                    # Only announce a retry when one is actually going to happen.
                    if remaining > 0:
                        PRETTY.warning(
                            f"Retrying operation '{message}'. Remaining retries: {remaining}")
            # All attempts failed: signal this with None rather than an exception.
            return None
        return cast(TFun, inner)
    return retry

View File

@ -37,8 +37,12 @@ class KitShibbolethAuthenticator(IliasAuthenticator):
Authenticate via KIT's shibboleth system.
"""
def __init__(self, username: Optional[str] = None, password: Optional[str] = None) -> None:
self._auth = UserPassAuthenticator("KIT ILIAS Shibboleth", username, password)
def __init__(self, authenticator: Optional[UserPassAuthenticator] = None) -> None:
if authenticator:
self._auth = authenticator
else:
self._auth = UserPassAuthenticator("KIT ILIAS Shibboleth")
self._tfa_auth = TfaAuthenticator("KIT ILIAS Shibboleth")
def authenticate(self, sess: requests.Session) -> None:
@ -70,6 +74,8 @@ class KitShibbolethAuthenticator(IliasAuthenticator):
form = soup.find("form", {"class": "full content", "method": "post"})
action = form["action"]
csrf_token = form.find("input", {"name": "csrf_token"})["value"]
# Equivalent: Enter credentials in
# https://idp.scc.kit.edu/idp/profile/SAML2/Redirect/SSO
LOGGER.debug("Attempt to log in to Shibboleth using credentials")
@ -78,6 +84,7 @@ class KitShibbolethAuthenticator(IliasAuthenticator):
"_eventId_proceed": "",
"j_username": self._auth.username,
"j_password": self._auth.password,
"csrf_token": csrf_token
}
soup = soupify(sess.post(url, data=data))

View File

@ -15,7 +15,7 @@ from urllib.parse import (parse_qs, urlencode, urljoin, urlparse, urlsplit,
import bs4
import requests
from ..errors import FatalException
from ..errors import FatalException, retry_on_io_exception
from ..logging import PrettyLogger
from ..utils import soupify
from .authenticators import IliasAuthenticator
@ -27,7 +27,7 @@ PRETTY = PrettyLogger(LOGGER)
def _sanitize_path_name(name: str) -> str:
return name.replace("/", "-")
return name.replace("/", "-").replace("\\", "-")
class IliasElementType(Enum):
@ -40,6 +40,7 @@ class IliasElementType(Enum):
REGULAR_FILE = "REGULAR_FILE"
VIDEO_FILE = "VIDEO_FILE"
FORUM = "FORUM"
MEETING = "MEETING"
EXTERNAL_LINK = "EXTERNAL_LINK"
def is_folder(self) -> bool:
@ -241,6 +242,8 @@ class IliasCrawler:
entries_to_process += self._crawl_video_directory(entry.path, url)
continue
PRETTY.warning(f"Unknown type: {entry.entry_type}!")
return result
def _crawl_folder(self, folder_path: Path, url: str) -> List[IliasCrawlerEntry]:
@ -269,6 +272,25 @@ class IliasCrawler:
if element_type == IliasElementType.REGULAR_FILE:
result += self._crawl_file(folder_path, link, abs_url)
elif element_type == IliasElementType.MEETING:
meeting_name = str(element_path.name)
date_portion_str = meeting_name.split(" - ")[0]
date_portion = demangle_date(date_portion_str)
if not date_portion:
result += [IliasCrawlerEntry(element_path, abs_url, element_type, None)]
continue
rest_of_name = meeting_name
if rest_of_name.startswith(date_portion_str):
rest_of_name = rest_of_name[len(date_portion_str):]
new_name = datetime.datetime.strftime(date_portion, "%Y-%m-%d, %H:%M") \
+ rest_of_name
new_path = Path(folder_path, _sanitize_path_name(new_name))
result += [
IliasCrawlerEntry(new_path, abs_url, IliasElementType.REGULAR_FOLDER, None)
]
elif element_type is not None:
result += [IliasCrawlerEntry(element_path, abs_url, element_type, None)]
else:
@ -320,6 +342,8 @@ class IliasCrawler:
"""
# pylint: disable=too-many-return-statements
found_parent: Optional[bs4.Tag] = None
# We look for the outer div of our inner link, to find information around it
# (mostly the icon)
for parent in link_element.parents:
@ -350,6 +374,9 @@ class IliasCrawler:
if str(img_tag["src"]).endswith("frm.svg"):
return IliasElementType.FORUM
if str(img_tag["src"]).endswith("sess.svg"):
return IliasElementType.MEETING
return IliasElementType.REGULAR_FOLDER
@staticmethod
@ -598,6 +625,7 @@ class IliasCrawler:
return results
@retry_on_io_exception(3, "fetching webpage")
def _get_page(self, url: str, params: Dict[str, Any],
retry_count: int = 0) -> bs4.BeautifulSoup:
"""

View File

@ -20,7 +20,7 @@ def demangle_date(date: str) -> Optional[datetime.datetime]:
"Gestern, HH:MM"
"Heute, HH:MM"
"Morgen, HH:MM"
"dd. mon.yyyy, HH:MM
"dd. mon yyyy, HH:MM
"""
saved = locale.setlocale(locale.LC_ALL)
try:

View File

@ -10,6 +10,7 @@ from typing import Callable, List, Optional, Union
import bs4
import requests
from ..errors import retry_on_io_exception
from ..logging import PrettyLogger
from ..organizer import Organizer
from ..tmp_dir import TmpDir
@ -116,15 +117,25 @@ class IliasDownloader:
"""
LOGGER.debug("Downloading %r", info)
if not self._strategy(self._organizer, info):
self._organizer.mark(info.path)
return
tmp_file = self._tmp_dir.new_path()
while not self._try_download(info, tmp_file):
LOGGER.info("Retrying download: %r", info)
self._authenticator.authenticate(self._session)
@retry_on_io_exception(3, "downloading file")
def download_impl() -> bool:
if not self._try_download(info, tmp_file):
LOGGER.info("Re-Authenticating due to download failure: %r", info)
self._authenticator.authenticate(self._session)
raise IOError("Scheduled retry")
else:
return True
if not download_impl():
PRETTY.error(f"Download of file {info.path} failed too often! Skipping it...")
return
dst_path = self._organizer.accept_file(tmp_file, info.path)
if dst_path and info.modification_date:

View File

@ -82,7 +82,10 @@ class IpdCrawler:
items: List[IpdDownloadInfo] = []
for link in page.findAll(name="a", attrs={"href": lambda x: x and x.endswith("pdf")}):
def is_relevant_url(x: str) -> bool:
    """Tells whether the link target ends in one of the file extensions worth downloading."""
    wanted_suffixes = (".pdf", ".c", ".java", ".zip")
    return x.endswith(wanted_suffixes)
for link in page.findAll(name="a", attrs={"href": lambda x: x and is_relevant_url(x)}):
href: str = link.attrs.get("href")
name = href.split("/")[-1]

View File

@ -3,13 +3,10 @@ Contains a few logger utility functions and implementations.
"""
import logging
from pathlib import Path
from typing import List, Optional
from typing import Optional
from rich import print as rich_print
from rich._log_render import LogRender
from rich.console import Console
from rich.panel import Panel
from rich.style import Style
from rich.text import Text
from rich.theme import Theme

View File

@ -7,8 +7,9 @@ import filecmp
import logging
import os
import shutil
from enum import Enum
from pathlib import Path, PurePath
from typing import List, Optional, Set
from typing import Callable, List, Optional, Set
from .download_summary import DownloadSummary
from .location import Location
@ -19,6 +20,51 @@ LOGGER = logging.getLogger(__name__)
PRETTY = PrettyLogger(LOGGER)
class ConflictType(Enum):
    """
    The kind of change that is about to happen to a local file.

    FILE_OVERWRITTEN: An existing file will be updated
    MARKED_FILE_OVERWRITTEN: A file is written for the second+ time in this run
    FILE_DELETED: The file was deleted
    """

    FILE_OVERWRITTEN = "overwritten"
    MARKED_FILE_OVERWRITTEN = "marked_file_overwritten"
    FILE_DELETED = "deleted"
class FileConflictResolution(Enum):
    """
    The possible answers to a file conflict:

    DESTROY_EXISTING: Delete/overwrite the current file
    KEEP_EXISTING: Keep the current file
    DEFAULT: Do whatever the PFERD authors thought is sensible
    PROMPT: Interactively ask the user
    """

    DESTROY_EXISTING = "destroy"
    KEEP_EXISTING = "keep"
    DEFAULT = "default"
    PROMPT = "prompt"
FileConflictResolver = Callable[[PurePath, ConflictType], FileConflictResolution]
def resolve_prompt_user(_path: PurePath, conflict: ConflictType) -> FileConflictResolution:
    """
    Conflict resolver that overwrites on a plain file update and asks the
    user in every other case (file written twice, file about to be deleted).
    """
    is_plain_update = conflict == ConflictType.FILE_OVERWRITTEN
    return (
        FileConflictResolution.DESTROY_EXISTING
        if is_plain_update
        else FileConflictResolution.PROMPT
    )
class FileAcceptException(Exception):
"""An exception while accepting a file."""
@ -26,7 +72,7 @@ class FileAcceptException(Exception):
class Organizer(Location):
"""A helper for managing downloaded files."""
def __init__(self, path: Path):
def __init__(self, path: Path, conflict_resolver: FileConflictResolver = resolve_prompt_user):
"""Create a new organizer for a given path."""
super().__init__(path)
self._known_files: Set[Path] = set()
@ -36,6 +82,8 @@ class Organizer(Location):
self.download_summary = DownloadSummary()
self.conflict_resolver = conflict_resolver
def accept_file(self, src: Path, dst: PurePath) -> Optional[Path]:
"""
Move a file to this organizer and mark it.
@ -67,13 +115,16 @@ class Organizer(Location):
if self._is_marked(dst):
PRETTY.warning(f"File {str(dst_absolute)!r} was already written!")
if not prompt_yes_no(f"Overwrite file?", default=False):
conflict = ConflictType.MARKED_FILE_OVERWRITTEN
if self._resolve_conflict("Overwrite file?", dst_absolute, conflict, default=False):
PRETTY.ignored_file(dst_absolute, "file was written previously")
return None
# Destination file is directory
if dst_absolute.exists() and dst_absolute.is_dir():
if prompt_yes_no(f"Overwrite folder {dst_absolute} with file?", default=False):
prompt = f"Overwrite folder {dst_absolute} with file?"
conflict = ConflictType.FILE_OVERWRITTEN
if self._resolve_conflict(prompt, dst_absolute, conflict, default=False):
shutil.rmtree(dst_absolute)
else:
PRETTY.warning(f"Could not add file {str(dst_absolute)!r}")
@ -87,6 +138,12 @@ class Organizer(Location):
self.mark(dst)
return dst_absolute
prompt = f"Overwrite file {dst_absolute}?"
conflict = ConflictType.FILE_OVERWRITTEN
if not self._resolve_conflict(prompt, dst_absolute, conflict, default=True):
PRETTY.ignored_file(dst_absolute, "user conflict resolution")
return None
self.download_summary.add_modified_file(dst_absolute)
PRETTY.modified_file(dst_absolute)
else:
@ -144,6 +201,24 @@ class Organizer(Location):
def _delete_file_if_confirmed(self, path: Path) -> None:
prompt = f"Do you want to delete {path}"
if prompt_yes_no(prompt, False):
if self._resolve_conflict(prompt, path, ConflictType.FILE_DELETED, default=False):
self.download_summary.add_deleted_file(path)
path.unlink()
else:
PRETTY.ignored_file(path, "user conflict resolution")
def _resolve_conflict(
        self, prompt: str, path: Path, conflict: ConflictType, default: bool
) -> bool:
    """
    Decides whether the destructive action behind a conflict may proceed.

    The configured conflict resolver is consulted first. Its answer is mapped
    to a boolean: DEFAULT yields the passed default, KEEP_EXISTING yields
    False and DESTROY_EXISTING yields True. Without a resolver, or for any
    other answer, the user is asked via prompt_yes_no.
    """
    resolver = self.conflict_resolver
    if resolver:
        decision = resolver(path, conflict)
        if decision == FileConflictResolution.DEFAULT:
            return default
        if decision == FileConflictResolution.KEEP_EXISTING:
            return False
        if decision == FileConflictResolution.DESTROY_EXISTING:
            return True

    # No resolver, or the resolver asked for an interactive decision.
    return prompt_yes_no(prompt, default=default)

View File

@ -6,6 +6,7 @@ import logging
from pathlib import Path
from typing import Callable, List, Optional, Union
from .authenticators import UserPassAuthenticator
from .cookie_jar import CookieJar
from .diva import (DivaDownloader, DivaDownloadStrategy, DivaPlaylistCrawler,
diva_download_new)
@ -18,7 +19,7 @@ from .ipd import (IpdCrawler, IpdDownloader, IpdDownloadInfo,
IpdDownloadStrategy, ipd_download_new_or_modified)
from .location import Location
from .logging import PrettyLogger, enable_logging
from .organizer import Organizer
from .organizer import FileConflictResolver, Organizer, resolve_prompt_user
from .tmp_dir import TmpDir
from .transform import TF, Transform, apply_transform
from .utils import PathLike, to_path
@ -64,6 +65,13 @@ class Pferd(Location):
for transformable in transformables:
LOGGER.info(transformable.path)
@staticmethod
def _get_authenticator(
        username: Optional[str], password: Optional[str]
) -> KitShibbolethAuthenticator:
    """
    Builds the Shibboleth authenticator used by the ILIAS synchronizers,
    backed by a username/password authenticator for the given credentials.
    """
    return KitShibbolethAuthenticator(
        UserPassAuthenticator("ILIAS - Pferd.py", username, password)
    )
def _ilias(
self,
target: PathLike,
@ -76,12 +84,13 @@ class Pferd(Location):
download_strategy: IliasDownloadStrategy,
timeout: int,
clean: bool = True,
file_conflict_resolver: FileConflictResolver = resolve_prompt_user
) -> Organizer:
# pylint: disable=too-many-locals
cookie_jar = CookieJar(to_path(cookies) if cookies else None)
session = cookie_jar.create_session()
tmp_dir = self._tmp_dir.new_subdir()
organizer = Organizer(self.resolve(to_path(target)))
organizer = Organizer(self.resolve(to_path(target)), file_conflict_resolver)
crawler = IliasCrawler(base_url, session, authenticator, dir_filter)
downloader = IliasDownloader(tmp_dir, organizer, session,
@ -117,6 +126,7 @@ class Pferd(Location):
download_strategy: IliasDownloadStrategy = download_modified_or_new,
clean: bool = True,
timeout: int = 5,
file_conflict_resolver: FileConflictResolver = resolve_prompt_user
) -> Organizer:
"""
Synchronizes a folder with the ILIAS instance of the KIT.
@ -144,9 +154,11 @@ class Pferd(Location):
clean {bool} -- Whether to clean up when the method finishes.
timeout {int} -- The download timeout for opencast videos. Sadly needed due to a
requests bug.
file_conflict_resolver {FileConflictResolver} -- A function specifying how to deal
with overwriting or deleting files. The default always asks the user.
"""
# This authenticator only works with the KIT ilias instance.
authenticator = KitShibbolethAuthenticator(username=username, password=password)
authenticator = Pferd._get_authenticator(username=username, password=password)
PRETTY.starting_synchronizer(target, "ILIAS", course_id)
organizer = self._ilias(
@ -159,7 +171,8 @@ class Pferd(Location):
transform=transform,
download_strategy=download_strategy,
clean=clean,
timeout=timeout
timeout=timeout,
file_conflict_resolver=file_conflict_resolver
)
self._download_summary.merge(organizer.download_summary)
@ -184,6 +197,7 @@ class Pferd(Location):
download_strategy: IliasDownloadStrategy = download_modified_or_new,
clean: bool = True,
timeout: int = 5,
file_conflict_resolver: FileConflictResolver = resolve_prompt_user
) -> Organizer:
"""
Synchronizes a folder with the ILIAS instance of the KIT. This method will crawl the ILIAS
@ -210,9 +224,11 @@ class Pferd(Location):
clean {bool} -- Whether to clean up when the method finishes.
timeout {int} -- The download timeout for opencast videos. Sadly needed due to a
requests bug.
file_conflict_resolver {FileConflictResolver} -- A function specifying how to deal
with overwriting or deleting files. The default always asks the user.
"""
# This authenticator only works with the KIT ilias instance.
authenticator = KitShibbolethAuthenticator(username=username, password=password)
authenticator = Pferd._get_authenticator(username, password)
PRETTY.starting_synchronizer(target, "ILIAS", "Personal Desktop")
organizer = self._ilias(
@ -225,7 +241,8 @@ class Pferd(Location):
transform=transform,
download_strategy=download_strategy,
clean=clean,
timeout=timeout
timeout=timeout,
file_conflict_resolver=file_conflict_resolver
)
self._download_summary.merge(organizer.download_summary)
@ -245,6 +262,7 @@ class Pferd(Location):
download_strategy: IliasDownloadStrategy = download_modified_or_new,
clean: bool = True,
timeout: int = 5,
file_conflict_resolver: FileConflictResolver = resolve_prompt_user
) -> Organizer:
"""
Synchronizes a folder with a given folder on the ILIAS instance of the KIT.
@ -271,9 +289,11 @@ class Pferd(Location):
clean {bool} -- Whether to clean up when the method finishes.
timeout {int} -- The download timeout for opencast videos. Sadly needed due to a
requests bug.
file_conflict_resolver {FileConflictResolver} -- A function specifying how to deal
with overwriting or deleting files. The default always asks the user.
"""
# This authenticator only works with the KIT ilias instance.
authenticator = KitShibbolethAuthenticator(username=username, password=password)
authenticator = Pferd._get_authenticator(username=username, password=password)
PRETTY.starting_synchronizer(target, "ILIAS", "An ILIAS element by url")
if not full_url.startswith("https://ilias.studium.kit.edu"):
@ -289,7 +309,8 @@ class Pferd(Location):
transform=transform,
download_strategy=download_strategy,
clean=clean,
timeout=timeout
timeout=timeout,
file_conflict_resolver=file_conflict_resolver
)
self._download_summary.merge(organizer.download_summary)
@ -303,7 +324,8 @@ class Pferd(Location):
url: str,
transform: Transform = lambda x: x,
download_strategy: IpdDownloadStrategy = ipd_download_new_or_modified,
clean: bool = True
clean: bool = True,
file_conflict_resolver: FileConflictResolver = resolve_prompt_user
) -> Organizer:
"""
Synchronizes a folder with a DIVA playlist.
@ -319,6 +341,8 @@ class Pferd(Location):
be downloaded. Can save bandwidth and reduce the number of requests.
(default: {diva_download_new})
clean {bool} -- Whether to clean up when the method finishes.
file_conflict_resolver {FileConflictResolver} -- A function specifying how to deal
with overwriting or deleting files. The default always asks the user.
"""
tmp_dir = self._tmp_dir.new_subdir()
@ -329,7 +353,7 @@ class Pferd(Location):
if isinstance(target, Organizer):
organizer = target
else:
organizer = Organizer(self.resolve(to_path(target)))
organizer = Organizer(self.resolve(to_path(target)), file_conflict_resolver)
PRETTY.starting_synchronizer(organizer.path, "IPD", url)
@ -357,7 +381,8 @@ class Pferd(Location):
playlist_location: str,
transform: Transform = lambda x: x,
download_strategy: DivaDownloadStrategy = diva_download_new,
clean: bool = True
clean: bool = True,
file_conflict_resolver: FileConflictResolver = resolve_prompt_user
) -> Organizer:
"""
Synchronizes a folder with a DIVA playlist.
@ -374,6 +399,8 @@ class Pferd(Location):
be downloaded. Can save bandwidth and reduce the number of requests.
(default: {diva_download_new})
clean {bool} -- Whether to clean up when the method finishes.
file_conflict_resolver {FileConflictResolver} -- A function specifying how to deal
with overwriting or deleting files. The default always asks the user.
"""
tmp_dir = self._tmp_dir.new_subdir()
@ -389,7 +416,7 @@ class Pferd(Location):
if isinstance(target, Organizer):
organizer = target
else:
organizer = Organizer(self.resolve(to_path(target)))
organizer = Organizer(self.resolve(to_path(target)), file_conflict_resolver)
PRETTY.starting_synchronizer(organizer.path, "DIVA", playlist_id)

View File

@ -5,6 +5,8 @@ only files whose names match a regex, or renaming files from one numbering
scheme to another.
"""
import os
import re
from dataclasses import dataclass
from pathlib import PurePath
from typing import Callable, List, Optional, TypeVar
@ -45,7 +47,8 @@ def apply_transform(
# Transform combinators
keep = lambda path: path
def keep(path: PurePath) -> Optional[PurePath]:
return path
def attempt(*args: Transform) -> Transform:
def inner(path: PurePath) -> Optional[PurePath]:
@ -125,3 +128,15 @@ def re_rename(regex: Regex, target: str) -> Transform:
return path.with_name(target.format(*groups))
return None
return inner
def sanitize_windows_path(path: PurePath) -> PurePath:
    """
    A small function to escape characters that are forbidden in windows path names.
    This method is a no-op on other operating systems.

    On windows every path component has the characters < > : " / | ? *
    replaced by an underscore. A leading drive or root component (the path's
    anchor, e.g. "C:\\") is kept untouched so that absolute paths stay valid.
    """
    if os.name != 'nt':
        return path

    parts = list(path.parts)
    # When the path has an anchor it is always the first part; it legitimately
    # contains ":" and separators and must not be rewritten.
    anchor_parts = parts[:1] if path.anchor else []
    name_parts = parts[len(anchor_parts):]

    # Escape windows illegal path characters
    sanitized_parts = [re.sub(r'[<>:"/|?*]', "_", part) for part in name_parts]
    return PurePath(*anchor_parts, *sanitized_parts)

View File

@ -37,7 +37,7 @@ Ensure that you have at least Python 3.8 installed.
To install PFERD or update your installation to the latest version, run this
wherever you want to install or have already installed PFERD:
```
$ pip install git+https://github.com/Garmelon/PFERD@v2.4.4
$ pip install git+https://github.com/Garmelon/PFERD@v2.6.1
```
The use of [venv] is recommended.
@ -59,9 +59,9 @@ A full example setup and initial use could look like:
$ mkdir Vorlesungen
$ cd Vorlesungen
$ python3 -m venv .venv
$ .venv/bin/activate
$ pip install git+https://github.com/Garmelon/PFERD@v2.4.4
$ curl -O https://raw.githubusercontent.com/Garmelon/PFERD/v2.4.4/example_config.py
$ source .venv/bin/activate
$ pip install git+https://github.com/Garmelon/PFERD@v2.6.1
$ curl -O https://raw.githubusercontent.com/Garmelon/PFERD/v2.6.1/example_config.py
$ python3 example_config.py
$ deactivate
```
@ -69,7 +69,7 @@ $ deactivate
Subsequent runs of the program might look like:
```
$ cd Vorlesungen
$ .venv/bin/activate
$ source .venv/bin/activate
$ python3 example_config.py
$ deactivate
```

View File

@ -3,5 +3,5 @@ disallow_untyped_defs = True
disallow_incomplete_defs = True
no_implicit_optional = True
[mypy-rich.*,bs4]
[mypy-rich.*,bs4,keyring]
ignore_missing_imports = True

4
requirements.txt Normal file
View File

@ -0,0 +1,4 @@
requests>=2.21.0
beautifulsoup4>=4.7.1
rich>=2.1.0
keyring>=21.5.0

View File

@ -2,12 +2,13 @@ from setuptools import find_packages, setup
setup(
name="PFERD",
version="2.4.4",
version="2.6.1",
packages=find_packages(),
install_requires=[
"requests>=2.21.0",
"beautifulsoup4>=4.7.1",
"rich>=2.1.0"
"rich>=2.1.0",
"keyring>=21.5.0"
],
)

View File

@ -5,75 +5,156 @@ A simple script to download a course by name from ILIAS.
"""
import argparse
import os
import re
import logging
import sys
from pathlib import Path, PurePath
from typing import Optional
from urllib.parse import urlparse
from PFERD import Pferd
from PFERD.authenticators import KeyringAuthenticator, UserPassAuthenticator
from PFERD.cookie_jar import CookieJar
from PFERD.ilias import (IliasCrawler, IliasElementType,
KitShibbolethAuthenticator)
from PFERD.logging import PrettyLogger, enable_logging
from PFERD.organizer import (ConflictType, FileConflictResolution,
FileConflictResolver, resolve_prompt_user)
from PFERD.transform import sanitize_windows_path
from PFERD.utils import to_path
_LOGGER = logging.getLogger("sync_url")
_PRETTY = PrettyLogger(_LOGGER)
def sanitize_path(path: PurePath) -> Optional[PurePath]:
# Escape windows illegal path characters
if os.name == 'nt':
sanitized_parts = [re.sub(r'[<>:"/|?]', "_", x) for x in list(path.parts)]
return PurePath(*sanitized_parts)
return path
def _extract_credentials(file_path: Optional[str]) -> UserPassAuthenticator:
    """
    Builds a username/password authenticator from an optional credential file.

    Only the first line of the file is used and must look like
    '<user>:<password>'; everything after the first colon is the password. A
    missing user or password part is passed on as None. Without a file path,
    or with an empty file, an authenticator without any credentials is
    returned. If the file does not exist, an error is logged and the program
    exits with status 1.
    """
    if not file_path:
        return UserPassAuthenticator("KIT ILIAS Shibboleth", None, None)

    if not Path(file_path).exists():
        _PRETTY.error("Credential file does not exist")
        sys.exit(1)

    with open(file_path, "r") as file:
        lines = file.read().splitlines()

    # An empty file has no first line; treat it like an empty line instead of
    # failing with an IndexError.
    first_line = lines[0] if lines else ""
    read_name, *read_password = first_line.split(":", 1)

    name = read_name if read_name else None
    password = read_password[0] if read_password else None
    return UserPassAuthenticator("KIT ILIAS Shibboleth", username=name, password=password)
def _resolve_remote_first(_path: PurePath, _conflict: ConflictType) -> FileConflictResolution:
    """Conflict resolver that always lets the remote state replace the local file."""
    resolution = FileConflictResolution.DESTROY_EXISTING
    return resolution
def _resolve_local_first(_path: PurePath, _conflict: ConflictType) -> FileConflictResolution:
    """Conflict resolver that always keeps the existing local file."""
    resolution = FileConflictResolution.KEEP_EXISTING
    return resolution
def _resolve_no_delete(_path: PurePath, conflict: ConflictType) -> FileConflictResolution:
    """
    Conflict resolver that lets files be overwritten (including files already
    written in this run) but keeps every file that would otherwise be removed.
    """
    overwrite_conflicts = (
        ConflictType.FILE_OVERWRITTEN,
        ConflictType.MARKED_FILE_OVERWRITTEN,
    )
    # Update files
    if conflict in overwrite_conflicts:
        return FileConflictResolution.DESTROY_EXISTING

    # But do not delete them
    return FileConflictResolution.KEEP_EXISTING
def main() -> None:
    """
    Entry point of the ``sync_url`` script.

    Parses the command line, sets up authentication (system keyring or
    credential file), determines the target folder (either the one given on
    the command line or one derived from the ILIAS element name) and then
    synchronizes the given ILIAS URL into that folder.

    Returns early, after printing an error, if keyring authentication was
    requested without ``--username`` or if no folder was given and the
    element name could not be fetched.
    """
    enable_logging(name="sync_url")

    parser = argparse.ArgumentParser()
    parser.add_argument("--test-run", action="store_true")
    parser.add_argument('-c', '--cookies', nargs='?', default=None, help="File to store cookies in")
    parser.add_argument('-u', '--username', nargs='?', default=None, help="Username for Ilias")
    parser.add_argument('-p', '--password', nargs='?', default=None, help="Password for Ilias")
    parser.add_argument('--credential-file', nargs='?', default=None,
                        help="Path to a file containing credentials for Ilias. The file must have "
                        "one line in the following format: '<user>:<password>'")
    parser.add_argument("-k", "--keyring", action="store_true",
                        help="Use the system keyring service for authentication")
    # Registered exactly once and as a flag: adding the same option string a
    # second time makes argparse fail with a "conflicting option" error.
    parser.add_argument('--no-videos', action="store_true", help="Don't download videos")
    parser.add_argument('--local-first', action="store_true",
                        help="Don't prompt for confirmation, keep existing files")
    parser.add_argument('--remote-first', action="store_true",
                        help="Don't prompt for confirmation, delete and overwrite local files")
    parser.add_argument('--no-delete', action="store_true",
                        help="Don't prompt for confirmation, overwrite local files, don't delete")
    parser.add_argument('url', help="URL to the course page")
    parser.add_argument('folder', nargs='?', default=None, help="Folder to put stuff into")
    args = parser.parse_args()

    cookie_jar = CookieJar(to_path(args.cookies) if args.cookies else None)
    session = cookie_jar.create_session()

    if args.keyring:
        if not args.username:
            _PRETTY.error("Keyring auth selected but no --username passed!")
            return
        inner_auth: UserPassAuthenticator = KeyringAuthenticator(
            "KIT ILIAS Shibboleth", username=args.username, password=args.password
        )
    else:
        inner_auth = _extract_credentials(args.credential_file)

    # The same credentials are used by the crawler below (via the Shibboleth
    # authenticator) and are handed to Pferd for the actual synchronization.
    username, password = inner_auth.get_credentials()
    authenticator = KitShibbolethAuthenticator(inner_auth)

    url = urlparse(args.url)

    # The crawler only needs the base URL (scheme and host) of the instance.
    crawler = IliasCrawler(url.scheme + '://' + url.netloc, session,
                           authenticator, lambda x, y: True)

    cookie_jar.load_cookies()

    if args.folder is None:
        # No folder given: derive it from the name of the ILIAS element.
        element_name = crawler.find_element_name(args.url)
        if not element_name:
            print("Error, could not get element name. Please specify a folder yourself.")
            return
        # Path separators in the element name must not create sub-directories.
        folder = sanitize_windows_path(Path(element_name.replace("/", "-").replace("\\", "-")))
        cookie_jar.save_cookies()
    else:
        folder = Path(args.folder)

    # files may not escape the pferd_root with relative paths
    # note: Path(Path.cwd, Path(folder)) == Path(folder) if it is an absolute path
    pferd_root = Path(Path.cwd(), Path(folder)).parent
    # Folder might be a *PurePath* at this point
    target = Path(folder).resolve().name
    pferd = Pferd(pferd_root, test_run=args.test_run)

    def dir_filter(_: Path, element: IliasElementType) -> bool:
        """Accept every element, except videos when --no-videos was passed."""
        if args.no_videos:
            return element not in [IliasElementType.VIDEO_FILE, IliasElementType.VIDEO_FOLDER]
        return True

    if args.local_first:
        file_conflict_resolver: FileConflictResolver = _resolve_local_first
    elif args.no_delete:
        file_conflict_resolver = _resolve_no_delete
    elif args.remote_first:
        file_conflict_resolver = _resolve_remote_first
    else:
        file_conflict_resolver = resolve_prompt_user

    pferd.enable_logging()

    # fetch
    pferd.ilias_kit_folder(
        target=target,
        full_url=args.url,
        cookies=args.cookies,
        dir_filter=dir_filter,
        username=username,
        password=password,
        file_conflict_resolver=file_conflict_resolver,
        transform=sanitize_windows_path
    )

    pferd.print_summary()
# Run the synchronization only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()