From 5b929f09a2f8d2772e8b40d1eed37d7c0c824944 Mon Sep 17 00:00:00 2001 From: Joscha Date: Fri, 24 Apr 2020 14:26:20 +0000 Subject: [PATCH] Move download strategies to downloader Also fixes an issue where the downloader didn't mark files that were not downloaded due to the strategy used. --- PFERD/ilias/__init__.py | 5 ++-- PFERD/ilias/download_strategies.py | 40 --------------------------- PFERD/ilias/downloader.py | 43 ++++++++++++++++++++++++++++-- PFERD/pferd.py | 18 +++++-------- 4 files changed, 50 insertions(+), 56 deletions(-) delete mode 100644 PFERD/ilias/download_strategies.py diff --git a/PFERD/ilias/__init__.py b/PFERD/ilias/__init__.py index b243978..2c6374e 100644 --- a/PFERD/ilias/__init__.py +++ b/PFERD/ilias/__init__.py @@ -4,5 +4,6 @@ Synchronizing files from ILIAS instances (https://www.ilias.de/). from .authenticators import IliasAuthenticator, KitShibbolethAuthenticator from .crawler import IliasCrawler, IliasDirectoryFilter -from .download_strategies import * -from .downloader import IliasDownloader +from .downloader import (IliasDownloader, IliasDownloadInfo, + IliasDownloadStrategy, download_everything, + download_modified_or_new) diff --git a/PFERD/ilias/download_strategies.py b/PFERD/ilias/download_strategies.py deleted file mode 100644 index c4420d6..0000000 --- a/PFERD/ilias/download_strategies.py +++ /dev/null @@ -1,40 +0,0 @@ -""" -Contains a few default strategies for limiting the amount of downloaded files. -""" - -import logging -from typing import Callable - -from ..organizer import Organizer -from ..utils import PrettyLogger -from .downloader import IliasDownloadInfo - -LOGGER = logging.getLogger(__name__) -PRETTY = PrettyLogger(LOGGER) - -DownloadStrategy = Callable[[Organizer, IliasDownloadInfo], bool] - - -def download_everything(organizer: Organizer, info: IliasDownloadInfo) -> bool: - # pylint: disable=unused-argument - """ - Accepts everything. - """ - return True - - -def download_modified_or_new(organizer: Organizer, info: IliasDownloadInfo) -> bool: - """ - Accepts new files or files with a more recent modification date. - """ - resolved_file = organizer.resolve(info.path) - if not resolved_file.exists() or info.modification_date is None: - return True - resolved_mod_time_seconds = resolved_file.stat().st_mtime - - # Download if the info is newer - if info.modification_date.timestamp() > resolved_mod_time_seconds: - return True - - PRETTY.filtered_path(info.path, "Local file had newer or equal modification time") - return False diff --git a/PFERD/ilias/downloader.py b/PFERD/ilias/downloader.py index 9a6c785..fcae6ec 100644 --- a/PFERD/ilias/downloader.py +++ b/PFERD/ilias/downloader.py @@ -1,9 +1,10 @@ """Contains a downloader for ILIAS.""" import datetime +import logging from dataclasses import dataclass from pathlib import Path -from typing import List, Optional +from typing import Callable, List, Optional import bs4 import requests @@ -11,9 +12,12 @@ import requests from ..organizer import Organizer from ..tmp_dir import TmpDir from ..transform import Transformable -from ..utils import soupify, stream_to_path +from ..utils import PrettyLogger, soupify, stream_to_path from .authenticators import IliasAuthenticator +LOGGER = logging.getLogger(__name__) +PRETTY = PrettyLogger(LOGGER) + class ContentTypeException(Exception): """Thrown when the content type of the ilias element can not be handled.""" @@ -30,7 +34,36 @@ class IliasDownloadInfo(Transformable): # parameters: Dict[str, Any] = field(default_factory=dict) +IliasDownloadStrategy = Callable[[Organizer, IliasDownloadInfo], bool] + + +def download_everything(organizer: Organizer, info: IliasDownloadInfo) -> bool: + # pylint: disable=unused-argument + """ + Accepts everything. + """ + return True + + +def download_modified_or_new(organizer: Organizer, info: IliasDownloadInfo) -> bool: + """ + Accepts new files or files with a more recent modification date. + """ + resolved_file = organizer.resolve(info.path) + if not resolved_file.exists() or info.modification_date is None: + return True + resolved_mod_time_seconds = resolved_file.stat().st_mtime + + # Download if the info is newer + if info.modification_date.timestamp() > resolved_mod_time_seconds: + return True + + PRETTY.filtered_path(info.path, "Local file had newer or equal modification time") + return False + + class IliasDownloader: + # pylint: disable=too-many-arguments """A downloader for ILIAS.""" def __init__( @@ -39,6 +72,7 @@ class IliasDownloader: organizer: Organizer, session: requests.Session, authenticator: IliasAuthenticator, + strategy: IliasDownloadStrategy, ): """ Create a new IliasDownloader. @@ -48,6 +82,7 @@ class IliasDownloader: self._organizer = organizer self._session = session self._authenticator = authenticator + self._strategy = strategy def download_all(self, infos: List[IliasDownloadInfo]) -> None: """ @@ -64,6 +99,10 @@ class IliasDownloader: Retries authentication until eternity if it could not fetch the file. """ + if not self._strategy(self._organizer, info): + self._organizer.mark(info.path) + return + tmp_file = self._tmp_dir.new_path() while not self._try_download(info, tmp_file): diff --git a/PFERD/pferd.py b/PFERD/pferd.py index 0b8f2ea..4fee3a2 100644 --- a/PFERD/pferd.py +++ b/PFERD/pferd.py @@ -8,9 +8,8 @@ from typing import Optional, Union from .cookie_jar import CookieJar from .ilias import (IliasAuthenticator, IliasCrawler, IliasDirectoryFilter, - IliasDownloader, KitShibbolethAuthenticator) -from .ilias.download_strategies import (DownloadStrategy, - download_modified_or_new) + IliasDownloader, IliasDownloadStrategy, + KitShibbolethAuthenticator, download_modified_or_new) from .location import Location from .organizer import Organizer from .tmp_dir import TmpDir @@ -45,7 +44,7 @@ class Pferd(Location): cookies: Optional[Path], dir_filter: IliasDirectoryFilter, transform: Transform, - download_strategy: DownloadStrategy, + download_strategy: IliasDownloadStrategy, ) -> None: # pylint: disable=too-many-locals cookie_jar = CookieJar(cookies) @@ -54,17 +53,12 @@ class Pferd(Location): organizer = Organizer(self.resolve(Path(target))) crawler = IliasCrawler(base_url, course_id, session, authenticator, dir_filter) - downloader = IliasDownloader(tmp_dir, organizer, session, authenticator) + downloader = IliasDownloader(tmp_dir, organizer, session, authenticator, download_strategy) cookie_jar.load_cookies() info = crawler.crawl() cookie_jar.save_cookies() - downloader.download_all( - [ - info for info in apply_transform(transform, info) - if download_strategy(organizer, info) - ] - ) + downloader.download_all(apply_transform(transform, info)) cookie_jar.save_cookies() organizer.cleanup() @@ -77,7 +71,7 @@ class Pferd(Location): cookies: Optional[Path] = None, username: Optional[str] = None, password: Optional[str] = None, - download_strategy: DownloadStrategy = download_modified_or_new, + download_strategy: IliasDownloadStrategy = download_modified_or_new, ) -> None: """ Synchronizes a folder with the ILIAS instance of the KIT.