Move download strategies to downloader

Also fixes an issue where the downloader didn't mark files that were not
downloaded due to the strategy used.
This commit is contained in:
Joscha 2020-04-24 14:26:20 +00:00
parent 4d32f863bc
commit 5b929f09a2
4 changed files with 50 additions and 56 deletions

View File

@ -4,5 +4,6 @@ Synchronizing files from ILIAS instances (https://www.ilias.de/).
from .authenticators import IliasAuthenticator, KitShibbolethAuthenticator from .authenticators import IliasAuthenticator, KitShibbolethAuthenticator
from .crawler import IliasCrawler, IliasDirectoryFilter from .crawler import IliasCrawler, IliasDirectoryFilter
from .download_strategies import * from .downloader import (IliasDownloader, IliasDownloadInfo,
from .downloader import IliasDownloader IliasDownloadStrategy, download_everything,
download_modified_or_new)

View File

@ -1,40 +0,0 @@
"""
Contains a few default strategies for limiting the amount of downloaded files.
"""
import logging
from typing import Callable
from ..organizer import Organizer
from ..utils import PrettyLogger
from .downloader import IliasDownloadInfo
# Module-level logger, named after this module.
LOGGER = logging.getLogger(__name__)
# PrettyLogger (from ..utils) wrapping LOGGER; used below to report filtered paths.
PRETTY = PrettyLogger(LOGGER)
# A download strategy is a callable that receives the organizer and the
# download info of a single file and returns a bool. The strategies in this
# module return True to accept a file for download.
DownloadStrategy = Callable[[Organizer, IliasDownloadInfo], bool]
def download_everything(organizer: Organizer, info: IliasDownloadInfo) -> bool:
    # pylint: disable=unused-argument
    """
    Strategy that never filters a file out.

    Both arguments are ignored; they only exist so that the function matches
    the strategy signature. The result is always True.
    """
    return True
def download_modified_or_new(organizer: Organizer, info: IliasDownloadInfo) -> bool:
    """
    Strategy that accepts files which are missing locally or outdated.

    The path of the info is resolved through the organizer. The file is
    accepted (True) when the resolved file does not exist, when the info has
    no modification date, or when the modification date of the info is
    strictly newer than the mtime of the local file. Otherwise the path is
    reported as filtered and False is returned.
    """
    local_file = organizer.resolve(info.path)

    # Nothing on disk yet: always download.
    if not local_file.exists():
        return True

    # Without a remote modification date there is nothing to compare against.
    remote_date = info.modification_date
    if remote_date is None:
        return True

    local_mtime_seconds = local_file.stat().st_mtime

    # Download if the info is newer
    is_newer = remote_date.timestamp() > local_mtime_seconds
    if not is_newer:
        PRETTY.filtered_path(info.path, "Local file had newer or equal modification time")
    return is_newer

View File

@ -1,9 +1,10 @@
"""Contains a downloader for ILIAS.""" """Contains a downloader for ILIAS."""
import datetime import datetime
import logging
from dataclasses import dataclass from dataclasses import dataclass
from pathlib import Path from pathlib import Path
from typing import List, Optional from typing import Callable, List, Optional
import bs4 import bs4
import requests import requests
@ -11,9 +12,12 @@ import requests
from ..organizer import Organizer from ..organizer import Organizer
from ..tmp_dir import TmpDir from ..tmp_dir import TmpDir
from ..transform import Transformable from ..transform import Transformable
from ..utils import soupify, stream_to_path from ..utils import PrettyLogger, soupify, stream_to_path
from .authenticators import IliasAuthenticator from .authenticators import IliasAuthenticator
# Module-level logger, named after this module.
LOGGER = logging.getLogger(__name__)
# PrettyLogger (from ..utils) wrapping LOGGER; used by the download strategies
# in this module to report filtered paths.
PRETTY = PrettyLogger(LOGGER)
class ContentTypeException(Exception): class ContentTypeException(Exception):
"""Thrown when the content type of the ilias element can not be handled.""" """Thrown when the content type of the ilias element can not be handled."""
@ -30,7 +34,36 @@ class IliasDownloadInfo(Transformable):
# parameters: Dict[str, Any] = field(default_factory=dict) # parameters: Dict[str, Any] = field(default_factory=dict)
# A download strategy is a callable that receives the organizer and the
# download info of a single file and returns a bool. IliasDownloader downloads
# the file when the strategy returns True; otherwise it only marks the path in
# the organizer and skips the download.
IliasDownloadStrategy = Callable[[Organizer, IliasDownloadInfo], bool]
def download_everything(organizer: Organizer, info: IliasDownloadInfo) -> bool:
    # pylint: disable=unused-argument
    """
    Strategy that never filters a file out.

    Both arguments are ignored; they only exist so that the function matches
    the strategy signature. The result is always True.
    """
    return True
def download_modified_or_new(organizer: Organizer, info: IliasDownloadInfo) -> bool:
    """
    Strategy that accepts files which are missing locally or outdated.

    The path of the info is resolved through the organizer. The file is
    accepted (True) when the resolved file does not exist, when the info has
    no modification date, or when the modification date of the info is
    strictly newer than the mtime of the local file. Otherwise the path is
    reported as filtered and False is returned.
    """
    local_file = organizer.resolve(info.path)

    # Nothing on disk yet: always download.
    if not local_file.exists():
        return True

    # Without a remote modification date there is nothing to compare against.
    remote_date = info.modification_date
    if remote_date is None:
        return True

    local_mtime_seconds = local_file.stat().st_mtime

    # Download if the info is newer
    is_newer = remote_date.timestamp() > local_mtime_seconds
    if not is_newer:
        PRETTY.filtered_path(info.path, "Local file had newer or equal modification time")
    return is_newer
class IliasDownloader: class IliasDownloader:
# pylint: disable=too-many-arguments
"""A downloader for ILIAS.""" """A downloader for ILIAS."""
def __init__( def __init__(
@ -39,6 +72,7 @@ class IliasDownloader:
organizer: Organizer, organizer: Organizer,
session: requests.Session, session: requests.Session,
authenticator: IliasAuthenticator, authenticator: IliasAuthenticator,
strategy: IliasDownloadStrategy,
): ):
""" """
Create a new IliasDownloader. Create a new IliasDownloader.
@ -48,6 +82,7 @@ class IliasDownloader:
self._organizer = organizer self._organizer = organizer
self._session = session self._session = session
self._authenticator = authenticator self._authenticator = authenticator
self._strategy = strategy
def download_all(self, infos: List[IliasDownloadInfo]) -> None: def download_all(self, infos: List[IliasDownloadInfo]) -> None:
""" """
@ -64,6 +99,10 @@ class IliasDownloader:
Retries authentication until eternity if it could not fetch the file. Retries authentication until eternity if it could not fetch the file.
""" """
if not self._strategy(self._organizer, info):
self._organizer.mark(info.path)
return
tmp_file = self._tmp_dir.new_path() tmp_file = self._tmp_dir.new_path()
while not self._try_download(info, tmp_file): while not self._try_download(info, tmp_file):

View File

@ -8,9 +8,8 @@ from typing import Optional, Union
from .cookie_jar import CookieJar from .cookie_jar import CookieJar
from .ilias import (IliasAuthenticator, IliasCrawler, IliasDirectoryFilter, from .ilias import (IliasAuthenticator, IliasCrawler, IliasDirectoryFilter,
IliasDownloader, KitShibbolethAuthenticator) IliasDownloader, IliasDownloadStrategy,
from .ilias.download_strategies import (DownloadStrategy, KitShibbolethAuthenticator, download_modified_or_new)
download_modified_or_new)
from .location import Location from .location import Location
from .organizer import Organizer from .organizer import Organizer
from .tmp_dir import TmpDir from .tmp_dir import TmpDir
@ -45,7 +44,7 @@ class Pferd(Location):
cookies: Optional[Path], cookies: Optional[Path],
dir_filter: IliasDirectoryFilter, dir_filter: IliasDirectoryFilter,
transform: Transform, transform: Transform,
download_strategy: DownloadStrategy, download_strategy: IliasDownloadStrategy,
) -> None: ) -> None:
# pylint: disable=too-many-locals # pylint: disable=too-many-locals
cookie_jar = CookieJar(cookies) cookie_jar = CookieJar(cookies)
@ -54,17 +53,12 @@ class Pferd(Location):
organizer = Organizer(self.resolve(Path(target))) organizer = Organizer(self.resolve(Path(target)))
crawler = IliasCrawler(base_url, course_id, session, authenticator, dir_filter) crawler = IliasCrawler(base_url, course_id, session, authenticator, dir_filter)
downloader = IliasDownloader(tmp_dir, organizer, session, authenticator) downloader = IliasDownloader(tmp_dir, organizer, session, authenticator, download_strategy)
cookie_jar.load_cookies() cookie_jar.load_cookies()
info = crawler.crawl() info = crawler.crawl()
cookie_jar.save_cookies() cookie_jar.save_cookies()
downloader.download_all( downloader.download_all(apply_transform(transform, info))
[
info for info in apply_transform(transform, info)
if download_strategy(organizer, info)
]
)
cookie_jar.save_cookies() cookie_jar.save_cookies()
organizer.cleanup() organizer.cleanup()
@ -77,7 +71,7 @@ class Pferd(Location):
cookies: Optional[Path] = None, cookies: Optional[Path] = None,
username: Optional[str] = None, username: Optional[str] = None,
password: Optional[str] = None, password: Optional[str] = None,
download_strategy: DownloadStrategy = download_modified_or_new, download_strategy: IliasDownloadStrategy = download_modified_or_new,
) -> None: ) -> None:
""" """
Synchronizes a folder with the ILIAS instance of the KIT. Synchronizes a folder with the ILIAS instance of the KIT.