Move download strategies to downloader

Also fixes an issue where the downloader did not mark files that were
skipped by the download strategy.
This commit is contained in:
Joscha 2020-04-24 14:26:20 +00:00
parent 4d32f863bc
commit 5b929f09a2
4 changed files with 50 additions and 56 deletions

View File

@ -4,5 +4,6 @@ Synchronizing files from ILIAS instances (https://www.ilias.de/).
from .authenticators import IliasAuthenticator, KitShibbolethAuthenticator
from .crawler import IliasCrawler, IliasDirectoryFilter
from .download_strategies import *
from .downloader import IliasDownloader
from .downloader import (IliasDownloader, IliasDownloadInfo,
IliasDownloadStrategy, download_everything,
download_modified_or_new)

View File

@ -1,40 +0,0 @@
"""
Contains a few default strategies for limiting the amount of downloaded files.
"""
import logging
from typing import Callable
from ..organizer import Organizer
from ..utils import PrettyLogger
from .downloader import IliasDownloadInfo
LOGGER = logging.getLogger(__name__)
PRETTY = PrettyLogger(LOGGER)
DownloadStrategy = Callable[[Organizer, IliasDownloadInfo], bool]
def download_everything(organizer: Organizer, info: IliasDownloadInfo) -> bool:
    # pylint: disable=unused-argument
    """
    Strategy that never filters anything out.

    Both arguments are ignored; they are only present so that this function
    matches the strategy callable signature. Always returns True.
    """
    return True
def download_modified_or_new(organizer: Organizer, info: IliasDownloadInfo) -> bool:
    """
    Strategy that accepts files which are new or newer than the local copy.

    Returns True when the path resolved by the organizer does not exist, when
    the info carries no modification date, or when the info's modification
    timestamp is strictly greater than the local file's mtime. Otherwise the
    path is reported as filtered and False is returned.
    """
    local_file = organizer.resolve(info.path)

    # Without a local file or a remote date there is nothing to compare.
    if not local_file.exists() or info.modification_date is None:
        return True

    local_mtime = local_file.stat().st_mtime
    remote_is_newer = local_mtime < info.modification_date.timestamp()

    if not remote_is_newer:
        PRETTY.filtered_path(info.path, "Local file had newer or equal modification time")

    return remote_is_newer

View File

@ -1,9 +1,10 @@
"""Contains a downloader for ILIAS."""
import datetime
import logging
from dataclasses import dataclass
from pathlib import Path
from typing import List, Optional
from typing import Callable, List, Optional
import bs4
import requests
@ -11,9 +12,12 @@ import requests
from ..organizer import Organizer
from ..tmp_dir import TmpDir
from ..transform import Transformable
from ..utils import soupify, stream_to_path
from ..utils import PrettyLogger, soupify, stream_to_path
from .authenticators import IliasAuthenticator
LOGGER = logging.getLogger(__name__)
PRETTY = PrettyLogger(LOGGER)
class ContentTypeException(Exception):
"""Thrown when the content type of the ilias element can not be handled."""
@ -30,7 +34,36 @@ class IliasDownloadInfo(Transformable):
# parameters: Dict[str, Any] = field(default_factory=dict)
IliasDownloadStrategy = Callable[[Organizer, IliasDownloadInfo], bool]
def download_everything(organizer: Organizer, info: IliasDownloadInfo) -> bool:
    # pylint: disable=unused-argument
    """
    Download strategy that accepts every file.

    Neither the organizer nor the download info is inspected; the parameters
    exist only to satisfy the strategy callable signature. Always returns True.
    """
    return True
def download_modified_or_new(organizer: Organizer, info: IliasDownloadInfo) -> bool:
    """
    Download strategy that accepts new files and files with a newer date.

    Returns True if the target path resolved by the organizer does not exist,
    if the info has no modification date, or if the info's modification
    timestamp is strictly greater than the mtime of the local file. In every
    other case the path is logged as filtered and False is returned.
    """
    target = organizer.resolve(info.path)

    # A missing local file or a missing remote date means: always download.
    if not target.exists() or info.modification_date is None:
        return True

    local_seconds = target.stat().st_mtime
    should_download = local_seconds < info.modification_date.timestamp()

    if not should_download:
        PRETTY.filtered_path(info.path, "Local file had newer or equal modification time")

    return should_download
class IliasDownloader:
# pylint: disable=too-many-arguments
"""A downloader for ILIAS."""
def __init__(
@ -39,6 +72,7 @@ class IliasDownloader:
organizer: Organizer,
session: requests.Session,
authenticator: IliasAuthenticator,
strategy: IliasDownloadStrategy,
):
"""
Create a new IliasDownloader.
@ -48,6 +82,7 @@ class IliasDownloader:
self._organizer = organizer
self._session = session
self._authenticator = authenticator
self._strategy = strategy
def download_all(self, infos: List[IliasDownloadInfo]) -> None:
"""
@ -64,6 +99,10 @@ class IliasDownloader:
Retries authentication until eternity if it could not fetch the file.
"""
if not self._strategy(self._organizer, info):
self._organizer.mark(info.path)
return
tmp_file = self._tmp_dir.new_path()
while not self._try_download(info, tmp_file):

View File

@ -8,9 +8,8 @@ from typing import Optional, Union
from .cookie_jar import CookieJar
from .ilias import (IliasAuthenticator, IliasCrawler, IliasDirectoryFilter,
IliasDownloader, KitShibbolethAuthenticator)
from .ilias.download_strategies import (DownloadStrategy,
download_modified_or_new)
IliasDownloader, IliasDownloadStrategy,
KitShibbolethAuthenticator, download_modified_or_new)
from .location import Location
from .organizer import Organizer
from .tmp_dir import TmpDir
@ -45,7 +44,7 @@ class Pferd(Location):
cookies: Optional[Path],
dir_filter: IliasDirectoryFilter,
transform: Transform,
download_strategy: DownloadStrategy,
download_strategy: IliasDownloadStrategy,
) -> None:
# pylint: disable=too-many-locals
cookie_jar = CookieJar(cookies)
@ -54,17 +53,12 @@ class Pferd(Location):
organizer = Organizer(self.resolve(Path(target)))
crawler = IliasCrawler(base_url, course_id, session, authenticator, dir_filter)
downloader = IliasDownloader(tmp_dir, organizer, session, authenticator)
downloader = IliasDownloader(tmp_dir, organizer, session, authenticator, download_strategy)
cookie_jar.load_cookies()
info = crawler.crawl()
cookie_jar.save_cookies()
downloader.download_all(
[
info for info in apply_transform(transform, info)
if download_strategy(organizer, info)
]
)
downloader.download_all(apply_transform(transform, info))
cookie_jar.save_cookies()
organizer.cleanup()
@ -77,7 +71,7 @@ class Pferd(Location):
cookies: Optional[Path] = None,
username: Optional[str] = None,
password: Optional[str] = None,
download_strategy: DownloadStrategy = download_modified_or_new,
download_strategy: IliasDownloadStrategy = download_modified_or_new,
) -> None:
"""
Synchronizes a folder with the ILIAS instance of the KIT.