mirror of
https://github.com/Garmelon/PFERD.git
synced 2023-12-21 10:23:01 +01:00
Move download strategies to downloader
Also fixes an issue where the downloader didn't mark files that were not downloaded due to the strategy used.
This commit is contained in:
parent
4d32f863bc
commit
5b929f09a2
@ -4,5 +4,6 @@ Synchronizing files from ILIAS instances (https://www.ilias.de/).
|
||||
|
||||
from .authenticators import IliasAuthenticator, KitShibbolethAuthenticator
|
||||
from .crawler import IliasCrawler, IliasDirectoryFilter
|
||||
from .download_strategies import *
|
||||
from .downloader import IliasDownloader
|
||||
from .downloader import (IliasDownloader, IliasDownloadInfo,
|
||||
IliasDownloadStrategy, download_everything,
|
||||
download_modified_or_new)
|
||||
|
@ -1,40 +0,0 @@
|
||||
"""
|
||||
Contains a few default strategies for limiting the amount of downloaded files.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Callable
|
||||
|
||||
from ..organizer import Organizer
|
||||
from ..utils import PrettyLogger
|
||||
from .downloader import IliasDownloadInfo
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
PRETTY = PrettyLogger(LOGGER)
|
||||
|
||||
# Signature of a download strategy: it is called with the organizer and the
# download info of one file and returns a bool; callers keep the file for
# downloading only when the result is truthy.
DownloadStrategy = Callable[[Organizer, IliasDownloadInfo], bool]
|
||||
|
||||
|
||||
def download_everything(organizer: Organizer, info: IliasDownloadInfo) -> bool:
    # pylint: disable=unused-argument
    """
    Strategy that performs no filtering at all.

    Neither argument is inspected; they are accepted only so that the function
    has the shape of a DownloadStrategy. The result is always True.
    """
    accept = True
    return accept
|
||||
|
||||
|
||||
def download_modified_or_new(organizer: Organizer, info: IliasDownloadInfo) -> bool:
    """
    Strategy that accepts files which are new or have a more recent
    modification date than the local copy.

    The local location is looked up with organizer.resolve(info.path). A file
    is rejected (False) only when the local file exists, the info carries a
    modification date, and that date is not strictly newer than the local
    mtime; the rejection is reported via PRETTY.filtered_path. In every other
    case the result is True.
    """
    local_path = organizer.resolve(info.path)

    # A comparison is only possible if both sides have a timestamp.
    if local_path.exists() and info.modification_date is not None:
        local_mtime = local_path.stat().st_mtime
        remote_is_newer = info.modification_date.timestamp() > local_mtime
        if not remote_is_newer:
            PRETTY.filtered_path(info.path, "Local file had newer or equal modification time")
            return False

    return True
|
@ -1,9 +1,10 @@
|
||||
"""Contains a downloader for ILIAS."""
|
||||
|
||||
import datetime
|
||||
import logging
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import List, Optional
|
||||
from typing import Callable, List, Optional
|
||||
|
||||
import bs4
|
||||
import requests
|
||||
@ -11,9 +12,12 @@ import requests
|
||||
from ..organizer import Organizer
|
||||
from ..tmp_dir import TmpDir
|
||||
from ..transform import Transformable
|
||||
from ..utils import soupify, stream_to_path
|
||||
from ..utils import PrettyLogger, soupify, stream_to_path
|
||||
from .authenticators import IliasAuthenticator
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
PRETTY = PrettyLogger(LOGGER)
|
||||
|
||||
|
||||
class ContentTypeException(Exception):
|
||||
"""Thrown when the content type of the ilias element can not be handled."""
|
||||
@ -30,7 +34,36 @@ class IliasDownloadInfo(Transformable):
|
||||
# parameters: Dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
|
||||
# Signature of a download strategy: it is called with the organizer and the
# download info of one file and returns a bool. The downloader only fetches
# the file when the result is truthy.
IliasDownloadStrategy = Callable[[Organizer, IliasDownloadInfo], bool]
|
||||
|
||||
|
||||
def download_everything(organizer: Organizer, info: IliasDownloadInfo) -> bool:
    # pylint: disable=unused-argument
    """
    Download strategy that never filters: every file is accepted.

    Both arguments are ignored; they are present only so the function matches
    the IliasDownloadStrategy call signature.
    """
    should_download = True
    return should_download
|
||||
|
||||
|
||||
def download_modified_or_new(organizer: Organizer, info: IliasDownloadInfo) -> bool:
    """
    Accept files that are new locally or whose remote copy is more recent.

    The local path is obtained via ``organizer.resolve(info.path)``. The file
    is accepted (``True``) when

    * ``info.modification_date`` is ``None``, so no comparison is possible,
    * the local file does not exist, or
    * ``info.modification_date`` is strictly newer than the local mtime.

    Otherwise the path is reported through ``PRETTY.filtered_path`` and
    ``False`` is returned.
    """
    resolved_file = organizer.resolve(info.path)

    if info.modification_date is None:
        return True

    # Stat directly instead of exists()-then-stat(): a file that vanishes
    # between the two calls would otherwise raise instead of counting as new.
    try:
        resolved_mod_time_seconds = resolved_file.stat().st_mtime
    except (FileNotFoundError, NotADirectoryError):
        return True

    # Download if the info is newer
    if info.modification_date.timestamp() > resolved_mod_time_seconds:
        return True

    PRETTY.filtered_path(info.path, "Local file had newer or equal modification time")
    return False
|
||||
|
||||
|
||||
class IliasDownloader:
|
||||
# pylint: disable=too-many-arguments
|
||||
"""A downloader for ILIAS."""
|
||||
|
||||
def __init__(
|
||||
@ -39,6 +72,7 @@ class IliasDownloader:
|
||||
organizer: Organizer,
|
||||
session: requests.Session,
|
||||
authenticator: IliasAuthenticator,
|
||||
strategy: IliasDownloadStrategy,
|
||||
):
|
||||
"""
|
||||
Create a new IliasDownloader.
|
||||
@ -48,6 +82,7 @@ class IliasDownloader:
|
||||
self._organizer = organizer
|
||||
self._session = session
|
||||
self._authenticator = authenticator
|
||||
self._strategy = strategy
|
||||
|
||||
def download_all(self, infos: List[IliasDownloadInfo]) -> None:
|
||||
"""
|
||||
@ -64,6 +99,10 @@ class IliasDownloader:
|
||||
Retries authentication until eternity if it could not fetch the file.
|
||||
"""
|
||||
|
||||
if not self._strategy(self._organizer, info):
|
||||
self._organizer.mark(info.path)
|
||||
return
|
||||
|
||||
tmp_file = self._tmp_dir.new_path()
|
||||
|
||||
while not self._try_download(info, tmp_file):
|
||||
|
@ -8,9 +8,8 @@ from typing import Optional, Union
|
||||
|
||||
from .cookie_jar import CookieJar
|
||||
from .ilias import (IliasAuthenticator, IliasCrawler, IliasDirectoryFilter,
|
||||
IliasDownloader, KitShibbolethAuthenticator)
|
||||
from .ilias.download_strategies import (DownloadStrategy,
|
||||
download_modified_or_new)
|
||||
IliasDownloader, IliasDownloadStrategy,
|
||||
KitShibbolethAuthenticator, download_modified_or_new)
|
||||
from .location import Location
|
||||
from .organizer import Organizer
|
||||
from .tmp_dir import TmpDir
|
||||
@ -45,7 +44,7 @@ class Pferd(Location):
|
||||
cookies: Optional[Path],
|
||||
dir_filter: IliasDirectoryFilter,
|
||||
transform: Transform,
|
||||
download_strategy: DownloadStrategy,
|
||||
download_strategy: IliasDownloadStrategy,
|
||||
) -> None:
|
||||
# pylint: disable=too-many-locals
|
||||
cookie_jar = CookieJar(cookies)
|
||||
@ -54,17 +53,12 @@ class Pferd(Location):
|
||||
organizer = Organizer(self.resolve(Path(target)))
|
||||
|
||||
crawler = IliasCrawler(base_url, course_id, session, authenticator, dir_filter)
|
||||
downloader = IliasDownloader(tmp_dir, organizer, session, authenticator)
|
||||
downloader = IliasDownloader(tmp_dir, organizer, session, authenticator, download_strategy)
|
||||
|
||||
cookie_jar.load_cookies()
|
||||
info = crawler.crawl()
|
||||
cookie_jar.save_cookies()
|
||||
downloader.download_all(
|
||||
[
|
||||
info for info in apply_transform(transform, info)
|
||||
if download_strategy(organizer, info)
|
||||
]
|
||||
)
|
||||
downloader.download_all(apply_transform(transform, info))
|
||||
cookie_jar.save_cookies()
|
||||
organizer.cleanup()
|
||||
|
||||
@ -77,7 +71,7 @@ class Pferd(Location):
|
||||
cookies: Optional[Path] = None,
|
||||
username: Optional[str] = None,
|
||||
password: Optional[str] = None,
|
||||
download_strategy: DownloadStrategy = download_modified_or_new,
|
||||
download_strategy: IliasDownloadStrategy = download_modified_or_new,
|
||||
) -> None:
|
||||
"""
|
||||
Synchronizes a folder with the ILIAS instance of the KIT.
|
||||
|
Loading…
Reference in New Issue
Block a user