mirror of
https://github.com/Garmelon/PFERD.git
synced 2023-12-21 10:23:01 +01:00
Move download strategies to downloader
Also fixes an issue where the downloader did not mark files that were skipped by the download strategy.
This commit is contained in:
parent
4d32f863bc
commit
5b929f09a2
@ -4,5 +4,6 @@ Synchronizing files from ILIAS instances (https://www.ilias.de/).
|
|||||||
|
|
||||||
from .authenticators import IliasAuthenticator, KitShibbolethAuthenticator
|
from .authenticators import IliasAuthenticator, KitShibbolethAuthenticator
|
||||||
from .crawler import IliasCrawler, IliasDirectoryFilter
|
from .crawler import IliasCrawler, IliasDirectoryFilter
|
||||||
from .download_strategies import *
|
from .downloader import (IliasDownloader, IliasDownloadInfo,
|
||||||
from .downloader import IliasDownloader
|
IliasDownloadStrategy, download_everything,
|
||||||
|
download_modified_or_new)
|
||||||
|
@ -1,40 +0,0 @@
|
|||||||
"""
|
|
||||||
Contains a few default strategies for limiting the amount of downloaded files.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import logging
|
|
||||||
from typing import Callable
|
|
||||||
|
|
||||||
from ..organizer import Organizer
|
|
||||||
from ..utils import PrettyLogger
|
|
||||||
from .downloader import IliasDownloadInfo
|
|
||||||
|
|
||||||
LOGGER = logging.getLogger(__name__)
|
|
||||||
PRETTY = PrettyLogger(LOGGER)
|
|
||||||
|
|
||||||
DownloadStrategy = Callable[[Organizer, IliasDownloadInfo], bool]
|
|
||||||
|
|
||||||
|
|
||||||
def download_everything(organizer: Organizer, info: IliasDownloadInfo) -> bool:
|
|
||||||
# pylint: disable=unused-argument
|
|
||||||
"""
|
|
||||||
Accepts everything.
|
|
||||||
"""
|
|
||||||
return True
|
|
||||||
|
|
||||||
|
|
||||||
def download_modified_or_new(organizer: Organizer, info: IliasDownloadInfo) -> bool:
|
|
||||||
"""
|
|
||||||
Accepts new files or files with a more recent modification date.
|
|
||||||
"""
|
|
||||||
resolved_file = organizer.resolve(info.path)
|
|
||||||
if not resolved_file.exists() or info.modification_date is None:
|
|
||||||
return True
|
|
||||||
resolved_mod_time_seconds = resolved_file.stat().st_mtime
|
|
||||||
|
|
||||||
# Download if the info is newer
|
|
||||||
if info.modification_date.timestamp() > resolved_mod_time_seconds:
|
|
||||||
return True
|
|
||||||
|
|
||||||
PRETTY.filtered_path(info.path, "Local file had newer or equal modification time")
|
|
||||||
return False
|
|
@ -1,9 +1,10 @@
|
|||||||
"""Contains a downloader for ILIAS."""
|
"""Contains a downloader for ILIAS."""
|
||||||
|
|
||||||
import datetime
|
import datetime
|
||||||
|
import logging
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import List, Optional
|
from typing import Callable, List, Optional
|
||||||
|
|
||||||
import bs4
|
import bs4
|
||||||
import requests
|
import requests
|
||||||
@ -11,9 +12,12 @@ import requests
|
|||||||
from ..organizer import Organizer
|
from ..organizer import Organizer
|
||||||
from ..tmp_dir import TmpDir
|
from ..tmp_dir import TmpDir
|
||||||
from ..transform import Transformable
|
from ..transform import Transformable
|
||||||
from ..utils import soupify, stream_to_path
|
from ..utils import PrettyLogger, soupify, stream_to_path
|
||||||
from .authenticators import IliasAuthenticator
|
from .authenticators import IliasAuthenticator
|
||||||
|
|
||||||
|
LOGGER = logging.getLogger(__name__)
|
||||||
|
PRETTY = PrettyLogger(LOGGER)
|
||||||
|
|
||||||
|
|
||||||
class ContentTypeException(Exception):
|
class ContentTypeException(Exception):
|
||||||
"""Thrown when the content type of the ilias element can not be handled."""
|
"""Thrown when the content type of the ilias element can not be handled."""
|
||||||
@ -30,7 +34,36 @@ class IliasDownloadInfo(Transformable):
|
|||||||
# parameters: Dict[str, Any] = field(default_factory=dict)
|
# parameters: Dict[str, Any] = field(default_factory=dict)
|
||||||
|
|
||||||
|
|
||||||
|
IliasDownloadStrategy = Callable[[Organizer, IliasDownloadInfo], bool]
|
||||||
|
|
||||||
|
|
||||||
|
def download_everything(organizer: Organizer, info: IliasDownloadInfo) -> bool:
|
||||||
|
# pylint: disable=unused-argument
|
||||||
|
"""
|
||||||
|
Accepts everything.
|
||||||
|
"""
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
def download_modified_or_new(organizer: Organizer, info: IliasDownloadInfo) -> bool:
|
||||||
|
"""
|
||||||
|
Accepts new files or files with a more recent modification date.
|
||||||
|
"""
|
||||||
|
resolved_file = organizer.resolve(info.path)
|
||||||
|
if not resolved_file.exists() or info.modification_date is None:
|
||||||
|
return True
|
||||||
|
resolved_mod_time_seconds = resolved_file.stat().st_mtime
|
||||||
|
|
||||||
|
# Download if the info is newer
|
||||||
|
if info.modification_date.timestamp() > resolved_mod_time_seconds:
|
||||||
|
return True
|
||||||
|
|
||||||
|
PRETTY.filtered_path(info.path, "Local file had newer or equal modification time")
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
class IliasDownloader:
|
class IliasDownloader:
|
||||||
|
# pylint: disable=too-many-arguments
|
||||||
"""A downloader for ILIAS."""
|
"""A downloader for ILIAS."""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
@ -39,6 +72,7 @@ class IliasDownloader:
|
|||||||
organizer: Organizer,
|
organizer: Organizer,
|
||||||
session: requests.Session,
|
session: requests.Session,
|
||||||
authenticator: IliasAuthenticator,
|
authenticator: IliasAuthenticator,
|
||||||
|
strategy: IliasDownloadStrategy,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Create a new IliasDownloader.
|
Create a new IliasDownloader.
|
||||||
@ -48,6 +82,7 @@ class IliasDownloader:
|
|||||||
self._organizer = organizer
|
self._organizer = organizer
|
||||||
self._session = session
|
self._session = session
|
||||||
self._authenticator = authenticator
|
self._authenticator = authenticator
|
||||||
|
self._strategy = strategy
|
||||||
|
|
||||||
def download_all(self, infos: List[IliasDownloadInfo]) -> None:
|
def download_all(self, infos: List[IliasDownloadInfo]) -> None:
|
||||||
"""
|
"""
|
||||||
@ -64,6 +99,10 @@ class IliasDownloader:
|
|||||||
Retries authentication until eternity if it could not fetch the file.
|
Retries authentication until eternity if it could not fetch the file.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
if not self._strategy(self._organizer, info):
|
||||||
|
self._organizer.mark(info.path)
|
||||||
|
return
|
||||||
|
|
||||||
tmp_file = self._tmp_dir.new_path()
|
tmp_file = self._tmp_dir.new_path()
|
||||||
|
|
||||||
while not self._try_download(info, tmp_file):
|
while not self._try_download(info, tmp_file):
|
||||||
|
@ -8,9 +8,8 @@ from typing import Optional, Union
|
|||||||
|
|
||||||
from .cookie_jar import CookieJar
|
from .cookie_jar import CookieJar
|
||||||
from .ilias import (IliasAuthenticator, IliasCrawler, IliasDirectoryFilter,
|
from .ilias import (IliasAuthenticator, IliasCrawler, IliasDirectoryFilter,
|
||||||
IliasDownloader, KitShibbolethAuthenticator)
|
IliasDownloader, IliasDownloadStrategy,
|
||||||
from .ilias.download_strategies import (DownloadStrategy,
|
KitShibbolethAuthenticator, download_modified_or_new)
|
||||||
download_modified_or_new)
|
|
||||||
from .location import Location
|
from .location import Location
|
||||||
from .organizer import Organizer
|
from .organizer import Organizer
|
||||||
from .tmp_dir import TmpDir
|
from .tmp_dir import TmpDir
|
||||||
@ -45,7 +44,7 @@ class Pferd(Location):
|
|||||||
cookies: Optional[Path],
|
cookies: Optional[Path],
|
||||||
dir_filter: IliasDirectoryFilter,
|
dir_filter: IliasDirectoryFilter,
|
||||||
transform: Transform,
|
transform: Transform,
|
||||||
download_strategy: DownloadStrategy,
|
download_strategy: IliasDownloadStrategy,
|
||||||
) -> None:
|
) -> None:
|
||||||
# pylint: disable=too-many-locals
|
# pylint: disable=too-many-locals
|
||||||
cookie_jar = CookieJar(cookies)
|
cookie_jar = CookieJar(cookies)
|
||||||
@ -54,17 +53,12 @@ class Pferd(Location):
|
|||||||
organizer = Organizer(self.resolve(Path(target)))
|
organizer = Organizer(self.resolve(Path(target)))
|
||||||
|
|
||||||
crawler = IliasCrawler(base_url, course_id, session, authenticator, dir_filter)
|
crawler = IliasCrawler(base_url, course_id, session, authenticator, dir_filter)
|
||||||
downloader = IliasDownloader(tmp_dir, organizer, session, authenticator)
|
downloader = IliasDownloader(tmp_dir, organizer, session, authenticator, download_strategy)
|
||||||
|
|
||||||
cookie_jar.load_cookies()
|
cookie_jar.load_cookies()
|
||||||
info = crawler.crawl()
|
info = crawler.crawl()
|
||||||
cookie_jar.save_cookies()
|
cookie_jar.save_cookies()
|
||||||
downloader.download_all(
|
downloader.download_all(apply_transform(transform, info))
|
||||||
[
|
|
||||||
info for info in apply_transform(transform, info)
|
|
||||||
if download_strategy(organizer, info)
|
|
||||||
]
|
|
||||||
)
|
|
||||||
cookie_jar.save_cookies()
|
cookie_jar.save_cookies()
|
||||||
organizer.cleanup()
|
organizer.cleanup()
|
||||||
|
|
||||||
@ -77,7 +71,7 @@ class Pferd(Location):
|
|||||||
cookies: Optional[Path] = None,
|
cookies: Optional[Path] = None,
|
||||||
username: Optional[str] = None,
|
username: Optional[str] = None,
|
||||||
password: Optional[str] = None,
|
password: Optional[str] = None,
|
||||||
download_strategy: DownloadStrategy = download_modified_or_new,
|
download_strategy: IliasDownloadStrategy = download_modified_or_new,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""
|
"""
|
||||||
Synchronizes a folder with the ILIAS instance of the KIT.
|
Synchronizes a folder with the ILIAS instance of the KIT.
|
||||||
|
Loading…
Reference in New Issue
Block a user