Add download strategies to save bandwidth

Only download files that are newer than the local version.
This commit is contained in:
I-Al-Istannen 2020-04-23 18:29:20 +02:00
parent 13bc78c889
commit 076b8c5a1f
4 changed files with 93 additions and 4 deletions

View File

@ -7,6 +7,7 @@ more complex configuration, you need to import the other submodules manually.
import logging
from .download_strategies import *
from .pferd import Pferd
STYLE = "{"

View File

@ -0,0 +1,80 @@
"""
Contains a few default strategies for limiting the amount of downloaded files.
"""
import datetime
import logging
from abc import ABC, abstractmethod
from pathlib import Path
from typing import Optional
from typing_extensions import Protocol, runtime_checkable
from .organizer import Organizer
from .utils import PrettyLogger
# Module-level logger, named after this module.
LOGGER = logging.getLogger(__name__)
# PrettyLogger wrapper around LOGGER; the strategies below use it to report
# files they decide not to download.
PRETTY = PrettyLogger(LOGGER)
@runtime_checkable
class DownloadInfo(Protocol):
    # pylint: disable=too-few-public-methods
    """
    Structural description of a file that can be downloaded.

    Any object exposing ``path`` and ``modification_date`` satisfies this
    protocol. It is runtime checkable, so ``isinstance`` can be used to test
    whether an object provides both members.
    """

    @property
    def path(self) -> Path:
        """
        The path of the file this info describes.
        """

    @property
    def modification_date(self) -> Optional[datetime.datetime]:
        """
        When the file was last modified, or None if that is unknown.
        """
class DownloadStrategy(ABC):
    # pylint: disable=too-few-public-methods
    """
    Abstract base for policies that pick which files get downloaded.

    Subclasses implement ``should_download``; the base class itself cannot
    be instantiated.
    """

    @abstractmethod
    def should_download(self, organizer: Organizer, info: DownloadInfo) -> bool:
        """
        Decide whether the file described by ``info`` should be downloaded.

        Arguments:
            organizer: the organizer handed to the strategy (an
                implementation may use it to locate the local copy)
            info: the description of the candidate file

        Returns:
            True if the file should be downloaded, False to skip it.
        """
class DownloadEverythingStrategy(DownloadStrategy):
    # pylint: disable=too-few-public-methods
    """
    A strategy that accepts every file, so everything is downloaded again.
    """

    def should_download(self, organizer: Organizer, info: DownloadInfo) -> bool:
        """
        Always answer True; neither ``organizer`` nor ``info`` is inspected.
        """
        return True
class DownloadNewOrModified(DownloadStrategy):
    # pylint: disable=too-few-public-methods
    """
    A strategy that skips files whose local copy is already up to date.
    """

    def should_download(self, organizer: Organizer, info: DownloadInfo) -> bool:
        """
        Download when there is no local copy, when the modification date is
        unknown, or when the info is strictly newer than the local file.

        Skipped files are reported through PRETTY.filtered_path.
        """
        local_copy = organizer.resolve(info.path)

        # Nothing on disk yet: the file is new.
        if not local_copy.exists():
            return True

        # Without a modification date there is nothing to compare against.
        if info.modification_date is None:
            return True

        local_mtime = local_copy.stat().st_mtime
        remote_mtime = info.modification_date.timestamp()

        # The local copy is at least as recent as the info: skip and report it.
        if remote_mtime <= local_mtime:
            PRETTY.filtered_path(info.path, "Local file had newer or equal modification time")
            return False

        return True

View File

@ -3,7 +3,7 @@
import datetime
from dataclasses import dataclass
from pathlib import Path
from typing import List
from typing import List, Optional
import bs4
import requests
@ -26,7 +26,7 @@ class IliasDownloadInfo(Transformable):
"""
url: str
modification_date: datetime.datetime
modification_date: Optional[datetime.datetime]
# parameters: Dict[str, Any] = field(default_factory=dict)

View File

@ -2,6 +2,7 @@ from pathlib import Path
from typing import Optional
from .cookie_jar import CookieJar
from .download_strategies import DownloadEverythingStrategy, DownloadStrategy
from .ilias import (IliasAuthenticator, IliasCrawler, IliasDirectoryFilter,
IliasDownloader, KitShibbolethAuthenticator)
from .organizer import Organizer
@ -9,7 +10,6 @@ from .tmp_dir import TmpDir
from .transform import Transform, apply_transform
from .utils import Location
# TODO save known-good cookies as soon as possible
# TODO print synchronizer name before beginning synchronization
@ -31,6 +31,7 @@ class Pferd(Location):
cookies: Optional[Path],
dir_filter: IliasDirectoryFilter,
transform: Transform,
download_strategy: DownloadStrategy,
) -> None:
cookie_jar = CookieJar(cookies)
session = cookie_jar.create_session()
@ -43,7 +44,12 @@ class Pferd(Location):
cookie_jar.load_cookies()
info = crawler.crawl()
cookie_jar.save_cookies()
downloader.download_all(apply_transform(transform, info))
downloader.download_all(
[
info for info in apply_transform(transform, info)
if download_strategy.should_download(organizer, info)
]
)
cookie_jar.save_cookies()
def ilias_kit(
@ -55,6 +61,7 @@ class Pferd(Location):
cookies: Optional[Path] = None,
username: Optional[str] = None,
password: Optional[str] = None,
download_strategy: DownloadStrategy = DownloadEverythingStrategy(),
) -> None:
# This authenticator only works with the KIT ilias instance.
authenticator = KitShibbolethAuthenticator(username=username, password=password)
@ -66,4 +73,5 @@ class Pferd(Location):
cookies=cookies,
dir_filter=dir_filter,
transform=transform,
download_strategy=download_strategy,
)