mirror of
https://github.com/Garmelon/PFERD.git
synced 2023-12-21 10:23:01 +01:00
Add download strategies to save bandwidth
Only download files that are newer than the local version.
This commit is contained in:
parent
13bc78c889
commit
076b8c5a1f
@ -7,6 +7,7 @@ more complex configuration, you need to import the other submodules manually.
|
||||
|
||||
import logging
|
||||
|
||||
from .download_strategies import *
|
||||
from .pferd import Pferd
|
||||
|
||||
STYLE = "{"
|
||||
|
80
PFERD/download_strategies.py
Normal file
80
PFERD/download_strategies.py
Normal file
@ -0,0 +1,80 @@
|
||||
"""
|
||||
Contains a few default strategies for limiting the amount of downloaded files.
|
||||
"""
|
||||
|
||||
import datetime
|
||||
import logging
|
||||
from abc import ABC, abstractmethod
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from typing_extensions import Protocol, runtime_checkable
|
||||
|
||||
from .organizer import Organizer
|
||||
from .utils import PrettyLogger
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
PRETTY = PrettyLogger(LOGGER)
|
||||
|
||||
|
||||
@runtime_checkable
class DownloadInfo(Protocol):
    # pylint: disable=too-few-public-methods
    """Minimal description of a file that can be downloaded.

    The protocol consists of two read-only members: ``path`` and
    ``modification_date``.
    """

    @property
    def path(self) -> Path:
        """Return the path of the file."""

    @property
    def modification_date(self) -> Optional[datetime.datetime]:
        """Return the modification date, or None when it is not known."""
|
||||
|
||||
|
||||
class DownloadStrategy(ABC):
    # pylint: disable=too-few-public-methods
    """Abstract base for policies that decide whether an info is downloaded."""

    @abstractmethod
    def should_download(self, organizer: Organizer, info: DownloadInfo) -> bool:
        """Decide whether the file described by ``info`` should be downloaded.

        Subclasses must override this method and return True to download
        the file or False to skip it.
        """
|
||||
|
||||
|
||||
class DownloadEverythingStrategy(DownloadStrategy):
    # pylint: disable=too-few-public-methods
    """Strategy that never skips a file, so everything is downloaded again."""

    def should_download(self, organizer: Organizer, info: DownloadInfo) -> bool:
        """Always return True; neither argument is inspected."""
        return True
|
||||
|
||||
|
||||
class DownloadNewOrModified(DownloadStrategy):
    # pylint: disable=too-few-public-methods
    """
    A strategy that only downloads changed or new files.
    """

    def should_download(self, organizer: Organizer, info: DownloadInfo) -> bool:
        """
        Decide whether ``info`` is missing locally or newer than the local copy.

        Returns True when the modification date of ``info`` is unknown, when
        the resolved local file does not exist, or when ``info`` is strictly
        newer than the local file's mtime. Otherwise the path is reported via
        ``PRETTY.filtered_path`` and False is returned.
        """
        # Resolve first, exactly as before, so any error raised by the
        # organizer still surfaces even when the date is unknown.
        resolved_file = organizer.resolve(info.path)

        remote_date = info.modification_date
        if remote_date is None:
            return True

        # A single stat() replaces the former exists() + stat() pair: if the
        # file vanished between the two calls, stat() used to raise instead
        # of the file simply being treated as missing.
        try:
            local_mtime_seconds = resolved_file.stat().st_mtime
        except (FileNotFoundError, NotADirectoryError):
            return True

        # Download if the info is newer.
        # NOTE(review): timestamp() treats a naive datetime as local time --
        # confirm the dates supplied by callers are local or timezone-aware.
        if remote_date.timestamp() > local_mtime_seconds:
            return True

        PRETTY.filtered_path(info.path, "Local file had newer or equal modification time")
        return False
|
@ -3,7 +3,7 @@
|
||||
import datetime
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import List
|
||||
from typing import List, Optional
|
||||
|
||||
import bs4
|
||||
import requests
|
||||
@ -26,7 +26,7 @@ class IliasDownloadInfo(Transformable):
|
||||
"""
|
||||
|
||||
url: str
|
||||
modification_date: datetime.datetime
|
||||
modification_date: Optional[datetime.datetime]
|
||||
# parameters: Dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
|
||||
|
@ -2,6 +2,7 @@ from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from .cookie_jar import CookieJar
|
||||
from .download_strategies import DownloadEverythingStrategy, DownloadStrategy
|
||||
from .ilias import (IliasAuthenticator, IliasCrawler, IliasDirectoryFilter,
|
||||
IliasDownloader, KitShibbolethAuthenticator)
|
||||
from .organizer import Organizer
|
||||
@ -9,7 +10,6 @@ from .tmp_dir import TmpDir
|
||||
from .transform import Transform, apply_transform
|
||||
from .utils import Location
|
||||
|
||||
|
||||
# TODO save known-good cookies as soon as possible
|
||||
# TODO print synchronizer name before beginning synchronization
|
||||
|
||||
@ -31,6 +31,7 @@ class Pferd(Location):
|
||||
cookies: Optional[Path],
|
||||
dir_filter: IliasDirectoryFilter,
|
||||
transform: Transform,
|
||||
download_strategy: DownloadStrategy,
|
||||
) -> None:
|
||||
cookie_jar = CookieJar(cookies)
|
||||
session = cookie_jar.create_session()
|
||||
@ -43,7 +44,12 @@ class Pferd(Location):
|
||||
cookie_jar.load_cookies()
|
||||
info = crawler.crawl()
|
||||
cookie_jar.save_cookies()
|
||||
downloader.download_all(apply_transform(transform, info))
|
||||
downloader.download_all(
|
||||
[
|
||||
info for info in apply_transform(transform, info)
|
||||
if download_strategy.should_download(organizer, info)
|
||||
]
|
||||
)
|
||||
cookie_jar.save_cookies()
|
||||
|
||||
def ilias_kit(
|
||||
@ -55,6 +61,7 @@ class Pferd(Location):
|
||||
cookies: Optional[Path] = None,
|
||||
username: Optional[str] = None,
|
||||
password: Optional[str] = None,
|
||||
download_strategy: DownloadStrategy = DownloadEverythingStrategy(),
|
||||
) -> None:
|
||||
# This authenticator only works with the KIT ilias instance.
|
||||
authenticator = KitShibbolethAuthenticator(username=username, password=password)
|
||||
@ -66,4 +73,5 @@ class Pferd(Location):
|
||||
cookies=cookies,
|
||||
dir_filter=dir_filter,
|
||||
transform=transform,
|
||||
download_strategy=download_strategy,
|
||||
)
|
||||
|
Loading…
x
Reference in New Issue
Block a user