mirror of
https://github.com/Garmelon/PFERD.git
synced 2023-12-21 10:23:01 +01:00
Add download strategies to save bandwidth
Only download files that are newer than the local version.
This commit is contained in:
parent
13bc78c889
commit
076b8c5a1f
@ -7,6 +7,7 @@ more complex configuration, you need to import the other submodules manually.
|
|||||||
|
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
|
from .download_strategies import *
|
||||||
from .pferd import Pferd
|
from .pferd import Pferd
|
||||||
|
|
||||||
STYLE = "{"
|
STYLE = "{"
|
||||||
|
80
PFERD/download_strategies.py
Normal file
80
PFERD/download_strategies.py
Normal file
@ -0,0 +1,80 @@
|
|||||||
|
"""
|
||||||
|
Contains a few default strategies for limiting the number of downloaded files.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import datetime
|
||||||
|
import logging
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from typing_extensions import Protocol, runtime_checkable
|
||||||
|
|
||||||
|
from .organizer import Organizer
|
||||||
|
from .utils import PrettyLogger
|
||||||
|
|
||||||
|
LOGGER = logging.getLogger(__name__)
|
||||||
|
PRETTY = PrettyLogger(LOGGER)
|
||||||
|
|
||||||
|
|
||||||
|
@runtime_checkable
|
||||||
|
class DownloadInfo(Protocol):
|
||||||
|
# pylint: disable=too-few-public-methods
|
||||||
|
"""
|
||||||
|
This class describes some minimal information about a file you can download.
|
||||||
|
"""
|
||||||
|
|
||||||
|
@property
|
||||||
|
def path(self) -> Path:
|
||||||
|
"""
|
||||||
|
Returns the path.
|
||||||
|
"""
|
||||||
|
|
||||||
|
@property
|
||||||
|
def modification_date(self) -> Optional[datetime.datetime]:
|
||||||
|
"""
|
||||||
|
Returns the modification date or None if not known.
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
class DownloadStrategy(ABC):
|
||||||
|
# pylint: disable=too-few-public-methods
|
||||||
|
"""
|
||||||
|
A strategy deciding whether to download a given info.
|
||||||
|
"""
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def should_download(self, organizer: Organizer, info: DownloadInfo) -> bool:
|
||||||
|
"""
|
||||||
|
Decides whether a given file should be downloaded.
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
class DownloadEverythingStrategy(DownloadStrategy):
|
||||||
|
# pylint: disable=too-few-public-methods
|
||||||
|
"""
|
||||||
|
A strategy that redownloads everything.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def should_download(self, organizer: Organizer, info: DownloadInfo) -> bool:
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
class DownloadNewOrModified(DownloadStrategy):
|
||||||
|
# pylint: disable=too-few-public-methods
|
||||||
|
"""
|
||||||
|
A strategy that only downloads changed or new files.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def should_download(self, organizer: Organizer, info: DownloadInfo) -> bool:
|
||||||
|
resolved_file = organizer.resolve(info.path)
|
||||||
|
if not resolved_file.exists() or info.modification_date is None:
|
||||||
|
return True
|
||||||
|
resolved_mod_time_seconds = resolved_file.stat().st_mtime
|
||||||
|
|
||||||
|
# Download if the info is newer
|
||||||
|
if info.modification_date.timestamp() > resolved_mod_time_seconds:
|
||||||
|
return True
|
||||||
|
|
||||||
|
PRETTY.filtered_path(info.path, "Local file had newer or equal modification time")
|
||||||
|
return False
|
@ -3,7 +3,7 @@
|
|||||||
import datetime
|
import datetime
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import List
|
from typing import List, Optional
|
||||||
|
|
||||||
import bs4
|
import bs4
|
||||||
import requests
|
import requests
|
||||||
@ -26,7 +26,7 @@ class IliasDownloadInfo(Transformable):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
url: str
|
url: str
|
||||||
modification_date: datetime.datetime
|
modification_date: Optional[datetime.datetime]
|
||||||
# parameters: Dict[str, Any] = field(default_factory=dict)
|
# parameters: Dict[str, Any] = field(default_factory=dict)
|
||||||
|
|
||||||
|
|
||||||
|
@ -2,6 +2,7 @@ from pathlib import Path
|
|||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
from .cookie_jar import CookieJar
|
from .cookie_jar import CookieJar
|
||||||
|
from .download_strategies import DownloadEverythingStrategy, DownloadStrategy
|
||||||
from .ilias import (IliasAuthenticator, IliasCrawler, IliasDirectoryFilter,
|
from .ilias import (IliasAuthenticator, IliasCrawler, IliasDirectoryFilter,
|
||||||
IliasDownloader, KitShibbolethAuthenticator)
|
IliasDownloader, KitShibbolethAuthenticator)
|
||||||
from .organizer import Organizer
|
from .organizer import Organizer
|
||||||
@ -9,7 +10,6 @@ from .tmp_dir import TmpDir
|
|||||||
from .transform import Transform, apply_transform
|
from .transform import Transform, apply_transform
|
||||||
from .utils import Location
|
from .utils import Location
|
||||||
|
|
||||||
|
|
||||||
# TODO save known-good cookies as soon as possible
|
# TODO save known-good cookies as soon as possible
|
||||||
# TODO print synchronizer name before beginning synchronization
|
# TODO print synchronizer name before beginning synchronization
|
||||||
|
|
||||||
@ -31,6 +31,7 @@ class Pferd(Location):
|
|||||||
cookies: Optional[Path],
|
cookies: Optional[Path],
|
||||||
dir_filter: IliasDirectoryFilter,
|
dir_filter: IliasDirectoryFilter,
|
||||||
transform: Transform,
|
transform: Transform,
|
||||||
|
download_strategy: DownloadStrategy,
|
||||||
) -> None:
|
) -> None:
|
||||||
cookie_jar = CookieJar(cookies)
|
cookie_jar = CookieJar(cookies)
|
||||||
session = cookie_jar.create_session()
|
session = cookie_jar.create_session()
|
||||||
@ -43,7 +44,12 @@ class Pferd(Location):
|
|||||||
cookie_jar.load_cookies()
|
cookie_jar.load_cookies()
|
||||||
info = crawler.crawl()
|
info = crawler.crawl()
|
||||||
cookie_jar.save_cookies()
|
cookie_jar.save_cookies()
|
||||||
downloader.download_all(apply_transform(transform, info))
|
downloader.download_all(
|
||||||
|
[
|
||||||
|
info for info in apply_transform(transform, info)
|
||||||
|
if download_strategy.should_download(organizer, info)
|
||||||
|
]
|
||||||
|
)
|
||||||
cookie_jar.save_cookies()
|
cookie_jar.save_cookies()
|
||||||
|
|
||||||
def ilias_kit(
|
def ilias_kit(
|
||||||
@ -55,6 +61,7 @@ class Pferd(Location):
|
|||||||
cookies: Optional[Path] = None,
|
cookies: Optional[Path] = None,
|
||||||
username: Optional[str] = None,
|
username: Optional[str] = None,
|
||||||
password: Optional[str] = None,
|
password: Optional[str] = None,
|
||||||
|
download_strategy: DownloadStrategy = DownloadEverythingStrategy(),
|
||||||
) -> None:
|
) -> None:
|
||||||
# This authenticator only works with the KIT ilias instance.
|
# This authenticator only works with the KIT ilias instance.
|
||||||
authenticator = KitShibbolethAuthenticator(username=username, password=password)
|
authenticator = KitShibbolethAuthenticator(username=username, password=password)
|
||||||
@ -66,4 +73,5 @@ class Pferd(Location):
|
|||||||
cookies=cookies,
|
cookies=cookies,
|
||||||
dir_filter=dir_filter,
|
dir_filter=dir_filter,
|
||||||
transform=transform,
|
transform=transform,
|
||||||
|
download_strategy=download_strategy,
|
||||||
)
|
)
|
||||||
|
Loading…
Reference in New Issue
Block a user