mirror of
https://github.com/Garmelon/PFERD.git
synced 2023-12-21 10:23:01 +01:00
5b929f09a2
Also fixes an issue where the downloader didn't mark files that were not downloaded due to the strategy used.
132 lines
3.7 KiB
Python
132 lines
3.7 KiB
Python
"""Contains a downloader for ILIAS."""
|
|
|
|
import datetime
|
|
import logging
|
|
from dataclasses import dataclass
|
|
from pathlib import Path
|
|
from typing import Callable, List, Optional
|
|
|
|
import bs4
|
|
import requests
|
|
|
|
from ..organizer import Organizer
|
|
from ..tmp_dir import TmpDir
|
|
from ..transform import Transformable
|
|
from ..utils import PrettyLogger, soupify, stream_to_path
|
|
from .authenticators import IliasAuthenticator
|
|
|
|
LOGGER = logging.getLogger(__name__)
|
|
PRETTY = PrettyLogger(LOGGER)
|
|
|
|
|
|
class ContentTypeException(Exception):
|
|
"""Thrown when the content type of the ilias element can not be handled."""
|
|
|
|
|
|
@dataclass
|
|
class IliasDownloadInfo(Transformable):
|
|
"""
|
|
This class describes a single file to be downloaded.
|
|
"""
|
|
|
|
url: str
|
|
modification_date: Optional[datetime.datetime]
|
|
# parameters: Dict[str, Any] = field(default_factory=dict)
|
|
|
|
|
|
IliasDownloadStrategy = Callable[[Organizer, IliasDownloadInfo], bool]
|
|
|
|
|
|
def download_everything(organizer: Organizer, info: IliasDownloadInfo) -> bool:
|
|
# pylint: disable=unused-argument
|
|
"""
|
|
Accepts everything.
|
|
"""
|
|
return True
|
|
|
|
|
|
def download_modified_or_new(organizer: Organizer, info: IliasDownloadInfo) -> bool:
|
|
"""
|
|
Accepts new files or files with a more recent modification date.
|
|
"""
|
|
resolved_file = organizer.resolve(info.path)
|
|
if not resolved_file.exists() or info.modification_date is None:
|
|
return True
|
|
resolved_mod_time_seconds = resolved_file.stat().st_mtime
|
|
|
|
# Download if the info is newer
|
|
if info.modification_date.timestamp() > resolved_mod_time_seconds:
|
|
return True
|
|
|
|
PRETTY.filtered_path(info.path, "Local file had newer or equal modification time")
|
|
return False
|
|
|
|
|
|
class IliasDownloader:
|
|
# pylint: disable=too-many-arguments
|
|
"""A downloader for ILIAS."""
|
|
|
|
def __init__(
|
|
self,
|
|
tmp_dir: TmpDir,
|
|
organizer: Organizer,
|
|
session: requests.Session,
|
|
authenticator: IliasAuthenticator,
|
|
strategy: IliasDownloadStrategy,
|
|
):
|
|
"""
|
|
Create a new IliasDownloader.
|
|
"""
|
|
|
|
self._tmp_dir = tmp_dir
|
|
self._organizer = organizer
|
|
self._session = session
|
|
self._authenticator = authenticator
|
|
self._strategy = strategy
|
|
|
|
def download_all(self, infos: List[IliasDownloadInfo]) -> None:
|
|
"""
|
|
Download multiple files one after the other.
|
|
"""
|
|
|
|
for info in infos:
|
|
self.download(info)
|
|
|
|
def download(self, info: IliasDownloadInfo) -> None:
|
|
"""
|
|
Download a file from ILIAS.
|
|
|
|
Retries authentication until eternity if it could not fetch the file.
|
|
"""
|
|
|
|
if not self._strategy(self._organizer, info):
|
|
self._organizer.mark(info.path)
|
|
return
|
|
|
|
tmp_file = self._tmp_dir.new_path()
|
|
|
|
while not self._try_download(info, tmp_file):
|
|
self._authenticator.authenticate(self._session)
|
|
|
|
self._organizer.accept_file(tmp_file, info.path)
|
|
|
|
def _try_download(self, info: IliasDownloadInfo, target: Path) -> bool:
|
|
with self._session.get(info.url, stream=True) as response:
|
|
content_type = response.headers["content-type"]
|
|
|
|
if content_type.startswith("text/html"):
|
|
# Dangit, we're probably not logged in.
|
|
if self._is_logged_in(soupify(response)):
|
|
raise ContentTypeException("Attempting to download a web page, not a file")
|
|
|
|
return False
|
|
|
|
# Yay, we got the file :)
|
|
stream_to_path(response, target)
|
|
return True
|
|
|
|
@staticmethod
|
|
def _is_logged_in(soup: bs4.BeautifulSoup) -> bool:
|
|
userlog = soup.find("li", {"id": "userlog"})
|
|
return userlog is not None
|