pferd/PFERD/ilias/downloader.py
2020-04-24 18:24:44 +00:00

132 lines
3.7 KiB
Python

"""Contains a downloader for ILIAS."""
import datetime
import logging
from dataclasses import dataclass
from pathlib import Path
from typing import Callable, List, Optional
import bs4
import requests
from ..organizer import Organizer
from ..tmp_dir import TmpDir
from ..transform import Transformable
from ..utils import PrettyLogger, soupify, stream_to_path
from .authenticators import IliasAuthenticator
LOGGER = logging.getLogger(__name__)
PRETTY = PrettyLogger(LOGGER)
class ContentTypeException(Exception):
"""Thrown when the content type of the ilias element can not be handled."""
@dataclass
class IliasDownloadInfo(Transformable):
"""
This class describes a single file to be downloaded.
"""
url: str
modification_date: Optional[datetime.datetime]
# parameters: Dict[str, Any] = field(default_factory=dict)
IliasDownloadStrategy = Callable[[Organizer, IliasDownloadInfo], bool]
def download_everything(organizer: Organizer, info: IliasDownloadInfo) -> bool:
# pylint: disable=unused-argument
"""
Accepts everything.
"""
return True
def download_modified_or_new(organizer: Organizer, info: IliasDownloadInfo) -> bool:
"""
Accepts new files or files with a more recent modification date.
"""
resolved_file = organizer.resolve(info.path)
if not resolved_file.exists() or info.modification_date is None:
return True
resolved_mod_time_seconds = resolved_file.stat().st_mtime
# Download if the info is newer
if info.modification_date.timestamp() > resolved_mod_time_seconds:
return True
PRETTY.ignored_file(info.path, "local file has newer or equal modification time")
return False
class IliasDownloader:
# pylint: disable=too-many-arguments
"""A downloader for ILIAS."""
def __init__(
self,
tmp_dir: TmpDir,
organizer: Organizer,
session: requests.Session,
authenticator: IliasAuthenticator,
strategy: IliasDownloadStrategy,
):
"""
Create a new IliasDownloader.
"""
self._tmp_dir = tmp_dir
self._organizer = organizer
self._session = session
self._authenticator = authenticator
self._strategy = strategy
def download_all(self, infos: List[IliasDownloadInfo]) -> None:
"""
Download multiple files one after the other.
"""
for info in infos:
self.download(info)
def download(self, info: IliasDownloadInfo) -> None:
"""
Download a file from ILIAS.
Retries authentication until eternity if it could not fetch the file.
"""
if not self._strategy(self._organizer, info):
self._organizer.mark(info.path)
return
tmp_file = self._tmp_dir.new_path()
while not self._try_download(info, tmp_file):
self._authenticator.authenticate(self._session)
self._organizer.accept_file(tmp_file, info.path)
def _try_download(self, info: IliasDownloadInfo, target: Path) -> bool:
with self._session.get(info.url, stream=True) as response:
content_type = response.headers["content-type"]
if content_type.startswith("text/html"):
# Dangit, we're probably not logged in.
if self._is_logged_in(soupify(response)):
raise ContentTypeException("Attempting to download a web page, not a file")
return False
# Yay, we got the file :)
stream_to_path(response, target)
return True
@staticmethod
def _is_logged_in(soup: bs4.BeautifulSoup) -> bool:
userlog = soup.find("li", {"id": "userlog"})
return userlog is not None