From 01e6972c960b732644d8bbd8a9448a6d1890b079 Mon Sep 17 00:00:00 2001
From: I-Al-Istannen
Date: Mon, 20 Apr 2020 18:36:40 +0200
Subject: [PATCH] Add ilias downloader

---
 PFERD/ilias/__init__.py   |  1 +
 PFERD/ilias/downloader.py | 80 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 81 insertions(+)
 create mode 100644 PFERD/ilias/downloader.py

diff --git a/PFERD/ilias/__init__.py b/PFERD/ilias/__init__.py
index b68f34d..ee3cccc 100644
--- a/PFERD/ilias/__init__.py
+++ b/PFERD/ilias/__init__.py
@@ -3,3 +3,4 @@ Synchronizing files from ILIAS instances (https://www.ilias.de/).
 """
 
 from .authenticators import *
+from .downloader import *
diff --git a/PFERD/ilias/downloader.py b/PFERD/ilias/downloader.py
new file mode 100644
index 0000000..5f1aefe
--- /dev/null
+++ b/PFERD/ilias/downloader.py
@@ -0,0 +1,80 @@
+"""Contains a downloader for ILIAS."""
+
+from pathlib import Path
+from typing import Any, Dict
+
+import bs4
+import requests
+
+from ..new_organizer import Organizer
+from ..tmp_dir import TmpDir
+from ..utils import soupify, stream_to_path
+from .authenticators import IliasAuthenticator
+
+
+class ContentTypeException(Exception):
+    """Thrown when the content type of the ilias element can not be handled."""
+
+    def __init__(self, message: str):
+        """Create a new exception."""
+        super().__init__(message)
+
+
+class IliasDownloader:
+    """A downloader for ILIAS."""
+
+    def __init__(self, tmp_dir: TmpDir, organizer: Organizer, authenticator: IliasAuthenticator):
+        """Create a new IliasDownloader."""
+        self._authenticator = authenticator
+        self._session = requests.Session()
+        self._tmp_dir = tmp_dir
+        self._organizer = organizer
+
+    def download(self, url: str, target_path: Path, params: Dict[str, Any]) -> None:
+        """Download a file from ILIAS.
+
+        Retries authentication until eternity, if it could not fetch the file.
+        The response is first written to a new temporary file, which is handed
+        to the organizer together with target_path once the download succeeded.
+        """
+        tmp_file = self._tmp_dir.new_file()
+
+        while not self._try_download(url, tmp_file, params):
+            self._authenticator.authenticate(self._session)
+
+        self._organizer.accept_file(tmp_file, target_path)
+
+    def _try_download(self, url: str, target_path: Path, params: Dict[str, Any]) -> bool:
+        """Try to download the file once.
+
+        Returns True if the response was streamed to target_path and False if
+        an HTML page without the "userlog" element was received instead, which
+        is taken to mean that the session has to be authenticated first.
+
+        Raises a ContentTypeException if an HTML page was received even though
+        the session appears to be logged in.
+        """
+        with self._session.get(url, params=params, stream=True) as response:
+            # Not every response carries a content type. Treat a missing header
+            # as "not HTML" instead of failing with a KeyError.
+            content_type = response.headers.get("content-type", "")
+
+            if not content_type.startswith("text/html"):
+                # Yay, we got the file :)
+                stream_to_path(response, target_path)
+                return True
+
+            # Dangit, we're probably not logged in.
+            soup = soupify(response)
+
+            if self._is_logged_in(soup):
+                raise ContentTypeException(
+                    "Attempting to download a web page, not a file"
+                )
+
+            return False
+
+    def _is_logged_in(self, soup: Any) -> bool:
+        """Return whether the page contains the "userlog" list element."""
+        userlog = soup.find("li", {"id": "userlog"})
+        return userlog is not None