From 01e6972c960b732644d8bbd8a9448a6d1890b079 Mon Sep 17 00:00:00 2001
From: I-Al-Istannen <i-al-istannen@users.noreply.github.com>
Date: Mon, 20 Apr 2020 18:36:40 +0200
Subject: [PATCH] Add ilias downloader

---
 PFERD/ilias/__init__.py   |  1 +
 PFERD/ilias/downloader.py | 66 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 67 insertions(+)
 create mode 100644 PFERD/ilias/downloader.py

diff --git a/PFERD/ilias/__init__.py b/PFERD/ilias/__init__.py
index b68f34d..ee3cccc 100644
--- a/PFERD/ilias/__init__.py
+++ b/PFERD/ilias/__init__.py
@@ -3,3 +3,4 @@ Synchronizing files from ILIAS instances (https://www.ilias.de/).
 """
 
 from .authenticators import *
+from .downloader import *
diff --git a/PFERD/ilias/downloader.py b/PFERD/ilias/downloader.py
new file mode 100644
index 0000000..5f1aefe
--- /dev/null
+++ b/PFERD/ilias/downloader.py
@@ -0,0 +1,66 @@
+"""Contains a downloader for ILIAS."""
+
+from pathlib import Path
+from typing import Any, Dict
+
+import bs4
+import requests
+
+from ..new_organizer import Organizer
+from ..tmp_dir import TmpDir
+from ..utils import soupify, stream_to_path
+from .authenticators import IliasAuthenticator
+
+
+class ContentTypeException(Exception):
+    """Raised when the content type of an ILIAS element cannot be handled."""
+
+    def __init__(self, message: str):
+        """Create the exception with a message describing the problem."""
+        super().__init__(message)
+
+
+class IliasDownloader():
+    """Downloads files from ILIAS, re-authenticating the session when needed."""
+
+    def __init__(self, tmp_dir: TmpDir, organizer: Organizer, authenticator: IliasAuthenticator):
+        """Create a new IliasDownloader with its own requests session."""
+        self._authenticator = authenticator
+        self._session = requests.Session()
+        self._tmp_dir = tmp_dir
+        self._organizer = organizer
+
+    def download(self, url: str, target_path: Path, params: Dict[str, Any]) -> None:
+        """Download a file from ILIAS to a temp file and pass it to the organizer.
+
+        Re-authenticates and retries without limit while ILIAS returns a page
+        for a logged-out user. Raises ContentTypeException for other web pages.
+        """
+        tmp_file = self._tmp_dir.new_file()
+
+        while not self._try_download(url, tmp_file, params):
+            self._authenticator.authenticate(self._session)
+
+        self._organizer.accept_file(tmp_file, target_path)
+
+    def _try_download(self, url: str, target_path: Path, params: Dict[str, Any]) -> bool:
+        """Fetch url into target_path; return False if a login is needed first."""
+        with self._session.get(url, params=params, stream=True) as r:
+            # The header may be absent; treat that as "not HTML" instead of crashing.
+            content_type = r.headers.get("content-type", "")
+
+            if not content_type.startswith("text/html"):
+                # Yay, we got the file :)
+                stream_to_path(r, target_path)
+                return True
+
+            # Dangit, we got a web page. We're probably not logged in.
+            if self._is_logged_in(soupify(r)):
+                raise ContentTypeException("Attempting to download a web page, not a file")
+
+            return False
+
+    def _is_logged_in(self, soup: Any) -> bool:
+        """Return whether soup contains a list item with the id userlog."""
+        # NOTE(review): presumably ILIAS renders this item only when logged in.
+        return soup.find("li", {"id": "userlog"}) is not None