From cf3553175ff5adb09881d79314c6e4162104bc70 Mon Sep 17 00:00:00 2001
From: I-Al-Istannen
Date: Thu, 27 Feb 2020 14:51:29 +0100
Subject: [PATCH] Add OS_Exams synchronizer

---
Note: PFERD/os_exams.py below was revised during review; the blob id in its
"index" line predates that revision.

 PFERD/__init__.py |   2 ++
 PFERD/os_exams.py | 114 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 116 insertions(+)
 create mode 100644 PFERD/os_exams.py

diff --git a/PFERD/__init__.py b/PFERD/__init__.py
index fd85843..a0e924c 100644
--- a/PFERD/__init__.py
+++ b/PFERD/__init__.py
@@ -6,6 +6,7 @@ from .norbert import *
 from .tgi import *
 from .tgi_jimbo import *
 from .ti import *
+from .os_exams import *
 from .utils import *
 
 __all__ = ["STYLE", "FORMAT", "DATE_FORMAT", "FORMATTER", "enable_logging"]
@@ -17,6 +18,7 @@ __all__ += tgi.__all__
 __all__ += tgi_jimbo.__all__
 __all__ += ti.__all__
 __all__ += utils.__all__
+__all__ += os_exams.__all__
 
 STYLE = "{"
 FORMAT = "[{levelname:<7}] {message}"
diff --git a/PFERD/os_exams.py b/PFERD/os_exams.py
new file mode 100644
index 0000000..45b3881
--- /dev/null
+++ b/PFERD/os_exams.py
@@ -0,0 +1,114 @@
+# Operating systems Exams
+
+import getpass
+import logging
+import pathlib
+import re
+
+import bs4
+import requests
+
+from .organizer import Organizer
+from .utils import stream_to_path, PrettyLogger
+
+__all__ = ["OsExams"]
+logger = logging.getLogger(__name__)
+pretty = PrettyLogger(logger)
+
+
+class OsExams:
+    """Synchronizer for the exam PDFs linked from the OS exams page."""
+
+    BASE_URL = "https://os.itec.kit.edu/deutsch/1556.php"
+    # The dots of the host name are escaped so that only os.itec.kit.edu
+    # matches; every matched URL is later requested with the credentials.
+    LINK_RE = re.compile(
+        r"^http://os\.itec\.kit\.edu/downloads_own/sysarch-exam-assandsols"
+        r".*/(.*\.pdf)$"
+    )
+
+    # HTTP status codes treated as "the entered credentials were rejected".
+    _AUTH_FAILURE_CODES = (401, 403)
+
+    # Cached (username, password) tuple; None until the user has been asked.
+    _credentials = None
+
+    def __init__(self, base_path):
+        """Remember base_path and create the session used for all requests."""
+        self.base_path = base_path
+
+        self._session = requests.Session()
+
+    def synchronize(self, to_dir, transform=lambda x: x):
+        """Synchronize the linked exam PDFs via an Organizer for to_dir.
+
+        The sync directory is pathlib.Path(base_path, to_dir). transform is
+        called with the PurePath of every file found and returns the path to
+        store it under, or None to skip that file.
+        """
+        pretty.starting_synchronizer(to_dir, "OsExams")
+
+        sync_path = pathlib.Path(self.base_path, to_dir)
+
+        orga = Organizer(self.base_path, sync_path)
+        orga.clean_temp_dir()
+
+        self._crawl(orga, transform)
+
+        orga.clean_sync_dir()
+        orga.clean_temp_dir()
+
+    def _crawl(self, orga, transform):
+        """Fetch BASE_URL, download each accepted PDF and add it to orga.
+
+        Raises requests.HTTPError if the overview page cannot be fetched.
+        """
+        r = self._session.get(self.BASE_URL)
+        # Do not mistake an error page for a listing without any exams.
+        r.raise_for_status()
+        soup = bs4.BeautifulSoup(r.text, "html.parser")
+
+        for found in soup.find_all("a", href=self.LINK_RE):
+            url = found["href"]
+            filename = self.LINK_RE.match(url).group(1).replace("/", ".")
+            logger.debug(f"Found file {filename} at {url}")
+
+            old_path = pathlib.PurePath(filename)
+            new_path = transform(old_path)
+            if new_path is None:
+                continue
+            logger.debug(f"Transformed from {old_path} to {new_path}")
+
+            temp_path = orga.temp_file()
+            self._download(url, temp_path)
+            orga.add_file(temp_path, new_path)
+
+    def _download(self, url, to_path):
+        """Stream url to to_path, asking for credentials until accepted.
+
+        Raises requests.HTTPError for failures unrelated to authentication.
+        """
+        while True:
+            username, password = self._get_credentials()
+            with self._session.get(url, stream=True, auth=(username, password)) as r:
+                if r.ok:
+                    stream_to_path(r, to_path)
+                    return
+                if r.status_code not in self._AUTH_FAILURE_CODES:
+                    # New credentials cannot fix e.g. a 404 or 500, so raise
+                    # instead of prompting forever.
+                    r.raise_for_status()
+            print("Incorrect credentials.")
+            self._reset_credentials()
+
+    def _get_credentials(self):
+        """Return the cached credentials, prompting for them if necessary."""
+        if self._credentials is None:
+            print("Please enter OS credentials.")
+            username = getpass.getpass(prompt="Username: ")
+            password = getpass.getpass(prompt="Password: ")
+            self._credentials = (username, password)
+        return self._credentials
+
+    def _reset_credentials(self):
+        """Forget the cached credentials so the next download asks again."""
+        self._credentials = None