mirror of
https://github.com/Garmelon/PFERD.git
synced 2023-12-21 10:23:01 +01:00
Remove unnecessary files
Also document some plans for the new program structure in REWRITE.md
This commit is contained in:
61
PFERD/ffm.py
61
PFERD/ffm.py
@ -1,61 +0,0 @@
|
||||
# Fakultät für Mathematik (FfM)
|
||||
|
||||
import logging
|
||||
import pathlib
|
||||
import re
|
||||
|
||||
import bs4
|
||||
import requests
|
||||
|
||||
from .organizer import Organizer
|
||||
from .utils import stream_to_path, PrettyLogger
|
||||
|
||||
# Names exported via `from ... import *`.
__all__ = ["FfM"]

# Module-level logger plus the pretty-printing wrapper used for
# user-facing synchronizer status messages.
logger = logging.getLogger(__name__)
pretty = PrettyLogger(logger)
|
||||
|
||||
class FfM:
    """
    Synchronizer for lecture material of the KIT Fakultät für Mathematik.

    Crawls a page below BASE_URL for links to PDF files and mirrors them
    into a local sync directory managed by an Organizer.
    """

    BASE_URL = "http://www.math.kit.edu/"
    # Absolute links to PDFs on the math faculty's site; group 1 captures
    # the file name. Dots in the host are escaped — the original pattern
    # used bare "." metacharacters, which also matched e.g. "wwwXmath".
    LINK_RE = re.compile(r"^https?://www\.math\.kit\.edu/.*/(.*\.pdf)$")

    def __init__(self, base_path):
        """base_path: directory below which the sync directory is created."""
        self.base_path = base_path

        # One shared session so crawling and downloads reuse connections.
        self._session = requests.Session()

    def synchronize(self, urlpart, to_dir, transform=lambda x: x):
        """
        Mirror all PDFs found at BASE_URL + urlpart into to_dir.

        transform maps a crawled PurePath to its target path; returning
        None skips the file.
        """
        pretty.starting_synchronizer(to_dir, "FfM", urlpart)

        sync_path = pathlib.Path(self.base_path, to_dir)

        orga = Organizer(self.base_path, sync_path)
        orga.clean_temp_dir()

        self._crawl(orga, urlpart, transform)

        orga.clean_sync_dir()
        orga.clean_temp_dir()

    def _crawl(self, orga, urlpart, transform):
        # Fetch the overview page and download every linked PDF.
        url = self.BASE_URL + urlpart
        r = self._session.get(url)
        soup = bs4.BeautifulSoup(r.text, "html.parser")

        for found in soup.find_all("a", href=self.LINK_RE):
            url = found["href"]
            # Use the precompiled pattern directly instead of re.match().
            filename = self.LINK_RE.match(url).group(1).replace("/", ".")
            logger.debug(f"Found file (unknown) at {url}")

            old_path = pathlib.PurePath(filename)
            new_path = transform(old_path)
            if new_path is None:
                continue
            logger.debug(f"Transformed from {old_path} to {new_path}")

            temp_path = orga.temp_file()
            self._download(url, temp_path)
            orga.add_file(temp_path, new_path)

    def _download(self, url, to_path):
        # Stream the response to disk so large files are not held in memory.
        with self._session.get(url, stream=True) as r:
            stream_to_path(r, to_path)
|
108
PFERD/norbert.py
108
PFERD/norbert.py
@ -1,108 +0,0 @@
|
||||
# Norberts Prog-Tuts
|
||||
|
||||
import logging
|
||||
import pathlib
|
||||
import re
|
||||
import zipfile
|
||||
|
||||
import bs4
|
||||
import requests
|
||||
|
||||
from .organizer import Organizer
|
||||
from .utils import rename, stream_to_path, PrettyLogger
|
||||
|
||||
# Names exported via `from ... import *`.
__all__ = ["Norbert"]

# Module-level logger plus the pretty-printing wrapper used for
# user-facing synchronizer status messages.
logger = logging.getLogger(__name__)
pretty = PrettyLogger(logger)
|
||||
|
||||
class Norbert:
    """
    Synchronizer for Norbert's programming tutorials ("Prog-Tuts").

    Crawls BASE_URL for links to zip archives and, per archive, either
    mirrors the archive file itself or its extracted contents into a
    local sync directory managed by an Organizer.
    """

    # Page listing the downloadable zip archives.
    BASE_URL = "https://studwww.informatik.kit.edu/~s_blueml/"
    # Relative links to zip files; group 1 captures the file name.
    LINK_RE = re.compile(r"^progtut/.*/(.*\.zip)$")

    def __init__(self, base_path):
        # Directory below which the sync directory will be created.
        self.base_path = base_path

        # Shared HTTP session for crawling and downloading.
        self._session = requests.Session()

    def synchronize(self, to_dir, transform=lambda x: x, unzip=lambda _: True):
        """
        Mirror the tutorial archives into to_dir.

        transform maps a crawled PurePath to its target path (returning
        None skips the file); unzip decides per archive whether to
        extract its contents (True) or store the zip file itself (False).
        """
        pretty.starting_synchronizer(to_dir, "Norbert")

        sync_path = pathlib.Path(self.base_path, to_dir)
        orga = Organizer(self.base_path, sync_path)

        orga.clean_temp_dir()

        files = self._crawl()
        self._download(orga, files, transform, unzip)

        orga.clean_sync_dir()
        orga.clean_temp_dir()

    def _crawl(self):
        """Return a list of (PurePath, absolute url) pairs for found zips."""
        url = self.BASE_URL
        r = self._session.get(url)

        # replace undecodeable characters with a placeholder
        #text = r.raw.decode("utf-8", "replace")

        text = r.text
        soup = bs4.BeautifulSoup(text, "html.parser")

        files = []

        for found in soup.find_all("a", href=self.LINK_RE):
            url = found["href"]
            # Links are relative to the base page.
            full_url = self.BASE_URL + url

            filename = re.search(self.LINK_RE, url).group(1)
            path = pathlib.PurePath(filename)

            logger.debug(f"Found zip file (unknown) at {full_url}")
            files.append((path, full_url))

        return files

    def _download(self, orga, files, transform, unzip):
        """Download (and possibly unzip) every crawled archive."""
        for path, url in sorted(files):
            # Yes, we want the zip file contents
            if unzip(path):
                logger.debug(f"Downloading and unzipping {path}")
                # The archive's stem becomes the folder its contents go into.
                zip_path = rename(path, path.stem)

                # Download zip file
                temp_file = orga.temp_file()
                self._download_zip(url, temp_file)

                # Search the zip file for files to extract
                temp_dir = orga.temp_dir()
                with zipfile.ZipFile(temp_file, "r") as zf:
                    for info in zf.infolist():
                        # Only interested in the files themselves, the directory
                        # structure is created automatically by orga.add_file()
                        if info.is_dir():
                            continue

                        file_path = zip_path / pathlib.PurePath(info.filename)
                        logger.debug(f"Found {info.filename} at path {file_path}")

                        new_path = transform(file_path)
                        if new_path is not None:
                            # Extract to temp file and add, the usual deal
                            # (note: temp_file is deliberately rebound here,
                            # the downloaded archive is no longer needed by name)
                            temp_file = orga.temp_file()
                            extracted_path = zf.extract(info, temp_dir)
                            extracted_path = pathlib.Path(extracted_path)
                            orga.add_file(extracted_path, new_path)

            # No, we only want the zip file itself
            else:
                logger.debug(f"Only downloading {path}")

                new_path = transform(path)
                if new_path is not None:
                    temp_file = orga.temp_file()
                    self._download_zip(url, temp_file)
                    orga.add_file(temp_file, new_path)

    def _download_zip(self, url, to_path):
        # Stream to disk so large archives are not buffered in memory.
        with self._session.get(url, stream=True) as r:
            stream_to_path(r, to_path)
|
@ -1,85 +0,0 @@
|
||||
# Operating systems Exams
|
||||
|
||||
import getpass
|
||||
import logging
|
||||
import pathlib
|
||||
import re
|
||||
|
||||
import bs4
|
||||
import requests
|
||||
|
||||
from .organizer import Organizer
|
||||
from .utils import stream_to_path, PrettyLogger
|
||||
|
||||
# Names exported via `from ... import *`.
__all__ = ["OsExams"]

# Module-level logger plus the pretty-printing wrapper used for
# user-facing synchronizer status messages.
logger = logging.getLogger(__name__)
pretty = PrettyLogger(logger)
|
||||
|
||||
class OsExams:
    """
    Synchronizer for old Operating Systems exams and solutions.

    Scrapes BASE_URL for links to exam PDFs and mirrors them into a
    local directory; downloads use HTTP Basic auth with interactively
    prompted credentials.
    """

    BASE_URL = "https://os.itec.kit.edu/deutsch/1556.php"
    # Absolute links to exam/solution PDFs; group 1 captures the name.
    LINK_RE = re.compile(
        r"^http://os.itec.kit.edu/downloads_own/sysarch-exam-assandsols"
        r".*/(.*\.pdf)$"
    )

    # Cached (username, password) tuple; None until first prompt.
    _credentials = None

    def __init__(self, base_path):
        self.base_path = base_path

        self._session = requests.Session()

    def synchronize(self, to_dir, transform=lambda x: x):
        """Mirror all exam PDFs into to_dir (transform may remap or skip)."""
        pretty.starting_synchronizer(to_dir, "OsExams")

        sync_path = pathlib.Path(self.base_path, to_dir)

        orga = Organizer(self.base_path, sync_path)
        orga.clean_temp_dir()

        self._crawl(orga, transform)

        orga.clean_sync_dir()
        orga.clean_temp_dir()

    def _crawl(self, orga, transform):
        """Find every linked exam PDF and fetch it via the organizer."""
        response = self._session.get(self.BASE_URL)
        page = bs4.BeautifulSoup(response.text, "html.parser")

        for anchor in page.find_all("a", href=self.LINK_RE):
            url = anchor["href"]
            filename = self.LINK_RE.match(url).group(1).replace("/", ".")
            logger.debug(f"Found file (unknown) at {url}")

            old_path = pathlib.PurePath(filename)
            new_path = transform(old_path)
            if new_path is None:
                continue
            logger.debug(f"Transformed from {old_path} to {new_path}")

            temp_path = orga.temp_file()
            self._download(url, temp_path)
            orga.add_file(temp_path, new_path)

    def _download(self, url, to_path):
        # Keep retrying with freshly prompted credentials until accepted.
        while True:
            username, password = self._get_credentials()
            with self._session.get(url, stream=True, auth=(username, password)) as r:
                if not r.ok:
                    print("Incorrect credentials.")
                    self._reset_credentials()
                    continue
                stream_to_path(r, to_path)
                return

    def _get_credentials(self):
        """Prompt once for credentials, caching them on the instance."""
        if self._credentials is not None:
            return self._credentials

        print("Please enter OS credentials.")
        username = getpass.getpass(prompt="Username: ")
        password = getpass.getpass(prompt="Password: ")
        self._credentials = (username, password)
        return self._credentials

    def _reset_credentials(self):
        """Forget cached credentials so the next download prompts again."""
        self._credentials = None
|
75
PFERD/tgi.py
75
PFERD/tgi.py
@ -1,75 +0,0 @@
|
||||
# TGI Lecture slides
|
||||
|
||||
import logging
|
||||
import pathlib
|
||||
import re
|
||||
import zipfile
|
||||
|
||||
import bs4
|
||||
import requests
|
||||
|
||||
from .organizer import Organizer
|
||||
from .utils import rename, stream_to_path, PrettyLogger
|
||||
|
||||
# Names exported via `from ... import *`.
__all__ = ["TGI"]

# Module-level logger plus the pretty-printing wrapper used for
# user-facing synchronizer status messages.
logger = logging.getLogger(__name__)
pretty = PrettyLogger(logger)
|
||||
|
||||
class TGI:
    """
    Synchronizer for the TGI lecture slides.

    Crawls the course index page for the configured year and mirrors
    all linked slide PDFs into a local directory via an Organizer.
    """

    # "{year}" is substituted with the year passed to the constructor.
    CRAWL_URL = "https://i11www.iti.kit.edu/teaching/{year}/tgi/index"
    BASE_URL = "https://i11www.iti.kit.edu"

    def __init__(self, base_path, year="winter2019"):
        self.base_path = base_path

        self._session = requests.Session()
        self.year = year

    def synchronize(self, to_dir, transform=lambda x: x):
        """Mirror all lecture slides into to_dir (transform may remap or skip)."""
        pretty.starting_synchronizer(to_dir, "TGI")

        sync_path = pathlib.Path(self.base_path, to_dir)
        orga = Organizer(self.base_path, sync_path)

        orga.clean_temp_dir()

        files = self._crawl()
        self._download(orga, files, transform)

        orga.clean_sync_dir()
        orga.clean_temp_dir()

    def _crawl(self):
        """Return (PurePath, absolute url) pairs for every slide PDF."""
        index_url = self.CRAWL_URL.replace("{year}", self.year)
        response = self._session.get(index_url)
        page = bs4.BeautifulSoup(response.text, "html.parser")

        collected = []
        for link in page.select("a.mediafile.mf_pdf"):
            href = link["href"]
            absolute_url = self.BASE_URL + href

            name = re.search(r"\d+(/tgi)?/(.+.pdf)", href).group(2)

            logger.debug(f"Found file (unknown) at {absolute_url}")
            collected.append((pathlib.PurePath(name), absolute_url))

        return collected

    def _download(self, orga, files, transform):
        """Download every crawled file, applying transform to its path."""
        for path, url in sorted(files):
            logger.debug(f"Downloading {path}")

            new_path = transform(path)
            if new_path is None:
                continue
            temp_file = orga.temp_file()
            self._download_file(url, temp_file)
            orga.add_file(temp_file, new_path)

    def _download_file(self, url, to_path):
        # Stream to disk so large PDFs are not buffered in memory.
        with self._session.get(url, stream=True) as r:
            stream_to_path(r, to_path)
|
@ -1,80 +0,0 @@
|
||||
# TGI Tutorial slides
|
||||
|
||||
import logging
|
||||
import pathlib
|
||||
import re
|
||||
import zipfile
|
||||
|
||||
import bs4
|
||||
import requests
|
||||
|
||||
from .organizer import Organizer
|
||||
from .utils import rename, stream_to_path, PrettyLogger
|
||||
|
||||
# Names exported via `from ... import *`.
__all__ = ["TGI_Tut"]

# Module-level logger plus the pretty-printing wrapper used for
# user-facing synchronizer status messages.
logger = logging.getLogger(__name__)
pretty = PrettyLogger(logger)
|
||||
|
||||
|
||||
class TGI_Tut:
    """
    Synchronizer for the TGI tutorial slides hosted on Jimdo.

    Logs in to the password-protected tutorial page, crawls it for
    download links to PDFs and mirrors them into a local directory via
    an Organizer.
    """

    CRAWL_URL = "https://tgitut.jimdofree.com/"

    def __init__(self, base_path, year="winter2019"):
        """
        base_path: directory below which the sync directory is created.
        year: accepted for signature parity with the other synchronizers,
            but currently unused by this crawler.
        """
        self.base_path = base_path

        self._session = requests.Session()

    def _login(self):
        # NOTE(review): the site password is hard-coded and the response
        # is not inspected, so a failed login only surfaces later as an
        # empty crawl result. (Removed the unused local that previously
        # captured the response.)
        self._session.post(self.CRAWL_URL, data={
            "password": "Lebkuchen", "do_login": "yes", "Submit": "Anmelden"
        })

    def synchronize(self, to_dir, transform=lambda x: x):
        """Mirror all tutorial PDFs into to_dir (transform may remap or skip)."""
        pretty.starting_synchronizer(to_dir, "TGI_Tut")

        self._login()

        sync_path = pathlib.Path(self.base_path, to_dir)
        orga = Organizer(self.base_path, sync_path)

        orga.clean_temp_dir()

        files = self._crawl()
        self._download(orga, files, transform)

        orga.clean_sync_dir()
        orga.clean_temp_dir()

    def _crawl(self):
        """Return (PurePath, absolute url) pairs for every linked PDF."""
        r = self._session.get(self.CRAWL_URL)
        soup = bs4.BeautifulSoup(r.text, "html.parser")

        files = []

        for found in soup.select("a.cc-m-download-link"):
            url = found["href"]
            full_url = self.CRAWL_URL + url

            # The dot before "pdf" is escaped — the original pattern used
            # a bare ".", which also matched names like "fooXpdf".
            filename = re.search(r"/app/download/\d+/(.*\.pdf)", url).group(1)
            path = pathlib.PurePath(filename)

            logger.debug(f"Found file (unknown) at {full_url}")
            files.append((path, full_url))

        return files

    def _download(self, orga, files, transform):
        """Download every crawled file, applying transform to its path."""
        for path, url in sorted(files):
            logger.debug(f"Downloading {path}")

            new_path = transform(path)
            if new_path is None:
                continue
            temp_file = orga.temp_file()
            self._download_file(url, temp_file)
            orga.add_file(temp_file, new_path)

    def _download_file(self, url, to_path):
        # Stream the response to disk instead of buffering it in memory.
        with self._session.get(url, stream=True) as r:
            stream_to_path(r, to_path)
|
115
PFERD/ti.py
115
PFERD/ti.py
@ -1,115 +0,0 @@
|
||||
# Technische Informatik (Ti) — header corrected; was copy-pasted from ffm.py
|
||||
|
||||
import getpass
|
||||
import logging
|
||||
import pathlib
|
||||
import re
|
||||
from urllib.parse import urljoin
|
||||
|
||||
import bs4
|
||||
import requests
|
||||
|
||||
from .organizer import Organizer
|
||||
from .utils import stream_to_path, PrettyLogger
|
||||
|
||||
# Names exported via `from ... import *`.
__all__ = ["Ti"]

# Module-level logger plus the pretty-printing wrapper used for
# user-facing synchronizer status messages.
logger = logging.getLogger(__name__)
pretty = PrettyLogger(logger)
|
||||
|
||||
class Ti:
    """
    Synchronizer for Ti course material.

    Detects which sections (slides, exercise sheets, tutorials, old
    exams) a course page offers, then mirrors every PDF in the selected
    sections. Downloads use HTTP Basic auth with interactively prompted
    credentials, retried until the server accepts them.
    """

    BASE_URL = "http://ti.ira.uka.de/"
    # Any link ending in ".pdf" is treated as downloadable material.
    FILE_RE = re.compile(r"^.+\.pdf$")

    def __init__(self, base_path):
        self.base_path = base_path

        self._session = requests.Session()
        self._credentials = None

    def synchronize(self, urlpart, to_dir, transform=lambda x: x,
                    filter=lambda x: True):
        """
        Mirror the course at BASE_URL + urlpart into to_dir.

        transform maps a crawled PurePath to its target (None skips the
        file); filter decides per section directory whether to crawl it.
        """
        pretty.starting_synchronizer(to_dir, "Ti", urlpart)

        sync_path = pathlib.Path(self.base_path, to_dir)

        orga = Organizer(self.base_path, sync_path)
        orga.clean_temp_dir()

        self._reset_credentials()

        available = self._find_available(urlpart)

        for name, address in sorted(available.items()):
            section_path = pathlib.PurePath(name)
            if not filter(section_path):
                logger.info(f"Skipping {name}/")
                continue
            self._crawl(urlpart + address, section_path, orga, transform)

        orga.clean_sync_dir()
        orga.clean_temp_dir()

        self._reset_credentials()

    def _find_available(self, urlpart):
        """Return a dict mapping local section names to URL fragments."""
        r = self._session.get(self.BASE_URL + urlpart)
        soup = bs4.BeautifulSoup(r.text, "html.parser")

        # (local name, name as logged, page name on the server).
        # NOTE(review): the local name "AltKlausuren" differs from the
        # server-side "AlteKlausuren"; kept as-is to preserve the sync
        # directory layout existing users already have.
        sections = [
            ("Folien", "Folien", "Vorlesung"),
            ("Blätter", "Blätter", "Uebungen"),
            ("Tutorien", "Tutorien", "Tutorien"),
            ("AltKlausuren", "AlteKlausuren", "AlteKlausuren"),
        ]

        available = {}
        for local_name, logged_name, page in sections:
            if soup.find(href=f"./{page}/{page}.php"):
                logger.info(f"Found {logged_name}/")
                available[local_name] = f"/{page}/"

        return available

    def _crawl(self, urlpart, path, orga, transform):
        """Download every PDF linked from the given section page."""
        page_url = self.BASE_URL + urlpart
        r = self._session.get(page_url)
        soup = bs4.BeautifulSoup(r.text, "html.parser")

        for filelink in soup.find_all("a", href=self.FILE_RE):
            filepath = path / filelink["href"]
            fileurl = urljoin(page_url, filelink["href"])

            new_path = transform(filepath)
            if new_path is None:
                continue
            logger.debug(f"Transformed from {filepath} to {new_path}")

            temp_path = orga.temp_file()
            self._download(fileurl, temp_path)
            orga.add_file(temp_path, new_path)

    def _get_credentials(self):
        """Prompt once for credentials and cache them for later calls."""
        if self._credentials is not None:
            return self._credentials

        print("Please enter Ti credentials.")
        username = getpass.getpass(prompt="Username: ")
        password = getpass.getpass(prompt="Password: ")
        self._credentials = (username, password)
        return self._credentials

    def _reset_credentials(self):
        """Drop the cached credentials so they are prompted for again."""
        self._credentials = None

    def _download(self, url, to_path):
        # Keep retrying with freshly prompted credentials until accepted.
        while True:
            username, password = self._get_credentials()
            with self._session.get(url, stream=True, auth=(username, password)) as r:
                if not r.ok:
                    print("Incorrect credentials.")
                    self._reset_credentials()
                    continue
                stream_to_path(r, to_path)
                return
|
@ -4,19 +4,6 @@ import pathlib
|
||||
from colorama import Style
|
||||
from colorama import Fore
|
||||
|
||||
__all__ = [
|
||||
"get_base_dir",
|
||||
"move",
|
||||
"rename",
|
||||
"stream_to_path",
|
||||
"ContentTypeException",
|
||||
"FileNotFoundException",
|
||||
"PrettyLogger",
|
||||
]
|
||||
|
||||
def get_base_dir(script_file):
    """Return the directory containing *script_file* as an absolute Path."""
    absolute = os.path.abspath(script_file)
    return pathlib.Path(os.path.dirname(absolute))
|
||||
|
||||
def move(path, from_folders, to_folders):
|
||||
l = len(from_folders)
|
||||
if path.parts[:l] == from_folders:
|
||||
@ -30,17 +17,6 @@ def stream_to_path(response, to_path, chunk_size=1024**2):
|
||||
for chunk in response.iter_content(chunk_size=chunk_size):
|
||||
fd.write(chunk)
|
||||
|
||||
def isOutputPipe():
    """Return whether this program's output is attached to a pipe.

    True when stdout is redirected or piped (i.e. not an interactive
    terminal), False when it is a TTY.
    """
    # Bug fix: the original returned the bound method object itself
    # (always truthy) instead of calling it; and since isatty() is True
    # for a terminal, "attached to a pipe" is its negation.
    return not sys.stdout.isatty()
|
||||
|
||||
class ContentTypeException(Exception):
    """Error concerning an unexpected content type.

    Raised by code elsewhere in this package; usage is not visible in
    this chunk.
    """
    pass
|
||||
|
||||
class FileNotFoundException(Exception):
    """Error concerning a file that could not be found.

    NOTE(review): distinct from the builtin FileNotFoundError — callers
    must catch this class, not the builtin.
    """
    pass
|
||||
|
||||
class PrettyLogger:
|
||||
|
||||
def __init__(self, logger):
|
||||
|
Reference in New Issue
Block a user