Compare commits

...

7 Commits

Author SHA1 Message Date
50e25346e5 Bump version to 1.1.3 2019-06-07 11:36:41 +00:00
67da4e69fa Add colorful log output
Highlight the important operations (new, modified) in different colours.
2019-06-07 13:28:55 +02:00
da602366f8 Bump version to 1.1.2 2019-05-17 07:43:32 +00:00
2016f61bf8 Crawl more of the TI page 2019-05-09 11:04:24 +00:00
59c278da2c Bump version to 1.1.1 2019-05-06 12:07:12 +00:00
c72e92db18 Make Ti downloader authentication more robust 2019-05-06 12:04:01 +00:00
44b4204517 Add basic Ti downloader 2019-05-06 11:54:36 +00:00
9 changed files with 161 additions and 13 deletions

View File

@ -3,6 +3,7 @@ import logging
from .ffm import * from .ffm import *
from .ilias import * from .ilias import *
from .norbert import * from .norbert import *
from .ti import *
from .utils import * from .utils import *
__all__ = ["STYLE", "FORMAT", "DATE_FORMAT", "FORMATTER", "enable_logging"] __all__ = ["STYLE", "FORMAT", "DATE_FORMAT", "FORMATTER", "enable_logging"]
@ -10,6 +11,7 @@ __all__ = ["STYLE", "FORMAT", "DATE_FORMAT", "FORMATTER", "enable_logging"]
__all__ += ffm.__all__ __all__ += ffm.__all__
__all__ += ilias.__all__ __all__ += ilias.__all__
__all__ += norbert.__all__ __all__ += norbert.__all__
__all__ += ti.__all__
__all__ += utils.__all__ __all__ += utils.__all__
STYLE = "{" STYLE = "{"

View File

@ -8,10 +8,11 @@ import bs4
import requests import requests
from .organizer import Organizer from .organizer import Organizer
from .utils import stream_to_path from .utils import stream_to_path, PrettyLogger
__all__ = ["FfM"] __all__ = ["FfM"]
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
pretty = PrettyLogger(logger)
class FfM: class FfM:
BASE_URL = "http://www.math.kit.edu/" BASE_URL = "http://www.math.kit.edu/"
@ -23,7 +24,7 @@ class FfM:
self._session = requests.Session() self._session = requests.Session()
def synchronize(self, urlpart, to_dir, transform=lambda x: x): def synchronize(self, urlpart, to_dir, transform=lambda x: x):
logger.info(f" Synchronizing {urlpart} to {to_dir} using the FfM synchronizer.") pretty.starting_synchronizer(to_dir, "FfM", urlpart)
sync_path = pathlib.Path(self.base_path, to_dir) sync_path = pathlib.Path(self.base_path, to_dir)

View File

@ -4,13 +4,13 @@ import logging
import pathlib import pathlib
import re import re
import bs4
from .ilias_authenticators import ShibbolethAuthenticator from .ilias_authenticators import ShibbolethAuthenticator
from .organizer import Organizer from .organizer import Organizer
from .utils import PrettyLogger
__all__ = ["Ilias"] __all__ = ["Ilias"]
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
pretty = PrettyLogger(logger)
class Ilias: class Ilias:
FILE_RE = re.compile(r"goto\.php\?target=(file_\d+_download)") FILE_RE = re.compile(r"goto\.php\?target=(file_\d+_download)")
@ -22,7 +22,7 @@ class Ilias:
self._auth = ShibbolethAuthenticator(base_path / cookie_file) self._auth = ShibbolethAuthenticator(base_path / cookie_file)
def synchronize(self, ref_id, to_dir, transform=lambda x: x, filter=lambda x: True): def synchronize(self, ref_id, to_dir, transform=lambda x: x, filter=lambda x: True):
logger.info(f" Synchronizing ref_id {ref_id} to {to_dir} using the Ilias synchronizer.") pretty.starting_synchronizer(to_dir, "ILIAS", f"ref_id {ref_id}")
sync_path = pathlib.Path(self.base_path, to_dir) sync_path = pathlib.Path(self.base_path, to_dir)
orga = Organizer(self.base_path, sync_path) orga = Organizer(self.base_path, sync_path)

View File

@ -9,10 +9,11 @@ import bs4
import requests import requests
from .organizer import Organizer from .organizer import Organizer
from .utils import rename, stream_to_path from .utils import rename, stream_to_path, PrettyLogger
__all__ = ["Norbert"] __all__ = ["Norbert"]
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
pretty = PrettyLogger(logger)
class Norbert: class Norbert:
BASE_URL = "https://studwww.informatik.kit.edu/~s_blueml/" BASE_URL = "https://studwww.informatik.kit.edu/~s_blueml/"
@ -24,7 +25,7 @@ class Norbert:
self._session = requests.Session() self._session = requests.Session()
def synchronize(self, to_dir, transform=lambda x: x, unzip=lambda _: True): def synchronize(self, to_dir, transform=lambda x: x, unzip=lambda _: True):
logger.info(f" Synchronizing to {to_dir} using the Norbert synchronizer.") pretty.starting_synchronizer(to_dir, "Norbert")
sync_path = pathlib.Path(self.base_path, to_dir) sync_path = pathlib.Path(self.base_path, to_dir)
orga = Organizer(self.base_path, sync_path) orga = Organizer(self.base_path, sync_path)

View File

@ -7,6 +7,7 @@ from . import utils
__all__ = ["Organizer"] __all__ = ["Organizer"]
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
pretty = utils.PrettyLogger(logger)
class Organizer: class Organizer:
def __init__(self, base_dir, sync_dir): def __init__(self, base_dir, sync_dir):
@ -64,7 +65,7 @@ class Organizer:
if to_path.exists(): if to_path.exists():
if filecmp.cmp(from_path, to_path, shallow=False): if filecmp.cmp(from_path, to_path, shallow=False):
logger.info(f"Ignored {to_path}") pretty.ignored_file(to_path)
# remember path for later reference # remember path for later reference
self._added_files.add(to_path.resolve()) self._added_files.add(to_path.resolve())
@ -73,9 +74,9 @@ class Organizer:
# No further action needed, especially not overwriting symlinks... # No further action needed, especially not overwriting symlinks...
return return
else: else:
logger.info(f"Different file at {to_path}") pretty.modified_file(to_path)
else: else:
logger.info(f"New file at {to_path}") pretty.new_file(to_path)
# copy the file from from_path to sync_dir/to_path # copy the file from from_path to sync_dir/to_path
# If the file being replaced was a symlink, the link itself is overwritten, # If the file being replaced was a symlink, the link itself is overwritten,

111
PFERD/ti.py Normal file
View File

@ -0,0 +1,111 @@
# Ti synchronizer (http://ti.ira.uka.de/)
import getpass
import logging
import pathlib
import re
import bs4
import requests
from .organizer import Organizer
from .utils import stream_to_path, PrettyLogger
# Only the synchronizer class is exported; the package __init__ pulls it in
# with a star import and appends this list to its own __all__.
__all__ = ["Ti"]
# Module-level logger, plus a PrettyLogger wrapper around it that is used for
# the highlighted "Synchronizing ..." status message.
logger = logging.getLogger(__name__)
pretty = PrettyLogger(logger)
class Ti:
    """Synchronizer that downloads PDF files from the Ti web site.

    A course page below ``BASE_URL`` is scanned for the known sub-sections
    (lecture slides, exercise sheets, tutorials). Every PDF linked from a
    selected sub-section is downloaded with HTTP authentication and handed to
    an ``Organizer`` that places it below the synchronisation directory.
    """

    BASE_URL = "http://ti.ira.uka.de/"
    # Only links whose href ends in ".pdf" are treated as downloadable files.
    FILE_RE = re.compile(r"^.+\.pdf$")

    def __init__(self, base_path):
        """Create a synchronizer that stores its files below ``base_path``."""
        self.base_path = base_path

        self._session = requests.Session()
        # (username, password) tuple, or None while nothing has been entered.
        self._credentials = None

    def synchronize(self, urlpart, to_dir, transform=lambda x: x,
                    filter=lambda x: True):
        """Synchronize the course at ``BASE_URL + urlpart`` into ``to_dir``.

        ``filter`` is called with a ``PurePath`` naming each available
        sub-section; sub-sections for which it returns a falsy value are
        skipped. ``transform`` is called with the path of every file found and
        returns the path to store it under, or ``None`` to skip the file.
        """
        pretty.starting_synchronizer(to_dir, "Ti", urlpart)

        sync_path = pathlib.Path(self.base_path, to_dir)
        orga = Organizer(self.base_path, sync_path)

        orga.clean_temp_dir()

        # Never reuse credentials entered during a previous run.
        self._reset_credentials()

        available = self._find_available(urlpart)

        # Sorted so the sections are processed in a deterministic order.
        for name, address in sorted(available.items()):
            path = pathlib.PurePath(name)
            if filter(path):
                self._crawl(urlpart + address, path, orga, transform)
            else:
                # Was "loggwe", an undefined name that raised NameError as
                # soon as a section was filtered out.
                logger.info(f"Skipping {name}/")

        orga.clean_sync_dir()
        orga.clean_temp_dir()

        self._reset_credentials()

    def _find_available(self, urlpart):
        """Return a dict mapping section names to their URL sub-paths.

        The course start page is fetched and searched for the links to the
        three known sections; only sections that are linked are returned.
        """
        url = self.BASE_URL + urlpart
        r = self._session.get(url)
        soup = bs4.BeautifulSoup(r.text, "html.parser")

        available = {}

        if soup.find(href="./Vorlesung/Vorlesung.php"):
            logger.info("Found Folien/")
            available["Folien"] = "/Vorlesung/"
        if soup.find(href="./Uebungen/Uebungen.php"):
            logger.info("Found Blätter/")
            available["Blätter"] = "/Uebungen/"
        if soup.find(href="./Tutorien/Tutorien.php"):
            logger.info("Found Tutorien/")
            available["Tutorien"] = "/Tutorien/"

        return available

    def _crawl(self, urlpart, path, orga, transform):
        """Download every PDF linked from ``BASE_URL + urlpart``.

        Each file is stored under ``transform(path / href)``; files for which
        ``transform`` returns ``None`` are skipped without being downloaded.
        """
        url = self.BASE_URL + urlpart
        r = self._session.get(url)
        soup = bs4.BeautifulSoup(r.text, "html.parser")

        for filelink in soup.find_all("a", href=self.FILE_RE):
            filepath = path / filelink["href"]
            fileurl = url + "/" + filelink["href"]

            new_path = transform(filepath)
            if new_path is None:
                continue
            logger.debug(f"Transformed from {filepath} to {new_path}")

            # Download into a temporary file first and let the organizer
            # decide what to do with it.
            temp_path = orga.temp_file()
            self._download(fileurl, temp_path)
            orga.add_file(temp_path, new_path)

    def _get_credentials(self):
        """Return the cached ``(username, password)``, prompting if needed."""
        if self._credentials is None:
            print("Please enter Ti credentials.")
            username = getpass.getpass(prompt="Username: ")
            password = getpass.getpass(prompt="Password: ")
            self._credentials = (username, password)
        return self._credentials

    def _reset_credentials(self):
        """Forget any cached credentials."""
        self._credentials = None

    def _download(self, url, to_path):
        """Stream ``url`` into ``to_path``, re-prompting on rejected logins.

        Raises ``requests.HTTPError`` for error responses that are not an
        authentication failure.
        """
        while True:
            username, password = self._get_credentials()
            with self._session.get(url, stream=True, auth=(username, password)) as r:
                if r.ok:
                    stream_to_path(r, to_path)
                    return

                # Only a rejected login can be fixed by asking again. Any
                # other error (missing file, server failure, ...) previously
                # caused an endless "Incorrect credentials." prompt loop.
                if r.status_code not in (401, 403):
                    r.raise_for_status()

                print("Incorrect credentials.")
                self._reset_credentials()

View File

@ -1,5 +1,8 @@
import os import os
import sys
import pathlib import pathlib
from colorama import Style
from colorama import Fore
__all__ = [ __all__ = [
"get_base_dir", "get_base_dir",
@ -8,6 +11,7 @@ __all__ = [
"stream_to_path", "stream_to_path",
"ContentTypeException", "ContentTypeException",
"FileNotFoundException", "FileNotFoundException",
"PrettyLogger",
] ]
def get_base_dir(script_file): def get_base_dir(script_file):
@ -26,8 +30,35 @@ def stream_to_path(response, to_path, chunk_size=1024**2):
for chunk in response.iter_content(chunk_size=chunk_size): for chunk in response.iter_content(chunk_size=chunk_size):
fd.write(chunk) fd.write(chunk)
def isOutputPipe():
    """Return whether this program's output is attached to a pipe.

    The check is "standard output is not a terminal", so output redirected
    into a file is reported as piped as well.

    Returns:
        bool: ``True`` if ``sys.stdout`` is not a TTY, ``False`` otherwise.
    """
    # isatty must be *called*; the bare bound method is always truthy. The
    # result is negated because a pipe is precisely the non-terminal case.
    return not sys.stdout.isatty()
class ContentTypeException(Exception): class ContentTypeException(Exception):
pass pass
class FileNotFoundException(Exception): class FileNotFoundException(Exception):
pass pass
class PrettyLogger:
    """Emit colour-highlighted status messages through a wrapped logger.

    Every message is logged at INFO level and is wrapped in colorama escape
    sequences, ending with ``Style.RESET_ALL``.
    """

    def __init__(self, logger):
        # The logger that all messages are forwarded to.
        self.logger = logger

    def modified_file(self, file_name):
        """Log that ``file_name`` was modified (bright magenta)."""
        message = f"{Fore.MAGENTA}{Style.BRIGHT}Modified {file_name}.{Style.RESET_ALL}"
        self.logger.info(message)

    def new_file(self, file_name):
        """Log that ``file_name`` was created (bright green)."""
        message = f"{Fore.GREEN}{Style.BRIGHT}Created {file_name}.{Style.RESET_ALL}"
        self.logger.info(message)

    def ignored_file(self, file_name):
        """Log that ``file_name`` was ignored (dimmed)."""
        message = f"{Style.DIM}Ignored {file_name}.{Style.RESET_ALL}"
        self.logger.info(message)

    def starting_synchronizer(self, target_directory, synchronizer_name, subject=None):
        """Log a bright cyan headline announcing a synchronizer run.

        An empty line is logged first. ``subject`` is optional; when it is
        falsy, it is left out of the headline.
        """
        if subject:
            subject_str = f"{subject} "
        else:
            subject_str = ""

        self.logger.info("")
        headline = (
            f"{Fore.CYAN}{Style.BRIGHT}Synchronizing {subject_str}to {target_directory}"
            f" using the {synchronizer_name} synchronizer.{Style.RESET_ALL}"
        )
        self.logger.info(headline)

View File

@ -10,7 +10,7 @@ test it though).
To install PFERD or update your installation to the latest version, run this To install PFERD or update your installation to the latest version, run this
wherever you want to install/have installed PFERD: wherever you want to install/have installed PFERD:
``` ```
$ pip install git+https://github.com/Garmelon/PFERD@v1.1.0 $ pip install git+https://github.com/Garmelon/PFERD@v1.1.3
``` ```
The use of [venv](https://docs.python.org/3/library/venv.html) is recommended. The use of [venv](https://docs.python.org/3/library/venv.html) is recommended.
@ -25,7 +25,7 @@ $ mkdir Vorlesungen
$ cd Vorlesungen $ cd Vorlesungen
$ python3 -m venv . $ python3 -m venv .
$ . bin/activate $ . bin/activate
$ pip install git+https://github.com/Garmelon/PFERD@v1.1.0 $ pip install git+https://github.com/Garmelon/PFERD@v1.1.3
$ curl -O https://raw.githubusercontent.com/Garmelon/PFERD/master/example_config.py $ curl -O https://raw.githubusercontent.com/Garmelon/PFERD/master/example_config.py
$ python3 example_config.py $ python3 example_config.py
$ deactivate $ deactivate

View File

@ -2,11 +2,12 @@ from setuptools import setup
setup( setup(
name="PFERD", name="PFERD",
version="1.1.0", version="1.1.3",
packages=["PFERD"], packages=["PFERD"],
install_requires=[ install_requires=[
"requests>=2.21.0", "requests>=2.21.0",
"beautifulsoup4>=4.7.1", "beautifulsoup4>=4.7.1",
"colorama>=0.4.1"
], ],
) )