Compare commits

...

17 Commits

Author SHA1 Message Date
2eb834afc3 Bump version to 1.1.4 2019-06-11 12:46:40 +00:00
d468a45662 Allow wolfram files 2019-06-11 12:42:55 +00:00
50e25346e5 Bump version to 1.1.3 2019-06-07 11:36:41 +00:00
67da4e69fa Add colorful log output
Highlight the important operations (new, modified) in different colours.
2019-06-07 13:28:55 +02:00
da602366f8 Bump version to 1.1.2 2019-05-17 07:43:32 +00:00
2016f61bf8 Crawl more of the TI page 2019-05-09 11:04:24 +00:00
59c278da2c Bump version to 1.1.1 2019-05-06 12:07:12 +00:00
c72e92db18 Make Ti downloader authentication more robust 2019-05-06 12:04:01 +00:00
44b4204517 Add basic Ti downloader 2019-05-06 11:54:36 +00:00
11b9ff66e4 Bump version to 1.1.0 2019-04-26 09:48:17 +00:00
d730d0064c Conform to other files' __all__ 2019-04-26 09:45:24 +00:00
ae6cc40fb5 Rename ILIAS crawler to ilias
To be consistent with the other classes' capitalisation of acronyms
2019-04-26 04:29:12 +00:00
0891e7f1bc Fix logging messages not appearing 2019-04-26 03:58:11 +00:00
571c2a6c98 Clean up README structure 2019-04-25 20:04:50 +00:00
2d7a2b2d53 Add more usage examples 2019-04-25 20:01:50 +00:00
8550d1101c Fix backronym in README 2019-04-25 19:59:21 +00:00
fd71dc6f6e Use python3 everywhere in README 2019-04-25 19:57:04 +00:00
11 changed files with 187 additions and 23 deletions

View File

@ -3,6 +3,7 @@ import logging
from .ffm import *
from .ilias import *
from .norbert import *
from .ti import *
from .utils import *
__all__ = ["STYLE", "FORMAT", "DATE_FORMAT", "FORMATTER", "enable_logging"]
@ -10,6 +11,7 @@ __all__ = ["STYLE", "FORMAT", "DATE_FORMAT", "FORMATTER", "enable_logging"]
__all__ += ffm.__all__
__all__ += ilias.__all__
__all__ += norbert.__all__
__all__ += ti.__all__
__all__ += utils.__all__
STYLE = "{"

View File

@ -8,10 +8,11 @@ import bs4
import requests
from .organizer import Organizer
from .utils import stream_to_path
from .utils import stream_to_path, PrettyLogger
__all__ = ["FfM"]
logger = logging.getLogger(__name__)
pretty = PrettyLogger(logger)
class FfM:
BASE_URL = "http://www.math.kit.edu/"
@ -23,7 +24,7 @@ class FfM:
self._session = requests.Session()
def synchronize(self, urlpart, to_dir, transform=lambda x: x):
logging.info(f" Synchronizing {urlpart} to {to_dir} using the FfM synchronizer.")
pretty.starting_synchronizer(to_dir, "FfM", urlpart)
sync_path = pathlib.Path(self.base_path, to_dir)

View File

@ -4,15 +4,15 @@ import logging
import pathlib
import re
import bs4
from .ilias_authenticators import ShibbolethAuthenticator
from .organizer import Organizer
from .utils import PrettyLogger
__all__ = ["ILIAS"]
__all__ = ["Ilias"]
logger = logging.getLogger(__name__)
pretty = PrettyLogger(logger)
class ILIAS:
class Ilias:
FILE_RE = re.compile(r"goto\.php\?target=(file_\d+_download)")
DIR_RE = re.compile(r"ilias\.php\?ref_id=(\d+)")
@ -22,7 +22,7 @@ class ILIAS:
self._auth = ShibbolethAuthenticator(base_path / cookie_file)
def synchronize(self, ref_id, to_dir, transform=lambda x: x, filter=lambda x: True):
logging.info(f" Synchronizing ref_id {ref_id} to {to_dir} using the ILIAS synchronizer.")
pretty.starting_synchronizer(to_dir, "ILIAS", f"ref_id {ref_id}")
sync_path = pathlib.Path(self.base_path, to_dir)
orga = Organizer(self.base_path, sync_path)

View File

@ -27,6 +27,7 @@ class ShibbolethAuthenticator:
"application/pdf",
"application/zip",
"application/msword",
"application/vnd.wolfram.nb",
"text/xml",
"text/plain",
"image/jpeg",

View File

@ -9,12 +9,11 @@ import bs4
import requests
from .organizer import Organizer
from .utils import rename, stream_to_path
from .utils import rename, stream_to_path, PrettyLogger
__all__ = [
"Norbert",
]
__all__ = ["Norbert"]
logger = logging.getLogger(__name__)
pretty = PrettyLogger(logger)
class Norbert:
BASE_URL = "https://studwww.informatik.kit.edu/~s_blueml/"
@ -26,7 +25,7 @@ class Norbert:
self._session = requests.Session()
def synchronize(self, to_dir, transform=lambda x: x, unzip=lambda _: True):
logging.info(f" Synchronizing to {to_dir} using the Norbert synchronizer.")
pretty.starting_synchronizer(to_dir, "Norbert")
sync_path = pathlib.Path(self.base_path, to_dir)
orga = Organizer(self.base_path, sync_path)

View File

@ -7,6 +7,7 @@ from . import utils
__all__ = ["Organizer"]
logger = logging.getLogger(__name__)
pretty = utils.PrettyLogger(logger)
class Organizer:
def __init__(self, base_dir, sync_dir):
@ -64,7 +65,7 @@ class Organizer:
if to_path.exists():
if filecmp.cmp(from_path, to_path, shallow=False):
logger.info(f"Ignored {to_path}")
pretty.ignored_file(to_path)
# remember path for later reference
self._added_files.add(to_path.resolve())
@ -73,9 +74,9 @@ class Organizer:
# No further action needed, especially not overwriting symlinks...
return
else:
logger.info(f"Different file at {to_path}")
pretty.modified_file(to_path)
else:
logger.info(f"New file at {to_path}")
pretty.new_file(to_path)
# copy the file from from_path to sync_dir/to_path
# If the file being replaced was a symlink, the link itself is overwritten,

111
PFERD/ti.py Normal file
View File

@ -0,0 +1,111 @@
# Ti synchronizer (crawls pages below http://ti.ira.uka.de/)
import getpass
import logging
import pathlib
import re
import bs4
import requests
from .organizer import Organizer
from .utils import stream_to_path, PrettyLogger
__all__ = ["Ti"]
logger = logging.getLogger(__name__)
pretty = PrettyLogger(logger)
class Ti:
    """Synchronizer that downloads PDF files from pages below ``BASE_URL``.

    The crawl looks for up to three known sub-sections on a course page
    (lecture slides, exercise sheets, tutorials), downloads every linked
    ``*.pdf`` from each of them and hands the files to an ``Organizer``.
    Downloads use HTTP authentication; the credentials are requested
    interactively on first use and kept only for one ``synchronize`` run.
    """

    BASE_URL = "http://ti.ira.uka.de/"
    # Only links whose href ends in ".pdf" are treated as downloadable files.
    FILE_RE = re.compile(r"^.+\.pdf$")

    def __init__(self, base_path):
        """Create a synchronizer whose target directories live in base_path."""
        self.base_path = base_path

        self._session = requests.Session()
        self._credentials = None

    def synchronize(self, urlpart, to_dir, transform=lambda x: x,
                    filter=lambda x: True):
        """Synchronize the course page at ``BASE_URL + urlpart`` into to_dir.

        urlpart   -- path of the course page, relative to BASE_URL
        to_dir    -- target directory, relative to the base path
        transform -- called with each file's PurePath; returns the path to
                     store it under, or None to skip the file
        filter    -- called with each section's PurePath; sections for which
                     it returns a falsy value are not crawled
        """
        pretty.starting_synchronizer(to_dir, "Ti", urlpart)

        sync_path = pathlib.Path(self.base_path, to_dir)

        orga = Organizer(self.base_path, sync_path)
        orga.clean_temp_dir()

        # Never reuse credentials left over from an earlier run.
        self._reset_credentials()

        available = self._find_available(urlpart)

        for name, address in sorted(available.items()):
            path = pathlib.PurePath(name)
            if filter(path):
                self._crawl(urlpart + address, path, orga, transform)
            else:
                # Fixed: this used the misspelt name "loggwe", which raised a
                # NameError as soon as a section was filtered out.
                logger.info(f"Skipping {name}/")

        orga.clean_sync_dir()
        orga.clean_temp_dir()

        # Do not keep the password in memory longer than necessary.
        self._reset_credentials()

    def _find_available(self, urlpart):
        """Return a dict mapping local section names to their URL suffixes.

        A section is included only if the course page contains an element
        whose href points at that section's overview page.
        """
        url = self.BASE_URL + urlpart
        r = self._session.get(url)
        soup = bs4.BeautifulSoup(r.text, "html.parser")

        available = {}

        if soup.find(href="./Vorlesung/Vorlesung.php"):
            logger.info("Found Folien/")
            available["Folien"] = "/Vorlesung/"
        if soup.find(href="./Uebungen/Uebungen.php"):
            logger.info("Found Blätter/")
            available["Blätter"] = "/Uebungen/"
        if soup.find(href="./Tutorien/Tutorien.php"):
            logger.info("Found Tutorien/")
            available["Tutorien"] = "/Tutorien/"

        return available

    def _crawl(self, urlpart, path, orga, transform):
        """Download every PDF linked on ``BASE_URL + urlpart`` below path.

        Each link's href is appended to path, passed through transform, and
        (unless transform returned None) downloaded to a temporary file that
        is then handed to orga.
        """
        url = self.BASE_URL + urlpart
        r = self._session.get(url)
        soup = bs4.BeautifulSoup(r.text, "html.parser")

        for filelink in soup.find_all("a", href=self.FILE_RE):
            filepath = path / filelink["href"]
            fileurl = url + "/" + filelink["href"]

            new_path = transform(filepath)
            if new_path is None:
                continue
            logger.debug(f"Transformed from {filepath} to {new_path}")

            temp_path = orga.temp_file()
            self._download(fileurl, temp_path)
            orga.add_file(temp_path, new_path)

    def _get_credentials(self):
        """Return the (username, password) tuple, prompting if none is cached."""
        if self._credentials is None:
            print("Please enter Ti credentials.")
            username = getpass.getpass(prompt="Username: ")
            password = getpass.getpass(prompt="Password: ")
            self._credentials = (username, password)
        return self._credentials

    def _reset_credentials(self):
        """Forget the cached credentials so the next use prompts again."""
        self._credentials = None

    def _download(self, url, to_path):
        """Stream url to to_path, re-prompting for credentials until it works."""
        # NOTE(review): every non-OK response is treated as "wrong
        # credentials", so e.g. a missing file keeps prompting forever.
        # Consider re-prompting only on an authentication failure.
        while True:
            username, password = self._get_credentials()
            with self._session.get(url, stream=True, auth=(username, password)) as r:
                if r.ok:
                    stream_to_path(r, to_path)
                    return
                else:
                    print("Incorrect credentials.")
                    self._reset_credentials()

View File

@ -1,5 +1,8 @@
import os
import sys
import pathlib
from colorama import Style
from colorama import Fore
__all__ = [
"get_base_dir",
@ -8,6 +11,7 @@ __all__ = [
"stream_to_path",
"ContentTypeException",
"FileNotFoundException",
"PrettyLogger",
]
def get_base_dir(script_file):
@ -26,8 +30,35 @@ def stream_to_path(response, to_path, chunk_size=1024**2):
for chunk in response.iter_content(chunk_size=chunk_size):
fd.write(chunk)
def isOutputPipe():
    """Return True if this program's standard output is not a terminal.

    That is the case when the output is piped or redirected. The previous
    implementation returned the bound method ``sys.stdout.isatty`` without
    calling it, which is always truthy and had the opposite sense.
    """
    return not sys.stdout.isatty()
class ContentTypeException(Exception):
    """Error type carrying no extra state beyond ``Exception``.

    NOTE(review): presumably raised when a response has an unexpected
    content type; confirm against the raise sites.
    """
class FileNotFoundException(Exception):
    """Error type carrying no extra state beyond ``Exception``.

    NOTE(review): presumably raised when an expected file is missing;
    confirm against the raise sites. It is unrelated to the built-in
    ``FileNotFoundError``.
    """
class PrettyLogger:
    """Emit coloured, human-friendly status messages through a logger."""

    def __init__(self, logger):
        """Remember the logger that all messages are written to."""
        self.logger = logger

    def _emit(self, style, text):
        # Every message is wrapped the same way: style codes first, then the
        # text, then a reset so the styling does not leak into later output.
        self.logger.info(f"{style}{text}{Style.RESET_ALL}")

    def modified_file(self, file_name):
        """Report that file_name was modified (bright magenta)."""
        highlight = f"{Fore.MAGENTA}{Style.BRIGHT}"
        self._emit(highlight, f"Modified {file_name}.")

    def new_file(self, file_name):
        """Report that file_name was created (bright green)."""
        highlight = f"{Fore.GREEN}{Style.BRIGHT}"
        self._emit(highlight, f"Created {file_name}.")

    def ignored_file(self, file_name):
        """Report that file_name was ignored (dimmed)."""
        self._emit(f"{Style.DIM}", f"Ignored {file_name}.")

    def starting_synchronizer(self, target_directory, synchronizer_name, subject=None):
        """Announce the start of a synchronizer run (bright cyan).

        An empty log line is written first. subject, when truthy, is put in
        front of the target directory in the announcement.
        """
        subject_part = ""
        if subject:
            subject_part = f"{subject} "

        self.logger.info("")

        announcement = (
            f"Synchronizing {subject_part}to {target_directory}"
            f" using the {synchronizer_name} synchronizer."
        )
        self._emit(f"{Fore.CYAN}{Style.BRIGHT}", announcement)

View File

@ -1,23 +1,40 @@
# PFERD (**P**rogramm **F**ür's **E**infache **R**unterladen von **D**ateien)
# PFERD
**P**rogramm zum **F**lotten, **E**infachen **R**unterladen von **D**ateien
## Installation
Ensure that you have at least Python 3.7 installed.
Ensure that you have at least Python 3.7 installed (3.6 might also work, but this
has not been tested).
To install PFERD or update your installation to the latest version, run:
To install PFERD or update your installation to the latest version, run this
wherever you want to install/have installed PFERD:
```
$ pip install git+https://github.com/Garmelon/PFERD@v1.0.0
$ pip install git+https://github.com/Garmelon/PFERD@v1.1.4
```
The use of [venv](https://docs.python.org/3/library/venv.html) is recommended.
## Example setup
In this example, `python3` refers to at least Python 3.7.
A full example setup and initial use could look like:
```
$ mkdir Vorlesungen
$ cd Vorlesungen
$ python3 -m venv .
$ . bin/activate
$ pip install git+https://github.com/Garmelon/PFERD@v1.0.0
$ pip install git+https://github.com/Garmelon/PFERD@v1.1.4
$ curl -O https://raw.githubusercontent.com/Garmelon/PFERD/master/example_config.py
$ python example_config.py
$ python3 example_config.py
$ deactivate
```
Subsequent runs of the program might look like:
```
$ cd Vorlesungen
$ . bin/activate
$ python3 example_config.py
$ deactivate
```

View File

@ -295,7 +295,7 @@ def main(args):
args = [arg.lower() for arg in args]
ffm = PFERD.FfM(base_dir)
ilias = PFERD.ILIAS(base_dir, "cookie_jar")
ilias = PFERD.Ilias(base_dir, "cookie_jar")
norbert = PFERD.Norbert(base_dir)
# Semester 1

View File

@ -2,11 +2,12 @@ from setuptools import setup
setup(
name="PFERD",
version="1.0.0",
version="1.1.4",
packages=["PFERD"],
install_requires=[
"requests>=2.21.0",
"beautifulsoup4>=4.7.1",
"colorama>=0.4.1"
],
)