Compare commits

...

13 Commits

Author SHA1 Message Date
da602366f8 Bump version to 1.1.2 2019-05-17 07:43:32 +00:00
2016f61bf8 Crawl more of the TI page 2019-05-09 11:04:24 +00:00
59c278da2c Bump version to 1.1.1 2019-05-06 12:07:12 +00:00
c72e92db18 Make Ti downloader authentication more robust 2019-05-06 12:04:01 +00:00
44b4204517 Add basic Ti downloader 2019-05-06 11:54:36 +00:00
11b9ff66e4 Bump version to 1.1.0 2019-04-26 09:48:17 +00:00
d730d0064c Conform to other files' __all__ 2019-04-26 09:45:24 +00:00
ae6cc40fb5 Rename ILIAS crawler to ilias
To be consistent with the other classes' capitalisation of acronyms
2019-04-26 04:29:12 +00:00
0891e7f1bc Fix logging messages not appearing 2019-04-26 03:58:11 +00:00
571c2a6c98 Clean up README structure 2019-04-25 20:04:50 +00:00
2d7a2b2d53 Add more usage examples 2019-04-25 20:01:50 +00:00
8550d1101c Fix backronym in README 2019-04-25 19:59:21 +00:00
fd71dc6f6e Use python3 everywhere in README 2019-04-25 19:57:04 +00:00
8 changed files with 143 additions and 16 deletions

View File

@ -3,6 +3,7 @@ import logging
from .ffm import *
from .ilias import *
from .norbert import *
from .ti import *
from .utils import *
__all__ = ["STYLE", "FORMAT", "DATE_FORMAT", "FORMATTER", "enable_logging"]
@ -10,6 +11,7 @@ __all__ = ["STYLE", "FORMAT", "DATE_FORMAT", "FORMATTER", "enable_logging"]
__all__ += ffm.__all__
__all__ += ilias.__all__
__all__ += norbert.__all__
__all__ += ti.__all__
__all__ += utils.__all__
STYLE = "{"

View File

@ -23,7 +23,7 @@ class FfM:
self._session = requests.Session()
def synchronize(self, urlpart, to_dir, transform=lambda x: x):
logging.info(f" Synchronizing {urlpart} to {to_dir} using the FfM synchronizer.")
logger.info(f" Synchronizing {urlpart} to {to_dir} using the FfM synchronizer.")
sync_path = pathlib.Path(self.base_path, to_dir)

View File

@ -9,10 +9,10 @@ import bs4
from .ilias_authenticators import ShibbolethAuthenticator
from .organizer import Organizer
__all__ = ["ILIAS"]
__all__ = ["Ilias"]
logger = logging.getLogger(__name__)
class ILIAS:
class Ilias:
FILE_RE = re.compile(r"goto\.php\?target=(file_\d+_download)")
DIR_RE = re.compile(r"ilias\.php\?ref_id=(\d+)")
@ -22,7 +22,7 @@ class ILIAS:
self._auth = ShibbolethAuthenticator(base_path / cookie_file)
def synchronize(self, ref_id, to_dir, transform=lambda x: x, filter=lambda x: True):
logging.info(f" Synchronizing ref_id {ref_id} to {to_dir} using the ILIAS synchronizer.")
logger.info(f" Synchronizing ref_id {ref_id} to {to_dir} using the Ilias synchronizer.")
sync_path = pathlib.Path(self.base_path, to_dir)
orga = Organizer(self.base_path, sync_path)

View File

@ -11,9 +11,7 @@ import requests
from .organizer import Organizer
from .utils import rename, stream_to_path
__all__ = [
"Norbert",
]
__all__ = ["Norbert"]
logger = logging.getLogger(__name__)
class Norbert:
@ -26,7 +24,7 @@ class Norbert:
self._session = requests.Session()
def synchronize(self, to_dir, transform=lambda x: x, unzip=lambda _: True):
logging.info(f" Synchronizing to {to_dir} using the Norbert synchronizer.")
logger.info(f" Synchronizing to {to_dir} using the Norbert synchronizer.")
sync_path = pathlib.Path(self.base_path, to_dir)
orga = Organizer(self.base_path, sync_path)

110
PFERD/ti.py Normal file
View File

@ -0,0 +1,110 @@
# Ti downloader (ti.ira.uka.de)
import getpass
import logging
import pathlib
import re
import bs4
import requests
from .organizer import Organizer
from .utils import stream_to_path
__all__ = ["Ti"]
logger = logging.getLogger(__name__)
class Ti:
    """Synchronizer that downloads PDF files from the Ti website.

    Starting from ``BASE_URL + urlpart`` it looks for the known section
    links (lecture slides, exercise sheets, tutorials), crawls each section
    page for links ending in ``.pdf`` and downloads them using HTTP
    authentication. Credentials are requested interactively on first use and
    kept only for the duration of one ``synchronize`` call.
    """

    # Root of the site; every ``urlpart`` is appended to this.
    BASE_URL = "http://ti.ira.uka.de/"
    # Matches the ``href`` of links that should be downloaded.
    FILE_RE = re.compile(r"^.+\.pdf$")

    # Status codes interpreted as "wrong username/password". Any other
    # failing status is a genuine error and must not trigger a re-prompt.
    _AUTH_FAILURE_CODES = (401, 403)

    def __init__(self, base_path):
        """Create a synchronizer rooted at ``base_path``.

        Args:
            base_path: Directory that ``to_dir`` arguments of
                ``synchronize`` are resolved against.
        """
        self.base_path = base_path
        self._session = requests.Session()
        # (username, password) tuple, or None if not entered yet.
        self._credentials = None

    def synchronize(self, urlpart, to_dir, transform=lambda x: x,
                    filter=lambda x: True):
        """Download all available sections below ``urlpart`` into ``to_dir``.

        Args:
            urlpart: Path below ``BASE_URL`` identifying the page to crawl.
            to_dir: Target directory, relative to ``base_path``.
            transform: Called with the ``PurePath`` of every file found;
                returns the path to store it under, or ``None`` to skip
                the file.
            filter: Called with the ``PurePath`` of every section name;
                a falsy result skips the whole section.
        """
        logger.info(f"    Synchronizing {urlpart} to {to_dir} using the Ti synchronizer.")

        sync_path = pathlib.Path(self.base_path, to_dir)

        orga = Organizer(self.base_path, sync_path)
        orga.clean_temp_dir()

        # Never reuse credentials entered during an earlier run.
        self._reset_credentials()

        available = self._find_available(urlpart)

        # Sorted so sections are processed in a deterministic order.
        for name, address in sorted(available.items()):
            path = pathlib.PurePath(name)
            if filter(path):
                self._crawl(urlpart + address, path, orga, transform)
            else:
                logger.info(f"Skipping {name}/")

        orga.clean_sync_dir()
        orga.clean_temp_dir()

        self._reset_credentials()

    def _find_available(self, urlpart):
        """Return the sections linked from the page at ``urlpart``.

        Returns:
            A dict mapping the local directory name of each section found
            to the URL suffix of that section's page.
        """
        url = self.BASE_URL + urlpart
        r = self._session.get(url)
        soup = bs4.BeautifulSoup(r.text, "html.parser")

        available = {}

        if soup.find(href="./Vorlesung/Vorlesung.php"):
            logger.info("Found Folien/")
            available["Folien"] = "/Vorlesung/"
        if soup.find(href="./Uebungen/Uebungen.php"):
            logger.info("Found Blätter/")
            available["Blätter"] = "/Uebungen/"
        if soup.find(href="./Tutorien/Tutorien.php"):
            logger.info("Found Tutorien/")
            available["Tutorien"] = "/Tutorien/"

        return available

    def _crawl(self, urlpart, path, orga, transform):
        """Download every PDF linked from the page at ``urlpart``.

        Args:
            urlpart: Path below ``BASE_URL`` of the section page.
            path: ``PurePath`` of the section; file names are appended to it
                before being passed to ``transform``.
            orga: The ``Organizer`` that provides temp files and receives
                the finished downloads.
            transform: See ``synchronize``.
        """
        url = self.BASE_URL + urlpart
        r = self._session.get(url)
        soup = bs4.BeautifulSoup(r.text, "html.parser")

        for filelink in soup.find_all("a", href=self.FILE_RE):
            filepath = path / filelink["href"]
            fileurl = url + "/" + filelink["href"]

            new_path = transform(filepath)
            if new_path is None:
                # The transform asked for this file to be left out.
                continue
            logger.debug(f"Transformed from {filepath} to {new_path}")

            temp_path = orga.temp_file()
            self._download(fileurl, temp_path)
            orga.add_file(temp_path, new_path)

    def _get_credentials(self):
        """Return the ``(username, password)`` tuple, prompting if needed."""
        if self._credentials is None:
            print("Please enter Ti credentials.")
            username = getpass.getpass(prompt="Username: ")
            password = getpass.getpass(prompt="Password: ")
            self._credentials = (username, password)
        return self._credentials

    def _reset_credentials(self):
        """Forget the stored credentials so the next use prompts again."""
        self._credentials = None

    def _download(self, url, to_path):
        """Download ``url`` to ``to_path``, re-prompting on bad credentials.

        Args:
            url: Absolute URL of the file.
            to_path: Local path the response body is streamed to.

        Raises:
            requests.HTTPError: If the server answers with an error status
                that is not an authentication failure (e.g. 404 or 5xx).
                Asking for new credentials cannot fix those, so retrying
                would loop forever.
        """
        while True:
            username, password = self._get_credentials()
            with self._session.get(url, stream=True, auth=(username, password)) as r:
                if r.ok:
                    stream_to_path(r, to_path)
                    return
                if r.status_code not in self._AUTH_FAILURE_CODES:
                    r.raise_for_status()

            print("Incorrect credentials.")
            self._reset_credentials()

View File

@ -1,23 +1,40 @@
# PFERD (**P**rogramm **F**ür's **E**infache **R**unterladen von **D**ateien)
# PFERD
**P**rogramm zum **F**lotten, **E**infachen **R**unterladen von **D**ateien
## Installation
Ensure that you have at least Python 3.7 installed.
Ensure that you have at least Python 3.7 installed (3.6 might also work, but
this has not been tested).
To install PFERD or update your installation to the latest version, run:
To install PFERD or update your installation to the latest version, run this
wherever you want to install/have installed PFERD:
```
$ pip install git+https://github.com/Garmelon/PFERD@v1.0.0
$ pip install git+https://github.com/Garmelon/PFERD@v1.1.2
```
The use of [venv](https://docs.python.org/3/library/venv.html) is recommended.
## Example setup
In this example, `python3` refers to at least Python 3.7.
A full example setup and initial use could look like:
```
$ mkdir Vorlesungen
$ cd Vorlesungen
$ python3 -m venv .
$ . bin/activate
$ pip install git+https://github.com/Garmelon/PFERD@v1.0.0
$ pip install git+https://github.com/Garmelon/PFERD@v1.1.2
$ curl -O https://raw.githubusercontent.com/Garmelon/PFERD/master/example_config.py
$ python example_config.py
$ python3 example_config.py
$ deactivate
```
Subsequent runs of the program might look like:
```
$ cd Vorlesungen
$ . bin/activate
$ python3 example_config.py
$ deactivate
```

View File

@ -295,7 +295,7 @@ def main(args):
args = [arg.lower() for arg in args]
ffm = PFERD.FfM(base_dir)
ilias = PFERD.ILIAS(base_dir, "cookie_jar")
ilias = PFERD.Ilias(base_dir, "cookie_jar")
norbert = PFERD.Norbert(base_dir)
# Semester 1

View File

@ -2,7 +2,7 @@ from setuptools import setup
setup(
name="PFERD",
version="1.0.0",
version="1.1.2",
packages=["PFERD"],
install_requires=[
"requests>=2.21.0",