Add Pferd class

This commit is contained in:
Joscha 2020-04-23 09:44:13 +00:00
parent 3c808879c9
commit 2de4255a78
8 changed files with 118 additions and 29 deletions

View File

@ -7,6 +7,8 @@ more complex configuration, you need to import the other submodules manually.
import logging import logging
from .pferd import Pferd
STYLE = "{" STYLE = "{"
FORMAT = "[{levelname:<7}] {message}" FORMAT = "[{levelname:<7}] {message}"
DATE_FORMAT = "%F %T" DATE_FORMAT = "%F %T"

View File

@ -3,7 +3,6 @@ General downloaders useful in many situations
""" """
from dataclasses import dataclass, field from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, List, Optional from typing import Any, Dict, List, Optional
import requests import requests
@ -67,7 +66,7 @@ class HttpDownloader:
with self._session.get(info.url, params=info.parameters, stream=True) as response: with self._session.get(info.url, params=info.parameters, stream=True) as response:
if response.status_code == 200: if response.status_code == 200:
tmp_file = self._tmp_dir.new_file() tmp_file = self._tmp_dir.new_path()
stream_to_path(response, tmp_file) stream_to_path(response, tmp_file)
self._organizer.accept_file(tmp_file, info.path) self._organizer.accept_file(tmp_file, info.path)
else: else:

View File

@ -2,5 +2,6 @@
Synchronizing files from ILIAS instances (https://www.ilias.de/). Synchronizing files from ILIAS instances (https://www.ilias.de/).
""" """
from .authenticators import * from .authenticators import IliasAuthenticator, KitShibbolethAuthenticator
from .downloader import * from .crawler import IliasCrawler, IliasFilter
from .downloader import IliasDownloader

View File

@ -15,6 +15,9 @@ from ..utils import soupify
LOGGER = logging.getLogger(__name__) LOGGER = logging.getLogger(__name__)
# TODO save cookies whenever we know they're good
class IliasAuthenticator(abc.ABC): class IliasAuthenticator(abc.ABC):
# pylint: disable=too-few-public-methods # pylint: disable=too-few-public-methods

View File

@ -7,11 +7,12 @@ import json
import logging import logging
import re import re
from pathlib import Path from pathlib import Path
from typing import Any, Dict, List, Optional from typing import Any, Callable, Dict, List, Optional
from urllib.parse import (parse_qs, urlencode, urljoin, urlparse, urlsplit, from urllib.parse import (parse_qs, urlencode, urljoin, urlparse, urlsplit,
urlunsplit) urlunsplit)
import bs4 import bs4
import requests
from ..cookie_jar import CookieJar from ..cookie_jar import CookieJar
from ..utils import soupify from ..utils import soupify
@ -22,23 +23,36 @@ from .downloader import IliasDownloadInfo
LOGGER = logging.getLogger(__name__) LOGGER = logging.getLogger(__name__)
IliasFilter = Callable[[Path], bool]
class IliasCrawler: class IliasCrawler:
# pylint: disable=too-few-public-methods # pylint: disable=too-few-public-methods
# TODO use the filter as appropriate
# TODO log the things that were discovered to the console on INFO
""" """
A crawler for ILIAS. A crawler for ILIAS.
""" """
def __init__(self, authenticator: IliasAuthenticator, base_url: str, course_id: str): def __init__(
self,
base_url: str,
course_id: str,
session: requests.Session,
authenticator: IliasAuthenticator,
filter_: IliasFilter
):
""" """
Create a new ILIAS crawler. Create a new ILIAS crawler.
""" """
self._cookie_jar = CookieJar(Path("/tmp/test/cookies"))
self._cookie_jar.load_cookies()
self._base_url = base_url self._base_url = base_url
self._course_id = course_id self._course_id = course_id
self._session = self._cookie_jar.create_session() self._session = session
self._authenticator = authenticator self._authenticator = authenticator
self._filter = filter_
def _abs_url_from_link(self, link_tag: bs4.Tag) -> str: def _abs_url_from_link(self, link_tag: bs4.Tag) -> str:
""" """
@ -342,8 +356,6 @@ class IliasCrawler:
self._authenticator.authenticate(self._session) self._authenticator.authenticate(self._session)
self._cookie_jar.save_cookies("Authed")
return self._get_page(url, params) return self._get_page(url, params)
@staticmethod @staticmethod
@ -369,11 +381,3 @@ class IliasCrawler:
LOGGER.debug("Auth: Found #playerContainer") LOGGER.debug("Auth: Found #playerContainer")
return True return True
return False return False
def run_as_test(ilias_url: str, course_id: int) -> List[IliasDownloadInfo]:
from ..organizer import Organizer
from .authenticators import KitShibbolethAuthenticator
crawler = IliasCrawler(KitShibbolethAuthenticator(), ilias_url, str(course_id))
return crawler.crawl()

View File

@ -33,12 +33,21 @@ class IliasDownloadInfo(Transformable):
class IliasDownloader: class IliasDownloader:
"""A downloader for ILIAS.""" """A downloader for ILIAS."""
def __init__(self, tmp_dir: TmpDir, organizer: Organizer, authenticator: IliasAuthenticator): def __init__(
"""Create a new IliasDownloader.""" self,
self._authenticator = authenticator tmp_dir: TmpDir,
self._session = requests.Session() organizer: Organizer,
session: requests.Session,
authenticator: IliasAuthenticator,
):
"""
Create a new IliasDownloader.
"""
self._tmp_dir = tmp_dir self._tmp_dir = tmp_dir
self._organizer = organizer self._organizer = organizer
self._session = session
self._authenticator = authenticator
def download_all(self, infos: List[IliasDownloadInfo]) -> None: def download_all(self, infos: List[IliasDownloadInfo]) -> None:
""" """
@ -55,7 +64,7 @@ class IliasDownloader:
Retries authentication until eternity if it could not fetch the file. Retries authentication until eternity if it could not fetch the file.
""" """
tmp_file = self._tmp_dir.new_file() tmp_file = self._tmp_dir.new_path()
while not self._try_download(info, tmp_file): while not self._try_download(info, tmp_file):
self._authenticator.authenticate(self._session) self._authenticator.authenticate(self._session)

65
PFERD/pferd.py Normal file
View File

@ -0,0 +1,65 @@
from pathlib import Path
from typing import Optional
from .cookie_jar import CookieJar
from .ilias import (IliasAuthenticator, IliasCrawler, IliasDownloader,
IliasFilter, KitShibbolethAuthenticator)
from .organizer import Organizer
from .tmp_dir import TmpDir
from .transform import Transform, apply_transform
from .utils import Location
class Pferd(Location):
    # pylint: disable=too-many-arguments
    """
    Convenience front end that wires a cookie jar, an ILIAS crawler, an ILIAS
    downloader and an organizer together so that a course can be synchronized
    with a single method call.
    """

    def __init__(self, base_dir: Path, tmp_dir: Path = Path(".tmp")):
        """
        Create a new Pferd rooted at base_dir.

        tmp_dir is passed through self.resolve() before being wrapped in a
        TmpDir (presumably making it relative to base_dir - confirm against
        Location.resolve).
        """
        super().__init__(Path(base_dir))

        resolved_tmp = self.resolve(tmp_dir)
        self._tmp_dir = TmpDir(resolved_tmp)

    def _ilias(
            self,
            target: Path,
            base_url: str,
            course_id: str,
            authenticator: IliasAuthenticator,
            cookies: Optional[Path],
            filter_: IliasFilter,
            transform: Transform,
    ) -> None:
        """
        Crawl one ILIAS course and download everything that was discovered.

        The crawler and the downloader share one session created from the
        cookie jar. The crawl result is run through transform before it is
        handed to the downloader. Cookies are saved once after crawling and
        once more after downloading.
        """
        jar = CookieJar(cookies)
        shared_session = jar.create_session()
        scratch_dir = self._tmp_dir.new_subdir()
        target_organizer = Organizer(self.resolve(target))

        crawler = IliasCrawler(
            base_url=base_url,
            course_id=course_id,
            session=shared_session,
            authenticator=authenticator,
            filter_=filter_,
        )
        downloader = IliasDownloader(
            tmp_dir=scratch_dir,
            organizer=target_organizer,
            session=shared_session,
            authenticator=authenticator,
        )

        jar.load_cookies()

        discovered = crawler.crawl()
        # Saved before downloading starts; presumably so that cookies obtained
        # while crawling are not lost if the download phase fails.
        jar.save_cookies()

        to_download = apply_transform(transform, discovered)
        downloader.download_all(to_download)
        jar.save_cookies()

    def ilias_kit(
            self,
            target: Path,
            course_id: str,
            filter_: IliasFilter = lambda x: True,
            transform: Transform = lambda x: x,
            cookies: Optional[Path] = None,
            username: Optional[str] = None,
            password: Optional[str] = None,
    ) -> None:
        """
        Synchronize a course from the KIT ILIAS instance into target.

        By default the filter accepts everything and the transform returns
        its input unchanged. cookies, username and password are optional and
        are forwarded to the cookie jar and the authenticator respectively.
        """
        # This authenticator only works with the KIT ilias instance.
        kit_authenticator = KitShibbolethAuthenticator(username=username, password=password)

        self._ilias(
            target,
            "https://ilias.studium.kit.edu/",
            course_id,
            kit_authenticator,
            cookies,
            filter_,
            transform,
        )

View File

@ -39,18 +39,24 @@ class TmpDir(Location):
self.cleanup() self.cleanup()
return None return None
def new_file(self, prefix: Optional[str] = None) -> Path: def new_path(self, prefix: Optional[str] = None) -> Path:
"""Return a unique path inside the folder, but don't create a file.""" """
Return a unique path inside the directory. Doesn't create a file or
directory.
"""
name = f"{prefix if prefix else 'tmp'}-{self._inc_and_get_counter():03}" name = f"{prefix if prefix else 'tmp'}-{self._inc_and_get_counter():03}"
LOGGER.debug("Creating temp file %s", name) LOGGER.debug("Creating temp file %s", name)
return self.resolve(Path(name)) return self.resolve(Path(name))
def new_folder(self, prefix: Optional[str] = None) -> 'TmpDir': def new_subdir(self, prefix: Optional[str] = None) -> 'TmpDir':
"""Create a new nested temporary folder and return its path.""" """
name = f"{prefix if prefix else 'tmp'}-{self._inc_and_get_counter():03}" Create a new nested temporary folder and return it.
"""
name = f"{prefix if prefix else 'tmp'}-{self._inc_and_get_counter():03}"
sub_path = self.resolve(Path(name)) sub_path = self.resolve(Path(name))
sub_path.mkdir(parents=True) sub_path.mkdir(parents=True)