mirror of
https://github.com/Garmelon/PFERD.git
synced 2023-12-21 10:23:01 +01:00
Add Pferd class
This commit is contained in:
parent
3c808879c9
commit
2de4255a78
@ -7,6 +7,8 @@ more complex configuration, you need to import the other submodules manually.
|
|||||||
|
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
|
from .pferd import Pferd
|
||||||
|
|
||||||
STYLE = "{"
|
STYLE = "{"
|
||||||
FORMAT = "[{levelname:<7}] {message}"
|
FORMAT = "[{levelname:<7}] {message}"
|
||||||
DATE_FORMAT = "%F %T"
|
DATE_FORMAT = "%F %T"
|
||||||
|
@ -3,7 +3,6 @@ General downloaders useful in many situations
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
from pathlib import Path
|
|
||||||
from typing import Any, Dict, List, Optional
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
@ -67,7 +66,7 @@ class HttpDownloader:
|
|||||||
|
|
||||||
with self._session.get(info.url, params=info.parameters, stream=True) as response:
|
with self._session.get(info.url, params=info.parameters, stream=True) as response:
|
||||||
if response.status_code == 200:
|
if response.status_code == 200:
|
||||||
tmp_file = self._tmp_dir.new_file()
|
tmp_file = self._tmp_dir.new_path()
|
||||||
stream_to_path(response, tmp_file)
|
stream_to_path(response, tmp_file)
|
||||||
self._organizer.accept_file(tmp_file, info.path)
|
self._organizer.accept_file(tmp_file, info.path)
|
||||||
else:
|
else:
|
||||||
|
@ -2,5 +2,6 @@
|
|||||||
Synchronizing files from ILIAS instances (https://www.ilias.de/).
|
Synchronizing files from ILIAS instances (https://www.ilias.de/).
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from .authenticators import *
|
from .authenticators import IliasAuthenticator, KitShibbolethAuthenticator
|
||||||
from .downloader import *
|
from .crawler import IliasCrawler, IliasFilter
|
||||||
|
from .downloader import IliasDownloader
|
||||||
|
@ -15,6 +15,9 @@ from ..utils import soupify
|
|||||||
LOGGER = logging.getLogger(__name__)
|
LOGGER = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
# TODO save cookies whenever we know they're good
|
||||||
|
|
||||||
|
|
||||||
class IliasAuthenticator(abc.ABC):
|
class IliasAuthenticator(abc.ABC):
|
||||||
# pylint: disable=too-few-public-methods
|
# pylint: disable=too-few-public-methods
|
||||||
|
|
||||||
|
@ -7,11 +7,12 @@ import json
|
|||||||
import logging
|
import logging
|
||||||
import re
|
import re
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, Dict, List, Optional
|
from typing import Any, Callable, Dict, List, Optional
|
||||||
from urllib.parse import (parse_qs, urlencode, urljoin, urlparse, urlsplit,
|
from urllib.parse import (parse_qs, urlencode, urljoin, urlparse, urlsplit,
|
||||||
urlunsplit)
|
urlunsplit)
|
||||||
|
|
||||||
import bs4
|
import bs4
|
||||||
|
import requests
|
||||||
|
|
||||||
from ..cookie_jar import CookieJar
|
from ..cookie_jar import CookieJar
|
||||||
from ..utils import soupify
|
from ..utils import soupify
|
||||||
@ -22,23 +23,36 @@ from .downloader import IliasDownloadInfo
|
|||||||
LOGGER = logging.getLogger(__name__)
|
LOGGER = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
IliasFilter = Callable[[Path], bool]
|
||||||
|
|
||||||
|
|
||||||
class IliasCrawler:
|
class IliasCrawler:
|
||||||
# pylint: disable=too-few-public-methods
|
# pylint: disable=too-few-public-methods
|
||||||
|
|
||||||
|
# TODO use the filter as appropriate
|
||||||
|
# TODO log the things that were discovered to the console on INFO
|
||||||
|
|
||||||
"""
|
"""
|
||||||
A crawler for ILIAS.
|
A crawler for ILIAS.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, authenticator: IliasAuthenticator, base_url: str, course_id: str):
|
def __init__(
|
||||||
|
self,
|
||||||
|
base_url: str,
|
||||||
|
course_id: str,
|
||||||
|
session: requests.Session,
|
||||||
|
authenticator: IliasAuthenticator,
|
||||||
|
filter_: IliasFilter
|
||||||
|
):
|
||||||
"""
|
"""
|
||||||
Create a new ILIAS crawler.
|
Create a new ILIAS crawler.
|
||||||
"""
|
"""
|
||||||
self._cookie_jar = CookieJar(Path("/tmp/test/cookies"))
|
|
||||||
self._cookie_jar.load_cookies()
|
|
||||||
|
|
||||||
self._base_url = base_url
|
self._base_url = base_url
|
||||||
self._course_id = course_id
|
self._course_id = course_id
|
||||||
self._session = self._cookie_jar.create_session()
|
self._session = session
|
||||||
self._authenticator = authenticator
|
self._authenticator = authenticator
|
||||||
|
self._filter = filter_
|
||||||
|
|
||||||
def _abs_url_from_link(self, link_tag: bs4.Tag) -> str:
|
def _abs_url_from_link(self, link_tag: bs4.Tag) -> str:
|
||||||
"""
|
"""
|
||||||
@ -342,8 +356,6 @@ class IliasCrawler:
|
|||||||
|
|
||||||
self._authenticator.authenticate(self._session)
|
self._authenticator.authenticate(self._session)
|
||||||
|
|
||||||
self._cookie_jar.save_cookies("Authed")
|
|
||||||
|
|
||||||
return self._get_page(url, params)
|
return self._get_page(url, params)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@ -369,11 +381,3 @@ class IliasCrawler:
|
|||||||
LOGGER.debug("Auth: Found #playerContainer")
|
LOGGER.debug("Auth: Found #playerContainer")
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
def run_as_test(ilias_url: str, course_id: int) -> List[IliasDownloadInfo]:
|
|
||||||
from ..organizer import Organizer
|
|
||||||
from .authenticators import KitShibbolethAuthenticator
|
|
||||||
|
|
||||||
crawler = IliasCrawler(KitShibbolethAuthenticator(), ilias_url, str(course_id))
|
|
||||||
return crawler.crawl()
|
|
||||||
|
@ -33,12 +33,21 @@ class IliasDownloadInfo(Transformable):
|
|||||||
class IliasDownloader:
|
class IliasDownloader:
|
||||||
"""A downloader for ILIAS."""
|
"""A downloader for ILIAS."""
|
||||||
|
|
||||||
def __init__(self, tmp_dir: TmpDir, organizer: Organizer, authenticator: IliasAuthenticator):
|
def __init__(
|
||||||
"""Create a new IliasDownloader."""
|
self,
|
||||||
self._authenticator = authenticator
|
tmp_dir: TmpDir,
|
||||||
self._session = requests.Session()
|
organizer: Organizer,
|
||||||
|
session: requests.Session,
|
||||||
|
authenticator: IliasAuthenticator,
|
||||||
|
):
|
||||||
|
"""
|
||||||
|
Create a new IliasDownloader.
|
||||||
|
"""
|
||||||
|
|
||||||
self._tmp_dir = tmp_dir
|
self._tmp_dir = tmp_dir
|
||||||
self._organizer = organizer
|
self._organizer = organizer
|
||||||
|
self._session = session
|
||||||
|
self._authenticator = authenticator
|
||||||
|
|
||||||
def download_all(self, infos: List[IliasDownloadInfo]) -> None:
|
def download_all(self, infos: List[IliasDownloadInfo]) -> None:
|
||||||
"""
|
"""
|
||||||
@ -55,7 +64,7 @@ class IliasDownloader:
|
|||||||
Retries authentication until eternity if it could not fetch the file.
|
Retries authentication until eternity if it could not fetch the file.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
tmp_file = self._tmp_dir.new_file()
|
tmp_file = self._tmp_dir.new_path()
|
||||||
|
|
||||||
while not self._try_download(info, tmp_file):
|
while not self._try_download(info, tmp_file):
|
||||||
self._authenticator.authenticate(self._session)
|
self._authenticator.authenticate(self._session)
|
||||||
|
65
PFERD/pferd.py
Normal file
65
PFERD/pferd.py
Normal file
@ -0,0 +1,65 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from .cookie_jar import CookieJar
|
||||||
|
from .ilias import (IliasAuthenticator, IliasCrawler, IliasDownloader,
|
||||||
|
IliasFilter, KitShibbolethAuthenticator)
|
||||||
|
from .organizer import Organizer
|
||||||
|
from .tmp_dir import TmpDir
|
||||||
|
from .transform import Transform, apply_transform
|
||||||
|
from .utils import Location
|
||||||
|
|
||||||
|
|
||||||
|
class Pferd(Location):
    # pylint: disable=too-many-arguments

    """
    Ties together cookie jar, session, crawler, downloader and organizer so
    that an ILIAS course can be synchronized with a single method call.

    The instance is a Location constructed from base_dir; target and
    temporary directories are passed through self.resolve() before use.
    """

    def __init__(self, base_dir: Path, tmp_dir: Path = Path(".tmp")):
        """
        Create a new Pferd.

        base_dir is converted to a Path and handed to the Location
        constructor. tmp_dir is resolved via self.resolve() and wrapped in a
        TmpDir, from which per-run subdirectories are requested later.
        """

        super().__init__(Path(base_dir))

        resolved_tmp = self.resolve(tmp_dir)
        self._tmp_dir = TmpDir(resolved_tmp)

    def _ilias(
            self,
            target: Path,
            base_url: str,
            course_id: str,
            authenticator: IliasAuthenticator,
            cookies: Optional[Path],
            filter_: IliasFilter,
            transform: Transform,
    ) -> None:
        """
        Crawl one ILIAS course and download what the crawler found.

        A CookieJar is built from cookies and provides the session that is
        shared by the crawler and the downloader. Downloads go through a new
        subdirectory of the temporary directory and are handed to an
        Organizer created for the resolved target. The crawl result is passed
        through apply_transform() together with transform before downloading.
        """

        jar = CookieJar(cookies)
        shared_session = jar.create_session()
        scratch_dir = self._tmp_dir.new_subdir()
        destination = Organizer(self.resolve(target))

        crawler = IliasCrawler(base_url, course_id, shared_session, authenticator, filter_)
        downloader = IliasDownloader(scratch_dir, destination, shared_session, authenticator)

        jar.load_cookies()

        # Cookies are written once after crawling and once more after
        # downloading. Presumably this keeps a session that was refreshed
        # during the crawl even if downloading fails later - TODO confirm.
        discovered = crawler.crawl()
        jar.save_cookies()

        transformed = apply_transform(transform, discovered)
        downloader.download_all(transformed)
        jar.save_cookies()

    def ilias_kit(
            self,
            target: Path,
            course_id: str,
            filter_: IliasFilter = lambda x: True,
            transform: Transform = lambda x: x,
            cookies: Optional[Path] = None,
            username: Optional[str] = None,
            password: Optional[str] = None,
    ) -> None:
        """
        Synchronize a course from the KIT ILIAS instance into target.

        Authentication uses a KitShibbolethAuthenticator created from
        username and password. By default the filter accepts everything and
        the transform returns its argument unchanged. All remaining arguments
        are forwarded to _ilias() as given.
        """

        # This authenticator only works with the KIT ilias instance.
        kit_auth = KitShibbolethAuthenticator(username=username, password=password)

        self._ilias(
            target=target,
            base_url="https://ilias.studium.kit.edu/",
            course_id=course_id,
            authenticator=kit_auth,
            cookies=cookies,
            filter_=filter_,
            transform=transform,
        )
|
@ -39,18 +39,24 @@ class TmpDir(Location):
|
|||||||
self.cleanup()
|
self.cleanup()
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def new_file(self, prefix: Optional[str] = None) -> Path:
|
def new_path(self, prefix: Optional[str] = None) -> Path:
|
||||||
"""Return a unique path inside the folder, but don't create a file."""
|
"""
|
||||||
|
Return a unique path inside the directory. Doesn't create a file or
|
||||||
|
directory.
|
||||||
|
"""
|
||||||
|
|
||||||
name = f"{prefix if prefix else 'tmp'}-{self._inc_and_get_counter():03}"
|
name = f"{prefix if prefix else 'tmp'}-{self._inc_and_get_counter():03}"
|
||||||
|
|
||||||
LOGGER.debug("Creating temp file %s", name)
|
LOGGER.debug("Creating temp file %s", name)
|
||||||
|
|
||||||
return self.resolve(Path(name))
|
return self.resolve(Path(name))
|
||||||
|
|
||||||
def new_folder(self, prefix: Optional[str] = None) -> 'TmpDir':
|
def new_subdir(self, prefix: Optional[str] = None) -> 'TmpDir':
|
||||||
"""Create a new nested temporary folder and return its path."""
|
"""
|
||||||
name = f"{prefix if prefix else 'tmp'}-{self._inc_and_get_counter():03}"
|
Create a new nested temporary folder and return it.
|
||||||
|
"""
|
||||||
|
|
||||||
|
name = f"{prefix if prefix else 'tmp'}-{self._inc_and_get_counter():03}"
|
||||||
sub_path = self.resolve(Path(name))
|
sub_path = self.resolve(Path(name))
|
||||||
sub_path.mkdir(parents=True)
|
sub_path.mkdir(parents=True)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user