From 0926d337980bcb758a51469283cd7bb828f911b7 Mon Sep 17 00:00:00 2001 From: Joscha Date: Mon, 20 Apr 2020 18:06:21 +0000 Subject: [PATCH] Use downloader-specific data classes --- PFERD/downloaders.py | 49 ++++++++++++++++++++++++++------------- PFERD/ilias/downloader.py | 48 +++++++++++++++++++++++++++----------- 2 files changed, 67 insertions(+), 30 deletions(-) diff --git a/PFERD/downloaders.py b/PFERD/downloaders.py index 6c34004..0de6402 100644 --- a/PFERD/downloaders.py +++ b/PFERD/downloaders.py @@ -2,8 +2,9 @@ General downloaders useful in many situations """ +from dataclasses import dataclass, field from pathlib import Path -from typing import Any, Dict, Optional +from typing import Any, Dict, List, Optional import requests import requests.auth @@ -13,8 +14,18 @@ from .tmp_dir import TmpDir from .utils import stream_to_path -# pylint: disable=too-few-public-methods -class HttpDownloader(): +@dataclass +class HttpDownloadInfo: + """ + This class describes a single file to be downloaded. + """ + + path: Path + url: str + parameters: Dict[str, Any] = field(default_factory=dict) + + +class HttpDownloader: """A HTTP downloader that can handle HTTP basic auth.""" def __init__( @@ -39,20 +50,26 @@ class HttpDownloader(): ) return session - def download( - self, - url: str, - target_path: Path, - parameters: Optional[Dict[str, Any]] = None, - ) -> None: - """Download a given url to a given path, optionally with some get parameters.""" - parameters = parameters if parameters else {} - with self._session.get(url, params=parameters, stream=True) as response: + + def download_all(self, infos: List[HttpDownloadInfo]) -> None: + """ + Download multiple files one after the other. + """ + + for info in infos: + self.download(info) + + + def download(self, info: HttpDownloadInfo) -> None: + """ + Download a single file. + """ + + with self._session.get(info.url, params=info.parameters, stream=True) as response: if response.status_code == 200: tmp_file = self._tmp_dir.new_file() stream_to_path(response, tmp_file) - self._organizer.accept_file(tmp_file, target_path) + self._organizer.accept_file(tmp_file, info.path) else: - raise Exception( - f"Could not download file, got response {response.status_code}" - ) + # TODO use proper exception + raise Exception(f"Could not download file, got response {response.status_code}") diff --git a/PFERD/ilias/downloader.py b/PFERD/ilias/downloader.py index 6724ba9..42e62b5 100644 --- a/PFERD/ilias/downloader.py +++ b/PFERD/ilias/downloader.py @@ -1,7 +1,8 @@ """Contains a downloader for ILIAS.""" +from dataclasses import dataclass, field from pathlib import Path -from typing import Any, Dict +from typing import Any, Dict, List import bs4 import requests @@ -16,8 +17,18 @@ class ContentTypeException(Exception): """Thrown when the content type of the ilias element can not be handled.""" -# pylint: disable=too-few-public-methods -class IliasDownloader(): +@dataclass +class IliasDownloadInfo: + """ + This class describes a single file to be downloaded. + """ + + path: Path + url: str + parameters: Dict[str, Any] = field(default_factory=dict) + + +class IliasDownloader: """A downloader for ILIAS.""" def __init__(self, tmp_dir: TmpDir, organizer: Organizer, authenticator: IliasAuthenticator): @@ -27,32 +38,41 @@ class IliasDownloader(): self._tmp_dir = tmp_dir self._organizer = organizer - def download(self, url: str, target_path: Path, params: Dict[str, Any]) -> None: - """Download a file from ILIAS. - - Retries authentication until eternity, if it could not fetch the file. + def download_all(self, infos: List[IliasDownloadInfo]) -> None: """ + Download multiple files one after the other. + """ + + for info in infos: + self.download(info) + + def download(self, info: IliasDownloadInfo) -> None: + """ + Download a file from ILIAS. + + Retries authentication until eternity if it could not fetch the file. + """ + tmp_file = self._tmp_dir.new_file() - while not self._try_download(url, tmp_file, params): + while not self._try_download(info, tmp_file): self._authenticator.authenticate(self._session) - self._organizer.accept_file(tmp_file, target_path) + self._organizer.accept_file(tmp_file, info.path) - def _try_download(self, url: str, target_path: Path, params: Dict[str, Any]) -> bool: - with self._session.get(url, params=params, stream=True) as response: + def _try_download(self, info: IliasDownloadInfo, target: Path) -> bool: + with self._session.get(info.url, params=info.parameters, stream=True) as response: content_type = response.headers["content-type"] if content_type.startswith("text/html"): # Dangit, we're probably not logged in. - soup = soupify(response) - if self._is_logged_in(soup): + if self._is_logged_in(soupify(response)): raise ContentTypeException("Attempting to download a web page, not a file") return False # Yay, we got the file :) - stream_to_path(response, target_path) + stream_to_path(response, target) return True @staticmethod