Use downloader-specific data classes

This commit is contained in:
Joscha 2020-04-20 18:06:21 +00:00
parent 55ba2f4070
commit 0926d33798
2 changed files with 67 additions and 30 deletions

View File

@ -2,8 +2,9 @@
General downloaders useful in many situations General downloaders useful in many situations
""" """
from dataclasses import dataclass, field
from pathlib import Path from pathlib import Path
from typing import Any, Dict, Optional from typing import Any, Dict, List, Optional
import requests import requests
import requests.auth import requests.auth
@ -13,8 +14,18 @@ from .tmp_dir import TmpDir
from .utils import stream_to_path from .utils import stream_to_path
@dataclass
class HttpDownloadInfo:
    """
    This class describes a single file to be downloaded.
    """

    # Path under which the downloaded file is handed to the organizer.
    path: Path
    # Address the file is requested from.
    url: str
    # Passed as ``params=`` to the session's ``get`` call; every instance
    # gets its own empty dict by default.
    parameters: Dict[str, Any] = field(default_factory=dict)
class HttpDownloader:
"""A HTTP downloader that can handle HTTP basic auth.""" """A HTTP downloader that can handle HTTP basic auth."""
def __init__( def __init__(
@ -39,20 +50,26 @@ class HttpDownloader():
) )
return session return session
def download_all(self, infos: List[HttpDownloadInfo]) -> None:
    """
    Download multiple files one after the other.

    Each entry is passed to ``download`` in list order; an exception raised
    for one entry propagates and stops the remaining downloads.
    """
    for info in infos:
        self.download(info)
def download(self, info: HttpDownloadInfo) -> None:
    """
    Download a single file.

    Requests ``info.url`` with ``info.parameters`` as request parameters,
    streams the response into a fresh temporary file and hands that file
    to the organizer under ``info.path``.

    Raises:
        Exception: If the response status code is anything other than 200.
    """
    with self._session.get(info.url, params=info.parameters, stream=True) as response:
        if response.status_code != 200:
            # TODO use proper exception
            raise Exception(f"Could not download file, got response {response.status_code}")

        tmp_file = self._tmp_dir.new_file()
        stream_to_path(response, tmp_file)
        self._organizer.accept_file(tmp_file, info.path)

View File

@ -1,7 +1,8 @@
"""Contains a downloader for ILIAS.""" """Contains a downloader for ILIAS."""
from dataclasses import dataclass, field
from pathlib import Path from pathlib import Path
from typing import Any, Dict from typing import Any, Dict, List
import bs4 import bs4
import requests import requests
@ -16,8 +17,18 @@ class ContentTypeException(Exception):
"""Thrown when the content type of the ilias element can not be handled.""" """Thrown when the content type of the ilias element can not be handled."""
@dataclass
class IliasDownloadInfo:
    """
    This class describes a single file to be downloaded.
    """

    # Path under which the downloaded file is handed to the organizer.
    path: Path
    # Address the file is requested from.
    url: str
    # Passed as ``params=`` to the session's ``get`` call; every instance
    # gets its own empty dict by default.
    parameters: Dict[str, Any] = field(default_factory=dict)
class IliasDownloader:
"""A downloader for ILIAS.""" """A downloader for ILIAS."""
def __init__(self, tmp_dir: TmpDir, organizer: Organizer, authenticator: IliasAuthenticator): def __init__(self, tmp_dir: TmpDir, organizer: Organizer, authenticator: IliasAuthenticator):
@ -27,32 +38,41 @@ class IliasDownloader():
self._tmp_dir = tmp_dir self._tmp_dir = tmp_dir
self._organizer = organizer self._organizer = organizer
def download_all(self, infos: List[IliasDownloadInfo]) -> None:
    """
    Download multiple files one after the other.

    Each entry is passed to ``download`` in list order; an exception raised
    for one entry propagates and stops the remaining downloads.
    """
    for info in infos:
        self.download(info)
def download(self, info: IliasDownloadInfo) -> None:
    """
    Download a file from ILIAS.

    Retries authentication until eternity if it could not fetch the file.
    Once an attempt succeeds, the temporary file is handed to the organizer
    under ``info.path``.
    """
    tmp_file = self._tmp_dir.new_file()

    # A failed attempt is answered by re-authenticating the session and
    # trying again; there is deliberately no retry limit here.
    while not self._try_download(info, tmp_file):
        self._authenticator.authenticate(self._session)

    self._organizer.accept_file(tmp_file, info.path)
def _try_download(self, info: IliasDownloadInfo, target: Path) -> bool:
    """
    Make one attempt to fetch ``info.url`` and stream it to ``target``.

    Returns:
        True if the response was streamed to ``target``; False if an HTML
        page came back while the session is not logged in.

    Raises:
        ContentTypeException: If an HTML page came back although the
            session is logged in.
    """
    with self._session.get(info.url, params=info.parameters, stream=True) as response:
        # A response without a Content-Type header is treated as a file
        # instead of failing with a KeyError.
        content_type = response.headers.get("content-type", "")

        if content_type.startswith("text/html"):
            # Dangit, we're probably not logged in.
            if self._is_logged_in(soupify(response)):
                raise ContentTypeException("Attempting to download a web page, not a file")
            return False

        # Yay, we got the file :)
        stream_to_path(response, target)
        return True
@staticmethod @staticmethod