Use downloader-specific data classes

This commit is contained in:
Joscha 2020-04-20 18:06:21 +00:00
parent 55ba2f4070
commit 0926d33798
2 changed files with 67 additions and 30 deletions

View File

@ -2,8 +2,9 @@
General downloaders useful in many situations
"""
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, Optional
from typing import Any, Dict, List, Optional
import requests
import requests.auth
@ -13,8 +14,18 @@ from .tmp_dir import TmpDir
from .utils import stream_to_path
# pylint: disable=too-few-public-methods
class HttpDownloader():
@dataclass
class HttpDownloadInfo:
"""
This class describes a single file to be downloaded.
"""
path: Path
url: str
parameters: Dict[str, Any] = field(default_factory=dict)
class HttpDownloader:
"""An HTTP downloader that can handle HTTP basic auth."""
def __init__(
@ -39,20 +50,26 @@ class HttpDownloader():
)
return session
def download(
self,
url: str,
target_path: Path,
parameters: Optional[Dict[str, Any]] = None,
) -> None:
"""Download a given url to a given path, optionally with some get parameters."""
parameters = parameters if parameters else {}
with self._session.get(url, params=parameters, stream=True) as response:
def download_all(self, infos: List[HttpDownloadInfo]) -> None:
"""
Download multiple files one after the other.
"""
for info in infos:
self.download(info)
def download(self, info: HttpDownloadInfo) -> None:
"""
Download a single file.
"""
with self._session.get(info.url, params=info.parameters, stream=True) as response:
if response.status_code == 200:
tmp_file = self._tmp_dir.new_file()
stream_to_path(response, tmp_file)
self._organizer.accept_file(tmp_file, target_path)
self._organizer.accept_file(tmp_file, info.path)
else:
raise Exception(
f"Could not download file, got response {response.status_code}"
)
# TODO use proper exception
raise Exception(f"Could not download file, got response {response.status_code}")

View File

@ -1,7 +1,8 @@
"""Contains a downloader for ILIAS."""
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict
from typing import Any, Dict, List
import bs4
import requests
@ -16,8 +17,18 @@ class ContentTypeException(Exception):
"""Thrown when the content type of the ilias element can not be handled."""
# pylint: disable=too-few-public-methods
class IliasDownloader():
@dataclass
class IliasDownloadInfo:
"""
This class describes a single file to be downloaded.
"""
path: Path
url: str
parameters: Dict[str, Any] = field(default_factory=dict)
class IliasDownloader:
"""A downloader for ILIAS."""
def __init__(self, tmp_dir: TmpDir, organizer: Organizer, authenticator: IliasAuthenticator):
@ -27,32 +38,41 @@ class IliasDownloader():
self._tmp_dir = tmp_dir
self._organizer = organizer
def download(self, url: str, target_path: Path, params: Dict[str, Any]) -> None:
"""Download a file from ILIAS.
Retries authentication until eternity, if it could not fetch the file.
def download_all(self, infos: List[IliasDownloadInfo]) -> None:
"""
Download multiple files one after the other.
"""
for info in infos:
self.download(info)
def download(self, info: IliasDownloadInfo) -> None:
"""
Download a file from ILIAS.
Retries authentication until eternity if it could not fetch the file.
"""
tmp_file = self._tmp_dir.new_file()
while not self._try_download(url, tmp_file, params):
while not self._try_download(info, tmp_file):
self._authenticator.authenticate(self._session)
self._organizer.accept_file(tmp_file, target_path)
self._organizer.accept_file(tmp_file, info.path)
def _try_download(self, url: str, target_path: Path, params: Dict[str, Any]) -> bool:
with self._session.get(url, params=params, stream=True) as response:
def _try_download(self, info: IliasDownloadInfo, target: Path) -> bool:
with self._session.get(info.url, params=info.parameters, stream=True) as response:
content_type = response.headers["content-type"]
if content_type.startswith("text/html"):
# Dangit, we're probably not logged in.
soup = soupify(response)
if self._is_logged_in(soup):
if self._is_logged_in(soupify(response)):
raise ContentTypeException("Attempting to download a web page, not a file")
return False
# Yay, we got the file :)
stream_to_path(response, target_path)
stream_to_path(response, target)
return True
@staticmethod