2020-04-20 19:08:51 +02:00
|
|
|
"""
|
|
|
|
General downloaders useful in many situations
|
|
|
|
"""
|
2020-04-20 17:43:41 +02:00
|
|
|
|
2020-04-20 20:06:21 +02:00
|
|
|
from dataclasses import dataclass, field
|
2020-04-20 17:43:41 +02:00
|
|
|
from pathlib import Path
|
2020-04-20 20:06:21 +02:00
|
|
|
from typing import Any, Dict, List, Optional
|
2020-04-20 17:43:41 +02:00
|
|
|
|
|
|
|
import requests
|
|
|
|
import requests.auth
|
|
|
|
|
2020-04-20 18:50:23 +02:00
|
|
|
from .organizer import Organizer
|
2020-04-20 17:43:41 +02:00
|
|
|
from .tmp_dir import TmpDir
|
2020-04-22 20:25:09 +02:00
|
|
|
from .transform import Transformable
|
2020-04-20 17:43:41 +02:00
|
|
|
from .utils import stream_to_path
|
|
|
|
|
|
|
|
|
2020-04-20 20:06:21 +02:00
|
|
|
@dataclass
class HttpDownloadInfo(Transformable):
    """
    Description of one file that should be fetched over HTTP.

    Extends ``Transformable``; the downloader also reads a ``path``
    attribute from instances of this class, which is not declared here
    and is presumably inherited from ``Transformable`` (TODO confirm).
    """

    # Address the file is requested from.
    url: str

    # Passed as ``params`` to the session's ``get`` call by the downloader.
    # Each instance gets its own fresh, empty dict by default.
    parameters: Dict[str, Any] = field(default_factory=dict)
|
|
|
|
|
|
|
|
|
|
|
|
class HttpDownloader:
    """Fetch files via HTTP, optionally authenticating with HTTP basic auth."""

    def __init__(
        self,
        tmp_dir: TmpDir,
        organizer: Organizer,
        username: Optional[str],
        password: Optional[str],
    ):
        """
        Set up the downloader and the session it uses for all requests.

        Arguments:
            tmp_dir: source of the temporary files downloads are written to.
            organizer: receives each finished download via ``accept_file``.
            username: user name for HTTP basic auth, or None.
            password: password for HTTP basic auth, or None.
        """
        self._organizer = organizer
        self._tmp_dir = tmp_dir
        self._username = username
        self._password = password

        # Built last: the session setup reads the credentials stored above.
        self._session = self._build_session()

    def _build_session(self) -> requests.Session:
        """
        Create the requests session used by this downloader.

        Basic auth is only attached when both the user name and the
        password are truthy; otherwise the session is returned without
        any authentication configured.
        """
        new_session = requests.Session()

        if not self._username or not self._password:
            return new_session

        new_session.auth = requests.auth.HTTPBasicAuth(self._username, self._password)
        return new_session

    def download_all(self, infos: List[HttpDownloadInfo]) -> None:
        """
        Download every given file, sequentially and in list order.

        An exception raised by one download propagates immediately, so
        later entries are not attempted.
        """
        for download_info in infos:
            self.download(download_info)

    def download(self, info: HttpDownloadInfo) -> None:
        """
        Download one file and pass it on to the organizer.

        The request is issued with ``stream=True`` using ``info.url`` and
        ``info.parameters``. Only a 200 response is treated as success;
        any other status code raises an ``Exception``.
        """
        with self._session.get(info.url, params=info.parameters, stream=True) as response:
            if response.status_code != 200:
                # TODO use proper exception
                raise Exception(f"Could not download file, got response {response.status_code}")

            # The response is stored in a temporary file first; the
            # organizer then receives that file along with info.path.
            download_target = self._tmp_dir.new_file()
            stream_to_path(response, download_target)
            self._organizer.accept_file(download_target, info.path)
|