transition from requests to httpx

.github/workflows/package.yml

@@ -23,7 +23,7 @@ jobs:
           python-version: '3.x'

       - name: "Install dependencies"
-        run: "pip install setuptools keyring pyinstaller rich requests beautifulsoup4 -f --upgrade"
+        run: "pip install setuptools keyring pyinstaller rich httpx beautifulsoup4 -f --upgrade"

       - name: "Install sync_url.py"
         run: "pyinstaller sync_url.py -F"

PFERD/cookie_jar.py

@@ -1,11 +1,11 @@
-"""A helper for requests cookies."""
+"""A helper for httpx cookies."""

 import logging
 from http.cookiejar import LoadError, LWPCookieJar
 from pathlib import Path
 from typing import Optional

-import requests
+import httpx

 LOGGER = logging.getLogger(__name__)

@@ -26,7 +26,7 @@ class CookieJar:

     @property
     def cookies(self) -> LWPCookieJar:
-        """Return the requests cookie jar."""
+        """Return the httpx cookie jar."""
         return self._cookies

     def load_cookies(self) -> None:
@@ -57,13 +57,11 @@ class CookieJar:
         # TODO possibly catch a few more exceptions
         self._cookies.save(ignore_discard=True)

-    def create_session(self) -> requests.Session:
-        """Create a new session using the cookie jar."""
-        sess = requests.Session()
-
-        # From the requests docs: "All requests code should work out of the box
-        # with externally provided instances of CookieJar, e.g. LWPCookieJar
-        # and FileCookieJar."
-        sess.cookies = self.cookies  # type: ignore
-
-        return sess
+    def create_client(self) -> httpx.Client:
+        """Create a new client using the cookie jar."""
+        # TODO: timeout=None was the default behaviour of requests. An appropriate value should probably be set
+        client = httpx.Client(timeout=None)
+
+        client.cookies = self.cookies  # type: ignore
+
+        return client

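Note on the cookie handover above: httpx, like requests, accepts any stdlib
http.cookiejar.CookieJar where it expects cookies, so the existing LWPCookieJar
can be attached directly. A minimal sketch of the pattern (the file name is
illustrative, not from the commit):

    from http.cookiejar import LWPCookieJar

    import httpx

    jar = LWPCookieJar("cookies.txt")
    client = httpx.Client(timeout=None)  # requests had no default timeout; httpx defaults to 5 s
    client.cookies = jar  # httpx wraps the jar in an httpx.Cookies object

    client.get("https://example.com")
    jar.save(ignore_discard=True)  # persist any cookies the server set
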
PFERD/diva.py

@@ -7,7 +7,7 @@ from dataclasses import dataclass
 from pathlib import Path
 from typing import Any, Callable, List, Optional

-import requests
+import httpx

 from .errors import FatalException
 from .logging import PrettyLogger
@@ -69,7 +69,7 @@ class DivaPlaylistCrawler:
         )
         base_name = match.group(1)

-        response = requests.get(cls._PLAYLIST_BASE_URL + base_name + ".json")
+        response = httpx.get(cls._PLAYLIST_BASE_URL + base_name + ".json")

         if response.status_code != 200:
             raise FatalException(
@@ -88,7 +88,7 @@ class DivaPlaylistCrawler:
         """
         Crawls the playlist given in the constructor.
         """
-        response = requests.get(self._COLLECTION_BASE_URL, params={"collection": self._id})
+        response = httpx.get(self._COLLECTION_BASE_URL, params={"collection": self._id})
         if response.status_code != 200:
             raise FatalException(f"Server returned status {response.status_code}.")

@@ -143,7 +143,7 @@ class DivaDownloader:
         self._tmp_dir = tmp_dir
         self._organizer = organizer
         self._strategy = strategy
-        self._session = requests.session()
+        self._client = httpx.Client()

     def download_all(self, infos: List[DivaDownloadInfo]) -> None:
         """
@@ -160,7 +160,7 @@ class DivaDownloader:
             self._organizer.mark(info.path)
             return

-        with self._session.get(info.url, stream=True) as response:
+        with self._client.stream("GET", info.url) as response:
             if response.status_code == 200:
                 tmp_file = self._tmp_dir.new_path()
                 stream_to_path(response, tmp_file, info.path.name)

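The download path above shows the commit's recurring streaming change: requests
streamed by passing stream=True to get(), while httpx moves streaming into a
dedicated client.stream() context manager that sends the request but defers
reading the body. A side-by-side sketch (the URL is illustrative):

    import httpx

    client = httpx.Client()

    # requests:
    #     with session.get(url, stream=True) as response:
    #         for chunk in response.iter_content(chunk_size=1024 ** 2):
    #             ...
    #
    # httpx:
    with client.stream("GET", "https://example.com/big-file") as response:
        for chunk in response.iter_bytes():  # body is read incrementally here
            ...
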
PFERD/downloaders.py

@@ -5,8 +5,7 @@ General downloaders useful in many situations
 from dataclasses import dataclass, field
 from typing import Any, Dict, List, Optional

-import requests
-import requests.auth
+import httpx

 from .organizer import Organizer
 from .tmp_dir import TmpDir
@@ -39,15 +38,15 @@ class HttpDownloader:
         self._tmp_dir = tmp_dir
         self._username = username
         self._password = password
-        self._session = self._build_session()
+        self._client = self._build_client()

-    def _build_session(self) -> requests.Session:
-        session = requests.Session()
+    def _build_client(self) -> httpx.Client:
+        client = httpx.Client()
         if self._username and self._password:
-            session.auth = requests.auth.HTTPBasicAuth(
+            client.auth = httpx.BasicAuth(
                 self._username, self._password
             )
-        return session
+        return client

     def download_all(self, infos: List[HttpDownloadInfo]) -> None:
         """
@@ -62,7 +61,7 @@ class HttpDownloader:
         Download a single file.
         """

-        with self._session.get(info.url, params=info.parameters, stream=True) as response:
+        with self._client.stream("GET", info.url, params=info.parameters) as response:
             if response.status_code == 200:
                 tmp_file = self._tmp_dir.new_path()
                 stream_to_path(response, tmp_file, info.path.name)

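httpx exposes its basic-auth helper at the top level as httpx.BasicAuth; there
is no requests-style httpx.auth.HTTPBasicAuth in the pinned httpx versions. A
short sketch of equivalent ways to set it (credentials are placeholders):

    import httpx

    client = httpx.Client()
    client.auth = httpx.BasicAuth("user", "secret")

    # or pass a plain tuple when constructing the client:
    client = httpx.Client(auth=("user", "secret"))
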
PFERD/ilias/authenticators.py

@@ -7,7 +7,7 @@ import logging
 from typing import Optional

 import bs4
-import requests
+import httpx

 from ..authenticators import TfaAuthenticator, UserPassAuthenticator
 from ..utils import soupify
@@ -19,14 +19,14 @@ class IliasAuthenticator(abc.ABC):
     # pylint: disable=too-few-public-methods

     """
-    An authenticator that logs an existing requests session into an ILIAS
+    An authenticator that logs an existing httpx client into an ILIAS
     account.
     """

     @abc.abstractmethod
-    def authenticate(self, sess: requests.Session) -> None:
+    def authenticate(self, client: httpx.Client) -> None:
         """
-        Log a requests session into this authenticator's ILIAS account.
+        Log an httpx client into this authenticator's ILIAS account.
         """

@@ -45,7 +45,7 @@ class KitShibbolethAuthenticator(IliasAuthenticator):

         self._tfa_auth = TfaAuthenticator("KIT ILIAS Shibboleth")

-    def authenticate(self, sess: requests.Session) -> None:
+    def authenticate(self, sess: httpx.Client) -> None:
         """
         Performs the ILIAS Shibboleth authentication dance and saves the login
         cookies it receives.
@@ -109,7 +109,7 @@ class KitShibbolethAuthenticator(IliasAuthenticator):

     def _authenticate_tfa(
             self,
-            session: requests.Session,
+            client: httpx.Client,
             soup: bs4.BeautifulSoup
     ) -> bs4.BeautifulSoup:
         # Searching the form here so that this fails before asking for
@@ -125,7 +125,7 @@ class KitShibbolethAuthenticator(IliasAuthenticator):
             "_eventId_proceed": "",
             "j_tokenNumber": self._tfa_auth.get_token()
         }
-        return soupify(session.post(url, data=data))
+        return soupify(client.post(url, data=data))

     @staticmethod
     def _login_successful(soup: bs4.BeautifulSoup) -> bool:

PFERD/ilias/crawler.py

@@ -13,7 +13,7 @@ from urllib.parse import (parse_qs, urlencode, urljoin, urlparse, urlsplit,
                           urlunsplit)

 import bs4
-import requests
+import httpx

 from ..errors import FatalException, retry_on_io_exception
 from ..logging import PrettyLogger
@@ -96,7 +96,7 @@ class IliasCrawler:
     def __init__(
             self,
             base_url: str,
-            session: requests.Session,
+            client: httpx.Client,
             authenticator: IliasAuthenticator,
             dir_filter: IliasDirectoryFilter
     ):
@@ -105,7 +105,7 @@ class IliasCrawler:
         """

         self._base_url = base_url
-        self._session = session
+        self._client = client
         self._authenticator = authenticator
         self.dir_filter = dir_filter

@@ -157,9 +157,9 @@ class IliasCrawler:
         return self._iterate_entries_to_download_infos(entries)

     def _is_course_id_valid(self, root_url: str, course_id: str) -> bool:
-        response: requests.Response = self._session.get(root_url)
+        response: httpx.Response = self._client.get(root_url)
         # We were redirected ==> Non-existent ID
-        if course_id not in response.url:
+        if course_id not in str(response.url):
             return False

         link_element: bs4.Tag = self._get_page(root_url, {}).find(id="current_perma_link")
@@ -564,7 +564,7 @@ class IliasCrawler:
         # on the page, but defined in a JS object inside a script tag, passed to the player
         # library.
         # We do the impossible and RegEx the stream JSON object out of the page's HTML source
-        video_page_soup = soupify(self._session.get(play_url))
+        video_page_soup = soupify(self._client.get(play_url))
         regex: re.Pattern = re.compile(
             r"({\"streams\"[\s\S]+?),\s*{\"paella_config_file", re.IGNORECASE
         )
@@ -639,7 +639,7 @@ class IliasCrawler:

         LOGGER.debug("Fetching %r", url)

-        response = self._session.get(url, params=params)
+        response = self._client.get(url, params=params)
         content_type = response.headers["content-type"]

         if not content_type.startswith("text/html"):
@@ -655,7 +655,7 @@ class IliasCrawler:

         LOGGER.info("Not authenticated, changing that...")

-        self._authenticator.authenticate(self._client)
+        self._authenticator.authenticate(self._client)

         return self._get_page(url, params, retry_count + 1)

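The str() added in _is_course_id_valid is needed because requests exposed
response.url as a plain string, while httpx returns an httpx.URL object, which
does not support substring checks directly. A sketch (the URL is illustrative):

    import httpx

    response = httpx.get("https://example.com/goto.php?target=crs_1234")
    assert isinstance(response.url, httpx.URL)

    # substring membership only works on the string form:
    found = "crs_1234" in str(response.url)
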
PFERD/ilias/downloader.py

@@ -8,7 +8,7 @@ from pathlib import Path, PurePath
 from typing import Callable, List, Optional, Union

 import bs4
-import requests
+import httpx

 from ..errors import retry_on_io_exception
 from ..logging import PrettyLogger
@@ -82,21 +82,18 @@ class IliasDownloader:
             self,
             tmp_dir: TmpDir,
             organizer: Organizer,
-            session: requests.Session,
+            client: httpx.Client,
             authenticator: IliasAuthenticator,
             strategy: IliasDownloadStrategy,
             timeout: int = 5
     ):
         """
         Create a new IliasDownloader.
-
-        The timeout applies to the download request only, as bwcloud uses IPv6
-        and requests has a problem with that: https://github.com/psf/requests/issues/5522
         """

         self._tmp_dir = tmp_dir
         self._organizer = organizer
-        self._session = session
+        self._client = client
         self._authenticator = authenticator
         self._strategy = strategy
         self._timeout = timeout
@@ -128,7 +125,7 @@ class IliasDownloader:
         def download_impl() -> bool:
             if not self._try_download(info, tmp_file):
                 LOGGER.info("Re-Authenticating due to download failure: %r", info)
-                self._authenticator.authenticate(self._session)
+                self._authenticator.authenticate(self._client)
                 raise IOError("Scheduled retry")
             else:
                 return True
@@ -153,7 +150,7 @@ class IliasDownloader:
             PRETTY.warning(f"Could not download {str(info.path)!r} as I got no URL :/")
             return True

-        with self._session.get(url, stream=True, timeout=self._timeout) as response:
+        with self._client.stream("GET", url, timeout=self._timeout) as response:
             content_type = response.headers["content-type"]
             has_content_disposition = "content-disposition" in response.headers

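Unlike requests, which never timed out unless asked to, httpx applies a default
timeout of five seconds and accepts either a plain number or a granular
httpx.Timeout object; a per-request value overrides the client default. A
sketch (values are illustrative):

    import httpx

    client = httpx.Client(timeout=None)  # requests' old behaviour: wait forever
    client = httpx.Client(timeout=5.0)   # 5 s for connect, read, write and pool
    client = httpx.Client(timeout=httpx.Timeout(5.0, connect=60.0))

    with client.stream("GET", "https://example.com/video", timeout=30.0) as response:
        ...  # this request uses 30 s regardless of the client default
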
PFERD/ipd.py

@@ -11,7 +11,7 @@ from typing import Callable, List, Optional
 from urllib.parse import urljoin

 import bs4
-import requests
+import httpx

 from PFERD.errors import FatalException
 from PFERD.utils import soupify
@@ -78,7 +78,7 @@ class IpdCrawler:
         """
         Crawls the playlist given in the constructor.
         """
-        page = soupify(requests.get(self._base_url))
+        page = soupify(httpx.get(self._base_url))

         items: List[IpdDownloadInfo] = []

@@ -116,7 +116,7 @@ class IpdDownloader:
         self._tmp_dir = tmp_dir
         self._organizer = organizer
         self._strategy = strategy
-        self._session = requests.session()
+        self._client = httpx.Client()

     def download_all(self, infos: List[IpdDownloadInfo]) -> None:
         """
@@ -133,7 +133,7 @@ class IpdDownloader:
             self._organizer.mark(info.path)
             return

-        with self._session.get(info.url, stream=True) as response:
+        with self._client.stream("GET", info.url) as response:
             if response.status_code == 200:
                 tmp_file = self._tmp_dir.new_path()
                 stream_to_path(response, tmp_file, info.path.name)

PFERD/pferd.py

@@ -88,12 +88,12 @@ class Pferd(Location):
     ) -> Organizer:
         # pylint: disable=too-many-locals
         cookie_jar = CookieJar(to_path(cookies) if cookies else None)
-        session = cookie_jar.create_session()
+        client = cookie_jar.create_client()
         tmp_dir = self._tmp_dir.new_subdir()
         organizer = Organizer(self.resolve(to_path(target)), file_conflict_resolver)

-        crawler = IliasCrawler(base_url, session, authenticator, dir_filter)
-        downloader = IliasDownloader(tmp_dir, organizer, session,
+        crawler = IliasCrawler(base_url, client, authenticator, dir_filter)
+        downloader = IliasDownloader(tmp_dir, organizer, client,
                                      authenticator, download_strategy, timeout)

         cookie_jar.load_cookies()
@@ -149,11 +149,10 @@ class Pferd(Location):
             password {Optional[str]} -- The SCC password. If none is given, it will prompt
                 the user. (default: {None})
             download_strategy {DownloadStrategy} -- A function to determine which files need to
-                be downloaded. Can save bandwidth and reduce the number of requests.
+                be downloaded. Can save bandwidth and reduce the number of HTTP requests.
                 (default: {download_modified_or_new})
             clean {bool} -- Whether to clean up when the method finishes.
-            timeout {int} -- The download timeout for opencast videos. Sadly needed due to a
-                requests bug.
+            timeout {int} -- The download timeout for opencast videos.
             file_conflict_resolver {FileConflictResolver} -- A function specifying how to deal
                 with overwriting or deleting files. The default always asks the user.
         """
@@ -222,8 +221,7 @@ class Pferd(Location):
                 be downloaded. Can save bandwidth and reduce the number of requests.
                 (default: {download_modified_or_new})
             clean {bool} -- Whether to clean up when the method finishes.
-            timeout {int} -- The download timeout for opencast videos. Sadly needed due to a
-                requests bug.
+            timeout {int} -- The download timeout for opencast videos.
             file_conflict_resolver {FileConflictResolver} -- A function specifying how to deal
                 with overwriting or deleting files. The default always asks the user.
             """
@@ -284,11 +282,10 @@ class Pferd(Location):
             password {Optional[str]} -- The SCC password. If none is given, it will prompt
                 the user. (default: {None})
             download_strategy {DownloadStrategy} -- A function to determine which files need to
-                be downloaded. Can save bandwidth and reduce the number of requests.
+                be downloaded. Can save bandwidth and reduce the number of HTTP requests.
                 (default: {download_modified_or_new})
             clean {bool} -- Whether to clean up when the method finishes.
-            timeout {int} -- The download timeout for opencast videos. Sadly needed due to a
-                requests bug.
+            timeout {int} -- The download timeout for opencast videos.
             file_conflict_resolver {FileConflictResolver} -- A function specifying how to deal
                 with overwriting or deleting files. The default always asks the user.
         """
@@ -338,7 +335,7 @@ class Pferd(Location):
             transform {Transform} -- A transformation function for the output paths. Return None
                 to ignore a file. (default: {lambda x: x})
             download_strategy {DivaDownloadStrategy} -- A function to determine which files need to
-                be downloaded. Can save bandwidth and reduce the number of requests.
+                be downloaded. Can save bandwidth and reduce the number of HTTP requests.
                 (default: {diva_download_new})
             clean {bool} -- Whether to clean up when the method finishes.
             file_conflict_resolver {FileConflictResolver} -- A function specifying how to deal
@@ -396,7 +393,7 @@ class Pferd(Location):
             transform {Transform} -- A transformation function for the output paths. Return None
                 to ignore a file. (default: {lambda x: x})
             download_strategy {DivaDownloadStrategy} -- A function to determine which files need to
-                be downloaded. Can save bandwidth and reduce the number of requests.
+                be downloaded. Can save bandwidth and reduce the number of HTTP requests.
                 (default: {diva_download_new})
             clean {bool} -- Whether to clean up when the method finishes.
             file_conflict_resolver {FileConflictResolver} -- A function specifying how to deal

PFERD/progress.py

@@ -6,7 +6,7 @@ from dataclasses import dataclass
 from types import TracebackType
 from typing import Optional, Type

-import requests
+import httpx
 from rich.console import Console
 from rich.progress import (BarColumn, DownloadColumn, Progress, TaskID,
                            TextColumn, TimeRemainingColumn,
@@ -27,12 +27,12 @@ _progress: Progress = Progress(
 )


-def size_from_headers(response: requests.Response) -> Optional[int]:
+def size_from_headers(response: httpx.Response) -> Optional[int]:
     """
     Return the size of the download based on the response headers.

     Arguments:
-        response {requests.Response} -- the response
+        response {httpx.Response} -- the response

     Returns:
         Optional[int] -- the size

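Only the annotations change here, because httpx.Response.headers behaves like
requests' headers: a case-insensitive mapping. A plausible shape of the helper
(not the repository's exact code):

    from typing import Optional

    import httpx

    def size_from_headers(response: httpx.Response) -> Optional[int]:
        # Headers are case-insensitive, so "content-length" also matches.
        length = response.headers.get("Content-Length")
        return int(length) if length else None
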
PFERD/utils.py

@@ -7,7 +7,7 @@ from pathlib import Path, PurePath
 from typing import Optional, Tuple, Union

 import bs4
-import requests
+import httpx

 from .progress import ProgressSettings, progress_for, size_from_headers

@@ -35,41 +35,38 @@ def to_pattern(regex: Regex) -> re.Pattern:
     return re.compile(regex)


-def soupify(response: requests.Response) -> bs4.BeautifulSoup:
+def soupify(response: httpx.Response) -> bs4.BeautifulSoup:
     """
-    Wrap a requests response in a bs4 object.
+    Wrap an httpx response in a bs4 object.
     """

     return bs4.BeautifulSoup(response.text, "html.parser")


 def stream_to_path(
-        response: requests.Response,
+        response: httpx.Response,
         target: Path,
         progress_name: Optional[str] = None,
-        chunk_size: int = 1024 ** 2
 ) -> None:
     """
-    Download a requests response content to a file by streaming it. This
-    function avoids excessive memory usage when downloading large files. The
-    chunk_size is in bytes.
+    Download an httpx response's content to a file by streaming it. This
+    function avoids excessive memory usage when downloading large files.

     If progress_name is None, no progress bar will be shown. Otherwise a progress
     bar will appear, if the download is bigger than an internal threshold.
     """

-    with response:
-        length = size_from_headers(response)
-        if progress_name and length and int(length) > 1024 * 1024 * 10:  # 10 MiB
-            settings: Optional[ProgressSettings] = ProgressSettings(progress_name, length)
-        else:
-            settings = None
-
-        with open(target, 'wb') as file_descriptor:
-            with progress_for(settings) as progress:
-                for chunk in response.iter_content(chunk_size=chunk_size):
-                    file_descriptor.write(chunk)
-                    progress.advance(len(chunk))
+    length = size_from_headers(response)
+    if progress_name and length and int(length) > 1024 * 1024 * 10:  # 10 MiB
+        settings: Optional[ProgressSettings] = ProgressSettings(progress_name, length)
+    else:
+        settings = None
+
+    with open(target, 'wb') as file_descriptor:
+        with progress_for(settings) as progress:
+            for chunk in response.iter_bytes():
+                file_descriptor.write(chunk)
+                progress.advance(len(chunk))


 def prompt_yes_no(question: str, default: Optional[bool] = None) -> bool:

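Dropping the "with response:" block shifts ownership of the stream to the
caller: iter_bytes() can only read while the client.stream() context is still
open, so stream_to_path must now run inside that block, as the downloaders
above do. A sketch of the caller side (names are illustrative):

    from pathlib import Path

    import httpx

    client = httpx.Client()
    with client.stream("GET", "https://example.com/file.pdf") as response:
        if response.status_code == 200:
            # must run inside the with-block: the body is still being streamed
            with open(Path("file.pdf"), "wb") as file_descriptor:
                for chunk in response.iter_bytes():
                    file_descriptor.write(chunk)
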
requirements.txt

@@ -1,4 +1,4 @@
-requests>=2.21.0
+httpx>=0.17.1
 beautifulsoup4>=4.7.1
 rich>=2.1.0
 keyring>=21.5.0

setup.py

@@ -5,7 +5,7 @@ setup(
     version="2.6.1",
     packages=find_packages(),
     install_requires=[
-        "requests>=2.21.0",
+        "httpx>=0.17.1",
         "beautifulsoup4>=4.7.1",
         "rich>=2.1.0",
         "keyring>=21.5.0"

sync_url.py

@@ -86,7 +86,7 @@ def main() -> None:
     args = parser.parse_args()

     cookie_jar = CookieJar(to_path(args.cookies) if args.cookies else None)
-    session = cookie_jar.create_session()
+    client = cookie_jar.create_client()

     if args.keyring:
         if not args.username:
@@ -103,7 +103,7 @@ def main() -> None:

     url = urlparse(args.url)

-    crawler = IliasCrawler(url.scheme + '://' + url.netloc, session,
+    crawler = IliasCrawler(url.scheme + '://' + url.netloc, client,
                            authenticator, lambda x, y: True)

     cookie_jar.load_cookies()