mirror of
https://github.com/Garmelon/PFERD.git
synced 2025-09-09 14:12:26 +02:00
transition from requests to httpx
This commit is contained in:
@@ -7,7 +7,7 @@ import logging
|
||||
from typing import Optional
|
||||
|
||||
import bs4
|
||||
import requests
|
||||
import httpx
|
||||
|
||||
from ..authenticators import TfaAuthenticator, UserPassAuthenticator
|
||||
from ..utils import soupify
|
||||
@@ -19,14 +19,14 @@ class IliasAuthenticator(abc.ABC):
|
||||
# pylint: disable=too-few-public-methods
|
||||
|
||||
"""
|
||||
An authenticator that logs an existing requests session into an ILIAS
|
||||
An authenticator that logs an existing httpx client into an ILIAS
|
||||
account.
|
||||
"""
|
||||
|
||||
@abc.abstractmethod
|
||||
def authenticate(self, sess: requests.Session) -> None:
|
||||
def authenticate(self, client: httpx.Client) -> None:
|
||||
"""
|
||||
Log a requests session into this authenticator's ILIAS account.
|
||||
Log an httpx client into this authenticator's ILIAS account.
|
||||
"""
|
||||
|
||||
|
||||
@@ -45,7 +45,7 @@ class KitShibbolethAuthenticator(IliasAuthenticator):
|
||||
|
||||
self._tfa_auth = TfaAuthenticator("KIT ILIAS Shibboleth")
|
||||
|
||||
def authenticate(self, sess: requests.Session) -> None:
|
||||
def authenticate(self, sess: httpx.Client) -> None:
|
||||
"""
|
||||
Performs the ILIAS Shibboleth authentication dance and saves the login
|
||||
cookies it receives.
|
||||
@@ -109,7 +109,7 @@ class KitShibbolethAuthenticator(IliasAuthenticator):
|
||||
|
||||
def _authenticate_tfa(
|
||||
self,
|
||||
session: requests.Session,
|
||||
client: httpx.Client,
|
||||
soup: bs4.BeautifulSoup
|
||||
) -> bs4.BeautifulSoup:
|
||||
# Searching the form here so that this fails before asking for
|
||||
@@ -125,7 +125,7 @@ class KitShibbolethAuthenticator(IliasAuthenticator):
|
||||
"_eventId_proceed": "",
|
||||
"j_tokenNumber": self._tfa_auth.get_token()
|
||||
}
|
||||
return soupify(session.post(url, data=data))
|
||||
return soupify(client.post(url, data=data))
|
||||
|
||||
@staticmethod
|
||||
def _login_successful(soup: bs4.BeautifulSoup) -> bool:
|
||||
|
@@ -13,7 +13,7 @@ from urllib.parse import (parse_qs, urlencode, urljoin, urlparse, urlsplit,
|
||||
urlunsplit)
|
||||
|
||||
import bs4
|
||||
import requests
|
||||
import httpx
|
||||
|
||||
from ..errors import FatalException, retry_on_io_exception
|
||||
from ..logging import PrettyLogger
|
||||
@@ -96,7 +96,7 @@ class IliasCrawler:
|
||||
def __init__(
|
||||
self,
|
||||
base_url: str,
|
||||
session: requests.Session,
|
||||
client: httpx.Client,
|
||||
authenticator: IliasAuthenticator,
|
||||
dir_filter: IliasDirectoryFilter
|
||||
):
|
||||
@@ -105,7 +105,7 @@ class IliasCrawler:
|
||||
"""
|
||||
|
||||
self._base_url = base_url
|
||||
self._session = session
|
||||
self._client = client
|
||||
self._authenticator = authenticator
|
||||
self.dir_filter = dir_filter
|
||||
|
||||
@@ -157,9 +157,9 @@ class IliasCrawler:
|
||||
return self._iterate_entries_to_download_infos(entries)
|
||||
|
||||
def _is_course_id_valid(self, root_url: str, course_id: str) -> bool:
|
||||
response: requests.Response = self._session.get(root_url)
|
||||
response: httpx.Response = self._client.get(root_url)
|
||||
# We were redirected ==> Non-existent ID
|
||||
if course_id not in response.url:
|
||||
if course_id not in str(response.url):
|
||||
return False
|
||||
|
||||
link_element: bs4.Tag = self._get_page(root_url, {}).find(id="current_perma_link")
|
||||
@@ -564,7 +564,7 @@ class IliasCrawler:
|
||||
# on the page, but defined in a JS object inside a script tag, passed to the player
|
||||
# library.
|
||||
# We do the impossible and RegEx the stream JSON object out of the page's HTML source
|
||||
video_page_soup = soupify(self._session.get(play_url))
|
||||
video_page_soup = soupify(self._client.get(play_url))
|
||||
regex: re.Pattern = re.compile(
|
||||
r"({\"streams\"[\s\S]+?),\s*{\"paella_config_file", re.IGNORECASE
|
||||
)
|
||||
@@ -639,7 +639,7 @@ class IliasCrawler:
|
||||
|
||||
LOGGER.debug("Fetching %r", url)
|
||||
|
||||
response = self._session.get(url, params=params)
|
||||
response = self._client.get(url, params=params)
|
||||
content_type = response.headers["content-type"]
|
||||
|
||||
if not content_type.startswith("text/html"):
|
||||
@@ -655,7 +655,7 @@ class IliasCrawler:
|
||||
|
||||
LOGGER.info("Not authenticated, changing that...")
|
||||
|
||||
self._authenticator.authenticate(self._session)
|
||||
self._authenticator.authenticate(self._client)
|
||||
|
||||
return self._get_page(url, params, retry_count + 1)
|
||||
|
||||
|
@@ -8,7 +8,7 @@ from pathlib import Path, PurePath
|
||||
from typing import Callable, List, Optional, Union
|
||||
|
||||
import bs4
|
||||
import requests
|
||||
import httpx
|
||||
|
||||
from ..errors import retry_on_io_exception
|
||||
from ..logging import PrettyLogger
|
||||
@@ -82,21 +82,18 @@ class IliasDownloader:
|
||||
self,
|
||||
tmp_dir: TmpDir,
|
||||
organizer: Organizer,
|
||||
session: requests.Session,
|
||||
client: httpx.Client,
|
||||
authenticator: IliasAuthenticator,
|
||||
strategy: IliasDownloadStrategy,
|
||||
timeout: int = 5
|
||||
):
|
||||
"""
|
||||
Create a new IliasDownloader.
|
||||
|
||||
The timeout applies to the download request only, as bwcloud uses IPv6
|
||||
and requests has a problem with that: https://github.com/psf/requests/issues/5522
|
||||
"""
|
||||
|
||||
self._tmp_dir = tmp_dir
|
||||
self._organizer = organizer
|
||||
self._session = session
|
||||
self._client = client
|
||||
self._authenticator = authenticator
|
||||
self._strategy = strategy
|
||||
self._timeout = timeout
|
||||
@@ -128,7 +125,7 @@ class IliasDownloader:
|
||||
def download_impl() -> bool:
|
||||
if not self._try_download(info, tmp_file):
|
||||
LOGGER.info("Re-Authenticating due to download failure: %r", info)
|
||||
self._authenticator.authenticate(self._session)
|
||||
self._authenticator.authenticate(self._client)
|
||||
raise IOError("Scheduled retry")
|
||||
else:
|
||||
return True
|
||||
@@ -153,7 +150,7 @@ class IliasDownloader:
|
||||
PRETTY.warning(f"Could not download {str(info.path)!r} as I got no URL :/")
|
||||
return True
|
||||
|
||||
with self._session.get(url, stream=True, timeout=self._timeout) as response:
|
||||
with self._client.stream("GET", url, timeout=self._timeout) as response:
|
||||
content_type = response.headers["content-type"]
|
||||
has_content_disposition = "content-disposition" in response.headers
|
||||
|
||||
|
Reference in New Issue
Block a user