transition from requests to httpx

2025-09-09 14:12:26 +02:00 · 2021-04-23 18:02:57 +02:00
parent c1ab7485e2
commit 44aeb6c2eb
14 changed files with 80 additions and 89 deletions
--- a/PFERD/ilias/authenticators.py
+++ b/PFERD/ilias/authenticators.py
@@ -7,7 +7,7 @@ import logging
 from typing import Optional

 import bs4
-import requests
+import httpx

 from ..authenticators import TfaAuthenticator, UserPassAuthenticator
 from ..utils import soupify
@@ -19,14 +19,14 @@ class IliasAuthenticator(abc.ABC):
    # pylint: disable=too-few-public-methods

    """
-    An authenticator that logs an existing requests session into an ILIAS
+    An authenticator that logs an existing httpx client into an ILIAS
    account.
    """

    @abc.abstractmethod
-    def authenticate(self, sess: requests.Session) -> None:
+    def authenticate(self, client: httpx.Client) -> None:
        """
-        Log a requests session into this authenticator's ILIAS account.
+        Log an httpx client into this authenticator's ILIAS account.
        """


@@ -45,7 +45,7 @@ class KitShibbolethAuthenticator(IliasAuthenticator):

        self._tfa_auth = TfaAuthenticator("KIT ILIAS Shibboleth")

-    def authenticate(self, sess: requests.Session) -> None:
+    def authenticate(self, sess: httpx.Client) -> None:
        """
        Performs the ILIAS Shibboleth authentication dance and saves the login
        cookies it receieves.
@@ -109,7 +109,7 @@ class KitShibbolethAuthenticator(IliasAuthenticator):

    def _authenticate_tfa(
            self,
-            session: requests.Session,
+            client: httpx.Client,
            soup: bs4.BeautifulSoup
    ) -> bs4.BeautifulSoup:
        # Searching the form here so that this fails before asking for
@@ -125,7 +125,7 @@ class KitShibbolethAuthenticator(IliasAuthenticator):
            "_eventId_proceed": "",
            "j_tokenNumber": self._tfa_auth.get_token()
        }
-        return soupify(session.post(url, data=data))
+        return soupify(client.post(url, data=data))

    @staticmethod
    def _login_successful(soup: bs4.BeautifulSoup) -> bool:
--- a/PFERD/ilias/crawler.py
+++ b/PFERD/ilias/crawler.py
@@ -13,7 +13,7 @@ from urllib.parse import (parse_qs, urlencode, urljoin, urlparse, urlsplit,
                          urlunsplit)

 import bs4
-import requests
+import httpx

 from ..errors import FatalException, retry_on_io_exception
 from ..logging import PrettyLogger
@@ -96,7 +96,7 @@ class IliasCrawler:
    def __init__(
            self,
            base_url: str,
-            session: requests.Session,
+            client: httpx.Client,
            authenticator: IliasAuthenticator,
            dir_filter: IliasDirectoryFilter
    ):
@@ -105,7 +105,7 @@ class IliasCrawler:
        """

        self._base_url = base_url
-        self._session = session
+        self._client = client
        self._authenticator = authenticator
        self.dir_filter = dir_filter

@@ -157,9 +157,9 @@ class IliasCrawler:
        return self._iterate_entries_to_download_infos(entries)

    def _is_course_id_valid(self, root_url: str, course_id: str) -> bool:
-        response: requests.Response = self._session.get(root_url)
+        response: httpx.Response = self._client.get(root_url)
        # We were redirected ==> Non-existant ID
-        if course_id not in response.url:
+        if course_id not in str(response.url):
            return False

        link_element: bs4.Tag = self._get_page(root_url, {}).find(id="current_perma_link")
@@ -564,7 +564,7 @@ class IliasCrawler:
            # on the page, but defined in a JS object inside a script tag, passed to the player
            # library.
            # We do the impossible and RegEx the stream JSON object out of the page's HTML source
-            video_page_soup = soupify(self._session.get(play_url))
+            video_page_soup = soupify(self._client.get(play_url))
            regex: re.Pattern = re.compile(
                r"({\"streams\"[\s\S]+?),\s*{\"paella_config_file", re.IGNORECASE
            )
@@ -639,7 +639,7 @@ class IliasCrawler:

        LOGGER.debug("Fetching %r", url)

-        response = self._session.get(url, params=params)
+        response = self._client.get(url, params=params)
        content_type = response.headers["content-type"]

        if not content_type.startswith("text/html"):
@@ -655,7 +655,7 @@ class IliasCrawler:

        LOGGER.info("Not authenticated, changing that...")

-        self._authenticator.authenticate(self._session)
+        self._authenticator.authenticate(self._client)

        return self._get_page(url, params, retry_count + 1)

--- a/PFERD/ilias/downloader.py
+++ b/PFERD/ilias/downloader.py
@@ -8,7 +8,7 @@ from pathlib import Path, PurePath
 from typing import Callable, List, Optional, Union

 import bs4
-import requests
+import httpx

 from ..errors import retry_on_io_exception
 from ..logging import PrettyLogger
@@ -82,21 +82,18 @@ class IliasDownloader:
            self,
            tmp_dir: TmpDir,
            organizer: Organizer,
-            session: requests.Session,
+            client: httpx.Client,
            authenticator: IliasAuthenticator,
            strategy: IliasDownloadStrategy,
            timeout: int = 5
    ):
        """
        Create a new IliasDownloader.
-
-        The timeout applies to the download request only, as bwcloud uses IPv6
-        and requests has a problem with that: https://github.com/psf/requests/issues/5522
        """

        self._tmp_dir = tmp_dir
        self._organizer = organizer
-        self._session = session
+        self._client = client
        self._authenticator = authenticator
        self._strategy = strategy
        self._timeout = timeout
@@ -128,7 +125,7 @@ class IliasDownloader:
        def download_impl() -> bool:
            if not self._try_download(info, tmp_file):
                LOGGER.info("Re-Authenticating due to download failure: %r", info)
-                self._authenticator.authenticate(self._session)
+                self._authenticator.authenticate(self._client)
                raise IOError("Scheduled retry")
            else:
                return True
@@ -153,7 +150,7 @@ class IliasDownloader:
            PRETTY.warning(f"Could not download {str(info.path)!r} as I got no URL :/")
            return True

-        with self._session.get(url, stream=True, timeout=self._timeout) as response:
+        with self._client.stream("GET", url, timeout=self._timeout) as response:
            content_type = response.headers["content-type"]
            has_content_disposition = "content-disposition" in response.headers