From 44aeb6c2eb47f3601f83a42d2a7b6fe46e7aa664 Mon Sep 17 00:00:00 2001
From: be7a
Date: Fri, 23 Apr 2021 18:02:57 +0200
Subject: [PATCH] transition from requests to httpx

---
 .github/workflows/package.yml |  2 +-
 PFERD/cookie_jar.py           | 20 +++++++++-----------
 PFERD/diva.py                 | 10 +++++-----
 PFERD/downloaders.py          | 16 ++++++++--------
 PFERD/ilias/authenticators.py | 14 +++++++-------
 PFERD/ilias/crawler.py        | 16 ++++++++--------
 PFERD/ilias/downloader.py     | 13 +++++--------
 PFERD/ipd.py                  |  8 ++++----
 PFERD/pferd.py                | 21 ++++++++++-----------
 PFERD/progress.py             |  6 +++---
 PFERD/utils.py                | 35 ++++++++++++++++-------------------
 requirements.txt              |  2 +-
 setup.py                      |  2 +-
 sync_url.py                   |  4 ++--
 14 files changed, 80 insertions(+), 89 deletions(-)

diff --git a/.github/workflows/package.yml b/.github/workflows/package.yml
index 615917b..6564894 100644
--- a/.github/workflows/package.yml
+++ b/.github/workflows/package.yml
@@ -23,7 +23,7 @@ jobs:
           python-version: '3.x'
 
       - name: "Install dependencies"
-        run: "pip install setuptools keyring pyinstaller rich requests beautifulsoup4 -f --upgrade"
+        run: "pip install setuptools keyring pyinstaller rich httpx beautifulsoup4 -f --upgrade"
 
       - name: "Install sync_url.py"
         run: "pyinstaller sync_url.py -F"
diff --git a/PFERD/cookie_jar.py b/PFERD/cookie_jar.py
index e5b568f..754979c 100644
--- a/PFERD/cookie_jar.py
+++ b/PFERD/cookie_jar.py
@@ -1,11 +1,11 @@
-"""A helper for requests cookies."""
+"""A helper for httpx cookies."""
 
 import logging
 from http.cookiejar import LoadError, LWPCookieJar
 from pathlib import Path
 from typing import Optional
 
-import requests
+import httpx
 
 LOGGER = logging.getLogger(__name__)
 
@@ -26,7 +26,7 @@ class CookieJar:
 
     @property
     def cookies(self) -> LWPCookieJar:
-        """Return the requests cookie jar."""
+        """Return the httpx cookie jar."""
         return self._cookies
 
     def load_cookies(self) -> None:
@@ -57,13 +57,11 @@ class CookieJar:
         # TODO possibly catch a few more exceptions
         self._cookies.save(ignore_discard=True)
 
-    def create_session(self) -> requests.Session:
-        """Create a new session using the cookie jar."""
-        sess = requests.Session()
+    def create_client(self) -> httpx.Client:
+        """Create a new client using the cookie jar."""
+        # TODO: timeout=None was the default behaviour of requests. An appropriate value should probably be set
+        client = httpx.Client(timeout=None)
 
-        # From the request docs: "All requests code should work out of the box
-        # with externally provided instances of CookieJar, e.g. LWPCookieJar
-        # and FileCookieJar."
-        sess.cookies = self.cookies  # type: ignore
+        client.cookies = self.cookies  # type: ignore
 
-        return sess
+        return client
diff --git a/PFERD/diva.py b/PFERD/diva.py
index 148fa56..a6bdba0 100644
--- a/PFERD/diva.py
+++ b/PFERD/diva.py
@@ -7,7 +7,7 @@ from dataclasses import dataclass
 from pathlib import Path
 from typing import Any, Callable, List, Optional
 
-import requests
+import httpx
 
 from .errors import FatalException
 from .logging import PrettyLogger
@@ -69,7 +69,7 @@ class DivaPlaylistCrawler:
             )
         base_name = match.group(1)
 
-        response = requests.get(cls._PLAYLIST_BASE_URL + base_name + ".json")
+        response = httpx.get(cls._PLAYLIST_BASE_URL + base_name + ".json")
 
         if response.status_code != 200:
             raise FatalException(
@@ -88,7 +88,7 @@ class DivaPlaylistCrawler:
         """
         Crawls the playlist given in the constructor.
""" - response = requests.get(self._COLLECTION_BASE_URL, params={"collection": self._id}) + response = httpx.get(self._COLLECTION_BASE_URL, params={"collection": self._id}) if response.status_code != 200: raise FatalException(f"Server returned status {response.status_code}.") @@ -143,7 +143,7 @@ class DivaDownloader: self._tmp_dir = tmp_dir self._organizer = organizer self._strategy = strategy - self._session = requests.session() + self._client = httpx.Client() def download_all(self, infos: List[DivaDownloadInfo]) -> None: """ @@ -160,7 +160,7 @@ class DivaDownloader: self._organizer.mark(info.path) return - with self._session.get(info.url, stream=True) as response: + with self._client.stream("GET", info.url) as response: if response.status_code == 200: tmp_file = self._tmp_dir.new_path() stream_to_path(response, tmp_file, info.path.name) diff --git a/PFERD/downloaders.py b/PFERD/downloaders.py index 94b8b9f..f004450 100644 --- a/PFERD/downloaders.py +++ b/PFERD/downloaders.py @@ -5,8 +5,8 @@ General downloaders useful in many situations from dataclasses import dataclass, field from typing import Any, Dict, List, Optional -import requests -import requests.auth +import httpx +import httpx.auth from .organizer import Organizer from .tmp_dir import TmpDir @@ -39,15 +39,15 @@ class HttpDownloader: self._tmp_dir = tmp_dir self._username = username self._password = password - self._session = self._build_session() + self._client = self._build_client() - def _build_session(self) -> requests.Session: - session = requests.Session() + def _build_client(self) -> httpx.Client: + client = httpx.Client() if self._username and self._password: - session.auth = requests.auth.HTTPBasicAuth( + client.auth = httpx.auth.HTTPBasicAuth( self._username, self._password ) - return session + return client def download_all(self, infos: List[HttpDownloadInfo]) -> None: """ @@ -62,7 +62,7 @@ class HttpDownloader: Download a single file. """ - with self._session.get(info.url, params=info.parameters, stream=True) as response: + with self._client.stream("GET", info.url, params=info.parameters) as response: if response.status_code == 200: tmp_file = self._tmp_dir.new_path() stream_to_path(response, tmp_file, info.path.name) diff --git a/PFERD/ilias/authenticators.py b/PFERD/ilias/authenticators.py index 4b99dd8..c1f4087 100644 --- a/PFERD/ilias/authenticators.py +++ b/PFERD/ilias/authenticators.py @@ -7,7 +7,7 @@ import logging from typing import Optional import bs4 -import requests +import httpx from ..authenticators import TfaAuthenticator, UserPassAuthenticator from ..utils import soupify @@ -19,14 +19,14 @@ class IliasAuthenticator(abc.ABC): # pylint: disable=too-few-public-methods """ - An authenticator that logs an existing requests session into an ILIAS + An authenticator that logs an existing httpx client into an ILIAS account. """ @abc.abstractmethod - def authenticate(self, sess: requests.Session) -> None: + def authenticate(self, client: httpx.Client) -> None: """ - Log a requests session into this authenticator's ILIAS account. + Log a httpx client into this authenticator's ILIAS account. """ @@ -45,7 +45,7 @@ class KitShibbolethAuthenticator(IliasAuthenticator): self._tfa_auth = TfaAuthenticator("KIT ILIAS Shibboleth") - def authenticate(self, sess: requests.Session) -> None: + def authenticate(self, sess: httpx.Client) -> None: """ Performs the ILIAS Shibboleth authentication dance and saves the login cookies it receieves. 
@@ -109,7 +109,7 @@ class KitShibbolethAuthenticator(IliasAuthenticator):
 
     def _authenticate_tfa(
             self,
-            session: requests.Session,
+            client: httpx.Client,
             soup: bs4.BeautifulSoup
     ) -> bs4.BeautifulSoup:
         # Searching the form here so that this fails before asking for
@@ -125,7 +125,7 @@ class KitShibbolethAuthenticator(IliasAuthenticator):
             "_eventId_proceed": "",
             "j_tokenNumber": self._tfa_auth.get_token()
         }
-        return soupify(session.post(url, data=data))
+        return soupify(client.post(url, data=data))
 
     @staticmethod
     def _login_successful(soup: bs4.BeautifulSoup) -> bool:
diff --git a/PFERD/ilias/crawler.py b/PFERD/ilias/crawler.py
index edab284..9726a4f 100644
--- a/PFERD/ilias/crawler.py
+++ b/PFERD/ilias/crawler.py
@@ -13,7 +13,7 @@ from urllib.parse import (parse_qs, urlencode, urljoin, urlparse, urlsplit,
                           urlunsplit)
 
 import bs4
-import requests
+import httpx
 
 from ..errors import FatalException, retry_on_io_exception
 from ..logging import PrettyLogger
@@ -96,7 +96,7 @@ class IliasCrawler:
     def __init__(
             self,
             base_url: str,
-            session: requests.Session,
+            client: httpx.Client,
             authenticator: IliasAuthenticator,
             dir_filter: IliasDirectoryFilter
     ):
@@ -105,7 +105,7 @@ class IliasCrawler:
         """
 
         self._base_url = base_url
-        self._session = session
+        self._client = client
         self._authenticator = authenticator
         self.dir_filter = dir_filter
 
@@ -157,9 +157,9 @@ class IliasCrawler:
         return self._iterate_entries_to_download_infos(entries)
 
     def _is_course_id_valid(self, root_url: str, course_id: str) -> bool:
-        response: requests.Response = self._session.get(root_url)
+        response: httpx.Response = self._client.get(root_url)
         # We were redirected ==> Non-existent ID
-        if course_id not in response.url:
+        if course_id not in str(response.url):
             return False
 
         link_element: bs4.Tag = self._get_page(root_url, {}).find(id="current_perma_link")
@@ -564,7 +564,7 @@ class IliasCrawler:
         # on the page, but defined in a JS object inside a script tag, passed to the player
         # library.
         # We do the impossible and RegEx the stream JSON object out of the page's HTML source
-        video_page_soup = soupify(self._session.get(play_url))
+        video_page_soup = soupify(self._client.get(play_url))
         regex: re.Pattern = re.compile(
             r"({\"streams\"[\s\S]+?),\s*{\"paella_config_file", re.IGNORECASE
         )
@@ -639,7 +639,7 @@ class IliasCrawler:
 
         LOGGER.debug("Fetching %r", url)
 
-        response = self._session.get(url, params=params)
+        response = self._client.get(url, params=params)
         content_type = response.headers["content-type"]
 
         if not content_type.startswith("text/html"):
@@ -655,7 +655,7 @@ class IliasCrawler:
 
         LOGGER.info("Not authenticated, changing that...")
 
-        self._authenticator.authenticate(self._session)
+        self._authenticator.authenticate(self._client)
 
         return self._get_page(url, params, retry_count + 1)
diff --git a/PFERD/ilias/downloader.py b/PFERD/ilias/downloader.py
index f6132bf..005dfec 100644
--- a/PFERD/ilias/downloader.py
+++ b/PFERD/ilias/downloader.py
@@ -8,7 +8,7 @@ from pathlib import Path, PurePath
 from typing import Callable, List, Optional, Union
 
 import bs4
-import requests
+import httpx
 
 from ..errors import retry_on_io_exception
 from ..logging import PrettyLogger
@@ -82,21 +82,18 @@ class IliasDownloader:
             self,
             tmp_dir: TmpDir,
             organizer: Organizer,
-            session: requests.Session,
+            client: httpx.Client,
             authenticator: IliasAuthenticator,
             strategy: IliasDownloadStrategy,
             timeout: int = 5
     ):
         """
         Create a new IliasDownloader.
-
-        The timeout applies to the download request only, as bwcloud uses IPv6
-        and requests has a problem with that: https://github.com/psf/requests/issues/5522
         """
 
         self._tmp_dir = tmp_dir
         self._organizer = organizer
-        self._session = session
+        self._client = client
         self._authenticator = authenticator
         self._strategy = strategy
         self._timeout = timeout
@@ -128,7 +125,7 @@ class IliasDownloader:
         def download_impl() -> bool:
             if not self._try_download(info, tmp_file):
                 LOGGER.info("Re-Authenticating due to download failure: %r", info)
-                self._authenticator.authenticate(self._session)
+                self._authenticator.authenticate(self._client)
                 raise IOError("Scheduled retry")
             else:
                 return True
@@ -153,7 +150,7 @@ class IliasDownloader:
             PRETTY.warning(f"Could not download {str(info.path)!r} as I got no URL :/")
             return True
 
-        with self._session.get(url, stream=True, timeout=self._timeout) as response:
+        with self._client.stream("GET", url, timeout=self._timeout) as response:
             content_type = response.headers["content-type"]
             has_content_disposition = "content-disposition" in response.headers
diff --git a/PFERD/ipd.py b/PFERD/ipd.py
index ece6a97..336b21c 100644
--- a/PFERD/ipd.py
+++ b/PFERD/ipd.py
@@ -11,7 +11,7 @@ from typing import Callable, List, Optional
 from urllib.parse import urljoin
 
 import bs4
-import requests
+import httpx
 
 from PFERD.errors import FatalException
 from PFERD.utils import soupify
@@ -78,7 +78,7 @@ class IpdCrawler:
         """
         Crawls the playlist given in the constructor.
         """
-        page = soupify(requests.get(self._base_url))
+        page = soupify(httpx.get(self._base_url))
 
         items: List[IpdDownloadInfo] = []
 
@@ -116,7 +116,7 @@ class IpdDownloader:
         self._tmp_dir = tmp_dir
         self._organizer = organizer
         self._strategy = strategy
-        self._session = requests.session()
+        self._client = httpx.Client()
 
     def download_all(self, infos: List[IpdDownloadInfo]) -> None:
         """
@@ -133,7 +133,7 @@ class IpdDownloader:
             self._organizer.mark(info.path)
             return
 
-        with self._session.get(info.url, stream=True) as response:
+        with self._client.stream("GET", info.url) as response:
             if response.status_code == 200:
                 tmp_file = self._tmp_dir.new_path()
                 stream_to_path(response, tmp_file, info.path.name)
diff --git a/PFERD/pferd.py b/PFERD/pferd.py
index 1bb6f78..3efe8f2 100644
--- a/PFERD/pferd.py
+++ b/PFERD/pferd.py
@@ -88,12 +88,12 @@ class Pferd(Location):
     ) -> Organizer:
         # pylint: disable=too-many-locals
         cookie_jar = CookieJar(to_path(cookies) if cookies else None)
-        session = cookie_jar.create_session()
+        client = cookie_jar.create_client()
         tmp_dir = self._tmp_dir.new_subdir()
         organizer = Organizer(self.resolve(to_path(target)), file_conflict_resolver)
 
-        crawler = IliasCrawler(base_url, session, authenticator, dir_filter)
-        downloader = IliasDownloader(tmp_dir, organizer, session,
+        crawler = IliasCrawler(base_url, client, authenticator, dir_filter)
+        downloader = IliasDownloader(tmp_dir, organizer, client,
                                      authenticator, download_strategy, timeout)
 
         cookie_jar.load_cookies()
@@ -149,11 +149,10 @@ class Pferd(Location):
             password {Optional[str]} -- The SCC password. If none is given, it will prompt
                 the user. (default: {None})
             download_strategy {DownloadStrategy} -- A function to determine which files need to
-                be downloaded. Can save bandwidth and reduce the number of requests.
+                be downloaded. Can save bandwidth and reduce the number of HTTP requests.
                 (default: {download_modified_or_new})
             clean {bool} -- Whether to clean up when the method finishes.
-            timeout {int} -- The download timeout for opencast videos. Sadly needed due to a
-                         requests bug.
+            timeout {int} -- The download timeout for opencast videos.
             file_conflict_resolver {FileConflictResolver} -- A function specifying how to deal
                 with overwriting or deleting files. The default always asks the user.
         """
@@ -222,8 +222,7 @@ class Pferd(Location):
                 be downloaded. Can save bandwidth and reduce the number of requests.
                 (default: {download_modified_or_new})
             clean {bool} -- Whether to clean up when the method finishes.
-            timeout {int} -- The download timeout for opencast videos. Sadly needed due to a
-                         requests bug.
+            timeout {int} -- The download timeout for opencast videos.
             file_conflict_resolver {FileConflictResolver} -- A function specifying how to deal
                 with overwriting or deleting files. The default always asks the user.
         """
@@ -284,11 +283,10 @@ class Pferd(Location):
             password {Optional[str]} -- The SCC password. If none is given, it will prompt
                 the user. (default: {None})
             download_strategy {DownloadStrategy} -- A function to determine which files need to
-                be downloaded. Can save bandwidth and reduce the number of requests.
+                be downloaded. Can save bandwidth and reduce the number of HTTP requests.
                 (default: {download_modified_or_new})
             clean {bool} -- Whether to clean up when the method finishes.
-            timeout {int} -- The download timeout for opencast videos. Sadly needed due to a
-                         requests bug.
+            timeout {int} -- The download timeout for opencast videos.
             file_conflict_resolver {FileConflictResolver} -- A function specifying how to deal
                 with overwriting or deleting files. The default always asks the user.
         """
@@ -338,7 +337,7 @@ class Pferd(Location):
             transform {Transform} -- A transformation function for the output paths. Return None
                 to ignore a file. (default: {lambdax:x})
             download_strategy {DivaDownloadStrategy} -- A function to determine which files need to
-                be downloaded. Can save bandwidth and reduce the number of requests.
+                be downloaded. Can save bandwidth and reduce the number of HTTP requests.
                 (default: {diva_download_new})
             clean {bool} -- Whether to clean up when the method finishes.
             file_conflict_resolver {FileConflictResolver} -- A function specifying how to deal
@@ -396,7 +395,7 @@ class Pferd(Location):
             transform {Transform} -- A transformation function for the output paths. Return None
                 to ignore a file. (default: {lambdax:x})
             download_strategy {DivaDownloadStrategy} -- A function to determine which files need to
-                be downloaded. Can save bandwidth and reduce the number of requests.
+                be downloaded. Can save bandwidth and reduce the number of HTTP requests.
                 (default: {diva_download_new})
             clean {bool} -- Whether to clean up when the method finishes.
             file_conflict_resolver {FileConflictResolver} -- A function specifying how to deal
diff --git a/PFERD/progress.py b/PFERD/progress.py
index 6ad098f..06cc378 100644
--- a/PFERD/progress.py
+++ b/PFERD/progress.py
@@ -6,7 +6,7 @@ from dataclasses import dataclass
 from types import TracebackType
 from typing import Optional, Type
 
-import requests
+import httpx
 from rich.console import Console
 from rich.progress import (BarColumn, DownloadColumn, Progress, TaskID,
                            TextColumn, TimeRemainingColumn,
@@ -27,12 +27,12 @@ _progress: Progress = Progress(
 )
 
 
-def size_from_headers(response: requests.Response) -> Optional[int]:
+def size_from_headers(response: httpx.Response) -> Optional[int]:
     """
     Return the size of the download based on the response headers.
 
     Arguments:
-        response {requests.Response} -- the response
+        response {httpx.Response} -- the response
 
     Returns:
         Optional[int] -- the size
diff --git a/PFERD/utils.py b/PFERD/utils.py
index 56c101a..9b841d0 100644
--- a/PFERD/utils.py
+++ b/PFERD/utils.py
@@ -7,7 +7,7 @@ from pathlib import Path, PurePath
 from typing import Optional, Tuple, Union
 
 import bs4
-import requests
+import httpx
 
 from .progress import ProgressSettings, progress_for, size_from_headers
 
@@ -35,41 +35,38 @@ def to_pattern(regex: Regex) -> re.Pattern:
     return re.compile(regex)
 
 
-def soupify(response: requests.Response) -> bs4.BeautifulSoup:
+def soupify(response: httpx.Response) -> bs4.BeautifulSoup:
     """
-    Wrap a requests response in a bs4 object.
+    Wrap an httpx response in a bs4 object.
     """
 
     return bs4.BeautifulSoup(response.text, "html.parser")
 
 
 def stream_to_path(
-        response: requests.Response,
+        response: httpx.Response,
         target: Path,
         progress_name: Optional[str] = None,
-        chunk_size: int = 1024 ** 2
 ) -> None:
     """
-    Download a requests response content to a file by streaming it. This
-    function avoids excessive memory usage when downloading large files. The
-    chunk_size is in bytes.
+    Download an httpx response's content to a file by streaming it. This
+    function avoids excessive memory usage when downloading large files.
 
     If progress_name is None, no progress bar will be shown. Otherwise a
     progress bar will appear, if the download is bigger than an internal
     threshold.
     """
 
-    with response:
-        length = size_from_headers(response)
-        if progress_name and length and int(length) > 1024 * 1024 * 10:  # 10 MiB
-            settings: Optional[ProgressSettings] = ProgressSettings(progress_name, length)
-        else:
-            settings = None
+    length = size_from_headers(response)
+    if progress_name and length and int(length) > 1024 * 1024 * 10:  # 10 MiB
+        settings: Optional[ProgressSettings] = ProgressSettings(progress_name, length)
+    else:
+        settings = None
 
-        with open(target, 'wb') as file_descriptor:
-            with progress_for(settings) as progress:
-                for chunk in response.iter_content(chunk_size=chunk_size):
-                    file_descriptor.write(chunk)
-                    progress.advance(len(chunk))
+    with open(target, 'wb') as file_descriptor:
+        with progress_for(settings) as progress:
+            for chunk in response.iter_bytes():
+                file_descriptor.write(chunk)
+                progress.advance(len(chunk))
 
 
 def prompt_yes_no(question: str, default: Optional[bool] = None) -> bool:
diff --git a/requirements.txt b/requirements.txt
index 2d852e1..0b805f8 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
-requests>=2.21.0
+httpx>=0.17.1
 beautifulsoup4>=4.7.1
 rich>=2.1.0
 keyring>=21.5.0
\ No newline at end of file
diff --git a/setup.py b/setup.py
index a4dfab3..322f2a9 100644
--- a/setup.py
+++ b/setup.py
@@ -5,7 +5,7 @@ setup(
     version="2.6.1",
     packages=find_packages(),
     install_requires=[
-        "requests>=2.21.0",
+        "httpx>=0.17.1",
         "beautifulsoup4>=4.7.1",
         "rich>=2.1.0",
         "keyring>=21.5.0"
diff --git a/sync_url.py b/sync_url.py
index ca78de0..2ccbc95 100755
--- a/sync_url.py
+++ b/sync_url.py
@@ -86,7 +86,7 @@ def main() -> None:
     args = parser.parse_args()
 
     cookie_jar = CookieJar(to_path(args.cookies) if args.cookies else None)
-    session = cookie_jar.create_session()
+    client = cookie_jar.create_client()
 
     if args.keyring:
         if not args.username:
@@ -103,7 +103,7 @@ def main() -> None:
 
     url = urlparse(args.url)
 
-    crawler = IliasCrawler(url.scheme + '://' + url.netloc, session,
+    crawler = IliasCrawler(url.scheme + '://' + url.netloc, client,
                            authenticator, lambda x, y: True)
 
     cookie_jar.load_cookies()
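
Notes on the requests -> httpx mapping (reviewer addendum; the snippets below
are illustrative sketches, not code from this patch, and use placeholder URLs
and credentials):

The pattern applied throughout is that requests' session.get(url, stream=True)
becomes client.stream("GET", url), iter_content() becomes iter_bytes(), and
requests.auth.HTTPBasicAuth becomes httpx.BasicAuth. A minimal sketch of the
new download pattern:

    import httpx

    client = httpx.Client(
        auth=httpx.BasicAuth("user", "secret"),  # replaces requests.auth.HTTPBasicAuth
        timeout=None,  # mirrors requests' behaviour of never timing out
    )

    # client.stream() returns a context manager; the body is only read on demand.
    with client.stream("GET", "https://example.com/file.bin") as response:
        if response.status_code == 200:
            with open("file.bin", "wb") as file_descriptor:
                # iter_bytes() lets httpx pick the chunk size, which is why
                # stream_to_path() no longer takes a chunk_size parameter.
                for chunk in response.iter_bytes():
                    file_descriptor.write(chunk)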
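
Regarding the TODO in cookie_jar.py: httpx applies a 5-second default timeout
where requests had none, hence the explicit timeout=None. If a finite value is
picked later, httpx.Timeout can configure the connect/read/write/pool phases
separately; the values below are placeholders, not a recommendation:

    import httpx

    # 5 seconds overall, but allow 10 seconds for establishing a connection.
    timeout = httpx.Timeout(5.0, connect=10.0)
    client = httpx.Client(timeout=timeout)

    # A single call can still override the client-wide default.
    response = client.get("https://example.com", timeout=30.0)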
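
The bare assignment client.cookies = self.cookies in cookie_jar.py assumes
that httpx, like requests, keeps a reference to an externally provided stdlib
CookieJar rather than copying it (this appears to hold as of httpx 0.17), so
cookies received while crawling can later be saved from the same jar:

    from http.cookiejar import LWPCookieJar

    import httpx

    jar = LWPCookieJar("cookies.txt")
    client = httpx.Client()
    client.cookies = jar  # type: ignore

    client.get("https://example.com")  # any Set-Cookie headers land in jar
    jar.save(ignore_discard=True)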