Add timeout to video downloads to work around requests IPv6 bug

This commit is contained in:
I-Al-Istannen 2020-08-11 14:40:13 +02:00
parent e367da925e
commit a57ee8b96b
2 changed files with 18 additions and 3 deletions

View File

@ -84,9 +84,13 @@ class IliasDownloader:
session: requests.Session, session: requests.Session,
authenticator: IliasAuthenticator, authenticator: IliasAuthenticator,
strategy: IliasDownloadStrategy, strategy: IliasDownloadStrategy,
timeout: int = 5
): ):
""" """
Create a new IliasDownloader. Create a new IliasDownloader.
The timeout (in seconds) applies to the download request only, as bwcloud uses IPv6
and requests has a problem with that: https://github.com/psf/requests/issues/5522
""" """
self._tmp_dir = tmp_dir self._tmp_dir = tmp_dir
@ -94,6 +98,7 @@ class IliasDownloader:
self._session = session self._session = session
self._authenticator = authenticator self._authenticator = authenticator
self._strategy = strategy self._strategy = strategy
self._timeout = timeout
def download_all(self, infos: List[IliasDownloadInfo]) -> None: def download_all(self, infos: List[IliasDownloadInfo]) -> None:
""" """
@ -137,7 +142,7 @@ class IliasDownloader:
PRETTY.warning(f"Could not download {str(info.path)!r} as I got no URL :/") PRETTY.warning(f"Could not download {str(info.path)!r} as I got no URL :/")
return True return True
with self._session.get(url, stream=True) as response: with self._session.get(url, stream=True, timeout=self._timeout) as response:
content_type = response.headers["content-type"] content_type = response.headers["content-type"]
has_content_disposition = "content-disposition" in response.headers has_content_disposition = "content-disposition" in response.headers

View File

@ -72,7 +72,8 @@ class Pferd(Location):
dir_filter: IliasDirectoryFilter, dir_filter: IliasDirectoryFilter,
transform: Transform, transform: Transform,
download_strategy: IliasDownloadStrategy, download_strategy: IliasDownloadStrategy,
clean: bool = True timeout: int,
clean: bool = True,
) -> Organizer: ) -> Organizer:
# pylint: disable=too-many-locals # pylint: disable=too-many-locals
cookie_jar = CookieJar(to_path(cookies) if cookies else None) cookie_jar = CookieJar(to_path(cookies) if cookies else None)
@ -81,7 +82,8 @@ class Pferd(Location):
organizer = Organizer(self.resolve(to_path(target))) organizer = Organizer(self.resolve(to_path(target)))
crawler = IliasCrawler(base_url, session, authenticator, dir_filter) crawler = IliasCrawler(base_url, session, authenticator, dir_filter)
downloader = IliasDownloader(tmp_dir, organizer, session, authenticator, download_strategy) downloader = IliasDownloader(tmp_dir, organizer, session,
authenticator, download_strategy, timeout)
cookie_jar.load_cookies() cookie_jar.load_cookies()
info = crawl_function(crawler) info = crawl_function(crawler)
@ -112,6 +114,7 @@ class Pferd(Location):
password: Optional[str] = None, password: Optional[str] = None,
download_strategy: IliasDownloadStrategy = download_modified_or_new, download_strategy: IliasDownloadStrategy = download_modified_or_new,
clean: bool = True, clean: bool = True,
timeout: int = 5,
) -> Organizer: ) -> Organizer:
""" """
Synchronizes a folder with the ILIAS instance of the KIT. Synchronizes a folder with the ILIAS instance of the KIT.
@ -137,6 +140,8 @@ class Pferd(Location):
be downloaded. Can save bandwidth and reduce the number of requests. be downloaded. Can save bandwidth and reduce the number of requests.
(default: {download_modified_or_new}) (default: {download_modified_or_new})
clean {bool} -- Whether to clean up when the method finishes. clean {bool} -- Whether to clean up when the method finishes.
timeout {int} -- The download timeout in seconds for opencast videos. Sadly needed
due to a requests bug.
""" """
# This authenticator only works with the KIT ilias instance. # This authenticator only works with the KIT ilias instance.
authenticator = KitShibbolethAuthenticator(username=username, password=password) authenticator = KitShibbolethAuthenticator(username=username, password=password)
@ -152,6 +157,7 @@ class Pferd(Location):
transform=transform, transform=transform,
download_strategy=download_strategy, download_strategy=download_strategy,
clean=clean, clean=clean,
timeout=timeout
) )
self._download_summary.merge(organizer.download_summary) self._download_summary.merge(organizer.download_summary)
@ -175,6 +181,7 @@ class Pferd(Location):
password: Optional[str] = None, password: Optional[str] = None,
download_strategy: IliasDownloadStrategy = download_modified_or_new, download_strategy: IliasDownloadStrategy = download_modified_or_new,
clean: bool = True, clean: bool = True,
timeout: int = 5,
) -> Organizer: ) -> Organizer:
""" """
Synchronizes a folder with the ILIAS instance of the KIT. This method will crawl the ILIAS Synchronizes a folder with the ILIAS instance of the KIT. This method will crawl the ILIAS
@ -199,6 +206,8 @@ class Pferd(Location):
be downloaded. Can save bandwidth and reduce the number of requests. be downloaded. Can save bandwidth and reduce the number of requests.
(default: {download_modified_or_new}) (default: {download_modified_or_new})
clean {bool} -- Whether to clean up when the method finishes. clean {bool} -- Whether to clean up when the method finishes.
timeout {int} -- The download timeout in seconds for opencast videos. Sadly needed
due to a requests bug.
""" """
# This authenticator only works with the KIT ilias instance. # This authenticator only works with the KIT ilias instance.
authenticator = KitShibbolethAuthenticator(username=username, password=password) authenticator = KitShibbolethAuthenticator(username=username, password=password)
@ -214,6 +223,7 @@ class Pferd(Location):
transform=transform, transform=transform,
download_strategy=download_strategy, download_strategy=download_strategy,
clean=clean, clean=clean,
timeout=timeout
) )
self._download_summary.merge(organizer.download_summary) self._download_summary.merge(organizer.download_summary)