From 3a05b905251f0430ca8d34a353ffe9983304bbfc Mon Sep 17 00:00:00 2001
From: Philipp Fruck
Date: Mon, 8 Apr 2024 11:49:28 +0200
Subject: [PATCH] fix circular import for _io_repeat

---
 PFERD/crawl/ilias/async_helper.py          | 39 ++++++++++++++++++++++
 PFERD/crawl/ilias/ilias_web_crawler.py     |  3 +-
 PFERD/crawl/ilias/kit_ilias_web_crawler.py |  3 +-
 PFERD/utils.py                             | 36 --------------------
 4 files changed, 43 insertions(+), 38 deletions(-)
 create mode 100644 PFERD/crawl/ilias/async_helper.py

diff --git a/PFERD/crawl/ilias/async_helper.py b/PFERD/crawl/ilias/async_helper.py
new file mode 100644
index 0000000..527a819
--- /dev/null
+++ b/PFERD/crawl/ilias/async_helper.py
@@ -0,0 +1,39 @@
+import asyncio
+from typing import Any, Callable, Optional
+
+import aiohttp
+
+from ...logging import log
+from ..crawler import AWrapped, CrawlError, CrawlWarning
+
+
+def _iorepeat(attempts: int, name: str, failure_is_error: bool = False) -> Callable[[AWrapped], AWrapped]:
+    def decorator(f: AWrapped) -> AWrapped:
+        async def wrapper(*args: Any, **kwargs: Any) -> Optional[Any]:
+            last_exception: Optional[BaseException] = None
+            for round in range(attempts):
+                try:
+                    return await f(*args, **kwargs)
+                except aiohttp.ContentTypeError:  # invalid content type
+                    raise CrawlWarning("ILIAS returned an invalid content type")
+                except aiohttp.TooManyRedirects:
+                    raise CrawlWarning("Got stuck in a redirect loop")
+                except aiohttp.ClientPayloadError as e:  # encoding or not enough bytes
+                    last_exception = e
+                except aiohttp.ClientConnectionError as e:  # e.g. timeout, disconnect, resolve failed, etc.
+                    last_exception = e
+                except asyncio.exceptions.TimeoutError as e:  # explicit http timeouts in HttpCrawler
+                    last_exception = e
+                log.explain_topic(f"Retrying operation {name}. Retries left: {attempts - 1 - round}")
+
+            if last_exception:
+                message = f"Error in I/O Operation: {last_exception}"
+                if failure_is_error:
+                    raise CrawlError(message) from last_exception
+                else:
+                    raise CrawlWarning(message) from last_exception
+            raise CrawlError("Impossible return in ilias _iorepeat")
+
+        return wrapper  # type: ignore
+
+    return decorator
diff --git a/PFERD/crawl/ilias/ilias_web_crawler.py b/PFERD/crawl/ilias/ilias_web_crawler.py
index 166034f..1048c30 100644
--- a/PFERD/crawl/ilias/ilias_web_crawler.py
+++ b/PFERD/crawl/ilias/ilias_web_crawler.py
@@ -15,9 +15,10 @@ from ...auth import Authenticator
 from ...config import Config
 from ...logging import ProgressBar, log
 from ...output_dir import FileSink, Redownload
-from ...utils import _iorepeat, fmt_path, soupify, url_set_query_param
+from ...utils import fmt_path, soupify, url_set_query_param
 from ..crawler import CrawlError, CrawlToken, CrawlWarning, DownloadToken, anoncritical
 from ..http_crawler import HttpCrawler, HttpCrawlerSection
+from .async_helper import _iorepeat
 from .file_templates import Links, learning_module_template
 from .ilias_html_cleaner import clean, insert_base_markup
 from .kit_ilias_html import (IliasElementType, IliasForumThread, IliasLearningModulePage, IliasPage,
diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py
index e9d1475..3cd0334 100644
--- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py
+++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py
@@ -7,8 +7,9 @@ from bs4 import BeautifulSoup
 from ...auth import Authenticator, TfaAuthenticator
 from ...config import Config
 from ...logging import log
-from ...utils import _iorepeat, soupify
+from ...utils import soupify
 from ..crawler import CrawlError, CrawlWarning
+from .async_helper import _iorepeat
 from .ilias_web_crawler import IliasWebCrawler, IliasWebCrawlerSection
 
 TargetType = Union[str, int]
diff --git a/PFERD/utils.py b/PFERD/utils.py
index 9f5d4d5..7c7b6f4 100644
--- a/PFERD/utils.py
+++ b/PFERD/utils.py
@@ -9,47 +9,11 @@ from types import TracebackType
 from typing import Any, Callable, Dict, Generic, Optional, Type, TypeVar
 from urllib.parse import parse_qs, urlencode, urlsplit, urlunsplit
 
-import aiohttp
 import bs4
 
-from .crawl.crawler import AWrapped, CrawlError, CrawlWarning
-from .logging import log
-
 T = TypeVar("T")
 
 
-def _iorepeat(attempts: int, name: str, failure_is_error: bool = False) -> Callable[[AWrapped], AWrapped]:
-    def decorator(f: AWrapped) -> AWrapped:
-        async def wrapper(*args: Any, **kwargs: Any) -> Optional[Any]:
-            last_exception: Optional[BaseException] = None
-            for round in range(attempts):
-                try:
-                    return await f(*args, **kwargs)
-                except aiohttp.ContentTypeError:  # invalid content type
-                    raise CrawlWarning("ILIAS returned an invalid content type")
-                except aiohttp.TooManyRedirects:
-                    raise CrawlWarning("Got stuck in a redirect loop")
-                except aiohttp.ClientPayloadError as e:  # encoding or not enough bytes
-                    last_exception = e
-                except aiohttp.ClientConnectionError as e:  # e.g. timeout, disconnect, resolve failed, etc.
-                    last_exception = e
-                except asyncio.exceptions.TimeoutError as e:  # explicit http timeouts in HttpCrawler
-                    last_exception = e
-                log.explain_topic(f"Retrying operation {name}. Retries left: {attempts - 1 - round}")
-
-            if last_exception:
-                message = f"Error in I/O Operation: {last_exception}"
-                if failure_is_error:
-                    raise CrawlError(message) from last_exception
-                else:
-                    raise CrawlWarning(message) from last_exception
-            raise CrawlError("Impossible return in ilias _iorepeat")
-
-        return wrapper  # type: ignore
-
-    return decorator
-
-
 async def in_daemon_thread(func: Callable[..., T], *args: Any, **kwargs: Any) -> T:
     loop = asyncio.get_running_loop()
     future: asyncio.Future[T] = asyncio.Future()