mirror of
https://github.com/Garmelon/PFERD.git
synced 2025-07-12 22:22:30 +02:00
fix circular import for _iorepeat
This commit is contained in:
39
PFERD/crawl/ilias/async_helper.py
Normal file
39
PFERD/crawl/ilias/async_helper.py
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
import asyncio
|
||||||
|
from typing import Any, Callable, Optional
|
||||||
|
|
||||||
|
import aiohttp
|
||||||
|
|
||||||
|
from ...logging import log
|
||||||
|
from ..crawler import AWrapped, CrawlError, CrawlWarning
|
||||||
|
|
||||||
|
|
||||||
|
def _iorepeat(attempts: int, name: str, failure_is_error: bool = False) -> Callable[[AWrapped], AWrapped]:
    """Retry a flaky async I/O operation up to ``attempts`` times.

    Decorator factory for ILIAS crawler coroutines. Transient aiohttp/asyncio
    errors (payload, connection, timeout) are retried; structural errors
    (bad content type, redirect loop) are raised immediately as CrawlWarning.

    Args:
        attempts: Maximum number of times the wrapped coroutine is invoked.
        name: Human-readable operation name used in the retry log message.
        failure_is_error: If True, exhausting all attempts raises CrawlError
            (fatal); otherwise a CrawlWarning is raised.

    Returns:
        A decorator that wraps an async function with the retry logic.
    """
    def decorator(f: AWrapped) -> AWrapped:
        async def wrapper(*args: Any, **kwargs: Any) -> Optional[Any]:
            last_exception: Optional[BaseException] = None
            # Renamed from 'round' to avoid shadowing the builtin round();
            # the logged "Retries left" value is unchanged.
            for attempt in range(attempts):
                try:
                    return await f(*args, **kwargs)
                except aiohttp.ContentTypeError:  # invalid content type
                    raise CrawlWarning("ILIAS returned an invalid content type")
                except aiohttp.TooManyRedirects:
                    raise CrawlWarning("Got stuck in a redirect loop")
                except aiohttp.ClientPayloadError as e:  # encoding or not enough bytes
                    last_exception = e
                except aiohttp.ClientConnectionError as e:  # e.g. timeout, disconnect, resolve failed, etc.
                    last_exception = e
                except asyncio.exceptions.TimeoutError as e:  # explicit http timeouts in HttpCrawler
                    last_exception = e
                # NOTE(review): this also logs "Retries left: 0" after the final
                # failed attempt, before the error below is raised.
                log.explain_topic(f"Retrying operation {name}. Retries left: {attempts - 1 - attempt}")

            if last_exception:
                message = f"Error in I/O Operation: {last_exception}"
                if failure_is_error:
                    raise CrawlError(message) from last_exception
                else:
                    raise CrawlWarning(message) from last_exception
            # Unreachable: the loop either returned or set last_exception.
            raise CrawlError("Impossible return in ilias _iorepeat")

        return wrapper  # type: ignore
    return decorator
|
@ -15,9 +15,10 @@ from ...auth import Authenticator
|
|||||||
from ...config import Config
|
from ...config import Config
|
||||||
from ...logging import ProgressBar, log
|
from ...logging import ProgressBar, log
|
||||||
from ...output_dir import FileSink, Redownload
|
from ...output_dir import FileSink, Redownload
|
||||||
from ...utils import _iorepeat, fmt_path, soupify, url_set_query_param
|
from ...utils import fmt_path, soupify, url_set_query_param
|
||||||
from ..crawler import CrawlError, CrawlToken, CrawlWarning, DownloadToken, anoncritical
|
from ..crawler import CrawlError, CrawlToken, CrawlWarning, DownloadToken, anoncritical
|
||||||
from ..http_crawler import HttpCrawler, HttpCrawlerSection
|
from ..http_crawler import HttpCrawler, HttpCrawlerSection
|
||||||
|
from .async_helper import _iorepeat
|
||||||
from .file_templates import Links, learning_module_template
|
from .file_templates import Links, learning_module_template
|
||||||
from .ilias_html_cleaner import clean, insert_base_markup
|
from .ilias_html_cleaner import clean, insert_base_markup
|
||||||
from .kit_ilias_html import (IliasElementType, IliasForumThread, IliasLearningModulePage, IliasPage,
|
from .kit_ilias_html import (IliasElementType, IliasForumThread, IliasLearningModulePage, IliasPage,
|
||||||
|
@ -7,8 +7,9 @@ from bs4 import BeautifulSoup
|
|||||||
from ...auth import Authenticator, TfaAuthenticator
|
from ...auth import Authenticator, TfaAuthenticator
|
||||||
from ...config import Config
|
from ...config import Config
|
||||||
from ...logging import log
|
from ...logging import log
|
||||||
from ...utils import _iorepeat, soupify
|
from ...utils import soupify
|
||||||
from ..crawler import CrawlError, CrawlWarning
|
from ..crawler import CrawlError, CrawlWarning
|
||||||
|
from .async_helper import _iorepeat
|
||||||
from .ilias_web_crawler import IliasWebCrawler, IliasWebCrawlerSection
|
from .ilias_web_crawler import IliasWebCrawler, IliasWebCrawlerSection
|
||||||
|
|
||||||
TargetType = Union[str, int]
|
TargetType = Union[str, int]
|
||||||
|
@ -9,47 +9,11 @@ from types import TracebackType
|
|||||||
from typing import Any, Callable, Dict, Generic, Optional, Type, TypeVar
|
from typing import Any, Callable, Dict, Generic, Optional, Type, TypeVar
|
||||||
from urllib.parse import parse_qs, urlencode, urlsplit, urlunsplit
|
from urllib.parse import parse_qs, urlencode, urlsplit, urlunsplit
|
||||||
|
|
||||||
import aiohttp
|
|
||||||
import bs4
|
import bs4
|
||||||
|
|
||||||
from .crawl.crawler import AWrapped, CrawlError, CrawlWarning
|
|
||||||
from .logging import log
|
|
||||||
|
|
||||||
T = TypeVar("T")
|
T = TypeVar("T")
|
||||||
|
|
||||||
|
|
||||||
def _iorepeat(attempts: int, name: str, failure_is_error: bool = False) -> Callable[[AWrapped], AWrapped]:
    """Retry a flaky async I/O operation up to ``attempts`` times.

    Transient aiohttp/asyncio errors (payload, connection, timeout) trigger a
    retry; structural errors (bad content type, redirect loop) raise a
    CrawlWarning immediately. Exhausting all attempts raises CrawlError when
    failure_is_error is True, CrawlWarning otherwise.
    """
    def decorator(f: AWrapped) -> AWrapped:
        async def wrapper(*args: Any, **kwargs: Any) -> Optional[Any]:
            # Holds the most recent retryable exception, if any.
            last_exception: Optional[BaseException] = None
            # NOTE(review): 'round' shadows the builtin of the same name.
            for round in range(attempts):
                try:
                    return await f(*args, **kwargs)
                except aiohttp.ContentTypeError:  # invalid content type
                    raise CrawlWarning("ILIAS returned an invalid content type")
                except aiohttp.TooManyRedirects:
                    raise CrawlWarning("Got stuck in a redirect loop")
                except aiohttp.ClientPayloadError as e:  # encoding or not enough bytes
                    last_exception = e
                except aiohttp.ClientConnectionError as e:  # e.g. timeout, disconnect, resolve failed, etc.
                    last_exception = e
                except asyncio.exceptions.TimeoutError as e:  # explicit http timeouts in HttpCrawler
                    last_exception = e
                log.explain_topic(f"Retrying operation {name}. Retries left: {attempts - 1 - round}")

            if last_exception:
                message = f"Error in I/O Operation: {last_exception}"
                if failure_is_error:
                    raise CrawlError(message) from last_exception
                else:
                    raise CrawlWarning(message) from last_exception
            # Unreachable in practice: the loop either returned or set last_exception.
            raise CrawlError("Impossible return in ilias _iorepeat")

        return wrapper  # type: ignore
    return decorator
|
|
||||||
|
|
||||||
|
|
||||||
async def in_daemon_thread(func: Callable[..., T], *args: Any, **kwargs: Any) -> T:
|
async def in_daemon_thread(func: Callable[..., T], *args: Any, **kwargs: Any) -> T:
|
||||||
loop = asyncio.get_running_loop()
|
loop = asyncio.get_running_loop()
|
||||||
future: asyncio.Future[T] = asyncio.Future()
|
future: asyncio.Future[T] = asyncio.Future()
|
||||||
|
Reference in New Issue
Block a user