Clean up crawler exceptions and (a)noncritical

This commit is contained in:
Joscha 2021-05-19 13:25:57 +02:00
parent 3851065500
commit b7a999bc2e

View File

@ -13,11 +13,15 @@ from .config import Config, Section
from .limiter import Limiter from .limiter import Limiter
from .logging import ProgressBar, log from .logging import ProgressBar, log
from .output_dir import FileSink, OnConflict, OutputDirectory, Redownload from .output_dir import FileSink, OnConflict, OutputDirectory, Redownload
from .transformer import RuleParseException, Transformer from .transformer import Transformer
from .version import __version__ from .version import __version__
class CrawlerLoadException(Exception): class CrawlWarning(Exception):
pass
class CrawlError(Exception):
pass pass
@ -26,41 +30,29 @@ Wrapped = TypeVar("Wrapped", bound=Callable[..., None])
def noncritical(f: Wrapped) -> Wrapped: def noncritical(f: Wrapped) -> Wrapped:
""" """
Warning: Must only be applied to member functions of the Crawler class!
Catches all exceptions occuring during the function call. If an exception Catches all exceptions occuring during the function call. If an exception
occurs, the crawler's error_free variable is set to False. occurs, the crawler's error_free variable is set to False.
"""
def wrapper(self: "Crawler", *args: Any, **kwargs: Any) -> None:
try:
f(self, *args, **kwargs)
except Exception as e:
log.print(f"[red]Something went wrong: {escape(str(e))}")
self.error_free = False
return wrapper # type: ignore
def repeat(attempts: int) -> Callable[[Wrapped], Wrapped]:
"""
Warning: Must only be applied to member functions of the Crawler class! Warning: Must only be applied to member functions of the Crawler class!
If an exception occurs during the function call, retries the function call
a set amount of times. Exceptions that occur during the last attempt are
not caught and instead passed on upwards.
""" """
def decorator(f: Wrapped) -> Wrapped: def wrapper(*args: Any, **kwargs: Any) -> None:
def wrapper(self: "Crawler", *args: Any, **kwargs: Any) -> None: if not (args and isinstance(args[0], Crawler)):
for _ in range(attempts - 1): raise RuntimeError("@noncritical must only applied to Crawler methods")
try:
f(self, *args, **kwargs) crawler = args[0]
return
except Exception: try:
pass f(*args, **kwargs)
f(self, *args, **kwargs) except CrawlWarning as e:
return wrapper # type: ignore log.print(f"[bold bright_red]Warning[/] {escape(str(e))}")
return decorator crawler.error_free = False
except CrawlError as e:
log.print(f"[bold bright_red]Error[/] [red]{escape(str(e))}")
crawler.error_free = False
raise
return wrapper # type: ignore
AWrapped = TypeVar("AWrapped", bound=Callable[..., Awaitable[None]]) AWrapped = TypeVar("AWrapped", bound=Callable[..., Awaitable[None]])
@ -69,42 +61,30 @@ AWrapped = TypeVar("AWrapped", bound=Callable[..., Awaitable[None]])
def anoncritical(f: AWrapped) -> AWrapped: def anoncritical(f: AWrapped) -> AWrapped:
""" """
An async version of @noncritical. An async version of @noncritical.
Warning: Must only be applied to member functions of the Crawler class!
Catches all exceptions occuring during the function call. If an exception Catches all exceptions occuring during the function call. If an exception
occurs, the crawler's error_free variable is set to False. occurs, the crawler's error_free variable is set to False.
"""
async def wrapper(self: "Crawler", *args: Any, **kwargs: Any) -> None:
try:
await f(self, *args, **kwargs)
except Exception as e:
log.print(f"[red]Something went wrong: {escape(str(e))}")
self.error_free = False
return wrapper # type: ignore
def arepeat(attempts: int) -> Callable[[AWrapped], AWrapped]:
"""
An async version of @noncritical.
Warning: Must only be applied to member functions of the Crawler class! Warning: Must only be applied to member functions of the Crawler class!
If an exception occurs during the function call, retries the function call
a set amount of times. Exceptions that occur during the last attempt are
not caught and instead passed on upwards.
""" """
def decorator(f: AWrapped) -> AWrapped: async def wrapper(*args: Any, **kwargs: Any) -> None:
async def wrapper(self: "Crawler", *args: Any, **kwargs: Any) -> None: if not (args and isinstance(args[0], Crawler)):
for _ in range(attempts - 1): raise RuntimeError("@anoncritical must only applied to Crawler methods")
try:
await f(self, *args, **kwargs) crawler = args[0]
return
except Exception: try:
pass await f(*args, **kwargs)
await f(self, *args, **kwargs) except CrawlWarning as e:
return wrapper # type: ignore log.print(f"[bold bright_red]Warning[/] {escape(str(e))}")
return decorator crawler.error_free = False
except CrawlError as e:
log.print(f"[bold bright_red]Error[/] [red]{escape(str(e))}")
crawler.error_free = False
raise
return wrapper # type: ignore
class CrawlerSection(Section): class CrawlerSection(Section):
@ -201,11 +181,7 @@ class Crawler(ABC):
task_delay=section.delay_between_tasks(), task_delay=section.delay_between_tasks(),
) )
try: self._transformer = Transformer(section.transform())
self._transformer = Transformer(section.transform())
except RuleParseException as e:
e.pretty_print()
raise CrawlerLoadException()
self._output_dir = OutputDirectory( self._output_dir = OutputDirectory(
config.working_dir / section.output_dir(name), config.working_dir / section.output_dir(name),
@ -312,3 +288,33 @@ class HttpCrawler(Crawler):
cookie_jar.save(self._cookie_jar_path) cookie_jar.save(self._cookie_jar_path)
except Exception: except Exception:
log.print(f"[bold red]Warning:[/] Failed to save cookies to {escape(str(self.COOKIE_FILE))}") log.print(f"[bold red]Warning:[/] Failed to save cookies to {escape(str(self.COOKIE_FILE))}")
def repeat(attempts: int) -> Callable[[Wrapped], Wrapped]:
"""Deprecated."""
def decorator(f: Wrapped) -> Wrapped:
def wrapper(self: "Crawler", *args: Any, **kwargs: Any) -> None:
for _ in range(attempts - 1):
try:
f(self, *args, **kwargs)
return
except Exception:
pass
f(self, *args, **kwargs)
return wrapper # type: ignore
return decorator
def arepeat(attempts: int) -> Callable[[AWrapped], AWrapped]:
"""Deprecated."""
def decorator(f: AWrapped) -> AWrapped:
async def wrapper(self: "Crawler", *args: Any, **kwargs: Any) -> None:
for _ in range(attempts - 1):
try:
await f(self, *args, **kwargs)
return
except Exception:
pass
await f(self, *args, **kwargs)
return wrapper # type: ignore
return decorator