diff --git a/PFERD/conductor.py b/PFERD/conductor.py index 76d0e2a..4648e77 100644 --- a/PFERD/conductor.py +++ b/PFERD/conductor.py @@ -14,7 +14,7 @@ class ProgressBar: def advance(self, amount: float = 1) -> None: self._progress.advance(self._taskid, advance=amount) - def set_total(self, total) -> None: + def set_total(self, total: float) -> None: self._progress.update(self._taskid, total=total) diff --git a/PFERD/config.py b/PFERD/config.py index f2abe8d..f63922b 100644 --- a/PFERD/config.py +++ b/PFERD/config.py @@ -23,6 +23,10 @@ class ConfigFormatException(Exception): class Section: + """ + Base class for the crawler and auth section classes. + """ + def __init__(self, section: SectionProxy): self.s = section diff --git a/PFERD/crawler.py b/PFERD/crawler.py index b8e9d7c..4bcfe65 100644 --- a/PFERD/crawler.py +++ b/PFERD/crawler.py @@ -4,7 +4,7 @@ from datetime import datetime from pathlib import Path, PurePath # TODO In Python 3.9 and above, AsyncContextManager is deprecated from typing import (Any, AsyncContextManager, AsyncIterator, Awaitable, - Callable, Optional, Protocol, TypeVar) + Callable, Optional, TypeVar) from rich.markup import escape @@ -141,8 +141,9 @@ class Crawler(ABC): def __init__( self, name: str, - config: Config, section: CrawlerSection, + config: Config, + conductor: TerminalConductor, ) -> None: """ Initialize a crawler from its name and its section in the config file. @@ -154,9 +155,9 @@ class Crawler(ABC): """ self.name = name - - self._conductor = TerminalConductor() + self._conductor = conductor self._limiter = Limiter() + self.error_free = True try: self._transformer = Transformer(section.transform()) @@ -171,8 +172,6 @@ class Crawler(ABC): self._conductor, ) - self.error_free = False - def print(self, text: str) -> None: """ Print rich markup to the terminal. Crawlers *must* use this function to diff --git a/PFERD/crawlers/__init__.py b/PFERD/crawlers/__init__.py index bf88a2a..aa049b9 100644 --- a/PFERD/crawlers/__init__.py +++ b/PFERD/crawlers/__init__.py @@ -1,10 +1,19 @@ from configparser import SectionProxy from typing import Callable, Dict +from ..conductor import TerminalConductor from ..config import Config -from ..crawler import Crawler, CrawlerSection +from ..crawler import Crawler from .local import LocalCrawler, LocalCrawlerSection -CRAWLERS: Dict[str, Callable[[str, Config, SectionProxy], Crawler]] = { - "local": lambda n, c, s: LocalCrawler(n, c, LocalCrawlerSection(s)), +CrawlerConstructor = Callable[[ + str, # Name (without the "crawl:" prefix) + SectionProxy, # Crawler's section of global config + Config, # Global config + TerminalConductor, # Global conductor instance +], Crawler] + +CRAWLERS: Dict[str, CrawlerConstructor] = { + "local": lambda n, s, c, t: + LocalCrawler(n, LocalCrawlerSection(s), c, t), } diff --git a/PFERD/crawlers/local.py b/PFERD/crawlers/local.py index 40cc233..8501877 100644 --- a/PFERD/crawlers/local.py +++ b/PFERD/crawlers/local.py @@ -1,6 +1,7 @@ import asyncio from pathlib import Path, PurePath +from ..conductor import TerminalConductor from ..config import Config from ..crawler import Crawler, CrawlerSection, anoncritical @@ -17,10 +18,11 @@ class LocalCrawler(Crawler): def __init__( self, name: str, - config: Config, section: LocalCrawlerSection, + config: Config, + conductor: TerminalConductor, ): - super().__init__(name, config, section) + super().__init__(name, section, config, conductor) self._path = config.working_dir / section.path() diff --git a/PFERD/pferd.py b/PFERD/pferd.py index 7cdbfa0..c7cd695 100644 --- a/PFERD/pferd.py +++ b/PFERD/pferd.py @@ -3,6 +3,7 @@ from typing import Dict from rich import print from rich.markup import escape +from .conductor import TerminalConductor from .config import Config from .crawler import Crawler from .crawlers import CRAWLERS @@ -15,6 +16,7 @@ class PferdLoadException(Exception): class Pferd: def __init__(self, config: Config): self._config = config + self._conductor = TerminalConductor() self._crawlers: Dict[str, Crawler] = {} def _load_crawlers(self) -> None: @@ -29,7 +31,12 @@ class Pferd: print(f"[red]Error: Unknown crawler type {t}") continue - crawler = crawler_constructor(name, self._config, section) + crawler = crawler_constructor( + name, + section, + self._config, + self._conductor, + ) self._crawlers[name] = crawler if abort: