Use global conductor instance

The switch from crawler-local conductors to a single pferd-global conductor was
made to prepare for auth section credential providers.
Joscha 2021-05-10 23:50:16 +02:00
parent 595ba8b7ab
commit d5f29f01c5
6 changed files with 34 additions and 13 deletions
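
To make the new shape of the code concrete, here is a minimal sketch of the wiring this commit moves to, with stub classes standing in for the real PFERD types. Only the argument order and the shared conductor instance are taken from the diff below; every name and body in the sketch is illustrative, not PFERD's actual implementation.

# Sketch only: stubs standing in for PFERD's real classes.
from typing import Dict


class TerminalConductor:
    """Stand-in for the conductor that owns all terminal output."""


class StubCrawler:
    # New argument order: name, section, config, conductor.
    def __init__(self, name: str, section: dict, config: dict,
                 conductor: TerminalConductor) -> None:
        self.name = name
        self._conductor = conductor  # shared instance, no longer created per crawler


class StubPferd:
    def __init__(self, config: dict) -> None:
        self._config = config
        self._conductor = TerminalConductor()  # the single, pferd-global conductor
        self._crawlers: Dict[str, StubCrawler] = {}

    def load_crawler(self, name: str, section: dict) -> None:
        # Every crawler constructor receives the same conductor instance.
        self._crawlers[name] = StubCrawler(name, section, self._config, self._conductor)


pferd = StubPferd(config={})
pferd.load_crawler("crawl:a", {})
pferd.load_crawler("crawl:b", {})
# Both crawlers share one conductor, so their terminal output can be coordinated centrally.
assert pferd._crawlers["crawl:a"]._conductor is pferd._crawlers["crawl:b"]._conductor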

View File

@@ -14,7 +14,7 @@ class ProgressBar:
     def advance(self, amount: float = 1) -> None:
         self._progress.advance(self._taskid, advance=amount)

-    def set_total(self, total) -> None:
+    def set_total(self, total: float) -> None:
         self._progress.update(self._taskid, total=total)

View File

@@ -23,6 +23,10 @@ class ConfigFormatException(Exception):
 class Section:
+    """
+    Base class for the crawler and auth section classes.
+    """
+
     def __init__(self, section: SectionProxy):
         self.s = section

View File

@@ -4,7 +4,7 @@ from datetime import datetime
 from pathlib import Path, PurePath
 # TODO In Python 3.9 and above, AsyncContextManager is deprecated
 from typing import (Any, AsyncContextManager, AsyncIterator, Awaitable,
-                    Callable, Optional, Protocol, TypeVar)
+                    Callable, Optional, TypeVar)

 from rich.markup import escape
@@ -141,8 +141,9 @@ class Crawler(ABC):
     def __init__(
             self,
             name: str,
-            config: Config,
             section: CrawlerSection,
+            config: Config,
+            conductor: TerminalConductor,
     ) -> None:
         """
         Initialize a crawler from its name and its section in the config file.
@ -154,9 +155,9 @@ class Crawler(ABC):
"""
self.name = name
self._conductor = TerminalConductor()
self._conductor = conductor
self._limiter = Limiter()
self.error_free = True
try:
self._transformer = Transformer(section.transform())
@@ -171,8 +172,6 @@ class Crawler(ABC):
                 self._conductor,
             )
             self.error_free = False

     def print(self, text: str) -> None:
         """
         Print rich markup to the terminal. Crawlers *must* use this function to

View File

@@ -1,10 +1,19 @@
 from configparser import SectionProxy
 from typing import Callable, Dict

+from ..conductor import TerminalConductor
 from ..config import Config
-from ..crawler import Crawler, CrawlerSection
+from ..crawler import Crawler
 from .local import LocalCrawler, LocalCrawlerSection

-CRAWLERS: Dict[str, Callable[[str, Config, SectionProxy], Crawler]] = {
-    "local": lambda n, c, s: LocalCrawler(n, c, LocalCrawlerSection(s)),
+CrawlerConstructor = Callable[[
+    str,                # Name (without the "crawl:" prefix)
+    SectionProxy,       # Crawler's section of global config
+    Config,             # Global config
+    TerminalConductor,  # Global conductor instance
+], Crawler]
+
+CRAWLERS: Dict[str, CrawlerConstructor] = {
+    "local": lambda n, s, c, t:
+        LocalCrawler(n, LocalCrawlerSection(s), c, t),
 }
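
A side note on the new CrawlerConstructor alias: any callable taking (name, section, config, conductor) in that order fits, not only a lambda. A small sketch, assuming the module context of the hunk above; make_local is a hypothetical helper, not part of this commit.

# Hypothetical equivalent of the lambda entry above (not in the commit itself).
def make_local(n: str, s: SectionProxy, c: Config, t: TerminalConductor) -> Crawler:
    return LocalCrawler(n, LocalCrawlerSection(s), c, t)

CRAWLERS["local"] = make_local  # same behaviour as the lambda registration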

View File

@@ -1,6 +1,7 @@
 import asyncio
 from pathlib import Path, PurePath

+from ..conductor import TerminalConductor
 from ..config import Config
 from ..crawler import Crawler, CrawlerSection, anoncritical
@@ -17,10 +18,11 @@ class LocalCrawler(Crawler):
     def __init__(
             self,
             name: str,
-            config: Config,
             section: LocalCrawlerSection,
+            config: Config,
+            conductor: TerminalConductor,
     ):
-        super().__init__(name, config, section)
+        super().__init__(name, section, config, conductor)

         self._path = config.working_dir / section.path()

View File

@@ -3,6 +3,7 @@ from typing import Dict
 from rich import print
 from rich.markup import escape

+from .conductor import TerminalConductor
 from .config import Config
 from .crawler import Crawler
 from .crawlers import CRAWLERS
@@ -15,6 +16,7 @@ class PferdLoadException(Exception):
 class Pferd:
     def __init__(self, config: Config):
         self._config = config
+        self._conductor = TerminalConductor()
         self._crawlers: Dict[str, Crawler] = {}

     def _load_crawlers(self) -> None:
@@ -29,7 +31,12 @@ class Pferd:
                 print(f"[red]Error: Unknown crawler type {t}")
                 continue

-            crawler = crawler_constructor(name, self._config, section)
+            crawler = crawler_constructor(
+                name,
+                section,
+                self._config,
+                self._conductor,
+            )
             self._crawlers[name] = crawler

         if abort: