Let crawlers obtain authenticators

This commit is contained in:
Joscha 2021-05-13 18:57:20 +02:00
parent c3ce6bb31c
commit 0acdee15a0
3 changed files with 19 additions and 6 deletions

View File

@ -4,10 +4,11 @@ from datetime import datetime
from pathlib import Path, PurePath
# TODO In Python 3.9 and above, AsyncContextManager is deprecated
from typing import (Any, AsyncContextManager, AsyncIterator, Awaitable,
Callable, Optional, TypeVar)
Callable, Dict, Optional, TypeVar)
from rich.markup import escape
from .authenticator import Authenticator
from .conductor import ProgressBar, TerminalConductor
from .config import Config, Section
from .limiter import Limiter
@ -136,6 +137,15 @@ class CrawlerSection(Section):
def transform(self) -> str:
    """Return this section's "transform" option, or "" when it is not set."""
    rules = self.s.get("transform", "")
    return rules
def auth(self, authenticators: Dict[str, Authenticator]) -> Authenticator:
    """
    Look up the authenticator named by this section's "auth" option.

    The option value is prefixed with "auth:" and used as the key into
    authenticators. A missing option is reported through missing_value() and
    a name with no matching entry through invalid_value(); both presumably
    raise, but that is not visible from this method.
    """
    name = self.s.get("auth")
    if name is None:
        self.missing_value("auth")

    key = f"auth:{name}"
    found = authenticators.get(key)
    if found is None:
        self.invalid_value("auth", name)

    return found
class Crawler(ABC):
def __init__(

View File

@ -1,19 +1,21 @@
from configparser import SectionProxy
from typing import Callable, Dict
from ..authenticator import Authenticator
from ..conductor import TerminalConductor
from ..config import Config
from ..crawler import Crawler
from .local import LocalCrawler, LocalCrawlerSection
# Type of the factory callables stored in CRAWLERS; each one returns a Crawler.
# NOTE(review): this span appears to come from a rendered diff, so both the
# four-parameter form and the five-parameter form (with the authenticators
# dict) are listed below. Confirm which lines belong to the current file.
CrawlerConstructor = Callable[[
str, # Name (without the "crawl:" prefix)
SectionProxy, # Crawler's section of global config
Config, # Global config
TerminalConductor, # Global conductor instance
str, # Name (without the "crawl:" prefix)
SectionProxy, # Crawler's section of global config
Config, # Global config
TerminalConductor, # Global conductor instance
Dict[str, Authenticator], # Loaded authenticators by name
], Crawler]
# Maps a crawler key (e.g. "local") to the constructor that builds it.
# NOTE(review): two "local" entries appear here, presumably the pre- and
# post-change lines of a diff. The five-argument lambda accepts the
# authenticators dict as `a` but does not forward it to LocalCrawler.
CRAWLERS: Dict[str, CrawlerConstructor] = {
"local": lambda n, s, c, t:
"local": lambda n, s, c, t, a:
LocalCrawler(n, LocalCrawlerSection(s), c, t),
}

View File

@ -62,6 +62,7 @@ class Pferd:
section,
self._config,
self._conductor,
self._authenticators,
)
self._crawlers[name] = crawler