From 0acdee15a0987bef6f8de8105404bedf414bee72 Mon Sep 17 00:00:00 2001
From: Joscha
Date: Thu, 13 May 2021 18:57:20 +0200
Subject: [PATCH] Let crawlers obtain authenticators

---
 PFERD/crawler.py           | 12 +++++++++++-
 PFERD/crawlers/__init__.py | 12 +++++++-----
 PFERD/pferd.py             |  1 +
 3 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/PFERD/crawler.py b/PFERD/crawler.py
index 4bcfe65..5148d9d 100644
--- a/PFERD/crawler.py
+++ b/PFERD/crawler.py
@@ -4,10 +4,11 @@ from datetime import datetime
 from pathlib import Path, PurePath
 # TODO In Python 3.9 and above, AsyncContextManager is deprecated
 from typing import (Any, AsyncContextManager, AsyncIterator, Awaitable,
-                    Callable, Optional, TypeVar)
+                    Callable, Dict, Optional, TypeVar)
 
 from rich.markup import escape
 
+from .authenticator import Authenticator
 from .conductor import ProgressBar, TerminalConductor
 from .config import Config, Section
 from .limiter import Limiter
@@ -136,6 +137,15 @@ class CrawlerSection(Section):
     def transform(self) -> str:
         return self.s.get("transform", "")
 
+    def auth(self, authenticators: Dict[str, Authenticator]) -> Authenticator:
+        value = self.s.get("auth")
+        if value is None:
+            self.missing_value("auth")
+        auth = authenticators.get(f"auth:{value}")
+        if auth is None:
+            self.invalid_value("auth", value)
+        return auth
+
 
 class Crawler(ABC):
     def __init__(
diff --git a/PFERD/crawlers/__init__.py b/PFERD/crawlers/__init__.py
index aa049b9..b2e5af5 100644
--- a/PFERD/crawlers/__init__.py
+++ b/PFERD/crawlers/__init__.py
@@ -1,19 +1,21 @@
 from configparser import SectionProxy
 from typing import Callable, Dict
 
+from ..authenticator import Authenticator
 from ..conductor import TerminalConductor
 from ..config import Config
 from ..crawler import Crawler
 from .local import LocalCrawler, LocalCrawlerSection
 
 CrawlerConstructor = Callable[[
-    str,                # Name (without the "crawl:" prefix)
-    SectionProxy,       # Crawler's section of global config
-    Config,             # Global config
-    TerminalConductor,  # Global conductor instance
+    str,                       # Name (without the "crawl:" prefix)
+    SectionProxy,              # Crawler's section of global config
+    Config,                    # Global config
+    TerminalConductor,         # Global conductor instance
+    Dict[str, Authenticator],  # Loaded authenticators by name
 ], Crawler]
 
 CRAWLERS: Dict[str, CrawlerConstructor] = {
-    "local": lambda n, s, c, t:
+    "local": lambda n, s, c, t, a:
         LocalCrawler(n, LocalCrawlerSection(s), c, t),
 }
diff --git a/PFERD/pferd.py b/PFERD/pferd.py
index fb411fb..4500ba9 100644
--- a/PFERD/pferd.py
+++ b/PFERD/pferd.py
@@ -62,6 +62,7 @@ class Pferd:
                 section,
                 self._config,
                 self._conductor,
+                self._authenticators,
             )
 
             self._crawlers[name] = crawler