2021-04-30 16:22:14 +02:00
|
|
|
from typing import Dict
|
|
|
|
|
|
|
|
from rich.markup import escape
|
|
|
|
|
2021-05-11 00:27:43 +02:00
|
|
|
from .authenticator import Authenticator
|
|
|
|
from .authenticators import AUTHENTICATORS
|
2021-05-22 21:05:32 +02:00
|
|
|
from .config import Config, ConfigOptionError
|
|
|
|
from .crawler import Crawler, CrawlError
|
2021-04-29 13:45:04 +02:00
|
|
|
from .crawlers import CRAWLERS
|
2021-05-22 21:05:32 +02:00
|
|
|
from .logging import log
|
2021-04-30 16:22:14 +02:00
|
|
|
|
|
|
|
|
2021-04-29 13:45:04 +02:00
|
|
|
class Pferd:
    """
    Builds the authenticators and crawlers described by a Config and then runs
    the crawlers one after another.
    """

    def __init__(self, config: Config):
        """
        Remember the config and start out with empty authenticator and crawler
        tables.

        Nothing is instantiated here. The config sections are only turned into
        objects once run() is awaited, which is also where a ConfigOptionError
        can surface.
        """

        self._crawlers: Dict[str, Crawler] = {}
        self._authenticators: Dict[str, Authenticator] = {}
        self._config = config

    def _load_authenticators(self) -> None:
        """
        Instantiate one authenticator per authenticator section of the config
        and register it under the section's name.

        Raises ConfigOptionError if a section's "type" value has no entry in
        AUTHENTICATORS.
        """

        for name, section in self._config.authenticator_sections():
            log.print(f"[bold bright_cyan]Loading[/] {escape(name)}")

            kind = section.get("type")
            make_authenticator = AUTHENTICATORS.get(kind)
            if make_authenticator is None:
                raise ConfigOptionError(name, "type", f"Unknown authenticator type: {kind!r}")

            self._authenticators[name] = make_authenticator(name, section, self._config)

    def _load_crawlers(self) -> None:
        """
        Instantiate one crawler per crawler section of the config and register
        it under the section's name. Every crawler constructor is handed the
        table of authenticators held by this object.

        Raises ConfigOptionError if a section's "type" value has no entry in
        CRAWLERS.
        """

        for name, section in self._config.crawler_sections():
            log.print(f"[bold bright_cyan]Loading[/] {escape(name)}")

            kind = section.get("type")
            make_crawler = CRAWLERS.get(kind)
            if make_crawler is None:
                raise ConfigOptionError(name, "type", f"Unknown crawler type: {kind!r}")

            self._crawlers[name] = make_crawler(name, section, self._config, self._authenticators)

    async def run(self) -> None:
        """
        Load all authenticators and crawlers, then await each crawler in turn.

        A ConfigOptionError raised while loading propagates to the caller. Once
        the crawlers are running, a CrawlError is reported via log.error() and
        any other Exception via log.unexpected_exception(); in both cases the
        loop carries on with the next crawler.
        """

        # Loading is deliberately done here rather than in __init__: both
        # loaders have to execute inside the event loop the crawlers will use,
        # so that objects created along the way (e.g. Conditions or Futures)
        # pick up the correct event loop.
        self._load_authenticators()
        self._load_crawlers()

        for crawler_name, crawler in self._crawlers.items():
            log.print("")
            log.print(f"[bold bright_cyan]Running[/] {escape(crawler_name)}")

            try:
                await crawler.run()
            except CrawlError as crawl_error:
                log.error(str(crawl_error))
            except Exception:
                log.unexpected_exception()
|