2021-04-30 16:22:14 +02:00
|
|
|
from typing import Dict
|
|
|
|
|
|
|
|
from rich import print
|
|
|
|
from rich.markup import escape
|
|
|
|
|
2021-05-11 00:27:43 +02:00
|
|
|
from .authenticator import Authenticator
|
|
|
|
from .authenticators import AUTHENTICATORS
|
2021-05-10 23:50:16 +02:00
|
|
|
from .conductor import TerminalConductor
|
2021-04-29 13:45:04 +02:00
|
|
|
from .config import Config
|
2021-04-30 16:22:14 +02:00
|
|
|
from .crawler import Crawler
|
2021-04-29 13:45:04 +02:00
|
|
|
from .crawlers import CRAWLERS
|
2020-04-23 20:31:32 +02:00
|
|
|
|
2020-04-23 11:44:13 +02:00
|
|
|
|
2021-04-30 16:22:14 +02:00
|
|
|
class PferdLoadException(Exception):
    """Raised by Pferd when an authenticator or crawler section cannot be loaded."""
|
|
|
|
|
|
|
|
|
2021-04-29 13:45:04 +02:00
|
|
|
class Pferd:
    """Instantiate the configured authenticators and crawlers and run them.

    Authenticators and crawlers are looked up by the ``type`` value of their
    config section in ``AUTHENTICATORS`` and ``CRAWLERS`` respectively, and
    are stored by section name once constructed.
    """

    def __init__(self, config: Config):
        """Keep *config* and start with empty authenticator/crawler tables."""
        self._config = config
        # A single conductor instance is handed to every authenticator and
        # crawler constructed by the loaders below.
        self._conductor = TerminalConductor()
        self._authenticators: Dict[str, Authenticator] = {}
        self._crawlers: Dict[str, Crawler] = {}

    def _load_authenticators(self) -> None:
        """Construct one authenticator per authenticator section of the config.

        Raises:
            PferdLoadException: if at least one section names a type that is
                not present in ``AUTHENTICATORS``.
        """
        # Keep going after an unknown type so that every bad section is
        # reported in one pass; the failure is raised after the loop.
        abort = False
        for name, section in self._config.authenticator_sections():
            print(f"[bold bright_cyan]Loading[/] auth:{escape(name)}")
            authenticator_type = section.get("type")
            authenticator_constructor = AUTHENTICATORS.get(authenticator_type)
            if authenticator_constructor is None:
                abort = True
                # repr() makes a missing or empty type visible in the message.
                t = escape(repr(authenticator_type))
                print(f"[red]Error: Unknown authenticator type {t}")
                continue

            authenticator = authenticator_constructor(
                name,
                section,
                self._config,
                self._conductor,
            )
            self._authenticators[name] = authenticator

        if abort:
            raise PferdLoadException()

    def _load_crawlers(self) -> None:
        """Construct one crawler per crawler section of the config.

        Raises:
            PferdLoadException: if at least one section names a type that is
                not present in ``CRAWLERS``.
        """
        # Same reporting strategy as for authenticators: collect all errors
        # first, then fail once.
        abort = False
        for name, section in self._config.crawler_sections():
            print(f"[bold bright_cyan]Loading[/] crawler:{escape(name)}")
            crawler_type = section.get("type")
            crawler_constructor = CRAWLERS.get(crawler_type)
            if crawler_constructor is None:
                abort = True
                t = escape(repr(crawler_type))
                print(f"[red]Error: Unknown crawler type {t}")
                continue

            crawler = crawler_constructor(
                name,
                section,
                self._config,
                self._conductor,
            )
            self._crawlers[name] = crawler

        if abort:
            raise PferdLoadException()

    async def run(self) -> None:
        """Load authenticators and crawlers, then run every crawler in turn.

        Each crawler's ``run()`` is awaited before the next one is started.

        Raises:
            SystemExit: with exit status 1 if loading fails.
        """
        try:
            self._load_authenticators()
            self._load_crawlers()
        except PferdLoadException:
            print("[bold red]Could not initialize PFERD properly")
            # Raise SystemExit directly instead of calling the ``exit``
            # builtin: ``exit`` is installed by the ``site`` module and is
            # absent when the interpreter runs without it.
            raise SystemExit(1)

        for name, crawler in self._crawlers.items():
            print()
            print(f"[bold bright_cyan]Running[/] crawler:{escape(name)}")

            await crawler.run()
|