Load crawlers from config file

Joscha 2021-04-30 16:22:14 +02:00
parent a8dcf941b9
commit 91c33596da
3 changed files with 69 additions and 6 deletions

@@ -1,7 +1,7 @@
 import configparser
 import os
 from pathlib import Path
-from typing import Optional
+from typing import List, Optional, Tuple
 
 from .utils import prompt_yes_no
@@ -26,7 +26,6 @@ class Config:
 
     def __init__(self, parser: configparser.ConfigParser):
         self._parser = parser
-        # TODO Load and validate config into dataclasses
 
     @staticmethod
     def _fail_load(path: Path, reason: str) -> None:
@@ -99,3 +98,21 @@ class Config:
             self._fail_dump(path, "That's a directory, not a file")
         except PermissionError:
             self._fail_dump(path, "Insufficient permissions")
+
+    @property
+    def default_section(self) -> configparser.SectionProxy:
+        return self._parser[configparser.DEFAULTSECT]
+
+    def crawler_sections(self) -> List[Tuple[str, configparser.SectionProxy]]:
+        result = []
+        for section_name, section_proxy in self._parser.items():
+            if section_name.startswith("crawler:"):
+                crawler_name = section_name[8:]
+                result.append((crawler_name, section_proxy))
+        return result
+
+    @property
+    def working_dir(self) -> Path:
+        pathstr = self.default_section.get("working_dir", ".")
+        return Path(pathstr).expanduser()
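With these helpers, a PFERD config file describes each crawler in a section named "crawler:<name>", while shared settings such as working_dir live in the DEFAULT section. A minimal, self-contained sketch of that discovery logic follows; the section name "crawler:my-crawler" and its values are invented for illustration, only the "crawler:" prefix and the "type" and "working_dir" keys come from the diff above:

import configparser

# Hypothetical example config: the [crawler:...] naming and the "type" /
# "working_dir" keys mirror the diff above, the concrete values are made up.
EXAMPLE_CONFIG = """
[DEFAULT]
working_dir = ~/pferd

[crawler:my-crawler]
type = dummy
"""

parser = configparser.ConfigParser()
parser.read_string(EXAMPLE_CONFIG)

# Same discovery logic as Config.crawler_sections(): every section whose name
# starts with "crawler:" describes one crawler, keyed by the part after the colon.
for section_name, section in parser.items():
    if section_name.startswith("crawler:"):
        print(section_name[len("crawler:"):], "->", section.get("type"))
# Prints: my-crawler -> dummy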

@@ -8,6 +8,7 @@ from typing import AsyncContextManager, AsyncIterator, Optional
 from rich.markup import escape
 
 from .conductor import ProgressBar, TerminalConductor
+from .config import Config
 from .limiter import Limiter
 from .transformer import RuleParseException, Transformer
@@ -17,7 +18,12 @@ class CrawlerLoadException(Exception):
 
 
 class Crawler(ABC):
-    def __init__(self, name: str, section: configparser.SectionProxy) -> None:
+    def __init__(
+            self,
+            name: str,
+            config: Config,
+            section: configparser.SectionProxy,
+    ) -> None:
         """
         Initialize a crawler from its name and its section in the config file.
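The extra config parameter matches how Pferd._load_crawlers (below) instantiates crawlers: crawler_constructor(name, self._config, section). A rough sketch of the shape the CRAWLERS registry from .crawlers presumably has; the DummyCrawler class and the stub types here are assumptions, only the "dummy" key and the constructor arguments appear in this commit:

import configparser
from typing import Callable, Dict


# Stand-in stubs for illustration only; the real Config and Crawler classes
# live in PFERD's own modules and do considerably more.
class Config:
    pass


class Crawler:
    def __init__(self, name: str, config: Config,
                 section: configparser.SectionProxy) -> None:
        self.name = name


class DummyCrawler(Crawler):  # hypothetical concrete crawler type
    pass


# Assumed shape of the registry: config "type" values map to constructors that
# Pferd._load_crawlers calls as crawler_constructor(name, config, section).
CRAWLERS: Dict[str, Callable[..., Crawler]] = {
    "dummy": DummyCrawler,
}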

@@ -1,12 +1,52 @@
+from typing import Dict
+
 from rich import print
+from rich.markup import escape
 
 from .config import Config
+from .crawler import Crawler
 from .crawlers import CRAWLERS
 
 
+class PferdLoadException(Exception):
+    pass
+
+
 class Pferd:
     def __init__(self, config: Config):
         self._config = config
+        self._crawlers: Dict[str, Crawler] = {}
+
+    def _load_crawlers(self) -> None:
+        abort = False
+        for name, section in self._config.crawler_sections():
+            print(f"[bold bright_cyan]Loading[/] crawler:{escape(name)}")
+
+            crawler_type = section.get("type")
+            crawler_constructor = CRAWLERS.get(crawler_type)
+            if crawler_constructor is None:
+                abort = True
+                if crawler_type is None:
+                    print("[red]Error: No type")
+                else:
+                    t = escape(repr(crawler_type))
+                    print(f"[red]Error: Unknown type {t}")
+                continue
+
+            crawler = crawler_constructor(name, self._config, section)
+            self._crawlers[name] = crawler
+
+        if abort:
+            raise PferdLoadException()
 
     async def run(self) -> None:
-        print("Bleep bloop 1")
-        await CRAWLERS["dummy"]("dummy", self._config._parser["dummy"]).run()
-        print("Bleep bloop 2")
+        try:
+            self._load_crawlers()
+        except PferdLoadException:
+            print("[bold red]Could not initialize PFERD properly")
+            exit(1)
+
+        for name, crawler in self._crawlers.items():
+            print()
+            print(f"[bold bright_cyan]Running[/] crawler:{escape(name)}")
+            await crawler.run()
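For completeness, a hedged sketch of how this class might be driven from a synchronous entry point. The pferd.ini file name and the import paths are assumptions; only Config, Pferd and their constructors come from the diffs above:

import asyncio
import configparser

from PFERD.config import Config  # assumed import path
from PFERD.pferd import Pferd    # assumed import path


def main() -> None:
    # Parse the INI file and hand it to Config, which wraps the ConfigParser.
    parser = configparser.ConfigParser()
    parser.read("pferd.ini")  # assumed config file name
    config = Config(parser)

    # Pferd.run() loads every crawler:<name> section and runs the crawlers.
    asyncio.run(Pferd(config).run())


if __name__ == "__main__":
    main()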