Load crawlers from config file

This commit is contained in:
Joscha 2021-04-30 16:22:14 +02:00
parent a8dcf941b9
commit 91c33596da
3 changed files with 69 additions and 6 deletions

View File

@ -1,7 +1,7 @@
 import configparser
 import os
 from pathlib import Path
-from typing import Optional
+from typing import List, Optional, Tuple
 from .utils import prompt_yes_no
@ -26,7 +26,6 @@ class Config:
     def __init__(self, parser: configparser.ConfigParser):
         self._parser = parser
-        # TODO Load and validate config into dataclasses
     @staticmethod
     def _fail_load(path: Path, reason: str) -> None:
@ -99,3 +98,21 @@ class Config:
             self._fail_dump(path, "That's a directory, not a file")
         except PermissionError:
             self._fail_dump(path, "Insufficient permissions")
@property
def default_section(self) -> configparser.SectionProxy:
return self._parser[configparser.DEFAULTSECT]
def crawler_sections(self) -> List[Tuple[str, configparser.SectionProxy]]:
result = []
for section_name, section_proxy in self._parser.items():
if section_name.startswith("crawler:"):
crawler_name = section_name[8:]
result.append((crawler_name, section_proxy))
return result
@property
def working_dir(self) -> Path:
pathstr = self.default_section.get("working_dir", ".")
return Path(pathstr).expanduser()

View File

@ -8,6 +8,7 @@ from typing import AsyncContextManager, AsyncIterator, Optional
 from rich.markup import escape
 from .conductor import ProgressBar, TerminalConductor
+from .config import Config
 from .limiter import Limiter
 from .transformer import RuleParseException, Transformer
@ -17,7 +18,12 @@ class CrawlerLoadException(Exception):
 class Crawler(ABC):
-    def __init__(self, name: str, section: configparser.SectionProxy) -> None:
+    def __init__(
+        self,
+        name: str,
+        config: Config,
+        section: configparser.SectionProxy,
+    ) -> None:
         """
         Initialize a crawler from its name and its section in the config file.

View File

@ -1,12 +1,52 @@
+from typing import Dict
+from rich import print
+from rich.markup import escape
 from .config import Config
+from .crawler import Crawler
 from .crawlers import CRAWLERS
+class PferdLoadException(Exception):
+    pass
 class Pferd:
     def __init__(self, config: Config):
         self._config = config
+        self._crawlers: Dict[str, Crawler] = {}
def _load_crawlers(self) -> None:
abort = False
for name, section in self._config.crawler_sections():
print(f"[bold bright_cyan]Loading[/] crawler:{escape(name)}")
crawler_type = section.get("type")
crawler_constructor = CRAWLERS.get(crawler_type)
if crawler_constructor is None:
abort = True
if crawler_type is None:
print("[red]Error: No type")
else:
t = escape(repr(crawler_type))
print(f"[red]Error: Unknown type {t}")
continue
crawler = crawler_constructor(name, self._config, section)
self._crawlers[name] = crawler
if abort:
raise PferdLoadException()
async def run(self) -> None: async def run(self) -> None:
print("Bleep bloop 1") try:
await CRAWLERS["dummy"]("dummy", self._config._parser["dummy"]).run() self._load_crawlers()
print("Bleep bloop 2") except PferdLoadException:
print("[bold red]Could not initialize PFERD properly")
exit(1)
for name, crawler in self._crawlers.items():
print()
print(f"[bold bright_cyan]Running[/] crawler:{escape(name)}")
await crawler.run()