mirror of
https://github.com/Garmelon/PFERD.git
synced 2023-12-21 10:23:01 +01:00
Load crawlers from config file
This commit is contained in:
parent
a8dcf941b9
commit
91c33596da
@ -1,7 +1,7 @@
|
|||||||
import configparser
|
import configparser
|
||||||
import os
|
import os
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Optional
|
from typing import List, Optional, Tuple
|
||||||
|
|
||||||
from .utils import prompt_yes_no
|
from .utils import prompt_yes_no
|
||||||
|
|
||||||
@ -26,7 +26,6 @@ class Config:
|
|||||||
|
|
||||||
def __init__(self, parser: configparser.ConfigParser):
    """
    Create a config object backed by the given parser.

    The parser is not copied; a reference to it is kept in ``_parser``.
    """

    self._parser = parser
|
||||||
# TODO Load and validate config into dataclasses
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _fail_load(path: Path, reason: str) -> None:
|
def _fail_load(path: Path, reason: str) -> None:
|
||||||
@ -99,3 +98,21 @@ class Config:
|
|||||||
self._fail_dump(path, "That's a directory, not a file")
|
self._fail_dump(path, "That's a directory, not a file")
|
||||||
except PermissionError:
|
except PermissionError:
|
||||||
self._fail_dump(path, "Insufficient permissions")
|
self._fail_dump(path, "Insufficient permissions")
|
||||||
|
|
||||||
|
@property
def default_section(self) -> configparser.SectionProxy:
    """
    Section proxy for the parser's default section
    (``configparser.DEFAULTSECT``).
    """

    parser = self._parser
    return parser[configparser.DEFAULTSECT]
|
||||||
|
|
||||||
|
def crawler_sections(self) -> List[Tuple[str, configparser.SectionProxy]]:
    """
    Collect all config sections that describe a crawler.

    A section counts as a crawler section if its name starts with
    "crawler:". Each entry of the returned list pairs the rest of the
    section name (everything after that prefix) with the section's proxy.
    Entries appear in the order the parser yields its sections.
    """

    prefix = "crawler:"
    return [
        (title[len(prefix):], proxy)
        for title, proxy in self._parser.items()
        if title.startswith(prefix)
    ]
|
||||||
|
|
||||||
|
@property
def working_dir(self) -> Path:
    """
    Path taken from the "working_dir" option of the default section.

    Falls back to "." if the option is not set. A leading "~" in the value
    is expanded via ``Path.expanduser``.
    """

    return Path(self.default_section.get("working_dir", ".")).expanduser()
|
||||||
|
@ -8,6 +8,7 @@ from typing import AsyncContextManager, AsyncIterator, Optional
|
|||||||
from rich.markup import escape
|
from rich.markup import escape
|
||||||
|
|
||||||
from .conductor import ProgressBar, TerminalConductor
|
from .conductor import ProgressBar, TerminalConductor
|
||||||
|
from .config import Config
|
||||||
from .limiter import Limiter
|
from .limiter import Limiter
|
||||||
from .transformer import RuleParseException, Transformer
|
from .transformer import RuleParseException, Transformer
|
||||||
|
|
||||||
@ -17,7 +18,12 @@ class CrawlerLoadException(Exception):
|
|||||||
|
|
||||||
|
|
||||||
class Crawler(ABC):
|
class Crawler(ABC):
|
||||||
def __init__(self, name: str, section: configparser.SectionProxy) -> None:
|
def __init__(
|
||||||
|
self,
|
||||||
|
name: str,
|
||||||
|
config: Config,
|
||||||
|
section: configparser.SectionProxy,
|
||||||
|
) -> None:
|
||||||
"""
|
"""
|
||||||
Initialize a crawler from its name and its section in the config file.
|
Initialize a crawler from its name and its section in the config file.
|
||||||
|
|
||||||
|
@ -1,12 +1,52 @@
|
|||||||
|
from typing import Dict
|
||||||
|
|
||||||
|
from rich import print
|
||||||
|
from rich.markup import escape
|
||||||
|
|
||||||
from .config import Config
|
from .config import Config
|
||||||
|
from .crawler import Crawler
|
||||||
from .crawlers import CRAWLERS
|
from .crawlers import CRAWLERS
|
||||||
|
|
||||||
|
|
||||||
|
class PferdLoadException(Exception):
    """Raised when the crawlers configured for PFERD could not be loaded."""
|
||||||
|
|
||||||
|
|
||||||
class Pferd:
|
class Pferd:
|
||||||
def __init__(self, config: Config):
    """
    Remember the config and start with an empty crawler registry.

    The registry maps crawler names to crawler instances and is filled by
    ``_load_crawlers``.
    """

    self._crawlers: Dict[str, Crawler] = {}
    self._config = config
|
||||||
|
|
||||||
|
def _load_crawlers(self) -> None:
    """
    Instantiate one crawler per crawler section of the config.

    The "type" option of each section selects the constructor from
    CRAWLERS. Sections with a missing or unknown type are reported and
    skipped, but the remaining sections are still processed so that all
    problems are printed in one go.

    Raises:
        PferdLoadException: if at least one section could not be loaded.
    """

    failed = False

    for name, section in self._config.crawler_sections():
        print(f"[bold bright_cyan]Loading[/] crawler:{escape(name)}")
        kind = section.get("type")
        make_crawler = CRAWLERS.get(kind)

        if make_crawler is not None:
            self._crawlers[name] = make_crawler(name, self._config, section)
            continue

        # No constructor found: remember the failure, explain it, move on.
        failed = True
        if kind is None:
            print("[red]Error: No type")
        else:
            shown = escape(repr(kind))
            print(f"[red]Error: Unknown type {shown}")

    if failed:
        raise PferdLoadException()
|
||||||
|
|
||||||
async def run(self) -> None:
    """
    Load all crawlers from the config, then run them one after another.

    If loading fails with a PferdLoadException, an error message is printed
    and the process exits with status 1 before any crawler is run.
    """

    # Imported locally so this method does not depend on the file's
    # top-level import block.
    import sys

    try:
        self._load_crawlers()
    except PferdLoadException:
        print("[bold red]Could not initialize PFERD properly")
        # Use sys.exit rather than the bare exit(): the latter is a helper
        # the site module installs for interactive sessions and is not
        # available when the interpreter runs without site (python -S).
        sys.exit(1)

    for name, crawler in self._crawlers.items():
        print()
        print(f"[bold bright_cyan]Running[/] crawler:{escape(name)}")

        # Awaited inside the loop: a crawler starts only after the previous
        # one has finished.
        await crawler.run()
|
||||||
|
Loading…
Reference in New Issue
Block a user