mirror of
https://github.com/Garmelon/PFERD.git
synced 2023-12-21 10:23:01 +01:00
Use global conductor instance
Previously, each crawler created its own conductor; now a single PFERD-global conductor is created once and passed to every crawler. This switch was made to prepare for auth section credential providers.
This commit is contained in:
parent
595ba8b7ab
commit
d5f29f01c5
@ -14,7 +14,7 @@ class ProgressBar:
|
||||
def advance(self, amount: float = 1) -> None:
|
||||
self._progress.advance(self._taskid, advance=amount)
|
||||
|
||||
def set_total(self, total) -> None:
|
||||
def set_total(self, total: float) -> None:
|
||||
self._progress.update(self._taskid, total=total)
|
||||
|
||||
|
||||
|
@ -23,6 +23,10 @@ class ConfigFormatException(Exception):
|
||||
|
||||
|
||||
class Section:
|
||||
"""
|
||||
Base class for the crawler and auth section classes.
|
||||
"""
|
||||
|
||||
def __init__(self, section: SectionProxy):
|
||||
self.s = section
|
||||
|
||||
|
@ -4,7 +4,7 @@ from datetime import datetime
|
||||
from pathlib import Path, PurePath
|
||||
# TODO In Python 3.9 and above, typing.AsyncContextManager is deprecated in favor of contextlib.AbstractAsyncContextManager
|
||||
from typing import (Any, AsyncContextManager, AsyncIterator, Awaitable,
|
||||
Callable, Optional, Protocol, TypeVar)
|
||||
Callable, Optional, TypeVar)
|
||||
|
||||
from rich.markup import escape
|
||||
|
||||
@ -141,8 +141,9 @@ class Crawler(ABC):
|
||||
def __init__(
|
||||
self,
|
||||
name: str,
|
||||
config: Config,
|
||||
section: CrawlerSection,
|
||||
config: Config,
|
||||
conductor: TerminalConductor,
|
||||
) -> None:
|
||||
"""
|
||||
Initialize a crawler from its name and its section in the config file.
|
||||
@ -154,9 +155,9 @@ class Crawler(ABC):
|
||||
"""
|
||||
|
||||
self.name = name
|
||||
|
||||
self._conductor = TerminalConductor()
|
||||
self._conductor = conductor
|
||||
self._limiter = Limiter()
|
||||
self.error_free = True
|
||||
|
||||
try:
|
||||
self._transformer = Transformer(section.transform())
|
||||
@ -171,8 +172,6 @@ class Crawler(ABC):
|
||||
self._conductor,
|
||||
)
|
||||
|
||||
self.error_free = False
|
||||
|
||||
def print(self, text: str) -> None:
|
||||
"""
|
||||
Print rich markup to the terminal. Crawlers *must* use this function to
|
||||
|
@ -1,10 +1,19 @@
|
||||
from configparser import SectionProxy
|
||||
from typing import Callable, Dict
|
||||
|
||||
from ..conductor import TerminalConductor
|
||||
from ..config import Config
|
||||
from ..crawler import Crawler, CrawlerSection
|
||||
from ..crawler import Crawler
|
||||
from .local import LocalCrawler, LocalCrawlerSection
|
||||
|
||||
CRAWLERS: Dict[str, Callable[[str, Config, SectionProxy], Crawler]] = {
|
||||
"local": lambda n, c, s: LocalCrawler(n, c, LocalCrawlerSection(s)),
|
||||
CrawlerConstructor = Callable[[
|
||||
str, # Name (without the "crawl:" prefix)
|
||||
SectionProxy, # Crawler's section of global config
|
||||
Config, # Global config
|
||||
TerminalConductor, # Global conductor instance
|
||||
], Crawler]
|
||||
|
||||
CRAWLERS: Dict[str, CrawlerConstructor] = {
|
||||
"local": lambda n, s, c, t:
|
||||
LocalCrawler(n, LocalCrawlerSection(s), c, t),
|
||||
}
|
||||
|
@ -1,6 +1,7 @@
|
||||
import asyncio
|
||||
from pathlib import Path, PurePath
|
||||
|
||||
from ..conductor import TerminalConductor
|
||||
from ..config import Config
|
||||
from ..crawler import Crawler, CrawlerSection, anoncritical
|
||||
|
||||
@ -17,10 +18,11 @@ class LocalCrawler(Crawler):
|
||||
def __init__(
|
||||
self,
|
||||
name: str,
|
||||
config: Config,
|
||||
section: LocalCrawlerSection,
|
||||
config: Config,
|
||||
conductor: TerminalConductor,
|
||||
):
|
||||
super().__init__(name, config, section)
|
||||
super().__init__(name, section, config, conductor)
|
||||
|
||||
self._path = config.working_dir / section.path()
|
||||
|
||||
|
@ -3,6 +3,7 @@ from typing import Dict
|
||||
from rich import print
|
||||
from rich.markup import escape
|
||||
|
||||
from .conductor import TerminalConductor
|
||||
from .config import Config
|
||||
from .crawler import Crawler
|
||||
from .crawlers import CRAWLERS
|
||||
@ -15,6 +16,7 @@ class PferdLoadException(Exception):
|
||||
class Pferd:
|
||||
def __init__(self, config: Config):
|
||||
self._config = config
|
||||
self._conductor = TerminalConductor()
|
||||
self._crawlers: Dict[str, Crawler] = {}
|
||||
|
||||
def _load_crawlers(self) -> None:
|
||||
@ -29,7 +31,12 @@ class Pferd:
|
||||
print(f"[red]Error: Unknown crawler type {t}")
|
||||
continue
|
||||
|
||||
crawler = crawler_constructor(name, self._config, section)
|
||||
crawler = crawler_constructor(
|
||||
name,
|
||||
section,
|
||||
self._config,
|
||||
self._conductor,
|
||||
)
|
||||
self._crawlers[name] = crawler
|
||||
|
||||
if abort:
|
||||
|
Loading…
Reference in New Issue
Block a user