mirror of
https://github.com/Garmelon/PFERD.git
synced 2023-12-21 10:23:01 +01:00
Use global conductor instance
The switch from crawler-local conductors to a single pferd-global conductor was made to prepare for auth section credential providers.
This commit is contained in:
parent
595ba8b7ab
commit
d5f29f01c5
@ -14,7 +14,7 @@ class ProgressBar:
|
|||||||
def advance(self, amount: float = 1) -> None:
|
def advance(self, amount: float = 1) -> None:
|
||||||
self._progress.advance(self._taskid, advance=amount)
|
self._progress.advance(self._taskid, advance=amount)
|
||||||
|
|
||||||
def set_total(self, total) -> None:
|
def set_total(self, total: float) -> None:
|
||||||
self._progress.update(self._taskid, total=total)
|
self._progress.update(self._taskid, total=total)
|
||||||
|
|
||||||
|
|
||||||
|
@ -23,6 +23,10 @@ class ConfigFormatException(Exception):
|
|||||||
|
|
||||||
|
|
||||||
class Section:
|
class Section:
|
||||||
|
"""
|
||||||
|
Base class for the crawler and auth section classes.
|
||||||
|
"""
|
||||||
|
|
||||||
def __init__(self, section: SectionProxy):
|
def __init__(self, section: SectionProxy):
|
||||||
self.s = section
|
self.s = section
|
||||||
|
|
||||||
|
@ -4,7 +4,7 @@ from datetime import datetime
|
|||||||
from pathlib import Path, PurePath
|
from pathlib import Path, PurePath
|
||||||
# TODO In Python 3.9 and above, AsyncContextManager is deprecated
|
# TODO In Python 3.9 and above, AsyncContextManager is deprecated
|
||||||
from typing import (Any, AsyncContextManager, AsyncIterator, Awaitable,
|
from typing import (Any, AsyncContextManager, AsyncIterator, Awaitable,
|
||||||
Callable, Optional, Protocol, TypeVar)
|
Callable, Optional, TypeVar)
|
||||||
|
|
||||||
from rich.markup import escape
|
from rich.markup import escape
|
||||||
|
|
||||||
@ -141,8 +141,9 @@ class Crawler(ABC):
|
|||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
name: str,
|
name: str,
|
||||||
config: Config,
|
|
||||||
section: CrawlerSection,
|
section: CrawlerSection,
|
||||||
|
config: Config,
|
||||||
|
conductor: TerminalConductor,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""
|
"""
|
||||||
Initialize a crawler from its name and its section in the config file.
|
Initialize a crawler from its name and its section in the config file.
|
||||||
@ -154,9 +155,9 @@ class Crawler(ABC):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
self.name = name
|
self.name = name
|
||||||
|
self._conductor = conductor
|
||||||
self._conductor = TerminalConductor()
|
|
||||||
self._limiter = Limiter()
|
self._limiter = Limiter()
|
||||||
|
self.error_free = True
|
||||||
|
|
||||||
try:
|
try:
|
||||||
self._transformer = Transformer(section.transform())
|
self._transformer = Transformer(section.transform())
|
||||||
@ -171,8 +172,6 @@ class Crawler(ABC):
|
|||||||
self._conductor,
|
self._conductor,
|
||||||
)
|
)
|
||||||
|
|
||||||
self.error_free = False
|
|
||||||
|
|
||||||
def print(self, text: str) -> None:
|
def print(self, text: str) -> None:
|
||||||
"""
|
"""
|
||||||
Print rich markup to the terminal. Crawlers *must* use this function to
|
Print rich markup to the terminal. Crawlers *must* use this function to
|
||||||
|
@ -1,10 +1,19 @@
|
|||||||
from configparser import SectionProxy
|
from configparser import SectionProxy
|
||||||
from typing import Callable, Dict
|
from typing import Callable, Dict
|
||||||
|
|
||||||
|
from ..conductor import TerminalConductor
|
||||||
from ..config import Config
|
from ..config import Config
|
||||||
from ..crawler import Crawler, CrawlerSection
|
from ..crawler import Crawler
|
||||||
from .local import LocalCrawler, LocalCrawlerSection
|
from .local import LocalCrawler, LocalCrawlerSection
|
||||||
|
|
||||||
CRAWLERS: Dict[str, Callable[[str, Config, SectionProxy], Crawler]] = {
|
CrawlerConstructor = Callable[[
|
||||||
"local": lambda n, c, s: LocalCrawler(n, c, LocalCrawlerSection(s)),
|
str, # Name (without the "crawl:" prefix)
|
||||||
|
SectionProxy, # Crawler's section of global config
|
||||||
|
Config, # Global config
|
||||||
|
TerminalConductor, # Global conductor instance
|
||||||
|
], Crawler]
|
||||||
|
|
||||||
|
CRAWLERS: Dict[str, CrawlerConstructor] = {
|
||||||
|
"local": lambda n, s, c, t:
|
||||||
|
LocalCrawler(n, LocalCrawlerSection(s), c, t),
|
||||||
}
|
}
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
import asyncio
|
import asyncio
|
||||||
from pathlib import Path, PurePath
|
from pathlib import Path, PurePath
|
||||||
|
|
||||||
|
from ..conductor import TerminalConductor
|
||||||
from ..config import Config
|
from ..config import Config
|
||||||
from ..crawler import Crawler, CrawlerSection, anoncritical
|
from ..crawler import Crawler, CrawlerSection, anoncritical
|
||||||
|
|
||||||
@ -17,10 +18,11 @@ class LocalCrawler(Crawler):
|
|||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
name: str,
|
name: str,
|
||||||
config: Config,
|
|
||||||
section: LocalCrawlerSection,
|
section: LocalCrawlerSection,
|
||||||
|
config: Config,
|
||||||
|
conductor: TerminalConductor,
|
||||||
):
|
):
|
||||||
super().__init__(name, config, section)
|
super().__init__(name, section, config, conductor)
|
||||||
|
|
||||||
self._path = config.working_dir / section.path()
|
self._path = config.working_dir / section.path()
|
||||||
|
|
||||||
|
@ -3,6 +3,7 @@ from typing import Dict
|
|||||||
from rich import print
|
from rich import print
|
||||||
from rich.markup import escape
|
from rich.markup import escape
|
||||||
|
|
||||||
|
from .conductor import TerminalConductor
|
||||||
from .config import Config
|
from .config import Config
|
||||||
from .crawler import Crawler
|
from .crawler import Crawler
|
||||||
from .crawlers import CRAWLERS
|
from .crawlers import CRAWLERS
|
||||||
@ -15,6 +16,7 @@ class PferdLoadException(Exception):
|
|||||||
class Pferd:
|
class Pferd:
|
||||||
def __init__(self, config: Config):
|
def __init__(self, config: Config):
|
||||||
self._config = config
|
self._config = config
|
||||||
|
self._conductor = TerminalConductor()
|
||||||
self._crawlers: Dict[str, Crawler] = {}
|
self._crawlers: Dict[str, Crawler] = {}
|
||||||
|
|
||||||
def _load_crawlers(self) -> None:
|
def _load_crawlers(self) -> None:
|
||||||
@ -29,7 +31,12 @@ class Pferd:
|
|||||||
print(f"[red]Error: Unknown crawler type {t}")
|
print(f"[red]Error: Unknown crawler type {t}")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
crawler = crawler_constructor(name, self._config, section)
|
crawler = crawler_constructor(
|
||||||
|
name,
|
||||||
|
section,
|
||||||
|
self._config,
|
||||||
|
self._conductor,
|
||||||
|
)
|
||||||
self._crawlers[name] = crawler
|
self._crawlers[name] = crawler
|
||||||
|
|
||||||
if abort:
|
if abort:
|
||||||
|
Loading…
Reference in New Issue
Block a user