From 80eeb8fe97e28437dcce0e148ffba202fde6a156 Mon Sep 17 00:00:00 2001 From: Joscha Date: Thu, 1 Jul 2021 11:01:55 +0200 Subject: [PATCH] Add --skip option --- PFERD/__main__.py | 2 +- PFERD/cli/parser.py | 8 ++++++++ PFERD/pferd.py | 24 +++++++++++++++++++----- 3 files changed, 28 insertions(+), 6 deletions(-) diff --git a/PFERD/__main__.py b/PFERD/__main__.py index b274b6b..b665feb 100644 --- a/PFERD/__main__.py +++ b/PFERD/__main__.py @@ -116,7 +116,7 @@ def main() -> None: sys.exit() try: - pferd = Pferd(config, args.crawler) + pferd = Pferd(config, args.crawler, args.skip) except PferdLoadError as e: log.unlock() log.error(str(e)) diff --git a/PFERD/cli/parser.py b/PFERD/cli/parser.py index f5fb215..e753023 100644 --- a/PFERD/cli/parser.py +++ b/PFERD/cli/parser.py @@ -181,6 +181,14 @@ PARSER.add_argument( help="only execute a single crawler." " Can be specified multiple times to execute multiple crawlers" ) +PARSER.add_argument( + "--skip", "-S", + action="append", + type=str, + metavar="NAME", + help="don't execute this particular crawler." + " Can be specified multiple times to skip multiple crawlers" +) PARSER.add_argument( "--working-dir", type=Path, diff --git a/PFERD/pferd.py b/PFERD/pferd.py index d98b426..726ed45 100644 --- a/PFERD/pferd.py +++ b/PFERD/pferd.py @@ -15,13 +15,13 @@ class PferdLoadError(Exception): class Pferd: - def __init__(self, config: Config, cli_crawlers: Optional[List[str]]): + def __init__(self, config: Config, cli_crawlers: Optional[List[str]], cli_skips: Optional[List[str]]): """ May throw PferdLoadError. """ self._config = config - self._crawlers_to_run = self._find_crawlers_to_run(config, cli_crawlers) + self._crawlers_to_run = self._find_crawlers_to_run(config, cli_crawlers, cli_skips) self._authenticators: Dict[str, Authenticator] = {} self._crawlers: Dict[str, Crawler] = {} @@ -65,16 +65,30 @@ class Pferd: return crawlers_to_run - def _find_crawlers_to_run(self, config: Config, cli_crawlers: Optional[List[str]]) -> List[str]: + def _find_crawlers_to_run( + self, + config: Config, + cli_crawlers: Optional[List[str]], + cli_skips: Optional[List[str]], + ) -> List[str]: log.explain_topic("Deciding which crawlers to run") + crawlers: List[str] if cli_crawlers is None: log.explain("No crawlers specified on CLI") log.explain("Running crawlers specified in config") - return self._find_config_crawlers(config) + crawlers = self._find_config_crawlers(config) else: log.explain("Crawlers specified on CLI") - return self._find_cli_crawlers(config, cli_crawlers) + crawlers = self._find_cli_crawlers(config, cli_crawlers) + + skips = {f"crawl:{name}" for name in cli_skips} if cli_skips else set() + for crawler in crawlers: + if crawler in skips: + log.explain(f"Skipping crawler {crawler!r}") + crawlers = [crawler for crawler in crawlers if crawler not in skips] + + return crawlers def _load_authenticators(self) -> None: for name, section in self._config.auth_sections():