Improve specifying crawlers via CLI

Instead of removing the sections of unselected crawlers from the parsed config,
crawler selection now happens in the Pferd class after the crawlers have been
loaded, and it is more sophisticated. It also has better error messages.
This commit is contained in:
Joscha
2021-05-23 18:16:25 +02:00
parent 59f13bb8d6
commit a9af56a5e9
2 changed files with 64 additions and 28 deletions

View File

@ -6,7 +6,7 @@ from pathlib import Path
from .cli import PARSER, load_default_section
from .config import Config, ConfigDumpError, ConfigLoadError, ConfigOptionError
from .logging import log
from .pferd import Pferd
from .pferd import Pferd, PferdLoadError
from .transformer import RuleParseError
from .version import NAME, VERSION
@ -24,28 +24,10 @@ def load_config_parser(args: argparse.Namespace) -> configparser.ConfigParser:
args.command(args, parser)
load_default_section(args, parser)
prune_crawlers(args, parser)
return parser
def prune_crawlers(
        args: argparse.Namespace,
        parser: configparser.ConfigParser,
) -> None:
    """Remove the ``crawl:*`` sections of crawlers that were not selected.

    When ``args.crawler`` is falsy, the parser is left untouched. Otherwise
    every section whose name starts with ``crawl:`` is removed from
    ``parser`` unless the part after that prefix is contained in
    ``args.crawler``. Sections without the prefix are never removed.

    The parser is modified in place; nothing is returned.
    """
    selected = args.crawler
    if not selected:
        return

    prefix = "crawl:"
    for section_name in parser.sections():
        if not section_name.startswith(prefix):
            continue

        # TODO Use removeprefix() when switching to 3.9
        crawler_name = section_name[len(prefix):]
        if crawler_name in selected:
            continue

        parser.remove_section(section_name)

    # TODO Check if crawlers actually exist
def load_config(args: argparse.Namespace) -> Config:
try:
return Config(load_config_parser(args))
@ -119,9 +101,9 @@ def main() -> None:
exit()
try:
pferd = Pferd(config)
pferd = Pferd(config, args.crawler)
asyncio.run(pferd.run())
except ConfigOptionError as e:
except (PferdLoadError, ConfigOptionError) as e:
log.unlock()
log.error(str(e))
exit(1)