mirror of
https://github.com/Garmelon/PFERD.git
synced 2023-12-21 10:23:01 +01:00
Improve specifying crawlers via CLI
Instead of removing the sections of unselected crawlers from the config file, crawler selection now happens in the Pferd after loading the crawlers and is more sophisticated. It also has better error messages.
This commit is contained in:
@ -6,7 +6,7 @@ from pathlib import Path
|
||||
from .cli import PARSER, load_default_section
|
||||
from .config import Config, ConfigDumpError, ConfigLoadError, ConfigOptionError
|
||||
from .logging import log
|
||||
from .pferd import Pferd
|
||||
from .pferd import Pferd, PferdLoadError
|
||||
from .transformer import RuleParseError
|
||||
from .version import NAME, VERSION
|
||||
|
||||
@ -24,28 +24,10 @@ def load_config_parser(args: argparse.Namespace) -> configparser.ConfigParser:
|
||||
args.command(args, parser)
|
||||
|
||||
load_default_section(args, parser)
|
||||
prune_crawlers(args, parser)
|
||||
|
||||
return parser
|
||||
|
||||
|
||||
def prune_crawlers(
|
||||
args: argparse.Namespace,
|
||||
parser: configparser.ConfigParser,
|
||||
) -> None:
|
||||
if not args.crawler:
|
||||
return
|
||||
|
||||
for section in parser.sections():
|
||||
if section.startswith("crawl:"):
|
||||
# TODO Use removeprefix() when switching to 3.9
|
||||
name = section[len("crawl:"):]
|
||||
if name not in args.crawler:
|
||||
parser.remove_section(section)
|
||||
|
||||
# TODO Check if crawlers actually exist
|
||||
|
||||
|
||||
def load_config(args: argparse.Namespace) -> Config:
|
||||
try:
|
||||
return Config(load_config_parser(args))
|
||||
@ -119,9 +101,9 @@ def main() -> None:
|
||||
exit()
|
||||
|
||||
try:
|
||||
pferd = Pferd(config)
|
||||
pferd = Pferd(config, args.crawler)
|
||||
asyncio.run(pferd.run())
|
||||
except ConfigOptionError as e:
|
||||
except (PferdLoadError, ConfigOptionError) as e:
|
||||
log.unlock()
|
||||
log.error(str(e))
|
||||
exit(1)
|
||||
|
Reference in New Issue
Block a user