pferd/PFERD/__main__.py
Joscha a9af56a5e9 Improve specifying crawlers via CLI
Instead of removing the sections of unselected crawlers from the config file,
crawler selection now happens in the Pferd after loading the crawlers and is
more sophisticated. It also has better error messages.
2021-05-23 18:18:50 +02:00

127 lines
3.8 KiB
Python

import argparse
import asyncio
import configparser
import sys
from pathlib import Path

from .cli import PARSER, load_default_section
from .config import Config, ConfigDumpError, ConfigLoadError, ConfigOptionError
from .logging import log
from .pferd import Pferd, PferdLoadError
from .transformer import RuleParseError
from .version import NAME, VERSION
def load_config_parser(args: argparse.Namespace) -> configparser.ConfigParser:
    """Build the raw ``ConfigParser`` that the ``Config`` is created from.

    If a CLI command was given, the command callable fills the parser from
    the parsed arguments; otherwise the parser is loaded from the config file
    at ``args.config``. Afterwards ``load_default_section`` is applied to the
    parser with the CLI arguments.

    Errors raised by ``Config.load_parser`` are not handled here.
    """
    log.explain_topic("Loading config")
    cfg_parser = configparser.ConfigParser()

    if args.command is not None:
        log.explain(f"CLI command specified, creating config for {args.command!r}")
        # A command that is set but falsy contributes nothing to the parser.
        if args.command:
            args.command(args, cfg_parser)
    else:
        log.explain("No CLI command specified, loading config from file")
        Config.load_parser(cfg_parser, path=args.config)

    # NOTE(review): assumed to run for both branches above so that CLI
    # arguments are applied regardless of where the config came from - confirm.
    load_default_section(args, cfg_parser)

    return cfg_parser
def load_config(args: argparse.Namespace) -> Config:
    """Load the configuration selected by the CLI arguments.

    Wraps the parser produced by ``load_config_parser`` in a ``Config``.
    If a ``ConfigLoadError`` is raised, the error and its reason are logged
    and the process exits with status 1.
    """
    try:
        return Config(load_config_parser(args))
    except ConfigLoadError as e:
        log.error(str(e))
        log.error_contd(e.reason)
        # sys.exit() instead of the builtin exit(): the latter is injected by
        # the site module and is not guaranteed to exist in every interpreter.
        sys.exit(1)
def configure_logging_from_args(args: argparse.Namespace) -> None:
    """Apply the logging settings that come directly from CLI arguments.

    An explicitly given ``args.explain`` is copied to ``log.output_explain``.
    When the config is dumped to stdout (``args.dump_config == "-"``),
    explain output is switched off afterwards, regardless of ``args.explain``.
    """
    explain_from_cli = args.explain
    if explain_from_cli is not None:
        log.output_explain = explain_from_cli

    # Nothing but the config itself may be printed when dumping to stdout,
    # otherwise the output would not be a valid config file.
    dumping_to_stdout = args.dump_config == "-"
    if dumping_to_stdout:
        log.output_explain = False
def configure_logging_from_config(args: argparse.Namespace, config: Config) -> None:
    """Apply logging settings from the config where the CLI did not set them.

    Does nothing when the config is dumped to stdout or when ``args.explain``
    was given on the command line. Otherwise ``log.output_explain`` is taken
    from the config's default section. If reading that option raises a
    ``ConfigOptionError``, the error is logged and the process exits with
    status 1.
    """
    # In configure_logging_from_args(), all normal logging is already disabled
    # whenever we dump the config. We don't want to override that decision with
    # values from the config file.
    if args.dump_config == "-":
        return

    # CLI arguments take precedence over the config file.
    if args.explain is not None:
        return

    try:
        log.output_explain = config.default_section.explain()
    except ConfigOptionError as e:
        log.error(str(e))
        # sys.exit() instead of the builtin exit(): the latter is injected by
        # the site module and is not guaranteed to exist in every interpreter.
        sys.exit(1)
def dump_config(args: argparse.Namespace, config: Config) -> None:
    """Write the config to the destination chosen via ``args.dump_config``.

    ``True`` calls ``config.dump()`` without a path, ``"-"`` dumps to stdout,
    and any other value is treated as the target path. If a
    ``ConfigDumpError`` is raised, the error and its reason are logged and
    the process exits with status 1.
    """
    log.explain_topic("Dumping config")

    try:
        if args.dump_config is True:
            config.dump()
        elif args.dump_config == "-":
            config.dump_to_stdout()
        else:
            config.dump(Path(args.dump_config))
    except ConfigDumpError as e:
        log.error(str(e))
        log.error_contd(e.reason)
        # sys.exit() instead of the builtin exit(): the latter is injected by
        # the site module and is not guaranteed to exist in every interpreter.
        sys.exit(1)
def main() -> None:
    """Command line entry point.

    Parses the CLI arguments, handles ``--version``-style and config-dump
    requests, and otherwise constructs a ``Pferd`` from the loaded config and
    the crawlers selected via ``args.crawler`` and runs it with
    ``asyncio.run``.

    Exits with status 0 after printing the version or dumping the config, and
    with status 1 on load/option errors, rule parse errors, keyboard
    interrupts and any other unexpected exception.
    """
    args = PARSER.parse_args()

    # sys.exit() is used throughout instead of the builtin exit(): the latter
    # is injected by the site module and is not guaranteed to exist in every
    # interpreter.
    if args.version:
        print(f"{NAME} {VERSION}")
        sys.exit()

    # Configuring logging happens in two stages because CLI args have
    # precedence over config file options and loading the config already
    # produces some kinds of log messages (usually only explain()-s).
    configure_logging_from_args(args)

    config = load_config(args)

    # Now, after loading the config file, we can apply its logging settings in
    # all places that were not already covered by CLI args.
    configure_logging_from_config(args, config)

    if args.dump_config is not None:
        dump_config(args, config)
        sys.exit()

    try:
        pferd = Pferd(config, args.crawler)
        asyncio.run(pferd.run())
    except (PferdLoadError, ConfigOptionError) as e:
        # NOTE(review): log.unlock() presumably releases a log lock that may
        # still be held when the run is aborted - confirm in the logging
        # module.
        log.unlock()
        log.error(str(e))
        sys.exit(1)
    except RuleParseError as e:
        log.unlock()
        e.pretty_print()
        sys.exit(1)
    except KeyboardInterrupt:
        log.unlock()
        log.explain_topic("Interrupted, exiting immediately")
        log.explain("Open files and connections are left for the OS to clean up")
        log.explain("Temporary files are not cleaned up")
        # TODO Clean up tmp files
        # And when those files *do* actually get cleaned up properly,
        # reconsider what exit code to use here.
        sys.exit(1)
    except Exception:
        # Top-level boundary: report anything unexpected instead of letting a
        # raw traceback escape.
        log.unlock()
        log.unexpected_exception()
        sys.exit(1)