# pferd/PFERD/__main__.py

import argparse
import asyncio
import configparser
import sys
from pathlib import Path

from .cli import PARSER, load_default_section
from .config import Config, ConfigDumpError, ConfigLoadError, ConfigOptionError
from .logging import log
from .pferd import Pferd, PferdLoadError
from .transformer import RuleParseError
from .version import NAME, VERSION


def load_config_parser(args: argparse.Namespace) -> configparser.ConfigParser:
    """Create and populate the ConfigParser that the Config is built from.

    If ``args.command`` is None, ``Config.load_parser`` is asked to fill the
    parser using ``args.config`` as the path. Otherwise ``args.command`` is
    called with ``(args, parser)`` so that it can fill the parser itself.
    In both cases ``load_default_section`` is applied afterwards.

    Returns the populated parser.
    """
    log.explain_topic("Loading config")
    parser = configparser.ConfigParser()

    command = args.command
    if command is not None:
        log.explain(f"CLI command specified, creating config for {command!r}")
        # A command that is not None but still falsy is deliberately not
        # called, exactly as before.
        if command:
            command(args, parser)
    else:
        log.explain("No CLI command specified, loading config from file")
        Config.load_parser(parser, path=args.config)

    load_default_section(args, parser)

    return parser


def load_config(args: argparse.Namespace) -> Config:
    """Build the Config for this run, terminating the process on failure.

    The parser produced by ``load_config_parser(args)`` is wrapped in a
    ``Config``. If that raises ``ConfigLoadError``, the error and its reason
    are logged and the process exits with status 1.

    Returns the loaded Config.
    """
    try:
        return Config(load_config_parser(args))
    except ConfigLoadError as e:
        log.error(str(e))
        log.error_contd(e.reason)
        # sys.exit() instead of the bare exit() helper: exit() is injected by
        # the site module for interactive use and is missing when site is not
        # loaded (e.g. "python -S" or some frozen builds).
        sys.exit(1)

def configure_logging_from_args(args: argparse.Namespace) -> None:
    """Apply the logging settings that come directly from the command line.

    An explicit ``args.explain`` value (anything other than None) is copied to
    ``log.output_explain``. When the config is dumped to stdout
    (``args.dump_config == "-"``), explain output is switched off regardless.
    """
    cli_explain = args.explain
    if cli_explain is not None:
        log.output_explain = cli_explain

    # When the config is printed to stdout, nothing else may be written there;
    # any additional output would make the result an invalid config file.
    dumping_to_stdout = args.dump_config == "-"
    if dumping_to_stdout:
        log.output_explain = False


def configure_logging_from_config(args: argparse.Namespace, config: Config) -> None:
    """Apply logging settings from the config where the CLI did not decide.

    Nothing is changed when the config is being dumped to stdout or when
    ``args.explain`` was given explicitly. Otherwise ``log.output_explain`` is
    set from ``config.default_section.explain()``. If that raises
    ``ConfigOptionError``, the error is logged and the process exits with
    status 1.
    """
    # In configure_logging_from_args(), all normal logging is already disabled
    # whenever we dump the config. We don't want to override that decision with
    # values from the config file.
    if args.dump_config == "-":
        return

    # An explicit CLI value takes precedence over the config file.
    if args.explain is not None:
        return

    try:
        log.output_explain = config.default_section.explain()
    except ConfigOptionError as e:
        log.error(str(e))
        # sys.exit() instead of the bare exit() helper: exit() is injected by
        # the site module for interactive use and is missing when site is not
        # loaded (e.g. "python -S" or some frozen builds).
        sys.exit(1)


def dump_config(args: argparse.Namespace, config: Config) -> None:
    """Write the config to the destination selected by ``args.dump_config``.

    * ``True``  -> ``config.dump()`` is called without a path
    * ``"-"``   -> ``config.dump_to_stdout()``
    * otherwise -> the value is wrapped in ``Path`` and passed to
      ``config.dump()``

    If dumping raises ``ConfigDumpError``, the error and its reason are logged
    and the process exits with status 1.
    """
    log.explain_topic("Dumping config")

    target = args.dump_config
    try:
        if target is True:
            config.dump()
        elif target == "-":
            config.dump_to_stdout()
        else:
            config.dump(Path(target))
    except ConfigDumpError as e:
        log.error(str(e))
        log.error_contd(e.reason)
        # sys.exit() instead of the bare exit() helper: exit() is injected by
        # the site module for interactive use and is missing when site is not
        # loaded (e.g. "python -S" or some frozen builds).
        sys.exit(1)


def main() -> None:
    """Command-line entry point.

    Parses the CLI arguments, then:

    1. prints name and version and exits if ``args.version`` is set,
    2. configures logging from the CLI arguments,
    3. loads the config,
    4. configures logging from the config,
    5. dumps the config and exits if ``args.dump_config`` is not None,
    6. otherwise constructs a ``Pferd`` and runs ``pferd.run()`` via
       ``asyncio.run``.

    The version and dump paths exit with status 0. Every handled error in
    step 6, including ``KeyboardInterrupt``, unlocks the log, reports the
    problem and exits with status 1.

    All exits go through ``sys.exit()`` rather than the bare ``exit()``
    helper, which is injected by the site module for interactive use and is
    missing when site is not loaded (e.g. "python -S" or some frozen builds).
    """
    args = PARSER.parse_args()

    if args.version:
        print(f"{NAME} {VERSION}")
        sys.exit()

    # Configuring logging happens in two stages because CLI args have
    # precedence over config file options and loading the config already
    # produces some kinds of log messages (usually only explain()-s).
    configure_logging_from_args(args)

    config = load_config(args)

    # Now, after loading the config file, we can apply its logging settings in
    # all places that were not already covered by CLI args.
    configure_logging_from_config(args, config)

    if args.dump_config is not None:
        dump_config(args, config)
        sys.exit()

    try:
        pferd = Pferd(config, args.crawler)
        asyncio.run(pferd.run())
    except (PferdLoadError, ConfigOptionError) as e:
        log.unlock()
        log.error(str(e))
        sys.exit(1)
    except RuleParseError as e:
        log.unlock()
        e.pretty_print()
        sys.exit(1)
    except KeyboardInterrupt:
        log.unlock()
        log.explain_topic("Interrupted, exiting immediately")
        log.explain("Open files and connections are left for the OS to clean up")
        log.explain("Temporary files are not cleaned up")
        # TODO Clean up tmp files
        # And when those files *do* actually get cleaned up properly,
        # reconsider what exit code to use here.
        sys.exit(1)
    except Exception:
        # Last-resort boundary: report anything unexpected instead of letting
        # a raw traceback escape.
        log.unlock()
        log.unexpected_exception()
        sys.exit(1)