mirror of
				https://github.com/Garmelon/PFERD.git
				synced 2025-11-04 06:32:52 +01:00 
			
		
		
		
	--dump-config with its optional argument tended to consume the command name, so it had to be split up.
		
			
				
	
	
		
			144 lines
		
	
	
		
			4.4 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			144 lines
		
	
	
		
			4.4 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
import argparse
import asyncio
import configparser
import sys
from pathlib import Path
 | 
						|
 | 
						|
from .cli import PARSER, load_default_section
 | 
						|
from .config import Config, ConfigDumpError, ConfigLoadError, ConfigOptionError
 | 
						|
from .logging import log
 | 
						|
from .pferd import Pferd, PferdLoadError
 | 
						|
from .transformer import RuleParseError
 | 
						|
 | 
						|
 | 
						|
def load_config_parser(args: argparse.Namespace) -> configparser.ConfigParser:
    """Build the raw ConfigParser that the Config object is created from.

    If a CLI command was given (``args.command`` is not None), the command
    callable is invoked with ``args`` and the fresh parser so it can fill the
    parser in. Otherwise the parser is populated via ``Config.load_parser``
    using ``args.config`` as the path. In both cases ``load_default_section``
    is applied to the parser afterwards.
    """
    log.explain_topic("Loading config")
    config_parser = configparser.ConfigParser()

    if args.command is not None:
        log.explain(f"CLI command specified, creating config for {args.command!r}")
        # Only invoke the command if it is truthy, exactly as before.
        if args.command:
            args.command(args, config_parser)
    else:
        log.explain("No CLI command specified, loading config from file")
        Config.load_parser(config_parser, path=args.config)

    # Runs for both the file-based and the command-based variant.
    load_default_section(args, config_parser)
    return config_parser
 | 
						|
 | 
						|
 | 
						|
def load_config(args: argparse.Namespace) -> Config:
    """Create the Config for this run from the parsed CLI arguments.

    The raw parser is produced by ``load_config_parser`` and wrapped in a
    ``Config``. If a ``ConfigLoadError`` is raised, the error and its reason
    are logged and the process exits with status 1.
    """
    try:
        return Config(load_config_parser(args))
    except ConfigLoadError as e:
        log.error(str(e))
        log.error_contd(e.reason)
        # Use sys.exit() instead of the exit() helper injected by the site
        # module: the helper is meant for interactive sessions and is not
        # available when the interpreter runs without site (e.g. python -S).
        sys.exit(1)
 | 
						|
 | 
						|
 | 
						|
def configure_logging_from_args(args: argparse.Namespace) -> None:
    """Apply the logging switches given on the command line to ``log``.

    Each of ``args.explain``, ``args.status`` and ``args.report`` is copied to
    the matching ``log.output_*`` attribute, but only if it is not None.
    """
    for option in ("explain", "status", "report"):
        cli_value = getattr(args, option)
        if cli_value is not None:
            setattr(log, f"output_{option}", cli_value)

    # When the config is printed to stdout, any additional output would make
    # the result an invalid config file, so all three outputs are switched off.
    dumping_to_stdout = args.dump_config_to == "-"
    if dumping_to_stdout:
        log.output_explain = log.output_status = log.output_report = False
 | 
						|
 | 
						|
 | 
						|
def configure_logging_from_config(args: argparse.Namespace, config: Config) -> None:
    """Fill in logging settings from the config where the CLI gave none.

    For each of ``explain``, ``status`` and ``report`` whose CLI argument is
    None, the value from ``config.default_section`` is applied to ``log``.
    If reading an option raises ``ConfigOptionError``, the error is logged and
    the process exits with status 1.
    """
    # In configure_logging_from_args(), all normal logging is already disabled
    # whenever we dump the config to stdout. We don't want to override that
    # decision with values from the config file.
    if args.dump_config_to == "-":
        return

    try:
        if args.explain is None:
            log.output_explain = config.default_section.explain()
        if args.status is None:
            log.output_status = config.default_section.status()
        if args.report is None:
            log.output_report = config.default_section.report()
    except ConfigOptionError as e:
        log.error(str(e))
        # sys.exit() instead of the site-provided exit() helper, which is
        # intended for interactive use and may be missing (e.g. python -S).
        sys.exit(1)
 | 
						|
 | 
						|
 | 
						|
def dump_config(args: argparse.Namespace, config: Config) -> None:
    """Write out the config as requested by --dump-config / --dump-config-to.

    ``args.dump_config`` triggers ``config.dump()`` without a path,
    ``args.dump_config_to == "-"`` dumps to stdout, and any other
    ``args.dump_config_to`` value is used as the target path. Specifying both
    options at once is rejected. On that conflict, or when a
    ``ConfigDumpError`` is raised, an error is logged and the process exits
    with status 1.
    """
    log.explain_topic("Dumping config")

    if args.dump_config and args.dump_config_to is not None:
        log.error("--dump-config and --dump-config-to can't be specified at the same time")
        # sys.exit() instead of the site-provided exit() helper, which is
        # intended for interactive use and may be missing (e.g. python -S).
        sys.exit(1)

    try:
        if args.dump_config:
            config.dump()
        elif args.dump_config_to == "-":
            config.dump_to_stdout()
        else:
            config.dump(Path(args.dump_config_to))
    except ConfigDumpError as e:
        log.error(str(e))
        log.error_contd(e.reason)
        sys.exit(1)
 | 
						|
 | 
						|
 | 
						|
def main() -> None:
    """Entry point: parse CLI arguments, load the config and run Pferd.

    If a config dump was requested, the config is dumped and the process
    exits without running anything. Otherwise a ``Pferd`` instance is created
    and run via ``asyncio.run``. Every handled failure unlocks the log,
    reports the problem and exits with status 1.
    """
    args = PARSER.parse_args()

    # Configuring logging happens in two stages because CLI args have
    # precedence over config file options and loading the config already
    # produces some kinds of log messages (usually only explain()-s).
    configure_logging_from_args(args)

    config = load_config(args)

    # Now, after loading the config file, we can apply its logging settings in
    # all places that were not already covered by CLI args.
    configure_logging_from_config(args, config)

    # All exits below go through sys.exit() rather than the exit() helper that
    # the site module injects: that helper is meant for interactive sessions
    # and is not available when the interpreter runs without site.
    if args.dump_config or args.dump_config_to is not None:
        dump_config(args, config)
        sys.exit()

    try:
        pferd = Pferd(config, args.crawler)
    except PferdLoadError as e:
        log.unlock()
        log.error(str(e))
        sys.exit(1)

    try:
        asyncio.run(pferd.run())
    except ConfigOptionError as e:
        log.unlock()
        log.error(str(e))
        sys.exit(1)
    except RuleParseError as e:
        log.unlock()
        e.pretty_print()
        sys.exit(1)
    except KeyboardInterrupt:
        log.unlock()
        log.explain_topic("Interrupted, exiting immediately")
        log.explain("Open files and connections are left for the OS to clean up")
        log.explain("Temporary files are not cleaned up")
        pferd.print_report()
        # TODO Clean up tmp files
        # And when those files *do* actually get cleaned up properly,
        # reconsider if this should really exit with 1
        sys.exit(1)
    except Exception:
        # Top-level boundary: report anything unexpected, still print the
        # report collected so far, and signal failure to the caller.
        log.unlock()
        log.unexpected_exception()
        pferd.print_report()
        sys.exit(1)
 |