mirror of
				https://github.com/Garmelon/PFERD.git
				synced 2025-11-03 22:23:41 +01:00 
			
		
		
		
	The behaviour of get_event_loop changed in 3.14 and no longer creates one. Instead, it will crash.
		
			
				
	
	
		
			171 lines
		
	
	
		
			5.5 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			171 lines
		
	
	
		
			5.5 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
import argparse
 | 
						|
import asyncio
 | 
						|
import configparser
 | 
						|
import os
 | 
						|
import sys
 | 
						|
from pathlib import Path
 | 
						|
 | 
						|
from .auth import AuthLoadError
 | 
						|
from .cli import PARSER, ParserLoadError, load_default_section
 | 
						|
from .config import Config, ConfigDumpError, ConfigLoadError, ConfigOptionError
 | 
						|
from .logging import log
 | 
						|
from .pferd import Pferd, PferdLoadError
 | 
						|
from .transformer import RuleParseError
 | 
						|
 | 
						|
 | 
						|
def load_config_parser(args: argparse.Namespace) -> configparser.ConfigParser:
 | 
						|
    log.explain_topic("Loading config")
 | 
						|
    parser = configparser.ConfigParser(interpolation=None)
 | 
						|
 | 
						|
    if args.command is None:
 | 
						|
        log.explain("No CLI command specified, loading config from file")
 | 
						|
        Config.load_parser(parser, path=args.config)
 | 
						|
    else:
 | 
						|
        log.explain("CLI command specified, loading config from its arguments")
 | 
						|
        if args.command:
 | 
						|
            args.command(args, parser)
 | 
						|
 | 
						|
    load_default_section(args, parser)
 | 
						|
 | 
						|
    return parser
 | 
						|
 | 
						|
 | 
						|
def load_config(args: argparse.Namespace) -> Config:
 | 
						|
    try:
 | 
						|
        return Config(load_config_parser(args))
 | 
						|
    except ConfigLoadError as e:
 | 
						|
        log.error(str(e))
 | 
						|
        log.error_contd(e.reason)
 | 
						|
        sys.exit(1)
 | 
						|
    except ParserLoadError as e:
 | 
						|
        log.error(str(e))
 | 
						|
        sys.exit(1)
 | 
						|
 | 
						|
 | 
						|
def configure_logging_from_args(args: argparse.Namespace) -> None:
 | 
						|
    if args.explain is not None:
 | 
						|
        log.output_explain = args.explain
 | 
						|
    if args.status is not None:
 | 
						|
        log.output_status = args.status
 | 
						|
    if args.show_not_deleted is not None:
 | 
						|
        log.output_not_deleted = args.show_not_deleted
 | 
						|
    if args.report is not None:
 | 
						|
        log.output_report = args.report
 | 
						|
 | 
						|
    # We want to prevent any unnecessary output if we're printing the config to
 | 
						|
    # stdout, otherwise it would not be a valid config file.
 | 
						|
    if args.dump_config_to == "-":
 | 
						|
        log.output_explain = False
 | 
						|
        log.output_status = False
 | 
						|
        log.output_report = False
 | 
						|
 | 
						|
 | 
						|
def configure_logging_from_config(args: argparse.Namespace, config: Config) -> None:
 | 
						|
    # In configure_logging_from_args(), all normal logging is already disabled
 | 
						|
    # whenever we dump the config. We don't want to override that decision with
 | 
						|
    # values from the config file.
 | 
						|
    if args.dump_config_to == "-":
 | 
						|
        return
 | 
						|
 | 
						|
    try:
 | 
						|
        if args.explain is None:
 | 
						|
            log.output_explain = config.default_section.explain()
 | 
						|
        if args.status is None:
 | 
						|
            log.output_status = config.default_section.status()
 | 
						|
        if args.report is None:
 | 
						|
            log.output_report = config.default_section.report()
 | 
						|
        if args.show_not_deleted is None:
 | 
						|
            log.output_not_deleted = config.default_section.show_not_deleted()
 | 
						|
    except ConfigOptionError as e:
 | 
						|
        log.error(str(e))
 | 
						|
        sys.exit(1)
 | 
						|
 | 
						|
 | 
						|
def dump_config(args: argparse.Namespace, config: Config) -> None:
 | 
						|
    log.explain_topic("Dumping config")
 | 
						|
 | 
						|
    if args.dump_config and args.dump_config_to is not None:
 | 
						|
        log.error("--dump-config and --dump-config-to can't be specified at the same time")
 | 
						|
        sys.exit(1)
 | 
						|
 | 
						|
    try:
 | 
						|
        if args.dump_config:
 | 
						|
            config.dump()
 | 
						|
        elif args.dump_config_to == "-":
 | 
						|
            config.dump_to_stdout()
 | 
						|
        else:
 | 
						|
            config.dump(Path(args.dump_config_to))
 | 
						|
    except ConfigDumpError as e:
 | 
						|
        log.error(str(e))
 | 
						|
        log.error_contd(e.reason)
 | 
						|
        sys.exit(1)
 | 
						|
 | 
						|
 | 
						|
def main() -> None:
 | 
						|
    args = PARSER.parse_args()
 | 
						|
 | 
						|
    # Configuring logging happens in two stages because CLI args have
 | 
						|
    # precedence over config file options and loading the config already
 | 
						|
    # produces some kinds of log messages (usually only explain()-s).
 | 
						|
    configure_logging_from_args(args)
 | 
						|
 | 
						|
    config = load_config(args)
 | 
						|
 | 
						|
    # Now, after loading the config file, we can apply its logging settings in
 | 
						|
    # all places that were not already covered by CLI args.
 | 
						|
    configure_logging_from_config(args, config)
 | 
						|
 | 
						|
    if args.dump_config or args.dump_config_to is not None:
 | 
						|
        dump_config(args, config)
 | 
						|
        sys.exit()
 | 
						|
 | 
						|
    try:
 | 
						|
        pferd = Pferd(config, args.crawler, args.skip)
 | 
						|
    except PferdLoadError as e:
 | 
						|
        log.unlock()
 | 
						|
        log.error(str(e))
 | 
						|
        sys.exit(1)
 | 
						|
 | 
						|
    try:
 | 
						|
        if os.name == "nt":
 | 
						|
            # A "workaround" for the windows event loop somehow crashing after
 | 
						|
            # asyncio.run() completes. See:
 | 
						|
            # https://bugs.python.org/issue39232
 | 
						|
            # https://github.com/encode/httpx/issues/914#issuecomment-780023632
 | 
						|
            # TODO Fix this properly
 | 
						|
            loop = asyncio.new_event_loop()
 | 
						|
            asyncio.set_event_loop(loop)
 | 
						|
            loop.run_until_complete(pferd.run(args.debug_transforms))
 | 
						|
            loop.run_until_complete(asyncio.sleep(1))
 | 
						|
            loop.close()
 | 
						|
        else:
 | 
						|
            asyncio.run(pferd.run(args.debug_transforms))
 | 
						|
    except (ConfigOptionError, AuthLoadError) as e:
 | 
						|
        log.unlock()
 | 
						|
        log.error(str(e))
 | 
						|
        sys.exit(1)
 | 
						|
    except RuleParseError as e:
 | 
						|
        log.unlock()
 | 
						|
        e.pretty_print()
 | 
						|
        sys.exit(1)
 | 
						|
    except KeyboardInterrupt:
 | 
						|
        log.unlock()
 | 
						|
        log.explain_topic("Interrupted, exiting immediately")
 | 
						|
        log.explain("Open files and connections are left for the OS to clean up")
 | 
						|
        pferd.print_report()
 | 
						|
        # TODO Clean up tmp files
 | 
						|
        # And when those files *do* actually get cleaned up properly,
 | 
						|
        # reconsider if this should really exit with 1
 | 
						|
        sys.exit(1)
 | 
						|
    except Exception:
 | 
						|
        log.unlock()
 | 
						|
        log.unexpected_exception()
 | 
						|
        pferd.print_report()
 | 
						|
        sys.exit(1)
 | 
						|
    else:
 | 
						|
        pferd.print_report()
 | 
						|
 | 
						|
 | 
						|
if __name__ == "__main__":
 | 
						|
    main()
 |