diff --git a/PFERD/cli/__init__.py b/PFERD/cli/__init__.py
index 71d9732..2a9f124 100644
--- a/PFERD/cli/__init__.py
+++ b/PFERD/cli/__init__.py
@@ -1,125 +1,10 @@
-import argparse
-import configparser
-from pathlib import Path
+# isort: skip_file
 
-from ..output_dir import OnConflict, Redownload
+# The order of imports matters because each command module registers itself
+# with the parser from ".parser". Because of this, isort is disabled for this
+# file. Also, since we're reexporting or just using the side effect of
+# importing itself, we get a few linting warnings, which we're disabling as
+# well.
 
-CRAWLER_PARSER = argparse.ArgumentParser(add_help=False)
-CRAWLER_PARSER_GROUP = CRAWLER_PARSER.add_argument_group(
-    title="general crawler arguments",
-    description="arguments common to all crawlers",
-)
-CRAWLER_PARSER_GROUP.add_argument(
-    "--redownload",
-    type=Redownload.from_string,
-    metavar="OPTION",
-    help="when to redownload a file that's already present locally"
-)
-CRAWLER_PARSER_GROUP.add_argument(
-    "--on-conflict",
-    type=OnConflict.from_string,
-    metavar="OPTION",
-    help="what to do when local and remote files or directories differ"
-)
-CRAWLER_PARSER_GROUP.add_argument(
-    "--transform", "-t",
-    action="append",
-    type=str,
-    metavar="RULE",
-    help="add a single transformation rule. Can be specified multiple times"
-)
-CRAWLER_PARSER_GROUP.add_argument(
-    "--max-concurrent-tasks",
-    type=int,
-    metavar="N",
-    help="maximum number of concurrent tasks (crawling, downloading)"
-)
-CRAWLER_PARSER_GROUP.add_argument(
-    "--max-concurrent-downloads",
-    type=int,
-    metavar="N",
-    help="maximum number of tasks that may download data at the same time"
-)
-CRAWLER_PARSER_GROUP.add_argument(
-    "--delay-between-tasks",
-    type=float,
-    metavar="SECONDS",
-    help="time the crawler should wait between subsequent tasks"
-)
-
-
-def load_crawler(
-        args: argparse.Namespace,
-        section: configparser.SectionProxy,
-) -> None:
-    if args.redownload is not None:
-        section["redownload"] = args.redownload.value
-    if args.on_conflict is not None:
-        section["on_conflict"] = args.on_conflict.value
-    if args.transform is not None:
-        section["transform"] = "\n" + "\n".join(args.transform)
-    if args.max_concurrent_tasks is not None:
-        section["max_concurrent_tasks"] = str(args.max_concurrent_tasks)
-    if args.max_concurrent_downloads is not None:
-        section["max_concurrent_downloads"] = str(args.max_concurrent_downloads)
-    if args.delay_between_tasks is not None:
-        section["delay_between_tasks"] = str(args.delay_between_tasks)
-
-
-PARSER = argparse.ArgumentParser()
-PARSER.set_defaults(command=None)
-PARSER.add_argument(
-    "--version",
-    action="store_true",
-    help="print version and exit"
-)
-PARSER.add_argument(
-    "--config", "-c",
-    type=Path,
-    metavar="PATH",
-    help="custom config file"
-)
-PARSER.add_argument(
-    "--dump-config",
-    nargs="?",
-    const=True,
-    metavar="PATH",
-    help="dump current configuration to a file and exit."
-    " Uses default config file path if no path is specified"
-)
-PARSER.add_argument(
-    "--crawler",
-    action="append",
-    type=str,
-    metavar="NAME",
-    help="only execute a single crawler."
-    " Can be specified multiple times to execute multiple crawlers"
-)
-PARSER.add_argument(
-    "--working-dir",
-    type=Path,
-    metavar="PATH",
-    help="custom working directory"
-)
-PARSER.add_argument(
-    "--explain", "-e",
-    # TODO Use argparse.BooleanOptionalAction after updating to 3.9
-    action="store_const",
-    const=True,
-    help="log and explain in detail what PFERD is doing"
-)
-
-
-def load_default_section(
-        args: argparse.Namespace,
-        parser: configparser.ConfigParser,
-) -> None:
-    section = parser[parser.default_section]
-
-    if args.working_dir is not None:
-        section["working_dir"] = str(args.working_dir)
-    if args.explain is not None:
-        section["explain"] = "true" if args.explain else "false"
-
-
-SUBPARSERS = PARSER.add_subparsers(title="crawlers")
+from . import command_local  # noqa: F401 imported but unused
+from .parser import PARSER, load_default_section  # noqa: F401 imported but unused
diff --git a/PFERD/cli/local.py b/PFERD/cli/command_local.py
similarity index 96%
rename from PFERD/cli/local.py
rename to PFERD/cli/command_local.py
index 5df81db..73f9d43 100644
--- a/PFERD/cli/local.py
+++ b/PFERD/cli/command_local.py
@@ -2,7 +2,7 @@ import argparse
 import configparser
 from pathlib import Path
 
-from . import CRAWLER_PARSER, SUBPARSERS, load_crawler
+from .parser import CRAWLER_PARSER, SUBPARSERS, load_crawler
 
 SUBPARSER = SUBPARSERS.add_parser(
     "local",
diff --git a/PFERD/cli/parser.py b/PFERD/cli/parser.py
new file mode 100644
index 0000000..71d9732
--- /dev/null
+++ b/PFERD/cli/parser.py
@@ -0,0 +1,125 @@
+import argparse
+import configparser
+from pathlib import Path
+
+from ..output_dir import OnConflict, Redownload
+
+CRAWLER_PARSER = argparse.ArgumentParser(add_help=False)
+CRAWLER_PARSER_GROUP = CRAWLER_PARSER.add_argument_group(
+    title="general crawler arguments",
+    description="arguments common to all crawlers",
+)
+CRAWLER_PARSER_GROUP.add_argument(
+    "--redownload",
+    type=Redownload.from_string,
+    metavar="OPTION",
+    help="when to redownload a file that's already present locally"
+)
+CRAWLER_PARSER_GROUP.add_argument(
+    "--on-conflict",
+    type=OnConflict.from_string,
+    metavar="OPTION",
+    help="what to do when local and remote files or directories differ"
+)
+CRAWLER_PARSER_GROUP.add_argument(
+    "--transform", "-t",
+    action="append",
+    type=str,
+    metavar="RULE",
+    help="add a single transformation rule. Can be specified multiple times"
+)
+CRAWLER_PARSER_GROUP.add_argument(
+    "--max-concurrent-tasks",
+    type=int,
+    metavar="N",
+    help="maximum number of concurrent tasks (crawling, downloading)"
+)
+CRAWLER_PARSER_GROUP.add_argument(
+    "--max-concurrent-downloads",
+    type=int,
+    metavar="N",
+    help="maximum number of tasks that may download data at the same time"
+)
+CRAWLER_PARSER_GROUP.add_argument(
+    "--delay-between-tasks",
+    type=float,
+    metavar="SECONDS",
+    help="time the crawler should wait between subsequent tasks"
+)
+
+
+def load_crawler(
+        args: argparse.Namespace,
+        section: configparser.SectionProxy,
+) -> None:
+    if args.redownload is not None:
+        section["redownload"] = args.redownload.value
+    if args.on_conflict is not None:
+        section["on_conflict"] = args.on_conflict.value
+    if args.transform is not None:
+        section["transform"] = "\n" + "\n".join(args.transform)
+    if args.max_concurrent_tasks is not None:
+        section["max_concurrent_tasks"] = str(args.max_concurrent_tasks)
+    if args.max_concurrent_downloads is not None:
+        section["max_concurrent_downloads"] = str(args.max_concurrent_downloads)
+    if args.delay_between_tasks is not None:
+        section["delay_between_tasks"] = str(args.delay_between_tasks)
+
+
+PARSER = argparse.ArgumentParser()
+PARSER.set_defaults(command=None)
+PARSER.add_argument(
+    "--version",
+    action="store_true",
+    help="print version and exit"
+)
+PARSER.add_argument(
+    "--config", "-c",
+    type=Path,
+    metavar="PATH",
+    help="custom config file"
+)
+PARSER.add_argument(
+    "--dump-config",
+    nargs="?",
+    const=True,
+    metavar="PATH",
+    help="dump current configuration to a file and exit."
+    " Uses default config file path if no path is specified"
+)
+PARSER.add_argument(
+    "--crawler",
+    action="append",
+    type=str,
+    metavar="NAME",
+    help="only execute a single crawler."
+    " Can be specified multiple times to execute multiple crawlers"
+)
+PARSER.add_argument(
+    "--working-dir",
+    type=Path,
+    metavar="PATH",
+    help="custom working directory"
+)
+PARSER.add_argument(
+    "--explain", "-e",
+    # TODO Use argparse.BooleanOptionalAction after updating to 3.9
+    action="store_const",
+    const=True,
+    help="log and explain in detail what PFERD is doing"
+)
+
+
+def load_default_section(
+        args: argparse.Namespace,
+        parser: configparser.ConfigParser,
+) -> None:
+    section = parser[parser.default_section]
+
+    if args.working_dir is not None:
+        section["working_dir"] = str(args.working_dir)
+    if args.explain is not None:
+        section["explain"] = "true" if args.explain else "false"
+
+
+SUBPARSERS = PARSER.add_subparsers(title="crawlers")
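
Note on the new structure: after this patch, PFERD/cli/__init__.py works purely
by import side effects. Each command module (currently just command_local.py)
registers its subcommand on the shared SUBPARSERS object when it is imported,
which is why the import order is pinned and isort is skipped. A minimal sketch
of such a self-registering command module follows. It is illustrative only:
the "example" command name, the load() callback, and the "crawl:example"
section are assumptions, not part of this patch; only the
SUBPARSERS.add_parser("local", ...) call and the load_crawler helper are
confirmed by the context lines above.

    # PFERD/cli/command_example.py -- hypothetical command module
    import argparse
    import configparser

    from .parser import CRAWLER_PARSER, SUBPARSERS, load_crawler

    # Registration happens at import time: merely importing this module from
    # cli/__init__.py wires the "example" subcommand into the CLI.
    SUBPARSER = SUBPARSERS.add_parser(
        "example",
        parents=[CRAWLER_PARSER],  # inherit the shared crawler arguments
    )


    def load(
            args: argparse.Namespace,
            parser: configparser.ConfigParser,
    ) -> None:
        # Turn the parsed CLI flags into a config section. load_crawler only
        # sets keys for flags the user actually passed on the command line.
        parser["crawl:example"] = {}
        section = parser["crawl:example"]
        load_crawler(args, section)


    # PARSER.set_defaults(command=None) in parser.py suggests each subcommand
    # installs its loader as the "command" default; that is assumed here too.
    SUBPARSER.set_defaults(command=load)

To add a new command, one would create such a module and import it alongside
command_local in cli/__init__.py so its registration runs at startup.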