Mirror of https://github.com/Garmelon/PFERD.git (synced 2023-12-21 10:23:01 +01:00)
commit e81005ae4b (parent 33a81a5f5c)

    Fix CLI arguments
PFERD/cli/__init__.py
@@ -1,125 +1,10 @@
-import argparse
-import configparser
-from pathlib import Path
-
-from ..output_dir import OnConflict, Redownload
-
-CRAWLER_PARSER = argparse.ArgumentParser(add_help=False)
-CRAWLER_PARSER_GROUP = CRAWLER_PARSER.add_argument_group(
-    title="general crawler arguments",
-    description="arguments common to all crawlers",
-)
-CRAWLER_PARSER_GROUP.add_argument(
-    "--redownload",
-    type=Redownload.from_string,
-    metavar="OPTION",
-    help="when to redownload a file that's already present locally"
-)
-CRAWLER_PARSER_GROUP.add_argument(
-    "--on-conflict",
-    type=OnConflict.from_string,
-    metavar="OPTION",
-    help="what to do when local and remote files or directories differ"
-)
-CRAWLER_PARSER_GROUP.add_argument(
-    "--transform", "-t",
-    action="append",
-    type=str,
-    metavar="RULE",
-    help="add a single transformation rule. Can be specified multiple times"
-)
-CRAWLER_PARSER_GROUP.add_argument(
-    "--max-concurrent-tasks",
-    type=int,
-    metavar="N",
-    help="maximum number of concurrent tasks (crawling, downloading)"
-)
-CRAWLER_PARSER_GROUP.add_argument(
-    "--max-concurrent-downloads",
-    type=int,
-    metavar="N",
-    help="maximum number of tasks that may download data at the same time"
-)
-CRAWLER_PARSER_GROUP.add_argument(
-    "--delay-between-tasks",
-    type=float,
-    metavar="SECONDS",
-    help="time the crawler should wait between subsequent tasks"
-)
-
-
-def load_crawler(
-        args: argparse.Namespace,
-        section: configparser.SectionProxy,
-) -> None:
-    if args.redownload is not None:
-        section["redownload"] = args.redownload.value
-    if args.on_conflict is not None:
-        section["on_conflict"] = args.on_conflict.value
-    if args.transform is not None:
-        section["transform"] = "\n" + "\n".join(args.transform)
-    if args.max_concurrent_tasks is not None:
-        section["max_concurrent_tasks"] = str(args.max_concurrent_tasks)
-    if args.max_concurrent_downloads is not None:
-        section["max_concurrent_downloads"] = str(args.max_concurrent_downloads)
-    if args.delay_between_tasks is not None:
-        section["delay_between_tasks"] = str(args.delay_between_tasks)
-
-
-PARSER = argparse.ArgumentParser()
-PARSER.set_defaults(command=None)
-PARSER.add_argument(
-    "--version",
-    action="store_true",
-    help="print version and exit"
-)
-PARSER.add_argument(
-    "--config", "-c",
-    type=Path,
-    metavar="PATH",
-    help="custom config file"
-)
-PARSER.add_argument(
-    "--dump-config",
-    nargs="?",
-    const=True,
-    metavar="PATH",
-    help="dump current configuration to a file and exit."
-    " Uses default config file path if no path is specified"
-)
-PARSER.add_argument(
-    "--crawler",
-    action="append",
-    type=str,
-    metavar="NAME",
-    help="only execute a single crawler."
-    " Can be specified multiple times to execute multiple crawlers"
-)
-PARSER.add_argument(
-    "--working-dir",
-    type=Path,
-    metavar="PATH",
-    help="custom working directory"
-)
-PARSER.add_argument(
-    "--explain", "-e",
-    # TODO Use argparse.BooleanOptionalAction after updating to 3.9
-    action="store_const",
-    const=True,
-    help="log and explain in detail what PFERD is doing"
-)
-
-
-def load_default_section(
-        args: argparse.Namespace,
-        parser: configparser.ConfigParser,
-) -> None:
-    section = parser[parser.default_section]
-
-    if args.working_dir is not None:
-        section["working_dir"] = str(args.working_dir)
-    if args.explain is not None:
-        section["explain"] = "true" if args.explain else "false"
-
-
-SUBPARSERS = PARSER.add_subparsers(title="crawlers")
+# isort: skip_file
+
+# The order of imports matters because each command module registers itself
+# with the parser from ".parser". Because of this, isort is disabled for this
+# file. Also, since we're reexporting or just using the side effect of
+# importing itself, we get a few linting warnings, which we're disabling as
+# well.
+
+from . import command_local  # noqa: F401 imported but unused
+from .parser import PARSER, load_default_section  # noqa: F401 imported but unused
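The new __init__.py is imports only, because each command module registers its subcommand as an import side effect, as the comment block above explains. A minimal, self-contained sketch of that pattern (the two modules are collapsed into one file here; this is not PFERD's verbatim code):

    import argparse

    # stands in for PFERD/cli/parser.py: the shared parser and subparser registry
    PARSER = argparse.ArgumentParser()
    SUBPARSERS = PARSER.add_subparsers(title="crawlers")

    # stands in for PFERD/cli/command_local.py: add_parser runs at import time,
    # so merely importing the module makes the "local" subcommand available
    SUBPARSER = SUBPARSERS.add_parser("local")
    SUBPARSER.set_defaults(command="local")

    args = PARSER.parse_args(["local"])
    print(args.command)  # -> local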
PFERD/cli/command_local.py
@@ -2,7 +2,7 @@ import argparse
 import configparser
 from pathlib import Path
 
-from . import CRAWLER_PARSER, SUBPARSERS, load_crawler
+from .parser import CRAWLER_PARSER, SUBPARSERS, load_crawler
 
 SUBPARSER = SUBPARSERS.add_parser(
     "local",
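command_local.py imports CRAWLER_PARSER, which is built with add_help=False — the usual setup for argparse's parents= mechanism, by which a subcommand inherits shared options. The add_parser call is truncated after "local" in this hunk, so the following is only a sketch of that idiom under the assumption that the call continues with parents=[CRAWLER_PARSER]; it is not the file's verbatim contents:

    import argparse

    CRAWLER_PARSER = argparse.ArgumentParser(add_help=False)
    CRAWLER_PARSER.add_argument("--redownload", metavar="OPTION")

    PARSER = argparse.ArgumentParser()
    SUBPARSERS = PARSER.add_subparsers(title="crawlers")
    SUBPARSER = SUBPARSERS.add_parser(
        "local",
        parents=[CRAWLER_PARSER],  # assumed: inherit the shared crawler arguments
    )

    args = PARSER.parse_args(["local", "--redownload", "never"])
    print(args.redownload)  # -> never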
PFERD/cli/parser.py (new file, 125 lines)
@@ -0,0 +1,125 @@
+import argparse
+import configparser
+from pathlib import Path
+
+from ..output_dir import OnConflict, Redownload
+
+CRAWLER_PARSER = argparse.ArgumentParser(add_help=False)
+CRAWLER_PARSER_GROUP = CRAWLER_PARSER.add_argument_group(
+    title="general crawler arguments",
+    description="arguments common to all crawlers",
+)
+CRAWLER_PARSER_GROUP.add_argument(
+    "--redownload",
+    type=Redownload.from_string,
+    metavar="OPTION",
+    help="when to redownload a file that's already present locally"
+)
+CRAWLER_PARSER_GROUP.add_argument(
+    "--on-conflict",
+    type=OnConflict.from_string,
+    metavar="OPTION",
+    help="what to do when local and remote files or directories differ"
+)
+CRAWLER_PARSER_GROUP.add_argument(
+    "--transform", "-t",
+    action="append",
+    type=str,
+    metavar="RULE",
+    help="add a single transformation rule. Can be specified multiple times"
+)
+CRAWLER_PARSER_GROUP.add_argument(
+    "--max-concurrent-tasks",
+    type=int,
+    metavar="N",
+    help="maximum number of concurrent tasks (crawling, downloading)"
+)
+CRAWLER_PARSER_GROUP.add_argument(
+    "--max-concurrent-downloads",
+    type=int,
+    metavar="N",
+    help="maximum number of tasks that may download data at the same time"
+)
+CRAWLER_PARSER_GROUP.add_argument(
+    "--delay-between-tasks",
+    type=float,
+    metavar="SECONDS",
+    help="time the crawler should wait between subsequent tasks"
+)
+
+
+def load_crawler(
+        args: argparse.Namespace,
+        section: configparser.SectionProxy,
+) -> None:
+    if args.redownload is not None:
+        section["redownload"] = args.redownload.value
+    if args.on_conflict is not None:
+        section["on_conflict"] = args.on_conflict.value
+    if args.transform is not None:
+        section["transform"] = "\n" + "\n".join(args.transform)
+    if args.max_concurrent_tasks is not None:
+        section["max_concurrent_tasks"] = str(args.max_concurrent_tasks)
+    if args.max_concurrent_downloads is not None:
+        section["max_concurrent_downloads"] = str(args.max_concurrent_downloads)
+    if args.delay_between_tasks is not None:
+        section["delay_between_tasks"] = str(args.delay_between_tasks)
+
+
+PARSER = argparse.ArgumentParser()
+PARSER.set_defaults(command=None)
+PARSER.add_argument(
+    "--version",
+    action="store_true",
+    help="print version and exit"
+)
+PARSER.add_argument(
+    "--config", "-c",
+    type=Path,
+    metavar="PATH",
+    help="custom config file"
+)
+PARSER.add_argument(
+    "--dump-config",
+    nargs="?",
+    const=True,
+    metavar="PATH",
+    help="dump current configuration to a file and exit."
+    " Uses default config file path if no path is specified"
+)
+PARSER.add_argument(
+    "--crawler",
+    action="append",
+    type=str,
+    metavar="NAME",
+    help="only execute a single crawler."
+    " Can be specified multiple times to execute multiple crawlers"
+)
+PARSER.add_argument(
+    "--working-dir",
+    type=Path,
+    metavar="PATH",
+    help="custom working directory"
+)
+PARSER.add_argument(
+    "--explain", "-e",
+    # TODO Use argparse.BooleanOptionalAction after updating to 3.9
+    action="store_const",
+    const=True,
+    help="log and explain in detail what PFERD is doing"
+)
+
+
+def load_default_section(
+        args: argparse.Namespace,
+        parser: configparser.ConfigParser,
+) -> None:
+    section = parser[parser.default_section]
+
+    if args.working_dir is not None:
+        section["working_dir"] = str(args.working_dir)
+    if args.explain is not None:
+        section["explain"] = "true" if args.explain else "false"
+
+
+SUBPARSERS = PARSER.add_subparsers(title="crawlers")
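Every option in parser.py leaves its default at None, so load_crawler and load_default_section overwrite only the configuration keys the user actually passed on the command line; everything else keeps its config-file value. A self-contained sketch of that overlay pattern, mirroring load_default_section above:

    import argparse
    import configparser

    parser = argparse.ArgumentParser()
    parser.add_argument("--working-dir")  # default is None, i.e. "not given"
    # tri-state flag: None (not given) or True, as with --explain above
    parser.add_argument("--explain", action="store_const", const=True)

    config = configparser.ConfigParser()
    config.read_string("[DEFAULT]\nworking_dir = .\nexplain = false\n")

    args = parser.parse_args(["--explain"])
    section = config[config.default_section]
    if args.working_dir is not None:  # not given, so the config value survives
        section["working_dir"] = str(args.working_dir)
    if args.explain is not None:      # given, so the CLI value wins
        section["explain"] = "true" if args.explain else "false"

    print(dict(config.defaults()))  # -> {'working_dir': '.', 'explain': 'true'}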