Overhaul config and CLI option names

Joscha
2021-05-25 14:12:19 +02:00
parent eb8b915813
commit 61430c8739
5 changed files with 154 additions and 129 deletions

View File

@@ -11,14 +11,14 @@ SUBPARSER = SUBPARSERS.add_parser(
)
GROUP = SUBPARSER.add_argument_group(
title="KIT ILIAS web-crawler arguments",
title="kit-ilias-web crawler arguments",
description="arguments for the 'kit-ilias-web' crawler",
)
GROUP.add_argument(
"target",
type=str,
metavar="TARGET",
help="course id, 'desktop', or ILIAS https-URL to crawl"
help="course id, 'desktop', or ILIAS URL to crawl"
)
GROUP.add_argument(
"output",
@@ -27,14 +27,9 @@ GROUP.add_argument(
help="output directory"
)
GROUP.add_argument(
"--videos",
action=BooleanOptionalAction,
help="crawl and download videos"
)
GROUP.add_argument(
"--username",
"--username", "-u",
type=str,
metavar="USER_NAME",
metavar="USERNAME",
help="user name for authentication"
)
GROUP.add_argument(
@@ -46,19 +41,24 @@ GROUP.add_argument(
"--links",
type=show_value_error(Links.from_string),
metavar="OPTION",
help="how to treat external links"
help="how to represent external links"
)
GROUP.add_argument(
"--link-file-redirect-delay",
"--link-redirect-delay",
type=int,
metavar="SECONDS",
help="delay before external link files redirect you to their target (-1 to disable)"
help="time before 'fancy' links redirect to to their target (-1 to disable)"
)
GROUP.add_argument(
"--http-timeout",
"--videos",
action=BooleanOptionalAction,
help="crawl and download videos"
)
GROUP.add_argument(
"--http-timeout", "-t",
type=float,
metavar="SECONDS",
help="the timeout to use for HTTP requests"
help="timeout for all HTTP requests"
)
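
The --videos flag keeps its BooleanOptionalAction, which is what makes the tri-state handling in load() below work: leaving the flag off yields None (so nothing is written to the config), while --videos/--no-videos yield True/False. A minimal sketch of that behaviour, assuming Python 3.9's argparse.BooleanOptionalAction (the project may use an equivalent of its own):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    "--videos",
    action=argparse.BooleanOptionalAction,
    help="crawl and download videos"
)

print(parser.parse_args([]).videos)               # None  -> key is left out of the config
print(parser.parse_args(["--videos"]).videos)     # True  -> "videos = yes"
print(parser.parse_args(["--no-videos"]).videos)  # False -> "videos = no"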
@@ -66,33 +66,30 @@ def load(
args: argparse.Namespace,
parser: configparser.ConfigParser,
) -> None:
parser["crawl:kit-ilias-web"] = {}
section = parser["crawl:kit-ilias-web"]
parser["crawl:ilias"] = {}
section = parser["crawl:ilias"]
load_crawler(args, section)
section["type"] = "kit-ilias-web"
section["target"] = str(args.target)
section["output_dir"] = str(args.output)
section["auth"] = "auth:kit-ilias-web"
if args.link_file_redirect_delay is not None:
section["link_file_redirect_delay"] = str(args.link_file_redirect_delay)
section["auth"] = "auth:ilias"
if args.links is not None:
section["links"] = str(args.links.value)
if args.link_redirect_delay is not None:
section["link_redirect_delay"] = str(args.link_redirect_delay)
if args.videos is not None:
section["videos"] = str(False)
section["videos"] = "yes" if args.videos else "no"
if args.http_timeout is not None:
section["http_timeout"] = str(args.http_timeout)
parser["auth:kit-ilias-web"] = {}
auth_section = parser["auth:kit-ilias-web"]
parser["auth:ilias"] = {}
auth_section = parser["auth:ilias"]
auth_section["type"] = "simple"
if args.username is not None:
auth_section["username"] = args.username
if args.keyring:
auth_section["type"] = "keyring"
else:
auth_section["type"] = "simple"
if args.username is not None:
auth_section["username"] = str(args.username)
SUBPARSER.set_defaults(command=load)
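
Taken together, load() now emits crawl:ilias / auth:ilias sections instead of the old crawl:kit-ilias-web / auth:kit-ilias-web ones. A rough sketch of the resulting INI for a hypothetical invocation such as "kit-ilias-web 1234567 ./Course -u jdoe --videos" (the target, output directory and username are invented for illustration):

import configparser
import sys

# Roughly what load() would write for the hypothetical invocation above
config = configparser.ConfigParser()
config["crawl:ilias"] = {
    "type": "kit-ilias-web",
    "target": "1234567",
    "output_dir": "./Course",
    "auth": "auth:ilias",
    "videos": "yes",
}
config["auth:ilias"] = {
    "type": "simple",
    "username": "jdoe",
}
config.write(sys.stdout)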

View File

@@ -77,10 +77,10 @@ CRAWLER_PARSER_GROUP = CRAWLER_PARSER.add_argument_group(
description="arguments common to all crawlers",
)
CRAWLER_PARSER_GROUP.add_argument(
"--redownload",
"--redownload", "-r",
type=show_value_error(Redownload.from_string),
metavar="OPTION",
help="when to redownload a file that's already present locally"
help="when to download a file that's already present locally"
)
CRAWLER_PARSER_GROUP.add_argument(
"--on-conflict",
@@ -89,30 +89,35 @@ CRAWLER_PARSER_GROUP.add_argument(
help="what to do when local and remote files or directories differ"
)
CRAWLER_PARSER_GROUP.add_argument(
"--transform", "-t",
"--transform", "-T",
action="append",
type=str,
metavar="RULE",
help="add a single transformation rule. Can be specified multiple times"
)
CRAWLER_PARSER_GROUP.add_argument(
"--max-concurrent-tasks",
"--tasks", "-n",
type=int,
metavar="N",
help="maximum number of concurrent tasks (crawling, downloading)"
)
CRAWLER_PARSER_GROUP.add_argument(
"--max-concurrent-downloads",
"--downloads", "-N",
type=int,
metavar="N",
help="maximum number of tasks that may download data at the same time"
)
CRAWLER_PARSER_GROUP.add_argument(
"--delay-between-tasks",
"--task-delay", "-d",
type=float,
metavar="SECONDS",
help="time the crawler should wait between subsequent tasks"
)
CRAWLER_PARSER_GROUP.add_argument(
"--windows-paths",
action=BooleanOptionalAction,
help="whether to repair invalid paths on windows"
)
def load_crawler(
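
Several of these options run their value through show_value_error(...) as the argparse type. The helper is defined elsewhere in the repository; a plausible sketch of its role (an assumption, not the verified implementation) is that it turns a converter's ValueError into an argparse.ArgumentTypeError, so the converter's own message appears in the CLI error output instead of argparse's generic "invalid value":

import argparse
from typing import Callable, TypeVar

T = TypeVar("T")

def show_value_error(inner: Callable[[str], T]) -> Callable[[str], T]:
    # Wrap a converter such as Redownload.from_string so that its ValueError
    # text is surfaced to the user.
    def wrapper(value: str) -> T:
        try:
            return inner(value)
        except ValueError as e:
            raise argparse.ArgumentTypeError(str(e)) from e
    return wrapper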
@@ -125,12 +130,14 @@ def load_crawler(
section["on_conflict"] = args.on_conflict.value
if args.transform is not None:
section["transform"] = "\n" + "\n".join(args.transform)
if args.max_concurrent_tasks is not None:
section["max_concurrent_tasks"] = str(args.max_concurrent_tasks)
if args.max_concurrent_downloads is not None:
section["max_concurrent_downloads"] = str(args.max_concurrent_downloads)
if args.delay_between_tasks is not None:
section["delay_between_tasks"] = str(args.delay_between_tasks)
if args.tasks is not None:
section["tasks"] = str(args.tasks)
if args.downloads is not None:
section["downloads"] = str(args.downloads)
if args.task_delay is not None:
section["task_delay"] = str(args.task_delay)
if args.windows_paths is not None:
section["windows_paths"] = "yes" if args.windows_paths else "no"
PARSER = argparse.ArgumentParser()
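
One detail worth noting in load_crawler: because --transform/-T uses action="append", each occurrence on the command line is collected into a list, and the loader joins the list into a single multi-line config value with a leading newline. A small sketch of how that serializes (the rule strings are invented placeholders):

import configparser
import sys

config = configparser.ConfigParser()
config["crawl:ilias"] = {
    "transform": "\n" + "\n".join(["foo --> bar", "baz --> qux"]),
}
config.write(sys.stdout)
# Prints roughly:
#   [crawl:ilias]
#   transform =
#           foo --> bar
#           baz --> qux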
@@ -200,6 +207,10 @@ def load_default_section(
section["working_dir"] = str(args.working_dir)
if args.explain is not None:
section["explain"] = "yes" if args.explain else "no"
if args.status is not None:
section["status"] = "yes" if args.status else "no"
if args.report is not None:
section["report"] = "yes" if args.report else "no"
if args.share_cookies is not None:
section["share_cookies"] = "yes" if args.share_cookies else "no"