Add fancy CLI options

This commit is contained in:
Joscha 2021-05-15 21:33:51 +02:00
parent c454fabc9d
commit 05573ccc53
4 changed files with 250 additions and 56 deletions

View File

@ -1,40 +1,229 @@
import argparse import argparse
import asyncio import asyncio
import configparser
from pathlib import Path from pathlib import Path
from .config import Config, ConfigDumpException, ConfigLoadException from .config import Config, ConfigDumpException, ConfigLoadException
from .output_dir import OnConflict, Redownload
from .pferd import Pferd from .pferd import Pferd
GENERAL_PARSER = argparse.ArgumentParser(add_help=False)
def main() -> None: GENERAL_PARSER.add_argument(
parser = argparse.ArgumentParser()
parser.add_argument(
"--config", "-c", "--config", "-c",
type=Path, type=Path,
metavar="PATH", metavar="PATH",
help="specify custom config file path", help="custom config file"
) )
parser.add_argument( GENERAL_PARSER.add_argument(
"--dump-config", "--dump-config",
nargs="?", nargs="?",
const=True, const=True,
type=Path,
metavar="PATH", metavar="PATH",
help="dump current configuration to a file and exit." help="dump current configuration to a file and exit."
" Uses default config file path if no path is specified", " Uses default config file path if no path is specified"
) )
args = parser.parse_args() GENERAL_PARSER.add_argument(
"--crawler",
action="append",
type=str,
metavar="NAME",
help="only execute a single crawler."
" Can be specified multiple times to execute multiple crawlers"
)
GENERAL_PARSER.add_argument(
"--working-dir",
type=Path,
metavar="PATH",
help="custom working directory"
)
def load_general(
    args: argparse.Namespace,
    parser: configparser.ConfigParser,
) -> None:
    """Copy general command line options into the parser's default section.

    Only ``--working-dir`` is transferred, and only when it was given on the
    command line; otherwise the parser is left untouched.
    """
    working_dir = args.working_dir
    if working_dir is None:
        return

    parser[parser.default_section]["working_dir"] = str(working_dir)
# Options shared by all crawler subcommands. The parser is built without its
# own help flag because it is only used as a parent of other parsers (see
# LOCAL_CRAWLER).
CRAWLER_PARSER = argparse.ArgumentParser(add_help=False)
CRAWLER_PARSER_GROUP = CRAWLER_PARSER.add_argument_group(
    title="general crawler arguments",
    description="arguments common to all crawlers",
)

# One (flags, keyword arguments) entry per option, listed in the order in
# which the options are registered on the group.
for _flags, _kwargs in (
    (
        ("--redownload",),
        {
            "type": Redownload.from_string,
            "metavar": "OPTION",
            "help": "when to redownload a file that's already present locally",
        },
    ),
    (
        ("--on-conflict",),
        {
            "type": OnConflict.from_string,
            "metavar": "OPTION",
            "help": "what to do when local and remote files or directories differ",
        },
    ),
    (
        ("--transform", "-t"),
        {
            "action": "append",
            "type": str,
            "metavar": "RULE",
            "help": "add a single transformation rule. Can be specified multiple times",
        },
    ),
    (
        ("--max-concurrent-tasks",),
        {
            "type": int,
            "metavar": "N",
            "help": "maximum number of concurrent tasks (crawling, downloading)",
        },
    ),
    (
        ("--max-concurrent-downloads",),
        {
            "type": int,
            "metavar": "N",
            "help": "maximum number of tasks that may download data at the same time",
        },
    ),
    (
        ("--delay-between-tasks",),
        {
            "type": float,
            "metavar": "SECONDS",
            "help": "time the crawler should wait between subsequent tasks",
        },
    ),
):
    CRAWLER_PARSER_GROUP.add_argument(*_flags, **_kwargs)

# Do not leave the loop variables behind as module attributes.
del _flags, _kwargs
def load_crawler(
    args: argparse.Namespace,
    section: configparser.SectionProxy,
) -> None:
    """Copy the general crawler options from ``args`` into ``section``.

    Options that were not given on the command line (value ``None``) are
    skipped, so the section only receives explicitly specified settings.
    """
    # Enum-valued options are stored via their ``value`` attribute.
    for key in ("redownload", "on_conflict"):
        choice = getattr(args, key)
        if choice is not None:
            section[key] = choice.value

    # All rules end up in one multi-line value, one rule per line, with a
    # leading newline in front of the first rule.
    if args.transform is not None:
        section["transform"] = "\n" + "\n".join(args.transform)

    # Numeric options are stored in their string form.
    for key in (
        "max_concurrent_tasks",
        "max_concurrent_downloads",
        "delay_between_tasks",
    ):
        number = getattr(args, key)
        if number is not None:
            section[key] = str(number)
# Top-level parser. "command" defaults to None and is overridden by each
# subcommand, which lets load_parser() tell which subcommand (if any) was used.
PARSER = argparse.ArgumentParser(parents=[GENERAL_PARSER])
PARSER.set_defaults(command=None)
SUBPARSERS = PARSER.add_subparsers(title="crawlers")

# The "local" subcommand accepts the general options, the common crawler
# options and its own arguments defined below.
LOCAL_CRAWLER = SUBPARSERS.add_parser(
    "local",
    parents=[GENERAL_PARSER, CRAWLER_PARSER],
)
LOCAL_CRAWLER.set_defaults(command="local")
LOCAL_CRAWLER_GROUP = LOCAL_CRAWLER.add_argument_group(
    title="local crawler arguments",
    description="arguments for the 'local' crawler",
)

# Positional path arguments, in the order they must appear on the command line.
for _name, _metavar, _help in (
    ("target", "TARGET", "directory to crawl"),
    ("output", "OUTPUT", "output directory"),
):
    LOCAL_CRAWLER_GROUP.add_argument(
        _name,
        type=Path,
        metavar=_metavar,
        help=_help,
    )

# Optional numeric settings of the local crawler.
for _flag, _type, _metavar, _help in (
    ("--crawl-delay", float, "SECONDS",
     "artificial delay to simulate for crawl requests"),
    ("--download-delay", float, "SECONDS",
     "artificial delay to simulate for download requests"),
    ("--download-speed", int, "BYTES_PER_SECOND",
     "download speed to simulate"),
):
    LOCAL_CRAWLER_GROUP.add_argument(
        _flag,
        type=_type,
        metavar=_metavar,
        help=_help,
    )

# Do not leave the loop variables behind as module attributes.
del _name, _flag, _type, _metavar, _help
def load_local_crawler(
    args: argparse.Namespace,
    parser: configparser.ConfigParser,
) -> None:
    """Create the "crawl:local" section from the "local" subcommand's options.

    The section is (re)created empty, filled with the common crawler options
    via load_crawler(), and then with the local crawler's own settings.
    Optional settings that were not given on the command line are omitted.
    """
    section_name = "crawl:local"
    parser[section_name] = {}
    section = parser[section_name]

    load_crawler(args, section)

    # Mandatory settings.
    section["type"] = "local"
    section["target"] = str(args.target)
    section["output_dir"] = str(args.output)

    # Optional settings, stored in their string form when present.
    for key in ("crawl_delay", "download_delay", "download_speed"):
        value = getattr(args, key)
        if value is not None:
            section[key] = str(value)
def load_parser(
    args: argparse.Namespace,
) -> configparser.ConfigParser:
    """Build the ConfigParser that the parsed command line describes.

    Without a subcommand the config file is loaded (from ``args.config``, or
    wherever Config.load_parser() looks when that is None). With the "local"
    subcommand the configuration is built from the command line instead.
    Afterwards the general options are applied and crawler sections that were
    not selected via ``--crawler`` are removed.

    Config.load_parser() is documented as possibly raising a
    ConfigLoadException, which is not caught here.
    """
    parser = configparser.ConfigParser()

    command = args.command
    if command == "local":
        load_local_crawler(args, parser)
    elif command is None:
        Config.load_parser(parser, path=args.config)

    load_general(args, parser)
    prune_crawlers(args, parser)

    return parser
def prune_crawlers(
    args: argparse.Namespace,
    parser: configparser.ConfigParser,
) -> None:
    """Remove every "crawl:" section that was not selected via ``--crawler``.

    If no crawler was selected, the parser is left unchanged. Sections whose
    name does not start with "crawl:" are never removed.
    """
    selected = args.crawler
    if not selected:
        return

    prefix = "crawl:"
    for section in parser.sections():
        if not section.startswith(prefix):
            continue

        # TODO Use removeprefix() when switching to 3.9
        if section[len(prefix):] not in selected:
            parser.remove_section(section)

    # TODO Check if crawlers actually exist
def main() -> None:
args = PARSER.parse_args()
try: try:
config_parser = Config.load_parser(args.config) config = Config(load_parser(args))
config = Config(config_parser)
except ConfigLoadException: except ConfigLoadException:
exit(1) exit(1)
if args.dump_config: if args.dump_config is not None:
path = None if args.dump_config is True else args.dump_config
try: try:
config.dump(path) if args.dump_config is True:
config.dump()
elif args.dump_config == "-":
config.dump_to_stdout()
else:
config.dump(Path(args.dump_config))
except ConfigDumpException: except ConfigDumpException:
exit(1) exit(1)
exit() exit()

View File

@ -1,4 +1,6 @@
import asyncio
import os import os
import sys
from configparser import ConfigParser, SectionProxy from configparser import ConfigParser, SectionProxy
from dataclasses import dataclass from dataclasses import dataclass
from pathlib import Path from pathlib import Path
@ -68,7 +70,7 @@ class Config:
raise ConfigLoadException() raise ConfigLoadException()
@staticmethod @staticmethod
def load_parser(path: Optional[Path] = None) -> ConfigParser: def load_parser(parser: ConfigParser, path: Optional[Path] = None) -> None:
""" """
May throw a ConfigLoadException. May throw a ConfigLoadException.
""" """
@ -76,8 +78,6 @@ class Config:
if not path: if not path:
path = Config._default_path() path = Config._default_path()
parser = ConfigParser()
# Using config.read_file instead of config.read because config.read # Using config.read_file instead of config.read because config.read
# would just ignore a missing file and carry on. # would just ignore a missing file and carry on.
try: try:
@ -90,8 +90,6 @@ class Config:
except PermissionError: except PermissionError:
Config._fail_load(path, "Insufficient permissions") Config._fail_load(path, "Insufficient permissions")
return parser
@staticmethod @staticmethod
def _fail_dump(path: Path, reason: str) -> None: def _fail_dump(path: Path, reason: str) -> None:
print(f"Failed to dump config file to {path}") print(f"Failed to dump config file to {path}")
@ -123,7 +121,7 @@ class Config:
self._parser.write(f) self._parser.write(f)
except FileExistsError: except FileExistsError:
print("That file already exists.") print("That file already exists.")
if prompt_yes_no("Overwrite it?", default=False): if asyncio.run(prompt_yes_no("Overwrite it?", default=False)):
with open(path, "w") as f: with open(path, "w") as f:
self._parser.write(f) self._parser.write(f)
else: else:
@ -133,6 +131,9 @@ class Config:
except PermissionError: except PermissionError:
self._fail_dump(path, "Insufficient permissions") self._fail_dump(path, "Insufficient permissions")
def dump_to_stdout(self) -> None:
    """Write the configuration held by the underlying parser to stdout."""
    stream = sys.stdout
    self._parser.write(stream)
@property @property
def default_section(self) -> SectionProxy: def default_section(self) -> SectionProxy:
return self._parser[self._parser.default_section] return self._parser[self._parser.default_section]

View File

@ -117,36 +117,24 @@ class CrawlerSection(Section):
def redownload(self) -> Redownload: def redownload(self) -> Redownload:
value = self.s.get("redownload", "never-smart") value = self.s.get("redownload", "never-smart")
if value == "never": try:
return Redownload.NEVER return Redownload.from_string(value)
elif value == "never-smart": except ValueError as e:
return Redownload.NEVER_SMART
elif value == "always":
return Redownload.ALWAYS
elif value == "always-smart":
return Redownload.ALWAYS_SMART
self.invalid_value( self.invalid_value(
"redownload", "redownload",
value, value,
"Must be 'never', 'never-smart', 'always' or 'always-smart'" str(e).capitalize(),
) )
def on_conflict(self) -> OnConflict: def on_conflict(self) -> OnConflict:
value = self.s.get("on_conflict", "prompt") value = self.s.get("on_conflict", "prompt")
if value == "prompt": try:
return OnConflict.PROMPT return OnConflict.from_string(value)
elif value == "local-first": except ValueError as e:
return OnConflict.LOCAL_FIRST
elif value == "remote-first":
return OnConflict.REMOTE_FIRST
elif value == "no-delete":
return OnConflict.NO_DELETE
self.invalid_value( self.invalid_value(
"on_conflict", "on_conflict",
value, value,
"Must be 'prompt', 'local-first', 'remote-first' or 'no-delete'", str(e).capitalize(),
) )
def transform(self) -> str: def transform(self) -> str:

View File

@ -32,6 +32,14 @@ class Redownload(Enum):
ALWAYS = "always" ALWAYS = "always"
ALWAYS_SMART = "always-smart" ALWAYS_SMART = "always-smart"
@staticmethod
def from_string(string: str) -> "Redownload":
    """Convert an option string into the matching Redownload member.

    Raises:
        ValueError: if ``string`` is not the value of any member. The message
            lists the accepted option strings.
    """
    try:
        return Redownload(string)
    except ValueError:
        # "from None" suppresses the implicit exception context, so that a
        # traceback shows only this error instead of "During handling of the
        # above exception, another exception occurred".
        raise ValueError("must be one of 'never', 'never-smart',"
                         " 'always', 'always-smart'") from None
class OnConflict(Enum): class OnConflict(Enum):
PROMPT = "prompt" PROMPT = "prompt"
@ -39,6 +47,14 @@ class OnConflict(Enum):
REMOTE_FIRST = "remote-first" REMOTE_FIRST = "remote-first"
NO_DELETE = "no-delete" NO_DELETE = "no-delete"
@staticmethod
def from_string(string: str) -> "OnConflict":
    """Convert an option string into the matching OnConflict member.

    Raises:
        ValueError: if ``string`` is not the value of any member. The message
            lists the accepted option strings.
    """
    try:
        return OnConflict(string)
    except ValueError:
        # "from None" suppresses the implicit exception context, so that a
        # traceback shows only this error instead of "During handling of the
        # above exception, another exception occurred".
        raise ValueError("must be one of 'prompt', 'local-first',"
                         " 'remote-first', 'no-delete'") from None
@dataclass @dataclass
class Heuristics: class Heuristics: