mirror of
https://github.com/Garmelon/PFERD.git
synced 2023-12-21 10:23:01 +01:00
Add fancy CLI options
This commit is contained in:
parent
c454fabc9d
commit
05573ccc53
@ -1,40 +1,229 @@
|
|||||||
import argparse
|
import argparse
|
||||||
import asyncio
|
import asyncio
|
||||||
|
import configparser
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from .config import Config, ConfigDumpException, ConfigLoadException
|
from .config import Config, ConfigDumpException, ConfigLoadException
|
||||||
|
from .output_dir import OnConflict, Redownload
|
||||||
from .pferd import Pferd
|
from .pferd import Pferd
|
||||||
|
|
||||||
|
# Options understood both by the top-level command and by every crawler
# subcommand. This parser is only ever used via "parents=[...]", which is why
# it does not define its own -h/--help.
GENERAL_PARSER = argparse.ArgumentParser(add_help=False)
GENERAL_PARSER.add_argument(
    "--config", "-c",
    help="custom config file",
    metavar="PATH",
    type=Path,
)
# No "type" here on purpose: a bare "--dump-config" yields True (the const),
# otherwise the raw string that followed the flag.
GENERAL_PARSER.add_argument(
    "--dump-config",
    help=(
        "dump current configuration to a file and exit."
        " Uses default config file path if no path is specified"
    ),
    metavar="PATH",
    nargs="?",
    const=True,
)
# Repeated occurrences are collected into a list of crawler names.
GENERAL_PARSER.add_argument(
    "--crawler",
    help=(
        "only execute a single crawler."
        " Can be specified multiple times to execute multiple crawlers"
    ),
    metavar="NAME",
    type=str,
    action="append",
)
GENERAL_PARSER.add_argument(
    "--working-dir",
    help="custom working directory",
    metavar="PATH",
    type=Path,
)
|
||||||
|
|
||||||
|
|
||||||
|
def load_general(
        args: argparse.Namespace,
        parser: configparser.ConfigParser,
) -> None:
    """
    Copy the general command line options into the parser's default section.

    An option is only written if it was given on the command line (i.e. its
    value is not None); otherwise the parser is left untouched.
    """

    working_dir = args.working_dir
    if working_dir is None:
        return

    defaults = parser[parser.default_section]
    defaults["working_dir"] = str(working_dir)
|
||||||
|
|
||||||
|
|
||||||
|
# Options shared by all crawler subcommands. Like GENERAL_PARSER, this parser
# is meant to be passed via "parents=[...]" and therefore has no -h/--help.
CRAWLER_PARSER = argparse.ArgumentParser(add_help=False)
CRAWLER_PARSER_GROUP = CRAWLER_PARSER.add_argument_group(
    description="arguments common to all crawlers",
    title="general crawler arguments",
)
# The two enum-valued options are converted by the enums' own from_string().
CRAWLER_PARSER_GROUP.add_argument(
    "--redownload",
    help="when to redownload a file that's already present locally",
    metavar="OPTION",
    type=Redownload.from_string,
)
CRAWLER_PARSER_GROUP.add_argument(
    "--on-conflict",
    help="what to do when local and remote files or directories differ",
    metavar="OPTION",
    type=OnConflict.from_string,
)
# Repeated occurrences are collected into a list of rules.
CRAWLER_PARSER_GROUP.add_argument(
    "--transform", "-t",
    help="add a single transformation rule. Can be specified multiple times",
    metavar="RULE",
    type=str,
    action="append",
)
CRAWLER_PARSER_GROUP.add_argument(
    "--max-concurrent-tasks",
    help="maximum number of concurrent tasks (crawling, downloading)",
    metavar="N",
    type=int,
)
CRAWLER_PARSER_GROUP.add_argument(
    "--max-concurrent-downloads",
    help="maximum number of tasks that may download data at the same time",
    metavar="N",
    type=int,
)
CRAWLER_PARSER_GROUP.add_argument(
    "--delay-between-tasks",
    help="time the crawler should wait between subsequent tasks",
    metavar="SECONDS",
    type=float,
)
|
||||||
|
|
||||||
|
|
||||||
|
def load_crawler(
        args: argparse.Namespace,
        section: configparser.SectionProxy,
) -> None:
    """
    Copy the crawler options common to all crawlers into a config section.

    Every option that is None (not given on the command line) is skipped, so
    the corresponding key is not created in the section.
    """

    # Enum-valued options are stored via their ".value" attribute.
    for key in ("redownload", "on_conflict"):
        choice = getattr(args, key)
        if choice is not None:
            section[key] = choice.value

    rules = args.transform
    if rules is not None:
        # The value starts with a newline, so each rule occupies its own line
        # of the resulting multi-line value.
        section["transform"] = "\n" + "\n".join(rules)

    # Numeric options are stored as their str() representation.
    for key in (
            "max_concurrent_tasks",
            "max_concurrent_downloads",
            "delay_between_tasks",
    ):
        number = getattr(args, key)
        if number is not None:
            section[key] = str(number)
|
||||||
|
|
||||||
|
|
||||||
|
# Top-level command line parser. "command" defaults to None and is overridden
# by a subcommand's own set_defaults() when that subcommand is selected.
PARSER = argparse.ArgumentParser(parents=[GENERAL_PARSER])
PARSER.set_defaults(command=None)
SUBPARSERS = PARSER.add_subparsers(title="crawlers")


# The "local" subcommand: general options, common crawler options and the
# options specific to the local crawler.
LOCAL_CRAWLER = SUBPARSERS.add_parser(
    "local",
    parents=[GENERAL_PARSER, CRAWLER_PARSER],
)
LOCAL_CRAWLER.set_defaults(command="local")
LOCAL_CRAWLER_GROUP = LOCAL_CRAWLER.add_argument_group(
    description="arguments for the 'local' crawler",
    title="local crawler arguments",
)
# Positional arguments
LOCAL_CRAWLER_GROUP.add_argument(
    "target",
    help="directory to crawl",
    metavar="TARGET",
    type=Path,
)
LOCAL_CRAWLER_GROUP.add_argument(
    "output",
    help="output directory",
    metavar="OUTPUT",
    type=Path,
)
# Optional arguments
LOCAL_CRAWLER_GROUP.add_argument(
    "--crawl-delay",
    help="artificial delay to simulate for crawl requests",
    metavar="SECONDS",
    type=float,
)
LOCAL_CRAWLER_GROUP.add_argument(
    "--download-delay",
    help="artificial delay to simulate for download requests",
    metavar="SECONDS",
    type=float,
)
LOCAL_CRAWLER_GROUP.add_argument(
    "--download-speed",
    help="download speed to simulate",
    metavar="BYTES_PER_SECOND",
    type=int,
)
|
||||||
|
|
||||||
|
|
||||||
|
def load_local_crawler(
        args: argparse.Namespace,
        parser: configparser.ConfigParser,
) -> None:
    """
    Build the "crawl:local" section from the "local" subcommand's arguments.

    The section is (re)created empty, filled with the common crawler options
    via load_crawler() and then with the options of the local crawler itself.
    Optional arguments that are None are not written.
    """

    name = "crawl:local"
    parser[name] = {}
    section = parser[name]
    load_crawler(args, section)

    # Always present
    section["type"] = "local"
    section["target"] = str(args.target)
    section["output_dir"] = str(args.output)

    # Only present if given on the command line
    for key in ("crawl_delay", "download_delay", "download_speed"):
        value = getattr(args, key)
        if value is not None:
            section[key] = str(value)
|
||||||
|
|
||||||
|
|
||||||
|
def load_parser(
        args: argparse.Namespace,
) -> configparser.ConfigParser:
    """
    Create the config parser that corresponds to the command line arguments.

    Without a subcommand, the config file is loaded (from args.config if
    given). With the "local" subcommand, the config consists of a single
    section built from the arguments instead. In both cases the general
    options are applied afterwards and crawlers not selected via --crawler
    are removed.

    May throw a ConfigLoadException.
    """

    parser = configparser.ConfigParser()

    command = args.command
    if command is None:
        Config.load_parser(parser, path=args.config)
    elif command == "local":
        load_local_crawler(args, parser)

    load_general(args, parser)
    prune_crawlers(args, parser)

    return parser
|
||||||
|
|
||||||
|
|
||||||
|
def prune_crawlers(
        args: argparse.Namespace,
        parser: configparser.ConfigParser,
) -> None:
    """
    Remove all crawler sections that were not selected via --crawler.

    A crawler section is a section whose name starts with "crawl:"; the rest
    of the name is compared against the names in args.crawler. Other sections
    are never removed. If args.crawler is None or empty, nothing is removed.
    """

    selected = args.crawler
    if not selected:
        return

    prefix = "crawl:"
    for section_name in parser.sections():
        if not section_name.startswith(prefix):
            continue

        # TODO Use removeprefix() when switching to 3.9
        if section_name[len(prefix):] not in selected:
            parser.remove_section(section_name)

    # TODO Check if crawlers actually exist
|
||||||
|
|
||||||
|
|
||||||
def main() -> None:
|
def main() -> None:
|
||||||
parser = argparse.ArgumentParser()
|
args = PARSER.parse_args()
|
||||||
parser.add_argument(
|
|
||||||
"--config", "-c",
|
|
||||||
type=Path,
|
|
||||||
metavar="PATH",
|
|
||||||
help="specify custom config file path",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--dump-config",
|
|
||||||
nargs="?",
|
|
||||||
const=True,
|
|
||||||
type=Path,
|
|
||||||
metavar="PATH",
|
|
||||||
help="dump current configuration to a file and exit."
|
|
||||||
" Uses default config file path if no path is specified",
|
|
||||||
)
|
|
||||||
args = parser.parse_args()
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
config_parser = Config.load_parser(args.config)
|
config = Config(load_parser(args))
|
||||||
config = Config(config_parser)
|
|
||||||
except ConfigLoadException:
|
except ConfigLoadException:
|
||||||
exit(1)
|
exit(1)
|
||||||
|
|
||||||
if args.dump_config:
|
if args.dump_config is not None:
|
||||||
path = None if args.dump_config is True else args.dump_config
|
|
||||||
try:
|
try:
|
||||||
config.dump(path)
|
if args.dump_config is True:
|
||||||
|
config.dump()
|
||||||
|
elif args.dump_config == "-":
|
||||||
|
config.dump_to_stdout()
|
||||||
|
else:
|
||||||
|
config.dump(Path(args.dump_config))
|
||||||
except ConfigDumpException:
|
except ConfigDumpException:
|
||||||
exit(1)
|
exit(1)
|
||||||
exit()
|
exit()
|
||||||
|
@ -1,4 +1,6 @@
|
|||||||
|
import asyncio
|
||||||
import os
|
import os
|
||||||
|
import sys
|
||||||
from configparser import ConfigParser, SectionProxy
|
from configparser import ConfigParser, SectionProxy
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
@ -68,7 +70,7 @@ class Config:
|
|||||||
raise ConfigLoadException()
|
raise ConfigLoadException()
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def load_parser(path: Optional[Path] = None) -> ConfigParser:
|
def load_parser(parser: ConfigParser, path: Optional[Path] = None) -> None:
|
||||||
"""
|
"""
|
||||||
May throw a ConfigLoadException.
|
May throw a ConfigLoadException.
|
||||||
"""
|
"""
|
||||||
@ -76,8 +78,6 @@ class Config:
|
|||||||
if not path:
|
if not path:
|
||||||
path = Config._default_path()
|
path = Config._default_path()
|
||||||
|
|
||||||
parser = ConfigParser()
|
|
||||||
|
|
||||||
# Using config.read_file instead of config.read because config.read
|
# Using config.read_file instead of config.read because config.read
|
||||||
# would just ignore a missing file and carry on.
|
# would just ignore a missing file and carry on.
|
||||||
try:
|
try:
|
||||||
@ -90,8 +90,6 @@ class Config:
|
|||||||
except PermissionError:
|
except PermissionError:
|
||||||
Config._fail_load(path, "Insufficient permissions")
|
Config._fail_load(path, "Insufficient permissions")
|
||||||
|
|
||||||
return parser
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _fail_dump(path: Path, reason: str) -> None:
|
def _fail_dump(path: Path, reason: str) -> None:
|
||||||
print(f"Failed to dump config file to {path}")
|
print(f"Failed to dump config file to {path}")
|
||||||
@ -123,7 +121,7 @@ class Config:
|
|||||||
self._parser.write(f)
|
self._parser.write(f)
|
||||||
except FileExistsError:
|
except FileExistsError:
|
||||||
print("That file already exists.")
|
print("That file already exists.")
|
||||||
if prompt_yes_no("Overwrite it?", default=False):
|
if asyncio.run(prompt_yes_no("Overwrite it?", default=False)):
|
||||||
with open(path, "w") as f:
|
with open(path, "w") as f:
|
||||||
self._parser.write(f)
|
self._parser.write(f)
|
||||||
else:
|
else:
|
||||||
@ -133,6 +131,9 @@ class Config:
|
|||||||
except PermissionError:
|
except PermissionError:
|
||||||
self._fail_dump(path, "Insufficient permissions")
|
self._fail_dump(path, "Insufficient permissions")
|
||||||
|
|
||||||
|
def dump_to_stdout(self) -> None:
    """
    Write the current configuration to standard output.

    The text is produced by the underlying parser's write() method.
    """

    stream = sys.stdout
    self._parser.write(stream)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def default_section(self) -> SectionProxy:
|
def default_section(self) -> SectionProxy:
|
||||||
return self._parser[self._parser.default_section]
|
return self._parser[self._parser.default_section]
|
||||||
|
@ -117,37 +117,25 @@ class CrawlerSection(Section):
|
|||||||
|
|
||||||
def redownload(self) -> Redownload:
|
def redownload(self) -> Redownload:
|
||||||
value = self.s.get("redownload", "never-smart")
|
value = self.s.get("redownload", "never-smart")
|
||||||
if value == "never":
|
try:
|
||||||
return Redownload.NEVER
|
return Redownload.from_string(value)
|
||||||
elif value == "never-smart":
|
except ValueError as e:
|
||||||
return Redownload.NEVER_SMART
|
self.invalid_value(
|
||||||
elif value == "always":
|
"redownload",
|
||||||
return Redownload.ALWAYS
|
value,
|
||||||
elif value == "always-smart":
|
str(e).capitalize(),
|
||||||
return Redownload.ALWAYS_SMART
|
)
|
||||||
|
|
||||||
self.invalid_value(
|
|
||||||
"redownload",
|
|
||||||
value,
|
|
||||||
"Must be 'never', 'never-smart', 'always' or 'always-smart'"
|
|
||||||
)
|
|
||||||
|
|
||||||
def on_conflict(self) -> OnConflict:
|
def on_conflict(self) -> OnConflict:
|
||||||
value = self.s.get("on_conflict", "prompt")
|
value = self.s.get("on_conflict", "prompt")
|
||||||
if value == "prompt":
|
try:
|
||||||
return OnConflict.PROMPT
|
return OnConflict.from_string(value)
|
||||||
elif value == "local-first":
|
except ValueError as e:
|
||||||
return OnConflict.LOCAL_FIRST
|
self.invalid_value(
|
||||||
elif value == "remote-first":
|
"on_conflict",
|
||||||
return OnConflict.REMOTE_FIRST
|
value,
|
||||||
elif value == "no-delete":
|
str(e).capitalize(),
|
||||||
return OnConflict.NO_DELETE
|
)
|
||||||
|
|
||||||
self.invalid_value(
|
|
||||||
"on_conflict",
|
|
||||||
value,
|
|
||||||
"Must be 'prompt', 'local-first', 'remote-first' or 'no-delete'",
|
|
||||||
)
|
|
||||||
|
|
||||||
def transform(self) -> str:
|
def transform(self) -> str:
|
||||||
return self.s.get("transform", "")
|
return self.s.get("transform", "")
|
||||||
|
@ -32,6 +32,14 @@ class Redownload(Enum):
|
|||||||
ALWAYS = "always"
|
ALWAYS = "always"
|
||||||
ALWAYS_SMART = "always-smart"
|
ALWAYS_SMART = "always-smart"
|
||||||
|
|
||||||
|
@staticmethod
def from_string(string: str) -> "Redownload":
    """
    Convert a string to the Redownload member that has this value.

    Raises a ValueError whose message lists the accepted values if the
    string does not correspond to any member.
    """

    try:
        return Redownload(string)
    except ValueError:
        # The enum's own error is fully replaced by the message below, so
        # it is dropped from the traceback instead of being chained.
        raise ValueError("must be one of 'never', 'never-smart',"
                         " 'always', 'always-smart'") from None
|
||||||
|
|
||||||
|
|
||||||
class OnConflict(Enum):
|
class OnConflict(Enum):
|
||||||
PROMPT = "prompt"
|
PROMPT = "prompt"
|
||||||
@ -39,6 +47,14 @@ class OnConflict(Enum):
|
|||||||
REMOTE_FIRST = "remote-first"
|
REMOTE_FIRST = "remote-first"
|
||||||
NO_DELETE = "no-delete"
|
NO_DELETE = "no-delete"
|
||||||
|
|
||||||
|
@staticmethod
def from_string(string: str) -> "OnConflict":
    """
    Convert a string to the OnConflict member that has this value.

    Raises a ValueError whose message lists the accepted values if the
    string does not correspond to any member.
    """

    try:
        return OnConflict(string)
    except ValueError:
        # The enum's own error is fully replaced by the message below, so
        # it is dropped from the traceback instead of being chained.
        raise ValueError("must be one of 'prompt', 'local-first',"
                         " 'remote-first', 'no-delete'") from None
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class Heuristics:
|
class Heuristics:
|
||||||
|
Loading…
Reference in New Issue
Block a user