mirror of https://github.com/Garmelon/PFERD.git
Compare commits
4 Commits
db86d23989
...
29251fa003
Author | SHA1 | Date |
---|---|---|
Julius Rüberg | 29251fa003 | |
Mr. Pine | e41a22149e | |
Julius Rüberg | 77c1f1516c | |
Julius Rüberg | 9e12e96d90 |
|
@ -31,6 +31,9 @@ ambiguous situations.
|
|||
### Added
|
||||
- `no-delete-prompt-overwrite` conflict resolution strategy
|
||||
- support for ILIAS learning modules
|
||||
- `show_not_deleted` option to stop printing the "Not Deleted" status or report
|
||||
message. This combines nicely with the `no-delete-prompt-overwrite` strategy,
|
||||
causing PFERD to mostly ignore local-only files.
|
||||
|
||||
## 3.4.3 - 2022-11-29
|
||||
|
||||
|
|
12
CONFIG.md
12
CONFIG.md
|
@ -26,6 +26,9 @@ default values for the other sections.
|
|||
`Added ...`) while running a crawler. (Default: `yes`)
|
||||
- `report`: Whether PFERD should print a report of added, changed and deleted
|
||||
local files for all crawlers before exiting. (Default: `yes`)
|
||||
- `show_not_deleted`: Whether PFERD should print messages in status and report
|
||||
when a local-only file wasn't deleted. Combines nicely with the
|
||||
`no-delete-prompt-overwrite` conflict resolution strategy. (Default: `yes`)
|
||||
- `share_cookies`: Whether crawlers should share cookies where applicable. For
|
||||
example, some crawlers share cookies if they crawl the same website using the
|
||||
same account. (Default: `yes`)
|
||||
|
@ -75,8 +78,9 @@ common to all crawlers:
|
|||
using `prompt` and always choosing "yes".
|
||||
- `no-delete`: Never delete local files, but overwrite local files if the
|
||||
remote file is different.
|
||||
- `no-delete-prompt-overwrite`: Never delete local files, but prompt to overwrite local files if the
|
||||
remote file is different.
|
||||
- `no-delete-prompt-overwrite`: Never delete local files, but prompt to
|
||||
overwrite local files if the remote file is different. Combines nicely
|
||||
with the `show_not_deleted` option.
|
||||
- `transform`: Rules for renaming and excluding certain files and directories.
|
||||
For more details, see [this section](#transformation-rules). (Default: empty)
|
||||
- `tasks`: The maximum number of concurrent tasks (such as crawling or
|
||||
|
@ -88,6 +92,9 @@ common to all crawlers:
|
|||
load for the crawl target. (Default: `0.0`)
|
||||
- `windows_paths`: Whether PFERD should find alternative names for paths that
|
||||
are invalid on Windows. (Default: `yes` on Windows, `no` otherwise)
|
||||
- `aliases`: List of strings that are treated as aliases for this crawler when invoking PFERD with
|
||||
the `--crawler` or `-C` flag. If there is more than one crawl section with
|
||||
the same alias, all of them are selected. This lets you group different crawlers.
|
||||
|
||||
Some crawlers may also require credentials for authentication. To configure how
|
||||
the crawler obtains its credentials, the `auth` option is used. It is set to the
|
||||
|
@ -102,6 +109,7 @@ username = foo
|
|||
password = bar
|
||||
|
||||
[crawl:something]
|
||||
aliases = [sth, some]
|
||||
type = some-complex-crawler
|
||||
auth = auth:example
|
||||
on_conflict = no-delete
|
||||
|
|
|
@ -47,6 +47,8 @@ def configure_logging_from_args(args: argparse.Namespace) -> None:
|
|||
log.output_explain = args.explain
|
||||
if args.status is not None:
|
||||
log.output_status = args.status
|
||||
if args.show_not_deleted is not None:
|
||||
log.output_not_deleted = args.show_not_deleted
|
||||
if args.report is not None:
|
||||
log.output_report = args.report
|
||||
|
||||
|
@ -72,6 +74,8 @@ def configure_logging_from_config(args: argparse.Namespace, config: Config) -> N
|
|||
log.output_status = config.default_section.status()
|
||||
if args.report is None:
|
||||
log.output_report = config.default_section.report()
|
||||
if args.show_not_deleted is None:
|
||||
log.output_not_deleted = config.default_section.show_not_deleted()
|
||||
except ConfigOptionError as e:
|
||||
log.error(str(e))
|
||||
sys.exit(1)
|
||||
|
|
|
@ -215,6 +215,11 @@ PARSER.add_argument(
|
|||
action=BooleanOptionalAction,
|
||||
help="whether crawlers should share cookies where applicable"
|
||||
)
|
||||
PARSER.add_argument(
|
||||
"--show-not-deleted",
|
||||
action=BooleanOptionalAction,
|
||||
help="print messages in status and report when PFERD did not delete a local only file"
|
||||
)
|
||||
|
||||
|
||||
def load_default_section(
|
||||
|
@ -233,6 +238,7 @@ def load_default_section(
|
|||
section["report"] = "yes" if args.report else "no"
|
||||
if args.share_cookies is not None:
|
||||
section["share_cookies"] = "yes" if args.share_cookies else "no"
|
||||
|
||||
if args.show_not_deleted is not None:
|
||||
section["show_not_deleted"] = "yes" if args.show_not_deleted else "no"
|
||||
|
||||
SUBPARSERS = PARSER.add_subparsers(title="crawlers")
|
||||
|
|
|
@ -82,6 +82,9 @@ class DefaultSection(Section):
|
|||
def report(self) -> bool:
|
||||
return self.s.getboolean("report", fallback=True)
|
||||
|
||||
def show_not_deleted(self) -> bool:
|
||||
return self.s.getboolean("show_not_deleted", fallback=True)
|
||||
|
||||
def share_cookies(self) -> bool:
|
||||
return self.s.getboolean("share_cookies", fallback=True)
|
||||
|
||||
|
|
|
@ -59,6 +59,7 @@ class Log:
|
|||
# Whether different parts of the output are enabled or disabled
|
||||
self.output_explain = False
|
||||
self.output_status = True
|
||||
self.output_not_deleted = True
|
||||
self.output_report = True
|
||||
|
||||
def _update_live(self) -> None:
|
||||
|
@ -207,6 +208,17 @@ directly or as a GitHub issue: https://github.com/Garmelon/PFERD/issues/new
|
|||
action = escape(f"{action:<{self.STATUS_WIDTH}}")
|
||||
self.print(f"{style}{action}[/] {escape(text)} {suffix}")
|
||||
|
||||
def not_deleted(self, style: str, action: str, text: str, suffix: str = "") -> None:
|
||||
"""
|
||||
Print a message for a local only file that wasn't
|
||||
deleted while crawling. Allows markup in the "style"
|
||||
argument which will be applied to the "action" string.
|
||||
"""
|
||||
|
||||
if self.output_status and self.output_not_deleted:
|
||||
action = escape(f"{action:<{self.STATUS_WIDTH}}")
|
||||
self.print(f"{style}{action}[/] {escape(text)} {suffix}")
|
||||
|
||||
def report(self, text: str) -> None:
|
||||
"""
|
||||
Print a report after crawling. Allows markup.
|
||||
|
@ -215,6 +227,14 @@ directly or as a GitHub issue: https://github.com/Garmelon/PFERD/issues/new
|
|||
if self.output_report:
|
||||
self.print(text)
|
||||
|
||||
def report_not_deleted(self, text: str) -> None:
|
||||
"""
|
||||
Print a report for a local only file that wasn't deleted after crawling. Allows markup.
|
||||
"""
|
||||
|
||||
if self.output_report and self.output_not_deleted:
|
||||
self.print(text)
|
||||
|
||||
@contextmanager
|
||||
def _bar(
|
||||
self,
|
||||
|
|
|
@ -496,7 +496,7 @@ class OutputDirectory:
|
|||
except OSError:
|
||||
pass
|
||||
else:
|
||||
log.status("[bold bright_magenta]", "Not deleted", fmt_path(pure))
|
||||
log.not_deleted("[bold bright_magenta]", "Not deleted", fmt_path(pure))
|
||||
self._report.not_delete_file(pure)
|
||||
|
||||
def load_prev_report(self) -> None:
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
from pathlib import Path
|
||||
from typing import Dict, List, Optional
|
||||
from typing import Dict, List, Optional, Set
|
||||
|
||||
from rich.markup import escape
|
||||
|
||||
|
@ -43,16 +43,24 @@ class Pferd:
|
|||
|
||||
crawl_sections = [name for name, _ in config.crawl_sections()]
|
||||
|
||||
crawlers_to_run = [] # With crawl: prefix
|
||||
crawlers_to_run = set() # With crawl: prefix
|
||||
unknown_names = [] # Without crawl: prefix
|
||||
|
||||
for name in cli_crawlers:
|
||||
section_name = f"crawl:{name}"
|
||||
if section_name in crawl_sections:
|
||||
log.explain(f"Crawler section named {section_name!r} exists")
|
||||
crawlers_to_run.append(section_name)
|
||||
else:
|
||||
log.explain(f"There's no crawler section named {section_name!r}")
|
||||
crawlers_to_run.add(section_name)
|
||||
# interpret name as alias of a crawler
|
||||
alias_names = self._find_crawlers_by_alias(name, config)
|
||||
if alias_names:
|
||||
crawlers_to_run.update(alias_names)
|
||||
log.explain_topic(f"Crawler alias {name!r} found corresponding crawler sections:")
|
||||
for alias_name in alias_names:
|
||||
log.explain(f"Crawler section named {alias_name!r} with alias {name!r} exists")
|
||||
|
||||
if not section_name in crawl_sections and not alias_names:
|
||||
log.explain(f"There's neither a crawler section named {section_name!r} nor does a crawler with alias {name!r} exist.")
|
||||
unknown_names.append(name)
|
||||
|
||||
if unknown_names:
|
||||
|
@ -65,6 +73,14 @@ class Pferd:
|
|||
|
||||
return crawlers_to_run
|
||||
|
||||
def _find_crawlers_by_alias(self, alias: str, config: Config) -> Set[str]:
|
||||
alias_names = set()
|
||||
for (section_name, section) in config.crawl_sections():
|
||||
section_aliases = section.get("aliases", [])
|
||||
if alias in section_aliases:
|
||||
alias_names.add(section_name)
|
||||
return alias_names
|
||||
|
||||
def _find_crawlers_to_run(
|
||||
self,
|
||||
config: Config,
|
||||
|
@ -180,7 +196,7 @@ class Pferd:
|
|||
log.report(f" [bold bright_magenta]Deleted[/] {fmt_path(path)}")
|
||||
for path in sorted(crawler.report.not_deleted_files):
|
||||
something_changed = True
|
||||
log.report(f" [bold bright_magenta]Not deleted[/] {fmt_path(path)}")
|
||||
log.report_not_deleted(f" [bold bright_magenta]Not deleted[/] {fmt_path(path)}")
|
||||
|
||||
for warning in crawler.report.encountered_warnings:
|
||||
something_changed = True
|
||||
|
|
Loading…
Reference in New Issue