diff --git a/PFERD/__main__.py b/PFERD/__main__.py
index 59004ae..26a1dc4 100644
--- a/PFERD/__main__.py
+++ b/PFERD/__main__.py
@@ -119,7 +119,7 @@ def main() -> None:
 
     try:
         loop = asyncio.get_event_loop()
-        loop.run_until_complete(pferd.run())
+        loop.run_until_complete(pferd.run(args.debug_transforms))
         loop.run_until_complete(asyncio.sleep(1))
         loop.close()
     except ConfigOptionError as e:
diff --git a/PFERD/cli/parser.py b/PFERD/cli/parser.py
index e6b0671..269a19a 100644
--- a/PFERD/cli/parser.py
+++ b/PFERD/cli/parser.py
@@ -164,6 +164,11 @@ PARSER.add_argument(
     help="dump current configuration to a file and exit."
     " Use '-' as path to print to stdout instead"
 )
+PARSER.add_argument(
+    "--debug-transforms",
+    action="store_true",
+    help="apply transform rules to files of previous run"
+)
 PARSER.add_argument(
     "--crawler", "-C",
     action="append",
diff --git a/PFERD/crawl/crawler.py b/PFERD/crawl/crawler.py
index 321daa2..aa0f81c 100644
--- a/PFERD/crawl/crawler.py
+++ b/PFERD/crawl/crawler.py
@@ -3,7 +3,7 @@ import os
 from abc import ABC, abstractmethod
 from datetime import datetime
 from pathlib import Path, PurePath
-from typing import Any, Awaitable, Callable, Dict, List, Optional, Sequence, Tuple, TypeVar
+from typing import Any, Awaitable, Callable, Dict, List, Optional, Sequence, Set, Tuple, TypeVar
 
 from rich.markup import escape
 
@@ -334,3 +334,29 @@ class Crawler(ABC):
         """
 
         pass
+
+    def debug_transforms(self) -> None:
+        """
+        Apply the transform rules to the paths known from the previous run.
+
+        Loads the previous report; if none is available, a warning is logged
+        and nothing else happens. Every known file is transformed together with
+        its parent paths (outermost first), and each path is transformed once.
+        """
+
+        self._output_dir.load_prev_report()
+
+        if not self.prev_report:
+            log.warn("Couldn't find or load old report")
+            return
+
+        seen: Set[PurePath] = set()
+        for known in self.prev_report.known_files:
+            looking_at = list(reversed(known.parents)) + [known]
+            for path in looking_at:
+                if path in seen:
+                    continue
+
+                log.explain_topic(f"Transforming {fmt_path(path)}")
+                self._transformer.transform(path)
+                seen.add(path)
diff --git a/PFERD/pferd.py b/PFERD/pferd.py
index 7f4d6ff..ac373cf 100644
--- a/PFERD/pferd.py
+++ b/PFERD/pferd.py
@@ -92,7 +92,18 @@ class Pferd:
             if isinstance(crawler, KitIliasWebCrawler):
                 crawler.share_cookies(kit_ilias_web_paths)
 
-    async def run(self) -> None:
+    def debug_transforms(self) -> None:
+        """
+        Run the transform debugging of each crawler selected for this run.
+        """
+
+        for name in self._crawlers_to_run:
+            crawler = self._crawlers[name]
+            log.print("")
+            log.print(f"[bold bright_cyan]Debugging transforms[/] for {escape(name)}")
+            crawler.debug_transforms()
+
+    async def run(self, debug_transforms: bool) -> None:
         """
         May throw ConfigOptionError.
         """
@@ -103,6 +114,12 @@ class Pferd:
         self._load_authenticators()
         self._load_crawlers()
 
+        if debug_transforms:
+            log.output_explain = True
+            log.output_report = False
+            self.debug_transforms()
+            return
+
         log.print("")
 
         for name in self._crawlers_to_run: