From c0cecf8363eb296dad5a04e2c2685d4f5a2080b2 Mon Sep 17 00:00:00 2001 From: Joscha Date: Sun, 23 May 2021 16:22:58 +0200 Subject: [PATCH] Log crawl and download actions more extensively --- PFERD/crawler.py | 29 ++++++++++++++++------------- PFERD/output_dir.py | 8 +++++++- 2 files changed, 23 insertions(+), 14 deletions(-) diff --git a/PFERD/crawler.py b/PFERD/crawler.py index c1184c0..749510c 100644 --- a/PFERD/crawler.py +++ b/PFERD/crawler.py @@ -90,31 +90,38 @@ def anoncritical(f: AWrapped) -> AWrapped: class CrawlToken(ReusableAsyncContextManager[ProgressBar]): - def __init__(self, limiter: Limiter, desc: str): + def __init__(self, limiter: Limiter, path: PurePath): super().__init__() self._limiter = limiter - self._desc = desc + self._path = path async def _on_aenter(self) -> ProgressBar: + bar_desc = f"[bold bright_cyan]Crawling[/] {escape(fmt_path(self._path))}" + after_desc = f"[bold bright_cyan]Crawled[/] {escape(fmt_path(self._path))}" + + self._stack.callback(lambda: log.action(after_desc)) await self._stack.enter_async_context(self._limiter.limit_crawl()) - bar = self._stack.enter_context(log.crawl_bar(self._desc)) + bar = self._stack.enter_context(log.crawl_bar(bar_desc)) return bar class DownloadToken(ReusableAsyncContextManager[Tuple[ProgressBar, FileSink]]): - def __init__(self, limiter: Limiter, fs_token: FileSinkToken, desc: str): + def __init__(self, limiter: Limiter, fs_token: FileSinkToken, path: PurePath): super().__init__() self._limiter = limiter self._fs_token = fs_token - self._desc = desc + self._path = path async def _on_aenter(self) -> Tuple[ProgressBar, FileSink]: + bar_desc = f"[bold bright_cyan]Downloading[/] {escape(fmt_path(self._path))}" + # The "Downloaded ..." message is printed in the output dir, not here + await self._stack.enter_async_context(self._limiter.limit_crawl()) sink = await self._stack.enter_async_context(self._fs_token) - bar = self._stack.enter_context(log.crawl_bar(self._desc)) + bar = self._stack.enter_context(log.crawl_bar(bar_desc)) return bar, sink @@ -229,9 +236,7 @@ class Crawler(ABC): return None log.explain("Answer: Yes") - - desc = f"[bold bright_cyan]Crawling[/] {escape(fmt_path(path))}" - return CrawlToken(self._limiter, desc) + return CrawlToken(self._limiter, path) async def download( self, @@ -247,15 +252,13 @@ class Crawler(ABC): log.explain("Answer: No") return None - fs_token = await self._output_dir.download(transformed_path, mtime, redownload, on_conflict) + fs_token = await self._output_dir.download(path, transformed_path, mtime, redownload, on_conflict) if fs_token is None: log.explain("Answer: No") return None log.explain("Answer: Yes") - - desc = f"[bold bright_cyan]Downloading[/] {escape(fmt_path(path))}" - return DownloadToken(self._limiter, fs_token, desc) + return DownloadToken(self._limiter, fs_token, path) async def _cleanup(self) -> None: log.explain_topic("Decision: Clean up files") diff --git a/PFERD/output_dir.py b/PFERD/output_dir.py index bf908f8..b07fe3e 100644 --- a/PFERD/output_dir.py +++ b/PFERD/output_dir.py @@ -78,6 +78,7 @@ class FileSink: @dataclass class DownloadInfo: + remote_path: PurePath path: PurePath local_path: Path tmp_path: Path @@ -96,6 +97,7 @@ class FileSinkToken(ReusableAsyncContextManager[FileSink]): def __init__( self, output_dir: "OutputDirectory", + remote_path: PurePath, path: PurePath, local_path: Path, heuristics: Heuristics, @@ -104,6 +106,7 @@ class FileSinkToken(ReusableAsyncContextManager[FileSink]): super().__init__() self._output_dir = output_dir + self._remote_path = remote_path self._path = path self._local_path = local_path self._heuristics = heuristics @@ -115,6 +118,7 @@ class FileSinkToken(ReusableAsyncContextManager[FileSink]): async def after_download() -> None: await self._output_dir._after_download(DownloadInfo( + self._remote_path, self._path, self._local_path, tmp_path, @@ -317,6 +321,7 @@ class OutputDirectory: async def download( self, + remote_path: PurePath, path: PurePath, mtime: Optional[datetime] = None, redownload: Optional[Redownload] = None, @@ -363,7 +368,7 @@ class OutputDirectory: # Ensure parent directory exists local_path.parent.mkdir(parents=True, exist_ok=True) - return FileSinkToken(self, path, local_path, heuristics, on_conflict) + return FileSinkToken(self, remote_path, path, local_path, heuristics, on_conflict) def _update_metadata(self, info: DownloadInfo) -> None: if mtime := info.heuristics.mtime: @@ -379,6 +384,7 @@ class OutputDirectory: async def _after_download(self, info: DownloadInfo) -> None: with self._ensure_deleted(info.tmp_path): + log.action(f"[bold bright_cyan]Downloaded[/] {fmt_path(info.remote_path)}") log.explain_topic(f"Processing downloaded file for {fmt_path(info.path)}") changed = False