Log crawl and download actions more extensively

This commit is contained in:
Joscha 2021-05-23 16:22:58 +02:00
parent b998339002
commit c0cecf8363
2 changed files with 23 additions and 14 deletions

View File

@ -90,31 +90,38 @@ def anoncritical(f: AWrapped) -> AWrapped:
class CrawlToken(ReusableAsyncContextManager[ProgressBar]):
    """Reusable async context manager wrapping the crawl of a single path.

    Entering the token acquires the limiter's crawl context and opens a crawl
    progress bar labelled with the path; the bar is the value handed back to
    the caller.
    """

    def __init__(self, limiter: Limiter, path: PurePath):
        """Remember the limiter to acquire from and the path being crawled."""
        super().__init__()

        self._path = path
        self._limiter = limiter

    async def _on_aenter(self) -> ProgressBar:
        """Log the crawl, take a crawl slot and return the progress bar."""
        shown_path = escape(fmt_path(self._path))
        crawling_msg = f"[bold bright_cyan]Crawling[/] {shown_path}"
        crawled_msg = f"[bold bright_cyan]Crawled[/] {shown_path}"

        def announce_crawled() -> None:
            log.action(crawled_msg)

        # Registered before the limiter and the bar, so it sits underneath
        # them on the stack (presumably run last on exit - confirm against
        # ReusableAsyncContextManager).
        self._stack.callback(announce_crawled)

        await self._stack.enter_async_context(self._limiter.limit_crawl())
        return self._stack.enter_context(log.crawl_bar(crawling_msg))
class DownloadToken(ReusableAsyncContextManager[Tuple[ProgressBar, FileSink]]):
    """Reusable async context manager wrapping the download of a single file.

    Entering the token acquires a limiter context, enters the file sink token
    and opens a progress bar labelled with the path. The caller receives the
    ``(progress bar, file sink)`` pair.
    """

    def __init__(self, limiter: Limiter, fs_token: FileSinkToken, path: PurePath):
        """Store the limiter, the file sink token and the path shown to the user."""
        super().__init__()

        self._path = path
        self._fs_token = fs_token
        self._limiter = limiter

    async def _on_aenter(self) -> Tuple[ProgressBar, FileSink]:
        """Acquire the limiter, open the sink and the bar, and return both."""
        downloading_msg = f"[bold bright_cyan]Downloading[/] {escape(fmt_path(self._path))}"
        # The "Downloaded ..." message is printed in the output dir, not here

        # NOTE(review): a download token acquires the *crawl* limit and opens a
        # *crawl* bar. If the limiter/logger offer download-specific variants,
        # this is probably a copy-paste slip - confirm before changing.
        await self._stack.enter_async_context(self._limiter.limit_crawl())
        file_sink = await self._stack.enter_async_context(self._fs_token)
        progress = self._stack.enter_context(log.crawl_bar(downloading_msg))

        return progress, file_sink
@ -229,9 +236,7 @@ class Crawler(ABC):
return None return None
log.explain("Answer: Yes") log.explain("Answer: Yes")
return CrawlToken(self._limiter, path)
desc = f"[bold bright_cyan]Crawling[/] {escape(fmt_path(path))}"
return CrawlToken(self._limiter, desc)
async def download( async def download(
self, self,
@ -247,15 +252,13 @@ class Crawler(ABC):
log.explain("Answer: No") log.explain("Answer: No")
return None return None
fs_token = await self._output_dir.download(transformed_path, mtime, redownload, on_conflict) fs_token = await self._output_dir.download(path, transformed_path, mtime, redownload, on_conflict)
if fs_token is None: if fs_token is None:
log.explain("Answer: No") log.explain("Answer: No")
return None return None
log.explain("Answer: Yes") log.explain("Answer: Yes")
return DownloadToken(self._limiter, fs_token, path)
desc = f"[bold bright_cyan]Downloading[/] {escape(fmt_path(path))}"
return DownloadToken(self._limiter, fs_token, desc)
async def _cleanup(self) -> None: async def _cleanup(self) -> None:
log.explain_topic("Decision: Clean up files") log.explain_topic("Decision: Clean up files")

View File

@ -78,6 +78,7 @@ class FileSink:
@dataclass @dataclass
class DownloadInfo: class DownloadInfo:
remote_path: PurePath
path: PurePath path: PurePath
local_path: Path local_path: Path
tmp_path: Path tmp_path: Path
@ -96,6 +97,7 @@ class FileSinkToken(ReusableAsyncContextManager[FileSink]):
def __init__( def __init__(
self, self,
output_dir: "OutputDirectory", output_dir: "OutputDirectory",
remote_path: PurePath,
path: PurePath, path: PurePath,
local_path: Path, local_path: Path,
heuristics: Heuristics, heuristics: Heuristics,
@ -104,6 +106,7 @@ class FileSinkToken(ReusableAsyncContextManager[FileSink]):
super().__init__() super().__init__()
self._output_dir = output_dir self._output_dir = output_dir
self._remote_path = remote_path
self._path = path self._path = path
self._local_path = local_path self._local_path = local_path
self._heuristics = heuristics self._heuristics = heuristics
@ -115,6 +118,7 @@ class FileSinkToken(ReusableAsyncContextManager[FileSink]):
async def after_download() -> None: async def after_download() -> None:
await self._output_dir._after_download(DownloadInfo( await self._output_dir._after_download(DownloadInfo(
self._remote_path,
self._path, self._path,
self._local_path, self._local_path,
tmp_path, tmp_path,
@ -317,6 +321,7 @@ class OutputDirectory:
async def download( async def download(
self, self,
remote_path: PurePath,
path: PurePath, path: PurePath,
mtime: Optional[datetime] = None, mtime: Optional[datetime] = None,
redownload: Optional[Redownload] = None, redownload: Optional[Redownload] = None,
@ -363,7 +368,7 @@ class OutputDirectory:
# Ensure parent directory exists # Ensure parent directory exists
local_path.parent.mkdir(parents=True, exist_ok=True) local_path.parent.mkdir(parents=True, exist_ok=True)
return FileSinkToken(self, path, local_path, heuristics, on_conflict) return FileSinkToken(self, remote_path, path, local_path, heuristics, on_conflict)
def _update_metadata(self, info: DownloadInfo) -> None: def _update_metadata(self, info: DownloadInfo) -> None:
if mtime := info.heuristics.mtime: if mtime := info.heuristics.mtime:
@ -379,6 +384,7 @@ class OutputDirectory:
async def _after_download(self, info: DownloadInfo) -> None: async def _after_download(self, info: DownloadInfo) -> None:
with self._ensure_deleted(info.tmp_path): with self._ensure_deleted(info.tmp_path):
log.action(f"[bold bright_cyan]Downloaded[/] {fmt_path(info.remote_path)}")
log.explain_topic(f"Processing downloaded file for {fmt_path(info.path)}") log.explain_topic(f"Processing downloaded file for {fmt_path(info.path)}")
changed = False changed = False