mirror of
https://github.com/Garmelon/PFERD.git
synced 2023-12-21 10:23:01 +01:00
Log crawl and download actions more extensively
This commit is contained in:
parent
b998339002
commit
c0cecf8363
@ -90,31 +90,38 @@ def anoncritical(f: AWrapped) -> AWrapped:
|
||||
|
||||
|
||||
class CrawlToken(ReusableAsyncContextManager[ProgressBar]):
|
||||
def __init__(self, limiter: Limiter, desc: str):
|
||||
def __init__(self, limiter: Limiter, path: PurePath):
|
||||
super().__init__()
|
||||
|
||||
self._limiter = limiter
|
||||
self._desc = desc
|
||||
self._path = path
|
||||
|
||||
async def _on_aenter(self) -> ProgressBar:
|
||||
bar_desc = f"[bold bright_cyan]Crawling[/] {escape(fmt_path(self._path))}"
|
||||
after_desc = f"[bold bright_cyan]Crawled[/] {escape(fmt_path(self._path))}"
|
||||
|
||||
self._stack.callback(lambda: log.action(after_desc))
|
||||
await self._stack.enter_async_context(self._limiter.limit_crawl())
|
||||
bar = self._stack.enter_context(log.crawl_bar(self._desc))
|
||||
bar = self._stack.enter_context(log.crawl_bar(bar_desc))
|
||||
|
||||
return bar
|
||||
|
||||
|
||||
class DownloadToken(ReusableAsyncContextManager[Tuple[ProgressBar, FileSink]]):
|
||||
def __init__(self, limiter: Limiter, fs_token: FileSinkToken, desc: str):
|
||||
def __init__(self, limiter: Limiter, fs_token: FileSinkToken, path: PurePath):
|
||||
super().__init__()
|
||||
|
||||
self._limiter = limiter
|
||||
self._fs_token = fs_token
|
||||
self._desc = desc
|
||||
self._path = path
|
||||
|
||||
async def _on_aenter(self) -> Tuple[ProgressBar, FileSink]:
|
||||
bar_desc = f"[bold bright_cyan]Downloading[/] {escape(fmt_path(self._path))}"
|
||||
# The "Downloaded ..." message is printed in the output dir, not here
|
||||
|
||||
await self._stack.enter_async_context(self._limiter.limit_crawl())
|
||||
sink = await self._stack.enter_async_context(self._fs_token)
|
||||
bar = self._stack.enter_context(log.crawl_bar(self._desc))
|
||||
bar = self._stack.enter_context(log.crawl_bar(bar_desc))
|
||||
|
||||
return bar, sink
|
||||
|
||||
@ -229,9 +236,7 @@ class Crawler(ABC):
|
||||
return None
|
||||
|
||||
log.explain("Answer: Yes")
|
||||
|
||||
desc = f"[bold bright_cyan]Crawling[/] {escape(fmt_path(path))}"
|
||||
return CrawlToken(self._limiter, desc)
|
||||
return CrawlToken(self._limiter, path)
|
||||
|
||||
async def download(
|
||||
self,
|
||||
@ -247,15 +252,13 @@ class Crawler(ABC):
|
||||
log.explain("Answer: No")
|
||||
return None
|
||||
|
||||
fs_token = await self._output_dir.download(transformed_path, mtime, redownload, on_conflict)
|
||||
fs_token = await self._output_dir.download(path, transformed_path, mtime, redownload, on_conflict)
|
||||
if fs_token is None:
|
||||
log.explain("Answer: No")
|
||||
return None
|
||||
|
||||
log.explain("Answer: Yes")
|
||||
|
||||
desc = f"[bold bright_cyan]Downloading[/] {escape(fmt_path(path))}"
|
||||
return DownloadToken(self._limiter, fs_token, desc)
|
||||
return DownloadToken(self._limiter, fs_token, path)
|
||||
|
||||
async def _cleanup(self) -> None:
|
||||
log.explain_topic("Decision: Clean up files")
|
||||
|
@ -78,6 +78,7 @@ class FileSink:
|
||||
|
||||
@dataclass
|
||||
class DownloadInfo:
|
||||
remote_path: PurePath
|
||||
path: PurePath
|
||||
local_path: Path
|
||||
tmp_path: Path
|
||||
@ -96,6 +97,7 @@ class FileSinkToken(ReusableAsyncContextManager[FileSink]):
|
||||
def __init__(
|
||||
self,
|
||||
output_dir: "OutputDirectory",
|
||||
remote_path: PurePath,
|
||||
path: PurePath,
|
||||
local_path: Path,
|
||||
heuristics: Heuristics,
|
||||
@ -104,6 +106,7 @@ class FileSinkToken(ReusableAsyncContextManager[FileSink]):
|
||||
super().__init__()
|
||||
|
||||
self._output_dir = output_dir
|
||||
self._remote_path = remote_path
|
||||
self._path = path
|
||||
self._local_path = local_path
|
||||
self._heuristics = heuristics
|
||||
@ -115,6 +118,7 @@ class FileSinkToken(ReusableAsyncContextManager[FileSink]):
|
||||
|
||||
async def after_download() -> None:
|
||||
await self._output_dir._after_download(DownloadInfo(
|
||||
self._remote_path,
|
||||
self._path,
|
||||
self._local_path,
|
||||
tmp_path,
|
||||
@ -317,6 +321,7 @@ class OutputDirectory:
|
||||
|
||||
async def download(
|
||||
self,
|
||||
remote_path: PurePath,
|
||||
path: PurePath,
|
||||
mtime: Optional[datetime] = None,
|
||||
redownload: Optional[Redownload] = None,
|
||||
@ -363,7 +368,7 @@ class OutputDirectory:
|
||||
# Ensure parent directory exists
|
||||
local_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
return FileSinkToken(self, path, local_path, heuristics, on_conflict)
|
||||
return FileSinkToken(self, remote_path, path, local_path, heuristics, on_conflict)
|
||||
|
||||
def _update_metadata(self, info: DownloadInfo) -> None:
|
||||
if mtime := info.heuristics.mtime:
|
||||
@ -379,6 +384,7 @@ class OutputDirectory:
|
||||
|
||||
async def _after_download(self, info: DownloadInfo) -> None:
|
||||
with self._ensure_deleted(info.tmp_path):
|
||||
log.action(f"[bold bright_cyan]Downloaded[/] {fmt_path(info.remote_path)}")
|
||||
log.explain_topic(f"Processing downloaded file for {fmt_path(info.path)}")
|
||||
|
||||
changed = False
|
||||
|
Loading…
Reference in New Issue
Block a user