mirror of
				https://github.com/Garmelon/PFERD.git
				synced 2025-11-04 06:32:52 +01:00 
			
		
		
		
	Log crawl and download actions more extensively
This commit is contained in:
		@@ -90,31 +90,38 @@ def anoncritical(f: AWrapped) -> AWrapped:
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class CrawlToken(ReusableAsyncContextManager[ProgressBar]):
    """Reusable async context manager wrapping the crawl of a single path.

    On entry it enters the limiter's ``limit_crawl()`` context and opens a
    crawl progress bar labelled with the path; the bar is handed to the
    caller. A "Crawled ..." action is logged through a callback registered
    on ``self._stack``.
    """

    def __init__(self, limiter: Limiter, path: PurePath):
        """Store the limiter and the path this token reports on.

        The token takes the raw path (not a pre-formatted description) so
        that it can build both the "Crawling" and the "Crawled" messages.
        """
        super().__init__()

        self._limiter = limiter
        self._path = path

    async def _on_aenter(self) -> ProgressBar:
        """Acquire the crawl limiter, open the progress bar and return it."""
        # The path is escaped so that characters in it are not interpreted
        # as markup by the logger.
        bar_desc = f"[bold bright_cyan]Crawling[/] {escape(fmt_path(self._path))}"
        after_desc = f"[bold bright_cyan]Crawled[/] {escape(fmt_path(self._path))}"

        # Registered before the limiter and the bar. NOTE(review): assuming
        # self._stack is a contextlib-style exit stack (LIFO), this callback
        # runs last, i.e. after the bar is closed and the limiter released.
        self._stack.callback(lambda: log.action(after_desc))
        await self._stack.enter_async_context(self._limiter.limit_crawl())
        bar = self._stack.enter_context(log.crawl_bar(bar_desc))

        return bar
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class DownloadToken(ReusableAsyncContextManager[Tuple[ProgressBar, FileSink]]):
    """Reusable async context manager wrapping the download of a single file.

    On entry it enters the limiter context, then the file sink token, then
    opens a progress bar labelled with the path. The caller receives the
    ``(bar, sink)`` pair.
    """

    def __init__(self, limiter: Limiter, fs_token: FileSinkToken, path: PurePath):
        """Store the limiter, the file sink token and the path being downloaded.

        The token takes the raw path (not a pre-formatted description) and
        builds the progress bar label itself.
        """
        super().__init__()

        self._limiter = limiter
        self._fs_token = fs_token
        self._path = path

    async def _on_aenter(self) -> Tuple[ProgressBar, FileSink]:
        """Acquire the limiter and the file sink, open the bar, return both."""
        # The path is escaped so that characters in it are not interpreted
        # as markup by the logger.
        bar_desc = f"[bold bright_cyan]Downloading[/] {escape(fmt_path(self._path))}"
        # The "Downloaded ..." message is printed in the output dir, not here

        # NOTE(review): this uses limit_crawl() and log.crawl_bar() although
        # the token represents a download. If the limiter/logger offer
        # download-specific variants, they are probably what is intended
        # here - confirm against Limiter and the log module.
        await self._stack.enter_async_context(self._limiter.limit_crawl())
        sink = await self._stack.enter_async_context(self._fs_token)
        bar = self._stack.enter_context(log.crawl_bar(bar_desc))

        return bar, sink
 | 
			
		||||
 | 
			
		||||
@@ -229,9 +236,7 @@ class Crawler(ABC):
 | 
			
		||||
            return None
 | 
			
		||||
 | 
			
		||||
        log.explain("Answer: Yes")
 | 
			
		||||
 | 
			
		||||
        desc = f"[bold bright_cyan]Crawling[/] {escape(fmt_path(path))}"
 | 
			
		||||
        return CrawlToken(self._limiter, desc)
 | 
			
		||||
        return CrawlToken(self._limiter, path)
 | 
			
		||||
 | 
			
		||||
    async def download(
 | 
			
		||||
            self,
 | 
			
		||||
@@ -247,15 +252,13 @@ class Crawler(ABC):
 | 
			
		||||
            log.explain("Answer: No")
 | 
			
		||||
            return None
 | 
			
		||||
 | 
			
		||||
        fs_token = await self._output_dir.download(transformed_path, mtime, redownload, on_conflict)
 | 
			
		||||
        fs_token = await self._output_dir.download(path, transformed_path, mtime, redownload, on_conflict)
 | 
			
		||||
        if fs_token is None:
 | 
			
		||||
            log.explain("Answer: No")
 | 
			
		||||
            return None
 | 
			
		||||
 | 
			
		||||
        log.explain("Answer: Yes")
 | 
			
		||||
 | 
			
		||||
        desc = f"[bold bright_cyan]Downloading[/] {escape(fmt_path(path))}"
 | 
			
		||||
        return DownloadToken(self._limiter, fs_token, desc)
 | 
			
		||||
        return DownloadToken(self._limiter, fs_token, path)
 | 
			
		||||
 | 
			
		||||
    async def _cleanup(self) -> None:
 | 
			
		||||
        log.explain_topic("Decision: Clean up files")
 | 
			
		||||
 
 | 
			
		||||
@@ -78,6 +78,7 @@ class FileSink:
 | 
			
		||||
 | 
			
		||||
@dataclass
 | 
			
		||||
class DownloadInfo:
 | 
			
		||||
    remote_path: PurePath
 | 
			
		||||
    path: PurePath
 | 
			
		||||
    local_path: Path
 | 
			
		||||
    tmp_path: Path
 | 
			
		||||
@@ -96,6 +97,7 @@ class FileSinkToken(ReusableAsyncContextManager[FileSink]):
 | 
			
		||||
    def __init__(
 | 
			
		||||
            self,
 | 
			
		||||
            output_dir: "OutputDirectory",
 | 
			
		||||
            remote_path: PurePath,
 | 
			
		||||
            path: PurePath,
 | 
			
		||||
            local_path: Path,
 | 
			
		||||
            heuristics: Heuristics,
 | 
			
		||||
@@ -104,6 +106,7 @@ class FileSinkToken(ReusableAsyncContextManager[FileSink]):
 | 
			
		||||
        super().__init__()
 | 
			
		||||
 | 
			
		||||
        self._output_dir = output_dir
 | 
			
		||||
        self._remote_path = remote_path
 | 
			
		||||
        self._path = path
 | 
			
		||||
        self._local_path = local_path
 | 
			
		||||
        self._heuristics = heuristics
 | 
			
		||||
@@ -115,6 +118,7 @@ class FileSinkToken(ReusableAsyncContextManager[FileSink]):
 | 
			
		||||
 | 
			
		||||
        async def after_download() -> None:
 | 
			
		||||
            await self._output_dir._after_download(DownloadInfo(
 | 
			
		||||
                self._remote_path,
 | 
			
		||||
                self._path,
 | 
			
		||||
                self._local_path,
 | 
			
		||||
                tmp_path,
 | 
			
		||||
@@ -317,6 +321,7 @@ class OutputDirectory:
 | 
			
		||||
 | 
			
		||||
    async def download(
 | 
			
		||||
            self,
 | 
			
		||||
            remote_path: PurePath,
 | 
			
		||||
            path: PurePath,
 | 
			
		||||
            mtime: Optional[datetime] = None,
 | 
			
		||||
            redownload: Optional[Redownload] = None,
 | 
			
		||||
@@ -363,7 +368,7 @@ class OutputDirectory:
 | 
			
		||||
        # Ensure parent directory exists
 | 
			
		||||
        local_path.parent.mkdir(parents=True, exist_ok=True)
 | 
			
		||||
 | 
			
		||||
        return FileSinkToken(self, path, local_path, heuristics, on_conflict)
 | 
			
		||||
        return FileSinkToken(self, remote_path, path, local_path, heuristics, on_conflict)
 | 
			
		||||
 | 
			
		||||
    def _update_metadata(self, info: DownloadInfo) -> None:
 | 
			
		||||
        if mtime := info.heuristics.mtime:
 | 
			
		||||
@@ -379,6 +384,7 @@ class OutputDirectory:
 | 
			
		||||
 | 
			
		||||
    async def _after_download(self, info: DownloadInfo) -> None:
 | 
			
		||||
        with self._ensure_deleted(info.tmp_path):
 | 
			
		||||
            log.action(f"[bold bright_cyan]Downloaded[/] {fmt_path(info.remote_path)}")
 | 
			
		||||
            log.explain_topic(f"Processing downloaded file for {fmt_path(info.path)}")
 | 
			
		||||
 | 
			
		||||
            changed = False
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user