mirror of
https://github.com/Garmelon/PFERD.git
synced 2023-12-21 10:23:01 +01:00
Make crawlers use transformers
This commit is contained in:
parent
302b8c0c34
commit
b0f731bf84
@ -273,6 +273,9 @@ class Crawler(ABC):
|
||||
with self._conductor.progress_bar(desc, total=total) as bar:
|
||||
yield bar
|
||||
|
||||
def should_crawl(self, path: PurePath) -> bool:
|
||||
return self._transformer.transform(path) is not None
|
||||
|
||||
async def download(
|
||||
self,
|
||||
path: PurePath,
|
||||
@ -280,8 +283,12 @@ class Crawler(ABC):
|
||||
redownload: Optional[Redownload] = None,
|
||||
on_conflict: Optional[OnConflict] = None,
|
||||
) -> Optional[AsyncContextManager[FileSink]]:
|
||||
transformed_path = self._transformer.transform(path)
|
||||
if transformed_path is None:
|
||||
return None
|
||||
|
||||
return await self._output_dir.download(
|
||||
path, mtime, redownload, on_conflict)
|
||||
transformed_path, mtime, redownload, on_conflict)
|
||||
|
||||
async def cleanup(self) -> None:
|
||||
await self._output_dir.cleanup()
|
||||
|
@ -81,7 +81,8 @@ class LocalCrawler(Crawler):
|
||||
|
||||
for child in path.iterdir():
|
||||
pure_child = pure / child.name
|
||||
tasks.append(self._crawl_path(child, pure_child))
|
||||
if self.should_crawl(child):
|
||||
tasks.append(self._crawl_path(child, pure_child))
|
||||
|
||||
await asyncio.gather(*tasks)
|
||||
|
||||
|
@ -292,4 +292,4 @@ class Transformer:
|
||||
else:
|
||||
continue
|
||||
|
||||
return None
|
||||
return path
|
||||
|
Loading…
Reference in New Issue
Block a user