mirror of
https://github.com/Garmelon/PFERD.git
synced 2023-12-21 10:23:01 +01:00
Only call should_crawl once
This commit is contained in:
parent
81301f3a76
commit
8cfa818f04
@ -130,6 +130,12 @@ class KitIliasWebCrawler(HttpCrawler):
|
|||||||
@arepeat(3)
|
@arepeat(3)
|
||||||
@anoncritical
|
@anoncritical
|
||||||
async def _handle_ilias_page(self, url: str, parent: IliasPageElement, path: PurePath) -> None:
|
async def _handle_ilias_page(self, url: str, parent: IliasPageElement, path: PurePath) -> None:
|
||||||
|
# We might not want to crawl this directory-ish page.
|
||||||
|
# This is not in #handle_element, as the download methods check it themselves and therefore
|
||||||
|
# would perform this check twice - messing with the explain output
|
||||||
|
if not self.should_crawl(path):
|
||||||
|
return
|
||||||
|
|
||||||
tasks = []
|
tasks = []
|
||||||
async with self.crawl_bar(path):
|
async with self.crawl_bar(path):
|
||||||
soup = await self._get_page(url)
|
soup = await self._get_page(url)
|
||||||
|
Loading…
Reference in New Issue
Block a user