Only call should_crawl once

This commit is contained in:
I-Al-Istannen 2021-05-19 21:57:55 +02:00
parent 81301f3a76
commit 8cfa818f04

View File

@@ -130,6 +130,12 @@ class KitIliasWebCrawler(HttpCrawler):
@arepeat(3) @arepeat(3)
@anoncritical @anoncritical
async def _handle_ilias_page(self, url: str, parent: IliasPageElement, path: PurePath) -> None: async def _handle_ilias_page(self, url: str, parent: IliasPageElement, path: PurePath) -> None:
# We might not want to crawl this directory-ish page.
# This is not in #handle_element, as the download methods check it themselves and therefore
# would perform this check twice - messing with the explain output
if not self.should_crawl(path):
return
tasks = [] tasks = []
async with self.crawl_bar(path): async with self.crawl_bar(path):
soup = await self._get_page(url) soup = await self._get_page(url)