Bail out when crawling recursive courses

I-Al-Istannen 2022-01-08 20:28:30 +01:00
parent 43c5453e10
commit 10d9d74528


@@ -182,6 +182,7 @@ instance's greatest bottleneck.
         self._link_file_redirect_delay = section.link_redirect_delay()
         self._links = section.links()
         self._videos = section.videos()
+        self._visited_urls: Set[str] = set()

     async def _run(self) -> None:
         if isinstance(self._target, int):
@@ -309,6 +310,12 @@ instance's greatest bottleneck.
         parent_path: PurePath,
         element: IliasPageElement,
     ) -> Optional[Awaitable[None]]:
+        if element.url in self._visited_urls:
+            raise CrawlWarning(
+                f"Found second path to element {element.name!r} at {element.url!r}. Aborting subpath"
+            )
+        self._visited_urls.add(element.url)
+
         element_path = PurePath(parent_path, element.name)

         if element.type in _VIDEO_ELEMENTS:
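
The change records every element URL the crawler has seen in a run and raises a CrawlWarning when the same URL turns up again, so a course that links back to itself (or is reachable via two paths) aborts that subpath instead of recursing forever. Below is a minimal sketch of the same guard pattern in isolation; the Element class, the crawl method, and the sample data are illustrative stand-ins, not PFERD's actual API, and CrawlWarning is redefined here only so the snippet runs on its own.

# Sketch of the visited-URL guard, assuming a toy tree of elements.
from dataclasses import dataclass, field
from typing import List, Set


class CrawlWarning(Exception):
    """Stand-in for PFERD's CrawlWarning: aborts the current subpath."""


@dataclass
class Element:
    name: str
    url: str
    children: List["Element"] = field(default_factory=list)


class Crawler:
    def __init__(self) -> None:
        # Every URL seen so far in this run; seeing one again means a cycle
        # or a second path to the same element.
        self._visited_urls: Set[str] = set()

    def crawl(self, element: Element) -> None:
        if element.url in self._visited_urls:
            raise CrawlWarning(
                f"Found second path to element {element.name!r} at {element.url!r}. Aborting subpath"
            )
        self._visited_urls.add(element.url)
        for child in element.children:
            self.crawl(child)


# A course that links back to itself would previously recurse without end;
# with the guard, the second visit raises CrawlWarning instead.
course = Element("Course", "https://example.com/course/1")
course.children.append(Element("Link back", "https://example.com/course/1"))

try:
    Crawler().crawl(course)
except CrawlWarning as warning:
    print(warning)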