diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py
index 384f0de..41f45e2 100644
--- a/PFERD/crawl/ilias/kit_ilias_html.py
+++ b/PFERD/crawl/ilias/kit_ilias_html.py
@@ -62,9 +62,11 @@ class IliasPage:
log.explain("Page is a normal folder, searching for elements")
return self._find_normal_entries()
- def get_next_stage_url(self) -> Optional[str]:
+ def get_next_stage_element(self) -> Optional[IliasPageElement]:
if self._is_ilias_opencast_embedding():
- return self.get_child_elements()[0].url
+ return self.get_child_elements()[0]
+ if self._page_type == IliasElementType.VIDEO_FOLDER_MAYBE_PAGINATED:
+ return self._find_video_entries_paginated()[0]
return None
def _is_video_player(self) -> bool:
diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py
index 6495da9..41c301c 100644
--- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py
+++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py
@@ -248,13 +248,18 @@ instance's greatest bottleneck.
elements.clear()
async with cl:
next_stage_url: Optional[str] = url
+ current_parent = parent
while next_stage_url:
soup = await self._get_page(next_stage_url)
log.explain_topic(f"Parsing HTML page for {fmt_path(path)}")
log.explain(f"URL: {next_stage_url}")
- page = IliasPage(soup, next_stage_url, parent)
- next_stage_url = page.get_next_stage_url()
+ page = IliasPage(soup, next_stage_url, current_parent)
+ if next_element := page.get_next_stage_element():
+ current_parent = next_element
+ next_stage_url = next_element.url
+ else:
+ next_stage_url = None
elements.extend(page.get_child_elements())