Crawl all video stages in one crawl bar

This ensures folders are not renamed, as they are crawled twice
I-Al-Istannen 2021-06-13 16:50:29 +02:00
parent 57aef26217
commit 6e4d423c81
2 changed files with 11 additions and 4 deletions
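
To illustrate the idea behind the change, here is a minimal, self-contained sketch using hypothetical stand-ins for IliasPage / IliasPageElement (not PFERD's real classes; the actual change is in the diff below): a page can point to a further "stage" (an Opencast embedding, a paginated video folder), and the crawler now follows every stage inside one loop, under a single crawl bar, before collecting the children of the final stage.

    # Minimal sketch with hypothetical stand-ins for IliasPage / IliasPageElement.
    from dataclasses import dataclass, field
    from typing import Dict, List, Optional


    @dataclass
    class Element:                                # stand-in for IliasPageElement
        name: str
        url: str


    @dataclass
    class Page:                                   # stand-in for IliasPage
        children: List[Element] = field(default_factory=list)
        next_stage: Optional[Element] = None

        def get_next_stage_element(self) -> Optional[Element]:
            return self.next_stage

        def get_child_elements(self) -> List[Element]:
            return self.children


    # Fake site: an embedding page points to a paginated folder, which points to
    # the "show all" listing that actually contains the videos.
    SITE: Dict[str, Page] = {
        "/embed": Page(next_stage=Element("folder", "/folder?page=0")),
        "/folder?page=0": Page(next_stage=Element("all", "/folder?all")),
        "/folder?all": Page(children=[Element("clip 1", "/c1"),
                                      Element("clip 2", "/c2")]),
    }


    def crawl(url: str) -> List[Element]:
        """Follow every stage in one loop, so a single crawl bar covers them all."""
        next_url: Optional[str] = url
        page = SITE[url]
        while next_url:
            page = SITE[next_url]                 # stand-in for fetch + parse
            next_element = page.get_next_stage_element()
            next_url = next_element.url if next_element else None
        return page.get_child_elements()          # children of the final stage only


    print([e.name for e in crawl("/embed")])      # ['clip 1', 'clip 2']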


@@ -62,9 +62,11 @@ class IliasPage:
         log.explain("Page is a normal folder, searching for elements")
         return self._find_normal_entries()

-    def get_next_stage_url(self) -> Optional[str]:
+    def get_next_stage_element(self) -> Optional[IliasPageElement]:
         if self._is_ilias_opencast_embedding():
-            return self.get_child_elements()[0].url
+            return self.get_child_elements()[0]
+        if self._page_type == IliasElementType.VIDEO_FOLDER_MAYBE_PAGINATED:
+            return self._find_video_entries_paginated()[0]
         return None

     def _is_video_player(self) -> bool:


@@ -248,13 +248,18 @@ instance's greatest bottleneck.
             elements.clear()
             async with cl:
                 next_stage_url: Optional[str] = url
+                current_parent = parent

                 while next_stage_url:
                     soup = await self._get_page(next_stage_url)
                     log.explain_topic(f"Parsing HTML page for {fmt_path(path)}")
                     log.explain(f"URL: {next_stage_url}")
-                    page = IliasPage(soup, next_stage_url, parent)
-                    next_stage_url = page.get_next_stage_url()
+                    page = IliasPage(soup, next_stage_url, current_parent)
+                    if next_element := page.get_next_stage_element():
+                        current_parent = next_element
+                        next_stage_url = next_element.url
+                    else:
+                        next_stage_url = None

                 elements.extend(page.get_child_elements())
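
Design note (reading of the change, not stated explicitly in the commit): returning the full IliasPageElement instead of a bare URL appears to be what lets the loop thread that element through as current_parent, so children of later stages stay anchored to the same parent and folders are not renamed even though they are crawled twice.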