Fix renaming conflict with multi-stage video elements

This commit is contained in:
I-Al-Istannen 2021-05-27 00:55:46 +02:00 committed by Joscha
parent 19eed5bdff
commit 5beb4d9a2d
2 changed files with 22 additions and 8 deletions

View File

@ -62,15 +62,17 @@ class IliasPage:
log.explain("Page is a normal folder, searching for elements")
return self._find_normal_entries()
def get_next_stage_url(self) -> Optional[str]:
    """
    Return the URL of the page that has to be loaded after this one.

    Only pages recognised by ``_is_ilias_opencast_embedding`` have a next
    stage; for those the URL of the first child element is returned.

    Returns None when this page is not such an embedding, or when the
    embedding has no child elements at all.
    """
    if not self._is_ilias_opencast_embedding():
        return None

    children = self.get_child_elements()
    # An embedding without any entries has no next stage. Report that
    # explicitly instead of failing with an IndexError on the empty list.
    if not children:
        return None
    return children[0].url
def _is_video_player(self) -> bool:
return "paella_config_file" in str(self._soup)
def _is_video_listing(self) -> bool:
# ILIAS fluff around it
if self._soup.find(id="headerimage"):
element: Tag = self._soup.find(id="headerimage")
if "opencast" in element.attrs["src"].lower():
return True
if self._is_ilias_opencast_embedding():
return True
# Raw listing without ILIAS fluff
video_element_table: Tag = self._soup.find(
@ -78,6 +80,14 @@ class IliasPage:
)
return video_element_table is not None
def _is_ilias_opencast_embedding(self) -> bool:
# ILIAS fluff around the real opencast html
if self._soup.find(id="headerimage"):
element: Tag = self._soup.find(id="headerimage")
if "opencast" in element.attrs["src"].lower():
return True
return False
def _is_exercise_file(self) -> bool:
# we know it from before
if self._page_type == IliasElementType.EXERCISE:

View File

@ -242,10 +242,14 @@ class KitIliasWebCrawler(HttpCrawler):
async def gather_elements() -> None:
    """
    Refill ``elements`` with the child elements found at ``url``.

    Some pages are only an intermediate stage: ``get_next_stage_url``
    then names another page that has to be loaded instead. The stages are
    followed until a page reports no next stage, and the child elements of
    that final page are collected.
    """
    # Start from a clean list so a repeated call does not duplicate entries.
    elements.clear()
    async with cl:
        log.explain_topic(f"Parsing HTML page for {fmt_path(path)}")

        stage_url = url
        while True:
            # Fetch every stage exactly once, and log and parse it under
            # the URL that was actually requested rather than the
            # original one.
            soup = await self._get_page(stage_url)
            log.explain(f"URL: {stage_url}")
            page = IliasPage(soup, stage_url, parent)

            next_stage_url = page.get_next_stage_url()
            if not next_stage_url:
                break
            stage_url = next_stage_url

        elements.extend(page.get_child_elements())