Mirror of https://github.com/Garmelon/PFERD.git
Crawl all video stages in one crawl bar
This ensures folders are not renamed, as they would otherwise be crawled twice.
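To illustrate the rationale, here is a minimal, hypothetical sketch (not PFERD's actual API) of why crawling the same folder in two separate crawl bars can rename it: a duplicate-aware name registry has to give the second registration a new name, whereas a single crawl bar registers the folder only once.

# Hypothetical illustration only; PFERD's real deduplication logic may differ.
from pathlib import PurePath


class NameRegistry:
    """Assumed behaviour: a path registered a second time gets a numbered suffix."""

    def __init__(self) -> None:
        self._seen = set()

    def register(self, path: PurePath) -> PurePath:
        candidate = path
        counter = 1
        while candidate in self._seen:
            candidate = path.with_name(f"{path.name} ({counter})")
            counter += 1
        self._seen.add(candidate)
        return candidate


registry = NameRegistry()

# Before this commit: a video folder could be crawled in two stages with two
# crawl bars, so it was registered twice and the second registration was renamed.
print(registry.register(PurePath("Lecture Videos")))  # Lecture Videos
print(registry.register(PurePath("Lecture Videos")))  # Lecture Videos (1)

# After this commit: all stages run inside one crawl bar, the folder is
# registered once, and its name stays stable.

With this change, the whole chain of video stages is followed inside a single "async with cl:" block (see the second hunk below), so the folder's path is claimed only once.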
commit 6e4d423c81
parent 57aef26217
@@ -62,9 +62,11 @@ class IliasPage:
         log.explain("Page is a normal folder, searching for elements")
         return self._find_normal_entries()
 
-    def get_next_stage_url(self) -> Optional[str]:
+    def get_next_stage_element(self) -> Optional[IliasPageElement]:
         if self._is_ilias_opencast_embedding():
-            return self.get_child_elements()[0].url
+            return self.get_child_elements()[0]
+        if self._page_type == IliasElementType.VIDEO_FOLDER_MAYBE_PAGINATED:
+            return self._find_video_entries_paginated()[0]
         return None
 
     def _is_video_player(self) -> bool:
@@ -248,13 +248,18 @@ instance's greatest bottleneck.
             elements.clear()
             async with cl:
                 next_stage_url: Optional[str] = url
+                current_parent = parent
 
                 while next_stage_url:
                     soup = await self._get_page(next_stage_url)
                     log.explain_topic(f"Parsing HTML page for {fmt_path(path)}")
                     log.explain(f"URL: {next_stage_url}")
-                    page = IliasPage(soup, next_stage_url, parent)
-                    next_stage_url = page.get_next_stage_url()
+                    page = IliasPage(soup, next_stage_url, current_parent)
+                    if next_element := page.get_next_stage_element():
+                        current_parent = next_element
+                        next_stage_url = next_element.url
+                    else:
+                        next_stage_url = None
 
                 elements.extend(page.get_child_elements())
 
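Below is a minimal, self-contained sketch of the traversal pattern used in the hunk above. The StubElement and StubPage classes, the SITE mapping, and the crawl() helper are stand-ins invented for illustration; only the shape of the loop mirrors the real code.

from dataclasses import dataclass, field
from typing import Dict, List, Optional


@dataclass
class StubElement:
    url: str


@dataclass
class StubPage:
    next_stage: Optional[StubElement]
    children: List[StubElement] = field(default_factory=list)

    def get_next_stage_element(self) -> Optional[StubElement]:
        return self.next_stage

    def get_child_elements(self) -> List[StubElement]:
        return self.children


# A fake three-stage "site": folder -> opencast embedding -> paginated video list.
SITE: Dict[str, StubPage] = {
    "/folder": StubPage(next_stage=StubElement("/embedding")),
    "/embedding": StubPage(next_stage=StubElement("/videos")),
    "/videos": StubPage(next_stage=None, children=[StubElement("/videos/lecture-01.mp4")]),
}


def crawl(url: str) -> List[StubElement]:
    elements: List[StubElement] = []
    next_stage_url: Optional[str] = url

    # Same shape as the loop in the diff: follow stage elements until a page
    # reports no further stage. The loop always runs at least once because the
    # starting URL is non-empty, so "page" is defined afterwards.
    while next_stage_url:
        page = SITE[next_stage_url]
        if next_element := page.get_next_stage_element():
            next_stage_url = next_element.url
        else:
            next_stage_url = None

    # Only the final stage's children are collected, all within one crawl pass.
    elements.extend(page.get_child_elements())
    return elements


print([e.url for e in crawl("/folder")])  # ['/videos/lecture-01.mp4']

Returning the whole element instead of just its URL (the change in the first hunk) is what lets the real crawler carry it along as current_parent, so each stage is parsed with the correct parent element.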