mirror of
https://github.com/Garmelon/PFERD.git
synced 2023-12-21 10:23:01 +01:00
Crawl all video stages in one crawl bar
This prevents folders from being renamed as a result of being crawled twice
This commit is contained in:
parent
57aef26217
commit
6e4d423c81
@ -62,9 +62,11 @@ class IliasPage:
|
|||||||
log.explain("Page is a normal folder, searching for elements")
|
log.explain("Page is a normal folder, searching for elements")
|
||||||
return self._find_normal_entries()
|
return self._find_normal_entries()
|
||||||
|
|
||||||
def get_next_stage_url(self) -> Optional[str]:
|
def get_next_stage_element(self) -> Optional[IliasPageElement]:
|
||||||
if self._is_ilias_opencast_embedding():
|
if self._is_ilias_opencast_embedding():
|
||||||
return self.get_child_elements()[0].url
|
return self.get_child_elements()[0]
|
||||||
|
if self._page_type == IliasElementType.VIDEO_FOLDER_MAYBE_PAGINATED:
|
||||||
|
return self._find_video_entries_paginated()[0]
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def _is_video_player(self) -> bool:
|
def _is_video_player(self) -> bool:
|
||||||
|
@ -248,13 +248,18 @@ instance's greatest bottleneck.
|
|||||||
elements.clear()
|
elements.clear()
|
||||||
async with cl:
|
async with cl:
|
||||||
next_stage_url: Optional[str] = url
|
next_stage_url: Optional[str] = url
|
||||||
|
current_parent = parent
|
||||||
|
|
||||||
while next_stage_url:
|
while next_stage_url:
|
||||||
soup = await self._get_page(next_stage_url)
|
soup = await self._get_page(next_stage_url)
|
||||||
log.explain_topic(f"Parsing HTML page for {fmt_path(path)}")
|
log.explain_topic(f"Parsing HTML page for {fmt_path(path)}")
|
||||||
log.explain(f"URL: {next_stage_url}")
|
log.explain(f"URL: {next_stage_url}")
|
||||||
page = IliasPage(soup, next_stage_url, parent)
|
page = IliasPage(soup, next_stage_url, current_parent)
|
||||||
next_stage_url = page.get_next_stage_url()
|
if next_element := page.get_next_stage_element():
|
||||||
|
current_parent = next_element
|
||||||
|
next_stage_url = next_element.url
|
||||||
|
else:
|
||||||
|
next_stage_url = None
|
||||||
|
|
||||||
elements.extend(page.get_child_elements())
|
elements.extend(page.get_child_elements())
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user