Fix wrong base URL for multi-stage pages

This commit is contained in:
I-Al-Istannen 2021-06-13 15:39:22 +02:00
parent 70b33ecfd9
commit 70ec64a48b
3 changed files with 5 additions and 2 deletions

View File

@@ -38,6 +38,9 @@ path separators to `/` in your regex rules.
- Use the label to the left for exercises instead of the button name to - Use the label to the left for exercises instead of the button name to
determine the folder name determine the folder name
### Fixed
- Video pagination handling in ILIAS crawler
## 3.0.1 - 2021-06-01 ## 3.0.1 - 2021-06-01
### Added ### Added

View File

@@ -480,7 +480,7 @@ class IliasPage:
return None return None
if "opencast" in str(img_tag["alt"]).lower(): if "opencast" in str(img_tag["alt"]).lower():
return IliasElementType.VIDEO_FOLDER return IliasElementType.VIDEO_FOLDER_MAYBE_PAGINATED
if str(img_tag["src"]).endswith("icon_exc.svg"): if str(img_tag["src"]).endswith("icon_exc.svg"):
return IliasElementType.EXERCISE return IliasElementType.EXERCISE

View File

@@ -253,7 +253,7 @@ instance's greatest bottleneck.
soup = await self._get_page(next_stage_url) soup = await self._get_page(next_stage_url)
log.explain_topic(f"Parsing HTML page for {fmt_path(path)}") log.explain_topic(f"Parsing HTML page for {fmt_path(path)}")
log.explain(f"URL: {next_stage_url}") log.explain(f"URL: {next_stage_url}")
page = IliasPage(soup, url, parent) page = IliasPage(soup, next_stage_url, parent)
next_stage_url = page.get_next_stage_url() next_stage_url = page.get_next_stage_url()
elements.extend(page.get_child_elements()) elements.extend(page.get_child_elements())