Fix crawling of courses with preselected timeline tab

This commit is contained in:
I-Al-Istannen 2022-11-25 12:49:36 +01:00
parent 6d44aac278
commit 722d2eb393
2 changed files with 26 additions and 0 deletions

View File

@ -22,6 +22,9 @@ ambiguous situations.
## Unreleased ## Unreleased
### Fixed
- Crawling of courses with the timeline view as the default tab
## 3.4.3 - 2022-11-29 ## 3.4.3 - 2022-11-29
### Added ### Added

View File

@ -158,6 +158,8 @@ class IliasPage:
if self._contains_collapsed_future_meetings(): if self._contains_collapsed_future_meetings():
log.explain("Requesting *all* future meetings") log.explain("Requesting *all* future meetings")
return self._uncollapse_future_meetings_url() return self._uncollapse_future_meetings_url()
if not self._is_content_tab_selected():
return self._select_content_page_url()
return None return None
def _is_forum_page(self) -> bool: def _is_forum_page(self) -> bool:
@ -220,6 +222,27 @@ class IliasPage:
link = self._abs_url_from_link(element) link = self._abs_url_from_link(element)
return IliasPageElement(IliasElementType.FOLDER, link, "show all meetings") return IliasPageElement(IliasElementType.FOLDER, link, "show all meetings")
def _is_content_tab_selected(self) -> bool:
    """
    Report whether no switch to the content tab is required.

    This is True exactly when `_select_content_page_url` yields no element
    to follow, and False otherwise.
    """
    switch_target = self._select_content_page_url()
    return switch_target is None
def _select_content_page_url(self) -> Optional[IliasPageElement]:
    """
    Return the element to follow in order to switch to the content tab.

    The tab is looked up by its id ("tab_view_content"). None is returned
    when the tab is not present, when it carries no class attribute, or when
    it already has the "active" class. None is also returned, after logging
    warnings, when the tab is not active but contains no link to follow.
    """
    # Look the tab up by id only and inspect its classes afterwards.
    # Beautiful Soup applies a "class" filter function to every class token
    # individually, so a negative predicate ('"active" not in x') would match
    # an active tab as soon as it carries any second class.
    tab = self._soup.find(id="tab_view_content")
    # Not found
    if tab is None:
        return None

    classes = tab.get("class")
    # Tabs without a class attribute were never matched by the original
    # filter; keep treating them as "nothing to do".
    if classes is None:
        return None
    # Depending on the parser configuration the attribute may be a plain
    # string instead of a list of tokens.
    if isinstance(classes, str):
        classes = classes.split()
    # Already selected
    if "active" in classes:
        return None

    anchor = tab.find("a")
    if anchor:
        url = self._abs_url_from_link(anchor)
        return IliasPageElement(IliasElementType.FOLDER, url, "select content page")

    # The tab exists and is not selected, but offers no link to follow.
    _unexpected_html_warning()
    log.warn_contd(f"Could not find content tab URL on {self._page_url!r}.")
    log.warn_contd("PFERD might not find content on the course's main page.")
    return None
def _player_to_video(self) -> List[IliasPageElement]: def _player_to_video(self) -> List[IliasPageElement]:
# Fetch the actual video page. This is a small wrapper page initializing a javascript # Fetch the actual video page. This is a small wrapper page initializing a javascript
# player. Sadly we can not execute that JS. The actual video stream url is nowhere # player. Sadly we can not execute that JS. The actual video stream url is nowhere