From c897d9e2f50d3c281bbf4a10c2dc7bb960ec202f Mon Sep 17 00:00:00 2001
From: I-Al-Istannen
Date: Wed, 26 Jun 2024 16:39:24 +0200
Subject: [PATCH] Support finding entries for course overview page

Related to issue #93
---
 CHANGELOG.md                        |  2 ++
 PFERD/crawl/ilias/kit_ilias_html.py | 10 +++++++++-
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6de08a3..b93bd33 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -24,6 +24,8 @@ ambiguous situations.
 
 ### Added
 - Generic `ilias-web` crawler and `ilias-web` CLI command
+- Support for the course overview page. Using this URL as a target might cause
+  duplication warnings, as subgroups are listed separately.
 
 ### Fixed
 - Normalization of meeting names in cards
diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py
index 54d56a0..4c1d798 100644
--- a/PFERD/crawl/ilias/kit_ilias_html.py
+++ b/PFERD/crawl/ilias/kit_ilias_html.py
@@ -349,6 +349,9 @@ class IliasPage:
         might_be_info = self._soup.find("form", attrs={"name": lambda x: x == "formInfoScreen"}) is not None
         return self._page_type == IliasElementType.INFO_TAB and might_be_info
 
+    def _is_course_overview_page(self) -> bool:
+        return "baseClass=ilmembershipoverviewgui" in self._page_url
+
     def _select_content_page_url(self) -> Optional[IliasPageElement]:
         tab = self._soup.find(
             id="tab_view_content",
@@ -686,8 +689,13 @@ class IliasPage:
     def _find_normal_entries(self) -> List[IliasPageElement]:
         result: List[IliasPageElement] = []
 
+        links: List[Tag] = []
         # Fetch all links and throw them to the general interpreter
-        links: List[Tag] = self._soup.select("a.il_ContainerItemTitle")
+        if self._is_course_overview_page():
+            log.explain("Page is a course overview page, adjusting link selector")
+            links.extend(self._soup.select(".il-item-title > a"))
+        else:
+            links.extend(self._soup.select("a.il_ContainerItemTitle"))
 
         for link in links:
             abs_url = self._abs_url_from_link(link)