From e246053de22c54b42df0885082b687b362ce7678 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Tue, 2 Dec 2025 16:33:04 +0100 Subject: [PATCH] Crawl the info tab of courses again This got lost in a refactor --- CHANGELOG.md | 1 + PFERD/crawl/ilias/ilias_web_crawler.py | 2 ++ PFERD/crawl/ilias/kit_ilias_html.py | 6 ++++-- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e80f345..2a2848c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,7 @@ ambiguous situations. ## Fixed - Event loop errors on Windows with Python 3.14 - Sanitize `/` in headings in kit-ipd crawler +- Crawl info tab again ## 3.8.3 - 2025-07-01 diff --git a/PFERD/crawl/ilias/ilias_web_crawler.py b/PFERD/crawl/ilias/ilias_web_crawler.py index fda9f6d..b5041b3 100644 --- a/PFERD/crawl/ilias/ilias_web_crawler.py +++ b/PFERD/crawl/ilias/ilias_web_crawler.py @@ -297,6 +297,8 @@ instance's greatest bottleneck. page = cast(IliasPage, page) elements.extend(page.get_child_elements()) + if current_element is None and (info_tab := page.get_info_tab()): + elements.append(info_tab) if description_string := page.get_description(): description.append(description_string) diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index e23469c..5966141 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -739,9 +739,10 @@ class IliasPage: links: list[Tag] = self._soup.select("a.il_ContainerItemCommand") for link in links: - if "cmdClass=ilobjcoursegui" not in link["href"]: + log.explain(f"Found info tab link: {self._abs_url_from_link(link)}") + if "cmdclass=ilobjcoursegui" not in cast(str, link["href"]).lower(): continue - if "cmd=sendfile" not in link["href"]: + if "cmd=sendfile" not in cast(str, link["href"]).lower(): continue items.append( IliasPageElement.create_new( @@ -749,6 +750,7 @@ class IliasPage: ) ) + log.explain(f"Found {len(items)} info tab entries {items}") return items def _find_opencast_video_entries(self) -> list[IliasPageElement]: