From 50b50513c6d8bb01200104633d7ce312e17a0ba7 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Tue, 29 Aug 2023 13:51:19 +0200 Subject: [PATCH] Ignore SCORM learning modules --- CHANGELOG.md | 1 + PFERD/crawl/ilias/kit_ilias_html.py | 7 +++++++ PFERD/crawl/ilias/kit_ilias_web_crawler.py | 10 +++++++++- 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3f318b2..47df846 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,7 @@ ambiguous situations. - Remove size suffix for files in content pages - Sanitize ascii control characters on Windows - Crawling of paginated past meetings +- Ignore SCORM learning modules ### Added - `no-delete-prompt-override` conflict resolution strategy diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index 5a94a0b..2c37816 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -27,6 +27,7 @@ class IliasElementType(Enum): BOOKING = "booking" MEETING = "meeting" SURVEY = "survey" + SCORM_LEARNING_MODULE = "scorm_learning_module" MEDIACAST_VIDEO_FOLDER = "mediacast_video_folder" MEDIACAST_VIDEO = "mediacast_video" OPENCAST_VIDEO = "opencast_video" @@ -953,6 +954,9 @@ class IliasPage: if "baseClass=ilMediaCastHandlerGUI" in parsed_url.query: return IliasElementType.MEDIACAST_VIDEO_FOLDER + if "baseClass=ilSAHSPresentationGUI" in parsed_url.query: + return IliasElementType.SCORM_LEARNING_MODULE + # Booking and Meeting can not be detected based on the link. They do have a ref_id though, so # try to guess it from the image. @@ -1031,6 +1035,9 @@ class IliasPage: if str(img_tag["src"]).endswith("icon_mcst.svg"): return IliasElementType.MEDIACAST_VIDEO_FOLDER + if str(img_tag["src"]).endswith("icon_sahs.svg"): + return IliasElementType.SCORM_LEARNING_MODULE + return IliasElementType.FOLDER @staticmethod diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index 4f6cc74..d5f6809 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -403,6 +403,14 @@ instance's greatest bottleneck. "[bright_black](surveys contain no relevant data)" ) return None + elif element.type == IliasElementType.SCORM_LEARNING_MODULE: + log.status( + "[bold bright_black]", + "Ignored", + fmt_path(element_path), + "[bright_black](scorm learning modules are not supported)" + ) + return None elif element.type == IliasElementType.LEARNING_MODULE: return await self._handle_learning_module(element, element_path) elif element.type == IliasElementType.LINK: @@ -897,7 +905,7 @@ instance's greatest bottleneck. soup = soupify(await request.read()) if self._is_logged_in(soup): return self._verify_page(soup, url, root_page_allowed) - raise CrawlError("get_page failed even after authenticating") + raise CrawlError(f"get_page failed even after authenticating on {url!r}") def _verify_page(self, soup: BeautifulSoup, url: str, root_page_allowed: bool) -> BeautifulSoup: if IliasPage.is_root_page(soup) and not root_page_allowed: