Ignore SCORM learning modules

This commit is contained in:
I-Al-Istannen 2023-08-29 13:51:19 +02:00
parent df3514cd03
commit 50b50513c6
3 changed files with 17 additions and 1 deletions

View File

@@ -30,6 +30,7 @@ ambiguous situations.
- Remove size suffix for files in content pages
- Sanitize ascii control characters on Windows
- Crawling of paginated past meetings
- Ignore SCORM learning modules
### Added
- `no-delete-prompt-override` conflict resolution strategy

View File

@@ -27,6 +27,7 @@ class IliasElementType(Enum):
BOOKING = "booking"
MEETING = "meeting"
SURVEY = "survey"
SCORM_LEARNING_MODULE = "scorm_learning_module"
MEDIACAST_VIDEO_FOLDER = "mediacast_video_folder"
MEDIACAST_VIDEO = "mediacast_video"
OPENCAST_VIDEO = "opencast_video"
@@ -953,6 +954,9 @@ class IliasPage:
if "baseClass=ilMediaCastHandlerGUI" in parsed_url.query:
return IliasElementType.MEDIACAST_VIDEO_FOLDER
if "baseClass=ilSAHSPresentationGUI" in parsed_url.query:
return IliasElementType.SCORM_LEARNING_MODULE
# Booking and Meeting can not be detected based on the link. They do have a ref_id though, so
# try to guess it from the image.
@@ -1031,6 +1035,9 @@ class IliasPage:
if str(img_tag["src"]).endswith("icon_mcst.svg"):
return IliasElementType.MEDIACAST_VIDEO_FOLDER
if str(img_tag["src"]).endswith("icon_sahs.svg"):
return IliasElementType.SCORM_LEARNING_MODULE
return IliasElementType.FOLDER
@staticmethod

View File

@@ -403,6 +403,14 @@ instance's greatest bottleneck.
"[bright_black](surveys contain no relevant data)"
)
return None
elif element.type == IliasElementType.SCORM_LEARNING_MODULE:
log.status(
"[bold bright_black]",
"Ignored",
fmt_path(element_path),
"[bright_black](scorm learning modules are not supported)"
)
return None
elif element.type == IliasElementType.LEARNING_MODULE:
return await self._handle_learning_module(element, element_path)
elif element.type == IliasElementType.LINK:
@@ -897,7 +905,7 @@ instance's greatest bottleneck.
soup = soupify(await request.read())
if self._is_logged_in(soup):
return self._verify_page(soup, url, root_page_allowed)
raise CrawlError("get_page failed even after authenticating")
raise CrawlError(f"get_page failed even after authenticating on {url!r}")
def _verify_page(self, soup: BeautifulSoup, url: str, root_page_allowed: bool) -> BeautifulSoup:
if IliasPage.is_root_page(soup) and not root_page_allowed: