From 549ce6cce911f298eb0ea16c6e00dca2880d7dc4 Mon Sep 17 00:00:00 2001 From: "Mr. Pine" Date: Wed, 28 May 2025 17:04:57 +0200 Subject: [PATCH] Ignore unavailable elements (#119) --- CHANGELOG.md | 3 +++ PFERD/crawl/ilias/ilias_web_crawler.py | 9 +++++++++ 2 files changed, 12 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f9bf6d0..59cc6fe 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,9 @@ ambiguous situations. ## Fixed - Crawling of exercises with instructions +- Don't download unavailable elements. + For elements that are unavailable (for example, because their availability + is time-restricted), the HTML of the info page is no longer downloaded. ## 3.8.2 - 2025-04-29 diff --git a/PFERD/crawl/ilias/ilias_web_crawler.py b/PFERD/crawl/ilias/ilias_web_crawler.py index b682c0a..2eb8e9c 100644 --- a/PFERD/crawl/ilias/ilias_web_crawler.py +++ b/PFERD/crawl/ilias/ilias_web_crawler.py @@ -329,6 +329,15 @@ instance's greatest bottleneck. # directory escape attacks. element_path = PurePath(parent_path, element.name) + # This is symptomatic of no access to the element, for example, because + # of time availability restrictions. + if "cmdClass=ilInfoScreenGUI" in element.url and "cmd=showSummary" in element.url: + log.explain( + "Skipping element as url points to info screen, " + "this should only happen with not-yet-released elements" + ) + return None + if element.type in _VIDEO_ELEMENTS: if not self._videos: log.status(