diff --git a/CHANGELOG.md b/CHANGELOG.md index f9bf6d0..59cc6fe 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,9 @@ ambiguous situations. ## Fixed - Crawling of exercises with instructions +- Don't download unavailable elements. + For elements that are unavailable (for example, because their availability is + time-restricted), the HTML of the info page is no longer downloaded. ## 3.8.2 - 2025-04-29 diff --git a/PFERD/crawl/ilias/ilias_web_crawler.py b/PFERD/crawl/ilias/ilias_web_crawler.py index b682c0a..2eb8e9c 100644 --- a/PFERD/crawl/ilias/ilias_web_crawler.py +++ b/PFERD/crawl/ilias/ilias_web_crawler.py @@ -329,6 +329,15 @@ instance's greatest bottleneck. # directory escape attacks. element_path = PurePath(parent_path, element.name) + # This is symptomatic of no access to the element, for example, because + # of time availability restrictions. + if "cmdClass=ilInfoScreenGUI" in element.url and "cmd=showSummary" in element.url: + log.explain( + "Skipping element as url points to info screen, " + "this should only happen with not-yet-released elements" + ) + return None + if element.type in _VIDEO_ELEMENTS: if not self._videos: log.status(