From 4a51aaa4f5a1b3382f0bed59f1292fc0952c2832 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Wed, 19 Oct 2022 22:59:33 +0200 Subject: [PATCH] Fix forum crawling crashing for empty threads --- CHANGELOG.md | 3 +++ PFERD/crawl/ilias/kit_ilias_html.py | 7 +++++++ 2 files changed, 10 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 671d48a..70d2cd5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,9 @@ ambiguous situations. ## Unreleased +### Fixed +- Forum crawling crashing when parsing empty (= 0 messages) threads + ## 3.4.1 - 2022-08-17 ### Added diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index 7bab152..8795512 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -937,6 +937,13 @@ def parse_ilias_forum_export(forum_export: BeautifulSoup) -> List[IliasForumThre for p in forum_export.select("body > p"): title_tag = p content_tag = p.find_next_sibling("ul") + + if not content_tag: + # ILIAS allows users to delete the initial post while keeping the thread open + # This produces empty threads without *any* content. + # I am not sure why you would want this, but ILIAS makes it easy to do. + continue + title = p.find("b").text if ":" in title: title = title[title.find(":") + 1:]