diff --git a/CHANGELOG.md b/CHANGELOG.md
index 671d48a..70d2cd5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -22,6 +22,9 @@ ambiguous situations.
 
 ## Unreleased
 
+### Fixed
+- Forum crawling crashing when parsing empty (= 0 messages) threads
+
 ## 3.4.1 - 2022-08-17
 
 ### Added
diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py
index 7bab152..8795512 100644
--- a/PFERD/crawl/ilias/kit_ilias_html.py
+++ b/PFERD/crawl/ilias/kit_ilias_html.py
@@ -937,6 +937,13 @@ def parse_ilias_forum_export(forum_export: BeautifulSoup) -> List[IliasForumThre
     for p in forum_export.select("body > p"):
         title_tag = p
         content_tag = p.find_next_sibling("ul")
+
+        if not content_tag:
+            # ILIAS allows users to delete the initial post while keeping the thread open
+            # This produces empty threads without *any* content.
+            # I am not sure why you would want this, but ILIAS makes it easy to do.
+            continue
+
         title = p.find("b").text
         if ":" in title:
             title = title[title.find(":") + 1:]