From 63f25277b04a46e415da4f994f17e2b211ddbaf9 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sun, 9 Mar 2025 23:44:25 +0100 Subject: [PATCH] Fix crawling of empty forum threads --- PFERD/crawl/ilias/kit_ilias_html.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index 7956b00..963ab05 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -1387,16 +1387,18 @@ def parse_ilias_forum_export(forum_export: BeautifulSoup) -> list[IliasForumThre title_tag = p content_tag = cast(Optional[Tag], p.find_next_sibling("ul")) - if not content_tag: - # ILIAS allows users to delete the initial post while keeping the thread open - # This produces empty threads without *any* content. - # I am not sure why you would want this, but ILIAS makes it easy to do. - continue - title = cast(Tag, p.find("b")).text if ":" in title: title = title[title.find(":") + 1:] title = title.strip() + + if not content_tag or content_tag.find_previous_sibling("p") != title_tag: + # ILIAS allows users to delete the initial post while keeping the thread open + # This produces empty threads without *any* content. + # I am not sure why you would want this, but ILIAS makes it easy to do. + elements.append(IliasForumThread(title, title_tag, forum_export.new_tag("ul"), None)) + continue + mtime = _guess_timestamp_from_forum_post_content(content_tag) elements.append(IliasForumThread(title, title_tag, content_tag, mtime))