From 72cd0f77e2d7e58a3505961f3c58b94636e24156 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Wed, 19 Feb 2025 14:29:37 +0100 Subject: [PATCH] Prettify forum thread exports Co-authored-by: Tim --- CHANGELOG.md | 3 + PFERD/crawl/ilias/file_templates.py | 89 ++++++++++++++++++++++++++ PFERD/crawl/ilias/ilias_web_crawler.py | 12 ++-- 3 files changed, 99 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 572f8c7..ae82e4f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,9 @@ ambiguous situations. ## Unreleased +### Changed +- Added prettier CSS to forum threads + ## Fixed - File links in report on Windows - TOTP authentication in KIT Shibboleth diff --git a/PFERD/crawl/ilias/file_templates.py b/PFERD/crawl/ilias/file_templates.py index 0a72199..e148875 100644 --- a/PFERD/crawl/ilias/file_templates.py +++ b/PFERD/crawl/ilias/file_templates.py @@ -126,6 +126,88 @@ _learning_module_template = """ """ +_forum_thread_template = """ + + + + + ILIAS - Forum: {{name}} + + + + {{heading}} + {{content}} + + +""".strip() # noqa: E501 line too long + def learning_module_template(body: bs4.Tag, name: str, prev: Optional[str], next: Optional[str]) -> str: # Seems to be comments, ignore those. @@ -164,6 +246,13 @@ def learning_module_template(body: bs4.Tag, name: str, prev: Optional[str], next return _learning_module_template.replace("{{body}}", body_str).replace("{{name}}", name) +def forum_thread_template(name: str, heading: bs4.Tag, content: bs4.Tag) -> str: + return _forum_thread_template \ + .replace("{{name}}", name) \ + .replace("{{heading}}", cast(str, heading.prettify())) \ + .replace("{{content}}", cast(str, content.prettify())) + + class Links(Enum): IGNORE = "ignore" PLAINTEXT = "plaintext" diff --git a/PFERD/crawl/ilias/ilias_web_crawler.py b/PFERD/crawl/ilias/ilias_web_crawler.py index 7351593..bc90991 100644 --- a/PFERD/crawl/ilias/ilias_web_crawler.py +++ b/PFERD/crawl/ilias/ilias_web_crawler.py @@ -19,7 +19,7 @@ from ...utils import fmt_path, soupify, url_set_query_param from ..crawler import CrawlError, CrawlToken, CrawlWarning, DownloadToken, anoncritical from ..http_crawler import HttpCrawler, HttpCrawlerSection from .async_helper import _iorepeat -from .file_templates import Links, learning_module_template +from .file_templates import Links, forum_thread_template, learning_module_template from .ilias_html_cleaner import clean, insert_base_markup from .kit_ilias_html import (IliasElementType, IliasForumThread, IliasLearningModulePage, IliasPage, IliasPageElement, _sanitize_path_name, parse_ilias_forum_export) @@ -786,10 +786,12 @@ instance's greatest bottleneck. return async with maybe_dl as (bar, sink): - content = "\n" - content += cast(str, element.name_tag.prettify()) - content += cast(str, await self.internalize_images(element.content_tag.prettify())) - sink.file.write(content.encode("utf-8")) + rendered = forum_thread_template( + element.name, + element.name_tag, + element.content_tag + ) + sink.file.write(rendered.encode("utf-8")) sink.done() async def _handle_learning_module(