From 6dda4c55a8bdd0afba9126f39e7402df7dc59479 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Tue, 5 Nov 2024 18:36:21 +0100 Subject: [PATCH] Add doctype header to forum threads This should fix mimetype detection on most systems and is more relevant now that the report is clickable --- CHANGELOG.md | 1 + PFERD/crawl/ilias/ilias_web_crawler.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8024bba..5206b20 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -38,6 +38,7 @@ ambiguous situations. - Crawling of nested courses - Downloading of links with no target URL - Handle row flex on description pages +- Add `<!DOCTYPE html>` heading to forum threads to fix mime type detection ## 3.6.0 - 2024-10-23 diff --git a/PFERD/crawl/ilias/ilias_web_crawler.py b/PFERD/crawl/ilias/ilias_web_crawler.py index a6c68f1..2fc399d 100644 --- a/PFERD/crawl/ilias/ilias_web_crawler.py +++ b/PFERD/crawl/ilias/ilias_web_crawler.py @@ -750,7 +750,8 @@ instance's greatest bottleneck. return async with maybe_dl as (bar, sink): - content = element.title_tag.prettify() + content = "<!DOCTYPE html>\n" + content += element.title_tag.prettify() content += element.content_tag.prettify() sink.file.write(content.encode("utf-8")) sink.done()