diff --git a/CHANGELOG.md b/CHANGELOG.md index d58ea18..0e93f01 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,7 @@ ambiguous situations. - Crawling of file and custom opencast cards - Crawling of button cards without descriptions - Abort crawling when encountering an unexpected ilias root page redirect +- Remove size suffix for files in content pages ### Added - `no-delete-prompt-override` conflict resolution strategy diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index d5ea76d..c0807d3 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -377,7 +377,8 @@ class IliasPage: for link in links: url = self._abs_url_from_link(link) - name = _sanitize_path_name(link.getText().strip().replace("\t", "")) + name = re.sub(r"\([\d,.]+ [MK]B\)", "", link.getText()).strip().replace("\t", "") + name = _sanitize_path_name(name) if "file_id" not in url: _unexpected_html_warning()