diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py
index 636fa68..61df57a 100644
--- a/PFERD/crawl/ilias/kit_ilias_html.py
+++ b/PFERD/crawl/ilias/kit_ilias_html.py
@@ -461,3 +461,60 @@ def _tomorrow() -> date:
def _sanitize_path_name(name: str) -> str:
return name.replace("/", "-").replace("\\", "-").strip()
+
+
+def deduplicate_element_names(elements: List[IliasPageElement]) -> List[IliasPageElement]:
+    """
+    De-duplicates element names by appending an incrementing number to later elements:
+    test.pdf
+    test.pdf
+    would result in
+    test.pdf
+    test_1.pdf
+
+    It is also space-aware: names that contain spaces get a space-separated number:
+    "te st.pdf"
+    "te st.pdf"
+    would result in
+    "te st.pdf"
+    "te st 1.pdf"
+    """
+    known_names = dict()  # maps each taken name to the suffix number to try for its next duplicate
+ result_elements = []
+
+ for element in elements:
+ # This file is new - add it and mark its name as used
+ if element.name not in known_names:
+ known_names[element.name] = 1
+ result_elements.append(element)
+ continue
+
+        # This file is a duplicate. Find a free suffix, skipping names that are
+        # already taken (e.g. a literal "test_1.pdf" next to two "test.pdf"s)
+        current_counter = known_names[element.name]
+        adjusted_element = _append_number(element, current_counter)
+        while adjusted_element.name in known_names:
+            current_counter += 1
+            adjusted_element = _append_number(element, current_counter)
+        # remember which suffix to try next for this name
+        known_names[element.name] = current_counter + 1
+        # also block the new name, so another file with the *renamed* name gets renamed as well
+        known_names[adjusted_element.name] = 1
+
+ result_elements.append(adjusted_element)
+
+ return result_elements
+
+
+def _append_number(element: IliasPageElement, number: int) -> IliasPageElement:
+ extension_index = element.name.rfind(".")
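+    # mirror the name's own style: separate the number with a space if the name contains spaces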
+ suffix = f" {number}" if " " in element.name else f"_{number}"
+ if extension_index < 0:
+ new_name = element.name + suffix
+ else:
+ new_name = element.name[:extension_index] + suffix + element.name[extension_index:]
+
+ return IliasPageElement(
+ element.type, element.url, new_name, element.mtime, element.description
+ )
diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py
index 445997f..222e1d6 100644
--- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py
+++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py
@@ -15,7 +15,7 @@ from ...utils import fmt_path, soupify, url_set_query_param
from ..crawler import CrawlError, CrawlWarning, anoncritical
from ..http_crawler import HttpCrawler, HttpCrawlerSection
from .file_templates import link_template_plain, link_template_rich
-from .kit_ilias_html import IliasElementType, IliasPage, IliasPageElement
+from .kit_ilias_html import IliasElementType, IliasPage, IliasPageElement, deduplicate_element_names
TargetType = Union[str, int]
@@ -214,6 +214,8 @@ class KitIliasWebCrawler(HttpCrawler):
# Fill up our task list with the found elements
await gather_elements()
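+        # Rename duplicates so the downloaded files do not overwrite each other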
+ elements = deduplicate_element_names(elements)
tasks = [self._handle_ilias_element(PurePath("."), element) for element in elements]
# And execute them
@@ -240,6 +242,8 @@
# Fill up our task list with the found elements
await gather_elements()
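+        # Rename duplicates so the downloaded files do not overwrite each other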
+ elements = deduplicate_element_names(elements)
tasks = [self._handle_ilias_element(path, element) for element in elements]
# And execute them
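
For illustration, a quick sketch of the new behaviour (not part of the patch).
It assumes IliasPageElement takes (type, url, name, mtime, description)
positionally, as _append_number above does, and that IliasElementType has a
FILE member; the URLs and file names below are made up:

    # Sketch: exercising deduplicate_element_names from a PFERD checkout.
    from PFERD.crawl.ilias.kit_ilias_html import (
        IliasElementType,
        IliasPageElement,
        deduplicate_element_names,
    )

    # Hypothetical elements; only the names matter for deduplication
    elements = [
        IliasPageElement(IliasElementType.FILE, "https://example.com/1", "test.pdf", None, None),
        IliasPageElement(IliasElementType.FILE, "https://example.com/2", "test_1.pdf", None, None),
        IliasPageElement(IliasElementType.FILE, "https://example.com/3", "test.pdf", None, None),
        IliasPageElement(IliasElementType.FILE, "https://example.com/4", "te st.pdf", None, None),
        IliasPageElement(IliasElementType.FILE, "https://example.com/5", "te st.pdf", None, None),
    ]

    print([e.name for e in deduplicate_element_names(elements)])
    # ['test.pdf', 'test_1.pdf', 'test_2.pdf', 'te st.pdf', 'te st 1.pdf']

Note the third result: the second "test.pdf" skips suffix 1 because a real
"test_1.pdf" already occupies that name, which is the case the while loop in
deduplicate_element_names guards against.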