From c020cccc64f152882688b119416f0582ec94e074 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sat, 29 Oct 2022 14:08:29 +0200 Subject: [PATCH] Include found paths in "second path found" warning --- CHANGELOG.md | 3 +++ PFERD/crawl/ilias/kit_ilias_html.py | 2 +- PFERD/crawl/ilias/kit_ilias_web_crawler.py | 8 +++++--- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9ecddf7..3dd25b8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,9 @@ ambiguous situations. ## Unreleased +### Changed +- Clear up error message shown when multiple paths are found to an element + ## 3.4.2 - 2022-10-26 ### Added diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index ee0364a..56dcf7b 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -134,7 +134,7 @@ class IliasPage: thread_ids = [f["value"] for f in form.find_all(attrs={"name": "thread_ids[]"})] - form_data: Dict[str, Union[str, List[ſtr]]] = { + form_data: Dict[str, Union[str, List[str]]] = { "thread_ids[]": thread_ids, "selected_cmd2": "html", "select_cmd2": "Ausführen", diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index 9295e93..e3719b8 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -194,7 +194,7 @@ instance's greatest bottleneck. self._links = section.links() self._videos = section.videos() self._forums = section.forums() - self._visited_urls: Set[str] = set() + self._visited_urls: Dict[str, PurePath] = dict() async def _run(self) -> None: if isinstance(self._target, int): @@ -348,9 +348,11 @@ instance's greatest bottleneck. ) -> Optional[Coroutine[Any, Any, None]]: if element.url in self._visited_urls: raise CrawlWarning( - f"Found second path to element {element.name!r} at {element.url!r}. Aborting subpath" + f"Found second path to element {element.name!r} at {element.url!r}. " + + f"First path: {fmt_path(self._visited_urls[element.url])}. " + + f"Second path: {fmt_path(parent_path)}." ) - self._visited_urls.add(element.url) + self._visited_urls[element.url] = parent_path element_path = PurePath(parent_path, element.name)