From 1ca6740e052166397b76b9eb9df3e7c33cf52efc Mon Sep 17 00:00:00 2001
From: I-Al-Istannen
Date: Thu, 27 May 2021 17:59:22 +0200
Subject: [PATCH] Improve log messages when parsing ILIAS HTML

Previously some logs were split around an "await", which isn't a great idea.
---
 PFERD/crawl/ilias/kit_ilias_html.py        | 6 +++---
 PFERD/crawl/ilias/kit_ilias_web_crawler.py | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py
index afb7005..a2f30e1 100644
--- a/PFERD/crawl/ilias/kit_ilias_html.py
+++ b/PFERD/crawl/ilias/kit_ilias_html.py
@@ -142,7 +142,7 @@ class IliasPage:
             url: str = self._abs_url_from_link(content_link)
             query_params = {"limit": "800", "cmd": "asyncGetTableGUI", "cmdMode": "asynch"}
             url = url_set_query_params(url, query_params)
-            log.explain("Found ILIAS redirection page, following it as a new entry")
+            log.explain("Found ILIAS video frame page, fetching actual content next")
             return [IliasPageElement(IliasElementType.VIDEO_FOLDER_MAYBE_PAGINATED, url, "")]
 
         is_paginated = self._soup.find(id=re.compile(r"tab_page_sel.+")) is not None
@@ -205,7 +205,7 @@ class IliasPage:
 
         video_url = self._abs_url_from_link(link)
 
-        log.explain(f"Found video {video_name!r} at {video_url!r}")
+        log.explain(f"Found video {video_name!r} at {video_url}")
         return IliasPageElement(IliasElementType.VIDEO_PLAYER, video_url, video_name, modification_time)
 
     def _find_exercise_entries(self) -> List[IliasPageElement]:
@@ -436,7 +436,7 @@ class IliasPage:
 
         _unexpected_html_warning()
         log.warn_contd(
-            f"Tried to figure out element type, but failed for {str(element_name)!r} / {link_element!r})"
+            f"Tried to figure out element type, but failed for {element_name!r} / {link_element!r})"
         )
         return None
 
diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py
index 12a6e79..fbbfc1b 100644
--- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py
+++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py
@@ -243,11 +243,11 @@ class KitIliasWebCrawler(HttpCrawler):
             elements.clear()
             async with cl:
                 next_stage_url: Optional[str] = url
-                log.explain_topic(f"Parsing HTML page for {fmt_path(path)}")
 
                 while next_stage_url:
                     soup = await self._get_page(next_stage_url)
-                    log.explain(f"URL: {url}")
+                    log.explain_topic(f"Parsing HTML page for {fmt_path(path)}")
+                    log.explain(f"URL: {next_stage_url}")
                     page = IliasPage(soup, url, parent)
                     next_stage_url = page.get_next_stage_url()
 
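
Why grouping the two explain calls behind the "await" matters can be seen in a
small standalone sketch. This is not PFERD code: plain print() calls stand in
for log.explain_topic/log.explain, and asyncio.sleep() with made-up page names
stands in for the real ILIAS requests; only the interleaving behaviour is the
point.

    # Minimal sketch: a "topic" line logged before an await can get separated
    # from its detail line, because another concurrently crawled page may log
    # in between while this task is suspended at the await.
    import asyncio
    import random


    async def crawl_split(name: str) -> None:
        # Old shape: topic first, then suspend, then the URL line.
        print(f"Parsing HTML page for {name}")
        await asyncio.sleep(random.random() / 10)  # other tasks may log here
        print(f"URL: https://example.com/{name}")


    async def crawl_grouped(name: str) -> None:
        # New shape: suspend first, then log topic and URL back to back.
        # With no await between the two prints, no other task can run in between.
        await asyncio.sleep(random.random() / 10)
        print(f"Parsing HTML page for {name}")
        print(f"URL: https://example.com/{name}")


    async def main() -> None:
        print("-- split around await: topic and URL may interleave --")
        await asyncio.gather(*(crawl_split(f"page-{i}") for i in range(3)))
        print("-- grouped after await: topic and URL stay together --")
        await asyncio.gather(*(crawl_grouped(f"page-{i}") for i in range(3)))


    asyncio.run(main())

The kit_ilias_web_crawler.py hunk also switches the logged value from "url" to
"next_stage_url", so each loop iteration reports the page it actually fetched
instead of repeating the initial URL.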