mirror of
https://github.com/Garmelon/PFERD.git
synced 2023-12-21 10:23:01 +01:00
Improve log messages when parsing ILIAS HTML
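Previously, some log messages were split around an "await", which isn't a great idea: while the coroutine is suspended, other concurrently running tasks can emit their own log output in between, separating lines that belong together.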
This commit is contained in:
parent
474aa7e1cc
commit
1ca6740e05
@ -142,7 +142,7 @@ class IliasPage:
|
|||||||
url: str = self._abs_url_from_link(content_link)
|
url: str = self._abs_url_from_link(content_link)
|
||||||
query_params = {"limit": "800", "cmd": "asyncGetTableGUI", "cmdMode": "asynch"}
|
query_params = {"limit": "800", "cmd": "asyncGetTableGUI", "cmdMode": "asynch"}
|
||||||
url = url_set_query_params(url, query_params)
|
url = url_set_query_params(url, query_params)
|
||||||
log.explain("Found ILIAS redirection page, following it as a new entry")
|
log.explain("Found ILIAS video frame page, fetching actual content next")
|
||||||
return [IliasPageElement(IliasElementType.VIDEO_FOLDER_MAYBE_PAGINATED, url, "")]
|
return [IliasPageElement(IliasElementType.VIDEO_FOLDER_MAYBE_PAGINATED, url, "")]
|
||||||
|
|
||||||
is_paginated = self._soup.find(id=re.compile(r"tab_page_sel.+")) is not None
|
is_paginated = self._soup.find(id=re.compile(r"tab_page_sel.+")) is not None
|
||||||
@ -205,7 +205,7 @@ class IliasPage:
|
|||||||
|
|
||||||
video_url = self._abs_url_from_link(link)
|
video_url = self._abs_url_from_link(link)
|
||||||
|
|
||||||
log.explain(f"Found video {video_name!r} at {video_url!r}")
|
log.explain(f"Found video {video_name!r} at {video_url}")
|
||||||
return IliasPageElement(IliasElementType.VIDEO_PLAYER, video_url, video_name, modification_time)
|
return IliasPageElement(IliasElementType.VIDEO_PLAYER, video_url, video_name, modification_time)
|
||||||
|
|
||||||
def _find_exercise_entries(self) -> List[IliasPageElement]:
|
def _find_exercise_entries(self) -> List[IliasPageElement]:
|
||||||
@ -436,7 +436,7 @@ class IliasPage:
|
|||||||
|
|
||||||
_unexpected_html_warning()
|
_unexpected_html_warning()
|
||||||
log.warn_contd(
|
log.warn_contd(
|
||||||
f"Tried to figure out element type, but failed for {str(element_name)!r} / {link_element!r})"
|
f"Tried to figure out element type, but failed for {element_name!r} / {link_element!r})"
|
||||||
)
|
)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
@ -243,11 +243,11 @@ class KitIliasWebCrawler(HttpCrawler):
|
|||||||
elements.clear()
|
elements.clear()
|
||||||
async with cl:
|
async with cl:
|
||||||
next_stage_url: Optional[str] = url
|
next_stage_url: Optional[str] = url
|
||||||
log.explain_topic(f"Parsing HTML page for {fmt_path(path)}")
|
|
||||||
|
|
||||||
while next_stage_url:
|
while next_stage_url:
|
||||||
soup = await self._get_page(next_stage_url)
|
soup = await self._get_page(next_stage_url)
|
||||||
log.explain(f"URL: {url}")
|
log.explain_topic(f"Parsing HTML page for {fmt_path(path)}")
|
||||||
|
log.explain(f"URL: {next_stage_url}")
|
||||||
page = IliasPage(soup, url, parent)
|
page = IliasPage(soup, url, parent)
|
||||||
next_stage_url = page.get_next_stage_url()
|
next_stage_url = page.get_next_stage_url()
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user