mirror of
https://github.com/Garmelon/PFERD.git
synced 2023-12-21 10:23:01 +01:00
Use final crawl path in HTML parsing message
This commit is contained in:
parent
91200f3684
commit
89be07d4d3
@ -247,13 +247,12 @@ instance's greatest bottleneck.
|
|||||||
maybe_cl = await self.crawl(path)
|
maybe_cl = await self.crawl(path)
|
||||||
if not maybe_cl:
|
if not maybe_cl:
|
||||||
return None
|
return None
|
||||||
return self._crawl_ilias_page(url, parent, path, maybe_cl)
|
return self._crawl_ilias_page(url, parent, maybe_cl)
|
||||||
|
|
||||||
async def _crawl_ilias_page(
|
async def _crawl_ilias_page(
|
||||||
self,
|
self,
|
||||||
url: str,
|
url: str,
|
||||||
parent: IliasPageElement,
|
parent: IliasPageElement,
|
||||||
path: PurePath,
|
|
||||||
cl: CrawlToken,
|
cl: CrawlToken,
|
||||||
) -> None:
|
) -> None:
|
||||||
elements: List[IliasPageElement] = []
|
elements: List[IliasPageElement] = []
|
||||||
@ -267,7 +266,7 @@ instance's greatest bottleneck.
|
|||||||
|
|
||||||
while next_stage_url:
|
while next_stage_url:
|
||||||
soup = await self._get_page(next_stage_url)
|
soup = await self._get_page(next_stage_url)
|
||||||
log.explain_topic(f"Parsing HTML page for {fmt_path(path)}")
|
log.explain_topic(f"Parsing HTML page for {fmt_path(cl.path)}")
|
||||||
log.explain(f"URL: {next_stage_url}")
|
log.explain(f"URL: {next_stage_url}")
|
||||||
page = IliasPage(soup, next_stage_url, current_parent)
|
page = IliasPage(soup, next_stage_url, current_parent)
|
||||||
if next_element := page.get_next_stage_element():
|
if next_element := page.get_next_stage_element():
|
||||||
|
Loading…
Reference in New Issue
Block a user