Crawl the info tab of courses again

This got lost in a refactor
This commit is contained in:
I-Al-Istannen
2025-12-02 16:33:04 +01:00
parent 3f5637366e
commit e246053de2
3 changed files with 7 additions and 2 deletions

View File

@@ -29,6 +29,7 @@ ambiguous situations.
 ## Fixed
 - Event loop errors on Windows with Python 3.14
 - Sanitize `/` in headings in kit-ipd crawler
+- Crawl info tab again
 ## 3.8.3 - 2025-07-01

View File

@@ -297,6 +297,8 @@ instance's greatest bottleneck.
 page = cast(IliasPage, page)
 elements.extend(page.get_child_elements())
+if current_element is None and (info_tab := page.get_info_tab()):
+    elements.append(info_tab)
 if description_string := page.get_description():
     description.append(description_string)

View File

@@ -739,9 +739,10 @@ class IliasPage:
 links: list[Tag] = self._soup.select("a.il_ContainerItemCommand")
 for link in links:
-    if "cmdClass=ilobjcoursegui" not in link["href"]:
+    log.explain(f"Found info tab link: {self._abs_url_from_link(link)}")
+    if "cmdclass=ilobjcoursegui" not in cast(str, link["href"]).lower():
         continue
-    if "cmd=sendfile" not in link["href"]:
+    if "cmd=sendfile" not in cast(str, link["href"]).lower():
         continue
     items.append(
         IliasPageElement.create_new(
@@ -749,6 +750,7 @@ class IliasPage:
         )
     )
+log.explain(f"Found {len(items)} info tab entries {items}")
 return items
 def _find_opencast_video_entries(self) -> list[IliasPageElement]: