Compare commits

..

1 Commit

Author SHA1 Message Date
I-Al-Istannen
dd2fedf1a2 Fix crawling of exercises with instructions
We do not want a second path and the instruction field has an identical
link...
2025-04-29 18:30:33 +02:00
4 changed files with 15 additions and 13 deletions

View File

@@ -22,6 +22,9 @@ ambiguous situations.
## Unreleased ## Unreleased
## Fixed
- Crawling of exercises with instructions
## 3.8.2 - 2025-04-29 ## 3.8.2 - 2025-04-29
## Changed ## Changed

View File

@@ -45,8 +45,8 @@ def load(
load_crawler(args, section) load_crawler(args, section)
section["type"] = COMMAND_NAME section["type"] = COMMAND_NAME
if args.base_url is not None: if args.ilias_url is not None:
section["base_url"] = args.base_url section["base_url"] = args.ilias_url
if args.client_id is not None: if args.client_id is not None:
section["client_id"] = args.client_id section["client_id"] = args.client_id

View File

@@ -983,8 +983,6 @@ instance's greatest bottleneck.
soup = IliasSoup(soupify(await request.read()), str(request.url)) soup = IliasSoup(soupify(await request.read()), str(request.url))
if IliasPage.is_logged_in(soup): if IliasPage.is_logged_in(soup):
return self._verify_page(soup, url, root_page_allowed) return self._verify_page(soup, url, root_page_allowed)
with open("/tmp/ilias_debug.html", "w") as f:
f.write(str(soup.soup.prettify()))
raise CrawlError(f"get_page failed even after authenticating on {url!r}") raise CrawlError(f"get_page failed even after authenticating on {url!r}")
@staticmethod @staticmethod

View File

@@ -975,16 +975,17 @@ class IliasPage:
_unexpected_html_warning() _unexpected_html_warning()
return [] return []
individual_exercises = content_tab.find_all( exercise_links = content_tab.select(".il-item-title a")
name="a",
attrs={ for exercise in cast(list[Tag], exercise_links):
"href": lambda x: x is not None if "href" not in exercise.attrs:
and "ass_id=" in x continue
and "cmdClass=ilAssignmentPresentationGUI" in x href = exercise.attrs["href"]
} if type(href) is not str:
) continue
if "ass_id=" not in href or "cmdclass=ilassignmentpresentationgui" not in href.lower():
continue
for exercise in cast(list[Tag], individual_exercises):
name = _sanitize_path_name(exercise.get_text().strip()) name = _sanitize_path_name(exercise.get_text().strip())
results.append(IliasPageElement.create_new( results.append(IliasPageElement.create_new(
IliasElementType.EXERCISE, IliasElementType.EXERCISE,