Bump version to 3.8.2

Fix exercise crawling
Fix login using the native ilias login form
2025-07-12 22:22:30 +02:00 · 2025-04-29 17:55:57 +02:00 · 2025-04-25 13:45:57 +02:00 · 2025-04-23 16:08:45 +02:00 · 2025-04-23 16:03:37 +02:00
4 changed files with 123 additions and 73 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -22,6 +22,15 @@ ambiguous situations.
 ## Unreleased
 ## 3.8.2 - 2025-04-29
 ### Changed
 - Explicitly mention that wikis are not supported at the moment and ignore them
 ### Fixed
 - Ilias-native login
 - Exercise crawling
 ## 3.8.1 - 2025-04-17
 ### Fixed
--- a/PFERD/crawl/ilias/ilias_web_crawler.py
+++ b/PFERD/crawl/ilias/ilias_web_crawler.py
@@ -107,6 +107,7 @@ class IliasWebCrawlerSection(HttpCrawlerSection):
 _DIRECTORY_PAGES: Set[IliasElementType] = {
    IliasElementType.EXERCISE,
    IliasElementType.EXERCISE_FILES,
    IliasElementType.EXERCISE_OVERVIEW,
    IliasElementType.FOLDER,
    IliasElementType.INFO_TAB,
    IliasElementType.MEDIACAST_VIDEO_FOLDER,
@@ -424,6 +425,14 @@ instance's greatest bottleneck.
                "[bright_black](not descending into linked course)"
            )
            return None
        elif element.type == IliasElementType.WIKI:
            log.status(
                "[bold bright_black]",
                "Ignored",
                fmt_path(element_path),
                "[bright_black](wikis are not currently supported)"
            )
            return None
        elif element.type == IliasElementType.LEARNING_MODULE:
            return await self._handle_learning_module(element, element_path)
        elif element.type == IliasElementType.LINK:
@@ -1031,7 +1040,7 @@ instance's greatest bottleneck.
            async with self.session.get(urljoin(self._base_url, "/login.php"), params=params) as request:
                login_page = soupify(await request.read())
-            login_form = cast(Optional[Tag], login_page.find("form", attrs={"name": "formlogin"}))
+            login_form = cast(Optional[Tag], login_page.find("form", attrs={"name": "login_form"}))
            if login_form is None:
                raise CrawlError("Could not find the login form! Specified client id might be invalid.")
@@ -1041,14 +1050,12 @@ instance's greatest bottleneck.
            username, password = await self._auth.credentials()
-            login_data = {
+            login_form_data = aiohttp.FormData()
-                "username": username,
+            login_form_data.add_field('login_form/input_3/input_4', username)
-                "password": password,
+            login_form_data.add_field('login_form/input_3/input_5', password)
                "cmd[doStandardAuthentication]": "Login",
            }
            # do the actual login
-            async with self.session.post(urljoin(self._base_url, login_url), data=login_data) as request:
+            async with self.session.post(urljoin(self._base_url, login_url), data=login_form_data) as request:
                soup = IliasSoup(soupify(await request.read()), str(request.url))
                if not IliasPage.is_logged_in(soup):
                    self._auth.invalidate_credentials()
--- a/PFERD/crawl/ilias/kit_ilias_html.py
+++ b/PFERD/crawl/ilias/kit_ilias_html.py
@@ -97,7 +97,8 @@ class IliasElementType(Enum):
    BOOKING = "booking"
    COURSE = "course"
    DCL_RECORD_LIST = "dcl_record_list"
-    EXERCISE = "exercise"
+    EXERCISE_OVERVIEW = "exercise_overview"
    EXERCISE = "exercise"  # own submitted files
    EXERCISE_FILES = "exercise_files"  # own submitted files
    FILE = "file"
    FOLDER = "folder"
@@ -120,6 +121,7 @@ class IliasElementType(Enum):
    SCORM_LEARNING_MODULE = "scorm_learning_module"
    SURVEY = "survey"
    TEST = "test"  # an online test. Will be ignored currently.
    WIKI = "wiki"
    def matcher(self) -> IliasElementMatcher:
        match self:
@@ -140,13 +142,15 @@ class IliasElementType(Enum):
                    TypeMatcher.query("cmdclass=ildclrecordlistgui")
                )
            case IliasElementType.EXERCISE:
                return TypeMatcher.never()
            case IliasElementType.EXERCISE_FILES:
                return TypeMatcher.never()
            case IliasElementType.EXERCISE_OVERVIEW:
                return TypeMatcher.any(
                    TypeMatcher.path("/exc/"),
                    TypeMatcher.path("_exc_"),
                    TypeMatcher.img_src("_exc.svg"),
                )
            case IliasElementType.EXERCISE_FILES:
                return TypeMatcher.never()
            case IliasElementType.FILE:
                return TypeMatcher.any(
                    TypeMatcher.query("cmd=sendfile"),
@@ -243,6 +247,11 @@ class IliasElementType(Enum):
                    TypeMatcher.query("cmdclass=iltestscreengui"),
                    TypeMatcher.img_src("_tst.svg")
                )
            case IliasElementType.WIKI:
                return TypeMatcher.any(
                    TypeMatcher.query("baseClass=ilwikihandlergui"),
                    TypeMatcher.img_src("wiki.svg")
                )
        raise CrawlWarning(f"Unknown matcher {self}")
@@ -524,6 +533,8 @@ class IliasPage:
        if self._contains_collapsed_future_meetings():
            log.explain("Requesting *all* future meetings")
            return self._uncollapse_future_meetings_url()
        if self._is_exercise_not_all_shown():
            return self._show_all_exercises()
        if not self._is_content_tab_selected():
            if self._page_type != IliasElementType.INFO_TAB:
                log.explain("Selecting content tab")
@@ -555,7 +566,7 @@ class IliasPage:
    def _is_exercise_file(self) -> bool:
        # we know it from before
-        if self._page_type == IliasElementType.EXERCISE:
+        if self._page_type == IliasElementType.EXERCISE_OVERVIEW:
            return True
        # We have no suitable parent - let's guesss
@@ -592,6 +603,17 @@ class IliasPage:
        link = self._abs_url_from_link(element)
        return IliasPageElement.create_new(IliasElementType.FOLDER, link, "show all meetings")
    def _is_exercise_not_all_shown(self) -> bool:
        return (self._page_type == IliasElementType.EXERCISE_OVERVIEW
                and "mode=all" not in self._page_url.lower())
    def _show_all_exercises(self) -> Optional[IliasPageElement]:
        return IliasPageElement.create_new(
            IliasElementType.EXERCISE_OVERVIEW,
            self._page_url + "&mode=all",
            "show all exercises"
        )
    def _is_content_tab_selected(self) -> bool:
        return self._select_content_page_url() is None
@@ -857,15 +879,62 @@ class IliasPage:
    def _find_exercise_entries(self) -> list[IliasPageElement]:
        if self._soup.find(id="tab_submission"):
-            log.explain("Found submission tab. This is an exercise detail page")
+            log.explain("Found submission tab. This is an exercise detail or files page")
            if self._soup.select_one("#tab_submission.active") is None:
                log.explain("  This is a details page")
                return self._find_exercise_entries_detail_page()
            else:
                log.explain("  This is a files page")
                return self._find_exercise_entries_files_page()
        log.explain("Found no submission tab. This is an exercise root page")
        return self._find_exercise_entries_root_page()
    def _find_exercise_entries_detail_page(self) -> list[IliasPageElement]:
        results: list[IliasPageElement] = []
-        # Find all download links in the container (this will contain all the files)
+        if link := cast(Optional[Tag], self._soup.select_one("#tab_submission > a")):
            results.append(IliasPageElement.create_new(
                IliasElementType.EXERCISE_FILES,
                self._abs_url_from_link(link),
                "Submission"
            ))
        else:
            log.explain("Found no submission link for exercise, maybe it has not started yet?")
        # Find all download links in the container (this will contain all the *feedback* files)
        download_links = cast(list[Tag], self._soup.find_all(
            name="a",
            # download links contain the given command class
            attrs={"href": lambda x: x is not None and "cmd=download" in x},
            text="Download"
        ))
        for link in download_links:
            parent_row: Tag = cast(Tag, link.find_parent(
                attrs={"class": lambda x: x is not None and "row" in x}))
            name_tag = cast(Optional[Tag], parent_row.find(name="div"))
            if not name_tag:
                log.warn("Could not find name tag for exercise entry")
                _unexpected_html_warning()
                continue
            name = _sanitize_path_name(name_tag.get_text().strip())
            log.explain(f"Found exercise detail entry {name!r}")
            results.append(IliasPageElement.create_new(
                IliasElementType.FILE,
                self._abs_url_from_link(link),
                name
            ))
        return results
    def _find_exercise_entries_files_page(self) -> list[IliasPageElement]:
        results: list[IliasPageElement] = []
        # Find all download links in the container
        download_links = cast(list[Tag], self._soup.find_all(
            name="a",
            # download links contain the given command class
@@ -878,7 +947,7 @@ class IliasPage:
            children = cast(list[Tag], parent_row.find_all("td"))
            name = _sanitize_path_name(children[1].get_text().strip())
-            log.explain(f"Found exercise detail entry {name!r}")
+            log.explain(f"Found exercise file entry {name!r}")
            date = None
            for child in reversed(children):
@@ -886,7 +955,7 @@ class IliasPage:
                if date is not None:
                    break
            if date is None:
-                log.warn(f"Date parsing failed for exercise entry {name!r}")
+                log.warn(f"Date parsing failed for exercise file entry {name!r}")
            results.append(IliasPageElement.create_new(
                IliasElementType.FILE,
@@ -900,66 +969,31 @@ class IliasPage:
    def _find_exercise_entries_root_page(self) -> list[IliasPageElement]:
        results: list[IliasPageElement] = []
-        # Each assignment is in an accordion container
+        content_tab = cast(Optional[Tag], self._soup.find(id="ilContentContainer"))
-        assignment_containers: list[Tag] = self._soup.select(".il_VAccordionInnerContainer")
+        if not content_tab:
            log.warn("Could not find content tab in exercise overview page")
            _unexpected_html_warning()
            return []
-        for container in assignment_containers:
+        individual_exercises = content_tab.find_all(
            # Fetch the container name out of the header to use it in the path
            container_name = cast(Tag, container.select_one(".ilAssignmentHeader")).get_text().strip()
            log.explain(f"Found exercise container {container_name!r}")
            # Find all download links in the container (this will contain all the files)
            files = cast(list[Tag], container.find_all(
            name="a",
-                # download links contain the given command class
+            attrs={
-                attrs={"href": lambda x: x is not None and "cmdClass=ilexsubmissiongui" in x},
+                "href": lambda x: x is not None
-                text="Download"
+                and "ass_id=" in x
-            ))
+                and "cmdClass=ilAssignmentPresentationGUI" in x
            }
        )
-            # Grab each file as you now have the link
+        for exercise in cast(list[Tag], individual_exercises):
-            for file_link in files:
+            name = _sanitize_path_name(exercise.get_text().strip())
                # Two divs, side by side. Left is the name, right is the link ==> get left
                # sibling
                file_name = cast(
                    Tag,
                    cast(Tag, file_link.parent).find_previous(name="div")
                ).get_text().strip()
                url = self._abs_url_from_link(file_link)
                log.explain(f"Found exercise entry {file_name!r}")
            results.append(IliasPageElement.create_new(
-                    IliasElementType.FILE,
+                IliasElementType.EXERCISE,
-                    url,
+                self._abs_url_from_link(exercise),
-                    _sanitize_path_name(container_name) + "/" + _sanitize_path_name(file_name),
+                name
                    mtime=None,  # We do not have any timestamp
                    skip_sanitize=True
            ))
-            # Find all links to file listings (e.g. "Submitted Files" for groups)
+        for result in results:
-            file_listings = cast(list[Tag], container.find_all(
+            log.explain(f"Found exercise {result.name!r}")
                name="a",
                # download links contain the given command class
                attrs={"href": lambda x: x is not None and "cmdclass=ilexsubmissionfilegui" in x.lower()}
            ))
            # Add each listing as a new
            for listing in file_listings:
                parent_container = cast(Tag, listing.find_parent(
                    "div", attrs={"class": lambda x: x is not None and "form-group" in x}
                ))
                label_container = cast(Tag, parent_container.find(
                    attrs={"class": lambda x: x is not None and "control-label" in x}
                ))
                file_name = label_container.get_text().strip()
                url = self._abs_url_from_link(listing)
                log.explain(f"Found exercise detail {file_name!r} at {url}")
                results.append(IliasPageElement.create_new(
                    IliasElementType.EXERCISE_FILES,
                    url,
                    _sanitize_path_name(container_name) + "/" + _sanitize_path_name(file_name),
                    None,  # we do not have any timestamp
                    skip_sanitize=True
                ))
        return results
--- a/PFERD/version.py
+++ b/PFERD/version.py
@@ -1,2 +1,2 @@
 NAME = "PFERD"
-VERSION = "3.8.1"
+VERSION = "3.8.2"
Author	SHA1	Message	Date
I-Al-Istannen	77a23265a9	Bump version to 3.8.2	2025-04-29 17:55:57 +02:00
I-Al-Istannen	4c230ef6dd	Fix exercise crawling	2025-04-25 13:45:57 +02:00
Nikolas Heise	b305e1ce23	Fix login using the native ilias login form	2025-04-23 16:08:45 +02:00
I-Al-Istannen	bdf17f5c87	Ignore wikis	2025-04-23 16:03:37 +02:00
`@@ -1,2 +1,2 @@`
	`NAME = "PFERD"`	`NAME = "PFERD"`
	`VERSION = "3.8.1"`	`VERSION = "3.8.2"`