Compare commits


4 Commits

SHA1 Message Date
77a23265a9 Bump version to 3.8.2 2025-04-29 17:55:57 +02:00
4c230ef6dd Fix exercise crawling 2025-04-25 13:45:57 +02:00
b305e1ce23 Fix login using the native ilias login form 2025-04-23 16:08:45 +02:00
bdf17f5c87 Ignore wikis 2025-04-23 16:03:37 +02:00
4 changed files with 123 additions and 73 deletions

View File

@@ -22,6 +22,15 @@ ambiguous situations.
## Unreleased

## 3.8.2 - 2025-04-29

### Changed
- Explicitly mention that wikis are not supported at the moment and ignore them

### Fixed
- Ilias-native login
- Exercise crawling

## 3.8.1 - 2025-04-17

### Fixed

View File

@@ -107,6 +107,7 @@ class IliasWebCrawlerSection(HttpCrawlerSection):
_DIRECTORY_PAGES: Set[IliasElementType] = {
IliasElementType.EXERCISE,
IliasElementType.EXERCISE_FILES,
IliasElementType.EXERCISE_OVERVIEW,
IliasElementType.FOLDER,
IliasElementType.INFO_TAB,
IliasElementType.MEDIACAST_VIDEO_FOLDER,
@@ -424,6 +425,14 @@ instance's greatest bottleneck.
"[bright_black](not descending into linked course)"
)
return None
elif element.type == IliasElementType.WIKI:
log.status(
"[bold bright_black]",
"Ignored",
fmt_path(element_path),
"[bright_black](wikis are not currently supported)"
)
return None
elif element.type == IliasElementType.LEARNING_MODULE:
return await self._handle_learning_module(element, element_path)
elif element.type == IliasElementType.LINK:
@@ -1031,7 +1040,7 @@ instance's greatest bottleneck.
async with self.session.get(urljoin(self._base_url, "/login.php"), params=params) as request:
login_page = soupify(await request.read())
login_form = cast(Optional[Tag], login_page.find("form", attrs={"name": "formlogin"}))
login_form = cast(Optional[Tag], login_page.find("form", attrs={"name": "login_form"}))
if login_form is None:
raise CrawlError("Could not find the login form! Specified client id might be invalid.")
@@ -1041,14 +1050,12 @@ instance's greatest bottleneck.
username, password = await self._auth.credentials()
login_data = {
"username": username,
"password": password,
"cmd[doStandardAuthentication]": "Login",
}
login_form_data = aiohttp.FormData()
login_form_data.add_field('login_form/input_3/input_4', username)
login_form_data.add_field('login_form/input_3/input_5', password)
# do the actual login
async with self.session.post(urljoin(self._base_url, login_url), data=login_data) as request:
async with self.session.post(urljoin(self._base_url, login_url), data=login_form_data) as request:
soup = IliasSoup(soupify(await request.read()), str(request.url))
if not IliasPage.is_logged_in(soup):
self._auth.invalidate_credentials()
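
The net effect of this hunk: the crawler now locates the ILIAS-native form named `login_form` (instead of the old `formlogin`) and submits the credentials as multipart form data rather than a plain POST dict. A minimal, self-contained sketch of the new flow, assuming `session` is an open `aiohttp.ClientSession`, `base_url` points at the ILIAS instance, and `login_url` was taken from the form's `action` attribute (the nested field names are the ones from the diff; the function name is illustrative, not PFERD's actual method):

```python
from urllib.parse import urljoin

import aiohttp


async def ilias_native_login(session: aiohttp.ClientSession, base_url: str,
                             login_url: str, username: str, password: str) -> str:
    # The native login form expects its inputs as multipart form fields,
    # keyed by the nested widget names of the new "login_form".
    form = aiohttp.FormData()
    form.add_field("login_form/input_3/input_4", username)
    form.add_field("login_form/input_3/input_5", password)
    async with session.post(urljoin(base_url, login_url), data=form) as response:
        # PFERD re-parses the returned page and checks IliasPage.is_logged_in();
        # this sketch simply hands the HTML back to the caller.
        return await response.text()
```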

View File

@@ -97,7 +97,8 @@ class IliasElementType(Enum):
BOOKING = "booking"
COURSE = "course"
DCL_RECORD_LIST = "dcl_record_list"
EXERCISE = "exercise"
EXERCISE_OVERVIEW = "exercise_overview"
EXERCISE = "exercise" # own submitted files
EXERCISE_FILES = "exercise_files" # own submitted files
FILE = "file"
FOLDER = "folder"
@@ -120,6 +121,7 @@ class IliasElementType(Enum):
SCORM_LEARNING_MODULE = "scorm_learning_module"
SURVEY = "survey"
TEST = "test" # an online test. Will be ignored currently.
WIKI = "wiki"
def matcher(self) -> IliasElementMatcher:
match self:
@@ -140,13 +142,15 @@ class IliasElementType(Enum):
TypeMatcher.query("cmdclass=ildclrecordlistgui")
)
case IliasElementType.EXERCISE:
return TypeMatcher.never()
case IliasElementType.EXERCISE_FILES:
return TypeMatcher.never()
case IliasElementType.EXERCISE_OVERVIEW:
return TypeMatcher.any(
TypeMatcher.path("/exc/"),
TypeMatcher.path("_exc_"),
TypeMatcher.img_src("_exc.svg"),
)
case IliasElementType.EXERCISE_FILES:
return TypeMatcher.never()
case IliasElementType.FILE:
return TypeMatcher.any(
TypeMatcher.query("cmd=sendfile"),
@@ -243,6 +247,11 @@ class IliasElementType(Enum):
TypeMatcher.query("cmdclass=iltestscreengui"),
TypeMatcher.img_src("_tst.svg")
)
case IliasElementType.WIKI:
return TypeMatcher.any(
TypeMatcher.query("baseClass=ilwikihandlergui"),
TypeMatcher.img_src("wiki.svg")
)
raise CrawlWarning(f"Unknown matcher {self}")
@@ -524,6 +533,8 @@ class IliasPage:
if self._contains_collapsed_future_meetings():
log.explain("Requesting *all* future meetings")
return self._uncollapse_future_meetings_url()
if self._is_exercise_not_all_shown():
return self._show_all_exercises()
if not self._is_content_tab_selected():
if self._page_type != IliasElementType.INFO_TAB:
log.explain("Selecting content tab")
@@ -555,7 +566,7 @@ class IliasPage:
def _is_exercise_file(self) -> bool:
# we know it from before
if self._page_type == IliasElementType.EXERCISE:
if self._page_type == IliasElementType.EXERCISE_OVERVIEW:
return True
# We have no suitable parent - let's guess
@@ -592,6 +603,17 @@ class IliasPage:
link = self._abs_url_from_link(element)
return IliasPageElement.create_new(IliasElementType.FOLDER, link, "show all meetings")
def _is_exercise_not_all_shown(self) -> bool:
return (self._page_type == IliasElementType.EXERCISE_OVERVIEW
and "mode=all" not in self._page_url.lower())
def _show_all_exercises(self) -> Optional[IliasPageElement]:
return IliasPageElement.create_new(
IliasElementType.EXERCISE_OVERVIEW,
self._page_url + "&mode=all",
"show all exercises"
)
def _is_content_tab_selected(self) -> bool:
return self._select_content_page_url() is None
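
The two new helpers above handle a quirk of the exercise overview: ILIAS only lists a filtered subset of assignments unless `mode=all` is present in the query string, so the crawler re-requests the page with that parameter appended. A tiny sketch of the URL normalization, assuming (like the diff does) that the page URL already carries a query string; the example URL is illustrative:

```python
def show_all_exercises_url(page_url: str) -> str:
    # Append mode=all unless the overview page already shows everything.
    return page_url if "mode=all" in page_url.lower() else page_url + "&mode=all"

print(show_all_exercises_url("https://ilias.example/ilias.php?ref_id=7&cmd=showOverview"))
# -> https://ilias.example/ilias.php?ref_id=7&cmd=showOverview&mode=all
```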
@@ -857,15 +879,62 @@ class IliasPage:
def _find_exercise_entries(self) -> list[IliasPageElement]:
if self._soup.find(id="tab_submission"):
log.explain("Found submission tab. This is an exercise detail page")
log.explain("Found submission tab. This is an exercise detail or files page")
if self._soup.select_one("#tab_submission.active") is None:
log.explain(" This is a details page")
return self._find_exercise_entries_detail_page()
else:
log.explain(" This is a files page")
return self._find_exercise_entries_files_page()
log.explain("Found no submission tab. This is an exercise root page")
return self._find_exercise_entries_root_page()
def _find_exercise_entries_detail_page(self) -> list[IliasPageElement]:
results: list[IliasPageElement] = []
# Find the link to the submission page (it lists the submitted files)
if link := cast(Optional[Tag], self._soup.select_one("#tab_submission > a")):
results.append(IliasPageElement.create_new(
IliasElementType.EXERCISE_FILES,
self._abs_url_from_link(link),
"Submission"
))
else:
log.explain("Found no submission link for exercise, maybe it has not started yet?")
# Find all download links in the container (this will contain all the *feedback* files)
download_links = cast(list[Tag], self._soup.find_all(
name="a",
# download links contain the given command class
attrs={"href": lambda x: x is not None and "cmd=download" in x},
text="Download"
))
for link in download_links:
parent_row: Tag = cast(Tag, link.find_parent(
attrs={"class": lambda x: x is not None and "row" in x}))
name_tag = cast(Optional[Tag], parent_row.find(name="div"))
if not name_tag:
log.warn("Could not find name tag for exercise entry")
_unexpected_html_warning()
continue
name = _sanitize_path_name(name_tag.get_text().strip())
log.explain(f"Found exercise detail entry {name!r}")
results.append(IliasPageElement.create_new(
IliasElementType.FILE,
self._abs_url_from_link(link),
name
))
return results
def _find_exercise_entries_files_page(self) -> list[IliasPageElement]:
results: list[IliasPageElement] = []
# Find all download links in the container
download_links = cast(list[Tag], self._soup.find_all(
name="a",
# download links contain the given command class
@@ -878,7 +947,7 @@ class IliasPage:
children = cast(list[Tag], parent_row.find_all("td"))
name = _sanitize_path_name(children[1].get_text().strip())
log.explain(f"Found exercise detail entry {name!r}")
log.explain(f"Found exercise file entry {name!r}")
date = None
for child in reversed(children):
@@ -886,7 +955,7 @@ class IliasPage:
if date is not None:
break
if date is None:
log.warn(f"Date parsing failed for exercise entry {name!r}")
log.warn(f"Date parsing failed for exercise file entry {name!r}")
results.append(IliasPageElement.create_new(
IliasElementType.FILE,
@@ -900,66 +969,31 @@ class IliasPage:
def _find_exercise_entries_root_page(self) -> list[IliasPageElement]:
results: list[IliasPageElement] = []
# Each assignment is in an accordion container
assignment_containers: list[Tag] = self._soup.select(".il_VAccordionInnerContainer")
content_tab = cast(Optional[Tag], self._soup.find(id="ilContentContainer"))
if not content_tab:
log.warn("Could not find content tab in exercise overview page")
_unexpected_html_warning()
return []
for container in assignment_containers:
# Fetch the container name out of the header to use it in the path
container_name = cast(Tag, container.select_one(".ilAssignmentHeader")).get_text().strip()
log.explain(f"Found exercise container {container_name!r}")
# Find all download links in the container (this will contain all the files)
files = cast(list[Tag], container.find_all(
individual_exercises = content_tab.find_all(
name="a",
# download links contain the given command class
attrs={"href": lambda x: x is not None and "cmdClass=ilexsubmissiongui" in x},
text="Download"
))
attrs={
"href": lambda x: x is not None
and "ass_id=" in x
and "cmdClass=ilAssignmentPresentationGUI" in x
}
)
# Grab each file as you now have the link
for file_link in files:
# Two divs, side by side. Left is the name, right is the link ==> get left
# sibling
file_name = cast(
Tag,
cast(Tag, file_link.parent).find_previous(name="div")
).get_text().strip()
url = self._abs_url_from_link(file_link)
log.explain(f"Found exercise entry {file_name!r}")
for exercise in cast(list[Tag], individual_exercises):
name = _sanitize_path_name(exercise.get_text().strip())
results.append(IliasPageElement.create_new(
IliasElementType.FILE,
url,
_sanitize_path_name(container_name) + "/" + _sanitize_path_name(file_name),
mtime=None, # We do not have any timestamp
skip_sanitize=True
IliasElementType.EXERCISE,
self._abs_url_from_link(exercise),
name
))
# Find all links to file listings (e.g. "Submitted Files" for groups)
file_listings = cast(list[Tag], container.find_all(
name="a",
# download links contain the given command class
attrs={"href": lambda x: x is not None and "cmdclass=ilexsubmissionfilegui" in x.lower()}
))
# Add each listing as a new
for listing in file_listings:
parent_container = cast(Tag, listing.find_parent(
"div", attrs={"class": lambda x: x is not None and "form-group" in x}
))
label_container = cast(Tag, parent_container.find(
attrs={"class": lambda x: x is not None and "control-label" in x}
))
file_name = label_container.get_text().strip()
url = self._abs_url_from_link(listing)
log.explain(f"Found exercise detail {file_name!r} at {url}")
results.append(IliasPageElement.create_new(
IliasElementType.EXERCISE_FILES,
url,
_sanitize_path_name(container_name) + "/" + _sanitize_path_name(file_name),
None, # we do not have any timestamp
skip_sanitize=True
))
for result in results:
log.explain(f"Found exercise {result.name!r}")
return results
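
Taken together, this file's changes restructure exercise crawling: the overview page no longer scrapes download links out of accordion containers, but instead emits one `EXERCISE` element per assignment link (href containing `ass_id=` and `cmdClass=ilAssignmentPresentationGUI`); each assignment's detail page then yields a `Submission` link (`EXERCISE_FILES`) plus any feedback file downloads, and the submission page finally yields the individual `FILE` entries. A self-contained sketch of the first step under those assumptions (helper name and return shape are illustrative, not PFERD's API):

```python
from bs4 import BeautifulSoup


def assignment_links(overview_html: str) -> list[tuple[str, str]]:
    """Collect (name, href) pairs for each assignment on an exercise overview page."""
    soup = BeautifulSoup(overview_html, "html.parser")
    content = soup.find(id="ilContentContainer")
    if content is None:
        # Mirrors the crawler's behaviour: warn and skip when the layout is unexpected.
        return []
    links = content.find_all(
        "a",
        attrs={"href": lambda x: x is not None
               and "ass_id=" in x
               and "cmdClass=ilAssignmentPresentationGUI" in x},
    )
    return [(a.get_text().strip(), a["href"]) for a in links]
```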

View File

@@ -1,2 +1,2 @@
NAME = "PFERD"
VERSION = "3.8.1"
VERSION = "3.8.2"