"Fix" mypy errors

Thank you mypy, very cool. These types make things *so much better*. They don't just complicate everything and don't really help because they can not detect that an element queried by a tag is no navigable string...
2025-07-12 22:22:30 +02:00 · 2025-02-12 22:41:43 +01:00
parent 16a2dd5b15
commit bd9d7efe64
8 changed files with 224 additions and 204 deletions
--- a/PFERD/crawl/ilias/kit_ilias_html.py
+++ b/PFERD/crawl/ilias/kit_ilias_html.py
@ -3,7 +3,7 @@ import re
 from dataclasses import dataclass
 from datetime import date, datetime, timedelta
 from enum import Enum
-from typing import Dict, List, Optional, Union, cast
+from typing import Dict, Optional, Union, cast
 from urllib.parse import urljoin, urlparse

 from bs4 import BeautifulSoup, Tag
@ -117,7 +117,7 @@ class IliasPageElement:
@dataclass
 class IliasDownloadForumData:
    url: str
-    form_data: Dict[str, Union[str, List[str]]]
+    form_data: Dict[str, Union[str, list[str]]]
    empty: bool


@ -151,7 +151,7 @@ class IliasPage:
            return "goto.php?target=root_" in permalink
        return False

-    def get_child_elements(self) -> List[IliasPageElement]:
+    def get_child_elements(self) -> list[IliasPageElement]:
        """
        Return all child page elements you can find here.
        """
@ -177,10 +177,10 @@ class IliasPage:
        return self._find_normal_entries()

    def get_info_tab(self) -> Optional[IliasPageElement]:
-        tab: Optional[Tag] = self._soup.find(
+        tab: Optional[Tag] = cast(Optional[Tag], self._soup.find(
            name="a",
-            attrs={"href": lambda x: x and "cmdClass=ilinfoscreengui" in x}
-        )
+            attrs={"href": lambda x: x is not None and "cmdClass=ilinfoscreengui" in x}
+        ))
        if tab is not None:
            return IliasPageElement.create_new(
                IliasElementType.INFO_TAB,
@ -193,7 +193,7 @@ class IliasPage:
        def is_interesting_class(name: str) -> bool:
            return name in ["ilCOPageSection", "ilc_Paragraph", "ilc_va_ihcap_VAccordIHeadCap"]

-        paragraphs: List[Tag] = self._soup.findAll(class_=is_interesting_class)
+        paragraphs: list[Tag] = cast(list[Tag], self._soup.find_all(class_=is_interesting_class))
        if not paragraphs:
            return None

@ -217,8 +217,8 @@ class IliasPage:
    def get_learning_module_data(self) -> Optional[IliasLearningModulePage]:
        if not self._is_learning_module_page():
            return None
-        content = self._soup.select_one("#ilLMPageContent")
-        title = self._soup.select_one(".ilc_page_title_PageTitle").getText().strip()
+        content = cast(Tag, self._soup.select_one("#ilLMPageContent"))
+        title = cast(Tag, self._soup.select_one(".ilc_page_title_PageTitle")).get_text().strip()
        return IliasLearningModulePage(
            title=title,
            content=content,
@ -243,15 +243,18 @@ class IliasPage:
        return None

    def get_download_forum_data(self) -> Optional[IliasDownloadForumData]:
-        form = self._soup.find("form", attrs={"action": lambda x: x and "fallbackCmd=showThreads" in x})
+        form = cast(Optional[Tag], self._soup.find(
+            "form",
+            attrs={"action": lambda x: x is not None and "fallbackCmd=showThreads" in x}
+        ))
        if not form:
            return None
-        post_url = self._abs_url_from_relative(form["action"])
+        post_url = self._abs_url_from_relative(cast(str, form["action"]))

-        thread_ids = [f["value"] for f in form.find_all(attrs={"name": "thread_ids[]"})]
+        thread_ids = [f["value"] for f in cast(list[Tag], form.find_all(attrs={"name": "thread_ids[]"}))]

-        form_data: Dict[str, Union[str, List[str]]] = {
-            "thread_ids[]": thread_ids,
+        form_data: Dict[str, Union[str, list[str]]] = {
+            "thread_ids[]": cast(list[str], thread_ids),
            "selected_cmd2": "html",
            "select_cmd2": "Ausführen",
            "selected_cmd": "",
@ -285,7 +288,7 @@ class IliasPage:
    def _is_forum_page(self) -> bool:
        read_more_btn = self._soup.find(
            "button",
-            attrs={"onclick": lambda x: x and "cmdClass=ilobjforumgui&cmd=markAllRead" in x}
+            attrs={"onclick": lambda x: x is not None and "cmdClass=ilobjforumgui&cmd=markAllRead" in x}
        )
        return read_more_btn is not None

@ -297,7 +300,7 @@ class IliasPage:
            return True

        # Raw listing without ILIAS fluff
-        video_element_table: Tag = self._soup.find(
+        video_element_table = self._soup.find(
            name="table", id=re.compile(r"tbl_xoct_.+")
        )
        return video_element_table is not None
@ -305,8 +308,8 @@ class IliasPage:
    def _is_ilias_opencast_embedding(self) -> bool:
        # ILIAS fluff around the real opencast html
        if self._soup.find(id="headerimage"):
-            element: Tag = self._soup.find(id="headerimage")
-            if "opencast" in element.attrs["src"].lower():
+            element: Tag = cast(Tag, self._soup.find(id="headerimage"))
+            if "opencast" in cast(str, element.attrs["src"]).lower():
                return True
        return False

@ -317,8 +320,8 @@ class IliasPage:

        # We have no suitable parent - let's guesss
        if self._soup.find(id="headerimage"):
-            element: Tag = self._soup.find(id="headerimage")
-            if "exc" in element.attrs["src"].lower():
+            element: Tag = cast(Tag, self._soup.find(id="headerimage"))
+            if "exc" in cast(str, element.attrs["src"]).lower():
                return True

        return False
@ -340,10 +343,10 @@ class IliasPage:
        return self._uncollapse_future_meetings_url() is not None

    def _uncollapse_future_meetings_url(self) -> Optional[IliasPageElement]:
-        element = self._soup.find(
+        element = cast(Optional[Tag], self._soup.find(
            "a",
-            attrs={"href": lambda x: x and ("crs_next_sess=1" in x or "crs_prev_sess=1" in x)}
-        )
+            attrs={"href": lambda x: x is not None and ("crs_next_sess=1" in x or "crs_prev_sess=1" in x)}
+        ))
        if not element:
            return None
        link = self._abs_url_from_link(element)
@ -360,24 +363,24 @@ class IliasPage:
        return "baseClass=ilmembershipoverviewgui" in self._page_url

    def _select_content_page_url(self) -> Optional[IliasPageElement]:
-        tab = self._soup.find(
+        tab = cast(Optional[Tag], self._soup.find(
            id="tab_view_content",
            attrs={"class": lambda x: x is not None and "active" not in x}
-        )
+        ))
        # Already selected (or not found)
        if not tab:
            return None
-        link = tab.find("a")
+        link = cast(Optional[Tag], tab.find("a"))
        if link:
-            link = self._abs_url_from_link(link)
-            return IliasPageElement.create_new(IliasElementType.FOLDER, link, "select content page")
+            link_str = self._abs_url_from_link(link)
+            return IliasPageElement.create_new(IliasElementType.FOLDER, link_str, "select content page")

        _unexpected_html_warning()
        log.warn_contd(f"Could not find content tab URL on {self._page_url!r}.")
        log.warn_contd("PFERD might not find content on the course's main page.")
        return None

-    def _player_to_video(self) -> List[IliasPageElement]:
+    def _player_to_video(self) -> list[IliasPageElement]:
        # Fetch the actual video page. This is a small wrapper page initializing a javscript
        # player. Sadly we can not execute that JS. The actual video stream url is nowhere
        # on the page, but defined in a JS object inside a script tag, passed to the player
@ -414,10 +417,10 @@ class IliasPage:
        return items

    def _get_show_max_forum_entries_per_page_url(self) -> Optional[IliasPageElement]:
-        correct_link = self._soup.find(
+        correct_link = cast(Optional[Tag], self._soup.find(
            "a",
-            attrs={"href": lambda x: x and "trows=800" in x and "cmd=showThreads" in x}
-        )
+            attrs={"href": lambda x: x is not None and "trows=800" in x and "cmd=showThreads" in x}
+        ))

        if not correct_link:
            return None
@ -426,15 +429,15 @@ class IliasPage:

        return IliasPageElement.create_new(IliasElementType.FORUM, link, "show all forum threads")

-    def _find_personal_desktop_entries(self) -> List[IliasPageElement]:
-        items: List[IliasPageElement] = []
+    def _find_personal_desktop_entries(self) -> list[IliasPageElement]:
+        items: list[IliasPageElement] = []

-        titles: List[Tag] = self._soup.select("#block_pditems_0 .il-item-title")
+        titles: list[Tag] = self._soup.select("#block_pditems_0 .il-item-title")
        for title in titles:
-            link = title.find("a")
+            link = cast(Optional[Tag], title.find("a"))

            if not link:
-                log.explain(f"Skipping offline item: {title.getText().strip()!r}")
+                log.explain(f"Skipping offline item: {title.get_text().strip()!r}")
                continue

            name = _sanitize_path_name(link.text.strip())
@ -460,13 +463,13 @@ class IliasPage:

        return items

-    def _find_copa_entries(self) -> List[IliasPageElement]:
-        items: List[IliasPageElement] = []
-        links: List[Tag] = self._soup.findAll(class_="ilc_flist_a_FileListItemLink")
+    def _find_copa_entries(self) -> list[IliasPageElement]:
+        items: list[IliasPageElement] = []
+        links: list[Tag] = cast(list[Tag], self._soup.find_all(class_="ilc_flist_a_FileListItemLink"))

        for link in links:
            url = self._abs_url_from_link(link)
-            name = re.sub(r"\([\d,.]+ [MK]B\)", "", link.getText()).strip().replace("\t", "")
+            name = re.sub(r"\([\d,.]+ [MK]B\)", "", link.get_text()).strip().replace("\t", "")
            name = _sanitize_path_name(name)

            if "file_id" not in url:
@ -478,9 +481,9 @@ class IliasPage:

        return items

-    def _find_info_tab_entries(self) -> List[IliasPageElement]:
+    def _find_info_tab_entries(self) -> list[IliasPageElement]:
        items = []
-        links: List[Tag] = self._soup.select("a.il_ContainerItemCommand")
+        links: list[Tag] = self._soup.select("a.il_ContainerItemCommand")

        for link in links:
            if "cmdClass=ilobjcoursegui" not in link["href"]:
@ -490,12 +493,12 @@ class IliasPage:
            items.append(IliasPageElement.create_new(
                IliasElementType.FILE,
                self._abs_url_from_link(link),
-                _sanitize_path_name(link.getText())
+                _sanitize_path_name(link.get_text())
            ))

        return items

-    def _find_opencast_video_entries(self) -> List[IliasPageElement]:
+    def _find_opencast_video_entries(self) -> list[IliasPageElement]:
        # ILIAS has three stages for video pages
        # 1. The initial dummy page without any videos. This page contains the link to the listing
        # 2. The video listing which might be paginated
@ -503,14 +506,14 @@ class IliasPage:
        #
        # We need to figure out where we are.

-        video_element_table: Tag = self._soup.find(
+        video_element_table = cast(Optional[Tag], self._soup.find(
            name="table", id=re.compile(r"tbl_xoct_.+")
-        )
+        ))

        if video_element_table is None:
            # We are in stage 1
            # The page is actually emtpy but contains the link to stage 2
-            content_link: Tag = self._soup.select_one("#tab_series a")
+            content_link: Tag = cast(Tag, self._soup.select_one("#tab_series a"))
            url: str = self._abs_url_from_link(content_link)
            query_params = {"limit": "800", "cmd": "asyncGetTableGUI", "cmdMode": "asynch"}
            url = url_set_query_params(url, query_params)
@ -527,14 +530,14 @@ class IliasPage:

        return self._find_opencast_video_entries_no_paging()

-    def _find_opencast_video_entries_paginated(self) -> List[IliasPageElement]:
-        table_element: Tag = self._soup.find(name="table", id=re.compile(r"tbl_xoct_.+"))
+    def _find_opencast_video_entries_paginated(self) -> list[IliasPageElement]:
+        table_element = cast(Optional[Tag], self._soup.find(name="table", id=re.compile(r"tbl_xoct_.+")))

        if table_element is None:
            log.warn("Couldn't increase elements per page (table not found). I might miss elements.")
            return self._find_opencast_video_entries_no_paging()

-        id_match = re.match(r"tbl_xoct_(.+)", table_element.attrs["id"])
+        id_match = re.match(r"tbl_xoct_(.+)", cast(str, table_element.attrs["id"]))
        if id_match is None:
            log.warn("Couldn't increase elements per page (table id not found). I might miss elements.")
            return self._find_opencast_video_entries_no_paging()
@ -548,16 +551,16 @@ class IliasPage:
        log.explain("Disabled pagination, retrying folder as a new entry")
        return [IliasPageElement.create_new(IliasElementType.OPENCAST_VIDEO_FOLDER, url, "")]

-    def _find_opencast_video_entries_no_paging(self) -> List[IliasPageElement]:
+    def _find_opencast_video_entries_no_paging(self) -> list[IliasPageElement]:
        """
        Crawls the "second stage" video page. This page contains the actual video urls.
        """
        # Video start links are marked with an "Abspielen" link
-        video_links: List[Tag] = self._soup.findAll(
+        video_links = cast(list[Tag], self._soup.find_all(
            name="a", text=re.compile(r"\s*(Abspielen|Play)\s*")
-        )
+        ))

-        results: List[IliasPageElement] = []
+        results: list[IliasPageElement] = []

        for link in video_links:
            results.append(self._listed_opencast_video_to_element(link))
@ -569,12 +572,12 @@ class IliasPage:
        # 6th or 7th child (1 indexed) is the modification time string. Try to find it
        # by parsing backwards from the end and finding something that looks like a date
        modification_time = None
-        row: Tag = link.parent.parent.parent
+        row: Tag = link.parent.parent.parent  # type: ignore
        column_count = len(row.select("td.std"))
        for index in range(column_count, 0, -1):
-            modification_string = link.parent.parent.parent.select_one(
+            modification_string = link.parent.parent.parent.select_one(  # type: ignore
                f"td.std:nth-child({index})"
-            ).getText().strip()
+            ).get_text().strip()
            if match := re.search(r"\d+\.\d+.\d+ \d+:\d+", modification_string):
                modification_time = datetime.strptime(match.group(0), "%d.%m.%Y %H:%M")
                break
@ -583,7 +586,7 @@ class IliasPage:
            log.warn(f"Could not determine upload time for {link}")
            modification_time = datetime.now()

-        title = link.parent.parent.parent.select_one("td.std:nth-child(3)").getText().strip()
+        title = link.parent.parent.parent.select_one("td.std:nth-child(3)").get_text().strip()  # type: ignore
        title += ".mp4"

        video_name: str = _sanitize_path_name(title)
@ -595,33 +598,34 @@ class IliasPage:
            IliasElementType.OPENCAST_VIDEO_PLAYER, video_url, video_name, modification_time
        )

-    def _find_exercise_entries(self) -> List[IliasPageElement]:
+    def _find_exercise_entries(self) -> list[IliasPageElement]:
        if self._soup.find(id="tab_submission"):
            log.explain("Found submission tab. This is an exercise detail page")
            return self._find_exercise_entries_detail_page()
        log.explain("Found no submission tab. This is an exercise root page")
        return self._find_exercise_entries_root_page()

-    def _find_exercise_entries_detail_page(self) -> List[IliasPageElement]:
-        results: List[IliasPageElement] = []
+    def _find_exercise_entries_detail_page(self) -> list[IliasPageElement]:
+        results: list[IliasPageElement] = []

        # Find all download links in the container (this will contain all the files)
-        download_links: List[Tag] = self._soup.findAll(
+        download_links = cast(list[Tag], self._soup.find_all(
            name="a",
            # download links contain the given command class
-            attrs={"href": lambda x: x and "cmd=download" in x},
+            attrs={"href": lambda x: x is not None and "cmd=download" in x},
            text="Download"
-        )
+        ))

        for link in download_links:
-            parent_row: Tag = link.findParent("tr")
-            children: List[Tag] = parent_row.findChildren("td")
+            parent_row: Tag = cast(Tag, link.find_parent("tr"))
+            children = cast(list[Tag], parent_row.find_all("td"))

-            name = _sanitize_path_name(children[1].getText().strip())
+            name = _sanitize_path_name(children[1].get_text().strip())
            log.explain(f"Found exercise detail entry {name!r}")

+            date = None
            for child in reversed(children):
-                date = demangle_date(child.getText().strip(), fail_silently=True)
+                date = demangle_date(child.get_text().strip(), fail_silently=True)
                if date is not None:
                    break
            if date is None:
@ -636,30 +640,33 @@ class IliasPage:

        return results

-    def _find_exercise_entries_root_page(self) -> List[IliasPageElement]:
-        results: List[IliasPageElement] = []
+    def _find_exercise_entries_root_page(self) -> list[IliasPageElement]:
+        results: list[IliasPageElement] = []

        # Each assignment is in an accordion container
-        assignment_containers: List[Tag] = self._soup.select(".il_VAccordionInnerContainer")
+        assignment_containers: list[Tag] = self._soup.select(".il_VAccordionInnerContainer")

        for container in assignment_containers:
            # Fetch the container name out of the header to use it in the path
-            container_name = container.select_one(".ilAssignmentHeader").getText().strip()
+            container_name = cast(Tag, container.select_one(".ilAssignmentHeader")).get_text().strip()
            log.explain(f"Found exercise container {container_name!r}")

            # Find all download links in the container (this will contain all the files)
-            files: List[Tag] = container.findAll(
+            files = cast(list[Tag], container.find_all(
                name="a",
                # download links contain the given command class
-                attrs={"href": lambda x: x and "cmdClass=ilexsubmissiongui" in x},
+                attrs={"href": lambda x: x is not None and "cmdClass=ilexsubmissiongui" in x},
                text="Download"
-            )
+            ))

            # Grab each file as you now have the link
            for file_link in files:
                # Two divs, side by side. Left is the name, right is the link ==> get left
                # sibling
-                file_name = file_link.parent.findPrevious(name="div").getText().strip()
+                file_name = cast(
+                    Tag,
+                    cast(Tag, file_link.parent).find_previous(name="div")
+                ).get_text().strip()
                url = self._abs_url_from_link(file_link)

                log.explain(f"Found exercise entry {file_name!r}")
@ -672,21 +679,21 @@ class IliasPage:
                ))

            # Find all links to file listings (e.g. "Submitted Files" for groups)
-            file_listings: List[Tag] = container.findAll(
+            file_listings = cast(list[Tag], container.find_all(
                name="a",
                # download links contain the given command class
-                attrs={"href": lambda x: x and "cmdclass=ilexsubmissionfilegui" in x.lower()}
-            )
+                attrs={"href": lambda x: x is not None and "cmdclass=ilexsubmissionfilegui" in x.lower()}
+            ))

            # Add each listing as a new
            for listing in file_listings:
-                parent_container: Tag = listing.findParent(
-                    "div", attrs={"class": lambda x: x and "form-group" in x}
-                )
-                label_container: Tag = parent_container.find(
-                    attrs={"class": lambda x: x and "control-label" in x}
-                )
-                file_name = label_container.getText().strip()
+                parent_container = cast(Tag, listing.find_parent(
+                    "div", attrs={"class": lambda x: x is not None and "form-group" in x}
+                ))
+                label_container = cast(Tag, parent_container.find(
+                    attrs={"class": lambda x: x is not None and "control-label" in x}
+                ))
+                file_name = label_container.get_text().strip()
                url = self._abs_url_from_link(listing)
                log.explain(f"Found exercise detail {file_name!r} at {url}")
                results.append(IliasPageElement.create_new(
@ -699,10 +706,10 @@ class IliasPage:

        return results

-    def _find_normal_entries(self) -> List[IliasPageElement]:
-        result: List[IliasPageElement] = []
+    def _find_normal_entries(self) -> list[IliasPageElement]:
+        result: list[IliasPageElement] = []

-        links: List[Tag] = []
+        links: list[Tag] = []
        # Fetch all links and throw them to the general interpreter
        if self._is_course_overview_page():
            log.explain("Page is a course overview page, adjusting link selector")
@ -716,9 +723,9 @@ class IliasPage:
            parents = [_sanitize_path_name(x) for x in self._find_upwards_folder_hierarchy(link)]

            if parents:
-                element_name = "/".join(parents) + "/" + _sanitize_path_name(link.getText())
+                element_name = "/".join(parents) + "/" + _sanitize_path_name(link.get_text())
            else:
-                element_name = _sanitize_path_name(link.getText())
+                element_name = _sanitize_path_name(link.get_text())

            element_type = self._find_type_from_link(element_name, link, abs_url)
            description = self._find_link_description(link)
@ -750,17 +757,17 @@ class IliasPage:

        return result

-    def _find_mediacast_videos(self) -> List[IliasPageElement]:
-        videos: List[IliasPageElement] = []
+    def _find_mediacast_videos(self) -> list[IliasPageElement]:
+        videos: list[IliasPageElement] = []

-        for elem in cast(List[Tag], self._soup.select(".ilPlayerPreviewOverlayOuter")):
+        for elem in cast(list[Tag], self._soup.select(".ilPlayerPreviewOverlayOuter")):
            element_name = _sanitize_path_name(
-                elem.select_one(".ilPlayerPreviewDescription").getText().strip()
+                cast(Tag, elem.select_one(".ilPlayerPreviewDescription")).get_text().strip()
            )
            if not element_name.endswith(".mp4"):
                # just to make sure it has some kinda-alrightish ending
                element_name = element_name + ".mp4"
-            video_element = elem.find(name="video")
+            video_element = cast(Optional[Tag], elem.find(name="video"))
            if not video_element:
                _unexpected_html_warning()
                log.warn_contd(f"No <video> element found for mediacast video '{element_name}'")
@ -768,18 +775,18 @@ class IliasPage:

            videos.append(IliasPageElement.create_new(
                typ=IliasElementType.MEDIACAST_VIDEO,
-                url=self._abs_url_from_relative(video_element.get("src")),
+                url=self._abs_url_from_relative(cast(str, video_element.get("src"))),
                name=element_name,
-                mtime=self._find_mediacast_video_mtime(elem.findParent(name="td"))
+                mtime=self._find_mediacast_video_mtime(cast(Tag, elem.find_parent(name="td")))
            ))

        return videos

-    def _find_mob_videos(self) -> List[IliasPageElement]:
-        videos: List[IliasPageElement] = []
+    def _find_mob_videos(self) -> list[IliasPageElement]:
+        videos: list[IliasPageElement] = []

        for figure in self._soup.select("figure.ilc_media_cont_MediaContainerHighlighted"):
-            title = figure.select_one("figcaption").getText().strip() + ".mp4"
+            title = cast(Tag, figure.select_one("figcaption")).get_text().strip() + ".mp4"
            video_element = figure.select_one("video")
            if not video_element:
                _unexpected_html_warning()
@ -789,7 +796,7 @@ class IliasPage:
            url = None
            for source in video_element.select("source"):
                if source.get("type", "") == "video/mp4":
-                    url = source.get("src")
+                    url = cast(Optional[str], source.get("src"))
                    break

            if url is None:
@ -807,15 +814,15 @@ class IliasPage:
        return videos

    def _find_mediacast_video_mtime(self, enclosing_td: Tag) -> Optional[datetime]:
-        description_td: Tag = enclosing_td.findPreviousSibling("td")
+        description_td = cast(Tag, enclosing_td.find_previous_sibling("td"))
        if not description_td:
            return None

-        meta_tag: Tag = description_td.find_all("p")[-1]
+        meta_tag = cast(Optional[Tag], description_td.find_all("p")[-1])
        if not meta_tag:
            return None

-        updated_str = meta_tag.getText().strip().replace("\n", " ")
+        updated_str = meta_tag.get_text().strip().replace("\n", " ")
        updated_str = re.sub(".+?: ", "", updated_str)
        return demangle_date(updated_str)

@ -826,20 +833,20 @@ class IliasPage:
        It is in the same general div and this whole thing is guesswork.
        Therefore, you should check for meetings before passing them in this function.
        """
-        parents: List[Tag] = list(tag.parents)
+        parents: list[Tag] = list(tag.parents)
        for parent in parents:
            if not parent.get("class"):
                continue

            # We should not crawl files under meetings
-            if "ilContainerListItemContentCB" in parent.get("class"):
-                link: Tag = parent.parent.find("a")
+            if "ilContainerListItemContentCB" in cast(str, parent.get("class")):
+                link: Tag = parent.parent.find("a")  # type: ignore
                type = IliasPage._find_type_from_folder_like(link, self._page_url)
                return type == IliasElementType.MEETING

        return False

-    def _find_upwards_folder_hierarchy(self, tag: Tag) -> List[str]:
+    def _find_upwards_folder_hierarchy(self, tag: Tag) -> list[str]:
        """
        Interprets accordions and expandable blocks as virtual folders and returns them
        in order. This allows us to find a file named "Test" in an accordion "Acc" as "Acc/Test"
@ -848,7 +855,7 @@ class IliasPage:

        outer_accordion_content: Optional[Tag] = None

-        parents: List[Tag] = list(tag.parents)
+        parents: list[Tag] = list(tag.parents)
        for parent in parents:
            if not parent.get("class"):
                continue
@ -857,57 +864,63 @@ class IliasPage:
            # but some JS later transforms them into an accordion.

            # This is for these weird JS-y blocks and custom item groups
-            if "ilContainerItemsContainer" in parent.get("class"):
-                data_store_url = parent.parent.get("data-store-url", "").lower()
+            if "ilContainerItemsContainer" in cast(str, parent.get("class")):
+                data_store_url = parent.parent.get("data-store-url", "").lower()  # type: ignore
                is_custom_item_group = "baseclass=ilcontainerblockpropertiesstoragegui" in data_store_url \
                                       and "cont_block_id=" in data_store_url
                # I am currently under the impression that *only* those JS blocks have an
                # ilNoDisplay class.
-                if not is_custom_item_group and "ilNoDisplay" not in parent.get("class"):
+                if not is_custom_item_group and "ilNoDisplay" not in cast(str, parent.get("class")):
                    continue
-                prev: Tag = parent.findPreviousSibling("div")
-                if "ilContainerBlockHeader" in prev.get("class"):
+                prev = cast(Tag, parent.find_previous_sibling("div"))
+                if "ilContainerBlockHeader" in cast(str, prev.get("class")):
                    if prev.find("h3"):
-                        found_titles.append(prev.find("h3").getText().strip())
+                        found_titles.append(cast(Tag, prev.find("h3")).get_text().strip())
                    else:
-                        found_titles.append(prev.find("h2").getText().strip())
+                        found_titles.append(cast(Tag, prev.find("h2")).get_text().strip())

            # And this for real accordions
-            if "il_VAccordionContentDef" in parent.get("class"):
+            if "il_VAccordionContentDef" in cast(str, parent.get("class")):
                outer_accordion_content = parent
                break

        if outer_accordion_content:
-            accordion_tag: Tag = outer_accordion_content.parent
-            head_tag: Tag = accordion_tag.find(attrs={
-                "class": lambda x: x and "ilc_va_ihead_VAccordIHead" in x
-            })
-            found_titles.append(head_tag.getText().strip())
+            accordion_tag = cast(Tag, outer_accordion_content.parent)
+            head_tag = cast(Tag, accordion_tag.find(attrs={
+                "class": lambda x: x is not None and "ilc_va_ihead_VAccordIHead" in x
+            }))
+            found_titles.append(head_tag.get_text().strip())

        return [_sanitize_path_name(x) for x in reversed(found_titles)]

    def _find_link_description(self, link: Tag) -> Optional[str]:
-        tile: Tag = link.findParent("div", {"class": lambda x: x and "il_ContainerListItem" in x})
+        tile = cast(
+            Tag,
+            link.find_parent("div", {"class": lambda x: x is not None and "il_ContainerListItem" in x})
+        )
        if not tile:
            return None
-        description_element: Tag = tile.find("div", {"class": lambda x: x and "il_Description" in x})
+        description_element = cast(
+            Tag,
+            tile.find("div", {"class": lambda x: x is not None and "il_Description" in x})
+        )
        if not description_element:
            return None
-        return description_element.getText().strip()
+        return description_element.get_text().strip()

    def _file_to_element(self, name: str, url: str, link_element: Tag) -> IliasPageElement:
        # Files have a list of properties (type, modification date, size, etc.)
        # In a series of divs.
        # Find the parent containing all those divs, so we can filter our what we need
-        properties_parent: Tag = link_element.findParent(
+        properties_parent = cast(Tag, cast(Tag, link_element.find_parent(
            "div", {"class": lambda x: "il_ContainerListItem" in x}
-        ).select_one(".il_ItemProperties")
+        )).select_one(".il_ItemProperties"))
        # The first one is always the filetype
-        file_type = properties_parent.select_one("span.il_ItemProperty").getText().strip()
+        file_type = cast(Tag, properties_parent.select_one("span.il_ItemProperty")).get_text().strip()

        # The rest does not have a stable order. Grab the whole text and reg-ex the date
        # out of it
-        all_properties_text = properties_parent.getText().strip()
+        all_properties_text = properties_parent.get_text().strip()
        modification_date_match = re.search(
            r"(((\d+\. \w+ \d+)|(Gestern|Yesterday)|(Heute|Today)|(Morgen|Tomorrow)), \d+:\d+)",
            all_properties_text
@ -927,14 +940,14 @@ class IliasPage:
            IliasElementType.FILE, url, full_path, modification_date, skip_sanitize=True
        )

-    def _find_cards(self) -> List[IliasPageElement]:
-        result: List[IliasPageElement] = []
+    def _find_cards(self) -> list[IliasPageElement]:
+        result: list[IliasPageElement] = []

-        card_titles: List[Tag] = self._soup.select(".card-title a")
+        card_titles: list[Tag] = self._soup.select(".card-title a")

        for title in card_titles:
            url = self._abs_url_from_link(title)
-            name = _sanitize_path_name(title.getText().strip())
+            name = _sanitize_path_name(title.get_text().strip())
            type = self._find_type_from_card(title)

            if not type:
@ -944,25 +957,25 @@ class IliasPage:

            result.append(IliasPageElement.create_new(type, url, name))

-        card_button_tiles: List[Tag] = self._soup.select(".card-title button")
+        card_button_tiles: list[Tag] = self._soup.select(".card-title button")

        for button in card_button_tiles:
-            regex = re.compile(button["id"] + r".*window.open\(['\"](.+?)['\"]")
+            regex = re.compile(button["id"] + r".*window.open\(['\"](.+?)['\"]")  # type: ignore
            res = regex.search(str(self._soup))
            if not res:
                _unexpected_html_warning()
                log.warn_contd(f"Could not find click handler target for {button}")
                continue
            url = self._abs_url_from_relative(res.group(1))
-            name = _sanitize_path_name(button.getText().strip())
+            name = _sanitize_path_name(button.get_text().strip())
            type = self._find_type_from_card(button)
-            caption_parent = button.findParent(
+            caption_parent = cast(Tag, button.find_parent(
                "div",
-                attrs={"class": lambda x: x and "caption" in x},
-            )
+                attrs={"class": lambda x: x is not None and "caption" in x},
+            ))
            caption_container = caption_parent.find_next_sibling("div")
            if caption_container:
-                description = caption_container.getText().strip()
+                description = caption_container.get_text().strip()
            else:
                description = None

@ -992,7 +1005,7 @@ class IliasPage:
            log.warn_contd(f"Tried to figure out element type, but did not find an icon for {card_title}")
            return None

-        icon: Tag = card_root.select_one(".il-card-repository-head .icon")
+        icon = cast(Tag, card_root.select_one(".il-card-repository-head .icon"))

        if "opencast" in icon["class"] or "xoct" in icon["class"]:
            return IliasElementType.OPENCAST_VIDEO_FOLDER_MAYBE_PAGINATED
@ -1125,7 +1138,7 @@ class IliasPage:

        is_session_expansion_button = found_parent.find(
            "a",
-            attrs={"href": lambda x: x and ("crs_next_sess=" in x or "crs_prev_sess=" in x)}
+            attrs={"href": lambda x: x is not None and ("crs_next_sess=" in x or "crs_prev_sess=" in x)}
        )
        if img_tag is None and is_session_expansion_button:
            log.explain("Found session expansion button, skipping it as it has no content")
@ -1168,19 +1181,19 @@ class IliasPage:
    @staticmethod
    def is_logged_in(soup: BeautifulSoup) -> bool:
        # Normal ILIAS pages
-        mainbar: Optional[Tag] = soup.find(class_="il-maincontrols-metabar")
+        mainbar = cast(Optional[Tag], soup.find(class_="il-maincontrols-metabar"))
        if mainbar is not None:
-            login_button = mainbar.find(attrs={"href": lambda x: x and "login.php" in x})
+            login_button = mainbar.find(attrs={"href": lambda x: x is not None and "login.php" in x})
            shib_login = soup.find(id="button_shib_login")
            return not login_button and not shib_login

        # Personal Desktop
-        if soup.find("a", attrs={"href": lambda x: x and "block_type=pditems" in x}):
+        if soup.find("a", attrs={"href": lambda x: x is not None and "block_type=pditems" in x}):
            return True

        # Empty personal desktop has zero (0) markers. Match on the text...
        if alert := soup.select_one(".alert-info"):
-            text = alert.getText().lower()
+            text = alert.get_text().lower()
            if "you have not yet selected any favourites" in text:
                return True
            if "sie haben aktuell noch keine favoriten ausgewählt" in text:
@ -1208,7 +1221,7 @@ class IliasPage:
        """
        Create an absolute url from an <a> tag.
        """
-        return self._abs_url_from_relative(link_tag.get("href"))
+        return self._abs_url_from_relative(cast(str, link_tag.get("href")))

    def _abs_url_from_relative(self, relative_url: str) -> str:
        """
@ -1218,10 +1231,10 @@ class IliasPage:

    @staticmethod
    def get_soup_permalink(soup: BeautifulSoup) -> Optional[str]:
-        perma_link_element: Tag = soup.select_one(".il-footer-permanent-url > a")
+        perma_link_element = cast(Tag, soup.select_one(".il-footer-permanent-url > a"))
        if not perma_link_element or not perma_link_element.get("href"):
            return None
-        return perma_link_element.get("href")
+        return cast(Optional[str], perma_link_element.get("href"))


 def _unexpected_html_warning() -> None:
@ -1298,11 +1311,11 @@ def _sanitize_path_name(name: str) -> str:
    return name.replace("/", "-").replace("\\", "-").strip()


-def parse_ilias_forum_export(forum_export: BeautifulSoup) -> List[IliasForumThread]:
+def parse_ilias_forum_export(forum_export: BeautifulSoup) -> list[IliasForumThread]:
    elements = []
    for p in forum_export.select("body > p"):
        title_tag = p
-        content_tag = p.find_next_sibling("ul")
+        content_tag = cast(Optional[Tag], p.find_next_sibling("ul"))

        if not content_tag:
            # ILIAS allows users to delete the initial post while keeping the thread open
@ -1310,7 +1323,7 @@ def parse_ilias_forum_export(forum_export: BeautifulSoup) -> List[IliasForumThre
            # I am not sure why you would want this, but ILIAS makes it easy to do.
            continue

-        title = p.find("b").text
+        title = cast(Tag, p.find("b")).text
        if ":" in title:
            title = title[title.find(":") + 1:]
        title = title.strip()
@ -1321,7 +1334,7 @@ def parse_ilias_forum_export(forum_export: BeautifulSoup) -> List[IliasForumThre


 def _guess_timestamp_from_forum_post_content(content: Tag) -> Optional[datetime]:
-    posts: Optional[Tag] = content.select(".ilFrmPostHeader > span.small")
+    posts = cast(Optional[Tag], content.select(".ilFrmPostHeader > span.small"))
    if not posts:
        return None