From ef7d66c5afabe4ccc0cbae2c1747e93191bcce81 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sun, 19 Oct 2025 15:59:08 +0200 Subject: [PATCH] Fix some typing errors It seems like the type hints have gotten better :) --- PFERD/auth/keyring.py | 4 +- PFERD/crawl/http_crawler.py | 4 +- PFERD/crawl/ilias/file_templates.py | 8 +-- PFERD/crawl/ilias/ilias_html_cleaner.py | 2 +- PFERD/crawl/ilias/ilias_web_crawler.py | 10 ++-- PFERD/crawl/ilias/kit_ilias_html.py | 71 +++++++++---------------- PFERD/crawl/ilias/shibboleth_login.py | 6 +-- PFERD/logging.py | 4 +- 8 files changed, 44 insertions(+), 65 deletions(-) diff --git a/PFERD/auth/keyring.py b/PFERD/auth/keyring.py index e69a69e..414640a 100644 --- a/PFERD/auth/keyring.py +++ b/PFERD/auth/keyring.py @@ -1,4 +1,4 @@ -from typing import Optional, cast +from typing import Optional import keyring @@ -13,7 +13,7 @@ class KeyringAuthSection(AuthSection): return self.s.get("username") def keyring_name(self) -> str: - return cast(str, self.s.get("keyring_name", fallback=NAME)) + return self.s.get("keyring_name", fallback=NAME) class KeyringAuthenticator(Authenticator): diff --git a/PFERD/crawl/http_crawler.py b/PFERD/crawl/http_crawler.py index 830f537..70ec5c1 100644 --- a/PFERD/crawl/http_crawler.py +++ b/PFERD/crawl/http_crawler.py @@ -3,7 +3,7 @@ import http.cookies import ssl from datetime import datetime from pathlib import Path, PurePath -from typing import Any, Optional, cast +from typing import Any, Optional import aiohttp import certifi @@ -187,7 +187,7 @@ class HttpCrawler(Crawler): if level == 0 or (level == 1 and drop_h1): return PurePath() - level_heading = cast(Optional[Tag], tag.find_previous(name=f"h{level}")) + level_heading = tag.find_previous(name=f"h{level}") if level_heading is None: return find_associated_headings(tag, level - 1) diff --git a/PFERD/crawl/ilias/file_templates.py b/PFERD/crawl/ilias/file_templates.py index 37691b2..c832977 100644 --- a/PFERD/crawl/ilias/file_templates.py +++ b/PFERD/crawl/ilias/file_templates.py @@ -258,17 +258,17 @@ def learning_module_template(body: bs4.Tag, name: str, prev: Optional[str], next soupify(nav_template.replace("{{left}}", left).replace("{{right}}", right).encode()) ) - body_str = cast(str, body.prettify()) + body_str = body.prettify() return _learning_module_template.replace("{{body}}", body_str).replace("{{name}}", name) def forum_thread_template(name: str, url: str, heading: bs4.Tag, content: bs4.Tag) -> str: - if title := cast(Optional[bs4.Tag], heading.find(name="b")): + if title := heading.find(name="b"): title.wrap(bs4.Tag(name="a", attrs={"href": url})) return ( _forum_thread_template.replace("{{name}}", name) - .replace("{{heading}}", cast(str, heading.prettify())) - .replace("{{content}}", cast(str, content.prettify())) + .replace("{{heading}}", heading.prettify()) + .replace("{{content}}", content.prettify()) ) diff --git a/PFERD/crawl/ilias/ilias_html_cleaner.py b/PFERD/crawl/ilias/ilias_html_cleaner.py index 958860a..35a7ea0 100644 --- a/PFERD/crawl/ilias/ilias_html_cleaner.py +++ b/PFERD/crawl/ilias/ilias_html_cleaner.py @@ -86,7 +86,7 @@ def clean(soup: BeautifulSoup) -> BeautifulSoup: for block in cast(list[Tag], soup.find_all(class_="ilc_va_ihcap_VAccordIHeadCap")): block.name = "h3" - block["class"] += ["accordion-head"] + block["class"] += ["accordion-head"] # type: ignore for dummy in soup.select(".ilc_text_block_Standard.ilc_Paragraph"): children = list(dummy.children) diff --git a/PFERD/crawl/ilias/ilias_web_crawler.py 
b/PFERD/crawl/ilias/ilias_web_crawler.py index b8212a4..12d8700 100644 --- a/PFERD/crawl/ilias/ilias_web_crawler.py +++ b/PFERD/crawl/ilias/ilias_web_crawler.py @@ -548,7 +548,7 @@ instance's greatest bottleneck. @staticmethod def _parse_link_content(element: IliasPageElement, content: BeautifulSoup) -> list[LinkData]: - links = cast(list[Tag], list(content.select("a"))) + links = list(content.select("a")) if len(links) == 1: url = str(links[0].get("href")).strip() return [LinkData(name=element.name, description=element.description or "", url=url)] @@ -598,7 +598,7 @@ instance's greatest bottleneck. async with dl as (_bar, sink): description = clean(insert_base_markup(description)) description_tag = await self.internalize_images(description) - sink.file.write(cast(str, description_tag.prettify()).encode("utf-8")) + sink.file.write(description_tag.prettify().encode("utf-8")) sink.done() @anoncritical @@ -946,10 +946,10 @@ instance's greatest bottleneck. if prev: prev_p = self._transformer.transform(parent_path / (_sanitize_path_name(prev) + ".html")) - prev = cast(str, os.path.relpath(prev_p, my_path.parent)) if prev_p else None + prev = os.path.relpath(prev_p, my_path.parent) if prev_p else None if next: next_p = self._transformer.transform(parent_path / (_sanitize_path_name(next) + ".html")) - next = cast(str, os.path.relpath(next_p, my_path.parent)) if next_p else None + next = os.path.relpath(next_p, my_path.parent) if next_p else None async with maybe_dl as (bar, sink): content = element.content @@ -1052,7 +1052,7 @@ instance's greatest bottleneck. async with self.session.get(urljoin(self._base_url, "/login.php"), params=params) as request: login_page = soupify(await request.read()) - login_form = cast(Optional[Tag], login_page.find("form", attrs={"name": "login_form"})) + login_form = login_page.find("form", attrs={"name": "login_form"}) if login_form is None: raise CrawlError("Could not find the login form! 
Specified client id might be invalid.") diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index d7f6f8d..f727028 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -401,11 +401,8 @@ class IliasPage: return self._find_normal_entries() def get_info_tab(self) -> Optional[IliasPageElement]: - tab: Optional[Tag] = cast( - Optional[Tag], - self._soup.find( - name="a", attrs={"href": lambda x: x is not None and "cmdClass=ilinfoscreengui" in x} - ), + tab: Optional[Tag] = self._soup.find( + name="a", attrs={"href": lambda x: x is not None and "cmdClass=ilinfoscreengui" in x} ) if tab is not None: return IliasPageElement.create_new( @@ -496,10 +493,7 @@ class IliasPage: base_url = re.sub(r"cmd=\w+", "cmd=post", base_url) base_url = re.sub(r"cmdClass=\w+", "cmdClass=ilExportGUI", base_url) - rtoken_form = cast( - Optional[Tag], - self._soup.find("form", attrs={"action": lambda x: x is not None and "rtoken=" in x}), - ) + rtoken_form = self._soup.find("form", attrs={"action": lambda x: x is not None and "rtoken=" in x}) if not rtoken_form: log.explain("Found no rtoken anywhere") return None @@ -579,14 +573,9 @@ class IliasPage: return self._uncollapse_future_meetings_url() is not None def _uncollapse_future_meetings_url(self) -> Optional[IliasPageElement]: - element = cast( - Optional[Tag], - self._soup.find( - "a", - attrs={ - "href": lambda x: x is not None and ("crs_next_sess=1" in x or "crs_prev_sess=1" in x) - }, - ), + element = self._soup.find( + "a", + attrs={"href": lambda x: x is not None and ("crs_next_sess=1" in x or "crs_prev_sess=1" in x)}, ) if not element: return None @@ -614,16 +603,13 @@ class IliasPage: return "baseClass=ilmembershipoverviewgui" in self._page_url def _select_content_page_url(self) -> Optional[IliasPageElement]: - tab = cast( - Optional[Tag], - self._soup.find( - id="tab_view_content", attrs={"class": lambda x: x is not None and "active" not in x} - ), + tab = self._soup.find( + id="tab_view_content", attrs={"class": lambda x: x is not None and "active" not in x} ) # Already selected (or not found) if not tab: return None - link = cast(Optional[Tag], tab.find("a")) + link = tab.find("a") if link: link_str = self._abs_url_from_link(link) return IliasPageElement.create_new(IliasElementType.FOLDER, link_str, "select content page") @@ -670,11 +656,8 @@ class IliasPage: def _get_show_max_forum_entries_per_page_url( self, wanted_max: Optional[int] = None ) -> Optional[IliasPageElement]: - correct_link = cast( - Optional[Tag], - self._soup.find( - "a", attrs={"href": lambda x: x is not None and "trows=800" in x and "cmd=showThreads" in x} - ), + correct_link = self._soup.find( + "a", attrs={"href": lambda x: x is not None and "trows=800" in x and "cmd=showThreads" in x} ) if not correct_link: @@ -706,7 +689,7 @@ class IliasPage: titles: list[Tag] = self._soup.select("#block_pditems_0 .il-item-title") for title in titles: - link = cast(Optional[Tag], title.find("a")) + link = title.find("a") if not link: log.explain(f"Skipping offline item: {title.get_text().strip()!r}") @@ -776,9 +759,7 @@ class IliasPage: # # We need to figure out where we are. 
- video_element_table = cast( - Optional[Tag], self._soup.find(name="table", id=re.compile(r"tbl_xoct_.+")) - ) + video_element_table = self._soup.find(name="table", id=re.compile(r"tbl_xoct_.+")) if video_element_table is None: # We are in stage 1 @@ -801,7 +782,7 @@ class IliasPage: return self._find_opencast_video_entries_no_paging() def _find_opencast_video_entries_paginated(self) -> list[IliasPageElement]: - table_element = cast(Optional[Tag], self._soup.find(name="table", id=re.compile(r"tbl_xoct_.+"))) + table_element = self._soup.find(name="table", id=re.compile(r"tbl_xoct_.+")) if table_element is None: log.warn("Couldn't increase elements per page (table not found). I might miss elements.") @@ -841,12 +822,10 @@ class IliasPage: # 6th or 7th child (1 indexed) is the modification time string. Try to find it # by parsing backwards from the end and finding something that looks like a date modification_time = None - row: Tag = link.parent.parent.parent + row: Tag = link.parent.parent.parent # type: ignore column_count = len(row.select("td.std")) for index in range(column_count, 0, -1): - modification_string = ( - link.parent.parent.parent.select_one(f"td.std:nth-child({index})").get_text().strip() - ) + modification_string = cast(Tag, row.select_one(f"td.std:nth-child({index})")).get_text().strip() if match := re.search(r"\d+\.\d+.\d+ \d+:\d+", modification_string): modification_time = datetime.strptime(match.group(0), "%d.%m.%Y %H:%M") break @@ -855,7 +834,7 @@ class IliasPage: log.warn(f"Could not determine upload time for {link}") modification_time = datetime.now() - title = link.parent.parent.parent.select_one("td.std:nth-child(3)").get_text().strip() + title = cast(Tag, row.select_one("td.std:nth-child(3)")).get_text().strip() title += ".mp4" video_name: str = _sanitize_path_name(title) @@ -883,7 +862,7 @@ class IliasPage: def _find_exercise_entries_detail_page(self) -> list[IliasPageElement]: results: list[IliasPageElement] = [] - if link := cast(Optional[Tag], self._soup.select_one("#tab_submission > a")): + if link := self._soup.select_one("#tab_submission > a"): results.append( IliasPageElement.create_new( IliasElementType.EXERCISE_FILES, self._abs_url_from_link(link), "Submission" @@ -907,7 +886,7 @@ class IliasPage: parent_row: Tag = cast( Tag, link.find_parent(attrs={"class": lambda x: x is not None and "row" in x}) ) - name_tag = cast(Optional[Tag], parent_row.find(name="div")) + name_tag = parent_row.find(name="div") if not name_tag: log.warn("Could not find name tag for exercise entry") @@ -961,7 +940,7 @@ class IliasPage: def _find_exercise_entries_root_page(self) -> list[IliasPageElement]: results: list[IliasPageElement] = [] - content_tab = cast(Optional[Tag], self._soup.find(id="ilContentContainer")) + content_tab = self._soup.find(id="ilContentContainer") if not content_tab: log.warn("Could not find content tab in exercise overview page") _unexpected_html_warning() @@ -1118,7 +1097,7 @@ class IliasPage: if url is None and video_element.get("src"): url = cast(Optional[str], video_element.get("src")) - fig_caption = cast(Optional[Tag], figure.select_one("figcaption")) + fig_caption = figure.select_one("figcaption") if fig_caption: title = cast(Tag, figure.select_one("figcaption")).get_text().strip() + ".mp4" elif url is not None: @@ -1146,7 +1125,7 @@ class IliasPage: # We should not crawl files under meetings if "ilContainerListItemContentCB" in cast(str, parent.get("class")): - link: Tag = parent.parent.find("a") + link: Tag = cast(Tag, cast(Tag, 
parent.parent).find("a")) typ = IliasPage._find_type_for_element( "meeting", self._abs_url_from_link(link), @@ -1179,7 +1158,7 @@ class IliasPage: # This is for these weird JS-y blocks and custom item groups if "ilContainerItemsContainer" in cast(str, parent.get("class")): - data_store_url = parent.parent.get("data-store-url", "").lower() + data_store_url = cast(str, cast(Tag, parent.parent).get("data-store-url", "")).lower() is_custom_item_group = ( "baseclass=ilcontainerblockpropertiesstoragegui" in data_store_url and "cont_block_id=" in data_store_url @@ -1417,7 +1396,7 @@ class IliasPage: def is_logged_in(ilias_soup: IliasSoup) -> bool: soup = ilias_soup.soup # Normal ILIAS pages - mainbar = cast(Optional[Tag], soup.find(class_="il-maincontrols-metabar")) + mainbar = soup.find(class_="il-maincontrols-metabar") if mainbar is not None: login_button = mainbar.find(attrs={"href": lambda x: x is not None and "login.php" in x}) shib_login = soup.find(id="button_shib_login") @@ -1561,7 +1540,7 @@ def parse_ilias_forum_export(forum_export: BeautifulSoup) -> list[IliasForumThre elements = [] for p in forum_export.select("body > p"): title_tag = p - content_tag = cast(Optional[Tag], p.find_next_sibling("ul")) + content_tag = p.find_next_sibling("ul") title = cast(Tag, p.find("b")).text if ":" in title: diff --git a/PFERD/crawl/ilias/shibboleth_login.py b/PFERD/crawl/ilias/shibboleth_login.py index bdff4ea..bffb183 100644 --- a/PFERD/crawl/ilias/shibboleth_login.py +++ b/PFERD/crawl/ilias/shibboleth_login.py @@ -60,7 +60,7 @@ class ShibbolethLogin: "fudis_web_authn_assertion_input": "", } if csrf_token_input := form.find("input", {"name": "csrf_token"}): - data["csrf_token"] = csrf_token_input["value"] + data["csrf_token"] = csrf_token_input["value"] # type: ignore soup = await _post(sess, url, data) if soup.find(id="attributeRelease"): @@ -79,7 +79,7 @@ class ShibbolethLogin: # (or clicking "Continue" if you have JS disabled) relay_state = cast(Tag, soup.find("input", {"name": "RelayState"})) saml_response = cast(Tag, soup.find("input", {"name": "SAMLResponse"})) - url = form = soup.find("form", {"method": "post"})["action"] + url = cast(str, cast(Tag, soup.find("form", {"method": "post"}))["action"]) data = { # using the info obtained in the while loop above "RelayState": cast(str, relay_state["value"]), "SAMLResponse": cast(str, saml_response["value"]), @@ -108,7 +108,7 @@ class ShibbolethLogin: "fudis_otp_input": tfa_token, } if csrf_token_input := form.find("input", {"name": "csrf_token"}): - data["csrf_token"] = csrf_token_input["value"] + data["csrf_token"] = csrf_token_input["value"] # type: ignore return await _post(session, url, data) @staticmethod diff --git a/PFERD/logging.py b/PFERD/logging.py index a810aa9..ac633ec 100644 --- a/PFERD/logging.py +++ b/PFERD/logging.py @@ -3,7 +3,7 @@ import sys import traceback from collections.abc import AsyncIterator, Iterator from contextlib import AbstractContextManager, asynccontextmanager, contextmanager -from typing import Optional +from typing import Any, Optional from rich.console import Console, Group from rich.live import Live @@ -122,7 +122,7 @@ class Log: for line in self._lines: self.print(line) - def print(self, text: str) -> None: + def print(self, text: Any) -> None: """ Print a normal message. Allows markup. """