Fix some typing errors

It seems like the type hints have gotten better :)
This commit is contained in:
I-Al-Istannen
2025-10-19 15:59:08 +02:00
parent 5646e933fd
commit ef7d66c5af
8 changed files with 44 additions and 65 deletions

View File

@@ -1,4 +1,4 @@
from typing import Optional, cast from typing import Optional
import keyring import keyring
@@ -13,7 +13,7 @@ class KeyringAuthSection(AuthSection):
return self.s.get("username") return self.s.get("username")
def keyring_name(self) -> str: def keyring_name(self) -> str:
return cast(str, self.s.get("keyring_name", fallback=NAME)) return self.s.get("keyring_name", fallback=NAME)
class KeyringAuthenticator(Authenticator): class KeyringAuthenticator(Authenticator):

View File

@@ -3,7 +3,7 @@ import http.cookies
import ssl import ssl
from datetime import datetime from datetime import datetime
from pathlib import Path, PurePath from pathlib import Path, PurePath
from typing import Any, Optional, cast from typing import Any, Optional
import aiohttp import aiohttp
import certifi import certifi
@@ -187,7 +187,7 @@ class HttpCrawler(Crawler):
if level == 0 or (level == 1 and drop_h1): if level == 0 or (level == 1 and drop_h1):
return PurePath() return PurePath()
level_heading = cast(Optional[Tag], tag.find_previous(name=f"h{level}")) level_heading = tag.find_previous(name=f"h{level}")
if level_heading is None: if level_heading is None:
return find_associated_headings(tag, level - 1) return find_associated_headings(tag, level - 1)

View File

@@ -258,17 +258,17 @@ def learning_module_template(body: bs4.Tag, name: str, prev: Optional[str], next
soupify(nav_template.replace("{{left}}", left).replace("{{right}}", right).encode()) soupify(nav_template.replace("{{left}}", left).replace("{{right}}", right).encode())
) )
body_str = cast(str, body.prettify()) body_str = body.prettify()
return _learning_module_template.replace("{{body}}", body_str).replace("{{name}}", name) return _learning_module_template.replace("{{body}}", body_str).replace("{{name}}", name)
def forum_thread_template(name: str, url: str, heading: bs4.Tag, content: bs4.Tag) -> str: def forum_thread_template(name: str, url: str, heading: bs4.Tag, content: bs4.Tag) -> str:
if title := cast(Optional[bs4.Tag], heading.find(name="b")): if title := heading.find(name="b"):
title.wrap(bs4.Tag(name="a", attrs={"href": url})) title.wrap(bs4.Tag(name="a", attrs={"href": url}))
return ( return (
_forum_thread_template.replace("{{name}}", name) _forum_thread_template.replace("{{name}}", name)
.replace("{{heading}}", cast(str, heading.prettify())) .replace("{{heading}}", heading.prettify())
.replace("{{content}}", cast(str, content.prettify())) .replace("{{content}}", content.prettify())
) )

View File

@@ -86,7 +86,7 @@ def clean(soup: BeautifulSoup) -> BeautifulSoup:
for block in cast(list[Tag], soup.find_all(class_="ilc_va_ihcap_VAccordIHeadCap")): for block in cast(list[Tag], soup.find_all(class_="ilc_va_ihcap_VAccordIHeadCap")):
block.name = "h3" block.name = "h3"
block["class"] += ["accordion-head"] block["class"] += ["accordion-head"] # type: ignore
for dummy in soup.select(".ilc_text_block_Standard.ilc_Paragraph"): for dummy in soup.select(".ilc_text_block_Standard.ilc_Paragraph"):
children = list(dummy.children) children = list(dummy.children)

View File

@@ -548,7 +548,7 @@ instance's greatest bottleneck.
@staticmethod @staticmethod
def _parse_link_content(element: IliasPageElement, content: BeautifulSoup) -> list[LinkData]: def _parse_link_content(element: IliasPageElement, content: BeautifulSoup) -> list[LinkData]:
links = cast(list[Tag], list(content.select("a"))) links = list(content.select("a"))
if len(links) == 1: if len(links) == 1:
url = str(links[0].get("href")).strip() url = str(links[0].get("href")).strip()
return [LinkData(name=element.name, description=element.description or "", url=url)] return [LinkData(name=element.name, description=element.description or "", url=url)]
@@ -598,7 +598,7 @@ instance's greatest bottleneck.
async with dl as (_bar, sink): async with dl as (_bar, sink):
description = clean(insert_base_markup(description)) description = clean(insert_base_markup(description))
description_tag = await self.internalize_images(description) description_tag = await self.internalize_images(description)
sink.file.write(cast(str, description_tag.prettify()).encode("utf-8")) sink.file.write(description_tag.prettify().encode("utf-8"))
sink.done() sink.done()
@anoncritical @anoncritical
@@ -946,10 +946,10 @@ instance's greatest bottleneck.
if prev: if prev:
prev_p = self._transformer.transform(parent_path / (_sanitize_path_name(prev) + ".html")) prev_p = self._transformer.transform(parent_path / (_sanitize_path_name(prev) + ".html"))
prev = cast(str, os.path.relpath(prev_p, my_path.parent)) if prev_p else None prev = os.path.relpath(prev_p, my_path.parent) if prev_p else None
if next: if next:
next_p = self._transformer.transform(parent_path / (_sanitize_path_name(next) + ".html")) next_p = self._transformer.transform(parent_path / (_sanitize_path_name(next) + ".html"))
next = cast(str, os.path.relpath(next_p, my_path.parent)) if next_p else None next = os.path.relpath(next_p, my_path.parent) if next_p else None
async with maybe_dl as (bar, sink): async with maybe_dl as (bar, sink):
content = element.content content = element.content
@@ -1052,7 +1052,7 @@ instance's greatest bottleneck.
async with self.session.get(urljoin(self._base_url, "/login.php"), params=params) as request: async with self.session.get(urljoin(self._base_url, "/login.php"), params=params) as request:
login_page = soupify(await request.read()) login_page = soupify(await request.read())
login_form = cast(Optional[Tag], login_page.find("form", attrs={"name": "login_form"})) login_form = login_page.find("form", attrs={"name": "login_form"})
if login_form is None: if login_form is None:
raise CrawlError("Could not find the login form! Specified client id might be invalid.") raise CrawlError("Could not find the login form! Specified client id might be invalid.")

View File

@@ -401,11 +401,8 @@ class IliasPage:
return self._find_normal_entries() return self._find_normal_entries()
def get_info_tab(self) -> Optional[IliasPageElement]: def get_info_tab(self) -> Optional[IliasPageElement]:
tab: Optional[Tag] = cast( tab: Optional[Tag] = self._soup.find(
Optional[Tag],
self._soup.find(
name="a", attrs={"href": lambda x: x is not None and "cmdClass=ilinfoscreengui" in x} name="a", attrs={"href": lambda x: x is not None and "cmdClass=ilinfoscreengui" in x}
),
) )
if tab is not None: if tab is not None:
return IliasPageElement.create_new( return IliasPageElement.create_new(
@@ -496,10 +493,7 @@ class IliasPage:
base_url = re.sub(r"cmd=\w+", "cmd=post", base_url) base_url = re.sub(r"cmd=\w+", "cmd=post", base_url)
base_url = re.sub(r"cmdClass=\w+", "cmdClass=ilExportGUI", base_url) base_url = re.sub(r"cmdClass=\w+", "cmdClass=ilExportGUI", base_url)
rtoken_form = cast( rtoken_form = self._soup.find("form", attrs={"action": lambda x: x is not None and "rtoken=" in x})
Optional[Tag],
self._soup.find("form", attrs={"action": lambda x: x is not None and "rtoken=" in x}),
)
if not rtoken_form: if not rtoken_form:
log.explain("Found no rtoken anywhere") log.explain("Found no rtoken anywhere")
return None return None
@@ -579,14 +573,9 @@ class IliasPage:
return self._uncollapse_future_meetings_url() is not None return self._uncollapse_future_meetings_url() is not None
def _uncollapse_future_meetings_url(self) -> Optional[IliasPageElement]: def _uncollapse_future_meetings_url(self) -> Optional[IliasPageElement]:
element = cast( element = self._soup.find(
Optional[Tag],
self._soup.find(
"a", "a",
attrs={ attrs={"href": lambda x: x is not None and ("crs_next_sess=1" in x or "crs_prev_sess=1" in x)},
"href": lambda x: x is not None and ("crs_next_sess=1" in x or "crs_prev_sess=1" in x)
},
),
) )
if not element: if not element:
return None return None
@@ -614,16 +603,13 @@ class IliasPage:
return "baseClass=ilmembershipoverviewgui" in self._page_url return "baseClass=ilmembershipoverviewgui" in self._page_url
def _select_content_page_url(self) -> Optional[IliasPageElement]: def _select_content_page_url(self) -> Optional[IliasPageElement]:
tab = cast( tab = self._soup.find(
Optional[Tag],
self._soup.find(
id="tab_view_content", attrs={"class": lambda x: x is not None and "active" not in x} id="tab_view_content", attrs={"class": lambda x: x is not None and "active" not in x}
),
) )
# Already selected (or not found) # Already selected (or not found)
if not tab: if not tab:
return None return None
link = cast(Optional[Tag], tab.find("a")) link = tab.find("a")
if link: if link:
link_str = self._abs_url_from_link(link) link_str = self._abs_url_from_link(link)
return IliasPageElement.create_new(IliasElementType.FOLDER, link_str, "select content page") return IliasPageElement.create_new(IliasElementType.FOLDER, link_str, "select content page")
@@ -670,11 +656,8 @@ class IliasPage:
def _get_show_max_forum_entries_per_page_url( def _get_show_max_forum_entries_per_page_url(
self, wanted_max: Optional[int] = None self, wanted_max: Optional[int] = None
) -> Optional[IliasPageElement]: ) -> Optional[IliasPageElement]:
correct_link = cast( correct_link = self._soup.find(
Optional[Tag],
self._soup.find(
"a", attrs={"href": lambda x: x is not None and "trows=800" in x and "cmd=showThreads" in x} "a", attrs={"href": lambda x: x is not None and "trows=800" in x and "cmd=showThreads" in x}
),
) )
if not correct_link: if not correct_link:
@@ -706,7 +689,7 @@ class IliasPage:
titles: list[Tag] = self._soup.select("#block_pditems_0 .il-item-title") titles: list[Tag] = self._soup.select("#block_pditems_0 .il-item-title")
for title in titles: for title in titles:
link = cast(Optional[Tag], title.find("a")) link = title.find("a")
if not link: if not link:
log.explain(f"Skipping offline item: {title.get_text().strip()!r}") log.explain(f"Skipping offline item: {title.get_text().strip()!r}")
@@ -776,9 +759,7 @@ class IliasPage:
# #
# We need to figure out where we are. # We need to figure out where we are.
video_element_table = cast( video_element_table = self._soup.find(name="table", id=re.compile(r"tbl_xoct_.+"))
Optional[Tag], self._soup.find(name="table", id=re.compile(r"tbl_xoct_.+"))
)
if video_element_table is None: if video_element_table is None:
# We are in stage 1 # We are in stage 1
@@ -801,7 +782,7 @@ class IliasPage:
return self._find_opencast_video_entries_no_paging() return self._find_opencast_video_entries_no_paging()
def _find_opencast_video_entries_paginated(self) -> list[IliasPageElement]: def _find_opencast_video_entries_paginated(self) -> list[IliasPageElement]:
table_element = cast(Optional[Tag], self._soup.find(name="table", id=re.compile(r"tbl_xoct_.+"))) table_element = self._soup.find(name="table", id=re.compile(r"tbl_xoct_.+"))
if table_element is None: if table_element is None:
log.warn("Couldn't increase elements per page (table not found). I might miss elements.") log.warn("Couldn't increase elements per page (table not found). I might miss elements.")
@@ -841,12 +822,10 @@ class IliasPage:
# 6th or 7th child (1 indexed) is the modification time string. Try to find it # 6th or 7th child (1 indexed) is the modification time string. Try to find it
# by parsing backwards from the end and finding something that looks like a date # by parsing backwards from the end and finding something that looks like a date
modification_time = None modification_time = None
row: Tag = link.parent.parent.parent row: Tag = link.parent.parent.parent # type: ignore
column_count = len(row.select("td.std")) column_count = len(row.select("td.std"))
for index in range(column_count, 0, -1): for index in range(column_count, 0, -1):
modification_string = ( modification_string = cast(Tag, row.select_one(f"td.std:nth-child({index})")).get_text().strip()
link.parent.parent.parent.select_one(f"td.std:nth-child({index})").get_text().strip()
)
if match := re.search(r"\d+\.\d+.\d+ \d+:\d+", modification_string): if match := re.search(r"\d+\.\d+.\d+ \d+:\d+", modification_string):
modification_time = datetime.strptime(match.group(0), "%d.%m.%Y %H:%M") modification_time = datetime.strptime(match.group(0), "%d.%m.%Y %H:%M")
break break
@@ -855,7 +834,7 @@ class IliasPage:
log.warn(f"Could not determine upload time for {link}") log.warn(f"Could not determine upload time for {link}")
modification_time = datetime.now() modification_time = datetime.now()
title = link.parent.parent.parent.select_one("td.std:nth-child(3)").get_text().strip() title = cast(Tag, row.select_one("td.std:nth-child(3)")).get_text().strip()
title += ".mp4" title += ".mp4"
video_name: str = _sanitize_path_name(title) video_name: str = _sanitize_path_name(title)
@@ -883,7 +862,7 @@ class IliasPage:
def _find_exercise_entries_detail_page(self) -> list[IliasPageElement]: def _find_exercise_entries_detail_page(self) -> list[IliasPageElement]:
results: list[IliasPageElement] = [] results: list[IliasPageElement] = []
if link := cast(Optional[Tag], self._soup.select_one("#tab_submission > a")): if link := self._soup.select_one("#tab_submission > a"):
results.append( results.append(
IliasPageElement.create_new( IliasPageElement.create_new(
IliasElementType.EXERCISE_FILES, self._abs_url_from_link(link), "Submission" IliasElementType.EXERCISE_FILES, self._abs_url_from_link(link), "Submission"
@@ -907,7 +886,7 @@ class IliasPage:
parent_row: Tag = cast( parent_row: Tag = cast(
Tag, link.find_parent(attrs={"class": lambda x: x is not None and "row" in x}) Tag, link.find_parent(attrs={"class": lambda x: x is not None and "row" in x})
) )
name_tag = cast(Optional[Tag], parent_row.find(name="div")) name_tag = parent_row.find(name="div")
if not name_tag: if not name_tag:
log.warn("Could not find name tag for exercise entry") log.warn("Could not find name tag for exercise entry")
@@ -961,7 +940,7 @@ class IliasPage:
def _find_exercise_entries_root_page(self) -> list[IliasPageElement]: def _find_exercise_entries_root_page(self) -> list[IliasPageElement]:
results: list[IliasPageElement] = [] results: list[IliasPageElement] = []
content_tab = cast(Optional[Tag], self._soup.find(id="ilContentContainer")) content_tab = self._soup.find(id="ilContentContainer")
if not content_tab: if not content_tab:
log.warn("Could not find content tab in exercise overview page") log.warn("Could not find content tab in exercise overview page")
_unexpected_html_warning() _unexpected_html_warning()
@@ -1118,7 +1097,7 @@ class IliasPage:
if url is None and video_element.get("src"): if url is None and video_element.get("src"):
url = cast(Optional[str], video_element.get("src")) url = cast(Optional[str], video_element.get("src"))
fig_caption = cast(Optional[Tag], figure.select_one("figcaption")) fig_caption = figure.select_one("figcaption")
if fig_caption: if fig_caption:
title = cast(Tag, figure.select_one("figcaption")).get_text().strip() + ".mp4" title = cast(Tag, figure.select_one("figcaption")).get_text().strip() + ".mp4"
elif url is not None: elif url is not None:
@@ -1146,7 +1125,7 @@ class IliasPage:
# We should not crawl files under meetings # We should not crawl files under meetings
if "ilContainerListItemContentCB" in cast(str, parent.get("class")): if "ilContainerListItemContentCB" in cast(str, parent.get("class")):
link: Tag = parent.parent.find("a") link: Tag = cast(Tag, cast(Tag, parent.parent).find("a"))
typ = IliasPage._find_type_for_element( typ = IliasPage._find_type_for_element(
"meeting", "meeting",
self._abs_url_from_link(link), self._abs_url_from_link(link),
@@ -1179,7 +1158,7 @@ class IliasPage:
# This is for these weird JS-y blocks and custom item groups # This is for these weird JS-y blocks and custom item groups
if "ilContainerItemsContainer" in cast(str, parent.get("class")): if "ilContainerItemsContainer" in cast(str, parent.get("class")):
data_store_url = parent.parent.get("data-store-url", "").lower() data_store_url = cast(str, cast(Tag, parent.parent).get("data-store-url", "")).lower()
is_custom_item_group = ( is_custom_item_group = (
"baseclass=ilcontainerblockpropertiesstoragegui" in data_store_url "baseclass=ilcontainerblockpropertiesstoragegui" in data_store_url
and "cont_block_id=" in data_store_url and "cont_block_id=" in data_store_url
@@ -1417,7 +1396,7 @@ class IliasPage:
def is_logged_in(ilias_soup: IliasSoup) -> bool: def is_logged_in(ilias_soup: IliasSoup) -> bool:
soup = ilias_soup.soup soup = ilias_soup.soup
# Normal ILIAS pages # Normal ILIAS pages
mainbar = cast(Optional[Tag], soup.find(class_="il-maincontrols-metabar")) mainbar = soup.find(class_="il-maincontrols-metabar")
if mainbar is not None: if mainbar is not None:
login_button = mainbar.find(attrs={"href": lambda x: x is not None and "login.php" in x}) login_button = mainbar.find(attrs={"href": lambda x: x is not None and "login.php" in x})
shib_login = soup.find(id="button_shib_login") shib_login = soup.find(id="button_shib_login")
@@ -1561,7 +1540,7 @@ def parse_ilias_forum_export(forum_export: BeautifulSoup) -> list[IliasForumThre
elements = [] elements = []
for p in forum_export.select("body > p"): for p in forum_export.select("body > p"):
title_tag = p title_tag = p
content_tag = cast(Optional[Tag], p.find_next_sibling("ul")) content_tag = p.find_next_sibling("ul")
title = cast(Tag, p.find("b")).text title = cast(Tag, p.find("b")).text
if ":" in title: if ":" in title:

View File

@@ -60,7 +60,7 @@ class ShibbolethLogin:
"fudis_web_authn_assertion_input": "", "fudis_web_authn_assertion_input": "",
} }
if csrf_token_input := form.find("input", {"name": "csrf_token"}): if csrf_token_input := form.find("input", {"name": "csrf_token"}):
data["csrf_token"] = csrf_token_input["value"] data["csrf_token"] = csrf_token_input["value"] # type: ignore
soup = await _post(sess, url, data) soup = await _post(sess, url, data)
if soup.find(id="attributeRelease"): if soup.find(id="attributeRelease"):
@@ -79,7 +79,7 @@ class ShibbolethLogin:
# (or clicking "Continue" if you have JS disabled) # (or clicking "Continue" if you have JS disabled)
relay_state = cast(Tag, soup.find("input", {"name": "RelayState"})) relay_state = cast(Tag, soup.find("input", {"name": "RelayState"}))
saml_response = cast(Tag, soup.find("input", {"name": "SAMLResponse"})) saml_response = cast(Tag, soup.find("input", {"name": "SAMLResponse"}))
url = form = soup.find("form", {"method": "post"})["action"] url = cast(str, cast(Tag, soup.find("form", {"method": "post"}))["action"])
data = { # using the info obtained in the while loop above data = { # using the info obtained in the while loop above
"RelayState": cast(str, relay_state["value"]), "RelayState": cast(str, relay_state["value"]),
"SAMLResponse": cast(str, saml_response["value"]), "SAMLResponse": cast(str, saml_response["value"]),
@@ -108,7 +108,7 @@ class ShibbolethLogin:
"fudis_otp_input": tfa_token, "fudis_otp_input": tfa_token,
} }
if csrf_token_input := form.find("input", {"name": "csrf_token"}): if csrf_token_input := form.find("input", {"name": "csrf_token"}):
data["csrf_token"] = csrf_token_input["value"] data["csrf_token"] = csrf_token_input["value"] # type: ignore
return await _post(session, url, data) return await _post(session, url, data)
@staticmethod @staticmethod

View File

@@ -3,7 +3,7 @@ import sys
import traceback import traceback
from collections.abc import AsyncIterator, Iterator from collections.abc import AsyncIterator, Iterator
from contextlib import AbstractContextManager, asynccontextmanager, contextmanager from contextlib import AbstractContextManager, asynccontextmanager, contextmanager
from typing import Optional from typing import Any, Optional
from rich.console import Console, Group from rich.console import Console, Group
from rich.live import Live from rich.live import Live
@@ -122,7 +122,7 @@ class Log:
for line in self._lines: for line in self._lines:
self.print(line) self.print(line)
def print(self, text: str) -> None: def print(self, text: Any) -> None:
""" """
Print a normal message. Allows markup. Print a normal message. Allows markup.
""" """