diff --git a/PFERD/auth/keyring.py b/PFERD/auth/keyring.py
index c14f6fb..02a9269 100644
--- a/PFERD/auth/keyring.py
+++ b/PFERD/auth/keyring.py
@@ -1,4 +1,4 @@
-from typing import Optional, Tuple
+from typing import Optional, Tuple, cast
import keyring
@@ -13,7 +13,7 @@ class KeyringAuthSection(AuthSection):
return self.s.get("username")
def keyring_name(self) -> str:
- return self.s.get("keyring_name", fallback=NAME)
+ return cast(str, self.s.get("keyring_name", fallback=NAME))
class KeyringAuthenticator(Authenticator):
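
Note, not part of the patch: a minimal sketch of the pattern behind the keyring change, assuming `self.s` is a configparser SectionProxy as in the rest of PFERD's config handling (the section name below is illustrative). SectionProxy.get() is typed as returning an optional string even when a fallback is given, so a method annotated `-> str` needs the cast (or an explicit None check); runtime behaviour is unchanged.

    from configparser import ConfigParser
    from typing import cast

    NAME = "PFERD"  # stand-in for the constant imported in keyring.py

    parser = ConfigParser()
    parser.read_string("[auth:keyring]\n")
    section = parser["auth:keyring"]

    def keyring_name() -> str:
        # The fallback guarantees a non-None value at runtime; cast() only
        # informs the type checker and does not touch the returned value.
        return cast(str, section.get("keyring_name", fallback=NAME))

    print(keyring_name())  # -> PFERD
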
diff --git a/PFERD/crawl/http_crawler.py b/PFERD/crawl/http_crawler.py
index 2cc97e1..1c4631c 100644
--- a/PFERD/crawl/http_crawler.py
+++ b/PFERD/crawl/http_crawler.py
@@ -3,7 +3,7 @@ import http.cookies
import ssl
from datetime import datetime
from pathlib import Path, PurePath
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Any, Dict, List, Optional, Tuple, cast
import aiohttp
import certifi
@@ -187,12 +187,12 @@ class HttpCrawler(Crawler):
if level == 0 or (level == 1 and drop_h1):
return PurePath()
- level_heading = tag.find_previous(name=f"h{level}")
+ level_heading = cast(Optional[Tag], tag.find_previous(name=f"h{level}"))
if level_heading is None:
return find_associated_headings(tag, level - 1)
- folder_name = level_heading.getText().strip()
+ folder_name = level_heading.get_text().strip()
return find_associated_headings(level_heading, level - 1) / folder_name
    # start at level <h3> because paragraph-level headings are usually too granular for folder names
@@ -231,6 +231,7 @@ class HttpCrawler(Crawler):
etag_header = resp.headers.get("ETag")
last_modified_header = resp.headers.get("Last-Modified")
+ last_modified = None
if last_modified_header:
try:
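
For context, not part of the diff: a short sketch of why the find_previous() result is narrowed, assuming beautifulsoup4 with the bs4 type stubs this patch targets. find_previous() is annotated as returning an optional PageElement rather than a Tag, so the crawler casts before using Tag-only APIs; get_text() is the underscore spelling of the camelCase getText() alias.

    from typing import Optional, cast
    from bs4 import BeautifulSoup, Tag

    soup = BeautifulSoup("<h3>Week 1</h3><p id='entry'>Sheet 1</p>", "html.parser")
    entry = cast(Tag, soup.find(id="entry"))

    # Same narrowing as in find_associated_headings(): the heading may be
    # missing, so the Optional is kept while the element type is fixed.
    heading = cast(Optional[Tag], entry.find_previous(name="h3"))
    if heading is not None:
        print(heading.get_text().strip())  # -> Week 1
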
diff --git a/PFERD/crawl/ilias/file_templates.py b/PFERD/crawl/ilias/file_templates.py
index b206461..0a72199 100644
--- a/PFERD/crawl/ilias/file_templates.py
+++ b/PFERD/crawl/ilias/file_templates.py
@@ -1,5 +1,5 @@
from enum import Enum
-from typing import Optional
+from typing import Optional, cast
import bs4
@@ -139,13 +139,13 @@ def learning_module_template(body: bs4.Tag, name: str, prev: Optional[str], next
"""
if prev and body.select_one(".ilc_page_lnav_LeftNavigation"):
- text = body.select_one(".ilc_page_lnav_LeftNavigation").getText().strip()
+ text = cast(bs4.Tag, body.select_one(".ilc_page_lnav_LeftNavigation")).get_text().strip()
left = f'<a href="{prev}">{text}</a>'
else:
left = ""
if next and body.select_one(".ilc_page_rnav_RightNavigation"):
- text = body.select_one(".ilc_page_rnav_RightNavigation").getText().strip()
+ text = cast(bs4.Tag, body.select_one(".ilc_page_rnav_RightNavigation")).get_text().strip()
right = f'<a href="{next}">{text}</a>'
else:
right = ""
@@ -160,8 +160,8 @@ def learning_module_template(body: bs4.Tag, name: str, prev: Optional[str], next
"{{left}}", left).replace("{{right}}", right).encode())
)
- body = body.prettify()
- return _learning_module_template.replace("{{body}}", body).replace("{{name}}", name)
+ body_str = cast(str, body.prettify())
+ return _learning_module_template.replace("{{body}}", body_str).replace("{{name}}", name)
class Links(Enum):
diff --git a/PFERD/crawl/ilias/ilias_html_cleaner.py b/PFERD/crawl/ilias/ilias_html_cleaner.py
index e82906f..fb35bc0 100644
--- a/PFERD/crawl/ilias/ilias_html_cleaner.py
+++ b/PFERD/crawl/ilias/ilias_html_cleaner.py
@@ -1,3 +1,5 @@
+from typing import cast
+
from bs4 import BeautifulSoup, Comment, Tag
_STYLE_TAG_CONTENT = """
@@ -70,18 +72,18 @@ def insert_base_markup(soup: BeautifulSoup) -> BeautifulSoup:
def clean(soup: BeautifulSoup) -> BeautifulSoup:
- for block in soup.find_all(class_=lambda x: x in _ARTICLE_WORTHY_CLASSES):
+ for block in cast(list[Tag], soup.find_all(class_=lambda x: x in _ARTICLE_WORTHY_CLASSES)):
block.name = "article"
- for block in soup.find_all("h3"):
+ for block in cast(list[Tag], soup.find_all("h3")):
block.name = "div"
- for block in soup.find_all("h1"):
+ for block in cast(list[Tag], soup.find_all("h1")):
block.name = "h3"
- for block in soup.find_all(class_="ilc_va_ihcap_VAccordIHeadCap"):
+ for block in cast(list[Tag], soup.find_all(class_="ilc_va_ihcap_VAccordIHeadCap")):
block.name = "h3"
- block["class"] += ["accordion-head"]
+ block["class"] += ["accordion-head"] # type: ignore
for dummy in soup.select(".ilc_text_block_Standard.ilc_Paragraph"):
children = list(dummy.children)
@@ -97,7 +99,7 @@ def clean(soup: BeautifulSoup) -> BeautifulSoup:
if figure := video.find_parent("figure"):
figure.decompose()
- for hrule_imposter in soup.find_all(class_="ilc_section_Separator"):
+ for hrule_imposter in cast(list[Tag], soup.find_all(class_="ilc_section_Separator")):
hrule_imposter.insert(0, soup.new_tag("hr"))
return soup
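
Side note, not part of the diff: a sketch of the find_all() narrowing used throughout clean(), under the same bs4-stubs assumption. find_all() is typed as a ResultSet of PageElement, so renaming tags or editing their attributes requires treating the results as Tag first.

    from typing import cast
    from bs4 import BeautifulSoup, Tag

    soup = BeautifulSoup("<h3>Intro</h3><h3>Details</h3>", "html.parser")

    # Mirrors the h3 -> div rewrite above; Tag.name is assignable and renames
    # the element in place.
    for block in cast(list[Tag], soup.find_all("h3")):
        block.name = "div"

    print(soup)  # -> <div>Intro</div><div>Details</div>
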
diff --git a/PFERD/crawl/ilias/ilias_web_crawler.py b/PFERD/crawl/ilias/ilias_web_crawler.py
index 2fc399d..557150c 100644
--- a/PFERD/crawl/ilias/ilias_web_crawler.py
+++ b/PFERD/crawl/ilias/ilias_web_crawler.py
@@ -257,6 +257,7 @@ instance's greatest bottleneck.
async with cl:
next_stage_url: Optional[str] = url
current_parent = current_element
+ page = None
while next_stage_url:
soup = await self._get_page(next_stage_url)
@@ -278,6 +279,7 @@ instance's greatest bottleneck.
else:
next_stage_url = None
+ page = cast(IliasPage, page)
elements.extend(page.get_child_elements())
if description_string := page.get_description():
description.append(description_string)
@@ -461,10 +463,10 @@ instance's greatest bottleneck.
if not dl:
return
- async with dl as (bar, sink):
+ async with dl as (_bar, sink):
description = clean(insert_base_markup(description))
- description = await self.internalize_images(description)
- sink.file.write(description.prettify().encode("utf-8"))
+ description_tag = await self.internalize_images(description)
+ sink.file.write(cast(str, description_tag.prettify()).encode("utf-8"))
sink.done()
@anoncritical
@@ -483,7 +485,7 @@ instance's greatest bottleneck.
async with self.session.get(export_url, allow_redirects=False) as resp:
# No redirect means we were authenticated
if hdrs.LOCATION not in resp.headers:
- return soupify(await resp.read()).select_one("a").get("href").strip()
+ return soupify(await resp.read()).select_one("a").get("href").strip() # type: ignore
# We are either unauthenticated or the link is not active
new_url = resp.headers[hdrs.LOCATION].lower()
if "baseclass=illinkresourcehandlergui" in new_url and "cmd=infoscreen" in new_url:
@@ -707,6 +709,8 @@ instance's greatest bottleneck.
async with cl:
next_stage_url = element.url
+ page = None
+
while next_stage_url:
log.explain_topic(f"Parsing HTML page for {fmt_path(cl.path)}")
log.explain(f"URL: {next_stage_url}")
@@ -719,7 +723,7 @@ instance's greatest bottleneck.
else:
break
- download_data = page.get_download_forum_data()
+ download_data = cast(IliasPage, page).get_download_forum_data()
if not download_data:
raise CrawlWarning("Failed to extract forum data")
if download_data.empty:
@@ -751,8 +755,8 @@ instance's greatest bottleneck.
async with maybe_dl as (bar, sink):
content = "\n"
- content += element.title_tag.prettify()
- content += element.content_tag.prettify()
+ content += cast(str, element.title_tag.prettify())
+ content += cast(str, element.content_tag.prettify())
sink.file.write(content.encode("utf-8"))
sink.done()
@@ -877,15 +881,15 @@ instance's greatest bottleneck.
continue
if elem.name == "img":
if src := elem.attrs.get("src", None):
- url = urljoin(self._base_url, src)
+ url = urljoin(self._base_url, cast(str, src))
if not url.startswith(self._base_url):
continue
log.explain(f"Internalizing {url!r}")
img = await self._get_authenticated(url)
elem.attrs["src"] = "data:;base64," + base64.b64encode(img).decode()
- if elem.name == "iframe" and elem.attrs.get("src", "").startswith("//"):
+ if elem.name == "iframe" and cast(str, elem.attrs.get("src", "")).startswith("//"):
# For unknown reasons the protocol seems to be stripped.
- elem.attrs["src"] = "https:" + elem.attrs["src"]
+ elem.attrs["src"] = "https:" + cast(str, elem.attrs["src"])
return tag
def _ensure_not_seen(self, element: IliasPageElement, parent_path: PurePath) -> None:
@@ -979,11 +983,11 @@ instance's greatest bottleneck.
async with self.session.get(urljoin(self._base_url, "/login.php"), params=params) as request:
login_page = soupify(await request.read())
- login_form = login_page.find("form", attrs={"name": "formlogin"})
+ login_form = cast(Optional[Tag], login_page.find("form", attrs={"name": "formlogin"}))
if login_form is None:
raise CrawlError("Could not find the login form! Specified client id might be invalid.")
- login_url = login_form.attrs.get("action")
+ login_url = cast(Optional[str], login_form.attrs.get("action"))
if login_url is None:
raise CrawlError("Could not find the action URL in the login form!")
@@ -1004,14 +1008,14 @@ instance's greatest bottleneck.
@staticmethod
def _is_logged_in(soup: BeautifulSoup) -> bool:
# Normal ILIAS pages
- mainbar: Optional[Tag] = soup.find(class_="il-maincontrols-metabar")
+ mainbar = cast(Optional[Tag], soup.find(class_="il-maincontrols-metabar"))
if mainbar is not None:
- login_button = mainbar.find(attrs={"href": lambda x: x and "login.php" in x})
+ login_button = mainbar.find(attrs={"href": lambda x: x is not None and "login.php" in x})
shib_login = soup.find(id="button_shib_login")
return not login_button and not shib_login
# Personal Desktop
- if soup.find("a", attrs={"href": lambda x: x and "block_type=pditems" in x}):
+ if soup.find("a", attrs={"href": lambda x: x is not None and "block_type=pditems" in x}):
return True
# Video listing embeds do not have complete ILIAS html. Try to match them by
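
Also for context, not part of the diff: the repeated `lambda x: x is not None and ...` rewrite exists because bs4 attribute filters are expected to return bool, while `x and "login.php" in x` has type str | bool under strict mypy. A minimal sketch; the helper name and markup are illustrative only.

    from typing import Optional
    from bs4 import BeautifulSoup

    def is_login_href(value: Optional[str]) -> bool:
        # bs4 passes None when the attribute is absent; the explicit check
        # keeps the return type a plain bool.
        return value is not None and "login.php" in value

    soup = BeautifulSoup('<a href="ilias.php?cmd=login.php">Login</a>', "html.parser")
    print(soup.find("a", attrs={"href": is_login_href}) is not None)  # -> True
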
diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py
index 57c81e5..ee61cab 100644
--- a/PFERD/crawl/ilias/kit_ilias_html.py
+++ b/PFERD/crawl/ilias/kit_ilias_html.py
@@ -3,7 +3,7 @@ import re
from dataclasses import dataclass
from datetime import date, datetime, timedelta
from enum import Enum
-from typing import Dict, List, Optional, Union, cast
+from typing import Dict, Optional, Union, cast
from urllib.parse import urljoin, urlparse
from bs4 import BeautifulSoup, Tag
@@ -117,7 +117,7 @@ class IliasPageElement:
@dataclass
class IliasDownloadForumData:
url: str
- form_data: Dict[str, Union[str, List[str]]]
+ form_data: Dict[str, Union[str, list[str]]]
empty: bool
@@ -151,7 +151,7 @@ class IliasPage:
return "goto.php?target=root_" in permalink
return False
- def get_child_elements(self) -> List[IliasPageElement]:
+ def get_child_elements(self) -> list[IliasPageElement]:
"""
Return all child page elements you can find here.
"""
@@ -177,10 +177,10 @@ class IliasPage:
return self._find_normal_entries()
def get_info_tab(self) -> Optional[IliasPageElement]:
- tab: Optional[Tag] = self._soup.find(
+ tab: Optional[Tag] = cast(Optional[Tag], self._soup.find(
name="a",
- attrs={"href": lambda x: x and "cmdClass=ilinfoscreengui" in x}
- )
+ attrs={"href": lambda x: x is not None and "cmdClass=ilinfoscreengui" in x}
+ ))
if tab is not None:
return IliasPageElement.create_new(
IliasElementType.INFO_TAB,
@@ -193,7 +193,7 @@ class IliasPage:
def is_interesting_class(name: str) -> bool:
return name in ["ilCOPageSection", "ilc_Paragraph", "ilc_va_ihcap_VAccordIHeadCap"]
- paragraphs: List[Tag] = self._soup.findAll(class_=is_interesting_class)
+ paragraphs: list[Tag] = cast(list[Tag], self._soup.find_all(class_=is_interesting_class))
if not paragraphs:
return None
@@ -217,8 +217,8 @@ class IliasPage:
def get_learning_module_data(self) -> Optional[IliasLearningModulePage]:
if not self._is_learning_module_page():
return None
- content = self._soup.select_one("#ilLMPageContent")
- title = self._soup.select_one(".ilc_page_title_PageTitle").getText().strip()
+ content = cast(Tag, self._soup.select_one("#ilLMPageContent"))
+ title = cast(Tag, self._soup.select_one(".ilc_page_title_PageTitle")).get_text().strip()
return IliasLearningModulePage(
title=title,
content=content,
@@ -243,15 +243,18 @@ class IliasPage:
return None
def get_download_forum_data(self) -> Optional[IliasDownloadForumData]:
- form = self._soup.find("form", attrs={"action": lambda x: x and "fallbackCmd=showThreads" in x})
+ form = cast(Optional[Tag], self._soup.find(
+ "form",
+ attrs={"action": lambda x: x is not None and "fallbackCmd=showThreads" in x}
+ ))
if not form:
return None
- post_url = self._abs_url_from_relative(form["action"])
+ post_url = self._abs_url_from_relative(cast(str, form["action"]))
- thread_ids = [f["value"] for f in form.find_all(attrs={"name": "thread_ids[]"})]
+ thread_ids = [f["value"] for f in cast(list[Tag], form.find_all(attrs={"name": "thread_ids[]"}))]
- form_data: Dict[str, Union[str, List[str]]] = {
- "thread_ids[]": thread_ids,
+ form_data: Dict[str, Union[str, list[str]]] = {
+ "thread_ids[]": cast(list[str], thread_ids),
"selected_cmd2": "html",
"select_cmd2": "Ausführen",
"selected_cmd": "",
@@ -285,7 +288,7 @@ class IliasPage:
def _is_forum_page(self) -> bool:
read_more_btn = self._soup.find(
"button",
- attrs={"onclick": lambda x: x and "cmdClass=ilobjforumgui&cmd=markAllRead" in x}
+ attrs={"onclick": lambda x: x is not None and "cmdClass=ilobjforumgui&cmd=markAllRead" in x}
)
return read_more_btn is not None
@@ -297,7 +300,7 @@ class IliasPage:
return True
# Raw listing without ILIAS fluff
- video_element_table: Tag = self._soup.find(
+ video_element_table = self._soup.find(
name="table", id=re.compile(r"tbl_xoct_.+")
)
return video_element_table is not None
@@ -305,8 +308,8 @@ class IliasPage:
def _is_ilias_opencast_embedding(self) -> bool:
# ILIAS fluff around the real opencast html
if self._soup.find(id="headerimage"):
- element: Tag = self._soup.find(id="headerimage")
- if "opencast" in element.attrs["src"].lower():
+ element: Tag = cast(Tag, self._soup.find(id="headerimage"))
+ if "opencast" in cast(str, element.attrs["src"]).lower():
return True
return False
@@ -317,8 +320,8 @@ class IliasPage:
# We have no suitable parent - let's guesss
if self._soup.find(id="headerimage"):
- element: Tag = self._soup.find(id="headerimage")
- if "exc" in element.attrs["src"].lower():
+ element: Tag = cast(Tag, self._soup.find(id="headerimage"))
+ if "exc" in cast(str, element.attrs["src"]).lower():
return True
return False
@@ -340,10 +343,10 @@ class IliasPage:
return self._uncollapse_future_meetings_url() is not None
def _uncollapse_future_meetings_url(self) -> Optional[IliasPageElement]:
- element = self._soup.find(
+ element = cast(Optional[Tag], self._soup.find(
"a",
- attrs={"href": lambda x: x and ("crs_next_sess=1" in x or "crs_prev_sess=1" in x)}
- )
+ attrs={"href": lambda x: x is not None and ("crs_next_sess=1" in x or "crs_prev_sess=1" in x)}
+ ))
if not element:
return None
link = self._abs_url_from_link(element)
@@ -360,24 +363,24 @@ class IliasPage:
return "baseClass=ilmembershipoverviewgui" in self._page_url
def _select_content_page_url(self) -> Optional[IliasPageElement]:
- tab = self._soup.find(
+ tab = cast(Optional[Tag], self._soup.find(
id="tab_view_content",
attrs={"class": lambda x: x is not None and "active" not in x}
- )
+ ))
# Already selected (or not found)
if not tab:
return None
- link = tab.find("a")
+ link = cast(Optional[Tag], tab.find("a"))
if link:
- link = self._abs_url_from_link(link)
- return IliasPageElement.create_new(IliasElementType.FOLDER, link, "select content page")
+ link_str = self._abs_url_from_link(link)
+ return IliasPageElement.create_new(IliasElementType.FOLDER, link_str, "select content page")
_unexpected_html_warning()
log.warn_contd(f"Could not find content tab URL on {self._page_url!r}.")
log.warn_contd("PFERD might not find content on the course's main page.")
return None
- def _player_to_video(self) -> List[IliasPageElement]:
+ def _player_to_video(self) -> list[IliasPageElement]:
# Fetch the actual video page. This is a small wrapper page initializing a javscript
# player. Sadly we can not execute that JS. The actual video stream url is nowhere
# on the page, but defined in a JS object inside a script tag, passed to the player
@@ -414,10 +417,10 @@ class IliasPage:
return items
def _get_show_max_forum_entries_per_page_url(self) -> Optional[IliasPageElement]:
- correct_link = self._soup.find(
+ correct_link = cast(Optional[Tag], self._soup.find(
"a",
- attrs={"href": lambda x: x and "trows=800" in x and "cmd=showThreads" in x}
- )
+ attrs={"href": lambda x: x is not None and "trows=800" in x and "cmd=showThreads" in x}
+ ))
if not correct_link:
return None
@@ -426,15 +429,15 @@ class IliasPage:
return IliasPageElement.create_new(IliasElementType.FORUM, link, "show all forum threads")
- def _find_personal_desktop_entries(self) -> List[IliasPageElement]:
- items: List[IliasPageElement] = []
+ def _find_personal_desktop_entries(self) -> list[IliasPageElement]:
+ items: list[IliasPageElement] = []
- titles: List[Tag] = self._soup.select("#block_pditems_0 .il-item-title")
+ titles: list[Tag] = self._soup.select("#block_pditems_0 .il-item-title")
for title in titles:
- link = title.find("a")
+ link = cast(Optional[Tag], title.find("a"))
if not link:
- log.explain(f"Skipping offline item: {title.getText().strip()!r}")
+ log.explain(f"Skipping offline item: {title.get_text().strip()!r}")
continue
name = _sanitize_path_name(link.text.strip())
@@ -460,13 +463,13 @@ class IliasPage:
return items
- def _find_copa_entries(self) -> List[IliasPageElement]:
- items: List[IliasPageElement] = []
- links: List[Tag] = self._soup.findAll(class_="ilc_flist_a_FileListItemLink")
+ def _find_copa_entries(self) -> list[IliasPageElement]:
+ items: list[IliasPageElement] = []
+ links: list[Tag] = cast(list[Tag], self._soup.find_all(class_="ilc_flist_a_FileListItemLink"))
for link in links:
url = self._abs_url_from_link(link)
- name = re.sub(r"\([\d,.]+ [MK]B\)", "", link.getText()).strip().replace("\t", "")
+ name = re.sub(r"\([\d,.]+ [MK]B\)", "", link.get_text()).strip().replace("\t", "")
name = _sanitize_path_name(name)
if "file_id" not in url:
@@ -478,9 +481,9 @@ class IliasPage:
return items
- def _find_info_tab_entries(self) -> List[IliasPageElement]:
+ def _find_info_tab_entries(self) -> list[IliasPageElement]:
items = []
- links: List[Tag] = self._soup.select("a.il_ContainerItemCommand")
+ links: list[Tag] = self._soup.select("a.il_ContainerItemCommand")
for link in links:
if "cmdClass=ilobjcoursegui" not in link["href"]:
@@ -490,12 +493,12 @@ class IliasPage:
items.append(IliasPageElement.create_new(
IliasElementType.FILE,
self._abs_url_from_link(link),
- _sanitize_path_name(link.getText())
+ _sanitize_path_name(link.get_text())
))
return items
- def _find_opencast_video_entries(self) -> List[IliasPageElement]:
+ def _find_opencast_video_entries(self) -> list[IliasPageElement]:
# ILIAS has three stages for video pages
# 1. The initial dummy page without any videos. This page contains the link to the listing
# 2. The video listing which might be paginated
@@ -503,14 +506,14 @@ class IliasPage:
#
# We need to figure out where we are.
- video_element_table: Tag = self._soup.find(
+ video_element_table = cast(Optional[Tag], self._soup.find(
name="table", id=re.compile(r"tbl_xoct_.+")
- )
+ ))
if video_element_table is None:
# We are in stage 1
# The page is actually emtpy but contains the link to stage 2
- content_link: Tag = self._soup.select_one("#tab_series a")
+ content_link: Tag = cast(Tag, self._soup.select_one("#tab_series a"))
url: str = self._abs_url_from_link(content_link)
query_params = {"limit": "800", "cmd": "asyncGetTableGUI", "cmdMode": "asynch"}
url = url_set_query_params(url, query_params)
@@ -527,14 +530,14 @@ class IliasPage:
return self._find_opencast_video_entries_no_paging()
- def _find_opencast_video_entries_paginated(self) -> List[IliasPageElement]:
- table_element: Tag = self._soup.find(name="table", id=re.compile(r"tbl_xoct_.+"))
+ def _find_opencast_video_entries_paginated(self) -> list[IliasPageElement]:
+ table_element = cast(Optional[Tag], self._soup.find(name="table", id=re.compile(r"tbl_xoct_.+")))
if table_element is None:
log.warn("Couldn't increase elements per page (table not found). I might miss elements.")
return self._find_opencast_video_entries_no_paging()
- id_match = re.match(r"tbl_xoct_(.+)", table_element.attrs["id"])
+ id_match = re.match(r"tbl_xoct_(.+)", cast(str, table_element.attrs["id"]))
if id_match is None:
log.warn("Couldn't increase elements per page (table id not found). I might miss elements.")
return self._find_opencast_video_entries_no_paging()
@@ -548,16 +551,16 @@ class IliasPage:
log.explain("Disabled pagination, retrying folder as a new entry")
return [IliasPageElement.create_new(IliasElementType.OPENCAST_VIDEO_FOLDER, url, "")]
- def _find_opencast_video_entries_no_paging(self) -> List[IliasPageElement]:
+ def _find_opencast_video_entries_no_paging(self) -> list[IliasPageElement]:
"""
Crawls the "second stage" video page. This page contains the actual video urls.
"""
# Video start links are marked with an "Abspielen" link
- video_links: List[Tag] = self._soup.findAll(
+ video_links = cast(list[Tag], self._soup.find_all(
name="a", text=re.compile(r"\s*(Abspielen|Play)\s*")
- )
+ ))
- results: List[IliasPageElement] = []
+ results: list[IliasPageElement] = []
for link in video_links:
results.append(self._listed_opencast_video_to_element(link))
@@ -569,12 +572,12 @@ class IliasPage:
# 6th or 7th child (1 indexed) is the modification time string. Try to find it
# by parsing backwards from the end and finding something that looks like a date
modification_time = None
- row: Tag = link.parent.parent.parent
+ row: Tag = link.parent.parent.parent # type: ignore
column_count = len(row.select("td.std"))
for index in range(column_count, 0, -1):
- modification_string = link.parent.parent.parent.select_one(
+ modification_string = link.parent.parent.parent.select_one( # type: ignore
f"td.std:nth-child({index})"
- ).getText().strip()
+ ).get_text().strip()
if match := re.search(r"\d+\.\d+.\d+ \d+:\d+", modification_string):
modification_time = datetime.strptime(match.group(0), "%d.%m.%Y %H:%M")
break
@@ -583,7 +586,7 @@ class IliasPage:
log.warn(f"Could not determine upload time for {link}")
modification_time = datetime.now()
- title = link.parent.parent.parent.select_one("td.std:nth-child(3)").getText().strip()
+ title = link.parent.parent.parent.select_one("td.std:nth-child(3)").get_text().strip() # type: ignore
title += ".mp4"
video_name: str = _sanitize_path_name(title)
@@ -595,33 +598,34 @@ class IliasPage:
IliasElementType.OPENCAST_VIDEO_PLAYER, video_url, video_name, modification_time
)
- def _find_exercise_entries(self) -> List[IliasPageElement]:
+ def _find_exercise_entries(self) -> list[IliasPageElement]:
if self._soup.find(id="tab_submission"):
log.explain("Found submission tab. This is an exercise detail page")
return self._find_exercise_entries_detail_page()
log.explain("Found no submission tab. This is an exercise root page")
return self._find_exercise_entries_root_page()
- def _find_exercise_entries_detail_page(self) -> List[IliasPageElement]:
- results: List[IliasPageElement] = []
+ def _find_exercise_entries_detail_page(self) -> list[IliasPageElement]:
+ results: list[IliasPageElement] = []
# Find all download links in the container (this will contain all the files)
- download_links: List[Tag] = self._soup.findAll(
+ download_links = cast(list[Tag], self._soup.find_all(
name="a",
# download links contain the given command class
- attrs={"href": lambda x: x and "cmd=download" in x},
+ attrs={"href": lambda x: x is not None and "cmd=download" in x},
text="Download"
- )
+ ))
for link in download_links:
- parent_row: Tag = link.findParent("tr")
- children: List[Tag] = parent_row.findChildren("td")
+ parent_row: Tag = cast(Tag, link.find_parent("tr"))
+ children = cast(list[Tag], parent_row.find_all("td"))
- name = _sanitize_path_name(children[1].getText().strip())
+ name = _sanitize_path_name(children[1].get_text().strip())
log.explain(f"Found exercise detail entry {name!r}")
+ date = None
for child in reversed(children):
- date = demangle_date(child.getText().strip(), fail_silently=True)
+ date = demangle_date(child.get_text().strip(), fail_silently=True)
if date is not None:
break
if date is None:
@@ -636,30 +640,33 @@ class IliasPage:
return results
- def _find_exercise_entries_root_page(self) -> List[IliasPageElement]:
- results: List[IliasPageElement] = []
+ def _find_exercise_entries_root_page(self) -> list[IliasPageElement]:
+ results: list[IliasPageElement] = []
# Each assignment is in an accordion container
- assignment_containers: List[Tag] = self._soup.select(".il_VAccordionInnerContainer")
+ assignment_containers: list[Tag] = self._soup.select(".il_VAccordionInnerContainer")
for container in assignment_containers:
# Fetch the container name out of the header to use it in the path
- container_name = container.select_one(".ilAssignmentHeader").getText().strip()
+ container_name = cast(Tag, container.select_one(".ilAssignmentHeader")).get_text().strip()
log.explain(f"Found exercise container {container_name!r}")
# Find all download links in the container (this will contain all the files)
- files: List[Tag] = container.findAll(
+ files = cast(list[Tag], container.find_all(
name="a",
# download links contain the given command class
- attrs={"href": lambda x: x and "cmdClass=ilexsubmissiongui" in x},
+ attrs={"href": lambda x: x is not None and "cmdClass=ilexsubmissiongui" in x},
text="Download"
- )
+ ))
# Grab each file as you now have the link
for file_link in files:
# Two divs, side by side. Left is the name, right is the link ==> get left
# sibling
- file_name = file_link.parent.findPrevious(name="div").getText().strip()
+ file_name = cast(
+ Tag,
+ cast(Tag, file_link.parent).find_previous(name="div")
+ ).get_text().strip()
url = self._abs_url_from_link(file_link)
log.explain(f"Found exercise entry {file_name!r}")
@@ -672,21 +679,21 @@ class IliasPage:
))
# Find all links to file listings (e.g. "Submitted Files" for groups)
- file_listings: List[Tag] = container.findAll(
+ file_listings = cast(list[Tag], container.find_all(
name="a",
# download links contain the given command class
- attrs={"href": lambda x: x and "cmdclass=ilexsubmissionfilegui" in x.lower()}
- )
+ attrs={"href": lambda x: x is not None and "cmdclass=ilexsubmissionfilegui" in x.lower()}
+ ))
# Add each listing as a new
for listing in file_listings:
- parent_container: Tag = listing.findParent(
- "div", attrs={"class": lambda x: x and "form-group" in x}
- )
- label_container: Tag = parent_container.find(
- attrs={"class": lambda x: x and "control-label" in x}
- )
- file_name = label_container.getText().strip()
+ parent_container = cast(Tag, listing.find_parent(
+ "div", attrs={"class": lambda x: x is not None and "form-group" in x}
+ ))
+ label_container = cast(Tag, parent_container.find(
+ attrs={"class": lambda x: x is not None and "control-label" in x}
+ ))
+ file_name = label_container.get_text().strip()
url = self._abs_url_from_link(listing)
log.explain(f"Found exercise detail {file_name!r} at {url}")
results.append(IliasPageElement.create_new(
@@ -699,10 +706,10 @@ class IliasPage:
return results
- def _find_normal_entries(self) -> List[IliasPageElement]:
- result: List[IliasPageElement] = []
+ def _find_normal_entries(self) -> list[IliasPageElement]:
+ result: list[IliasPageElement] = []
- links: List[Tag] = []
+ links: list[Tag] = []
# Fetch all links and throw them to the general interpreter
if self._is_course_overview_page():
log.explain("Page is a course overview page, adjusting link selector")
@@ -716,9 +723,9 @@ class IliasPage:
parents = [_sanitize_path_name(x) for x in self._find_upwards_folder_hierarchy(link)]
if parents:
- element_name = "/".join(parents) + "/" + _sanitize_path_name(link.getText())
+ element_name = "/".join(parents) + "/" + _sanitize_path_name(link.get_text())
else:
- element_name = _sanitize_path_name(link.getText())
+ element_name = _sanitize_path_name(link.get_text())
element_type = self._find_type_from_link(element_name, link, abs_url)
description = self._find_link_description(link)
@@ -750,17 +757,17 @@ class IliasPage:
return result
- def _find_mediacast_videos(self) -> List[IliasPageElement]:
- videos: List[IliasPageElement] = []
+ def _find_mediacast_videos(self) -> list[IliasPageElement]:
+ videos: list[IliasPageElement] = []
- for elem in cast(List[Tag], self._soup.select(".ilPlayerPreviewOverlayOuter")):
+ for elem in cast(list[Tag], self._soup.select(".ilPlayerPreviewOverlayOuter")):
element_name = _sanitize_path_name(
- elem.select_one(".ilPlayerPreviewDescription").getText().strip()
+ cast(Tag, elem.select_one(".ilPlayerPreviewDescription")).get_text().strip()
)
if not element_name.endswith(".mp4"):
# just to make sure it has some kinda-alrightish ending
element_name = element_name + ".mp4"
- video_element = elem.find(name="video")
+ video_element = cast(Optional[Tag], elem.find(name="video"))
if not video_element:
_unexpected_html_warning()
log.warn_contd(f"No