Fix ruff errors

I-Al-Istannen
2025-10-19 15:25:40 +02:00
parent 2cf0e060ed
commit 6e563134b2
26 changed files with 194 additions and 209 deletions

View File

@@ -1,5 +1,5 @@
from collections.abc import Callable
from configparser import SectionProxy
from typing import Callable, Dict
from ..auth import Authenticator
from ..config import Config
@@ -13,12 +13,12 @@ CrawlerConstructor = Callable[
str, # Name (without the "crawl:" prefix)
SectionProxy, # Crawler's section of global config
Config, # Global config
Dict[str, Authenticator], # Loaded authenticators by name
dict[str, Authenticator], # Loaded authenticators by name
],
Crawler,
]
CRAWLERS: Dict[str, CrawlerConstructor] = {
CRAWLERS: dict[str, CrawlerConstructor] = {
"local": lambda n, s, c, a: LocalCrawler(n, LocalCrawlerSection(s), c),
"ilias-web": lambda n, s, c, a: IliasWebCrawler(n, IliasWebCrawlerSection(s), c, a),
"kit-ilias-web": lambda n, s, c, a: KitIliasWebCrawler(n, KitIliasWebCrawlerSection(s), c, a),

View File

@@ -1,10 +1,10 @@
import asyncio
import os
from abc import ABC, abstractmethod
from collections.abc import Awaitable, Coroutine
from collections.abc import Awaitable, Callable, Coroutine, Sequence
from datetime import datetime
from pathlib import Path, PurePath
from typing import Any, Callable, Dict, List, Optional, Sequence, Set, Tuple, TypeVar
from typing import Any, Optional, TypeVar
from ..auth import Authenticator
from ..config import Config, Section
@@ -116,7 +116,7 @@ class CrawlToken(ReusableAsyncContextManager[ProgressBar]):
return bar
class DownloadToken(ReusableAsyncContextManager[Tuple[ProgressBar, FileSink]]):
class DownloadToken(ReusableAsyncContextManager[tuple[ProgressBar, FileSink]]):
def __init__(self, limiter: Limiter, fs_token: FileSinkToken, path: PurePath):
super().__init__()
@@ -128,7 +128,7 @@ class DownloadToken(ReusableAsyncContextManager[Tuple[ProgressBar, FileSink]]):
def path(self) -> PurePath:
return self._path
async def _on_aenter(self) -> Tuple[ProgressBar, FileSink]:
async def _on_aenter(self) -> tuple[ProgressBar, FileSink]:
await self._stack.enter_async_context(self._limiter.limit_download())
sink = await self._stack.enter_async_context(self._fs_token)
# The "Downloaded ..." message is printed in the output dir, not here
@@ -205,7 +205,7 @@ class CrawlerSection(Section):
on_windows = os.name == "nt"
return self.s.getboolean("windows_paths", fallback=on_windows)
def auth(self, authenticators: Dict[str, Authenticator]) -> Authenticator:
def auth(self, authenticators: dict[str, Authenticator]) -> Authenticator:
value = self.s.get("auth")
if value is None:
self.missing_value("auth")
@@ -262,7 +262,7 @@ class Crawler(ABC):
return self._output_dir
@staticmethod
async def gather(awaitables: Sequence[Awaitable[Any]]) -> List[Any]:
async def gather(awaitables: Sequence[Awaitable[Any]]) -> list[Any]:
"""
Similar to asyncio.gather. However, in the case of an exception, all
still running tasks are cancelled and the exception is rethrown.
@@ -394,7 +394,7 @@ class Crawler(ABC):
log.warn("Couldn't find or load old report")
return
seen: Set[PurePath] = set()
seen: set[PurePath] = set()
for known in sorted(self.prev_report.found_paths):
looking_at = list(reversed(known.parents)) + [known]
for path in looking_at:
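
The gather docstring above describes a cancel-on-first-failure variant of asyncio.gather. A minimal standalone sketch of that behaviour (a hypothetical helper, not the crawler's actual implementation):

import asyncio
from collections.abc import Awaitable, Sequence
from typing import Any

async def gather_or_cancel(awaitables: Sequence[Awaitable[Any]]) -> list[Any]:
    # Run everything; on the first exception cancel the remaining tasks,
    # then let the exception propagate to the caller.
    tasks = [asyncio.ensure_future(a) for a in awaitables]
    try:
        return list(await asyncio.gather(*tasks))
    except BaseException:
        for task in tasks:
            task.cancel()
        raise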

View File

@@ -3,7 +3,7 @@ import http.cookies
import ssl
from datetime import datetime
from pathlib import Path, PurePath
from typing import Any, Dict, List, Optional, Tuple, cast
from typing import Any, Optional, cast
import aiohttp
import certifi
@@ -43,7 +43,7 @@ class HttpCrawler(Crawler):
self._http_timeout = section.http_timeout()
self._cookie_jar_path = self._output_dir.resolve(self.COOKIE_FILE)
self._shared_cookie_jar_paths: Optional[List[Path]] = None
self._shared_cookie_jar_paths: Optional[list[Path]] = None
self._shared_auth = shared_auth
self._output_dir.register_reserved(self.COOKIE_FILE)
@@ -98,7 +98,7 @@ class HttpCrawler(Crawler):
"""
raise RuntimeError("_authenticate() was called but crawler doesn't provide an implementation")
def share_cookies(self, shared: Dict[Authenticator, List[Path]]) -> None:
def share_cookies(self, shared: dict[Authenticator, list[Path]]) -> None:
if not self._shared_auth:
return
@@ -219,7 +219,7 @@ class HttpCrawler(Crawler):
etags[str(path)] = etag
self._output_dir.report.add_custom_value(ETAGS_CUSTOM_REPORT_VALUE_KEY, etags)
async def _request_resource_version(self, resource_url: str) -> Tuple[Optional[str], Optional[datetime]]:
async def _request_resource_version(self, resource_url: str) -> tuple[Optional[str], Optional[datetime]]:
"""
Requests the ETag and Last-Modified headers of a resource via a HEAD request.
If no entity tag / modification date can be obtained, the according value will be None.
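
The docstring above describes probing a resource's ETag and Last-Modified headers with a HEAD request. A minimal sketch of that pattern with aiohttp (a hypothetical standalone helper; header parsing via email.utils is an assumption, not necessarily how PFERD does it):

import aiohttp
from datetime import datetime
from email.utils import parsedate_to_datetime
from typing import Optional

async def head_resource_version(
    session: aiohttp.ClientSession, url: str
) -> tuple[Optional[str], Optional[datetime]]:
    async with session.head(url) as resp:
        etag = resp.headers.get("ETag")
        last_modified = resp.headers.get("Last-Modified")
        # No error handling for malformed dates; real code should be defensive.
        mtime = parsedate_to_datetime(last_modified) if last_modified else None
        return etag, mtime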

View File

@@ -1,5 +1,6 @@
import asyncio
from typing import Any, Callable, Optional
from collections.abc import Callable
from typing import Any, Optional
import aiohttp
@@ -15,9 +16,9 @@ def _iorepeat(attempts: int, name: str, failure_is_error: bool = False) -> Calla
try:
return await f(*args, **kwargs)
except aiohttp.ContentTypeError: # invalid content type
raise CrawlWarning("ILIAS returned an invalid content type")
raise CrawlWarning("ILIAS returned an invalid content type") from None
except aiohttp.TooManyRedirects:
raise CrawlWarning("Got stuck in a redirect loop")
raise CrawlWarning("Got stuck in a redirect loop") from None
except aiohttp.ClientPayloadError as e: # encoding or not enough bytes
last_exception = e
except aiohttp.ClientConnectionError as e: # e.g. timeout, disconnect, resolve failed, etc.
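
The two "from None" additions above are ruff's B904 fix: re-raising a different exception inside an except block without an explicit cause makes Python chain the original traceback onto the new one. A small self-contained illustration (not project code):

def parse_attempts(value: str) -> int:
    try:
        return int(value)
    except ValueError:
        # "from None" suppresses the implicit exception chain, so callers see
        # only this message instead of the original ValueError traceback too.
        raise RuntimeError(f"invalid attempt count: {value!r}") from None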

View File

@@ -297,9 +297,7 @@ class Links(Enum):
raise ValueError("Missing switch case")
def collection_as_one(self) -> bool:
if self == Links.FANCY:
return True
return False
return self == Links.FANCY
def extension(self) -> Optional[str]:
if self == Links.FANCY:
@@ -355,4 +353,4 @@ class Links(Enum):
return Links(string)
except ValueError:
options = [f"'{option.value}'" for option in Links]
raise ValueError(f"must be one of {', '.join(options)}")
raise ValueError(f"must be one of {', '.join(options)}") from None

View File

@@ -4,7 +4,7 @@ import os
import re
from collections.abc import Awaitable, Coroutine
from pathlib import PurePath
from typing import Any, Dict, List, Literal, Optional, Set, Union, cast
from typing import Any, Literal, Optional, cast
from urllib.parse import urljoin
import aiohttp
@@ -33,7 +33,7 @@ from .kit_ilias_html import (
)
from .shibboleth_login import ShibbolethLogin
TargetType = Union[str, int]
TargetType = str | int
class LoginTypeLocal:
@@ -49,7 +49,7 @@ class IliasWebCrawlerSection(HttpCrawlerSection):
return base_url
def login(self) -> Union[Literal["shibboleth"], LoginTypeLocal]:
def login(self) -> Literal["shibboleth"] | LoginTypeLocal:
login_type = self.s.get("login_type")
if not login_type:
self.missing_value("login_type")
@@ -63,7 +63,7 @@ class IliasWebCrawlerSection(HttpCrawlerSection):
self.invalid_value("login_type", login_type, "Should be <shibboleth | local>")
def tfa_auth(self, authenticators: Dict[str, Authenticator]) -> Optional[Authenticator]:
def tfa_auth(self, authenticators: dict[str, Authenticator]) -> Optional[Authenticator]:
value: Optional[str] = self.s.get("tfa_auth")
if value is None:
return None
@@ -110,7 +110,7 @@ class IliasWebCrawlerSection(HttpCrawlerSection):
return self.s.getboolean("forums", fallback=False)
_DIRECTORY_PAGES: Set[IliasElementType] = {
_DIRECTORY_PAGES: set[IliasElementType] = {
IliasElementType.EXERCISE,
IliasElementType.EXERCISE_FILES,
IliasElementType.EXERCISE_OVERVIEW,
@@ -122,7 +122,7 @@ _DIRECTORY_PAGES: Set[IliasElementType] = {
IliasElementType.OPENCAST_VIDEO_FOLDER_MAYBE_PAGINATED,
}
_VIDEO_ELEMENTS: Set[IliasElementType] = {
_VIDEO_ELEMENTS: set[IliasElementType] = {
IliasElementType.MEDIACAST_VIDEO,
IliasElementType.MEDIACAST_VIDEO_FOLDER,
IliasElementType.OPENCAST_VIDEO,
@@ -172,7 +172,7 @@ class IliasWebCrawler(HttpCrawler):
name: str,
section: IliasWebCrawlerSection,
config: Config,
authenticators: Dict[str, Authenticator],
authenticators: dict[str, Authenticator],
):
# Setting a main authenticator for cookie sharing
auth = section.auth(authenticators)
@@ -201,7 +201,7 @@ instance's greatest bottleneck.
self._links = section.links()
self._videos = section.videos()
self._forums = section.forums()
self._visited_urls: Dict[str, PurePath] = dict()
self._visited_urls: dict[str, PurePath] = dict()
async def _run(self) -> None:
if isinstance(self._target, int):
@@ -264,9 +264,9 @@ instance's greatest bottleneck.
expected_course_id: Optional[int] = None,
crawl_nested_courses: bool = False,
) -> None:
elements: List[IliasPageElement] = []
elements: list[IliasPageElement] = []
# A list as variable redefinitions are not propagated to outer scopes
description: List[BeautifulSoup] = []
description: list[BeautifulSoup] = []
@_iorepeat(3, "crawling folder")
async def gather_elements() -> None:
@@ -309,7 +309,7 @@ instance's greatest bottleneck.
elements.sort(key=lambda e: e.id())
tasks: List[Awaitable[None]] = []
tasks: list[Awaitable[None]] = []
for element in elements:
if handle := await self._handle_ilias_element(cl.path, element, crawl_nested_courses):
tasks.append(asyncio.create_task(handle))
@@ -340,15 +340,14 @@ instance's greatest bottleneck.
)
return None
if element.type in _VIDEO_ELEMENTS:
if not self._videos:
log.status(
"[bold bright_black]",
"Ignored",
fmt_path(element_path),
"[bright_black](enable with option 'videos')",
)
return None
if element.type in _VIDEO_ELEMENTS and not self._videos:
log.status(
"[bold bright_black]",
"Ignored",
fmt_path(element_path),
"[bright_black](enable with option 'videos')",
)
return None
if element.type == IliasElementType.FILE:
return await self._handle_file(element, element_path)
@@ -522,8 +521,8 @@ instance's greatest bottleneck.
sink.file.write(rendered.encode("utf-8"))
sink.done()
async def _resolve_link_target(self, export_url: str) -> Union[BeautifulSoup, Literal["none"]]:
async def impl() -> Optional[Union[BeautifulSoup, Literal["none"]]]:
async def _resolve_link_target(self, export_url: str) -> BeautifulSoup | Literal["none"]:
async def impl() -> Optional[BeautifulSoup | Literal["none"]]:
async with self.session.get(export_url, allow_redirects=False) as resp:
# No redirect means we were authenticated
if hdrs.LOCATION not in resp.headers:
@@ -658,7 +657,7 @@ instance's greatest bottleneck.
def _previous_contained_opencast_videos(
self, element: IliasPageElement, element_path: PurePath
) -> List[PurePath]:
) -> list[PurePath]:
if not self.prev_report:
return []
custom_value = self.prev_report.get_custom_value(_get_video_cache_key(element))
@@ -714,7 +713,7 @@ instance's greatest bottleneck.
add_to_report([str(self._transformer.transform(dl.path))])
return
contained_video_paths: List[str] = []
contained_video_paths: list[str] = []
for stream_element in stream_elements:
video_path = dl.path.parent / stream_element.name
@@ -832,7 +831,7 @@ instance's greatest bottleneck.
elements = parse_ilias_forum_export(soupify(export))
tasks: List[Awaitable[None]] = []
tasks: list[Awaitable[None]] = []
for thread in elements:
tasks.append(asyncio.create_task(self._download_forum_thread(cl.path, thread, element.url)))
@@ -842,7 +841,7 @@ instance's greatest bottleneck.
@anoncritical
@_iorepeat(3, "saving forum thread")
async def _download_forum_thread(
self, parent_path: PurePath, thread: Union[IliasForumThread, IliasPageElement], forum_url: str
self, parent_path: PurePath, thread: IliasForumThread | IliasPageElement, forum_url: str
) -> None:
path = parent_path / (_sanitize_path_name(thread.name) + ".html")
maybe_dl = await self.download(path, mtime=thread.mtime)
@@ -871,7 +870,7 @@ instance's greatest bottleneck.
@_iorepeat(3, "crawling learning module")
@anoncritical
async def _crawl_learning_module(self, element: IliasPageElement, cl: CrawlToken) -> None:
elements: List[IliasLearningModulePage] = []
elements: list[IliasLearningModulePage] = []
async with cl:
log.explain_topic(f"Parsing initial HTML page for {fmt_path(cl.path)}")
@@ -891,7 +890,7 @@ instance's greatest bottleneck.
for index, lm_element in enumerate(elements):
lm_element.title = f"{index:02}_{lm_element.title}"
tasks: List[Awaitable[None]] = []
tasks: list[Awaitable[None]] = []
for index, elem in enumerate(elements):
prev_url = elements[index - 1].title if index > 0 else None
next_url = elements[index + 1].title if index < len(elements) - 1 else None
@@ -906,10 +905,10 @@ instance's greatest bottleneck.
self,
path: PurePath,
start_url: Optional[str],
dir: Union[Literal["left"], Literal["right"]],
dir: Literal["left"] | Literal["right"],
parent_element: IliasPageElement,
) -> List[IliasLearningModulePage]:
elements: List[IliasLearningModulePage] = []
) -> list[IliasLearningModulePage]:
elements: list[IliasLearningModulePage] = []
if not start_url:
return elements
@@ -923,10 +922,7 @@ instance's greatest bottleneck.
page = IliasPage(soup, parent_element)
if next := page.get_learning_module_data():
elements.append(next)
if dir == "left":
next_element_url = next.previous_url
else:
next_element_url = next.next_url
next_element_url = next.previous_url if dir == "left" else next.next_url
counter += 1
return elements
@@ -950,16 +946,10 @@ instance's greatest bottleneck.
if prev:
prev_p = self._transformer.transform(parent_path / (_sanitize_path_name(prev) + ".html"))
if prev_p:
prev = cast(str, os.path.relpath(prev_p, my_path.parent))
else:
prev = None
prev = cast(str, os.path.relpath(prev_p, my_path.parent)) if prev_p else None
if next:
next_p = self._transformer.transform(parent_path / (_sanitize_path_name(next) + ".html"))
if next_p:
next = cast(str, os.path.relpath(next_p, my_path.parent))
else:
next = None
next = cast(str, os.path.relpath(next_p, my_path.parent)) if next_p else None
async with maybe_dl as (bar, sink):
content = element.content
@@ -973,14 +963,13 @@ instance's greatest bottleneck.
"""
log.explain_topic("Internalizing images")
for elem in tag.find_all(recursive=True):
if elem.name == "img":
if src := elem.attrs.get("src", None):
url = urljoin(self._base_url, cast(str, src))
if not url.startswith(self._base_url):
continue
log.explain(f"Internalizing {url!r}")
img = await self._get_authenticated(url)
elem.attrs["src"] = "data:;base64," + base64.b64encode(img).decode()
if elem.name == "img" and (src := elem.attrs.get("src", None)):
url = urljoin(self._base_url, cast(str, src))
if not url.startswith(self._base_url):
continue
log.explain(f"Internalizing {url!r}")
img = await self._get_authenticated(url)
elem.attrs["src"] = "data:;base64," + base64.b64encode(img).decode()
if elem.name == "iframe" and cast(str, elem.attrs.get("src", "")).startswith("//"):
# For unknown reasons the protocol seems to be stripped.
elem.attrs["src"] = "https:" + cast(str, elem.attrs["src"])
@@ -1025,7 +1014,7 @@ instance's greatest bottleneck.
)
return soup
async def _post(self, url: str, data: dict[str, Union[str, List[str]]]) -> bytes:
async def _post(self, url: str, data: dict[str, str | list[str]]) -> bytes:
form_data = aiohttp.FormData()
for key, val in data.items():
form_data.add_field(key, val)
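
This file also shows ruff's UP007 rewrite: Union[X, Y] becomes X | Y, which needs Python 3.10+ when the annotation is evaluated at runtime. A tiny sketch of the equivalence (illustrative only):

from typing import Literal, Union

TargetTypeOld = Union[str, int]  # old spelling, flagged by ruff UP007
TargetTypeNew = str | int        # PEP 604 spelling; compares equal to Union[str, int]

# The operator also works on typing special forms such as Literal:
LoginType = Literal["shibboleth"] | None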

View File

@@ -1,9 +1,10 @@
import json
import re
from collections.abc import Callable
from dataclasses import dataclass
from datetime import date, datetime, timedelta
from enum import Enum
from typing import Callable, Dict, Optional, Union, cast
from typing import Optional, cast
from urllib.parse import urljoin, urlparse
from bs4 import BeautifulSoup, Tag
@@ -13,7 +14,7 @@ from PFERD.crawl.crawler import CrawlWarning
from PFERD.logging import log
from PFERD.utils import url_set_query_params
TargetType = Union[str, int]
TargetType = str | int
class TypeMatcher:
@@ -308,7 +309,7 @@ class IliasPageElement:
"""
# This checks whether we can reach a `:` without passing a `-`
if re.search(r"^[^-]+: ", meeting_name):
if re.search(r"^[^-]+: ", meeting_name): # noqa: SIM108
# Meeting name only contains date: "05. Jan 2000:"
split_delimiter = ":"
else:
@@ -331,7 +332,7 @@ class IliasPageElement:
@dataclass
class IliasDownloadForumData:
url: str
form_data: Dict[str, Union[str, list[str]]]
form_data: dict[str, str | list[str]]
empty: bool
@@ -433,21 +434,20 @@ class IliasPage:
for p in paragraphs:
if p.find_parent(class_=is_interesting_class):
continue
if "ilc_media_cont_MediaContainer" in p["class"]:
if "ilc_media_cont_MediaContainer" in p["class"] and (video := p.select_one("video")):
# We have an embedded video which should be downloaded by _find_mob_videos
if video := p.select_one("video"):
url, title = self._find_mob_video_url_title(video, p)
raw_html += '<div style="min-width: 100px; min-height: 100px; border: 1px solid black;'
raw_html += "display: flex; justify-content: center; align-items: center;"
raw_html += ' margin: 0.5rem;">'
if url is not None and urlparse(url).hostname != urlparse(self._page_url).hostname:
if url.startswith("//"):
url = "https:" + url
raw_html += f'<a href="{url}" target="_blank">External Video: {title}</a>'
else:
raw_html += f"Video elided. Filename: '{title}'."
raw_html += "</div>\n"
continue
url, title = self._find_mob_video_url_title(video, p)
raw_html += '<div style="min-width: 100px; min-height: 100px; border: 1px solid black;'
raw_html += "display: flex; justify-content: center; align-items: center;"
raw_html += ' margin: 0.5rem;">'
if url is not None and urlparse(url).hostname != urlparse(self._page_url).hostname:
if url.startswith("//"):
url = "https:" + url
raw_html += f'<a href="{url}" target="_blank">External Video: {title}</a>'
else:
raw_html += f"Video elided. Filename: '{title}'."
raw_html += "</div>\n"
continue
# Ignore special listings (like folder groupings)
if "ilc_section_Special" in p["class"]:
@@ -794,7 +794,7 @@ class IliasPage:
is_paginated = self._soup.find(id=re.compile(r"tab_page_sel.+")) is not None
if is_paginated and not self._page_type == IliasElementType.OPENCAST_VIDEO_FOLDER:
if is_paginated and self._page_type != IliasElementType.OPENCAST_VIDEO_FOLDER:
# We are in stage 2 - try to break pagination
return self._find_opencast_video_entries_paginated()
@@ -1164,6 +1164,9 @@ class IliasPage:
"""
found_titles = []
if None == "hey":
pass
outer_accordion_content: Optional[Tag] = None
parents: list[Tag] = list(tag.parents)
@@ -1302,10 +1305,7 @@ class IliasPage:
),
)
caption_container = caption_parent.find_next_sibling("div")
if caption_container:
description = caption_container.get_text().strip()
else:
description = None
description = caption_container.get_text().strip() if caption_container else None
if not typ:
_unexpected_html_warning()
@@ -1444,9 +1444,7 @@ class IliasPage:
return True
# The individual video player wrapper page has nothing of the above.
# Match it by its playerContainer.
if soup.select_one("#playerContainer") is not None:
return True
return False
return soup.select_one("#playerContainer") is not None
@staticmethod
def _find_date_in_text(text: str) -> Optional[datetime]:
@@ -1505,11 +1503,11 @@ def demangle_date(date_str: str, fail_silently: bool = False) -> Optional[dateti
# Normalize whitespace because users
date_str = re.sub(r"\s+", " ", date_str)
date_str = re.sub("Gestern|Yesterday", _format_date_english(_yesterday()), date_str, re.I)
date_str = re.sub("Heute|Today", _format_date_english(date.today()), date_str, re.I)
date_str = re.sub("Morgen|Tomorrow", _format_date_english(_tomorrow()), date_str, re.I)
date_str = re.sub("Gestern|Yesterday", _format_date_english(_yesterday()), date_str, flags=re.I)
date_str = re.sub("Heute|Today", _format_date_english(date.today()), date_str, flags=re.I)
date_str = re.sub("Morgen|Tomorrow", _format_date_english(_tomorrow()), date_str, flags=re.I)
date_str = date_str.strip()
for german, english in zip(german_months, english_months):
for german, english in zip(german_months, english_months, strict=True):
date_str = date_str.replace(german, english)
# Remove trailing dots for abbreviations, e.g. "20. Apr. 2020" -> "20. Apr 2020"
date_str = date_str.replace(english + ".", english)
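
The demangle_date hunk above fixes an actual bug rather than pure style: re.sub's fourth positional parameter is count, not flags, so passing re.I positionally was interpreted as count=2 and the intended case-insensitivity was lost. The zip(..., strict=True) change (B905) makes mismatched month lists raise instead of silently truncating. A short illustration (not project code):

import re

re.sub("heute", "today", "Heute heute", re.I)        # "Heute today": re.I (== 2) was taken as count
re.sub("heute", "today", "Heute heute", flags=re.I)  # "today today": case-insensitive as intended

list(zip(["Januar"], ["January", "February"], strict=True))  # raises ValueError instead of dropping data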

View File

@@ -1,4 +1,4 @@
from typing import Dict, Literal
from typing import Literal
from ...auth import Authenticator
from ...config import Config
@@ -26,7 +26,7 @@ class KitIliasWebCrawler(IliasWebCrawler):
name: str,
section: KitIliasWebCrawlerSection,
config: Config,
authenticators: Dict[str, Authenticator],
authenticators: dict[str, Authenticator],
):
super().__init__(name, section, config, authenticators)

View File

@@ -1,9 +1,11 @@
import os
import re
from collections.abc import Awaitable, Generator, Iterable
from dataclasses import dataclass
from datetime import datetime
from pathlib import PurePath
from typing import Any, Awaitable, Generator, Iterable, List, Optional, Pattern, Tuple, Union, cast
from re import Pattern
from typing import Any, Optional, Union, cast
from urllib.parse import urljoin
from bs4 import BeautifulSoup, Tag
@@ -44,7 +46,7 @@ class KitIpdFile:
@dataclass
class KitIpdFolder:
name: str
entries: List[Union[KitIpdFile, "KitIpdFolder"]]
entries: list[Union[KitIpdFile, "KitIpdFolder"]]
def explain(self) -> None:
log.explain_topic(f"Folder {self.name!r}")
@@ -68,7 +70,7 @@ class KitIpdCrawler(HttpCrawler):
if not maybe_cl:
return
tasks: List[Awaitable[None]] = []
tasks: list[Awaitable[None]] = []
async with maybe_cl:
for item in await self._fetch_items():
@@ -120,9 +122,9 @@ class KitIpdCrawler(HttpCrawler):
async with maybe_dl as (bar, sink):
await self._stream_from_url(file.url, element_path, sink, bar)
async def _fetch_items(self) -> Iterable[Union[KitIpdFile, KitIpdFolder]]:
async def _fetch_items(self) -> Iterable[KitIpdFile | KitIpdFolder]:
page, url = await self.get_page()
elements: List[Tag] = self._find_file_links(page)
elements: list[Tag] = self._find_file_links(page)
# do not add unnecessary nesting for a single <h1> heading
drop_h1: bool = len(page.find_all(name="h1")) <= 1
@@ -151,7 +153,7 @@ class KitIpdCrawler(HttpCrawler):
name = os.path.basename(url)
return KitIpdFile(name, url)
def _find_file_links(self, tag: Union[Tag, BeautifulSoup]) -> list[Tag]:
def _find_file_links(self, tag: Tag | BeautifulSoup) -> list[Tag]:
return cast(list[Tag], tag.find_all(name="a", attrs={"href": self._file_regex}))
def _abs_url_from_link(self, url: str, link_tag: Tag) -> str:
@@ -172,7 +174,7 @@ class KitIpdCrawler(HttpCrawler):
self._add_etag_to_report(path, resp.headers.get("ETag"))
async def get_page(self) -> Tuple[BeautifulSoup, str]:
async def get_page(self) -> tuple[BeautifulSoup, str]:
async with self.session.get(self._url) as request:
# The web page for Algorithmen für Routenplanung contains some
# weird comments that beautifulsoup doesn't parse correctly. This
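
One detail worth noting in this last file: the annotation entries: list[Union[KitIpdFile, "KitIpdFolder"]] keeps typing.Union even though other unions were rewritten to the | operator. A plausible reason (an assumption about the module, not something the diff states) is that without deferred annotations the | operator cannot take a string forward reference:

from dataclasses import dataclass
from typing import Union

@dataclass
class FolderSketch:
    # Hypothetical stand-ins for KitIpdFolder/KitIpdFile.
    # OK: typing.Union wraps string forward references lazily.
    entries: list[Union["FileSketch", "FolderSketch"]]

# entries: list["FileSketch" | "FolderSketch"] would raise TypeError
# (unsupported operand type(s) for |) when the class body is evaluated,
# unless the module uses "from __future__ import annotations".

@dataclass
class FileSketch:
    name: str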