From 596b6a7688a5101ec6e44a13f602c4673eb5e8e0 Mon Sep 17 00:00:00 2001
From: PinieP <59698589+PinieP@users.noreply.github.com>
Date: Tue, 5 Nov 2024 18:30:34 +0100
Subject: [PATCH] Add support for non-KIT shibboleth login (#98)

Co-authored-by: Mr-Pine <git@mr-pine.de>
Co-authored-by: I-Al-Istannen <I-Al-Istannen@users.noreply.github.com>
---
 CHANGELOG.md                               |   1 +
 CONFIG.md                                  |  21 ++-
 LICENSE                                    |   2 +-
 PFERD/crawl/http_crawler.py                |   7 +-
 PFERD/crawl/ilias/async_helper.py          |   3 +-
 PFERD/crawl/ilias/ilias_web_crawler.py     |  98 ++++++----
 PFERD/crawl/ilias/kit_ilias_web_crawler.py | 210 +--------------------
 PFERD/crawl/ilias/shibboleth_login.py      | 128 +++++++++++++
 8 files changed, 226 insertions(+), 244 deletions(-)
 create mode 100644 PFERD/crawl/ilias/shibboleth_login.py
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 12cda26..8024bba 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -25,6 +25,7 @@ ambiguous situations.
 ### Added
 - Support for MOB videos in page descriptions
 - Clickable links in the report to directly open new/modified/not-deleted files
+- Support for non-KIT Shibboleth login
 
 ### Changed
 - Remove videos from description pages
diff --git a/CONFIG.md b/CONFIG.md
index a52506d..9b79be8 100644
--- a/CONFIG.md
+++ b/CONFIG.md
@@ -163,12 +163,13 @@ out of the box for the corresponding universities:
 
 [ilias-dl]: https://github.com/V3lop5/ilias-downloader/blob/main/configs "ilias-downloader configs"
 
-| University    | `base_url`                           | `client_id`   |
-|---------------|--------------------------------------|---------------|
-| FH Aachen     | https://www.ili.fh-aachen.de         | elearning     |
-| Uni Köln      | https://www.ilias.uni-koeln.de/ilias | uk            |
-| Uni Konstanz  | https://ilias.uni-konstanz.de        | ILIASKONSTANZ |
-| Uni Stuttgart | https://ilias3.uni-stuttgart.de      | Uni_Stuttgart |
+| University    | `base_url`                              | `login_type` | `client_id`   |
+|---------------|-----------------------------------------|--------------|---------------|
+| FH Aachen     | https://www.ili.fh-aachen.de            | local        | elearning     |
+| Uni Köln      | https://www.ilias.uni-koeln.de/ilias    | local        | uk            |
+| Uni Konstanz  | https://ilias.uni-konstanz.de           | local        | ILIASKONSTANZ |
+| Uni Stuttgart | https://ilias3.uni-stuttgart.de         | local        | Uni_Stuttgart |
+| Uni Tübingen  | https://ovidius.uni-tuebingen.de/ilias3 | shibboleth   |               |
 
 If your university isn't listed, try navigating to your instance's login page.
 Assuming no custom login service is used, the URL will look something like this:
@@ -180,7 +181,11 @@ Assuming no custom login service is used, the URL will look something like this:
 If the values work, feel free to submit a PR and add them to the table above.
 
 - `base_url`: The URL where the ILIAS instance is located. (Required)
-- `client_id`: An ID used for authentication. (Required)
+- `login_type`: How you authenticate. (Required)
+    - `local`: Use `client_id` for authentication.
+    - `shibboleth`: Use shibboleth for authentication.
+- `client_id`: An ID used for authentication. Required if `login_type` is
+  `local`, ignored if it is `shibboleth`.
 - `target`: The ILIAS element to crawl. (Required)
     - `desktop`: Crawl your personal desktop / dashboard
     - `<course id>`: Crawl the course with the given id
@@ -191,6 +196,8 @@ If the values work, feel free to submit a PR and add them to the table above.
       and duplication warnings if you are a member of an ILIAS group. The
       `desktop` target is generally preferable.
 - `auth`: Name of auth section to use for login. (Required)
+- `tfa_auth`: Name of auth section to use for two-factor authentication. Only
+  uses the auth section's password. (Default: Anonymous `tfa` authenticator)
 - `links`: How to represent external links. (Default: `fancy`)
     - `ignore`: Don't download links.
     - `plaintext`: A text file containing only the URL.
diff --git a/LICENSE b/LICENSE
index 13fa307..ccccbe3 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,6 +1,6 @@
 Copyright 2019-2024 Garmelon, I-Al-Istannen, danstooamerican, pavelzw,
                     TheChristophe, Scriptim, thelukasprobst, Toorero,
-                    Mr-Pine, p-fruck
+                    Mr-Pine, p-fruck, PinieP
 
 Permission is hereby granted, free of charge, to any person obtaining a copy of
 this software and associated documentation files (the "Software"), to deal in
diff --git a/PFERD/crawl/http_crawler.py b/PFERD/crawl/http_crawler.py
index fe8a360..2cc97e1 100644
--- a/PFERD/crawl/http_crawler.py
+++ b/PFERD/crawl/http_crawler.py
@@ -262,7 +262,12 @@ class HttpCrawler(Crawler):
                     connect=self._http_timeout,
                     sock_connect=self._http_timeout,
                     sock_read=self._http_timeout,
-                )
+                ),
+                # See https://github.com/aio-libs/aiohttp/issues/6626
+                # Without this aiohttp will mangle the redirect header from Shibboleth, invalidating the
+                # passed signature. Shibboleth will not accept the broken signature and authentication will
+                # fail.
+                requote_redirect_url=False
         ) as session:
             self.session = session
             try:
diff --git a/PFERD/crawl/ilias/async_helper.py b/PFERD/crawl/ilias/async_helper.py
index 527a819..5e586b1 100644
--- a/PFERD/crawl/ilias/async_helper.py
+++ b/PFERD/crawl/ilias/async_helper.py
@@ -25,9 +25,10 @@ def _iorepeat(attempts: int, name: str, failure_is_error: bool = False) -> Calla
                 except asyncio.exceptions.TimeoutError as e:  # explicit http timeouts in HttpCrawler
                     last_exception = e
                 log.explain_topic(f"Retrying operation {name}. Retries left: {attempts - 1 - round}")
+                log.explain(f"Last exception: {last_exception!r}")
 
             if last_exception:
-                message = f"Error in I/O Operation: {last_exception}"
+                message = f"Error in I/O Operation: {last_exception!r}"
                 if failure_is_error:
                     raise CrawlError(message) from last_exception
                 else:
diff --git a/PFERD/crawl/ilias/ilias_web_crawler.py b/PFERD/crawl/ilias/ilias_web_crawler.py
index 941b265..a6c68f1 100644
--- a/PFERD/crawl/ilias/ilias_web_crawler.py
+++ b/PFERD/crawl/ilias/ilias_web_crawler.py
@@ -23,10 +23,16 @@ from .file_templates import Links, learning_module_template
 from .ilias_html_cleaner import clean, insert_base_markup
 from .kit_ilias_html import (IliasElementType, IliasForumThread, IliasLearningModulePage, IliasPage,
                              IliasPageElement, _sanitize_path_name, parse_ilias_forum_export)
+from .shibboleth_login import ShibbolethLogin
 
 TargetType = Union[str, int]
 
 
+class LoginTypeLocal:
+    def __init__(self, client_id: str):
+        self.client_id = client_id
+
+
 class IliasWebCrawlerSection(HttpCrawlerSection):
     def base_url(self) -> str:
         base_url = self.s.get("base_url")
@@ -35,12 +41,30 @@ class IliasWebCrawlerSection(HttpCrawlerSection):
 
         return base_url
 
-    def client_id(self) -> str:
-        client_id = self.s.get("client_id")
-        if not client_id:
-            self.missing_value("client_id")
+    def login(self) -> Union[Literal["shibboleth"], LoginTypeLocal]:
+        login_type = self.s.get("login_type")
+        if not login_type:
+            self.missing_value("login_type")
+        if login_type == "shibboleth":
+            return "shibboleth"
+        if login_type == "local":
+            client_id = self.s.get("client_id")
+            if not client_id:
+                self.missing_value("client_id")
+            return LoginTypeLocal(client_id)
 
-        return client_id
+        self.invalid_value("login_type", login_type, "Should be <shibboleth | local>")
+
+    def tfa_auth(
+        self, authenticators: Dict[str, Authenticator]
+    ) -> Optional[Authenticator]:
+        value: Optional[str] = self.s.get("tfa_auth")
+        if value is None:
+            return None
+        auth = authenticators.get(value)
+        if auth is None:
+            self.invalid_value("tfa_auth", value, "No such auth section exists")
+        return auth
 
     def target(self) -> TargetType:
         target = self.s.get("target")
@@ -156,7 +180,13 @@ instance's greatest bottleneck.
 
         self._auth = auth
         self._base_url = section.base_url()
-        self._client_id = section.client_id()
+        self._tfa_auth = section.tfa_auth(authenticators)
+
+        self._login_type = section.login()
+        if isinstance(self._login_type, LoginTypeLocal):
+            self._client_id = self._login_type.client_id
+        else:
+            self._shibboleth_login = ShibbolethLogin(self._base_url, self._auth, self._tfa_auth)
 
         self._target = section.target()
         self._link_file_redirect_delay = section.link_redirect_delay()
@@ -179,7 +209,7 @@ instance's greatest bottleneck.
     async def _crawl_course(self, course_id: int) -> None:
         # Start crawling at the given course
         root_url = url_set_query_param(
-            urljoin(self._base_url, "/goto.php"),
+            urljoin(self._base_url + "/", "goto.php"),
             "target", f"crs_{course_id}",
         )
 
@@ -460,11 +490,12 @@ instance's greatest bottleneck.
                     return ""
                 return None
 
+        auth_id = await self._current_auth_id()
         target = await impl()
         if target is not None:
             return target
 
-        await self._authenticate()
+        await self.authenticate(auth_id)
 
         target = await impl()
         if target is not None:
@@ -935,38 +966,39 @@ instance's greatest bottleneck.
                 return await request.read()
         raise CrawlError("get_authenticated failed even after authenticating")
 
-    # ToDo: Is iorepeat still required?
-    @_iorepeat(3, "Login", failure_is_error=True)
     async def _authenticate(self) -> None:
         # fill the session with the correct cookies
-        params = {
-            "client_id": self._client_id,
-            "cmd": "force_login",
-        }
-        async with self.session.get(urljoin(self._base_url, "/login.php"), params=params) as request:
-            login_page = soupify(await request.read())
+        if self._login_type == "shibboleth":
+            await self._shibboleth_login.login(self.session)
+        else:
+            # Resolve relative to the ILIAS root so that a path component in
+            # base_url (e.g. "https://host/ilias") is not dropped by urljoin.
+            login_page_url = urljoin(self._base_url + "/", "login.php")
+            params = {"client_id": self._client_id, "cmd": "force_login"}
+            async with self.session.get(login_page_url, params=params) as request:
+                login_page = soupify(await request.read())
 
-        login_form = login_page.find("form", attrs={"name": "formlogin"})
-        if login_form is None:
-            raise CrawlError("Could not find the login form! Specified client id might be invalid.")
+            login_form = login_page.find("form", attrs={"name": "formlogin"})
+            if login_form is None:
+                raise CrawlError("Could not find the login form! Specified client id might be invalid.")
 
-        login_url = login_form.attrs.get("action")
-        if login_url is None:
-            raise CrawlError("Could not find the action URL in the login form!")
+            login_url = login_form.attrs.get("action")
+            if login_url is None:
+                raise CrawlError("Could not find the action URL in the login form!")
 
-        username, password = await self._auth.credentials()
+            username, password = await self._auth.credentials()
 
-        login_data = {
-            "username": username,
-            "password": password,
-            "cmd[doStandardAuthentication]": "Login",
-        }
+            login_data = {
+                "username": username,
+                "password": password,
+                "cmd[doStandardAuthentication]": "Login",
+            }
 
-        # do the actual login
-        async with self.session.post(urljoin(self._base_url, login_url), data=login_data) as request:
-            soup = soupify(await request.read())
-            if not self._is_logged_in(soup):
-                self._auth.invalidate_credentials()
+            # do the actual login; a relative form action is resolved against the ILIAS root
+            login_url = urljoin(self._base_url + "/", login_url)
+            async with self.session.post(login_url, data=login_data) as request:
+                soup = soupify(await request.read())
+                if not self._is_logged_in(soup):
+                    self._auth.invalidate_credentials()
 
     @staticmethod
     def _is_logged_in(soup: BeautifulSoup) -> bool:
diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py
index 558221d..fc1d58f 100644
--- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py
+++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py
@@ -1,23 +1,14 @@
-from typing import Any, Dict, Optional, Union
+from typing import Dict, Literal
 
-import aiohttp
-import yarl
-from bs4 import BeautifulSoup
-
-from ...auth import Authenticator, TfaAuthenticator
+from ...auth import Authenticator
 from ...config import Config
-from ...logging import log
-from ...utils import soupify
-from ..crawler import CrawlError, CrawlWarning
-from .async_helper import _iorepeat
 from .ilias_web_crawler import IliasWebCrawler, IliasWebCrawlerSection
-
-TargetType = Union[str, int]
+from .shibboleth_login import ShibbolethLogin
 
 _ILIAS_URL = "https://ilias.studium.kit.edu"
 
 
-class KitShibbolethBackgroundLoginSuccessful():
+class KitShibbolethBackgroundLoginSuccessful:
     pass
 
 
@@ -25,19 +16,8 @@ class KitIliasWebCrawlerSection(IliasWebCrawlerSection):
     def base_url(self) -> str:
         return _ILIAS_URL
 
-    def client_id(self) -> str:
-        # KIT ILIAS uses the Shibboleth service for authentication. There's no
-        # use for a client id.
-        return "unused"
-
-    def tfa_auth(self, authenticators: Dict[str, Authenticator]) -> Optional[Authenticator]:
-        value: Optional[str] = self.s.get("tfa_auth")
-        if value is None:
-            return None
-        auth = authenticators.get(value)
-        if auth is None:
-            self.invalid_value("tfa_auth", value, "No such auth section exists")
-        return auth
+    def login(self) -> Literal["shibboleth"]:
+        return "shibboleth"
 
 
 class KitIliasWebCrawler(IliasWebCrawler):
@@ -46,184 +26,12 @@ class KitIliasWebCrawler(IliasWebCrawler):
         name: str,
         section: KitIliasWebCrawlerSection,
         config: Config,
-        authenticators: Dict[str, Authenticator]
+        authenticators: Dict[str, Authenticator],
     ):
         super().__init__(name, section, config, authenticators)
 
-        self._shibboleth_login = KitShibbolethLogin(
+        self._shibboleth_login = ShibbolethLogin(
+            _ILIAS_URL,
             self._auth,
             section.tfa_auth(authenticators),
         )
-
-    # We repeat this as the login method in shibboleth doesn't handle I/O errors.
-    # Shibboleth is quite reliable as well, the repeat is likely not critical here.
-    @_iorepeat(3, "Login", failure_is_error=True)
-    async def _authenticate(self) -> None:
-        await self._shibboleth_login.login(self.session)
-
-
-class KitShibbolethLogin:
-    """
-    Login via KIT's shibboleth system.
-    """
-
-    def __init__(self, authenticator: Authenticator, tfa_authenticator: Optional[Authenticator]) -> None:
-        self._auth = authenticator
-        self._tfa_auth = tfa_authenticator
-
-    async def login(self, sess: aiohttp.ClientSession) -> None:
-        """
-        Performs the ILIAS Shibboleth authentication dance and saves the login
-        cookies it receieves.
-
-        This function should only be called whenever it is detected that you're
-        not logged in. The cookies obtained should be good for a few minutes,
-        maybe even an hour or two.
-        """
-
-        # Equivalent: Click on "Mit KIT-Account anmelden" button in
-        # https://ilias.studium.kit.edu/login.php
-        url = f"{_ILIAS_URL}/shib_login.php"
-        data = {
-            "sendLogin": "1",
-            "idp_selection": "https://idp.scc.kit.edu/idp/shibboleth",
-            "il_target": "",
-            "home_organization_selection": "Weiter",
-        }
-        soup: Union[BeautifulSoup, KitShibbolethBackgroundLoginSuccessful] = await _shib_post(sess, url, data)
-
-        if isinstance(soup, KitShibbolethBackgroundLoginSuccessful):
-            return
-
-        # Attempt to login using credentials, if necessary
-        while not self._login_successful(soup):
-            # Searching the form here so that this fails before asking for
-            # credentials rather than after asking.
-            form = soup.find("form", {"class": "full content", "method": "post"})
-            action = form["action"]
-
-            csrf_token = form.find("input", {"name": "csrf_token"})["value"]
-
-            # Equivalent: Enter credentials in
-            # https://idp.scc.kit.edu/idp/profile/SAML2/Redirect/SSO
-            url = "https://idp.scc.kit.edu" + action
-            username, password = await self._auth.credentials()
-            data = {
-                "_eventId_proceed": "",
-                "j_username": username,
-                "j_password": password,
-                "csrf_token": csrf_token
-            }
-            soup = await _post(sess, url, data)
-
-            if soup.find(id="attributeRelease"):
-                raise CrawlError(
-                    "ILIAS Shibboleth entitlements changed! "
-                    "Please log in once in your browser and review them"
-                )
-
-            if self._tfa_required(soup):
-                soup = await self._authenticate_tfa(sess, soup)
-
-            if not self._login_successful(soup):
-                self._auth.invalidate_credentials()
-
-        # Equivalent: Being redirected via JS automatically
-        # (or clicking "Continue" if you have JS disabled)
-        relay_state = soup.find("input", {"name": "RelayState"})
-        saml_response = soup.find("input", {"name": "SAMLResponse"})
-        url = f"{_ILIAS_URL}/Shibboleth.sso/SAML2/POST"
-        data = {  # using the info obtained in the while loop above
-            "RelayState": relay_state["value"],
-            "SAMLResponse": saml_response["value"],
-        }
-        await sess.post(url, data=data)
-
-    async def _authenticate_tfa(
-        self,
-        session: aiohttp.ClientSession,
-        soup: BeautifulSoup
-    ) -> BeautifulSoup:
-        if not self._tfa_auth:
-            self._tfa_auth = TfaAuthenticator("ilias-anon-tfa")
-
-        tfa_token = await self._tfa_auth.password()
-
-        # Searching the form here so that this fails before asking for
-        # credentials rather than after asking.
-        form = soup.find("form", {"method": "post"})
-        action = form["action"]
-        csrf_token = form.find("input", {"name": "csrf_token"})["value"]
-
-        # Equivalent: Enter token in
-        # https://idp.scc.kit.edu/idp/profile/SAML2/Redirect/SSO
-        url = "https://idp.scc.kit.edu" + action
-        data = {
-            "_eventId_proceed": "",
-            "j_tokenNumber": tfa_token,
-            "csrf_token": csrf_token
-        }
-        return await _post(session, url, data)
-
-    @staticmethod
-    def _login_successful(soup: BeautifulSoup) -> bool:
-        relay_state = soup.find("input", {"name": "RelayState"})
-        saml_response = soup.find("input", {"name": "SAMLResponse"})
-        return relay_state is not None and saml_response is not None
-
-    @staticmethod
-    def _tfa_required(soup: BeautifulSoup) -> bool:
-        return soup.find(id="j_tokenNumber") is not None
-
-
-async def _post(session: aiohttp.ClientSession, url: str, data: Any) -> BeautifulSoup:
-    async with session.post(url, data=data) as response:
-        return soupify(await response.read())
-
-
-async def _shib_post(
-    session: aiohttp.ClientSession,
-    url: str,
-    data: Any
-) -> Union[BeautifulSoup, KitShibbolethBackgroundLoginSuccessful]:
-    """
-    aiohttp unescapes '/' and ':' in URL query parameters which is not RFC compliant and rejected
-    by Shibboleth. Thanks a lot. So now we unroll the requests manually, parse location headers and
-    build encoded URL objects ourselves... Who thought mangling location header was a good idea??
-    """
-    log.explain_topic("Shib login POST")
-    async with session.post(url, data=data, allow_redirects=False) as response:
-        location = response.headers.get("location")
-        log.explain(f"Got location {location!r}")
-        if not location:
-            raise CrawlWarning(f"Login failed (1), no location header present at {url}")
-        correct_url = yarl.URL(location, encoded=True)
-        log.explain(f"Corrected location to {correct_url!r}")
-
-        if str(correct_url).startswith(_ILIAS_URL):
-            log.explain("ILIAS recognized our shib token and logged us in in the background, returning")
-            return KitShibbolethBackgroundLoginSuccessful()
-
-        async with session.get(correct_url, allow_redirects=False) as response:
-            location = response.headers.get("location")
-            log.explain(f"Redirected to {location!r} with status {response.status}")
-            # If shib still has a valid session, it will directly respond to the request
-            if location is None:
-                log.explain("Shib recognized us, returning its response directly")
-                return soupify(await response.read())
-
-            as_yarl = yarl.URL(response.url)
-            # Probably not needed anymore, but might catch a few weird situations with a nicer message
-            if not location or not as_yarl.host:
-                raise CrawlWarning(f"Login failed (2), no location header present at {correct_url}")
-
-            correct_url = yarl.URL.build(
-                scheme=as_yarl.scheme,
-                host=as_yarl.host,
-                path=location,
-                encoded=True
-            )
-            log.explain(f"Corrected location to {correct_url!r}")
-
-            async with session.get(correct_url, allow_redirects=False) as response:
-                return soupify(await response.read())
diff --git a/PFERD/crawl/ilias/shibboleth_login.py b/PFERD/crawl/ilias/shibboleth_login.py
new file mode 100644
index 0000000..d57820e
--- /dev/null
+++ b/PFERD/crawl/ilias/shibboleth_login.py
@@ -0,0 +1,128 @@
+from typing import Any, Optional
+
+import aiohttp
+import yarl
+from bs4 import BeautifulSoup
+
+from ...auth import Authenticator, TfaAuthenticator
+from ...logging import log
+from ...utils import soupify
+from ..crawler import CrawlError
+
+
+class ShibbolethLogin:
+    """
+    Login to the ILIAS instance at `ilias_url` via its Shibboleth identity provider.
+    """
+
+    def __init__(
+        self, ilias_url: str, authenticator: Authenticator, tfa_authenticator: Optional[Authenticator]
+    ) -> None:
+        self._ilias_url = ilias_url
+        self._auth = authenticator
+        self._tfa_auth = tfa_authenticator
+
+    async def login(self, sess: aiohttp.ClientSession) -> None:
+        """
+        Performs the ILIAS Shibboleth authentication dance and saves the login
+        cookies it receives.
+
+        This function should only be called whenever it is detected that you're
+        not logged in. The cookies obtained should be good for a few minutes,
+        maybe even an hour or two.
+        """
+        # Equivalent: Click on the Shibboleth login button on the ILIAS login page.
+        # response.url tells us where we ended up: back at ILIAS or at the IdP.
+        url = f"{self._ilias_url}/shib_login.php"
+        async with sess.get(url) as response:
+            shib_url = response.url
+            if str(shib_url).startswith(self._ilias_url):
+                log.explain(
+                    "ILIAS recognized our shib token and logged us in in the background, returning"
+                )
+                return
+            soup: BeautifulSoup = soupify(await response.read())
+
+        # Attempt to login using credentials, if necessary
+        while not self._login_successful(soup):
+            # Searching the form here so that this fails before asking for
+            # credentials rather than after asking.
+            form = soup.find("form", {"method": "post"})
+            action = form["action"]
+
+            # Equivalent: Enter credentials on the identity provider's login
+            # page. The form action is appended to the origin of shib_url.
+            url = str(shib_url.origin()) + action
+            username, password = await self._auth.credentials()
+            data = {
+                "_eventId_proceed": "",
+                "j_username": username,
+                "j_password": password,
+            }
+            if csrf_token_input := form.find("input", {"name": "csrf_token"}):
+                data["csrf_token"] = csrf_token_input["value"]
+            soup = await _post(sess, url, data)
+
+            if soup.find(id="attributeRelease"):
+                raise CrawlError(
+                    "ILIAS Shibboleth entitlements changed! "
+                    "Please log in once in your browser and review them"
+                )
+
+            if self._tfa_required(soup):
+                soup = await self._authenticate_tfa(sess, soup, shib_url)
+
+            if not self._login_successful(soup):
+                self._auth.invalidate_credentials()
+
+        # Equivalent: Being redirected via JS automatically (or clicking "Continue" if you have JS disabled)
+        relay_state = soup.find("input", {"name": "RelayState"})
+        saml_response = soup.find("input", {"name": "SAMLResponse"})
+        url = soup.find("form", {"method": "post"})["action"]
+        data = {  # using the info obtained in the while loop above
+            "RelayState": relay_state["value"],
+            "SAMLResponse": saml_response["value"],
+        }
+        # Use a context manager so the response is released; only the session cookies matter.
+        async with sess.post(url, data=data):
+            pass
+
+    async def _authenticate_tfa(
+        self, session: aiohttp.ClientSession, soup: BeautifulSoup, shib_url: yarl.URL
+    ) -> BeautifulSoup:
+        """Submit a two-factor token to the IdP form in `soup` and return the resulting page."""
+        # Searching the form here so that this fails before asking for
+        # the token rather than after asking.
+        form = soup.find("form", {"method": "post"})
+        action = form["action"]
+
+        if not self._tfa_auth:
+            self._tfa_auth = TfaAuthenticator("ilias-anon-tfa")
+
+        tfa_token = await self._tfa_auth.password()
+
+        # Equivalent: Enter the token on the identity provider's two-factor page.
+        # Only the token is posted here, so the credentials are not requested again.
+        url = str(shib_url.origin()) + action
+        data = {
+            "_eventId_proceed": "",
+            "j_tokenNumber": tfa_token,
+        }
+        if csrf_token_input := form.find("input", {"name": "csrf_token"}):
+            data["csrf_token"] = csrf_token_input["value"]
+        return await _post(session, url, data)
+
+    @staticmethod
+    def _login_successful(soup: BeautifulSoup) -> bool:
+        """Return whether `soup` contains both the RelayState and SAMLResponse inputs."""
+        fields = ("RelayState", "SAMLResponse")
+        return all(soup.find("input", {"name": field}) is not None for field in fields)
+
+    @staticmethod
+    def _tfa_required(soup: BeautifulSoup) -> bool:
+        return soup.find(id="j_tokenNumber") is not None
+
+
+async def _post(session: aiohttp.ClientSession, url: str, data: Any) -> BeautifulSoup:
+    async with session.post(url, data=data) as response:
+        return soupify(await response.read())