Fix login after shib url parser change

This commit is contained in:
I-Al-Istannen 2022-01-14 20:15:19 +01:00
parent 0045124a4e
commit 57ec51e95a
2 changed files with 39 additions and 4 deletions

View File

@ -22,6 +22,10 @@ ambiguous situations.
## Unreleased
### Fixed
- Shibboleth login fixed. It was broken due to URL parser changes and really
*unfortunate* behaviour by aiohttp.
## 3.3.0 - 2022-01-09
### Added

View File

@ -4,6 +4,7 @@ from pathlib import PurePath
from typing import Any, Awaitable, Callable, Dict, List, Optional, Set, TypeVar, Union, cast
import aiohttp
import yarl
from aiohttp import hdrs
from bs4 import BeautifulSoup, Tag
@ -674,14 +675,14 @@ class KitShibbolethLogin:
# Equivalent: Click on "Mit KIT-Account anmelden" button in
# https://ilias.studium.kit.edu/login.php
url = "https://ilias.studium.kit.edu/Shibboleth.sso/Login"
url = "https://ilias.studium.kit.edu/shib_login.php"
data = {
"sendLogin": "1",
"idp_selection": "https://idp.scc.kit.edu/idp/shibboleth",
"target": "/shib_login.php",
"home_organization_selection": "Mit KIT-Account anmelden",
"il_target": "",
"home_organization_selection": "Weiter",
}
soup: BeautifulSoup = await _post(sess, url, data)
soup: BeautifulSoup = await _shib_post(sess, url, data)
# Attempt to login using credentials, if necessary
while not self._login_successful(soup):
@ -761,3 +762,33 @@ class KitShibbolethLogin:
async def _post(session: aiohttp.ClientSession, url: str, data: Any) -> BeautifulSoup:
async with session.post(url, data=data) as response:
return soupify(await response.read())
async def _shib_post(session: aiohttp.ClientSession, url: str, data: Any) -> BeautifulSoup:
"""
aiohttp unescapes '/' and ':' in URL query parameters which is not RFC compliant and rejected
by Shibboleth. Thanks a lot. So now we unroll the requests manually, parse location headers and
build encoded URL objects ourselfs... Who thought mangling location header was a good idea??
"""
async with session.post(url, data=data, allow_redirects=False) as response:
location = response.headers.get("location")
if not location:
raise CrawlWarning(f"Login failed, no location header present at {url}")
correct_url = yarl.URL(location, encoded=True)
async with session.get(correct_url, allow_redirects=False) as response:
as_yarl = yarl.URL(response.url)
location = response.headers.get("location")
if not location or not as_yarl.host:
raise CrawlWarning(f"Login failed, no location header present at {correct_url}")
correct_url = yarl.URL.build(
scheme=as_yarl.scheme,
host=as_yarl.host,
path=location,
encoded=True
)
async with session.get(correct_url, allow_redirects=False) as response:
return soupify(await response.read())