Mirror of https://github.com/Garmelon/PFERD.git, synced 2023-12-21 10:23:01 +01:00.
Commit message: "Authenticate with ILIAS and get pages by refid"
This commit is contained in:
parent
0803f196c8
commit
95646b0b29
1
.gitignore
vendored
1
.gitignore
vendored
@ -1,3 +1,4 @@
|
||||
**/__pycache__/
|
||||
bin/
|
||||
include/
|
||||
lib/
|
||||
|
80
PFERD/ReadWriteLock.py
Normal file
80
PFERD/ReadWriteLock.py
Normal file
@ -0,0 +1,80 @@
|
||||
# From https://charemza.name/blog/posts/python/asyncio/read-write-lock/
|
||||
# https://gist.github.com/michalc/ab9bd571cfab09216c0316f2302a76b0#file-asyncio_read_write_lock-py
|
||||
|
||||
import asyncio
|
||||
import collections
|
||||
import contextlib
|
||||
|
||||
|
||||
# Marker subclass of asyncio.Future: identifies a queued read-acquisition
# request in ReadWriteLock._waiters (distinguished by isinstance checks).
class _ReadWaiter(asyncio.Future):
    pass
|
||||
|
||||
# Marker subclass of asyncio.Future: identifies a queued write-acquisition
# request in ReadWriteLock._waiters (distinguished by isinstance checks).
class _WriteWaiter(asyncio.Future):
    pass
|
||||
|
||||
class ReadWriteLock():
    """
    An asyncio readers-writer lock (adapted from the source linked above).

    Any number of readers may hold the lock concurrently, but a writer holds
    it exclusively. Waiters are queued FIFO: a pending write request blocks
    later read requests, so writers are not starved.

    Usage:
        async with lock.read(): ...
        async with lock.write(): ...
    """

    def __init__(self):
        # FIFO queue of _ReadWaiter/_WriteWaiter futures, in request order.
        self._waiters = collections.deque()
        # Number of read holds currently granted.
        self._reads_held = 0
        # True while a write hold is granted.
        self._write_held = False

    def _pop_queued_waiters(self, waiter_type):
        """
        Yield non-cancelled waiters of *waiter_type* from the front of the
        queue, discarding cancelled waiters of any type along the way. Stops
        at the first live waiter of the other type (FIFO fairness).
        """
        while True:
            correct_type = self._waiters and isinstance(self._waiters[0], waiter_type)
            cancelled = self._waiters and self._waiters[0].cancelled()

            if correct_type or cancelled:
                waiter = self._waiters.popleft()

            if correct_type and not cancelled:
                yield waiter

            if not correct_type and not cancelled:
                break

    def _resolve_queued_waiters(self):
        """
        Grant the lock to as many queued waiters as the current state allows:
        all leading readers when no write is held, or a single writer when
        nothing at all is held.
        """
        if not self._write_held:
            for waiter in self._pop_queued_waiters(_ReadWaiter):
                self._reads_held += 1
                waiter.set_result(None)

        if not self._write_held and not self._reads_held:
            for waiter in self._pop_queued_waiters(_WriteWaiter):
                self._write_held = True
                waiter.set_result(None)
                # Only one writer may hold the lock at a time.
                break

    def _on_read_release(self):
        self._reads_held -= 1

    def _on_write_release(self):
        self._write_held = False

    @contextlib.asynccontextmanager
    async def _acquire(self, waiter_type, on_release):
        """
        Core acquire/release logic shared by read() and write():
        enqueue a waiter, wait until it is granted, and release on exit.
        """
        waiter = waiter_type()
        self._waiters.append(waiter)
        self._resolve_queued_waiters()

        try:
            await waiter
        except asyncio.CancelledError:
            # Re-run resolution so waiters queued behind us are not stuck
            # behind our (now cancelled) request.
            # NOTE(review): if the waiter was already granted (result set)
            # before the task was cancelled, the hold count is presumably
            # never released here — verify against the upstream gist.
            self._resolve_queued_waiters()
            raise

        try:
            yield
        finally:
            on_release()
            self._resolve_queued_waiters()

    @contextlib.asynccontextmanager
    async def read(self):
        """Hold the lock in shared (read) mode for the duration of the block."""
        async with self._acquire(_ReadWaiter, self._on_read_release):
            yield

    @contextlib.asynccontextmanager
    async def write(self):
        """Hold the lock in exclusive (write) mode for the duration of the block."""
        async with self._acquire(_WriteWaiter, self._on_write_release):
            yield
|
7
PFERD/__init__.py
Normal file
7
PFERD/__init__.py
Normal file
@ -0,0 +1,7 @@
|
||||
# The star-import also triggers the submodule import, which binds the name
# "authenticator" in this package namespace as an import-system side effect.
from .authenticator import *

# Re-export exactly what .authenticator declares public.
# NOTE(review): the parentheses do not create a tuple — this binds
# authenticator.__all__ (a list) directly.
__all__ = (
    authenticator.__all__
)

# Logging format shared by scripts that drive this package (see test.py).
LOG_FORMAT = "[%(levelname)s] %(message)s"
|
174
PFERD/authenticator.py
Normal file
174
PFERD/authenticator.py
Normal file
@ -0,0 +1,174 @@
|
||||
import aiohttp
|
||||
import asyncio
|
||||
import bs4
|
||||
import getpass
|
||||
import logging
|
||||
import time
|
||||
import urllib.parse
|
||||
|
||||
from .ReadWriteLock import ReadWriteLock
|
||||
|
||||
# Public API of this module, re-exported by the package __init__.
__all__ = [
    "OutOfTriesException",
    "ShibbolethAuthenticator",
]

# Module-level logger, configured by the application (e.g. logging.basicConfig).
logger = logging.getLogger(__name__)
|
||||
|
||||
class OutOfTriesException(Exception):
    """Raised when an HTTP request still fails after all retry attempts."""
|
||||
|
||||
class ShibbolethAuthenticator:
    """
    Authenticates with ILIAS (KIT) via the Shibboleth single sign-on and
    fetches pages by ref id.

    Credentials are prompted interactively when the cached session is missing
    or expired; session cookies are persisted to ``cookie_path`` (if given)
    so later runs can skip the login.
    """

    RETRY_ATTEMPTS = 5
    RETRY_DELAY = 1  # seconds

    def __init__(self, cookie_path=None):
        """
        cookie_path: optional path of a file the cookie jar is loaded from
        and saved to. If None, cookies live only for this session.
        """
        self._cookie_path = cookie_path

        # Authentication and file/page download should not happen at the same time.
        # Authenticating counts as writing, file/page downloads as reading.
        self._lock = ReadWriteLock()

        # Only one self._authenticate() should be started, even if multiple
        # self.get_webpage()s notice they're logged out.
        # If self._event is not None, authenticating is currently in progress.
        self._event = None

        jar = aiohttp.CookieJar()
        if self._cookie_path is not None:
            try:
                jar.load(self._cookie_path)
            except FileNotFoundError:
                # First run: no cookie file exists yet.
                pass
        self._session = aiohttp.ClientSession(cookie_jar=jar)

    async def close(self):
        """Close the underlying aiohttp session."""
        await self._session.close()

    async def _request(self, method, url, params=None, data=None):
        """
        Perform one HTTP request with retries; returns (final_url, body_text).

        Raises OutOfTriesException when the server disconnects on every
        attempt. Shared by _get() and _post(), which previously duplicated
        this retry loop.
        """
        for attempt in range(self.RETRY_ATTEMPTS):
            try:
                async with self._session.request(method, url, params=params, data=data) as resp:
                    text = await resp.text()
                    return resp.url, text
            except aiohttp.ServerDisconnectedError:
                logger.debug(f"Try {attempt+1} out of {self.RETRY_ATTEMPTS} failed, retrying in {self.RETRY_DELAY} s")
                await asyncio.sleep(self.RETRY_DELAY)

        logger.error("Could not retrieve url")
        raise OutOfTriesException(f"Try {self.RETRY_ATTEMPTS} out of {self.RETRY_ATTEMPTS} failed.")

    async def _post(self, url, params=None, data=None):
        """POST with retries; returns (final_url, body_text)."""
        return await self._request("POST", url, params=params, data=data)

    async def _get(self, url, params=None):
        """GET with retries; returns (final_url, body_text)."""
        return await self._request("GET", url, params=params)

    def _login_successful(self, soup):
        # A successful IdP login answers with a self-submitting form carrying
        # SAMLResponse and RelayState inputs.
        saml_response = soup.find("input", {"name": "SAMLResponse"})
        relay_state = soup.find("input", {"name": "RelayState"})
        return saml_response is not None and relay_state is not None

    def _save_cookies(self):
        """Persist the cookie jar, if a cookie path was configured."""
        # Log only when we actually save; the previous version logged
        # "Saving cookies to None" when no path was configured.
        if self._cookie_path is not None:
            logger.info(f"Saving cookies to {self._cookie_path!r}")
            self._session.cookie_jar.save(self._cookie_path)

    # WARNING: Only use self._ensure_authenticated() to authenticate,
    # don't call self._authenticate() itself.
    async def _authenticate(self):
        """Run the full Shibboleth login dance while holding the write lock."""
        async with self._lock.write():
            # Equivalent: Click on "Mit KIT-Account anmelden" button in
            # https://ilias.studium.kit.edu/login.php
            url = "https://ilias.studium.kit.edu/Shibboleth.sso/Login"
            data = {
                "sendLogin": "1",
                "idp_selection": "https://idp.scc.kit.edu/idp/shibboleth",
                "target": "/shib_login.php",
                "home_organization_selection": "Mit KIT-Account anmelden",
            }
            logger.debug("Begin authentication process with ILIAS")
            url, text = await self._post(url, data=data)
            soup = bs4.BeautifulSoup(text, "html.parser")

            # Attempt to login using credentials, if necessary
            while not self._login_successful(soup):
                form = soup.find("form", {"class": "form2", "method": "post"})
                action = form["action"]

                print("Please enter Shibboleth credentials.")
                username = getpass.getpass(prompt="Username: ")
                password = getpass.getpass(prompt="Password: ")

                # Equivalent: Enter credentials in
                # https://idp.scc.kit.edu/idp/profile/SAML2/Redirect/SSO
                url = "https://idp.scc.kit.edu" + action
                data = {
                    "_eventId_proceed": "",
                    "j_username": username,
                    "j_password": password,
                }
                logger.debug("Attempt to log in to Shibboleth using credentials")
                url, text = await self._post(url, data=data)
                soup = bs4.BeautifulSoup(text, "html.parser")

                if not self._login_successful(soup):
                    print("Incorrect credentials.")

            # Saving progress: Successfully authenticated with Shibboleth
            self._save_cookies()

            relay_state = soup.find("input", {"name": "RelayState"})["value"]
            saml_response = soup.find("input", {"name": "SAMLResponse"})["value"]

            # Equivalent: Being redirected via JS automatically
            # (or clicking "Continue" if you have JS disabled)
            url = "https://ilias.studium.kit.edu/Shibboleth.sso/SAML2/POST"
            data = {
                "RelayState": relay_state,
                "SAMLResponse": saml_response,
            }
            logger.debug("Redirect back to ILIAS with login information")
            url, text = await self._post(url, data=data)

            # Saving progress: Successfully authenticated with Ilias
            self._save_cookies()

    async def _ensure_authenticated(self):
        """
        Authenticate exactly once, no matter how many coroutines call this
        concurrently; late callers just wait for the in-flight attempt.
        """
        if self._event is None:
            self._event = asyncio.Event()
            logger.info("Not logged in, authentication required.")
            try:
                await self._authenticate()
            finally:
                # Wake all waiters even if authentication raised, and clear
                # the event so a later session expiry triggers a fresh login.
                # (Previously the event was never reset, so after the first
                # login a re-authentication could never happen, and waiters
                # hung forever if _authenticate() raised.)
                self._event.set()
                self._event = None
        else:
            await self._event.wait()

    def _is_logged_in(self, soup):
        # ILIAS renders the user menu (li id="userlog") only for
        # authenticated sessions.
        userlog = soup.find("li", {"id": "userlog"})
        return userlog is not None

    async def get_webpage(self, ref_id):
        """
        Fetch the ILIAS page for the folder with the given ref id, logging in
        first if necessary. Returns the parsed bs4.BeautifulSoup document.
        """
        url = "https://ilias.studium.kit.edu/goto.php"
        params = {"target": f"fold_{ref_id}"}

        while True:
            async with self._lock.read():
                logger.info(f"Getting {url} {params}")
                _, text = await self._get(url, params=params)
                soup = bs4.BeautifulSoup(text, "html.parser")

            # The read lock must be released before authenticating, since
            # _authenticate() acquires the write lock.
            if self._is_logged_in(soup):
                return soup
            else:
                await self._ensure_authenticated()

    async def download_file(self, file_id):
        """Download the file with the given id. Not implemented yet."""
        async with self._lock.read():
            pass  # TODO
|
30
test.py
Normal file
30
test.py
Normal file
@ -0,0 +1,30 @@
|
||||
import PFERD
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
|
||||
# Verbose logging for development; switch to the INFO line for normal runs.
logging.basicConfig(level=logging.DEBUG, format=PFERD.LOG_FORMAT)
#logging.basicConfig(level=logging.INFO, format=PFERD.LOG_FORMAT)
|
||||
|
||||
async def test_download():
    """Fetch a single ILIAS folder page by ref id and report success."""
    auth = PFERD.ShibbolethAuthenticator(cookie_path="cookie_jar")
    try:
        soup = await auth.get_webpage("885157")
    finally:
        # Always release the HTTP session, even when the download raises
        # (previously a failed get_webpage() leaked the aiohttp session).
        await auth.close()
    if soup:
        print("Soup acquired!")
    else:
        print("No soup acquired :(")
|
||||
|
||||
def main():
    """Entry point: run the download smoke test."""
    # Pass debug=True to asyncio.run to surface slow callbacks and
    # un-awaited coroutines while developing.
    asyncio.run(test_download())


if __name__ == "__main__":
    main()
|
Loading…
Reference in New Issue
Block a user