Switch from tabs to spaces

Joscha 2019-04-24 12:34:20 +00:00
parent c3e64da570
commit 5a1bf2188b
7 changed files with 504 additions and 504 deletions
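The change is purely mechanical: every leading tab becomes spaces, which is why the diff adds and removes exactly the same number of lines (504 each). The commit itself does not record how the conversion was performed; below is a minimal sketch of one way to script it, assuming four spaces per tab and that only leading tabs should be rewritten.

import pathlib

TAB_WIDTH = 4  # assumption: one leading tab becomes four spaces

for path in pathlib.Path(".").rglob("*.py"):
    converted = []
    for line in path.read_text(encoding="utf-8").splitlines(keepends=True):
        body = line.lstrip("\t")                      # strip leading tabs only
        tabs = len(line) - len(body)                  # how many tabs were stripped
        converted.append(" " * (TAB_WIDTH * tabs) + body)
    path.write_text("".join(converted), encoding="utf-8")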

View File

@@ -4,11 +4,11 @@ from .norbert import *
from .utils import *

__all__ = (
    ffm.__all__ +
    ilias.__all__ +
    norbert.__all__ +
    utils.__all__ +
    []
)

LOG_FORMAT = "[%(levelname)s] %(message)s"

View File

@@ -11,69 +11,69 @@ from .organizer import Organizer
from . import utils

__all__ = [
    "FfM",
]

logger = logging.getLogger(__name__)


class FfM:
    BASE_URL = "http://www.math.kit.edu/"
    LINK_RE = re.compile(r"^https?://www.math.kit.edu/.*/(.*\.pdf)$")

    RETRY_ATTEMPTS = 5
    RETRY_DELAY = 1 # seconds

    def __init__(self, base_path):
        self.base_path = base_path

        self._session = aiohttp.ClientSession()

    async def synchronize(self, urlpart, to_dir, transform=lambda x: x):
        logging.info(f" Synchronizing {urlpart} to {to_dir} using the FfM synchronizer.")

        sync_path = pathlib.Path(self.base_path, to_dir)
        orga = Organizer(self.base_path, sync_path)

        orga.clean_temp_dir()

        await self._crawl(orga, urlpart, transform)

        orga.clean_sync_dir()
        orga.clean_temp_dir()

    async def close(self):
        await self._session.close()

    async def _crawl(self, orga, urlpart, transform):
        url = self.BASE_URL + urlpart
        async with self._session.get(url) as resp:
            text = await resp.text()
        soup = bs4.BeautifulSoup(text, "html.parser")

        for found in soup.find_all("a", href=self.LINK_RE):
            url = found["href"]
            filename = re.match(self.LINK_RE, url).group(1).replace("/", ".")
            logger.debug(f"Found file {filename} at {url}")

            old_path = pathlib.PurePath(filename)
            new_path = transform(old_path)
            if new_path is None:
                continue
            logger.debug(f"Transformed from {old_path} to {new_path}")

            temp_path = orga.temp_file()
            await self._download(url, temp_path)
            orga.add_file(temp_path, new_path)

    async def _download(self, url, to_path):
        for t in range(self.RETRY_ATTEMPTS):
            try:
                async with self._session.get(url) as resp:
                    await utils.stream_to_path(resp, to_path)
            except aiohttp.client_exceptions.ServerDisconnectedError:
                logger.debug(f"Try {t+1} out of {self.RETRY_ATTEMPTS} failed, retrying in {self.RETRY_DELAY} s")
                await asyncio.sleep(self.RETRY_DELAY)
            else:
                return
        else:
            logger.error(f"Could not download {url}")
            raise utils.OutOfTriesException(f"Try {self.RETRY_ATTEMPTS} out of {self.RETRY_ATTEMPTS} failed.")
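The retry loop in _download (the same shape recurs in the other synchronizers and in the authenticator below) leans on Python's for/else: the try's else returns on success, and the for's else only runs once every attempt has been used up. A stripped-down, self-contained sketch of that idiom follows; the fetch callable and GiveUpError name are invented for illustration and are not part of this commit.

import time

class GiveUpError(Exception):
    pass

def fetch_with_retries(fetch, attempts=5, delay=1):
    for attempt in range(attempts):
        try:
            return fetch()  # success: leave the loop (and the function) immediately
        except ConnectionError:
            print(f"Try {attempt + 1} out of {attempts} failed, retrying in {delay} s")
            time.sleep(delay)
    else:
        # Runs only when the loop finished without returning, i.e. every attempt failed.
        raise GiveUpError(f"Try {attempts} out of {attempts} failed.")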

View File

@@ -12,105 +12,105 @@ from .ilias_authenticators import ShibbolethAuthenticator
from . import utils

__all__ = [
    "ILIAS",
]

logger = logging.getLogger(__name__)


class ILIAS:
    FILE_RE = re.compile(r"goto\.php\?target=(file_\d+_download)")
    DIR_RE = re.compile(r"ilias\.php\?ref_id=(\d+)")

    def __init__(self, base_path, cookie_file):
        self.base_path = base_path

        self._auth = ShibbolethAuthenticator(base_path / cookie_file)

    async def synchronize(self, ref_id, to_dir, transform=lambda x: x, filter=lambda x: True):
        logging.info(f" Synchronizing ref_id {ref_id} to {to_dir} using the ILIAS synchronizer.")

        sync_path = pathlib.Path(self.base_path, to_dir)
        orga = Organizer(self.base_path, sync_path)

        orga.clean_temp_dir()

        files = await self._crawl(pathlib.PurePath(), f"fold_{ref_id}", filter)
        await self._download(orga, files, transform)

        orga.clean_sync_dir()
        orga.clean_temp_dir()

    async def close(self):
        await self._auth.close()

    async def _crawl(self, dir_path, dir_id, filter_):
        soup = await self._auth.get_webpage(dir_id)

        found_files = []

        files = self._find_files(soup)
        for (name, file_id) in files:
            path = dir_path / name
            found_files.append((path, file_id))
            logger.debug(f"Found file {path}")

        dirs = self._find_dirs(soup)
        for (name, ref_id) in dirs:
            path = dir_path / name
            logger.debug(f"Found dir {path}")
            if filter_(path):
                logger.info(f"Searching {path}")
                files = await self._crawl(path, ref_id, filter_)
                found_files.extend(files)
            else:
                logger.info(f"Not searching {path}")

        return found_files

    async def _download(self, orga, files, transform):
        for (path, file_id) in sorted(files):
            to_path = transform(path)
            if to_path is not None:
                temp_path = orga.temp_file()
                await self._auth.download_file(file_id, temp_path)
                orga.add_file(temp_path, to_path)

    def _find_files(self, soup):
        files = []
        file_names = set()

        found = soup.find_all("a", {"class": "il_ContainerItemTitle", "href": self.FILE_RE})
        for element in found:
            file_stem = element.string.strip().replace("/", ".")
            file_type = element.parent.parent.parent.find("div", {"class": "il_ItemProperties"}).find("span").string.strip()
            file_id = re.search(self.FILE_RE, element.get("href")).group(1)

            file_name = f"{file_stem}.{file_type}"
            if file_name in file_names:
                counter = 1
                while True:
                    file_name = f"{file_stem} (duplicate {counter}).{file_type}"
                    if file_name in file_names:
                        counter += 1
                    else:
                        break

            files.append((file_name, file_id))
            file_names.add(file_name)

        return files

    def _find_dirs(self, soup):
        dirs = []

        found = soup.find_all("div", {"class": "alert", "role": "alert"})
        if found:
            return []

        found = soup.find_all("a", {"class": "il_ContainerItemTitle", "href": self.DIR_RE})
        for element in found:
            dir_name = element.string.strip().replace("/", ".")
            ref_id = re.search(self.DIR_RE, element.get("href")).group(1)
            dir_id = f"fold_{ref_id}"
            dirs.append((dir_name, dir_id))

        return dirs

View File

@@ -19,211 +19,211 @@ from .read_write_lock import ReadWriteLock
from . import utils

__all__ = [
    "ShibbolethAuthenticator",
]

logger = logging.getLogger(__name__)


class ShibbolethAuthenticator:
    ILIAS_GOTO = "https://ilias.studium.kit.edu/goto.php"

    RETRY_ATTEMPTS = 5
    RETRY_DELAY = 1 # seconds
    CHUNK_SIZE = 1024**2

    ALLOWED_CONTENT_TYPES = [
        "application/pdf",
        "application/zip",
        "text/xml",
        "text/plain",
        "image/jpeg",
        "image/png",
    ]

    def __init__(self, cookie_path=None):
        self._cookie_path = cookie_path

        # Authentication and file/page download should not happen at the same time.
        # Authenticating counts as writing, file/page downloads as reading.
        self._lock = ReadWriteLock()

        # Only one self._authenticate() should be started, even if multiple self.get_page()s
        # notice they're logged in.
        # If self._event is not None, authenticating is currently in progress.
        self._event = None

        jar = aiohttp.CookieJar()
        if self._cookie_path is not None:
            try:
                jar.load(self._cookie_path)
            except FileNotFoundError:
                pass
        self._session = aiohttp.ClientSession(cookie_jar=jar)

    async def close(self):
        await self._session.close()

    async def _post(self, url, params=None, data=None):
        for t in range(self.RETRY_ATTEMPTS):
            try:
                async with self._session.post(url, params=params, data=data) as resp:
                    text = await resp.text()
                    return resp.url, text
            except aiohttp.client_exceptions.ServerDisconnectedError:
                logger.debug(f"Try {t+1} out of {self.RETRY_ATTEMPTS} failed, retrying in {self.RETRY_DELAY} s")
                await asyncio.sleep(self.RETRY_DELAY)

        logger.error(f"Could not POST {url} params:{params} data:{data}.")
        raise utils.OutOfTriesException(f"Try {self.RETRY_ATTEMPTS} out of {self.RETRY_ATTEMPTS} failed.")

    async def _get(self, url, params=None):
        for t in range(self.RETRY_ATTEMPTS):
            try:
                async with self._session.get(url, params=params) as resp:
                    text = await resp.text()
                    return resp.url, text
            except aiohttp.client_exceptions.ServerDisconnectedError:
                logger.debug(f"Try {t+1} out of {self.RETRY_ATTEMPTS} failed, retrying in {self.RETRY_DELAY} s")
                await asyncio.sleep(self.RETRY_DELAY)

        logger.error(f"Could not GET {url} params:{params}.")
        raise utils.OutOfTriesException(f"Try {self.RETRY_ATTEMPTS} out of {self.RETRY_ATTEMPTS} failed.")

    def _login_successful(self, soup):
        saml_response = soup.find("input", {"name": "SAMLResponse"})
        relay_state = soup.find("input", {"name": "RelayState"})
        return saml_response is not None and relay_state is not None

    def _save_cookies(self):
        logger.info(f"Saving cookies to {self._cookie_path}")
        if self._cookie_path is not None:
            self._session.cookie_jar.save(self._cookie_path)

    # WARNING: Only use self._ensure_authenticated() to authenticate,
    # don't call self._authenticate() itself.
    async def _authenticate(self):
        async with self._lock.write():
            # Equivalent: Click on "Mit KIT-Account anmelden" button in
            # https://ilias.studium.kit.edu/login.php
            url = "https://ilias.studium.kit.edu/Shibboleth.sso/Login"
            data = {
                "sendLogin": "1",
                "idp_selection": "https://idp.scc.kit.edu/idp/shibboleth",
                "target": "/shib_login.php",
                "home_organization_selection": "Mit KIT-Account anmelden",
            }
            logger.debug("Begin authentication process with ILIAS")
            url, text = await self._post(url, data=data)
            soup = bs4.BeautifulSoup(text, "html.parser")

            # Attempt to login using credentials, if necessary
            while not self._login_successful(soup):
                form = soup.find("form", {"class": "form2", "method": "post"})
                action = form["action"]

                print("Please enter Shibboleth credentials.")
                username = getpass.getpass(prompt="Username: ")
                password = getpass.getpass(prompt="Password: ")

                # Equivalent: Enter credentials in
                # https://idp.scc.kit.edu/idp/profile/SAML2/Redirect/SSO
                url = "https://idp.scc.kit.edu" + action
                data = {
                    "_eventId_proceed": "",
                    "j_username": username,
                    "j_password": password,
                }
                logger.debug("Attempt to log in to Shibboleth using credentials")
                url, text = await self._post(url, data=data)
                soup = bs4.BeautifulSoup(text, "html.parser")

                if not self._login_successful(soup):
                    print("Incorrect credentials.")

            # Saving progress: Successfully authenticated with Shibboleth
            self._save_cookies()

            relay_state = soup.find("input", {"name": "RelayState"})["value"]
            saml_response = soup.find("input", {"name": "SAMLResponse"})["value"]

            # Equivalent: Being redirected via JS automatically
            # (or clicking "Continue" if you have JS disabled)
            url = "https://ilias.studium.kit.edu/Shibboleth.sso/SAML2/POST"
            data = {
                "RelayState": relay_state,
                "SAMLResponse": saml_response,
            }
            logger.debug("Redirect back to ILIAS with login information")
            url, text = await self._post(url, data=data)

            # Saving progress: Successfully authenticated with Ilias
            self._save_cookies()

    async def _ensure_authenticated(self):
        if self._event is None:
            self._event = asyncio.Event()
            logger.info("Not logged in, authentication required.")
            await self._authenticate()
            self._event.set()
            self._event = None
        else:
            await self._event.wait()

    def _is_logged_in(self, soup):
        userlog = soup.find("li", {"id": "userlog"})
        return userlog is not None

    async def get_webpage_refid(self, ref_id):
        return await self.get_webpage(f"fold_{ref_id}")

    async def get_webpage(self, object_id):
        params = {"target": object_id}

        while True:
            async with self._lock.read():
                logger.debug(f"Getting {self.ILIAS_GOTO} {params}")
                _, text = await self._get(self.ILIAS_GOTO, params=params)
                soup = bs4.BeautifulSoup(text, "html.parser")

            if self._is_logged_in(soup):
                return soup
            else:
                await self._ensure_authenticated()

    async def _download(self, url, params, to_path):
        for t in range(self.RETRY_ATTEMPTS):
            try:
                async with self._session.get(url, params=params) as resp:
                    if resp.content_type in self.ALLOWED_CONTENT_TYPES:
                        # Yay, we got the file (as long as it's a PDF)
                        await utils.stream_to_path(resp, to_path)
                        return True
                    elif resp.content_type == "text/html":
                        # Dangit, we're probably not logged in.
                        text = await resp.text()
                        soup = bs4.BeautifulSoup(text, "html.parser")
                        if self._is_logged_in(soup):
                            raise utils.UnknownFileTypeException(f"Attempting to download a web page (use get_webpage() instead).")
                        return False
                    else:
                        # What *did* we get?
                        raise utils.UnknownFileTypeException(f"Unknown file of type {resp.content_type}.")
            except aiohttp.client_exceptions.ServerDisconnectedError:
                logger.debug(f"Try {t+1} out of {self.RETRY_ATTEMPTS} failed, retrying in {self.RETRY_DELAY} s")
                await asyncio.sleep(self.RETRY_DELAY)

        logger.error(f"Could not download {url} params:{params}.")
        raise utils.OutOfTriesException(f"Try {self.RETRY_ATTEMPTS} out of {self.RETRY_ATTEMPTS} failed.")

    async def download_file(self, file_id, to_path):
        params = {"target": file_id}

        while True:
            async with self._lock.read():
                success = await self._download(self.ILIAS_GOTO, params, to_path)

            if success:
                return
            else:
                await self._ensure_authenticated()
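_ensure_authenticated() coordinates concurrent requests with an asyncio.Event: the first caller that notices the session is logged out performs the login, everyone else just waits for the event and then retries. A self-contained sketch of that pattern follows, without the ILIAS or ReadWriteLock specifics; the class and method names are illustrative, not part of this commit.

import asyncio

class OneAtATime:
    def __init__(self):
        self._event = None          # not None while a refresh is in progress

    async def refresh(self):
        await asyncio.sleep(0.1)    # stand-in for the real authentication round trip
        print("refreshed credentials")

    async def ensure_fresh(self):
        if self._event is None:
            self._event = asyncio.Event()
            await self.refresh()        # only the first caller does the work
            self._event.set()
            self._event = None
        else:
            await self._event.wait()    # everyone else waits for that caller

async def main():
    guard = OneAtATime()
    await asyncio.gather(*(guard.ensure_fresh() for _ in range(3)))

asyncio.run(main())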

View File

@@ -12,113 +12,113 @@ from .organizer import Organizer
from . import utils

__all__ = [
    "Norbert",
]

logger = logging.getLogger(__name__)


class Norbert:
    BASE_URL = "https://studwww.informatik.kit.edu/~s_blueml/"
    LINK_RE = re.compile(r"^progtut/.*/(.*\.zip)$")

    RETRY_ATTEMPTS = 5
    RETRY_DELAY = 1 # seconds

    def __init__(self, base_path):
        self.base_path = base_path

        self._session = aiohttp.ClientSession()

    async def synchronize(self, to_dir, transform=lambda x: x, unzip=lambda _: True):
        logging.info(f" Synchronizing to {to_dir} using the Norbert synchronizer.")

        sync_path = pathlib.Path(self.base_path, to_dir)
        orga = Organizer(self.base_path, sync_path)

        orga.clean_temp_dir()

        files = await self._crawl()
        await self._download(orga, files, transform, unzip)

        orga.clean_sync_dir()
        orga.clean_temp_dir()

    async def close(self):
        await self._session.close()

    async def _crawl(self):
        url = self.BASE_URL
        async with self._session.get(url) as resp:
            raw = await resp.read()
            # replace undecodeable characters with a placeholder
            text = raw.decode("utf-8", "replace")
        soup = bs4.BeautifulSoup(text, "html.parser")

        files = []

        for found in soup.find_all("a", href=self.LINK_RE):
            url = found["href"]
            full_url = self.BASE_URL + url

            filename = re.search(self.LINK_RE, url).group(1)
            path = pathlib.PurePath(filename)

            logger.debug(f"Found zip file {filename} at {full_url}")

            files.append((path, full_url))

        return files

    async def _download(self, orga, files, transform, unzip):
        for path, url in sorted(files):
            # Yes, we want the zip file contents
            if unzip(path):
                logger.debug(f"Downloading and unzipping {path}")
                zip_path = utils.rename(path, path.stem)

                # Download zip file
                temp_file = orga.temp_file()
                await self._download_zip(url, temp_file)

                # Search the zip file for files to extract
                temp_dir = orga.temp_dir()
                with zipfile.ZipFile(temp_file, "r") as zf:
                    for info in zf.infolist():
                        # Only interested in the files themselves, the directory
                        # structure is created automatically by orga.add_file()
                        if info.is_dir():
                            continue

                        file_path = zip_path / pathlib.PurePath(info.filename)
                        logger.debug(f"Found {info.filename} at path {file_path}")

                        new_path = transform(file_path)
                        if new_path is not None:
                            # Extract to temp file and add, the usual deal
                            temp_file = orga.temp_file()
                            extracted_path = zf.extract(info, temp_dir)
                            extracted_path = pathlib.Path(extracted_path)
                            orga.add_file(extracted_path, new_path)

            # No, we only want the zip file itself
            else:
                logger.debug(f"Only downloading {path}")

                new_path = transform(path)
                if new_path is not None:
                    temp_file = orga.temp_file()
                    await self._download_zip(url, temp_file)
                    orga.add_file(temp_file, new_path)

    async def _download_zip(self, url, to_path):
        for t in range(self.RETRY_ATTEMPTS):
            try:
                async with self._session.get(url) as resp:
                    await utils.stream_to_path(resp, to_path)
            except aiohttp.client_exceptions.ServerDisconnectedError:
                logger.debug(f"Try {t+1} out of {self.RETRY_ATTEMPTS} failed, retrying in {self.RETRY_DELAY} s")
                await asyncio.sleep(self.RETRY_DELAY)
            else:
                return
        else:
            logger.error(f"Could not download {url}")
            raise utils.OutOfTriesException(f"Try {self.RETRY_ATTEMPTS} out of {self.RETRY_ATTEMPTS} failed.")

View File

@@ -6,135 +6,135 @@ import shutil
from . import utils

__all__ = [
    "Organizer",
]

logger = logging.getLogger(__name__)


class Organizer:
    def __init__(self, base_dir, sync_dir):
        """
        base_dir - the .tmp directory will be created here
        sync_dir - synced files will be moved here

        Both are expected to be concrete pathlib paths.
        """
        self._base_dir = base_dir
        self._sync_dir = sync_dir

        self._temp_dir = pathlib.Path(self._base_dir, ".tmp")
        self._temp_nr = 0

        # check if base/sync dir exist?

        self._added_files = set()

    def clean_temp_dir(self):
        if self._temp_dir.exists():
            shutil.rmtree(self._temp_dir)
        self._temp_dir.mkdir(exist_ok=True)
        logger.debug(f"Cleaned temp dir: {self._temp_dir}")

    def temp_dir(self):
        nr = self._temp_nr
        self._temp_nr += 1
        temp_dir = pathlib.Path(self._temp_dir, f"{nr:08}").resolve()
        logger.debug(f"Produced new temp dir: {temp_dir}")
        return temp_dir

    def temp_file(self):
        # generate the path to a new temp file in base_path/.tmp/
        # make sure no two paths are the same
        nr = self._temp_nr
        self._temp_nr += 1
        temp_file = pathlib.Path(self._temp_dir, f"{nr:08}.tmp").resolve()
        logger.debug(f"Produced new temp file: {temp_file}")
        return temp_file

    def add_file(self, from_path, to_path):
        if not from_path.exists():
            raise utils.FileNotFoundException(f"Could not add file at {from_path}")

        # check if sync_dir/to_path is inside sync_dir?
        to_path = pathlib.Path(self._sync_dir, to_path)

        if to_path.exists() and to_path.is_dir():
            if self._prompt_yes_no(f"Overwrite folder {to_path} with file?", default=False):
                shutil.rmtree(to_path)
            else:
                logger.warn(f"Could not add file {to_path}")
                return

        if to_path.exists():
            if filecmp.cmp(from_path, to_path, shallow=False):
                logger.info(f"Ignored {to_path}")

                # remember path for later reference
                self._added_files.add(to_path.resolve())
                logger.debug(f"Added file {to_path.resolve()}")

                # No further action needed, especially not overwriting symlinks...
                return
            else:
                logger.info(f"Different file at {to_path}")
        else:
            logger.info(f"New file at {to_path}")

        # copy the file from from_path to sync_dir/to_path
        # If the file being replaced was a symlink, the link itself is overwritten,
        # not the file the link points to.
        to_path.parent.mkdir(parents=True, exist_ok=True)
        from_path.replace(to_path)
        logger.debug(f"Moved {from_path} to {to_path}")

        # remember path for later reference, after the new file was written
        # This is necessary here because otherwise, resolve() would resolve the symlink too.
        self._added_files.add(to_path.resolve())
        logger.debug(f"Added file {to_path.resolve()}")

    def clean_sync_dir(self):
        self._clean_dir(self._sync_dir, remove_parent=False)
        logger.debug(f"Cleaned sync dir: {self._sync_dir}")

    def _clean_dir(self, path, remove_parent=True):
        for child in sorted(path.iterdir()):
            logger.debug(f"Looking at {child.resolve()}")
            if child.is_dir():
                self._clean_dir(child, remove_parent=True)
            elif child.resolve() not in self._added_files:
                if self._prompt_yes_no(f"Delete {child}?", default=False):
                    child.unlink()
                    logger.debug(f"Deleted {child}")

        if remove_parent:
            try:
                path.rmdir()
            except OSError: # directory not empty
                pass

    def _prompt_yes_no(self, question, default=None):
        if default is True:
            prompt = "[Y/n]"
        elif default is False:
            prompt = "[y/N]"
        else:
            prompt = "[y/n]"

        text = f"{question} {prompt} "
        WRONG_REPLY = "Please reply with 'yes'/'y' or 'no'/'n'."

        while True:
            response = input(text).strip().lower()
            if response in {"yes", "ye", "y"}:
                return True
            elif response in {"no", "n"}:
                return False
            elif response == "":
                if default is None:
                    print(WRONG_REPLY)
                else:
                    return default
            else:
                print(WRONG_REPLY)

# How to use:
#
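The "How to use" comment at the end of the file is cut off in this view. For orientation, a hedged sketch of how the synchronizers above drive an Organizer: paths and file contents are invented for illustration, and Organizer is assumed to be importable from the package shown in this diff.

import pathlib

base = pathlib.Path("/tmp/sync-example")                # hypothetical location
base.mkdir(parents=True, exist_ok=True)
orga = Organizer(base, base / "Course")

orga.clean_temp_dir()                                   # fresh .tmp directory under base

temp = orga.temp_file()                                 # unique path inside .tmp
temp.write_bytes(b"example content")                    # stand-in for a real download
orga.add_file(temp, pathlib.PurePath("sheet01.pdf"))    # moved to base/Course/sheet01.pdf

orga.clean_sync_dir()                                   # prompts before deleting stale files
orga.clean_temp_dir()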

View File

@@ -2,39 +2,39 @@ import os
import pathlib

__all__ = [
    "get_base_dir",
    "move",
    "rename",
    "stream_to_path",
    "OutOfTriesException",
    "UnknownFileTypeException",
    "FileNotFoundException",
]


def get_base_dir(script_file):
    return pathlib.Path(os.path.dirname(os.path.abspath(script_file)))

def move(path, from_folders, to_folders):
    l = len(from_folders)
    if path.parts[:l] == from_folders:
        return pathlib.PurePath(*to_folders, *path.parts[l:])

def rename(path, to_name):
    return pathlib.PurePath(*path.parts[:-1], to_name)

async def stream_to_path(resp, to_path, chunk_size=1024**2):
    with open(to_path, 'wb') as fd:
        while True:
            chunk = await resp.content.read(chunk_size)
            if not chunk:
                break
            fd.write(chunk)

class OutOfTriesException(Exception):
    pass

class UnknownFileTypeException(Exception):
    pass

class FileNotFoundException(Exception):
    pass
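move() and rename() are the building blocks for the transform callbacks the synchronizers accept: a transform receives a PurePath and returns the destination path, or None to skip the file. A small illustrative example follows; the folder names are invented, and move is assumed to be imported from the utilities module above.

import pathlib

def transform(path):
    # Remap "Blaetter/..." to "Uebungsblaetter/..." and skip everything else.
    moved = move(path, ("Blaetter",), ("Uebungsblaetter",))
    if moved is None:
        return None  # the synchronizers skip files whose transform returns None
    return moved

print(transform(pathlib.PurePath("Blaetter", "blatt01.pdf")))  # Uebungsblaetter/blatt01.pdf
print(transform(pathlib.PurePath("Notizen", "todo.txt")))      # None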