From 068fe77dcf250766b227362b89ce0a61a6f786c1 Mon Sep 17 00:00:00 2001
From: Joscha
Date: Mon, 26 Nov 2018 17:00:17 +0000
Subject: [PATCH] Clean up minor things

- improve logging messages
- allow more download file formats
- strip file names

---
 PFERD/ffm.py                  |  2 +-
 PFERD/ilias.py                | 12 +++++-------
 PFERD/ilias_authenticators.py |  2 +-
 PFERD/organizer.py            |  2 +-
 4 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/PFERD/ffm.py b/PFERD/ffm.py
index 1122b72..b20b414 100644
--- a/PFERD/ffm.py
+++ b/PFERD/ffm.py
@@ -28,7 +28,7 @@ class FfM:
         self._session = aiohttp.ClientSession()
 
     async def synchronize(self, urlpart, to_dir, transform=lambda x: x):
-        logging.info(f"Synchronizing {urlpart} to {to_dir} using the FfM synchronizer.")
+        logging.info(f" Synchronizing {urlpart} to {to_dir} using the FfM synchronizer.")
 
         sync_path = pathlib.Path(self.base_path, to_dir)
         orga = Organizer(self.base_path, sync_path)
diff --git a/PFERD/ilias.py b/PFERD/ilias.py
index 9885826..438e6bd 100644
--- a/PFERD/ilias.py
+++ b/PFERD/ilias.py
@@ -26,7 +26,7 @@ class ILIAS:
         self._auth = ShibbolethAuthenticator(base_path / cookie_file)
 
     async def synchronize(self, ref_id, to_dir, transform=lambda x: x, filter=lambda x: True):
-        logging.info(f"Synchronizing {ref_id} to {to_dir} using the ILIAS synchronizer.")
+        logging.info(f" Synchronizing ref_id {ref_id} to {to_dir} using the ILIAS synchronizer.")
 
         sync_path = pathlib.Path(self.base_path, to_dir)
         orga = Organizer(self.base_path, sync_path)
@@ -67,7 +67,7 @@
         return found_files
 
     async def _download(self, orga, files, transform):
-        for (path, file_id) in files:
+        for (path, file_id) in sorted(files):
             to_path = transform(path)
             if to_path is not None:
                 temp_path = orga.temp_file()
@@ -79,11 +79,9 @@
         found = soup.find_all("a", {"class": "il_ContainerItemTitle", "href": self.FILE_RE})
 
         for element in found:
-            file_stem = element.string
-            file_id = re.search(self.FILE_RE, element.get("href")).group(1)
-
-            # find out file type
+            file_stem = element.string.strip()
             file_type = element.parent.parent.parent.find("div", {"class": "il_ItemProperties"}).find("span").string.strip()
+            file_id = re.search(self.FILE_RE, element.get("href")).group(1)
             file_name = f"{file_stem}.{file_type}"
             files.append((file_name, file_id))
 
@@ -95,7 +93,7 @@
         found = soup.find_all("a", {"class": "il_ContainerItemTitle", "href": self.DIR_RE})
 
         for element in found:
-            dir_name = element.string
+            dir_name = element.string.strip()
             ref_id = re.search(self.DIR_RE, element.get("href")).group(1)
             dir_id = f"fold_{ref_id}"
             dirs.append((dir_name, dir_id))
diff --git a/PFERD/ilias_authenticators.py b/PFERD/ilias_authenticators.py
index 3588640..ad10374 100644
--- a/PFERD/ilias_authenticators.py
+++ b/PFERD/ilias_authenticators.py
@@ -185,7 +185,7 @@ class ShibbolethAuthenticator:
         for t in range(self.RETRY_ATTEMPTS):
             try:
                 async with self._session.get(url, params=params) as resp:
-                    if resp.content_type == "application/pdf":
+                    if resp.content_type in ["application/pdf", "application/zip", "text/xml"]:
                         # Yay, we got the file (as long as it's a PDF)
                         await utils.stream_to_path(resp, to_path)
                         return True
diff --git a/PFERD/organizer.py b/PFERD/organizer.py
index 0a0f2b5..55ecf6f 100644
--- a/PFERD/organizer.py
+++ b/PFERD/organizer.py
@@ -55,7 +55,7 @@ class Organizer:
 
         if to_path.exists():
            if filecmp.cmp(from_path, to_path, shallow=False):
-                logger.info(f"Done nothing at {to_path}")
+                logger.info(f"Ignored {to_path}")
                 # No further action needed, especially not overwriting symlinks...
                 return
             else: