Clean up minor things

- improve logging messages
- allow more download file formats
- strip file names
2025-07-12 06:02:31 +02:00 · 2018-11-26 17:00:17 +00:00
parent 34da5d4d19
commit 068fe77dcf
4 changed files with 8 additions and 10 deletions
--- a/PFERD/ilias.py
+++ b/PFERD/ilias.py
@ -26,7 +26,7 @@ class ILIAS:
 		self._auth = ShibbolethAuthenticator(base_path / cookie_file)
 	async def synchronize(self, ref_id, to_dir, transform=lambda x: x, filter=lambda x: True):
-		logging.info(f"Synchronizing {ref_id} to {to_dir} using the ILIAS synchronizer.")
+		logging.info(f"    Synchronizing ref_id {ref_id} to {to_dir} using the ILIAS synchronizer.")
 		sync_path = pathlib.Path(self.base_path, to_dir)
 		orga = Organizer(self.base_path, sync_path)
@ -67,7 +67,7 @@ class ILIAS:
 		return found_files
 	async def _download(self, orga, files, transform):
-		for (path, file_id) in files:
+		for (path, file_id) in sorted(files):
 			to_path = transform(path)
 			if to_path is not None:
 				temp_path = orga.temp_file()
@ -79,11 +79,9 @@ class ILIAS:
 		found = soup.find_all("a", {"class": "il_ContainerItemTitle", "href": self.FILE_RE})
 		for element in found:
-			file_stem = element.string
+			file_stem = element.string.strip()
 			file_id = re.search(self.FILE_RE, element.get("href")).group(1)
 			# find out file type
 			file_type = element.parent.parent.parent.find("div", {"class": "il_ItemProperties"}).find("span").string.strip()
 			file_id = re.search(self.FILE_RE, element.get("href")).group(1)
 			file_name = f"{file_stem}.{file_type}"
 			files.append((file_name, file_id))
@ -95,7 +93,7 @@ class ILIAS:
 		found = soup.find_all("a", {"class": "il_ContainerItemTitle", "href": self.DIR_RE})
 		for element in found:
-			dir_name = element.string
+			dir_name = element.string.strip()
 			ref_id = re.search(self.DIR_RE, element.get("href")).group(1)
 			dir_id = f"fold_{ref_id}"
 			dirs.append((dir_name, dir_id))
--- a/PFERD/ilias_authenticators.py
+++ b/PFERD/ilias_authenticators.py
@ -185,7 +185,7 @@ class ShibbolethAuthenticator:
 		for t in range(self.RETRY_ATTEMPTS):
 			try:
 				async with self._session.get(url, params=params) as resp:
-					if resp.content_type == "application/pdf":
+					if resp.content_type in ["application/pdf", "application/zip", "text/xml"]:
 						# Yay, we got the file (as long as it's a PDF)
 						await utils.stream_to_path(resp, to_path)
 						return True
--- a/PFERD/organizer.py
+++ b/PFERD/organizer.py
@ -55,7 +55,7 @@ class Organizer:
 		if to_path.exists():
 			if filecmp.cmp(from_path, to_path, shallow=False):
-				logger.info(f"Done nothing at {to_path}")
+				logger.info(f"Ignored {to_path}")
 				# No further action needed, especially not overwriting symlinks...
 				return
 			else: