Clean up minor things

- improve logging messages
- allow more download file formats (application/zip and text/xml, in addition to application/pdf)
- strip surrounding whitespace from scraped file and directory names
This commit is contained in:
Joscha 2018-11-26 17:00:17 +00:00
parent 34da5d4d19
commit 068fe77dcf
4 changed files with 8 additions and 10 deletions

View File

@ -28,7 +28,7 @@ class FfM:
self._session = aiohttp.ClientSession() self._session = aiohttp.ClientSession()
async def synchronize(self, urlpart, to_dir, transform=lambda x: x): async def synchronize(self, urlpart, to_dir, transform=lambda x: x):
logging.info(f"Synchronizing {urlpart} to {to_dir} using the FfM synchronizer.") logging.info(f" Synchronizing {urlpart} to {to_dir} using the FfM synchronizer.")
sync_path = pathlib.Path(self.base_path, to_dir) sync_path = pathlib.Path(self.base_path, to_dir)
orga = Organizer(self.base_path, sync_path) orga = Organizer(self.base_path, sync_path)

View File

@ -26,7 +26,7 @@ class ILIAS:
self._auth = ShibbolethAuthenticator(base_path / cookie_file) self._auth = ShibbolethAuthenticator(base_path / cookie_file)
async def synchronize(self, ref_id, to_dir, transform=lambda x: x, filter=lambda x: True): async def synchronize(self, ref_id, to_dir, transform=lambda x: x, filter=lambda x: True):
logging.info(f"Synchronizing {ref_id} to {to_dir} using the ILIAS synchronizer.") logging.info(f" Synchronizing ref_id {ref_id} to {to_dir} using the ILIAS synchronizer.")
sync_path = pathlib.Path(self.base_path, to_dir) sync_path = pathlib.Path(self.base_path, to_dir)
orga = Organizer(self.base_path, sync_path) orga = Organizer(self.base_path, sync_path)
@ -67,7 +67,7 @@ class ILIAS:
return found_files return found_files
async def _download(self, orga, files, transform): async def _download(self, orga, files, transform):
for (path, file_id) in files: for (path, file_id) in sorted(files):
to_path = transform(path) to_path = transform(path)
if to_path is not None: if to_path is not None:
temp_path = orga.temp_file() temp_path = orga.temp_file()
@ -79,11 +79,9 @@ class ILIAS:
found = soup.find_all("a", {"class": "il_ContainerItemTitle", "href": self.FILE_RE}) found = soup.find_all("a", {"class": "il_ContainerItemTitle", "href": self.FILE_RE})
for element in found: for element in found:
file_stem = element.string file_stem = element.string.strip()
file_id = re.search(self.FILE_RE, element.get("href")).group(1)
# find out file type
file_type = element.parent.parent.parent.find("div", {"class": "il_ItemProperties"}).find("span").string.strip() file_type = element.parent.parent.parent.find("div", {"class": "il_ItemProperties"}).find("span").string.strip()
file_id = re.search(self.FILE_RE, element.get("href")).group(1)
file_name = f"{file_stem}.{file_type}" file_name = f"{file_stem}.{file_type}"
files.append((file_name, file_id)) files.append((file_name, file_id))
@ -95,7 +93,7 @@ class ILIAS:
found = soup.find_all("a", {"class": "il_ContainerItemTitle", "href": self.DIR_RE}) found = soup.find_all("a", {"class": "il_ContainerItemTitle", "href": self.DIR_RE})
for element in found: for element in found:
dir_name = element.string dir_name = element.string.strip()
ref_id = re.search(self.DIR_RE, element.get("href")).group(1) ref_id = re.search(self.DIR_RE, element.get("href")).group(1)
dir_id = f"fold_{ref_id}" dir_id = f"fold_{ref_id}"
dirs.append((dir_name, dir_id)) dirs.append((dir_name, dir_id))

View File

@ -185,7 +185,7 @@ class ShibbolethAuthenticator:
for t in range(self.RETRY_ATTEMPTS): for t in range(self.RETRY_ATTEMPTS):
try: try:
async with self._session.get(url, params=params) as resp: async with self._session.get(url, params=params) as resp:
if resp.content_type == "application/pdf": if resp.content_type in ["application/pdf", "application/zip", "text/xml"]:
# Yay, we got the file (as long as it's a PDF) # Yay, we got the file (as long as it's a PDF)
await utils.stream_to_path(resp, to_path) await utils.stream_to_path(resp, to_path)
return True return True

View File

@ -55,7 +55,7 @@ class Organizer:
if to_path.exists(): if to_path.exists():
if filecmp.cmp(from_path, to_path, shallow=False): if filecmp.cmp(from_path, to_path, shallow=False):
logger.info(f"Done nothing at {to_path}") logger.info(f"Ignored {to_path}")
# No further action needed, especially not overwriting symlinks... # No further action needed, especially not overwriting symlinks...
return return
else: else: