Mirror of https://github.com/Garmelon/PFERD.git, synced 2023-12-21 10:23:01 +01:00
Clean up minor things

- improve logging messages
- allow more download file formats
- strip file names
parent 34da5d4d19
commit 068fe77dcf
@@ -28,7 +28,7 @@ class FfM:
         self._session = aiohttp.ClientSession()

     async def synchronize(self, urlpart, to_dir, transform=lambda x: x):
-        logging.info(f"Synchronizing {urlpart} to {to_dir} using the FfM synchronizer.")
+        logging.info(f" Synchronizing {urlpart} to {to_dir} using the FfM synchronizer.")

         sync_path = pathlib.Path(self.base_path, to_dir)
         orga = Organizer(self.base_path, sync_path)
@@ -26,7 +26,7 @@ class ILIAS:
         self._auth = ShibbolethAuthenticator(base_path / cookie_file)

     async def synchronize(self, ref_id, to_dir, transform=lambda x: x, filter=lambda x: True):
-        logging.info(f"Synchronizing {ref_id} to {to_dir} using the ILIAS synchronizer.")
+        logging.info(f" Synchronizing ref_id {ref_id} to {to_dir} using the ILIAS synchronizer.")

         sync_path = pathlib.Path(self.base_path, to_dir)
         orga = Organizer(self.base_path, sync_path)
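Both logging hunks only reword the messages: the added leading space indents each synchronizer's status line beneath whatever top-level message the caller prints, and the ILIAS variant now labels the value explicitly as a ref_id. A minimal sketch of the intended effect (the top-level message and the values are made up, not from the repo):

```python
import logging

logging.basicConfig(level=logging.INFO, format="%(message)s")

ref_id = 1234              # hypothetical ILIAS ref_id
to_dir = "Example Course"  # hypothetical target directory

logging.info("Synchronizing everything")  # hypothetical top-level message
# The leading space visually nests this line under the one above.
logging.info(f" Synchronizing ref_id {ref_id} to {to_dir} using the ILIAS synchronizer.")
```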
@@ -67,7 +67,7 @@ class ILIAS:
         return found_files

     async def _download(self, orga, files, transform):
-        for (path, file_id) in files:
+        for (path, file_id) in sorted(files):
             to_path = transform(path)
             if to_path is not None:
                 temp_path = orga.temp_file()
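Switching from files to sorted(files) makes the download order deterministic: the (path, file_id) tuples compare lexicographically, element by element, so files are processed in path order no matter how the crawler happened to discover them. A quick illustration with made-up tuples:

```python
# Hypothetical (path, file_id) pairs in crawl order.
files = [
    ("b/slides.pdf", "file_12"),
    ("a/sheet01.pdf", "file_7"),
    ("a/notes.pdf", "file_3"),
]

# Tuples sort by their first differing element, so the path decides.
for (path, file_id) in sorted(files):
    print(path, file_id)
# a/notes.pdf file_3
# a/sheet01.pdf file_7
# b/slides.pdf file_12
```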
@@ -79,11 +79,9 @@ class ILIAS:

         found = soup.find_all("a", {"class": "il_ContainerItemTitle", "href": self.FILE_RE})
         for element in found:
-            file_stem = element.string
-            file_id = re.search(self.FILE_RE, element.get("href")).group(1)
-
-            # find out file type
+            file_stem = element.string.strip()
             file_type = element.parent.parent.parent.find("div", {"class": "il_ItemProperties"}).find("span").string.strip()
+            file_id = re.search(self.FILE_RE, element.get("href")).group(1)

             file_name = f"{file_stem}.{file_type}"
             files.append((file_name, file_id))
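element.string is a BeautifulSoup NavigableString that preserves whatever whitespace surrounds the link text in the markup, so without .strip() a stray newline could end up inside the generated file name; this is the "strip file names" part of the commit. A small sketch of what stripping fixes (the HTML snippet is invented):

```python
from bs4 import BeautifulSoup

# Invented markup mimicking an ILIAS container item title.
html = '<a class="il_ContainerItemTitle" href="#">\n  Exercise 01 </a>'
element = BeautifulSoup(html, "html.parser").find("a")

print(repr(element.string))          # '\n  Exercise 01 '
print(repr(element.string.strip()))  # 'Exercise 01'

# Unstripped, the stem would yield a broken name like '\n  Exercise 01 .pdf'.
file_name = f"{element.string.strip()}.pdf"  # 'Exercise 01.pdf'
```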
@@ -95,7 +93,7 @@ class ILIAS:

         found = soup.find_all("a", {"class": "il_ContainerItemTitle", "href": self.DIR_RE})
         for element in found:
-            dir_name = element.string
+            dir_name = element.string.strip()
             ref_id = re.search(self.DIR_RE, element.get("href")).group(1)
             dir_id = f"fold_{ref_id}"
             dirs.append((dir_name, dir_id))
@@ -185,7 +185,7 @@ class ShibbolethAuthenticator:
         for t in range(self.RETRY_ATTEMPTS):
             try:
                 async with self._session.get(url, params=params) as resp:
-                    if resp.content_type == "application/pdf":
+                    if resp.content_type in ["application/pdf", "application/zip", "text/xml"]:
                         # Yay, we got the file (as long as it's a PDF)
                         await utils.stream_to_path(resp, to_path)
                         return True
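The single equality check against "application/pdf" becomes a membership test, which is the "allow more download file formats" part of the commit: ZIP archives and XML exports now count as successful downloads too, while anything else (typically an HTML login page) still falls through to the retry logic. The pattern in isolation (the function name and the text/html example are hypothetical):

```python
# Content types the downloader now accepts as "we got the real file".
ALLOWED_TYPES = ["application/pdf", "application/zip", "text/xml"]

def is_downloadable(content_type: str) -> bool:
    # Membership test instead of equality against a single type.
    return content_type in ALLOWED_TYPES

assert is_downloadable("application/zip")
assert not is_downloadable("text/html")  # e.g. a login page, not the file
```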
@@ -55,7 +55,7 @@ class Organizer:

         if to_path.exists():
             if filecmp.cmp(from_path, to_path, shallow=False):
-                logger.info(f"Done nothing at {to_path}")
+                logger.info(f"Ignored {to_path}")
                 # No further action needed, especially not overwriting symlinks...
                 return
             else:
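For context on the unchanged condition: filecmp.cmp(..., shallow=False) compares the two files byte for byte instead of trusting matching os.stat() signatures, so the organizer only logs "Ignored" and returns early when the freshly downloaded file really is identical to what is already on disk. A minimal, self-contained sketch (file names and contents are made up):

```python
import filecmp
import pathlib
import tempfile

with tempfile.TemporaryDirectory() as tmp:
    from_path = pathlib.Path(tmp, "downloaded.pdf")
    to_path = pathlib.Path(tmp, "existing.pdf")
    from_path.write_bytes(b"%PDF-1.4 same bytes")
    to_path.write_bytes(b"%PDF-1.4 same bytes")

    # shallow=False forces a byte-by-byte comparison rather than
    # comparing only size and modification time.
    if filecmp.cmp(from_path, to_path, shallow=False):
        print(f"Ignored {to_path}")  # identical content, nothing to do
```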