mirror of
https://github.com/Garmelon/PFERD.git
synced 2023-12-21 10:23:01 +01:00
Clean up minor things
- improve logging messages
- allow more download file formats
- strip file names
This commit is contained in:
parent
34da5d4d19
commit
068fe77dcf
@ -26,7 +26,7 @@ class ILIAS:
|
|||||||
self._auth = ShibbolethAuthenticator(base_path / cookie_file)
|
self._auth = ShibbolethAuthenticator(base_path / cookie_file)
|
||||||
|
|
||||||
async def synchronize(self, ref_id, to_dir, transform=lambda x: x, filter=lambda x: True):
|
async def synchronize(self, ref_id, to_dir, transform=lambda x: x, filter=lambda x: True):
|
||||||
logging.info(f"Synchronizing {ref_id} to {to_dir} using the ILIAS synchronizer.")
|
logging.info(f" Synchronizing ref_id {ref_id} to {to_dir} using the ILIAS synchronizer.")
|
||||||
|
|
||||||
sync_path = pathlib.Path(self.base_path, to_dir)
|
sync_path = pathlib.Path(self.base_path, to_dir)
|
||||||
orga = Organizer(self.base_path, sync_path)
|
orga = Organizer(self.base_path, sync_path)
|
||||||
@ -67,7 +67,7 @@ class ILIAS:
|
|||||||
return found_files
|
return found_files
|
||||||
|
|
||||||
async def _download(self, orga, files, transform):
|
async def _download(self, orga, files, transform):
|
||||||
for (path, file_id) in files:
|
for (path, file_id) in sorted(files):
|
||||||
to_path = transform(path)
|
to_path = transform(path)
|
||||||
if to_path is not None:
|
if to_path is not None:
|
||||||
temp_path = orga.temp_file()
|
temp_path = orga.temp_file()
|
||||||
@ -79,11 +79,9 @@ class ILIAS:
|
|||||||
|
|
||||||
found = soup.find_all("a", {"class": "il_ContainerItemTitle", "href": self.FILE_RE})
|
found = soup.find_all("a", {"class": "il_ContainerItemTitle", "href": self.FILE_RE})
|
||||||
for element in found:
|
for element in found:
|
||||||
file_stem = element.string
|
file_stem = element.string.strip()
|
||||||
file_id = re.search(self.FILE_RE, element.get("href")).group(1)
|
|
||||||
|
|
||||||
# find out file type
|
|
||||||
file_type = element.parent.parent.parent.find("div", {"class": "il_ItemProperties"}).find("span").string.strip()
|
file_type = element.parent.parent.parent.find("div", {"class": "il_ItemProperties"}).find("span").string.strip()
|
||||||
|
file_id = re.search(self.FILE_RE, element.get("href")).group(1)
|
||||||
|
|
||||||
file_name = f"{file_stem}.{file_type}"
|
file_name = f"{file_stem}.{file_type}"
|
||||||
files.append((file_name, file_id))
|
files.append((file_name, file_id))
|
||||||
@ -95,7 +93,7 @@ class ILIAS:
|
|||||||
|
|
||||||
found = soup.find_all("a", {"class": "il_ContainerItemTitle", "href": self.DIR_RE})
|
found = soup.find_all("a", {"class": "il_ContainerItemTitle", "href": self.DIR_RE})
|
||||||
for element in found:
|
for element in found:
|
||||||
dir_name = element.string
|
dir_name = element.string.strip()
|
||||||
ref_id = re.search(self.DIR_RE, element.get("href")).group(1)
|
ref_id = re.search(self.DIR_RE, element.get("href")).group(1)
|
||||||
dir_id = f"fold_{ref_id}"
|
dir_id = f"fold_{ref_id}"
|
||||||
dirs.append((dir_name, dir_id))
|
dirs.append((dir_name, dir_id))
|
||||||
|
@ -185,7 +185,7 @@ class ShibbolethAuthenticator:
|
|||||||
for t in range(self.RETRY_ATTEMPTS):
|
for t in range(self.RETRY_ATTEMPTS):
|
||||||
try:
|
try:
|
||||||
async with self._session.get(url, params=params) as resp:
|
async with self._session.get(url, params=params) as resp:
|
||||||
if resp.content_type == "application/pdf":
|
if resp.content_type in ["application/pdf", "application/zip", "text/xml"]:
|
||||||
# Yay, we got the file (as long as it's a PDF)
|
# Yay, we got the file (as long as it's a PDF, ZIP or XML)
|
||||||
await utils.stream_to_path(resp, to_path)
|
await utils.stream_to_path(resp, to_path)
|
||||||
return True
|
return True
|
||||||
|
@ -55,7 +55,7 @@ class Organizer:
|
|||||||
|
|
||||||
if to_path.exists():
|
if to_path.exists():
|
||||||
if filecmp.cmp(from_path, to_path, shallow=False):
|
if filecmp.cmp(from_path, to_path, shallow=False):
|
||||||
logger.info(f"Done nothing at {to_path}")
|
logger.info(f"Ignored {to_path}")
|
||||||
# No further action needed, especially not overwriting symlinks...
|
# No further action needed, especially not overwriting symlinks...
|
||||||
return
|
return
|
||||||
else:
|
else:
|
||||||
|
Loading…
Reference in New Issue
Block a user