Sanitize "/" in ilias path names

This commit is contained in:
I-Al-Istannen 2020-11-12 19:32:45 +01:00
parent a0ae9aee27
commit 55e9e719ad

View File

@ -26,6 +26,10 @@ LOGGER = logging.getLogger(__name__)
PRETTY = PrettyLogger(LOGGER) PRETTY = PrettyLogger(LOGGER)
def _sanitize_path_name(name: str) -> str:
return name.replace("/", "-")
class IliasElementType(Enum): class IliasElementType(Enum):
""" """
The type of an ilias element. The type of an ilias element.
@ -260,7 +264,7 @@ class IliasCrawler:
links: List[bs4.Tag] = soup.select("a.il_ContainerItemTitle") links: List[bs4.Tag] = soup.select("a.il_ContainerItemTitle")
for link in links: for link in links:
abs_url = self._abs_url_from_link(link) abs_url = self._abs_url_from_link(link)
element_path = Path(folder_path, link.getText().strip()) element_path = Path(folder_path, _sanitize_path_name(link.getText().strip()))
element_type = self._find_type_from_link(element_path, link, abs_url) element_type = self._find_type_from_link(element_path, link, abs_url)
if element_type == IliasElementType.REGULAR_FILE: if element_type == IliasElementType.REGULAR_FILE:
@ -377,7 +381,7 @@ class IliasCrawler:
modification_date = demangle_date(modification_date_str) modification_date = demangle_date(modification_date_str)
# Grab the name from the link text # Grab the name from the link text
name = link_element.getText() name = _sanitize_path_name(link_element.getText())
full_path = Path(path, name + "." + file_type) full_path = Path(path, name + "." + file_type)
return [ return [
@ -508,7 +512,7 @@ class IliasCrawler:
).getText().strip() ).getText().strip()
title += ".mp4" title += ".mp4"
video_path: Path = Path(parent_path, title) video_path: Path = Path(parent_path, _sanitize_path_name(title))
video_url = self._abs_url_from_link(link) video_url = self._abs_url_from_link(link)
@ -580,6 +584,7 @@ class IliasCrawler:
# Two divs, side by side. Left is the name, right is the link ==> get left # Two divs, side by side. Left is the name, right is the link ==> get left
# sibling # sibling
file_name = file_link.parent.findPrevious(name="div").getText().strip() file_name = file_link.parent.findPrevious(name="div").getText().strip()
file_name = _sanitize_path_name(file_name)
url = self._abs_url_from_link(file_link) url = self._abs_url_from_link(file_link)
LOGGER.debug("Found file %r at %r", file_name, url) LOGGER.debug("Found file %r at %r", file_name, url)