Sanitize slashes in exercise container names

This commit is contained in:
I-Al-Istannen
2024-10-21 16:28:49 +02:00
parent 4f9e2ab48d
commit f9bb2e41cf
3 changed files with 29 additions and 11 deletions

View File

@ -30,6 +30,7 @@ ambiguous situations.
### Fixed
- Normalization of meeting names in cards
- Sanitization of slashes in exercise container names

## 3.5.2 - 2024-04-14

View File

@ -328,6 +328,9 @@ instance's greatest bottleneck.
) )
self._visited_urls[element.url] = parent_path self._visited_urls[element.url] = parent_path
# element.name might contain `/` if the crawler created nested elements,
# so we cannot sanitize it here. We rely on the output dir to thwart worst-case
# directory escape attacks.
element_path = PurePath(parent_path, element.name) element_path = PurePath(parent_path, element.name)
if element.type in _VIDEO_ELEMENTS: if element.type in _VIDEO_ELEMENTS:

View File

@ -71,12 +71,17 @@ class IliasPageElement:
url: str, url: str,
name: str, name: str,
mtime: Optional[datetime] = None, mtime: Optional[datetime] = None,
description: Optional[str] = None description: Optional[str] = None,
skip_sanitize: bool = False
) -> 'IliasPageElement': ) -> 'IliasPageElement':
if typ == IliasElementType.MEETING: if typ == IliasElementType.MEETING:
normalized = _sanitize_path_name(IliasPageElement._normalize_meeting_name(name)) normalized = IliasPageElement._normalize_meeting_name(name)
log.explain(f"Normalized meeting name from {name!r} to {normalized!r}") log.explain(f"Normalized meeting name from {name!r} to {normalized!r}")
name = normalized name = normalized
if not skip_sanitize:
name = _sanitize_path_name(name)
return IliasPageElement(typ, url, name, mtime, description) return IliasPageElement(typ, url, name, mtime, description)
@staticmethod @staticmethod
@ -648,15 +653,15 @@ class IliasPage:
# Two divs, side by side. Left is the name, right is the link ==> get left # Two divs, side by side. Left is the name, right is the link ==> get left
# sibling # sibling
file_name = file_link.parent.findPrevious(name="div").getText().strip() file_name = file_link.parent.findPrevious(name="div").getText().strip()
file_name = _sanitize_path_name(file_name)
url = self._abs_url_from_link(file_link) url = self._abs_url_from_link(file_link)
log.explain(f"Found exercise entry {file_name!r}") log.explain(f"Found exercise entry {file_name!r}")
results.append(IliasPageElement.create_new( results.append(IliasPageElement.create_new(
IliasElementType.FILE, IliasElementType.FILE,
url, url,
container_name + "/" + file_name, _sanitize_path_name(container_name) + "/" + _sanitize_path_name(file_name),
None # We do not have any timestamp mtime=None, # We do not have any timestamp
skip_sanitize=True
)) ))
# Find all links to file listings (e.g. "Submitted Files" for groups) # Find all links to file listings (e.g. "Submitted Files" for groups)
@ -674,14 +679,15 @@ class IliasPage:
label_container: Tag = parent_container.find( label_container: Tag = parent_container.find(
attrs={"class": lambda x: x and "control-label" in x} attrs={"class": lambda x: x and "control-label" in x}
) )
file_name = _sanitize_path_name(label_container.getText().strip()) file_name = label_container.getText().strip()
url = self._abs_url_from_link(listing) url = self._abs_url_from_link(listing)
log.explain(f"Found exercise detail {file_name!r} at {url}") log.explain(f"Found exercise detail {file_name!r} at {url}")
results.append(IliasPageElement.create_new( results.append(IliasPageElement.create_new(
IliasElementType.EXERCISE_FILES, IliasElementType.EXERCISE_FILES,
url, url,
container_name + "/" + file_name, _sanitize_path_name(container_name) + "/" + _sanitize_path_name(file_name),
None # we do not have any timestamp None, # we do not have any timestamp
skip_sanitize=True
)) ))
return results return results
@ -699,7 +705,8 @@ class IliasPage:
for link in links: for link in links:
abs_url = self._abs_url_from_link(link) abs_url = self._abs_url_from_link(link)
parents = self._find_upwards_folder_hierarchy(link) # Make sure parents are sanitized. We do not want accidental parents
parents = [_sanitize_path_name(x) for x in self._find_upwards_folder_hierarchy(link)]
if parents: if parents:
element_name = "/".join(parents) + "/" + _sanitize_path_name(link.getText()) element_name = "/".join(parents) + "/" + _sanitize_path_name(link.getText())
@ -723,7 +730,12 @@ class IliasPage:
log.explain(f"Found {element_name!r}") log.explain(f"Found {element_name!r}")
result.append(IliasPageElement.create_new( result.append(IliasPageElement.create_new(
element_type, abs_url, element_name, description=description)) element_type,
abs_url,
element_name,
description=description,
skip_sanitize=True
))
result += self._find_cards() result += self._find_cards()
result += self._find_mediacast_videos() result += self._find_mediacast_videos()
@ -869,7 +881,9 @@ class IliasPage:
full_path = name + "." + file_type full_path = name + "." + file_type
log.explain(f"Found file {full_path!r}") log.explain(f"Found file {full_path!r}")
return IliasPageElement.create_new(IliasElementType.FILE, url, full_path, modification_date) return IliasPageElement.create_new(
IliasElementType.FILE, url, full_path, modification_date, skip_sanitize=True
)
def _find_cards(self) -> List[IliasPageElement]: def _find_cards(self) -> List[IliasPageElement]:
result: List[IliasPageElement] = [] result: List[IliasPageElement] = []