mirror of
https://github.com/Garmelon/PFERD.git
synced 2023-12-21 10:23:01 +01:00
Fix crawling of expanded meetings
The last meeting on every page is expanded by default. Its content is then shown inline *and* in the meeting page itself. We should skip the inline content.
This commit is contained in:
parent
694ffb4d77
commit
bcc537468c
@@ -428,6 +428,12 @@ class IliasPage:
|
|||||||
element_type = self._find_type_from_link(element_name, link, abs_url)
|
element_type = self._find_type_from_link(element_name, link, abs_url)
|
||||||
description = self._find_link_description(link)
|
description = self._find_link_description(link)
|
||||||
|
|
||||||
|
# The last meeting on every page is expanded by default.
|
||||||
|
# Its content is then shown inline *and* in the meeting page itself.
|
||||||
|
# We should skip the inline content.
|
||||||
|
if element_type != IliasElementType.MEETING and self._is_in_expanded_meeting(link):
|
||||||
|
continue
|
||||||
|
|
||||||
if not element_type:
|
if not element_type:
|
||||||
continue
|
continue
|
||||||
if element_type == IliasElementType.MEETING:
|
if element_type == IliasElementType.MEETING:
|
||||||
@@ -445,6 +451,26 @@ class IliasPage:
|
|||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
def _is_in_expanded_meeting(self, tag: Tag) -> bool:
    """
    Returns whether a tag is part of the inline content of an expanded meeting.

    Walks up the ancestors of the tag. The first ancestor carrying the
    "ilContainerListItemContentCB" class decides the result: the first link found
    in that ancestor's parent is classified and the tag counts as being in an
    expanded meeting if that link is a meeting. If no ancestor carries the class,
    or no link can be found, the result is False.

    Has false positives for meetings themselves as their title is also "in the expanded meeting content".
    It is in the same general div and this whole thing is guesswork.
    Therefore, you should check for meetings before passing them in this function.
    """
    # Iterate the ancestors lazily, there is no need to build a list first.
    for parent in tag.parents:
        classes = parent.get("class")
        if not classes:
            continue

        # We should not crawl files under meetings
        if "ilContainerListItemContentCB" in classes:
            container = parent.parent
            link = container.find("a") if container is not None else None
            if link is None:
                # Without a link there is nothing to classify, so this can not be
                # identified as a meeting.
                return False

            element_type = IliasPage._find_type_from_folder_like(link, self._page_url)
            return element_type == IliasElementType.MEETING

    return False
|
||||||
|
|
||||||
def _find_upwards_folder_hierarchy(self, tag: Tag) -> List[str]:
|
def _find_upwards_folder_hierarchy(self, tag: Tag) -> List[str]:
|
||||||
"""
|
"""
|
||||||
Interprets accordions and expandable blocks as virtual folders and returns them
|
Interprets accordions and expandable blocks as virtual folders and returns them
|
||||||
|
Loading…
Reference in New Issue
Block a user