mirror of
				https://github.com/Garmelon/PFERD.git
				synced 2025-11-03 22:23:41 +01:00 
			
		
		
		
	Fix crawling of expanded meetings
The last meeting on every page is expanded by default. Its content is then shown inline *and* in the meeting page itself. We should skip the inline content.
This commit is contained in:
		@@ -428,6 +428,12 @@ class IliasPage:
 | 
			
		||||
            element_type = self._find_type_from_link(element_name, link, abs_url)
 | 
			
		||||
            description = self._find_link_description(link)
 | 
			
		||||
 | 
			
		||||
            # The last meeting on every page is expanded by default.
 | 
			
		||||
            # Its content is then shown inline *and* in the meeting page itself.
 | 
			
		||||
            # We should skip the inline content.
 | 
			
		||||
            if element_type != IliasElementType.MEETING and self._is_in_expanded_meeting(link):
 | 
			
		||||
                continue
 | 
			
		||||
 | 
			
		||||
            if not element_type:
 | 
			
		||||
                continue
 | 
			
		||||
            if element_type == IliasElementType.MEETING:
 | 
			
		||||
@@ -445,6 +451,26 @@ class IliasPage:
 | 
			
		||||
 | 
			
		||||
        return result
 | 
			
		||||
 | 
			
		||||
    def _is_in_expanded_meeting(self, tag: Tag) -> bool:
        """
        Returns whether an element is part of the inline content of an expanded meeting.

        Walks up the ancestors of ``tag`` to the nearest one carrying the
        ``ilContainerListItemContentCB`` class, takes the first link found in that
        ancestor's parent and reports whether that link is classified as a meeting.

        Has false positives for meetings themselves, as their title is also "in the
        expanded meeting content": it is in the same general div and this whole thing
        is guesswork. Therefore, you should check for meetings before passing them
        to this function.

        Returns False if no such ancestor exists, or if no link can be found next to it.
        """
        # Iterate the ancestor generator directly; we stop at the first match anyway.
        for parent in tag.parents:
            classes = parent.get("class")
            if not classes:
                continue

            # We should not crawl files under meetings
            if "ilContainerListItemContentCB" in classes:
                link = parent.parent.find("a")
                if link is None:
                    # Without a link there is nothing to classify, so we cannot
                    # claim the element belongs to a meeting; do not skip it.
                    return False

                element_type = IliasPage._find_type_from_folder_like(link, self._page_url)
                return element_type == IliasElementType.MEETING

        return False
 | 
			
		||||
 | 
			
		||||
    def _find_upwards_folder_hierarchy(self, tag: Tag) -> List[str]:
 | 
			
		||||
        """
 | 
			
		||||
        Interprets accordions and expandable blocks as virtual folders and returns them
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user