mirror of
				https://github.com/Garmelon/PFERD.git
				synced 2025-11-03 22:23:41 +01:00 
			
		
		
		
	Handle exercise detail containers in ILIAS html parser
This commit is contained in:
		@@ -16,6 +16,7 @@ TargetType = Union[str, int]
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
class IliasElementType(Enum):
 | 
					class IliasElementType(Enum):
 | 
				
			||||||
    EXERCISE = "exercise"
 | 
					    EXERCISE = "exercise"
 | 
				
			||||||
 | 
					    EXERCISE_FILES = "exercise_files"  # own submitted files
 | 
				
			||||||
    FILE = "file"
 | 
					    FILE = "file"
 | 
				
			||||||
    FOLDER = "folder"
 | 
					    FOLDER = "folder"
 | 
				
			||||||
    FORUM = "forum"
 | 
					    FORUM = "forum"
 | 
				
			||||||
@@ -197,6 +198,43 @@ class IliasPage:
 | 
				
			|||||||
        return IliasPageElement(IliasElementType.VIDEO_PLAYER, video_url, video_name, modification_time)
 | 
					        return IliasPageElement(IliasElementType.VIDEO_PLAYER, video_url, video_name, modification_time)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def _find_exercise_entries(self) -> List[IliasPageElement]:
 | 
					    def _find_exercise_entries(self) -> List[IliasPageElement]:
 | 
				
			||||||
 | 
					        if self._soup.find(id="tab_submission"):
 | 
				
			||||||
 | 
					            log.explain("Found submission tab. This is an exercise detail page")
 | 
				
			||||||
 | 
					            return self._find_exercise_entries_detail_page()
 | 
				
			||||||
 | 
					        log.explain("Found no submission tab. This is an exercise root page")
 | 
				
			||||||
 | 
					        return self._find_exercise_entries_root_page()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def _find_exercise_entries_detail_page(self) -> List[IliasPageElement]:
 | 
				
			||||||
 | 
					        results: List[IliasPageElement] = []
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # Find all download links in the container (this will contain all the files)
 | 
				
			||||||
 | 
					        download_links: List[Tag] = self._soup.findAll(
 | 
				
			||||||
 | 
					            name="a",
 | 
				
			||||||
 | 
					            # download links contain the given command class
 | 
				
			||||||
 | 
					            attrs={"href": lambda x: x and "cmd=download" in x},
 | 
				
			||||||
 | 
					            text="Download"
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        for link in download_links:
 | 
				
			||||||
 | 
					            parent_row: Tag = link.findParent("tr")
 | 
				
			||||||
 | 
					            children: List[Tag] = parent_row.findChildren("td")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            # <checkbox> <name> <uploader> <date> <download>
 | 
				
			||||||
 | 
					            #     0         1        2       3        4
 | 
				
			||||||
 | 
					            name = _sanitize_path_name(children[1].getText().strip())
 | 
				
			||||||
 | 
					            date = demangle_date(children[3].getText().strip())
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            log.explain(f"Found exercise detail entry {name!r}")
 | 
				
			||||||
 | 
					            results.append(IliasPageElement(
 | 
				
			||||||
 | 
					                IliasElementType.FILE,
 | 
				
			||||||
 | 
					                self._abs_url_from_link(link),
 | 
				
			||||||
 | 
					                name,
 | 
				
			||||||
 | 
					                date
 | 
				
			||||||
 | 
					            ))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        return results
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def _find_exercise_entries_root_page(self) -> List[IliasPageElement]:
 | 
				
			||||||
        results: List[IliasPageElement] = []
 | 
					        results: List[IliasPageElement] = []
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # Each assignment is in an accordion container
 | 
					        # Each assignment is in an accordion container
 | 
				
			||||||
@@ -205,6 +243,8 @@ class IliasPage:
 | 
				
			|||||||
        for container in assignment_containers:
 | 
					        for container in assignment_containers:
 | 
				
			||||||
            # Fetch the container name out of the header to use it in the path
 | 
					            # Fetch the container name out of the header to use it in the path
 | 
				
			||||||
            container_name = container.select_one(".ilAssignmentHeader").getText().strip()
 | 
					            container_name = container.select_one(".ilAssignmentHeader").getText().strip()
 | 
				
			||||||
 | 
					            log.explain(f"Found exercise container {container_name!r}")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            # Find all download links in the container (this will contain all the files)
 | 
					            # Find all download links in the container (this will contain all the files)
 | 
				
			||||||
            files: List[Tag] = container.findAll(
 | 
					            files: List[Tag] = container.findAll(
 | 
				
			||||||
                name="a",
 | 
					                name="a",
 | 
				
			||||||
@@ -213,8 +253,6 @@ class IliasPage:
 | 
				
			|||||||
                text="Download"
 | 
					                text="Download"
 | 
				
			||||||
            )
 | 
					            )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            log.explain(f"Found exercise container {container_name!r}")
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
            # Grab each file as you now have the link
 | 
					            # Grab each file as you now have the link
 | 
				
			||||||
            for file_link in files:
 | 
					            for file_link in files:
 | 
				
			||||||
                # Two divs, side by side. Left is the name, right is the link ==> get left
 | 
					                # Two divs, side by side. Left is the name, right is the link ==> get left
 | 
				
			||||||
@@ -231,6 +269,25 @@ class IliasPage:
 | 
				
			|||||||
                    None  # We do not have any timestamp
 | 
					                    None  # We do not have any timestamp
 | 
				
			||||||
                ))
 | 
					                ))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            # Find all links to file listings (e.g. "Submitted Files" for groups)
 | 
				
			||||||
 | 
					            file_listings: List[Tag] = container.findAll(
 | 
				
			||||||
 | 
					                name="a",
 | 
				
			||||||
 | 
					                # file listing links contain the given command class
 | 
				
			||||||
 | 
					                attrs={"href": lambda x: x and "cmdClass=ilexsubmissionfilegui" in x}
 | 
				
			||||||
 | 
					            )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            # Add each listing as a new EXERCISE_FILES element
 | 
				
			||||||
 | 
					            for listing in file_listings:
 | 
				
			||||||
 | 
					                file_name = _sanitize_path_name(listing.getText().strip())
 | 
				
			||||||
 | 
					                url = self._abs_url_from_link(listing)
 | 
				
			||||||
 | 
					                log.explain(f"Found exercise detail {file_name!r} at {url}")
 | 
				
			||||||
 | 
					                results.append(IliasPageElement(
 | 
				
			||||||
 | 
					                    IliasElementType.EXERCISE_FILES,
 | 
				
			||||||
 | 
					                    url,
 | 
				
			||||||
 | 
					                    container_name + "/" + file_name,
 | 
				
			||||||
 | 
					                    None  # we do not have any timestamp
 | 
				
			||||||
 | 
					                ))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        return results
 | 
					        return results
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def _find_normal_entries(self) -> List[IliasPageElement]:
 | 
					    def _find_normal_entries(self) -> List[IliasPageElement]:
 | 
				
			||||||
@@ -349,7 +406,7 @@ class IliasPage:
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        if found_parent is None:
 | 
					        if found_parent is None:
 | 
				
			||||||
            _unexpected_html_warning()
 | 
					            _unexpected_html_warning()
 | 
				
			||||||
            log.warn_contd(f"Tried to figure out element type, but did not find an icon for {url!r}")
 | 
					            log.warn_contd(f"Tried to figure out element type, but did not find an icon for {url}")
 | 
				
			||||||
            return None
 | 
					            return None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # Find the small descriptive icon to figure out the type
 | 
					        # Find the small descriptive icon to figure out the type
 | 
				
			||||||
@@ -357,7 +414,7 @@ class IliasPage:
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        if img_tag is None:
 | 
					        if img_tag is None:
 | 
				
			||||||
            _unexpected_html_warning()
 | 
					            _unexpected_html_warning()
 | 
				
			||||||
            log.warn_contd(f"Tried to figure out element type, but did not find an image for {url!r}")
 | 
					            log.warn_contd(f"Tried to figure out element type, but did not find an image for {url}")
 | 
				
			||||||
            return None
 | 
					            return None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if "opencast" in str(img_tag["alt"]).lower():
 | 
					        if "opencast" in str(img_tag["alt"]).lower():
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -61,6 +61,7 @@ class KitIliasWebCrawlerSection(HttpCrawlerSection):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
_DIRECTORY_PAGES: Set[IliasElementType] = set([
 | 
					_DIRECTORY_PAGES: Set[IliasElementType] = set([
 | 
				
			||||||
    IliasElementType.EXERCISE,
 | 
					    IliasElementType.EXERCISE,
 | 
				
			||||||
 | 
					    IliasElementType.EXERCISE_FILES,
 | 
				
			||||||
    IliasElementType.FOLDER,
 | 
					    IliasElementType.FOLDER,
 | 
				
			||||||
    IliasElementType.MEETING,
 | 
					    IliasElementType.MEETING,
 | 
				
			||||||
    IliasElementType.VIDEO_FOLDER,
 | 
					    IliasElementType.VIDEO_FOLDER,
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user