mirror of
				https://github.com/Garmelon/PFERD.git
				synced 2025-11-04 06:32:52 +01:00 
			
		
		
		
	Compare commits
	
		
			2 Commits
		
	
	
		
			db86d23989
			...
			b1d42f8b70
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 
						 | 
					b1d42f8b70 | ||
| 
						 | 
					266812f90e | 
@@ -1067,6 +1067,34 @@ class IliasPage:
 | 
				
			|||||||
        rest_of_name = split_delimiter.join(meeting_name.split(split_delimiter)[1:])
 | 
					        rest_of_name = split_delimiter.join(meeting_name.split(split_delimiter)[1:])
 | 
				
			||||||
        return datetime.strftime(date_portion, "%Y-%m-%d") + split_delimiter + rest_of_name
 | 
					        return datetime.strftime(date_portion, "%Y-%m-%d") + split_delimiter + rest_of_name
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    @staticmethod
 | 
				
			||||||
 | 
					    def is_logged_in(soup: BeautifulSoup) -> bool:
 | 
				
			||||||
 | 
					        # Normal ILIAS pages
 | 
				
			||||||
 | 
					        mainbar: Optional[Tag] = soup.find(class_="il-maincontrols-metabar")
 | 
				
			||||||
 | 
					        if mainbar is not None:
 | 
				
			||||||
 | 
					            login_button = mainbar.find(attrs={"href": lambda x: x and "login.php" in x})
 | 
				
			||||||
 | 
					            shib_login = soup.find(id="button_shib_login")
 | 
				
			||||||
 | 
					            return not login_button and not shib_login
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # Personal Desktop
 | 
				
			||||||
 | 
					        if soup.find("a", attrs={"href": lambda x: x and "block_type=pditems" in x}):
 | 
				
			||||||
 | 
					            return True
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # Video listing embeds do not have complete ILIAS html. Try to match them by
 | 
				
			||||||
 | 
					        # their video listing table
 | 
				
			||||||
 | 
					        video_table = soup.find(
 | 
				
			||||||
 | 
					            recursive=True,
 | 
				
			||||||
 | 
					            name="table",
 | 
				
			||||||
 | 
					            attrs={"id": lambda x: x is not None and x.startswith("tbl_xoct")}
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					        if video_table is not None:
 | 
				
			||||||
 | 
					            return True
 | 
				
			||||||
 | 
					        # The individual video player wrapper page has nothing of the above.
 | 
				
			||||||
 | 
					        # Match it by its playerContainer.
 | 
				
			||||||
 | 
					        if soup.select_one("#playerContainer") is not None:
 | 
				
			||||||
 | 
					            return True
 | 
				
			||||||
 | 
					        return False
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def _abs_url_from_link(self, link_tag: Tag) -> str:
 | 
					    def _abs_url_from_link(self, link_tag: Tag) -> str:
 | 
				
			||||||
        """
 | 
					        """
 | 
				
			||||||
        Create an absolute url from an <a> tag.
 | 
					        Create an absolute url from an <a> tag.
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -894,7 +894,7 @@ instance's greatest bottleneck.
 | 
				
			|||||||
        auth_id = await self._current_auth_id()
 | 
					        auth_id = await self._current_auth_id()
 | 
				
			||||||
        async with self.session.get(url) as request:
 | 
					        async with self.session.get(url) as request:
 | 
				
			||||||
            soup = soupify(await request.read())
 | 
					            soup = soupify(await request.read())
 | 
				
			||||||
            if self._is_logged_in(soup):
 | 
					            if IliasPage.is_logged_in(soup):
 | 
				
			||||||
                return self._verify_page(soup, url, root_page_allowed)
 | 
					                return self._verify_page(soup, url, root_page_allowed)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # We weren't authenticated, so try to do that
 | 
					        # We weren't authenticated, so try to do that
 | 
				
			||||||
@@ -903,11 +903,12 @@ instance's greatest bottleneck.
 | 
				
			|||||||
        # Retry once after authenticating. If this fails, we will die.
 | 
					        # Retry once after authenticating. If this fails, we will die.
 | 
				
			||||||
        async with self.session.get(url) as request:
 | 
					        async with self.session.get(url) as request:
 | 
				
			||||||
            soup = soupify(await request.read())
 | 
					            soup = soupify(await request.read())
 | 
				
			||||||
            if self._is_logged_in(soup):
 | 
					            if IliasPage.is_logged_in(soup):
 | 
				
			||||||
                return self._verify_page(soup, url, root_page_allowed)
 | 
					                return self._verify_page(soup, url, root_page_allowed)
 | 
				
			||||||
        raise CrawlError(f"get_page failed even after authenticating on {url!r}")
 | 
					        raise CrawlError(f"get_page failed even after authenticating on {url!r}")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def _verify_page(self, soup: BeautifulSoup, url: str, root_page_allowed: bool) -> BeautifulSoup:
 | 
					    @staticmethod
 | 
				
			||||||
 | 
					    def _verify_page(soup: BeautifulSoup, url: str, root_page_allowed: bool) -> BeautifulSoup:
 | 
				
			||||||
        if IliasPage.is_root_page(soup) and not root_page_allowed:
 | 
					        if IliasPage.is_root_page(soup) and not root_page_allowed:
 | 
				
			||||||
            raise CrawlError(
 | 
					            raise CrawlError(
 | 
				
			||||||
                "Unexpectedly encountered ILIAS root page. "
 | 
					                "Unexpectedly encountered ILIAS root page. "
 | 
				
			||||||
@@ -965,34 +966,6 @@ instance's greatest bottleneck.
 | 
				
			|||||||
    async def _authenticate(self) -> None:
 | 
					    async def _authenticate(self) -> None:
 | 
				
			||||||
        await self._shibboleth_login.login(self.session)
 | 
					        await self._shibboleth_login.login(self.session)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    @ staticmethod
 | 
					 | 
				
			||||||
    def _is_logged_in(soup: BeautifulSoup) -> bool:
 | 
					 | 
				
			||||||
        # Normal ILIAS pages
 | 
					 | 
				
			||||||
        mainbar: Optional[Tag] = soup.find(class_="il-maincontrols-metabar")
 | 
					 | 
				
			||||||
        if mainbar is not None:
 | 
					 | 
				
			||||||
            login_button = mainbar.find(attrs={"href": lambda x: x and "login.php" in x})
 | 
					 | 
				
			||||||
            shib_login = soup.find(id="button_shib_login")
 | 
					 | 
				
			||||||
            return not login_button and not shib_login
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        # Personal Desktop
 | 
					 | 
				
			||||||
        if soup.find("a", attrs={"href": lambda x: x and "block_type=pditems" in x}):
 | 
					 | 
				
			||||||
            return True
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        # Video listing embeds do not have complete ILIAS html. Try to match them by
 | 
					 | 
				
			||||||
        # their video listing table
 | 
					 | 
				
			||||||
        video_table = soup.find(
 | 
					 | 
				
			||||||
            recursive=True,
 | 
					 | 
				
			||||||
            name="table",
 | 
					 | 
				
			||||||
            attrs={"id": lambda x: x is not None and x.startswith("tbl_xoct")}
 | 
					 | 
				
			||||||
        )
 | 
					 | 
				
			||||||
        if video_table is not None:
 | 
					 | 
				
			||||||
            return True
 | 
					 | 
				
			||||||
        # The individual video player wrapper page has nothing of the above.
 | 
					 | 
				
			||||||
        # Match it by its playerContainer.
 | 
					 | 
				
			||||||
        if soup.select_one("#playerContainer") is not None:
 | 
					 | 
				
			||||||
            return True
 | 
					 | 
				
			||||||
        return False
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
class KitShibbolethLogin:
 | 
					class KitShibbolethLogin:
 | 
				
			||||||
    """
 | 
					    """
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user