mirror of
https://github.com/Garmelon/PFERD.git
synced 2025-07-12 14:12:30 +02:00
Add compatibility with ILIAS 8
This commit is contained in:
@ -24,6 +24,7 @@ ambiguous situations.
|
||||
|
||||
### Fixed
|
||||
- Video name deduplication
|
||||
- Compatibility with ILIAS 8
|
||||
|
||||
## 3.5.0 - 2023-09-13
|
||||
|
||||
|
@ -95,13 +95,9 @@ class IliasPage:
|
||||
|
||||
@staticmethod
|
||||
def is_root_page(soup: BeautifulSoup) -> bool:
|
||||
permalink = soup.find(id="current_perma_link")
|
||||
if permalink is None:
|
||||
if permalink := IliasPage.get_soup_permalink(soup):
|
||||
return "goto.php?target=root_" in permalink
|
||||
return False
|
||||
value = permalink.attrs.get("value")
|
||||
if value is None:
|
||||
return False
|
||||
return "goto.php?target=root_" in value
|
||||
|
||||
def get_child_elements(self) -> List[IliasPageElement]:
|
||||
"""
|
||||
@ -279,16 +275,14 @@ class IliasPage:
|
||||
return self._soup.find("a", attrs={"href": lambda x: x and "block_type=pditems" in x})
|
||||
|
||||
def _is_content_page(self) -> bool:
|
||||
link = self._soup.find(id="current_perma_link")
|
||||
if not link:
|
||||
if link := self.get_permalink():
|
||||
return "target=copa_" in link
|
||||
return False
|
||||
return "target=copa_" in link.get("value")
|
||||
|
||||
def _is_learning_module_page(self) -> bool:
|
||||
link = self._soup.find(id="current_perma_link")
|
||||
if not link:
|
||||
if link := self.get_permalink():
|
||||
return "target=pg_" in link
|
||||
return False
|
||||
return "target=pg_" in link.get("value")
|
||||
|
||||
def _contains_collapsed_future_meetings(self) -> bool:
|
||||
return self._uncollapse_future_meetings_url() is not None
|
||||
@ -513,8 +507,8 @@ class IliasPage:
|
||||
modification_string = link.parent.parent.parent.select_one(
|
||||
f"td.std:nth-child({index})"
|
||||
).getText().strip()
|
||||
if re.search(r"\d+\.\d+.\d+ - \d+:\d+", modification_string):
|
||||
modification_time = datetime.strptime(modification_string, "%d.%m.%Y - %H:%M")
|
||||
if match := re.search(r"\d+\.\d+.\d+ \d+:\d+", modification_string):
|
||||
modification_time = datetime.strptime(match.group(0), "%d.%m.%Y %H:%M")
|
||||
break
|
||||
|
||||
if modification_time is None:
|
||||
@ -613,7 +607,7 @@ class IliasPage:
|
||||
file_listings: List[Tag] = container.findAll(
|
||||
name="a",
|
||||
# download links contain the given command class
|
||||
attrs={"href": lambda x: x and "cmdClass=ilexsubmissionfilegui" in x}
|
||||
attrs={"href": lambda x: x and "cmdclass=ilexsubmissionfilegui" in x.lower()}
|
||||
)
|
||||
|
||||
# Add each listing as a new
|
||||
@ -1095,6 +1089,9 @@ class IliasPage:
|
||||
return True
|
||||
return False
|
||||
|
||||
def get_permalink(self) -> Optional[str]:
|
||||
return IliasPage.get_soup_permalink(self._soup)
|
||||
|
||||
def _abs_url_from_link(self, link_tag: Tag) -> str:
|
||||
"""
|
||||
Create an absolute url from an <a> tag.
|
||||
@ -1107,6 +1104,13 @@ class IliasPage:
|
||||
"""
|
||||
return urljoin(self._page_url, relative_url)
|
||||
|
||||
@staticmethod
|
||||
def get_soup_permalink(soup: BeautifulSoup) -> Optional[str]:
|
||||
perma_link_element: Tag = soup.select_one(".il-footer-permanent-url > a")
|
||||
if not perma_link_element or not perma_link_element.get("href"):
|
||||
return None
|
||||
return perma_link_element.get("href")
|
||||
|
||||
|
||||
def _unexpected_html_warning() -> None:
|
||||
log.warn("Encountered unexpected HTML structure, ignoring element.")
|
||||
|
@ -12,17 +12,17 @@ import yarl
|
||||
from aiohttp import hdrs
|
||||
from bs4 import BeautifulSoup, Tag
|
||||
|
||||
from .file_templates import Links, learning_module_template
|
||||
from .ilias_html_cleaner import clean, insert_base_markup
|
||||
from .kit_ilias_html import (IliasElementType, IliasForumThread, IliasLearningModulePage, IliasPage,
|
||||
IliasPageElement, _sanitize_path_name, parse_ilias_forum_export)
|
||||
from ..crawler import AWrapped, CrawlError, CrawlToken, CrawlWarning, DownloadToken, anoncritical
|
||||
from ..http_crawler import HttpCrawler, HttpCrawlerSection
|
||||
from ...auth import Authenticator, TfaAuthenticator
|
||||
from ...config import Config
|
||||
from ...logging import ProgressBar, log
|
||||
from ...output_dir import FileSink, Redownload
|
||||
from ...utils import fmt_path, soupify, url_set_query_param
|
||||
from ..crawler import AWrapped, CrawlError, CrawlToken, CrawlWarning, DownloadToken, anoncritical
|
||||
from ..http_crawler import HttpCrawler, HttpCrawlerSection
|
||||
from .file_templates import Links, learning_module_template
|
||||
from .ilias_html_cleaner import clean, insert_base_markup
|
||||
from .kit_ilias_html import (IliasElementType, IliasForumThread, IliasLearningModulePage, IliasPage,
|
||||
IliasPageElement, _sanitize_path_name, parse_ilias_forum_export)
|
||||
|
||||
TargetType = Union[str, int]
|
||||
|
||||
@ -130,6 +130,7 @@ def _iorepeat(attempts: int, name: str, failure_is_error: bool = False) -> Calla
|
||||
raise CrawlError("Impossible return in ilias _iorepeat")
|
||||
|
||||
return wrapper # type: ignore
|
||||
|
||||
return decorator
|
||||
|
||||
|
||||
@ -253,8 +254,8 @@ instance's greatest bottleneck.
|
||||
soup = await self._get_page(next_stage_url, root_page_allowed=True)
|
||||
|
||||
if current_parent is None and expected_id is not None:
|
||||
perma_link_element: Tag = soup.find(id="current_perma_link")
|
||||
if not perma_link_element or "crs_" not in perma_link_element.get("value"):
|
||||
perma_link = IliasPage.get_soup_permalink(soup)
|
||||
if not perma_link or "crs_" not in perma_link:
|
||||
raise CrawlError("Invalid course id? Didn't find anything looking like a course")
|
||||
|
||||
log.explain_topic(f"Parsing HTML page for {fmt_path(cl.path)}")
|
||||
@ -677,7 +678,7 @@ instance's greatest bottleneck.
|
||||
async with self.session.get(url, allow_redirects=is_video) as resp:
|
||||
if not is_video:
|
||||
# Redirect means we weren't authenticated
|
||||
if hdrs.LOCATION in resp.headers:
|
||||
if hdrs.LOCATION in resp.headers and "&cmd=sendfile" not in resp.headers[hdrs.LOCATION]:
|
||||
return False
|
||||
# we wanted a video but got HTML
|
||||
if is_video and "html" in resp.content_type:
|
||||
|
Reference in New Issue
Block a user