Ignore SCORM learning modules

This commit is contained in:
I-Al-Istannen 2023-08-29 13:51:19 +02:00
parent df3514cd03
commit 50b50513c6
3 changed files with 17 additions and 1 deletions

View File

@@ -30,6 +30,7 @@ ambiguous situations.
- Remove size suffix for files in content pages
- Sanitize ascii control characters on Windows
- Crawling of paginated past meetings
- Ignore SCORM learning modules
### Added
- `no-delete-prompt-override` conflict resolution strategy

View File

@@ -27,6 +27,7 @@ class IliasElementType(Enum):
BOOKING = "booking"
MEETING = "meeting"
SURVEY = "survey"
SCORM_LEARNING_MODULE = "scorm_learning_module"
MEDIACAST_VIDEO_FOLDER = "mediacast_video_folder"
MEDIACAST_VIDEO = "mediacast_video"
OPENCAST_VIDEO = "opencast_video"
@@ -953,6 +954,9 @@ class IliasPage:
if "baseClass=ilMediaCastHandlerGUI" in parsed_url.query:
return IliasElementType.MEDIACAST_VIDEO_FOLDER
if "baseClass=ilSAHSPresentationGUI" in parsed_url.query:
return IliasElementType.SCORM_LEARNING_MODULE
# Booking and Meeting can not be detected based on the link. They do have a ref_id though, so
# try to guess it from the image.
@@ -1031,6 +1035,9 @@ class IliasPage:
if str(img_tag["src"]).endswith("icon_mcst.svg"):
return IliasElementType.MEDIACAST_VIDEO_FOLDER
if str(img_tag["src"]).endswith("icon_sahs.svg"):
return IliasElementType.SCORM_LEARNING_MODULE
return IliasElementType.FOLDER
@staticmethod

View File

@@ -403,6 +403,14 @@ instance's greatest bottleneck.
"[bright_black](surveys contain no relevant data)"
)
return None
elif element.type == IliasElementType.SCORM_LEARNING_MODULE:
log.status(
"[bold bright_black]",
"Ignored",
fmt_path(element_path),
"[bright_black](scorm learning modules are not supported)"
)
return None
elif element.type == IliasElementType.LEARNING_MODULE:
return await self._handle_learning_module(element, element_path)
elif element.type == IliasElementType.LINK:
@@ -897,7 +905,7 @@ instance's greatest bottleneck.
soup = soupify(await request.read())
if self._is_logged_in(soup):
return self._verify_page(soup, url, root_page_allowed)
raise CrawlError("get_page failed even after authenticating")
raise CrawlError(f"get_page failed even after authenticating on {url!r}")
def _verify_page(self, soup: BeautifulSoup, url: str, root_page_allowed: bool) -> BeautifulSoup:
if IliasPage.is_root_page(soup) and not root_page_allowed: