Add support for mob videos in page descriptions

This commit is contained in:
I-Al-Istannen
2024-10-28 20:15:55 +01:00
parent 81d6ff53c4
commit fa71a9f44f
3 changed files with 49 additions and 3 deletions

View File

@ -22,6 +22,9 @@ ambiguous situations.
## Unreleased ## Unreleased
### Added
- Support for MOB videos in page descriptions
### Changed ### Changed
- Remove videos from description pages - Remove videos from description pages

View File

@ -389,6 +389,8 @@ instance's greatest bottleneck.
return await self._handle_opencast_video(element, element_path) return await self._handle_opencast_video(element, element_path)
elif element.type == IliasElementType.MEDIACAST_VIDEO: elif element.type == IliasElementType.MEDIACAST_VIDEO:
return await self._handle_file(element, element_path) return await self._handle_file(element, element_path)
elif element.type == IliasElementType.MOB_VIDEO:
return await self._handle_file(element, element_path, is_video=True)
elif element.type in _DIRECTORY_PAGES: elif element.type in _DIRECTORY_PAGES:
return await self._handle_ilias_page(element.url, element, element_path) return await self._handle_ilias_page(element.url, element, element_path)
else: else:
@ -631,18 +633,19 @@ instance's greatest bottleneck.
self, self,
element: IliasPageElement, element: IliasPageElement,
element_path: PurePath, element_path: PurePath,
is_video: bool = False,
) -> Optional[Coroutine[Any, Any, None]]: ) -> Optional[Coroutine[Any, Any, None]]:
maybe_dl = await self.download(element_path, mtime=element.mtime) maybe_dl = await self.download(element_path, mtime=element.mtime)
if not maybe_dl: if not maybe_dl:
return None return None
return self._download_file(element, maybe_dl) return self._download_file(element, maybe_dl, is_video)
@_iorepeat(3, "downloading file") @_iorepeat(3, "downloading file")
@anoncritical @anoncritical
async def _download_file(self, element: IliasPageElement, dl: DownloadToken) -> None: async def _download_file(self, element: IliasPageElement, dl: DownloadToken, is_video: bool) -> None:
assert dl # The function is only reached when dl is not None assert dl # The function is only reached when dl is not None
async with dl as (bar, sink): async with dl as (bar, sink):
await self._stream_from_url(element.url, sink, bar, is_video=False) await self._stream_from_url(element.url, sink, bar, is_video)
async def _stream_from_url(self, url: str, sink: FileSink, bar: ProgressBar, is_video: bool) -> None: async def _stream_from_url(self, url: str, sink: FileSink, bar: ProgressBar, is_video: bool) -> None:
async def try_stream() -> bool: async def try_stream() -> bool:
@ -671,6 +674,13 @@ instance's greatest bottleneck.
if is_video and "html" in resp.content_type: if is_video and "html" in resp.content_type:
return False return False
# https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Range
if content_range := resp.headers.get(hdrs.CONTENT_RANGE, default=None):
parts = content_range.split("/")
if len(parts) == 2 and parts[1].isdigit():
bar.set_total(int(parts[1]))
# Prefer the content length header
if resp.content_length: if resp.content_length:
bar.set_total(resp.content_length) bar.set_total(resp.content_length)

View File

@ -28,6 +28,7 @@ class IliasElementType(Enum):
MEDIACAST_VIDEO = "mediacast_video" MEDIACAST_VIDEO = "mediacast_video"
MEDIACAST_VIDEO_FOLDER = "mediacast_video_folder" MEDIACAST_VIDEO_FOLDER = "mediacast_video_folder"
MEETING = "meeting" MEETING = "meeting"
MOB_VIDEO = "mob_video"
OPENCAST_VIDEO = "opencast_video" OPENCAST_VIDEO = "opencast_video"
OPENCAST_VIDEO_FOLDER = "opencast_video_folder" OPENCAST_VIDEO_FOLDER = "opencast_video_folder"
OPENCAST_VIDEO_FOLDER_MAYBE_PAGINATED = "opencast_video_folder_maybe_paginated" OPENCAST_VIDEO_FOLDER_MAYBE_PAGINATED = "opencast_video_folder_maybe_paginated"
@ -745,6 +746,7 @@ class IliasPage:
result += self._find_cards() result += self._find_cards()
result += self._find_mediacast_videos() result += self._find_mediacast_videos()
result += self._find_mob_videos()
return result return result
@ -773,6 +775,37 @@ class IliasPage:
return videos return videos
def _find_mob_videos(self) -> List[IliasPageElement]:
    """
    Collect the "mob" videos embedded in this page.

    Every ``figure.ilc_media_cont_MediaContainerHighlighted`` element is
    inspected. The file name is the stripped text of the figure's
    ``<figcaption>`` with ``.mp4`` appended, and the download URL is the
    ``src`` of the first ``<source type="video/mp4">`` inside the figure's
    ``<video>`` element.

    Figures that lack a caption, a ``<video>`` element or a usable mp4
    source are reported as unexpected HTML and skipped, so one malformed
    figure does not abort the scan of the remaining ones.

    Returns:
        One ``IliasPageElement`` of type ``MOB_VIDEO`` (with ``mtime=None``)
        per usable figure, in the order ``select`` yields the figures.
    """
    videos: List[IliasPageElement] = []

    for figure in self._soup.select("figure.ilc_media_cont_MediaContainerHighlighted"):
        # select_one() yields None when nothing matches; guard against it
        # instead of crashing with an AttributeError on getText().
        caption = figure.select_one("figcaption")
        if caption is None:
            _unexpected_html_warning()
            log.warn_contd("No <figcaption> element found for mob video")
            continue
        title = caption.getText().strip() + ".mp4"

        video_element = figure.select_one("video")
        if not video_element:
            _unexpected_html_warning()
            log.warn_contd(f"No <video> element found for mob video '{title}'")
            continue

        # Only the first mp4 source is considered; other formats are ignored.
        url = None
        for source in video_element.select("source"):
            if source.get("type", "") == "video/mp4":
                url = source.get("src")
                break

        # Also reached when the matching <source> carries no src attribute.
        if url is None:
            _unexpected_html_warning()
            log.warn_contd(f"No <source> element found for mob video '{title}'")
            continue

        videos.append(IliasPageElement.create_new(
            typ=IliasElementType.MOB_VIDEO,
            url=self._abs_url_from_relative(url),
            name=_sanitize_path_name(title),
            mtime=None
        ))

    return videos
def _find_mediacast_video_mtime(self, enclosing_td: Tag) -> Optional[datetime]: def _find_mediacast_video_mtime(self, enclosing_td: Tag) -> Optional[datetime]:
description_td: Tag = enclosing_td.findPreviousSibling("td") description_td: Tag = enclosing_td.findPreviousSibling("td")
if not description_td: if not description_td: