Handle multi-stream videos

This commit is contained in:
I-Al-Istannen 2021-10-30 18:12:29 +02:00
parent ef7d5ea2d3
commit f9a3f9b9f2
2 changed files with 92 additions and 12 deletions

View File

@ -133,10 +133,22 @@ class IliasPage:
# parse it # parse it
json_object = json.loads(json_str) json_object = json.loads(json_str)
# and fetch the video url! streams = [stream for stream in json_object["streams"] if stream["type"] == "video"]
video_url = json_object["streams"][0]["sources"]["mp4"][0]["src"]
# and just fetch the lone video url!
if len(streams) == 1:
video_url = streams[0]["sources"]["mp4"][0]["src"]
return [IliasPageElement(IliasElementType.VIDEO, video_url, self._source_name)] return [IliasPageElement(IliasElementType.VIDEO, video_url, self._source_name)]
log.explain(f"Found multiple videos for stream at {self._source_name}")
items = []
for stream in sorted(streams, key=lambda stream: stream["content"]):
full_name = f"{self._source_name.replace('.mp4', '')} ({stream['content']}).mp4"
video_url = stream["sources"]["mp4"][0]["src"]
items.append(IliasPageElement(IliasElementType.VIDEO, video_url, full_name))
return items
def _find_video_entries(self) -> List[IliasPageElement]: def _find_video_entries(self) -> List[IliasPageElement]:
# ILIAS has three stages for video pages # ILIAS has three stages for video pages
# 1. The initial dummy page without any videos. This page contains the link to the listing # 1. The initial dummy page without any videos. This page contains the link to the listing

View File

@ -1,7 +1,7 @@
import asyncio import asyncio
import re import re
from pathlib import PurePath from pathlib import PurePath
from typing import Any, Awaitable, Callable, Dict, List, Optional, Set, TypeVar, Union from typing import Any, Awaitable, Callable, Dict, List, Optional, Set, TypeVar, Union, cast
import aiohttp import aiohttp
from aiohttp import hdrs from aiohttp import hdrs
@ -439,22 +439,90 @@ instance's greatest bottleneck.
element: IliasPageElement, element: IliasPageElement,
element_path: PurePath, element_path: PurePath,
) -> Optional[Awaitable[None]]: ) -> Optional[Awaitable[None]]:
# Videos will NOT be redownloaded - their content doesn't really change and they are chunky # Copy old mapping as it is likely still relevant
maybe_dl = await self.download(element_path, mtime=element.mtime, redownload=Redownload.NEVER) if self.prev_report:
if not maybe_dl: self.report.add_custom_value(
str(element_path),
self.prev_report.get_custom_value(str(element_path))
)
# A video might contain other videos, so let's "crawl" the video first
# to ensure rate limits apply. This must be a download as *this token*
# is re-used if the video consists of a single stream. In that case the
# file name is used and *not* the stream name the ilias html parser reported
# to ensure backwards compatibility.
maybe_dl = await self.download(element_path, redownload=Redownload.ALWAYS)
# If we do not want to crawl it (user filter) or we have every file
# from the cached mapping already, we can ignore this and bail
if not maybe_dl or self._all_videos_locally_present(element_path):
# Mark all existing cideos as known so they do not get deleted
# during dleanup. We "downloaded" them, just without actually making
# a network request as we assumed they did not change.
for video in self._previous_contained_videos(element_path):
await self.download(video)
return None return None
return self._download_video(element, maybe_dl) return self._download_video(element_path, element, maybe_dl)
def _previous_contained_videos(self, video_path: PurePath) -> List[PurePath]:
if not self.prev_report:
return []
custom_value = self.prev_report.get_custom_value(str(video_path))
if not custom_value:
return []
names = cast(List[str], custom_value)
folder = video_path.parent
return [PurePath(folder, name) for name in names]
def _all_videos_locally_present(self, video_path: PurePath) -> bool:
if contained_videos := self._previous_contained_videos(video_path):
log.explain_topic(f"Checking local cache for video {video_path.name}")
all_found_locally = True
for video in contained_videos:
all_found_locally = all_found_locally and self._output_dir.resolve(video).exists()
if all_found_locally:
log.explain("Found all videos locally, skipping enumeration request")
return True
log.explain("Missing at least one video, continuing with requests!")
return False
@_iorepeat(3, "downloading video") @_iorepeat(3, "downloading video")
async def _download_video(self, element: IliasPageElement, dl: DownloadToken) -> None: async def _download_video(
self,
original_path: PurePath,
element: IliasPageElement,
dl: DownloadToken
) -> None:
stream_elements: List[IliasPageElement] = []
async with dl as (bar, sink): async with dl as (bar, sink):
page = IliasPage(await self._get_page(element.url), element.url, element) page = IliasPage(await self._get_page(element.url), element.url, element)
real_element = page.get_child_elements()[0] stream_elements = page.get_child_elements()
log.explain(f"Streaming video from real url {real_element.url}") if len(stream_elements) > 1:
log.explain(f"Found multiple video streams for {element.name}")
else:
log.explain(f"Using single video mode for {element.name}")
stream_element = stream_elements[0]
await self._stream_from_url(stream_element.url, sink, bar, is_video=True)
self.report.add_custom_value(str(original_path), [original_path.name])
return
await self._stream_from_url(real_element.url, sink, bar, is_video=True) contained_video_paths: List[str] = []
for stream_element in stream_elements:
contained_video_paths.append(stream_element.name)
video_path = original_path.parent / stream_element.name
maybe_dl = await self.download(video_path, mtime=element.mtime, redownload=Redownload.NEVER)
if not maybe_dl:
continue
async with maybe_dl as (bar, sink):
log.explain(f"Streaming video from real url {stream_element.url}")
await self._stream_from_url(stream_element.url, sink, bar, is_video=True)
self.report.add_custom_value(str(original_path), contained_video_paths)
async def _handle_file( async def _handle_file(
self, self,