mirror of
https://github.com/Garmelon/PFERD.git
synced 2023-12-21 10:23:01 +01:00
Add support for ILIAS mediacast listings
This commit is contained in:
parent
b3d412360b
commit
2184ac8040
@ -34,6 +34,7 @@ ambiguous situations.
|
|||||||
- `show_not_deleted` option to stop printing the "Not Deleted" status or report
|
- `show_not_deleted` option to stop printing the "Not Deleted" status or report
|
||||||
message. This combines nicely with the `no-delete-prompt-override` strategy,
|
message. This combines nicely with the `no-delete-prompt-override` strategy,
|
||||||
causing PFERD to mostly ignore local-only files.
|
causing PFERD to mostly ignore local-only files.
|
||||||
|
- support for mediacast video listings
|
||||||
|
|
||||||
## 3.4.3 - 2022-11-29
|
## 3.4.3 - 2022-11-29
|
||||||
|
|
||||||
|
@ -3,7 +3,7 @@ import re
|
|||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from datetime import date, datetime, timedelta
|
from datetime import date, datetime, timedelta
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
from typing import Dict, List, Optional, Union
|
from typing import Dict, List, Optional, Union, cast
|
||||||
from urllib.parse import urljoin, urlparse
|
from urllib.parse import urljoin, urlparse
|
||||||
|
|
||||||
from bs4 import BeautifulSoup, Tag
|
from bs4 import BeautifulSoup, Tag
|
||||||
@ -26,10 +26,12 @@ class IliasElementType(Enum):
|
|||||||
BOOKING = "booking"
|
BOOKING = "booking"
|
||||||
MEETING = "meeting"
|
MEETING = "meeting"
|
||||||
SURVEY = "survey"
|
SURVEY = "survey"
|
||||||
VIDEO = "video"
|
MEDIACAST_VIDEO_FOLDER = "mediacast_video_folder"
|
||||||
VIDEO_PLAYER = "video_player"
|
MEDIACAST_VIDEO = "mediacast_video"
|
||||||
VIDEO_FOLDER = "video_folder"
|
OPENCAST_VIDEO = "opencast_video"
|
||||||
VIDEO_FOLDER_MAYBE_PAGINATED = "video_folder_maybe_paginated"
|
OPENCAST_VIDEO_PLAYER = "opencast_video_player"
|
||||||
|
OPENCAST_VIDEO_FOLDER = "opencast_video_folder"
|
||||||
|
OPENCAST_VIDEO_FOLDER_MAYBE_PAGINATED = "opencast_video_folder_maybe_paginated"
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
@ -45,7 +47,8 @@ class IliasPageElement:
|
|||||||
r"eid=(?P<id>[0-9a-z\-]+)",
|
r"eid=(?P<id>[0-9a-z\-]+)",
|
||||||
r"file_(?P<id>\d+)",
|
r"file_(?P<id>\d+)",
|
||||||
r"ref_id=(?P<id>\d+)",
|
r"ref_id=(?P<id>\d+)",
|
||||||
r"target=[a-z]+_(?P<id>\d+)"
|
r"target=[a-z]+_(?P<id>\d+)",
|
||||||
|
r"mm_(?P<id>\d+)"
|
||||||
]
|
]
|
||||||
|
|
||||||
for regex in regexes:
|
for regex in regexes:
|
||||||
@ -105,9 +108,9 @@ class IliasPage:
|
|||||||
if self._is_video_player():
|
if self._is_video_player():
|
||||||
log.explain("Page is a video player, extracting URL")
|
log.explain("Page is a video player, extracting URL")
|
||||||
return self._player_to_video()
|
return self._player_to_video()
|
||||||
if self._is_video_listing():
|
if self._is_opencast_video_listing():
|
||||||
log.explain("Page is a video listing, searching for elements")
|
log.explain("Page is an opencast video listing, searching for elements")
|
||||||
return self._find_video_entries()
|
return self._find_opencast_video_entries()
|
||||||
if self._is_exercise_file():
|
if self._is_exercise_file():
|
||||||
log.explain("Page is an exercise, searching for elements")
|
log.explain("Page is an exercise, searching for elements")
|
||||||
return self._find_exercise_entries()
|
return self._find_exercise_entries()
|
||||||
@ -199,9 +202,9 @@ class IliasPage:
|
|||||||
if self._is_ilias_opencast_embedding():
|
if self._is_ilias_opencast_embedding():
|
||||||
log.explain("Unwrapping opencast embedding")
|
log.explain("Unwrapping opencast embedding")
|
||||||
return self.get_child_elements()[0]
|
return self.get_child_elements()[0]
|
||||||
if self._page_type == IliasElementType.VIDEO_FOLDER_MAYBE_PAGINATED:
|
if self._page_type == IliasElementType.OPENCAST_VIDEO_FOLDER_MAYBE_PAGINATED:
|
||||||
log.explain("Unwrapping video pagination")
|
log.explain("Unwrapping video pagination")
|
||||||
return self._find_video_entries_paginated()[0]
|
return self._find_opencast_video_entries_paginated()[0]
|
||||||
if self._contains_collapsed_future_meetings():
|
if self._contains_collapsed_future_meetings():
|
||||||
log.explain("Requesting *all* future meetings")
|
log.explain("Requesting *all* future meetings")
|
||||||
return self._uncollapse_future_meetings_url()
|
return self._uncollapse_future_meetings_url()
|
||||||
@ -219,7 +222,7 @@ class IliasPage:
|
|||||||
def _is_video_player(self) -> bool:
|
def _is_video_player(self) -> bool:
|
||||||
return "paella_config_file" in str(self._soup)
|
return "paella_config_file" in str(self._soup)
|
||||||
|
|
||||||
def _is_video_listing(self) -> bool:
|
def _is_opencast_video_listing(self) -> bool:
|
||||||
if self._is_ilias_opencast_embedding():
|
if self._is_ilias_opencast_embedding():
|
||||||
return True
|
return True
|
||||||
|
|
||||||
@ -319,14 +322,14 @@ class IliasPage:
|
|||||||
# and just fetch the lone video url!
|
# and just fetch the lone video url!
|
||||||
if len(streams) == 1:
|
if len(streams) == 1:
|
||||||
video_url = streams[0]["sources"]["mp4"][0]["src"]
|
video_url = streams[0]["sources"]["mp4"][0]["src"]
|
||||||
return [IliasPageElement(IliasElementType.VIDEO, video_url, self._source_name)]
|
return [IliasPageElement(IliasElementType.OPENCAST_VIDEO, video_url, self._source_name)]
|
||||||
|
|
||||||
log.explain(f"Found multiple videos for stream at {self._source_name}")
|
log.explain(f"Found multiple videos for stream at {self._source_name}")
|
||||||
items = []
|
items = []
|
||||||
for stream in sorted(streams, key=lambda stream: stream["content"]):
|
for stream in sorted(streams, key=lambda stream: stream["content"]):
|
||||||
full_name = f"{self._source_name.replace('.mp4', '')} ({stream['content']}).mp4"
|
full_name = f"{self._source_name.replace('.mp4', '')} ({stream['content']}).mp4"
|
||||||
video_url = stream["sources"]["mp4"][0]["src"]
|
video_url = stream["sources"]["mp4"][0]["src"]
|
||||||
items.append(IliasPageElement(IliasElementType.VIDEO, video_url, full_name))
|
items.append(IliasPageElement(IliasElementType.OPENCAST_VIDEO, video_url, full_name))
|
||||||
|
|
||||||
return items
|
return items
|
||||||
|
|
||||||
@ -385,7 +388,7 @@ class IliasPage:
|
|||||||
|
|
||||||
return items
|
return items
|
||||||
|
|
||||||
def _find_video_entries(self) -> List[IliasPageElement]:
|
def _find_opencast_video_entries(self) -> List[IliasPageElement]:
|
||||||
# ILIAS has three stages for video pages
|
# ILIAS has three stages for video pages
|
||||||
# 1. The initial dummy page without any videos. This page contains the link to the listing
|
# 1. The initial dummy page without any videos. This page contains the link to the listing
|
||||||
# 2. The video listing which might be paginated
|
# 2. The video listing which might be paginated
|
||||||
@ -405,27 +408,27 @@ class IliasPage:
|
|||||||
query_params = {"limit": "800", "cmd": "asyncGetTableGUI", "cmdMode": "asynch"}
|
query_params = {"limit": "800", "cmd": "asyncGetTableGUI", "cmdMode": "asynch"}
|
||||||
url = url_set_query_params(url, query_params)
|
url = url_set_query_params(url, query_params)
|
||||||
log.explain("Found ILIAS video frame page, fetching actual content next")
|
log.explain("Found ILIAS video frame page, fetching actual content next")
|
||||||
return [IliasPageElement(IliasElementType.VIDEO_FOLDER_MAYBE_PAGINATED, url, "")]
|
return [IliasPageElement(IliasElementType.OPENCAST_VIDEO_FOLDER_MAYBE_PAGINATED, url, "")]
|
||||||
|
|
||||||
is_paginated = self._soup.find(id=re.compile(r"tab_page_sel.+")) is not None
|
is_paginated = self._soup.find(id=re.compile(r"tab_page_sel.+")) is not None
|
||||||
|
|
||||||
if is_paginated and not self._page_type == IliasElementType.VIDEO_FOLDER:
|
if is_paginated and not self._page_type == IliasElementType.OPENCAST_VIDEO_FOLDER:
|
||||||
# We are in stage 2 - try to break pagination
|
# We are in stage 2 - try to break pagination
|
||||||
return self._find_video_entries_paginated()
|
return self._find_opencast_video_entries_paginated()
|
||||||
|
|
||||||
return self._find_video_entries_no_paging()
|
return self._find_opencast_video_entries_no_paging()
|
||||||
|
|
||||||
def _find_video_entries_paginated(self) -> List[IliasPageElement]:
|
def _find_opencast_video_entries_paginated(self) -> List[IliasPageElement]:
|
||||||
table_element: Tag = self._soup.find(name="table", id=re.compile(r"tbl_xoct_.+"))
|
table_element: Tag = self._soup.find(name="table", id=re.compile(r"tbl_xoct_.+"))
|
||||||
|
|
||||||
if table_element is None:
|
if table_element is None:
|
||||||
log.warn("Couldn't increase elements per page (table not found). I might miss elements.")
|
log.warn("Couldn't increase elements per page (table not found). I might miss elements.")
|
||||||
return self._find_video_entries_no_paging()
|
return self._find_opencast_video_entries_no_paging()
|
||||||
|
|
||||||
id_match = re.match(r"tbl_xoct_(.+)", table_element.attrs["id"])
|
id_match = re.match(r"tbl_xoct_(.+)", table_element.attrs["id"])
|
||||||
if id_match is None:
|
if id_match is None:
|
||||||
log.warn("Couldn't increase elements per page (table id not found). I might miss elements.")
|
log.warn("Couldn't increase elements per page (table id not found). I might miss elements.")
|
||||||
return self._find_video_entries_no_paging()
|
return self._find_opencast_video_entries_no_paging()
|
||||||
|
|
||||||
table_id = id_match.group(1)
|
table_id = id_match.group(1)
|
||||||
|
|
||||||
@ -434,9 +437,9 @@ class IliasPage:
|
|||||||
url = url_set_query_params(self._page_url, query_params)
|
url = url_set_query_params(self._page_url, query_params)
|
||||||
|
|
||||||
log.explain("Disabled pagination, retrying folder as a new entry")
|
log.explain("Disabled pagination, retrying folder as a new entry")
|
||||||
return [IliasPageElement(IliasElementType.VIDEO_FOLDER, url, "")]
|
return [IliasPageElement(IliasElementType.OPENCAST_VIDEO_FOLDER, url, "")]
|
||||||
|
|
||||||
def _find_video_entries_no_paging(self) -> List[IliasPageElement]:
|
def _find_opencast_video_entries_no_paging(self) -> List[IliasPageElement]:
|
||||||
"""
|
"""
|
||||||
Crawls the "second stage" video page. This page contains the actual video urls.
|
Crawls the "second stage" video page. This page contains the actual video urls.
|
||||||
"""
|
"""
|
||||||
@ -448,11 +451,11 @@ class IliasPage:
|
|||||||
results: List[IliasPageElement] = []
|
results: List[IliasPageElement] = []
|
||||||
|
|
||||||
for link in video_links:
|
for link in video_links:
|
||||||
results.append(self._listed_video_to_element(link))
|
results.append(self._listed_opencast_video_to_element(link))
|
||||||
|
|
||||||
return results
|
return results
|
||||||
|
|
||||||
def _listed_video_to_element(self, link: Tag) -> IliasPageElement:
|
def _listed_opencast_video_to_element(self, link: Tag) -> IliasPageElement:
|
||||||
# The link is part of a table with multiple columns, describing metadata.
|
# The link is part of a table with multiple columns, describing metadata.
|
||||||
# 6th or 7th child (1 indexed) is the modification time string. Try to find it
|
# 6th or 7th child (1 indexed) is the modification time string. Try to find it
|
||||||
# by parsing backwards from the end and finding something that looks like a date
|
# by parsing backwards from the end and finding something that looks like a date
|
||||||
@ -479,7 +482,9 @@ class IliasPage:
|
|||||||
video_url = self._abs_url_from_link(link)
|
video_url = self._abs_url_from_link(link)
|
||||||
|
|
||||||
log.explain(f"Found video {video_name!r} at {video_url}")
|
log.explain(f"Found video {video_name!r} at {video_url}")
|
||||||
return IliasPageElement(IliasElementType.VIDEO_PLAYER, video_url, video_name, modification_time)
|
return IliasPageElement(
|
||||||
|
IliasElementType.OPENCAST_VIDEO_PLAYER, video_url, video_name, modification_time
|
||||||
|
)
|
||||||
|
|
||||||
def _find_exercise_entries(self) -> List[IliasPageElement]:
|
def _find_exercise_entries(self) -> List[IliasPageElement]:
|
||||||
if self._soup.find(id="tab_submission"):
|
if self._soup.find(id="tab_submission"):
|
||||||
@ -622,9 +627,48 @@ class IliasPage:
|
|||||||
result.append(IliasPageElement(element_type, abs_url, element_name, description=description))
|
result.append(IliasPageElement(element_type, abs_url, element_name, description=description))
|
||||||
|
|
||||||
result += self._find_cards()
|
result += self._find_cards()
|
||||||
|
result += self._find_mediacast_videos()
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
def _find_mediacast_videos(self) -> List[IliasPageElement]:
|
||||||
|
videos: List[IliasPageElement] = []
|
||||||
|
|
||||||
|
for elem in cast(List[Tag], self._soup.select(".ilPlayerPreviewOverlayOuter")):
|
||||||
|
element_name = _sanitize_path_name(
|
||||||
|
elem.select_one(".ilPlayerPreviewDescription").getText().strip()
|
||||||
|
)
|
||||||
|
if not element_name.endswith(".mp4"):
|
||||||
|
# just to make sure it has some kinda-alrightish ending
|
||||||
|
element_name = element_name + ".mp4"
|
||||||
|
video_element = elem.find(name="video")
|
||||||
|
if not video_element:
|
||||||
|
_unexpected_html_warning()
|
||||||
|
log.warn_contd(f"No <video> element found for mediacast video '{element_name}'")
|
||||||
|
continue
|
||||||
|
|
||||||
|
videos.append(IliasPageElement(
|
||||||
|
type=IliasElementType.MEDIACAST_VIDEO,
|
||||||
|
url=self._abs_url_from_relative(video_element.get("src")),
|
||||||
|
name=element_name,
|
||||||
|
mtime=self._find_mediacast_video_mtime(elem.findParent(name="td"))
|
||||||
|
))
|
||||||
|
|
||||||
|
return videos
|
||||||
|
|
||||||
|
def _find_mediacast_video_mtime(self, enclosing_td: Tag) -> Optional[datetime]:
|
||||||
|
description_td: Tag = enclosing_td.findPreviousSibling("td")
|
||||||
|
if not description_td:
|
||||||
|
return None
|
||||||
|
|
||||||
|
meta_tag: Tag = description_td.find_all("p")[-1]
|
||||||
|
if not meta_tag:
|
||||||
|
return None
|
||||||
|
|
||||||
|
updated_str = meta_tag.getText().strip().replace("\n", " ")
|
||||||
|
updated_str = re.sub(".+?: ", "", updated_str)
|
||||||
|
return demangle_date(updated_str)
|
||||||
|
|
||||||
def _is_in_expanded_meeting(self, tag: Tag) -> bool:
|
def _is_in_expanded_meeting(self, tag: Tag) -> bool:
|
||||||
"""
|
"""
|
||||||
Returns whether a file is part of an expanded meeting.
|
Returns whether a file is part of an expanded meeting.
|
||||||
@ -796,7 +840,7 @@ class IliasPage:
|
|||||||
icon: Tag = card_root.select_one(".il-card-repository-head .icon")
|
icon: Tag = card_root.select_one(".il-card-repository-head .icon")
|
||||||
|
|
||||||
if "opencast" in icon["class"] or "xoct" in icon["class"]:
|
if "opencast" in icon["class"] or "xoct" in icon["class"]:
|
||||||
return IliasElementType.VIDEO_FOLDER_MAYBE_PAGINATED
|
return IliasElementType.OPENCAST_VIDEO_FOLDER_MAYBE_PAGINATED
|
||||||
if "exc" in icon["class"]:
|
if "exc" in icon["class"]:
|
||||||
return IliasElementType.EXERCISE
|
return IliasElementType.EXERCISE
|
||||||
if "webr" in icon["class"]:
|
if "webr" in icon["class"]:
|
||||||
@ -817,6 +861,8 @@ class IliasPage:
|
|||||||
return IliasElementType.SURVEY
|
return IliasElementType.SURVEY
|
||||||
if "file" in icon["class"]:
|
if "file" in icon["class"]:
|
||||||
return IliasElementType.FILE
|
return IliasElementType.FILE
|
||||||
|
if "mcst" in icon["class"]:
|
||||||
|
return IliasElementType.MEDIACAST_VIDEO_FOLDER
|
||||||
|
|
||||||
_unexpected_html_warning()
|
_unexpected_html_warning()
|
||||||
log.warn_contd(f"Could not extract type from {icon} for card title {card_title}")
|
log.warn_contd(f"Could not extract type from {icon} for card title {card_title}")
|
||||||
@ -858,6 +904,9 @@ class IliasPage:
|
|||||||
if "baseClass=ilLMPresentationGUI" in parsed_url.query:
|
if "baseClass=ilLMPresentationGUI" in parsed_url.query:
|
||||||
return IliasElementType.LEARNING_MODULE
|
return IliasElementType.LEARNING_MODULE
|
||||||
|
|
||||||
|
if "baseClass=ilMediaCastHandlerGUI" in parsed_url.query:
|
||||||
|
return IliasElementType.MEDIACAST_VIDEO_FOLDER
|
||||||
|
|
||||||
# Booking and Meeting can not be detected based on the link. They do have a ref_id though, so
|
# Booking and Meeting can not be detected based on the link. They do have a ref_id though, so
|
||||||
# try to guess it from the image.
|
# try to guess it from the image.
|
||||||
|
|
||||||
@ -909,7 +958,7 @@ class IliasPage:
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
if "opencast" in str(img_tag["alt"]).lower():
|
if "opencast" in str(img_tag["alt"]).lower():
|
||||||
return IliasElementType.VIDEO_FOLDER_MAYBE_PAGINATED
|
return IliasElementType.OPENCAST_VIDEO_FOLDER_MAYBE_PAGINATED
|
||||||
|
|
||||||
if str(img_tag["src"]).endswith("icon_exc.svg"):
|
if str(img_tag["src"]).endswith("icon_exc.svg"):
|
||||||
return IliasElementType.EXERCISE
|
return IliasElementType.EXERCISE
|
||||||
@ -929,6 +978,9 @@ class IliasPage:
|
|||||||
if str(img_tag["src"]).endswith("icon_tst.svg"):
|
if str(img_tag["src"]).endswith("icon_tst.svg"):
|
||||||
return IliasElementType.TEST
|
return IliasElementType.TEST
|
||||||
|
|
||||||
|
if str(img_tag["src"]).endswith("icon_mcst.svg"):
|
||||||
|
return IliasElementType.MEDIACAST_VIDEO_FOLDER
|
||||||
|
|
||||||
return IliasElementType.FOLDER
|
return IliasElementType.FOLDER
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
@ -86,15 +86,18 @@ _DIRECTORY_PAGES: Set[IliasElementType] = set([
|
|||||||
IliasElementType.EXERCISE_FILES,
|
IliasElementType.EXERCISE_FILES,
|
||||||
IliasElementType.FOLDER,
|
IliasElementType.FOLDER,
|
||||||
IliasElementType.MEETING,
|
IliasElementType.MEETING,
|
||||||
IliasElementType.VIDEO_FOLDER,
|
IliasElementType.MEDIACAST_VIDEO_FOLDER,
|
||||||
IliasElementType.VIDEO_FOLDER_MAYBE_PAGINATED,
|
IliasElementType.OPENCAST_VIDEO_FOLDER,
|
||||||
|
IliasElementType.OPENCAST_VIDEO_FOLDER_MAYBE_PAGINATED,
|
||||||
])
|
])
|
||||||
|
|
||||||
_VIDEO_ELEMENTS: Set[IliasElementType] = set([
|
_VIDEO_ELEMENTS: Set[IliasElementType] = set([
|
||||||
IliasElementType.VIDEO,
|
IliasElementType.MEDIACAST_VIDEO_FOLDER,
|
||||||
IliasElementType.VIDEO_PLAYER,
|
IliasElementType.MEDIACAST_VIDEO,
|
||||||
IliasElementType.VIDEO_FOLDER,
|
IliasElementType.OPENCAST_VIDEO,
|
||||||
IliasElementType.VIDEO_FOLDER_MAYBE_PAGINATED,
|
IliasElementType.OPENCAST_VIDEO_PLAYER,
|
||||||
|
IliasElementType.OPENCAST_VIDEO_FOLDER,
|
||||||
|
IliasElementType.OPENCAST_VIDEO_FOLDER_MAYBE_PAGINATED,
|
||||||
])
|
])
|
||||||
|
|
||||||
|
|
||||||
@ -403,10 +406,12 @@ instance's greatest bottleneck.
|
|||||||
return await self._handle_link(element, element_path)
|
return await self._handle_link(element, element_path)
|
||||||
elif element.type == IliasElementType.BOOKING:
|
elif element.type == IliasElementType.BOOKING:
|
||||||
return await self._handle_booking(element, element_path)
|
return await self._handle_booking(element, element_path)
|
||||||
elif element.type == IliasElementType.VIDEO:
|
elif element.type == IliasElementType.OPENCAST_VIDEO:
|
||||||
|
return await self._handle_file(element, element_path)
|
||||||
|
elif element.type == IliasElementType.OPENCAST_VIDEO_PLAYER:
|
||||||
|
return await self._handle_opencast_video(element, element_path)
|
||||||
|
elif element.type == IliasElementType.MEDIACAST_VIDEO:
|
||||||
return await self._handle_file(element, element_path)
|
return await self._handle_file(element, element_path)
|
||||||
elif element.type == IliasElementType.VIDEO_PLAYER:
|
|
||||||
return await self._handle_video(element, element_path)
|
|
||||||
elif element.type in _DIRECTORY_PAGES:
|
elif element.type in _DIRECTORY_PAGES:
|
||||||
return await self._handle_ilias_page(element.url, element, element_path)
|
return await self._handle_ilias_page(element.url, element, element_path)
|
||||||
else:
|
else:
|
||||||
@ -523,7 +528,7 @@ instance's greatest bottleneck.
|
|||||||
|
|
||||||
raise CrawlError("resolve_link_target failed even after authenticating")
|
raise CrawlError("resolve_link_target failed even after authenticating")
|
||||||
|
|
||||||
async def _handle_video(
|
async def _handle_opencast_video(
|
||||||
self,
|
self,
|
||||||
element: IliasPageElement,
|
element: IliasPageElement,
|
||||||
element_path: PurePath,
|
element_path: PurePath,
|
||||||
@ -544,18 +549,18 @@ instance's greatest bottleneck.
|
|||||||
|
|
||||||
# If we do not want to crawl it (user filter) or we have every file
|
# If we do not want to crawl it (user filter) or we have every file
|
||||||
# from the cached mapping already, we can ignore this and bail
|
# from the cached mapping already, we can ignore this and bail
|
||||||
if not maybe_dl or self._all_videos_locally_present(element_path):
|
if not maybe_dl or self._all_opencast_videos_locally_present(element_path):
|
||||||
# Mark all existing cideos as known so they do not get deleted
|
# Mark all existing cideos as known so they do not get deleted
|
||||||
# during dleanup. We "downloaded" them, just without actually making
|
# during dleanup. We "downloaded" them, just without actually making
|
||||||
# a network request as we assumed they did not change.
|
# a network request as we assumed they did not change.
|
||||||
for video in self._previous_contained_videos(element_path):
|
for video in self._previous_contained_opencast_videos(element_path):
|
||||||
await self.download(video)
|
await self.download(video)
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
return self._download_video(element_path, element, maybe_dl)
|
return self._download_opencast_video(element_path, element, maybe_dl)
|
||||||
|
|
||||||
def _previous_contained_videos(self, video_path: PurePath) -> List[PurePath]:
|
def _previous_contained_opencast_videos(self, video_path: PurePath) -> List[PurePath]:
|
||||||
if not self.prev_report:
|
if not self.prev_report:
|
||||||
return []
|
return []
|
||||||
custom_value = self.prev_report.get_custom_value(str(video_path))
|
custom_value = self.prev_report.get_custom_value(str(video_path))
|
||||||
@ -565,12 +570,12 @@ instance's greatest bottleneck.
|
|||||||
folder = video_path.parent
|
folder = video_path.parent
|
||||||
return [PurePath(folder, name) for name in names]
|
return [PurePath(folder, name) for name in names]
|
||||||
|
|
||||||
def _all_videos_locally_present(self, video_path: PurePath) -> bool:
|
def _all_opencast_videos_locally_present(self, video_path: PurePath) -> bool:
|
||||||
if contained_videos := self._previous_contained_videos(video_path):
|
if contained_videos := self._previous_contained_opencast_videos(video_path):
|
||||||
log.explain_topic(f"Checking local cache for video {video_path.name}")
|
log.explain_topic(f"Checking local cache for video {video_path.name}")
|
||||||
all_found_locally = True
|
all_found_locally = True
|
||||||
for video in contained_videos:
|
for video in contained_videos:
|
||||||
transformed_path = self._to_local_video_path(video)
|
transformed_path = self._to_local_opencast_video_path(video)
|
||||||
if transformed_path:
|
if transformed_path:
|
||||||
exists_locally = self._output_dir.resolve(transformed_path).exists()
|
exists_locally = self._output_dir.resolve(transformed_path).exists()
|
||||||
all_found_locally = all_found_locally and exists_locally
|
all_found_locally = all_found_locally and exists_locally
|
||||||
@ -580,14 +585,14 @@ instance's greatest bottleneck.
|
|||||||
log.explain("Missing at least one video, continuing with requests!")
|
log.explain("Missing at least one video, continuing with requests!")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def _to_local_video_path(self, path: PurePath) -> Optional[PurePath]:
|
def _to_local_opencast_video_path(self, path: PurePath) -> Optional[PurePath]:
|
||||||
if transformed := self._transformer.transform(path):
|
if transformed := self._transformer.transform(path):
|
||||||
return self._deduplicator.fixup_path(transformed)
|
return self._deduplicator.fixup_path(transformed)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@anoncritical
|
@anoncritical
|
||||||
@_iorepeat(3, "downloading video")
|
@_iorepeat(3, "downloading video")
|
||||||
async def _download_video(
|
async def _download_opencast_video(
|
||||||
self,
|
self,
|
||||||
original_path: PurePath,
|
original_path: PurePath,
|
||||||
element: IliasPageElement,
|
element: IliasPageElement,
|
||||||
@ -604,7 +609,7 @@ instance's greatest bottleneck.
|
|||||||
log.explain(f"Using single video mode for {element.name}")
|
log.explain(f"Using single video mode for {element.name}")
|
||||||
stream_element = stream_elements[0]
|
stream_element = stream_elements[0]
|
||||||
|
|
||||||
transformed_path = self._to_local_video_path(original_path)
|
transformed_path = self._to_local_opencast_video_path(original_path)
|
||||||
if not transformed_path:
|
if not transformed_path:
|
||||||
raise CrawlError(f"Download returned a path but transform did not for {original_path}")
|
raise CrawlError(f"Download returned a path but transform did not for {original_path}")
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user