Add no-videos flag to ILIAS crawler

This commit is contained in:
I-Al-Istannen 2021-05-23 12:36:09 +02:00
parent 3d4b997d4a
commit ecdedfa1cf
2 changed files with 22 additions and 0 deletions

View File

@ -136,6 +136,7 @@ This crawler crawls the KIT ILIAS instance. It performs remote calls to a poor S
- `link_file_plain_text`: If this is set to true, PFERD will generate plain-text files containing only the link
target for external links. If this is false or not specified, PFERD will generate a neat, pretty and functional
HTML page instead.
- `no-videos`: If this is set to true, PFERD will not crawl or download any videos.
## Authenticator types
### The `simple` authenticator

View File

@ -57,6 +57,9 @@ class KitIliasWebCrawlerSection(CrawlerSection):
def link_file_use_plaintext(self) -> bool: def link_file_use_plaintext(self) -> bool:
return self.s.getboolean("link_file_plain_text", fallback=False) return self.s.getboolean("link_file_plain_text", fallback=False)
def no_videos(self) -> bool:
    """Return whether video crawling has been switched off for this section.

    Reads the boolean ``no-videos`` option from the section. The option is
    an opt-out: when it is absent, videos are crawled as before, so the
    fallback must be ``False``. (A fallback of ``True`` would silently
    disable video crawling for every config that never mentions the flag.)

    Returns:
        ``True`` only if ``no-videos`` is explicitly set to a true value,
        ``False`` if it is set to a false value or not specified.
    """
    # NOTE(review): assumes self.s is a configparser section proxy -- confirm.
    return self.s.getboolean("no-videos", fallback=False)
_DIRECTORY_PAGES: Set[IliasElementType] = set([ _DIRECTORY_PAGES: Set[IliasElementType] = set([
IliasElementType.EXERCISE, IliasElementType.EXERCISE,
@ -66,6 +69,13 @@ _DIRECTORY_PAGES: Set[IliasElementType] = set([
IliasElementType.VIDEO_FOLDER_MAYBE_PAGINATED, IliasElementType.VIDEO_FOLDER_MAYBE_PAGINATED,
]) ])
# Element types that count as video content. The crawler checks membership
# in this set to decide whether an element is subject to the no-videos flag.
_VIDEO_ELEMENTS: Set[IliasElementType] = {
    IliasElementType.VIDEO,
    IliasElementType.VIDEO_PLAYER,
    IliasElementType.VIDEO_FOLDER,
    IliasElementType.VIDEO_FOLDER_MAYBE_PAGINATED,
}
AWrapped = TypeVar("AWrapped", bound=Callable[..., Awaitable[None]]) AWrapped = TypeVar("AWrapped", bound=Callable[..., Awaitable[None]])
@ -153,6 +163,7 @@ class KitIliasWebCrawler(HttpCrawler):
self._target = section.target() self._target = section.target()
self._link_file_redirect_delay = section.link_file_redirect_delay() self._link_file_redirect_delay = section.link_file_redirect_delay()
self._link_file_use_plaintext = section.link_file_use_plaintext() self._link_file_use_plaintext = section.link_file_use_plaintext()
self._no_videos = section.no_videos()
async def _run(self) -> None: async def _run(self) -> None:
if isinstance(self._target, int): if isinstance(self._target, int):
@ -240,6 +251,16 @@ class KitIliasWebCrawler(HttpCrawler):
async def _handle_ilias_element(self, parent_path: PurePath, element: IliasPageElement) -> None: async def _handle_ilias_element(self, parent_path: PurePath, element: IliasPageElement) -> None:
element_path = PurePath(parent_path, element.name) element_path = PurePath(parent_path, element.name)
if element.type in _VIDEO_ELEMENTS:
log.explain_topic(f"Decision: Crawl video element {fmt_path(element_path)}")
if self._no_videos:
log.explain("Video crawling is disabled")
log.explain("Answer: no")
return
else:
log.explain("Video crawling is enabled")
log.explain("Answer: yes")
if element.type == IliasElementType.FILE: if element.type == IliasElementType.FILE:
await self._download_file(element, element_path) await self._download_file(element, element_path)
elif element.type == IliasElementType.FORUM: elif element.type == IliasElementType.FORUM: