mirror of
https://github.com/Garmelon/PFERD.git
synced 2023-12-21 10:23:01 +01:00
Add no-videos flag to ILIAS crawler
This commit is contained in:
parent
3d4b997d4a
commit
ecdedfa1cf
@ -136,6 +136,7 @@ This crawler crawls the KIT ILIAS instance. It performs remote calls to a poor S
|
||||
- `link_file_plain_text`: If this is set to true, PFERD will generate plain-text files containing only the link
|
||||
target for external links. If this is false or not specified, PFERD will generate a neat, pretty and functional
|
||||
HTML page instead.
|
||||
- `no-videos`: If this is set to true, PFERD will not crawl or download any videos.
|
||||
## Authenticator types
|
||||
|
||||
### The `simple` authenticator
|
||||
|
@ -57,6 +57,9 @@ class KitIliasWebCrawlerSection(CrawlerSection):
|
||||
def link_file_use_plaintext(self) -> bool:
    """Return the boolean value of the ``link_file_plain_text`` option.

    Falls back to ``False`` when the option is not present in the section.
    """
    use_plaintext = self.s.getboolean("link_file_plain_text", fallback=False)
    return use_plaintext
|
||||
|
||||
def no_videos(self) -> bool:
    """Return whether the ``no-videos`` option is enabled for this section.

    The option is opt-in: when it is absent from the configuration this
    returns ``False``, so video elements are only skipped if the user
    explicitly sets ``no-videos`` to a true value.
    """
    # An unset flag must not silently disable video crawling.
    return self.s.getboolean("no-videos", fallback=False)
|
||||
|
||||
|
||||
_DIRECTORY_PAGES: Set[IliasElementType] = set([
|
||||
IliasElementType.EXERCISE,
|
||||
@ -66,6 +69,13 @@ _DIRECTORY_PAGES: Set[IliasElementType] = set([
|
||||
IliasElementType.VIDEO_FOLDER_MAYBE_PAGINATED,
|
||||
])
|
||||
|
||||
# Element types that count as video content; membership in this set is what
# the crawler checks when deciding whether an element is a video element.
_VIDEO_ELEMENTS: Set[IliasElementType] = {
    IliasElementType.VIDEO,
    IliasElementType.VIDEO_PLAYER,
    IliasElementType.VIDEO_FOLDER,
    IliasElementType.VIDEO_FOLDER_MAYBE_PAGINATED,
}
|
||||
|
||||
# Type variable bound to callables (any arguments) that return an awaitable
# resolving to None. NOTE(review): presumably used to type decorators that
# wrap async crawler methods - confirm against the usages in this file.
AWrapped = TypeVar("AWrapped", bound=Callable[..., Awaitable[None]])
|
||||
|
||||
|
||||
@ -153,6 +163,7 @@ class KitIliasWebCrawler(HttpCrawler):
|
||||
self._target = section.target()
|
||||
self._link_file_redirect_delay = section.link_file_redirect_delay()
|
||||
self._link_file_use_plaintext = section.link_file_use_plaintext()
|
||||
self._no_videos = section.no_videos()
|
||||
|
||||
async def _run(self) -> None:
|
||||
if isinstance(self._target, int):
|
||||
@ -240,6 +251,16 @@ class KitIliasWebCrawler(HttpCrawler):
|
||||
async def _handle_ilias_element(self, parent_path: PurePath, element: IliasPageElement) -> None:
|
||||
element_path = PurePath(parent_path, element.name)
|
||||
|
||||
if element.type in _VIDEO_ELEMENTS:
|
||||
log.explain_topic(f"Decision: Crawl video element {fmt_path(element_path)}")
|
||||
if self._no_videos:
|
||||
log.explain("Video crawling is disabled")
|
||||
log.explain("Answer: no")
|
||||
return
|
||||
else:
|
||||
log.explain("Video crawling is enabled")
|
||||
log.explain("Answer: yes")
|
||||
|
||||
if element.type == IliasElementType.FILE:
|
||||
await self._download_file(element, element_path)
|
||||
elif element.type == IliasElementType.FORUM:
|
||||
|
Loading…
x
Reference in New Issue
Block a user