mirror of
https://github.com/Garmelon/PFERD.git
synced 2023-12-21 10:23:01 +01:00
Add no-videos flag to ILIAS crawler
This commit is contained in:
parent
3d4b997d4a
commit
ecdedfa1cf
@ -136,6 +136,7 @@ This crawler crawls the KIT ILIAS instance. It performs remote calls to a poor S
|
|||||||
- `link_file_plain_text`: If this is set to true, PFERD will generate plain-text files containing only the link
|
- `link_file_plain_text`: If this is set to true, PFERD will generate plain-text files containing only the link
|
||||||
target for external links. If this is false or not specified, PFERD will generate a neat, pretty and functional
|
target for external links. If this is false or not specified, PFERD will generate a neat, pretty and functional
|
||||||
HTML page instead.
|
HTML page instead.
|
||||||
|
- `no-videos`: If this is set to true or not specified, PFERD will not crawl or download any videos. Set it to false to crawl and download videos.
|
||||||
## Authenticator types
|
## Authenticator types
|
||||||
|
|
||||||
### The `simple` authenticator
|
### The `simple` authenticator
|
||||||
|
@ -57,6 +57,9 @@ class KitIliasWebCrawlerSection(CrawlerSection):
|
|||||||
def link_file_use_plaintext(self) -> bool:
|
def link_file_use_plaintext(self) -> bool:
|
||||||
return self.s.getboolean("link_file_plain_text", fallback=False)
|
return self.s.getboolean("link_file_plain_text", fallback=False)
|
||||||
|
|
||||||
|
def no_videos(self) -> bool:
    """
    Return whether video crawling is switched off for this section.

    Reads the boolean "no-videos" option from the section. If the option is
    not present, the result is True.
    """
    skip_videos: bool = self.s.getboolean("no-videos", fallback=True)
    return skip_videos
|
||||||
|
|
||||||
|
|
||||||
_DIRECTORY_PAGES: Set[IliasElementType] = set([
|
_DIRECTORY_PAGES: Set[IliasElementType] = set([
|
||||||
IliasElementType.EXERCISE,
|
IliasElementType.EXERCISE,
|
||||||
@ -66,6 +69,13 @@ _DIRECTORY_PAGES: Set[IliasElementType] = set([
|
|||||||
IliasElementType.VIDEO_FOLDER_MAYBE_PAGINATED,
|
IliasElementType.VIDEO_FOLDER_MAYBE_PAGINATED,
|
||||||
])
|
])
|
||||||
|
|
||||||
|
# Element types that count as video content: single videos, the video player
# and both kinds of video folder. Elements of these types are subject to the
# video-crawling check in _handle_ilias_element.
_VIDEO_ELEMENTS: Set[IliasElementType] = {
    IliasElementType.VIDEO,
    IliasElementType.VIDEO_PLAYER,
    IliasElementType.VIDEO_FOLDER,
    IliasElementType.VIDEO_FOLDER_MAYBE_PAGINATED,
}
|
||||||
|
|
||||||
AWrapped = TypeVar("AWrapped", bound=Callable[..., Awaitable[None]])
|
AWrapped = TypeVar("AWrapped", bound=Callable[..., Awaitable[None]])
|
||||||
|
|
||||||
|
|
||||||
@ -153,6 +163,7 @@ class KitIliasWebCrawler(HttpCrawler):
|
|||||||
self._target = section.target()
|
self._target = section.target()
|
||||||
self._link_file_redirect_delay = section.link_file_redirect_delay()
|
self._link_file_redirect_delay = section.link_file_redirect_delay()
|
||||||
self._link_file_use_plaintext = section.link_file_use_plaintext()
|
self._link_file_use_plaintext = section.link_file_use_plaintext()
|
||||||
|
self._no_videos = section.no_videos()
|
||||||
|
|
||||||
async def _run(self) -> None:
|
async def _run(self) -> None:
|
||||||
if isinstance(self._target, int):
|
if isinstance(self._target, int):
|
||||||
@ -240,6 +251,16 @@ class KitIliasWebCrawler(HttpCrawler):
|
|||||||
async def _handle_ilias_element(self, parent_path: PurePath, element: IliasPageElement) -> None:
|
async def _handle_ilias_element(self, parent_path: PurePath, element: IliasPageElement) -> None:
|
||||||
element_path = PurePath(parent_path, element.name)
|
element_path = PurePath(parent_path, element.name)
|
||||||
|
|
||||||
|
if element.type in _VIDEO_ELEMENTS:
|
||||||
|
log.explain_topic(f"Decision: Crawl video element {fmt_path(element_path)}")
|
||||||
|
if self._no_videos:
|
||||||
|
log.explain("Video crawling is disabled")
|
||||||
|
log.explain("Answer: no")
|
||||||
|
return
|
||||||
|
else:
|
||||||
|
log.explain("Video crawling is enabled")
|
||||||
|
log.explain("Answer: yes")
|
||||||
|
|
||||||
if element.type == IliasElementType.FILE:
|
if element.type == IliasElementType.FILE:
|
||||||
await self._download_file(element, element_path)
|
await self._download_file(element, element_path)
|
||||||
elif element.type == IliasElementType.FORUM:
|
elif element.type == IliasElementType.FORUM:
|
||||||
|
Loading…
Reference in New Issue
Block a user