Compare commits

...

5 Commits

5 changed files with 33 additions and 5 deletions

View File

@ -22,9 +22,18 @@ ambiguous situations.
## Unreleased
## 3.4.2 - 2022-10-26
### Added
- Recognize and crawl content pages in cards
- Recognize and ignore surveys
### Fixed
- Forum crawling crashing when parsing empty (= 0 messages) threads - Forum crawling crashing when a thread has no messages at all
- Forum crawling crashing when a forum has no threads at all
- ILIAS login failing in some cases
- Crawling of paginated future meetings
- IPD crawler handling of URLs without trailing slash
## 3.4.1 - 2022-08-17

View File

@ -24,6 +24,7 @@ class IliasElementType(Enum):
LINK = "link" LINK = "link"
BOOKING = "booking" BOOKING = "booking"
MEETING = "meeting" MEETING = "meeting"
SURVEY = "survey"
VIDEO = "video" VIDEO = "video"
VIDEO_PLAYER = "video_player" VIDEO_PLAYER = "video_player"
VIDEO_FOLDER = "video_folder" VIDEO_FOLDER = "video_folder"
@ -730,6 +731,10 @@ class IliasPage:
return IliasElementType.TEST return IliasElementType.TEST
if "fold" in icon["class"]: if "fold" in icon["class"]:
return IliasElementType.FOLDER return IliasElementType.FOLDER
if "copa" in icon["class"]:
return IliasElementType.FOLDER
if "svy" in icon["class"]:
return IliasElementType.SURVEY
_unexpected_html_warning() _unexpected_html_warning()
log.warn_contd(f"Could not extract type from {icon} for card title {card_title}") log.warn_contd(f"Could not extract type from {icon} for card title {card_title}")

View File

@ -377,9 +377,20 @@ instance's greatest bottleneck.
return None return None
return await self._handle_forum(element, element_path) return await self._handle_forum(element, element_path)
elif element.type == IliasElementType.TEST: elif element.type == IliasElementType.TEST:
log.explain_topic(f"Decision: Crawl {fmt_path(element_path)}") log.status(
log.explain("Tests contain no relevant files") "[bold bright_black]",
log.explain("Answer: No") "Ignored",
fmt_path(element_path),
"[bright_black](tests contain no relevant data)"
)
return None
elif element.type == IliasElementType.SURVEY:
log.status(
"[bold bright_black]",
"Ignored",
fmt_path(element_path),
"[bright_black](surveys contain no relevant data)"
)
return None return None
elif element.type == IliasElementType.LINK: elif element.type == IliasElementType.LINK:
return await self._handle_link(element, element_path) return await self._handle_link(element, element_path)

View File

@ -24,6 +24,9 @@ class KitIpdCrawlerSection(HttpCrawlerSection):
if not target.startswith("https://"): if not target.startswith("https://"):
self.invalid_value("target", target, "Should be a URL") self.invalid_value("target", target, "Should be a URL")
if not target.endswith("/"):
target = target + "/"
return target return target
def link_regex(self) -> Pattern[str]: def link_regex(self) -> Pattern[str]:

View File

@ -1,2 +1,2 @@
NAME = "PFERD" NAME = "PFERD"
VERSION = "3.4.1" VERSION = "3.4.2"