Compare commits

...

5 Commits

Author SHA1 Message Date
Joscha
259cfc20cc Bump version to 3.4.2 2022-10-26 18:26:17 +02:00
Joscha
37b51a66d8 Update changelog 2022-10-26 18:22:37 +02:00
I-Al-Istannen
f47d2f11d8 Append trailing slash to kit-ipd links to ensure urljoin works as expected 2022-10-25 20:28:22 +02:00
I-Al-Istannen
1b6be6bd79 Handle content pages in cards 2022-10-24 18:37:26 +02:00
I-Al-Istannen
e1430e6298 Handle (and ignore) surveys 2022-10-24 18:37:26 +02:00
5 changed files with 33 additions and 5 deletions

View File

@@ -22,9 +22,18 @@ ambiguous situations.
## Unreleased
## 3.4.2 - 2022-10-26
### Added
- Recognize and crawl content pages in cards
- Recognize and ignore surveys
### Fixed
- Forum crawling crashing when parsing empty (= 0 messages) threads
- Forum crawling crashing when a thread has no messages at all
- Forum crawling crashing when a forum has no threads at all
- Ilias login failing in some cases
- Crawling of paginated future meetings
- IPD crawler handling of URLs without a trailing slash
## 3.4.1 - 2022-08-17

View File

@@ -24,6 +24,7 @@ class IliasElementType(Enum):
LINK = "link"
BOOKING = "booking"
MEETING = "meeting"
SURVEY = "survey"
VIDEO = "video"
VIDEO_PLAYER = "video_player"
VIDEO_FOLDER = "video_folder"
@@ -730,6 +731,10 @@ class IliasPage:
return IliasElementType.TEST
if "fold" in icon["class"]:
return IliasElementType.FOLDER
if "copa" in icon["class"]:
return IliasElementType.FOLDER
if "svy" in icon["class"]:
return IliasElementType.SURVEY
_unexpected_html_warning()
log.warn_contd(f"Could not extract type from {icon} for card title {card_title}")

View File

@@ -377,9 +377,20 @@ instance's greatest bottleneck.
return None
return await self._handle_forum(element, element_path)
elif element.type == IliasElementType.TEST:
log.explain_topic(f"Decision: Crawl {fmt_path(element_path)}")
log.explain("Tests contain no relevant files")
log.explain("Answer: No")
log.status(
"[bold bright_black]",
"Ignored",
fmt_path(element_path),
"[bright_black](tests contain no relevant data)"
)
return None
elif element.type == IliasElementType.SURVEY:
log.status(
"[bold bright_black]",
"Ignored",
fmt_path(element_path),
"[bright_black](surveys contain no relevant data)"
)
return None
elif element.type == IliasElementType.LINK:
return await self._handle_link(element, element_path)

View File

@@ -24,6 +24,9 @@ class KitIpdCrawlerSection(HttpCrawlerSection):
if not target.startswith("https://"):
self.invalid_value("target", target, "Should be a URL")
if not target.endswith("/"):
target = target + "/"
return target
def link_regex(self) -> Pattern[str]:

View File

@@ -1,2 +1,2 @@
NAME = "PFERD"
VERSION = "3.4.1"
VERSION = "3.4.2"