From b01f0934749ba613881446dfa0b41ebf803c3204 Mon Sep 17 00:00:00 2001
From: Philipp Fruck
Date: Mon, 8 Apr 2024 11:55:48 +0200
Subject: [PATCH] fix: Element detection for other universities

Other universities might use other URL schemes for different element types
---
 PFERD/crawl/ilias/kit_ilias_html.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py
index 866f7c0..54d56a0 100644
--- a/PFERD/crawl/ilias/kit_ilias_html.py
+++ b/PFERD/crawl/ilias/kit_ilias_html.py
@@ -48,6 +48,10 @@ class IliasPageElement:
         regexes = [
             r"eid=(?P<id>[0-9a-z\-]+)",
             r"file_(?P<id>\d+)",
+            r"copa_(?P<id>\d+)",
+            r"fold_(?P<id>\d+)",
+            r"frm_(?P<id>\d+)",
+            r"exc_(?P<id>\d+)",
             r"ref_id=(?P<id>\d+)",
             r"target=[a-z]+_(?P<id>\d+)",
             r"mm_(?P<id>\d+)"
@@ -997,6 +1001,19 @@ class IliasPage:
         if "baseClass=ilSAHSPresentationGUI" in parsed_url.query:
             return IliasElementType.SCORM_LEARNING_MODULE
 
+        # other universities might have content type specified in URL path
+        if "_file_" in parsed_url.path:
+            return IliasElementType.FILE
+
+        if "_fold_" in parsed_url.path or "_copa_" in parsed_url.path:
+            return IliasElementType.FOLDER
+
+        if "_frm_" in parsed_url.path:
+            return IliasElementType.FORUM
+
+        if "_exc_" in parsed_url.path:
+            return IliasElementType.EXERCISE
+
         # Booking and Meeting can not be detected based on the link. They do have a ref_id though, so
         # try to guess it from the image.