From 723a82f61786735f236baaa75c70d045b1bd6c8f Mon Sep 17 00:00:00 2001 From: Philipp Fruck Date: Thu, 23 Mar 2023 17:09:29 +0100 Subject: [PATCH] improve compatibility with other ilias instances When running against a custom Ilias instance, files, folders and forums were not detected by the current implementation, since the given Ilias instance encodes the element type in the URL path instead of a target parameter --- PFERD/crawl/ilias/kit_ilias_html.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index c0ebdc9..c4a0b46 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -43,6 +43,8 @@ class IliasPageElement: regexes = [ r"eid=(?P<id>[0-9a-z\-]+)", r"file_(?P<id>\d+)", + r"fold_(?P<id>\d+)", + r"frm_(?P<id>\d+)", r"ref_id=(?P<id>\d+)", r"target=[a-z]+_(?P<id>\d+)" ] @@ -773,6 +775,16 @@ class IliasPage: if "cmdClass=ilobjtestgui" in parsed_url.query: return IliasElementType.TEST + # other universities might have content type specified in URL path + if "_file_" in parsed_url.path: + return IliasElementType.FILE + + if "_fold_" in parsed_url.path: + return IliasElementType.FOLDER + + if "_frm_" in parsed_url.path: + return IliasElementType.FORUM + # Booking and Meeting can not be detected based on the link. They do have a ref_id though, so # try to guess it from the image.