diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py
index 866f7c0..54d56a0 100644
--- a/PFERD/crawl/ilias/kit_ilias_html.py
+++ b/PFERD/crawl/ilias/kit_ilias_html.py
@@ -48,6 +48,10 @@ class IliasPageElement:
         regexes = [
             r"eid=(?P<id>[0-9a-z\-]+)",
             r"file_(?P<id>\d+)",
+            r"copa_(?P<id>\d+)",
+            r"fold_(?P<id>\d+)",
+            r"frm_(?P<id>\d+)",
+            r"exc_(?P<id>\d+)",
             r"ref_id=(?P<id>\d+)",
             r"target=[a-z]+_(?P<id>\d+)",
             r"mm_(?P<id>\d+)"
@@ -997,6 +1001,19 @@ class IliasPage:
         if "baseClass=ilSAHSPresentationGUI" in parsed_url.query:
             return IliasElementType.SCORM_LEARNING_MODULE
 
+        # other universities might have content type specified in URL path
+        if "_file_" in parsed_url.path:
+            return IliasElementType.FILE
+
+        if "_fold_" in parsed_url.path or "_copa_" in parsed_url.path:
+            return IliasElementType.FOLDER
+
+        if "_frm_" in parsed_url.path:
+            return IliasElementType.FORUM
+
+        if "_exc_" in parsed_url.path:
+            return IliasElementType.EXERCISE
+
         # Booking and Meeting can not be detected based on the link. They do have a ref_id though, so
         # try to guess it from the image.
 