diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py
index c0ebdc9..c4a0b46 100644
--- a/PFERD/crawl/ilias/kit_ilias_html.py
+++ b/PFERD/crawl/ilias/kit_ilias_html.py
@@ -43,6 +43,8 @@ class IliasPageElement:
         regexes = [
             r"eid=(?P<id>[0-9a-z\-]+)",
             r"file_(?P<id>\d+)",
+            r"fold_(?P<id>\d+)",
+            r"frm_(?P<id>\d+)",
             r"ref_id=(?P<id>\d+)",
             r"target=[a-z]+_(?P<id>\d+)"
         ]
@@ -773,6 +775,16 @@ class IliasPage:
         if "cmdClass=ilobjtestgui" in parsed_url.query:
             return IliasElementType.TEST
 
+        # other universities might have content type specified in URL path
+        if "_file_" in parsed_url.path:
+            return IliasElementType.FILE
+
+        if "_fold_" in parsed_url.path:
+            return IliasElementType.FOLDER
+
+        if "_frm_" in parsed_url.path:
+            return IliasElementType.FORUM
+
         # Booking and Meeting can not be detected based on the link. They do have a ref_id though, so
         # try to guess it from the image.
 