diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py
index 61df57a..36da7d4 100644
--- a/PFERD/crawl/ilias/kit_ilias_html.py
+++ b/PFERD/crawl/ilias/kit_ilias_html.py
@@ -16,6 +16,7 @@ TargetType = Union[str, int]
class IliasElementType(Enum):
EXERCISE = "exercise"
+ EXERCISE_FILES = "exercise_files" # own submitted files
FILE = "file"
FOLDER = "folder"
FORUM = "forum"
@@ -197,6 +198,43 @@ class IliasPage:
return IliasPageElement(IliasElementType.VIDEO_PLAYER, video_url, video_name, modification_time)
def _find_exercise_entries(self) -> List[IliasPageElement]:
+ if self._soup.find(id="tab_submission"):
+ log.explain("Found submission tab. This is an exercise detail page")
+ return self._find_exercise_entries_detail_page()
+ log.explain("Found no submission tab. This is an exercise root page")
+ return self._find_exercise_entries_root_page()
+
+ def _find_exercise_entries_detail_page(self) -> List[IliasPageElement]:
+ results: List[IliasPageElement] = []
+
+ # Find all download links in the container (this will contain all the files)
+ download_links: List[Tag] = self._soup.findAll(
+ name="a",
+ # download links contain the given command class
+ attrs={"href": lambda x: x and "cmd=download" in x},
+ text="Download"
+ )
+
+ for link in download_links:
+ parent_row: Tag = link.findParent("tr")
+ children: List[Tag] = parent_row.findChildren("td")
+
+ #
+ # 0 1 2 3 4
+ name = _sanitize_path_name(children[1].getText().strip())
+ date = demangle_date(children[3].getText().strip())
+
+ log.explain(f"Found exercise detail entry {name!r}")
+ results.append(IliasPageElement(
+ IliasElementType.FILE,
+ self._abs_url_from_link(link),
+ name,
+ date
+ ))
+
+ return results
+
+ def _find_exercise_entries_root_page(self) -> List[IliasPageElement]:
results: List[IliasPageElement] = []
# Each assignment is in an accordion container
@@ -205,6 +243,8 @@ class IliasPage:
for container in assignment_containers:
# Fetch the container name out of the header to use it in the path
container_name = container.select_one(".ilAssignmentHeader").getText().strip()
+ log.explain(f"Found exercise container {container_name!r}")
+
# Find all download links in the container (this will contain all the files)
files: List[Tag] = container.findAll(
name="a",
@@ -213,8 +253,6 @@ class IliasPage:
text="Download"
)
- log.explain(f"Found exercise container {container_name!r}")
-
# Grab each file as you now have the link
for file_link in files:
# Two divs, side by side. Left is the name, right is the link ==> get left
@@ -231,6 +269,25 @@ class IliasPage:
None # We do not have any timestamp
))
+ # Find all links to file listings (e.g. "Submitted Files" for groups)
+ file_listings: List[Tag] = container.findAll(
+ name="a",
+ # download links contain the given command class
+ attrs={"href": lambda x: x and "cmdClass=ilexsubmissionfilegui" in x}
+ )
+
+ # Add each listing as a new
+ for listing in file_listings:
+ file_name = _sanitize_path_name(listing.getText().strip())
+ url = self._abs_url_from_link(listing)
+ log.explain(f"Found exercise detail {file_name!r} at {url}")
+ results.append(IliasPageElement(
+ IliasElementType.EXERCISE_FILES,
+ url,
+ container_name + "/" + file_name,
+ None # we do not have any timestamp
+ ))
+
return results
def _find_normal_entries(self) -> List[IliasPageElement]:
@@ -349,7 +406,7 @@ class IliasPage:
if found_parent is None:
_unexpected_html_warning()
- log.warn_contd(f"Tried to figure out element type, but did not find an icon for {url!r}")
+ log.warn_contd(f"Tried to figure out element type, but did not find an icon for {url}")
return None
# Find the small descriptive icon to figure out the type
@@ -357,7 +414,7 @@ class IliasPage:
if img_tag is None:
_unexpected_html_warning()
- log.warn_contd(f"Tried to figure out element type, but did not find an image for {url!r}")
+ log.warn_contd(f"Tried to figure out element type, but did not find an image for {url}")
return None
if "opencast" in str(img_tag["alt"]).lower():
diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py
index d488974..11b27d1 100644
--- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py
+++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py
@@ -61,6 +61,7 @@ class KitIliasWebCrawlerSection(HttpCrawlerSection):
_DIRECTORY_PAGES: Set[IliasElementType] = set([
IliasElementType.EXERCISE,
+ IliasElementType.EXERCISE_FILES,
IliasElementType.FOLDER,
IliasElementType.MEETING,
IliasElementType.VIDEO_FOLDER,