mirror of
https://github.com/Garmelon/PFERD.git
synced 2023-12-21 10:23:01 +01:00
Handle exercise detail containers in ILIAS html parser
This commit is contained in:
parent
d44f6966c2
commit
342076ee0e
@ -16,6 +16,7 @@ TargetType = Union[str, int]
|
|||||||
|
|
||||||
class IliasElementType(Enum):
|
class IliasElementType(Enum):
|
||||||
EXERCISE = "exercise"
|
EXERCISE = "exercise"
|
||||||
|
EXERCISE_FILES = "exercise_files" # own submitted files
|
||||||
FILE = "file"
|
FILE = "file"
|
||||||
FOLDER = "folder"
|
FOLDER = "folder"
|
||||||
FORUM = "forum"
|
FORUM = "forum"
|
||||||
@ -197,6 +198,43 @@ class IliasPage:
|
|||||||
return IliasPageElement(IliasElementType.VIDEO_PLAYER, video_url, video_name, modification_time)
|
return IliasPageElement(IliasElementType.VIDEO_PLAYER, video_url, video_name, modification_time)
|
||||||
|
|
||||||
def _find_exercise_entries(self) -> List[IliasPageElement]:
    """
    Collect the entries of an exercise page.

    The page is treated as an exercise detail page when it contains an element with the id
    "tab_submission"; otherwise it is treated as an exercise root page. The actual parsing
    is delegated to the matching helper and its result is returned unchanged.
    """
    submission_tab = self._soup.find(id="tab_submission")

    # Guard clause: without the submission tab we are looking at the root page
    if not submission_tab:
        log.explain("Found no submission tab. This is an exercise root page")
        return self._find_exercise_entries_root_page()

    log.explain("Found submission tab. This is an exercise detail page")
    return self._find_exercise_entries_detail_page()
|
||||||
|
|
||||||
|
def _find_exercise_entries_detail_page(self) -> List[IliasPageElement]:
    """
    Collect the downloadable files listed on an exercise detail page.

    Every link with the text "Download" whose href contains "cmd=download" is considered.
    The table row surrounding the link provides the file name (second cell) and the date
    (fourth cell). Each match is returned as an element of type IliasElementType.FILE.

    Links that are not located in a table row with the expected cells are reported as
    unexpected HTML and skipped, so one odd row does not abort parsing of the whole page.
    """
    results: List[IliasPageElement] = []

    # Find all download links in the container (this will contain all the files)
    download_links: List[Tag] = self._soup.findAll(
        name="a",
        # download links contain the given command class
        attrs={"href": lambda x: x and "cmd=download" in x},
        text="Download"
    )

    for link in download_links:
        url = self._abs_url_from_link(link)

        # findParent returns None if the link is not nested in a table row
        parent_row = link.findParent("tr")
        children: List[Tag] = [] if parent_row is None else parent_row.findChildren("td")

        # <checkbox> <name> <uploader> <date> <download>
        # 0          1      2          3      4
        # The name and date cells (index 1 and 3) are required below
        if len(children) < 4:
            _unexpected_html_warning()
            log.warn_contd(f"Tried to parse exercise detail entry, but did not find a table row for {url}")
            continue

        name = _sanitize_path_name(children[1].getText().strip())
        date = demangle_date(children[3].getText().strip())

        log.explain(f"Found exercise detail entry {name!r}")
        results.append(IliasPageElement(
            IliasElementType.FILE,
            url,
            name,
            date
        ))

    return results
|
||||||
|
|
||||||
|
def _find_exercise_entries_root_page(self) -> List[IliasPageElement]:
|
||||||
results: List[IliasPageElement] = []
|
results: List[IliasPageElement] = []
|
||||||
|
|
||||||
# Each assignment is in an accordion container
|
# Each assignment is in an accordion container
|
||||||
@ -205,6 +243,8 @@ class IliasPage:
|
|||||||
for container in assignment_containers:
|
for container in assignment_containers:
|
||||||
# Fetch the container name out of the header to use it in the path
|
# Fetch the container name out of the header to use it in the path
|
||||||
container_name = container.select_one(".ilAssignmentHeader").getText().strip()
|
container_name = container.select_one(".ilAssignmentHeader").getText().strip()
|
||||||
|
log.explain(f"Found exercise container {container_name!r}")
|
||||||
|
|
||||||
# Find all download links in the container (this will contain all the files)
|
# Find all download links in the container (this will contain all the files)
|
||||||
files: List[Tag] = container.findAll(
|
files: List[Tag] = container.findAll(
|
||||||
name="a",
|
name="a",
|
||||||
@ -213,8 +253,6 @@ class IliasPage:
|
|||||||
text="Download"
|
text="Download"
|
||||||
)
|
)
|
||||||
|
|
||||||
log.explain(f"Found exercise container {container_name!r}")
|
|
||||||
|
|
||||||
# Grab each file as you now have the link
|
# Grab each file as you now have the link
|
||||||
for file_link in files:
|
for file_link in files:
|
||||||
# Two divs, side by side. Left is the name, right is the link ==> get left
|
# Two divs, side by side. Left is the name, right is the link ==> get left
|
||||||
@ -231,6 +269,25 @@ class IliasPage:
|
|||||||
None # We do not have any timestamp
|
None # We do not have any timestamp
|
||||||
))
|
))
|
||||||
|
|
||||||
|
# Find all links to file listings (e.g. "Submitted Files" for groups)
|
||||||
|
file_listings: List[Tag] = container.findAll(
|
||||||
|
name="a",
|
||||||
|
# file listing links contain the given command class
|
||||||
|
attrs={"href": lambda x: x and "cmdClass=ilexsubmissionfilegui" in x}
|
||||||
|
)
|
||||||
|
|
||||||
|
# Add each listing as a new element that is crawled like a directory
|
||||||
|
for listing in file_listings:
|
||||||
|
file_name = _sanitize_path_name(listing.getText().strip())
|
||||||
|
url = self._abs_url_from_link(listing)
|
||||||
|
log.explain(f"Found exercise detail {file_name!r} at {url}")
|
||||||
|
results.append(IliasPageElement(
|
||||||
|
IliasElementType.EXERCISE_FILES,
|
||||||
|
url,
|
||||||
|
container_name + "/" + file_name,
|
||||||
|
None # we do not have any timestamp
|
||||||
|
))
|
||||||
|
|
||||||
return results
|
return results
|
||||||
|
|
||||||
def _find_normal_entries(self) -> List[IliasPageElement]:
|
def _find_normal_entries(self) -> List[IliasPageElement]:
|
||||||
@ -349,7 +406,7 @@ class IliasPage:
|
|||||||
|
|
||||||
if found_parent is None:
|
if found_parent is None:
|
||||||
_unexpected_html_warning()
|
_unexpected_html_warning()
|
||||||
log.warn_contd(f"Tried to figure out element type, but did not find an icon for {url!r}")
|
log.warn_contd(f"Tried to figure out element type, but did not find an icon for {url}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# Find the small descriptive icon to figure out the type
|
# Find the small descriptive icon to figure out the type
|
||||||
@ -357,7 +414,7 @@ class IliasPage:
|
|||||||
|
|
||||||
if img_tag is None:
|
if img_tag is None:
|
||||||
_unexpected_html_warning()
|
_unexpected_html_warning()
|
||||||
log.warn_contd(f"Tried to figure out element type, but did not find an image for {url!r}")
|
log.warn_contd(f"Tried to figure out element type, but did not find an image for {url}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
if "opencast" in str(img_tag["alt"]).lower():
|
if "opencast" in str(img_tag["alt"]).lower():
|
||||||
|
@ -61,6 +61,7 @@ class KitIliasWebCrawlerSection(HttpCrawlerSection):
|
|||||||
|
|
||||||
_DIRECTORY_PAGES: Set[IliasElementType] = set([
|
_DIRECTORY_PAGES: Set[IliasElementType] = set([
|
||||||
IliasElementType.EXERCISE,
|
IliasElementType.EXERCISE,
|
||||||
|
IliasElementType.EXERCISE_FILES,
|
||||||
IliasElementType.FOLDER,
|
IliasElementType.FOLDER,
|
||||||
IliasElementType.MEETING,
|
IliasElementType.MEETING,
|
||||||
IliasElementType.VIDEO_FOLDER,
|
IliasElementType.VIDEO_FOLDER,
|
||||||
|
Loading…
Reference in New Issue
Block a user