Fix tables with more columns than expected

This commit is contained in:
I-Al-Istannen 2022-01-18 22:32:43 +01:00
parent 86947e4874
commit 7872fe5221

View File

@@ -280,11 +280,22 @@ class IliasPage:
def _listed_video_to_element(self, link: Tag) -> IliasPageElement: def _listed_video_to_element(self, link: Tag) -> IliasPageElement:
# The link is part of a table with multiple columns, describing metadata. # The link is part of a table with multiple columns, describing metadata.
# 6th child (1 indexed) is the modification time string # 6th or 7th child (1 indexed) is the modification time string. Try to find it
modification_string = link.parent.parent.parent.select_one( # by parsing backwards from the end and finding something that looks like a date
"td.std:nth-child(6)" modification_time = None
).getText().strip() row: Tag = link.parent.parent.parent
modification_time = datetime.strptime(modification_string, "%d.%m.%Y - %H:%M") column_count = len(row.select("td.std"))
for index in range(column_count, 0, -1):
modification_string = link.parent.parent.parent.select_one(
f"td.std:nth-child({index})"
).getText().strip()
if re.search(r"\d+\.\d+.\d+ - \d+:\d+", modification_string):
modification_time = datetime.strptime(modification_string, "%d.%m.%Y - %H:%M")
break
if modification_time is None:
log.warn(f"Could not determine upload time for {link}")
modification_time = datetime.now()
title = link.parent.parent.parent.select_one("td.std:nth-child(3)").getText().strip() title = link.parent.parent.parent.select_one("td.std:nth-child(3)").getText().strip()
title += ".mp4" title += ".mp4"