mirror of
https://github.com/Garmelon/PFERD.git
synced 2023-12-21 10:23:01 +01:00
WIP: ilias crawler: Also crawl assignments
This commit is contained in:
parent
23db59e733
commit
eb7df036df
@ -78,6 +78,7 @@ class IliasCrawler:
|
|||||||
LOGGER.debug("Parsed url: %r", parsed_url)
|
LOGGER.debug("Parsed url: %r", parsed_url)
|
||||||
|
|
||||||
if "target=file_" in parsed_url.query:
|
if "target=file_" in parsed_url.query:
|
||||||
|
LOGGER.debug("Interpreted as file.")
|
||||||
return self._crawl_file(path, link_element, url)
|
return self._crawl_file(path, link_element, url)
|
||||||
|
|
||||||
# Skip forums
|
# Skip forums
|
||||||
@ -153,14 +154,18 @@ class IliasCrawler:
|
|||||||
LOGGER.debug("Skipping forum at %r", url)
|
LOGGER.debug("Skipping forum at %r", url)
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
element_path = Path(path, link_element.getText().strip())
|
||||||
|
|
||||||
|
if str(img_tag["src"]).endswith("icon_exc.svg"):
|
||||||
|
LOGGER.debug("Crawling exercises at %r", url)
|
||||||
|
return self._crawl_exercises(element_path, url)
|
||||||
|
|
||||||
if "opencast" in str(img_tag["alt"]).lower():
|
if "opencast" in str(img_tag["alt"]).lower():
|
||||||
LOGGER.debug("Found video site: %r", url)
|
LOGGER.debug("Found video site: %r", url)
|
||||||
return self._crawl_video_directory(path, url)
|
return self._crawl_video_directory(element_path, url)
|
||||||
|
|
||||||
# Assume it is a folder
|
# Assume it is a folder
|
||||||
folder_name = link_element.getText()
|
return self._crawl_folder(element_path, self._abs_url_from_link(link_element))
|
||||||
folder_path = Path(path, folder_name)
|
|
||||||
return self._crawl_folder(folder_path, self._abs_url_from_link(link_element))
|
|
||||||
|
|
||||||
def _crawl_video_directory(self, path: Path, url: str) -> List[IliasDownloadInfo]:
|
def _crawl_video_directory(self, path: Path, url: str) -> List[IliasDownloadInfo]:
|
||||||
initial_soup = self._get_page(url, {})
|
initial_soup = self._get_page(url, {})
|
||||||
@ -210,6 +215,43 @@ class IliasCrawler:
|
|||||||
|
|
||||||
return [IliasDownloadInfo(Path(path, title), video_url, modification_time)]
|
return [IliasDownloadInfo(Path(path, title), video_url, modification_time)]
|
||||||
|
|
||||||
|
def _crawl_exercises(self, element_path: Path, url: str) -> List[IliasDownloadInfo]:
    """
    Crawl an exercise page and collect the downloadable assignment files.

    The page at ``url`` is fetched and every ``.il_VAccordionInnerContainer``
    element is treated as one assignment. Inside each container, all
    "Download" links whose href contains ``cmdClass=ilexsubmissiongui``
    are turned into download infos.

    Arguments:
        element_path: base path of the exercise; each file is placed at
            ``element_path / <assignment name> / <file name>``.
        url: URL of the exercise page.

    Returns:
        One IliasDownloadInfo per file link found. Containers or links
        whose name cannot be determined are skipped (and logged) instead
        of aborting the whole crawl.
    """
    soup = self._get_page(url, {})

    results: List[IliasDownloadInfo] = []

    assignment_containers: List[bs4.Tag] = soup.select(".il_VAccordionInnerContainer")

    for container in assignment_containers:
        # select_one returns None when nothing matches, so check before use.
        header = container.select_one(".ilAssignmentHeader")
        if header is None:
            LOGGER.debug("Skipping exercise container without header at %r", url)
            continue
        container_name = header.getText().strip()

        files: List[bs4.Tag] = container.findAll(
            name="a",
            # The href may be missing (None), hence the truthiness check first.
            attrs={"href": lambda x: x and "cmdClass=ilexsubmissiongui" in x},
            text="Download"
        )

        LOGGER.debug("Found exercise container %r", container_name)

        # Fall back to the current time if the page shows no parsable
        # "Abgabetermin" (German: submission deadline) entry.
        end_date: datetime.datetime = datetime.datetime.now()
        end_date_header = container.find(name="div", text="Abgabetermin")
        if end_date_header is not None:
            end_date_element = end_date_header.findNext("div")
            if end_date_element is not None:
                end_date = demangle_date(end_date_element.getText().strip())

        for file_link in files:
            # The file name is taken from the closest <div> preceding the
            # link's parent element.
            name_element = file_link.parent.findPrevious(name="div")
            if name_element is None:
                LOGGER.debug("Skipping file link without name in %r", container_name)
                continue
            file_name = name_element.getText().strip()

            # Use a dedicated name so the ``url`` parameter is not overwritten.
            file_url = self._abs_url_from_link(file_link)

            LOGGER.debug("Found file %r at %r", file_name, file_url)

            results.append(IliasDownloadInfo(
                Path(element_path, container_name, file_name),
                file_url,
                end_date
            ))

    return results
|
||||||
|
|
||||||
def _crawl_folder(self, path: Path, url: str) -> List[IliasDownloadInfo]:
|
def _crawl_folder(self, path: Path, url: str) -> List[IliasDownloadInfo]:
|
||||||
soup = self._get_page(url, {})
|
soup = self._get_page(url, {})
|
||||||
|
|
||||||
|
@ -3,6 +3,7 @@ Helper methods to demangle an ILIAS date.
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
import datetime
|
import datetime
|
||||||
|
import locale
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
|
||||||
@ -13,11 +14,16 @@ def demangle_date(date: str) -> datetime.datetime:
|
|||||||
"Heute, HH:MM"
|
"Heute, HH:MM"
|
||||||
"dd. mon.yyyy, HH:MM
|
"dd. mon.yyyy, HH:MM
|
||||||
"""
|
"""
|
||||||
|
saved = locale.setlocale(locale.LC_ALL)
|
||||||
|
try:
|
||||||
|
locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8')
|
||||||
|
|
||||||
date = re.sub(r"\s+", " ", date)
|
date = re.sub(r"\s+", " ", date)
|
||||||
date = date.replace("Gestern", _yesterday().strftime("%d. %b %Y"))
|
date = date.replace("Gestern", _yesterday().strftime("%d. %b %Y"))
|
||||||
date = date.replace("Heute", datetime.date.today().strftime("%d. %b %Y"))
|
date = date.replace("Heute", datetime.date.today().strftime("%d. %b %Y"))
|
||||||
|
|
||||||
return datetime.datetime.strptime(date, "%d. %b %Y, %H:%M")
|
return datetime.datetime.strptime(date, "%d. %b %Y, %H:%M")
|
||||||
|
finally:
|
||||||
|
locale.setlocale(locale.LC_ALL, saved)
|
||||||
|
|
||||||
|
|
||||||
def _yesterday() -> datetime.date:
|
def _yesterday() -> datetime.date:
|
||||||
|
Loading…
Reference in New Issue
Block a user