mirror of
https://github.com/Garmelon/PFERD.git
synced 2023-12-21 10:23:01 +01:00
WIP: ilias-crawler: Demangle dates
This commit is contained in:
parent
ac65b06a8e
commit
23db59e733
@ -16,6 +16,7 @@ import bs4
|
|||||||
from ..cookie_jar import CookieJar
|
from ..cookie_jar import CookieJar
|
||||||
from ..utils import soupify
|
from ..utils import soupify
|
||||||
from .authenticators import IliasAuthenticator
|
from .authenticators import IliasAuthenticator
|
||||||
|
from .date_demangler import demangle_date
|
||||||
from .downloader import IliasDownloadInfo
|
from .downloader import IliasDownloadInfo
|
||||||
|
|
||||||
LOGGER = logging.getLogger(__name__)
|
LOGGER = logging.getLogger(__name__)
|
||||||
@ -102,10 +103,17 @@ class IliasCrawler:
|
|||||||
).select_one(".il_ItemProperties")
|
).select_one(".il_ItemProperties")
|
||||||
file_type = properties_parent.select_one("span.il_ItemProperty").getText().strip()
|
file_type = properties_parent.select_one("span.il_ItemProperty").getText().strip()
|
||||||
|
|
||||||
modifcation_date = datetime.datetime.now()
|
|
||||||
all_properties_text = properties_parent.getText().strip()
|
all_properties_text = properties_parent.getText().strip()
|
||||||
print("Property text is", all_properties_text)
|
modification_date_match = re.search(
|
||||||
# todo demangle date from text above
|
r"(((\d+\. \w+ \d+)|(Gestern)|(Heute)), \d+:\d+)",
|
||||||
|
all_properties_text
|
||||||
|
)
|
||||||
|
if modification_date_match is None:
|
||||||
|
modification_date = datetime.datetime.now()
|
||||||
|
LOGGER.warning("Could not extract start date from %r", all_properties_text)
|
||||||
|
else:
|
||||||
|
modification_date_str = modification_date_match.group(1)
|
||||||
|
modification_date = demangle_date(modification_date_str)
|
||||||
|
|
||||||
name = link_element.getText()
|
name = link_element.getText()
|
||||||
full_path = Path(path, name + "." + file_type)
|
full_path = Path(path, name + "." + file_type)
|
||||||
@ -116,7 +124,7 @@ class IliasCrawler:
|
|||||||
LOGGER.warning("Could not download file %r", url)
|
LOGGER.warning("Could not download file %r", url)
|
||||||
return []
|
return []
|
||||||
|
|
||||||
return [IliasDownloadInfo(full_path, url, modifcation_date)]
|
return [IliasDownloadInfo(full_path, url, modification_date)]
|
||||||
|
|
||||||
def _switch_on_folder_like(
|
def _switch_on_folder_like(
|
||||||
self,
|
self,
|
||||||
@ -184,6 +192,7 @@ class IliasCrawler:
|
|||||||
title = link.parent.parent.parent.select_one(
|
title = link.parent.parent.parent.select_one(
|
||||||
"td.std:nth-child(3)"
|
"td.std:nth-child(3)"
|
||||||
).getText().strip()
|
).getText().strip()
|
||||||
|
title += ".mp4"
|
||||||
|
|
||||||
video_page_soup = self._get_page(video_page_url, {})
|
video_page_soup = self._get_page(video_page_url, {})
|
||||||
regex: re.Pattern = re.compile(
|
regex: re.Pattern = re.compile(
|
||||||
@ -243,7 +252,7 @@ class IliasCrawler:
|
|||||||
def _is_logged_in(soup: bs4.BeautifulSoup) -> bool:
|
def _is_logged_in(soup: bs4.BeautifulSoup) -> bool:
|
||||||
userlog = soup.find("li", {"id": "userlog"})
|
userlog = soup.find("li", {"id": "userlog"})
|
||||||
if userlog is not None:
|
if userlog is not None:
|
||||||
print("Found userlog")
|
LOGGER.debug("Auth: Found #userlog")
|
||||||
return True
|
return True
|
||||||
video_table = soup.find(
|
video_table = soup.find(
|
||||||
recursive=True,
|
recursive=True,
|
||||||
@ -251,19 +260,17 @@ class IliasCrawler:
|
|||||||
attrs={"id": lambda x: x is not None and x.startswith("tbl_xoct")}
|
attrs={"id": lambda x: x is not None and x.startswith("tbl_xoct")}
|
||||||
)
|
)
|
||||||
if video_table is not None:
|
if video_table is not None:
|
||||||
print("Found video")
|
LOGGER.debug("Auth: Found #tbl_xoct.+")
|
||||||
return True
|
return True
|
||||||
if soup.select_one("#playerContainer") is not None:
|
if soup.select_one("#playerContainer") is not None:
|
||||||
print("Found player")
|
LOGGER.debug("Auth: Found #playerContainer")
|
||||||
return True
|
return True
|
||||||
print("Ooops: ", soup)
|
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
def run_as_test(ilias_url: str, course_id: int) -> List[IliasDownloadInfo]:
|
def run_as_test(ilias_url: str, course_id: int) -> List[IliasDownloadInfo]:
|
||||||
from ..organizer import Organizer
|
from ..organizer import Organizer
|
||||||
from .authenticators import KitShibbolethAuthenticator
|
from .authenticators import KitShibbolethAuthenticator
|
||||||
organizer = Organizer(Path("/tmp/test/inner"))
|
|
||||||
|
|
||||||
crawler = IliasCrawler(KitShibbolethAuthenticator(), ilias_url, str(course_id))
|
crawler = IliasCrawler(KitShibbolethAuthenticator(), ilias_url, str(course_id))
|
||||||
return crawler.crawl()
|
return crawler.crawl()
|
||||||
|
24
PFERD/ilias/date_demangler.py
Normal file
24
PFERD/ilias/date_demangler.py
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
"""
|
||||||
|
Helper methods to demangle an ILIAS date.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import datetime
|
||||||
|
import re
|
||||||
|
|
||||||
|
|
||||||
|
def demangle_date(date: str) -> datetime.datetime:
    """
    Demangle a given date in one of the following formats:

    "Gestern, HH:MM"
    "Heute, HH:MM"
    "dd. mon yyyy, HH:MM"

    :raises ValueError: if the input matches none of the formats above
    """
    # Collapse every whitespace run (ILIAS likes non-breaking spaces)
    # into a single plain space so a fixed strptime format matches.
    date = re.sub(r"\s+", " ", date).strip()

    # Resolve the relative day names directly by combining a concrete
    # date with the parsed time-of-day. This avoids round-tripping
    # through strftime("%b"), whose output depends on the process
    # locale and would then have to be re-parsed by strptime below.
    relative_days = {
        "Gestern": datetime.date.today() - datetime.timedelta(days=1),
        "Heute": datetime.date.today(),
    }
    for day_name, day in relative_days.items():
        prefix = day_name + ", "
        if date.startswith(prefix):
            clock = datetime.datetime.strptime(date[len(prefix):], "%H:%M")
            return datetime.datetime.combine(day, clock.time())

    # strptime's %b only accepts month abbreviations of the current
    # locale — English under the default C locale — while ILIAS serves
    # German names ("Dez" instead of "Dec", ...). Translate the German
    # abbreviations that differ; longer keys first so "März" is not
    # left half-replaced by the "Mär" rule.
    month_translations = [
        ("März", "Mar"),
        ("Mär", "Mar"),
        ("Mai", "May"),
        ("Okt", "Oct"),
        ("Dez", "Dec"),
    ]
    for german, english in month_translations:
        date = date.replace(german, english)

    return datetime.datetime.strptime(date, "%d. %b %Y, %H:%M")
|
||||||
|
|
||||||
|
|
||||||
|
def _yesterday() -> datetime.date:
|
||||||
|
return datetime.date.today() - datetime.timedelta(days=1)
|
Loading…
Reference in New Issue
Block a user