mirror of
https://github.com/Garmelon/PFERD.git
synced 2025-07-15 07:32:31 +02:00
Compare commits
4 Commits
42098dc3a5
...
v3.5.1
Author | SHA1 | Date | |
---|---|---|---|
![]() |
da627ff929 | ||
![]() |
c1b592ac29 | ||
![]() |
eb0c956d32 | ||
![]() |
ab0cb2d956 |
@@ -22,6 +22,11 @@ ambiguous situations.
|
|||||||
|
|
||||||
## Unreleased
|
## Unreleased
|
||||||
|
|
||||||
|
## 3.5.1 - 2024-04-09
|
||||||
|
|
||||||
|
### Added
|
||||||
|
- Support for ILIAS 8
|
||||||
|
|
||||||
### Fixed
|
### Fixed
|
||||||
- Video name deduplication
|
- Video name deduplication
|
||||||
|
|
||||||
|
@@ -17,7 +17,7 @@ TargetType = Union[str, int]
|
|||||||
class IliasElementType(Enum):
|
class IliasElementType(Enum):
|
||||||
EXERCISE = "exercise"
|
EXERCISE = "exercise"
|
||||||
EXERCISE_FILES = "exercise_files" # own submitted files
|
EXERCISE_FILES = "exercise_files" # own submitted files
|
||||||
TEST = "test" # an online test. Will be ignored currently.
|
TEST = "test" # an online test. Will be ignored currently.
|
||||||
FILE = "file"
|
FILE = "file"
|
||||||
FOLDER = "folder"
|
FOLDER = "folder"
|
||||||
FORUM = "forum"
|
FORUM = "forum"
|
||||||
@@ -95,13 +95,9 @@ class IliasPage:
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def is_root_page(soup: BeautifulSoup) -> bool:
|
def is_root_page(soup: BeautifulSoup) -> bool:
|
||||||
permalink = soup.find(id="current_perma_link")
|
if permalink := IliasPage.get_soup_permalink(soup):
|
||||||
if permalink is None:
|
return "goto.php?target=root_" in permalink
|
||||||
return False
|
return False
|
||||||
value = permalink.attrs.get("value")
|
|
||||||
if value is None:
|
|
||||||
return False
|
|
||||||
return "goto.php?target=root_" in value
|
|
||||||
|
|
||||||
def get_child_elements(self) -> List[IliasPageElement]:
|
def get_child_elements(self) -> List[IliasPageElement]:
|
||||||
"""
|
"""
|
||||||
@@ -279,16 +275,14 @@ class IliasPage:
|
|||||||
return self._soup.find("a", attrs={"href": lambda x: x and "block_type=pditems" in x})
|
return self._soup.find("a", attrs={"href": lambda x: x and "block_type=pditems" in x})
|
||||||
|
|
||||||
def _is_content_page(self) -> bool:
|
def _is_content_page(self) -> bool:
|
||||||
link = self._soup.find(id="current_perma_link")
|
if link := self.get_permalink():
|
||||||
if not link:
|
return "target=copa_" in link
|
||||||
return False
|
return False
|
||||||
return "target=copa_" in link.get("value")
|
|
||||||
|
|
||||||
def _is_learning_module_page(self) -> bool:
|
def _is_learning_module_page(self) -> bool:
|
||||||
link = self._soup.find(id="current_perma_link")
|
if link := self.get_permalink():
|
||||||
if not link:
|
return "target=pg_" in link
|
||||||
return False
|
return False
|
||||||
return "target=pg_" in link.get("value")
|
|
||||||
|
|
||||||
def _contains_collapsed_future_meetings(self) -> bool:
|
def _contains_collapsed_future_meetings(self) -> bool:
|
||||||
return self._uncollapse_future_meetings_url() is not None
|
return self._uncollapse_future_meetings_url() is not None
|
||||||
@@ -513,8 +507,8 @@ class IliasPage:
|
|||||||
modification_string = link.parent.parent.parent.select_one(
|
modification_string = link.parent.parent.parent.select_one(
|
||||||
f"td.std:nth-child({index})"
|
f"td.std:nth-child({index})"
|
||||||
).getText().strip()
|
).getText().strip()
|
||||||
if re.search(r"\d+\.\d+.\d+ - \d+:\d+", modification_string):
|
if match := re.search(r"\d+\.\d+.\d+ \d+:\d+", modification_string):
|
||||||
modification_time = datetime.strptime(modification_string, "%d.%m.%Y - %H:%M")
|
modification_time = datetime.strptime(match.group(0), "%d.%m.%Y %H:%M")
|
||||||
break
|
break
|
||||||
|
|
||||||
if modification_time is None:
|
if modification_time is None:
|
||||||
@@ -613,7 +607,7 @@ class IliasPage:
|
|||||||
file_listings: List[Tag] = container.findAll(
|
file_listings: List[Tag] = container.findAll(
|
||||||
name="a",
|
name="a",
|
||||||
# download links contain the given command class
|
# download links contain the given command class
|
||||||
attrs={"href": lambda x: x and "cmdClass=ilexsubmissionfilegui" in x}
|
attrs={"href": lambda x: x and "cmdclass=ilexsubmissionfilegui" in x.lower()}
|
||||||
)
|
)
|
||||||
|
|
||||||
# Add each listing as a new
|
# Add each listing as a new
|
||||||
@@ -917,9 +911,9 @@ class IliasPage:
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _find_type_from_link(
|
def _find_type_from_link(
|
||||||
element_name: str,
|
element_name: str,
|
||||||
link_element: Tag,
|
link_element: Tag,
|
||||||
url: str
|
url: str
|
||||||
) -> Optional[IliasElementType]:
|
) -> Optional[IliasElementType]:
|
||||||
"""
|
"""
|
||||||
Decides which sub crawler to use for a given top level element.
|
Decides which sub crawler to use for a given top level element.
|
||||||
@@ -1095,6 +1089,9 @@ class IliasPage:
|
|||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
def get_permalink(self) -> Optional[str]:
|
||||||
|
return IliasPage.get_soup_permalink(self._soup)
|
||||||
|
|
||||||
def _abs_url_from_link(self, link_tag: Tag) -> str:
|
def _abs_url_from_link(self, link_tag: Tag) -> str:
|
||||||
"""
|
"""
|
||||||
Create an absolute url from an <a> tag.
|
Create an absolute url from an <a> tag.
|
||||||
@@ -1107,6 +1104,13 @@ class IliasPage:
|
|||||||
"""
|
"""
|
||||||
return urljoin(self._page_url, relative_url)
|
return urljoin(self._page_url, relative_url)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def get_soup_permalink(soup: BeautifulSoup) -> Optional[str]:
|
||||||
|
perma_link_element: Tag = soup.select_one(".il-footer-permanent-url > a")
|
||||||
|
if not perma_link_element or not perma_link_element.get("href"):
|
||||||
|
return None
|
||||||
|
return perma_link_element.get("href")
|
||||||
|
|
||||||
|
|
||||||
def _unexpected_html_warning() -> None:
|
def _unexpected_html_warning() -> None:
|
||||||
log.warn("Encountered unexpected HTML structure, ignoring element.")
|
log.warn("Encountered unexpected HTML structure, ignoring element.")
|
||||||
@@ -1130,7 +1134,7 @@ def demangle_date(date_str: str, fail_silently: bool = False) -> Optional[dateti
|
|||||||
|
|
||||||
date_str = re.sub("Gestern|Yesterday", _format_date_english(_yesterday()), date_str, re.I)
|
date_str = re.sub("Gestern|Yesterday", _format_date_english(_yesterday()), date_str, re.I)
|
||||||
date_str = re.sub("Heute|Today", _format_date_english(date.today()), date_str, re.I)
|
date_str = re.sub("Heute|Today", _format_date_english(date.today()), date_str, re.I)
|
||||||
date_str = re.sub("Morgen|Tomorrow", _format_date_english(_tomorrow()), date_str, re.I)
|
date_str = re.sub("Morgen|Tomorrow", _format_date_english(_tomorrow()), date_str, re.I)
|
||||||
date_str = date_str.strip()
|
date_str = date_str.strip()
|
||||||
for german, english in zip(german_months, english_months):
|
for german, english in zip(german_months, english_months):
|
||||||
date_str = date_str.replace(german, english)
|
date_str = date_str.replace(german, english)
|
||||||
|
@@ -130,6 +130,7 @@ def _iorepeat(attempts: int, name: str, failure_is_error: bool = False) -> Calla
|
|||||||
raise CrawlError("Impossible return in ilias _iorepeat")
|
raise CrawlError("Impossible return in ilias _iorepeat")
|
||||||
|
|
||||||
return wrapper # type: ignore
|
return wrapper # type: ignore
|
||||||
|
|
||||||
return decorator
|
return decorator
|
||||||
|
|
||||||
|
|
||||||
@@ -177,11 +178,11 @@ def _get_video_cache_key(element: IliasPageElement) -> str:
|
|||||||
|
|
||||||
class KitIliasWebCrawler(HttpCrawler):
|
class KitIliasWebCrawler(HttpCrawler):
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
name: str,
|
name: str,
|
||||||
section: KitIliasWebCrawlerSection,
|
section: KitIliasWebCrawlerSection,
|
||||||
config: Config,
|
config: Config,
|
||||||
authenticators: Dict[str, Authenticator]
|
authenticators: Dict[str, Authenticator]
|
||||||
):
|
):
|
||||||
# Setting a main authenticator for cookie sharing
|
# Setting a main authenticator for cookie sharing
|
||||||
auth = section.auth(authenticators)
|
auth = section.auth(authenticators)
|
||||||
@@ -253,8 +254,8 @@ instance's greatest bottleneck.
|
|||||||
soup = await self._get_page(next_stage_url, root_page_allowed=True)
|
soup = await self._get_page(next_stage_url, root_page_allowed=True)
|
||||||
|
|
||||||
if current_parent is None and expected_id is not None:
|
if current_parent is None and expected_id is not None:
|
||||||
perma_link_element: Tag = soup.find(id="current_perma_link")
|
perma_link = IliasPage.get_soup_permalink(soup)
|
||||||
if not perma_link_element or "crs_" not in perma_link_element.get("value"):
|
if not perma_link or "crs_" not in perma_link:
|
||||||
raise CrawlError("Invalid course id? Didn't find anything looking like a course")
|
raise CrawlError("Invalid course id? Didn't find anything looking like a course")
|
||||||
|
|
||||||
log.explain_topic(f"Parsing HTML page for {fmt_path(cl.path)}")
|
log.explain_topic(f"Parsing HTML page for {fmt_path(cl.path)}")
|
||||||
@@ -674,12 +675,28 @@ instance's greatest bottleneck.
|
|||||||
|
|
||||||
async def _stream_from_url(self, url: str, sink: FileSink, bar: ProgressBar, is_video: bool) -> None:
|
async def _stream_from_url(self, url: str, sink: FileSink, bar: ProgressBar, is_video: bool) -> None:
|
||||||
async def try_stream() -> bool:
|
async def try_stream() -> bool:
|
||||||
async with self.session.get(url, allow_redirects=is_video) as resp:
|
next_url = url
|
||||||
if not is_video:
|
|
||||||
# Redirect means we weren't authenticated
|
# Normal files redirect to the magazine if we are not authenticated. As files could be HTML,
|
||||||
|
# we can not match on the content type here. Instead, we disallow redirects and inspect the
|
||||||
|
# new location. If we are redirected anywhere but the ILIAS 8 "sendfile" command, we assume
|
||||||
|
# our authentication expired.
|
||||||
|
if not is_video:
|
||||||
|
async with self.session.get(url, allow_redirects=False) as resp:
|
||||||
|
# Redirect to anything except a "sendfile" means we weren't authenticated
|
||||||
if hdrs.LOCATION in resp.headers:
|
if hdrs.LOCATION in resp.headers:
|
||||||
return False
|
if "&cmd=sendfile" not in resp.headers[hdrs.LOCATION]:
|
||||||
# we wanted a video but got HTML
|
return False
|
||||||
|
# Directly follow the redirect to not make a second, unnecessary request
|
||||||
|
next_url = resp.headers[hdrs.LOCATION]
|
||||||
|
|
||||||
|
# Let's try this again and follow redirects
|
||||||
|
return await fetch_follow_redirects(next_url)
|
||||||
|
|
||||||
|
async def fetch_follow_redirects(file_url: str) -> bool:
|
||||||
|
async with self.session.get(file_url) as resp:
|
||||||
|
# We wanted a video but got HTML => Forbidden, auth expired. Logging in won't really
|
||||||
|
# solve that depending on the setup, but it is better than nothing.
|
||||||
if is_video and "html" in resp.content_type:
|
if is_video and "html" in resp.content_type:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
@@ -1052,9 +1069,9 @@ class KitShibbolethLogin:
|
|||||||
await sess.post(url, data=data)
|
await sess.post(url, data=data)
|
||||||
|
|
||||||
async def _authenticate_tfa(
|
async def _authenticate_tfa(
|
||||||
self,
|
self,
|
||||||
session: aiohttp.ClientSession,
|
session: aiohttp.ClientSession,
|
||||||
soup: BeautifulSoup
|
soup: BeautifulSoup
|
||||||
) -> BeautifulSoup:
|
) -> BeautifulSoup:
|
||||||
if not self._tfa_auth:
|
if not self._tfa_auth:
|
||||||
self._tfa_auth = TfaAuthenticator("ilias-anon-tfa")
|
self._tfa_auth = TfaAuthenticator("ilias-anon-tfa")
|
||||||
|
@@ -1,2 +1,2 @@
|
|||||||
NAME = "PFERD"
|
NAME = "PFERD"
|
||||||
VERSION = "3.5.0"
|
VERSION = "3.5.1"
|
||||||
|
8
flake.lock
generated
8
flake.lock
generated
@@ -2,16 +2,16 @@
|
|||||||
"nodes": {
|
"nodes": {
|
||||||
"nixpkgs": {
|
"nixpkgs": {
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1694499547,
|
"lastModified": 1708979614,
|
||||||
"narHash": "sha256-R7xMz1Iia6JthWRHDn36s/E248WB1/je62ovC/dUVKI=",
|
"narHash": "sha256-FWLWmYojIg6TeqxSnHkKpHu5SGnFP5um1uUjH+wRV6g=",
|
||||||
"owner": "NixOS",
|
"owner": "NixOS",
|
||||||
"repo": "nixpkgs",
|
"repo": "nixpkgs",
|
||||||
"rev": "e5f018cf150e29aac26c61dac0790ea023c46b24",
|
"rev": "b7ee09cf5614b02d289cd86fcfa6f24d4e078c2a",
|
||||||
"type": "github"
|
"type": "github"
|
||||||
},
|
},
|
||||||
"original": {
|
"original": {
|
||||||
"owner": "NixOS",
|
"owner": "NixOS",
|
||||||
"ref": "nixos-23.05",
|
"ref": "nixos-23.11",
|
||||||
"repo": "nixpkgs",
|
"repo": "nixpkgs",
|
||||||
"type": "github"
|
"type": "github"
|
||||||
}
|
}
|
||||||
|
@@ -2,7 +2,7 @@
|
|||||||
description = "Tool for downloading course-related files from ILIAS";
|
description = "Tool for downloading course-related files from ILIAS";
|
||||||
|
|
||||||
inputs = {
|
inputs = {
|
||||||
nixpkgs.url = "github:NixOS/nixpkgs/nixos-23.05";
|
nixpkgs.url = "github:NixOS/nixpkgs/nixos-23.11";
|
||||||
};
|
};
|
||||||
|
|
||||||
outputs = { self, nixpkgs }:
|
outputs = { self, nixpkgs }:
|
||||||
|
Reference in New Issue
Block a user