mirror of
https://github.com/Garmelon/PFERD.git
synced 2023-12-21 10:23:01 +01:00
Crawl paginated past meetings
This commit is contained in:
parent
ad53185247
commit
df3514cd03
@ -29,6 +29,7 @@ ambiguous situations.
|
|||||||
- Abort crawling when encountering an unexpected ilias root page redirect
|
- Abort crawling when encountering an unexpected ilias root page redirect
|
||||||
- Remove size suffix for files in content pages
|
- Remove size suffix for files in content pages
|
||||||
- Sanitize ascii control characters on Windows
|
- Sanitize ascii control characters on Windows
|
||||||
|
- Crawling of paginated past meetings
|
||||||
|
|
||||||
### Added
|
### Added
|
||||||
- `no-delete-prompt-override` conflict resolution strategy
|
- `no-delete-prompt-override` conflict resolution strategy
|
||||||
|
@ -293,7 +293,10 @@ class IliasPage:
|
|||||||
return self._uncollapse_future_meetings_url() is not None
|
return self._uncollapse_future_meetings_url() is not None
|
||||||
|
|
||||||
def _uncollapse_future_meetings_url(self) -> Optional[IliasPageElement]:
|
def _uncollapse_future_meetings_url(self) -> Optional[IliasPageElement]:
|
||||||
element = self._soup.find("a", attrs={"href": lambda x: x and "crs_next_sess=1" in x})
|
element = self._soup.find(
|
||||||
|
"a",
|
||||||
|
attrs={"href": lambda x: x and ("crs_next_sess=1" in x or "crs_prev_sess=1" in x)}
|
||||||
|
)
|
||||||
if not element:
|
if not element:
|
||||||
return None
|
return None
|
||||||
link = self._abs_url_from_link(element)
|
link = self._abs_url_from_link(element)
|
||||||
@ -991,7 +994,11 @@ class IliasPage:
|
|||||||
if img_tag is None:
|
if img_tag is None:
|
||||||
img_tag = found_parent.select_one("img.icon")
|
img_tag = found_parent.select_one("img.icon")
|
||||||
|
|
||||||
if img_tag is None and found_parent.find("a", attrs={"href": lambda x: x and "crs_next_sess=" in x}):
|
is_session_expansion_button = found_parent.find(
|
||||||
|
"a",
|
||||||
|
attrs={"href": lambda x: x and ("crs_next_sess=" in x or "crs_prev_sess=" in x)}
|
||||||
|
)
|
||||||
|
if img_tag is None and is_session_expansion_button:
|
||||||
log.explain("Found session expansion button, skipping it as it has no content")
|
log.explain("Found session expansion button, skipping it as it has no content")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user