mirror of
https://github.com/Garmelon/PFERD.git
synced 2023-12-21 10:23:01 +01:00
Handle empty forums
This commit is contained in:
parent
4a51aaa4f5
commit
d72fc2760b
@ -24,6 +24,7 @@ ambiguous situations.
|
||||
|
||||
### Fixed
|
||||
- Forum crawling crashing when parsing empty (= 0 messages) threads
|
||||
- Forum crawling crashing when a forum has no threads at all
|
||||
|
||||
## 3.4.1 - 2022-08-17
|
||||
|
||||
|
@ -59,6 +59,7 @@ class IliasPageElement:
|
||||
class IliasDownloadForumData:
|
||||
url: str
|
||||
form_data: Dict[str, Union[str, List[str]]]
|
||||
empty: bool
|
||||
|
||||
|
||||
@dataclass
|
||||
@ -130,14 +131,16 @@ class IliasPage:
|
||||
return None
|
||||
post_url = self._abs_url_from_relative(form["action"])
|
||||
|
||||
thread_ids = [f["value"] for f in form.find_all(attrs={"name": "thread_ids[]"})]
|
||||
|
||||
form_data: Dict[str, Union[str, List[str]]] = {
|
||||
"thread_ids[]": [f["value"] for f in form.find_all(attrs={"name": "thread_ids[]"})],
|
||||
"thread_ids[]": thread_ids,
|
||||
"selected_cmd2": "html",
|
||||
"select_cmd2": "Ausführen",
|
||||
"selected_cmd": "",
|
||||
}
|
||||
|
||||
return IliasDownloadForumData(post_url, form_data)
|
||||
return IliasDownloadForumData(url=post_url, form_data=form_data, empty=len(thread_ids) == 0)
|
||||
|
||||
def get_next_stage_element(self) -> Optional[IliasPageElement]:
|
||||
if self._is_forum_page():
|
||||
|
@ -658,7 +658,7 @@ instance's greatest bottleneck.
|
||||
@_iorepeat(3, "crawling forum")
|
||||
@anoncritical
|
||||
async def _crawl_forum(self, element: IliasPageElement, cl: CrawlToken) -> None:
|
||||
elements = []
|
||||
elements: List[IliasForumThread] = []
|
||||
|
||||
async with cl:
|
||||
next_stage_url = element.url
|
||||
@ -677,6 +677,10 @@ instance's greatest bottleneck.
|
||||
download_data = page.get_download_forum_data()
|
||||
if not download_data:
|
||||
raise CrawlWarning("Failed to extract forum data")
|
||||
if download_data.empty:
|
||||
log.explain("Forum had no threads")
|
||||
elements = []
|
||||
return
|
||||
html = await self._post_authenticated(download_data.url, download_data.form_data)
|
||||
elements = parse_ilias_forum_export(soupify(html))
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user