mirror of
https://github.com/Garmelon/PFERD.git
synced 2023-12-21 10:23:01 +01:00
Handle empty forums
This commit is contained in:
parent
4a51aaa4f5
commit
d72fc2760b
@ -24,6 +24,7 @@ ambiguous situations.
|
|||||||
|
|
||||||
### Fixed
|
### Fixed
|
||||||
- Forum crawling crashing when parsing empty (= 0 messages) threads
|
- Forum crawling crashing when parsing empty (= 0 messages) threads
|
||||||
|
- Forum crawling crashing when a forum has no threads at all
|
||||||
|
|
||||||
## 3.4.1 - 2022-08-17
|
## 3.4.1 - 2022-08-17
|
||||||
|
|
||||||
|
@ -59,6 +59,7 @@ class IliasPageElement:
|
|||||||
class IliasDownloadForumData:
|
class IliasDownloadForumData:
|
||||||
url: str
|
url: str
|
||||||
form_data: Dict[str, Union[str, List[str]]]
|
form_data: Dict[str, Union[str, List[str]]]
|
||||||
|
empty: bool
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
@ -130,14 +131,16 @@ class IliasPage:
|
|||||||
return None
|
return None
|
||||||
post_url = self._abs_url_from_relative(form["action"])
|
post_url = self._abs_url_from_relative(form["action"])
|
||||||
|
|
||||||
|
thread_ids = [f["value"] for f in form.find_all(attrs={"name": "thread_ids[]"})]
|
||||||
|
|
||||||
form_data: Dict[str, Union[str, List[str]]] = {
|
form_data: Dict[str, Union[str, List[str]]] = {
|
||||||
"thread_ids[]": [f["value"] for f in form.find_all(attrs={"name": "thread_ids[]"})],
|
"thread_ids[]": thread_ids,
|
||||||
"selected_cmd2": "html",
|
"selected_cmd2": "html",
|
||||||
"select_cmd2": "Ausführen",
|
"select_cmd2": "Ausführen",
|
||||||
"selected_cmd": "",
|
"selected_cmd": "",
|
||||||
}
|
}
|
||||||
|
|
||||||
return IliasDownloadForumData(post_url, form_data)
|
return IliasDownloadForumData(url=post_url, form_data=form_data, empty=len(thread_ids) == 0)
|
||||||
|
|
||||||
def get_next_stage_element(self) -> Optional[IliasPageElement]:
|
def get_next_stage_element(self) -> Optional[IliasPageElement]:
|
||||||
if self._is_forum_page():
|
if self._is_forum_page():
|
||||||
|
@ -658,7 +658,7 @@ instance's greatest bottleneck.
|
|||||||
@_iorepeat(3, "crawling forum")
|
@_iorepeat(3, "crawling forum")
|
||||||
@anoncritical
|
@anoncritical
|
||||||
async def _crawl_forum(self, element: IliasPageElement, cl: CrawlToken) -> None:
|
async def _crawl_forum(self, element: IliasPageElement, cl: CrawlToken) -> None:
|
||||||
elements = []
|
elements: List[IliasForumThread] = []
|
||||||
|
|
||||||
async with cl:
|
async with cl:
|
||||||
next_stage_url = element.url
|
next_stage_url = element.url
|
||||||
@ -677,6 +677,10 @@ instance's greatest bottleneck.
|
|||||||
download_data = page.get_download_forum_data()
|
download_data = page.get_download_forum_data()
|
||||||
if not download_data:
|
if not download_data:
|
||||||
raise CrawlWarning("Failed to extract forum data")
|
raise CrawlWarning("Failed to extract forum data")
|
||||||
|
if download_data.empty:
|
||||||
|
log.explain("Forum had no threads")
|
||||||
|
elements = []
|
||||||
|
return
|
||||||
html = await self._post_authenticated(download_data.url, download_data.form_data)
|
html = await self._post_authenticated(download_data.url, download_data.form_data)
|
||||||
elements = parse_ilias_forum_export(soupify(html))
|
elements = parse_ilias_forum_export(soupify(html))
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user