mirror of
				https://github.com/Garmelon/PFERD.git
				synced 2025-11-03 22:23:41 +01:00 
			
		
		
		
	Handle empty forums
This commit is contained in:
		@@ -24,6 +24,7 @@ ambiguous situations.
 | 
			
		||||
 | 
			
		||||
### Fixed
 | 
			
		||||
- Forum crawling crashing when parsing empty (= 0 messages) threads
 | 
			
		||||
- Forum crawling crashing when a forum has no threads at all
 | 
			
		||||
 | 
			
		||||
## 3.4.1 - 2022-08-17
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -59,6 +59,7 @@ class IliasPageElement:
 | 
			
		||||
class IliasDownloadForumData:
 | 
			
		||||
    url: str
 | 
			
		||||
    form_data: Dict[str, Union[str, List[str]]]
 | 
			
		||||
    empty: bool
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@dataclass
 | 
			
		||||
@@ -130,14 +131,16 @@ class IliasPage:
 | 
			
		||||
            return None
 | 
			
		||||
        post_url = self._abs_url_from_relative(form["action"])
 | 
			
		||||
 | 
			
		||||
        thread_ids = [f["value"] for f in form.find_all(attrs={"name": "thread_ids[]"})]
 | 
			
		||||
 | 
			
		||||
        form_data: Dict[str, Union[str, List[str]]] = {
 | 
			
		||||
            "thread_ids[]": [f["value"] for f in form.find_all(attrs={"name": "thread_ids[]"})],
 | 
			
		||||
            "thread_ids[]": thread_ids,
 | 
			
		||||
            "selected_cmd2": "html",
 | 
			
		||||
            "select_cmd2": "Ausführen",
 | 
			
		||||
            "selected_cmd": "",
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        return IliasDownloadForumData(post_url, form_data)
 | 
			
		||||
        return IliasDownloadForumData(url=post_url, form_data=form_data, empty=len(thread_ids) == 0)
 | 
			
		||||
 | 
			
		||||
    def get_next_stage_element(self) -> Optional[IliasPageElement]:
 | 
			
		||||
        if self._is_forum_page():
 | 
			
		||||
 
 | 
			
		||||
@@ -658,7 +658,7 @@ instance's greatest bottleneck.
 | 
			
		||||
    @_iorepeat(3, "crawling forum")
 | 
			
		||||
    @anoncritical
 | 
			
		||||
    async def _crawl_forum(self, element: IliasPageElement, cl: CrawlToken) -> None:
 | 
			
		||||
        elements = []
 | 
			
		||||
        elements: List[IliasForumThread] = []
 | 
			
		||||
 | 
			
		||||
        async with cl:
 | 
			
		||||
            next_stage_url = element.url
 | 
			
		||||
@@ -677,6 +677,10 @@ instance's greatest bottleneck.
 | 
			
		||||
            download_data = page.get_download_forum_data()
 | 
			
		||||
            if not download_data:
 | 
			
		||||
                raise CrawlWarning("Failed to extract forum data")
 | 
			
		||||
            if download_data.empty:
 | 
			
		||||
                log.explain("Forum had no threads")
 | 
			
		||||
                elements = []
 | 
			
		||||
                return
 | 
			
		||||
            html = await self._post_authenticated(download_data.url, download_data.form_data)
 | 
			
		||||
            elements = parse_ilias_forum_export(soupify(html))
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user