From bf27f4a6867c6c550c52d6de44c2e1ce1825f3ed Mon Sep 17 00:00:00 2001
From: Joscha
Date: Thu, 24 Nov 2022 15:47:01 +0100
Subject: [PATCH] TODO

---
 PFERD/crawl/kit_ipd_crawler.py | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/PFERD/crawl/kit_ipd_crawler.py b/PFERD/crawl/kit_ipd_crawler.py
index 7e8c417..7a92b8c 100644
--- a/PFERD/crawl/kit_ipd_crawler.py
+++ b/PFERD/crawl/kit_ipd_crawler.py
@@ -155,11 +155,15 @@ class KitIpdCrawler(HttpCrawler):
             sink.done()
 
     async def _get_page(self) -> Tuple[BeautifulSoup, str]:
-        async with self.session.get(self._url) as request:
-            # The web page for Algorithmen für Routenplanung contains some
-            # weird comments that beautifulsoup doesn't parse correctly. This
-            # hack enables those pages to be crawled, and should hopefully not
-            # cause issues on other pages.
-            content = (await request.read()).decode("utf-8")
-            content = re.sub(r"", "", content)
-            return soupify(content.encode("utf-8")), str(request.url)
+        """Fetch the page at self._url; return (soup, response URL)."""
+        # Read everything needed from the response while it is still open,
+        # so the parsing below does not have to happen inside the request.
+        async with self.session.get(self._url) as response:
+            raw = await response.read()
+            url = str(response.url)
+
+        # The web page for Algorithmen für Routenplanung contains some
+        # weird comments that beautifulsoup doesn't parse correctly. This
+        # hack enables those pages to be crawled, and should hopefully not
+        # cause issues on other pages.
+        content = re.sub(r"", "", raw.decode("utf-8"))
+        return soupify(content.encode("utf-8")), url