"Fix" mypy errors

Thank you mypy, very cool. These types make things *so much better*. They don't just complicate everything and don't really help because they cannot detect that an element queried by a tag is not a navigable string...
2025-07-12 22:22:30 +02:00 · 2025-02-12 22:41:43 +01:00
parent 16a2dd5b15
commit bd9d7efe64
8 changed files with 224 additions and 204 deletions
--- a/PFERD/crawl/http_crawler.py
+++ b/PFERD/crawl/http_crawler.py
@ -3,7 +3,7 @@ import http.cookies
 import ssl
 from datetime import datetime
 from pathlib import Path, PurePath
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Any, Dict, List, Optional, Tuple, cast

 import aiohttp
 import certifi
@ -187,12 +187,12 @@ class HttpCrawler(Crawler):
            if level == 0 or (level == 1 and drop_h1):
                return PurePath()

-            level_heading = tag.find_previous(name=f"h{level}")
+            level_heading = cast(Optional[Tag], tag.find_previous(name=f"h{level}"))

            if level_heading is None:
                return find_associated_headings(tag, level - 1)

-            folder_name = level_heading.getText().strip()
+            folder_name = level_heading.get_text().strip()
            return find_associated_headings(level_heading, level - 1) / folder_name

        # start at level <h3> because paragraph-level headings are usually too granular for folder names
@ -231,6 +231,7 @@ class HttpCrawler(Crawler):

                etag_header = resp.headers.get("ETag")
                last_modified_header = resp.headers.get("Last-Modified")
+                last_modified = None

                if last_modified_header:
                    try: