From 722970a2556e0c24584bc46fd088b24eea8fc406 Mon Sep 17 00:00:00 2001 From: Joscha Date: Mon, 31 May 2021 20:04:56 +0000 Subject: [PATCH] Store cookies in text-based format Using the stdlib's http.cookies module, cookies are now stored as one "Set-Cookie" header per line. Previously, the aiohttp.CookieJar's save() and load() methods were used (which use pickling). --- CHANGELOG.md | 3 +++ PFERD/crawl/http_crawler.py | 26 +++++++++++++++++++++++--- 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 51e9a5b..f7e33ae 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,9 @@ ambiguous situations. - `--credential-file` option for `kit-ilias-web` command - Warning if using concurrent tasks with `kit-ilias-web` +### Changed +- Cookies are now stored in a text-based format + ### Fixed - Date parsing now also works correctly in non-group exercises diff --git a/PFERD/crawl/http_crawler.py b/PFERD/crawl/http_crawler.py index 9f52c66..fa4cf29 100644 --- a/PFERD/crawl/http_crawler.py +++ b/PFERD/crawl/http_crawler.py @@ -1,7 +1,8 @@ import asyncio +import http.cookies import ssl from pathlib import Path, PurePath -from typing import Dict, List, Optional +from typing import Any, Dict, List, Optional import aiohttp import certifi @@ -105,6 +106,25 @@ class HttpCrawler(Crawler): self._shared_cookie_jar_paths.append(self._cookie_jar_path) + def _load_cookies_from_file(self, path: Path) -> None: + jar: Any = http.cookies.SimpleCookie() + with open(path) as f: + for i, line in enumerate(f): + # Names of headers are case insensitive + if line[:11].lower() == "set-cookie:": + jar.load(line[11:]) + else: + log.explain(f"Line {i} doesn't start with 'Set-Cookie:', ignoring it") + self._cookie_jar.update_cookies(jar) + + def _save_cookies_to_file(self, path: Path) -> None: + jar: Any = http.cookies.SimpleCookie() + for morsel in self._cookie_jar: + jar[morsel.key] = morsel + with open(path, "w") as f: + f.write(jar.output(sep="\n")) + 
f.write("\n") # A trailing newline is just common courtesy + def _load_cookies(self) -> None: log.explain_topic("Loading cookies") @@ -134,7 +154,7 @@ class HttpCrawler(Crawler): log.explain(f"Loading cookies from {fmt_real_path(cookie_jar_path)}") try: - self._cookie_jar.load(cookie_jar_path) + self._load_cookies_from_file(cookie_jar_path) except Exception as e: log.explain("Failed to load cookies") log.explain(str(e)) @@ -144,7 +164,7 @@ class HttpCrawler(Crawler): try: log.explain(f"Saving cookies to {fmt_real_path(self._cookie_jar_path)}") - self._cookie_jar.save(self._cookie_jar_path) + self._save_cookies_to_file(self._cookie_jar_path) except Exception as e: log.warn(f"Failed to save cookies to {fmt_real_path(self._cookie_jar_path)}") log.warn(str(e))