From 803e5628a22d49877ef1be6d8974901b91605c31 Mon Sep 17 00:00:00 2001
From: Joscha
Date: Sun, 23 May 2021 11:30:16 +0200
Subject: [PATCH] Clean up logging

Paths are now (hopefully) logged consistently across all crawlers
---
 PFERD/config.py                               | 12 ++---
 PFERD/crawler.py                              | 10 ++--
 PFERD/crawlers/ilias/kit_ilias_web_crawler.py | 15 +++---
 PFERD/http_crawler.py                         |  8 +--
 PFERD/logging.py                              | 32 ++++++++++++
 PFERD/output_dir.py                           | 22 ++++-----
 PFERD/transformer.py                          |  3 +-
 PFERD/utils.py                                | 49 +++++++++++--------
 8 files changed, 95 insertions(+), 56 deletions(-)

diff --git a/PFERD/config.py b/PFERD/config.py
index e68db53..3c69fc7 100644
--- a/PFERD/config.py
+++ b/PFERD/config.py
@@ -8,7 +8,7 @@ from typing import Any, List, NoReturn, Optional, Tuple
 from rich.markup import escape
 
 from .logging import log
-from .utils import prompt_yes_no
+from .utils import fmt_real_path, prompt_yes_no
 
 
 class ConfigLoadError(Exception):
@@ -17,7 +17,7 @@ class ConfigLoadError(Exception):
     """
 
     def __init__(self, path: Path, reason: str):
-        super().__init__(f"Failed to load config from {path}")
+        super().__init__(f"Failed to load config from {fmt_real_path(path)}")
         self.path = path
         self.reason = reason
 
@@ -36,7 +36,7 @@ class ConfigOptionError(Exception):
 
 
 class ConfigDumpError(Exception):
     def __init__(self, path: Path, reason: str):
-        super().__init__(f"Failed to dump config to {path}")
+        super().__init__(f"Failed to dump config to {fmt_real_path(path)}")
         self.path = path
         self.reason = reason
@@ -105,7 +105,7 @@ class Config:
         else:
             log.explain("Using default path")
             path = Config._default_path()
-        log.explain(f"Loading {str(path)!r}")
+        log.explain(f"Loading {fmt_real_path(path)}")
 
         # Using config.read_file instead of config.read because config.read
         # would just ignore a missing file and carry on.
@@ -130,8 +130,8 @@ class Config:
             log.explain("Using default path")
             path = self._default_path()
 
-        log.explain(f"Dumping to {str(path.absolute())!r}")
-        log.print(f"[bold bright_cyan]Dumping[/] to {escape(repr(str(path.absolute())))}")
+        log.explain(f"Dumping to {fmt_real_path(path)}")
+        log.print(f"[bold bright_cyan]Dumping[/] to {escape(fmt_real_path(path))}")
 
         try:
             path.parent.mkdir(parents=True, exist_ok=True)
diff --git a/PFERD/crawler.py b/PFERD/crawler.py
index 61f1868..53640e3 100644
--- a/PFERD/crawler.py
+++ b/PFERD/crawler.py
@@ -12,7 +12,7 @@ from .logging import ProgressBar, log
 from .output_dir import FileSink, FileSinkToken, OnConflict, OutputDirectory, OutputDirError, Redownload
 from .report import MarkConflictError, MarkDuplicateError
 from .transformer import Transformer
-from .utils import ReusableAsyncContextManager
+from .utils import ReusableAsyncContextManager, fmt_path
 
 
 class CrawlWarning(Exception):
@@ -217,7 +217,7 @@ class Crawler(ABC):
         )
 
     async def crawl(self, path: PurePath) -> Optional[CrawlToken]:
-        log.explain_topic(f"Decision: Crawl {path}")
+        log.explain_topic(f"Decision: Crawl {fmt_path(path)}")
 
         if self._transformer.transform(path) is None:
             log.explain("Answer: No")
@@ -225,7 +225,7 @@ class Crawler(ABC):
 
         log.explain("Answer: Yes")
 
-        desc = f"[bold bright_cyan]Crawling[/] {escape(str(path))}"
+        desc = f"[bold bright_cyan]Crawling[/] {escape(fmt_path(path))}"
         return CrawlToken(self._limiter, desc)
 
     async def download(
@@ -235,7 +235,7 @@ class Crawler(ABC):
         redownload: Optional[Redownload] = None,
         on_conflict: Optional[OnConflict] = None,
     ) -> Optional[DownloadToken]:
-        log.explain_topic(f"Decision: Download {path}")
+        log.explain_topic(f"Decision: Download {fmt_path(path)}")
 
         transformed_path = self._transformer.transform(path)
         if transformed_path is None:
@@ -249,7 +249,7 @@ class Crawler(ABC):
 
         log.explain("Answer: Yes")
 
-        desc = f"[bold bright_cyan]Downloading[/] {escape(str(path))}"
+        desc = f"[bold bright_cyan]Downloading[/] {escape(fmt_path(path))}"
         return DownloadToken(self._limiter, fs_token, desc)
 
     async def _cleanup(self) -> None:
diff --git a/PFERD/crawlers/ilias/kit_ilias_web_crawler.py b/PFERD/crawlers/ilias/kit_ilias_web_crawler.py
index 1bdf5e4..424b4ba 100644
--- a/PFERD/crawlers/ilias/kit_ilias_web_crawler.py
+++ b/PFERD/crawlers/ilias/kit_ilias_web_crawler.py
@@ -1,13 +1,11 @@
 import asyncio
 import re
 from pathlib import PurePath
-# TODO In Python 3.9 and above, AsyncContextManager is deprecated
 from typing import Any, Awaitable, Callable, Dict, Optional, Set, TypeVar, Union
 
 import aiohttp
 from aiohttp import hdrs
 from bs4 import BeautifulSoup, Tag
-from rich.markup import escape
 
 from PFERD.authenticators import Authenticator
 from PFERD.config import Config
@@ -17,6 +15,7 @@ from PFERD.logging import ProgressBar, log
 from PFERD.output_dir import FileSink, Redownload
 from PFERD.utils import soupify, url_set_query_param
 
+from ...utils import fmt_path
 from .file_templates import link_template_plain, link_template_rich
 from .kit_ilias_html import IliasElementType, IliasPage, IliasPageElement
 
@@ -86,10 +85,10 @@ def _iorepeat(attempts: int, name: str) -> Callable[[AWrapped], AWrapped]:
                 last_exception = e
             except aiohttp.ClientConnectionError as e:  # e.g. timeout, disconnect, resolve failed, etc.
                 last_exception = e
-            log.explain_topic(f"Retrying operation {escape(name)}. Retries left: {attempts - 1 - round}")
+            log.explain_topic(f"Retrying operation {name}. Retries left: {attempts - 1 - round}")
 
         if last_exception:
-            message = f"Error in I/O Operation: {escape(str(last_exception))}"
+            message = f"Error in I/O Operation: {last_exception}"
             raise CrawlWarning(message) from last_exception
         raise CrawlError("Impossible return in ilias _iorepeat")
 
@@ -162,7 +161,7 @@ class KitIliasWebCrawler(HttpCrawler):
             log.explain_topic("Inferred crawl target: Personal desktop")
             await self._crawl_desktop()
         else:
-            log.explain_topic(f"Inferred crawl target: URL {escape(self._target)}")
+            log.explain_topic(f"Inferred crawl target: URL {self._target}")
             await self._crawl_url(self._target)
 
     async def _crawl_course(self, course_id: int) -> None:
@@ -190,9 +189,7 @@ class KitIliasWebCrawler(HttpCrawler):
         if expected_id is not None:
             perma_link_element: Tag = soup.find(id="current_perma_link")
             if not perma_link_element or "crs_" not in perma_link_element.get("value"):
-                raise CrawlError(
-                    "Invalid course id? I didn't find anything looking like a course"
-                )
+                raise CrawlError("Invalid course id? Didn't find anything looking like a course")
 
         # Duplicated code, but the root page is special - we want to avoid fetching it twice!
         page = IliasPage(soup, url, None)
@@ -236,7 +233,7 @@ class KitIliasWebCrawler(HttpCrawler):
         if element.type == IliasElementType.FILE:
             await self._download_file(element, element_path)
         elif element.type == IliasElementType.FORUM:
-            log.explain_topic(f"Decision: Crawl {escape(str(element_path))}")
+            log.explain_topic(f"Decision: Crawl {fmt_path(element_path)}")
             log.explain("Forums are not supported")
             log.explain("Answer: No")
         elif element.type == IliasElementType.LINK:
diff --git a/PFERD/http_crawler.py b/PFERD/http_crawler.py
index b9cfeea..15e9ff1 100644
--- a/PFERD/http_crawler.py
+++ b/PFERD/http_crawler.py
@@ -3,11 +3,11 @@ from pathlib import PurePath
 from typing import Optional
 
 import aiohttp
-from rich.markup import escape
 
 from .config import Config
 from .crawler import Crawler, CrawlerSection
 from .logging import log
+from .utils import fmt_real_path
 from .version import NAME, VERSION
 
 
@@ -59,14 +59,14 @@ class HttpCrawler(Crawler):
     async def _save_cookies(self) -> None:
         log.explain_topic("Saving cookies")
         if not self._current_cookie_jar:
-            log.explain("No cookie jar - save aborted")
+            log.explain("No cookie jar, save aborted")
             return
 
         try:
             self._current_cookie_jar.save(self._cookie_jar_path)
-            log.explain(f"Cookies saved to {escape(str(self.COOKIE_FILE))}")
+            log.explain(f"Cookies saved to {fmt_real_path(self._cookie_jar_path)}")
         except Exception:
-            log.print(f"[bold red]Warning:[/] Failed to save cookies to {escape(str(self.COOKIE_FILE))}")
+            log.warn(f"Failed to save cookies to {fmt_real_path(self._cookie_jar_path)}")
 
     async def run(self) -> None:
         self._current_cookie_jar = aiohttp.CookieJar()
diff --git a/PFERD/logging.py b/PFERD/logging.py
index beb92c6..9eb2c7c 100644
--- a/PFERD/logging.py
+++ b/PFERD/logging.py
@@ -112,6 +112,10 @@ class Log:
             self.print(line)
 
     def print(self, text: str) -> None:
+        """
+        Print a normal message. Allows markup.
+        """
+
         if self._progress_suspended:
             self._lines.append(text)
         else:
@@ -120,12 +124,24 @@ class Log:
 
     # TODO Print errors (and warnings?) to stderr
     def warn(self, text: str) -> None:
+        """
+        Print a warning message. Allows no markup.
+        """
+
         self.print(f"[bold bright_red]Warning[/] {escape(text)}")
 
     def error(self, text: str) -> None:
+        """
+        Print an error message. Allows no markup.
+ """ + self.print(f"[bold bright_red]Error[/] [red]{escape(text)}") def error_contd(self, text: str) -> None: + """ + Print further lines of an error message. Allows no markup. + """ + self.print(f"[red]{escape(text)}") def unexpected_exception(self) -> None: @@ -153,18 +169,34 @@ directly or as a GitHub issue: https://github.com/Garmelon/PFERD/issues/new """.strip()) def explain_topic(self, text: str) -> None: + """ + Print a top-level explain text. Allows no markup. + """ + if self.output_explain: self.print(f"[cyan]{escape(text)}") def explain(self, text: str) -> None: + """ + Print an indented explain text. Allows no markup. + """ + if self.output_explain: self.print(f" {escape(text)}") def action(self, text: str) -> None: + """ + Print a status update while crawling. Allows markup. + """ + if self.output_action: self.print(text) def report(self, text: str) -> None: + """ + Print a report after crawling. Allows markup. + """ + if self.output_report: self.print(text) diff --git a/PFERD/output_dir.py b/PFERD/output_dir.py index f9a7c99..1f83de6 100644 --- a/PFERD/output_dir.py +++ b/PFERD/output_dir.py @@ -14,7 +14,7 @@ from rich.markup import escape from .logging import log from .report import Report -from .utils import ReusableAsyncContextManager, prompt_yes_no +from .utils import ReusableAsyncContextManager, fmt_path, fmt_real_path, prompt_yes_no SUFFIX_CHARS = string.ascii_lowercase + string.digits SUFFIX_LENGTH = 6 @@ -143,7 +143,7 @@ class OutputDirectory: self._report = Report() def prepare(self) -> None: - log.explain_topic(f"Creating base directory at {str(self._root.absolute())!r}") + log.explain_topic(f"Creating base directory at {fmt_real_path(self._root)}") try: self._root.mkdir(parents=True, exist_ok=True) @@ -159,9 +159,9 @@ class OutputDirectory: """ if ".." in path.parts: - raise OutputDirError(f"Forbidden segment '..' in path {path}") + raise OutputDirError(f"Forbidden segment '..' in path {fmt_path(path)}") if "." in path.parts: - raise OutputDirError(f"Forbidden segment '.' in path {path}") + raise OutputDirError(f"Forbidden segment '.' in path {fmt_path(path)}") return self._root / path def _should_download( @@ -213,7 +213,7 @@ class OutputDirectory: ) -> bool: if on_conflict == OnConflict.PROMPT: async with log.exclusive_output(): - prompt = f"Replace {path} with remote file?" + prompt = f"Replace {fmt_path(path)} with remote file?" return await prompt_yes_no(prompt, default=False) elif on_conflict == OnConflict.LOCAL_FIRST: return False @@ -232,7 +232,7 @@ class OutputDirectory: ) -> bool: if on_conflict == OnConflict.PROMPT: async with log.exclusive_output(): - prompt = f"Recursively delete {path} and replace with remote file?" + prompt = f"Recursively delete {fmt_path(path)} and replace with remote file?" return await prompt_yes_no(prompt, default=False) elif on_conflict == OnConflict.LOCAL_FIRST: return False @@ -252,7 +252,7 @@ class OutputDirectory: ) -> bool: if on_conflict == OnConflict.PROMPT: async with log.exclusive_output(): - prompt = f"Delete {parent} so remote file {path} can be downloaded?" + prompt = f"Delete {fmt_path(parent)} so remote file {fmt_path(path)} can be downloaded?" return await prompt_yes_no(prompt, default=False) elif on_conflict == OnConflict.LOCAL_FIRST: return False @@ -271,7 +271,7 @@ class OutputDirectory: ) -> bool: if on_conflict == OnConflict.PROMPT: async with log.exclusive_output(): - prompt = f"Delete {path}?" + prompt = f"Delete {fmt_path(path)}?" 
                 return await prompt_yes_no(prompt, default=False)
         elif on_conflict == OnConflict.LOCAL_FIRST:
             return False
@@ -386,10 +386,10 @@ class OutputDirectory:
             self._update_metadata(info)
 
         if changed:
-            log.action(f"[bold bright_yellow]Changed[/] {escape(str(info.path))}")
+            log.action(f"[bold bright_yellow]Changed[/] {escape(fmt_path(info.path))}")
             self._report.change_file(info.path)
         else:
-            log.action(f"[bold bright_green]Added[/] {escape(str(info.path))}")
+            log.action(f"[bold bright_green]Added[/] {escape(fmt_path(info.path))}")
             self._report.add_file(info.path)
 
     async def cleanup(self) -> None:
@@ -419,7 +419,7 @@ class OutputDirectory:
             if await self._conflict_delete_lf(self._on_conflict, pure):
                 try:
                     path.unlink()
-                    log.action(f"[bold bright_magenta]Deleted[/] {escape(str(path))}")
+                    log.action(f"[bold bright_magenta]Deleted[/] {escape(fmt_path(path))}")
                     self._report.delete_file(pure)
                 except OSError:
                     pass
diff --git a/PFERD/transformer.py b/PFERD/transformer.py
index 2604c43..9670d0e 100644
--- a/PFERD/transformer.py
+++ b/PFERD/transformer.py
@@ -10,6 +10,7 @@ from pathlib import PurePath
 from typing import Dict, Optional, Union
 
 from .logging import log
+from .utils import fmt_path
 
 
 class Rule(ABC):
@@ -327,7 +328,7 @@ class Transformer:
             result = rule.transform(path)
 
             if isinstance(result, PurePath):
-                log.explain(f"Match! Transformed to {result}")
+                log.explain(f"Match! Transformed to {fmt_path(result)}")
                 return result
             elif result:  # Exclamation mark
                 log.explain("Match! Ignored")
diff --git a/PFERD/utils.py b/PFERD/utils.py
index 1d11565..397feda 100644
--- a/PFERD/utils.py
+++ b/PFERD/utils.py
@@ -4,6 +4,7 @@ import sys
 import threading
 from abc import ABC, abstractmethod
 from contextlib import AsyncExitStack
+from pathlib import Path, PurePath
 from types import TracebackType
 from typing import Any, Callable, Dict, Generic, Optional, Type, TypeVar
 from urllib.parse import parse_qs, urlencode, urlsplit, urlunsplit
@@ -34,6 +35,30 @@ async def agetpass(prompt: str) -> str:
     return await in_daemon_thread(lambda: getpass.getpass(prompt))
 
 
+async def prompt_yes_no(query: str, default: Optional[bool]) -> bool:
+    """
+    Asks the user a yes/no question and returns their choice.
+    """
+
+    if default is True:
+        query += " [Y/n] "
+    elif default is False:
+        query += " [y/N] "
+    else:
+        query += " [y/n] "
+
+    while True:
+        response = (await ainput(query)).strip().lower()
+        if response == "y":
+            return True
+        elif response == "n":
+            return False
+        elif response == "" and default is not None:
+            return default
+
+        print("Please answer with 'y' or 'n'.")
+
+
 def soupify(data: bytes) -> bs4.BeautifulSoup:
     """
     Parses HTML to a beautifulsoup object.
@@ -66,28 +91,12 @@ def url_set_query_params(url: str, params: Dict[str, str]) -> str:
     return result
 
 
-async def prompt_yes_no(query: str, default: Optional[bool]) -> bool:
-    """
-    Asks the user a yes/no question and returns their choice.
-    """
-
-    if default is True:
-        query += " [Y/n] "
-    elif default is False:
-        query += " [y/N] "
-    else:
-        query += " [y/n] "
+def fmt_path(path: PurePath) -> str:
+    return repr(str(path))
 
-    while True:
-        response = (await ainput(query)).strip().lower()
-        if response == "y":
-            return True
-        elif response == "n":
-            return False
-        elif response == "" and default is not None:
-            return default
 
-        print("Please answer with 'y' or 'n'.")
+def fmt_real_path(path: Path) -> str:
+    return repr(str(path.absolute()))
 
 
 class ReusableAsyncContextManager(ABC, Generic[T]):
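
For reference, here is a minimal sketch of how the two new formatting helpers behave. It is an illustration only, not part of the patch, and the sample paths are made up:

    from pathlib import Path, PurePath

    def fmt_path(path: PurePath) -> str:
        # Quote the path exactly as given, which makes empty names and
        # embedded whitespace visible in log output.
        return repr(str(path))

    def fmt_real_path(path: Path) -> str:
        # Additionally make the path absolute, so real filesystem
        # locations (config file, cookie jar) are unambiguous.
        return repr(str(path.absolute()))

    print(fmt_path(PurePath("course/My File.pdf")))  # 'course/My File.pdf'
    print(fmt_real_path(Path("pferd.cfg")))          # e.g. '/home/user/pferd.cfg'

Callers that embed these strings in rich markup (log.print, log.action) still wrap them in escape(), since a quoted path may itself contain markup characters such as "[".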