mirror of
https://github.com/Garmelon/PFERD.git
synced 2023-12-21 10:23:01 +01:00
Clean up logging
Paths are now (hopefully) logged consistently across all crawlers
This commit is contained in:
parent
c88f20859a
commit
803e5628a2
@ -8,7 +8,7 @@ from typing import Any, List, NoReturn, Optional, Tuple
|
||||
from rich.markup import escape
|
||||
|
||||
from .logging import log
|
||||
from .utils import prompt_yes_no
|
||||
from .utils import fmt_real_path, prompt_yes_no
|
||||
|
||||
|
||||
class ConfigLoadError(Exception):
|
||||
@ -17,7 +17,7 @@ class ConfigLoadError(Exception):
|
||||
"""
|
||||
|
||||
def __init__(self, path: Path, reason: str):
|
||||
super().__init__(f"Failed to load config from {path}")
|
||||
super().__init__(f"Failed to load config from {fmt_real_path(path)}")
|
||||
self.path = path
|
||||
self.reason = reason
|
||||
|
||||
@ -36,7 +36,7 @@ class ConfigOptionError(Exception):
|
||||
|
||||
class ConfigDumpError(Exception):
|
||||
def __init__(self, path: Path, reason: str):
|
||||
super().__init__(f"Failed to dump config to {path}")
|
||||
super().__init__(f"Failed to dump config to {fmt_real_path(path)}")
|
||||
self.path = path
|
||||
self.reason = reason
|
||||
|
||||
@ -105,7 +105,7 @@ class Config:
|
||||
else:
|
||||
log.explain("Using default path")
|
||||
path = Config._default_path()
|
||||
log.explain(f"Loading {str(path)!r}")
|
||||
log.explain(f"Loading {fmt_real_path(path)}")
|
||||
|
||||
# Using config.read_file instead of config.read because config.read
|
||||
# would just ignore a missing file and carry on.
|
||||
@ -130,8 +130,8 @@ class Config:
|
||||
log.explain("Using default path")
|
||||
path = self._default_path()
|
||||
|
||||
log.explain(f"Dumping to {str(path.absolute())!r}")
|
||||
log.print(f"[bold bright_cyan]Dumping[/] to {escape(repr(str(path.absolute())))}")
|
||||
log.explain(f"Dumping to {fmt_real_path(path)}")
|
||||
log.print(f"[bold bright_cyan]Dumping[/] to {escape(fmt_real_path(path))}")
|
||||
|
||||
try:
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
@ -12,7 +12,7 @@ from .logging import ProgressBar, log
|
||||
from .output_dir import FileSink, FileSinkToken, OnConflict, OutputDirectory, OutputDirError, Redownload
|
||||
from .report import MarkConflictError, MarkDuplicateError
|
||||
from .transformer import Transformer
|
||||
from .utils import ReusableAsyncContextManager
|
||||
from .utils import ReusableAsyncContextManager, fmt_path
|
||||
|
||||
|
||||
class CrawlWarning(Exception):
|
||||
@ -217,7 +217,7 @@ class Crawler(ABC):
|
||||
)
|
||||
|
||||
async def crawl(self, path: PurePath) -> Optional[CrawlToken]:
|
||||
log.explain_topic(f"Decision: Crawl {path}")
|
||||
log.explain_topic(f"Decision: Crawl {fmt_path(path)}")
|
||||
|
||||
if self._transformer.transform(path) is None:
|
||||
log.explain("Answer: No")
|
||||
@ -225,7 +225,7 @@ class Crawler(ABC):
|
||||
|
||||
log.explain("Answer: Yes")
|
||||
|
||||
desc = f"[bold bright_cyan]Crawling[/] {escape(str(path))}"
|
||||
desc = f"[bold bright_cyan]Crawling[/] {escape(fmt_path(path))}"
|
||||
return CrawlToken(self._limiter, desc)
|
||||
|
||||
async def download(
|
||||
@ -235,7 +235,7 @@ class Crawler(ABC):
|
||||
redownload: Optional[Redownload] = None,
|
||||
on_conflict: Optional[OnConflict] = None,
|
||||
) -> Optional[DownloadToken]:
|
||||
log.explain_topic(f"Decision: Download {path}")
|
||||
log.explain_topic(f"Decision: Download {fmt_path(path)}")
|
||||
|
||||
transformed_path = self._transformer.transform(path)
|
||||
if transformed_path is None:
|
||||
@ -249,7 +249,7 @@ class Crawler(ABC):
|
||||
|
||||
log.explain("Answer: Yes")
|
||||
|
||||
desc = f"[bold bright_cyan]Downloading[/] {escape(str(path))}"
|
||||
desc = f"[bold bright_cyan]Downloading[/] {escape(fmt_path(path))}"
|
||||
return DownloadToken(self._limiter, fs_token, desc)
|
||||
|
||||
async def _cleanup(self) -> None:
|
||||
|
@ -1,13 +1,11 @@
|
||||
import asyncio
|
||||
import re
|
||||
from pathlib import PurePath
|
||||
# TODO In Python 3.9 and above, AsyncContextManager is deprecated
|
||||
from typing import Any, Awaitable, Callable, Dict, Optional, Set, TypeVar, Union
|
||||
|
||||
import aiohttp
|
||||
from aiohttp import hdrs
|
||||
from bs4 import BeautifulSoup, Tag
|
||||
from rich.markup import escape
|
||||
|
||||
from PFERD.authenticators import Authenticator
|
||||
from PFERD.config import Config
|
||||
@ -17,6 +15,7 @@ from PFERD.logging import ProgressBar, log
|
||||
from PFERD.output_dir import FileSink, Redownload
|
||||
from PFERD.utils import soupify, url_set_query_param
|
||||
|
||||
from ...utils import fmt_path
|
||||
from .file_templates import link_template_plain, link_template_rich
|
||||
from .kit_ilias_html import IliasElementType, IliasPage, IliasPageElement
|
||||
|
||||
@ -86,10 +85,10 @@ def _iorepeat(attempts: int, name: str) -> Callable[[AWrapped], AWrapped]:
|
||||
last_exception = e
|
||||
except aiohttp.ClientConnectionError as e: # e.g. timeout, disconnect, resolve failed, etc.
|
||||
last_exception = e
|
||||
log.explain_topic(f"Retrying operation {escape(name)}. Retries left: {attempts - 1 - round}")
|
||||
log.explain_topic(f"Retrying operation {name}. Retries left: {attempts - 1 - round}")
|
||||
|
||||
if last_exception:
|
||||
message = f"Error in I/O Operation: {escape(str(last_exception))}"
|
||||
message = f"Error in I/O Operation: {last_exception}"
|
||||
raise CrawlWarning(message) from last_exception
|
||||
raise CrawlError("Impossible return in ilias _iorepeat")
|
||||
|
||||
@ -162,7 +161,7 @@ class KitIliasWebCrawler(HttpCrawler):
|
||||
log.explain_topic("Inferred crawl target: Personal desktop")
|
||||
await self._crawl_desktop()
|
||||
else:
|
||||
log.explain_topic(f"Inferred crawl target: URL {escape(self._target)}")
|
||||
log.explain_topic(f"Inferred crawl target: URL {self._target}")
|
||||
await self._crawl_url(self._target)
|
||||
|
||||
async def _crawl_course(self, course_id: int) -> None:
|
||||
@ -190,9 +189,7 @@ class KitIliasWebCrawler(HttpCrawler):
|
||||
if expected_id is not None:
|
||||
perma_link_element: Tag = soup.find(id="current_perma_link")
|
||||
if not perma_link_element or "crs_" not in perma_link_element.get("value"):
|
||||
raise CrawlError(
|
||||
"Invalid course id? I didn't find anything looking like a course"
|
||||
)
|
||||
raise CrawlError("Invalid course id? Didn't find anything looking like a course")
|
||||
|
||||
# Duplicated code, but the root page is special - we want to void fetching it twice!
|
||||
page = IliasPage(soup, url, None)
|
||||
@ -236,7 +233,7 @@ class KitIliasWebCrawler(HttpCrawler):
|
||||
if element.type == IliasElementType.FILE:
|
||||
await self._download_file(element, element_path)
|
||||
elif element.type == IliasElementType.FORUM:
|
||||
log.explain_topic(f"Decision: Crawl {escape(str(element_path))}")
|
||||
log.explain_topic(f"Decision: Crawl {fmt_path(element_path)}")
|
||||
log.explain("Forums are not supported")
|
||||
log.explain("Answer: No")
|
||||
elif element.type == IliasElementType.LINK:
|
||||
|
@ -3,11 +3,11 @@ from pathlib import PurePath
|
||||
from typing import Optional
|
||||
|
||||
import aiohttp
|
||||
from rich.markup import escape
|
||||
|
||||
from .config import Config
|
||||
from .crawler import Crawler, CrawlerSection
|
||||
from .logging import log
|
||||
from .utils import fmt_real_path
|
||||
from .version import NAME, VERSION
|
||||
|
||||
|
||||
@ -59,14 +59,14 @@ class HttpCrawler(Crawler):
|
||||
async def _save_cookies(self) -> None:
|
||||
log.explain_topic("Saving cookies")
|
||||
if not self._current_cookie_jar:
|
||||
log.explain("No cookie jar - save aborted")
|
||||
log.explain("No cookie jar, save aborted")
|
||||
return
|
||||
|
||||
try:
|
||||
self._current_cookie_jar.save(self._cookie_jar_path)
|
||||
log.explain(f"Cookies saved to {escape(str(self.COOKIE_FILE))}")
|
||||
log.explain(f"Cookies saved to {fmt_real_path(self._cookie_jar_path)}")
|
||||
except Exception:
|
||||
log.print(f"[bold red]Warning:[/] Failed to save cookies to {escape(str(self.COOKIE_FILE))}")
|
||||
log.warn(f"Failed to save cookies to {fmt_real_path(self._cookie_jar_path)}")
|
||||
|
||||
async def run(self) -> None:
|
||||
self._current_cookie_jar = aiohttp.CookieJar()
|
||||
|
@ -112,6 +112,10 @@ class Log:
|
||||
self.print(line)
|
||||
|
||||
def print(self, text: str) -> None:
|
||||
"""
|
||||
Print a normal message. Allows markup.
|
||||
"""
|
||||
|
||||
if self._progress_suspended:
|
||||
self._lines.append(text)
|
||||
else:
|
||||
@ -120,12 +124,24 @@ class Log:
|
||||
# TODO Print errors (and warnings?) to stderr
|
||||
|
||||
def warn(self, text: str) -> None:
|
||||
"""
|
||||
Print a warning message. Allows no markup.
|
||||
"""
|
||||
|
||||
self.print(f"[bold bright_red]Warning[/] {escape(text)}")
|
||||
|
||||
def error(self, text: str) -> None:
|
||||
"""
|
||||
Print an error message. Allows no markup.
|
||||
"""
|
||||
|
||||
self.print(f"[bold bright_red]Error[/] [red]{escape(text)}")
|
||||
|
||||
def error_contd(self, text: str) -> None:
|
||||
"""
|
||||
Print further lines of an error message. Allows no markup.
|
||||
"""
|
||||
|
||||
self.print(f"[red]{escape(text)}")
|
||||
|
||||
def unexpected_exception(self) -> None:
|
||||
@ -153,18 +169,34 @@ directly or as a GitHub issue: https://github.com/Garmelon/PFERD/issues/new
|
||||
""".strip())
|
||||
|
||||
def explain_topic(self, text: str) -> None:
|
||||
"""
|
||||
Print a top-level explain text. Allows no markup.
|
||||
"""
|
||||
|
||||
if self.output_explain:
|
||||
self.print(f"[cyan]{escape(text)}")
|
||||
|
||||
def explain(self, text: str) -> None:
|
||||
"""
|
||||
Print an indented explain text. Allows no markup.
|
||||
"""
|
||||
|
||||
if self.output_explain:
|
||||
self.print(f" {escape(text)}")
|
||||
|
||||
def action(self, text: str) -> None:
|
||||
"""
|
||||
Print a status update while crawling. Allows markup.
|
||||
"""
|
||||
|
||||
if self.output_action:
|
||||
self.print(text)
|
||||
|
||||
def report(self, text: str) -> None:
|
||||
"""
|
||||
Print a report after crawling. Allows markup.
|
||||
"""
|
||||
|
||||
if self.output_report:
|
||||
self.print(text)
|
||||
|
||||
|
@ -14,7 +14,7 @@ from rich.markup import escape
|
||||
|
||||
from .logging import log
|
||||
from .report import Report
|
||||
from .utils import ReusableAsyncContextManager, prompt_yes_no
|
||||
from .utils import ReusableAsyncContextManager, fmt_path, fmt_real_path, prompt_yes_no
|
||||
|
||||
SUFFIX_CHARS = string.ascii_lowercase + string.digits
|
||||
SUFFIX_LENGTH = 6
|
||||
@ -143,7 +143,7 @@ class OutputDirectory:
|
||||
self._report = Report()
|
||||
|
||||
def prepare(self) -> None:
|
||||
log.explain_topic(f"Creating base directory at {str(self._root.absolute())!r}")
|
||||
log.explain_topic(f"Creating base directory at {fmt_real_path(self._root)}")
|
||||
|
||||
try:
|
||||
self._root.mkdir(parents=True, exist_ok=True)
|
||||
@ -159,9 +159,9 @@ class OutputDirectory:
|
||||
"""
|
||||
|
||||
if ".." in path.parts:
|
||||
raise OutputDirError(f"Forbidden segment '..' in path {path}")
|
||||
raise OutputDirError(f"Forbidden segment '..' in path {fmt_path(path)}")
|
||||
if "." in path.parts:
|
||||
raise OutputDirError(f"Forbidden segment '.' in path {path}")
|
||||
raise OutputDirError(f"Forbidden segment '.' in path {fmt_path(path)}")
|
||||
return self._root / path
|
||||
|
||||
def _should_download(
|
||||
@ -213,7 +213,7 @@ class OutputDirectory:
|
||||
) -> bool:
|
||||
if on_conflict == OnConflict.PROMPT:
|
||||
async with log.exclusive_output():
|
||||
prompt = f"Replace {path} with remote file?"
|
||||
prompt = f"Replace {fmt_path(path)} with remote file?"
|
||||
return await prompt_yes_no(prompt, default=False)
|
||||
elif on_conflict == OnConflict.LOCAL_FIRST:
|
||||
return False
|
||||
@ -232,7 +232,7 @@ class OutputDirectory:
|
||||
) -> bool:
|
||||
if on_conflict == OnConflict.PROMPT:
|
||||
async with log.exclusive_output():
|
||||
prompt = f"Recursively delete {path} and replace with remote file?"
|
||||
prompt = f"Recursively delete {fmt_path(path)} and replace with remote file?"
|
||||
return await prompt_yes_no(prompt, default=False)
|
||||
elif on_conflict == OnConflict.LOCAL_FIRST:
|
||||
return False
|
||||
@ -252,7 +252,7 @@ class OutputDirectory:
|
||||
) -> bool:
|
||||
if on_conflict == OnConflict.PROMPT:
|
||||
async with log.exclusive_output():
|
||||
prompt = f"Delete {parent} so remote file {path} can be downloaded?"
|
||||
prompt = f"Delete {fmt_path(parent)} so remote file {fmt_path(path)} can be downloaded?"
|
||||
return await prompt_yes_no(prompt, default=False)
|
||||
elif on_conflict == OnConflict.LOCAL_FIRST:
|
||||
return False
|
||||
@ -271,7 +271,7 @@ class OutputDirectory:
|
||||
) -> bool:
|
||||
if on_conflict == OnConflict.PROMPT:
|
||||
async with log.exclusive_output():
|
||||
prompt = f"Delete {path}?"
|
||||
prompt = f"Delete {fmt_path(path)}?"
|
||||
return await prompt_yes_no(prompt, default=False)
|
||||
elif on_conflict == OnConflict.LOCAL_FIRST:
|
||||
return False
|
||||
@ -386,10 +386,10 @@ class OutputDirectory:
|
||||
self._update_metadata(info)
|
||||
|
||||
if changed:
|
||||
log.action(f"[bold bright_yellow]Changed[/] {escape(str(info.path))}")
|
||||
log.action(f"[bold bright_yellow]Changed[/] {escape(fmt_path(info.path))}")
|
||||
self._report.change_file(info.path)
|
||||
else:
|
||||
log.action(f"[bold bright_green]Added[/] {escape(str(info.path))}")
|
||||
log.action(f"[bold bright_green]Added[/] {escape(fmt_path(info.path))}")
|
||||
self._report.add_file(info.path)
|
||||
|
||||
async def cleanup(self) -> None:
|
||||
@ -419,7 +419,7 @@ class OutputDirectory:
|
||||
if await self._conflict_delete_lf(self._on_conflict, pure):
|
||||
try:
|
||||
path.unlink()
|
||||
log.action(f"[bold bright_magenta]Deleted[/] {escape(str(path))}")
|
||||
log.action(f"[bold bright_magenta]Deleted[/] {escape(fmt_path(path))}")
|
||||
self._report.delete_file(pure)
|
||||
except OSError:
|
||||
pass
|
||||
|
@ -10,6 +10,7 @@ from pathlib import PurePath
|
||||
from typing import Dict, Optional, Union
|
||||
|
||||
from .logging import log
|
||||
from .utils import fmt_path
|
||||
|
||||
|
||||
class Rule(ABC):
|
||||
@ -327,7 +328,7 @@ class Transformer:
|
||||
|
||||
result = rule.transform(path)
|
||||
if isinstance(result, PurePath):
|
||||
log.explain(f"Match! Transformed to {result}")
|
||||
log.explain(f"Match! Transformed to {fmt_path(result)}")
|
||||
return result
|
||||
elif result: # Exclamation mark
|
||||
log.explain("Match! Ignored")
|
||||
|
@ -4,6 +4,7 @@ import sys
|
||||
import threading
|
||||
from abc import ABC, abstractmethod
|
||||
from contextlib import AsyncExitStack
|
||||
from pathlib import Path, PurePath
|
||||
from types import TracebackType
|
||||
from typing import Any, Callable, Dict, Generic, Optional, Type, TypeVar
|
||||
from urllib.parse import parse_qs, urlencode, urlsplit, urlunsplit
|
||||
@ -34,6 +35,30 @@ async def agetpass(prompt: str) -> str:
|
||||
return await in_daemon_thread(lambda: getpass.getpass(prompt))
|
||||
|
||||
|
||||
async def prompt_yes_no(query: str, default: Optional[bool]) -> bool:
|
||||
"""
|
||||
Asks the user a yes/no question and returns their choice.
|
||||
"""
|
||||
|
||||
if default is True:
|
||||
query += " [Y/n] "
|
||||
elif default is False:
|
||||
query += " [y/N] "
|
||||
else:
|
||||
query += " [y/n] "
|
||||
|
||||
while True:
|
||||
response = (await ainput(query)).strip().lower()
|
||||
if response == "y":
|
||||
return True
|
||||
elif response == "n":
|
||||
return False
|
||||
elif response == "" and default is not None:
|
||||
return default
|
||||
|
||||
print("Please answer with 'y' or 'n'.")
|
||||
|
||||
|
||||
def soupify(data: bytes) -> bs4.BeautifulSoup:
|
||||
"""
|
||||
Parses HTML to a beautifulsoup object.
|
||||
@ -66,28 +91,12 @@ def url_set_query_params(url: str, params: Dict[str, str]) -> str:
|
||||
return result
|
||||
|
||||
|
||||
async def prompt_yes_no(query: str, default: Optional[bool]) -> bool:
|
||||
"""
|
||||
Asks the user a yes/no question and returns their choice.
|
||||
"""
|
||||
def fmt_path(path: PurePath) -> str:
|
||||
return repr(str(path))
|
||||
|
||||
if default is True:
|
||||
query += " [Y/n] "
|
||||
elif default is False:
|
||||
query += " [y/N] "
|
||||
else:
|
||||
query += " [y/n] "
|
||||
|
||||
while True:
|
||||
response = (await ainput(query)).strip().lower()
|
||||
if response == "y":
|
||||
return True
|
||||
elif response == "n":
|
||||
return False
|
||||
elif response == "" and default is not None:
|
||||
return default
|
||||
|
||||
print("Please answer with 'y' or 'n'.")
|
||||
def fmt_real_path(path: Path) -> str:
|
||||
return repr(str(path.absolute()))
|
||||
|
||||
|
||||
class ReusableAsyncContextManager(ABC, Generic[T]):
|
||||
|
Loading…
Reference in New Issue
Block a user