Improve output dir and report error handling

This commit is contained in:
Joscha 2021-05-22 20:54:42 +02:00
parent afac22c562
commit b4d97cd545
3 changed files with 29 additions and 38 deletions

View File

@@ -13,7 +13,8 @@ from .authenticator import Authenticator
from .config import Config, Section from .config import Config, Section
from .limiter import Limiter from .limiter import Limiter
from .logging import ProgressBar, log from .logging import ProgressBar, log
from .output_dir import FileSink, OnConflict, OutputDirectory, Redownload from .output_dir import FileSink, OnConflict, OutputDirectory, OutputDirError, Redownload
from .report import MarkConflictError, MarkDuplicateError
from .transformer import Transformer from .transformer import Transformer
from .version import NAME, VERSION from .version import NAME, VERSION
@@ -45,12 +46,10 @@ def noncritical(f: Wrapped) -> Wrapped:
try: try:
f(*args, **kwargs) f(*args, **kwargs)
except CrawlWarning as e: except (CrawlWarning, OutputDirError, MarkDuplicateError, MarkConflictError) as e:
log.print(f"[bold bright_red]Warning[/] {escape(str(e))}") log.warn(str(e))
crawler.error_free = False crawler.error_free = False
except CrawlError as e: except CrawlError:
# TODO Don't print error, just pass it on upwards
log.print(f"[bold bright_red]Error[/] [red]{escape(str(e))}")
crawler.error_free = False crawler.error_free = False
raise raise

View File

@@ -14,7 +14,7 @@ from typing import AsyncContextManager, BinaryIO, Iterator, Optional, Tuple
from rich.markup import escape from rich.markup import escape
from .logging import log from .logging import log
from .report import MarkConflictException, MarkDuplicateException, Report from .report import Report
from .utils import ReusableAsyncContextManager, prompt_yes_no from .utils import ReusableAsyncContextManager, prompt_yes_no
SUFFIX_CHARS = string.ascii_lowercase + string.digits SUFFIX_CHARS = string.ascii_lowercase + string.digits
@@ -22,7 +22,7 @@ SUFFIX_LENGTH = 6
TRIES = 5 TRIES = 5
class OutputDirException(Exception): class OutputDirError(Exception):
pass pass
@@ -146,25 +146,15 @@ class OutputDirectory:
def register_reserved(self, path: PurePath) -> None: def register_reserved(self, path: PurePath) -> None:
self._report.mark_reserved(path) self._report.mark_reserved(path)
def _mark(self, path: PurePath) -> None:
"""
May throw an OutputDirException
"""
try:
self._report.mark(path)
except MarkDuplicateException:
raise OutputDirException("Another file has already been placed here.")
except MarkConflictException as e:
raise OutputDirException(f"Collides with other file: {e.collides_with}")
def resolve(self, path: PurePath) -> Path: def resolve(self, path: PurePath) -> Path:
""" """
May throw an OutputDirException. May throw an OutputDirError.
""" """
if ".." in path.parts: if ".." in path.parts:
raise OutputDirException(f"Path {path} contains forbidden '..'") raise OutputDirError(f"Forbidden segment '..' in path {path}")
if "." in path.parts:
raise OutputDirError(f"Forbidden segment '.' in path {path}")
return self._root / path return self._root / path
def _should_download( def _should_download(
@@ -297,7 +287,7 @@ class OutputDirectory:
local_path: Path, local_path: Path,
) -> Tuple[Path, BinaryIO]: ) -> Tuple[Path, BinaryIO]:
""" """
May raise an OutputDirException. May raise an OutputDirError.
""" """
# Create tmp file # Create tmp file
@@ -309,7 +299,7 @@ class OutputDirectory:
except FileExistsError: except FileExistsError:
pass # Try again pass # Try again
raise OutputDirException(f"Failed to create temporary file {tmp_path}") raise OutputDirError("Failed to create temporary file")
async def download( async def download(
self, self,
@@ -319,7 +309,8 @@ class OutputDirectory:
on_conflict: Optional[OnConflict] = None, on_conflict: Optional[OnConflict] = None,
) -> Optional[AsyncContextManager[FileSink]]: ) -> Optional[AsyncContextManager[FileSink]]:
""" """
May throw an OutputDirException. May throw an OutputDirError, a MarkDuplicateError or a
MarkConflictError.
""" """
heuristics = Heuristics(mtime) heuristics = Heuristics(mtime)
@@ -327,7 +318,7 @@ class OutputDirectory:
on_conflict = self._on_conflict if on_conflict is None else on_conflict on_conflict = self._on_conflict if on_conflict is None else on_conflict
local_path = self.resolve(path) local_path = self.resolve(path)
self._mark(path) self._report.mark(path)
if not self._should_download(local_path, heuristics, redownload): if not self._should_download(local_path, heuristics, redownload):
return None return None

View File

@@ -1,19 +1,18 @@
from dataclasses import dataclass
from pathlib import PurePath from pathlib import PurePath
from typing import Set from typing import Set
@dataclass class MarkDuplicateError(Exception):
class MarkDuplicateException(Exception):
""" """
Tried to mark a file that was already marked. Tried to mark a file that was already marked.
""" """
path: PurePath def __init__(self, path: PurePath):
super().__init__(f"A previous file already used path {path}")
self.path = path
@dataclass class MarkConflictError(Exception):
class MarkConflictException(Exception):
""" """
Marking the path would have caused a conflict. Marking the path would have caused a conflict.
@@ -24,8 +23,10 @@ class MarkConflictException(Exception):
usually not possible. usually not possible.
""" """
path: PurePath def __init__(self, path: PurePath, collides_with: PurePath):
collides_with: PurePath super().__init__(f"File at {path} collides with previous file at {collides_with}")
self.path = path
self.collides_with = collides_with
# TODO Use PurePath.is_relative_to when updating to 3.9 # TODO Use PurePath.is_relative_to when updating to 3.9
@@ -58,16 +59,16 @@ class Report:
""" """
Mark a previously unknown file as known. Mark a previously unknown file as known.
May throw a MarkDuplicateException or a MarkConflictException. For more May throw a MarkDuplicateError or a MarkConflictError. For more detail,
detail, see the respective exception's docstring. see the respective exception's docstring.
""" """
for other in self.marked: for other in self.marked:
if path == other: if path == other:
raise MarkDuplicateException(path) raise MarkDuplicateError(path)
if is_relative_to(path, other) or is_relative_to(other, path): if is_relative_to(path, other) or is_relative_to(other, path):
raise MarkConflictException(path, other) raise MarkConflictError(path, other)
self.known_files.add(path) self.known_files.add(path)