Improve output dir and report error handling

This commit is contained in:
Joscha 2021-05-22 20:54:42 +02:00
parent afac22c562
commit b4d97cd545
3 changed files with 29 additions and 38 deletions

View File

@ -13,7 +13,8 @@ from .authenticator import Authenticator
from .config import Config, Section
from .limiter import Limiter
from .logging import ProgressBar, log
from .output_dir import FileSink, OnConflict, OutputDirectory, Redownload
from .output_dir import FileSink, OnConflict, OutputDirectory, OutputDirError, Redownload
from .report import MarkConflictError, MarkDuplicateError
from .transformer import Transformer
from .version import NAME, VERSION
@ -45,12 +46,10 @@ def noncritical(f: Wrapped) -> Wrapped:
try:
f(*args, **kwargs)
except CrawlWarning as e:
log.print(f"[bold bright_red]Warning[/] {escape(str(e))}")
except (CrawlWarning, OutputDirError, MarkDuplicateError, MarkConflictError) as e:
log.warn(str(e))
crawler.error_free = False
except CrawlError as e:
# TODO Don't print error, just pass it on upwards
log.print(f"[bold bright_red]Error[/] [red]{escape(str(e))}")
except CrawlError:
crawler.error_free = False
raise

View File

@ -14,7 +14,7 @@ from typing import AsyncContextManager, BinaryIO, Iterator, Optional, Tuple
from rich.markup import escape
from .logging import log
from .report import MarkConflictException, MarkDuplicateException, Report
from .report import Report
from .utils import ReusableAsyncContextManager, prompt_yes_no
SUFFIX_CHARS = string.ascii_lowercase + string.digits
@ -22,7 +22,7 @@ SUFFIX_LENGTH = 6
TRIES = 5
class OutputDirException(Exception):
class OutputDirError(Exception):
pass
@ -146,25 +146,15 @@ class OutputDirectory:
def register_reserved(self, path: PurePath) -> None:
self._report.mark_reserved(path)
def _mark(self, path: PurePath) -> None:
"""
May throw an OutputDirException
"""
try:
self._report.mark(path)
except MarkDuplicateException:
raise OutputDirException("Another file has already been placed here.")
except MarkConflictException as e:
raise OutputDirException(f"Collides with other file: {e.collides_with}")
def resolve(self, path: PurePath) -> Path:
"""
May throw an OutputDirException.
May throw an OutputDirError.
"""
if ".." in path.parts:
raise OutputDirException(f"Path {path} contains forbidden '..'")
raise OutputDirError(f"Forbidden segment '..' in path {path}")
if "." in path.parts:
raise OutputDirError(f"Forbidden segment '.' in path {path}")
return self._root / path
def _should_download(
@ -297,7 +287,7 @@ class OutputDirectory:
local_path: Path,
) -> Tuple[Path, BinaryIO]:
"""
May raise an OutputDirException.
May raise an OutputDirError.
"""
# Create tmp file
@ -309,7 +299,7 @@ class OutputDirectory:
except FileExistsError:
pass # Try again
raise OutputDirException(f"Failed to create temporary file {tmp_path}")
raise OutputDirError("Failed to create temporary file")
async def download(
self,
@ -319,7 +309,8 @@ class OutputDirectory:
on_conflict: Optional[OnConflict] = None,
) -> Optional[AsyncContextManager[FileSink]]:
"""
May throw an OutputDirException.
May throw an OutputDirError, a MarkDuplicateError or a
MarkConflictError.
"""
heuristics = Heuristics(mtime)
@ -327,7 +318,7 @@ class OutputDirectory:
on_conflict = self._on_conflict if on_conflict is None else on_conflict
local_path = self.resolve(path)
self._mark(path)
self._report.mark(path)
if not self._should_download(local_path, heuristics, redownload):
return None

View File

@ -1,19 +1,18 @@
from dataclasses import dataclass
from pathlib import PurePath
from typing import Set
@dataclass
class MarkDuplicateException(Exception):
class MarkDuplicateError(Exception):
"""
Tried to mark a file that was already marked.
"""
path: PurePath
def __init__(self, path: PurePath):
super().__init__(f"A previous file already used path {path}")
self.path = path
@dataclass
class MarkConflictException(Exception):
class MarkConflictError(Exception):
"""
Marking the path would have caused a conflict.
@ -24,8 +23,10 @@ class MarkConflictException(Exception):
usually not possible.
"""
path: PurePath
collides_with: PurePath
def __init__(self, path: PurePath, collides_with: PurePath):
super().__init__(f"File at {path} collides with previous file at {collides_with}")
self.path = path
self.collides_with = collides_with
# TODO Use PurePath.is_relative_to when updating to 3.9
@ -58,16 +59,16 @@ class Report:
"""
Mark a previously unknown file as known.
May throw a MarkDuplicateException or a MarkConflictException. For more
detail, see the respective exception's docstring.
May throw a MarkDuplicateError or a MarkConflictError. For more detail,
see the respective exception's docstring.
"""
for other in self.marked:
if path == other:
raise MarkDuplicateException(path)
raise MarkDuplicateError(path)
if is_relative_to(path, other) or is_relative_to(other, path):
raise MarkConflictException(path, other)
raise MarkConflictError(path, other)
self.known_files.add(path)