mirror of
https://github.com/Garmelon/PFERD.git
synced 2023-12-21 10:23:01 +01:00
Improve output dir and report error handling
This commit is contained in:
parent
afac22c562
commit
b4d97cd545
@ -13,7 +13,8 @@ from .authenticator import Authenticator
|
||||
from .config import Config, Section
|
||||
from .limiter import Limiter
|
||||
from .logging import ProgressBar, log
|
||||
from .output_dir import FileSink, OnConflict, OutputDirectory, Redownload
|
||||
from .output_dir import FileSink, OnConflict, OutputDirectory, OutputDirError, Redownload
|
||||
from .report import MarkConflictError, MarkDuplicateError
|
||||
from .transformer import Transformer
|
||||
from .version import NAME, VERSION
|
||||
|
||||
@ -45,12 +46,10 @@ def noncritical(f: Wrapped) -> Wrapped:
|
||||
|
||||
try:
|
||||
f(*args, **kwargs)
|
||||
except CrawlWarning as e:
|
||||
log.print(f"[bold bright_red]Warning[/] {escape(str(e))}")
|
||||
except (CrawlWarning, OutputDirError, MarkDuplicateError, MarkConflictError) as e:
|
||||
log.warn(str(e))
|
||||
crawler.error_free = False
|
||||
except CrawlError as e:
|
||||
# TODO Don't print error, just pass it on upwards
|
||||
log.print(f"[bold bright_red]Error[/] [red]{escape(str(e))}")
|
||||
except CrawlError:
|
||||
crawler.error_free = False
|
||||
raise
|
||||
|
||||
|
@ -14,7 +14,7 @@ from typing import AsyncContextManager, BinaryIO, Iterator, Optional, Tuple
|
||||
from rich.markup import escape
|
||||
|
||||
from .logging import log
|
||||
from .report import MarkConflictException, MarkDuplicateException, Report
|
||||
from .report import Report
|
||||
from .utils import ReusableAsyncContextManager, prompt_yes_no
|
||||
|
||||
SUFFIX_CHARS = string.ascii_lowercase + string.digits
|
||||
@ -22,7 +22,7 @@ SUFFIX_LENGTH = 6
|
||||
TRIES = 5
|
||||
|
||||
|
||||
class OutputDirException(Exception):
|
||||
class OutputDirError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
@ -146,25 +146,15 @@ class OutputDirectory:
|
||||
def register_reserved(self, path: PurePath) -> None:
|
||||
self._report.mark_reserved(path)
|
||||
|
||||
def _mark(self, path: PurePath) -> None:
|
||||
"""
|
||||
May throw an OutputDirException
|
||||
"""
|
||||
|
||||
try:
|
||||
self._report.mark(path)
|
||||
except MarkDuplicateException:
|
||||
raise OutputDirException("Another file has already been placed here.")
|
||||
except MarkConflictException as e:
|
||||
raise OutputDirException(f"Collides with other file: {e.collides_with}")
|
||||
|
||||
def resolve(self, path: PurePath) -> Path:
|
||||
"""
|
||||
May throw an OutputDirException.
|
||||
May throw an OutputDirError.
|
||||
"""
|
||||
|
||||
if ".." in path.parts:
|
||||
raise OutputDirException(f"Path {path} contains forbidden '..'")
|
||||
raise OutputDirError(f"Forbidden segment '..' in path {path}")
|
||||
if "." in path.parts:
|
||||
raise OutputDirError(f"Forbidden segment '.' in path {path}")
|
||||
return self._root / path
|
||||
|
||||
def _should_download(
|
||||
@ -297,7 +287,7 @@ class OutputDirectory:
|
||||
local_path: Path,
|
||||
) -> Tuple[Path, BinaryIO]:
|
||||
"""
|
||||
May raise an OutputDirException.
|
||||
May raise an OutputDirError.
|
||||
"""
|
||||
|
||||
# Create tmp file
|
||||
@ -309,7 +299,7 @@ class OutputDirectory:
|
||||
except FileExistsError:
|
||||
pass # Try again
|
||||
|
||||
raise OutputDirException(f"Failed to create temporary file {tmp_path}")
|
||||
raise OutputDirError("Failed to create temporary file")
|
||||
|
||||
async def download(
|
||||
self,
|
||||
@ -319,7 +309,8 @@ class OutputDirectory:
|
||||
on_conflict: Optional[OnConflict] = None,
|
||||
) -> Optional[AsyncContextManager[FileSink]]:
|
||||
"""
|
||||
May throw an OutputDirException.
|
||||
May throw an OutputDirError, a MarkDuplicateError or a
|
||||
MarkConflictError.
|
||||
"""
|
||||
|
||||
heuristics = Heuristics(mtime)
|
||||
@ -327,7 +318,7 @@ class OutputDirectory:
|
||||
on_conflict = self._on_conflict if on_conflict is None else on_conflict
|
||||
local_path = self.resolve(path)
|
||||
|
||||
self._mark(path)
|
||||
self._report.mark(path)
|
||||
|
||||
if not self._should_download(local_path, heuristics, redownload):
|
||||
return None
|
||||
|
@ -1,19 +1,18 @@
|
||||
from dataclasses import dataclass
|
||||
from pathlib import PurePath
|
||||
from typing import Set
|
||||
|
||||
|
||||
@dataclass
|
||||
class MarkDuplicateException(Exception):
|
||||
class MarkDuplicateError(Exception):
|
||||
"""
|
||||
Tried to mark a file that was already marked.
|
||||
"""
|
||||
|
||||
path: PurePath
|
||||
def __init__(self, path: PurePath):
|
||||
super().__init__(f"A previous file already used path {path}")
|
||||
self.path = path
|
||||
|
||||
|
||||
@dataclass
|
||||
class MarkConflictException(Exception):
|
||||
class MarkConflictError(Exception):
|
||||
"""
|
||||
Marking the path would have caused a conflict.
|
||||
|
||||
@ -24,8 +23,10 @@ class MarkConflictException(Exception):
|
||||
usually not possible.
|
||||
"""
|
||||
|
||||
path: PurePath
|
||||
collides_with: PurePath
|
||||
def __init__(self, path: PurePath, collides_with: PurePath):
|
||||
super().__init__(f"File at {path} collides with previous file at {collides_with}")
|
||||
self.path = path
|
||||
self.collides_with = collides_with
|
||||
|
||||
|
||||
# TODO Use PurePath.is_relative_to when updating to 3.9
|
||||
@ -58,16 +59,16 @@ class Report:
|
||||
"""
|
||||
Mark a previously unknown file as known.
|
||||
|
||||
May throw a MarkDuplicateException or a MarkConflictException. For more
|
||||
detail, see the respective exception's docstring.
|
||||
May throw a MarkDuplicateError or a MarkConflictError. For more detail,
|
||||
see the respective exception's docstring.
|
||||
"""
|
||||
|
||||
for other in self.marked:
|
||||
if path == other:
|
||||
raise MarkDuplicateException(path)
|
||||
raise MarkDuplicateError(path)
|
||||
|
||||
if is_relative_to(path, other) or is_relative_to(other, path):
|
||||
raise MarkConflictException(path, other)
|
||||
raise MarkConflictError(path, other)
|
||||
|
||||
self.known_files.add(path)
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user