mirror of
https://github.com/Garmelon/PFERD.git
synced 2023-12-21 10:23:01 +01:00
Improve output dir and report error handling
This commit is contained in:
parent
afac22c562
commit
b4d97cd545
@ -13,7 +13,8 @@ from .authenticator import Authenticator
|
|||||||
from .config import Config, Section
|
from .config import Config, Section
|
||||||
from .limiter import Limiter
|
from .limiter import Limiter
|
||||||
from .logging import ProgressBar, log
|
from .logging import ProgressBar, log
|
||||||
from .output_dir import FileSink, OnConflict, OutputDirectory, Redownload
|
from .output_dir import FileSink, OnConflict, OutputDirectory, OutputDirError, Redownload
|
||||||
|
from .report import MarkConflictError, MarkDuplicateError
|
||||||
from .transformer import Transformer
|
from .transformer import Transformer
|
||||||
from .version import NAME, VERSION
|
from .version import NAME, VERSION
|
||||||
|
|
||||||
@ -45,12 +46,10 @@ def noncritical(f: Wrapped) -> Wrapped:
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
f(*args, **kwargs)
|
f(*args, **kwargs)
|
||||||
except CrawlWarning as e:
|
except (CrawlWarning, OutputDirError, MarkDuplicateError, MarkConflictError) as e:
|
||||||
log.print(f"[bold bright_red]Warning[/] {escape(str(e))}")
|
log.warn(str(e))
|
||||||
crawler.error_free = False
|
crawler.error_free = False
|
||||||
except CrawlError as e:
|
except CrawlError:
|
||||||
# TODO Don't print error, just pass it on upwards
|
|
||||||
log.print(f"[bold bright_red]Error[/] [red]{escape(str(e))}")
|
|
||||||
crawler.error_free = False
|
crawler.error_free = False
|
||||||
raise
|
raise
|
||||||
|
|
||||||
|
@ -14,7 +14,7 @@ from typing import AsyncContextManager, BinaryIO, Iterator, Optional, Tuple
|
|||||||
from rich.markup import escape
|
from rich.markup import escape
|
||||||
|
|
||||||
from .logging import log
|
from .logging import log
|
||||||
from .report import MarkConflictException, MarkDuplicateException, Report
|
from .report import Report
|
||||||
from .utils import ReusableAsyncContextManager, prompt_yes_no
|
from .utils import ReusableAsyncContextManager, prompt_yes_no
|
||||||
|
|
||||||
SUFFIX_CHARS = string.ascii_lowercase + string.digits
|
SUFFIX_CHARS = string.ascii_lowercase + string.digits
|
||||||
@ -22,7 +22,7 @@ SUFFIX_LENGTH = 6
|
|||||||
TRIES = 5
|
TRIES = 5
|
||||||
|
|
||||||
|
|
||||||
class OutputDirException(Exception):
|
class OutputDirError(Exception):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
@ -146,25 +146,15 @@ class OutputDirectory:
|
|||||||
def register_reserved(self, path: PurePath) -> None:
|
def register_reserved(self, path: PurePath) -> None:
|
||||||
self._report.mark_reserved(path)
|
self._report.mark_reserved(path)
|
||||||
|
|
||||||
def _mark(self, path: PurePath) -> None:
|
|
||||||
"""
|
|
||||||
May throw an OutputDirException
|
|
||||||
"""
|
|
||||||
|
|
||||||
try:
|
|
||||||
self._report.mark(path)
|
|
||||||
except MarkDuplicateException:
|
|
||||||
raise OutputDirException("Another file has already been placed here.")
|
|
||||||
except MarkConflictException as e:
|
|
||||||
raise OutputDirException(f"Collides with other file: {e.collides_with}")
|
|
||||||
|
|
||||||
def resolve(self, path: PurePath) -> Path:
|
def resolve(self, path: PurePath) -> Path:
|
||||||
"""
|
"""
|
||||||
May throw an OutputDirException.
|
May throw an OutputDirError.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if ".." in path.parts:
|
if ".." in path.parts:
|
||||||
raise OutputDirException(f"Path {path} contains forbidden '..'")
|
raise OutputDirError(f"Forbidden segment '..' in path {path}")
|
||||||
|
if "." in path.parts:
|
||||||
|
raise OutputDirError(f"Forbidden segment '.' in path {path}")
|
||||||
return self._root / path
|
return self._root / path
|
||||||
|
|
||||||
def _should_download(
|
def _should_download(
|
||||||
@ -297,7 +287,7 @@ class OutputDirectory:
|
|||||||
local_path: Path,
|
local_path: Path,
|
||||||
) -> Tuple[Path, BinaryIO]:
|
) -> Tuple[Path, BinaryIO]:
|
||||||
"""
|
"""
|
||||||
May raise an OutputDirException.
|
May raise an OutputDirError.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Create tmp file
|
# Create tmp file
|
||||||
@ -309,7 +299,7 @@ class OutputDirectory:
|
|||||||
except FileExistsError:
|
except FileExistsError:
|
||||||
pass # Try again
|
pass # Try again
|
||||||
|
|
||||||
raise OutputDirException(f"Failed to create temporary file {tmp_path}")
|
raise OutputDirError("Failed to create temporary file")
|
||||||
|
|
||||||
async def download(
|
async def download(
|
||||||
self,
|
self,
|
||||||
@ -319,7 +309,8 @@ class OutputDirectory:
|
|||||||
on_conflict: Optional[OnConflict] = None,
|
on_conflict: Optional[OnConflict] = None,
|
||||||
) -> Optional[AsyncContextManager[FileSink]]:
|
) -> Optional[AsyncContextManager[FileSink]]:
|
||||||
"""
|
"""
|
||||||
May throw an OutputDirException.
|
May throw an OutputDirError, a MarkDuplicateError or a
|
||||||
|
MarkConflictError.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
heuristics = Heuristics(mtime)
|
heuristics = Heuristics(mtime)
|
||||||
@ -327,7 +318,7 @@ class OutputDirectory:
|
|||||||
on_conflict = self._on_conflict if on_conflict is None else on_conflict
|
on_conflict = self._on_conflict if on_conflict is None else on_conflict
|
||||||
local_path = self.resolve(path)
|
local_path = self.resolve(path)
|
||||||
|
|
||||||
self._mark(path)
|
self._report.mark(path)
|
||||||
|
|
||||||
if not self._should_download(local_path, heuristics, redownload):
|
if not self._should_download(local_path, heuristics, redownload):
|
||||||
return None
|
return None
|
||||||
|
@ -1,19 +1,18 @@
|
|||||||
from dataclasses import dataclass
|
|
||||||
from pathlib import PurePath
|
from pathlib import PurePath
|
||||||
from typing import Set
|
from typing import Set
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
class MarkDuplicateError(Exception):
|
||||||
class MarkDuplicateException(Exception):
|
|
||||||
"""
|
"""
|
||||||
Tried to mark a file that was already marked.
|
Tried to mark a file that was already marked.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
path: PurePath
|
def __init__(self, path: PurePath):
|
||||||
|
super().__init__(f"A previous file already used path {path}")
|
||||||
|
self.path = path
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
class MarkConflictError(Exception):
|
||||||
class MarkConflictException(Exception):
|
|
||||||
"""
|
"""
|
||||||
Marking the path would have caused a conflict.
|
Marking the path would have caused a conflict.
|
||||||
|
|
||||||
@ -24,8 +23,10 @@ class MarkConflictException(Exception):
|
|||||||
usually not possible.
|
usually not possible.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
path: PurePath
|
def __init__(self, path: PurePath, collides_with: PurePath):
|
||||||
collides_with: PurePath
|
super().__init__(f"File at {path} collides with previous file at {collides_with}")
|
||||||
|
self.path = path
|
||||||
|
self.collides_with = collides_with
|
||||||
|
|
||||||
|
|
||||||
# TODO Use PurePath.is_relative_to when updating to 3.9
|
# TODO Use PurePath.is_relative_to when updating to 3.9
|
||||||
@ -58,16 +59,16 @@ class Report:
|
|||||||
"""
|
"""
|
||||||
Mark a previously unknown file as known.
|
Mark a previously unknown file as known.
|
||||||
|
|
||||||
May throw a MarkDuplicateException or a MarkConflictException. For more
|
May throw a MarkDuplicateError or a MarkConflictError. For more detail,
|
||||||
detail, see the respective exception's docstring.
|
see the respective exception's docstring.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
for other in self.marked:
|
for other in self.marked:
|
||||||
if path == other:
|
if path == other:
|
||||||
raise MarkDuplicateException(path)
|
raise MarkDuplicateError(path)
|
||||||
|
|
||||||
if is_relative_to(path, other) or is_relative_to(other, path):
|
if is_relative_to(path, other) or is_relative_to(other, path):
|
||||||
raise MarkConflictException(path, other)
|
raise MarkConflictError(path, other)
|
||||||
|
|
||||||
self.known_files.add(path)
|
self.known_files.add(path)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user