mirror of
				https://github.com/Garmelon/PFERD.git
				synced 2025-11-04 06:32:52 +01:00 
			
		
		
		
	Improve output dir and report error handling
This commit is contained in:
		@@ -13,7 +13,8 @@ from .authenticator import Authenticator
 | 
			
		||||
from .config import Config, Section
 | 
			
		||||
from .limiter import Limiter
 | 
			
		||||
from .logging import ProgressBar, log
 | 
			
		||||
from .output_dir import FileSink, OnConflict, OutputDirectory, Redownload
 | 
			
		||||
from .output_dir import FileSink, OnConflict, OutputDirectory, OutputDirError, Redownload
 | 
			
		||||
from .report import MarkConflictError, MarkDuplicateError
 | 
			
		||||
from .transformer import Transformer
 | 
			
		||||
from .version import NAME, VERSION
 | 
			
		||||
 | 
			
		||||
@@ -45,12 +46,10 @@ def noncritical(f: Wrapped) -> Wrapped:
 | 
			
		||||
 | 
			
		||||
        try:
 | 
			
		||||
            f(*args, **kwargs)
 | 
			
		||||
        except CrawlWarning as e:
 | 
			
		||||
            log.print(f"[bold bright_red]Warning[/] {escape(str(e))}")
 | 
			
		||||
        except (CrawlWarning, OutputDirError, MarkDuplicateError, MarkConflictError) as e:
 | 
			
		||||
            log.warn(str(e))
 | 
			
		||||
            crawler.error_free = False
 | 
			
		||||
        except CrawlError as e:
 | 
			
		||||
            # TODO Don't print error, just pass it on upwards
 | 
			
		||||
            log.print(f"[bold bright_red]Error[/] [red]{escape(str(e))}")
 | 
			
		||||
        except CrawlError:
 | 
			
		||||
            crawler.error_free = False
 | 
			
		||||
            raise
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -14,7 +14,7 @@ from typing import AsyncContextManager, BinaryIO, Iterator, Optional, Tuple
 | 
			
		||||
from rich.markup import escape
 | 
			
		||||
 | 
			
		||||
from .logging import log
 | 
			
		||||
from .report import MarkConflictException, MarkDuplicateException, Report
 | 
			
		||||
from .report import Report
 | 
			
		||||
from .utils import ReusableAsyncContextManager, prompt_yes_no
 | 
			
		||||
 | 
			
		||||
SUFFIX_CHARS = string.ascii_lowercase + string.digits
 | 
			
		||||
@@ -22,7 +22,7 @@ SUFFIX_LENGTH = 6
 | 
			
		||||
TRIES = 5
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class OutputDirException(Exception):
 | 
			
		||||
class OutputDirError(Exception):
 | 
			
		||||
    pass
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -146,25 +146,15 @@ class OutputDirectory:
 | 
			
		||||
    def register_reserved(self, path: PurePath) -> None:
 | 
			
		||||
        self._report.mark_reserved(path)
 | 
			
		||||
 | 
			
		||||
    def _mark(self, path: PurePath) -> None:
 | 
			
		||||
        """
 | 
			
		||||
        May throw an OutputDirException
 | 
			
		||||
        """
 | 
			
		||||
 | 
			
		||||
        try:
 | 
			
		||||
            self._report.mark(path)
 | 
			
		||||
        except MarkDuplicateException:
 | 
			
		||||
            raise OutputDirException("Another file has already been placed here.")
 | 
			
		||||
        except MarkConflictException as e:
 | 
			
		||||
            raise OutputDirException(f"Collides with other file: {e.collides_with}")
 | 
			
		||||
 | 
			
		||||
    def resolve(self, path: PurePath) -> Path:
 | 
			
		||||
        """
 | 
			
		||||
        May throw an OutputDirException.
 | 
			
		||||
        May throw an OutputDirError.
 | 
			
		||||
        """
 | 
			
		||||
 | 
			
		||||
        if ".." in path.parts:
 | 
			
		||||
            raise OutputDirException(f"Path {path} contains forbidden '..'")
 | 
			
		||||
            raise OutputDirError(f"Forbidden segment '..' in path {path}")
 | 
			
		||||
        if "." in path.parts:
 | 
			
		||||
            raise OutputDirError(f"Forbidden segment '.' in path {path}")
 | 
			
		||||
        return self._root / path
 | 
			
		||||
 | 
			
		||||
    def _should_download(
 | 
			
		||||
@@ -297,7 +287,7 @@ class OutputDirectory:
 | 
			
		||||
            local_path: Path,
 | 
			
		||||
    ) -> Tuple[Path, BinaryIO]:
 | 
			
		||||
        """
 | 
			
		||||
        May raise an OutputDirException.
 | 
			
		||||
        May raise an OutputDirError.
 | 
			
		||||
        """
 | 
			
		||||
 | 
			
		||||
        # Create tmp file
 | 
			
		||||
@@ -309,7 +299,7 @@ class OutputDirectory:
 | 
			
		||||
            except FileExistsError:
 | 
			
		||||
                pass  # Try again
 | 
			
		||||
 | 
			
		||||
        raise OutputDirException(f"Failed to create temporary file {tmp_path}")
 | 
			
		||||
        raise OutputDirError("Failed to create temporary file")
 | 
			
		||||
 | 
			
		||||
    async def download(
 | 
			
		||||
            self,
 | 
			
		||||
@@ -319,7 +309,8 @@ class OutputDirectory:
 | 
			
		||||
            on_conflict: Optional[OnConflict] = None,
 | 
			
		||||
    ) -> Optional[AsyncContextManager[FileSink]]:
 | 
			
		||||
        """
 | 
			
		||||
        May throw an OutputDirException.
 | 
			
		||||
        May throw an OutputDirError, a MarkDuplicateError or a
 | 
			
		||||
        MarkConflictError.
 | 
			
		||||
        """
 | 
			
		||||
 | 
			
		||||
        heuristics = Heuristics(mtime)
 | 
			
		||||
@@ -327,7 +318,7 @@ class OutputDirectory:
 | 
			
		||||
        on_conflict = self._on_conflict if on_conflict is None else on_conflict
 | 
			
		||||
        local_path = self.resolve(path)
 | 
			
		||||
 | 
			
		||||
        self._mark(path)
 | 
			
		||||
        self._report.mark(path)
 | 
			
		||||
 | 
			
		||||
        if not self._should_download(local_path, heuristics, redownload):
 | 
			
		||||
            return None
 | 
			
		||||
 
 | 
			
		||||
@@ -1,19 +1,18 @@
 | 
			
		||||
from dataclasses import dataclass
 | 
			
		||||
from pathlib import PurePath
 | 
			
		||||
from typing import Set
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@dataclass
 | 
			
		||||
class MarkDuplicateException(Exception):
 | 
			
		||||
class MarkDuplicateError(Exception):
 | 
			
		||||
    """
 | 
			
		||||
    Tried to mark a file that was already marked.
 | 
			
		||||
    """
 | 
			
		||||
 | 
			
		||||
    path: PurePath
 | 
			
		||||
    def __init__(self, path: PurePath):
 | 
			
		||||
        super().__init__(f"A previous file already used path {path}")
 | 
			
		||||
        self.path = path
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@dataclass
 | 
			
		||||
class MarkConflictException(Exception):
 | 
			
		||||
class MarkConflictError(Exception):
 | 
			
		||||
    """
 | 
			
		||||
    Marking the path would have caused a conflict.
 | 
			
		||||
 | 
			
		||||
@@ -24,8 +23,10 @@ class MarkConflictException(Exception):
 | 
			
		||||
    usually not possible.
 | 
			
		||||
    """
 | 
			
		||||
 | 
			
		||||
    path: PurePath
 | 
			
		||||
    collides_with: PurePath
 | 
			
		||||
    def __init__(self, path: PurePath, collides_with: PurePath):
 | 
			
		||||
        super().__init__(f"File at {path} collides with previous file at {collides_with}")
 | 
			
		||||
        self.path = path
 | 
			
		||||
        self.collides_with = collides_with
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# TODO Use PurePath.is_relative_to when updating to 3.9
 | 
			
		||||
@@ -58,16 +59,16 @@ class Report:
 | 
			
		||||
        """
 | 
			
		||||
        Mark a previously unknown file as known.
 | 
			
		||||
 | 
			
		||||
        May throw a MarkDuplicateException or a MarkConflictException. For more
 | 
			
		||||
        detail, see the respective exception's docstring.
 | 
			
		||||
        May throw a MarkDuplicateError or a MarkConflictError. For more detail,
 | 
			
		||||
        see the respective exception's docstring.
 | 
			
		||||
        """
 | 
			
		||||
 | 
			
		||||
        for other in self.marked:
 | 
			
		||||
            if path == other:
 | 
			
		||||
                raise MarkDuplicateException(path)
 | 
			
		||||
                raise MarkDuplicateError(path)
 | 
			
		||||
 | 
			
		||||
            if is_relative_to(path, other) or is_relative_to(other, path):
 | 
			
		||||
                raise MarkConflictException(path, other)
 | 
			
		||||
                raise MarkConflictError(path, other)
 | 
			
		||||
 | 
			
		||||
        self.known_files.add(path)
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user