mirror of
https://github.com/Garmelon/PFERD.git
synced 2023-12-21 10:23:01 +01:00
Load and store reports
This commit is contained in:
parent
6e9f8fd391
commit
6ca0ecdf05
@ -11,7 +11,7 @@ from ..config import Config, Section
|
||||
from ..limiter import Limiter
|
||||
from ..logging import ProgressBar, log
|
||||
from ..output_dir import FileSink, FileSinkToken, OnConflict, OutputDirectory, OutputDirError, Redownload
|
||||
from ..report import MarkConflictError, MarkDuplicateError
|
||||
from ..report import MarkConflictError, MarkDuplicateError, Report
|
||||
from ..transformer import Transformer
|
||||
from ..utils import ReusableAsyncContextManager, fmt_path
|
||||
|
||||
@ -229,6 +229,14 @@ class Crawler(ABC):
|
||||
section.on_conflict(),
|
||||
)
|
||||
|
||||
@property
def report(self) -> Report:
    """
    The report of this crawler's output directory.
    """

    output_dir = self._output_dir
    return output_dir.report
|
||||
|
||||
@property
def prev_report(self) -> Optional[Report]:
    """
    The previous report of this crawler's output directory, as exposed by
    the output directory's own prev_report attribute (may be None).
    """

    output_dir = self._output_dir
    return output_dir.prev_report
|
||||
|
||||
@staticmethod
|
||||
async def gather(awaitables: Sequence[Awaitable[Any]]) -> List[Any]:
|
||||
"""
|
||||
@ -298,8 +306,10 @@ class Crawler(ABC):
|
||||
|
||||
with log.show_progress():
|
||||
self._output_dir.prepare()
|
||||
self._output_dir.load_prev_report()
|
||||
await self._run()
|
||||
await self._cleanup()
|
||||
self._output_dir.store_report()
|
||||
|
||||
@abstractmethod
|
||||
async def _run(self) -> None:
|
||||
|
@ -1,4 +1,5 @@
|
||||
import filecmp
|
||||
import json
|
||||
import os
|
||||
import random
|
||||
import shutil
|
||||
@ -13,7 +14,7 @@ from typing import BinaryIO, Iterator, Optional, Tuple
|
||||
from rich.markup import escape
|
||||
|
||||
from .logging import log
|
||||
from .report import Report
|
||||
from .report import Report, ReportLoadError
|
||||
from .utils import ReusableAsyncContextManager, fmt_path, fmt_real_path, prompt_yes_no
|
||||
|
||||
SUFFIX_CHARS = string.ascii_lowercase + string.digits
|
||||
@ -134,6 +135,8 @@ class FileSinkToken(ReusableAsyncContextManager[FileSink]):
|
||||
|
||||
|
||||
class OutputDirectory:
|
||||
REPORT_FILE = PurePath(".report")
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
root: Path,
|
||||
@ -144,7 +147,19 @@ class OutputDirectory:
|
||||
self._redownload = redownload
|
||||
self._on_conflict = on_conflict
|
||||
|
||||
self._report_path = self.resolve(self.REPORT_FILE)
|
||||
self._report = Report()
|
||||
self._prev_report: Optional[Report] = None
|
||||
|
||||
self.register_reserved(self.REPORT_FILE)
|
||||
|
||||
@property
def report(self) -> Report:
    """
    The report object held in self._report.
    """

    current = self._report
    return current
|
||||
|
||||
@property
def prev_report(self) -> Optional[Report]:
    """
    The report stored in self._prev_report by load_prev_report(), or None if
    no previous report has been loaded successfully.
    """

    previous = self._prev_report
    return previous
|
||||
|
||||
def prepare(self) -> None:
|
||||
log.explain_topic(f"Creating base directory at {fmt_real_path(self._root)}")
|
||||
@ -452,3 +467,21 @@ class OutputDirectory:
|
||||
self._report.delete_file(pure)
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
def load_prev_report(self) -> None:
    """
    Try to load the report file at self._report_path into self._prev_report.

    Failures with OSError, json.JSONDecodeError or ReportLoadError are only
    logged via log.explain(); self._prev_report is left untouched in that
    case.
    """

    report_path = self._report_path
    log.explain_topic(f"Loading previous report from {fmt_real_path(report_path)}")

    # Errors that merely mean "no usable previous report".
    tolerated = (OSError, json.JSONDecodeError, ReportLoadError)

    try:
        self._prev_report = Report.load(report_path)
        log.explain("Loaded report successfully")
    except tolerated as err:
        log.explain("Failed to load report")
        log.explain(str(err))
|
||||
|
||||
def store_report(self) -> None:
    """
    Write the current report to self._report_path.

    An OSError while storing is reported as a warning through log.warn()
    and log.warn_contd() and is not propagated.
    """

    target = self._report_path
    log.explain_topic(f"Storing report to {fmt_real_path(target)}")

    try:
        self._report.store(target)
        log.explain("Stored report successfully")
    except OSError as err:
        log.warn(f"Failed to save report to {fmt_real_path(target)}")
        log.warn_contd(str(err))
|
||||
|
@ -1,5 +1,10 @@
|
||||
from pathlib import PurePath
|
||||
from typing import Set
|
||||
import json
|
||||
from pathlib import Path, PurePath
|
||||
from typing import Any, Dict, List, Set
|
||||
|
||||
|
||||
class ReportLoadError(Exception):
    """
    Raised while loading a report when the parsed JSON does not have the
    expected structure.
    """
|
||||
|
||||
|
||||
class MarkDuplicateError(Exception):
|
||||
@ -48,10 +53,66 @@ class Report:
|
||||
self.reserved_files: Set[PurePath] = set()
|
||||
self.known_files: Set[PurePath] = set()
|
||||
|
||||
self.new_files: Set[PurePath] = set()
|
||||
self.added_files: Set[PurePath] = set()
|
||||
self.changed_files: Set[PurePath] = set()
|
||||
self.deleted_files: Set[PurePath] = set()
|
||||
|
||||
@staticmethod
def _get_list_of_strs(data: Dict[str, Any], key: str) -> List[str]:
    """
    Look up key in data and check that the value is a list of strings.

    A missing key yields an empty list. Raises ReportLoadError if the value
    is not a list or if any of its elements is not a string.
    """

    value: Any = data.get(key, [])

    if not isinstance(value, list):
        raise ReportLoadError(f"Incorrect format: {key!r} is not a list")

    if not all(isinstance(item, str) for item in value):
        raise ReportLoadError(f"Incorrect format: {key!r} must contain only strings")

    return value
|
||||
|
||||
@classmethod
def load(cls, path: Path) -> "Report":
    """
    Load a report from the JSON file at path.

    The file must contain a JSON object. Its "reserved", "known", "added",
    "changed" and "deleted" entries (each optional, each a list of strings)
    are turned into PurePaths and replayed into a fresh report through
    mark_reserved(), mark(), add_file(), change_file() and delete_file()
    respectively.

    May raise OSError, json.JSONDecodeError, ReportLoadError.
    """

    try:
        # Read with a fixed encoding instead of the locale default so the
        # result does not depend on the machine's configuration.
        with open(path, encoding="utf-8") as f:
            data = json.load(f)
    except UnicodeDecodeError as e:
        # UnicodeDecodeError is neither an OSError nor a JSONDecodeError, so
        # a corrupt file would otherwise escape the documented exceptions.
        raise ReportLoadError(f"Incorrect format: File is not valid UTF-8 ({e})") from e

    if not isinstance(data, dict):
        raise ReportLoadError("Incorrect format: Root is not an object")

    report = cls()

    for elem in cls._get_list_of_strs(data, "reserved"):
        report.mark_reserved(PurePath(elem))

    # NOTE(review): mark() is not visible in this excerpt. If it rejects
    # duplicate or conflicting paths, a malformed "known" list would raise
    # something other than the exceptions documented above - confirm.
    for elem in cls._get_list_of_strs(data, "known"):
        report.mark(PurePath(elem))

    for elem in cls._get_list_of_strs(data, "added"):
        report.add_file(PurePath(elem))

    for elem in cls._get_list_of_strs(data, "changed"):
        report.change_file(PurePath(elem))

    for elem in cls._get_list_of_strs(data, "deleted"):
        report.delete_file(PurePath(elem))

    return report
|
||||
|
||||
def store(self, path: Path) -> None:
    """
    Write this report to path as JSON.

    The output is an object with the keys "reserved", "known", "added",
    "changed" and "deleted", each holding the sorted paths of the
    corresponding set converted to strings. The file is indented by two
    spaces, has sorted keys and ends with a newline.

    May raise OSError.
    """

    # The loop variable is deliberately not called "path": the parameter of
    # that name is still needed below to open the target file.
    data = {
        "reserved": [str(p) for p in sorted(self.reserved_files)],
        "known": [str(p) for p in sorted(self.known_files)],
        "added": [str(p) for p in sorted(self.added_files)],
        "changed": [str(p) for p in sorted(self.changed_files)],
        "deleted": [str(p) for p in sorted(self.deleted_files)],
    }

    # json.dump escapes non-ASCII characters by default, so pinning the
    # encoding does not change the bytes written; it only makes the file
    # independent of the locale.
    with open(path, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=2, sort_keys=True)
        f.write("\n")  # json.dump doesn't do this
|
||||
|
||||
def mark_reserved(self, path: PurePath) -> None:
    """
    Add path to this report's set of reserved files.
    """

    reserved = self.reserved_files
    reserved.add(path)
|
||||
|
||||
@ -84,7 +145,7 @@ class Report:
|
||||
Unlike mark(), this function accepts any paths.
|
||||
"""
|
||||
|
||||
self.new_files.add(path)
|
||||
self.added_files.add(path)
|
||||
|
||||
def change_file(self, path: PurePath) -> None:
|
||||
"""
|
||||
|
Loading…
Reference in New Issue
Block a user