# pferd/PFERD/report.py

import json
from pathlib import Path, PurePath
from typing import Any, Dict, List, Set
class ReportLoadError(Exception):
    """
    Raised when the contents of a stored report do not have the expected
    format.
    """
class MarkDuplicateError(Exception):
    """
    Tried to mark a file that was already marked.

    The offending path is available as the ``path`` attribute.
    """

    def __init__(self, path: PurePath):
        super().__init__(f"A previous file already used path {path}")
        self.path = path
class MarkConflictError(Exception):
    """
    Marking the path would have caused a conflict.

    A conflict can have two reasons: Either the new file has the same path as
    the parent directory of a known file, or a parent directory of the new file
    has the same path as a known file. In either case, adding the new file
    would require a file and a directory to share the same path, which is
    usually not possible.

    The new path is available as ``path``, the previously marked path it
    clashes with as ``collides_with``.
    """

    def __init__(self, path: PurePath, collides_with: PurePath):
        super().__init__(f"File at {path} collides with previous file at {collides_with}")
        self.path = path
        self.collides_with = collides_with
# TODO Use PurePath.is_relative_to when updating to 3.9
def is_relative_to(a: PurePath, b: PurePath) -> bool:
    """
    Return True if a is equal to b or located below b, False otherwise.
    """
    try:
        # relative_to() raises ValueError when a does not start with b
        a.relative_to(b)
    except ValueError:
        return False
    return True
class Report:
    """
    A report of a synchronization. Includes all files found by the crawler, as
    well as the set of changes made to local files.
    """

    def __init__(self) -> None:
        # Paths registered through mark_reserved()
        self.reserved_files: Set[PurePath] = set()
        # Paths registered through mark()
        self.known_files: Set[PurePath] = set()

        # Paths recorded through add_file(), change_file(), delete_file() and
        # not_delete_file() respectively
        self.added_files: Set[PurePath] = set()
        self.changed_files: Set[PurePath] = set()
        self.deleted_files: Set[PurePath] = set()
        self.not_deleted_files: Set[PurePath] = set()

    @staticmethod
    def _get_list_of_strs(data: Dict[str, Any], key: str) -> List[str]:
        """
        Return data[key], which must be a list of strings. A missing key is
        treated like an empty list.

        May raise ReportLoadError if the value is not a list of strings.
        """

        result: Any = data.get(key, [])
        if not isinstance(result, list):
            raise ReportLoadError(f"Incorrect format: {key!r} is not a list")

        if not all(isinstance(elem, str) for elem in result):
            raise ReportLoadError(f"Incorrect format: {key!r} must contain only strings")

        return result

    @classmethod
    def load(cls, path: Path) -> "Report":
        """
        Load a report that was previously written by store().

        May raise OSError, json.JSONDecodeError, ReportLoadError.
        """

        # Explicit encoding so the result does not depend on the locale
        with open(path, encoding="utf-8") as f:
            data = json.load(f)

        if not isinstance(data, dict):
            raise ReportLoadError("Incorrect format: Root is not an object")

        report = cls()

        for elem in cls._get_list_of_strs(data, "reserved"):
            reserved = PurePath(elem)
            # mark_reserved() would raise a RuntimeError here, which is not
            # part of this function's documented exceptions.
            if report.is_marked(reserved):
                raise ReportLoadError(f"Incorrect format: 'reserved' contains duplicate path {elem!r}")
            report.mark_reserved(reserved)

        for elem in cls._get_list_of_strs(data, "known"):
            try:
                report.mark(PurePath(elem))
            except (MarkDuplicateError, MarkConflictError) as e:
                # Inconsistent file contents are a format problem from the
                # caller's point of view.
                raise ReportLoadError(f"Incorrect format: {e}") from e

        for elem in cls._get_list_of_strs(data, "added"):
            report.add_file(PurePath(elem))
        for elem in cls._get_list_of_strs(data, "changed"):
            report.change_file(PurePath(elem))
        for elem in cls._get_list_of_strs(data, "deleted"):
            report.delete_file(PurePath(elem))
        for elem in cls._get_list_of_strs(data, "not_deleted"):
            report.not_delete_file(PurePath(elem))

        return report

    def store(self, path: Path) -> None:
        """
        Write the report to the given path as JSON.

        May raise OSError.
        """

        def sorted_strs(paths: Set[PurePath]) -> List[str]:
            # Sorted so that the file contents are deterministic
            return [str(p) for p in sorted(paths)]

        data = {
            "reserved": sorted_strs(self.reserved_files),
            "known": sorted_strs(self.known_files),
            "added": sorted_strs(self.added_files),
            "changed": sorted_strs(self.changed_files),
            "deleted": sorted_strs(self.deleted_files),
            "not_deleted": sorted_strs(self.not_deleted_files),
        }

        # Explicit encoding so the result does not depend on the locale
        with open(path, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2, sort_keys=True)
            f.write("\n")  # json.dump doesn't do this

    def mark_reserved(self, path: PurePath) -> None:
        """
        Reserve a path that has not been marked or reserved yet.

        May raise a RuntimeError if the path is already marked or reserved.
        """

        if self.is_marked(path):
            raise RuntimeError("Trying to reserve an already reserved file")

        self.reserved_files.add(path)

    def mark(self, path: PurePath) -> None:
        """
        Mark a previously unknown file as known.

        May throw a MarkDuplicateError or a MarkConflictError. For more detail,
        see the respective exception's docstring.
        """

        # Check for an exact duplicate first. Set iteration order is
        # arbitrary, so doing this inside the loop below could report a
        # duplicate as a conflict with some other path instead.
        if self.is_marked(path):
            raise MarkDuplicateError(path)

        for other in self.marked:
            if is_relative_to(path, other) or is_relative_to(other, path):
                raise MarkConflictError(path, other)

        self.known_files.add(path)

    @property
    def marked(self) -> Set[PurePath]:
        """
        A new set containing all known and all reserved paths.
        """

        return self.known_files | self.reserved_files

    def is_marked(self, path: PurePath) -> bool:
        """
        Return whether the path is known or reserved.
        """

        # Two direct lookups instead of building the union set first
        return path in self.known_files or path in self.reserved_files

    def add_file(self, path: PurePath) -> None:
        """
        Unlike mark(), this function accepts any paths.
        """

        self.added_files.add(path)

    def change_file(self, path: PurePath) -> None:
        """
        Unlike mark(), this function accepts any paths.
        """

        self.changed_files.add(path)

    def delete_file(self, path: PurePath) -> None:
        """
        Unlike mark(), this function accepts any paths.
        """

        self.deleted_files.add(path)

    def not_delete_file(self, path: PurePath) -> None:
        """
        Unlike mark(), this function accepts any paths.
        """

        self.not_deleted_files.add(path)