pferd/PFERD/transformer.py

379 lines
10 KiB
Python
Raw Normal View History

# I'm sorry that this code has become a bit dense and unreadable. While
# reading, it is important to remember what True and False mean. I'd love to
# have some proper sum-types for the inputs and outputs, they'd make this code
# a lot easier to understand.
import ast
2021-04-29 09:51:25 +02:00
import re
from abc import ABC, abstractmethod
from pathlib import PurePath
2021-05-27 13:56:01 +02:00
from typing import Dict, Optional, Sequence, Union
2021-04-29 09:51:25 +02:00
2021-05-22 22:38:56 +02:00
from .logging import log
from .utils import fmt_path, str_path
2021-05-22 22:38:56 +02:00
2021-04-29 09:51:25 +02:00
class Rule(ABC):
    """Common interface implemented by every kind of transformation rule."""

    @abstractmethod
    def transform(self, path: PurePath) -> Union[PurePath, bool]:
        """
        Attempt to apply this rule to ``path``.

        The return value encodes three distinct outcomes:

        - a ``PurePath``: the rule matched and this is the resulting path,
        - ``True``: the rule matched, but its right side was an exclamation
          mark,
        - ``False``: the rule did not match at all.
        """
        ...
# These rules all use a Union[T, bool] for their right side. They are passed a
# T if the arrow's right side was a normal string, True if it was an
# exclamation mark and False if it was missing entirely.
2021-04-29 09:51:25 +02:00
class NormalRule(Rule):
    """
    Prefix rule, created by ``parse_rule`` for the arrow without a name.

    A path matches if its leading segments are exactly the segments of
    ``left``. The matched prefix is then replaced by ``right``.
    """

    def __init__(self, left: PurePath, right: Union[PurePath, bool]):
        """
        :param left: prefix that a path has to start with.
        :param right: replacement for the prefix, ``True`` if the right side
            was an exclamation mark, ``False`` if it was missing entirely.
        """
        self._left = left
        self._right = right

    def _match_prefix(self, path: PurePath) -> Optional[PurePath]:
        """
        Strip ``left`` from the front of ``path``.

        Returns the remaining segments as a (possibly empty) path, or None if
        ``path`` does not start with ``left``.
        """
        prefix = self._left.parts
        parts = path.parts

        # A slice never raises, so a path shorter than the prefix simply
        # compares unequal here.
        if parts[:len(prefix)] != prefix:
            return None

        return PurePath(*parts[len(prefix):])

    def transform(self, path: PurePath) -> Union[PurePath, bool]:
        """
        Apply the rule to ``path``.

        Returns False if ``path`` does not start with ``left``. Otherwise
        returns True for an exclamation mark, ``path`` itself if the right
        side was missing, or ``right`` joined with the unmatched rest.
        """
        rest = self._match_prefix(path)
        # Compare against None explicitly: an empty remainder is still a
        # successful match.
        if rest is None:
            return False

        if isinstance(self._right, bool):
            return True if self._right else path

        return self._right / rest
2021-04-29 09:51:25 +02:00
class ExactRule(Rule):
    """
    Rule that only matches a path equal to ``left`` as a whole.

    ``parse_rule`` creates it for the ``exact`` arrow and, wrapped in a
    ``NameRule``, for the ``name`` arrow.
    """

    def __init__(self, left: PurePath, right: Union[PurePath, bool]):
        """
        :param left: the path that has to be matched exactly.
        :param right: the replacement path, ``True`` if the right side was an
            exclamation mark, ``False`` if it was missing entirely.
        """
        self._left = left
        self._right = right

    def transform(self, path: PurePath) -> Union[PurePath, bool]:
        """
        Apply the rule to ``path``.

        Returns False if ``path`` differs from ``left``. Otherwise returns True
        for an exclamation mark, ``path`` itself if the right side was
        missing, or ``right``.
        """
        if path != self._left:
            return False

        right = self._right
        if isinstance(right, bool):
            return True if right else path
        return right
2021-04-29 09:51:25 +02:00
2021-05-15 15:06:45 +02:00
class NameRule(Rule):
    """
    Applies another rule to every single segment of a path instead of to the
    path as a whole.
    """

    def __init__(self, subrule: Rule):
        """
        :param subrule: rule that is called once per path segment.
        """
        self._subrule = subrule

    def transform(self, path: PurePath) -> Union[PurePath, bool]:
        """
        Run the subrule on each segment of ``path`` and reassemble the result.

        Returns True as soon as the subrule answers True for any segment,
        the rebuilt path if the subrule returned a path for at least one
        segment, and False otherwise.
        """
        rebuilt = PurePath()
        any_segment_matched = False

        for segment in path.parts:
            outcome = self._subrule.transform(PurePath(segment))

            if isinstance(outcome, PurePath):
                any_segment_matched = True
                rebuilt = rebuilt / outcome
                continue

            if outcome:
                # One ignored segment is enough to ignore the entire path.
                return True

            # This segment stays as it is, but other segments might still be
            # changed by the subrule.
            rebuilt = rebuilt / segment

        # If no segment was touched, this rule counts as not matching.
        return rebuilt if any_segment_matched else False
2021-05-15 15:06:45 +02:00
2021-04-29 09:51:25 +02:00
class ReRule(Rule):
    """
    Rule whose left side is a regular expression that has to match the entire
    path, and whose right side is evaluated like an f-string.
    """

    def __init__(self, left: str, right: Union[str, bool]):
        """
        :param left: regular expression, matched with ``re.fullmatch``.
        :param right: f-string-like template for the new path, ``True`` if the
            right side was an exclamation mark, ``False`` if it was missing.
        """
        self._left = left
        self._right = right

    def transform(self, path: PurePath) -> Union[PurePath, bool]:
        """
        Apply the rule to ``path``.

        Returns False if the regex does not match. Otherwise returns True for
        an exclamation mark, ``path`` itself if the right side was missing, or
        the path obtained by evaluating the right side.

        While evaluating the right side, the following names are available
        for every group number ``N`` whose group took part in the match
        (group 0 being the entire match):

        - ``gN``: the group as a string,
        - ``iN``: the group as an int, if ``int()`` accepts it,
        - ``fN``: the group as a float, if ``float()`` accepts it.
        """
        match = re.fullmatch(self._left, str_path(path))
        if not match:
            return False

        if isinstance(self._right, bool):
            return self._right or path

        # Groups that did not take part in the match are None, hence the
        # explicit Optional in the annotation (mypy would otherwise assume
        # plain strings).
        groups: Sequence[Optional[str]] = [match[0], *match.groups()]

        namespace: Dict[str, Union[str, int, float]] = {}
        for i, group in enumerate(groups):
            if group is None:
                continue

            namespace[f"g{i}"] = group

            try:
                namespace[f"i{i}"] = int(group)
            except ValueError:
                pass

            try:
                namespace[f"f{i}"] = float(group)
            except ValueError:
                pass

        # NOTE: the right side is turned into an f-string and run through
        # eval(), so it can execute arbitrary expressions. Rules must only
        # come from a trusted source.
        result = eval(f"f{self._right!r}", namespace)
        return PurePath(result)
2021-04-29 09:51:25 +02:00
2021-05-22 22:38:56 +02:00
class RuleParseError(Exception):
    """
    Raised when a rule line can't be parsed.

    Keeps the offending line (including the position the parser had reached)
    and a human-readable reason.
    """

    def __init__(self, line: "Line", reason: str):
        """
        :param line: the line being parsed; its current index is used as the
            column of the error.
        :param reason: short description of what went wrong.
        """
        message = f"Error in rule on line {line.line_nr}, column {line.index}: {reason}"
        super().__init__(message)

        self.line = line
        self.reason = reason

    def pretty_print(self) -> None:
        """
        Log the error, the rule text and a marker pointing at the column
        where parsing failed.
        """
        line = self.line

        log.error(f"Error parsing rule on line {line.line_nr}:")
        log.error_contd(line.line)

        # Shift the marker so that it ends up below the failing column.
        spaces = " " * line.index
        log.error_contd(f"{spaces}^--- {self.reason}")
2021-04-29 09:51:25 +02:00
class Line:
    """
    A single line of rule text together with a cursor into it.

    The parse functions read the character below the cursor with ``get`` and
    move the cursor forward with ``advance``.
    """

    def __init__(self, line: str, line_nr: int):
        """
        :param line: the text of the line.
        :param line_nr: the number of the line, only used for error messages.
        """
        self._line = line
        self._line_nr = line_nr
        self._index = 0

    def get(self) -> Optional[str]:
        """
        Return the character below the cursor, or None at the end of the line.
        """
        if self._index >= len(self._line):
            return None
        return self._line[self._index]

    @property
    def line(self) -> str:
        """The full text of the line."""
        return self._line

    @property
    def line_nr(self) -> int:
        """The line number passed to the constructor."""
        return self._line_nr

    @property
    def index(self) -> int:
        """The current cursor position."""
        return self._index

    @index.setter
    def index(self, index: int) -> None:
        self._index = index

    def advance(self) -> None:
        """Move the cursor forward by one character."""
        self._index += 1

    def expect(self, string: str) -> None:
        """
        Consume ``string`` character by character.

        Raises a RuleParseError at the first character that differs. The
        cursor then stays on that character.
        """
        for char in string:
            if self.get() != char:
                raise RuleParseError(self, f"Expected {char!r}")
            self.advance()
2021-04-29 09:51:25 +02:00
# Characters that may open a quoted string literal in a rule. The same
# character, unescaped, closes the literal again.
QUOTATION_MARKS = {'"', "'"}
def parse_string_literal(line: Line) -> str:
    """
    Parse a quoted string literal starting at the cursor of ``line``.

    The literal may be delimited by either kind of quotation mark and is
    decoded with ``ast.literal_eval``, so the usual Python escape sequences
    are available. On success, the cursor is left right behind the closing
    quotation mark.

    Raises a RuleParseError if the cursor is not on a quotation mark, if the
    literal is never closed, or if the literal is not a valid Python string
    (for example because of a malformed escape sequence).
    """
    # Points to the opening quotation mark of the string literal
    start_index = line.index

    quotation_mark = line.get()
    if quotation_mark not in QUOTATION_MARKS:
        # This should never happen as long as this function is only called from
        # parse_string.
        raise RuleParseError(line, "Invalid quotation mark")
    line.advance()

    escaped = False
    while c := line.get():
        line.advance()

        if escaped:
            # Whatever follows a backslash can't terminate the literal
            escaped = False
        elif c == "\\":
            escaped = True
        elif c == quotation_mark:
            literal = line.line[start_index:line.index]
            try:
                return ast.literal_eval(literal)
            except (SyntaxError, ValueError) as e:
                # Report broken escape sequences like every other parse
                # error instead of letting a raw exception escape. Point at
                # the start of the literal for a nicer error message.
                line.index = start_index
                raise RuleParseError(line, "Invalid string literal") from e

    raise RuleParseError(line, "Expected end of string literal")
2021-04-29 09:51:25 +02:00
def parse_until_space_or_eol(line: Line) -> str:
    """
    Consume characters up to, but not including, the next space or the end of
    the line and return them as a string.
    """
    chars = []
    while True:
        char = line.get()
        if not char or char == " ":
            return "".join(chars)
        chars.append(char)
        line.advance()
def parse_string(line: Line) -> Union[str, bool]:
    """
    Parse one side of a rule.

    Returns the decoded string for a quoted literal, True for a lone,
    unquoted exclamation mark, and otherwise the unquoted text up to the next
    space or the end of the line.
    """
    if line.get() in QUOTATION_MARKS:
        return parse_string_literal(line)

    word = parse_until_space_or_eol(line)
    return True if word == "!" else word
2021-04-29 09:51:25 +02:00
def parse_arrow(line: Line) -> str:
    """
    Parse an arrow and return its name.

    An arrow is a dash, followed by the name, followed by "->". The name may
    contain dashes itself as long as they are not directly followed by ">".
    Raises a RuleParseError if the line ends before the arrow is complete.
    """
    line.expect("-")

    name = []
    while c := line.get():
        if c != "-":
            name.append(c)
            line.advance()
            continue

        # A dash either starts the closing "->" or belongs to the name.
        # Which one it is depends on the character after it.
        line.advance()
        following = line.get()
        if not following:
            raise RuleParseError(line, "Expected rest of arrow")
        if following == ">":
            line.advance()
            return "".join(name)

        # The dash is part of the name. The character after it is looked at
        # again in the next iteration, so the cursor stays where it is.
        name.append("-")

    raise RuleParseError(line, "Expected rest of arrow")
def parse_whitespace(line: Line) -> None:
    """
    Consume one or more spaces.

    Raises a RuleParseError (via ``Line.expect``) if there is not even a
    single space at the cursor.
    """
    # The first space is mandatory...
    line.expect(" ")

    # ...all further ones are optional.
    while True:
        if line.get() != " ":
            break
        line.advance()
2021-05-25 15:42:46 +02:00
def parse_eol(line: Line) -> None:
    """
    Ensure that the whole line has been consumed, raising a RuleParseError if
    any character is left.
    """
    at_end = line.get() is None
    if not at_end:
        raise RuleParseError(line, "Expected end of line")
2021-04-29 09:51:25 +02:00
def parse_rule(line: Line) -> Rule:
    """
    Parse a complete rule of the form ``LEFT ARROW [RIGHT]``.

    The name of the arrow selects the kind of rule:

    - no name: NormalRule
    - "exact": ExactRule
    - "name": ExactRule applied per segment via NameRule
    - "re": ReRule
    - "name-re": ReRule applied per segment via NameRule

    Raises a RuleParseError if the line is malformed, if the left side is an
    exclamation mark, if the left side of a "name" rule has more than one
    segment, or if the arrow name is unknown.
    """
    # Left side
    left_start = line.index
    left = parse_string(line)
    if isinstance(left, bool):
        line.index = left_start
        raise RuleParseError(line, "Left side can't be '!'")
    leftpath = PurePath(left)

    # Arrow
    parse_whitespace(line)
    arrow_start = line.index
    arrowname = parse_arrow(line)

    # Right side, which is optional: False stands for "missing entirely"
    right: Union[str, bool] = False
    if line.get():
        parse_whitespace(line)
        right = parse_string(line)

    rightpath: Union[PurePath, bool]
    rightpath = right if isinstance(right, bool) else PurePath(right)

    parse_eol(line)

    # Dispatch on the name of the arrow
    if arrowname == "":
        return NormalRule(leftpath, rightpath)

    if arrowname == "name":
        if len(leftpath.parts) > 1:
            line.index = left_start
            raise RuleParseError(line, "SOURCE must be a single name, not multiple segments")
        return NameRule(ExactRule(leftpath, rightpath))

    if arrowname == "exact":
        return ExactRule(leftpath, rightpath)

    # The regex based rules work on the raw strings instead of paths
    if arrowname == "re":
        return ReRule(left, right)

    if arrowname == "name-re":
        return NameRule(ReRule(left, right))

    line.index = arrow_start + 1  # For nicer error message
    raise RuleParseError(line, f"Invalid arrow name {arrowname!r}")
2021-04-29 09:51:25 +02:00
class Transformer:
    """
    An ordered list of rules that is applied to paths. The first rule that
    matches a path decides what happens to it.
    """

    def __init__(self, rules: str):
        """
        Parse ``rules``, which contains one rule per line. Lines are stripped
        of surrounding whitespace and empty lines are skipped.

        May raise a RuleParseError.
        """
        self._rules = []
        for line_nr, raw_line in enumerate(rules.split("\n")):
            stripped = raw_line.strip()
            if not stripped:
                continue

            # The rule text is kept next to the rule for log output
            rule = parse_rule(Line(stripped, line_nr))
            self._rules.append((stripped, rule))

    def transform(self, path: PurePath) -> Optional[PurePath]:
        """
        Apply the first matching rule to ``path``.

        Returns the transformed path, None if the matching rule says that the
        path should be ignored, or ``path`` itself if no rule matched. A rule
        that raises an exception is logged as a warning and skipped.
        """
        for i, (line, rule) in enumerate(self._rules):
            log.explain(f"Testing rule {i+1}: {line}")

            try:
                result = rule.transform(path)
            except Exception as e:
                # A single misbehaving rule should not abort the whole run
                log.warn(f"Error while testing rule {i+1}: {line}")
                log.warn_contd(str(e))
                continue

            if isinstance(result, PurePath):
                log.explain(f"Match found, transformed path to {fmt_path(result)}")
                return result

            if result:  # Exclamation mark
                log.explain("Match found, path ignored")
                return None

            # The rule didn't match, so try the next one

        log.explain("No rule matched, path is unchanged")
        return path