Elaborate on transforms and implement changes

2026-02-08 18:32:23 +01:00 · 2021-04-29 20:13:46 +02:00
parent 9ec19be113
commit e7a51decb0
2 changed files with 120 additions and 27 deletions
--- a/CONFIG.md
+++ b/CONFIG.md
@@ -107,6 +107,21 @@ The `-re->` arrow uses regular expressions. `SOURCE` is a regular expression
 that must match the entire path. If this is the case, then the capturing groups
 are available in `TARGET` for formatting.
 `TARGET` uses Python's [format string syntax][3]. The *n*-th capturing group can
 be referred to as `{g<n>}` (e.g. `{g3}`). `{g0}` refers to the original path.
 If capturing group *n*'s contents are a valid integer, the integer value is
 available as `{i<n>}` (e.g. `{i3}`). If capturing group *n*'s contents are a
 valid float, the float value is available as `{f<n>}` (e.g. `{f3}`).
 Python's format string syntax has rich options for formatting its arguments. For
 example, to left-pad the capturing group 3 with the digit `0` to width 5, you
 can use `{i3:05}`.
 PFERD even allows you to write entire expressions inside the curly braces, for
 example `{g2.lower()}` or `{g3.replace(' ', '_')}`.
 [3]: <https://docs.python.org/3/library/string.html#format-string-syntax> "Format String Syntax"
 ### Example: Tutorials
 You have an ILIAS course with lots of tutorials, but are only interested in a
@@ -136,3 +151,24 @@ the `tutorials/` directory and thus not discover that `tutorials/tut02/`
 existed.
 Since the second rule is only relevant for crawling, the `TARGET` is left out.
 ### Example: Lecture slides
 You have a course with slides like `Lecture 3: Linear functions.PDF` and you
 would like to rename them to `03_linear_functions.pdf`.
 ```
 Lectures/
  |- Lecture 1: Introduction.PDF
  |- Lecture 2: Vectors and matrices.PDF
  |- Lecture 3: Linear functions.PDF
  ...
 ```
 To do this, you can use the most powerful of arrows, the regex arrow.
 ```
 "Lectures/Lecture (\\d+): (.*)\\.PDF" -re-> "Lectures/{i1:02}_{g2.lower().replace(' ', '_')}.pdf"
 ```
 Note the escaped backslashes on the `SOURCE` side.
--- a/PFERD/transformer.py
+++ b/PFERD/transformer.py
@@ -1,3 +1,9 @@
 # I'm sorry that this code has become a bit dense and unreadable. While
 # reading, it is important to remember what True and False mean. I'd love to
 # have some proper sum-types for the inputs and outputs, they'd make this code
 # a lot easier to understand.
 import ast
 import re
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
@@ -7,12 +13,23 @@ from typing import Dict, Optional, Union
 class Rule(ABC):
    @abstractmethod
-    def transform(self, path: PurePath) -> Optional[PurePath]:
+    def transform(self, path: PurePath) -> Union[PurePath, bool]:
        """
        Try to apply this rule to the path. Returns another path if the rule
        was successfully applied, True if the rule matched but resulted in an
        exclamation mark, and False if the rule didn't match at all.
        """
        pass
 # These rules all use a Union[T, bool] for their right side. They are passed a
 # T if the arrow's right side was a normal string, True if it was an
 # exclamation mark and False if it was missing entirely.
 class NormalRule(Rule):
-    def __init__(self, left: PurePath, right: PurePath):
+    def __init__(self, left: PurePath, right: Union[PurePath, bool]):
        self._left = left
        self._right = right
@@ -35,49 +52,61 @@ class NormalRule(Rule):
        return PurePath(*path_parts)
-    def transform(self, path: PurePath) -> Optional[PurePath]:
+    def transform(self, path: PurePath) -> Union[PurePath, bool]:
        if rest := self._match_prefix(path):
            if isinstance(self._right, bool):
                return self._right or path
            else:
                return self._right / rest
-        return None
+        return False
 class ExactRule(Rule):
-    def __init__(self, left: PurePath, right: PurePath):
+    def __init__(self, left: PurePath, right: Union[PurePath, bool]):
        self._left = left
        self._right = right
-    def transform(self, path: PurePath) -> Optional[PurePath]:
+    def transform(self, path: PurePath) -> Union[PurePath, bool]:
        if path == self._left:
            if isinstance(self._right, bool):
                return self._right or path
            else:
                return self._right
-        return None
+        return False
 class ReRule(Rule):
-    def __init__(self, left: str, right: str):
+    def __init__(self, left: str, right: Union[str, bool]):
        self._left = left
        self._right = right
-    def transform(self, path: PurePath) -> Optional[PurePath]:
+    def transform(self, path: PurePath) -> Union[PurePath, bool]:
        if match := re.fullmatch(self._left, str(path)):
-            kwargs: Dict[str, Union[int, float]] = {}
+            if isinstance(self._right, bool):
                return self._right or path
            vars: Dict[str, Union[str, int, float]] = {}
            groups = [match[0]] + list(match.groups())
            for i, group in enumerate(groups):
                vars[f"g{i}"] = group
                try:
-                    kwargs[f"i{i}"] = int(group)
+                    vars[f"i{i}"] = int(group)
                except ValueError:
                    pass
                try:
-                    kwargs[f"f{i}"] = float(group)
+                    vars[f"f{i}"] = float(group)
                except ValueError:
                    pass
-            return PurePath(self._right.format(*groups, **kwargs))
+            result = eval(f"f{self._right!r}", vars)
            return PurePath(result)
-        return None
+        return False
@dataclass
@@ -136,7 +165,9 @@ QUOTATION_MARKS = {'"', "'"}
 def parse_string_literal(line: Line) -> str:
    escaped = False
-    result = []
+
    # Points to first character of string literal
    start_index = line.index
    quotation_mark = line.get()
    if quotation_mark not in QUOTATION_MARKS:
@@ -147,17 +178,17 @@ def parse_string_literal(line: Line) -> str:
    while c := line.get():
        if escaped:
            result.append(c)
            escaped = False
            line.advance()
        elif c == quotation_mark:
            line.advance()
-            return "".join(result)
+            stop_index = line.index
            literal = line.line[start_index:stop_index]
            return ast.literal_eval(literal)
        elif c == "\\":
            escaped = True
            line.advance()
        else:
            result.append(c)
            line.advance()
    raise RuleParseException(line, "Expected end of string literal")
@@ -174,11 +205,14 @@ def parse_until_space_or_eol(line: Line) -> str:
    return "".join(result)
-def parse_string(line: Line) -> str:
+def parse_string(line: Line) -> Union[str, bool]:
    if line.get() in QUOTATION_MARKS:
        return parse_string_literal(line)
    else:
-        return parse_until_space_or_eol(line)
+        string = parse_until_space_or_eol(line)
        if string == "!":
            return True
        return string
 def parse_arrow(line: Line) -> str:
@@ -200,17 +234,35 @@ def parse_arrow(line: Line) -> str:
 def parse_rule(line: Line) -> Rule:
    # Parse left side
    leftindex = line.index
    left = parse_string(line)
    if isinstance(left, bool):
        line.index = leftindex
        raise RuleParseException(line, "Left side can't be '!'")
    # Parse arrow
    line.expect(" ")
    arrowindex = line.index
    arrowname = parse_arrow(line)
    # Parse right side
    if line.get():
        line.expect(" ")
        right = parse_string(line)
    else:
        right = False
    rightpath: Union[PurePath, bool]
    if isinstance(right, bool):
        rightpath = right
    else:
        rightpath = PurePath(right)
    # Dispatch
    if arrowname == "":
-        return NormalRule(PurePath(left), PurePath(right))
+        return NormalRule(PurePath(left), rightpath)
    elif arrowname == "exact":
-        return ExactRule(PurePath(left), PurePath(right))
+        return ExactRule(PurePath(left), rightpath)
    elif arrowname == "re":
        return ReRule(left, right)
    else:
@@ -232,7 +284,12 @@ class Transformer:
    def transform(self, path: PurePath) -> Optional[PurePath]:
        for rule in self._rules:
-            if result := rule.transform(path):
+            result = rule.transform(path)
            if isinstance(result, PurePath):
                return result
            elif result:  # Exclamation mark
                return None
            else:
                continue
        return None