Elaborate on transforms and implement changes

2025-08-12 18:42:43 +02:00 · 2021-04-29 20:13:46 +02:00
parent 9ec19be113
commit e7a51decb0
2 changed files with 120 additions and 27 deletions
--- a/CONFIG.md
+++ b/CONFIG.md
@@ -107,6 +107,21 @@ The `-re->` arrow uses regular expressions. `SOURCE` is a regular expression
 that must match the entire path. If this is the case, then the capturing groups
 are available in `TARGET` for formatting.

+`TARGET` uses Python's [format string syntax][3]. The *n*-th capturing group can
+be referred to as `{g<n>}` (e.g. `{g3}`). `{g0}` refers to the original path.
+If capturing group *n*'s contents are a valid integer, the integer value is
+available as `{i<n>}` (e.g. `{i3}`). If capturing group *n*'s contents are a
+valid float, the float value is available as `{f<n>}` (e.g. `{f3}`).
+
+Python's format string syntax has rich options for formatting its arguments. For
+example, to left-pad capturing group 3 with the digit `0` to a width of 5, you
+can use `{i3:05}`.
+
+PFERD even allows you to write entire expressions inside the curly braces, for
+example `{g2.lower()}` or `{g3.replace(' ', '_')}`.
+
+[3]: <https://docs.python.org/3/library/string.html#format-string-syntax> "Format String Syntax"
+
 ### Example: Tutorials

 You have an ILIAS course with lots of tutorials, but are only interested in a
@@ -136,3 +151,24 @@ the `tutorials/` directory and thus not discover that `tutorials/tut02/`
 existed.

 Since the second rule is only relevant for crawling, the `TARGET` is left out.
+
+### Example: Lecture slides
+
+You have a course with slides like `Lecture 3: Linear functions.PDF` and you
+would like to rename them to `03_linear_functions.pdf`.
+
+```
+Lectures/
+  |- Lecture 1: Introduction.PDF
+  |- Lecture 2: Vectors and matrices.PDF
+  |- Lecture 3: Linear functions.PDF
+  ...
+```
+
+To do this, you can use the most powerful of arrows, the regex arrow.
+
+```
+"Lectures/Lecture (\\d+): (.*)\\.PDF" -re-> "Lectures/{i1:02}_{g2.lower().replace(' ', '_')}.pdf"
+```
+
+Note the escaped backslashes on the `SOURCE` side.
--- a/PFERD/transformer.py
+++ b/PFERD/transformer.py
@@ -1,3 +1,9 @@
+# I'm sorry that this code has become a bit dense and unreadable. While
+# reading, it is important to remember what True and False mean. I'd love to
+# have some proper sum-types for the inputs and outputs, they'd make this code
+# a lot easier to understand.
+
+import ast
 import re
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
@@ -7,12 +13,23 @@ from typing import Dict, Optional, Union

 class Rule(ABC):
    @abstractmethod
-    def transform(self, path: PurePath) -> Optional[PurePath]:
+    def transform(self, path: PurePath) -> Union[PurePath, bool]:
+        """
+        Try to apply this rule to the path. Returns another path if the rule
+        was successfully applied, True if the rule matched but resulted in an
+        exclamation mark, and False if the rule didn't match at all.
+        """
+
        pass


+# These rules all use a Union[T, bool] for their right side. They are passed a
+# T if the arrow's right side was a normal string, True if it was an
+# exclamation mark and False if it was missing entirely.
+
 class NormalRule(Rule):
-    def __init__(self, left: PurePath, right: PurePath):
+    def __init__(self, left: PurePath, right: Union[PurePath, bool]):
+
        self._left = left
        self._right = right

@@ -35,49 +52,61 @@ class NormalRule(Rule):

        return PurePath(*path_parts)

-    def transform(self, path: PurePath) -> Optional[PurePath]:
+    def transform(self, path: PurePath) -> Union[PurePath, bool]:
        if rest := self._match_prefix(path):
-            return self._right / rest
+            if isinstance(self._right, bool):
+                return self._right or path
+            else:
+                return self._right / rest

-        return None
+        return False


 class ExactRule(Rule):
-    def __init__(self, left: PurePath, right: PurePath):
+    def __init__(self, left: PurePath, right: Union[PurePath, bool]):
        self._left = left
        self._right = right

-    def transform(self, path: PurePath) -> Optional[PurePath]:
+    def transform(self, path: PurePath) -> Union[PurePath, bool]:
        if path == self._left:
-            return self._right
+            if isinstance(self._right, bool):
+                return self._right or path
+            else:
+                return self._right

-        return None
+        return False


 class ReRule(Rule):
-    def __init__(self, left: str, right: str):
+    def __init__(self, left: str, right: Union[str, bool]):
        self._left = left
        self._right = right

-    def transform(self, path: PurePath) -> Optional[PurePath]:
+    def transform(self, path: PurePath) -> Union[PurePath, bool]:
        if match := re.fullmatch(self._left, str(path)):
-            kwargs: Dict[str, Union[int, float]] = {}
+            if isinstance(self._right, bool):
+                return self._right or path
+
+            vars: Dict[str, Union[str, int, float]] = {}

            groups = [match[0]] + list(match.groups())
            for i, group in enumerate(groups):
+                vars[f"g{i}"] = group
+
                try:
-                    kwargs[f"i{i}"] = int(group)
+                    vars[f"i{i}"] = int(group)
                except ValueError:
                    pass

                try:
-                    kwargs[f"f{i}"] = float(group)
+                    vars[f"f{i}"] = float(group)
                except ValueError:
                    pass

-            return PurePath(self._right.format(*groups, **kwargs))
+            result = eval(f"f{self._right!r}", vars)
+            return PurePath(result)

-        return None
+        return False


@dataclass
@@ -136,7 +165,9 @@ QUOTATION_MARKS = {'"', "'"}

 def parse_string_literal(line: Line) -> str:
    escaped = False
-    result = []
+
+    # Points to first character of string literal
+    start_index = line.index

    quotation_mark = line.get()
    if quotation_mark not in QUOTATION_MARKS:
@@ -147,17 +178,17 @@ def parse_string_literal(line: Line) -> str:

    while c := line.get():
        if escaped:
-            result.append(c)
            escaped = False
            line.advance()
        elif c == quotation_mark:
            line.advance()
-            return "".join(result)
+            stop_index = line.index
+            literal = line.line[start_index:stop_index]
+            return ast.literal_eval(literal)
        elif c == "\\":
            escaped = True
            line.advance()
        else:
-            result.append(c)
            line.advance()

    raise RuleParseException(line, "Expected end of string literal")
@@ -174,11 +205,14 @@ def parse_until_space_or_eol(line: Line) -> str:
    return "".join(result)


-def parse_string(line: Line) -> str:
+def parse_string(line: Line) -> Union[str, bool]:
    if line.get() in QUOTATION_MARKS:
        return parse_string_literal(line)
    else:
-        return parse_until_space_or_eol(line)
+        string = parse_until_space_or_eol(line)
+        if string == "!":
+            return True
+        return string


 def parse_arrow(line: Line) -> str:
@@ -200,17 +234,35 @@ def parse_arrow(line: Line) -> str:


 def parse_rule(line: Line) -> Rule:
+    # Parse left side
+    leftindex = line.index
    left = parse_string(line)
+    if isinstance(left, bool):
+        line.index = leftindex
+        raise RuleParseException(line, "Left side can't be '!'")
+
+    # Parse arrow
    line.expect(" ")
    arrowindex = line.index
    arrowname = parse_arrow(line)
-    line.expect(" ")
-    right = parse_string(line)

+    # Parse right side
+    if line.get():
+        line.expect(" ")
+        right = parse_string(line)
+    else:
+        right = False
+    rightpath: Union[PurePath, bool]
+    if isinstance(right, bool):
+        rightpath = right
+    else:
+        rightpath = PurePath(right)
+
+    # Dispatch
    if arrowname == "":
-        return NormalRule(PurePath(left), PurePath(right))
+        return NormalRule(PurePath(left), rightpath)
    elif arrowname == "exact":
-        return ExactRule(PurePath(left), PurePath(right))
+        return ExactRule(PurePath(left), rightpath)
    elif arrowname == "re":
        return ReRule(left, right)
    else:
@@ -232,7 +284,12 @@ class Transformer:

    def transform(self, path: PurePath) -> Optional[PurePath]:
        for rule in self._rules:
-            if result := rule.transform(path):
+            result = rule.transform(path)
+            if isinstance(result, PurePath):
                return result
+            elif result:  # Exclamation mark
+                return None
+            else:
+                continue

        return None