mirror of
https://github.com/Garmelon/PFERD.git
synced 2023-12-21 10:23:01 +01:00
Elaborate on transforms and implement changes
This commit is contained in:
parent
9ec19be113
commit
e7a51decb0
36
CONFIG.md
36
CONFIG.md
@ -107,6 +107,21 @@ The `-re->` arrow uses regular expressions. `SOURCE` is a regular expression
|
|||||||
that must match the entire path. If this is the case, then the capturing groups
|
that must match the entire path. If this is the case, then the capturing groups
|
||||||
are available in `TARGET` for formatting.
|
are available in `TARGET` for formatting.
|
||||||
|
|
||||||
|
`TARGET` uses Python's [format string syntax][3]. The *n*-th capturing group can
|
||||||
|
be referred to as `{g<n>}` (e. g. `{g3}`). `{g0}` refers to the original path.
|
||||||
|
If capturing group *n*'s contents are a valid integer, the integer value is
|
||||||
|
available as `{i<n>}` (e. g. `{i3}`). If capturing group *n*'s contents are a
|
||||||
|
valid float, the float value is available as `{f<n>}` (e. g. `{f3}`).
|
||||||
|
|
||||||
|
Python's format string syntax has rich options for formatting its arguments. For
|
||||||
|
example, to left-pad capturing group 3 with the digit `0` to a width of 5, you
|
||||||
|
can use `{i3:05}`.
|
||||||
|
|
||||||
|
PFERD even allows you to write entire expressions inside the curly braces, for
|
||||||
|
example `{g2.lower()}` or `{g3.replace(' ', '_')}`.
|
||||||
|
|
||||||
|
[3]: <https://docs.python.org/3/library/string.html#format-string-syntax> "Format String Syntax"
|
||||||
|
|
||||||
### Example: Tutorials
|
### Example: Tutorials
|
||||||
|
|
||||||
You have an ILIAS course with lots of tutorials, but are only interested in a
|
You have an ILIAS course with lots of tutorials, but are only interested in a
|
||||||
@ -136,3 +151,24 @@ the `tutorials/` directory and thus not discover that `tutorials/tut02/`
|
|||||||
existed.
|
existed.
|
||||||
|
|
||||||
Since the second rule is only relevant for crawling, the `TARGET` is left out.
|
Since the second rule is only relevant for crawling, the `TARGET` is left out.
|
||||||
|
|
||||||
|
### Example: Lecture slides
|
||||||
|
|
||||||
|
You have a course with slides like `Lecture 3: Linear functions.PDF` and you
|
||||||
|
would like to rename them to `03_linear_functions.pdf`.
|
||||||
|
|
||||||
|
```
|
||||||
|
Lectures/
|
||||||
|
|- Lecture 1: Introduction.PDF
|
||||||
|
|- Lecture 2: Vectors and matrices.PDF
|
||||||
|
|- Lecture 3: Linear functions.PDF
|
||||||
|
...
|
||||||
|
```
|
||||||
|
|
||||||
|
To do this, you can use the most powerful of arrows, the regex arrow.
|
||||||
|
|
||||||
|
```
|
||||||
|
"Lectures/Lecture (\\d+): (.*)\\.PDF" -re-> "Lectures/{i1:02}_{g2.lower().replace(' ', '_')}.pdf"
|
||||||
|
```
|
||||||
|
|
||||||
|
Note the escaped backslashes on the `SOURCE` side.
|
||||||
|
@ -1,3 +1,9 @@
|
|||||||
|
# I'm sorry that this code has become a bit dense and unreadable. While
|
||||||
|
# reading, it is important to remember what True and False mean. I'd love to
|
||||||
|
# have some proper sum-types for the inputs and outputs, they'd make this code
|
||||||
|
# a lot easier to understand.
|
||||||
|
|
||||||
|
import ast
|
||||||
import re
|
import re
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
@ -7,12 +13,23 @@ from typing import Dict, Optional, Union
|
|||||||
|
|
||||||
class Rule(ABC):
|
class Rule(ABC):
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def transform(self, path: PurePath) -> Optional[PurePath]:
|
def transform(self, path: PurePath) -> Union[PurePath, bool]:
|
||||||
|
"""
|
||||||
|
Try to apply this rule to the path. Returns another path if the rule
|
||||||
|
was successfully applied, True if the rule matched but resulted in an
|
||||||
|
exclamation mark, and False if the rule didn't match at all.
|
||||||
|
"""
|
||||||
|
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
# These rules all use a Union[T, bool] for their right side. They are passed a
|
||||||
|
# T if the arrow's right side was a normal string, True if it was an
|
||||||
|
# exclamation mark and False if it was missing entirely.
|
||||||
|
|
||||||
class NormalRule(Rule):
|
class NormalRule(Rule):
|
||||||
def __init__(self, left: PurePath, right: PurePath):
|
def __init__(self, left: PurePath, right: Union[PurePath, bool]):
|
||||||
|
|
||||||
self._left = left
|
self._left = left
|
||||||
self._right = right
|
self._right = right
|
||||||
|
|
||||||
@ -35,49 +52,61 @@ class NormalRule(Rule):
|
|||||||
|
|
||||||
return PurePath(*path_parts)
|
return PurePath(*path_parts)
|
||||||
|
|
||||||
def transform(self, path: PurePath) -> Optional[PurePath]:
|
def transform(self, path: PurePath) -> Union[PurePath, bool]:
|
||||||
if rest := self._match_prefix(path):
|
if rest := self._match_prefix(path):
|
||||||
return self._right / rest
|
if isinstance(self._right, bool):
|
||||||
|
return self._right or path
|
||||||
|
else:
|
||||||
|
return self._right / rest
|
||||||
|
|
||||||
return None
|
return False
|
||||||
|
|
||||||
|
|
||||||
class ExactRule(Rule):
|
class ExactRule(Rule):
|
||||||
def __init__(self, left: PurePath, right: PurePath):
|
def __init__(self, left: PurePath, right: Union[PurePath, bool]):
|
||||||
self._left = left
|
self._left = left
|
||||||
self._right = right
|
self._right = right
|
||||||
|
|
||||||
def transform(self, path: PurePath) -> Optional[PurePath]:
|
def transform(self, path: PurePath) -> Union[PurePath, bool]:
|
||||||
if path == self._left:
|
if path == self._left:
|
||||||
return self._right
|
if isinstance(self._right, bool):
|
||||||
|
return self._right or path
|
||||||
|
else:
|
||||||
|
return self._right
|
||||||
|
|
||||||
return None
|
return False
|
||||||
|
|
||||||
|
|
||||||
class ReRule(Rule):
|
class ReRule(Rule):
|
||||||
def __init__(self, left: str, right: str):
|
def __init__(self, left: str, right: Union[str, bool]):
|
||||||
self._left = left
|
self._left = left
|
||||||
self._right = right
|
self._right = right
|
||||||
|
|
||||||
def transform(self, path: PurePath) -> Optional[PurePath]:
|
def transform(self, path: PurePath) -> Union[PurePath, bool]:
|
||||||
if match := re.fullmatch(self._left, str(path)):
|
if match := re.fullmatch(self._left, str(path)):
|
||||||
kwargs: Dict[str, Union[int, float]] = {}
|
if isinstance(self._right, bool):
|
||||||
|
return self._right or path
|
||||||
|
|
||||||
|
vars: Dict[str, Union[str, int, float]] = {}
|
||||||
|
|
||||||
groups = [match[0]] + list(match.groups())
|
groups = [match[0]] + list(match.groups())
|
||||||
for i, group in enumerate(groups):
|
for i, group in enumerate(groups):
|
||||||
|
vars[f"g{i}"] = group
|
||||||
|
|
||||||
try:
|
try:
|
||||||
kwargs[f"i{i}"] = int(group)
|
vars[f"i{i}"] = int(group)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
try:
|
try:
|
||||||
kwargs[f"f{i}"] = float(group)
|
vars[f"f{i}"] = float(group)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
return PurePath(self._right.format(*groups, **kwargs))
|
result = eval(f"f{self._right!r}", vars)
|
||||||
|
return PurePath(result)
|
||||||
|
|
||||||
return None
|
return False
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
@ -136,7 +165,9 @@ QUOTATION_MARKS = {'"', "'"}
|
|||||||
|
|
||||||
def parse_string_literal(line: Line) -> str:
|
def parse_string_literal(line: Line) -> str:
|
||||||
escaped = False
|
escaped = False
|
||||||
result = []
|
|
||||||
|
# Points to first character of string literal
|
||||||
|
start_index = line.index
|
||||||
|
|
||||||
quotation_mark = line.get()
|
quotation_mark = line.get()
|
||||||
if quotation_mark not in QUOTATION_MARKS:
|
if quotation_mark not in QUOTATION_MARKS:
|
||||||
@ -147,17 +178,17 @@ def parse_string_literal(line: Line) -> str:
|
|||||||
|
|
||||||
while c := line.get():
|
while c := line.get():
|
||||||
if escaped:
|
if escaped:
|
||||||
result.append(c)
|
|
||||||
escaped = False
|
escaped = False
|
||||||
line.advance()
|
line.advance()
|
||||||
elif c == quotation_mark:
|
elif c == quotation_mark:
|
||||||
line.advance()
|
line.advance()
|
||||||
return "".join(result)
|
stop_index = line.index
|
||||||
|
literal = line.line[start_index:stop_index]
|
||||||
|
return ast.literal_eval(literal)
|
||||||
elif c == "\\":
|
elif c == "\\":
|
||||||
escaped = True
|
escaped = True
|
||||||
line.advance()
|
line.advance()
|
||||||
else:
|
else:
|
||||||
result.append(c)
|
|
||||||
line.advance()
|
line.advance()
|
||||||
|
|
||||||
raise RuleParseException(line, "Expected end of string literal")
|
raise RuleParseException(line, "Expected end of string literal")
|
||||||
@ -174,11 +205,14 @@ def parse_until_space_or_eol(line: Line) -> str:
|
|||||||
return "".join(result)
|
return "".join(result)
|
||||||
|
|
||||||
|
|
||||||
def parse_string(line: Line) -> str:
|
def parse_string(line: Line) -> Union[str, bool]:
|
||||||
if line.get() in QUOTATION_MARKS:
|
if line.get() in QUOTATION_MARKS:
|
||||||
return parse_string_literal(line)
|
return parse_string_literal(line)
|
||||||
else:
|
else:
|
||||||
return parse_until_space_or_eol(line)
|
string = parse_until_space_or_eol(line)
|
||||||
|
if string == "!":
|
||||||
|
return True
|
||||||
|
return string
|
||||||
|
|
||||||
|
|
||||||
def parse_arrow(line: Line) -> str:
|
def parse_arrow(line: Line) -> str:
|
||||||
@ -200,17 +234,35 @@ def parse_arrow(line: Line) -> str:
|
|||||||
|
|
||||||
|
|
||||||
def parse_rule(line: Line) -> Rule:
|
def parse_rule(line: Line) -> Rule:
|
||||||
|
# Parse left side
|
||||||
|
leftindex = line.index
|
||||||
left = parse_string(line)
|
left = parse_string(line)
|
||||||
|
if isinstance(left, bool):
|
||||||
|
line.index = leftindex
|
||||||
|
raise RuleParseException(line, "Left side can't be '!'")
|
||||||
|
|
||||||
|
# Parse arrow
|
||||||
line.expect(" ")
|
line.expect(" ")
|
||||||
arrowindex = line.index
|
arrowindex = line.index
|
||||||
arrowname = parse_arrow(line)
|
arrowname = parse_arrow(line)
|
||||||
line.expect(" ")
|
|
||||||
right = parse_string(line)
|
|
||||||
|
|
||||||
|
# Parse right side
|
||||||
|
if line.get():
|
||||||
|
line.expect(" ")
|
||||||
|
right = parse_string(line)
|
||||||
|
else:
|
||||||
|
right = False
|
||||||
|
rightpath: Union[PurePath, bool]
|
||||||
|
if isinstance(right, bool):
|
||||||
|
rightpath = right
|
||||||
|
else:
|
||||||
|
rightpath = PurePath(right)
|
||||||
|
|
||||||
|
# Dispatch
|
||||||
if arrowname == "":
|
if arrowname == "":
|
||||||
return NormalRule(PurePath(left), PurePath(right))
|
return NormalRule(PurePath(left), rightpath)
|
||||||
elif arrowname == "exact":
|
elif arrowname == "exact":
|
||||||
return ExactRule(PurePath(left), PurePath(right))
|
return ExactRule(PurePath(left), rightpath)
|
||||||
elif arrowname == "re":
|
elif arrowname == "re":
|
||||||
return ReRule(left, right)
|
return ReRule(left, right)
|
||||||
else:
|
else:
|
||||||
@ -232,7 +284,12 @@ class Transformer:
|
|||||||
|
|
||||||
def transform(self, path: PurePath) -> Optional[PurePath]:
|
def transform(self, path: PurePath) -> Optional[PurePath]:
|
||||||
for rule in self._rules:
|
for rule in self._rules:
|
||||||
if result := rule.transform(path):
|
result = rule.transform(path)
|
||||||
|
if isinstance(result, PurePath):
|
||||||
return result
|
return result
|
||||||
|
elif result: # Exclamation mark
|
||||||
|
return None
|
||||||
|
else:
|
||||||
|
continue
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
Loading…
Reference in New Issue
Block a user