pferd/PFERD/transform.py

143 lines
4.1 KiB
Python
Raw Normal View History

2020-04-22 20:25:09 +02:00
"""
Transforms let the user define functions to decide where the downloaded files
should be placed locally. They let the user do more advanced things like moving
only files whose names match a regex, or renaming files from one numbering
scheme to another.
"""
import os
import re
2020-04-22 20:25:09 +02:00
from dataclasses import dataclass
2020-04-23 19:38:41 +02:00
from pathlib import PurePath
2020-04-24 20:39:30 +02:00
from typing import Callable, List, Optional, TypeVar
from .utils import PathLike, Regex, to_path, to_pattern
2020-04-22 20:25:09 +02:00
2020-04-23 19:38:41 +02:00
Transform = Callable[[PurePath], Optional[PurePath]]
2020-04-22 20:25:09 +02:00
@dataclass
class Transformable:
"""
An object that can be transformed by a Transform.
"""
2020-04-23 19:38:41 +02:00
path: PurePath
2020-04-22 20:25:09 +02:00
TF = TypeVar("TF", bound=Transformable)
def apply_transform(
2020-04-23 19:38:41 +02:00
transform: Transform,
2020-04-22 20:25:09 +02:00
transformables: List[TF],
) -> List[TF]:
"""
Apply a Transform to multiple Transformables, discarding those that were
not transformed by the Transform.
"""
result: List[TF] = []
for transformable in transformables:
new_path = transform(transformable.path)
if new_path:
2020-04-22 20:25:09 +02:00
transformable.path = new_path
result.append(transformable)
return result
2020-04-24 13:35:31 +02:00
# Transform combinators
def keep(path: PurePath) -> Optional[PurePath]:
return path
2020-04-24 20:00:21 +02:00
2020-04-24 13:35:31 +02:00
def attempt(*args: Transform) -> Transform:
def inner(path: PurePath) -> Optional[PurePath]:
for transform in args:
result = transform(path)
if result:
2020-04-24 13:35:31 +02:00
return result
return None
return inner
def optionally(transform: Transform) -> Transform:
return attempt(transform, lambda path: path)
def do(*args: Transform) -> Transform:
def inner(path: PurePath) -> Optional[PurePath]:
current = path
for transform in args:
result = transform(current)
if result:
2020-04-24 13:35:31 +02:00
current = result
else:
return None
return current
return inner
def predicate(pred: Callable[[PurePath], bool]) -> Transform:
def inner(path: PurePath) -> Optional[PurePath]:
if pred(path):
return path
return None
return inner
def glob(pattern: str) -> Transform:
return predicate(lambda path: path.match(pattern))
def move_dir(source_dir: PathLike, target_dir: PathLike) -> Transform:
2020-04-24 20:39:30 +02:00
source_path = to_path(source_dir)
target_path = to_path(target_dir)
2020-04-24 13:35:31 +02:00
def inner(path: PurePath) -> Optional[PurePath]:
if source_path in path.parents:
return target_path / path.relative_to(source_path)
return None
return inner
def move(source: PathLike, target: PathLike) -> Transform:
2020-04-24 20:39:30 +02:00
source_path = to_path(source)
target_path = to_path(target)
2020-04-24 13:35:31 +02:00
def inner(path: PurePath) -> Optional[PurePath]:
if path == source_path:
return target_path
return None
return inner
def rename(source: str, target: str) -> Transform:
def inner(path: PurePath) -> Optional[PurePath]:
if path.name == source:
return path.with_name(target)
return None
return inner
def re_move(regex: Regex, target: str) -> Transform:
def inner(path: PurePath) -> Optional[PurePath]:
match = to_pattern(regex).fullmatch(str(path))
if match:
2020-04-24 13:35:31 +02:00
groups = [match.group(0)]
groups.extend(match.groups())
return PurePath(target.format(*groups))
return None
return inner
def re_rename(regex: Regex, target: str) -> Transform:
def inner(path: PurePath) -> Optional[PurePath]:
match = to_pattern(regex).fullmatch(path.name)
if match:
2020-04-24 13:35:31 +02:00
groups = [match.group(0)]
groups.extend(match.groups())
return path.with_name(target.format(*groups))
return None
return inner
def sanitize_windows_path(path: PurePath) -> PurePath:
"""
A small function to escape characters that are forbidden in windows path names.
This method is a no-op on other operating systems.
"""
# Escape windows illegal path characters
if os.name == 'nt':
sanitized_parts = [re.sub(r'[<>:"/|?]', "_", x) for x in list(path.parts)]
return PurePath(*sanitized_parts)
return path