2020-04-22 18:25:09 +00:00
|
|
|
"""
|
|
|
|
Transforms let the user define functions to decide where the downloaded files
|
|
|
|
should be placed locally. They let the user do more advanced things like moving
|
|
|
|
only files whose names match a regex, or renaming files from one numbering
|
|
|
|
scheme to another.
|
|
|
|
"""
|
|
|
|
|
2020-11-12 20:52:46 +01:00
|
|
|
import os
|
|
|
|
import re
|
2020-04-22 18:25:09 +00:00
|
|
|
from dataclasses import dataclass
|
2020-04-23 17:38:41 +00:00
|
|
|
from pathlib import PurePath
|
2020-04-24 18:39:30 +00:00
|
|
|
from typing import Callable, List, Optional, TypeVar
|
|
|
|
|
|
|
|
from .utils import PathLike, Regex, to_path, to_pattern
|
2020-04-22 18:25:09 +00:00
|
|
|
|
2020-04-23 17:38:41 +00:00
|
|
|
# A Transform receives a path and returns the path it should become, or None
# when the Transform does not apply to that path.
Transform = Callable[[PurePath], Optional[PurePath]]
|
2020-04-22 18:25:09 +00:00
|
|
|
|
|
|
|
|
|
|
|
@dataclass
class Transformable:
    """
    Anything that carries a path which a Transform is able to rewrite.
    """

    # Current location of the object; a successful Transform replaces it.
    path: PurePath
|
2020-04-22 18:25:09 +00:00
|
|
|
|
|
|
|
|
|
|
|
# Type variable restricted to Transformable and its subclasses, so that a
# function can return the same concrete type it was handed.
TF = TypeVar("TF", bound=Transformable)
|
|
|
|
|
|
|
|
|
|
|
|
def apply_transform(
        transform: Transform,
        transformables: List[TF],
) -> List[TF]:
    """
    Run a Transform over several Transformables and keep only the ones for
    which the Transform produced a new path.

    The kept objects are the very same instances that were passed in; their
    path attribute is overwritten in place with the Transform's result.
    Objects the Transform rejects are left untouched and are not returned.
    """

    kept: List[TF] = []
    for candidate in transformables:
        moved = transform(candidate.path)
        if not moved:
            # The transform does not apply to this object, so drop it.
            continue
        candidate.path = moved
        kept.append(candidate)
    return kept
|
2020-04-24 11:35:31 +00:00
|
|
|
|
|
|
|
# Transform combinators
|
|
|
|
|
2020-11-12 20:52:46 +01:00
|
|
|
def keep(path: PurePath) -> Optional[PurePath]:
    """
    The identity Transform: accept every path and leave it unchanged.
    """

    return path
|
2020-04-24 18:00:21 +00:00
|
|
|
|
2020-04-24 11:35:31 +00:00
|
|
|
def attempt(*args: Transform) -> Transform:
    """
    Combine several Transforms into one that tries them in the given order
    and returns the result of the first one that produces a path.

    Transforms after the first successful one are not called. If none of
    them succeeds (or none were given), the combined Transform returns None.
    """

    def inner(path: PurePath) -> Optional[PurePath]:
        # The generator is lazy, so each candidate only runs if every
        # earlier candidate came back empty.
        outcomes = (candidate(path) for candidate in args)
        return next((outcome for outcome in outcomes if outcome), None)

    return inner
|
|
|
|
|
|
|
|
def optionally(transform: Transform) -> Transform:
    """
    Make a Transform optional: use its result when it produces one,
    otherwise fall back to the path that was passed in.
    """

    def unchanged(path: PurePath) -> Optional[PurePath]:
        return path

    return attempt(transform, unchanged)
|
|
|
|
|
|
|
|
def do(*args: Transform) -> Transform:
    """
    Chain several Transforms: each one receives the output of the one before
    it, and the last output is the overall result.

    As soon as one Transform in the chain yields nothing, the chain stops
    and the combined Transform returns None. With no Transforms at all the
    path is returned unchanged.
    """

    def inner(path: PurePath) -> Optional[PurePath]:
        current: Optional[PurePath] = path
        for step in args:
            current = step(current)
            if not current:
                # One failed step rejects the whole chain.
                return None
        return current

    return inner
|
|
|
|
|
|
|
|
def predicate(pred: Callable[[PurePath], bool]) -> Transform:
    """
    Turn a boolean test into a Transform that passes a path through
    unchanged when the test holds and returns None otherwise.
    """

    def inner(path: PurePath) -> Optional[PurePath]:
        return path if pred(path) else None

    return inner
|
|
|
|
|
|
|
|
def glob(pattern: str) -> Transform:
    """
    Create a Transform that keeps a path unchanged if PurePath.match accepts
    it for the given glob pattern, and returns None otherwise.
    """

    def matches(path: PurePath) -> bool:
        return path.match(pattern)

    return predicate(matches)
|
|
|
|
|
|
|
|
def move_dir(source_dir: PathLike, target_dir: PathLike) -> Transform:
    """
    Create a Transform that relocates everything below source_dir to the
    same relative position below target_dir.

    Only paths that have source_dir among their parents are moved; any other
    path (including source_dir itself) yields None.
    """

    old_root = to_path(source_dir)
    new_root = to_path(target_dir)

    def inner(path: PurePath) -> Optional[PurePath]:
        if old_root not in path.parents:
            return None
        return new_root / path.relative_to(old_root)

    return inner
|
|
|
|
|
|
|
|
def move(source: PathLike, target: PathLike) -> Transform:
    """
    Create a Transform that maps exactly one path, source, to target.
    Every other path yields None.
    """

    old_location = to_path(source)
    new_location = to_path(target)

    def inner(path: PurePath) -> Optional[PurePath]:
        return new_location if path == old_location else None

    return inner
|
|
|
|
|
|
|
|
def rename(source: str, target: str) -> Transform:
    """
    Create a Transform that renames files called source to target while
    keeping them in their directory. Paths whose final component is not
    exactly source yield None.
    """

    def inner(path: PurePath) -> Optional[PurePath]:
        return path.with_name(target) if path.name == source else None

    return inner
|
|
|
|
|
|
|
|
def re_move(regex: Regex, target: str) -> Transform:
    """
    Create a Transform that moves paths matching a regex to a location built
    from a format string.

    The regex must match the entire string form of the path (fullmatch on
    str(path)). On a match, target is filled in via str.format, where {0} is
    the whole match and {1}, {2}, ... are the regex's capture groups, and the
    result becomes the new path. Paths that do not match yield None.
    """

    # The pattern does not depend on the path, so resolve it once per
    # Transform instead of once per call. (Assumes to_pattern has no side
    # effects beyond producing the pattern object - confirm in .utils.)
    pattern = to_pattern(regex)

    def inner(path: PurePath) -> Optional[PurePath]:
        match = pattern.fullmatch(str(path))
        if match is None:
            return None
        return PurePath(target.format(match.group(0), *match.groups()))

    return inner
|
|
|
|
|
|
|
|
def re_rename(regex: Regex, target: str) -> Transform:
    """
    Create a Transform that renames files whose name matches a regex, using a
    format string to build the new name.

    The regex must match the entire final path component (fullmatch on
    path.name). On a match, target is filled in via str.format, where {0} is
    the whole match and {1}, {2}, ... are the regex's capture groups, and the
    result replaces the file name; the directory part is kept. Paths whose
    name does not match yield None.
    """

    # The pattern does not depend on the path, so resolve it once per
    # Transform instead of once per call. (Assumes to_pattern has no side
    # effects beyond producing the pattern object - confirm in .utils.)
    pattern = to_pattern(regex)

    def inner(path: PurePath) -> Optional[PurePath]:
        match = pattern.fullmatch(path.name)
        if match is None:
            return None
        return path.with_name(target.format(match.group(0), *match.groups()))

    return inner
|
2020-11-12 20:52:46 +01:00
|
|
|
|
|
|
|
|
2021-04-19 11:10:02 +02:00
|
|
|
def sanitize_windows_path(path: PurePath) -> PurePath:
    """
    Replace characters that are forbidden in Windows file names with
    underscores.

    Every component of the path is sanitized on its own and the components
    are then joined back together. On operating systems other than Windows
    the path is returned unchanged.

    NOTE(review): all components are processed, including a leading drive or
    root component if the path has one - this looks intended for relative
    paths only; confirm against callers.
    """

    if os.name != 'nt':
        return path

    # Windows reserves < > : " / \ | ? * in file names. The backslash is left
    # out of the character class because it is a path separator on Windows.
    sanitized_parts = [re.sub(r'[<>:"/|?*]', "_", part) for part in path.parts]
    return PurePath(*sanitized_parts)
|