2020-04-20 17:15:47 +02:00
|
|
|
"""
|
|
|
|
A few utility bobs and bits.
|
|
|
|
"""
|
|
|
|
|
2020-04-20 03:54:47 +02:00
|
|
|
import logging
|
|
|
|
from pathlib import Path, PurePath
|
|
|
|
from typing import Optional, Tuple
|
2018-11-24 09:27:33 +01:00
|
|
|
|
2020-04-20 18:38:18 +02:00
|
|
|
import bs4
|
2020-04-20 03:54:47 +02:00
|
|
|
import requests
|
|
|
|
from colorama import Fore, Style
|
|
|
|
|
|
|
|
|
|
|
|
def move(
        path: PurePath,
        from_folders: Tuple[str, ...],
        to_folders: Tuple[str, ...],
) -> Optional[PurePath]:
    """
    Swap the leading folders of a path.

    If the first components of path are exactly from_folders, return a new
    path in which that prefix is replaced with to_folders and the remaining
    components are kept. Returns None if path does not start with
    from_folders.

    An empty from_folders matches every path, so to_folders is simply
    prepended in that case.
    """

    # path.parts is a tuple and a tuple never compares equal to a list, so
    # normalise the prefix first. Otherwise passing a list would silently
    # make every lookup fail.
    prefix = tuple(from_folders)
    length = len(prefix)

    if path.parts[:length] == prefix:
        return PurePath(*to_folders, *path.parts[length:])

    return None
|
2018-11-27 09:52:27 +01:00
|
|
|
|
2020-04-20 14:29:28 +02:00
|
|
|
|
2020-04-20 03:54:47 +02:00
|
|
|
def rename(path: PurePath, to_name: str) -> PurePath:
    """
    Return a copy of path whose last component is replaced by to_name.
    """

    # Everything except the final component stays as it is.
    leading_parts = path.parts[:-1]
    return PurePath(*leading_parts, to_name)
|
2018-11-27 09:52:27 +01:00
|
|
|
|
2020-04-20 14:29:28 +02:00
|
|
|
|
2020-04-20 18:38:18 +02:00
|
|
|
def soupify(response: requests.Response) -> bs4.BeautifulSoup:
    """
    Parse the text of a requests response into a bs4 object.

    The text is parsed with the "html.parser" parser.
    """

    markup = response.text
    return bs4.BeautifulSoup(markup, "html.parser")
|
|
|
|
|
|
|
|
|
2020-04-20 03:54:47 +02:00
|
|
|
def stream_to_path(response: requests.Response, to_path: Path, chunk_size: int = 1024 ** 2) -> None:
    """
    Stream the content of a requests response into the file at to_path.

    The content is written chunk by chunk instead of being loaded in one
    piece, which avoids excessive memory usage when downloading large files.
    The chunk_size is in bytes. The response is used as a context manager for
    the duration of the download and the target file is opened in binary
    write mode.
    """

    with response, open(to_path, "wb") as target_file:
        for block in response.iter_content(chunk_size=chunk_size):
            target_file.write(block)
|
2018-11-24 09:27:33 +01:00
|
|
|
|
2020-04-20 14:29:28 +02:00
|
|
|
|
|
|
|
def prompt_yes_no(question: str, default: Optional[bool] = None) -> bool:
    """
    Ask the user a yes/no question until they give a usable answer.

    The reply is stripped and lower-cased before it is checked. "yes", "ye"
    and "y" return True, "no" and "n" return False. An empty reply returns
    default, unless default is None. Any other reply prints a hint and asks
    again.
    """

    # The capital letter in the hint shows which answer an empty reply selects.
    hint = "[Y/n]" if default is True else "[y/N]" if default is False else "[y/n]"
    text = f"{question} {hint} "

    affirmative = {"yes", "ye", "y"}
    negative = {"no", "n"}

    while True:
        answer = input(text).strip().lower()

        if answer in affirmative:
            return True
        if answer in negative:
            return False
        if not answer and default is not None:
            return default

        print("Please reply with 'yes'/'y' or 'no'/'n'.")
|
2020-04-20 14:29:28 +02:00
|
|
|
|
|
|
|
|
2020-04-20 15:39:38 +02:00
|
|
|
class ResolveException(Exception):
    """
    Signals that something went wrong while resolving a file.
    """
|
|
|
|
|
|
|
|
|
2020-04-20 19:27:26 +02:00
|
|
|
class Location:
    """
    An object that is anchored at a path.
    """

    def __init__(self, path: Path):
        # Only the resolved form of the given path is kept.
        self._path = path.resolve()

    @property
    def path(self) -> Path:
        """
        The resolved path of this location.
        """

        return self._path

    def resolve(self, target: Path) -> Path:
        """
        Resolve target relative to the path of this location and return the
        resolved result.

        Raises a [ResolveException] if this location's path is not one of the
        parents of the resolved file, i.e. the file is not inside the
        directory.
        """

        resolved = (self.path / target).resolve()

        # TODO Make this less inefficient
        if self.path in resolved.parents:
            return resolved

        raise ResolveException(f"Path {target} is not inside directory {self.path}")
|
2020-04-20 15:39:38 +02:00
|
|
|
|
|
|
|
|
2019-06-07 13:26:23 +02:00
|
|
|
class PrettyLogger:
    """
    Wraps a logger and emits a handful of colored, specially formatted
    messages through it at info level.
    """

    def __init__(self, logger: logging.Logger) -> None:
        self.logger = logger

    def modified_file(self, file_name: Path) -> None:
        """
        Report that an existing file has changed.
        """

        message = f"{Fore.MAGENTA}{Style.BRIGHT}Modified {str(file_name)!r}.{Style.RESET_ALL}"
        self.logger.info(message)

    def new_file(self, file_name: Path) -> None:
        """
        Report that a new file has been downloaded.
        """

        message = f"{Fore.GREEN}{Style.BRIGHT}Created {str(file_name)!r}.{Style.RESET_ALL}"
        self.logger.info(message)

    def ignored_file(self, file_name: Path) -> None:
        """
        Report that nothing in particular happened to this file or directory.
        """

        message = f"{Style.DIM}Ignored {str(file_name)!r}.{Style.RESET_ALL}"
        self.logger.info(message)

    def filtered_path(self, path: Path, reason: str) -> None:
        """
        Report that a crawler filter rejected the given path, including the
        reason for the rejection.
        """

        # The reason is shown at normal intensity inside an otherwise dimmed line.
        message = (
            f"{Style.DIM}Not considering {str(path)!r} due to filter rules"
            f" ({Style.NORMAL}{reason}{Style.DIM})."
            f"{Style.RESET_ALL}"
        )
        self.logger.info(message)

    def starting_synchronizer(
            self,
            target_directory: Path,
            synchronizer_name: str,
            subject: Optional[str] = None,
    ) -> None:
        """
        Emit the special message marking that a synchronizer has been started.

        An empty line is logged first. The subject is only mentioned when one
        is given.
        """

        if subject:
            subject_str = f"{subject} "
        else:
            subject_str = ""

        message = (
            f"{Fore.CYAN}{Style.BRIGHT}Synchronizing {subject_str}to {str(target_directory)!r}"
            f" using the {synchronizer_name} synchronizer.{Style.RESET_ALL}"
        )

        self.logger.info("")
        self.logger.info(message)
|