2020-04-20 17:15:47 +02:00
|
|
|
"""
|
|
|
|
A few utility bobs and bits.
|
|
|
|
"""
|
|
|
|
|
2020-04-24 20:39:30 +02:00
|
|
|
import re
|
2020-04-20 03:54:47 +02:00
|
|
|
from pathlib import Path, PurePath
|
2020-04-24 13:50:27 +02:00
|
|
|
from typing import Optional, Tuple, Union
|
2018-11-24 09:27:33 +01:00
|
|
|
|
2020-04-20 18:38:18 +02:00
|
|
|
import bs4
|
2020-04-20 03:54:47 +02:00
|
|
|
import requests
|
|
|
|
|
2020-05-08 00:26:33 +02:00
|
|
|
from .progress import ProgressSettings, progress_for, size_from_headers
|
|
|
|
|
2020-04-24 20:39:30 +02:00
|
|
|
# Anything to_path accepts: a PurePath, a plain string, or a tuple of strings
# holding the individual path segments.
PathLike = Union[PurePath, str, Tuple[str, ...]]


def to_path(pathlike: PathLike) -> Path:
    """
    Build a concrete Path from any PathLike value.

    A tuple is interpreted as a sequence of path segments which are joined
    together. Any other value (a string or a PurePath) is handed to the Path
    constructor as a single argument.
    """
    # Normalise to a tuple of segments so there is a single construction site.
    segments = pathlike if isinstance(pathlike, tuple) else (pathlike,)
    return Path(*segments)
|
2018-11-27 09:52:27 +01:00
|
|
|
|
2020-04-20 14:29:28 +02:00
|
|
|
|
2020-04-24 20:39:30 +02:00
|
|
|
# A regular expression, given either as a pattern string or as an already
# compiled re.Pattern.
Regex = Union[str, re.Pattern]


def to_pattern(regex: Regex) -> re.Pattern:
    """
    Return a compiled re.Pattern for the given regex.

    An already compiled pattern is returned as-is (the very same object).
    Anything else is passed to re.compile.
    """
    return regex if isinstance(regex, re.Pattern) else re.compile(regex)
|
2018-11-27 09:52:27 +01:00
|
|
|
|
2020-04-20 14:29:28 +02:00
|
|
|
|
2020-04-20 18:38:18 +02:00
|
|
|
def soupify(response: requests.Response) -> bs4.BeautifulSoup:
    """
    Parse the text of a requests response into a BeautifulSoup object.

    The response's text is handed to bs4 together with the "html.parser"
    parser name.
    """
    markup = response.text
    return bs4.BeautifulSoup(markup, "html.parser")
|
|
|
|
|
|
|
|
|
2020-05-08 00:26:33 +02:00
|
|
|
def stream_to_path(
        response: requests.Response,
        target: Path,
        progress_name: Optional[str] = None,
        chunk_size: int = 1024 ** 2
) -> None:
    """
    Stream the content of a requests response into the file at target.

    The content is consumed in pieces of chunk_size bytes and every piece is
    written out right away, so a large download never has to be held in memory
    as a whole. The response is used as a context manager for the duration of
    the transfer.

    Progress settings are only created when progress_name is set and the
    length obtained from size_from_headers is greater than 10 MiB. In every
    other case progress_for is called with None.
    """
    with response:
        total = size_from_headers(response)
        # Short-circuits exactly like the original chained condition: a missing
        # name or a missing length means int() is never attempted.
        wants_bar = progress_name and total and int(total) > 10 * 1024 ** 2  # 10 MiB
        bar_settings: Optional[ProgressSettings] = (
            ProgressSettings(progress_name, total) if wants_bar else None
        )

        # The file is opened first and the progress context second, so they are
        # torn down in the reverse order.
        with open(target, 'wb') as sink, progress_for(bar_settings) as bar:
            for piece in response.iter_content(chunk_size=chunk_size):
                sink.write(piece)
                bar.advance(len(piece))
|
2018-11-24 09:27:33 +01:00
|
|
|
|
2020-04-20 14:29:28 +02:00
|
|
|
|
|
|
|
def prompt_yes_no(question: str, default: Optional[bool] = None) -> bool:
    """
    Ask the user a yes/no question and keep asking until the answer is usable.

    The reply is stripped of surrounding whitespace and lower-cased before it
    is inspected: "yes", "ye" and "y" give True, "no" and "n" give False. An
    empty reply gives default, provided default is not None. Any other reply
    prints a reminder of the accepted answers and the question is asked again.

    The hint shown after the question capitalises the answer an empty reply
    would select: "[Y/n]" for default True, "[y/N]" for default False and
    "[y/n]" otherwise.
    """
    # Identity checks (not equality) so that only the real booleans pick a
    # capitalised hint.
    hint = "[Y/n]" if default is True else "[y/N]" if default is False else "[y/n]"
    full_question = f"{question} {hint} "
    reminder = "Please reply with 'yes'/'y' or 'no'/'n'."

    affirmative = {"yes", "ye", "y"}
    negative = {"no", "n"}

    while True:
        answer = input(full_question).strip().lower()

        if answer in affirmative:
            return True
        if answer in negative:
            return False
        if not answer and default is not None:
            return default

        print(reminder)
|