Compare commits

..

13 Commits
v2.5.4 ... v2

10 changed files with 54 additions and 32 deletions

View File

@ -23,7 +23,7 @@ jobs:
python-version: '3.x' python-version: '3.x'
- name: "Install dependencies" - name: "Install dependencies"
run: "pip install setuptools pyinstaller rich requests beautifulsoup4 -f --upgrade" run: "pip install setuptools keyring pyinstaller rich requests beautifulsoup4 -f --upgrade"
- name: "Install sync_url.py" - name: "Install sync_url.py"
run: "pyinstaller sync_url.py -F" run: "pyinstaller sync_url.py -F"

View File

@ -37,3 +37,21 @@ def swallow_and_print_errors(function: TFun) -> TFun:
Console().print_exception() Console().print_exception()
return None return None
return cast(TFun, inner) return cast(TFun, inner)
def retry_on_io_exception(max_retries: int, message: str) -> Callable[[TFun], TFun]:
    """
    Build a decorator that retries the decorated function when it raises an IOError.

    The decorated function is called at most ``max_retries`` times. The result of the
    first call that does not raise an IOError is returned. Exceptions of any other type
    are not caught and propagate to the caller unchanged.

    Every IOError is reported as a warning via PRETTY. If all attempts fail (or
    ``max_retries`` is not positive, in which case the function is never called), the
    wrapper returns None instead of raising, so callers have to treat a None result as
    "operation failed".

    Arguments:
        max_retries: maximum number of times the decorated function is called.
        message: short description of the operation, only used in the warnings.

    Returns:
        A decorator that wraps a function with the retry logic described above.
    """
    # Imported locally so this block does not depend on the module's import section.
    import functools

    def retry(function: TFun) -> TFun:
        # Keep the name and docstring of the wrapped function on the wrapper.
        @functools.wraps(function)
        def inner(*args: Any, **kwargs: Any) -> Any:
            for attempt in range(max_retries):
                try:
                    return function(*args, **kwargs)
                except IOError as error:
                    PRETTY.warning(f"Error during operation '{message}': {error}")
                    remaining = max_retries - 1 - attempt
                    # Only announce a retry if another attempt will actually follow.
                    if remaining > 0:
                        PRETTY.warning(
                            f"Retrying operation '{message}'. Remaining retries: {remaining}")
            # All attempts raised an IOError (or none were made): signal failure with None.
            return None
        return cast(TFun, inner)
    return retry

View File

@ -15,7 +15,7 @@ from urllib.parse import (parse_qs, urlencode, urljoin, urlparse, urlsplit,
import bs4 import bs4
import requests import requests
from ..errors import FatalException from ..errors import FatalException, retry_on_io_exception
from ..logging import PrettyLogger from ..logging import PrettyLogger
from ..utils import soupify from ..utils import soupify
from .authenticators import IliasAuthenticator from .authenticators import IliasAuthenticator
@ -625,6 +625,7 @@ class IliasCrawler:
return results return results
@retry_on_io_exception(3, "fetching webpage")
def _get_page(self, url: str, params: Dict[str, Any], def _get_page(self, url: str, params: Dict[str, Any],
retry_count: int = 0) -> bs4.BeautifulSoup: retry_count: int = 0) -> bs4.BeautifulSoup:
""" """

View File

@ -20,7 +20,7 @@ def demangle_date(date: str) -> Optional[datetime.datetime]:
"Gestern, HH:MM" "Gestern, HH:MM"
"Heute, HH:MM" "Heute, HH:MM"
"Morgen, HH:MM" "Morgen, HH:MM"
"dd. mon.yyyy, HH:MM "dd. mon yyyy, HH:MM
""" """
saved = locale.setlocale(locale.LC_ALL) saved = locale.setlocale(locale.LC_ALL)
try: try:

View File

@ -10,6 +10,7 @@ from typing import Callable, List, Optional, Union
import bs4 import bs4
import requests import requests
from ..errors import retry_on_io_exception
from ..logging import PrettyLogger from ..logging import PrettyLogger
from ..organizer import Organizer from ..organizer import Organizer
from ..tmp_dir import TmpDir from ..tmp_dir import TmpDir
@ -116,26 +117,23 @@ class IliasDownloader:
""" """
LOGGER.debug("Downloading %r", info) LOGGER.debug("Downloading %r", info)
if not self._strategy(self._organizer, info): if not self._strategy(self._organizer, info):
self._organizer.mark(info.path) self._organizer.mark(info.path)
return return
tmp_file = self._tmp_dir.new_path() tmp_file = self._tmp_dir.new_path()
download_successful = False @retry_on_io_exception(3, "downloading file")
for _ in range(0, 3): def download_impl() -> bool:
try:
if not self._try_download(info, tmp_file): if not self._try_download(info, tmp_file):
LOGGER.info("Re-Authenticating due to download failure: %r", info) LOGGER.info("Re-Authenticating due to download failure: %r", info)
self._authenticator.authenticate(self._session) self._authenticator.authenticate(self._session)
raise IOError("Scheduled retry")
else: else:
download_successful = True return True
break
except IOError as e:
PRETTY.warning(f"I/O Error when downloading ({e}). Retrying...",)
LOGGER.info("Retrying download for %s", info.path)
if not download_successful: if not download_impl():
PRETTY.error(f"Download of file {info.path} failed too often! Skipping it...") PRETTY.error(f"Download of file {info.path} failed too often! Skipping it...")
return return

View File

@ -82,7 +82,10 @@ class IpdCrawler:
items: List[IpdDownloadInfo] = [] items: List[IpdDownloadInfo] = []
for link in page.findAll(name="a", attrs={"href": lambda x: x and x.endswith("pdf")}): def is_relevant_url(x: str) -> bool:
return x.endswith(".pdf") or x.endswith(".c") or x.endswith(".java") or x.endswith(".zip")
for link in page.findAll(name="a", attrs={"href": lambda x: x and is_relevant_url(x)}):
href: str = link.attrs.get("href") href: str = link.attrs.get("href")
name = href.split("/")[-1] name = href.split("/")[-1]

View File

@ -130,7 +130,7 @@ def re_rename(regex: Regex, target: str) -> Transform:
return inner return inner
def sanitize_windows_path(path: PurePath) -> Optional[PurePath]: def sanitize_windows_path(path: PurePath) -> PurePath:
""" """
A small function to escape characters that are forbidden in windows path names. A small function to escape characters that are forbidden in windows path names.
This method is a no-op on other operating systems. This method is a no-op on other operating systems.

View File

@ -37,7 +37,7 @@ Ensure that you have at least Python 3.8 installed.
To install PFERD or update your installation to the latest version, run this To install PFERD or update your installation to the latest version, run this
wherever you want to install or have already installed PFERD: wherever you want to install or have already installed PFERD:
``` ```
$ pip install git+https://github.com/Garmelon/PFERD@v2.5.4 $ pip install git+https://github.com/Garmelon/PFERD@v2.6.2
``` ```
The use of [venv] is recommended. The use of [venv] is recommended.
@ -60,8 +60,8 @@ $ mkdir Vorlesungen
$ cd Vorlesungen $ cd Vorlesungen
$ python3 -m venv .venv $ python3 -m venv .venv
$ source .venv/bin/activate $ source .venv/bin/activate
$ pip install git+https://github.com/Garmelon/PFERD@v2.5.4 $ pip install git+https://github.com/Garmelon/PFERD@v2.6.2
$ curl -O https://raw.githubusercontent.com/Garmelon/PFERD/v2.5.4/example_config.py $ curl -O https://raw.githubusercontent.com/Garmelon/PFERD/v2.6.2/example_config.py
$ python3 example_config.py $ python3 example_config.py
$ deactivate $ deactivate
``` ```

View File

@ -2,7 +2,7 @@ from setuptools import find_packages, setup
setup( setup(
name="PFERD", name="PFERD",
version="2.5.4", version="2.6.2",
packages=find_packages(), packages=find_packages(),
install_requires=[ install_requires=[
"requests>=2.21.0", "requests>=2.21.0",

View File

@ -26,9 +26,10 @@ _LOGGER = logging.getLogger("sync_url")
_PRETTY = PrettyLogger(_LOGGER) _PRETTY = PrettyLogger(_LOGGER)
def _extract_credentials(file_path: Optional[str]) -> UserPassAuthenticator: def _extract_credentials(file_path: Optional[str],
username: Optional[str], password: Optional[str]) -> UserPassAuthenticator:
if not file_path: if not file_path:
return UserPassAuthenticator("KIT ILIAS Shibboleth", None, None) return UserPassAuthenticator("KIT ILIAS Shibboleth", username, password)
if not Path(file_path).exists(): if not Path(file_path).exists():
_PRETTY.error("Credential file does not exist") _PRETTY.error("Credential file does not exist")
@ -74,7 +75,7 @@ def main() -> None:
"one line in the following format: '<user>:<password>'") "one line in the following format: '<user>:<password>'")
parser.add_argument("-k", "--keyring", action="store_true", parser.add_argument("-k", "--keyring", action="store_true",
help="Use the system keyring service for authentication") help="Use the system keyring service for authentication")
parser.add_argument('--no-videos', nargs='?', default=None, help="Don't download videos") parser.add_argument('--no-videos', action="store_true", help="Don't download videos")
parser.add_argument('--local-first', action="store_true", parser.add_argument('--local-first', action="store_true",
help="Don't prompt for confirmation, keep existing files") help="Don't prompt for confirmation, keep existing files")
parser.add_argument('--remote-first', action="store_true", parser.add_argument('--remote-first', action="store_true",
@ -96,7 +97,7 @@ def main() -> None:
"KIT ILIAS Shibboleth", username=args.username, password=args.password "KIT ILIAS Shibboleth", username=args.username, password=args.password
) )
else: else:
inner_auth = _extract_credentials(args.credential_file) inner_auth = _extract_credentials(args.credential_file, args.username, args.password)
username, password = inner_auth.get_credentials() username, password = inner_auth.get_credentials()
authenticator = KitShibbolethAuthenticator(inner_auth) authenticator = KitShibbolethAuthenticator(inner_auth)
@ -121,7 +122,8 @@ def main() -> None:
# files may not escape the pferd_root with relative paths # files may not escape the pferd_root with relative paths
# note: Path(Path.cwd, Path(folder)) == Path(folder) if it is an absolute path # note: Path(Path.cwd, Path(folder)) == Path(folder) if it is an absolute path
pferd_root = Path(Path.cwd(), Path(folder)).parent pferd_root = Path(Path.cwd(), Path(folder)).parent
target = folder.name # Folder might be a *PurePath* at this point
target = Path(folder).resolve().name
pferd = Pferd(pferd_root, test_run=args.test_run) pferd = Pferd(pferd_root, test_run=args.test_run)
def dir_filter(_: Path, element: IliasElementType) -> bool: def dir_filter(_: Path, element: IliasElementType) -> bool:
@ -130,13 +132,13 @@ def main() -> None:
return True return True
if args.local_first: if args.local_first:
file_confilict_resolver: FileConflictResolver = _resolve_local_first file_conflict_resolver: FileConflictResolver = _resolve_local_first
elif args.no_delete: elif args.no_delete:
file_confilict_resolver = _resolve_no_delete file_conflict_resolver = _resolve_no_delete
elif args.remote_first: elif args.remote_first:
file_confilict_resolver = _resolve_remote_first file_conflict_resolver = _resolve_remote_first
else: else:
file_confilict_resolver = resolve_prompt_user file_conflict_resolver = resolve_prompt_user
pferd.enable_logging() pferd.enable_logging()
@ -148,7 +150,7 @@ def main() -> None:
dir_filter=dir_filter, dir_filter=dir_filter,
username=username, username=username,
password=password, password=password,
file_conflict_resolver=file_confilict_resolver, file_conflict_resolver=file_conflict_resolver,
transform=sanitize_windows_path transform=sanitize_windows_path
) )