Compare commits

...

24 Commits

Author SHA1 Message Date
cccd68e04a Bump version to v2.6.2 2021-04-29 00:18:26 +02:00
2bd40a5f30 Fix -p and -u flags 2021-04-29 00:17:51 +02:00
2ca1101326 Fix typo in sync_url 2021-04-19 14:53:16 +02:00
c1ab7485e2 Bump version to 2.6.1 2021-04-19 11:21:56 +02:00
29cd5d1a3c Reflect totality of sanitize_windows_path in return type 2021-04-19 11:10:02 +02:00
6d5d9333ad Force folder to be file-system path 2021-04-19 11:07:25 +02:00
7cc40595dc Allow synchronizing to directory "." 2021-04-14 20:25:25 +02:00
80ae5ddfaa Bump version to v2.6.0 2021-04-14 19:47:41 +02:00
4f480d117e Install keyring in CI 2021-04-14 19:24:05 +02:00
1f2af3a290 Retry on more I/O Errors 2021-04-13 11:43:22 +02:00
14cdfb6a69 Fix typo in date demangler doc 2021-04-13 11:19:51 +02:00
e2bf84392b [sync_url] Properly declare "no-videos" as flag 2021-04-08 18:12:27 +02:00
946b7a7931 Also crawl .c/.java/.zip from IPD page 2021-02-09 12:30:59 +01:00
9a9018751e Bump version 2021-02-06 22:54:05 +01:00
83b75e8254 syncurl: Sanitize element name on windows if it is used as folder name
Otherwise the name of the course might not be a valid file name.
2021-02-06 22:53:26 +01:00
35c3fa205d Fixed description of activating venv (#22)
Add 'source' to the venv activate command in the readme

`source` was picked over `.` to conform to the python recommendation
(https://docs.python.org/3/library/venv.html#module-venv).

This patch also adds the `egg-info` you get when building to the
gitignore.
2021-01-28 21:24:09 +01:00
0b606f02fa Bump version 2021-01-17 10:33:10 +01:00
fb78a6e98e Retry ILIAS downloads a few times and only fail that file 2021-01-06 13:08:10 +01:00
5de68a0400 Bump version 2020-12-30 17:20:30 +01:00
f0562049b6 Remove Python 3.9 method in crawler 2020-12-30 17:18:04 +01:00
0e1077bb50 Bump version 2020-12-30 14:50:49 +01:00
c978e9edf4 Resolve a few pylint warnings 2020-12-30 14:45:46 +01:00
2714ac6be6 Send CSRF token to Shibboleth 2020-12-30 14:34:11 +01:00
9b048a9cfc Canonize meeting names to a properly formatted date 2020-12-30 14:32:59 +01:00
15 changed files with 94 additions and 31 deletions

View File

@ -23,7 +23,7 @@ jobs:
python-version: '3.x' python-version: '3.x'
- name: "Install dependencies" - name: "Install dependencies"
run: "pip install setuptools pyinstaller rich requests beautifulsoup4 -f --upgrade" run: "pip install setuptools keyring pyinstaller rich requests beautifulsoup4 -f --upgrade"
- name: "Install sync_url.py" - name: "Install sync_url.py"
run: "pyinstaller sync_url.py -F" run: "pyinstaller sync_url.py -F"

1
.gitignore vendored
View File

@ -8,6 +8,7 @@ build/
.env .env
.vscode .vscode
ilias_cookies.txt ilias_cookies.txt
PFERD.egg-info/
# PyInstaller # PyInstaller
sync_url.spec sync_url.spec

View File

@ -14,7 +14,7 @@ PRETTY = PrettyLogger(LOGGER)
try: try:
import keyring import keyring
except ImportError: except ImportError:
PRETTY.warning("Keyring module not found, KeyringAuthenticator won't work!") pass
class TfaAuthenticator: class TfaAuthenticator:

View File

@ -37,3 +37,21 @@ def swallow_and_print_errors(function: TFun) -> TFun:
Console().print_exception() Console().print_exception()
return None return None
return cast(TFun, inner) return cast(TFun, inner)
def retry_on_io_exception(max_retries: int, message: str) -> Callable[[TFun], TFun]:
    """
    Decorates a function and retries it on IOError until the max retries count is hit.

    The decorated function is called at most ``max_retries`` times. As soon as a call
    succeeds, its return value is passed through unchanged. Every IOError is logged
    as a warning that includes ``message`` as the name of the operation.

    If every attempt raises IOError (or ``max_retries`` is not positive), the wrapper
    does not re-raise: it returns None, so callers have to treat a None result as
    failure. Exceptions other than IOError are not caught and propagate immediately.
    """
    def retry(function: TFun) -> TFun:
        def inner(*args: Any, **kwargs: Any) -> Any:
            for attempt in range(max_retries):
                try:
                    return function(*args, **kwargs)
                except IOError as error:
                    PRETTY.warning(f"Error duing operation '{message}': {error}")
                    remaining = max_retries - 1 - attempt
                    # Only announce a retry when another attempt will actually follow;
                    # after the last failure there is nothing left to retry.
                    if remaining > 0:
                        PRETTY.warning(
                            f"Retrying operation '{message}'. Remaining retries: {remaining}")
            # All attempts failed: signal failure with None instead of raising.
            return None
        return cast(TFun, inner)
    return retry

View File

@ -74,6 +74,8 @@ class KitShibbolethAuthenticator(IliasAuthenticator):
form = soup.find("form", {"class": "full content", "method": "post"}) form = soup.find("form", {"class": "full content", "method": "post"})
action = form["action"] action = form["action"]
csrf_token = form.find("input", {"name": "csrf_token"})["value"]
# Equivalent: Enter credentials in # Equivalent: Enter credentials in
# https://idp.scc.kit.edu/idp/profile/SAML2/Redirect/SSO # https://idp.scc.kit.edu/idp/profile/SAML2/Redirect/SSO
LOGGER.debug("Attempt to log in to Shibboleth using credentials") LOGGER.debug("Attempt to log in to Shibboleth using credentials")
@ -82,6 +84,7 @@ class KitShibbolethAuthenticator(IliasAuthenticator):
"_eventId_proceed": "", "_eventId_proceed": "",
"j_username": self._auth.username, "j_username": self._auth.username,
"j_password": self._auth.password, "j_password": self._auth.password,
"csrf_token": csrf_token
} }
soup = soupify(sess.post(url, data=data)) soup = soupify(sess.post(url, data=data))

View File

@ -15,7 +15,7 @@ from urllib.parse import (parse_qs, urlencode, urljoin, urlparse, urlsplit,
import bs4 import bs4
import requests import requests
from ..errors import FatalException from ..errors import FatalException, retry_on_io_exception
from ..logging import PrettyLogger from ..logging import PrettyLogger
from ..utils import soupify from ..utils import soupify
from .authenticators import IliasAuthenticator from .authenticators import IliasAuthenticator
@ -40,6 +40,7 @@ class IliasElementType(Enum):
REGULAR_FILE = "REGULAR_FILE" REGULAR_FILE = "REGULAR_FILE"
VIDEO_FILE = "VIDEO_FILE" VIDEO_FILE = "VIDEO_FILE"
FORUM = "FORUM" FORUM = "FORUM"
MEETING = "MEETING"
EXTERNAL_LINK = "EXTERNAL_LINK" EXTERNAL_LINK = "EXTERNAL_LINK"
def is_folder(self) -> bool: def is_folder(self) -> bool:
@ -241,6 +242,8 @@ class IliasCrawler:
entries_to_process += self._crawl_video_directory(entry.path, url) entries_to_process += self._crawl_video_directory(entry.path, url)
continue continue
PRETTY.warning(f"Unknown type: {entry.entry_type}!")
return result return result
def _crawl_folder(self, folder_path: Path, url: str) -> List[IliasCrawlerEntry]: def _crawl_folder(self, folder_path: Path, url: str) -> List[IliasCrawlerEntry]:
@ -269,6 +272,25 @@ class IliasCrawler:
if element_type == IliasElementType.REGULAR_FILE: if element_type == IliasElementType.REGULAR_FILE:
result += self._crawl_file(folder_path, link, abs_url) result += self._crawl_file(folder_path, link, abs_url)
elif element_type == IliasElementType.MEETING:
meeting_name = str(element_path.name)
date_portion_str = meeting_name.split(" - ")[0]
date_portion = demangle_date(date_portion_str)
if not date_portion:
result += [IliasCrawlerEntry(element_path, abs_url, element_type, None)]
continue
rest_of_name = meeting_name
if rest_of_name.startswith(date_portion_str):
rest_of_name = rest_of_name[len(date_portion_str):]
new_name = datetime.datetime.strftime(date_portion, "%Y-%m-%d, %H:%M") \
+ rest_of_name
new_path = Path(folder_path, _sanitize_path_name(new_name))
result += [
IliasCrawlerEntry(new_path, abs_url, IliasElementType.REGULAR_FOLDER, None)
]
elif element_type is not None: elif element_type is not None:
result += [IliasCrawlerEntry(element_path, abs_url, element_type, None)] result += [IliasCrawlerEntry(element_path, abs_url, element_type, None)]
else: else:
@ -320,6 +342,8 @@ class IliasCrawler:
""" """
# pylint: disable=too-many-return-statements # pylint: disable=too-many-return-statements
found_parent: Optional[bs4.Tag] = None
# We look for the outer div of our inner link, to find information around it # We look for the outer div of our inner link, to find information around it
# (mostly the icon) # (mostly the icon)
for parent in link_element.parents: for parent in link_element.parents:
@ -350,6 +374,9 @@ class IliasCrawler:
if str(img_tag["src"]).endswith("frm.svg"): if str(img_tag["src"]).endswith("frm.svg"):
return IliasElementType.FORUM return IliasElementType.FORUM
if str(img_tag["src"]).endswith("sess.svg"):
return IliasElementType.MEETING
return IliasElementType.REGULAR_FOLDER return IliasElementType.REGULAR_FOLDER
@staticmethod @staticmethod
@ -598,6 +625,7 @@ class IliasCrawler:
return results return results
@retry_on_io_exception(3, "fetching webpage")
def _get_page(self, url: str, params: Dict[str, Any], def _get_page(self, url: str, params: Dict[str, Any],
retry_count: int = 0) -> bs4.BeautifulSoup: retry_count: int = 0) -> bs4.BeautifulSoup:
""" """

View File

@ -20,7 +20,7 @@ def demangle_date(date: str) -> Optional[datetime.datetime]:
"Gestern, HH:MM" "Gestern, HH:MM"
"Heute, HH:MM" "Heute, HH:MM"
"Morgen, HH:MM" "Morgen, HH:MM"
"dd. mon.yyyy, HH:MM "dd. mon yyyy, HH:MM
""" """
saved = locale.setlocale(locale.LC_ALL) saved = locale.setlocale(locale.LC_ALL)
try: try:

View File

@ -10,6 +10,7 @@ from typing import Callable, List, Optional, Union
import bs4 import bs4
import requests import requests
from ..errors import retry_on_io_exception
from ..logging import PrettyLogger from ..logging import PrettyLogger
from ..organizer import Organizer from ..organizer import Organizer
from ..tmp_dir import TmpDir from ..tmp_dir import TmpDir
@ -116,15 +117,25 @@ class IliasDownloader:
""" """
LOGGER.debug("Downloading %r", info) LOGGER.debug("Downloading %r", info)
if not self._strategy(self._organizer, info): if not self._strategy(self._organizer, info):
self._organizer.mark(info.path) self._organizer.mark(info.path)
return return
tmp_file = self._tmp_dir.new_path() tmp_file = self._tmp_dir.new_path()
while not self._try_download(info, tmp_file): @retry_on_io_exception(3, "downloading file")
LOGGER.info("Retrying download: %r", info) def download_impl() -> bool:
self._authenticator.authenticate(self._session) if not self._try_download(info, tmp_file):
LOGGER.info("Re-Authenticating due to download failure: %r", info)
self._authenticator.authenticate(self._session)
raise IOError("Scheduled retry")
else:
return True
if not download_impl():
PRETTY.error(f"Download of file {info.path} failed too often! Skipping it...")
return
dst_path = self._organizer.accept_file(tmp_file, info.path) dst_path = self._organizer.accept_file(tmp_file, info.path)
if dst_path and info.modification_date: if dst_path and info.modification_date:

View File

@ -82,7 +82,10 @@ class IpdCrawler:
items: List[IpdDownloadInfo] = [] items: List[IpdDownloadInfo] = []
for link in page.findAll(name="a", attrs={"href": lambda x: x and x.endswith("pdf")}): def is_relevant_url(x: str) -> bool:
return x.endswith(".pdf") or x.endswith(".c") or x.endswith(".java") or x.endswith(".zip")
for link in page.findAll(name="a", attrs={"href": lambda x: x and is_relevant_url(x)}):
href: str = link.attrs.get("href") href: str = link.attrs.get("href")
name = href.split("/")[-1] name = href.split("/")[-1]

View File

@ -3,13 +3,10 @@ Contains a few logger utility functions and implementations.
""" """
import logging import logging
from pathlib import Path from typing import Optional
from typing import List, Optional
from rich import print as rich_print
from rich._log_render import LogRender from rich._log_render import LogRender
from rich.console import Console from rich.console import Console
from rich.panel import Panel
from rich.style import Style from rich.style import Style
from rich.text import Text from rich.text import Text
from rich.theme import Theme from rich.theme import Theme

View File

@ -116,7 +116,7 @@ class Organizer(Location):
if self._is_marked(dst): if self._is_marked(dst):
PRETTY.warning(f"File {str(dst_absolute)!r} was already written!") PRETTY.warning(f"File {str(dst_absolute)!r} was already written!")
conflict = ConflictType.MARKED_FILE_OVERWRITTEN conflict = ConflictType.MARKED_FILE_OVERWRITTEN
if self._resolve_conflict(f"Overwrite file?", dst_absolute, conflict, default=False): if self._resolve_conflict("Overwrite file?", dst_absolute, conflict, default=False):
PRETTY.ignored_file(dst_absolute, "file was written previously") PRETTY.ignored_file(dst_absolute, "file was written previously")
return None return None

View File

@ -130,7 +130,7 @@ def re_rename(regex: Regex, target: str) -> Transform:
return inner return inner
def sanitize_windows_path(path: PurePath) -> Optional[PurePath]: def sanitize_windows_path(path: PurePath) -> PurePath:
""" """
A small function to escape characters that are forbidden in windows path names. A small function to escape characters that are forbidden in windows path names.
This method is a no-op on other operating systems. This method is a no-op on other operating systems.

View File

@ -37,7 +37,7 @@ Ensure that you have at least Python 3.8 installed.
To install PFERD or update your installation to the latest version, run this To install PFERD or update your installation to the latest version, run this
wherever you want to install or have already installed PFERD: wherever you want to install or have already installed PFERD:
``` ```
$ pip install git+https://github.com/Garmelon/PFERD@v2.5.0 $ pip install git+https://github.com/Garmelon/PFERD@v2.6.2
``` ```
The use of [venv] is recommended. The use of [venv] is recommended.
@ -59,9 +59,9 @@ A full example setup and initial use could look like:
$ mkdir Vorlesungen $ mkdir Vorlesungen
$ cd Vorlesungen $ cd Vorlesungen
$ python3 -m venv .venv $ python3 -m venv .venv
$ .venv/bin/activate $ source .venv/bin/activate
$ pip install git+https://github.com/Garmelon/PFERD@v2.5.0 $ pip install git+https://github.com/Garmelon/PFERD@v2.6.2
$ curl -O https://raw.githubusercontent.com/Garmelon/PFERD/v2.5.0/example_config.py $ curl -O https://raw.githubusercontent.com/Garmelon/PFERD/v2.6.2/example_config.py
$ python3 example_config.py $ python3 example_config.py
$ deactivate $ deactivate
``` ```
@ -69,7 +69,7 @@ $ deactivate
Subsequent runs of the program might look like: Subsequent runs of the program might look like:
``` ```
$ cd Vorlesungen $ cd Vorlesungen
$ .venv/bin/activate $ source .venv/bin/activate
$ python3 example_config.py $ python3 example_config.py
$ deactivate $ deactivate
``` ```

View File

@ -2,7 +2,7 @@ from setuptools import find_packages, setup
setup( setup(
name="PFERD", name="PFERD",
version="2.5.0", version="2.6.2",
packages=find_packages(), packages=find_packages(),
install_requires=[ install_requires=[
"requests>=2.21.0", "requests>=2.21.0",

View File

@ -26,9 +26,10 @@ _LOGGER = logging.getLogger("sync_url")
_PRETTY = PrettyLogger(_LOGGER) _PRETTY = PrettyLogger(_LOGGER)
def _extract_credentials(file_path: Optional[str]) -> UserPassAuthenticator: def _extract_credentials(file_path: Optional[str],
username: Optional[str], password: Optional[str]) -> UserPassAuthenticator:
if not file_path: if not file_path:
return UserPassAuthenticator("KIT ILIAS Shibboleth", None, None) return UserPassAuthenticator("KIT ILIAS Shibboleth", username, password)
if not Path(file_path).exists(): if not Path(file_path).exists():
_PRETTY.error("Credential file does not exist") _PRETTY.error("Credential file does not exist")
@ -74,7 +75,7 @@ def main() -> None:
"one line in the following format: '<user>:<password>'") "one line in the following format: '<user>:<password>'")
parser.add_argument("-k", "--keyring", action="store_true", parser.add_argument("-k", "--keyring", action="store_true",
help="Use the system keyring service for authentication") help="Use the system keyring service for authentication")
parser.add_argument('--no-videos', nargs='?', default=None, help="Don't download videos") parser.add_argument('--no-videos', action="store_true", help="Don't download videos")
parser.add_argument('--local-first', action="store_true", parser.add_argument('--local-first', action="store_true",
help="Don't prompt for confirmation, keep existing files") help="Don't prompt for confirmation, keep existing files")
parser.add_argument('--remote-first', action="store_true", parser.add_argument('--remote-first', action="store_true",
@ -96,7 +97,7 @@ def main() -> None:
"KIT ILIAS Shibboleth", username=args.username, password=args.password "KIT ILIAS Shibboleth", username=args.username, password=args.password
) )
else: else:
inner_auth = _extract_credentials(args.credential_file) inner_auth = _extract_credentials(args.credential_file, args.username, args.password)
username, password = inner_auth.get_credentials() username, password = inner_auth.get_credentials()
authenticator = KitShibbolethAuthenticator(inner_auth) authenticator = KitShibbolethAuthenticator(inner_auth)
@ -113,7 +114,7 @@ def main() -> None:
if not element_name: if not element_name:
print("Error, could not get element name. Please specify a folder yourself.") print("Error, could not get element name. Please specify a folder yourself.")
return return
folder = Path(element_name) folder = sanitize_windows_path(Path(element_name.replace("/", "-").replace("\\", "-")))
cookie_jar.save_cookies() cookie_jar.save_cookies()
else: else:
folder = Path(args.folder) folder = Path(args.folder)
@ -121,7 +122,8 @@ def main() -> None:
# files may not escape the pferd_root with relative paths # files may not escape the pferd_root with relative paths
# note: Path(Path.cwd, Path(folder)) == Path(folder) if it is an absolute path # note: Path(Path.cwd, Path(folder)) == Path(folder) if it is an absolute path
pferd_root = Path(Path.cwd(), Path(folder)).parent pferd_root = Path(Path.cwd(), Path(folder)).parent
target = folder.name # Folder might be a *PurePath* at this point
target = Path(folder).resolve().name
pferd = Pferd(pferd_root, test_run=args.test_run) pferd = Pferd(pferd_root, test_run=args.test_run)
def dir_filter(_: Path, element: IliasElementType) -> bool: def dir_filter(_: Path, element: IliasElementType) -> bool:
@ -130,13 +132,13 @@ def main() -> None:
return True return True
if args.local_first: if args.local_first:
file_confilict_resolver: FileConflictResolver = _resolve_local_first file_conflict_resolver: FileConflictResolver = _resolve_local_first
elif args.no_delete: elif args.no_delete:
file_confilict_resolver = _resolve_no_delete file_conflict_resolver = _resolve_no_delete
elif args.remote_first: elif args.remote_first:
file_confilict_resolver = _resolve_remote_first file_conflict_resolver = _resolve_remote_first
else: else:
file_confilict_resolver = resolve_prompt_user file_conflict_resolver = resolve_prompt_user
pferd.enable_logging() pferd.enable_logging()
@ -148,7 +150,7 @@ def main() -> None:
dir_filter=dir_filter, dir_filter=dir_filter,
username=username, username=username,
password=password, password=password,
file_conflict_resolver=file_confilict_resolver, file_conflict_resolver=file_conflict_resolver,
transform=sanitize_windows_path transform=sanitize_windows_path
) )