Compare commits

...

16 Commits

9 changed files with 243 additions and 15 deletions

View File

@@ -13,7 +13,7 @@ jobs:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        os: [ubuntu-latest, windows-latest]
+        os: [ubuntu-latest, windows-latest, macos-latest]
     steps:
       - uses: actions/checkout@v2
@@ -28,6 +28,9 @@ jobs:
       - name: "Install sync_url.py"
        run: "pyinstaller sync_url.py -F"
+      - name: "Move artifact"
+        run: "mv dist/sync_url* dist/sync_url-${{ matrix.os }}"
       - uses: actions/upload-artifact@v2
        with:
          name: "Pferd Sync URL"
@@ -55,13 +58,17 @@ jobs:
       - name: "look at folder structure"
        run: "ls -lah"
+      - name: "Rename releases"
+        run: "mv sync_url-macos-latest pferd_sync_url_mac && mv sync_url-ubuntu-latest pferd_sync_url_linux && mv sync_url-windows-latest pferd_sync_url.exe"
       - name: "Create release"
        uses: softprops/action-gh-release@v1
       - name: "Upload release artifacts"
        uses: softprops/action-gh-release@v1
        with:
-          body: "Download sync_url (or sync_url.exe on Windows) and run it in the terminal or CMD."
+          body: "Download the correct sync_url for your platform and run it in the terminal or CMD. You might need to make it executable on Linux/Mac with `chmod +x <file>`. Also please enclose the *url you pass to the program in double quotes* or your shell might silently screw it up!"
          files: |
-            sync_url
-            sync_url.exe
+            pferd_sync_url_mac
+            pferd_sync_url_linux
+            pferd_sync_url.exe

View File

@@ -40,9 +40,9 @@ class DownloadSummary:
         """
         Merges ourselves with the passed summary. Modifies this object, but not the passed one.
         """
-        self._new_files += summary.new_files
-        self._modified_files += summary.modified_files
-        self._deleted_files += summary.deleted_files
+        self._new_files = list(set(self._new_files + summary.new_files))
+        self._modified_files = list(set(self._modified_files + summary.modified_files))
+        self._deleted_files = list(set(self._deleted_files + summary.deleted_files))

     def add_deleted_file(self, path: Path) -> None:
         """

View File

@@ -593,10 +593,17 @@ class IliasCrawler:
         return results

-    def _get_page(self, url: str, params: Dict[str, Any]) -> bs4.BeautifulSoup:
+    def _get_page(self, url: str, params: Dict[str, Any],
+                  retry_count: int = 0) -> bs4.BeautifulSoup:
         """
         Fetches a page from ILIAS, authenticating when needed.
         """
+        if retry_count >= 4:
+            raise FatalException("Could not get a proper page after 4 tries. "
+                                 "Maybe your URL is wrong, authentication fails continuously, "
+                                 "your ILIAS connection is spotty or ILIAS is not well.")
+
         LOGGER.debug("Fetching %r", url)

         response = self._session.get(url, params=params)
@@ -617,7 +624,7 @@ class IliasCrawler:
         self._authenticator.authenticate(self._session)

-        return self._get_page(url, params)
+        return self._get_page(url, params, retry_count + 1)

     @staticmethod
     def _is_logged_in(soup: bs4.BeautifulSoup) -> bool:
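
A self-contained sketch of the bounded-retry recursion above; fetch and authentication are simulated stand-ins, not PFERD's real API, so only the control flow mirrors _get_page:

# Sketch of the retry control flow; fake_fetch/fake_authenticate are stand-ins.
class FatalException(Exception):
    pass

ATTEMPTS = {"count": 0}

def fake_fetch(url: str) -> str:
    ATTEMPTS["count"] += 1
    return "login page"  # simulate authentication never succeeding

def fake_authenticate() -> None:
    pass  # stand-in for authenticator.authenticate(session)

def get_page(url: str, retry_count: int = 0) -> str:
    if retry_count >= 4:
        raise FatalException("Could not get a proper page after 4 tries.")
    page = fake_fetch(url)
    if "login" not in page:
        return page  # looks authenticated, done
    fake_authenticate()
    return get_page(url, retry_count + 1)

try:
    get_page("https://example.com")
except FatalException as error:
    print(error, "- fetch attempts:", ATTEMPTS["count"])  # 4 attempts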

PFERD/ipd.py Normal file
View File

@@ -0,0 +1,151 @@
"""
Utility functions and a scraper/downloader for the IPD pages.
"""
import datetime
import logging
import math
import os
from dataclasses import dataclass
from pathlib import Path
from typing import Callable, List, Optional
from urllib.parse import urljoin

import bs4
import requests

from PFERD.errors import FatalException
from PFERD.utils import soupify

from .logging import PrettyLogger
from .organizer import Organizer
from .tmp_dir import TmpDir
from .transform import Transformable
from .utils import stream_to_path

LOGGER = logging.getLogger(__name__)
PRETTY = PrettyLogger(LOGGER)


@dataclass
class IpdDownloadInfo(Transformable):
    """
    Information about an ipd entry.
    """
    url: str
    modification_date: Optional[datetime.datetime]


IpdDownloadStrategy = Callable[[Organizer, IpdDownloadInfo], bool]


def ipd_download_new_or_modified(organizer: Organizer, info: IpdDownloadInfo) -> bool:
    """
    Accepts new files or files with a more recent modification date.
    """
    resolved_file = organizer.resolve(info.path)
    if not resolved_file.exists():
        return True
    if not info.modification_date:
        PRETTY.ignored_file(info.path, "could not find modification time, file exists")
        return False

    resolved_mod_time_seconds = resolved_file.stat().st_mtime

    # Download if the info is newer
    if info.modification_date.timestamp() > resolved_mod_time_seconds:
        return True

    PRETTY.ignored_file(info.path, "local file has newer or equal modification time")
    return False


class IpdCrawler:
    # pylint: disable=too-few-public-methods
    """
    A crawler for IPD pages.
    """

    def __init__(self, base_url: str):
        self._base_url = base_url

    def _abs_url_from_link(self, link_tag: bs4.Tag) -> str:
        """
        Create an absolute url from an <a> tag.
        """
        return urljoin(self._base_url, link_tag.get("href"))

    def crawl(self) -> List[IpdDownloadInfo]:
        """
        Crawls the page given in the constructor.
        """
        page = soupify(requests.get(self._base_url))

        items: List[IpdDownloadInfo] = []

        for link in page.findAll(name="a", attrs={"href": lambda x: x and x.endswith("pdf")}):
            href: str = link.attrs.get("href")
            name = href.split("/")[-1]

            modification_date: Optional[datetime.datetime] = None
            try:
                enclosing_row: bs4.Tag = link.findParent(name="tr")
                if enclosing_row:
                    date_text = enclosing_row.find(name="td").text
                    modification_date = datetime.datetime.strptime(date_text, "%d.%m.%Y")
            except ValueError:
                modification_date = None

            items.append(IpdDownloadInfo(
                Path(name),
                url=self._abs_url_from_link(link),
                modification_date=modification_date
            ))

        return items

class IpdDownloader:
    """
    A downloader for ipd files.
    """

    def __init__(self, tmp_dir: TmpDir, organizer: Organizer, strategy: IpdDownloadStrategy):
        self._tmp_dir = tmp_dir
        self._organizer = organizer
        self._strategy = strategy
        self._session = requests.session()

    def download_all(self, infos: List[IpdDownloadInfo]) -> None:
        """
        Download multiple files one after the other.
        """
        for info in infos:
            self.download(info)

    def download(self, info: IpdDownloadInfo) -> None:
        """
        Download a single file.
        """
        if not self._strategy(self._organizer, info):
            self._organizer.mark(info.path)
            return

        with self._session.get(info.url, stream=True) as response:
            if response.status_code == 200:
                tmp_file = self._tmp_dir.new_path()
                stream_to_path(response, tmp_file, info.path.name)
                dst_path = self._organizer.accept_file(tmp_file, info.path)

                if dst_path and info.modification_date:
                    os.utime(
                        dst_path,
                        times=(
                            math.ceil(info.modification_date.timestamp()),
                            math.ceil(info.modification_date.timestamp())
                        )
                    )
            elif response.status_code == 403:
                raise FatalException("Received 403. Are you not using the KIT VPN?")
            else:
                PRETTY.warning(f"Could not download file, got response {response.status_code}")

View File

@@ -124,6 +124,8 @@ class Organizer(Location):
         self._cleanup(self.path)

     def _cleanup(self, start_dir: Path) -> None:
+        if not start_dir.exists():
+            return
         paths: List[Path] = list(start_dir.iterdir())

         # Recursively clean paths

View File

@@ -14,6 +14,8 @@ from .errors import FatalException, swallow_and_print_errors
 from .ilias import (IliasAuthenticator, IliasCrawler, IliasDirectoryFilter,
                     IliasDownloader, IliasDownloadInfo, IliasDownloadStrategy,
                     KitShibbolethAuthenticator, download_modified_or_new)
+from .ipd import (IpdCrawler, IpdDownloader, IpdDownloadInfo,
+                  IpdDownloadStrategy, ipd_download_new_or_modified)
 from .location import Location
 from .logging import PrettyLogger, enable_logging
 from .organizer import Organizer
@@ -294,6 +296,60 @@ class Pferd(Location):
         return organizer

+    @swallow_and_print_errors
+    def ipd_kit(
+            self,
+            target: Union[PathLike, Organizer],
+            url: str,
+            transform: Transform = lambda x: x,
+            download_strategy: IpdDownloadStrategy = ipd_download_new_or_modified,
+            clean: bool = True
+    ) -> Organizer:
+        """
+        Synchronizes a folder with an IPD page.
+
+        Arguments:
+            target {Union[PathLike, Organizer]} -- The organizer / target folder to use.
+            url {str} -- the url to the page
+
+        Keyword Arguments:
+            transform {Transform} -- A transformation function for the output paths. Return None
+                to ignore a file. (default: {lambda x: x})
+            download_strategy {IpdDownloadStrategy} -- A function to determine which files need to
+                be downloaded. Can save bandwidth and reduce the number of requests.
+                (default: {ipd_download_new_or_modified})
+            clean {bool} -- Whether to clean up when the method finishes.
+        """
+        tmp_dir = self._tmp_dir.new_subdir()
+
+        if target is None:
+            PRETTY.starting_synchronizer("None", "IPD", url)
+            raise FatalException("Got 'None' as target directory, aborting")
+
+        if isinstance(target, Organizer):
+            organizer = target
+        else:
+            organizer = Organizer(self.resolve(to_path(target)))
+
+        PRETTY.starting_synchronizer(organizer.path, "IPD", url)
+
+        elements: List[IpdDownloadInfo] = IpdCrawler(url).crawl()
+        transformed = apply_transform(transform, elements)
+
+        if self._test_run:
+            self._print_transformables(transformed)
+            return organizer
+
+        downloader = IpdDownloader(tmp_dir=tmp_dir, organizer=organizer, strategy=download_strategy)
+        downloader.download_all(transformed)
+
+        if clean:
+            organizer.cleanup()
+
+        self._download_summary.merge(organizer.download_summary)
+
+        return organizer
+
     @swallow_and_print_errors
     def diva_kit(
         self,
@@ -352,4 +408,6 @@ class Pferd(Location):
         if clean:
             organizer.cleanup()

+        self._download_summary.merge(organizer.download_summary)
+
         return organizer
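
A hedged usage sketch of the new ipd_kit method, written in the style of PFERD's example configs; the course URL and target folder are made up, and the Pferd constructor taking a base directory is an assumption:

# Hypothetical config script (URL and folder are made up).
from pathlib import Path

from PFERD import Pferd

pferd = Pferd(Path(__file__).parent)  # assumed constructor, as in the example configs
pferd.ipd_kit(
    target="Algorithmen",                         # made-up target folder
    url="https://ipd.kit.edu/some/lecture/page",  # made-up IPD page URL
)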

View File

@@ -25,7 +25,10 @@ use, but doesn't expose all the configuration options and tweaks a full install
 does.

 1. Download the `sync_url` binary from the [latest release](https://github.com/Garmelon/PFERD/releases/latest).
-2. Run the binary in your terminal (`./sync_url` or `sync_url.exe` in the CMD) to see the help and use it
+2. Recognize that you most likely need to enclose the URL in `""` quotes to prevent your shell from interpreting `&` and other symbols
+3. Run the binary in your terminal (`./sync_url` or `sync_url.exe` in the CMD) to see the help and use it. I'd recommend using the `--cookies` option.
+   If you are on **Linux/Mac**, you need to *make the file executable* using `chmod +x <file>`.
+   If you are on **Mac**, you need to allow this unverified program to run (see e.g. [here](https://www.switchingtomac.com/tutorials/osx/how-to-run-unverified-apps-on-macos/))

 ## Installation
@@ -34,7 +37,7 @@ Ensure that you have at least Python 3.8 installed.
 To install PFERD or update your installation to the latest version, run this
 wherever you want to install or have already installed PFERD:

 ```
-$ pip install git+https://github.com/Garmelon/PFERD@v2.4.0
+$ pip install git+https://github.com/Garmelon/PFERD@v2.4.2
 ```

 The use of [venv] is recommended.
@@ -57,8 +60,8 @@ $ mkdir Vorlesungen
 $ cd Vorlesungen
 $ python3 -m venv .venv
 $ .venv/bin/activate
-$ pip install git+https://github.com/Garmelon/PFERD@v2.4.0
-$ curl -O https://raw.githubusercontent.com/Garmelon/PFERD/v2.4.0/example_config.py
+$ pip install git+https://github.com/Garmelon/PFERD@v2.4.2
+$ curl -O https://raw.githubusercontent.com/Garmelon/PFERD/v2.4.2/example_config.py
 $ python3 example_config.py
 $ deactivate
 ```
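
A hedged example run of the quick-start steps above, with a made-up course URL (note the double quotes around it):

$ chmod +x sync_url
$ ./sync_url --cookies cookies.txt "https://ilias.studium.kit.edu/goto.php?target=crs_123456&client_id=produktiv" MyCourse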

View File

@@ -2,7 +2,7 @@ from setuptools import find_packages, setup

 setup(
     name="PFERD",
-    version="2.4.0",
+    version="2.4.2",
     packages=find_packages(),
     install_requires=[
         "requests>=2.21.0",

View File

@@ -19,7 +19,7 @@ def main() -> None:
     parser = argparse.ArgumentParser()
     parser.add_argument("--test-run", action="store_true")
     parser.add_argument('-c', '--cookies', nargs='?', default=None, help="File to store cookies in")
-    parser.add_argument('-f', '--no-videos', nargs='?', default=None, help="Don't download videos")
+    parser.add_argument('--no-videos', nargs='?', default=None, help="Don't download videos")
     parser.add_argument('url', help="URL to the course page")
     parser.add_argument('folder', nargs='?', default=None, help="Folder to put stuff into")
     args = parser.parse_args()
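
A standalone sketch of the parser after this change, exercised with a made-up invocation; the short `-f` alias is gone, only `--no-videos` remains:

# Rebuild of the argument parser shown above, exercised with made-up values.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--test-run", action="store_true")
parser.add_argument('-c', '--cookies', nargs='?', default=None, help="File to store cookies in")
parser.add_argument('--no-videos', nargs='?', default=None, help="Don't download videos")
parser.add_argument('url', help="URL to the course page")
parser.add_argument('folder', nargs='?', default=None, help="Folder to put stuff into")

args = parser.parse_args(["--cookies", "cookies.txt",
                          "https://example.com/course", "MyCourse"])
print(args.cookies, args.no_videos, args.url, args.folder)
# -> cookies.txt None https://example.com/course MyCourse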