Merge pull request #10 from Garmelon/sync-url

Add "Sync url" script from Christophe and release it automatically
2026-02-18 23:02:23 +01:00 · 2020-10-07 09:29:48 +02:00
parent e32a49480b d73c778b0a
commit 43100f69d5
6 changed files with 236 additions and 0 deletions
--- a/.github/workflows/package.yml
+++ b/.github/workflows/package.yml
@@ -0,0 +1,67 @@
+# Builds standalone sync_url executables with PyInstaller on every matching push
+# and publishes them as a GitHub release when the pushed ref is a tag.
+name: Package Application with Pyinstaller
+
+on:
+  push:
+    branches:
+      # NOTE(review): in GitHub's filter syntax "*" presumably does not match "/", so
+      # branches such as "feature/x" would not trigger this workflow -- confirm intent.
+      - "*"
+    tags:
+      # Tags starting with "v"; the release job only runs for tag refs.
+      - "v*"
+
+jobs:
+  # Builds a single-file sync_url executable once per operating system in the matrix
+  # and uploads the result as the "Pferd Sync URL" artifact.
+  build:
+
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        os: [ubuntu-latest, windows-latest]
+
+    steps:
+    - uses: actions/checkout@v2
+
+    - uses: actions/setup-python@v2
+      with:
+        python-version: '3.x'
+
+    # Do not add "-f" here: pip's -f/--find-links expects a value, so "-f --upgrade"
+    # would swallow "--upgrade" as a find-links location instead of upgrading.
+    - name: "Install dependencies"
+      run: "pip install --upgrade setuptools pyinstaller rich requests beautifulsoup4"
+
+    # Despite the step name, this builds the executable: -F is PyInstaller's one-file mode.
+    - name: "Install sync_url.py"
+      run: "pyinstaller sync_url.py -F"
+
+    # The glob covers both "dist/sync_url" and "dist/sync_url.exe".
+    - uses: actions/upload-artifact@v2
+      with:
+        name: "Pferd Sync URL"
+        path: "dist/sync_url*"
+
+  # Publishes the built executables as a GitHub release; runs only for tag refs
+  # and only after the build job has finished.
+  release:
+    name: Release
+
+    needs: [build]
+    runs-on: ubuntu-latest
+    if: startsWith(github.ref, 'refs/tags/')
+
+    env:
+      GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
+    steps:
+    - name: "Checkout"
+      uses: actions/checkout@v2
+
+    # Fetches the artifact that the build job uploaded under the same name.
+    - name: "Download artifacts"
+      uses: actions/download-artifact@v2
+      with:
+        name: "Pferd Sync URL"
+
+    # Debugging aid: lists the working directory so the downloaded files show up in the log.
+    - name: "look at folder structure"
+      run: "ls -lah"
+
+    # NOTE(review): this step and the next both run softprops/action-gh-release; this one
+    # passes no inputs, so it looks redundant with the upload step -- confirm before removing.
+    - name: "Create release"
+      uses: softprops/action-gh-release@v1
+
+    # The file names are relative paths, so this presumably relies on the artifact having
+    # been downloaded into the working directory -- verify against download-artifact@v2.
+    - name: "Upload release artifacts"
+      uses: softprops/action-gh-release@v1
+      with:
+        body: "Download sync_url (or sync_url.exe on Windows) and run it in the terminal or CMD."
+        files: |
+          sync_url
+          sync_url.exe
--- a/.gitignore
+++ b/.gitignore
@@ -8,3 +8,7 @@ build/
 .env
 .vscode
 ilias_cookies.txt
+
+# PyInstaller
+sync_url.spec
+dist/
--- a/PFERD/ilias/crawler.py
+++ b/PFERD/ilias/crawler.py
@@ -116,6 +116,16 @@ class IliasCrawler:

        return urlunsplit((scheme, netloc, path, new_query_string, fragment))

+    def recursive_crawl_url(self, url: str) -> List[IliasDownloadInfo]:
+        """
+        Crawls a given url *and all reachable elements in it*.
+
+        Args:
+            url {str} -- the *full* url to crawl
+        """
+        start_entries: List[IliasCrawlerEntry] = self._crawl_folder(Path(""), url)
+        return self._iterate_entries_to_download_infos(start_entries)
+
    def crawl_course(self, course_id: str) -> List[IliasDownloadInfo]:
        """
        Starts the crawl process for a course, yielding a list of elements to (potentially)
@@ -235,6 +245,15 @@ class IliasCrawler:
        """
        soup = self._get_page(url, {})

+        if soup.find(id="headerimage"):
+            element: bs4.Tag = soup.find(id="headerimage")
+            if "opencast" in element.attrs["src"].lower():
+                PRETTY.warning(f"Switched to crawling a video at {folder_path}")
+                if not self.dir_filter(folder_path, IliasElementType.VIDEO_FOLDER):
+                    PRETTY.not_searching(folder_path, "user filter")
+                    return []
+                return self._crawl_video_directory(folder_path, url)
+
        result: List[IliasCrawlerEntry] = []

        # Fetch all links and throw them to the general interpreter
--- a/PFERD/pferd.py
+++ b/PFERD/pferd.py
@@ -230,6 +230,70 @@ class Pferd(Location):

        return organizer

+    @swallow_and_print_errors
+    def ilias_kit_folder(
+            self,
+            target: PathLike,
+            full_url: str,
+            dir_filter: IliasDirectoryFilter = lambda x, y: True,
+            transform: Transform = lambda x: x,
+            cookies: Optional[PathLike] = None,
+            username: Optional[str] = None,
+            password: Optional[str] = None,
+            download_strategy: IliasDownloadStrategy = download_modified_or_new,
+            clean: bool = True,
+            timeout: int = 5,
+    ) -> Organizer:
+        """
+        Synchronizes a folder with a given folder on the ILIAS instance of the KIT.
+
+        Arguments:
+            target {Path}  -- the target path to write the data to
+            full_url {str} -- the full url of the folder/videos/course to crawl
+
+        Keyword Arguments:
+            dir_filter {IliasDirectoryFilter} -- A filter for directories. Will be applied on the
+                crawler level, these directories and all of their content is skipped.
+                (default: {lambdax:True})
+            transform {Transform} -- A transformation function for the output paths. Return None
+                to ignore a file. (default: {lambdax:x})
+            cookies {Optional[Path]} -- The path to store and load cookies from.
+                (default: {None})
+            username {Optional[str]} -- The SCC username. If none is given, it will prompt
+                the user. (default: {None})
+            password {Optional[str]} -- The SCC password. If none is given, it will prompt
+                the user. (default: {None})
+            download_strategy {DownloadStrategy} -- A function to determine which files need to
+                be downloaded. Can save bandwidth and reduce the number of requests.
+                (default: {download_modified_or_new})
+            clean {bool} -- Whether to clean up when the method finishes.
+            timeout {int} -- The download timeout for opencast videos. Sadly needed due to a
+                requests bug.
+        """
+        # This authenticator only works with the KIT ilias instance.
+        authenticator = KitShibbolethAuthenticator(username=username, password=password)
+        PRETTY.starting_synchronizer(target, "ILIAS", "An ILIAS element by url")
+
+        if not full_url.startswith("https://ilias.studium.kit.edu"):
+            raise FatalException("Not a valid KIT ILIAS URL")
+
+        organizer = self._ilias(
+            target=target,
+            base_url="https://ilias.studium.kit.edu/",
+            crawl_function=lambda crawler: crawler.recursive_crawl_url(full_url),
+            authenticator=authenticator,
+            cookies=cookies,
+            dir_filter=dir_filter,
+            transform=transform,
+            download_strategy=download_strategy,
+            clean=clean,
+            timeout=timeout
+        )
+
+        self._download_summary.merge(organizer.download_summary)
+
+        return organizer
+
    @swallow_and_print_errors
    def diva_kit(
            self,
--- a/README.md
+++ b/README.md
@@ -2,6 +2,7 @@

 **P**rogramm zum **F**lotten, **E**infachen **R**unterladen von **D**ateien

+- [Quickstart with `sync_url`](#quickstart-with-sync_url)
 - [Installation](#installation)
    - [Upgrading from 2.0.0 to 2.1.0+](#upgrading-from-200-to-210)
 - [Example setup](#example-setup)
@@ -12,6 +13,20 @@
        - [Transform combinators](#transform-combinators)
    - [A short, but commented example](#a-short-but-commented-example)

+## Quickstart with `sync_url`
+
+The `sync_url` program lets you synchronize a given ILIAS URL (of a course, a
+folder, your personal desktop, etc.) without any extra configuration or setup.
+Download the program, open ILIAS, copy the URL from the address bar and pass it
+to `sync_url`.
+
+It bundles everything it needs in one executable and is easy to
+use, but doesn't expose all the configuration options and tweaks a full install
+does.
+
+1. Download the `sync_url` binary from the [latest release](https://github.com/Garmelon/PFERD/releases/latest).
+2. Run the binary in your terminal (`./sync_url`, or `sync_url.exe` in the Windows CMD) to see the help, then pass it the URL you want to synchronize.
+
 ## Installation

 Ensure that you have at least Python 3.8 installed.
--- a/sync_url.py
+++ b/sync_url.py
@@ -0,0 +1,67 @@
+#!/usr/bin/env python
+
+"""
+A simple script to synchronize an ILIAS element (course, folder, ...) given its URL.
+"""
+
+import argparse
+from pathlib import Path
+from urllib.parse import urlparse
+
+from PFERD import Pferd
+from PFERD.cookie_jar import CookieJar
+from PFERD.ilias import (IliasCrawler, IliasElementType,
+                         KitShibbolethAuthenticator)
+from PFERD.utils import to_path
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--test-run", action="store_true")
+    parser.add_argument('-c', '--cookies', nargs='?', default=None, help="File to store cookies in")
+    parser.add_argument('-f', '--no-videos', nargs='?', default=None, help="Don't download videos")
+    parser.add_argument('url', help="URL to the course page")
+    parser.add_argument('folder', nargs='?', default=None, help="Folder to put stuff into")
+    args = parser.parse_args()
+
+    url = urlparse(args.url)
+
+    cookie_jar = CookieJar(to_path(args.cookies) if args.cookies else None)
+    session = cookie_jar.create_session()
+    authenticator = KitShibbolethAuthenticator()
+    crawler = IliasCrawler(url.scheme + '://' + url.netloc, session,
+                           authenticator, lambda x, y: True)
+
+    cookie_jar.load_cookies()
+
+    if args.folder is not None:
+        folder = args.folder
+        # Initialize pferd at the *parent of the passed folder*
+        # This is needed so Pferd's internal protections against escaping the working directory
+        # do not trigger (e.g. if somebody names a file in ILIAS '../../bad thing.txt')
+        pferd = Pferd(Path(Path(__file__).parent, folder).parent, test_run=args.test_run)
+    else:
+        # fetch course name from ilias
+        folder = crawler.find_element_name(args.url)
+        cookie_jar.save_cookies()
+
+        # Initialize pferd at the location of the script
+        pferd = Pferd(Path(__file__).parent, test_run=args.test_run)
+
+    def dir_filter(_: Path, element: IliasElementType) -> bool:
+        if args.no_videos:
+            return element not in [IliasElementType.VIDEO_FILE, IliasElementType.VIDEO_FOLDER]
+        return True
+
+    pferd.enable_logging()
+    # fetch
+    pferd.ilias_kit_folder(
+        target=folder,
+        full_url=args.url,
+        cookies=args.cookies,
+        dir_filter=dir_filter
+    )
+
+
+# Run the synchronization only when executed as a script, not when imported.
+if __name__ == "__main__":
+    main()