Adjust release names

Explain how to run sync_url on Mac
Fix a few typos in release body
2023-12-21 10:23:01 +01:00 · 2020-10-30 18:14:02 +01:00 · 2020-10-30 17:53:55 +01:00 · 2020-10-30 17:32:04 +01:00 · 2020-10-30 17:28:34 +01:00 · 2020-10-30 17:23:27 +01:00
18 changed files with 946 additions and 263 deletions
--- a/.github/workflows/package.yml
+++ b/.github/workflows/package.yml
@ -0,0 +1,74 @@
 # Packages sync_url.py into standalone executables with PyInstaller on every
 # matching push; pushes of tags additionally run the release job.
 name: Package Application with Pyinstaller
 on:
  push:
    # NOTE(review): in GitHub's filter syntax `*` does not match `/`, so a
    # branch such as `feature/x` would not trigger this workflow - confirm
    # that this is intended (`**` would match every branch).
    branches:
      - "*"
    # Any tag whose name starts with "v".
    tags:
      - "v*"
 jobs:
  # Builds a single-file sync_url executable on each operating system of the
  # matrix and uploads it as a workflow artifact.
  build:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [ubuntu-latest, windows-latest, macos-latest]
    steps:
    - uses: actions/checkout@v2
    - uses: actions/setup-python@v2
      with:
        python-version: '3.x'
    # `-f` is pip's `--find-links` option and takes an argument, so the former
    # `-f --upgrade` consumed `--upgrade` as a link location instead of
    # upgrading anything. Only `--upgrade` is wanted here.
    - name: "Install dependencies"
      run: "pip install --upgrade setuptools pyinstaller rich requests beautifulsoup4"
    # `-F` asks PyInstaller for a one-file bundle, written to dist/.
    - name: "Build sync_url executable"
      run: "pyinstaller sync_url.py -F"
    # Tag the binary with the runner OS so the three builds do not collide
    # once they are collected by the release job.
    - name: "Move artifact"
      run: "mv dist/sync_url* dist/sync_url-${{ matrix.os }}"
    # NOTE(review): every matrix job uploads under the same artifact name; this
    # relies on upload-artifact@v2 merging the files into one artifact. Newer
    # major versions of the action reject duplicate names - check before bumping.
    - uses: actions/upload-artifact@v2
      with:
        name: "Pferd Sync URL"
        path: "dist/sync_url*"
  # Publishes the executables produced by the build job as a GitHub release.
  release:
    name: Release
    # Wait for all matrix builds to finish.
    needs: [build]
    runs-on: ubuntu-latest
    # Only run when the pushed ref is a tag.
    if: startsWith(github.ref, 'refs/tags/')
    env:
      GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    steps:
    - name: "Checkout"
      uses: actions/checkout@v2
    # Fetches the artifact the build jobs uploaded under the same name.
    - name: "Download artifacts"
      uses: actions/download-artifact@v2
      with:
        name: "Pferd Sync URL"
    # Debugging aid: shows which files the download step produced.
    - name: "look at folder structure"
      run: "ls -lah"
    # Map the runner-based file names to the names offered for download.
    - name: "Rename releases"
      run: "mv sync_url-macos-latest pferd_sync_url_mac && mv sync_url-ubuntu-latest pferd_sync_url_linux && mv sync_url-windows-latest pferd_sync_url.exe"
    # NOTE(review): this step calls the release action without any inputs and
    # the next step calls it again with body and files - confirm that both
    # invocations are really needed.
    - name: "Create release"
      uses: softprops/action-gh-release@v1
    - name: "Upload release artifacts"
      uses: softprops/action-gh-release@v1
      with:
        body: "Download the correct sync_url for your platform and run it in the terminal or CMD. You might need to make it executable on Linux/Mac with `chmod +x <file>`."
        files: |
          pferd_sync_url_mac
          pferd_sync_url_linux
          pferd_sync_url.exe
--- a/.gitignore
+++ b/.gitignore
@ -1,7 +1,14 @@
 __pycache__/
 .venv/
 venv/
 .idea/
 build/
 .mypy_cache/
 .tmp/
 .env
 .vscode
 ilias_cookies.txt
 # PyInstaller
 sync_url.spec
 dist/
--- a/18
+++ b/18
@ -0,0 +1,18 @@
 Copyright 2019-2020 Garmelon, I-Al-Istannen, danstooamerican, pavelzw
 Permission is hereby granted, free of charge, to any person obtaining a copy of
 this software and associated documentation files (the "Software"), to deal in
 the Software without restriction, including without limitation the rights to
 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
 the Software, and to permit persons to whom the Software is furnished to do so,
 subject to the following conditions:
 The above copyright notice and this permission notice shall be included in all
 copies or substantial portions of the Software.
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
 FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
 COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
 IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--- a/PFERD/cookie_jar.py
+++ b/PFERD/cookie_jar.py
@ -22,7 +22,7 @@ class CookieJar:
        if cookie_file is None:
            self._cookies = LWPCookieJar()
        else:
-            self._cookies = LWPCookieJar(cookie_file)
+            self._cookies = LWPCookieJar(str(cookie_file.resolve()))
    @property
    def cookies(self) -> LWPCookieJar:
--- a/PFERD/download_summary.py
+++ b/PFERD/download_summary.py
@ -0,0 +1,69 @@
 """
 Provides a summary that keeps track of new, modified or deleted files.
 """
 from pathlib import Path
 from typing import List
 class DownloadSummary:
    """
    Collects the paths of files that were added, changed or removed and
    offers read access to each of the three groups.
    """
    def __init__(self) -> None:
        self._new_files: List[Path] = list()
        self._modified_files: List[Path] = list()
        self._deleted_files: List[Path] = list()
    @property
    def new_files(self) -> List[Path]:
        """
        A fresh list holding every path registered as new.
        """
        return list(self._new_files)
    @property
    def modified_files(self) -> List[Path]:
        """
        A fresh list holding every path registered as modified.
        """
        return list(self._modified_files)
    @property
    def deleted_files(self) -> List[Path]:
        """
        A fresh list holding every path registered as deleted.
        """
        return list(self._deleted_files)
    def merge(self, summary: 'DownloadSummary') -> None:
        """
        Appends all entries of the passed summary to this one. The passed
        summary itself is left untouched.
        """
        self._new_files.extend(summary.new_files)
        self._modified_files.extend(summary.modified_files)
        self._deleted_files.extend(summary.deleted_files)
    def add_deleted_file(self, path: Path) -> None:
        """
        Records the given path as deleted.
        """
        self._deleted_files.append(path)
    def add_modified_file(self, path: Path) -> None:
        """
        Records the given path as changed.
        """
        self._modified_files.append(path)
    def add_new_file(self, path: Path) -> None:
        """
        Records the given path as new.
        """
        self._new_files.append(path)
    def has_updates(self) -> bool:
        """
        True if at least one new, modified or deleted file was recorded.
        """
        groups = (self._new_files, self._modified_files, self._deleted_files)
        return any(groups)
--- a/PFERD/ilias/init.py
+++ b/PFERD/ilias/init.py
@ -3,7 +3,8 @@ Synchronizing files from ILIAS instances (https://www.ilias.de/).
 """
 from .authenticators import IliasAuthenticator, KitShibbolethAuthenticator
-from .crawler import IliasCrawler, IliasDirectoryFilter, IliasDirectoryType
+from .crawler import (IliasCrawler, IliasCrawlerEntry, IliasDirectoryFilter,
                      IliasElementType)
 from .downloader import (IliasDownloader, IliasDownloadInfo,
                         IliasDownloadStrategy, download_everything,
                         download_modified_or_new)
--- a/PFERD/ilias/authenticators.py
+++ b/PFERD/ilias/authenticators.py
@ -67,7 +67,7 @@ class KitShibbolethAuthenticator(IliasAuthenticator):
        while not self._login_successful(soup):
            # Searching the form here so that this fails before asking for
            # credentials rather than after asking.
-            form = soup.find("form", {"class": "form2", "method": "post"})
+            form = soup.find("form", {"class": "full content", "method": "post"})
            action = form["action"]
            # Equivalent: Enter credentials in
--- a/PFERD/ilias/crawler.py
+++ b/PFERD/ilias/crawler.py
@ -8,7 +8,7 @@ import logging
 import re
 from enum import Enum
 from pathlib import Path
-from typing import Any, Callable, Dict, List, Optional
+from typing import Any, Callable, Dict, List, Optional, Union
 from urllib.parse import (parse_qs, urlencode, urljoin, urlparse, urlsplit,
                          urlunsplit)
@ -26,16 +26,58 @@ LOGGER = logging.getLogger(__name__)
 PRETTY = PrettyLogger(LOGGER)
-class IliasDirectoryType(Enum):
+class IliasElementType(Enum):
    """
-    The type of an ilias directory.
+    The type of an ilias element.
    """
-    FOLDER = "FOLDER"
+    REGULAR_FOLDER = "REGULAR_FOLDER"
-    VIDEO = "VIDEO"
+    VIDEO_FOLDER = "VIDEO_FOLDER"
-    EXERCISE = "EXERCISE"
+    EXERCISE_FOLDER = "EXERCISE_FOLDER"
    REGULAR_FILE = "REGULAR_FILE"
    VIDEO_FILE = "VIDEO_FILE"
    FORUM = "FORUM"
    EXTERNAL_LINK = "EXTERNAL_LINK"
    def is_folder(self) -> bool:
        """
        Returns whether this type is some kind of folder.
        """
        return "FOLDER" in str(self.name)
-IliasDirectoryFilter = Callable[[Path, IliasDirectoryType], bool]
+IliasDirectoryFilter = Callable[[Path, IliasElementType], bool]
 class IliasCrawlerEntry:
    # pylint: disable=too-few-public-methods
    """
    Internal record for one element found while crawling ILIAS. The url is
    always stored as a zero-argument callable; a plain string is wrapped in a
    callable that returns it.
    """
    def __init__(
            self,
            path: Path,
            url: Union[str, Callable[[], Optional[str]]],
            entry_type: IliasElementType,
            modification_date: Optional[datetime.datetime]
    ):
        self.path = path
        self.entry_type = entry_type
        self.modification_date = modification_date
        self.url: Callable[[], Optional[str]]
        if not isinstance(url, str):
            self.url = url
        else:
            # Bind the string to its own name so the lambda captures a str
            fixed_url = url
            self.url = lambda: fixed_url
    def to_download_info(self) -> Optional[IliasDownloadInfo]:
        """
        Builds an IliasDownloadInfo from this entry.
        Only regular files and video files can be converted, every other
        entry type yields None.
        """
        file_types = (IliasElementType.REGULAR_FILE, IliasElementType.VIDEO_FILE)
        if self.entry_type not in file_types:
            return None
        return IliasDownloadInfo(self.path, self.url, self.modification_date)
 class IliasCrawler:
@ -62,12 +104,6 @@ class IliasCrawler:
        self._authenticator = authenticator
        self.dir_filter = dir_filter
    def _abs_url_from_link(self, link_tag: bs4.Tag) -> str:
        """
        Create an absolute url from an <a> tag.
        """
        return urljoin(self._base_url, link_tag.get("href"))
    @staticmethod
    def _url_set_query_param(url: str, param: str, value: str) -> str:
        """
@ -80,6 +116,16 @@ class IliasCrawler:
        return urlunsplit((scheme, netloc, path, new_query_string, fragment))
    def recursive_crawl_url(self, url: str) -> List[IliasDownloadInfo]:
        """
        Crawls the page behind the url as a folder and turns everything
        reachable from it into download infos.
        Args:
            url {str} -- the *full* url to crawl
        """
        return self._iterate_entries_to_download_infos(self._crawl_folder(Path(""), url))
    def crawl_course(self, course_id: str) -> List[IliasDownloadInfo]:
        """
        Starts the crawl process for a course, yielding a list of elements to (potentially)
@ -98,15 +144,43 @@ class IliasCrawler:
        if not self._is_course_id_valid(root_url, course_id):
            raise FatalException(
-                "Invalid course id? The URL the server returned did not contain my id."
+                "Invalid course id? I didn't find anything looking like a course!"
            )
        # And treat it as a folder
-        return self._crawl_folder(Path(""), root_url)
+        entries: List[IliasCrawlerEntry] = self._crawl_folder(Path(""), root_url)
        return self._iterate_entries_to_download_infos(entries)
    def _is_course_id_valid(self, root_url: str, course_id: str) -> bool:
        response: requests.Response = self._session.get(root_url)
-        return course_id in response.url
+        # We were redirected ==> Non-existent ID
        if course_id not in response.url:
            return False
        link_element: bs4.Tag = self._get_page(root_url, {}).find(id="current_perma_link")
        if not link_element:
            return False
        # It wasn't a course but a category list, forum, etc.
        return "crs_" in link_element.get("value")
    def find_course_name(self, course_id: str) -> Optional[str]:
        """
        Looks up the name of the course with the given id. Yields None if
        no name could be found for it.
        """
        goto_url = self._base_url + "/goto.php"
        return self.find_element_name(
            self._url_set_query_param(goto_url, "target", f"crs_{course_id}")
        )
    def find_element_name(self, url: str) -> Optional[str]:
        """
        Fetches the page at the given URL and yields the text of its
        "il_mhead_t_focus" element, or None if the page has no such element.
        """
        page = self._get_page(url, {})
        heading = page.find(id="il_mhead_t_focus")
        return heading.text if heading else None
    def crawl_personal_desktop(self) -> List[IliasDownloadInfo]:
        """
@ -115,14 +189,101 @@ class IliasCrawler:
        Raises:
            FatalException: if an unrecoverable error occurs
        """
-        return self._crawl_folder(Path(""), self._base_url + "?baseClass=ilPersonalDesktopGUI")
+        entries: List[IliasCrawlerEntry] = self._crawl_folder(
            Path(""), self._base_url + "?baseClass=ilPersonalDesktopGUI"
        )
        return self._iterate_entries_to_download_infos(entries)
-    def _switch_on_crawled_type(
+    def _iterate_entries_to_download_infos(
            self,
            entries: List[IliasCrawlerEntry]
    ) -> List[IliasDownloadInfo]:
        result: List[IliasDownloadInfo] = []
        entries_to_process: List[IliasCrawlerEntry] = entries.copy()
        while len(entries_to_process) > 0:
            entry = entries_to_process.pop()
            if entry.entry_type == IliasElementType.EXTERNAL_LINK:
                PRETTY.not_searching(entry.path, "external link")
                continue
            if entry.entry_type == IliasElementType.FORUM:
                PRETTY.not_searching(entry.path, "forum")
                continue
            if entry.entry_type.is_folder() and not self.dir_filter(entry.path, entry.entry_type):
                PRETTY.not_searching(entry.path, "user filter")
                continue
            download_info = entry.to_download_info()
            if download_info is not None:
                result.append(download_info)
                continue
            url = entry.url()
            if url is None:
                PRETTY.warning(f"Could not find url for {str(entry.path)!r}, skipping it")
                continue
            PRETTY.searching(entry.path)
            if entry.entry_type == IliasElementType.EXERCISE_FOLDER:
                entries_to_process += self._crawl_exercises(entry.path, url)
                continue
            if entry.entry_type == IliasElementType.REGULAR_FOLDER:
                entries_to_process += self._crawl_folder(entry.path, url)
                continue
            if entry.entry_type == IliasElementType.VIDEO_FOLDER:
                entries_to_process += self._crawl_video_directory(entry.path, url)
                continue
        return result
    def _crawl_folder(self, folder_path: Path, url: str) -> List[IliasCrawlerEntry]:
        """
        Collects the entries of a folder-like page. If the page header image
        points at opencast, the page is crawled as a video directory instead
        (provided the directory filter accepts it).
        """
        page = self._get_page(url, {})
        header_image: Optional[bs4.Tag] = page.find(id="headerimage")
        if header_image and "opencast" in header_image.attrs["src"].lower():
            PRETTY.warning(f"Switched to crawling a video at {folder_path}")
            if self.dir_filter(folder_path, IliasElementType.VIDEO_FOLDER):
                return self._crawl_video_directory(folder_path, url)
            PRETTY.not_searching(folder_path, "user filter")
            return []
        entries: List[IliasCrawlerEntry] = []
        # Classify every item link; files are resolved right away, all other
        # typed elements are recorded for later processing
        for anchor in page.select("a.il_ContainerItemTitle"):
            target_url = self._abs_url_from_link(anchor)
            entry_path = Path(folder_path, anchor.getText().strip())
            kind = self._find_type_from_link(entry_path, anchor, target_url)
            if kind is None:
                PRETTY.warning(f"Found element without a type at {str(entry_path)!r}")
            elif kind == IliasElementType.REGULAR_FILE:
                entries.extend(self._crawl_file(folder_path, anchor, target_url))
            else:
                entries.append(IliasCrawlerEntry(entry_path, target_url, kind, None))
        return entries
    def _abs_url_from_link(self, link_tag: bs4.Tag) -> str:
        """
        Resolves the href of an <a> tag against the base url.
        """
        href = link_tag.get("href")
        return urljoin(self._base_url, href)
    @staticmethod
    def _find_type_from_link(
            path: Path,
            link_element: bs4.Tag,
            url: str
-    ) -> List[IliasDownloadInfo]:
+    ) -> Optional[IliasElementType]:
        """
        Decides which sub crawler to use for a given top level element.
        """
@ -131,28 +292,64 @@ class IliasCrawler:
        # file URLs contain "target=file"
        if "target=file_" in parsed_url.query:
-            LOGGER.debug("Interpreted as file.")
+            return IliasElementType.REGULAR_FILE
            return self._crawl_file(path, link_element, url)
        # Skip forums
        if "cmd=showThreads" in parsed_url.query:
-            LOGGER.debug("Skipping forum %r", url)
+            return IliasElementType.FORUM
            return []
        # Everything with a ref_id can *probably* be opened to reveal nested things
        # video groups, directories, exercises, etc
        if "ref_id=" in parsed_url.query:
-            LOGGER.debug("Processing folder-like...")
+            return IliasCrawler._find_type_from_folder_like(link_element, url)
            return self._switch_on_folder_like(path, link_element, url)
        PRETTY.warning(
-            "Got unkwarning element type in switch. I am not sure what horror I found on the"
+            "Got unknown element type in switch. I am not sure what horror I found on the"
            f" ILIAS page. The element was at {str(path)!r} and it is {link_element!r})"
        )
-        return []
+        return None
    @staticmethod
-    def _crawl_file(path: Path, link_element: bs4.Tag, url: str) -> List[IliasDownloadInfo]:
+    def _find_type_from_folder_like(link_element: bs4.Tag, url: str) -> Optional[IliasElementType]:
        """
        Try crawling something that looks like a folder.
        """
        # pylint: disable=too-many-return-statements
        # We look for the outer div of our inner link, to find information around it
        # (mostly the icon)
        for parent in link_element.parents:
            if "ilContainerListItemOuter" in parent["class"]:
                found_parent = parent
                break
        if found_parent is None:
            PRETTY.warning(f"Could not find element icon for {url!r}")
            return None
        # Find the small descriptive icon to figure out the type
        img_tag: Optional[bs4.Tag] = found_parent.select_one("img.ilListItemIcon")
        if img_tag is None:
            PRETTY.warning(f"Could not find image tag for {url!r}")
            return None
        if "opencast" in str(img_tag["alt"]).lower():
            return IliasElementType.VIDEO_FOLDER
        if str(img_tag["src"]).endswith("icon_exc.svg"):
            return IliasElementType.EXERCISE_FOLDER
        if str(img_tag["src"]).endswith("icon_webr.svg"):
            return IliasElementType.EXTERNAL_LINK
        if str(img_tag["src"]).endswith("frm.svg"):
            return IliasElementType.FORUM
        return IliasElementType.REGULAR_FOLDER
    @staticmethod
    def _crawl_file(path: Path, link_element: bs4.Tag, url: str) -> List[IliasCrawlerEntry]:
        """
        Crawls a file.
        """
@ -183,80 +380,11 @@ class IliasCrawler:
        name = link_element.getText()
        full_path = Path(path, name + "." + file_type)
-        return [IliasDownloadInfo(full_path, url, modification_date)]
+        return [
            IliasCrawlerEntry(full_path, url, IliasElementType.REGULAR_FILE, modification_date)
        ]
-    def _switch_on_folder_like(
+    def _crawl_video_directory(self, video_dir_path: Path, url: str) -> List[IliasCrawlerEntry]:
            self,
            parent_path: Path,
            link_element: bs4.Tag,
            url: str
    ) -> List[IliasDownloadInfo]:
        """
        Try crawling something that looks like a folder.
        """
        # pylint: disable=too-many-return-statements
        element_path = Path(parent_path, link_element.getText().strip())
        found_parent: Optional[bs4.Tag] = None
        # We look for the outer div of our inner link, to find information around it
        # (mostly the icon)
        for parent in link_element.parents:
            if "ilContainerListItemOuter" in parent["class"]:
                found_parent = parent
                break
        if found_parent is None:
            PRETTY.warning(f"Could not find element icon for {url!r}")
            return []
        # Find the small descriptive icon to figure out the type
        img_tag: Optional[bs4.Tag] = found_parent.select_one("img.ilListItemIcon")
        if img_tag is None:
            PRETTY.warning(f"Could not find image tag for {url!r}")
            return []
        directory_type = IliasDirectoryType.FOLDER
        if "opencast" in str(img_tag["alt"]).lower():
            directory_type = IliasDirectoryType.VIDEO
        if str(img_tag["src"]).endswith("icon_exc.svg"):
            directory_type = IliasDirectoryType.EXERCISE
        if not self.dir_filter(element_path, directory_type):
            PRETTY.not_searching(element_path, "user filter")
            return []
        PRETTY.searching(element_path)
        # A forum
        if str(img_tag["src"]).endswith("frm.svg"):
            LOGGER.debug("Skipping forum at %r", url)
            PRETTY.not_searching(element_path, "forum")
            return []
        # An exercise
        if directory_type == IliasDirectoryType.EXERCISE:
            LOGGER.debug("Crawling exercises at %r", url)
            return self._crawl_exercises(element_path, url)
        if str(img_tag["src"]).endswith("icon_webr.svg"):
            LOGGER.debug("Skipping external link at %r", url)
            PRETTY.not_searching(element_path, "external link")
            return []
        # Match the opencast video plugin
        if directory_type == IliasDirectoryType.VIDEO:
            LOGGER.debug("Found video site: %r", url)
            return self._crawl_video_directory(element_path, url)
        # Assume it is a folder
        return self._crawl_folder(element_path, self._abs_url_from_link(link_element))
    def _crawl_video_directory(self, video_dir_path: Path, url: str) -> List[IliasDownloadInfo]:
        """
        Crawl the video overview site.
        """
@ -272,6 +400,71 @@ class IliasCrawler:
            {"limit": 800, "cmd": "asyncGetTableGUI", "cmdMode": "asynch"}
        )
        # If we find a page selected, we probably need to respect pagination
        if self._is_paginated_video_page(video_list_soup):
            second_stage_url = self._abs_url_from_link(content_link)
            return self._crawl_paginated_video_directory(
                video_dir_path, video_list_soup, second_stage_url
            )
        return self._crawl_video_directory_second_stage(video_dir_path, video_list_soup)
    @staticmethod
    def _is_paginated_video_page(soup: bs4.BeautifulSoup) -> bool:
        """
        True if the page has an element whose id starts with "tab_page_sel".
        """
        page_selector = soup.find(id=re.compile(r"tab_page_sel.+"))
        return page_selector is not None
    def _crawl_paginated_video_directory(
            self,
            video_dir_path: Path,
            paged_video_list_soup: bs4.BeautifulSoup,
            second_stage_url: str
    ) -> List[IliasCrawlerEntry]:
        """
        Handles a paginated video listing by requesting the listing again
        with 800 rows per page. Falls back to the page it was given if the
        listing table or its id can not be found.
        """
        LOGGER.info("Found paginated video page, trying 800 elements")
        # The id of the table is needed to name the query parameter that
        # sets the number of rows
        table_id_pattern = re.compile(r"tbl_xoct_.+")
        table: bs4.Tag = paged_video_list_soup.find(name="table", id=table_id_pattern)
        if table is None:
            PRETTY.warning(
                "Could not increase elements per page (table not found)."
                " Some might not be crawled!"
            )
            return self._crawl_video_directory_second_stage(video_dir_path, paged_video_list_soup)
        id_match = re.match(r"tbl_xoct_(.+)", table.attrs["id"])
        if id_match is None:
            PRETTY.warning(
                "Could not increase elements per page (table id not found)."
                " Some might not be crawled!"
            )
            return self._crawl_video_directory_second_stage(video_dir_path, paged_video_list_soup)
        row_limit_param = f"tbl_xoct_{id_match.group(1)}_trows"
        query = {row_limit_param: 800, "cmd": "asyncGetTableGUI", "cmdMode": "asynch"}
        enlarged_page = self._get_page(second_stage_url, query)
        # Still paginated: the larger page size was not enough or not applied
        if self._is_paginated_video_page(enlarged_page):
            PRETTY.warning(
                "800 elements do not seem to be enough (or I failed to fetch that many)."
                " I will miss elements."
            )
        return self._crawl_video_directory_second_stage(video_dir_path, enlarged_page)
    def _crawl_video_directory_second_stage(
            self,
            video_dir_path: Path,
            video_list_soup: bs4.BeautifulSoup
    ) -> List[IliasCrawlerEntry]:
        """
        Crawls the "second stage" video page. This page contains the actual video urls.
        """
        direct_download_links: List[bs4.Tag] = video_list_soup.findAll(
            name="a", text=re.compile(r"\s*Download\s*")
        )
@ -281,10 +474,11 @@ class IliasCrawler:
            name="a", text=re.compile(r"\s*Abspielen\s*")
        )
-        results: List[IliasDownloadInfo] = []
+        results: List[IliasCrawlerEntry] = []
        # We can download everything directly!
-        if len(direct_download_links) == len(video_links):
+        # FIXME: Sadly the download button is currently broken, so never do that
        if False and len(direct_download_links) == len(video_links):
            for link in direct_download_links:
                results += self._crawl_single_video(video_dir_path, link, True)
        else:
@ -298,7 +492,7 @@ class IliasCrawler:
            parent_path: Path,
            link: bs4.Tag,
            direct_download: bool
-    ) -> List[IliasDownloadInfo]:
+    ) -> List[IliasCrawlerEntry]:
        """
        Crawl a single video based on its "Abspielen" link from the video listing.
        """
@ -316,46 +510,54 @@ class IliasCrawler:
        video_path: Path = Path(parent_path, title)
        video_url = self._abs_url_from_link(link)
        # The video had a direct download button we can use instead
        if direct_download:
            LOGGER.debug("Using direct download for video %r", str(video_path))
-            return [IliasDownloadInfo(
+            return [IliasCrawlerEntry(
-                video_path,
+                video_path, video_url, IliasElementType.VIDEO_FILE, modification_time
                self._abs_url_from_link(link),
                modification_time
            )]
-        # Fetch the actual video page. This is a small wrapper page initializing a javscript
+        return [IliasCrawlerEntry(
-        # player. Sadly we can not execute that JS. The actual video stream url is nowhere
+            video_path,
-        # on the page, but defined in a JS object inside a script tag, passed to the player
+            self._crawl_video_url_from_play_link(video_url),
-        # library.
+            IliasElementType.VIDEO_FILE,
-        # We do the impossible and RegEx the stream JSON object out of the page's HTML source
+            modification_time
-        video_page_url = self._abs_url_from_link(link)
+        )]
        video_page_soup = self._get_page(video_page_url, {})
        regex: re.Pattern = re.compile(
            r"({\"streams\"[\s\S]+?),\s*{\"paella_config_file", re.IGNORECASE
        )
        json_match = regex.search(str(video_page_soup))
-        if json_match is None:
+    def _crawl_video_url_from_play_link(self, play_url: str) -> Callable[[], Optional[str]]:
-            PRETTY.warning(f"Could not find json stream info for {video_page_url!r}")
+        def inner() -> Optional[str]:
-            return []
+            # Fetch the actual video page. This is a small wrapper page initializing a javascript
-        json_str = json_match.group(1)
+            # player. Sadly we can not execute that JS. The actual video stream url is nowhere
            # on the page, but defined in a JS object inside a script tag, passed to the player
            # library.
            # We do the impossible and RegEx the stream JSON object out of the page's HTML source
            video_page_soup = soupify(self._session.get(play_url))
            regex: re.Pattern = re.compile(
                r"({\"streams\"[\s\S]+?),\s*{\"paella_config_file", re.IGNORECASE
            )
            json_match = regex.search(str(video_page_soup))
-        # parse it
+            if json_match is None:
-        json_object = json.loads(json_str)
+                PRETTY.warning(f"Could not find json stream info for {play_url!r}")
-        # and fetch the video url!
+                return None
-        video_url = json_object["streams"][0]["sources"]["mp4"][0]["src"]
+            json_str = json_match.group(1)
-        return [IliasDownloadInfo(video_path, video_url, modification_time)]
+            # parse it
            json_object = json.loads(json_str)
            # and fetch the video url!
            video_url = json_object["streams"][0]["sources"]["mp4"][0]["src"]
            return video_url
        return inner
-    def _crawl_exercises(self, element_path: Path, url: str) -> List[IliasDownloadInfo]:
+    def _crawl_exercises(self, element_path: Path, url: str) -> List[IliasCrawlerEntry]:
        """
        Crawl files offered for download in exercises.
        """
        soup = self._get_page(url, {})
-        results: List[IliasDownloadInfo] = []
+        results: List[IliasCrawlerEntry] = []
        # Each assignment is in an accordion container
        assignment_containers: List[bs4.Tag] = soup.select(".il_VAccordionInnerContainer")
@ -382,30 +584,15 @@ class IliasCrawler:
                LOGGER.debug("Found file %r at %r", file_name, url)
-                results.append(IliasDownloadInfo(
+                results.append(IliasCrawlerEntry(
                    Path(element_path, container_name, file_name),
                    url,
                    IliasElementType.REGULAR_FILE,
                    None  # We do not have any timestamp
                ))
        return results
    def _crawl_folder(self, folder_path: Path, url: str) -> List[IliasDownloadInfo]:
        """
        Crawl all files in a folder-like element.
        """
        soup = self._get_page(url, {})
        result: List[IliasDownloadInfo] = []
        # Fetch all links and throw them to the general interpreter
        links: List[bs4.Tag] = soup.select("a.il_ContainerItemTitle")
        for link in links:
            abs_url = self._abs_url_from_link(link)
            result += self._switch_on_crawled_type(folder_path, link, abs_url)
        return result
    def _get_page(self, url: str, params: Dict[str, Any]) -> bs4.BeautifulSoup:
        """
        Fetches a page from ILIAS, authenticating when needed.
@ -416,8 +603,10 @@ class IliasCrawler:
        content_type = response.headers["content-type"]
        if not content_type.startswith("text/html"):
-            # TODO: Correct exception type
+            raise FatalException(
-            raise Exception(f"Invalid content type {content_type}")
+                f"Invalid content type {content_type} when crawling ilias page"
                " {url!r} with {params!r}"
            )
        soup = soupify(response)
--- a/PFERD/ilias/downloader.py
+++ b/PFERD/ilias/downloader.py
@ -2,9 +2,10 @@
 import datetime
 import logging
-from dataclasses import dataclass
+import math
-from pathlib import Path
+import os
-from typing import Callable, List, Optional
+from pathlib import Path, PurePath
 from typing import Callable, List, Optional, Union
 import bs4
 import requests
@ -24,15 +25,24 @@ class ContentTypeException(Exception):
    """Thrown when the content type of the ilias element can not be handled."""
@dataclass
 class IliasDownloadInfo(Transformable):
    """
    This class describes a single file to be downloaded.
    """
-    url: str
+    def __init__(
-    modification_date: Optional[datetime.datetime]
+            self,
-    # parameters: Dict[str, Any] = field(default_factory=dict)
+            path: PurePath,
            url: Union[str, Callable[[], Optional[str]]],
            modifcation_date: Optional[datetime.datetime]
    ):
        super().__init__(path)
        if isinstance(url, str):
            string_url = url
            self.url: Callable[[], Optional[str]] = lambda: string_url
        else:
            self.url = url
        self.modification_date = modifcation_date
 IliasDownloadStrategy = Callable[[Organizer, IliasDownloadInfo], bool]
@ -74,9 +84,13 @@ class IliasDownloader:
            session: requests.Session,
            authenticator: IliasAuthenticator,
            strategy: IliasDownloadStrategy,
            timeout: int = 5
    ):
        """
        Create a new IliasDownloader.
        The timeout applies to the download request only, as bwcloud uses IPv6
        and requests has a problem with that: https://github.com/psf/requests/issues/5522
        """
        self._tmp_dir = tmp_dir
@ -84,6 +98,7 @@ class IliasDownloader:
        self._session = session
        self._authenticator = authenticator
        self._strategy = strategy
        self._timeout = timeout
    def download_all(self, infos: List[IliasDownloadInfo]) -> None:
        """
@ -108,16 +123,30 @@ class IliasDownloader:
        tmp_file = self._tmp_dir.new_path()
        while not self._try_download(info, tmp_file):
            LOGGER.info("Retrying download: %r", info)
            self._authenticator.authenticate(self._session)
-        self._organizer.accept_file(tmp_file, info.path)
+        dst_path = self._organizer.accept_file(tmp_file, info.path)
        if dst_path and info.modification_date:
            os.utime(
                dst_path,
                times=(
                    math.ceil(info.modification_date.timestamp()),
                    math.ceil(info.modification_date.timestamp())
                )
            )
    def _try_download(self, info: IliasDownloadInfo, target: Path) -> bool:
-        with self._session.get(info.url, stream=True) as response:
+        url = info.url()
-            content_type = response.headers["content-type"]
+        if url is None:
            PRETTY.warning(f"Could not download {str(info.path)!r} as I got no URL :/")
            return True
-            if content_type.startswith("text/html"):
+        with self._session.get(url, stream=True, timeout=self._timeout) as response:
-                # Dangit, we're probably not logged in.
+            content_type = response.headers["content-type"]
            has_content_disposition = "content-disposition" in response.headers
            if content_type.startswith("text/html") and not has_content_disposition:
                if self._is_logged_in(soupify(response)):
                    raise ContentTypeException("Attempting to download a web page, not a file")
--- a/PFERD/logging.py
+++ b/PFERD/logging.py
@ -3,14 +3,18 @@ Contains a few logger utility functions and implementations.
 """
 import logging
-from typing import Optional
+from pathlib import Path
 from typing import List, Optional
 from rich import print as rich_print
 from rich._log_render import LogRender
 from rich.console import Console
 from rich.panel import Panel
 from rich.style import Style
 from rich.text import Text
 from rich.theme import Theme
 from .download_summary import DownloadSummary
 from .utils import PathLike, to_path
 STYLE = "{"
@ -111,6 +115,15 @@ class PrettyLogger:
            f"[bold green]Created {self._format_path(path)}.[/bold green]"
        )
    def deleted_file(self, path: PathLike) -> None:
        """
        Report that a file was removed.
        """
        shown_path = self._format_path(path)
        self.logger.info(f"[bold red]Deleted {shown_path}.[/bold red]")
    def ignored_file(self, path: PathLike, reason: str) -> None:
        """
        File was not downloaded or modified.
@ -138,6 +151,23 @@ class PrettyLogger:
            f"([/dim]{reason}[dim]).[/dim]"
        )
    def summary(self, download_summary: DownloadSummary) -> None:
        """
        Log an overview of the files recorded in a download summary.
        """
        self.logger.info("")
        self.logger.info("[bold cyan]Download Summary[/bold cyan]")
        if download_summary.has_updates():
            # One line per file: new files first, then modified, then deleted
            for added in download_summary.new_files:
                self.new_file(added)
            for changed in download_summary.modified_files:
                self.modified_file(changed)
            for removed in download_summary.deleted_files:
                self.deleted_file(removed)
        else:
            self.logger.info("[bold dim]Nothing changed![/bold dim]")
    def starting_synchronizer(
            self,
            target_directory: PathLike,
--- a/PFERD/organizer.py
+++ b/PFERD/organizer.py
@ -5,10 +5,12 @@ A organizer is bound to a single directory.
 import filecmp
 import logging
 import os
 import shutil
 from pathlib import Path, PurePath
-from typing import List, Set
+from typing import List, Optional, Set
 from .download_summary import DownloadSummary
 from .location import Location
 from .logging import PrettyLogger
 from .utils import prompt_yes_no
@ -32,10 +34,28 @@ class Organizer(Location):
        # Keep the root dir
        self._known_files.add(path.resolve())
-    def accept_file(self, src: Path, dst: PurePath) -> None:
+        self.download_summary = DownloadSummary()
-        """Move a file to this organizer and mark it."""
+
-        src_absolute = src.resolve()
+    def accept_file(self, src: Path, dst: PurePath) -> Optional[Path]:
-        dst_absolute = self.resolve(dst)
+        """
        Move a file to this organizer and mark it.
        Returns the path the file was moved to, to allow the caller to adjust the metadata.
        As you might still need to adjust the metadata when the file was identical
        (e.g. update the timestamp), the path is also returned in this case.
        In all other cases (ignored, not overwritten, etc.) this method returns None.
        """
        # Windows limits the path length to 260 for *some* historical reason
        # If you want longer paths, you will have to add the "\\?\" prefix in front of
        # your path...
        # See:
        # https://docs.microsoft.com/en-us/windows/win32/fileio/naming-a-file#maximum-path-length-limitation
        if os.name == 'nt':
            src_absolute = Path("\\\\?\\" + str(src.resolve()))
            dst_absolute = Path("\\\\?\\" + str(self.resolve(dst)))
        else:
            src_absolute = src.resolve()
            dst_absolute = self.resolve(dst)
        if not src_absolute.exists():
            raise FileAcceptException("Source file does not exist")
@ -49,7 +69,7 @@ class Organizer(Location):
            PRETTY.warning(f"File {str(dst_absolute)!r} was already written!")
            if not prompt_yes_no(f"Overwrite file?", default=False):
                PRETTY.ignored_file(dst_absolute, "file was written previously")
-                return
+                return None
        # Destination file is directory
        if dst_absolute.exists() and dst_absolute.is_dir():
@ -57,7 +77,7 @@ class Organizer(Location):
                shutil.rmtree(dst_absolute)
            else:
                PRETTY.warning(f"Could not add file {str(dst_absolute)!r}")
-                return
+                return None
        # Destination file exists
        if dst_absolute.exists() and dst_absolute.is_file():
@ -65,10 +85,12 @@ class Organizer(Location):
                # Bail out, nothing more to do
                PRETTY.ignored_file(dst_absolute, "same file contents")
                self.mark(dst)
-                return
+                return dst_absolute
            self.download_summary.add_modified_file(dst_absolute)
            PRETTY.modified_file(dst_absolute)
        else:
            self.download_summary.add_new_file(dst_absolute)
            PRETTY.new_file(dst_absolute)
        # Create parent dir if needed
@ -80,6 +102,8 @@ class Organizer(Location):
        self.mark(dst)
        return dst_absolute
    def mark(self, path: PurePath) -> None:
        """Mark a file as used so it will not get cleaned up."""
        absolute_path = self.resolve(path)
@ -115,9 +139,9 @@ class Organizer(Location):
        if start_dir.resolve() not in self._known_files and dir_empty:
            start_dir.rmdir()
-    @staticmethod
+    def _delete_file_if_confirmed(self, path: Path) -> None:
    def _delete_file_if_confirmed(path: Path) -> None:
        prompt = f"Do you want to delete {path}"
        if prompt_yes_no(prompt, False):
            self.download_summary.add_deleted_file(path)
            path.unlink()
--- a/PFERD/pferd.py
+++ b/PFERD/pferd.py
@ -9,6 +9,7 @@ from typing import Callable, List, Optional, Union
 from .cookie_jar import CookieJar
 from .diva import (DivaDownloader, DivaDownloadStrategy, DivaPlaylistCrawler,
                   diva_download_new)
 from .download_summary import DownloadSummary
 from .errors import FatalException, swallow_and_print_errors
 from .ilias import (IliasAuthenticator, IliasCrawler, IliasDirectoryFilter,
                    IliasDownloader, IliasDownloadInfo, IliasDownloadStrategy,
@ -42,10 +43,10 @@ class Pferd(Location):
    ):
        super().__init__(Path(base_dir))
        self._download_summary = DownloadSummary()
        self._tmp_dir = TmpDir(self.resolve(tmp_dir))
        self._test_run = test_run
    @staticmethod
    def enable_logging() -> None:
        """
@ -54,7 +55,6 @@ class Pferd(Location):
        enable_logging()
    @staticmethod
    def _print_transformables(transformables: List[TF]) -> None:
        LOGGER.info("")
@ -72,7 +72,8 @@ class Pferd(Location):
            dir_filter: IliasDirectoryFilter,
            transform: Transform,
            download_strategy: IliasDownloadStrategy,
-            clean: bool = True
+            timeout: int,
            clean: bool = True,
    ) -> Organizer:
        # pylint: disable=too-many-locals
        cookie_jar = CookieJar(to_path(cookies) if cookies else None)
@ -81,7 +82,8 @@ class Pferd(Location):
        organizer = Organizer(self.resolve(to_path(target)))
        crawler = IliasCrawler(base_url, session, authenticator, dir_filter)
-        downloader = IliasDownloader(tmp_dir, organizer, session, authenticator, download_strategy)
+        downloader = IliasDownloader(tmp_dir, organizer, session,
                                     authenticator, download_strategy, timeout)
        cookie_jar.load_cookies()
        info = crawl_function(crawler)
@ -112,6 +114,7 @@ class Pferd(Location):
            password: Optional[str] = None,
            download_strategy: IliasDownloadStrategy = download_modified_or_new,
            clean: bool = True,
            timeout: int = 5,
    ) -> Organizer:
        """
        Synchronizes a folder with the ILIAS instance of the KIT.
@ -137,11 +140,14 @@ class Pferd(Location):
                be downloaded. Can save bandwidth and reduce the number of requests.
                (default: {download_modified_or_new})
            clean {bool} -- Whether to clean up when the method finishes.
            timeout {int} -- The download timeout for opencast videos. Sadly needed due to a
                requests bug.
        """
        # This authenticator only works with the KIT ilias instance.
        authenticator = KitShibbolethAuthenticator(username=username, password=password)
        PRETTY.starting_synchronizer(target, "ILIAS", course_id)
-        return self._ilias(
+
        organizer = self._ilias(
            target=target,
            base_url="https://ilias.studium.kit.edu/",
            crawl_function=lambda crawler: crawler.crawl_course(course_id),
@ -151,8 +157,19 @@ class Pferd(Location):
            transform=transform,
            download_strategy=download_strategy,
            clean=clean,
            timeout=timeout
        )
        self._download_summary.merge(organizer.download_summary)
        return organizer
    def print_summary(self) -> None:
        """
        Log the download summary accumulated by this Pferd instance.
        """
        accumulated = self._download_summary
        PRETTY.summary(accumulated)
    @swallow_and_print_errors
    def ilias_kit_personal_desktop(
            self,
@ -164,6 +181,7 @@ class Pferd(Location):
            password: Optional[str] = None,
            download_strategy: IliasDownloadStrategy = download_modified_or_new,
            clean: bool = True,
            timeout: int = 5,
    ) -> Organizer:
        """
        Synchronizes a folder with the ILIAS instance of the KIT. This method will crawl the ILIAS
@ -188,11 +206,14 @@ class Pferd(Location):
                be downloaded. Can save bandwidth and reduce the number of requests.
                (default: {download_modified_or_new})
            clean {bool} -- Whether to clean up when the method finishes.
            timeout {int} -- The download timeout for opencast videos. Sadly needed due to a
                requests bug.
        """
        # This authenticator only works with the KIT ilias instance.
        authenticator = KitShibbolethAuthenticator(username=username, password=password)
        PRETTY.starting_synchronizer(target, "ILIAS", "Personal Desktop")
-        return self._ilias(
+
        organizer = self._ilias(
            target=target,
            base_url="https://ilias.studium.kit.edu/",
            crawl_function=lambda crawler: crawler.crawl_personal_desktop(),
@ -202,8 +223,77 @@ class Pferd(Location):
            transform=transform,
            download_strategy=download_strategy,
            clean=clean,
            timeout=timeout
        )
        self._download_summary.merge(organizer.download_summary)
        return organizer
    @swallow_and_print_errors
    def ilias_kit_folder(
            self,
            target: PathLike,
            full_url: str,
            dir_filter: IliasDirectoryFilter = lambda x, y: True,
            transform: Transform = lambda x: x,
            cookies: Optional[PathLike] = None,
            username: Optional[str] = None,
            password: Optional[str] = None,
            download_strategy: IliasDownloadStrategy = download_modified_or_new,
            clean: bool = True,
            timeout: int = 5,
    ) -> Organizer:
        """
        Synchronizes a folder with a given folder on the ILIAS instance of the KIT.
        Arguments:
            target {Path}  -- the target path to write the data to
            full_url {str} -- the full url of the folder/videos/course to crawl. It must
                point at the host "ilias.studium.kit.edu" via https.
        Keyword Arguments:
            dir_filter {IliasDirectoryFilter} -- A filter for directories. Will be applied on the
                crawler level, these directories and all of their content is skipped.
                (default: a filter that accepts every directory)
            transform {Transform} -- A transformation function for the output paths. Return None
                to ignore a file. (default: the identity transform)
            cookies {Optional[Path]} -- The path to store and load cookies from.
                (default: {None})
            username {Optional[str]} -- The SCC username. If none is given, it will prompt
                the user. (default: {None})
            password {Optional[str]} -- The SCC password. If none is given, it will prompt
                the user. (default: {None})
            download_strategy {IliasDownloadStrategy} -- A function to determine which files need
                to be downloaded. Can save bandwidth and reduce the number of requests.
                (default: {download_modified_or_new})
            clean {bool} -- Whether to clean up when the method finishes. (default: {True})
            timeout {int} -- The download timeout for opencast videos. Sadly needed due to a
                requests bug. (default: {5})
        Returns:
            The organizer produced by the synchronization. Its download summary is merged
            into the summary of this Pferd instance before it is returned.
        Raises:
            FatalException -- if full_url does not point at the KIT ILIAS instance. The
                method is wrapped in swallow_and_print_errors, whose handling of the
                exception is not visible here.
        """
        # Imported locally so this method does not rely on the module's import block
        from urllib.parse import urlparse
        # This authenticator only works with the KIT ilias instance.
        authenticator = KitShibbolethAuthenticator(username=username, password=password)
        PRETTY.starting_synchronizer(target, "ILIAS", "An ILIAS element by url")
        # A bare prefix check also matches look-alike hosts such as
        # "https://ilias.studium.kit.edu.example.org" or URLs that smuggle the real
        # target behind an "@". Comparing the parsed hostname closes that gap while
        # still rejecting everything the prefix check rejected.
        try:
            hostname = urlparse(full_url).hostname
        except ValueError:
            # urlparse rejects some malformed URLs outright; treat them as invalid
            hostname = None
        is_kit_ilias = (
            full_url.startswith("https://ilias.studium.kit.edu")
            and hostname == "ilias.studium.kit.edu"
        )
        if not is_kit_ilias:
            raise FatalException("Not a valid KIT ILIAS URL")
        organizer = self._ilias(
            target=target,
            base_url="https://ilias.studium.kit.edu/",
            crawl_function=lambda crawler: crawler.recursive_crawl_url(full_url),
            authenticator=authenticator,
            cookies=cookies,
            dir_filter=dir_filter,
            transform=transform,
            download_strategy=download_strategy,
            clean=clean,
            timeout=timeout
        )
        # Fold this run's results into the Pferd-wide summary used by print_summary
        self._download_summary.merge(organizer.download_summary)
        return organizer
    @swallow_and_print_errors
    def diva_kit(
            self,
--- a/PFERD/progress.py
+++ b/PFERD/progress.py
@ -7,8 +7,7 @@ from types import TracebackType
 from typing import Optional, Type
 import requests
-from rich.console import Console, ConsoleOptions, Control, RenderResult
+from rich.console import Console
 from rich.live_render import LiveRender
 from rich.progress import (BarColumn, DownloadColumn, Progress, TaskID,
                           TextColumn, TimeRemainingColumn,
                           TransferSpeedColumn)
@ -23,7 +22,8 @@ _progress: Progress = Progress(
    TransferSpeedColumn(),
    "•",
    TimeRemainingColumn(),
-    console=Console(file=sys.stdout)
+    console=Console(file=sys.stdout),
    transient=True
 )
@ -61,18 +61,6 @@ def progress_for(settings: Optional[ProgressSettings]) -> 'ProgressContextManage
    return ProgressContextManager(settings)
 class _OneLineUp(LiveRender):
    """
    Renderable that emits a control code moving the cursor up by one line.
    """
    def __init__(self) -> None:
        # Placeholder renderable; the text itself says it is not meant to be shown
        super().__init__("not rendered")
    def __console__(self, console: Console, options: ConsoleOptions) -> RenderResult:
        cursor_up = Control(f"\r\x1b[1A")
        yield cursor_up
 class ProgressContextManager:
    """
    A context manager used for displaying progress.
@ -113,9 +101,6 @@ class ProgressContextManager:
            _progress.stop()
            _progress.refresh()
            # And we existed, so remove the line above (remove_task leaves one behind)
            Console().print(_OneLineUp())
        return None
    def advance(self, amount: float) -> None:
--- a/README.md
+++ b/README.md
@ -2,35 +2,65 @@
 **P**rogramm zum **F**lotten, **E**infachen **R**unterladen von **D**ateien
 - [Quickstart with `sync_url`](#quickstart-with-sync_url)
 - [Installation](#installation)
    - [Upgrading from 2.0.0 to 2.1.0+](#upgrading-from-200-to-210)
 - [Example setup](#example-setup)
 - [Usage](#usage)
    - [General concepts](#general-concepts)
    - [Constructing transforms](#constructing-transforms)
        - [Transform creators](#transform-creators)
        - [Transform combinators](#transform-combinators)
    - [A short, but commented example](#a-short-but-commented-example)
 ## Quickstart with `sync_url`
 The `sync_url` program allows you to just synchronize a given ILIAS URL (of a
 course, a folder, your personal desktop, etc.) without any extra configuration
 or setting up. Download the program, open ILIAS, copy the URL from the address
 bar and pass it to sync_url.
 It bundles everything it needs in one executable and is easy to
 use, but doesn't expose all the configuration options and tweaks a full install
 does.
 1. Download the `sync_url` binary from the [latest release](https://github.com/Garmelon/PFERD/releases/latest).
 2. Run the binary in your terminal (`./sync_url` or `sync_url.exe` in the CMD) to see the help and use it. I'd recommend using the `--cookies` option.  
  If you are on **Linux/Mac**, you need to *make the file executable* using `chmod +x <file>`.  
  If you are on **Mac**, you need to allow this unverified program to run (see e.g. [here](https://www.switchingtomac.com/tutorials/osx/how-to-run-unverified-apps-on-macos/))
 ## Installation
 Ensure that you have at least Python 3.8 installed.
 To install PFERD or update your installation to the latest version, run this
-wherever you want to install/have installed PFERD:
+wherever you want to install or have already installed PFERD:
 ```
-$ pip install git+https://github.com/Garmelon/PFERD@v2.0.0
+$ pip install git+https://github.com/Garmelon/PFERD@v2.4.1
 ```
-The use of [venv](https://docs.python.org/3/library/venv.html) is recommended.
+The use of [venv] is recommended.
 [venv]: https://docs.python.org/3/library/venv.html
 ### Upgrading from 2.0.0 to 2.1.0+
 - The `IliasDirectoryType` type was renamed to `IliasElementType` and is now far more detailed.
  The new values are: `REGULAR_FOLDER`, `VIDEO_FOLDER`, `EXERCISE_FOLDER`, `REGULAR_FILE`, `VIDEO_FILE`, `FORUM`, `EXTERNAL_LINK`.
 - Forums and external links are skipped automatically if you use the `kit_ilias` helper.
 ## Example setup
 In this example, `python3` refers to at least Python 3.8.
 If you just want to get started and crawl *your entire ILIAS Desktop* instead
 of a given set of courses, please replace `example_config.py` with
 `example_config_personal_desktop.py` in all of the instructions below (`curl` call and
 `python3` run command).
 A full example setup and initial use could look like:
 ```
 $ mkdir Vorlesungen
 $ cd Vorlesungen
 $ python3 -m venv .venv
 $ .venv/bin/activate
-$ pip install git+https://github.com/Garmelon/PFERD@v2.0.0
+$ pip install git+https://github.com/Garmelon/PFERD@v2.4.1
-$ curl -O https://raw.githubusercontent.com/Garmelon/PFERD/v2.0.0/example_config.py
+$ curl -O https://raw.githubusercontent.com/Garmelon/PFERD/v2.4.1/example_config.py
 $ python3 example_config.py
 $ deactivate
 ```
@ -43,50 +73,93 @@ $ python3 example_config.py
 $ deactivate
 ```
 If you just want to get started and crawl *your entire ILIAS Desktop* instead
 of a given set of courses, please replace `example_config.py` with
 `example_config_personal_desktop.py` in all of the instructions below (`curl` call and
 `python3` run command).
 ## Usage
 ### General concepts
 A PFERD config is a normal python file that starts multiple *synchronizers*
 which do all the heavy lifting. While you can create and wire them up manually,
 you are encouraged to use the helper methods provided in `PFERD.Pferd`.
 The synchronizers take some input arguments specific to their service and a
-*transformer*. The transformer receives the computed path of an element in
+*transform*. The transform receives the computed path of an element in ILIAS and
-ILIAS and can return either an output path (so you can rename files or move
+can return either an output path (so you can rename files or move them around as
-them around as you wish) or `None` if you do not want to save the given file.
+you wish) or `None` if you do not want to save the given file.
 Additionally the ILIAS synchronizer allows you to define a *crawl filter*. This
-filter also receives the computed path as the input, but is only called or
+filter also receives the computed path as the input, but is only called for
-*directoties*. If you return `True`, the directory will be crawled and
+*directories*. If you return `True`, the directory will be crawled and
 searched. If you return `False` the directory will be ignored and nothing in it
-will be passed to the transformer.
+will be passed to the transform.
-In order to help you with writing your own transformers and filters, PFERD
+### Constructing transforms
 ships with a few powerful building blocks:
-| Method | Description |
+While transforms are just normal python functions, writing them by hand can
-|--------|-------------|
+quickly become tedious. In order to help you with writing your own transforms
-| `glob`   | Returns a transform that returns `None` if the glob does not match and the unmodified path otherwise. |
+and filters, PFERD defines a few useful transform creators and combinators in
-| `predicate`   | Returns a transform that returns `None` if the predicate does not match the path and the unmodified path otherwise. |
+the `PFERD.transform` module:
 | `move_dir(source, target)`   | Returns a transform that moves all files from the `source` to the `target` dir. |
 | `move(source, target)`   | Returns a transform that moves the `source` file to `target`. |
 | `rename(old, new)`   | Renames a single file. |
 | `re_move(regex, sub)`   | Moves all files matching the given regular expression. The different captured groups are available under their index and can be used together with normal python format methods: `re_move(r"Blatt (\d+)\.pdf", "Blätter/Blatt_{1:0>2}.pdf"),`. |
 | `re_rename(old, new)`   | Same as `re_move` but operates on the path *names* instead of the full path. |
-And PFERD also offers a few combinator functions:
+#### Transform creators
-* **`keep`**  
+These methods let you create a few basic transform building blocks:
-  `keep` just returns the input path unchanged. It can be very useful as the
+
-  last argument in an `attempt` call, to leave everything not matching a rule
+- **`glob(glob)`**  
-  unchanged.
+  Creates a transform that returns the unchanged path if the glob matches the path and `None` otherwise.
-* **`optionally(transformer)`**  
+  See also [Path.match].  
-  Wraps a given transformer and returns its result if it is not `None`.
+  Example: `glob("Übung/*.pdf")`
 - **`predicate(pred)`**  
  Creates a transform that returns the unchanged path if `pred(path)` returns a truthy value.
  Returns `None` otherwise.  
  Example: `predicate(lambda path: len(path.parts) == 3)`
 - **`move_dir(source, target)`**  
  Creates a transform that moves all files from the `source` to the `target` directory.  
  Example: `move_dir("Übung/", "Blätter/")`
 - **`move(source, target)`**  
  Creates a transform that moves the `source` file to `target`.  
  Example: `move("Vorlesung/VL02_Automten.pdf", "Vorlesung/VL02_Automaten.pdf")`
 - **`rename(source, target)`**  
  Creates a transform that renames all files named `source` to `target`.
  This transform works on the file names, not paths, and thus works no matter where the file is located.  
  Example: `rename("VL02_Automten.pdf", "VL02_Automaten.pdf")`
 - **`re_move(regex, target)`**  
  Creates a transform that moves all files matching `regex` to `target`.
  The transform applies `str.format` to the `target` string with the contents of the capturing groups before returning it.
  The capturing groups can be accessed via their index.
  See also [Match.group].  
  Example: `re_move(r"Übung/Blatt (\d+)\.pdf", "Blätter/Blatt_{1:0>2}.pdf")`
 - **`re_rename(regex, target)`**  
  Creates a transform that renames all files matching `regex` to `target`.
  This transform works on the file names, not paths, and thus works no matter where the file is located.  
  Example: `re_rename(r"VL(\d+)(.*)\.pdf", "Vorlesung_Nr_{1}__{2}.pdf")`
 All movement or rename transforms above return `None` if a file doesn't match
 their movement or renaming criteria. This enables them to be used as building
 blocks to build up more complex transforms.
 In addition, `PFERD.transform` also defines the `keep` transform which returns its input path unchanged.
 This behaviour can be very useful when creating more complex transforms.
 See below for example usage.
 [Path.match]: https://docs.python.org/3/library/pathlib.html#pathlib.Path.match
 [Match.group]: https://docs.python.org/3/library/re.html#re.Match.group
 #### Transform combinators
 These methods let you combine transforms into more complex transforms:
 - **`optionally(transform)`**  
  Wraps a given transform and returns its result if it is not `None`.
  Otherwise returns the input path unchanged.
-* **`do(transformers)`**  
+  See below for example usage.
-  `do` accepts a series of transformers and applies them in the given order to
+- **`do(transforms)`**  
-  the result of the previous one. If any transformer returns `None`, do
+  Accepts a series of transforms and applies them in the given order to the result of the previous one.
-  short-circuits and also returns `None`. This can be used to perform multiple
+  If any transform returns `None`, `do` short-circuits and also returns `None`.
-  renames in a row:
+  This can be used to perform multiple renames in a row:
  ```py
  do(
      # Move them
@ -95,13 +168,12 @@ And PFERD also offers a few combinator functions:
      optionally(re_rename("(.*).m4v.mp4", "{1}.mp4")),
      # Remove the 'dbs' prefix (if they have any)
      optionally(re_rename("(?i)dbs-(.+)", "{1}")),
-  ),
+  )
  ```
-* **`attempt(transformers)`**  
+- **`attempt(transforms)`**  
-  `attempt` applies the passed transformers in the given order until it finds
+  Applies the passed transforms in the given order until it finds one that does not return `None`.
-  one that does not return `None`. If it does not find any, it returns `None`.
+  If it does not find any, it returns `None`.
-  This can be used to give a list of possible transformations and it will
+  This can be used to give a list of possible transformations and automatically pick the first one that fits:
  automatically pick the first one that fits:
  ```py
  attempt(
      # Move all videos. If a video is passed in, this `re_move` will succeed
@ -114,17 +186,26 @@ And PFERD also offers a few combinator functions:
  )
  ```
-All of these combinators are used in the provided example config, if you want
+All of these combinators are used in the provided example configs, if you want
-to see some more true-to-live usages.
+to see some more real-life usages.
 ### A short, but commented example
 ```py
-def filter_course(path: PurePath) -> bool:
+from pathlib import Path, PurePath
-    # Note that glob returns a Transformer
+from PFERD import Pferd
-    #  - a function from PurePath -> Optional[PurePath]
+from PFERD.ilias import IliasElementType
-    # So we need to apply the result of 'glob' to our input path.
+from PFERD.transform import *
-    # We need to crawl the 'Tutorien' folder as it contains the one we want.
+
 # This filter will later be used by the ILIAS crawler to decide whether it
 # should crawl a directory (or directory-like structure).
 def filter_course(path: PurePath, type: IliasElementType) -> bool:
    # Note that glob returns a Transform, which is a function from PurePath ->
    # Optional[PurePath]. Because of this, we need to apply the result of
    # 'glob' to our input path. The returned value will be truthy (a Path) if
    # the transform succeeded, or `None` if it failed.
    # We need to crawl the 'Tutorien' folder as it contains one that we want.
    if glob("Tutorien/")(path):
        return True
    # If we found 'Tutorium 10', keep it!
@ -137,21 +218,35 @@ def filter_course(path: PurePath) -> bool:
    # All other dirs (including subdirs of 'Tutorium 10') should be searched :)
    return True
-enable_logging() # needed once before calling a Pferd method
+
-# Create a Pferd instance rooted in the same directory as the script file
+# This transform will later be used to rename a few files. It can also be used
-# This is not a test run, so files will be downloaded (default, can be omitted)
+# to ignore some files.
 transform_course = attempt(
    # We don't care about the other tuts and would instead prefer a cleaner
    # directory structure.
    move_dir("Tutorien/Tutorium 10/", "Tutorium/"),
    # We don't want to modify any other files, so we're going to keep them
    # exactly as they are.
    keep
 )
 # Enable and configure the text output. Needs to be called before calling any
 # other PFERD methods.
 Pferd.enable_logging()
 # Create a Pferd instance rooted in the same directory as the script file. This
 # is not a test run, so files will be downloaded (default, can be omitted).
 pferd = Pferd(Path(__file__).parent, test_run=False)
 # Use the ilias_kit helper to synchronize an ILIAS course
 pferd.ilias_kit(
-    # The folder all of the course's content should be placed in
+    # The directory that all of the downloaded files should be placed in
-    Path("My cool course"),
+    "My_cool_course/",
    # The course ID (found in the URL when on the course page in ILIAS)
    "course id",
-    # A path to a cookie jar. If you synchronize multiple ILIAS courses, setting this
+    # A path to a cookie jar. If you synchronize multiple ILIAS courses,
-    # to a common value requires you to only login once.
+    # setting this to a common value requires you to only log in once.
    cookies=Path("ilias_cookies.txt"),
-    # A transform to apply to all found paths
+    # A transform can rename, move or filter out certain files
    transform=transform_course,
    # A crawl filter limits what paths the crawler searches
    dir_filter=filter_course,
--- a/example_config.py
+++ b/example_config.py
@ -2,7 +2,7 @@ import argparse
 from pathlib import Path, PurePath
 from PFERD import Pferd
-from PFERD.ilias import IliasDirectoryType
+from PFERD.ilias import IliasElementType
 from PFERD.transform import (attempt, do, glob, keep, move, move_dir,
                             optionally, re_move, re_rename)
@ -49,7 +49,7 @@ tf_ss_2020_pg = attempt(
 )
-def df_ss_2020_or1(path: PurePath, _type: IliasDirectoryType) -> bool:
+def df_ss_2020_or1(path: PurePath, _type: IliasElementType) -> bool:
    if glob("Tutorien/")(path):
        return True
    if glob("Tutorien/Tutorium 10, dienstags 15:45 Uhr/")(path):
@ -124,6 +124,8 @@ def main() -> None:
            cookies="ilias_cookies.txt",
        )
    # Prints a summary listing all new, modified or deleted files
    pferd.print_summary()
 if __name__ == "__main__":
    main()
--- a/example_config_personal_desktop.py
+++ b/example_config_personal_desktop.py
@ -30,6 +30,9 @@ def main() -> None:
        cookies="ilias_cookies.txt",
    )
    # Prints a summary listing all new, modified or deleted files
    pferd.print_summary()
 if __name__ == "__main__":
    main()
--- a/setup.py
+++ b/setup.py
@ -2,12 +2,12 @@ from setuptools import find_packages, setup
 setup(
    name="PFERD",
-    version="2.0.0",
+    version="2.4.1",
    packages=find_packages(),
    install_requires=[
        "requests>=2.21.0",
        "beautifulsoup4>=4.7.1",
-        "rich>=1.0.0"
+        "rich>=2.1.0"
    ],
 )
--- a/sync_url.py
+++ b/sync_url.py
@ -0,0 +1,67 @@
 #!/usr/bin/env python
 """
 A simple script to download a course or folder by URL from ILIAS.
 """
 import argparse
 from pathlib import Path
 from urllib.parse import urlparse
 from PFERD import Pferd
 from PFERD.cookie_jar import CookieJar
 from PFERD.ilias import (IliasCrawler, IliasElementType,
                         KitShibbolethAuthenticator)
 from PFERD.utils import to_path
 def main() -> None:
    """
    Parse the command line arguments and synchronize the ILIAS element at the given URL.

    If no target folder is passed on the command line, the element's name is
    looked up through the crawler and used as the target folder instead.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--test-run", action="store_true")
    parser.add_argument('-c', '--cookies', nargs='?', default=None, help="File to store cookies in")
    # This is a plain on/off switch. Declared with nargs='?' (and no const), a bare
    # "--no-videos" was parsed as None, so the filter below never triggered, and
    # "--no-videos <url>" consumed the URL as the option's value.
    parser.add_argument('--no-videos', action="store_true", help="Don't download videos")
    parser.add_argument('url', help="URL to the course page")
    parser.add_argument('folder', nargs='?', default=None, help="Folder to put stuff into")
    args = parser.parse_args()

    url = urlparse(args.url)

    # Without a cookie file the jar is created with None instead of a path
    cookie_jar = CookieJar(to_path(args.cookies) if args.cookies else None)
    session = cookie_jar.create_session()
    authenticator = KitShibbolethAuthenticator()
    # The crawler only gets the scheme and host of the URL; the last argument is a
    # callback that always returns True
    crawler = IliasCrawler(url.scheme + '://' + url.netloc, session,
                           authenticator, lambda x, y: True)

    cookie_jar.load_cookies()

    if args.folder is not None:
        folder = args.folder
        # Initialize pferd at the *parent of the passed folder*
        # This is needed so Pferd's internal protections against escaping the working directory
        # do not trigger (e.g. if somebody names a file in ILIAS '../../bad thing.txt')
        pferd = Pferd(Path(Path(__file__).parent, folder).parent, test_run=args.test_run)
    else:
        # fetch course name from ilias
        folder = crawler.find_element_name(args.url)
        cookie_jar.save_cookies()

        # Initialize pferd at the location of the script
        pferd = Pferd(Path(__file__).parent, test_run=args.test_run)

    def dir_filter(_: Path, element: IliasElementType) -> bool:
        """Reject video files and video folders when --no-videos was given, accept all else."""
        if args.no_videos:
            return element not in [IliasElementType.VIDEO_FILE, IliasElementType.VIDEO_FOLDER]
        return True

    pferd.enable_logging()
    # fetch
    pferd.ilias_kit_folder(
        target=folder,
        full_url=args.url,
        cookies=args.cookies,
        dir_filter=dir_filter
    )
 if __name__ == "__main__":
    main()
Author	SHA1	Message	Date
I-Al-Istannen	9ea03bda3e	Adjust release names	2020-10-30 18:14:02 +01:00
I-Al-Istannen	07de5bea8b	Explain how to run sync_url on Mac	2020-10-30 17:53:55 +01:00
I-Al-Istannen	f0d572c110	Fix a few typos in release body	2020-10-30 17:32:04 +01:00
I-Al-Istannen	076067e22d	Bump version	2020-10-30 17:28:34 +01:00
I-Al-Istannen	ebb6e63c5c	Add MacOS to CI	2020-10-30 17:23:27 +01:00
I-Al-Istannen	0c3f35a2d2	Do not provide a shorthand for "no-videos"	2020-10-30 17:01:10 +01:00
I-Al-Istannen	521890ae78	Update README.md	2020-10-28 23:24:18 +01:00
I-Al-Istannen	3f7c73df80	Release new minor version	2020-10-07 09:32:17 +02:00
I-Al-Istannen	43100f69d5	Merge pull request #10 from Garmelon/sync-url Add "Sync url" script from Christophe and release it automatically	2020-10-07 09:29:48 +02:00
I-Al-Istannen	d73c778b0a	Add sync_url instructions to README	2020-10-06 17:50:28 +02:00
I-Al-Istannen	73c3eb0984	Add option to skip videos in sync_url	2020-10-06 17:20:47 +02:00
I-Al-Istannen	a519cbe05d	Add sync_url workflow	2020-10-06 12:42:20 +02:00
I-Al-Istannen	b3ad9783c4	Ignore pyinstaller files	2020-10-06 11:43:20 +02:00
I-Al-Istannen	c1ccb6c53e	Allow crawling videos with sync_url	2020-10-06 10:46:06 +02:00
I-Al-Istannen	51a713fa04	Allow crawling courses or folders with sync_url Video folders do not work, if they are passed directly. Their containing folder must be specified instead.	2020-09-28 20:00:01 +02:00
I-Al-Istannen	74ea039458	Fix a few lint errors and pferd quirks in sync_url	2020-09-28 19:42:59 +02:00
I-Al-Istannen	aaa6a2b6a4	Merge pull request #9 from TheChristophe/master Add simple course-download-by-url script	2020-09-28 19:25:45 +02:00
I-Al-Istannen	e32a49480b	Expose methods to look up course/element names by id / url	2020-09-28 19:16:52 +02:00
Christophe	be65051f9d	Support downloading folders in get-by-url script	2020-09-28 18:16:33 +02:00
Christophe	3387bc5f20	Add simple course-download-by-url script	2020-09-28 17:49:36 +02:00
I-Al-Istannen	3f0ae729d6	Expand "is course" check to not download magazines or other weird things	2020-09-28 16:43:58 +02:00
I-Al-Istannen	8e8c1c031a	Version 2.3.0	2020-09-03 21:47:10 +02:00
I-Al-Istannen	55678d7fee	Pass string down to FileCookieJar Some python versions just can't handle it despite the documentation stating they should.	2020-08-12 09:09:14 +02:00
I-Al-Istannen	a57ee8b96b	Add timeout to video downloads to work around requests IPv6 bug	2020-08-11 14:40:30 +02:00
Joscha	e367da925e	Bump version to 2.2.1	2020-07-28 19:55:32 +00:00
Joscha	77a109bb7e	Fix ilias shibboleth authenticator The shibboleth site got a visual overhaul that slightly changed the classes of a form we need.	2020-07-28 19:13:51 +00:00
I-Al-Istannen	a3e1864a26	Allow long paths on windows If you start PFERD a few folders deep in your home directory, it is quite easy to reach the maximum path length limit on Windows (260 chars). This patch opts in to long paths ("\\?\" prefix) which lift that restriction at the cost of ugly path names.	2020-07-25 13:44:49 +02:00
I-Al-Istannen	41cbcc509c	Update version to 2.2.0	2020-07-15 22:47:44 +02:00
I-Al-Istannen	77874b432b	Also add personal_desktop to download summary	2020-07-15 22:47:44 +02:00
I-Al-Istannen	5c4c785e60	Fix HTML file downloading Previously PFERD thought any HTML file was a "Error, no access" page when downloading. Now it checks whether ILIAS sends a content-disposition header, telling the browser to download the file. If that is the case, it was just a HTML file uploaded to ILIAS. If it has no header, it is probably an error message.	2020-07-15 15:12:14 +02:00
I-Al-Istannen	2aed4f6d1f	Only query the dir_filter for directories	2020-07-13 13:36:12 +02:00
I-Al-Istannen	34152fbe54	Set mtime and atime to ILIAS dates where possible	2020-07-13 13:29:18 +02:00
Joscha	4047fe78f3	Fix README formatting	2020-07-11 18:22:33 +00:00
Joscha	c28347122e	Improve README - Added a table of contents - Reworked the transform section - Fixed the commented example	2020-07-11 18:16:33 +00:00
Joscha	5b38ab8cf1	Add MIT license	2020-07-08 09:46:27 +00:00
I-Al-Istannen	bb25d32f03	Fix typo in README	2020-06-29 16:18:33 +02:00
I-Al-Istannen	ecaedea709	Merge pull request #8 from pavelzw/master Fix version number	2020-06-26 17:52:05 +02:00
Pavel Zwerschke	f05d1b1261	Fix version number	2020-06-26 17:49:47 +02:00
I-Al-Istannen	6aaa3071f9	Update README with new version	2020-06-26 17:35:03 +02:00
I-Al-Istannen	c26c9352f1	Make DownloadSummary private, provide property accessors	2020-06-26 17:30:45 +02:00
I-Al-Istannen	d9ea688145	Use pretty logger for summaries	2020-06-26 17:24:36 +02:00
I-Al-Istannen	e8be6e498e	Add summary to example_config_personal_desktop	2020-06-26 17:24:36 +02:00
I-Al-Istannen	e4b1fac045	Satisfy pylint	2020-06-26 15:38:22 +02:00
Joscha	402ae81335	Fix type hints	2020-06-26 13:17:44 +00:00
Daniel Augustin	52f31e2783	Add type hints to DownloadSummary	2020-06-26 13:02:37 +02:00
Daniel Augustin	739522a151	Move download summary into a separate class	2020-06-25 23:07:11 +02:00
Daniel Augustin	6c034209b6	Add deleted files to summary	2020-06-25 22:00:28 +02:00
Daniel Augustin	f6fbd5e4bb	Add download summary	2020-06-25 19:19:34 +02:00
I-Al-Istannen	7024db1f13	Use transient progessbar This will ensure no pesky newline ends up in the output, even on windows.	2020-06-25 18:03:12 +02:00
I-Al-Istannen	23bfa42a0d	Never use the direct download button, as it is currently broken	2020-06-11 13:31:01 +02:00
I-Al-Istannen	fdb57884ed	Touch files with same content to update timestamps	2020-05-31 20:27:15 +02:00
I-Al-Istannen	f614b95a00	Adjust version in setup.py	2020-05-30 19:07:02 +02:00
I-Al-Istannen	8198c9ecaa	Reorder methods a bit	2020-05-30 19:06:36 +02:00
I-Al-Istannen	086b15d10f	Crawl a bit more iteratively	2020-05-30 15:47:15 +02:00
I-Al-Istannen	9d6ce331a5	Use IliasCrawlerEntry entries in the ilias scraper	2020-05-30 15:20:51 +02:00
I-Al-Istannen	821c7ade26	Move video url extraction logic to crawler	2020-05-30 00:22:31 +02:00
I-Al-Istannen	b969a1854a	Remove unneeded whitespace	2020-05-30 00:22:31 +02:00
I-Al-Istannen	62535b4452	Unpack videos in ILIAS downloader	2020-05-21 22:12:52 +02:00
I-Al-Istannen	c0056e5669	Correctly crawl video pages with multiple pages	2020-05-21 21:38:07 +02:00