Compare commits

...

372 Commits

Author SHA1 Message Date
I-Al-Istannen
a117126389 Fix video name deduplication 2023-12-09 23:08:42 +01:00
I-Al-Istannen
e9f8901520 Fix typos in ilias crawler and use set literals 2023-11-30 20:57:57 +01:00
I-Al-Istannen
266812f90e Move is_logged_in helper to kit_ilias_html 2023-11-16 11:19:20 +01:00
I-Al-Istannen
533bc27439 Bump version to 3.5.0 2023-09-13 23:13:30 +02:00
I-Al-Istannen
0113a0ca10 Update flake.lock 2023-09-13 22:23:36 +02:00
I-Al-Istannen
40f8a05ad6 Add .idea to gitignore 2023-09-13 22:23:36 +02:00
I-Al-Istannen
50b50513c6 Ignore SCORM learning modules 2023-08-29 13:51:19 +02:00
I-Al-Istannen
df3514cd03 Crawl paginated past meetings 2023-08-29 12:41:21 +02:00
I-Al-Istannen
ad53185247 Sanitize ascii control characters on windows 2023-08-29 12:41:15 +02:00
I-Al-Istannen
87b67e9271 Crawl files in the info tab 2023-08-29 12:41:15 +02:00
I-Al-Istannen
b54b3b979c Remove size suffix for content pages 2023-08-27 11:43:05 +02:00
I-Al-Istannen
2184ac8040 Add support for ILIAS mediacast listings 2023-08-27 11:43:05 +02:00
I-Al-Istannen
b3d412360b Add Nix flake 2023-08-26 23:54:19 +02:00
Mr. Pine
dbc2553b11 Add default show-not-deleted option
If set to `no`, PFERD won't print status or report messages for not deleted files
2023-08-26 18:43:01 +02:00
I-Al-Istannen
68c398f1fe Add support for ILIAS learning modules 2023-08-02 13:34:54 +02:00
I-Al-Istannen
123a57beec Fix mypy unreachable error in file_templates 2023-07-29 18:36:33 +02:00
I-Al-Istannen
d204dac8ce Detect unexpected root page redirects and abort operation 2023-07-29 18:36:33 +02:00
Mr. Pine
443f7fe839 Add no-delete-prompt-overwrite crawler conflict resolution option (#75) 2023-07-29 18:36:33 +02:00
I-Al-Istannen
0294ceb7d5 Update github action versions 2023-03-22 00:10:54 +01:00
I-Al-Istannen
6f30c6583d Fix crawling of cards without descriptions 2023-03-21 23:52:33 +01:00
I-Al-Istannen
467fc526e8 Fix crawling of file/video cards 2023-03-21 23:52:24 +01:00
I-Al-Istannen
722d2eb393 Fix crawling of courses with preselected timeline tab 2023-03-21 23:36:47 +01:00
Joscha
6d44aac278 Bump version to 3.4.3 2022-11-29 18:22:19 +01:00
c0derMo
55a2de6b88 Fix crawling English opencast 2022-11-29 18:13:56 +01:00
Joscha
c0d6d8b229 Use url after redirect for relative links 2022-11-21 18:10:45 +01:00
Joscha
635caa765d Fix typo
Thanks, burg113
2022-11-15 17:17:57 +01:00
Pavel Zwerschke
e69b55b349 Add more unofficial package managers (#66) 2022-11-04 12:18:26 +01:00
Joscha
07200bbde5 Document ilias web crawler's forums option 2022-10-31 14:12:27 +01:00
I-Al-Istannen
c020cccc64 Include found paths in "second path found" warning 2022-10-29 14:08:29 +02:00
Joscha
259cfc20cc Bump version to 3.4.2 2022-10-26 18:26:17 +02:00
Joscha
37b51a66d8 Update changelog 2022-10-26 18:22:37 +02:00
I-Al-Istannen
f47d2f11d8 Append trailing slash to kit-ipd links to ensure urljoin works as expected 2022-10-25 20:28:22 +02:00
I-Al-Istannen
1b6be6bd79 Handle content pages in cards 2022-10-24 18:37:26 +02:00
I-Al-Istannen
e1430e6298 Handle (and ignore) surveys 2022-10-24 18:37:26 +02:00
I-Al-Istannen
5fdd40204b Unwrap future meetings when ILIAS hides them behind a pagination 2022-10-24 14:33:58 +02:00
I-Al-Istannen
fb4631ba18 Fix ilias background login 2022-10-24 13:13:36 +02:00
I-Al-Istannen
d72fc2760b Handle empty forums 2022-10-24 13:12:17 +02:00
I-Al-Istannen
4a51aaa4f5 Fix forum crawling crashing for empty threads 2022-10-19 22:59:33 +02:00
Joscha
66a5b1ba02 Bump version to 3.4.1 2022-08-17 13:24:01 +02:00
I-Al-Istannen
aa5a3a10bc Adjust changelog 2022-08-14 21:48:59 +02:00
I-Al-Istannen
d9b111cec2 Correctly nest description entries 2022-08-14 21:45:33 +02:00
I-Al-Istannen
345f52a1f6 Detect new login button 2022-08-14 21:41:29 +02:00
Joscha
ed24366aba Add pass authenticator 2022-06-05 10:04:42 +02:00
I-Al-Istannen
46fb782798 Add forum crawling
This downloads all forum posts when needed and saves each thread in its
own html file, named after the thread title.
2022-05-24 23:43:53 +02:00
I-Al-Istannen
846c29aee1 Download page descriptions 2022-05-11 21:16:56 +02:00
I-Al-Istannen
a5015fe9b1 Correctly parse day-only meeting dates
I failed to recognize the correct format in the previous adjustment, so
this (hopefully) fixes it for good.
Meetings apparently don't always have a time portion.
2022-05-08 23:22:26 +02:00
Joscha
616b0480f7 Simplify IPD crawler link regex 2022-05-08 18:18:05 +02:00
I-Al-Istannen
2f0e04ce13 Adjust changelog 2022-05-05 22:57:55 +02:00
I-Al-Istannen
bcc537468c Fix crawling of expanded meetings
The last meeting on every page is expanded by default.
Its content is then shown inline *and* in the meeting page itself.
We should skip the inline content.
2022-05-05 22:53:37 +02:00
I-Al-Istannen
694ffb4d77 Fix meeting date parsing
Apparently the new pattern "<relative time qualifier>: <date>," was
added. This patch adds support for it.
2022-05-05 22:28:30 +02:00
Joscha
af2cc1169a Mention href for users of link_regex option 2022-05-05 14:36:03 +02:00
Joscha
bc3fa36637 Fix IPD crawler crashing on weird HTML comments 2022-05-05 14:35:42 +02:00
Joscha
afbd03f777 Fix docs 2022-05-05 14:35:42 +02:00
I-Al-Istannen
b8fe25c580 Add .cpp to ipd link regex 2022-05-04 14:19:26 +02:00
Joscha
a241672726 Bump version to 3.4.0 2022-05-01 22:29:06 +02:00
Joscha
a8f76e9be7 Use utf-8 for credential file 2022-04-29 23:15:12 +02:00
Joscha
b56475450d Use utf-8 for cookies 2022-04-29 23:12:41 +02:00
Joscha
aa74604d29 Use utf-8 for report 2022-04-29 23:11:27 +02:00
Joscha
d2e6d91880 Make PFERD executable via python -m 2022-04-27 22:52:50 +02:00
Joscha
602044ff1b Fix mypy errors and add missing await 2022-04-27 22:52:50 +02:00
Joscha
31631fb409 Increase minimum python version to 3.9 2022-04-27 22:52:50 +02:00
I-Al-Istannen
00db348218 Update changelog 2022-04-27 22:03:52 +02:00
I-Al-Istannen
a709280cbf Try to detect unsupported config file encoding
The encoding detection is quite rudimentary, but should detect the
default windows encoding in many cases.
2022-04-27 22:03:47 +02:00
I-Al-Istannen
a99ddaa0cc Read and write config in UTF-8 2022-04-27 21:47:51 +02:00
Joscha
ba3d299c05 Fix changelog 2022-04-27 21:26:24 +02:00
Joscha
07a21f80a6 Link to unofficial packages 2022-04-27 21:15:33 +02:00
I-Al-Istannen
f17b9b68f4 Add shibboleth authentication fix to changelog 2022-04-27 14:01:40 +02:00
I-Al-Istannen
a2831fbea2 Fix shib authentication
Authentication failed previously if the shib session was still valid.
If Shibboleth gets a request and the session is still valid, it directly
responds without a second redirect.
2022-04-27 13:55:24 +02:00
I-Al-Istannen
da72863b47 Placate newer mypy 2022-04-03 13:19:08 +02:00
I-Al-Istannen
86e2e226dc Notify user when shibboleth presents new entitlements 2022-04-03 11:37:08 +02:00
I-Al-Istannen
7872fe5221 Fix tables with more columns than expected 2022-01-18 22:38:48 +01:00
Joscha
86947e4874 Bump version to 3.3.1 2022-01-15 15:11:22 +01:00
Joscha
4f022e2d19 Reword changelog 2022-01-15 15:06:02 +01:00
I-Al-Istannen
f47e7374d2 Use fixed windows path for video cache 2022-01-15 12:00:30 +01:00
I-Al-Istannen
57ec51e95a Fix login after shib url parser change 2022-01-14 20:17:27 +01:00
Joscha
0045124a4e Bump version to 3.3.0 2022-01-09 21:09:09 +01:00
I-Al-Istannen
9618aae83b Add content pages to changelog 2022-01-09 18:32:58 +01:00
I-Al-Istannen
33453ede2d Update dependency versions in setup.py 2022-01-09 18:31:42 +01:00
I-Al-Istannen
e467b38d73 Only reject 1970 timestamps on windows 2022-01-09 18:23:00 +01:00
I-Al-Istannen
e9d2d05030 Update changelog 2022-01-09 11:48:26 +01:00
I-Al-Istannen
4bf0c972e6 Update types for rich 11 2022-01-09 11:48:26 +01:00
I-Al-Istannen
4ee919625d Add rudimentary support for content pages 2022-01-08 20:47:35 +01:00
I-Al-Istannen
d30f25ee97 Detect shib login page as login page
And do not assume we are logged in...
2022-01-08 20:28:45 +01:00
I-Al-Istannen
10d9d74528 Bail out when crawling recursive courses 2022-01-08 20:28:30 +01:00
I-Al-Istannen
43c5453e10 Correctly crawl files on desktop
The files on the desktop do not include a download link, so we need to
rewrite it.
2022-01-08 20:00:53 +01:00
I-Al-Istannen
eb4de8ae0c Ignore 1970 dates as windows crashes when calling .timestamp() 2022-01-08 18:14:43 +01:00
I-Al-Istannen
e32c1f000f Fix mtime for single streams 2022-01-08 18:05:48 +01:00
I-Al-Istannen
5f527bc697 Remove Python 3.9 Pattern typehints 2022-01-08 17:14:40 +01:00
I-Al-Istannen
ced8b9a2d0 Fix some accordions 2022-01-08 16:58:30 +01:00
I-Al-Istannen
6f3cfd4396 Fix personal desktop crawling 2022-01-08 16:58:15 +01:00
I-Al-Istannen
462d993fbc Fix local video path cache (hopefully) 2022-01-08 00:27:48 +01:00
I-Al-Istannen
a99356f2a2 Fix video stream extraction 2022-01-08 00:27:34 +01:00
I-Al-Istannen
eac2e34161 Fix is_logged_in for ILIAS 7 2022-01-07 23:32:31 +01:00
I-Al-Istannen
a82a0b19c2 Collect crawler warnings/errors and include them in the report 2021-11-07 21:48:55 +01:00
I-Al-Istannen
90cb6e989b Do not download single videos if cache does not exist 2021-11-06 23:21:15 +01:00
I-Al-Istannen
6289938d7c Do not stop crawling files when encountering a CrawlWarning 2021-11-06 12:09:51 +01:00
I-Al-Istannen
13b8c3d9c6 Add regex option to config and CLI parser 2021-11-02 09:30:46 +01:00
I-Al-Istannen
88afe64a92 Refactor IPD crawler a bit 2021-11-02 01:25:01 +00:00
Julius Rüberg
6b2a657573 Fix IPD crawler for different subpages (#42)
This patch reworks the IPD crawler to support subpages which do not use
"/intern" for links and fetches the folder names from table headings.
2021-11-02 01:25:01 +00:00
Toorero
d6f38a61e1 Fixed minor spelling mistakes 2021-11-02 01:54:00 +01:00
I-Al-Istannen
ad3f4955f7 Update changelog 2021-10-30 18:14:39 +02:00
I-Al-Istannen
e42ab83d32 Add support for ILIAS cards 2021-10-30 18:13:44 +02:00
I-Al-Istannen
f9a3f9b9f2 Handle multi-stream videos 2021-10-30 18:12:29 +02:00
I-Al-Istannen
ef7d5ea2d3 Allow storing crawler-specific data in reports 2021-10-30 18:09:05 +02:00
lukasprobst
55ea304ff3 Disable interpolation of ConfigParser 2021-10-25 23:37:42 +02:00
Joscha
fee12b3d9e Fix changelog 2021-10-25 17:44:12 +00:00
I-Al-Istannen
6673077397 Add kit-ipd crawler 2021-10-21 13:20:21 +02:00
Joscha
742632ed8d Bump version to 3.2.0 2021-08-04 18:27:26 +00:00
Joscha
544d45cbc5 Catch non-critical exceptions at crawler top level 2021-07-13 15:42:11 +02:00
Joscha
86f79ff1f1 Update changelog 2021-07-07 15:23:58 +02:00
I-Al-Istannen
ee67f9f472 Sort elements by ILIAS id to ensure deterministic ordering 2021-07-06 17:45:48 +02:00
I-Al-Istannen
8ec3f41251 Crawl ilias booking objects as links 2021-07-06 16:15:25 +02:00
I-Al-Istannen
89be07d4d3 Use final crawl path in HTML parsing message 2021-07-03 17:05:48 +02:00
I-Al-Istannen
91200f3684 Fix nondeterministic name deduplication 2021-07-03 12:09:55 +02:00
Joscha
9ffd603357 Error when using multiple segments with -name->
Previously, PFERD just silently never matched the -name-> arrow. Now, it errors
when loading the config file.
2021-07-01 11:14:50 +02:00
Joscha
80eeb8fe97 Add --skip option 2021-07-01 11:02:21 +02:00
Joscha
75fde870c2 Bump version to 3.1.0 2021-06-13 17:23:18 +02:00
I-Al-Istannen
6e4d423c81 Crawl all video stages in one crawl bar
This ensures folders are not renamed, as they are crawled twice
2021-06-13 17:18:45 +02:00
Joscha
57aef26217 Fix name arrows
I seem to have (re-)implemented them incorrectly and never tested them.
2021-06-13 16:33:29 +02:00
I-Al-Istannen
70ec64a48b Fix wrong base URL for multi-stage pages 2021-06-13 15:44:47 +02:00
Joscha
70b33ecfd9 Add migration notes to changelog
Also clean up some other formatting for consistency
2021-06-13 15:06:50 +02:00
Joscha
601e4b936b Use new arrow logic in README example config 2021-06-12 15:00:52 +02:00
Joscha
a292c4c437 Add example for ">>" arrow heads 2021-06-12 14:57:29 +02:00
Joscha
bc65ea7ab6 Fix mypy complaining about missing type hints 2021-06-09 22:45:52 +02:00
Joscha
f28bbe6b0c Update transform rule documentation
It's still missing an example that uses rules with ">>" arrows.
2021-06-09 22:45:52 +02:00
Joscha
61d902d715 Overhaul transform logic
-re-> arrows now rename their parent directories (like -->) and don't require a
full match (like -exact->). Their old behaviour is available as -exact-re->.

Also, this change adds the ">>" arrow head, which modifies the current path and
continues to the next rule when it matches.
2021-06-09 22:45:52 +02:00
I-Al-Istannen
8ab462fb87 Use the exercise label instead of the button name as path 2021-06-04 19:24:23 +02:00
Joscha
df3ad3d890 Add 'skip' option to crawlers 2021-06-04 18:47:13 +02:00
Joscha
fc31100a0f Always use '/' as path separator for regex rules
Previously, regex-matching paths on windows would, in some cases, require four
backslashes ('\\\\') to escape a single path separator. That's just too much.

With this commit, regex transforms now use '/' instead of '\' as path separator,
meaning rules can more easily be shared between platforms (although they are not
guaranteed to be 100% compatible since on Windows, '\' is still recognized as a
path separator).

To make rules more intuitive to write, local relative paths are now also printed
with '/' as path separator on Windows. Since Windows also accepts '/' as path
separator, this change doesn't really affect other rules that parse their sides
as paths.
2021-06-04 18:12:45 +02:00
Joscha
31b6311e99 Remove incorrect tmp file explain message 2021-06-01 19:03:06 +02:00
Joscha
1fc8e9eb7a Document credential file authenticator config options 2021-06-01 10:01:14 +00:00
Joscha
85b9f45085 Bump version to 3.0.1 2021-06-01 09:49:30 +00:00
Joscha
f656e3ff34 Fix credential parsing 2021-06-01 09:18:17 +00:00
Joscha
e1bda94329 Load credential file from correct path 2021-06-01 09:18:08 +00:00
Joscha
f6b26f4ead Fix unexpected exception when credential file not found 2021-06-01 09:10:58 +00:00
Joscha
722970a255 Store cookies in text-based format
Using the stdlib's http.cookie module, cookies are now stored as one
"Set-Cookie" header per line. Previously, the aiohttp.CookieJar's save() and
load() methods were used (which use pickling).
2021-05-31 20:18:20 +00:00
Joscha
f40820c41f Warn if using concurrent tasks with kit-ilias-web 2021-05-31 20:18:20 +00:00
Joscha
49ad1b6e46 Clean up authenticator code formatting 2021-05-31 18:45:06 +02:00
Joscha
1ce32d2f18 Add CLI option for credential file auth to kit-ilias-web 2021-05-31 18:45:06 +02:00
Joscha
9d5ec84b91 Add credential file authenticator 2021-05-31 18:33:34 +02:00
I-Al-Istannen
1fba96abcb Fix exercise date parsing for non-group submissions
ILIAS apparently changes the order of the fields as it sees fit, so we
now try to parse *every* column, starting from the right, as a date.
The first column that parses successfully is then used.
2021-05-31 18:15:12 +02:00
Joscha
921cec7ddc Bump version to 3.0.0 2021-05-31 12:49:04 +02:00
Joscha
7b062883f6 Use raw paths for --debug-transforms
Previously, the already-transformed paths were used, which meant that
--debug-transforms was cumbersome to use (as you had to remove all transforms
and crawl once before getting useful results).
2021-05-31 12:33:37 +02:00
Joscha
64a2960751 Align paths in status messages and progress bars
Also print "Ignored" when paths are ignored due to transforms
2021-05-31 12:32:42 +02:00
Joscha
17879a7f69 Print box around message for unexpected exceptions 2021-05-31 12:05:49 +02:00
Joscha
1dd24551a5 Add link to repo in --version output 2021-05-31 11:44:17 +02:00
Joscha
84f775013f Use event loop workaround only on windows
This avoids an unnecessary one-second sleep on other platforms. However, a
better "fix" for this sleep would be a less ugly workaround on windows.
2021-05-31 11:41:52 +02:00
Joscha
b78eb64f3d Document versioning scheme 2021-05-29 21:38:36 +02:00
Joscha
d65efed561 Slightly adjust phrasing 2021-05-28 21:21:04 +00:00
I-Al-Istannen
1ca6740e05 Improve log messages when parsing ILIAS HTML
Previously some logs were split around an "await", which isn't a great
idea.
2021-05-27 17:59:22 +02:00
Joscha
474aa7e1cc Use sorted path order when debugging transforms 2021-05-27 15:41:00 +00:00
I-Al-Istannen
5beb4d9a2d Fix renaming conflict with multi-stage video elements 2021-05-27 15:41:00 +02:00
I-Al-Istannen
19eed5bdff Fix authentication logic conflicts with videos 2021-05-27 15:41:00 +02:00
Joscha
6fa9cfd4c3 Fix error when capturing group is None 2021-05-27 15:41:00 +02:00
Joscha
80acc4b50d Implement new name arrows 2021-05-27 13:43:02 +02:00
Joscha
2c72a9112c Reword -name-> and -name-re-> docs and remove -name-exact-> 2021-05-27 13:20:37 +02:00
Joscha
17207546e9 Document --debug-transforms 2021-05-26 11:47:51 +02:00
Joscha
533f75ea71 Add --debug-transforms flag 2021-05-26 11:37:32 +02:00
Joscha
adb5d4ade3 Print files that are *not* deleted by cleanup
These are files that are not present on the remote source any more, but still
present locally. They also show up in the report.
2021-05-26 10:58:19 +02:00
Joscha
a879c6ab6e Fix function being printed 2021-05-26 10:54:01 +02:00
Joscha
915e42fd07 Fix report not being printed if pferd exits normally 2021-05-26 10:53:54 +02:00
I-Al-Istannen
2d8dcc87ff Send CSRF token in TFA request 2021-05-25 22:50:40 +02:00
I-Al-Istannen
66f0e398a1 Await result in tfa authenticate path 2021-05-25 19:19:51 +02:00
Joscha
30be4e29fa Add workaround for RuntimeError after program finishes on Windows 2021-05-25 16:34:22 +00:00
I-Al-Istannen
263780e6a3 Use certifi to ensure CA certificates are bundled in pyinstaller 2021-05-25 18:24:06 +02:00
Joscha
07a75a37c3 Fix FileNotFoundError on Windows 2021-05-25 15:57:03 +00:00
Joscha
f85b75df8c Switch from exit() to sys.exit()
Pyinstaller doesn't recognize exit().
2021-05-25 17:33:38 +02:00
Joscha
6644126b5d Fix package discovery 2021-05-25 17:29:39 +02:00
Joscha
c665c36d88 Update README, CHANGELOG 2021-05-25 17:18:31 +02:00
Joscha
519a7ef435 Split --dump-config into two options
--dump-config with its optional argument tended to consume the command name, so
it had to be split up.
2021-05-25 17:17:35 +02:00
I-Al-Istannen
a848194601 Rename plaintext link option to "plaintext" 2021-05-25 17:15:13 +02:00
Joscha
aabce764ac Clean up TODOs 2021-05-25 15:54:01 +02:00
Joscha
5a331663e4 Rename functions for consistency 2021-05-25 15:49:06 +02:00
Joscha
40144f8bd8 Fix rule error messages 2021-05-25 15:47:09 +02:00
Joscha
f68849c65f Fix rules not being parsed entirely 2021-05-25 15:42:46 +02:00
Joscha
edb52a989e Print report even if exiting due to Ctrl+C 2021-05-25 15:35:36 +02:00
Joscha
980578d05a Avoid downloading in some cases
Depending on how on_conflict is set, we can determine a few situations where
downloading is never necessary.
2021-05-25 15:20:30 +02:00
I-Al-Istannen
486699cef3 Create anonymous TFA authenticator in ilias crawler
This ensures that *some* TFA authenticator is always present when
authenticating, even if none is specified in the config.

The TfaAuthenticator does not depend on any configured values, so it can
be created on-demand.
2021-05-25 15:11:52 +02:00
I-Al-Istannen
0096a0c077 Remove section and config parameter from Authenticator 2021-05-25 15:11:33 +02:00
I-Al-Istannen
d905e95dbb Allow invalidation of keyring authenticator 2021-05-25 15:02:35 +02:00
Joscha
61430c8739 Overhaul config and CLI option names 2021-05-25 14:23:38 +02:00
Joscha
eb8b915813 Fix path prefix on windows
Previously, the path prefix was only set if "windows_paths" was true, regardless
of OS. Now the path prefix is always set on windows and never set on other OSes.
2021-05-25 14:23:38 +02:00
Joscha
22c2259adb Clean up authenticator exceptions
- Renamed to *Error for consistency
- Treating AuthError like CrawlError
2021-05-25 14:23:38 +02:00
Joscha
c15a1aecdf Rename keyring authenticator file for consistency 2021-05-25 14:20:26 +02:00
Joscha
16d50b6626 Document why /pferd.py exists 2021-05-25 13:31:29 +02:00
I-Al-Istannen
651b087932 Use cl/dl deduplication mechanism for ILIAS crawler 2021-05-25 12:15:38 +02:00
Joscha
bce3dc384d Deduplicate path names in crawler
Also rename files so they follow the restrictions for windows file names if
we're on windows.
2021-05-25 12:11:15 +02:00
I-Al-Istannen
c21ddf225b Add a CLI option to configure ILIAS links behaviour 2021-05-25 11:58:41 +02:00
I-Al-Istannen
4fefb98d71 Add a wrapper to pretty-print ValueErrors in argparse parsers 2021-05-25 11:57:59 +02:00
I-Al-Istannen
ffda4e43df Add extension to link files 2021-05-25 11:41:57 +02:00
I-Al-Istannen
69cb2a7734 Add Links option to ilias crawler
This allows you to configure what type the link files should have and
whether to create them at all.
2021-05-25 11:41:57 +02:00
Joscha
c33de233dc Add script for releasing new versions 2021-05-24 20:23:14 +02:00
I-Al-Istannen
85f89a7ff3 Interpret accordions and expandable headers as virtual folders
This allows us to find a file named "Test" in an accordion "Acc" as "Acc/Test".
2021-05-24 18:54:26 +02:00
I-Al-Istannen
9ce20216b5 Do not set a timeout for whole HTTP request
Downloads might take longer!
2021-05-24 18:54:26 +02:00
Joscha
1739c54091 Add checklist for releasing new versions 2021-05-24 17:50:17 +02:00
Joscha
d8bd1f518a Set up build and release workflow 2021-05-24 17:27:39 +02:00
Joscha
86ba47541b Fix cookie loading and saving 2021-05-24 16:55:11 +02:00
I-Al-Istannen
492ec6a932 Detect and skip ILIAS tests 2021-05-24 16:36:15 +02:00
I-Al-Istannen
342076ee0e Handle exercise detail containers in ILIAS html parser 2021-05-24 16:22:51 +02:00
I-Al-Istannen
d44f6966c2 Log authentication attempts in HTTP crawler 2021-05-24 16:22:11 +02:00
Joscha
5c76193045 Set up pyinstaller 2021-05-24 15:21:25 +02:00
Joscha
1c1f781be4 Reword some log messages 2021-05-24 13:17:28 +02:00
Joscha
c687d4a51a Implement cookie sharing 2021-05-24 13:10:44 +02:00
I-Al-Istannen
fca62541ca De-duplicate element names in ILIAS crawler
This prevents any conflicts caused by multiple files with the same name.
Conflicts may still arise due to transforms, but that is out of our
control and a user error.
2021-05-24 00:24:31 +02:00
I-Al-Istannen
3ab3581f84 Add timeout for HTTP connection 2021-05-23 23:41:05 +02:00
I-Al-Istannen
8dd0689420 Add keyring authentication to ILIAS CLI 2021-05-23 23:04:18 +02:00
Joscha
be4b1040f8 Document status and report options 2021-05-23 22:51:42 +02:00
Joscha
79be6e1dc5 Switch some other options to BooleanOptionalAction 2021-05-23 22:49:09 +02:00
Joscha
edbd92dbbf Add --status and --report flags 2021-05-23 22:41:59 +02:00
Joscha
27b5a8e490 Rename log.action to log.status 2021-05-23 22:40:33 +02:00
Joscha
1f400d5964 Implement BooleanOptionalAction 2021-05-23 22:26:59 +02:00
Joscha
0ca0680165 Simplify --version 2021-05-23 21:40:48 +02:00
Joscha
ce1dbda5b4 Overhaul colours
"Crawled" and "Downloaded" are now printed less bright than "Crawling" and
"Downloading" as they're not as important. Explain topics are printed in yellow
to stand out a bit more from the cyan action messages.
2021-05-23 21:33:04 +02:00
Joscha
9cce78669f Print report after all crawlers have finished 2021-05-23 21:17:13 +02:00
Joscha
6ca0ecdf05 Load and store reports 2021-05-23 20:46:29 +02:00
I-Al-Istannen
6e9f8fd391 Add a keyring authenticator 2021-05-23 19:44:12 +02:00
Joscha
2fdf24495b Restructure crawling and auth related modules 2021-05-23 19:16:42 +02:00
Joscha
bbf9f8f130 Add -C as alias for --crawler 2021-05-23 19:06:09 +02:00
I-Al-Istannen
37f8d84a9c Output total amount of http requests in HTTP Crawler 2021-05-23 19:00:01 +02:00
Joscha
5edd868d5b Fix always-smart redownloading the wrong files 2021-05-23 18:49:34 +02:00
Joscha
e4e5e83be6 Fix downloader using crawl bar
Looks like I made a dumb copy-paste error. Now the download bar shows the proper
progress and speed again.
2021-05-23 18:39:43 +02:00
Joscha
74c7b39dc8 Clean up files in alphabetical order 2021-05-23 18:39:25 +02:00
Joscha
445dffc987 Reword some explanations 2021-05-23 18:35:32 +02:00
I-Al-Istannen
d97d6bf147 Fix handling nested ILIAS folders 2021-05-23 18:29:28 +02:00
I-Al-Istannen
79efdb56f7 Adjust ILIAS html explain messages 2021-05-23 18:24:25 +02:00
Joscha
a9af56a5e9 Improve specifying crawlers via CLI
Instead of removing the sections of unselected crawlers from the config file,
crawler selection now happens in the Pferd after loading the crawlers and is
more sophisticated. It also has better error messages.
2021-05-23 18:18:50 +02:00
I-Al-Istannen
59f13bb8d6 Explain ILIAS HTML parsing and add some warnings 2021-05-23 18:14:54 +02:00
I-Al-Istannen
463f8830d7 Add warn_contd 2021-05-23 18:14:54 +02:00
I-Al-Istannen
05ad06fbc1 Only enclose get_page in iorepeat in ILIAS crawler
We previously also gathered in there, which could lead to some more
surprises when the method was retried.
2021-05-23 18:14:51 +02:00
Joscha
29d5a40c57 Replace asyncio.gather with custom Crawler function 2021-05-23 17:25:16 +02:00
Joscha
c0cecf8363 Log crawl and download actions more extensively 2021-05-23 16:25:44 +02:00
Joscha
b998339002 Fix cleanup logging of paths 2021-05-23 16:25:44 +02:00
Joscha
245c9c3dcc Explain output dir decisions and steps 2021-05-23 16:25:44 +02:00
I-Al-Istannen
d8f26a789e Implement CLI Command for ilias crawler 2021-05-23 13:30:42 +02:00
I-Al-Istannen
e1d18708b3 Rename "no_videos" to videos 2021-05-23 13:30:42 +02:00
Joscha
b44b49476d Fix noncritical and anoncritical decorators
I must've forgot to update the anoncritical decorator when I last changed the
noncritical decorator. Also, every exception should make the crawler not
error_free, not just CrawlErrors.
2021-05-23 13:24:53 +02:00
Joscha
7e0bb06259 Clean up TODOs 2021-05-23 12:47:30 +02:00
I-Al-Istannen
ecdedfa1cf Add no-videos flag to ILIAS crawler 2021-05-23 12:37:01 +02:00
I-Al-Istannen
3d4b997d4a Retry crawl_url and work around Python's closure handling
Closures capture the scope and not the variables. Therefore, any
type-narrowing performed by mypy on captured variables is lost inside
the closure.
2021-05-23 12:28:15 +02:00
Joscha
e81005ae4b Fix CLI arguments 2021-05-23 12:24:21 +02:00
I-Al-Istannen
33a81a5f5c Document authentication in HTTP crawler and rename prepare_request 2021-05-23 11:55:34 +02:00
Joscha
25e2abdb03 Improve transformer explain wording 2021-05-23 11:45:14 +02:00
Joscha
803e5628a2 Clean up logging
Paths are now (hopefully) logged consistently across all crawlers
2021-05-23 11:37:19 +02:00
Joscha
c88f20859a Explain config file dumping 2021-05-23 11:04:50 +02:00
Joscha
ec3767c545 Create crawler base dir at start of crawl 2021-05-23 10:52:02 +02:00
Joscha
729ff0a4c7 Fix simple authenticator output 2021-05-23 10:45:37 +02:00
Joscha
6fe51e258f Number rules starting at 1 2021-05-23 10:45:37 +02:00
Joscha
44ecb2fbe7 Fix cleanup deleting crawler's base directory 2021-05-23 10:45:37 +02:00
I-Al-Istannen
53e031d9f6 Reuse dl/cl for I/O retries in ILIAS crawler 2021-05-23 00:28:27 +02:00
I-Al-Istannen
8ac85ea0bd Fix a few typos in HttpCrawler 2021-05-22 23:37:34 +02:00
I-Al-Istannen
adfdc302d7 Save cookies after successful authentication in HTTP crawler 2021-05-22 23:30:32 +02:00
I-Al-Istannen
3053278721 Move HTTP crawler to own file 2021-05-22 23:23:21 +02:00
I-Al-Istannen
4d07de0d71 Adjust forum log message in ilias crawler 2021-05-22 23:20:21 +02:00
I-Al-Istannen
953a1bba93 Adjust to new crawl / download names 2021-05-22 23:18:05 +02:00
Joscha
e724ff7c93 Fix normal arrow 2021-05-22 20:44:59 +00:00
Joscha
62f0f7bfc5 Explain crawling and partially explain downloading 2021-05-22 20:39:57 +00:00
Joscha
9cb2b68f09 Fix arrow parsing error messages 2021-05-22 20:39:29 +00:00
Joscha
1bbc0b705f Improve transformer error handling 2021-05-22 20:38:56 +00:00
Joscha
662191eca9 Fix crash as soon as first cl or dl token was acquired 2021-05-22 20:25:58 +00:00
Joscha
8fad8edc1e Remove duplicated beautifulsoup4 dependency 2021-05-22 20:02:15 +00:00
Joscha
ae3d80664c Update local crawler to new crawler structure 2021-05-22 21:46:36 +02:00
Joscha
e21795ee35 Make file cleanup part of default crawler behaviour 2021-05-22 21:45:51 +02:00
Joscha
ec95dda18f Unify crawling and downloading steps
Now, the progress bar, limiter etc. for downloading and crawling are all handled
via the reusable CrawlToken and DownloadToken context managers.
2021-05-22 21:36:53 +02:00
Joscha
098ac45758 Remove deprecated repeat decorators 2021-05-22 21:13:25 +02:00
Joscha
9889ce6b57 Improve PFERD error handling 2021-05-22 21:13:25 +02:00
Joscha
b4d97cd545 Improve output dir and report error handling 2021-05-22 20:54:42 +02:00
Joscha
afac22c562 Handle abort in exclusive output state correctly
If the event loop is stopped while something holds the exclusive output, the
"log" singleton is now reset so the main thread can print a few more messages
before exiting.
2021-05-22 18:58:19 +02:00
Joscha
552cd82802 Run async input and password getters in daemon thread
Previously, it ran in the event loop's default executor, which would block until
all its workers were done working.

If Ctrl+C was pressed while input or a password were being read, the
asyncio.run() call in the main thread would be interrupted however, not the
input thread. This meant that multiple key presses (either enter or a second
Ctrl+C) were necessary to stop a running PFERD in some circumstances.

This change instead runs the input functions in daemon threads so they exit as
soon as the main thread exits.
2021-05-22 18:37:53 +02:00
Joscha
dfde0e2310 Improve reporting of unexpected exceptions 2021-05-22 18:36:25 +02:00
Joscha
54dd2f8337 Clean up main and improve error handling 2021-05-22 16:47:24 +02:00
Joscha
b5785f260e Extract CLI argument parsing to separate module 2021-05-22 15:03:45 +02:00
Joscha
98b8ca31fa Add some todos 2021-05-22 14:45:46 +02:00
I-Al-Istannen
4b104b6252 Try out some HTTP authentication handling
This is by no means final yet and will change a bit once the dl and cl
are changed, but it might serve as a first try. It is also wholly
untested.
2021-05-21 12:02:51 +02:00
I-Al-Istannen
83d12fcf2d Add some explains to ilias crawler and use crawler exceptions 2021-05-20 14:58:54 +02:00
I-Al-Istannen
e4f9560655 Only retry on aiohttp errors in ILIAS crawler
This patch removes quite a few retries and now only retries the ilias
element method. Every other HTTP-interacting method (except for the root
requests) is called from there and should be covered.

In the future we also want to retry the root a few times, but that
will be done after the download sink API is adjusted.
2021-05-19 22:01:09 +02:00
I-Al-Istannen
8cfa818f04 Only call should_crawl once 2021-05-19 21:57:55 +02:00
I-Al-Istannen
81301f3a76 Rename the ilias crawler to ilias web crawler 2021-05-19 21:41:17 +02:00
I-Al-Istannen
2976b4d352 Move ILIAS file templates to own file 2021-05-19 21:37:10 +02:00
I-Al-Istannen
9f03702e69 Split up ilias crawler in multiple files
The ilias crawler contained a crawler and an HTML parser, now they are
split in two.
2021-05-19 21:34:36 +02:00
Joscha
3300886120 Explain config file loading 2021-05-19 18:11:43 +02:00
Joscha
0d10752b5a Configure explain log level via cli and config file 2021-05-19 17:50:10 +02:00
Joscha
92886fb8d8 Implement --version flag 2021-05-19 17:33:36 +02:00
Joscha
5916626399 Make noqua comment more specific 2021-05-19 17:16:59 +02:00
Joscha
a7c025fd86 Implement reusable FileSinkToken for OutputDirectory 2021-05-19 17:16:23 +02:00
Joscha
b7a999bc2e Clean up crawler exceptions and (a)noncritical 2021-05-19 13:25:57 +02:00
Joscha
3851065500 Fix local crawler's download bars
Display the pure path instead of the local path.
2021-05-18 23:23:40 +02:00
Joscha
4b68fa771f Move logging logic to singleton
- Renamed module and class because "conductor" didn't make a lot of sense
- Used singleton approach (there's only one stdout after all)
- Redesigned progress bars (now with download speed!)
2021-05-18 22:45:19 +02:00
I-Al-Istannen
1525aa15a6 Fix link template error and use indeterminate progress bar 2021-05-18 22:40:28 +02:00
I-Al-Istannen
db1219d4a9 Create a link file in ILIAS crawler
This allows us to crawl links and represent them in the file system.
Users can choose between an ILIAS-imitation (that optionally
auto-redirects) and a plain text variant.
2021-05-17 21:44:54 +02:00
I-Al-Istannen
b8efcc2ca5 Respect filters in ILIAS crawler 2021-05-17 21:30:26 +02:00
Joscha
0bae009189 Run formatting tools 2021-05-16 14:32:53 +02:00
Joscha
3efec53f51 Configure code checking and formatting tools
Checking
- mypy
- flake8 (which uses pyflakes and pycodestyle)

Formatting
- autopep8
- isort
2021-05-16 14:31:43 +02:00
I-Al-Istannen
8b76ebb3ef Rename IliasCrawler to KitIliasCrawler 2021-05-16 13:28:06 +02:00
I-Al-Istannen
467ea3a37e Document ILIAS-Crawler arguments in CONFIG.md 2021-05-16 13:26:58 +02:00
I-Al-Istannen
2b6235dc78 Fix pylint warnings (and 2 found bugs) in ILIAS crawler 2021-05-16 13:17:12 +02:00
I-Al-Istannen
cd5aa61834 Set max line length for pylint 2021-05-16 13:17:01 +02:00
I-Al-Istannen
5ccb17622e Configure pycodestyle to use a max line length of 110 2021-05-16 13:01:56 +02:00
I-Al-Istannen
1c226c31aa Add some repeat annotations to the ILIAS crawler 2021-05-16 13:01:56 +02:00
I-Al-Istannen
9ec0d3e16a Implement date-demangling in ILIAS crawler 2021-05-16 13:01:56 +02:00
I-Al-Istannen
cf6903d109 Retry crawling on I/O failure 2021-05-16 13:01:56 +02:00
Joscha
9fd356d290 Ensure tmp files are deleted
This doesn't seem to fix the case where an exception bubbles up to the top of
the event loop. It also doesn't seem to fix the case when a KeyboardInterrupt is
thrown, since that never makes its way into the event loop in the first place.

Both of these cases lead to the event loop stopping, which means that the tmp
file cleanup doesn't get executed even though it's inside a "with" or "finally".
2021-05-15 23:00:40 +02:00
Joscha
989032fe0c Fix cookies getting deleted 2021-05-15 22:25:48 +02:00
Joscha
05573ccc53 Add fancy CLI options 2021-05-15 22:22:01 +02:00
I-Al-Istannen
c454fabc9d Add support for exercises in ILIAS crawler 2021-05-15 21:40:17 +02:00
I-Al-Istannen
7d323ec62b Implement video downloads in ilias crawler 2021-05-15 21:32:32 +02:00
I-Al-Istannen
c7494e32ce Start implementing crawling in ILIAS crawler
The ilias crawler can now crawl quite a few filetypes, splits off
folders and crawls them concurrently.
2021-05-15 20:42:18 +02:00
I-Al-Istannen
1123c8884d Implement an IliasPage
This allows PFERD to semantically understand ILIAS HTML and is the
foundation for the ILIAS crawler. This patch extends the ILIAS crawler
to crawl the personal desktop and print the elements on it.
2021-05-15 18:59:23 +02:00
Joscha
e1104f888d Add tfa authenticator 2021-05-15 18:27:16 +02:00
Joscha
8c32da7f19 Let authenticators provide username and password separately 2021-05-15 18:27:03 +02:00
Joscha
d63494908d Properly invalidate exceptions
The simple authenticator now properly invalidates its credentials. Also, the
invalidation functions have been given better names and documentation.
2021-05-15 17:37:05 +02:00
Joscha
b70b62cef5 Make crawler sections start with "crawl:"
Also, use only the part of the section name after the "crawl:" as the crawler's
output directory. Now, the implementation matches the documentation again
2021-05-15 17:24:37 +02:00
Joscha
868f486922 Rename local crawler path to target 2021-05-15 17:12:25 +02:00
I-Al-Istannen
b2a2b5999b Implement ILIAS auth and crawl home page
This commit introduces the necessary machinery to authenticate with
ILIAS and crawl the home page.

It can't do much yet and just silently fetches the homepage.
2021-05-15 15:25:05 +02:00
Joscha
595de88d96 Fix authenticator and crawler names
Now, the "auth:" and "crawl:" parts are considered part of the name. This fixes
crawlers not being able to find their authenticators.
2021-05-15 15:25:05 +02:00
Joscha
a6fdf05ee9 Allow variable whitespace in arrow rules 2021-05-15 15:25:05 +02:00
Joscha
f897d7c2e1 Add name variants for all arrows 2021-05-15 15:25:05 +02:00
Joscha
b0f731bf84 Make crawlers use transformers 2021-05-15 15:25:05 +02:00
Joscha
302b8c0c34 Fix errors loading local crawler config
Apparently getint and getfloat may return a None even though this is not
mentioned in their type annotations.
2021-05-15 15:25:05 +02:00
Joscha
acd674f0a0 Change limiter logic
Now download tasks are a subset of all tasks.
2021-05-15 15:25:05 +02:00
I-Al-Istannen
b0f9e1e8b4 Add vscode directory to gitignore 2021-05-15 15:25:05 +02:00
Joscha
ed2e19a150 Add reasons for invalid values 2021-05-15 15:25:05 +02:00
Joscha
296a169dd3 Make limiter logic more complex
The limiter can now distinguish between crawl and download actions and has a
fancy slot system and delay logic.
2021-05-15 15:25:05 +02:00
Joscha
1591cb9197 Add options to slow down local crawler
These options are meant to make the local crawler behave more like a
network-based crawler for purposes of testing and debugging other parts of the
code base.
2021-05-15 15:25:01 +02:00
Joscha
0c9167512c Fix output dir
I missed these while renaming the resolve function. Shame on me for not running
mypy earlier.
2021-05-14 21:28:38 +02:00
Joscha
a673ab0fae Delete old files
I should've done this earlier
2021-05-14 21:27:44 +02:00
Joscha
6e5fdf4e9e Set user agent to "pferd/<version>" 2021-05-14 21:27:44 +02:00
Joscha
93a5a94dab Single-source version number 2021-05-14 21:27:44 +02:00
Joscha
d565df27b3 Add HttpCrawler 2021-05-13 22:28:14 +02:00
Joscha
961f40f9a1 Document simple authenticator 2021-05-13 19:55:04 +02:00
Joscha
e3ee4e515d Disable highlighting of primitives
This commit prevents rich from highlighting python-looking syntax like numbers,
arrays, 'None' etc.
2021-05-13 19:47:44 +02:00
Joscha
94d6a01cca Use file mtime in local crawler 2021-05-13 19:42:40 +02:00
Joscha
38bb66a776 Update file metadata in more cases
PFERD now not only updates file metadata when a file is successfully added or
changed, but also when a file is downloaded and then detected to be unchanged.

This could occur for example if a remote file's modification time was bumped,
possibly because somebody touched the file without changing it.
2021-05-13 19:40:10 +02:00
Joscha
68781a88ab Fix asynchronous methods being not awaited 2021-05-13 19:39:49 +02:00
Joscha
910462bb72 Log stuff happening to files 2021-05-13 19:37:27 +02:00
Joscha
6bd6adb977 Fix tmp file names 2021-05-13 19:36:46 +02:00
Joscha
0acdee15a0 Let crawlers obtain authenticators 2021-05-13 18:57:20 +02:00
Joscha
c3ce6bb31c Fix crawler cleanup not being awaited 2021-05-11 00:28:45 +02:00
Joscha
0459ed093e Add simple authenticator
... including some required authenticator infrastructure
2021-05-11 00:28:03 +02:00
Joscha
d5f29f01c5 Use global conductor instance
The switch from crawler-local conductors to a single pferd-global conductor was
made to prepare for auth section credential providers.
2021-05-11 00:05:04 +02:00
Joscha
595ba8b7ab Remove dummy crawler 2021-05-10 23:47:46 +02:00
Joscha
cec0a8e1fc Fix mymy errors 2021-05-09 01:45:01 +02:00
Joscha
f9b2fd60e2 Document local crawler and auth 2021-05-09 01:33:47 +02:00
Joscha
60cd9873bc Add local file crawler 2021-05-06 01:02:40 +02:00
Joscha
273d56c39a Properly load crawler config 2021-05-05 23:45:10 +02:00
Joscha
5497dd2827 Add @noncritical and @repeat decorators 2021-05-05 23:36:54 +02:00
Joscha
bbfdadc463 Implement output directory 2021-05-05 18:08:34 +02:00
Joscha
fde811ae5a Document on_conflict option 2021-05-05 12:24:35 +02:00
Joscha
07e831218e Add sync report 2021-05-02 00:56:10 +02:00
Joscha
91c33596da Load crawlers from config file 2021-04-30 16:22:14 +02:00
Joscha
a8dcf941b9 Document possible redownload settings 2021-04-30 15:32:56 +02:00
Joscha
e7a51decb0 Elaborate on transforms and implement changes 2021-04-29 20:24:18 +02:00
Joscha
9ec19be113 Document config file format 2021-04-29 20:24:18 +02:00
Joscha
f776186480 Use PurePath instead of Path
Path should only be used when we need to access the file system. For all other
purposes (mainly crawling), we use PurePath instead since the paths don't
correspond to paths in the local file system.
2021-04-29 20:20:25 +02:00
Joscha
0096d83387 Simplify Limiter implementation 2021-04-29 20:20:25 +02:00
Joscha
20a24dbcbf Add changelog 2021-04-29 20:20:25 +02:00
Joscha
502654d853 Fix mypy errors 2021-04-29 15:47:52 +02:00
Joscha
d2103d7c44 Document crawler 2021-04-29 15:43:20 +02:00
Joscha
d96a361325 Test and fix exclusive output 2021-04-29 15:27:16 +02:00
Joscha
2e85d26b6b Use conductor via context manager 2021-04-29 14:23:28 +02:00
Joscha
6431a3fb3d Fix some mypy errors 2021-04-29 14:23:09 +02:00
Joscha
ac3bfd7388 Make progress bars easier to use
The crawler now supports two types of progress bars
2021-04-29 13:53:16 +02:00
Joscha
3ea86d18a0 Jerry-rig DummyCrawler to run 2021-04-29 13:45:04 +02:00
Joscha
bbc792f9fb Implement Crawler and DummyCrawler 2021-04-29 13:44:29 +02:00
Joscha
7e127cd5cc Clean up and fix conductor and limiter
Turns out you have to await an async lock, who knew...
2021-04-29 13:44:04 +02:00
Joscha
c4fb92c658 Make type hints compatible with Python 3.8 2021-04-29 13:11:58 +02:00
Joscha
8da1ac6cee Extend mypy config 2021-04-29 11:44:47 +02:00
Joscha
a18db57e6f Implement terminal conductor 2021-04-29 11:44:47 +02:00
Joscha
b915e393dd Implement limiter 2021-04-29 10:24:28 +02:00
Joscha
3a74c23d09 Implement transformer 2021-04-29 09:51:50 +02:00
Joscha
fbebc46c58 Load and dump config 2021-04-29 09:51:50 +02:00
Joscha
5595a908d8 Configure entry point 2021-04-27 00:32:21 +02:00
Joscha
27e4abcfa3 Do project setup from scratch
Following guidelines from the Python Packaging User Guide [1].

This commit intentionally breaks the .gitignore, project dependencies, GitHub
Actions and other stuff. It also removes almost the entire README. The intention
behind this is to get rid of all cruft that has accumulated over time and to have
a fresh start. Only necessary things will be re-added as they're needed.

From now on, I also plan on adding documentation for every feature at the same
time that the feature is implemented. This is to ensure that the documentation
does not become outdated.

[1]: https://packaging.python.org/
2021-04-27 00:07:54 +02:00
75 changed files with 7963 additions and 3762 deletions

.github/workflows/build-and-release.yml (new file)

@@ -0,0 +1,78 @@
name: build-and-release

on: push

defaults:
  run:
    shell: bash

jobs:
  build:
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest, windows-latest, macos-latest]
        python: ["3.9"]
    steps:
      - uses: actions/checkout@v3
      - uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python }}
      - name: Set up project
        if: matrix.os != 'windows-latest'
        run: ./scripts/setup
      - name: Set up project on windows
        if: matrix.os == 'windows-latest'
        # For some reason, `pip install --upgrade pip` doesn't work on
        # 'windows-latest'. The installed pip version works fine however.
        run: ./scripts/setup --no-pip
      - name: Run checks
        run: ./scripts/check
      - name: Build
        run: ./scripts/build
      - name: Rename binary
        # Glob in source location because on windows pyinstaller creates a file
        # named "pferd.exe"
        run: mv dist/pferd* dist/pferd-${{ matrix.os }}
      - name: Upload binary
        uses: actions/upload-artifact@v3
        with:
          name: Binaries
          path: dist/pferd-${{ matrix.os }}

  release:
    runs-on: ubuntu-latest
    if: startsWith(github.ref, 'refs/tags/v')
    needs: build
    steps:
      - name: Download binaries
        uses: actions/download-artifact@v3
        with:
          name: Binaries
      - name: Rename binaries
        run: |
          mv pferd-ubuntu-latest pferd-linux
          mv pferd-windows-latest pferd-windows.exe
          mv pferd-macos-latest pferd-mac
      - name: Create release
        uses: softprops/action-gh-release@v1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          files: |
            pferd-linux
            pferd-windows.exe
            pferd-mac

Previous GitHub Actions workflow (deleted file)

@@ -1,74 +0,0 @@
name: Package Application with Pyinstaller

on:
  push:
    branches:
      - "*"
    tags:
      - "v*"

jobs:
  build:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [ubuntu-latest, windows-latest, macos-latest]
    steps:
      - uses: actions/checkout@v2
      - uses: actions/setup-python@v2
        with:
          python-version: '3.x'
      - name: "Install dependencies"
        run: "pip install setuptools keyring pyinstaller rich requests beautifulsoup4 -f --upgrade"
      - name: "Install sync_url.py"
        run: "pyinstaller sync_url.py -F"
      - name: "Move artifact"
        run: "mv dist/sync_url* dist/sync_url-${{ matrix.os }}"
      - uses: actions/upload-artifact@v2
        with:
          name: "Pferd Sync URL"
          path: "dist/sync_url*"

  release:
    name: Release
    needs: [build]
    runs-on: ubuntu-latest
    if: startsWith(github.ref, 'refs/tags/')
    env:
      GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    steps:
      - name: "Checkout"
        uses: actions/checkout@v2
      - name: "Download artifacts"
        uses: actions/download-artifact@v2
        with:
          name: "Pferd Sync URL"
      - name: "look at folder structure"
        run: "ls -lah"
      - name: "Rename releases"
        run: "mv sync_url-macos-latest pferd_sync_url_mac && mv sync_url-ubuntu-latest pferd_sync_url_linux && mv sync_url-windows-latest pferd_sync_url.exe"
      - name: "Create release"
        uses: softprops/action-gh-release@v1
      - name: "Upload release artifacts"
        uses: softprops/action-gh-release@v1
        with:
          body: "Download the correct sync_url for your platform and run it in the terminal or CMD. You might need to make it executable on Linux/Mac with `chmod +x <file>`. Also please enclose the *url you pass to the program in double quotes* or your shell might silently screw it up!"
          files: |
            pferd_sync_url_mac
            pferd_sync_url_linux
            pferd_sync_url.exe

.gitignore

@@ -1,15 +1,11 @@
-__pycache__/
-.venv/
-venv/
-.idea/
-build/
 .mypy_cache/
-.tmp/
-.env
-.vscode
-ilias_cookies.txt
-PFERD.egg-info/
-# PyInstaller
-sync_url.spec
-dist/
+/.venv/
+/PFERD.egg-info/
+__pycache__/
+/.vscode/
+/.idea/
+# pyinstaller
+/pferd.spec
+/build/
+/dist/

CHANGELOG.md (new file)

@@ -0,0 +1,211 @@
# Changelog
All notable changes to this project will be documented in this file. The format
is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
This project has its own custom versioning scheme. Version numbers consist of
three parts (e. g. `3.1.5`).
- The first number is increased on major rewrites or changes. What classifies as
a major change is up to the maintainers. This is pretty rare and a PFERD
version 4 should hopefully not be necessary.
- The second number is increased on backwards-incompatible changes in behaviour.
This refers to any change that would make an existing setup behave differently
(e. g. renaming options or changing crawler behaviour). If this number is
increased, it may be necessary for you to adapt your own setup.
- The third number is increased on backwards-compatible changes (e. g. adding
new options or commands, changing documentation, fixing bugs). Updates that
only increase this number should be safe and not require manual intervention.
We will try to correctly classify changes as backwards-compatible or
backwards-incompatible, but may occasionally make mistakes or stumble across
ambiguous situations.
## Unreleased
### Fixed
- Video name deduplication
## 3.5.0 - 2023-09-13
### Added
- `no-delete-prompt-overwrite` conflict resolution strategy
- Support for ILIAS learning modules
- `show_not_deleted` option to stop printing the "Not Deleted" status or report
message. This combines nicely with the `no-delete-prompt-overwrite` strategy,
causing PFERD to mostly ignore local-only files.
- Support for mediacast video listings
- Crawling of files in info tab
### Changed
- Remove size suffix for files in content pages
### Fixed
- Crawling of courses with the timeline view as the default tab
- Crawling of file and custom opencast cards
- Crawling of button cards without descriptions
- Abort crawling when encountering an unexpected ilias root page redirect
- Sanitize ascii control characters on Windows
- Crawling of paginated past meetings
- Ignore SCORM learning modules
## 3.4.3 - 2022-11-29
### Added
- Missing documentation for `forums` option
### Changed
- Clear up error message shown when multiple paths are found to an element
### Fixed
- IPD crawler unnecessarily appending trailing slashes
- Crawling opencast when ILIAS is set to English
## 3.4.2 - 2022-10-26
### Added
- Recognize and crawl content pages in cards
- Recognize and ignore surveys
### Fixed
- Forum crawling crashing when a thread has no messages at all
- Forum crawling crashing when a forum has no threads at all
- Ilias login failing in some cases
- Crawling of paginated future meetings
- IPD crawler handling of URLs without trailing slash
## 3.4.1 - 2022-08-17
### Added
- Download of page descriptions
- Forum download support
- `pass` authenticator
### Changed
- Add `cpp` extension to default `link_regex` of IPD crawler
- Mention hrefs in IPD crawler's `--explain` output for users of `link_regex` option
- Simplify default IPD crawler `link_regex`
### Fixed
- IPD crawler crashes on some sites
- Meeting name normalization for yesterday, today and tomorrow
- Crawling of meeting file previews
- Login with new login button html layout
- Descriptions for courses are now placed in the correct subfolder when
downloading the whole desktop
## 3.4.0 - 2022-05-01
### Added
- Message when Shibboleth entitlements need to be manually reviewed
- Links to unofficial packages and repology in the readme
### Changed
- Increase minimum supported Python version to 3.9
- Support video listings with more columns
- Use UTF-8 when reading/writing the config file
### Fixed
- Crash during authentication when the Shibboleth session is still valid
## 3.3.1 - 2022-01-15
### Fixed
- ILIAS login
- Local video cache if `windows_paths` is enabled
## 3.3.0 - 2022-01-09
### Added
- A KIT IPD crawler
- Support for ILIAS cards
- (Rudimentary) support for content pages
- Support for multi-stream videos
- Support for ILIAS 7
### Removed
- [Interpolation](https://docs.python.org/3/library/configparser.html#interpolation-of-values) in config file
### Fixed
- Crawling of recursive courses
- Crawling files directly placed on the personal desktop
- Ignore timestamps at the unix epoch as they crash on windows
## 3.2.0 - 2021-08-04
### Added
- `--skip` command line option
- Support for ILIAS booking objects
### Changed
- Using multiple path segments on left side of `-name->` now results in an
error. This was already forbidden by the documentation but silently accepted
by PFERD.
- More consistent path printing in some `--explain` messages
### Fixed
- Nondeterministic name deduplication due to ILIAS reordering elements
- More exceptions are handled properly
## 3.1.0 - 2021-06-13
If your config file doesn't do weird things with transforms, it should continue
to work. If your `-re->` arrows behave weirdly, try replacing them with
`-exact-re->` arrows. If you're on Windows, you might need to switch from `\`
path separators to `/` in your regex rules.
### Added
- `skip` option for crawlers
- Rules with `>>` instead of `>` as arrow head
- `-exact-re->` arrow (behaves like `-re->` did previously)
### Changed
- The `-re->` arrow can now rename directories (like `-->`)
- Use `/` instead of `\` as path separator for (regex) rules on Windows
- Use the label to the left for exercises instead of the button name to
determine the folder name
### Fixed
- Video pagination handling in ILIAS crawler
## 3.0.1 - 2021-06-01
### Added
- `credential-file` authenticator
- `--credential-file` option for `kit-ilias-web` command
- Warning if using concurrent tasks with `kit-ilias-web`
### Changed
- Cookies are now stored in a text-based format
### Fixed
- Date parsing now also works correctly in non-group exercises
## 3.0.0 - 2021-05-31
### Added
- Proper config files
- Concurrent crawling
- Crawl external ILIAS links
- Crawl uploaded exercise solutions
- Explain what PFERD is doing and why (`--explain`)
- More control over output (`--status`, `--report`)
- Debug transform rules with `--debug-transforms`
- Print report after exiting via Ctrl+C
- Store crawler reports in `.report` JSON file
- Extensive config file documentation (`CONFIG.md`)
- Documentation for developers (`DEV.md`)
- This changelog
### Changed
- Rewrote almost everything
- Better error messages
- Redesigned CLI
- Redesigned transform rules
- ILIAS crawling logic (paths may be different)
- Better support for weird paths on Windows
- Set user agent (`PFERD/<version>`)
### Removed
- Backwards compatibility with 2.x
- Python files as config files
- Some types of crawlers

CONFIG.md (new file)

@@ -0,0 +1,479 @@
# Config file format
A config file consists of sections. A section begins with a `[section]` header,
which is followed by a list of `key = value` pairs. Comments must be on their
own line and start with `#`. Multiline values must be indented beyond their key.
Boolean values can be `yes` or `no`. For more details and some examples on the
format, see the [configparser documentation][1] ([interpolation][2] is
disabled).
[1]: <https://docs.python.org/3/library/configparser.html#supported-ini-file-structure> "Supported INI File Structure"
[2]: <https://docs.python.org/3/library/configparser.html#interpolation-of-values> "Interpolation of values"
## The `DEFAULT` section
This section contains global configuration values. It can also be used to set
default values for the other sections.
- `working_dir`: The directory PFERD operates in. Set to an absolute path to
make PFERD operate the same regardless of where it is executed from. All other
paths in the config file are interpreted relative to this path. If this path
is relative, it is interpreted relative to the script's working dir. `~` is
expanded to the current user's home directory. (Default: `.`)
- `explain`: Whether PFERD should log and explain its actions and decisions in
detail. (Default: `no`)
- `status`: Whether PFERD should print status updates (like `Crawled ...`,
`Added ...`) while running a crawler. (Default: `yes`)
- `report`: Whether PFERD should print a report of added, changed and deleted
local files for all crawlers before exiting. (Default: `yes`)
- `show_not_deleted`: Whether PFERD should print messages in status and report
when a local-only file wasn't deleted. Combines nicely with the
`no-delete-prompt-overwrite` conflict resolution strategy. (Default: `yes`)
- `share_cookies`: Whether crawlers should share cookies where applicable. For
example, some crawlers share cookies if they crawl the same website using the
same account. (Default: `yes`)
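For illustration, a `DEFAULT` section using only the options described above
might look like this; the values shown are placeholders, not recommendations:
```ini
[DEFAULT]
# Run PFERD from a fixed directory, regardless of where it is invoked
working_dir = ~/pferd
# Keep status output, but skip the final report of added/changed/deleted files
explain = no
status = yes
report = no
show_not_deleted = yes
share_cookies = yes
```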
## The `crawl:*` sections
Sections whose names start with `crawl:` are used to configure crawlers. The
rest of the section name specifies the name of the crawler.
A crawler synchronizes a remote resource to a local directory. There are
different types of crawlers for different kinds of resources, e.g. ILIAS
courses or lecture websites.
Each crawl section represents an instance of a specific type of crawler. The
`type` option is used to specify the crawler type. The crawler's name is usually
used as the output directory. New crawlers can be created simply by adding a new
crawl section to the config file.
Depending on a crawler's type, it may have different options. For more details,
see the type's [documentation](#crawler-types) below. The following options are
common to all crawlers:
- `type`: The available types are specified in [this section](#crawler-types).
- `skip`: Whether the crawler should be skipped during normal execution. The
crawler can still be executed manually using the `--crawler` or `-C` flags.
(Default: `no`)
- `output_dir`: The directory the crawler synchronizes files to. A crawler will
never place any files outside this directory. (Default: the crawler's name)
- `redownload`: When to download a file that is already present locally.
(Default: `never-smart`)
- `never`: If a file is present locally, it is not downloaded again.
- `never-smart`: Like `never`, but PFERD tries to detect if an already
downloaded file has changed via some (unreliable) heuristics.
- `always`: All files are always downloaded, regardless of whether they are
already present locally.
- `always-smart`: Like `always`, but PFERD tries to avoid unnecessary
downloads via some (unreliable) heuristics.
- `on_conflict`: What to do when the local and remote versions of a file or
directory differ, including when a file is replaced by a directory or a
directory by a file. (Default: `prompt`)
- `prompt`: Always ask the user before overwriting or deleting local files
and directories.
- `local-first`: Always keep the local file or directory. Equivalent to
using `prompt` and always choosing "no". Implies that `redownload` is set
to `never`.
- `remote-first`: Always keep the remote file or directory. Equivalent to
using `prompt` and always choosing "yes".
- `no-delete`: Never delete local files, but overwrite local files if the
remote file is different.
- `no-delete-prompt-overwrite`: Never delete local files, but prompt to
overwrite local files if the remote file is different. Combines nicely
with the `show_not_deleted` option.
- `transform`: Rules for renaming and excluding certain files and directories.
For more details, see [this section](#transformation-rules). (Default: empty)
- `tasks`: The maximum number of concurrent tasks (such as crawling or
downloading). (Default: `1`)
- `downloads`: How many of those tasks can be download tasks at the same time.
Must not be greater than `tasks`. (Default: Same as `tasks`)
- `task_delay`: Time (in seconds) that the crawler should wait between
subsequent tasks. Can be used as a sort of rate limit to avoid unnecessary
load for the crawl target. (Default: `0.0`)
- `windows_paths`: Whether PFERD should find alternative names for paths that
are invalid on Windows. (Default: `yes` on Windows, `no` otherwise)
Some crawlers may also require credentials for authentication. To configure how
the crawler obtains its credentials, the `auth` option is used. It is set to the
full name of an auth section (including the `auth:` prefix).
Here is a simple example:
```ini
[auth:example]
type = simple
username = foo
password = bar
[crawl:something]
type = some-complex-crawler
auth = auth:example
on_conflict = no-delete
tasks = 3
```
## The `auth:*` sections
Sections whose names start with `auth:` are used to configure authenticators. An
authenticator provides a username and a password to one or more crawlers.
Authenticators work similarly to crawlers: A section represents an authenticator
instance whose name is the rest of the section name. The type is specified by
the `type` option.
Depending on an authenticator's type, it may have different options. For more
details, see the type's [documentation](#authenticator-types) below. The only
option common to all authenticators is `type`:
- `type`: The types are specified in [this section](#authenticator-types).
## Crawler types
### The `local` crawler
This crawler crawls a local directory. It is really simple and mostly useful for
testing different setups. The various delay options are meant to make the
crawler simulate a slower, network-based crawler.
- `target`: Path to the local directory to crawl. (Required)
- `crawl_delay`: Artificial delay (in seconds) to simulate for crawl requests.
(Default: `0.0`)
- `download_delay`: Artificial delay (in seconds) to simulate for download
requests. (Default: `0.0`)
- `download_speed`: Download speed (in bytes per second) to simulate. (Optional)
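Here is a short example (the crawler name, path and delays are only
placeholders):
```ini
[crawl:local-test]
type = local
target = ~/temp/test-files
crawl_delay = 0.5
download_delay = 1.0
download_speed = 200000
```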
### The `kit-ipd` crawler
This crawler crawls a KIT-IPD page by URL. The root page can be crawled from
outside the KIT network, so you will be informed about any new/deleted files,
but downloading files requires you to be within the KIT network. Adding a small
delay between requests is likely a good idea.
- `target`: URL to a KIT-IPD page
- `link_regex`: A regex that is matched against the `href` part of links. If it
matches, the given link is downloaded as a file. This is used to extract
files from KIT-IPD pages. (Default: `^.*?[^/]+\.(pdf|zip|c|cpp|java)$`)
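Here is a short example (the URL is a placeholder; the delay follows the advice
above):
```ini
[crawl:ipd-lecture]
type = kit-ipd
target = https://ipd.kit.edu/some/lecture/page
task_delay = 1.0
```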
### The `kit-ilias-web` crawler
This crawler crawls the KIT ILIAS instance.
ILIAS is not great at handling too many concurrent requests. To avoid
unnecessary load, please limit `tasks` to `1`.
There is a spike in ILIAS usage at the beginning of lectures, so please don't
run PFERD during those times.
If you're automatically running PFERD periodically (e. g. via cron or a systemd
timer), please randomize the start time or at least don't use the full hour. For
systemd timers, this can be accomplished using the `RandomizedDelaySec` option.
Also, please schedule the script to run in periods of low activity. Running the
script once per day should be fine.
- `target`: The ILIAS element to crawl. (Required)
- `desktop`: Crawl your personal desktop
- `<course id>`: Crawl the course with the given id
- `<url>`: Crawl a given element by URL (preferably the permanent URL linked
at the bottom of its ILIAS page)
- `auth`: Name of auth section to use for login. (Required)
- `tfa_auth`: Name of auth section to use for two-factor authentication. Only
uses the auth section's password. (Default: Anonymous `tfa` authenticator)
- `links`: How to represent external links. (Default: `fancy`)
- `ignore`: Don't download links.
- `plaintext`: A text file containing only the URL.
- `fancy`: An HTML file looking like the ILIAS link element.
- `internet-shortcut`: An internet shortcut file (`.url` file).
- `link_redirect_delay`: Time (in seconds) until `fancy` link files will
redirect to the actual URL. Set to a negative value to disable the automatic
redirect. (Default: `-1`)
- `videos`: Whether to download videos. (Default: `no`)
- `forums`: Whether to download forum threads. (Default: `no`)
- `http_timeout`: The timeout (in seconds) for all HTTP requests. (Default:
`20.0`)
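Here is a short example for this crawler (the crawler name, course id and user
name are placeholders):
```ini
[auth:ilias]
type = simple
username = your-username
[crawl:some-course]
type = kit-ilias-web
auth = auth:ilias
target = 1234567
tasks = 1
links = fancy
videos = no
```
Setting `tasks = 1` keeps the crawler within the request limit recommended
above.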
## Authenticator types
### The `simple` authenticator
With this authenticator, the username and password can be set directly in the
config file. If the username or password is not specified, the user is prompted
via the terminal.
- `username`: The username. (Optional)
- `password`: The password. (Optional)
### The `credential-file` authenticator
This authenticator reads a username and a password from a credential file.
- `path`: Path to the credential file. (Required)
The credential file has exactly two lines (trailing newline optional). The first
line starts with `username=` and contains the username, the second line starts
with `password=` and contains the password. The username and password may
contain any characters except a line break.
```
username=AzureDiamond
password=hunter2
```
### The `keyring` authenticator
This authenticator uses the system keyring to store passwords. The username can
be set directly in the config file. If the username is not specified, the user
is prompted via the terminal. If the keyring contains no entry or the entry is
incorrect, the user is prompted for a password via the terminal and the password
is stored in the keyring.
- `username`: The username. (Optional)
- `keyring_name`: The service name PFERD uses for storing credentials. (Default:
`PFERD`)
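A crawler's `auth` option could then point to a section like this (the section
name and user name are placeholders):
```ini
[auth:ilias]
type = keyring
username = your-username
```
On the first run the password is read from the terminal and stored in the
system keyring; later runs reuse the stored entry.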
### The `pass` authenticator
This authenticator queries the [`pass` password manager][3] for a username and
password. It tries to be mostly compatible with [browserpass][4] and
[passff][5], so see those links for an overview of the format. If PFERD fails
to load your password, you can use the `--explain` flag to see why.
- `passname`: The name of the password to use (Required)
- `username_prefixes`: A comma-separated list of username line prefixes
(Default: `login,username,user`)
- `password_prefixes`: A comma-separated list of password line prefixes
(Default: `password,pass,secret`)
[3]: <https://www.passwordstore.org/> "Pass: The Standard Unix Password Manager"
[4]: <https://github.com/browserpass/browserpass-extension#organizing-password-store> "Organizing password store"
[5]: <https://github.com/passff/passff#multi-line-format> "Multi-line format"
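As a rough sketch, a password entry that works with the default prefixes above
could look like this (the values are placeholders):
```
hunter2
username: AzureDiamond
```
Here the first, unprefixed line is used as the password and the `username:`
line provides the username.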
### The `tfa` authenticator
This authenticator prompts the user on the console for a two-factor
authentication token. The token is provided as the password and is not cached.
This authenticator does not support usernames.
## Transformation rules
Transformation rules are rules for renaming and excluding files and directories.
They are specified line-by-line in a crawler's `transform` option. When a
crawler needs to apply a rule to a path, it goes through this list top-to-bottom
and applies the first matching rule.
To see this process in action, you can use the `--debug-transforms` flag or
the `--explain` flag.
Each rule has the format `SOURCE ARROW TARGET` (e. g. `foo/bar --> foo/baz`).
The arrow specifies how the source and target are interpreted. The different
kinds of arrows are documented below.
`SOURCE` and `TARGET` are either a bunch of characters without spaces (e. g.
`foo/bar`) or string literals (e. g. `"foo/b a r"`). The former syntax has no
concept of escaping characters, so the backslash is just another character. The
string literals however support Python's escape syntax (e. g.
`"foo\\bar\tbaz"`). This also means that in string literals, backslashes must be
escaped.
`TARGET` can additionally be a single exclamation mark `!` (*not* `"!"`). When a
rule with a `!` as target matches a path, the corresponding file or directory is
ignored by the crawler instead of renamed.
`TARGET` can also be omitted entirely. When a rule without target matches a
path, the path is returned unmodified. This is useful to prevent rules further
down from matching instead.
Each arrow's behaviour can be modified slightly by changing the arrow's head
from `>` to `>>`. When a rule with a `>>` arrow head matches a path, it doesn't
return immediately like a normal arrow. Instead, it replaces the current path
with its output and continues on to the next rule. In effect, this means that
multiple rules can be applied sequentially.
### The `-->` arrow
The `-->` arrow is a basic renaming operation for files and directories. If a
path matches `SOURCE`, it is renamed to `TARGET`.
Example: `foo/bar --> baz`
- Doesn't match `foo`, `a/foo/bar` or `foo/baz`
- Converts `foo/bar` into `baz`
- Converts `foo/bar/wargl` into `baz/wargl`
Example: `foo/bar --> !`
- Doesn't match `foo`, `a/foo/bar` or `foo/baz`
- Ignores `foo/bar` and any of its children
### The `-name->` arrow
The `-name->` arrow lets you rename files and directories by their name,
regardless of where they appear in the file tree. Because of this, its `SOURCE`
must not contain multiple path segments, only a single name. This restriction
does not apply to its `TARGET`.
Example: `foo -name-> bar/baz`
- Doesn't match `a/foobar/b` or `x/Foo/y/z`
- Converts `hello/foo` into `hello/bar/baz`
- Converts `foo/world` into `bar/baz/world`
- Converts `a/foo/b/c/foo` into `a/bar/baz/b/c/bar/baz`
Example: `foo -name-> !`
- Doesn't match `a/foobar/b` or `x/Foo/y/z`
- Ignores any path containing a segment `foo`
### The `-exact->` arrow
The `-exact->` arrow requires the path to match `SOURCE` exactly. The examples
below show why this is useful.
Example: `foo/bar -exact-> baz`
- Doesn't match `foo`, `a/foo/bar` or `foo/baz`
- Converts `foo/bar` into `baz`
- Doesn't match `foo/bar/wargl`
Example: `foo/bar -exact-> !`
- Doesn't match `foo`, `a/foo/bar` or `foo/baz`
- Ignores only `foo/bar`, not its children
### The `-re->` arrow
The `-re->` arrow is like the `-->` arrow but with regular expressions. `SOURCE`
is a regular expression and `TARGET` an f-string based template. If a path
matches `SOURCE`, the output path is created using `TARGET` as template.
`SOURCE` is automatically anchored.
`TARGET` uses Python's [format string syntax][6]. The *n*-th capturing group can
be referred to as `{g<n>}` (e.g. `{g3}`). `{g0}` refers to the original path.
If capturing group *n*'s contents are a valid integer, the integer value is
available as `{i<n>}` (e.g. `{i3}`). If capturing group *n*'s contents are a
valid float, the float value is available as `{f<n>}` (e.g. `{f3}`). If a
capturing group is not present (e.g. when matching the string `cd` with the
regex `(ab)?cd`), the corresponding variables are not defined.
Python's format string syntax has rich options for formatting its arguments. For
example, to left-pad the capturing group 3 with the digit `0` to width 5, you
can use `{i3:05}`.
PFERD even allows you to write entire expressions inside the curly braces, for
example `{g2.lower()}` or `{g3.replace(' ', '_')}`.
Example: `f(oo+)/be?ar -re-> B{g1.upper()}H/fear`
- Doesn't match `a/foo/bar`, `foo/abc/bar`, `afoo/bar` or `foo/bars`
- Converts `foo/bar` into `BOOH/fear`
- Converts `fooooo/bear` into `BOOOOOH/fear`
- Converts `foo/bar/baz` into `BOOH/fear/baz`
[6]: <https://docs.python.org/3/library/string.html#format-string-syntax> "Format String Syntax"
### The `-name-re->` arrow
The `-name-re->` arrow is like a combination of the `-name->` and `-re->` arrows.
Example: `(.*)\.jpeg -name-re-> {g1}.jpg`
- Doesn't match `foo/bar.png`, `baz.JPEG` or `hello,jpeg`
- Converts `foo/bar.jpeg` into `foo/bar.jpg`
- Converts `foo.jpeg/bar/baz.jpeg` into `foo.jpg/bar/baz.jpg`
Example: `\..+ -name-re-> !`
- Doesn't match `.`, `test`, `a.b`
- Ignores all files and directories starting with `.`.
### The `-exact-re->` arrow
The `-exact-re->` arrow is like a combination of the `-exact->` and `-re->`
arrows.
Example: `f(oo+)/be?ar -exact-re-> B{g1.upper()}H/fear`
- Doesn't match `a/foo/bar`, `foo/abc/bar`, `afoo/bar` or `foo/bars`
- Converts `foo/bar` into `BOOH/fear`
- Converts `fooooo/bear` into `BOOOOOH/fear`
- Doesn't match `foo/bar/baz`
### Example: Tutorials
You have an ILIAS course with lots of tutorials, but are only interested in a
single one.
```
tutorials/
|- tut_01/
|- tut_02/
|- tut_03/
...
```
You can use a mix of normal and exact arrows to get rid of the other ones and
move the `tutorials/tut_02/` folder to `my_tut/`:
```
tutorials/tut_02 --> my_tut
tutorials -exact->
tutorials --> !
```
The second rule is required for many crawlers since they use the rules to decide
which directories to crawl. If it was missing when the crawler looks at
`tutorials/`, the third rule would match. This means the crawler would not crawl
the `tutorials/` directory and thus not discover that `tutorials/tut_02/` exists.
Since the second rule is only relevant for crawling, the `TARGET` is left out.
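In a config file, these rules go into the crawler's `transform` option, for
example (the crawler name, type and target are placeholders):
```ini
[crawl:tutorials]
type = kit-ilias-web
auth = auth:ilias
target = <course id>
transform =
  tutorials/tut_02 --> my_tut
  tutorials -exact->
  tutorials --> !
```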
### Example: Lecture slides
You have a course with slides like `Lecture 3: Linear functions.PDF` and you
would like to rename them to `03_linear_functions.pdf`.
```
Lectures/
|- Lecture 1: Introduction.PDF
|- Lecture 2: Vectors and matrices.PDF
|- Lecture 3: Linear functions.PDF
...
```
To do this, you can use the most powerful of arrows: The regex arrow.
```
"Lectures/Lecture (\\d+): (.*)\\.PDF" -re-> "Lectures/{i1:02}_{g2.lower().replace(' ', '_')}.pdf"
```
Note the escaped backslashes on the `SOURCE` side.
### Example: Crawl a Python project
You are crawling a Python project and want to ignore all hidden files (files
whose name starts with a `.`), all `__pycache__` directories and all markdown
files (for some weird reason).
```
.gitignore
.mypy_cache/
.venv/
CONFIG.md
PFERD/
|- __init__.py
|- __main__.py
|- __pycache__/
|- authenticator.py
|- config.py
...
README.md
...
```
For this task, the name arrows can be used.
```
\..* -name-re-> !
__pycache__ -name-> !
.*\.md -name-re-> !
```
### Example: Clean up names
You want to convert all paths into lowercase and replace spaces with underscores
before applying any rules. This can be achieved using the `>>` arrow heads.
```
(.*) -re->> "{g1.lower().replace(' ', '_')}"
<other rules go here>
```

DEV.md Normal file
@@ -0,0 +1,89 @@
# PFERD Development Guide
PFERD is packaged following the [Python Packaging User Guide][ppug] (in
particular [this][ppug-1] and [this][ppug-2] guide).
[ppug]: <https://packaging.python.org/> "Python Packaging User Guide"
[ppug-1]: <https://packaging.python.org/tutorials/packaging-projects/> "Packaging Python Projects"
[ppug-2]: <https://packaging.python.org/guides/distributing-packages-using-setuptools/> "Packaging and distributing projects"
## Setting up a dev environment
The use of [venv][venv] is recommended. To initially set up a development
environment, run these commands in the same directory as this file:
```
$ python -m venv .venv
$ . .venv/bin/activate
$ ./scripts/setup
```
The setup script installs a few required dependencies and tools. It also
installs PFERD via `pip install --editable .`, which means that you can just run
`pferd` as if it was installed normally. Since PFERD was installed with
`--editable`, there is no need to re-run `pip install` when the source code is
changed.
If you get any errors because pip can't update itself, try running
`./scripts/setup --no-pip` instead of `./scripts/setup`.
For more details, see [this part of the Python Tutorial][venv-tut] and
[this section on "development mode"][ppug-dev].
[venv]: <https://docs.python.org/3/library/venv.html> "venv - Creation of virtual environments"
[venv-tut]: <https://docs.python.org/3/tutorial/venv.html> "12. Virtual Environments and Packages"
[ppug-dev]: <https://packaging.python.org/guides/distributing-packages-using-setuptools/#working-in-development-mode> "Working in “development mode”"
## Checking and formatting the code
To run a set of checks against the code, run `./scripts/check` in the repo's
root directory. This script will run a few tools installed by `./scripts/setup`
against the entire project.
To format the code, run `./scripts/format` in the repo's root directory.
Before committing changes, please make sure the checks return no warnings and
the code is formatted.
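For example, a typical run before committing looks like this:
```
$ ./scripts/check
$ ./scripts/format
```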
## Contributing
When submitting a PR that adds or changes a feature, please ensure
that the corresponding documentation is updated as well. Also, please ensure
that `./scripts/check` returns no warnings and the code has been run through
`./scripts/format`.
In your first PR, please add your name to the `LICENSE` file.
## Releasing a new version
This section describes the steps required to release a new version of PFERD.
Usually, they don't need to be performed manually and `scripts/bump-version` can be
used instead.
1. Update the version number in `PFERD/version.py`
2. Update `CHANGELOG.md`
3. Commit changes to `master` with message `Bump version to <version>` (e. g. `Bump version to 3.2.5`)
4. Create annotated tag named `v<version>` (e. g. `v3.2.5`)
- Copy changes from changelog
- Remove `#` symbols (which git would interpret as comments)
- As the first line, add `Version <version> - <date>` (e. g. `Version 3.2.5 - 2021-05-24`)
- Leave the second line empty
5. Fast-forward `latest` to `master`
6. Push `master`, `latest` and the new tag
Example tag annotation:
```
Version 3.2.5 - 2021-05-24
Added
- Support for concurrent downloads
- Support for proper config files
- This changelog
Changed
- Rewrote almost everything
- Redesigned CLI
Removed
- Backwards compatibility with 2.x
```
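The corresponding git commands might look roughly like this (the version number
is only an example):
```
$ git commit -am "Bump version to 3.2.5"
$ git tag -a v3.2.5
$ git branch -f latest master
$ git push origin master latest v3.2.5
```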

@@ -1,4 +1,6 @@
-Copyright 2019-2020 Garmelon, I-Al-Istannen, danstooamerican, pavelzw, TheChristophe, Scriptim
+Copyright 2019-2021 Garmelon, I-Al-Istannen, danstooamerican, pavelzw,
+TheChristophe, Scriptim, thelukasprobst, Toorero,
+Mr-Pine
 Permission is hereby granted, free of charge, to any person obtaining a copy of
 this software and associated documentation files (the "Software"), to deal in

@@ -1,8 +0,0 @@
# pylint: disable=invalid-name
"""
This module exports only what you need for a basic configuration. If you want a
more complex configuration, you need to import the other submodules manually.
"""
from .pferd import Pferd

PFERD/__main__.py Normal file
@@ -0,0 +1,169 @@
import argparse
import asyncio
import configparser
import os
import sys
from pathlib import Path
from .auth import AuthLoadError
from .cli import PARSER, ParserLoadError, load_default_section
from .config import Config, ConfigDumpError, ConfigLoadError, ConfigOptionError
from .logging import log
from .pferd import Pferd, PferdLoadError
from .transformer import RuleParseError
def load_config_parser(args: argparse.Namespace) -> configparser.ConfigParser:
log.explain_topic("Loading config")
parser = configparser.ConfigParser(interpolation=None)
if args.command is None:
log.explain("No CLI command specified, loading config from file")
Config.load_parser(parser, path=args.config)
else:
log.explain("CLI command specified, loading config from its arguments")
if args.command:
args.command(args, parser)
load_default_section(args, parser)
return parser
def load_config(args: argparse.Namespace) -> Config:
try:
return Config(load_config_parser(args))
except ConfigLoadError as e:
log.error(str(e))
log.error_contd(e.reason)
sys.exit(1)
except ParserLoadError as e:
log.error(str(e))
sys.exit(1)
def configure_logging_from_args(args: argparse.Namespace) -> None:
if args.explain is not None:
log.output_explain = args.explain
if args.status is not None:
log.output_status = args.status
if args.show_not_deleted is not None:
log.output_not_deleted = args.show_not_deleted
if args.report is not None:
log.output_report = args.report
# We want to prevent any unnecessary output if we're printing the config to
# stdout, otherwise it would not be a valid config file.
if args.dump_config_to == "-":
log.output_explain = False
log.output_status = False
log.output_report = False
def configure_logging_from_config(args: argparse.Namespace, config: Config) -> None:
# In configure_logging_from_args(), all normal logging is already disabled
# whenever we dump the config. We don't want to override that decision with
# values from the config file.
if args.dump_config_to == "-":
return
try:
if args.explain is None:
log.output_explain = config.default_section.explain()
if args.status is None:
log.output_status = config.default_section.status()
if args.report is None:
log.output_report = config.default_section.report()
if args.show_not_deleted is None:
log.output_not_deleted = config.default_section.show_not_deleted()
except ConfigOptionError as e:
log.error(str(e))
sys.exit(1)
def dump_config(args: argparse.Namespace, config: Config) -> None:
log.explain_topic("Dumping config")
if args.dump_config and args.dump_config_to is not None:
log.error("--dump-config and --dump-config-to can't be specified at the same time")
sys.exit(1)
try:
if args.dump_config:
config.dump()
elif args.dump_config_to == "-":
config.dump_to_stdout()
else:
config.dump(Path(args.dump_config_to))
except ConfigDumpError as e:
log.error(str(e))
log.error_contd(e.reason)
sys.exit(1)
def main() -> None:
args = PARSER.parse_args()
# Configuring logging happens in two stages because CLI args have
# precedence over config file options and loading the config already
# produces some kinds of log messages (usually only explain()-s).
configure_logging_from_args(args)
config = load_config(args)
# Now, after loading the config file, we can apply its logging settings in
# all places that were not already covered by CLI args.
configure_logging_from_config(args, config)
if args.dump_config or args.dump_config_to is not None:
dump_config(args, config)
sys.exit()
try:
pferd = Pferd(config, args.crawler, args.skip)
except PferdLoadError as e:
log.unlock()
log.error(str(e))
sys.exit(1)
try:
if os.name == "nt":
# A "workaround" for the windows event loop somehow crashing after
# asyncio.run() completes. See:
# https://bugs.python.org/issue39232
# https://github.com/encode/httpx/issues/914#issuecomment-780023632
# TODO Fix this properly
loop = asyncio.get_event_loop()
loop.run_until_complete(pferd.run(args.debug_transforms))
loop.run_until_complete(asyncio.sleep(1))
loop.close()
else:
asyncio.run(pferd.run(args.debug_transforms))
except (ConfigOptionError, AuthLoadError) as e:
log.unlock()
log.error(str(e))
sys.exit(1)
except RuleParseError as e:
log.unlock()
e.pretty_print()
sys.exit(1)
except KeyboardInterrupt:
log.unlock()
log.explain_topic("Interrupted, exiting immediately")
log.explain("Open files and connections are left for the OS to clean up")
pferd.print_report()
# TODO Clean up tmp files
# And when those files *do* actually get cleaned up properly,
# reconsider if this should really exit with 1
sys.exit(1)
except Exception:
log.unlock()
log.unexpected_exception()
pferd.print_report()
sys.exit(1)
else:
pferd.print_report()
if __name__ == "__main__":
main()

PFERD/auth/__init__.py Normal file
@@ -0,0 +1,29 @@
from configparser import SectionProxy
from typing import Callable, Dict
from ..config import Config
from .authenticator import Authenticator, AuthError, AuthLoadError, AuthSection # noqa: F401
from .credential_file import CredentialFileAuthenticator, CredentialFileAuthSection
from .keyring import KeyringAuthenticator, KeyringAuthSection
from .pass_ import PassAuthenticator, PassAuthSection
from .simple import SimpleAuthenticator, SimpleAuthSection
from .tfa import TfaAuthenticator
AuthConstructor = Callable[[
str, # Name (without the "auth:" prefix)
SectionProxy, # Authenticator's section of global config
Config, # Global config
], Authenticator]
AUTHENTICATORS: Dict[str, AuthConstructor] = {
"credential-file": lambda n, s, c:
CredentialFileAuthenticator(n, CredentialFileAuthSection(s), c),
"keyring": lambda n, s, c:
KeyringAuthenticator(n, KeyringAuthSection(s)),
"pass": lambda n, s, c:
PassAuthenticator(n, PassAuthSection(s)),
"simple": lambda n, s, c:
SimpleAuthenticator(n, SimpleAuthSection(s)),
"tfa": lambda n, s, c:
TfaAuthenticator(n),
}

@@ -0,0 +1,80 @@
from abc import ABC, abstractmethod
from typing import Tuple
from ..config import Section
class AuthLoadError(Exception):
pass
class AuthError(Exception):
pass
class AuthSection(Section):
def type(self) -> str:
value = self.s.get("type")
if value is None:
self.missing_value("type")
return value
class Authenticator(ABC):
def __init__(self, name: str) -> None:
"""
Initialize an authenticator from its name and its section in the config
file.
If you are writing your own constructor for your own authenticator,
make sure to call this constructor first (via super().__init__).
May throw an AuthLoadError.
"""
self.name = name
@abstractmethod
async def credentials(self) -> Tuple[str, str]:
pass
async def username(self) -> str:
username, _ = await self.credentials()
return username
async def password(self) -> str:
_, password = await self.credentials()
return password
def invalidate_credentials(self) -> None:
"""
Tell the authenticator that some or all of its credentials are invalid.
Authenticators should overwrite this function if they have a way to
deal with this issue that is likely to result in valid credentials
(e. g. prompting the user).
"""
raise AuthError("Invalid credentials")
def invalidate_username(self) -> None:
"""
Tell the authenticator that specifically its username is invalid.
Authenticators should overwrite this function if they have a way to
deal with this issue that is likely to result in valid credentials
(e. g. prompting the user).
"""
raise AuthError("Invalid username")
def invalidate_password(self) -> None:
"""
Tell the authenticator that specifically its password is invalid.
Authenticators should overwrite this function if they have a way to
deal with this issue that is likely to result in valid credentials
(e. g. prompting the user).
"""
raise AuthError("Invalid password")

@@ -0,0 +1,46 @@
from pathlib import Path
from typing import Tuple
from ..config import Config
from ..utils import fmt_real_path
from .authenticator import Authenticator, AuthLoadError, AuthSection
class CredentialFileAuthSection(AuthSection):
def path(self) -> Path:
value = self.s.get("path")
if value is None:
self.missing_value("path")
return Path(value)
class CredentialFileAuthenticator(Authenticator):
def __init__(self, name: str, section: CredentialFileAuthSection, config: Config) -> None:
super().__init__(name)
path = config.default_section.working_dir() / section.path()
try:
with open(path, encoding="utf-8") as f:
lines = list(f)
except UnicodeDecodeError:
raise AuthLoadError(f"Credential file at {fmt_real_path(path)} is not encoded using UTF-8")
except OSError as e:
raise AuthLoadError(f"No credential file at {fmt_real_path(path)}") from e
if len(lines) != 2:
raise AuthLoadError("Credential file must be two lines long")
[uline, pline] = lines
uline = uline[:-1] # Remove trailing newline
if pline.endswith("\n"):
pline = pline[:-1]
if not uline.startswith("username="):
raise AuthLoadError("First line must start with 'username='")
if not pline.startswith("password="):
raise AuthLoadError("Second line must start with 'password='")
self._username = uline[9:]
self._password = pline[9:]
async def credentials(self) -> Tuple[str, str]:
return self._username, self._password

PFERD/auth/keyring.py Normal file
@@ -0,0 +1,65 @@
from typing import Optional, Tuple
import keyring
from ..logging import log
from ..utils import agetpass, ainput
from ..version import NAME
from .authenticator import Authenticator, AuthError, AuthSection
class KeyringAuthSection(AuthSection):
def username(self) -> Optional[str]:
return self.s.get("username")
def keyring_name(self) -> str:
return self.s.get("keyring_name", fallback=NAME)
class KeyringAuthenticator(Authenticator):
def __init__(self, name: str, section: KeyringAuthSection) -> None:
super().__init__(name)
self._username = section.username()
self._password: Optional[str] = None
self._keyring_name = section.keyring_name()
self._password_invalidated = False
self._username_fixed = section.username() is not None
async def credentials(self) -> Tuple[str, str]:
# Request the username
if self._username is None:
async with log.exclusive_output():
self._username = await ainput("Username: ")
# First try looking it up in the keyring.
# Do not look it up if it was invalidated - we want to re-prompt in this case
if self._password is None and not self._password_invalidated:
self._password = keyring.get_password(self._keyring_name, self._username)
# If that fails it wasn't saved in the keyring - we need to
# read it from the user and store it
if self._password is None:
async with log.exclusive_output():
self._password = await agetpass("Password: ")
keyring.set_password(self._keyring_name, self._username, self._password)
self._password_invalidated = False
return self._username, self._password
def invalidate_credentials(self) -> None:
if not self._username_fixed:
self.invalidate_username()
self.invalidate_password()
def invalidate_username(self) -> None:
if self._username_fixed:
raise AuthError("Configured username is invalid")
else:
self._username = None
def invalidate_password(self) -> None:
self._password = None
self._password_invalidated = True

PFERD/auth/pass_.py Normal file
@@ -0,0 +1,98 @@
import re
import subprocess
from typing import List, Tuple
from ..logging import log
from .authenticator import Authenticator, AuthError, AuthSection
class PassAuthSection(AuthSection):
def passname(self) -> str:
if (value := self.s.get("passname")) is None:
self.missing_value("passname")
return value
def username_prefixes(self) -> List[str]:
value = self.s.get("username_prefixes", "login,username,user")
return [prefix.lower() for prefix in value.split(",")]
def password_prefixes(self) -> List[str]:
value = self.s.get("password_prefixes", "password,pass,secret")
return [prefix.lower() for prefix in value.split(",")]
class PassAuthenticator(Authenticator):
PREFIXED_LINE_RE = r"([a-zA-Z]+):\s?(.*)" # to be used with fullmatch
def __init__(self, name: str, section: PassAuthSection) -> None:
super().__init__(name)
self._passname = section.passname()
self._username_prefixes = section.username_prefixes()
self._password_prefixes = section.password_prefixes()
async def credentials(self) -> Tuple[str, str]:
log.explain_topic("Obtaining credentials from pass")
try:
log.explain(f"Calling 'pass show {self._passname}'")
result = subprocess.check_output(["pass", "show", self._passname], text=True)
except subprocess.CalledProcessError as e:
raise AuthError(f"Failed to get password info from {self._passname}: {e}")
prefixed = {}
unprefixed = []
for line in result.strip().splitlines():
if match := re.fullmatch(self.PREFIXED_LINE_RE, line):
prefix = match.group(1).lower()
value = match.group(2)
log.explain(f"Found prefixed line {line!r} with prefix {prefix!r}, value {value!r}")
if prefix in prefixed:
raise AuthError(f"Prefix {prefix} specified multiple times")
prefixed[prefix] = value
else:
log.explain(f"Found unprefixed line {line!r}")
unprefixed.append(line)
username = None
for prefix in self._username_prefixes:
log.explain(f"Looking for username at prefix {prefix!r}")
if prefix in prefixed:
username = prefixed[prefix]
log.explain(f"Found username {username!r}")
break
password = None
for prefix in self._password_prefixes:
log.explain(f"Looking for password at prefix {prefix!r}")
if prefix in prefixed:
password = prefixed[prefix]
log.explain(f"Found password {password!r}")
break
if password is None and username is None:
log.explain("No username and password found so far")
log.explain("Using first unprefixed line as password")
log.explain("Using second unprefixed line as username")
elif password is None:
log.explain("No password found so far")
log.explain("Using first unprefixed line as password")
elif username is None:
log.explain("No username found so far")
log.explain("Using first unprefixed line as username")
if password is None:
if not unprefixed:
log.explain("Not enough unprefixed lines left")
raise AuthError("Password could not be determined")
password = unprefixed.pop(0)
log.explain(f"Found password {password!r}")
if username is None:
if not unprefixed:
log.explain("Not enough unprefixed lines left")
raise AuthError("Username could not be determined")
username = unprefixed.pop(0)
log.explain(f"Found username {username!r}")
return username, password

PFERD/auth/simple.py Normal file
@@ -0,0 +1,62 @@
from typing import Optional, Tuple
from ..logging import log
from ..utils import agetpass, ainput
from .authenticator import Authenticator, AuthError, AuthSection
class SimpleAuthSection(AuthSection):
def username(self) -> Optional[str]:
return self.s.get("username")
def password(self) -> Optional[str]:
return self.s.get("password")
class SimpleAuthenticator(Authenticator):
def __init__(self, name: str, section: SimpleAuthSection) -> None:
super().__init__(name)
self._username = section.username()
self._password = section.password()
self._username_fixed = self._username is not None
self._password_fixed = self._password is not None
async def credentials(self) -> Tuple[str, str]:
if self._username is not None and self._password is not None:
return self._username, self._password
async with log.exclusive_output():
if self._username is None:
self._username = await ainput("Username: ")
else:
print(f"Username: {self._username}")
if self._password is None:
self._password = await agetpass("Password: ")
# Intentionally returned inside the context manager so we know
# they're both not None
return self._username, self._password
def invalidate_credentials(self) -> None:
if self._username_fixed and self._password_fixed:
raise AuthError("Configured credentials are invalid")
if not self._username_fixed:
self._username = None
if not self._password_fixed:
self._password = None
def invalidate_username(self) -> None:
if self._username_fixed:
raise AuthError("Configured username is invalid")
else:
self._username = None
def invalidate_password(self) -> None:
if self._password_fixed:
raise AuthError("Configured password is invalid")
else:
self._password = None

PFERD/auth/tfa.py Normal file
@@ -0,0 +1,30 @@
from typing import Tuple
from ..logging import log
from ..utils import ainput
from .authenticator import Authenticator, AuthError
class TfaAuthenticator(Authenticator):
def __init__(self, name: str) -> None:
super().__init__(name)
async def username(self) -> str:
raise AuthError("TFA authenticator does not support usernames")
async def password(self) -> str:
async with log.exclusive_output():
code = await ainput("TFA code: ")
return code
async def credentials(self) -> Tuple[str, str]:
raise AuthError("TFA authenticator does not support usernames")
def invalidate_username(self) -> None:
raise AuthError("TFA authenticator does not support usernames")
def invalidate_password(self) -> None:
pass
def invalidate_credentials(self) -> None:
pass

@@ -1,214 +0,0 @@
"""
General authenticators useful in many situations
"""
import getpass
import logging
from typing import Optional, Tuple
from .logging import PrettyLogger
LOGGER = logging.getLogger(__name__)
PRETTY = PrettyLogger(LOGGER)
try:
import keyring
except ImportError:
pass
class TfaAuthenticator:
# pylint: disable=too-few-public-methods
"""
An authenticator for a TFA token. Always prompts the user, as the token can not be cached.
"""
def __init__(self, reason: str):
"""
Create a new tfa authenticator.
Arguments:
reason {str} -- the reason for obtaining the credentials
"""
self._reason = reason
def get_token(self) -> str:
# pylint: disable=no-self-use
"""
Prompts the user for the token and returns it.
"""
print(f"Enter credentials ({self._reason})")
return getpass.getpass("TFA Token: ")
class UserPassAuthenticator:
"""
An authenticator for username-password combinations that prompts the user
for missing information.
"""
def __init__(
self,
reason: str,
username: Optional[str] = None,
password: Optional[str] = None,
) -> None:
"""
reason - what the credentials are used for
username - the username (if already known)
password - the password (if already known)
"""
self._reason = reason
self._given_username = username
self._given_password = password
self._username = username
self._password = password
def get_credentials(self) -> Tuple[str, str]:
"""
Returns a tuple (username, password). Prompts user for username or
password when necessary.
"""
if self._username is None and self._given_username is not None:
self._username = self._given_username
if self._password is None and self._given_password is not None:
self._password = self._given_password
if self._username is None or self._password is None:
print(f"Enter credentials ({self._reason})")
username: str
if self._username is None:
username = input("Username: ")
self._username = username
else:
username = self._username
password: str
if self._password is None:
password = getpass.getpass(prompt="Password: ")
self._password = password
else:
password = self._password
return (username, password)
@property
def username(self) -> str:
"""
The username. Accessing this property may cause the authenticator to
prompt the user.
"""
(username, _) = self.get_credentials()
return username
@property
def password(self) -> str:
"""
The password. Accessing this property may cause the authenticator to
prompt the user.
"""
(_, password) = self.get_credentials()
return password
def invalidate_credentials(self) -> None:
"""
Marks the credentials as invalid. If only a username was supplied in
the constructor, assumes that the username is valid and only the
password is invalid. If only a password was supplied in the
constructor, assumes that the password is valid and only the username
is invalid. Otherwise, assumes that username and password are both
invalid.
"""
self._username = None
self._password = None
if self._given_username is not None and self._given_password is not None:
self._given_username = None
self._given_password = None
class KeyringAuthenticator(UserPassAuthenticator):
"""
An authenticator for username-password combinations that stores the
password using the system keyring service and prompts the user for missing
information.
"""
def get_credentials(self) -> Tuple[str, str]:
"""
Returns a tuple (username, password). Prompts user for username or
password when necessary.
"""
if self._username is None and self._given_username is not None:
self._username = self._given_username
if self._password is None and self._given_password is not None:
self._password = self._given_password
if self._username is not None and self._password is None:
self._load_password()
if self._username is None or self._password is None:
print(f"Enter credentials ({self._reason})")
username: str
if self._username is None:
username = input("Username: ")
self._username = username
else:
username = self._username
if self._password is None:
self._load_password()
password: str
if self._password is None:
password = getpass.getpass(prompt="Password: ")
self._password = password
self._save_password()
else:
password = self._password
return (username, password)
def _load_password(self) -> None:
"""
Loads the saved password associated with self._username from the system
keyring service (or None if no password has been saved yet) and stores
it in self._password.
"""
self._password = keyring.get_password("pferd-ilias", self._username)
def _save_password(self) -> None:
"""
Saves self._password to the system keyring service and associates it
with self._username.
"""
keyring.set_password("pferd-ilias", self._username, self._password)
def invalidate_credentials(self) -> None:
"""
Marks the credentials as invalid. If only a username was supplied in
the constructor, assumes that the username is valid and only the
password is invalid. If only a password was supplied in the
constructor, assumes that the password is valid and only the username
is invalid. Otherwise, assumes that username and password are both
invalid.
"""
try:
keyring.delete_password("pferd-ilias", self._username)
except keyring.errors.PasswordDeleteError:
pass
super().invalidate_credentials()

PFERD/cli/__init__.py Normal file
@@ -0,0 +1,13 @@
# isort: skip_file
# The order of imports matters because each command module registers itself
# with the parser from ".parser" and the import order affects the order in
# which they appear in the help. Because of this, isort is disabled for this
# file. Also, since we're reexporting or just using the side effect of
# importing itself, we get a few linting warnings, which we're disabling as
# well.
from . import command_local # noqa: F401 imported but unused
from . import command_kit_ilias_web # noqa: F401 imported but unused
from . import command_kit_ipd # noqa: F401 imported but unused
from .parser import PARSER, ParserLoadError, load_default_section # noqa: F401 imported but unused

@@ -0,0 +1,120 @@
import argparse
import configparser
from pathlib import Path
from ..crawl.ilias.file_templates import Links
from ..logging import log
from .parser import (CRAWLER_PARSER, SUBPARSERS, BooleanOptionalAction, ParserLoadError, load_crawler,
show_value_error)
SUBPARSER = SUBPARSERS.add_parser(
"kit-ilias-web",
parents=[CRAWLER_PARSER],
)
GROUP = SUBPARSER.add_argument_group(
title="kit-ilias-web crawler arguments",
description="arguments for the 'kit-ilias-web' crawler",
)
GROUP.add_argument(
"target",
type=str,
metavar="TARGET",
help="course id, 'desktop', or ILIAS URL to crawl"
)
GROUP.add_argument(
"output",
type=Path,
metavar="OUTPUT",
help="output directory"
)
GROUP.add_argument(
"--username", "-u",
type=str,
metavar="USERNAME",
help="user name for authentication"
)
GROUP.add_argument(
"--keyring",
action=BooleanOptionalAction,
help="use the system keyring to store and retrieve passwords"
)
GROUP.add_argument(
"--credential-file",
type=Path,
metavar="PATH",
help="read username and password from a credential file"
)
GROUP.add_argument(
"--links",
type=show_value_error(Links.from_string),
metavar="OPTION",
help="how to represent external links"
)
GROUP.add_argument(
"--link-redirect-delay",
type=int,
metavar="SECONDS",
help="time before 'fancy' links redirect to their target (-1 to disable)"
)
GROUP.add_argument(
"--videos",
action=BooleanOptionalAction,
help="crawl and download videos"
)
GROUP.add_argument(
"--forums",
action=BooleanOptionalAction,
help="crawl and download forum posts"
)
GROUP.add_argument(
"--http-timeout", "-t",
type=float,
metavar="SECONDS",
help="timeout for all HTTP requests"
)
def load(
args: argparse.Namespace,
parser: configparser.ConfigParser,
) -> None:
log.explain("Creating config for command 'kit-ilias-web'")
parser["crawl:ilias"] = {}
section = parser["crawl:ilias"]
load_crawler(args, section)
section["type"] = "kit-ilias-web"
section["target"] = str(args.target)
section["output_dir"] = str(args.output)
section["auth"] = "auth:ilias"
if args.links is not None:
section["links"] = str(args.links.value)
if args.link_redirect_delay is not None:
section["link_redirect_delay"] = str(args.link_redirect_delay)
if args.videos is not None:
section["videos"] = "yes" if args.videos else "no"
if args.forums is not None:
section["forums"] = "yes" if args.forums else "no"
if args.http_timeout is not None:
section["http_timeout"] = str(args.http_timeout)
parser["auth:ilias"] = {}
auth_section = parser["auth:ilias"]
if args.credential_file is not None:
if args.username is not None:
raise ParserLoadError("--credential-file and --username can't be used together")
if args.keyring:
raise ParserLoadError("--credential-file and --keyring can't be used together")
auth_section["type"] = "credential-file"
auth_section["path"] = str(args.credential_file)
elif args.keyring:
auth_section["type"] = "keyring"
else:
auth_section["type"] = "simple"
if args.username is not None:
auth_section["username"] = args.username
SUBPARSER.set_defaults(command=load)

@@ -0,0 +1,54 @@
import argparse
import configparser
from pathlib import Path
from ..logging import log
from .parser import CRAWLER_PARSER, SUBPARSERS, load_crawler
SUBPARSER = SUBPARSERS.add_parser(
"kit-ipd",
parents=[CRAWLER_PARSER],
)
GROUP = SUBPARSER.add_argument_group(
title="kit ipd crawler arguments",
description="arguments for the 'kit-ipd' crawler",
)
GROUP.add_argument(
"--link-regex",
type=str,
metavar="REGEX",
help="href-matching regex to identify downloadable files"
)
GROUP.add_argument(
"target",
type=str,
metavar="TARGET",
help="url to crawl"
)
GROUP.add_argument(
"output",
type=Path,
metavar="OUTPUT",
help="output directory"
)
def load(
args: argparse.Namespace,
parser: configparser.ConfigParser,
) -> None:
log.explain("Creating config for command 'kit-ipd'")
parser["crawl:kit-ipd"] = {}
section = parser["crawl:kit-ipd"]
load_crawler(args, section)
section["type"] = "kit-ipd"
section["target"] = str(args.target)
section["output_dir"] = str(args.output)
if args.link_regex:
section["link_regex"] = str(args.link_regex)
SUBPARSER.set_defaults(command=load)

@@ -0,0 +1,70 @@
import argparse
import configparser
from pathlib import Path
from ..logging import log
from .parser import CRAWLER_PARSER, SUBPARSERS, load_crawler
SUBPARSER = SUBPARSERS.add_parser(
"local",
parents=[CRAWLER_PARSER],
)
GROUP = SUBPARSER.add_argument_group(
title="local crawler arguments",
description="arguments for the 'local' crawler",
)
GROUP.add_argument(
"target",
type=Path,
metavar="TARGET",
help="directory to crawl"
)
GROUP.add_argument(
"output",
type=Path,
metavar="OUTPUT",
help="output directory"
)
GROUP.add_argument(
"--crawl-delay",
type=float,
metavar="SECONDS",
help="artificial delay to simulate for crawl requests"
)
GROUP.add_argument(
"--download-delay",
type=float,
metavar="SECONDS",
help="artificial delay to simulate for download requests"
)
GROUP.add_argument(
"--download-speed",
type=int,
metavar="BYTES_PER_SECOND",
help="download speed to simulate"
)
def load(
args: argparse.Namespace,
parser: configparser.ConfigParser,
) -> None:
log.explain("Creating config for command 'local'")
parser["crawl:local"] = {}
section = parser["crawl:local"]
load_crawler(args, section)
section["type"] = "local"
section["target"] = str(args.target)
section["output_dir"] = str(args.output)
if args.crawl_delay is not None:
section["crawl_delay"] = str(args.crawl_delay)
if args.download_delay is not None:
section["download_delay"] = str(args.download_delay)
if args.download_speed is not None:
section["download_speed"] = str(args.download_speed)
SUBPARSER.set_defaults(command=load)

PFERD/cli/parser.py Normal file
@@ -0,0 +1,245 @@
import argparse
import configparser
from argparse import ArgumentTypeError
from pathlib import Path
from typing import Any, Callable, List, Optional, Sequence, Union
from ..output_dir import OnConflict, Redownload
from ..version import NAME, VERSION
class ParserLoadError(Exception):
pass
# TODO Replace with argparse version when updating to 3.9?
class BooleanOptionalAction(argparse.Action):
def __init__(
self,
option_strings: List[str],
dest: Any,
default: Any = None,
type: Any = None,
choices: Any = None,
required: Any = False,
help: Any = None,
metavar: Any = None,
):
if len(option_strings) != 1:
raise ValueError("There must be exactly one option string")
[self.name] = option_strings
if not self.name.startswith("--"):
raise ValueError(f"{self.name!r} doesn't start with '--'")
if self.name.startswith("--no-"):
raise ValueError(f"{self.name!r} starts with '--no-'")
options = [self.name, "--no-" + self.name[2:]]
super().__init__(
options,
dest,
nargs=0,
default=default,
type=type,
choices=choices,
required=required,
help=help,
metavar=metavar,
)
def __call__(
self,
parser: argparse.ArgumentParser,
namespace: argparse.Namespace,
values: Union[str, Sequence[Any], None],
option_string: Optional[str] = None,
) -> None:
if option_string and option_string in self.option_strings:
value = not option_string.startswith("--no-")
setattr(namespace, self.dest, value)
def format_usage(self) -> str:
return "--[no-]" + self.name[2:]
def show_value_error(inner: Callable[[str], Any]) -> Callable[[str], Any]:
"""
Some validation functions (like the from_string in our enums) raise a ValueError.
Argparse only pretty-prints ArgumentTypeErrors though, so we need to wrap our ValueErrors.
"""
def wrapper(input: str) -> Any:
try:
return inner(input)
except ValueError as e:
raise ArgumentTypeError(e)
return wrapper
CRAWLER_PARSER = argparse.ArgumentParser(add_help=False)
CRAWLER_PARSER_GROUP = CRAWLER_PARSER.add_argument_group(
title="general crawler arguments",
description="arguments common to all crawlers",
)
CRAWLER_PARSER_GROUP.add_argument(
"--redownload", "-r",
type=show_value_error(Redownload.from_string),
metavar="OPTION",
help="when to download a file that's already present locally"
)
CRAWLER_PARSER_GROUP.add_argument(
"--on-conflict",
type=show_value_error(OnConflict.from_string),
metavar="OPTION",
help="what to do when local and remote files or directories differ"
)
CRAWLER_PARSER_GROUP.add_argument(
"--transform", "-T",
action="append",
type=str,
metavar="RULE",
help="add a single transformation rule. Can be specified multiple times"
)
CRAWLER_PARSER_GROUP.add_argument(
"--tasks", "-n",
type=int,
metavar="N",
help="maximum number of concurrent tasks (crawling, downloading)"
)
CRAWLER_PARSER_GROUP.add_argument(
"--downloads", "-N",
type=int,
metavar="N",
help="maximum number of tasks that may download data at the same time"
)
CRAWLER_PARSER_GROUP.add_argument(
"--task-delay", "-d",
type=float,
metavar="SECONDS",
help="time the crawler should wait between subsequent tasks"
)
CRAWLER_PARSER_GROUP.add_argument(
"--windows-paths",
action=BooleanOptionalAction,
help="whether to repair invalid paths on windows"
)
def load_crawler(
args: argparse.Namespace,
section: configparser.SectionProxy,
) -> None:
if args.redownload is not None:
section["redownload"] = args.redownload.value
if args.on_conflict is not None:
section["on_conflict"] = args.on_conflict.value
if args.transform is not None:
section["transform"] = "\n" + "\n".join(args.transform)
if args.tasks is not None:
section["tasks"] = str(args.tasks)
if args.downloads is not None:
section["downloads"] = str(args.downloads)
if args.task_delay is not None:
section["task_delay"] = str(args.task_delay)
if args.windows_paths is not None:
section["windows_paths"] = "yes" if args.windows_paths else "no"
PARSER = argparse.ArgumentParser()
PARSER.set_defaults(command=None)
PARSER.add_argument(
"--version",
action="version",
version=f"{NAME} {VERSION} (https://github.com/Garmelon/PFERD)",
)
PARSER.add_argument(
"--config", "-c",
type=Path,
metavar="PATH",
help="custom config file"
)
PARSER.add_argument(
"--dump-config",
action="store_true",
help="dump current configuration to the default config path and exit"
)
PARSER.add_argument(
"--dump-config-to",
metavar="PATH",
help="dump current configuration to a file and exit."
" Use '-' as path to print to stdout instead"
)
PARSER.add_argument(
"--debug-transforms",
action="store_true",
help="apply transform rules to files of previous run"
)
PARSER.add_argument(
"--crawler", "-C",
action="append",
type=str,
metavar="NAME",
help="only execute a single crawler."
" Can be specified multiple times to execute multiple crawlers"
)
PARSER.add_argument(
"--skip", "-S",
action="append",
type=str,
metavar="NAME",
help="don't execute this particular crawler."
" Can be specified multiple times to skip multiple crawlers"
)
PARSER.add_argument(
"--working-dir",
type=Path,
metavar="PATH",
help="custom working directory"
)
PARSER.add_argument(
"--explain",
action=BooleanOptionalAction,
help="log and explain in detail what PFERD is doing"
)
PARSER.add_argument(
"--status",
action=BooleanOptionalAction,
help="print status updates while PFERD is crawling"
)
PARSER.add_argument(
"--report",
action=BooleanOptionalAction,
help="print a report of all local changes before exiting"
)
PARSER.add_argument(
"--share-cookies",
action=BooleanOptionalAction,
help="whether crawlers should share cookies where applicable"
)
PARSER.add_argument(
"--show-not-deleted",
action=BooleanOptionalAction,
help="print messages in status and report when PFERD did not delete a local only file"
)
def load_default_section(
args: argparse.Namespace,
parser: configparser.ConfigParser,
) -> None:
section = parser[parser.default_section]
if args.working_dir is not None:
section["working_dir"] = str(args.working_dir)
if args.explain is not None:
section["explain"] = "yes" if args.explain else "no"
if args.status is not None:
section["status"] = "yes" if args.status else "no"
if args.report is not None:
section["report"] = "yes" if args.report else "no"
if args.share_cookies is not None:
section["share_cookies"] = "yes" if args.share_cookies else "no"
if args.show_not_deleted is not None:
section["show_not_deleted"] = "yes" if args.show_not_deleted else "no"
SUBPARSERS = PARSER.add_subparsers(title="crawlers")

PFERD/config.py Normal file
@@ -0,0 +1,193 @@
import asyncio
import os
import sys
from configparser import ConfigParser, SectionProxy
from pathlib import Path
from typing import Any, List, NoReturn, Optional, Tuple
from rich.markup import escape
from .logging import log
from .utils import fmt_real_path, prompt_yes_no
class ConfigLoadError(Exception):
"""
Something went wrong while loading the config from a file.
"""
def __init__(self, path: Path, reason: str):
super().__init__(f"Failed to load config from {fmt_real_path(path)}")
self.path = path
self.reason = reason
class ConfigOptionError(Exception):
"""
An option in the config file has an invalid or missing value.
"""
def __init__(self, section: str, key: str, desc: str):
super().__init__(f"Section {section!r}, key {key!r}: {desc}")
self.section = section
self.key = key
self.desc = desc
class ConfigDumpError(Exception):
def __init__(self, path: Path, reason: str):
super().__init__(f"Failed to dump config to {fmt_real_path(path)}")
self.path = path
self.reason = reason
class Section:
"""
Base class for the crawler and auth section classes.
"""
def __init__(self, section: SectionProxy):
self.s = section
def error(self, key: str, desc: str) -> NoReturn:
raise ConfigOptionError(self.s.name, key, desc)
def invalid_value(
self,
key: str,
value: Any,
reason: Optional[str],
) -> NoReturn:
if reason is None:
self.error(key, f"Invalid value {value!r}")
else:
self.error(key, f"Invalid value {value!r}: {reason}")
def missing_value(self, key: str) -> NoReturn:
self.error(key, "Missing value")
class DefaultSection(Section):
def working_dir(self) -> Path:
# TODO Change to working dir instead of manually prepending it to paths
pathstr = self.s.get("working_dir", ".")
return Path(pathstr).expanduser()
def explain(self) -> bool:
return self.s.getboolean("explain", fallback=False)
def status(self) -> bool:
return self.s.getboolean("status", fallback=True)
def report(self) -> bool:
return self.s.getboolean("report", fallback=True)
def show_not_deleted(self) -> bool:
return self.s.getboolean("show_not_deleted", fallback=True)
def share_cookies(self) -> bool:
return self.s.getboolean("share_cookies", fallback=True)
class Config:
@staticmethod
def _default_path() -> Path:
if os.name == "posix":
return Path("~/.config/PFERD/pferd.cfg").expanduser()
elif os.name == "nt":
return Path("~/AppData/Roaming/PFERD/pferd.cfg").expanduser()
else:
return Path("~/.pferd.cfg").expanduser()
def __init__(self, parser: ConfigParser):
self._parser = parser
self._default_section = DefaultSection(parser[parser.default_section])
@property
def default_section(self) -> DefaultSection:
return self._default_section
@staticmethod
def load_parser(parser: ConfigParser, path: Optional[Path] = None) -> None:
"""
May throw a ConfigLoadError.
"""
if path:
log.explain("Path specified on CLI")
else:
log.explain("Using default path")
path = Config._default_path()
log.explain(f"Loading {fmt_real_path(path)}")
# Using config.read_file instead of config.read because config.read
# would just ignore a missing file and carry on.
try:
with open(path, encoding="utf-8") as f:
parser.read_file(f, source=str(path))
except FileNotFoundError:
raise ConfigLoadError(path, "File does not exist")
except IsADirectoryError:
raise ConfigLoadError(path, "That's a directory, not a file")
except PermissionError:
raise ConfigLoadError(path, "Insufficient permissions")
except UnicodeDecodeError:
raise ConfigLoadError(path, "File is not encoded using UTF-8")
def dump(self, path: Optional[Path] = None) -> None:
"""
May throw a ConfigDumpError.
"""
if path:
log.explain("Using custom path")
else:
log.explain("Using default path")
path = self._default_path()
log.explain(f"Dumping to {fmt_real_path(path)}")
log.print(f"[bold bright_cyan]Dumping[/] to {escape(fmt_real_path(path))}")
try:
path.parent.mkdir(parents=True, exist_ok=True)
except PermissionError:
raise ConfigDumpError(path, "Could not create parent directory")
try:
# Ensuring we don't accidentally overwrite any existing files by
# always asking before overwriting a file.
try:
# x = open for exclusive creation, failing if the file already
# exists
with open(path, "x", encoding="utf-8") as f:
self._parser.write(f)
except FileExistsError:
print("That file already exists.")
if asyncio.run(prompt_yes_no("Overwrite it?", default=False)):
with open(path, "w", encoding="utf-8") as f:
self._parser.write(f)
else:
raise ConfigDumpError(path, "File already exists")
except IsADirectoryError:
raise ConfigDumpError(path, "That's a directory, not a file")
except PermissionError:
raise ConfigDumpError(path, "Insufficient permissions")
def dump_to_stdout(self) -> None:
self._parser.write(sys.stdout)
def crawl_sections(self) -> List[Tuple[str, SectionProxy]]:
result = []
for name, proxy in self._parser.items():
if name.startswith("crawl:"):
result.append((name, proxy))
return result
def auth_sections(self) -> List[Tuple[str, SectionProxy]]:
result = []
for name, proxy in self._parser.items():
if name.startswith("auth:"):
result.append((name, proxy))
return result
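
A minimal usage sketch of the config API above (not part of this diff; the import path PFERD.config and the file name pferd.cfg are assumptions for illustration):

from configparser import ConfigParser
from pathlib import Path

from PFERD.config import Config, ConfigLoadError  # assumed import path

parser = ConfigParser()
try:
    Config.load_parser(parser, path=Path("pferd.cfg"))
except ConfigLoadError as e:
    print(e, e.reason)
    raise SystemExit(1)

config = Config(parser)
print(config.default_section.working_dir())  # defaults to Path(".")
print(config.default_section.explain())      # defaults to False
for name, _section in config.crawl_sections():
    print(f"Found crawler section {name!r}")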


@ -1,69 +0,0 @@
"""A helper for requests cookies."""
import logging
from http.cookiejar import LoadError, LWPCookieJar
from pathlib import Path
from typing import Optional
import requests
LOGGER = logging.getLogger(__name__)
class CookieJar:
"""A cookie jar that can be persisted."""
def __init__(self, cookie_file: Optional[Path] = None) -> None:
"""Create a new cookie jar at the given path.
If the path is None, the cookies will not be persisted.
"""
self._cookies: LWPCookieJar
if cookie_file is None:
self._cookies = LWPCookieJar()
else:
self._cookies = LWPCookieJar(str(cookie_file.resolve()))
@property
def cookies(self) -> LWPCookieJar:
"""Return the requests cookie jar."""
return self._cookies
def load_cookies(self) -> None:
"""Load all cookies from the file given in the constructor."""
if self._cookies.filename is None:
return
try:
LOGGER.info("Loading old cookies from %s", self._cookies.filename)
self._cookies.load(ignore_discard=True)
except (FileNotFoundError, LoadError):
LOGGER.warning(
"No valid cookie file found at %s, continuing with no cookies",
self._cookies.filename
)
def save_cookies(self, reason: Optional[str] = None) -> None:
"""Save the cookies in the file given in the constructor."""
if self._cookies.filename is None:
return
if reason is None:
LOGGER.info("Saving cookies")
else:
LOGGER.info("Saving cookies (%s)", reason)
# TODO figure out why ignore_discard is set
# TODO possibly catch a few more exceptions
self._cookies.save(ignore_discard=True)
def create_session(self) -> requests.Session:
"""Create a new session using the cookie jar."""
sess = requests.Session()
# From the request docs: "All requests code should work out of the box
# with externally provided instances of CookieJar, e.g. LWPCookieJar
# and FileCookieJar."
sess.cookies = self.cookies # type: ignore
return sess

25
PFERD/crawl/__init__.py Normal file

@ -0,0 +1,25 @@
from configparser import SectionProxy
from typing import Callable, Dict
from ..auth import Authenticator
from ..config import Config
from .crawler import Crawler, CrawlError, CrawlerSection # noqa: F401
from .ilias import KitIliasWebCrawler, KitIliasWebCrawlerSection
from .kit_ipd_crawler import KitIpdCrawler, KitIpdCrawlerSection
from .local_crawler import LocalCrawler, LocalCrawlerSection
CrawlerConstructor = Callable[[
str, # Name (without the "crawl:" prefix)
SectionProxy, # Crawler's section of global config
Config, # Global config
Dict[str, Authenticator], # Loaded authenticators by name
], Crawler]
CRAWLERS: Dict[str, CrawlerConstructor] = {
"local": lambda n, s, c, a:
LocalCrawler(n, LocalCrawlerSection(s), c),
"kit-ilias-web": lambda n, s, c, a:
KitIliasWebCrawler(n, KitIliasWebCrawlerSection(s), c, a),
"kit-ipd": lambda n, s, c, a:
KitIpdCrawler(n, KitIpdCrawlerSection(s), c),
}
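
A hedged sketch of how these constructors could be looked up from a "crawl:" section; the real wiring lives elsewhere in PFERD and is not part of this diff, so build_crawler below is purely illustrative:

from configparser import SectionProxy
from typing import Dict

def build_crawler(
    section_name: str,
    section: SectionProxy,
    config: Config,
    authenticators: Dict[str, Authenticator],
) -> Crawler:
    crawler_type = section.get("type")
    constructor = CRAWLERS.get(crawler_type)
    if constructor is None:
        raise ValueError(f"Unknown crawler type {crawler_type!r}")
    # The constructors expect the name without the "crawl:" prefix (see above)
    name = section_name[len("crawl:"):]
    return constructor(name, section, config, authenticators)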

369
PFERD/crawl/crawler.py Normal file

@ -0,0 +1,369 @@
import asyncio
import os
from abc import ABC, abstractmethod
from collections.abc import Awaitable, Coroutine
from datetime import datetime
from pathlib import Path, PurePath
from typing import Any, Callable, Dict, List, Optional, Sequence, Set, Tuple, TypeVar
from ..auth import Authenticator
from ..config import Config, Section
from ..deduplicator import Deduplicator
from ..limiter import Limiter
from ..logging import ProgressBar, log
from ..output_dir import FileSink, FileSinkToken, OnConflict, OutputDirectory, OutputDirError, Redownload
from ..report import MarkConflictError, MarkDuplicateError, Report
from ..transformer import Transformer
from ..utils import ReusableAsyncContextManager, fmt_path
class CrawlWarning(Exception):
pass
class CrawlError(Exception):
pass
Wrapped = TypeVar("Wrapped", bound=Callable[..., None])
def noncritical(f: Wrapped) -> Wrapped:
"""
Catches and logs a few noncritical exceptions occurring during the function
call, mainly CrawlWarning.
If any exception occurs during the function call, the crawler's error_free
variable is set to False. This includes noncritical exceptions.
Warning: Must only be applied to member functions of the Crawler class!
"""
def wrapper(*args: Any, **kwargs: Any) -> None:
if not (args and isinstance(args[0], Crawler)):
raise RuntimeError("@noncritical must only applied to Crawler methods")
crawler = args[0]
try:
f(*args, **kwargs)
except (CrawlWarning, OutputDirError, MarkDuplicateError, MarkConflictError) as e:
crawler.report.add_warning(str(e))
log.warn(str(e))
crawler.error_free = False
except Exception as e:
crawler.error_free = False
crawler.report.add_error(str(e))
raise
return wrapper # type: ignore
AWrapped = TypeVar("AWrapped", bound=Callable[..., Coroutine[Any, Any, Optional[Any]]])
def anoncritical(f: AWrapped) -> AWrapped:
"""
An async version of @noncritical.
Catches and logs a few noncritical exceptions occurring during the function
call, mainly CrawlWarning.
If any exception occurs during the function call, the crawler's error_free
variable is set to False. This includes noncritical exceptions.
Warning: Must only be applied to member functions of the Crawler class!
"""
async def wrapper(*args: Any, **kwargs: Any) -> Optional[Any]:
if not (args and isinstance(args[0], Crawler)):
raise RuntimeError("@anoncritical must only applied to Crawler methods")
crawler = args[0]
try:
return await f(*args, **kwargs)
except (CrawlWarning, OutputDirError, MarkDuplicateError, MarkConflictError) as e:
log.warn(str(e))
crawler.error_free = False
crawler.report.add_warning(str(e))
except Exception as e:
crawler.error_free = False
crawler.report.add_error(str(e))
raise
return None
return wrapper # type: ignore
class CrawlToken(ReusableAsyncContextManager[ProgressBar]):
def __init__(self, limiter: Limiter, path: PurePath):
super().__init__()
self._limiter = limiter
self._path = path
@property
def path(self) -> PurePath:
return self._path
async def _on_aenter(self) -> ProgressBar:
self._stack.callback(lambda: log.status("[bold cyan]", "Crawled", fmt_path(self._path)))
await self._stack.enter_async_context(self._limiter.limit_crawl())
bar = self._stack.enter_context(log.crawl_bar("[bold bright_cyan]", "Crawling", fmt_path(self._path)))
return bar
class DownloadToken(ReusableAsyncContextManager[Tuple[ProgressBar, FileSink]]):
def __init__(self, limiter: Limiter, fs_token: FileSinkToken, path: PurePath):
super().__init__()
self._limiter = limiter
self._fs_token = fs_token
self._path = path
@property
def path(self) -> PurePath:
return self._path
async def _on_aenter(self) -> Tuple[ProgressBar, FileSink]:
await self._stack.enter_async_context(self._limiter.limit_download())
sink = await self._stack.enter_async_context(self._fs_token)
# The "Downloaded ..." message is printed in the output dir, not here
bar = self._stack.enter_context(log.download_bar("[bold bright_cyan]", "Downloading",
fmt_path(self._path)))
return bar, sink
class CrawlerSection(Section):
def type(self) -> str:
value = self.s.get("type")
if value is None:
self.missing_value("type")
return value
def skip(self) -> bool:
return self.s.getboolean("skip", fallback=False)
def output_dir(self, name: str) -> Path:
# TODO Use removeprefix() after switching to 3.9
if name.startswith("crawl:"):
name = name[len("crawl:"):]
return Path(self.s.get("output_dir", name)).expanduser()
def redownload(self) -> Redownload:
value = self.s.get("redownload", "never-smart")
try:
return Redownload.from_string(value)
except ValueError as e:
self.invalid_value(
"redownload",
value,
str(e).capitalize(),
)
def on_conflict(self) -> OnConflict:
value = self.s.get("on_conflict", "prompt")
try:
return OnConflict.from_string(value)
except ValueError as e:
self.invalid_value(
"on_conflict",
value,
str(e).capitalize(),
)
def transform(self) -> str:
return self.s.get("transform", "")
def tasks(self) -> int:
value = self.s.getint("tasks", fallback=1)
if value <= 0:
self.invalid_value("tasks", value, "Must be greater than 0")
return value
def downloads(self) -> int:
tasks = self.tasks()
value = self.s.getint("downloads", fallback=None)
if value is None:
return tasks
if value <= 0:
self.invalid_value("downloads", value, "Must be greater than 0")
if value > tasks:
self.invalid_value("downloads", value, "Must not be greater than tasks")
return value
def task_delay(self) -> float:
value = self.s.getfloat("task_delay", fallback=0.0)
if value < 0:
self.invalid_value("task_delay", value, "Must not be negative")
return value
def windows_paths(self) -> bool:
on_windows = os.name == "nt"
return self.s.getboolean("windows_paths", fallback=on_windows)
def auth(self, authenticators: Dict[str, Authenticator]) -> Authenticator:
value = self.s.get("auth")
if value is None:
self.missing_value("auth")
auth = authenticators.get(value)
if auth is None:
self.invalid_value("auth", value, "No such auth section exists")
return auth
class Crawler(ABC):
def __init__(
self,
name: str,
section: CrawlerSection,
config: Config,
) -> None:
"""
Initialize a crawler from its name and its section in the config file.
If you are writing your own constructor for your own crawler, make sure
to call this constructor first (via super().__init__).
May throw a CrawlerLoadException.
"""
self.name = name
self.error_free = True
self._limiter = Limiter(
task_limit=section.tasks(),
download_limit=section.downloads(),
task_delay=section.task_delay(),
)
self._deduplicator = Deduplicator(section.windows_paths())
self._transformer = Transformer(section.transform())
self._output_dir = OutputDirectory(
config.default_section.working_dir() / section.output_dir(name),
section.redownload(),
section.on_conflict(),
)
@property
def report(self) -> Report:
return self._output_dir.report
@property
def prev_report(self) -> Optional[Report]:
return self._output_dir.prev_report
@staticmethod
async def gather(awaitables: Sequence[Awaitable[Any]]) -> List[Any]:
"""
Similar to asyncio.gather. However, in the case of an exception, all
still running tasks are cancelled and the exception is rethrown.
This should always be preferred over asyncio.gather in crawler code so
that an exception like CrawlError may actually stop the crawler.
"""
tasks = [asyncio.ensure_future(aw) for aw in awaitables]
result = asyncio.gather(*tasks)
try:
return await result
except: # noqa: E722
for task in tasks:
task.cancel()
raise
async def crawl(self, path: PurePath) -> Optional[CrawlToken]:
log.explain_topic(f"Decision: Crawl {fmt_path(path)}")
path = self._deduplicator.mark(path)
self._output_dir.report.found(path)
if self._transformer.transform(path) is None:
log.explain("Answer: No")
log.status("[bold bright_black]", "Ignored", fmt_path(path))
return None
log.explain("Answer: Yes")
return CrawlToken(self._limiter, path)
async def download(
self,
path: PurePath,
mtime: Optional[datetime] = None,
redownload: Optional[Redownload] = None,
on_conflict: Optional[OnConflict] = None,
) -> Optional[DownloadToken]:
log.explain_topic(f"Decision: Download {fmt_path(path)}")
path = self._deduplicator.mark(path)
self._output_dir.report.found(path)
transformed_path = self._transformer.transform(path)
if transformed_path is None:
log.explain("Answer: No")
log.status("[bold bright_black]", "Ignored", fmt_path(path))
return None
fs_token = await self._output_dir.download(path, transformed_path, mtime, redownload, on_conflict)
if fs_token is None:
log.explain("Answer: No")
return None
log.explain("Answer: Yes")
return DownloadToken(self._limiter, fs_token, path)
async def _cleanup(self) -> None:
log.explain_topic("Decision: Clean up files")
if self.error_free:
log.explain("No warnings or errors occurred during this run")
log.explain("Answer: Yes")
await self._output_dir.cleanup()
else:
log.explain("Warnings or errors occurred during this run")
log.explain("Answer: No")
@anoncritical
async def run(self) -> None:
"""
Start the crawling process. Call this function if you want to use a
crawler.
"""
with log.show_progress():
self._output_dir.prepare()
self._output_dir.load_prev_report()
await self._run()
await self._cleanup()
self._output_dir.store_report()
@abstractmethod
async def _run(self) -> None:
"""
Override this function if you are writing a crawler.
This function must not return before all crawling is complete. To crawl
multiple things concurrently, asyncio.gather can be used.
"""
pass
def debug_transforms(self) -> None:
self._output_dir.load_prev_report()
if not self.prev_report:
log.warn("Couldn't find or load old report")
return
seen: Set[PurePath] = set()
for known in sorted(self.prev_report.found_paths):
looking_at = list(reversed(known.parents)) + [known]
for path in looking_at:
if path in seen:
continue
log.explain_topic(f"Transforming {fmt_path(path)}")
self._transformer.transform(path)
seen.add(path)
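
A hedged sketch of the subclass contract described in the docstrings above: override _run, request crawl/download tokens and write into the provided sink. The file name and payload are made up for illustration; the imports at the top of this file apply.

class ExampleCrawler(Crawler):
    async def _run(self) -> None:
        cl = await self.crawl(PurePath("."))
        if not cl:
            return  # ignored by the transform rules
        async with cl:
            await self.gather([self._download_one(PurePath("hello.txt"))])

    async def _download_one(self, path: PurePath) -> None:
        dl = await self.download(path)
        if not dl:
            return  # ignored, or already up to date
        async with dl as (bar, sink):
            data = b"hello world\n"
            bar.set_total(len(data))
            sink.file.write(data)
            bar.advance(len(data))
            sink.done()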

199
PFERD/crawl/http_crawler.py Normal file

@ -0,0 +1,199 @@
import asyncio
import http.cookies
import ssl
from pathlib import Path, PurePath
from typing import Any, Dict, List, Optional
import aiohttp
import certifi
from aiohttp.client import ClientTimeout
from ..auth import Authenticator
from ..config import Config
from ..logging import log
from ..utils import fmt_real_path
from ..version import NAME, VERSION
from .crawler import Crawler, CrawlerSection
class HttpCrawlerSection(CrawlerSection):
def http_timeout(self) -> float:
return self.s.getfloat("http_timeout", fallback=20)
class HttpCrawler(Crawler):
COOKIE_FILE = PurePath(".cookies")
def __init__(
self,
name: str,
section: HttpCrawlerSection,
config: Config,
shared_auth: Optional[Authenticator] = None,
) -> None:
super().__init__(name, section, config)
self._authentication_id = 0
self._authentication_lock = asyncio.Lock()
self._request_count = 0
self._http_timeout = section.http_timeout()
self._cookie_jar_path = self._output_dir.resolve(self.COOKIE_FILE)
self._shared_cookie_jar_paths: Optional[List[Path]] = None
self._shared_auth = shared_auth
self._output_dir.register_reserved(self.COOKIE_FILE)
async def _current_auth_id(self) -> int:
"""
Returns the id for the current authentication, i.e. an identifier for the last
successful call to [authenticate].
This method must be called before any request that might authenticate is made, so the
HttpCrawler can properly track when [authenticate] can return early and when actual
authentication is necessary.
"""
# We acquire the lock here to ensure we wait for any concurrent authenticate to finish.
# This should reduce the amount of requests we make: If an authentication is in progress
# all future requests wait for authentication to complete.
async with self._authentication_lock:
self._request_count += 1
return self._authentication_id
async def authenticate(self, caller_auth_id: int) -> None:
"""
Starts the authentication process. The main work is offloaded to _authenticate, which
you should override in a subclass if needed. This method should *NOT* be overridden.
The [caller_auth_id] should be the result of a [_current_auth_id] call made *before*
the request was made. This ensures that authentication is not performed needlessly.
"""
async with self._authentication_lock:
log.explain_topic("Authenticating")
# Another thread successfully called authenticate in-between
# We do not want to perform auth again, so we return here. We can
# assume the other thread succeeded, as authenticate will throw an error
# if it failed and abort the crawl process.
if caller_auth_id != self._authentication_id:
log.explain(
"Authentication skipped due to auth id mismatch."
"A previous authentication beat us to the race."
)
return
log.explain("Calling crawler-specific authenticate")
await self._authenticate()
self._authentication_id += 1
# Saving the cookies after the first auth ensures we won't need to re-authenticate
# on the next run, should this one be aborted or crash
self._save_cookies()
async def _authenticate(self) -> None:
"""
Performs authentication. This method must only return normally if authentication succeeded.
In all other cases it must either retry internally or throw a terminal exception.
"""
raise RuntimeError("_authenticate() was called but crawler doesn't provide an implementation")
def share_cookies(self, shared: Dict[Authenticator, List[Path]]) -> None:
if not self._shared_auth:
return
if self._shared_auth in shared:
self._shared_cookie_jar_paths = shared[self._shared_auth]
else:
self._shared_cookie_jar_paths = []
shared[self._shared_auth] = self._shared_cookie_jar_paths
self._shared_cookie_jar_paths.append(self._cookie_jar_path)
def _load_cookies_from_file(self, path: Path) -> None:
jar: Any = http.cookies.SimpleCookie()
with open(path, encoding="utf-8") as f:
for i, line in enumerate(f):
# Names of headers are case insensitive
if line[:11].lower() == "set-cookie:":
jar.load(line[11:])
else:
log.explain(f"Line {i} doesn't start with 'Set-Cookie:', ignoring it")
self._cookie_jar.update_cookies(jar)
def _save_cookies_to_file(self, path: Path) -> None:
jar: Any = http.cookies.SimpleCookie()
for morsel in self._cookie_jar:
jar[morsel.key] = morsel
with open(path, "w", encoding="utf-8") as f:
f.write(jar.output(sep="\n"))
f.write("\n") # A trailing newline is just common courtesy
def _load_cookies(self) -> None:
log.explain_topic("Loading cookies")
cookie_jar_path: Optional[Path] = None
if self._shared_cookie_jar_paths is None:
log.explain("Not sharing any cookies")
cookie_jar_path = self._cookie_jar_path
else:
log.explain("Sharing cookies")
max_mtime: Optional[float] = None
for path in self._shared_cookie_jar_paths:
if not path.is_file():
log.explain(f"{fmt_real_path(path)} is not a file")
continue
mtime = path.stat().st_mtime
if max_mtime is None or mtime > max_mtime:
log.explain(f"{fmt_real_path(path)} has newest mtime so far")
max_mtime = mtime
cookie_jar_path = path
else:
log.explain(f"{fmt_real_path(path)} has older mtime")
if cookie_jar_path is None:
log.explain("Couldn't find a suitable cookie file")
return
log.explain(f"Loading cookies from {fmt_real_path(cookie_jar_path)}")
try:
self._load_cookies_from_file(cookie_jar_path)
except Exception as e:
log.explain("Failed to load cookies")
log.explain(str(e))
def _save_cookies(self) -> None:
log.explain_topic("Saving cookies")
try:
log.explain(f"Saving cookies to {fmt_real_path(self._cookie_jar_path)}")
self._save_cookies_to_file(self._cookie_jar_path)
except Exception as e:
log.warn(f"Failed to save cookies to {fmt_real_path(self._cookie_jar_path)}")
log.warn(str(e))
async def run(self) -> None:
self._request_count = 0
self._cookie_jar = aiohttp.CookieJar()
self._load_cookies()
async with aiohttp.ClientSession(
headers={"User-Agent": f"{NAME}/{VERSION}"},
cookie_jar=self._cookie_jar,
connector=aiohttp.TCPConnector(ssl=ssl.create_default_context(cafile=certifi.where())),
timeout=ClientTimeout(
# 30 minutes. No download in the history of downloads was longer than 30 minutes.
# This is enough to transfer a 600 MB file over a 3 Mib/s connection.
# Allowing an arbitrary value could be annoying for overnight batch jobs
total=15 * 60,
connect=self._http_timeout,
sock_connect=self._http_timeout,
sock_read=self._http_timeout,
)
) as session:
self.session = session
try:
await super().run()
finally:
del self.session
log.explain_topic(f"Total amount of HTTP requests: {self._request_count}")
# They are saved in authenticate, but a final save won't hurt
self._save_cookies()
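
A hedged sketch of the authentication pattern the docstrings above describe: remember the auth id before a request and re-authenticate only if the response indicates a logged-out session. _get_authenticated and _looks_logged_out are hypothetical names, not part of this diff.

class ExampleHttpCrawler(HttpCrawler):
    def _looks_logged_out(self, body: bytes) -> bool:
        # Hypothetical check; a real crawler would inspect the returned HTML.
        return b"login" in body.lower()

    async def _get_authenticated(self, url: str) -> bytes:
        auth_id = await self._current_auth_id()
        async with self.session.get(url) as resp:
            body = await resp.read()
        if self._looks_logged_out(body):
            # Only authenticates if no other task has done so since auth_id was taken
            await self.authenticate(auth_id)
            async with self.session.get(url) as resp:
                body = await resp.read()
        return body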


@ -0,0 +1,3 @@
from .kit_ilias_web_crawler import KitIliasWebCrawler, KitIliasWebCrawlerSection
__all__ = ["KitIliasWebCrawler", "KitIliasWebCrawlerSection"]


@ -0,0 +1,201 @@
from enum import Enum
from typing import Optional
import bs4
from PFERD.utils import soupify
_link_template_plain = "{{link}}"
_link_template_fancy = """
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>ILIAS - Link: {{name}}</title>
<meta http-equiv = "refresh" content = "{{redirect_delay}}; url = {{link}}" />
</head>
<style>
* {
box-sizing: border-box;
}
.center-flex {
display: flex;
align-items: center;
justify-content: center;
}
body {
padding: 0;
margin: 0;
background-color: #f0f0f0;
font-family: "Open Sans", Verdana, Arial, Helvetica, sans-serif;
height: 100vh;
}
.row {
background-color: white;
min-width: 500px;
max-width: 90vw;
display: flex;
padding: 1em;
}
.logo {
flex: 0 1;
margin-right: 1em;
fill: #009682;
}
.tile {
flex: 1 0;
display: flex;
flex-direction: column;
justify-content: center;
}
.top-row {
padding-bottom: 5px;
font-size: 15px;
}
a {
color: #009682;
text-decoration: none;
}
a:hover {
text-decoration: underline;
}
.bottom-row {
font-size: 13px;
}
.menu-button {
border: 1px solid black;
margin-left: 4em;
width: 25px;
height: 25px;
flex: 0 0 25px;
background-color: #b3e0da;
font-size: 13px;
color: #222;
}
</style>
<body class="center-flex">
<div class="row">
<div class="logo center-flex">
<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24">
<path d="M12 0c-6.627 0-12 5.373-12 12s5.373 12 12 12 12-5.373 12-12-5.373-12-12-12zm9.567 9.098c-.059-.058-.127-.108-.206-.138-.258-.101-1.35.603-1.515.256-.108-.231-.327.148-.578.008-.121-.067-.459-.52-.611-.465-.312.112.479.974.694 1.087.203-.154.86-.469 1.002-.039.271.812-.745 1.702-1.264 2.171-.775.702-.63-.454-1.159-.86-.277-.213-.274-.667-.555-.824-.125-.071-.7-.732-.694-.821l-.017.167c-.095.072-.297-.27-.319-.325 0 .298.485.772.646 1.011.273.409.42 1.005.756 1.339.179.18.866.923 1.045.908l.921-.437c.649.154-1.531 3.237-1.738 3.619-.171.321.139 1.112.114 1.49-.029.437-.374.579-.7.817-.35.255-.268.752-.562.934-.521.321-.897 1.366-1.639 1.361-.219-.001-1.151.364-1.273.007-.095-.258-.223-.455-.356-.71-.131-.25-.015-.51-.175-.731-.11-.154-.479-.502-.513-.684-.002-.157.118-.632.283-.715.231-.118.044-.462.016-.663-.048-.357-.27-.652-.535-.859-.393-.302-.189-.542-.098-.974 0-.206-.126-.476-.402-.396-.57.166-.396-.445-.812-.417-.299.021-.543.211-.821.295-.349.104-.707-.083-1.053-.126-1.421-.179-1.885-1.804-1.514-2.976.037-.192-.115-.547-.048-.696.159-.352.485-.752.768-1.021.16-.152.365-.113.553-.231.29-.182.294-.558.578-.789.404-.328.956-.321 1.482-.392.281-.037 1.35-.268 1.518-.06 0 .039.193.611-.019.578.438.023 1.061.756 1.476.585.213-.089.135-.744.573-.427.265.19 1.45.275 1.696.07.152-.125.236-.939.053-1.031.117.116-.618.125-.686.099-.122-.044-.235.115-.43.025.117.055-.651-.358-.22-.674-.181.132-.349-.037-.544.109-.135.109.062.181-.13.277-.305.155-.535-.53-.649-.607-.118-.077-1.024-.713-.777-.298l.797.793c-.04.026-.209-.289-.209-.059.053-.136.02.585-.105.35-.056-.09.091-.14.006-.271 0-.085-.23-.169-.275-.228-.126-.157-.462-.502-.644-.585-.05-.024-.771.088-.832.111-.071.099-.131.203-.181.314-.149.055-.29.127-.423.216l-.159.356c-.068.061-.772.294-.776.303.03-.076-.492-.172-.457-.324.038-.167.215-.687.169-.877-.048-.199 1.085.287 1.158-.238.029-.227.047-.492-.316-.531.069.008.702-.249.807-.364.148-.169.486-.447.731-.447.286 0 .225-.417.356-.622.133.053-.071.38.088.512-.01-.104.45.057.494.033.105-.056.691-.023.601-.299-.101-.28.052-.197.183-.255-.02.008.248-.458.363-.456-.104-.089-.398.112-.516.103-.308-.024-.177-.525-.061-.672.09-.116-.246-.258-.25-.036-.006.332-.314.633-.243 1.075.109.666-.743-.161-.816-.115-.283.172-.515-.216-.368-.449.149-.238.51-.226.659-.48.104-.179.227-.389.388-.524.541-.454.689-.091 1.229-.042.526.048.178.125.105.327-.07.192.289.261.413.1.071-.092.232-.326.301-.499.07-.175.578-.2.527-.365 2.72 1.148 4.827 3.465 5.694 6.318zm-11.113-3.779l.068-.087.073-.019c.042-.034.086-.118.151-.104.043.009.146.095.111.148-.037.054-.066-.049-.081.101-.018.169-.188.167-.313.222-.087.037-.175-.018-.09-.104l.088-.108-.007-.049zm.442.245c.046-.045.138-.008.151-.094.014-.084.078-.178-.008-.335-.022-.042.116-.082.051-.137l-.109.032s.155-.668.364-.366l-.089.103c.135.134.172.47.215.687.127.066.324.078.098.192.117-.02-.618.314-.715.178-.072-.083.317-.139.307-.173-.004-.011-.317-.02-.265-.087zm1.43-3.547l-.356.326c-.36.298-1.28.883-1.793.705-.524-.18-1.647.667-1.826.673-.067.003.002-.641.36-.689-.141.021.993-.575 1.185-.805.678-.146 1.381-.227 2.104-.227l.326.017zm-5.086 1.19c.07.082.278.092-.026.288-.183.11-.377.809-.548.809-.51.223-.542-.439-1.109.413-.078.115-.395.158-.644.236.685-.688 1.468-1.279 2.327-1.746zm-5.24 8.793c0-.541.055-1.068.139-1.586l.292.185c.113.135.113.719.169.911.139.482.484.751.748 1.19.155.261.414.923.332 1.197.109-.179 1.081.824 1.259 1.033.418.492.74 1.088.061 1.574-.219.158.334 1.14.049 
1.382l-.365.094c-.225.138-.235.397-.166.631-1.562-1.765-2.518-4.076-2.518-6.611zm14.347-5.823c.083-.01-.107.167-.107.167.033.256.222.396.581.527.437.157.038.455-.213.385-.139-.039-.854-.255-.879.025 0 .167-.679.001-.573-.175.073-.119.05-.387.186-.562.193-.255.38-.116.386.032-.001.394.398-.373.619-.399z"/>
</svg>
</div>
<div class="tile">
<div class="top-row">
<a href="{{link}}">{{name}}</a>
</div>
<div class="bottom-row">{{description}}</div>
</div>
<div class="menu-button center-flex"> </div>
</div>
</body>
</html>
""".strip() # noqa: E501 line too long
_link_template_internet_shortcut = """
[InternetShortcut]
URL={{link}}
""".strip()
_learning_module_template = """
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>{{name}}</title>
</head>
<style>
* {
box-sizing: border-box;
}
.center-flex {
display: flex;
align-items: center;
justify-content: center;
}
.nav {
display: flex;
justify-content: space-between;
}
</style>
<body class="center-flex">
{{body}}
</body>
</html>
"""
def learning_module_template(body: bs4.Tag, name: str, prev: Optional[str], next: Optional[str]) -> str:
# These seem to be comments; ignore them.
for elem in body.select(".il-copg-mob-fullscreen-modal"):
elem.decompose()
nav_template = """
<div class="nav">
{{left}}
{{right}}
</div>
"""
if prev and body.select_one(".ilc_page_lnav_LeftNavigation"):
text = body.select_one(".ilc_page_lnav_LeftNavigation").getText().strip()
left = f'<a href="{prev}">{text}</a>'
else:
left = "<span></span>"
if next and body.select_one(".ilc_page_rnav_RightNavigation"):
text = body.select_one(".ilc_page_rnav_RightNavigation").getText().strip()
right = f'<a href="{next}">{text}</a>'
else:
right = "<span></span>"
if top_nav := body.select_one(".ilc_page_tnav_TopNavigation"):
top_nav.replace_with(
soupify(nav_template.replace("{{left}}", left).replace("{{right}}", right).encode())
)
if bot_nav := body.select_one(".ilc_page_bnav_BottomNavigation"):
bot_nav.replace_with(soupify(nav_template.replace(
"{{left}}", left).replace("{{right}}", right).encode())
)
body = body.prettify()
return _learning_module_template.replace("{{body}}", body).replace("{{name}}", name)
class Links(Enum):
IGNORE = "ignore"
PLAINTEXT = "plaintext"
FANCY = "fancy"
INTERNET_SHORTCUT = "internet-shortcut"
def template(self) -> Optional[str]:
if self == Links.FANCY:
return _link_template_fancy
elif self == Links.PLAINTEXT:
return _link_template_plain
elif self == Links.INTERNET_SHORTCUT:
return _link_template_internet_shortcut
elif self == Links.IGNORE:
return None
raise ValueError("Missing switch case")
def extension(self) -> Optional[str]:
if self == Links.FANCY:
return ".html"
elif self == Links.PLAINTEXT:
return ".txt"
elif self == Links.INTERNET_SHORTCUT:
return ".url"
elif self == Links.IGNORE:
return None
raise ValueError("Missing switch case")
@staticmethod
def from_string(string: str) -> "Links":
try:
return Links(string)
except ValueError:
raise ValueError("must be one of 'ignore', 'plaintext',"
" 'html', 'internet-shortcut'")


@ -0,0 +1,91 @@
from bs4 import BeautifulSoup, Comment, Tag
_STYLE_TAG_CONTENT = """
.ilc_text_block_Information {
background-color: #f5f7fa;
}
div.ilc_text_block_Standard {
margin-bottom: 10px;
margin-top: 10px;
}
span.ilc_text_inline_Strong {
font-weight: bold;
}
.accordion-head {
background-color: #f5f7fa;
padding: 0.5rem 0;
}
h3 {
margin-top: 0.5rem;
margin-bottom: 1rem;
}
br.visible-break {
margin-bottom: 1rem;
}
article {
margin: 0.5rem 0;
}
body {
padding: 1em;
grid-template-columns: 1fr min(60rem, 90%) 1fr;
line-height: 1.2;
}
"""
_ARTICLE_WORTHY_CLASSES = [
"ilc_text_block_Information",
"ilc_section_Attention",
"ilc_section_Link",
]
def insert_base_markup(soup: BeautifulSoup) -> BeautifulSoup:
head = soup.new_tag("head")
soup.insert(0, head)
simplecss_link: Tag = soup.new_tag("link")
# <link rel="stylesheet" href="https://cdn.simplecss.org/simple.css">
simplecss_link["rel"] = "stylesheet"
simplecss_link["href"] = "https://cdn.simplecss.org/simple.css"
head.append(simplecss_link)
# Basic style tags for compat
style: Tag = soup.new_tag("style")
style.append(_STYLE_TAG_CONTENT)
head.append(style)
return soup
def clean(soup: BeautifulSoup) -> BeautifulSoup:
for block in soup.find_all(class_=lambda x: x in _ARTICLE_WORTHY_CLASSES):
block.name = "article"
for block in soup.find_all("h3"):
block.name = "div"
for block in soup.find_all("h1"):
block.name = "h3"
for block in soup.find_all(class_="ilc_va_ihcap_VAccordIHeadCap"):
block.name = "h3"
block["class"] += ["accordion-head"]
for dummy in soup.select(".ilc_text_block_Standard.ilc_Paragraph"):
children = list(dummy.children)
if not children:
dummy.decompose()
continue
if len(children) > 1:
continue
if isinstance(children[0], Comment):
dummy.decompose()
for hrule_imposter in soup.find_all(class_="ilc_section_Separator"):
hrule_imposter.insert(0, soup.new_tag("hr"))
return soup
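
A hedged sketch of how these helpers could be chained on an exported ILIAS content page; the input HTML below is a stand-in and the call order is only illustrative:

from bs4 import BeautifulSoup

raw = '<div class="ilc_text_block_Information">Some note</div>'
soup = BeautifulSoup(raw, "html.parser")
soup = clean(soup)               # turns the info block into an <article>
soup = insert_base_markup(soup)  # adds simple.css and the style block above
print(soup.prettify())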

File diff suppressed because it is too large

File diff suppressed because it is too large


@ -0,0 +1,170 @@
import os
import re
from dataclasses import dataclass
from pathlib import PurePath
from typing import Awaitable, List, Optional, Pattern, Set, Tuple, Union
from urllib.parse import urljoin
from bs4 import BeautifulSoup, Tag
from ..config import Config
from ..logging import ProgressBar, log
from ..output_dir import FileSink
from ..utils import soupify
from .crawler import CrawlError
from .http_crawler import HttpCrawler, HttpCrawlerSection
class KitIpdCrawlerSection(HttpCrawlerSection):
def target(self) -> str:
target = self.s.get("target")
if not target:
self.missing_value("target")
if not target.startswith("https://"):
self.invalid_value("target", target, "Should be a URL")
return target
def link_regex(self) -> Pattern[str]:
regex = self.s.get("link_regex", r"^.*?[^/]+\.(pdf|zip|c|cpp|java)$")
return re.compile(regex)
@dataclass(unsafe_hash=True)
class KitIpdFile:
name: str
url: str
@dataclass
class KitIpdFolder:
name: str
files: List[KitIpdFile]
def explain(self) -> None:
log.explain_topic(f"Folder {self.name!r}")
for file in self.files:
log.explain(f"File {file.name!r} (href={file.url!r})")
def __hash__(self) -> int:
return self.name.__hash__()
class KitIpdCrawler(HttpCrawler):
def __init__(
self,
name: str,
section: KitIpdCrawlerSection,
config: Config,
):
super().__init__(name, section, config)
self._url = section.target()
self._file_regex = section.link_regex()
async def _run(self) -> None:
maybe_cl = await self.crawl(PurePath("."))
if not maybe_cl:
return
tasks: List[Awaitable[None]] = []
async with maybe_cl:
for item in await self._fetch_items():
if isinstance(item, KitIpdFolder):
tasks.append(self._crawl_folder(item))
else:
# Orphan files are placed in the root folder
tasks.append(self._download_file(PurePath("."), item))
await self.gather(tasks)
async def _crawl_folder(self, folder: KitIpdFolder) -> None:
path = PurePath(folder.name)
if not await self.crawl(path):
return
tasks = [self._download_file(path, file) for file in folder.files]
await self.gather(tasks)
async def _download_file(self, parent: PurePath, file: KitIpdFile) -> None:
element_path = parent / file.name
maybe_dl = await self.download(element_path)
if not maybe_dl:
return
async with maybe_dl as (bar, sink):
await self._stream_from_url(file.url, sink, bar)
async def _fetch_items(self) -> Set[Union[KitIpdFile, KitIpdFolder]]:
page, url = await self.get_page()
elements: List[Tag] = self._find_file_links(page)
items: Set[Union[KitIpdFile, KitIpdFolder]] = set()
for element in elements:
folder_label = self._find_folder_label(element)
if folder_label:
folder = self._extract_folder(folder_label, url)
if folder not in items:
items.add(folder)
folder.explain()
else:
file = self._extract_file(element, url)
items.add(file)
log.explain_topic(f"Orphan file {file.name!r} (href={file.url!r})")
log.explain("Attributing it to root folder")
return items
def _extract_folder(self, folder_tag: Tag, url: str) -> KitIpdFolder:
files: List[KitIpdFile] = []
name = folder_tag.getText().strip()
container: Tag = folder_tag.findNextSibling(name="table")
for link in self._find_file_links(container):
files.append(self._extract_file(link, url))
return KitIpdFolder(name, files)
@staticmethod
def _find_folder_label(file_link: Tag) -> Optional[Tag]:
enclosing_table: Tag = file_link.findParent(name="table")
if enclosing_table is None:
return None
return enclosing_table.findPreviousSibling(name=re.compile("^h[1-6]$"))
def _extract_file(self, link: Tag, url: str) -> KitIpdFile:
url = self._abs_url_from_link(url, link)
name = os.path.basename(url)
return KitIpdFile(name, url)
def _find_file_links(self, tag: Union[Tag, BeautifulSoup]) -> List[Tag]:
return tag.findAll(name="a", attrs={"href": self._file_regex})
def _abs_url_from_link(self, url: str, link_tag: Tag) -> str:
return urljoin(url, link_tag.get("href"))
async def _stream_from_url(self, url: str, sink: FileSink, bar: ProgressBar) -> None:
async with self.session.get(url, allow_redirects=False) as resp:
if resp.status == 403:
raise CrawlError("Received a 403. Are you within the KIT network/VPN?")
if resp.content_length:
bar.set_total(resp.content_length)
async for data in resp.content.iter_chunked(1024):
sink.file.write(data)
bar.advance(len(data))
sink.done()
async def get_page(self) -> Tuple[BeautifulSoup, str]:
async with self.session.get(self._url) as request:
# The web page for Algorithmen für Routenplanung contains some
# weird comments that beautifulsoup doesn't parse correctly. This
# hack enables those pages to be crawled, and should hopefully not
# cause issues on other pages.
content = (await request.read()).decode("utf-8")
content = re.sub(r"<!--.*?-->", "", content)
return soupify(content.encode("utf-8")), str(request.url)
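
A quick, purely illustrative sanity check of the default link_regex above (the URLs are made up):

import re

regex = re.compile(r"^.*?[^/]+\.(pdf|zip|c|cpp|java)$")
assert regex.match("https://example.kit.edu/lecture/slides.pdf")
assert not regex.match("https://example.kit.edu/lecture/")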


@ -0,0 +1,117 @@
import asyncio
import datetime
import random
from pathlib import Path, PurePath
from typing import Optional
from ..config import Config
from .crawler import Crawler, CrawlerSection, anoncritical
class LocalCrawlerSection(CrawlerSection):
def target(self) -> Path:
value = self.s.get("target")
if value is None:
self.missing_value("target")
return Path(value).expanduser()
def crawl_delay(self) -> float:
value = self.s.getfloat("crawl_delay", fallback=0.0)
if value < 0:
self.invalid_value("crawl_delay", value,
"Must not be negative")
return value
def download_delay(self) -> float:
value = self.s.getfloat("download_delay", fallback=0.0)
if value < 0:
self.invalid_value("download_delay", value,
"Must not be negative")
return value
def download_speed(self) -> Optional[int]:
value = self.s.getint("download_speed")
if value is not None and value <= 0:
self.invalid_value("download_speed", value,
"Must be greater than 0")
return value
class LocalCrawler(Crawler):
def __init__(
self,
name: str,
section: LocalCrawlerSection,
config: Config,
):
super().__init__(name, section, config)
self._target = config.default_section.working_dir() / section.target()
self._crawl_delay = section.crawl_delay()
self._download_delay = section.download_delay()
self._download_speed = section.download_speed()
if self._download_speed:
self._block_size = self._download_speed // 10
else:
self._block_size = 1024**2 # 1 MiB
async def _run(self) -> None:
await self._crawl_path(self._target, PurePath())
@anoncritical
async def _crawl_path(self, path: Path, pure: PurePath) -> None:
if path.is_dir():
await self._crawl_dir(path, pure)
elif path.is_file():
await self._crawl_file(path, pure)
async def _crawl_dir(self, path: Path, pure: PurePath) -> None:
cl = await self.crawl(pure)
if not cl:
return
tasks = []
async with cl:
await asyncio.sleep(random.uniform(
0.5 * self._crawl_delay,
self._crawl_delay,
))
for child in path.iterdir():
pure_child = cl.path / child.name
tasks.append(self._crawl_path(child, pure_child))
await self.gather(tasks)
async def _crawl_file(self, path: Path, pure: PurePath) -> None:
stat = path.stat()
mtime = datetime.datetime.fromtimestamp(stat.st_mtime)
dl = await self.download(pure, mtime=mtime)
if not dl:
return
async with dl as (bar, sink):
await asyncio.sleep(random.uniform(
0.5 * self._download_delay,
self._download_delay,
))
bar.set_total(stat.st_size)
with open(path, "rb") as f:
while True:
data = f.read(self._block_size)
if len(data) == 0:
break
sink.file.write(data)
bar.advance(len(data))
if self._download_speed:
delay = self._block_size / self._download_speed
delay = random.uniform(0.8 * delay, 1.2 * delay)
await asyncio.sleep(delay)
sink.done()

85
PFERD/deduplicator.py Normal file

@ -0,0 +1,85 @@
from pathlib import PurePath
from typing import Iterator, Set
from .logging import log
from .utils import fmt_path
def name_variants(path: PurePath) -> Iterator[PurePath]:
separator = " " if " " in path.stem else "_"
i = 1
while True:
yield path.parent / f"{path.stem}{separator}{i}{path.suffix}"
i += 1
class Deduplicator:
FORBIDDEN_CHARS = '<>:"/\\|?*' + "".join([chr(i) for i in range(0, 32)])
FORBIDDEN_NAMES = {
"CON", "PRN", "AUX", "NUL",
"COM1", "COM2", "COM3", "COM4", "COM5", "COM6", "COM7", "COM8", "COM9",
"LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8", "LPT9",
}
def __init__(self, windows_paths: bool) -> None:
self._windows_paths = windows_paths
self._known: Set[PurePath] = set()
def _add(self, path: PurePath) -> None:
self._known.add(path)
# The last parent is just "."
for parent in list(path.parents)[:-1]:
self._known.add(parent)
def _fixup_element(self, name: str) -> str:
# For historical reasons, windows paths have some odd restrictions that
# we're trying to avoid. See:
# https://docs.microsoft.com/en-us/windows/win32/fileio/naming-a-file
for char in self.FORBIDDEN_CHARS:
name = name.replace(char, "_")
path = PurePath(name)
if path.stem in self.FORBIDDEN_NAMES:
name = f"{path.stem}_{path.suffix}"
if name.endswith(" ") or name.endswith("."):
name += "_"
return name
def _fixup_for_windows(self, path: PurePath) -> PurePath:
new_path = PurePath(*[self._fixup_element(elem) for elem in path.parts])
if new_path != path:
log.explain(f"Changed path to {fmt_path(new_path)} for windows compatibility")
return new_path
def fixup_path(self, path: PurePath) -> PurePath:
"""Fixes up the path for windows, if enabled. Returns the path unchanged otherwise."""
if self._windows_paths:
return self._fixup_for_windows(path)
return path
def mark(self, path: PurePath) -> PurePath:
if self._windows_paths:
path = self._fixup_for_windows(path)
if path not in self._known:
self._add(path)
return path
log.explain(f"Path {fmt_path(path)} is already taken, finding a new name")
for variant in name_variants(path):
if variant in self._known:
log.explain(f"Path {fmt_path(variant)} is taken as well")
continue
log.explain(f"Found unused path {fmt_path(variant)}")
self._add(variant)
return variant
# The "name_variants" iterator returns infinitely many paths
raise RuntimeError("Unreachable")
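
A short sketch of the renaming behaviour implemented above; the paths are made up:

from pathlib import PurePath

dedup = Deduplicator(windows_paths=False)
dedup.mark(PurePath("folder/slides.pdf"))  # -> folder/slides.pdf
dedup.mark(PurePath("folder/slides.pdf"))  # -> folder/slides_1.pdf
dedup.mark(PurePath("folder/slides.pdf"))  # -> folder/slides_2.pdf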


@ -1,169 +0,0 @@
"""
Utility functions and a scraper/downloader for the KIT DIVA portal.
"""
import logging
import re
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Callable, List, Optional
import requests
from .errors import FatalException
from .logging import PrettyLogger
from .organizer import Organizer
from .tmp_dir import TmpDir
from .transform import Transformable
from .utils import stream_to_path
LOGGER = logging.getLogger(__name__)
PRETTY = PrettyLogger(LOGGER)
@dataclass
class DivaDownloadInfo(Transformable):
"""
Information about a DIVA video
"""
url: str
DivaDownloadStrategy = Callable[[Organizer, DivaDownloadInfo], bool]
def diva_download_new(organizer: Organizer, info: DivaDownloadInfo) -> bool:
"""
Accepts only new files.
"""
resolved_file = organizer.resolve(info.path)
if not resolved_file.exists():
return True
PRETTY.ignored_file(info.path, "local file exists")
return False
class DivaPlaylistCrawler:
# pylint: disable=too-few-public-methods
"""
A crawler for DIVA playlists.
"""
_PLAYLIST_BASE_URL = "https://mediaservice.bibliothek.kit.edu/asset/detail/"
_COLLECTION_BASE_URL = "https://mediaservice.bibliothek.kit.edu/asset/collection.json"
def __init__(self, playlist_id: str):
self._id = playlist_id
@classmethod
def fetch_id(cls, playlist_link: str) -> str:
"""
Fetches the ID for a playlist, given the base link
(e.g. https://mediaservice.bibliothek.kit.edu/#/details/DIVA-2019-271).
Raises a FatalException if the ID cannot be resolved
"""
match = re.match(r".+#/details/(.+)", playlist_link)
if match is None:
raise FatalException(
"DIVA: Invalid playlist link format, could not extract details."
)
base_name = match.group(1)
response = requests.get(cls._PLAYLIST_BASE_URL + base_name + ".json")
if response.status_code != 200:
raise FatalException(
f"DIVA: Got non-200 status code ({response.status_code}))"
f"when requesting {response.url!r}!"
)
body = response.json()
if body["error"]:
raise FatalException(f"DIVA: Server returned error {body['error']!r}.")
return body["result"]["collection"]["id"]
def crawl(self) -> List[DivaDownloadInfo]:
"""
Crawls the playlist given in the constructor.
"""
response = requests.get(self._COLLECTION_BASE_URL, params={"collection": self._id})
if response.status_code != 200:
raise FatalException(f"Server returned status {response.status_code}.")
body = response.json()
if body["error"]:
raise FatalException(f"Server returned error {body['error']!r}.")
result = body["result"]
if result["resultCount"] > result["pageSize"]:
PRETTY.warning("Did not receive all results, some will be missing")
download_infos: List[DivaDownloadInfo] = []
for video in result["resultList"]:
title = video["title"]
collection_title = self._follow_path(["collection", "title"], video)
url = self._follow_path(
["resourceList", "derivateList", "mp4", "url"],
video
)
if url and collection_title and title:
path = Path(collection_title, title + ".mp4")
download_infos.append(DivaDownloadInfo(path, url))
else:
PRETTY.warning(f"Incomplete video found: {title!r} {collection_title!r} {url!r}")
return download_infos
@staticmethod
def _follow_path(path: List[str], obj: Any) -> Optional[Any]:
"""
Follows a property path through an object, bailing at the first None.
"""
current = obj
for path_step in path:
if path_step in current:
current = current[path_step]
else:
return None
return current
class DivaDownloader:
"""
A downloader for DIVA videos.
"""
def __init__(self, tmp_dir: TmpDir, organizer: Organizer, strategy: DivaDownloadStrategy):
self._tmp_dir = tmp_dir
self._organizer = organizer
self._strategy = strategy
self._session = requests.session()
def download_all(self, infos: List[DivaDownloadInfo]) -> None:
"""
Download multiple files one after the other.
"""
for info in infos:
self.download(info)
def download(self, info: DivaDownloadInfo) -> None:
"""
Download a single file.
"""
if not self._strategy(self._organizer, info):
self._organizer.mark(info.path)
return
with self._session.get(info.url, stream=True) as response:
if response.status_code == 200:
tmp_file = self._tmp_dir.new_path()
stream_to_path(response, tmp_file, info.path.name)
self._organizer.accept_file(tmp_file, info.path)
else:
PRETTY.warning(f"Could not download file, got response {response.status_code}")


@ -1,75 +0,0 @@
"""
Provides a summary that keeps track of new modified or deleted files.
"""
from pathlib import Path
from typing import List
def _mergeNoDuplicate(first: List[Path], second: List[Path]) -> List[Path]:
tmp = list(set(first + second))
tmp.sort(key=lambda x: str(x.resolve()))
return tmp
class DownloadSummary:
"""
Keeps track of all new, modified or deleted files and provides a summary.
"""
def __init__(self) -> None:
self._new_files: List[Path] = []
self._modified_files: List[Path] = []
self._deleted_files: List[Path] = []
@property
def new_files(self) -> List[Path]:
"""
Returns all new files.
"""
return self._new_files.copy()
@property
def modified_files(self) -> List[Path]:
"""
Returns all modified files.
"""
return self._modified_files.copy()
@property
def deleted_files(self) -> List[Path]:
"""
Returns all deleted files.
"""
return self._deleted_files.copy()
def merge(self, summary: 'DownloadSummary') -> None:
"""
Merges ourselves with the passed summary. Modifies this object, but not the passed one.
"""
self._new_files = _mergeNoDuplicate(self._new_files, summary.new_files)
self._modified_files = _mergeNoDuplicate(self._modified_files, summary.modified_files)
self._deleted_files = _mergeNoDuplicate(self._deleted_files, summary.deleted_files)
def add_deleted_file(self, path: Path) -> None:
"""
Registers a file as deleted.
"""
self._deleted_files.append(path)
def add_modified_file(self, path: Path) -> None:
"""
Registers a file as changed.
"""
self._modified_files.append(path)
def add_new_file(self, path: Path) -> None:
"""
Registers a file as new.
"""
self._new_files.append(path)
def has_updates(self) -> bool:
"""
Returns whether this summary has any updates.
"""
return bool(self._new_files or self._modified_files or self._deleted_files)


@ -1,72 +0,0 @@
"""
General downloaders useful in many situations
"""
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional
import requests
import requests.auth
from .organizer import Organizer
from .tmp_dir import TmpDir
from .transform import Transformable
from .utils import stream_to_path
@dataclass
class HttpDownloadInfo(Transformable):
"""
This class describes a single file to be downloaded.
"""
url: str
parameters: Dict[str, Any] = field(default_factory=dict)
class HttpDownloader:
"""A HTTP downloader that can handle HTTP basic auth."""
def __init__(
self,
tmp_dir: TmpDir,
organizer: Organizer,
username: Optional[str],
password: Optional[str],
):
"""Create a new http downloader."""
self._organizer = organizer
self._tmp_dir = tmp_dir
self._username = username
self._password = password
self._session = self._build_session()
def _build_session(self) -> requests.Session:
session = requests.Session()
if self._username and self._password:
session.auth = requests.auth.HTTPBasicAuth(
self._username, self._password
)
return session
def download_all(self, infos: List[HttpDownloadInfo]) -> None:
"""
Download multiple files one after the other.
"""
for info in infos:
self.download(info)
def download(self, info: HttpDownloadInfo) -> None:
"""
Download a single file.
"""
with self._session.get(info.url, params=info.parameters, stream=True) as response:
if response.status_code == 200:
tmp_file = self._tmp_dir.new_path()
stream_to_path(response, tmp_file, info.path.name)
self._organizer.accept_file(tmp_file, info.path)
else:
# TODO use proper exception
raise Exception(f"Could not download file, got response {response.status_code}")


@ -1,57 +0,0 @@
"""
An error logging decorator.
"""
import logging
from typing import Any, Callable, TypeVar, cast
from rich.console import Console
from .logging import PrettyLogger
LOGGER = logging.getLogger(__name__)
PRETTY = PrettyLogger(LOGGER)
class FatalException(Exception):
"""
A fatal exception occurred. Recovery is not possible.
"""
TFun = TypeVar('TFun', bound=Callable[..., Any])
def swallow_and_print_errors(function: TFun) -> TFun:
"""
Decorates a function, swallows all errors, logs them and returns None if one occurred.
"""
def inner(*args: Any, **kwargs: Any) -> Any:
# pylint: disable=broad-except
try:
return function(*args, **kwargs)
except FatalException as error:
PRETTY.error(str(error))
return None
except Exception as error:
Console().print_exception()
return None
return cast(TFun, inner)
def retry_on_io_exception(max_retries: int, message: str) -> Callable[[TFun], TFun]:
"""
Decorates a function and retries it on IO errors until the maximum retry count is hit.
"""
def retry(function: TFun) -> TFun:
def inner(*args: Any, **kwargs: Any) -> Any:
for i in range(0, max_retries):
# pylint: disable=broad-except
try:
return function(*args, **kwargs)
except IOError as error:
PRETTY.warning(f"Error duing operation '{message}': {error}")
PRETTY.warning(
f"Retrying operation '{message}'. Remaining retries: {max_retries - 1 - i}")
return cast(TFun, inner)
return retry


@ -1,10 +0,0 @@
"""
Synchronizing files from ILIAS instances (https://www.ilias.de/).
"""
from .authenticators import IliasAuthenticator, KitShibbolethAuthenticator
from .crawler import (IliasCrawler, IliasCrawlerEntry, IliasDirectoryFilter,
IliasElementType)
from .downloader import (IliasDownloader, IliasDownloadInfo,
IliasDownloadStrategy, download_everything,
download_modified_or_new)


@ -1,138 +0,0 @@
"""
Authenticators that can obtain proper ILIAS session cookies.
"""
import abc
import logging
from typing import Optional
import bs4
import requests
from ..authenticators import TfaAuthenticator, UserPassAuthenticator
from ..utils import soupify
LOGGER = logging.getLogger(__name__)
class IliasAuthenticator(abc.ABC):
# pylint: disable=too-few-public-methods
"""
An authenticator that logs an existing requests session into an ILIAS
account.
"""
@abc.abstractmethod
def authenticate(self, sess: requests.Session) -> None:
"""
Log a requests session into this authenticator's ILIAS account.
"""
class KitShibbolethAuthenticator(IliasAuthenticator):
# pylint: disable=too-few-public-methods
"""
Authenticate via KIT's shibboleth system.
"""
def __init__(self, authenticator: Optional[UserPassAuthenticator] = None) -> None:
if authenticator:
self._auth = authenticator
else:
self._auth = UserPassAuthenticator("KIT ILIAS Shibboleth")
self._tfa_auth = TfaAuthenticator("KIT ILIAS Shibboleth")
def authenticate(self, sess: requests.Session) -> None:
"""
Performs the ILIAS Shibboleth authentication dance and saves the login
cookies it receives.
This function should only be called when it is detected that you're
not logged in. The cookies obtained should be good for a few minutes,
maybe even an hour or two.
"""
# Equivalent: Click on "Mit KIT-Account anmelden" button in
# https://ilias.studium.kit.edu/login.php
LOGGER.debug("Begin authentication process with ILIAS")
url = "https://ilias.studium.kit.edu/Shibboleth.sso/Login"
data = {
"sendLogin": "1",
"idp_selection": "https://idp.scc.kit.edu/idp/shibboleth",
"target": "/shib_login.php",
"home_organization_selection": "Mit KIT-Account anmelden",
}
soup = soupify(sess.post(url, data=data))
# Attempt to login using credentials, if necessary
while not self._login_successful(soup):
# Searching the form here so that this fails before asking for
# credentials rather than after asking.
form = soup.find("form", {"class": "full content", "method": "post"})
action = form["action"]
csrf_token = form.find("input", {"name": "csrf_token"})["value"]
# Equivalent: Enter credentials in
# https://idp.scc.kit.edu/idp/profile/SAML2/Redirect/SSO
LOGGER.debug("Attempt to log in to Shibboleth using credentials")
url = "https://idp.scc.kit.edu" + action
data = {
"_eventId_proceed": "",
"j_username": self._auth.username,
"j_password": self._auth.password,
"csrf_token": csrf_token
}
soup = soupify(sess.post(url, data=data))
if self._tfa_required(soup):
soup = self._authenticate_tfa(sess, soup)
if not self._login_successful(soup):
print("Incorrect credentials.")
self._auth.invalidate_credentials()
# Equivalent: Being redirected via JS automatically
# (or clicking "Continue" if you have JS disabled)
LOGGER.debug("Redirect back to ILIAS with login information")
relay_state = soup.find("input", {"name": "RelayState"})
saml_response = soup.find("input", {"name": "SAMLResponse"})
url = "https://ilias.studium.kit.edu/Shibboleth.sso/SAML2/POST"
data = { # using the info obtained in the while loop above
"RelayState": relay_state["value"],
"SAMLResponse": saml_response["value"],
}
sess.post(url, data=data)
def _authenticate_tfa(
self,
session: requests.Session,
soup: bs4.BeautifulSoup
) -> bs4.BeautifulSoup:
# Searching the form here so that this fails before asking for
# credentials rather than after asking.
form = soup.find("form", {"method": "post"})
action = form["action"]
# Equivalent: Enter token in
# https://idp.scc.kit.edu/idp/profile/SAML2/Redirect/SSO
LOGGER.debug("Attempt to log in to Shibboleth with TFA token")
url = "https://idp.scc.kit.edu" + action
data = {
"_eventId_proceed": "",
"j_tokenNumber": self._tfa_auth.get_token()
}
return soupify(session.post(url, data=data))
@staticmethod
def _login_successful(soup: bs4.BeautifulSoup) -> bool:
relay_state = soup.find("input", {"name": "RelayState"})
saml_response = soup.find("input", {"name": "SAMLResponse"})
return relay_state is not None and saml_response is not None
@staticmethod
def _tfa_required(soup: bs4.BeautifulSoup) -> bool:
return soup.find(id="j_tokenNumber") is not None


@ -1,684 +0,0 @@
"""
Contains an ILIAS crawler alongside helper functions.
"""
import datetime
import json
import logging
import re
from enum import Enum
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Union
from urllib.parse import (parse_qs, urlencode, urljoin, urlparse, urlsplit,
urlunsplit)
import bs4
import requests
from ..errors import FatalException, retry_on_io_exception
from ..logging import PrettyLogger
from ..utils import soupify
from .authenticators import IliasAuthenticator
from .date_demangler import demangle_date
from .downloader import IliasDownloadInfo
LOGGER = logging.getLogger(__name__)
PRETTY = PrettyLogger(LOGGER)
def _sanitize_path_name(name: str) -> str:
return name.replace("/", "-").replace("\\", "-")
class IliasElementType(Enum):
"""
The type of an ilias element.
"""
REGULAR_FOLDER = "REGULAR_FOLDER"
VIDEO_FOLDER = "VIDEO_FOLDER"
EXERCISE_FOLDER = "EXERCISE_FOLDER"
REGULAR_FILE = "REGULAR_FILE"
VIDEO_FILE = "VIDEO_FILE"
FORUM = "FORUM"
MEETING = "MEETING"
EXTERNAL_LINK = "EXTERNAL_LINK"
def is_folder(self) -> bool:
"""
Returns whether this type is some kind of folder.
"""
return "FOLDER" in str(self.name)
IliasDirectoryFilter = Callable[[Path, IliasElementType], bool]
class IliasCrawlerEntry:
# pylint: disable=too-few-public-methods
"""
An ILIAS crawler entry used internally to find, catalogue and recursively crawl elements.
"""
def __init__(
self,
path: Path,
url: Union[str, Callable[[], Optional[str]]],
entry_type: IliasElementType,
modification_date: Optional[datetime.datetime]
):
self.path = path
if isinstance(url, str):
str_url = url
self.url: Callable[[], Optional[str]] = lambda: str_url
else:
self.url = url
self.entry_type = entry_type
self.modification_date = modification_date
def to_download_info(self) -> Optional[IliasDownloadInfo]:
"""
Converts this crawler entry to an IliasDownloadInfo, if possible.
This method will only succeed for *File* types.
"""
if self.entry_type in [IliasElementType.REGULAR_FILE, IliasElementType.VIDEO_FILE]:
return IliasDownloadInfo(self.path, self.url, self.modification_date)
return None
class IliasCrawler:
# pylint: disable=too-few-public-methods
"""
A crawler for ILIAS.
"""
# pylint: disable=too-many-arguments
def __init__(
self,
base_url: str,
session: requests.Session,
authenticator: IliasAuthenticator,
dir_filter: IliasDirectoryFilter
):
"""
Create a new ILIAS crawler.
"""
self._base_url = base_url
self._session = session
self._authenticator = authenticator
self.dir_filter = dir_filter
@staticmethod
def _url_set_query_param(url: str, param: str, value: str) -> str:
"""
Set a query parameter in an url, overwriting existing ones with the same name.
"""
scheme, netloc, path, query, fragment = urlsplit(url)
query_parameters = parse_qs(query)
query_parameters[param] = [value]
new_query_string = urlencode(query_parameters, doseq=True)
return urlunsplit((scheme, netloc, path, new_query_string, fragment))
def recursive_crawl_url(self, url: str) -> List[IliasDownloadInfo]:
"""
Crawls a given url *and all reachable elements in it*.
Args:
url {str} -- the *full* url to crawl
"""
start_entries: List[IliasCrawlerEntry] = self._crawl_folder(Path(""), url)
return self._iterate_entries_to_download_infos(start_entries)
def crawl_course(self, course_id: str) -> List[IliasDownloadInfo]:
"""
Starts the crawl process for a course, yielding a list of elements to (potentially)
download.
Arguments:
course_id {str} -- the course id
Raises:
FatalException: if an unrecoverable error occurs or the course id is not valid
"""
# Start crawling at the given course
root_url = self._url_set_query_param(
self._base_url + "/goto.php", "target", f"crs_{course_id}"
)
if not self._is_course_id_valid(root_url, course_id):
raise FatalException(
"Invalid course id? I didn't find anything looking like a course!"
)
# And treat it as a folder
entries: List[IliasCrawlerEntry] = self._crawl_folder(Path(""), root_url)
return self._iterate_entries_to_download_infos(entries)
def _is_course_id_valid(self, root_url: str, course_id: str) -> bool:
response: requests.Response = self._session.get(root_url)
# We were redirected ==> Non-existent ID
if course_id not in response.url:
return False
link_element: bs4.Tag = self._get_page(root_url, {}).find(id="current_perma_link")
if not link_element:
return False
# It wasn't a course but a category list, forum, etc.
return "crs_" in link_element.get("value")
def find_course_name(self, course_id: str) -> Optional[str]:
"""
Returns the name of a given course. None if it is not a valid course
or it could not be found.
"""
course_url = self._url_set_query_param(
self._base_url + "/goto.php", "target", f"crs_{course_id}"
)
return self.find_element_name(course_url)
def find_element_name(self, url: str) -> Optional[str]:
"""
Returns the name of the element at the given URL, if it can find one.
"""
focus_element: bs4.Tag = self._get_page(url, {}).find(id="il_mhead_t_focus")
if not focus_element:
return None
return focus_element.text
def crawl_personal_desktop(self) -> List[IliasDownloadInfo]:
"""
Crawls the ILIAS personal desktop (and every subelement that can be reached from there).
Raises:
FatalException: if an unrecoverable error occurs
"""
entries: List[IliasCrawlerEntry] = self._crawl_folder(
Path(""), self._base_url + "?baseClass=ilPersonalDesktopGUI"
)
return self._iterate_entries_to_download_infos(entries)
def _iterate_entries_to_download_infos(
self,
entries: List[IliasCrawlerEntry]
) -> List[IliasDownloadInfo]:
result: List[IliasDownloadInfo] = []
entries_to_process: List[IliasCrawlerEntry] = entries.copy()
while len(entries_to_process) > 0:
entry = entries_to_process.pop()
if entry.entry_type == IliasElementType.EXTERNAL_LINK:
PRETTY.not_searching(entry.path, "external link")
continue
if entry.entry_type == IliasElementType.FORUM:
PRETTY.not_searching(entry.path, "forum")
continue
if entry.entry_type.is_folder() and not self.dir_filter(entry.path, entry.entry_type):
PRETTY.not_searching(entry.path, "user filter")
continue
download_info = entry.to_download_info()
if download_info is not None:
result.append(download_info)
continue
url = entry.url()
if url is None:
PRETTY.warning(f"Could not find url for {str(entry.path)!r}, skipping it")
continue
PRETTY.searching(entry.path)
if entry.entry_type == IliasElementType.EXERCISE_FOLDER:
entries_to_process += self._crawl_exercises(entry.path, url)
continue
if entry.entry_type == IliasElementType.REGULAR_FOLDER:
entries_to_process += self._crawl_folder(entry.path, url)
continue
if entry.entry_type == IliasElementType.VIDEO_FOLDER:
entries_to_process += self._crawl_video_directory(entry.path, url)
continue
PRETTY.warning(f"Unknown type: {entry.entry_type}!")
return result
def _crawl_folder(self, folder_path: Path, url: str) -> List[IliasCrawlerEntry]:
"""
Crawl all files in a folder-like element.
"""
soup = self._get_page(url, {})
if soup.find(id="headerimage"):
element: bs4.Tag = soup.find(id="headerimage")
if "opencast" in element.attrs["src"].lower():
PRETTY.warning(f"Switched to crawling a video at {folder_path}")
if not self.dir_filter(folder_path, IliasElementType.VIDEO_FOLDER):
PRETTY.not_searching(folder_path, "user filter")
return []
return self._crawl_video_directory(folder_path, url)
result: List[IliasCrawlerEntry] = []
# Fetch all links and throw them to the general interpreter
links: List[bs4.Tag] = soup.select("a.il_ContainerItemTitle")
for link in links:
abs_url = self._abs_url_from_link(link)
element_path = Path(folder_path, _sanitize_path_name(link.getText().strip()))
element_type = self._find_type_from_link(element_path, link, abs_url)
if element_type == IliasElementType.REGULAR_FILE:
result += self._crawl_file(folder_path, link, abs_url)
elif element_type == IliasElementType.MEETING:
meeting_name = str(element_path.name)
date_portion_str = meeting_name.split(" - ")[0]
date_portion = demangle_date(date_portion_str)
if not date_portion:
result += [IliasCrawlerEntry(element_path, abs_url, element_type, None)]
continue
rest_of_name = meeting_name
if rest_of_name.startswith(date_portion_str):
rest_of_name = rest_of_name[len(date_portion_str):]
new_name = datetime.datetime.strftime(date_portion, "%Y-%m-%d, %H:%M") \
+ rest_of_name
new_path = Path(folder_path, _sanitize_path_name(new_name))
result += [
IliasCrawlerEntry(new_path, abs_url, IliasElementType.REGULAR_FOLDER, None)
]
elif element_type is not None:
result += [IliasCrawlerEntry(element_path, abs_url, element_type, None)]
else:
PRETTY.warning(f"Found element without a type at {str(element_path)!r}")
return result
def _abs_url_from_link(self, link_tag: bs4.Tag) -> str:
"""
Create an absolute url from an <a> tag.
"""
return urljoin(self._base_url, link_tag.get("href"))
@staticmethod
def _find_type_from_link(
path: Path,
link_element: bs4.Tag,
url: str
) -> Optional[IliasElementType]:
"""
Decides which sub crawler to use for a given top level element.
"""
parsed_url = urlparse(url)
LOGGER.debug("Parsed url: %r", parsed_url)
# file URLs contain "target=file"
if "target=file_" in parsed_url.query:
return IliasElementType.REGULAR_FILE
# Skip forums
if "cmd=showThreads" in parsed_url.query:
return IliasElementType.FORUM
# Everything with a ref_id can *probably* be opened to reveal nested things
# video groups, directories, exercises, etc
if "ref_id=" in parsed_url.query:
return IliasCrawler._find_type_from_folder_like(link_element, url)
PRETTY.warning(
"Got unknown element type in switch. I am not sure what horror I found on the"
f" ILIAS page. The element was at {str(path)!r} and it is {link_element!r})"
)
return None
@staticmethod
def _find_type_from_folder_like(link_element: bs4.Tag, url: str) -> Optional[IliasElementType]:
"""
Try crawling something that looks like a folder.
"""
# pylint: disable=too-many-return-statements
found_parent: Optional[bs4.Tag] = None
# We look for the outer div of our inner link, to find information around it
# (mostly the icon)
for parent in link_element.parents:
if "ilContainerListItemOuter" in parent["class"]:
found_parent = parent
break
if found_parent is None:
PRETTY.warning(f"Could not find element icon for {url!r}")
return None
# Find the small descriptive icon to figure out the type
img_tag: Optional[bs4.Tag] = found_parent.select_one("img.ilListItemIcon")
if img_tag is None:
PRETTY.warning(f"Could not find image tag for {url!r}")
return None
if "opencast" in str(img_tag["alt"]).lower():
return IliasElementType.VIDEO_FOLDER
if str(img_tag["src"]).endswith("icon_exc.svg"):
return IliasElementType.EXERCISE_FOLDER
if str(img_tag["src"]).endswith("icon_webr.svg"):
return IliasElementType.EXTERNAL_LINK
if str(img_tag["src"]).endswith("frm.svg"):
return IliasElementType.FORUM
if str(img_tag["src"]).endswith("sess.svg"):
return IliasElementType.MEETING
return IliasElementType.REGULAR_FOLDER
@staticmethod
def _crawl_file(path: Path, link_element: bs4.Tag, url: str) -> List[IliasCrawlerEntry]:
"""
Crawls a file.
"""
# Files have a list of properties (type, modification date, size, etc.)
# in a series of divs.
# Find the parent containing all those divs, so we can filter out what we need
properties_parent: bs4.Tag = link_element.findParent(
"div", {"class": lambda x: "il_ContainerListItem" in x}
).select_one(".il_ItemProperties")
# The first one is always the filetype
file_type = properties_parent.select_one("span.il_ItemProperty").getText().strip()
# The rest does not have a stable order. Grab the whole text and reg-ex the date
# out of it
all_properties_text = properties_parent.getText().strip()
modification_date_match = re.search(
r"(((\d+\. \w+ \d+)|(Gestern|Yesterday)|(Heute|Today)|(Morgen|Tomorrow)), \d+:\d+)",
all_properties_text
)
if modification_date_match is None:
modification_date = None
PRETTY.warning(f"Could not extract start date from {all_properties_text!r}")
else:
modification_date_str = modification_date_match.group(1)
modification_date = demangle_date(modification_date_str)
# Grab the name from the link text
name = _sanitize_path_name(link_element.getText())
full_path = Path(path, name + "." + file_type)
return [
IliasCrawlerEntry(full_path, url, IliasElementType.REGULAR_FILE, modification_date)
]
def _crawl_video_directory(self, video_dir_path: Path, url: str) -> List[IliasCrawlerEntry]:
"""
Crawl the video overview site.
"""
initial_soup = self._get_page(url, {})
# The page is actually empty but contains a much-needed token in the link below.
# That token can be used to fetch the *actual* video listing
content_link: bs4.Tag = initial_soup.select_one("#tab_series a")
# Fetch the actual video listing. The given parameters return all videos (max 800)
# in a standalone html page
video_list_soup = self._get_page(
self._abs_url_from_link(content_link),
{"limit": 800, "cmd": "asyncGetTableGUI", "cmdMode": "asynch"}
)
# If we find a page selected, we probably need to respect pagination
if self._is_paginated_video_page(video_list_soup):
second_stage_url = self._abs_url_from_link(content_link)
return self._crawl_paginated_video_directory(
video_dir_path, video_list_soup, second_stage_url
)
return self._crawl_video_directory_second_stage(video_dir_path, video_list_soup)
@staticmethod
def _is_paginated_video_page(soup: bs4.BeautifulSoup) -> bool:
return soup.find(id=re.compile(r"tab_page_sel.+")) is not None
def _crawl_paginated_video_directory(
self,
video_dir_path: Path,
paged_video_list_soup: bs4.BeautifulSoup,
second_stage_url: str
) -> List[IliasCrawlerEntry]:
LOGGER.info("Found paginated video page, trying 800 elements")
# Try to find the table id. This can be used to build the query parameter indicating
# you want 800 elements
table_element: bs4.Tag = paged_video_list_soup.find(
name="table", id=re.compile(r"tbl_xoct_.+")
)
if table_element is None:
PRETTY.warning(
"Could not increase elements per page (table not found)."
" Some might not be crawled!"
)
return self._crawl_video_directory_second_stage(video_dir_path, paged_video_list_soup)
match = re.match(r"tbl_xoct_(.+)", table_element.attrs["id"])
if match is None:
PRETTY.warning(
"Could not increase elements per page (table id not found)."
" Some might not be crawled!"
)
return self._crawl_video_directory_second_stage(video_dir_path, paged_video_list_soup)
table_id = match.group(1)
extended_video_page = self._get_page(
second_stage_url,
{f"tbl_xoct_{table_id}_trows": 800, "cmd": "asyncGetTableGUI", "cmdMode": "asynch"}
)
if self._is_paginated_video_page(extended_video_page):
PRETTY.warning(
"800 elements do not seem to be enough (or I failed to fetch that many)."
" I will miss elements."
)
return self._crawl_video_directory_second_stage(video_dir_path, extended_video_page)
def _crawl_video_directory_second_stage(
self,
video_dir_path: Path,
video_list_soup: bs4.BeautifulSoup
) -> List[IliasCrawlerEntry]:
"""
Crawls the "second stage" video page. This page contains the actual video urls.
"""
direct_download_links: List[bs4.Tag] = video_list_soup.findAll(
name="a", text=re.compile(r"\s*Download\s*")
)
# Video start links are marked with an "Abspielen" link
video_links: List[bs4.Tag] = video_list_soup.findAll(
name="a", text=re.compile(r"\s*Abspielen\s*")
)
results: List[IliasCrawlerEntry] = []
# We can download everything directly!
# FIXME: Sadly the download button is currently broken, so never do that
if False and len(direct_download_links) == len(video_links):
for link in direct_download_links:
results += self._crawl_single_video(video_dir_path, link, True)
else:
for link in video_links:
results += self._crawl_single_video(video_dir_path, link, False)
return results
def _crawl_single_video(
self,
parent_path: Path,
link: bs4.Tag,
direct_download: bool
) -> List[IliasCrawlerEntry]:
"""
Crawl a single video based on its "Abspielen" link from the video listing.
"""
# The link is part of a table with multiple columns, describing metadata.
# 6th child (1 indexed) is the modification time string
modification_string = link.parent.parent.parent.select_one(
"td.std:nth-child(6)"
).getText().strip()
modification_time = datetime.datetime.strptime(modification_string, "%d.%m.%Y - %H:%M")
title = link.parent.parent.parent.select_one(
"td.std:nth-child(3)"
).getText().strip()
title += ".mp4"
video_path: Path = Path(parent_path, _sanitize_path_name(title))
video_url = self._abs_url_from_link(link)
# The video had a direct download button we can use instead
if direct_download:
LOGGER.debug("Using direct download for video %r", str(video_path))
return [IliasCrawlerEntry(
video_path, video_url, IliasElementType.VIDEO_FILE, modification_time
)]
return [IliasCrawlerEntry(
video_path,
self._crawl_video_url_from_play_link(video_url),
IliasElementType.VIDEO_FILE,
modification_time
)]
def _crawl_video_url_from_play_link(self, play_url: str) -> Callable[[], Optional[str]]:
def inner() -> Optional[str]:
# Fetch the actual video page. This is a small wrapper page initializing a javascript
# player. Sadly we cannot execute that JS. The actual video stream url is nowhere
# on the page, but defined in a JS object inside a script tag, passed to the player
# library.
# We do the impossible and RegEx the stream JSON object out of the page's HTML source
video_page_soup = soupify(self._session.get(play_url))
regex: re.Pattern = re.compile(
r"({\"streams\"[\s\S]+?),\s*{\"paella_config_file", re.IGNORECASE
)
json_match = regex.search(str(video_page_soup))
if json_match is None:
PRETTY.warning(f"Could not find json stream info for {play_url!r}")
return None
json_str = json_match.group(1)
# parse it
json_object = json.loads(json_str)
# and fetch the video url!
video_url = json_object["streams"][0]["sources"]["mp4"][0]["src"]
return video_url
return inner
def _crawl_exercises(self, element_path: Path, url: str) -> List[IliasCrawlerEntry]:
"""
Crawl files offered for download in exercises.
"""
soup = self._get_page(url, {})
results: List[IliasCrawlerEntry] = []
# Each assignment is in an accordion container
assignment_containers: List[bs4.Tag] = soup.select(".il_VAccordionInnerContainer")
for container in assignment_containers:
# Fetch the container name out of the header to use it in the path
container_name = container.select_one(".ilAssignmentHeader").getText().strip()
# Find all download links in the container (this will contain all the files)
files: List[bs4.Tag] = container.findAll(
name="a",
# download links contain the given command class
attrs={"href": lambda x: x and "cmdClass=ilexsubmissiongui" in x},
text="Download"
)
LOGGER.debug("Found exercise container %r", container_name)
# Grab each file as you now have the link
for file_link in files:
# Two divs, side by side. Left is the name, right is the link ==> get left
# sibling
file_name = file_link.parent.findPrevious(name="div").getText().strip()
file_name = _sanitize_path_name(file_name)
url = self._abs_url_from_link(file_link)
LOGGER.debug("Found file %r at %r", file_name, url)
results.append(IliasCrawlerEntry(
Path(element_path, container_name, file_name),
url,
IliasElementType.REGULAR_FILE,
None # We do not have any timestamp
))
return results
@retry_on_io_exception(3, "fetching webpage")
def _get_page(self, url: str, params: Dict[str, Any],
retry_count: int = 0) -> bs4.BeautifulSoup:
"""
Fetches a page from ILIAS, authenticating when needed.
"""
if retry_count >= 4:
raise FatalException("Could not get a proper page after 4 tries. "
"Maybe your URL is wrong, authentication fails continuously, "
"your ILIAS connection is spotty or ILIAS is not well.")
LOGGER.debug("Fetching %r", url)
response = self._session.get(url, params=params)
content_type = response.headers["content-type"]
if not content_type.startswith("text/html"):
raise FatalException(
f"Invalid content type {content_type} when crawling ilias page"
" {url!r} with {params!r}"
)
soup = soupify(response)
if self._is_logged_in(soup):
return soup
LOGGER.info("Not authenticated, changing that...")
self._authenticator.authenticate(self._session)
return self._get_page(url, params, retry_count + 1)
@staticmethod
def _is_logged_in(soup: bs4.BeautifulSoup) -> bool:
# Normal ILIAS pages
userlog = soup.find("li", {"id": "userlog"})
if userlog is not None:
LOGGER.debug("Auth: Found #userlog")
return True
# Video listing embeds do not have complete ILIAS html. Try to match them by
# their video listing table
video_table = soup.find(
recursive=True,
name="table",
attrs={"id": lambda x: x is not None and x.startswith("tbl_xoct")}
)
if video_table is not None:
LOGGER.debug("Auth: Found #tbl_xoct.+")
return True
# The individual video player wrapper page has nothing of the above.
# Match it by its playerContainer.
if soup.select_one("#playerContainer") is not None:
LOGGER.debug("Auth: Found #playerContainer")
return True
return False
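As a standalone sketch (not part of the diff), this is the query-parameter override used by _url_set_query_param above to build the goto.php entry URLs; the hostname and course id below are placeholders:

from urllib.parse import parse_qs, urlencode, urlsplit, urlunsplit

def set_query_param(url: str, param: str, value: str) -> str:
    # Overwrite (or add) a single query parameter, keeping the rest of the URL intact.
    scheme, netloc, path, query, fragment = urlsplit(url)
    params = parse_qs(query)
    params[param] = [value]
    return urlunsplit((scheme, netloc, path, urlencode(params, doseq=True), fragment))

print(set_query_param("https://ilias.example.edu/goto.php", "target", "crs_123"))
# https://ilias.example.edu/goto.php?target=crs_123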


@@ -1,51 +0,0 @@
"""
Helper methods to demangle an ILIAS date.
"""
import datetime
import locale
import logging
import re
from typing import Optional
from ..logging import PrettyLogger
LOGGER = logging.getLogger(__name__)
PRETTY = PrettyLogger(LOGGER)
def demangle_date(date: str) -> Optional[datetime.datetime]:
"""
Demangle a given date in one of the following formats:
"Gestern, HH:MM"
"Heute, HH:MM"
"Morgen, HH:MM"
"dd. mon yyyy, HH:MM
"""
saved = locale.setlocale(locale.LC_ALL)
try:
try:
locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8')
except locale.Error:
PRETTY.warning(
"Could not set language to german. Assuming you use english everywhere."
)
date = re.sub(r"\s+", " ", date)
date = re.sub("Gestern|Yesterday", _yesterday().strftime("%d. %b %Y"), date, re.I)
date = re.sub("Heute|Today", datetime.date.today().strftime("%d. %b %Y"), date, re.I)
date = re.sub("Morgen|Tomorrow", _tomorrow().strftime("%d. %b %Y"), date, re.I)
return datetime.datetime.strptime(date, "%d. %b %Y, %H:%M")
except ValueError:
PRETTY.warning(f"Could not parse date {date!r}")
return None
finally:
locale.setlocale(locale.LC_ALL, saved)
def _yesterday() -> datetime.date:
return datetime.date.today() - datetime.timedelta(days=1)
def _tomorrow() -> datetime.date:
return datetime.date.today() + datetime.timedelta(days=1)
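A minimal sketch (not part of the diff) of the rewriting idea behind demangle_date: relative words are replaced with a concrete date so a single strptime format can parse the whole string; locale handling is omitted here:

import datetime
import re

def rewrite_relative(date: str) -> str:
    # Collapse whitespace, then substitute "Today"/"Heute" with today's date string.
    today = datetime.date.today().strftime("%d. %b %Y")
    return re.sub(r"Heute|Today", today, re.sub(r"\s+", " ", date), flags=re.IGNORECASE)

print(datetime.datetime.strptime(rewrite_relative("Today, 13:37"), "%d. %b %Y, %H:%M"))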


@@ -1,173 +0,0 @@
"""Contains a downloader for ILIAS."""
import datetime
import logging
import math
import os
from pathlib import Path, PurePath
from typing import Callable, List, Optional, Union
import bs4
import requests
from ..errors import retry_on_io_exception
from ..logging import PrettyLogger
from ..organizer import Organizer
from ..tmp_dir import TmpDir
from ..transform import Transformable
from ..utils import soupify, stream_to_path
from .authenticators import IliasAuthenticator
LOGGER = logging.getLogger(__name__)
PRETTY = PrettyLogger(LOGGER)
class ContentTypeException(Exception):
"""Thrown when the content type of the ilias element can not be handled."""
class IliasDownloadInfo(Transformable):
"""
This class describes a single file to be downloaded.
"""
def __init__(
self,
path: PurePath,
url: Union[str, Callable[[], Optional[str]]],
modification_date: Optional[datetime.datetime]
):
super().__init__(path)
if isinstance(url, str):
string_url = url
self.url: Callable[[], Optional[str]] = lambda: string_url
else:
self.url = url
self.modification_date = modification_date
IliasDownloadStrategy = Callable[[Organizer, IliasDownloadInfo], bool]
def download_everything(organizer: Organizer, info: IliasDownloadInfo) -> bool:
# pylint: disable=unused-argument
"""
Accepts everything.
"""
return True
def download_modified_or_new(organizer: Organizer, info: IliasDownloadInfo) -> bool:
"""
Accepts new files or files with a more recent modification date.
"""
resolved_file = organizer.resolve(info.path)
if not resolved_file.exists() or info.modification_date is None:
return True
resolved_mod_time_seconds = resolved_file.stat().st_mtime
# Download if the info is newer
if info.modification_date.timestamp() > resolved_mod_time_seconds:
return True
PRETTY.ignored_file(info.path, "local file has newer or equal modification time")
return False
class IliasDownloader:
# pylint: disable=too-many-arguments
"""A downloader for ILIAS."""
def __init__(
self,
tmp_dir: TmpDir,
organizer: Organizer,
session: requests.Session,
authenticator: IliasAuthenticator,
strategy: IliasDownloadStrategy,
timeout: int = 5
):
"""
Create a new IliasDownloader.
The timeout applies to the download request only, as bwcloud uses IPv6
and requests has a problem with that: https://github.com/psf/requests/issues/5522
"""
self._tmp_dir = tmp_dir
self._organizer = organizer
self._session = session
self._authenticator = authenticator
self._strategy = strategy
self._timeout = timeout
def download_all(self, infos: List[IliasDownloadInfo]) -> None:
"""
Download multiple files one after the other.
"""
for info in infos:
self.download(info)
def download(self, info: IliasDownloadInfo) -> None:
"""
Download a file from ILIAS.
Re-authenticates and retries a few times if it could not fetch the file.
"""
LOGGER.debug("Downloading %r", info)
if not self._strategy(self._organizer, info):
self._organizer.mark(info.path)
return
tmp_file = self._tmp_dir.new_path()
@retry_on_io_exception(3, "downloading file")
def download_impl() -> bool:
if not self._try_download(info, tmp_file):
LOGGER.info("Re-Authenticating due to download failure: %r", info)
self._authenticator.authenticate(self._session)
raise IOError("Scheduled retry")
else:
return True
if not download_impl():
PRETTY.error(f"Download of file {info.path} failed too often! Skipping it...")
return
dst_path = self._organizer.accept_file(tmp_file, info.path)
if dst_path and info.modification_date:
os.utime(
dst_path,
times=(
math.ceil(info.modification_date.timestamp()),
math.ceil(info.modification_date.timestamp())
)
)
def _try_download(self, info: IliasDownloadInfo, target: Path) -> bool:
url = info.url()
if url is None:
PRETTY.warning(f"Could not download {str(info.path)!r} as I got no URL :/")
return True
with self._session.get(url, stream=True, timeout=self._timeout) as response:
content_type = response.headers["content-type"]
has_content_disposition = "content-disposition" in response.headers
if content_type.startswith("text/html") and not has_content_disposition:
if self._is_logged_in(soupify(response)):
raise ContentTypeException("Attempting to download a web page, not a file")
return False
# Yay, we got the file :)
stream_to_path(response, target, info.path.name)
return True
@staticmethod
def _is_logged_in(soup: bs4.BeautifulSoup) -> bool:
userlog = soup.find("li", {"id": "userlog"})
return userlog is not None
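The core of the download_modified_or_new strategy above boils down to a timestamp comparison; a standalone sketch (file name is a placeholder):

import datetime
from pathlib import Path

def should_download(local: Path, remote_mtime: datetime.datetime) -> bool:
    # Download if there is no local copy yet or the remote file is newer.
    if not local.exists():
        return True
    return remote_mtime.timestamp() > local.stat().st_mtime

print(should_download(Path("lecture01.pdf"), datetime.datetime(2022, 10, 24, 12, 0)))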


@@ -1,154 +0,0 @@
"""
Utility functions and a scraper/downloader for the IPD pages.
"""
import datetime
import logging
import math
import os
from dataclasses import dataclass
from pathlib import Path
from typing import Callable, List, Optional
from urllib.parse import urljoin
import bs4
import requests
from PFERD.errors import FatalException
from PFERD.utils import soupify
from .logging import PrettyLogger
from .organizer import Organizer
from .tmp_dir import TmpDir
from .transform import Transformable
from .utils import stream_to_path
LOGGER = logging.getLogger(__name__)
PRETTY = PrettyLogger(LOGGER)
@dataclass
class IpdDownloadInfo(Transformable):
"""
Information about an ipd entry.
"""
url: str
modification_date: Optional[datetime.datetime]
IpdDownloadStrategy = Callable[[Organizer, IpdDownloadInfo], bool]
def ipd_download_new_or_modified(organizer: Organizer, info: IpdDownloadInfo) -> bool:
"""
Accepts new files or files with a more recent modification date.
"""
resolved_file = organizer.resolve(info.path)
if not resolved_file.exists():
return True
if not info.modification_date:
PRETTY.ignored_file(info.path, "could not find modification time, file exists")
return False
resolved_mod_time_seconds = resolved_file.stat().st_mtime
# Download if the info is newer
if info.modification_date.timestamp() > resolved_mod_time_seconds:
return True
PRETTY.ignored_file(info.path, "local file has newer or equal modification time")
return False
class IpdCrawler:
# pylint: disable=too-few-public-methods
"""
A crawler for IPD pages.
"""
def __init__(self, base_url: str):
self._base_url = base_url
def _abs_url_from_link(self, link_tag: bs4.Tag) -> str:
"""
Create an absolute url from an <a> tag.
"""
return urljoin(self._base_url, link_tag.get("href"))
def crawl(self) -> List[IpdDownloadInfo]:
"""
Crawls the page given in the constructor.
"""
page = soupify(requests.get(self._base_url))
items: List[IpdDownloadInfo] = []
def is_relevant_url(x: str) -> bool:
return x.endswith(".pdf") or x.endswith(".c") or x.endswith(".java") or x.endswith(".zip")
for link in page.findAll(name="a", attrs={"href": lambda x: x and is_relevant_url(x)}):
href: str = link.attrs.get("href")
name = href.split("/")[-1]
modification_date: Optional[datetime.datetime] = None
try:
enclosing_row: bs4.Tag = link.findParent(name="tr")
if enclosing_row:
date_text = enclosing_row.find(name="td").text
modification_date = datetime.datetime.strptime(date_text, "%d.%m.%Y")
except ValueError:
modification_date = None
items.append(IpdDownloadInfo(
Path(name),
url=self._abs_url_from_link(link),
modification_date=modification_date
))
return items
class IpdDownloader:
"""
A downloader for ipd files.
"""
def __init__(self, tmp_dir: TmpDir, organizer: Organizer, strategy: IpdDownloadStrategy):
self._tmp_dir = tmp_dir
self._organizer = organizer
self._strategy = strategy
self._session = requests.session()
def download_all(self, infos: List[IpdDownloadInfo]) -> None:
"""
Download multiple files one after the other.
"""
for info in infos:
self.download(info)
def download(self, info: IpdDownloadInfo) -> None:
"""
Download a single file.
"""
if not self._strategy(self._organizer, info):
self._organizer.mark(info.path)
return
with self._session.get(info.url, stream=True) as response:
if response.status_code == 200:
tmp_file = self._tmp_dir.new_path()
stream_to_path(response, tmp_file, info.path.name)
dst_path = self._organizer.accept_file(tmp_file, info.path)
if dst_path and info.modification_date:
os.utime(
dst_path,
times=(
math.ceil(info.modification_date.timestamp()),
math.ceil(info.modification_date.timestamp())
)
)
elif response.status_code == 403:
raise FatalException("Received 403. Are you not using the KIT VPN?")
else:
PRETTY.warning(f"Could not download file, got response {response.status_code}")

97
PFERD/limiter.py Normal file

@@ -0,0 +1,97 @@
import asyncio
import time
from contextlib import asynccontextmanager
from dataclasses import dataclass
from typing import AsyncIterator, Optional
@dataclass
class Slot:
active: bool = False
last_left: Optional[float] = None
class Limiter:
def __init__(
self,
task_limit: int,
download_limit: int,
task_delay: float
):
if task_limit <= 0:
raise ValueError("task limit must be at least 1")
if download_limit <= 0:
raise ValueError("download limit must be at least 1")
if download_limit > task_limit:
raise ValueError("download limit can't be greater than task limit")
if task_delay < 0:
raise ValueError("Task delay must not be negative")
self._slots = [Slot() for _ in range(task_limit)]
self._downloads = download_limit
self._delay = task_delay
self._condition = asyncio.Condition()
def _acquire_slot(self) -> Optional[Slot]:
for slot in self._slots:
if not slot.active:
slot.active = True
return slot
return None
async def _wait_for_slot_delay(self, slot: Slot) -> None:
if slot.last_left is not None:
delay = slot.last_left + self._delay - time.time()
if delay > 0:
await asyncio.sleep(delay)
def _release_slot(self, slot: Slot) -> None:
slot.last_left = time.time()
slot.active = False
@asynccontextmanager
async def limit_crawl(self) -> AsyncIterator[None]:
slot: Slot
async with self._condition:
while True:
if found_slot := self._acquire_slot():
slot = found_slot
break
await self._condition.wait()
await self._wait_for_slot_delay(slot)
try:
yield
finally:
async with self._condition:
self._release_slot(slot)
self._condition.notify_all()
@asynccontextmanager
async def limit_download(self) -> AsyncIterator[None]:
slot: Slot
async with self._condition:
while True:
if self._downloads <= 0:
await self._condition.wait()
continue
if found_slot := self._acquire_slot():
slot = found_slot
self._downloads -= 1
break
await self._condition.wait()
await self._wait_for_slot_delay(slot)
try:
yield
finally:
async with self._condition:
self._release_slot(slot)
self._downloads += 1
self._condition.notify_all()
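A minimal usage sketch of the new Limiter (not part of the diff), assuming it is importable as PFERD.limiter as shown in the file header above:

import asyncio
from PFERD.limiter import Limiter

async def crawl_unit(limiter: Limiter, i: int) -> None:
    async with limiter.limit_crawl():
        # At most task_limit of these sections run concurrently; each slot also
        # waits task_delay seconds after its previous occupant left it.
        await asyncio.sleep(0.1)
        print(f"crawled unit {i}")

async def main() -> None:
    limiter = Limiter(task_limit=2, download_limit=1, task_delay=0.5)
    await asyncio.gather(*(crawl_unit(limiter, i) for i in range(5)))

asyncio.run(main())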


@@ -1,41 +0,0 @@
"""
Contains a Location class for objects with an inherent path.
"""
from pathlib import Path, PurePath
class ResolveException(Exception):
"""An exception while resolving a file."""
# TODO take care of this when doing exception handling
class Location:
"""
An object that has an inherent path.
"""
def __init__(self, path: Path):
self._path = path.resolve()
@property
def path(self) -> Path:
"""
This object's location.
"""
return self._path
def resolve(self, target: PurePath) -> Path:
"""
Resolve a file relative to the path of this location.
Raises a [ResolveException] if the file is outside the given directory.
"""
absolute_path = self.path.joinpath(target).resolve()
# TODO Make this less inefficient
if self.path not in absolute_path.parents:
raise ResolveException(f"Path {target} is not inside directory {self.path}")
return absolute_path
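For illustration, how the resolve check above behaves, assuming the Location and ResolveException definitions are in scope; the directory name is a placeholder:

from pathlib import Path, PurePath

loc = Location(Path("downloads"))
print(loc.resolve(PurePath("folder/file.pdf")))  # .../downloads/folder/file.pdf

try:
    loc.resolve(PurePath("../escape.txt"))
except ResolveException as error:
    print(error)  # Path ../escape.txt is not inside directory .../downloads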


@@ -1,184 +1,291 @@
"""
Contains a few logger utility functions and implementations.
"""

import logging
from typing import Optional

from rich._log_render import LogRender
from rich.console import Console
from rich.style import Style
from rich.text import Text
from rich.theme import Theme

from .download_summary import DownloadSummary
from .utils import PathLike, to_path

STYLE = "{"
FORMAT = "[{levelname:<7}] {message}"
DATE_FORMAT = "%F %T"


def enable_logging(name: str = "PFERD", level: int = logging.INFO) -> None:
    """
    Enable and configure logging via the logging module.
    """
    logger = logging.getLogger(name)
    logger.setLevel(level)
    logger.addHandler(RichLoggingHandler(level=level))

    # This should be logged by our own handler, and not the root logger's
    # default handler, so we don't pass it on to the root logger.
    logger.propagate = False


class RichLoggingHandler(logging.Handler):
    """
    A logging handler that uses rich for highlighting
    """

    def __init__(self, level: int) -> None:
        super().__init__(level=level)
        self.console = Console(theme=Theme({
            "logging.level.warning": Style(color="yellow")
        }))
        self._log_render = LogRender(show_level=True, show_time=False, show_path=False)

    def emit(self, record: logging.LogRecord) -> None:
        """
        Invoked by logging.
        """
        log_style = f"logging.level.{record.levelname.lower()}"
        message = self.format(record)

        level = Text()
        level.append(record.levelname, log_style)
        message_text = Text.from_markup(message)

        self.console.print(
            self._log_render(
                self.console,
                [message_text],
                level=level,
            )
        )


class PrettyLogger:
    """
    A logger that prints some specially formatted log messages in color.
    """

    def __init__(self, logger: logging.Logger) -> None:
        self.logger = logger

    @staticmethod
    def _format_path(path: PathLike) -> str:
        return repr(str(to_path(path)))

    def error(self, message: str) -> None:
        """
        Print an error message indicating some operation fatally failed.
        """
        self.logger.error(
            f"[bold red]{message}[/bold red]"
        )

    def warning(self, message: str) -> None:
        """
        Print a warning message indicating some operation failed, but the error can be recovered
        or ignored.
        """
        self.logger.warning(
            f"[bold yellow]{message}[/bold yellow]"
        )

    def modified_file(self, path: PathLike) -> None:
        """
        An existing file has changed.
        """
        self.logger.info(
            f"[bold magenta]Modified {self._format_path(path)}.[/bold magenta]"
        )

    def new_file(self, path: PathLike) -> None:
        """
        A new file has been downloaded.
        """
        self.logger.info(
            f"[bold green]Created {self._format_path(path)}.[/bold green]"
        )

    def deleted_file(self, path: PathLike) -> None:
        """
        A file has been deleted.
        """
        self.logger.info(
            f"[bold red]Deleted {self._format_path(path)}.[/bold red]"
        )

    def ignored_file(self, path: PathLike, reason: str) -> None:
        """
        File was not downloaded or modified.
        """
        self.logger.info(
            f"[dim]Ignored {self._format_path(path)} "
            f"([/dim]{reason}[dim]).[/dim]"
        )

    def searching(self, path: PathLike) -> None:
        """
        A crawler searches a particular object.
        """
        self.logger.info(f"Searching {self._format_path(path)}")

    def not_searching(self, path: PathLike, reason: str) -> None:
        """
        A crawler does not search a particular object.
        """
        self.logger.info(
            f"[dim]Not searching {self._format_path(path)} "
            f"([/dim]{reason}[dim]).[/dim]"
        )

    def summary(self, download_summary: DownloadSummary) -> None:
        """
        Prints a download summary.
        """
        self.logger.info("")
        self.logger.info("[bold cyan]Download Summary[/bold cyan]")
        if not download_summary.has_updates():
            self.logger.info("[bold dim]Nothing changed![/bold dim]")
            return

        for new_file in download_summary.new_files:
            self.new_file(new_file)
        for modified_file in download_summary.modified_files:
            self.modified_file(modified_file)
        for deleted_files in download_summary.deleted_files:
            self.deleted_file(deleted_files)

    def starting_synchronizer(
            self,
            target_directory: PathLike,
            synchronizer_name: str,
            subject: Optional[str] = None,
    ) -> None:
        """
        A special message marking that a synchronizer has been started.
        """
        subject_str = f"{subject} " if subject else ""
        self.logger.info("")
        self.logger.info((
            f"[bold cyan]Synchronizing "
            f"{subject_str}to {self._format_path(target_directory)} "
            f"using the {synchronizer_name} synchronizer.[/bold cyan]"
        ))


import asyncio
import sys
import traceback
from contextlib import asynccontextmanager, contextmanager
# TODO In Python 3.9 and above, ContextManager is deprecated
from typing import AsyncIterator, ContextManager, Iterator, List, Optional

from rich.console import Console, Group
from rich.live import Live
from rich.markup import escape
from rich.panel import Panel
from rich.progress import (BarColumn, DownloadColumn, Progress, TaskID, TextColumn, TimeRemainingColumn,
                           TransferSpeedColumn)
from rich.table import Column


class ProgressBar:
    def __init__(self, progress: Progress, taskid: TaskID):
        self._progress = progress
        self._taskid = taskid

    def advance(self, amount: float = 1) -> None:
        self._progress.advance(self._taskid, advance=amount)

    def set_total(self, total: float) -> None:
        self._progress.update(self._taskid, total=total)
        self._progress.start_task(self._taskid)


class Log:
    STATUS_WIDTH = 11

    def __init__(self) -> None:
        self.console = Console(highlight=False)

        self._crawl_progress = Progress(
            TextColumn("{task.description}", table_column=Column(ratio=1)),
            BarColumn(),
            TimeRemainingColumn(),
            expand=True,
        )
        self._download_progress = Progress(
            TextColumn("{task.description}", table_column=Column(ratio=1)),
            TransferSpeedColumn(),
            DownloadColumn(),
            BarColumn(),
            TimeRemainingColumn(),
            expand=True,
        )

        self._live = Live(console=self.console, transient=True)
        self._update_live()

        self._showing_progress = False
        self._progress_suspended = False
        self._lock = asyncio.Lock()
        self._lines: List[str] = []

        # Whether different parts of the output are enabled or disabled
        self.output_explain = False
        self.output_status = True
        self.output_not_deleted = True
        self.output_report = True

    def _update_live(self) -> None:
        elements = []
        if self._crawl_progress.task_ids:
            elements.append(self._crawl_progress)
        if self._download_progress.task_ids:
            elements.append(self._download_progress)

        group = Group(*elements)
        self._live.update(group)

    @contextmanager
    def show_progress(self) -> Iterator[None]:
        if self._showing_progress:
            raise RuntimeError("Calling 'show_progress' while already showing progress")

        self._showing_progress = True
        try:
            with self._live:
                yield
        finally:
            self._showing_progress = False

    @asynccontextmanager
    async def exclusive_output(self) -> AsyncIterator[None]:
        if not self._showing_progress:
            raise RuntimeError("Calling 'exclusive_output' while not showing progress")

        async with self._lock:
            self._progress_suspended = True
            self._live.stop()
            try:
                yield
            finally:
                self._live.start()
                self._progress_suspended = False
                for line in self._lines:
                    self.print(line)
                self._lines = []

    def unlock(self) -> None:
        """
        Get rid of an exclusive output state.

        This function is meant to let PFERD print log messages after the event
        loop was forcibly stopped and if it will not be started up again. After
        this is called, it is not safe to use any functions except the logging
        functions (print, warn, ...).
        """
        self._progress_suspended = False
        for line in self._lines:
            self.print(line)

    def print(self, text: str) -> None:
        """
        Print a normal message. Allows markup.
        """
        if self._progress_suspended:
            self._lines.append(text)
        else:
            self.console.print(text)

    # TODO Print errors (and warnings?) to stderr

    def warn(self, text: str) -> None:
        """
        Print a warning message. Allows no markup.
        """
        self.print(f"[bold bright_red]Warning[/] {escape(text)}")

    def warn_contd(self, text: str) -> None:
        """
        Print further lines of a warning message. Allows no markup.
        """
        self.print(f"{escape(text)}")

    def error(self, text: str) -> None:
        """
        Print an error message. Allows no markup.
        """
        self.print(f"[bold bright_red]Error[/] [red]{escape(text)}")

    def error_contd(self, text: str) -> None:
        """
        Print further lines of an error message. Allows no markup.
        """
        self.print(f"[red]{escape(text)}")

    def unexpected_exception(self) -> None:
        """
        Call this in an "except" clause to log an unexpected exception.
        """
        t, v, tb = sys.exc_info()
        if t is None or v is None or tb is None:
            # We're not currently handling an exception, so somebody probably
            # called this function where they shouldn't.
            self.error("Something unexpected happened")
            self.error_contd("")
            for line in traceback.format_stack():
                self.error_contd(line[:-1])  # Without the newline
            self.error_contd("")
        else:
            self.error("An unexpected exception occurred")
            self.error_contd("")
            self.error_contd(traceback.format_exc())

        # Our print function doesn't take types other than strings, but the
        # underlying rich.print function does. This call is a special case
        # anyways, and we're calling it internally, so this should be fine.
        self.print(Panel.fit("""
Please copy your program output and send it to the PFERD maintainers, either
directly or as a GitHub issue: https://github.com/Garmelon/PFERD/issues/new
        """.strip()))  # type: ignore

    def explain_topic(self, text: str) -> None:
        """
        Print a top-level explain text. Allows no markup.
        """
        if self.output_explain:
            self.print(f"[yellow]{escape(text)}")

    def explain(self, text: str) -> None:
        """
        Print an indented explain text. Allows no markup.
        """
        if self.output_explain:
            self.print(f" {escape(text)}")

    def status(self, style: str, action: str, text: str, suffix: str = "") -> None:
        """
        Print a status update while crawling. Allows markup in the "style"
        argument which will be applied to the "action" string.
        """
        if self.output_status:
            action = escape(f"{action:<{self.STATUS_WIDTH}}")
            self.print(f"{style}{action}[/] {escape(text)} {suffix}")

    def not_deleted(self, style: str, action: str, text: str, suffix: str = "") -> None:
        """
        Print a message for a local only file that wasn't
        deleted while crawling. Allows markup in the "style"
        argument which will be applied to the "action" string.
        """
        if self.output_status and self.output_not_deleted:
            action = escape(f"{action:<{self.STATUS_WIDTH}}")
            self.print(f"{style}{action}[/] {escape(text)} {suffix}")

    def report(self, text: str) -> None:
        """
        Print a report after crawling. Allows markup.
        """
        if self.output_report:
            self.print(text)

    def report_not_deleted(self, text: str) -> None:
        """
        Print a report for a local only file that wasn't deleted after crawling. Allows markup.
        """
        if self.output_report and self.output_not_deleted:
            self.print(text)

    @contextmanager
    def _bar(
            self,
            progress: Progress,
            description: str,
            total: Optional[float],
    ) -> Iterator[ProgressBar]:
        if total is None:
            # Indeterminate progress bar
            taskid = progress.add_task(description, start=False)
        else:
            taskid = progress.add_task(description, total=total)
        self._update_live()

        try:
            yield ProgressBar(progress, taskid)
        finally:
            progress.remove_task(taskid)
            self._update_live()

    def crawl_bar(
            self,
            style: str,
            action: str,
            text: str,
            total: Optional[float] = None,
    ) -> ContextManager[ProgressBar]:
        """
        Allows markup in the "style" argument which will be applied to the
        "action" string.
        """
        action = escape(f"{action:<{self.STATUS_WIDTH}}")
        description = f"{style}{action}[/] {text}"
        return self._bar(self._crawl_progress, description, total)

    def download_bar(
            self,
            style: str,
            action: str,
            text: str,
            total: Optional[float] = None,
    ) -> ContextManager[ProgressBar]:
        """
        Allows markup in the "style" argument which will be applied to the
        "action" string.
        """
        action = escape(f"{action:<{self.STATUS_WIDTH}}")
        description = f"{style}{action}[/] {text}"
        return self._bar(self._download_progress, description, total)


log = Log()
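A short usage sketch (not part of the diff) of the new Log API above, importable as PFERD.logging per the imports elsewhere in this diff; progress bars only render while show_progress() is active:

from PFERD.logging import log

with log.show_progress():
    with log.crawl_bar("[bold bright_cyan]", "Crawling", "example unit", total=3) as bar:
        for _ in range(3):
            bar.advance()
log.status("[bold cyan]", "Done", "all units processed")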


@@ -1,224 +0,0 @@
"""A simple helper for managing downloaded files.
An organizer is bound to a single directory.
"""
import filecmp
import logging
import os
import shutil
from enum import Enum
from pathlib import Path, PurePath
from typing import Callable, List, Optional, Set
from .download_summary import DownloadSummary
from .location import Location
from .logging import PrettyLogger
from .utils import prompt_yes_no
LOGGER = logging.getLogger(__name__)
PRETTY = PrettyLogger(LOGGER)
class ConflictType(Enum):
"""
The type of the conflict. A file might not exist anymore and will be deleted
or it might be overwritten with a newer version.
FILE_OVERWRITTEN: An existing file will be updated
MARKED_FILE_OVERWRITTEN: A file is written for the second+ time in this run
FILE_DELETED: The file was deleted
"""
FILE_OVERWRITTEN = "overwritten"
MARKED_FILE_OVERWRITTEN = "marked_file_overwritten"
FILE_DELETED = "deleted"
class FileConflictResolution(Enum):
"""
The reaction when confronted with a file conflict:
DESTROY_EXISTING: Delete/overwrite the current file
KEEP_EXISTING: Keep the current file
DEFAULT: Do whatever the PFERD authors thought is sensible
PROMPT: Interactively ask the user
"""
DESTROY_EXISTING = "destroy"
KEEP_EXISTING = "keep"
DEFAULT = "default"
PROMPT = "prompt"
FileConflictResolver = Callable[[PurePath, ConflictType], FileConflictResolution]
def resolve_prompt_user(_path: PurePath, conflict: ConflictType) -> FileConflictResolution:
"""
Resolves conflicts by asking the user if a file was written twice or will be deleted.
"""
if conflict == ConflictType.FILE_OVERWRITTEN:
return FileConflictResolution.DESTROY_EXISTING
return FileConflictResolution.PROMPT
class FileAcceptException(Exception):
"""An exception while accepting a file."""
class Organizer(Location):
"""A helper for managing downloaded files."""
def __init__(self, path: Path, conflict_resolver: FileConflictResolver = resolve_prompt_user):
"""Create a new organizer for a given path."""
super().__init__(path)
self._known_files: Set[Path] = set()
# Keep the root dir
self._known_files.add(path.resolve())
self.download_summary = DownloadSummary()
self.conflict_resolver = conflict_resolver
def accept_file(self, src: Path, dst: PurePath) -> Optional[Path]:
"""
Move a file to this organizer and mark it.
Returns the path the file was moved to, to allow the caller to adjust the metadata.
As you might still need to adjust the metadata when the file was identical
(e.g. update the timestamp), the path is also returned in this case.
In all other cases (ignored, not overwritten, etc.) this method returns None.
"""
# Windows limits the path length to 260 for *some* historical reason
# If you want longer paths, you will have to add the "\\?\" prefix in front of
# your path...
# See:
# https://docs.microsoft.com/en-us/windows/win32/fileio/naming-a-file#maximum-path-length-limitation
if os.name == 'nt':
src_absolute = Path("\\\\?\\" + str(src.resolve()))
dst_absolute = Path("\\\\?\\" + str(self.resolve(dst)))
else:
src_absolute = src.resolve()
dst_absolute = self.resolve(dst)
if not src_absolute.exists():
raise FileAcceptException("Source file does not exist")
if not src_absolute.is_file():
raise FileAcceptException("Source is a directory")
LOGGER.debug("Copying %s to %s", src_absolute, dst_absolute)
if self._is_marked(dst):
PRETTY.warning(f"File {str(dst_absolute)!r} was already written!")
conflict = ConflictType.MARKED_FILE_OVERWRITTEN
if self._resolve_conflict("Overwrite file?", dst_absolute, conflict, default=False):
PRETTY.ignored_file(dst_absolute, "file was written previously")
return None
# Destination file is directory
if dst_absolute.exists() and dst_absolute.is_dir():
prompt = f"Overwrite folder {dst_absolute} with file?"
conflict = ConflictType.FILE_OVERWRITTEN
if self._resolve_conflict(prompt, dst_absolute, conflict, default=False):
shutil.rmtree(dst_absolute)
else:
PRETTY.warning(f"Could not add file {str(dst_absolute)!r}")
return None
# Destination file exists
if dst_absolute.exists() and dst_absolute.is_file():
if filecmp.cmp(str(src_absolute), str(dst_absolute), shallow=False):
# Bail out, nothing more to do
PRETTY.ignored_file(dst_absolute, "same file contents")
self.mark(dst)
return dst_absolute
prompt = f"Overwrite file {dst_absolute}?"
conflict = ConflictType.FILE_OVERWRITTEN
if not self._resolve_conflict(prompt, dst_absolute, conflict, default=True):
PRETTY.ignored_file(dst_absolute, "user conflict resolution")
return None
self.download_summary.add_modified_file(dst_absolute)
PRETTY.modified_file(dst_absolute)
else:
self.download_summary.add_new_file(dst_absolute)
PRETTY.new_file(dst_absolute)
# Create parent dir if needed
dst_parent_dir: Path = dst_absolute.parent
dst_parent_dir.mkdir(exist_ok=True, parents=True)
# Move file
shutil.move(str(src_absolute), str(dst_absolute))
self.mark(dst)
return dst_absolute
def mark(self, path: PurePath) -> None:
"""Mark a file as used so it will not get cleaned up."""
absolute_path = self.resolve(path)
self._known_files.add(absolute_path)
LOGGER.debug("Tracked %s", absolute_path)
def _is_marked(self, path: PurePath) -> bool:
"""
Checks whether a file is marked.
"""
absolute_path = self.resolve(path)
return absolute_path in self._known_files
def cleanup(self) -> None:
"""Remove all untracked files in the organizer's dir."""
LOGGER.debug("Deleting all untracked files...")
self._cleanup(self.path)
def _cleanup(self, start_dir: Path) -> None:
if not start_dir.exists():
return
paths: List[Path] = list(start_dir.iterdir())
# Recursively clean paths
for path in paths:
if path.is_dir():
self._cleanup(path)
else:
if path.resolve() not in self._known_files:
self._delete_file_if_confirmed(path)
# Delete dir if it was empty and untracked
dir_empty = len(list(start_dir.iterdir())) == 0
if start_dir.resolve() not in self._known_files and dir_empty:
start_dir.rmdir()
def _delete_file_if_confirmed(self, path: Path) -> None:
prompt = f"Do you want to delete {path}"
if self._resolve_conflict(prompt, path, ConflictType.FILE_DELETED, default=False):
self.download_summary.add_deleted_file(path)
path.unlink()
else:
PRETTY.ignored_file(path, "user conflict resolution")
def _resolve_conflict(
self, prompt: str, path: Path, conflict: ConflictType, default: bool
) -> bool:
if not self.conflict_resolver:
return prompt_yes_no(prompt, default=default)
result = self.conflict_resolver(path, conflict)
if result == FileConflictResolution.DEFAULT:
return default
if result == FileConflictResolution.KEEP_EXISTING:
return False
if result == FileConflictResolution.DESTROY_EXISTING:
return True
return prompt_yes_no(prompt, default=default)
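A sketch (not part of the diff) of a custom conflict resolver for the Organizer above, using the names defined in this file: overwrite files without prompting, but never delete anything:

from pathlib import Path, PurePath

def keep_local_files(_path: PurePath, conflict: ConflictType) -> FileConflictResolution:
    # Refuse deletions, resolve every other conflict in favour of the remote file.
    if conflict == ConflictType.FILE_DELETED:
        return FileConflictResolution.KEEP_EXISTING
    return FileConflictResolution.DESTROY_EXISTING

organizer = Organizer(Path("downloads"), conflict_resolver=keep_local_files)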

518
PFERD/output_dir.py Normal file

@@ -0,0 +1,518 @@
import filecmp
import json
import os
import random
import shutil
import string
from contextlib import contextmanager
from dataclasses import dataclass
from datetime import datetime
from enum import Enum
from pathlib import Path, PurePath
from typing import BinaryIO, Iterator, Optional, Tuple
from .logging import log
from .report import Report, ReportLoadError
from .utils import ReusableAsyncContextManager, fmt_path, fmt_real_path, prompt_yes_no
SUFFIX_CHARS = string.ascii_lowercase + string.digits
SUFFIX_LENGTH = 6
TRIES = 5
class OutputDirError(Exception):
pass
class Redownload(Enum):
NEVER = "never"
NEVER_SMART = "never-smart"
ALWAYS = "always"
ALWAYS_SMART = "always-smart"
@staticmethod
def from_string(string: str) -> "Redownload":
try:
return Redownload(string)
except ValueError:
raise ValueError("must be one of 'never', 'never-smart',"
" 'always', 'always-smart'")
class OnConflict(Enum):
PROMPT = "prompt"
LOCAL_FIRST = "local-first"
REMOTE_FIRST = "remote-first"
NO_DELETE = "no-delete"
NO_DELETE_PROMPT_OVERWRITE = "no-delete-prompt-overwrite"
@staticmethod
def from_string(string: str) -> "OnConflict":
try:
return OnConflict(string)
except ValueError:
raise ValueError("must be one of 'prompt', 'local-first',"
" 'remote-first', 'no-delete', 'no-delete-prompt-overwrite'")
@dataclass
class Heuristics:
mtime: Optional[datetime]
class FileSink:
def __init__(self, file: BinaryIO):
self._file = file
self._done = False
@property
def file(self) -> BinaryIO:
return self._file
def done(self) -> None:
self._done = True
def is_done(self) -> bool:
return self._done
@dataclass
class DownloadInfo:
remote_path: PurePath
path: PurePath
local_path: Path
tmp_path: Path
heuristics: Heuristics
on_conflict: OnConflict
success: bool = False
class FileSinkToken(ReusableAsyncContextManager[FileSink]):
# Whenever this class is entered, it creates a new temporary file and
# returns a corresponding FileSink.
#
# When it is exited again, the file is closed and information about the
# download handed back to the OutputDirectory.
def __init__(
self,
output_dir: "OutputDirectory",
remote_path: PurePath,
path: PurePath,
local_path: Path,
heuristics: Heuristics,
on_conflict: OnConflict,
):
super().__init__()
self._output_dir = output_dir
self._remote_path = remote_path
self._path = path
self._local_path = local_path
self._heuristics = heuristics
self._on_conflict = on_conflict
async def _on_aenter(self) -> FileSink:
tmp_path, file = await self._output_dir._create_tmp_file(self._local_path)
sink = FileSink(file)
async def after_download() -> None:
await self._output_dir._after_download(DownloadInfo(
self._remote_path,
self._path,
self._local_path,
tmp_path,
self._heuristics,
self._on_conflict,
sink.is_done(),
))
self._stack.push_async_callback(after_download)
self._stack.enter_context(file)
return sink
class OutputDirectory:
REPORT_FILE = PurePath(".report")
def __init__(
self,
root: Path,
redownload: Redownload,
on_conflict: OnConflict,
):
if os.name == "nt":
# Windows limits the path length to 260 for some historical reason.
# If you want longer paths, you will have to add the "\\?\" prefix
# in front of your path. See:
# https://docs.microsoft.com/en-us/windows/win32/fileio/naming-a-file#maximum-path-length-limitation
self._root = Path("\\\\?\\" + str(root.absolute()))
else:
self._root = root
self._redownload = redownload
self._on_conflict = on_conflict
self._report_path = self.resolve(self.REPORT_FILE)
self._report = Report()
self._prev_report: Optional[Report] = None
self.register_reserved(self.REPORT_FILE)
@property
def report(self) -> Report:
return self._report
@property
def prev_report(self) -> Optional[Report]:
return self._prev_report
def prepare(self) -> None:
log.explain_topic(f"Creating base directory at {fmt_real_path(self._root)}")
try:
self._root.mkdir(parents=True, exist_ok=True)
except OSError:
raise OutputDirError("Failed to create base directory")
def register_reserved(self, path: PurePath) -> None:
self._report.mark_reserved(path)
def resolve(self, path: PurePath) -> Path:
"""
May throw an OutputDirError.
"""
if ".." in path.parts:
raise OutputDirError(f"Forbidden segment '..' in path {fmt_path(path)}")
if "." in path.parts:
raise OutputDirError(f"Forbidden segment '.' in path {fmt_path(path)}")
return self._root / path
def _should_download(
self,
local_path: Path,
heuristics: Heuristics,
redownload: Redownload,
on_conflict: OnConflict,
) -> bool:
if not local_path.exists():
log.explain("No corresponding file present locally")
return True
if on_conflict == OnConflict.LOCAL_FIRST:
# Whatever is here, it will never be overwritten, so we don't need
# to download the file.
log.explain("Conflict resolution is 'local-first' and path exists")
return False
if not local_path.is_file():
# We know that there is *something* here that's not a file.
log.explain("Non-file (probably a directory) present locally")
# If on_conflict is LOCAL_FIRST or NO_DELETE, we know that it would
# never be overwritten. It also doesn't have any relevant stats to
# update. This means that we don't have to download the file
# because we'd just always throw it away again.
if on_conflict in {OnConflict.LOCAL_FIRST, OnConflict.NO_DELETE}:
log.explain(f"Conflict resolution is {on_conflict.value!r}")
return False
return True
log.explain(f"Redownload policy is {redownload.value}")
if redownload == Redownload.NEVER:
return False
elif redownload == Redownload.ALWAYS:
return True
stat = local_path.stat()
remote_newer = None
# Python on Windows crashes when faced with timestamps around the unix epoch
if heuristics.mtime and (os.name != "nt" or heuristics.mtime.year > 1970):
mtime = heuristics.mtime
remote_newer = mtime.timestamp() > stat.st_mtime
if remote_newer:
log.explain("Remote file seems to be newer")
else:
log.explain("Remote file doesn't seem to be newer")
if redownload == Redownload.NEVER_SMART:
if remote_newer is None:
return False
else:
return remote_newer
elif redownload == Redownload.ALWAYS_SMART:
if remote_newer is None:
return True
else:
return remote_newer
# This should never be reached
raise ValueError(f"{redownload!r} is not a valid redownload policy")
# The following conflict resolution functions all return False if the local
# file(s) should be kept and True if they should be replaced by the remote
# files.
async def _conflict_lfrf(
self,
on_conflict: OnConflict,
path: PurePath,
) -> bool:
if on_conflict in {OnConflict.PROMPT, OnConflict.NO_DELETE_PROMPT_OVERWRITE}:
async with log.exclusive_output():
prompt = f"Replace {fmt_path(path)} with remote file?"
return await prompt_yes_no(prompt, default=False)
elif on_conflict == OnConflict.LOCAL_FIRST:
return False
elif on_conflict == OnConflict.REMOTE_FIRST:
return True
elif on_conflict == OnConflict.NO_DELETE:
return True
# This should never be reached
raise ValueError(f"{on_conflict!r} is not a valid conflict policy")
async def _conflict_ldrf(
self,
on_conflict: OnConflict,
path: PurePath,
) -> bool:
if on_conflict in {OnConflict.PROMPT, OnConflict.NO_DELETE_PROMPT_OVERWRITE}:
async with log.exclusive_output():
prompt = f"Recursively delete {fmt_path(path)} and replace with remote file?"
return await prompt_yes_no(prompt, default=False)
elif on_conflict == OnConflict.LOCAL_FIRST:
return False
elif on_conflict == OnConflict.REMOTE_FIRST:
return True
elif on_conflict == OnConflict.NO_DELETE:
return False
# This should never be reached
raise ValueError(f"{on_conflict!r} is not a valid conflict policy")
async def _conflict_lfrd(
self,
on_conflict: OnConflict,
path: PurePath,
parent: PurePath,
) -> bool:
if on_conflict in {OnConflict.PROMPT, OnConflict.NO_DELETE_PROMPT_OVERWRITE}:
async with log.exclusive_output():
prompt = f"Delete {fmt_path(parent)} so remote file {fmt_path(path)} can be downloaded?"
return await prompt_yes_no(prompt, default=False)
elif on_conflict == OnConflict.LOCAL_FIRST:
return False
elif on_conflict == OnConflict.REMOTE_FIRST:
return True
elif on_conflict == OnConflict.NO_DELETE:
return False
# This should never be reached
raise ValueError(f"{on_conflict!r} is not a valid conflict policy")
async def _conflict_delete_lf(
self,
on_conflict: OnConflict,
path: PurePath,
) -> bool:
if on_conflict == OnConflict.PROMPT:
async with log.exclusive_output():
prompt = f"Delete {fmt_path(path)}?"
return await prompt_yes_no(prompt, default=False)
elif on_conflict == OnConflict.LOCAL_FIRST:
return False
elif on_conflict == OnConflict.REMOTE_FIRST:
return True
elif on_conflict in {OnConflict.NO_DELETE, OnConflict.NO_DELETE_PROMPT_OVERWRITE}:
return False
# This should never be reached
raise ValueError(f"{on_conflict!r} is not a valid conflict policy")
def _tmp_path(self, base: Path, suffix_length: int) -> Path:
prefix = "" if base.name.startswith(".") else "."
suffix = "".join(random.choices(SUFFIX_CHARS, k=suffix_length))
name = f"{prefix}{base.name}.tmp.{suffix}"
return base.parent / name
async def _create_tmp_file(
self,
local_path: Path,
) -> Tuple[Path, BinaryIO]:
"""
May raise an OutputDirError.
"""
# Create tmp file
for attempt in range(TRIES):
suffix_length = SUFFIX_LENGTH + 2 * attempt
tmp_path = self._tmp_path(local_path, suffix_length)
try:
return tmp_path, open(tmp_path, "xb")
except FileExistsError:
pass # Try again
raise OutputDirError("Failed to create temporary file")
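# Illustrative example of the naming scheme above (the concrete suffix is
# random and SUFFIX_LENGTH/TRIES are defined elsewhere in this file): a
# download targeting "slides.pdf" would first try a sibling like
# ".slides.pdf.tmp.abc123"; if that name already exists, the next attempt uses
# a suffix two characters longer. Names that already start with a dot
# (e.g. ".gitignore") don't get a second leading dot.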
async def download(
self,
remote_path: PurePath,
path: PurePath,
mtime: Optional[datetime] = None,
redownload: Optional[Redownload] = None,
on_conflict: Optional[OnConflict] = None,
) -> Optional[FileSinkToken]:
"""
May throw an OutputDirError, a MarkDuplicateError or a
MarkConflictError.
"""
heuristics = Heuristics(mtime)
redownload = self._redownload if redownload is None else redownload
on_conflict = self._on_conflict if on_conflict is None else on_conflict
local_path = self.resolve(path)
self._report.mark(path)
if not self._should_download(local_path, heuristics, redownload, on_conflict):
return None
# Detect and solve local-dir-remote-file conflict
if local_path.is_dir():
log.explain("Conflict: There's a directory in place of the local file")
if await self._conflict_ldrf(on_conflict, path):
log.explain("Result: Delete the obstructing directory")
shutil.rmtree(local_path)
else:
log.explain("Result: Keep the obstructing directory")
return None
# Detect and solve local-file-remote-dir conflict
for parent in path.parents:
local_parent = self.resolve(parent)
if local_parent.exists() and not local_parent.is_dir():
log.explain("Conflict: One of the local file's parents is a file")
if await self._conflict_lfrd(on_conflict, path, parent):
log.explain("Result: Delete the obstructing file")
local_parent.unlink()
break
else:
log.explain("Result: Keep the obstructing file")
return None
# Ensure parent directory exists
local_path.parent.mkdir(parents=True, exist_ok=True)
return FileSinkToken(self, remote_path, path, local_path, heuristics, on_conflict)
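# Illustrative note on the parent-conflict loop above: for a made-up path such
# as PurePath("Course/Slides/week01.pdf"), path.parents yields
# PurePath("Course/Slides"), PurePath("Course") and PurePath("."), so every
# ancestor is checked for a regular file sitting where a directory is needed.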
def _update_metadata(self, info: DownloadInfo) -> None:
if mtime := info.heuristics.mtime:
mtimestamp = mtime.timestamp()
os.utime(info.local_path, times=(mtimestamp, mtimestamp))
@contextmanager
def _ensure_deleted(self, path: Path) -> Iterator[None]:
try:
yield
finally:
path.unlink(missing_ok=True)
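# Usage sketch (mirrors _after_download below): because the cleanup happens in
# a "finally" block, the temporary file is removed even if processing raises.
#
#     with self._ensure_deleted(info.tmp_path):
#         ...  # compare, prompt, replace; any exception still triggers cleanup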
async def _after_download(self, info: DownloadInfo) -> None:
with self._ensure_deleted(info.tmp_path):
log.status("[bold cyan]", "Downloaded", fmt_path(info.remote_path))
log.explain_topic(f"Processing downloaded file for {fmt_path(info.path)}")
changed = False
if not info.success:
log.explain("Download unsuccessful, aborting")
return
# Solve conflicts arising from existing local file
if info.local_path.exists():
changed = True
if filecmp.cmp(info.local_path, info.tmp_path):
log.explain("Contents identical with existing file")
log.explain("Updating metadata of existing file")
self._update_metadata(info)
return
log.explain("Conflict: The local and remote versions differ")
if await self._conflict_lfrf(info.on_conflict, info.path):
log.explain("Result: Replacing local with remote version")
else:
log.explain("Result: Keeping local version")
return
info.tmp_path.replace(info.local_path)
log.explain("Updating file metadata")
self._update_metadata(info)
if changed:
log.status("[bold bright_yellow]", "Changed", fmt_path(info.path))
self._report.change_file(info.path)
else:
log.status("[bold bright_green]", "Added", fmt_path(info.path))
self._report.add_file(info.path)
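# Decision summary for _after_download (derived from the code above): if no
# local file existed, the result is reported as "Added"; if a local file
# existed with identical contents, only the metadata is refreshed and nothing
# is reported; otherwise the conflict handler decides whether the local file
# is replaced, and a replacement is reported as "Changed".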
async def cleanup(self) -> None:
await self._cleanup_dir(self._root, PurePath(), delete_self=False)
async def _cleanup(self, path: Path, pure: PurePath) -> None:
if path.is_dir():
await self._cleanup_dir(path, pure)
elif path.is_file():
await self._cleanup_file(path, pure)
async def _cleanup_dir(self, path: Path, pure: PurePath, delete_self: bool = True) -> None:
for child in sorted(path.iterdir()):
pure_child = pure / child.name
await self._cleanup(child, pure_child)
if delete_self:
try:
path.rmdir()
except OSError:
pass
async def _cleanup_file(self, path: Path, pure: PurePath) -> None:
if self._report.is_marked(pure):
return
if await self._conflict_delete_lf(self._on_conflict, pure):
try:
path.unlink()
log.status("[bold bright_magenta]", "Deleted", fmt_path(pure))
self._report.delete_file(pure)
except OSError:
pass
else:
log.not_deleted("[bold bright_magenta]", "Not deleted", fmt_path(pure))
self._report.not_delete_file(pure)
def load_prev_report(self) -> None:
log.explain_topic(f"Loading previous report from {fmt_real_path(self._report_path)}")
try:
self._prev_report = Report.load(self._report_path)
log.explain("Loaded report successfully")
except (OSError, UnicodeDecodeError, json.JSONDecodeError, ReportLoadError) as e:
log.explain("Failed to load report")
log.explain(str(e))
def store_report(self) -> None:
log.explain_topic(f"Storing report to {fmt_real_path(self._report_path)}")
try:
self._report.store(self._report_path)
log.explain("Stored report successfully")
except OSError as e:
log.warn(f"Failed to save report to {fmt_real_path(self._report_path)}")
log.warn_contd(str(e))


@@ -1,440 +1,194 @@
"""
Convenience functions for using PFERD.
"""
import logging
from pathlib import Path
from typing import Callable, List, Optional, Union
from .authenticators import UserPassAuthenticator
from .cookie_jar import CookieJar
from .diva import (DivaDownloader, DivaDownloadStrategy, DivaPlaylistCrawler,
diva_download_new)
from .download_summary import DownloadSummary
from .errors import FatalException, swallow_and_print_errors
from .ilias import (IliasAuthenticator, IliasCrawler, IliasDirectoryFilter,
IliasDownloader, IliasDownloadInfo, IliasDownloadStrategy,
KitShibbolethAuthenticator, download_modified_or_new)
from .ipd import (IpdCrawler, IpdDownloader, IpdDownloadInfo,
IpdDownloadStrategy, ipd_download_new_or_modified)
from .location import Location
from .logging import PrettyLogger, enable_logging
from .organizer import FileConflictResolver, Organizer, resolve_prompt_user
from .tmp_dir import TmpDir
from .transform import TF, Transform, apply_transform
from .utils import PathLike, to_path
# TODO save known-good cookies as soon as possible
LOGGER = logging.getLogger(__name__)
PRETTY = PrettyLogger(LOGGER)
class Pferd(Location):
# pylint: disable=too-many-arguments
"""
The main entrypoint in your Pferd usage: This class combines a number of
useful shortcuts for running synchronizers in a single interface.
"""
def __init__(

from pathlib import Path
from typing import Dict, List, Optional
from rich.markup import escape
from .auth import AUTHENTICATORS, Authenticator, AuthError, AuthSection
from .config import Config, ConfigOptionError
from .crawl import CRAWLERS, Crawler, CrawlError, CrawlerSection, KitIliasWebCrawler
from .logging import log
from .utils import fmt_path
class PferdLoadError(Exception):
pass
class Pferd:
def __init__(self, config: Config, cli_crawlers: Optional[List[str]], cli_skips: Optional[List[str]]):
"""
May throw PferdLoadError.
"""
self._config = config
self._crawlers_to_run = self._find_crawlers_to_run(config, cli_crawlers, cli_skips)
self._authenticators: Dict[str, Authenticator] = {}
self._crawlers: Dict[str, Crawler] = {}
def _find_config_crawlers(self, config: Config) -> List[str]:
crawl_sections = []
for name, section in config.crawl_sections():
if CrawlerSection(section).skip():
log.explain(f"Skipping {name!r}")
else:
crawl_sections.append(name)
return crawl_sections
def _find_cli_crawlers(self, config: Config, cli_crawlers: List[str]) -> List[str]:
if len(cli_crawlers) != len(set(cli_crawlers)):
raise PferdLoadError("Some crawlers were selected multiple times")
crawl_sections = [name for name, _ in config.crawl_sections()]
crawlers_to_run = [] # With crawl: prefix
unknown_names = [] # Without crawl: prefix
for name in cli_crawlers:
section_name = f"crawl:{name}"
if section_name in crawl_sections:
log.explain(f"Crawler section named {section_name!r} exists")
crawlers_to_run.append(section_name)
else:
log.explain(f"There's no crawler section named {section_name!r}")
unknown_names.append(name)
if unknown_names:
if len(unknown_names) == 1:
[name] = unknown_names
raise PferdLoadError(f"There is no crawler named {name!r}")
else:
names_str = ", ".join(repr(name) for name in unknown_names)
raise PferdLoadError(f"There are no crawlers named {names_str}")
return crawlers_to_run
self,
base_dir: Path,
tmp_dir: Path = Path(".tmp"),
test_run: bool = False
):
super().__init__(Path(base_dir))
self._download_summary = DownloadSummary()
self._tmp_dir = TmpDir(self.resolve(tmp_dir))
self._test_run = test_run
@staticmethod

def _find_crawlers_to_run(
self,
config: Config,
cli_crawlers: Optional[List[str]],
cli_skips: Optional[List[str]],
) -> List[str]:
log.explain_topic("Deciding which crawlers to run")
crawlers: List[str]
if cli_crawlers is None:
log.explain("No crawlers specified on CLI")
log.explain("Running crawlers specified in config")
crawlers = self._find_config_crawlers(config)
def enable_logging() -> None:
"""
Enable and configure logging via the logging module.
"""
enable_logging()
@staticmethod
def _print_transformables(transformables: List[TF]) -> None:
LOGGER.info("")
LOGGER.info("Results of the test run:")
for transformable in transformables:
LOGGER.info(transformable.path)
@staticmethod
def _get_authenticator(
username: Optional[str], password: Optional[str]
) -> KitShibbolethAuthenticator:
inner_auth = UserPassAuthenticator("ILIAS - Pferd.py", username, password)
return KitShibbolethAuthenticator(inner_auth)
def _ilias(
self,
target: PathLike,
base_url: str,
crawl_function: Callable[[IliasCrawler], List[IliasDownloadInfo]],
authenticator: IliasAuthenticator,
cookies: Optional[PathLike],
dir_filter: IliasDirectoryFilter,
transform: Transform,
download_strategy: IliasDownloadStrategy,
timeout: int,
clean: bool = True,
file_conflict_resolver: FileConflictResolver = resolve_prompt_user
) -> Organizer:
# pylint: disable=too-many-locals
cookie_jar = CookieJar(to_path(cookies) if cookies else None)
session = cookie_jar.create_session()
tmp_dir = self._tmp_dir.new_subdir()
organizer = Organizer(self.resolve(to_path(target)), file_conflict_resolver)
crawler = IliasCrawler(base_url, session, authenticator, dir_filter)
downloader = IliasDownloader(tmp_dir, organizer, session,
authenticator, download_strategy, timeout)
cookie_jar.load_cookies()
info = crawl_function(crawler)
cookie_jar.save_cookies()
transformed = apply_transform(transform, info)
if self._test_run:
self._print_transformables(transformed)
return organizer
downloader.download_all(transformed)
cookie_jar.save_cookies()
if clean:
organizer.cleanup()
return organizer
@swallow_and_print_errors
def ilias_kit(
self,
target: PathLike,
course_id: str,
dir_filter: IliasDirectoryFilter = lambda x, y: True,
transform: Transform = lambda x: x,
cookies: Optional[PathLike] = None,
username: Optional[str] = None,
password: Optional[str] = None,
download_strategy: IliasDownloadStrategy = download_modified_or_new,
clean: bool = True,
timeout: int = 5,
file_conflict_resolver: FileConflictResolver = resolve_prompt_user
) -> Organizer:
"""
Synchronizes a folder with the ILIAS instance of the KIT.
Arguments:
target {Path} -- the target path to write the data to
course_id {str} -- the id of the main course page (found in the URL after ref_id
when opening the course homepage)
Keyword Arguments:
dir_filter {IliasDirectoryFilter} -- A filter for directories. Will be applied on the
crawler level, these directories and all of their content is skipped.
(default: {lambdax:True})
transform {Transform} -- A transformation function for the output paths. Return None
to ignore a file. (default: {lambdax:x})
cookies {Optional[Path]} -- The path to store and load cookies from.
(default: {None})
username {Optional[str]} -- The SCC username. If none is given, it will prompt
the user. (default: {None})
password {Optional[str]} -- The SCC password. If none is given, it will prompt
the user. (default: {None})
download_strategy {DownloadStrategy} -- A function to determine which files need to
be downloaded. Can save bandwidth and reduce the number of requests.
(default: {download_modified_or_new})
clean {bool} -- Whether to clean up when the method finishes.
timeout {int} -- The download timeout for opencast videos. Sadly needed due to a
requests bug.
file_conflict_resolver {FileConflictResolver} -- A function specifying how to deal
with overwriting or deleting files. The default always asks the user.
"""
# This authenticator only works with the KIT ilias instance.
authenticator = Pferd._get_authenticator(username=username, password=password)
PRETTY.starting_synchronizer(target, "ILIAS", course_id)
organizer = self._ilias(
target=target,
base_url="https://ilias.studium.kit.edu/",
crawl_function=lambda crawler: crawler.crawl_course(course_id),
authenticator=authenticator,
cookies=cookies,
dir_filter=dir_filter,
transform=transform,
download_strategy=download_strategy,
clean=clean,
timeout=timeout,
file_conflict_resolver=file_conflict_resolver
)
self._download_summary.merge(organizer.download_summary)
return organizer
def print_summary(self) -> None:
"""
Prints the accumulated download summary.
"""
PRETTY.summary(self._download_summary)
@swallow_and_print_errors
def ilias_kit_personal_desktop(
self,
target: PathLike,
dir_filter: IliasDirectoryFilter = lambda x, y: True,
transform: Transform = lambda x: x,
cookies: Optional[PathLike] = None,
username: Optional[str] = None,
password: Optional[str] = None,
download_strategy: IliasDownloadStrategy = download_modified_or_new,
clean: bool = True,
timeout: int = 5,
file_conflict_resolver: FileConflictResolver = resolve_prompt_user
) -> Organizer:
"""
Synchronizes a folder with the ILIAS instance of the KIT. This method will crawl the ILIAS
"personal desktop" instead of a single course.
Arguments:
target {Path} -- the target path to write the data to
Keyword Arguments:
dir_filter {IliasDirectoryFilter} -- A filter for directories. Will be applied on the
crawler level, these directories and all of their content is skipped.
(default: {lambdax:True})
transform {Transform} -- A transformation function for the output paths. Return None
to ignore a file. (default: {lambdax:x})
cookies {Optional[Path]} -- The path to store and load cookies from.
(default: {None})
username {Optional[str]} -- The SCC username. If none is given, it will prompt
the user. (default: {None})
password {Optional[str]} -- The SCC password. If none is given, it will prompt
the user. (default: {None})
download_strategy {DownloadStrategy} -- A function to determine which files need to
be downloaded. Can save bandwidth and reduce the number of requests.
(default: {download_modified_or_new})
clean {bool} -- Whether to clean up when the method finishes.
timeout {int} -- The download timeout for opencast videos. Sadly needed due to a
requests bug.
file_conflict_resolver {FileConflictResolver} -- A function specifying how to deal
with overwriting or deleting files. The default always asks the user.
"""
# This authenticator only works with the KIT ilias instance.
authenticator = Pferd._get_authenticator(username, password)
PRETTY.starting_synchronizer(target, "ILIAS", "Personal Desktop")
organizer = self._ilias(
target=target,
base_url="https://ilias.studium.kit.edu/",
crawl_function=lambda crawler: crawler.crawl_personal_desktop(),
authenticator=authenticator,
cookies=cookies,
dir_filter=dir_filter,
transform=transform,
download_strategy=download_strategy,
clean=clean,
timeout=timeout,
file_conflict_resolver=file_conflict_resolver
)
self._download_summary.merge(organizer.download_summary)
return organizer
@swallow_and_print_errors
def ilias_kit_folder(
self,
target: PathLike,
full_url: str,
dir_filter: IliasDirectoryFilter = lambda x, y: True,
transform: Transform = lambda x: x,
cookies: Optional[PathLike] = None,
username: Optional[str] = None,
password: Optional[str] = None,
download_strategy: IliasDownloadStrategy = download_modified_or_new,
clean: bool = True,
timeout: int = 5,
file_conflict_resolver: FileConflictResolver = resolve_prompt_user
) -> Organizer:
"""
Synchronizes a folder with a given folder on the ILIAS instance of the KIT.
Arguments:
target {Path} -- the target path to write the data to
full_url {str} -- the full url of the folder/videos/course to crawl
Keyword Arguments:
dir_filter {IliasDirectoryFilter} -- A filter for directories. Will be applied on the
crawler level, these directories and all of their content is skipped.
(default: {lambdax:True})
transform {Transform} -- A transformation function for the output paths. Return None
to ignore a file. (default: {lambdax:x})
cookies {Optional[Path]} -- The path to store and load cookies from.
(default: {None})
username {Optional[str]} -- The SCC username. If none is given, it will prompt
the user. (default: {None})
password {Optional[str]} -- The SCC password. If none is given, it will prompt
the user. (default: {None})
download_strategy {DownloadStrategy} -- A function to determine which files need to
be downloaded. Can save bandwidth and reduce the number of requests.
(default: {download_modified_or_new})
clean {bool} -- Whether to clean up when the method finishes.
timeout {int} -- The download timeout for opencast videos. Sadly needed due to a
requests bug.
file_conflict_resolver {FileConflictResolver} -- A function specifying how to deal
with overwriting or deleting files. The default always asks the user.
"""
# This authenticator only works with the KIT ilias instance.
authenticator = Pferd._get_authenticator(username=username, password=password)
PRETTY.starting_synchronizer(target, "ILIAS", "An ILIAS element by url")
if not full_url.startswith("https://ilias.studium.kit.edu"):
raise FatalException("Not a valid KIT ILIAS URL")
organizer = self._ilias(
target=target,
base_url="https://ilias.studium.kit.edu/",
crawl_function=lambda crawler: crawler.recursive_crawl_url(full_url),
authenticator=authenticator,
cookies=cookies,
dir_filter=dir_filter,
transform=transform,
download_strategy=download_strategy,
clean=clean,
timeout=timeout,
file_conflict_resolver=file_conflict_resolver
)
self._download_summary.merge(organizer.download_summary)
return organizer
@swallow_and_print_errors
def ipd_kit(
self,
target: Union[PathLike, Organizer],
url: str,
transform: Transform = lambda x: x,
download_strategy: IpdDownloadStrategy = ipd_download_new_or_modified,
clean: bool = True,
file_conflict_resolver: FileConflictResolver = resolve_prompt_user
) -> Organizer:
"""
Synchronizes a folder with a DIVA playlist.
Arguments:
target {Union[PathLike, Organizer]} -- The organizer / target folder to use.
url {str} -- the url to the page
Keyword Arguments:
transform {Transform} -- A transformation function for the output paths. Return None
to ignore a file. (default: {lambdax:x})
download_strategy {DivaDownloadStrategy} -- A function to determine which files need to
be downloaded. Can save bandwidth and reduce the number of requests.
(default: {diva_download_new})
clean {bool} -- Whether to clean up when the method finishes.
file_conflict_resolver {FileConflictResolver} -- A function specifying how to deal
with overwriting or deleting files. The default always asks the user.
"""
tmp_dir = self._tmp_dir.new_subdir()
if target is None:
PRETTY.starting_synchronizer("None", "IPD", url)
raise FatalException("Got 'None' as target directory, aborting")
if isinstance(target, Organizer):
organizer = target
else:
organizer = Organizer(self.resolve(to_path(target)), file_conflict_resolver)
PRETTY.starting_synchronizer(organizer.path, "IPD", url)
elements: List[IpdDownloadInfo] = IpdCrawler(url).crawl()
transformed = apply_transform(transform, elements)
if self._test_run:
self._print_transformables(transformed)
return organizer
downloader = IpdDownloader(tmp_dir=tmp_dir, organizer=organizer, strategy=download_strategy)
downloader.download_all(transformed)
if clean:
organizer.cleanup()
self._download_summary.merge(organizer.download_summary)
return organizer
@swallow_and_print_errors
def diva_kit(
self,
target: Union[PathLike, Organizer],
playlist_location: str,
transform: Transform = lambda x: x,
download_strategy: DivaDownloadStrategy = diva_download_new,
clean: bool = True,
file_conflict_resolver: FileConflictResolver = resolve_prompt_user
) -> Organizer:
"""
Synchronizes a folder with a DIVA playlist.
Arguments:
organizer {Organizer} -- The organizer to use.
playlist_location {str} -- the playlist id or the playlist URL
in the format 'https://mediaservice.bibliothek.kit.edu/#/details/DIVA-2019-271'
Keyword Arguments:
transform {Transform} -- A transformation function for the output paths. Return None
to ignore a file. (default: {lambdax:x})
download_strategy {DivaDownloadStrategy} -- A function to determine which files need to
be downloaded. Can save bandwidth and reduce the number of requests.
(default: {diva_download_new})
clean {bool} -- Whether to clean up when the method finishes.
file_conflict_resolver {FileConflictResolver} -- A function specifying how to deal
with overwriting or deleting files. The default always asks the user.
"""
tmp_dir = self._tmp_dir.new_subdir()
if playlist_location.startswith("http"):
playlist_id = DivaPlaylistCrawler.fetch_id(playlist_link=playlist_location)
else:
playlist_id = playlist_location
if target is None:
PRETTY.starting_synchronizer("None", "DIVA", playlist_id)
raise FatalException("Got 'None' as target directory, aborting")
if isinstance(target, Organizer):
organizer = target
else:
organizer = Organizer(self.resolve(to_path(target)), file_conflict_resolver)
PRETTY.starting_synchronizer(organizer.path, "DIVA", playlist_id)
crawler = DivaPlaylistCrawler(playlist_id)
downloader = DivaDownloader(tmp_dir, organizer, download_strategy)
info = crawler.crawl()
transformed = apply_transform(transform, info)
if self._test_run:
self._print_transformables(transformed)
return organizer
downloader.download_all(transformed)
if clean:
organizer.cleanup()
self._download_summary.merge(organizer.download_summary)
return organizer

else:
log.explain("Crawlers specified on CLI")
crawlers = self._find_cli_crawlers(config, cli_crawlers)
skips = {f"crawl:{name}" for name in cli_skips} if cli_skips else set()
for crawler in crawlers:
if crawler in skips:
log.explain(f"Skipping crawler {crawler!r}")
crawlers = [crawler for crawler in crawlers if crawler not in skips]
return crawlers
def _load_authenticators(self) -> None:
for name, section in self._config.auth_sections():
log.print(f"[bold bright_cyan]Loading[/] {escape(name)}")
auth_type = AuthSection(section).type()
authenticator_constructor = AUTHENTICATORS.get(auth_type)
if authenticator_constructor is None:
raise ConfigOptionError(name, "type", f"Unknown authenticator type: {auth_type!r}")
authenticator = authenticator_constructor(name, section, self._config)
self._authenticators[name] = authenticator
def _load_crawlers(self) -> None:
# Cookie sharing
kit_ilias_web_paths: Dict[Authenticator, List[Path]] = {}
for name, section in self._config.crawl_sections():
log.print(f"[bold bright_cyan]Loading[/] {escape(name)}")
crawl_type = CrawlerSection(section).type()
crawler_constructor = CRAWLERS.get(crawl_type)
if crawler_constructor is None:
raise ConfigOptionError(name, "type", f"Unknown crawler type: {crawl_type!r}")
crawler = crawler_constructor(name, section, self._config, self._authenticators)
self._crawlers[name] = crawler
if self._config.default_section.share_cookies():
if isinstance(crawler, KitIliasWebCrawler):
crawler.share_cookies(kit_ilias_web_paths)
def debug_transforms(self) -> None:
for name in self._crawlers_to_run:
crawler = self._crawlers[name]
log.print("")
log.print(f"[bold bright_cyan]Debugging transforms[/] for {escape(name)}")
crawler.debug_transforms()
async def run(self, debug_transforms: bool) -> None:
"""
May throw ConfigOptionError.
"""
# These two functions must run inside the same event loop as the
# crawlers, so that any new objects (like Conditions or Futures) can
# obtain the correct event loop.
self._load_authenticators()
self._load_crawlers()
if debug_transforms:
log.output_explain = True
log.output_report = False
self.debug_transforms()
return
log.print("")
for name in self._crawlers_to_run:
crawler = self._crawlers[name]
log.print(f"[bold bright_cyan]Running[/] {escape(name)}")
try:
await crawler.run()
except (CrawlError, AuthError) as e:
log.error(str(e))
except Exception:
log.unexpected_exception()
def print_report(self) -> None:
for name in self._crawlers_to_run:
crawler = self._crawlers.get(name)
if crawler is None:
continue # Crawler failed to load
log.report("")
log.report(f"[bold bright_cyan]Report[/] for {escape(name)}")
something_changed = False
for path in sorted(crawler.report.added_files):
something_changed = True
log.report(f" [bold bright_green]Added[/] {fmt_path(path)}")
for path in sorted(crawler.report.changed_files):
something_changed = True
log.report(f" [bold bright_yellow]Changed[/] {fmt_path(path)}")
for path in sorted(crawler.report.deleted_files):
something_changed = True
log.report(f" [bold bright_magenta]Deleted[/] {fmt_path(path)}")
for path in sorted(crawler.report.not_deleted_files):
something_changed = True
log.report_not_deleted(f" [bold bright_magenta]Not deleted[/] {fmt_path(path)}")
for warning in crawler.report.encountered_warnings:
something_changed = True
log.report(f" [bold bright_red]Warning[/] {warning}")
for error in crawler.report.encountered_errors:
something_changed = True
log.report(f" [bold bright_red]Error[/] {error}")
if not something_changed:
log.report(" Nothing changed")


@@ -1,111 +0,0 @@
"""
A small progress bar implementation.
"""
import sys
from dataclasses import dataclass
from types import TracebackType
from typing import Optional, Type
import requests
from rich.console import Console
from rich.progress import (BarColumn, DownloadColumn, Progress, TaskID,
TextColumn, TimeRemainingColumn,
TransferSpeedColumn)
_progress: Progress = Progress(
TextColumn("[bold blue]{task.fields[name]}", justify="right"),
BarColumn(bar_width=None),
"[progress.percentage]{task.percentage:>3.1f}%",
"",
DownloadColumn(),
"",
TransferSpeedColumn(),
"",
TimeRemainingColumn(),
console=Console(file=sys.stdout),
transient=True
)
def size_from_headers(response: requests.Response) -> Optional[int]:
"""
Return the size of the download based on the response headers.
Arguments:
response {requests.Response} -- the response
Returns:
Optional[int] -- the size
"""
if "Content-Length" in response.headers:
return int(response.headers["Content-Length"])
return None
@dataclass
class ProgressSettings:
"""
Settings you can pass to customize the progress bar.
"""
name: str
max_size: int
def progress_for(settings: Optional[ProgressSettings]) -> 'ProgressContextManager':
"""
Returns a context manager that displays progress
Returns:
ProgressContextManager -- the progress manager
"""
return ProgressContextManager(settings)
class ProgressContextManager:
"""
A context manager used for displaying progress.
"""
def __init__(self, settings: Optional[ProgressSettings]):
self._settings = settings
self._task_id: Optional[TaskID] = None
def __enter__(self) -> 'ProgressContextManager':
"""Context manager entry function."""
if not self._settings:
return self
_progress.start()
self._task_id = _progress.add_task(
self._settings.name,
total=self._settings.max_size,
name=self._settings.name
)
return self
# pylint: disable=useless-return
def __exit__(
self,
exc_type: Optional[Type[BaseException]],
exc_value: Optional[BaseException],
traceback: Optional[TracebackType],
) -> Optional[bool]:
"""Context manager exit function. Removes the task."""
if self._task_id is None:
return None
_progress.remove_task(self._task_id)
if len(_progress.task_ids) == 0:
# We need to clean up after ourselves, as we were the last one
_progress.stop()
_progress.refresh()
return None
def advance(self, amount: float) -> None:
"""
Advances the progress bar.
"""
if self._task_id is not None:
_progress.advance(self._task_id, amount)

PFERD/report.py

@@ -0,0 +1,238 @@
import json
from pathlib import Path, PurePath
from typing import Any, Dict, List, Optional, Set
class ReportLoadError(Exception):
pass
class MarkDuplicateError(Exception):
"""
Tried to mark a file that was already marked.
"""
def __init__(self, path: PurePath):
super().__init__(f"A previous file already used path {path}")
self.path = path
class MarkConflictError(Exception):
"""
Marking the path would have caused a conflict.
A conflict can have two reasons: Either the new file has the same path as
the parent directory of a known file, or a parent directory of the new file
has the same path as a known file. In either case, adding the new file
would require a file and a directory to share the same path, which is
usually not possible.
"""
def __init__(self, path: PurePath, collides_with: PurePath):
super().__init__(f"File at {path} collides with previous file at {collides_with}")
self.path = path
self.collides_with = collides_with
# TODO Use PurePath.is_relative_to when updating to 3.9
def is_relative_to(a: PurePath, b: PurePath) -> bool:
try:
a.relative_to(b)
return True
except ValueError:
return False
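# Illustrative examples: is_relative_to(PurePath("a/b/c"), PurePath("a")) is
# True, while is_relative_to(PurePath("a"), PurePath("a/b/c")) is False.
# Report.mark() below uses it in both directions to detect file/directory
# collisions.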
class Report:
"""
A report of a synchronization. Includes all files found by the crawler, as
well as the set of changes made to local files.
"""
def __init__(self) -> None:
# Paths found by the crawler, untransformed
self.found_paths: Set[PurePath] = set()
# Files reserved for metadata files (e. g. the report file or cookies)
# that can't be overwritten by user transforms and won't be cleaned up
# at the end.
self.reserved_files: Set[PurePath] = set()
# Files found by the crawler, transformed. Only includes files that
# were downloaded (or a download was attempted)
self.known_files: Set[PurePath] = set()
self.added_files: Set[PurePath] = set()
self.changed_files: Set[PurePath] = set()
self.deleted_files: Set[PurePath] = set()
# Files that should have been deleted by the cleanup but weren't
self.not_deleted_files: Set[PurePath] = set()
# Custom crawler-specific data
self.custom: Dict[str, Any] = dict()
# Encountered errors and warnings
self.encountered_warnings: List[str] = []
self.encountered_errors: List[str] = []
@staticmethod
def _get_list_of_strs(data: Dict[str, Any], key: str) -> List[str]:
result: Any = data.get(key, [])
if not isinstance(result, list):
raise ReportLoadError(f"Incorrect format: {key!r} is not a list")
for elem in result:
if not isinstance(elem, str):
raise ReportLoadError(f"Incorrect format: {key!r} must contain only strings")
return result
@staticmethod
def _get_str_dictionary(data: Dict[str, Any], key: str) -> Dict[str, Any]:
result: Dict[str, Any] = data.get(key, {})
if not isinstance(result, dict):
raise ReportLoadError(f"Incorrect format: {key!r} is not a dictionary")
return result
@classmethod
def load(cls, path: Path) -> "Report":
"""
May raise OSError, UnicodeDecodeError, JsonDecodeError, ReportLoadError.
"""
with open(path, encoding="utf-8") as f:
data = json.load(f)
if not isinstance(data, dict):
raise ReportLoadError("Incorrect format: Root is not an object")
self = cls()
for elem in self._get_list_of_strs(data, "found"):
self.found(PurePath(elem))
for elem in self._get_list_of_strs(data, "reserved"):
self.mark_reserved(PurePath(elem))
for elem in self._get_list_of_strs(data, "known"):
self.mark(PurePath(elem))
for elem in self._get_list_of_strs(data, "added"):
self.add_file(PurePath(elem))
for elem in self._get_list_of_strs(data, "changed"):
self.change_file(PurePath(elem))
for elem in self._get_list_of_strs(data, "deleted"):
self.delete_file(PurePath(elem))
for elem in self._get_list_of_strs(data, "not_deleted"):
self.not_delete_file(PurePath(elem))
self.custom = self._get_str_dictionary(data, "custom")
self.encountered_errors = self._get_list_of_strs(data, "encountered_errors")
self.encountered_warnings = self._get_list_of_strs(data, "encountered_warnings")
return self
def store(self, path: Path) -> None:
"""
May raise OSError.
"""
data = {
"found": [str(path) for path in sorted(self.found_paths)],
"reserved": [str(path) for path in sorted(self.reserved_files)],
"known": [str(path) for path in sorted(self.known_files)],
"added": [str(path) for path in sorted(self.added_files)],
"changed": [str(path) for path in sorted(self.changed_files)],
"deleted": [str(path) for path in sorted(self.deleted_files)],
"not_deleted": [str(path) for path in sorted(self.not_deleted_files)],
"custom": self.custom,
"encountered_warnings": self.encountered_warnings,
"encountered_errors": self.encountered_errors,
}
with open(path, "w", encoding="utf-8") as f:
json.dump(data, f, indent=2, sort_keys=True)
f.write("\n") # json.dump doesn't do this
def found(self, path: PurePath) -> None:
self.found_paths.add(path)
def mark_reserved(self, path: PurePath) -> None:
if path in self.marked:
raise RuntimeError("Trying to reserve an already reserved file")
self.reserved_files.add(path)
def mark(self, path: PurePath) -> None:
"""
Mark a previously unknown file as known.
May throw a MarkDuplicateError or a MarkConflictError. For more detail,
see the respective exception's docstring.
"""
for other in self.marked:
if path == other:
raise MarkDuplicateError(path)
if is_relative_to(path, other) or is_relative_to(other, path):
raise MarkConflictError(path, other)
self.known_files.add(path)
@property
def marked(self) -> Set[PurePath]:
return self.known_files | self.reserved_files
def is_marked(self, path: PurePath) -> bool:
return path in self.marked
def add_file(self, path: PurePath) -> None:
"""
Unlike mark(), this function accepts any paths.
"""
self.added_files.add(path)
def change_file(self, path: PurePath) -> None:
"""
Unlike mark(), this function accepts any paths.
"""
self.changed_files.add(path)
def delete_file(self, path: PurePath) -> None:
"""
Unlike mark(), this function accepts any paths.
"""
self.deleted_files.add(path)
def not_delete_file(self, path: PurePath) -> None:
"""
Unlike mark(), this function accepts any paths.
"""
self.not_deleted_files.add(path)
def add_custom_value(self, key: str, value: Any) -> None:
"""
Adds a custom value under the passed key, overwriting any existing value.
"""
self.custom[key] = value
def get_custom_value(self, key: str) -> Optional[Any]:
"""
Retrieves a custom value for the given key.
"""
return self.custom.get(key)
def add_error(self, error: str) -> None:
"""
Adds an error to this report's error list.
"""
self.encountered_errors.append(error)
def add_warning(self, warning: str) -> None:
"""
Adds a warning to this report's warning list.
"""
self.encountered_warnings.append(warning)
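A minimal, illustrative round-trip for the Report class above (the paths and file name are made up; inside PFERD the output directory drives this):
```
from pathlib import Path, PurePath
from PFERD.report import Report

report = Report()
report.found(PurePath("Course/slides.pdf"))     # untransformed path seen by the crawler
report.mark(PurePath("Course/slides.pdf"))      # may raise MarkDuplicateError/MarkConflictError
report.add_file(PurePath("Course/slides.pdf"))  # recorded as newly added
report.store(Path(".report"))                   # written as JSON

loaded = Report.load(Path(".report"))           # raises ReportLoadError on malformed data
print(loaded.added_files)
```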


@@ -1,79 +0,0 @@
"""Helper functions and classes for temporary folders."""
import logging
import shutil
from pathlib import Path
from types import TracebackType
from typing import Optional, Type
from .location import Location
LOGGER = logging.getLogger(__name__)
class TmpDir(Location):
"""A temporary folder that can create files or nested temp folders."""
def __init__(self, path: Path):
"""Create a new temporary folder for the given path."""
super().__init__(path)
self._counter = 0
self.cleanup()
self.path.mkdir(parents=True, exist_ok=True)
def __str__(self) -> str:
"""Format the folder as a string."""
return f"Folder at {self.path}"
def __enter__(self) -> 'TmpDir':
"""Context manager entry function."""
return self
# pylint: disable=useless-return
def __exit__(
self,
exc_type: Optional[Type[BaseException]],
exc_value: Optional[BaseException],
traceback: Optional[TracebackType],
) -> Optional[bool]:
"""Context manager exit function. Calls cleanup()."""
self.cleanup()
return None
def new_path(self, prefix: Optional[str] = None) -> Path:
"""
Return a unique path inside the directory. Doesn't create a file or
directory.
"""
name = f"{prefix if prefix else 'tmp'}-{self._inc_and_get_counter():03}"
LOGGER.debug("Creating temp file %s", name)
return self.resolve(Path(name))
def new_subdir(self, prefix: Optional[str] = None) -> 'TmpDir':
"""
Create a new nested temporary folder and return it.
"""
name = f"{prefix if prefix else 'tmp'}-{self._inc_and_get_counter():03}"
sub_path = self.resolve(Path(name))
sub_path.mkdir(parents=True)
LOGGER.debug("Creating temp dir %s at %s", name, sub_path)
return TmpDir(sub_path)
def cleanup(self) -> None:
"""Delete this folder and all contained files."""
LOGGER.debug("Deleting temp folder %s", self.path)
if self.path.resolve().exists():
shutil.rmtree(self.path.resolve())
def _inc_and_get_counter(self) -> int:
"""Get and increment the counter by one."""
counter = self._counter
self._counter += 1
return counter


@@ -1,142 +0,0 @@
"""
Transforms let the user define functions to decide where the downloaded files
should be placed locally. They let the user do more advanced things like moving
only files whose names match a regex, or renaming files from one numbering
scheme to another.
"""
import os
import re
from dataclasses import dataclass
from pathlib import PurePath
from typing import Callable, List, Optional, TypeVar
from .utils import PathLike, Regex, to_path, to_pattern
Transform = Callable[[PurePath], Optional[PurePath]]
@dataclass
class Transformable:
"""
An object that can be transformed by a Transform.
"""
path: PurePath
TF = TypeVar("TF", bound=Transformable)
def apply_transform(
transform: Transform,
transformables: List[TF],
) -> List[TF]:
"""
Apply a Transform to multiple Transformables, discarding those that were
not transformed by the Transform.
"""
result: List[TF] = []
for transformable in transformables:
new_path = transform(transformable.path)
if new_path:
transformable.path = new_path
result.append(transformable)
return result
# Transform combinators
def keep(path: PurePath) -> Optional[PurePath]:
return path
def attempt(*args: Transform) -> Transform:
def inner(path: PurePath) -> Optional[PurePath]:
for transform in args:
result = transform(path)
if result:
return result
return None
return inner
def optionally(transform: Transform) -> Transform:
return attempt(transform, lambda path: path)
def do(*args: Transform) -> Transform:
def inner(path: PurePath) -> Optional[PurePath]:
current = path
for transform in args:
result = transform(current)
if result:
current = result
else:
return None
return current
return inner
def predicate(pred: Callable[[PurePath], bool]) -> Transform:
def inner(path: PurePath) -> Optional[PurePath]:
if pred(path):
return path
return None
return inner
def glob(pattern: str) -> Transform:
return predicate(lambda path: path.match(pattern))
def move_dir(source_dir: PathLike, target_dir: PathLike) -> Transform:
source_path = to_path(source_dir)
target_path = to_path(target_dir)
def inner(path: PurePath) -> Optional[PurePath]:
if source_path in path.parents:
return target_path / path.relative_to(source_path)
return None
return inner
def move(source: PathLike, target: PathLike) -> Transform:
source_path = to_path(source)
target_path = to_path(target)
def inner(path: PurePath) -> Optional[PurePath]:
if path == source_path:
return target_path
return None
return inner
def rename(source: str, target: str) -> Transform:
def inner(path: PurePath) -> Optional[PurePath]:
if path.name == source:
return path.with_name(target)
return None
return inner
def re_move(regex: Regex, target: str) -> Transform:
def inner(path: PurePath) -> Optional[PurePath]:
match = to_pattern(regex).fullmatch(str(path))
if match:
groups = [match.group(0)]
groups.extend(match.groups())
return PurePath(target.format(*groups))
return None
return inner
def re_rename(regex: Regex, target: str) -> Transform:
def inner(path: PurePath) -> Optional[PurePath]:
match = to_pattern(regex).fullmatch(path.name)
if match:
groups = [match.group(0)]
groups.extend(match.groups())
return path.with_name(target.format(*groups))
return None
return inner
def sanitize_windows_path(path: PurePath) -> PurePath:
"""
A small function to escape characters that are forbidden in windows path names.
This method is a no-op on other operating systems.
"""
# Escape windows illegal path characters
if os.name == 'nt':
sanitized_parts = [re.sub(r'[<>:"/|?]', "_", x) for x in list(path.parts)]
return PurePath(*sanitized_parts)
return path

PFERD/transformer.py

@@ -0,0 +1,439 @@
import ast
import re
from abc import ABC, abstractmethod
from dataclasses import dataclass
from enum import Enum
from pathlib import PurePath
from typing import Callable, Dict, List, Optional, Sequence, TypeVar, Union
from .logging import log
from .utils import fmt_path, str_path
class ArrowHead(Enum):
NORMAL = 0
SEQUENCE = 1
class Ignore:
pass
class Empty:
pass
RightSide = Union[str, Ignore, Empty]
@dataclass
class Transformed:
path: PurePath
class Ignored:
pass
TransformResult = Optional[Union[Transformed, Ignored]]
@dataclass
class Rule:
left: str
left_index: int
name: str
head: ArrowHead
right: RightSide
right_index: int
def right_result(self, path: PurePath) -> Union[str, Transformed, Ignored]:
if isinstance(self.right, str):
return self.right
elif isinstance(self.right, Ignore):
return Ignored()
elif isinstance(self.right, Empty):
return Transformed(path)
else:
raise RuntimeError(f"Right side has invalid type {type(self.right)}")
class Transformation(ABC):
def __init__(self, rule: Rule):
self.rule = rule
@abstractmethod
def transform(self, path: PurePath) -> TransformResult:
pass
class ExactTf(Transformation):
def transform(self, path: PurePath) -> TransformResult:
if path != PurePath(self.rule.left):
return None
right = self.rule.right_result(path)
if not isinstance(right, str):
return right
return Transformed(PurePath(right))
class ExactReTf(Transformation):
def transform(self, path: PurePath) -> TransformResult:
match = re.fullmatch(self.rule.left, str_path(path))
if not match:
return None
right = self.rule.right_result(path)
if not isinstance(right, str):
return right
# For some reason, mypy thinks that "groups" has type List[str]. But
# since elements of "match.groups()" can be None, mypy is wrong.
groups: Sequence[Optional[str]] = [match[0]] + list(match.groups())
locals_dir: Dict[str, Union[str, int, float]] = {}
for i, group in enumerate(groups):
if group is None:
continue
locals_dir[f"g{i}"] = group
try:
locals_dir[f"i{i}"] = int(group)
except ValueError:
pass
try:
locals_dir[f"f{i}"] = float(group)
except ValueError:
pass
result = eval(f"f{right!r}", {}, locals_dir)
return Transformed(PurePath(result))
class RenamingParentsTf(Transformation):
def __init__(self, sub_tf: Transformation):
super().__init__(sub_tf.rule)
self.sub_tf = sub_tf
def transform(self, path: PurePath) -> TransformResult:
for i in range(len(path.parts), -1, -1):
parent = PurePath(*path.parts[:i])
child = PurePath(*path.parts[i:])
transformed = self.sub_tf.transform(parent)
if not transformed:
continue
elif isinstance(transformed, Transformed):
return Transformed(transformed.path / child)
elif isinstance(transformed, Ignored):
return transformed
else:
raise RuntimeError(f"Invalid transform result of type {type(transformed)}: {transformed}")
return None
class RenamingPartsTf(Transformation):
def __init__(self, sub_tf: Transformation):
super().__init__(sub_tf.rule)
self.sub_tf = sub_tf
def transform(self, path: PurePath) -> TransformResult:
result = PurePath()
any_part_matched = False
for part in path.parts:
transformed = self.sub_tf.transform(PurePath(part))
if not transformed:
result /= part
elif isinstance(transformed, Transformed):
result /= transformed.path
any_part_matched = True
elif isinstance(transformed, Ignored):
return transformed
else:
raise RuntimeError(f"Invalid transform result of type {type(transformed)}: {transformed}")
if any_part_matched:
return Transformed(result)
else:
return None
class RuleParseError(Exception):
def __init__(self, line: "Line", reason: str):
super().__init__(f"Error in rule on line {line.line_nr}, column {line.index}: {reason}")
self.line = line
self.reason = reason
def pretty_print(self) -> None:
log.error(f"Error parsing rule on line {self.line.line_nr}:")
log.error_contd(self.line.line)
spaces = " " * self.line.index
log.error_contd(f"{spaces}^--- {self.reason}")
T = TypeVar("T")
class Line:
def __init__(self, line: str, line_nr: int):
self._line = line
self._line_nr = line_nr
self._index = 0
@property
def line(self) -> str:
return self._line
@property
def line_nr(self) -> int:
return self._line_nr
@property
def index(self) -> int:
return self._index
@index.setter
def index(self, index: int) -> None:
self._index = index
@property
def rest(self) -> str:
return self.line[self.index:]
def peek(self, amount: int = 1) -> str:
return self.rest[:amount]
def take(self, amount: int = 1) -> str:
string = self.peek(amount)
self.index += len(string)
return string
def expect(self, string: str) -> str:
if self.peek(len(string)) == string:
return self.take(len(string))
else:
raise RuleParseError(self, f"Expected {string!r}")
def expect_with(self, string: str, value: T) -> T:
self.expect(string)
return value
def one_of(self, parsers: List[Callable[[], T]], description: str) -> T:
for parser in parsers:
index = self.index
try:
return parser()
except RuleParseError:
self.index = index
raise RuleParseError(self, description)
# RULE = LEFT SPACE '-' NAME '-' HEAD (SPACE RIGHT)?
# SPACE = ' '+
# NAME = '' | 'exact' | 'name' | 're' | 'exact-re' | 'name-re'
# HEAD = '>' | '>>'
# LEFT = STR | QUOTED_STR
# RIGHT = STR | QUOTED_STR | '!'
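# A few example rules that fit the grammar above (illustrative; the paths and
# patterns are made up):
#
#   Lectures --> Vorlesungen                 (empty name: matches the path or a parent prefix exactly)
#   tmp -exact-> !                           (ignore exactly the path "tmp")
#   Thumbnails -name-> !                     (ignore every path segment named "Thumbnails")
#   "Blatt (\d+).pdf" -name-re-> "Blatt {i1:02}.pdf"   (regex rename applied per segment)
#   .* -re->> "{g0}"                         (sequence arrow: keep applying later rules)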
def parse_zero_or_more_spaces(line: Line) -> None:
while line.peek() == " ":
line.take()
def parse_one_or_more_spaces(line: Line) -> None:
line.expect(" ")
parse_zero_or_more_spaces(line)
def parse_str(line: Line) -> str:
result = []
while c := line.peek():
if c == " ":
break
else:
line.take()
result.append(c)
if result:
return "".join(result)
else:
raise RuleParseError(line, "Expected non-space character")
QUOTATION_MARKS = {'"', "'"}
def parse_quoted_str(line: Line) -> str:
escaped = False
# Points to first character of string literal
start_index = line.index
quotation_mark = line.peek()
if quotation_mark not in QUOTATION_MARKS:
raise RuleParseError(line, "Expected quotation mark")
line.take()
while c := line.peek():
if escaped:
escaped = False
line.take()
elif c == quotation_mark:
line.take()
stop_index = line.index
literal = line.line[start_index:stop_index]
try:
return ast.literal_eval(literal)
except SyntaxError as e:
line.index = start_index
raise RuleParseError(line, str(e)) from e
elif c == "\\":
escaped = True
line.take()
else:
line.take()
raise RuleParseError(line, "Expected end of string literal")
def parse_left(line: Line) -> str:
if line.peek() in QUOTATION_MARKS:
return parse_quoted_str(line)
else:
return parse_str(line)
def parse_right(line: Line) -> Union[str, Ignore]:
c = line.peek()
if c in QUOTATION_MARKS:
return parse_quoted_str(line)
else:
string = parse_str(line)
if string == "!":
return Ignore()
return string
def parse_arrow_name(line: Line) -> str:
return line.one_of([
lambda: line.expect("exact-re"),
lambda: line.expect("exact"),
lambda: line.expect("name-re"),
lambda: line.expect("name"),
lambda: line.expect("re"),
lambda: line.expect(""),
], "Expected arrow name")
def parse_arrow_head(line: Line) -> ArrowHead:
return line.one_of([
lambda: line.expect_with(">>", ArrowHead.SEQUENCE),
lambda: line.expect_with(">", ArrowHead.NORMAL),
], "Expected arrow head")
def parse_eol(line: Line) -> None:
if line.peek():
raise RuleParseError(line, "Expected end of line")
def parse_rule(line: Line) -> Rule:
parse_zero_or_more_spaces(line)
left_index = line.index
left = parse_left(line)
parse_one_or_more_spaces(line)
line.expect("-")
name = parse_arrow_name(line)
line.expect("-")
head = parse_arrow_head(line)
right_index = line.index
right: RightSide
try:
parse_zero_or_more_spaces(line)
parse_eol(line)
right = Empty()
except RuleParseError:
line.index = right_index
parse_one_or_more_spaces(line)
right = parse_right(line)
parse_eol(line)
return Rule(left, left_index, name, head, right, right_index)
def parse_transformation(line: Line) -> Transformation:
rule = parse_rule(line)
if rule.name == "":
return RenamingParentsTf(ExactTf(rule))
elif rule.name == "exact":
return ExactTf(rule)
elif rule.name == "name":
if len(PurePath(rule.left).parts) > 1:
line.index = rule.left_index
raise RuleParseError(line, "Expected name, not multiple segments")
return RenamingPartsTf(ExactTf(rule))
elif rule.name == "re":
return RenamingParentsTf(ExactReTf(rule))
elif rule.name == "exact-re":
return ExactReTf(rule)
elif rule.name == "name-re":
return RenamingPartsTf(ExactReTf(rule))
else:
raise RuntimeError(f"Invalid arrow name {rule.name!r}")
class Transformer:
def __init__(self, rules: str):
"""
May throw a RuleParseError.
"""
self._tfs = []
for i, line in enumerate(rules.split("\n")):
line = line.strip()
if line:
tf = parse_transformation(Line(line, i))
self._tfs.append((line, tf))
def transform(self, path: PurePath) -> Optional[PurePath]:
for i, (line, tf) in enumerate(self._tfs):
log.explain(f"Testing rule {i+1}: {line}")
try:
result = tf.transform(path)
except Exception as e:
log.warn(f"Error while testing rule {i+1}: {line}")
log.warn_contd(str(e))
continue
if not result:
continue
if isinstance(result, Ignored):
log.explain("Match found, path ignored")
return None
if tf.rule.head == ArrowHead.NORMAL:
log.explain(f"Match found, transformed path to {fmt_path(result.path)}")
path = result.path
break
elif tf.rule.head == ArrowHead.SEQUENCE:
log.explain(f"Match found, updated path to {fmt_path(result.path)}")
path = result.path
else:
raise RuntimeError(f"Invalid transform result of type {type(result)}: {result}")
log.explain(f"Final result: {fmt_path(path)}")
return path
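A small usage sketch for the Transformer above (illustrative; the rules and paths are invented):
```
from pathlib import PurePath
from PFERD.transformer import Transformer

tf = Transformer(
    "Lectures --> Vorlesungen\n"
    "Thumbnails -name-> !\n"
)
print(tf.transform(PurePath("Lectures/week01.pdf")))  # Vorlesungen/week01.pdf
print(tf.transform(PurePath("Thumbnails/img.png")))   # None (path is ignored)
```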


@@ -1,98 +1,144 @@
"""
A few utility bobs and bits.
"""
import re
from pathlib import Path, PurePath
from typing import Optional, Tuple, Union
import bs4
import requests
from .progress import ProgressSettings, progress_for, size_from_headers
PathLike = Union[PurePath, str, Tuple[str, ...]]
def to_path(pathlike: PathLike) -> Path:
"""
Convert a given PathLike into a Path.
"""
if isinstance(pathlike, tuple):
return Path(*pathlike)
return Path(pathlike)
Regex = Union[str, re.Pattern]
def to_pattern(regex: Regex) -> re.Pattern:
"""
Convert a regex to a re.Pattern.
"""
if isinstance(regex, re.Pattern):
return regex
return re.compile(regex)
def soupify(response: requests.Response) -> bs4.BeautifulSoup:
"""
Wrap a requests response in a bs4 object.
"""
return bs4.BeautifulSoup(response.text, "html.parser")
def stream_to_path(
response: requests.Response,
target: Path,
progress_name: Optional[str] = None,
chunk_size: int = 1024 ** 2
) -> None:
"""
Download a requests response content to a file by streaming it. This
function avoids excessive memory usage when downloading large files. The
chunk_size is in bytes.
If progress_name is None, no progress bar will be shown. Otherwise a progress
bar will appear, if the download is bigger than an internal threshold.
"""
with response:
length = size_from_headers(response)
if progress_name and length and int(length) > 1024 * 1024 * 10: # 10 MiB
settings: Optional[ProgressSettings] = ProgressSettings(progress_name, length)
else:
settings = None
with open(target, 'wb') as file_descriptor:
with progress_for(settings) as progress:
for chunk in response.iter_content(chunk_size=chunk_size):
file_descriptor.write(chunk)
progress.advance(len(chunk))
def prompt_yes_no(question: str, default: Optional[bool] = None) -> bool:
"""
Prompts the user a yes/no question and returns their choice.
"""
if default is True:
prompt = "[Y/n]"
elif default is False:
prompt = "[y/N]"
else:
prompt = "[y/n]"
text = f"{question} {prompt} "
wrong_reply = "Please reply with 'yes'/'y' or 'no'/'n'."
while True:
response = input(text).strip().lower()
if response in {"yes", "ye", "y"}:
return True
if response in {"no", "n"}:
return False
if response == "" and default is not None:
return default
print(wrong_reply)

import asyncio
import getpass
import sys
import threading
from abc import ABC, abstractmethod
from contextlib import AsyncExitStack
from pathlib import Path, PurePath
from types import TracebackType
from typing import Any, Callable, Dict, Generic, Optional, Type, TypeVar
from urllib.parse import parse_qs, urlencode, urlsplit, urlunsplit
import bs4
T = TypeVar("T")
async def in_daemon_thread(func: Callable[..., T], *args: Any, **kwargs: Any) -> T:
loop = asyncio.get_running_loop()
future: asyncio.Future[T] = asyncio.Future()
def thread_func() -> None:
result = func()
loop.call_soon_threadsafe(future.set_result, result)
threading.Thread(target=thread_func, daemon=True).start()
return await future
async def ainput(prompt: str) -> str:
return await in_daemon_thread(lambda: input(prompt))
async def agetpass(prompt: str) -> str:
return await in_daemon_thread(lambda: getpass.getpass(prompt))
async def prompt_yes_no(query: str, default: Optional[bool]) -> bool:
"""
Asks the user a yes/no question and returns their choice.
"""
if default is True:
query += " [Y/n] "
elif default is False:
query += " [y/N] "
else:
query += " [y/n] "
while True:
response = (await ainput(query)).strip().lower()
if response == "y":
return True
elif response == "n":
return False
elif response == "" and default is not None:
return default
print("Please answer with 'y' or 'n'.")
def soupify(data: bytes) -> bs4.BeautifulSoup:
"""
Parses HTML to a beautifulsoup object.
"""
return bs4.BeautifulSoup(data, "html.parser")
def url_set_query_param(url: str, param: str, value: str) -> str:
"""
Set a query parameter in an url, overwriting existing ones with the same name.
"""
scheme, netloc, path, query, fragment = urlsplit(url)
query_parameters = parse_qs(query)
query_parameters[param] = [value]
new_query_string = urlencode(query_parameters, doseq=True)
return urlunsplit((scheme, netloc, path, new_query_string, fragment))
def url_set_query_params(url: str, params: Dict[str, str]) -> str:
"""
Sets multiple query parameters in an url, overwriting existing ones.
"""
result = url
for key, val in params.items():
result = url_set_query_param(result, key, val)
return result
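# Illustrative examples (made-up URL):
#   url_set_query_param("https://example.com/page?a=1&b=2", "b", "3")
#     -> "https://example.com/page?a=1&b=3"
#   url_set_query_params("https://example.com/page?a=1", {"a": "0", "c": "5"})
#     -> "https://example.com/page?a=0&c=5"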
def str_path(path: PurePath) -> str:
if not path.parts:
return "."
return "/".join(path.parts)
def fmt_path(path: PurePath) -> str:
return repr(str_path(path))
def fmt_real_path(path: Path) -> str:
return repr(str(path.absolute()))
class ReusableAsyncContextManager(ABC, Generic[T]):
def __init__(self) -> None:
self._active = False
self._stack = AsyncExitStack()
@abstractmethod
async def _on_aenter(self) -> T:
pass
async def __aenter__(self) -> T:
if self._active:
raise RuntimeError("Nested or otherwise concurrent usage is not allowed")
self._active = True
await self._stack.__aenter__()
# See https://stackoverflow.com/a/13075071
try:
result: T = await self._on_aenter()
except: # noqa: E722 do not use bare 'except'
if not await self.__aexit__(*sys.exc_info()):
raise
return result
async def __aexit__(
self,
exc_type: Optional[Type[BaseException]],
exc_value: Optional[BaseException],
traceback: Optional[TracebackType],
) -> Optional[bool]:
if not self._active:
raise RuntimeError("__aexit__ called too many times")
result = await self._stack.__aexit__(exc_type, exc_value, traceback)
self._active = False
return result
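A minimal subclass sketch for ReusableAsyncContextManager (illustrative; it assumes this file is PFERD/utils.py and the resource is invented):
```
import asyncio

from PFERD.utils import ReusableAsyncContextManager

class OneShot(ReusableAsyncContextManager[str]):
    async def _on_aenter(self) -> str:
        # Cleanup registered on the internal AsyncExitStack runs on exit
        self._stack.callback(lambda: print("cleaned up"))
        return "resource"

async def main() -> None:
    ctx = OneShot()
    async with ctx as value:
        print(value)  # "resource"
    # Sequential reuse is fine; nested or concurrent use raises RuntimeError

asyncio.run(main())
```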

PFERD/version.py

@@ -0,0 +1,2 @@
NAME = "PFERD"
VERSION = "3.5.0"

README.md

@ -2,254 +2,146 @@
**P**rogramm zum **F**lotten, **E**infachen **R**unterladen von **D**ateien

Other resources:

- [Config file format](CONFIG.md)
- [Changelog](CHANGELOG.md)
- [Development Guide](DEV.md)

## Installation

### Direct download

Binaries for Linux, Windows and Mac can be downloaded directly from the
[latest release](https://github.com/Garmelon/PFERD/releases/latest).

### With pip

Ensure you have at least Python 3.9 installed. Run the following command to
install PFERD or upgrade it to the latest version:

```
$ pip install --upgrade git+https://github.com/Garmelon/PFERD@latest
```

The use of [venv](https://docs.python.org/3/library/venv.html) is recommended.

### With package managers

Unofficial packages are available for:
- [AUR](https://aur.archlinux.org/packages/pferd)
- [brew](https://formulae.brew.sh/formula/pferd)
- [conda-forge](https://github.com/conda-forge/pferd-feedstock)
- [nixpkgs](https://github.com/NixOS/nixpkgs/blob/master/pkgs/tools/misc/pferd/default.nix)
- [PyPi](https://pypi.org/project/pferd)

See also PFERD's [repology page](https://repology.org/project/pferd/versions).

## Basic usage

PFERD can be run directly from the command line with no config file. Run `pferd
-h` to get an overview of available commands and options. Run `pferd <command>
-h` to see which options a command has.

For example, you can download your personal desktop from the KIT ILIAS like
this:

```
$ pferd kit-ilias-web desktop <output_directory>
```

Also, you can download most ILIAS pages directly like this:

```
$ pferd kit-ilias-web <url> <output_directory>
```

However, the CLI only lets you download a single thing at a time, and the
resulting command can grow long quite quickly. Because of this, PFERD can also
be used with a config file.

To get started, just take a command you've been using and add `--dump-config`
directly after `pferd`, like this:

```
$ pferd --dump-config kit-ilias-web <url> <output_directory>
```

This will make PFERD write its current configuration to its default config file
path. You can then run `pferd` without a command and it will execute the config
file. Alternatively, you can use `--dump-config-to` and specify a path yourself.
Using `--dump-config-to -` will print the configuration to stdout instead of a
file, which is a good way to see what is actually going on when using a CLI
command.

Another good way to see what PFERD is doing is the `--explain` option. When
enabled, PFERD explains in detail what it is doing and why. This can help with
debugging your own config.

If you don't want to run all crawlers from your config file, you can specify the
crawlers you want to run with `--crawler` or `-C`, like this:

```
$ pferd -C crawler1 -C crawler2
```

## Advanced usage

PFERD supports lots of different options. For example, you can configure PFERD
to [use your system's keyring](CONFIG.md#the-keyring-authenticator) instead of
prompting you for your username and password. PFERD also supports
[transformation rules](CONFIG.md#transformation-rules) that let you rename or
exclude certain files.

For more details, see the comprehensive [config format documentation](CONFIG.md).

## Example

This example downloads a few courses from the KIT ILIAS with a common keyring
authenticator. It reorganizes and ignores some files.

```ini
[DEFAULT]
# All paths will be relative to this.
# The crawler output directories will be <working_dir>/Foo and <working_dir>/Bar.
working_dir = ~/stud
# If files vanish from ILIAS the local files are not deleted, allowing us to
# take a look at them before deleting them ourselves.
on_conflict = no-delete

[auth:ilias]
type = keyring
username = foo

[crawl:Foo]
type = kit-ilias-web
auth = auth:ilias
# Crawl a course by its ID (found as `ref_id=ID` in the URL)
target = 1234567

# Plaintext files are easier to read by other tools
links = plaintext

transform =
  # Ignore unneeded folders
  Online-Tests --> !
  Vorlesungswerbung --> !

  # Rename folders
  Lehrbücher --> Vorlesung
  # Note the ">>" arrow head which lets us apply further rules to files moved to "Übung"
  Übungsunterlagen -->> Übung

  # Move exercises to own folder. Rename them to "Blatt-XX.pdf" to make them sort properly
  "Übung/(\d+). Übungsblatt.pdf" -re-> Blätter/Blatt-{i1:02}.pdf
  # Move solutions to own folder. Rename them to "Blatt-XX-Lösung.pdf" to make them sort properly
  "Übung/(\d+). Übungsblatt.*Musterlösung.pdf" -re-> Blätter/Blatt-{i1:02}-Lösung.pdf

  # The course has nested folders with the same name - flatten them
  "Übung/(.+?)/\\1" -re-> Übung/{g1}

[crawl:Bar]
type = kit-ilias-web
auth = auth:ilias
target = 1337420
```

---

*The sections below are from the previous (2.x) README and were removed by this change:*

- [Quickstart with `sync_url`](#quickstart-with-sync_url)
- [Installation](#installation)
- [Upgrading from 2.0.0 to 2.1.0+](#upgrading-from-200-to-210)
- [Example setup](#example-setup)
- [Usage](#usage)
- [General concepts](#general-concepts)
- [Constructing transforms](#constructing-transforms)
- [Transform creators](#transform-creators)
- [Transform combinators](#transform-combinators)
- [A short, but commented example](#a-short-but-commented-example)

## Quickstart with `sync_url`

The `sync_url` program allows you to just synchronize a given ILIAS URL (of a
course, a folder, your personal desktop, etc.) without any extra configuration
or setting up. Download the program, open ILIAS, copy the URL from the address
bar and pass it to sync_url.

It bundles everything it needs in one executable and is easy to
use, but doesn't expose all the configuration options and tweaks a full install
does.

1. Download the `sync_url` binary from the [latest release](https://github.com/Garmelon/PFERD/releases/latest).
2. Recognize that you most likely need to enclose the URL in `""` quotes to prevent your shell from interpreting `&` and other symbols
3. Run the binary in your terminal (`./sync_url` or `sync_url.exe` in the CMD) to see the help and use it. I'd recommend using the `--cookies` option.

If you are on **Linux/Mac**, you need to *make the file executable* using
`chmod +x <file>`.
If you are on **Mac**, you need to allow this unverified program to run (see e.g. [here](https://www.switchingtomac.com/tutorials/osx/how-to-run-unverified-apps-on-macos/))

## Installation

Ensure that you have at least Python 3.8 installed.

To install PFERD or update your installation to the latest version, run this
wherever you want to install or have already installed PFERD:

```
$ pip install git+https://github.com/Garmelon/PFERD@v2.6.1
```

The use of [venv] is recommended.

[venv]: https://docs.python.org/3/library/venv.html

### Upgrading from 2.0.0 to 2.1.0+

- The `IliasDirectoryType` type was renamed to `IliasElementType` and is now far more detailed.
  The new values are: `REGULAR_FOLDER`, `VIDEO_FOLDER`, `EXERCISE_FOLDER`, `REGULAR_FILE`, `VIDEO_FILE`, `FORUM`, `EXTERNAL_LINK`.
- Forums and external links are skipped automatically if you use the `kit_ilias` helper.

## Example setup

In this example, `python3` refers to at least Python 3.8.

A full example setup and initial use could look like:

```
$ mkdir Vorlesungen
$ cd Vorlesungen
$ python3 -m venv .venv
$ source .venv/bin/activate
$ pip install git+https://github.com/Garmelon/PFERD@v2.6.1
$ curl -O https://raw.githubusercontent.com/Garmelon/PFERD/v2.6.1/example_config.py
$ python3 example_config.py
$ deactivate
```

Subsequent runs of the program might look like:

```
$ cd Vorlesungen
$ source .venv/bin/activate
$ python3 example_config.py
$ deactivate
```

If you just want to get started and crawl *your entire ILIAS Desktop* instead
of a given set of courses, please replace `example_config.py` with
`example_config_personal_desktop.py` in all of the instructions below (`curl` call and
`python3` run command).

## Usage

### General concepts

A PFERD config is a normal python file that starts multiple *synchronizers*
which do all the heavy lifting. While you can create and wire them up manually,
you are encouraged to use the helper methods provided in `PFERD.Pferd`.

The synchronizers take some input arguments specific to their service and a
*transform*. The transform receives the computed path of an element in ILIAS and
can return either an output path (so you can rename files or move them around as
you wish) or `None` if you do not want to save the given file.

Additionally the ILIAS synchronizer allows you to define a *crawl filter*. This
filter also receives the computed path as the input, but is only called for
*directories*. If you return `True`, the directory will be crawled and
searched. If you return `False` the directory will be ignored and nothing in it
will be passed to the transform.

### Constructing transforms

While transforms are just normal python functions, writing them by hand can
quickly become tedious. In order to help you with writing your own transforms
and filters, PFERD defines a few useful transform creators and combinators in
the `PFERD.transform` module:

#### Transform creators

These methods let you create a few basic transform building blocks:

- **`glob(glob)`**
  Creates a transform that returns the unchanged path if the glob matches the path and `None` otherwise.
  See also [Path.match].
  Example: `glob("Übung/*.pdf")`
- **`predicate(pred)`**
  Creates a transform that returns the unchanged path if `pred(path)` returns a truthy value.
  Returns `None` otherwise.
  Example: `predicate(lambda path: len(path.parts) == 3)`
- **`move_dir(source, target)`**
  Creates a transform that moves all files from the `source` to the `target` directory.
  Example: `move_dir("Übung/", "Blätter/")`
- **`move(source, target)`**
  Creates a transform that moves the `source` file to `target`.
  Example: `move("Vorlesung/VL02_Automten.pdf", "Vorlesung/VL02_Automaten.pdf")`
- **`rename(source, target)`**
  Creates a transform that renames all files named `source` to `target`.
  This transform works on the file names, not paths, and thus works no matter where the file is located.
  Example: `rename("VL02_Automten.pdf", "VL02_Automaten.pdf")`
- **`re_move(regex, target)`**
  Creates a transform that moves all files matching `regex` to `target`.
  The transform `str.format`s the `target` string with the contents of the capturing groups before returning it.
  The capturing groups can be accessed via their index.
  See also [Match.group].
  Example: `re_move(r"Übung/Blatt (\d+)\.pdf", "Blätter/Blatt_{1:0>2}.pdf")`
- **`re_rename(regex, target)`**
  Creates a transform that renames all files matching `regex` to `target`.
  This transform works on the file names, not paths, and thus works no matter where the file is located.
  Example: `re_rename(r"VL(\d+)(.*)\.pdf", "Vorlesung_Nr_{1}__{2}.pdf")`

All movement or rename transforms above return `None` if a file doesn't match
their movement or renaming criteria. This enables them to be used as building
blocks to build up more complex transforms.

In addition, `PFERD.transform` also defines the `keep` transform which returns its input path unchanged.
This behaviour can be very useful when creating more complex transforms.
See below for example usage.

[Path.match]: https://docs.python.org/3/library/pathlib.html#pathlib.Path.match
[Match.group]: https://docs.python.org/3/library/re.html#re.Match.group

#### Transform combinators

These methods let you combine transforms into more complex transforms:

- **`optionally(transform)`**
  Wraps a given transform and returns its result if it is not `None`.
  Otherwise returns the input path unchanged.
  See below for example usage.
- **`do(transforms)`**
  Accepts a series of transforms and applies them in the given order to the result of the previous one.
  If any transform returns `None`, `do` short-circuits and also returns `None`.
  This can be used to perform multiple renames in a row:
  ```py
  do(
      # Move them
      move_dir("Vorlesungsmaterial/Vorlesungsvideos/", "Vorlesung/Videos/"),
      # Fix extensions (if they have any)
      optionally(re_rename("(.*).m4v.mp4", "{1}.mp4")),
      # Remove the 'dbs' prefix (if they have any)
      optionally(re_rename("(?i)dbs-(.+)", "{1}")),
  )
  ```
- **`attempt(transforms)`**
  Applies the passed transforms in the given order until it finds one that does not return `None`.
  If it does not find any, it returns `None`.
  This can be used to give a list of possible transformations and automatically pick the first one that fits:
  ```py
  attempt(
      # Move all videos. If a video is passed in, this `re_move` will succeed
      # and attempt short-circuits with the result.
      re_move(r"Vorlesungsmaterial/.*/(.+?)\.mp4", "Vorlesung/Videos/{1}.mp4"),
      # Move the whole folder to a nicer name - now without any mp4!
      move_dir("Vorlesungsmaterial/", "Vorlesung/"),
      # If we got another file, keep it.
      keep,
  )
  ```

All of these combinators are used in the provided example configs, if you want
to see some more real-life usages.

### A short, but commented example

```py
from pathlib import Path, PurePath

from PFERD import Pferd
from PFERD.ilias import IliasElementType
from PFERD.transform import *

# This filter will later be used by the ILIAS crawler to decide whether it
# should crawl a directory (or directory-like structure).
def filter_course(path: PurePath, type: IliasElementType) -> bool:
    # Note that glob returns a Transform, which is a function from PurePath ->
    # Optional[PurePath]. Because of this, we need to apply the result of
    # 'glob' to our input path. The returned value will be truthy (a Path) if
    # the transform succeeded, or `None` if it failed.

    # We need to crawl the 'Tutorien' folder as it contains one that we want.
    if glob("Tutorien/")(path):
        return True
    # If we found 'Tutorium 10', keep it!
    if glob("Tutorien/Tutorium 10")(path):
        return True
    # Discard all other folders inside 'Tutorien'
    if glob("Tutorien/*")(path):
        return False

    # All other dirs (including subdirs of 'Tutorium 10') should be searched :)
    return True

# This transform will later be used to rename a few files. It can also be used
# to ignore some files.
transform_course = attempt(
    # We don't care about the other tuts and would instead prefer a cleaner
    # directory structure.
    move_dir("Tutorien/Tutorium 10/", "Tutorium/"),
    # We don't want to modify any other files, so we're going to keep them
    # exactly as they are.
    keep
)

# Enable and configure the text output. Needs to be called before calling any
# other PFERD methods.
Pferd.enable_logging()

# Create a Pferd instance rooted in the same directory as the script file. This
# is not a test run, so files will be downloaded (default, can be omitted).
pferd = Pferd(Path(__file__).parent, test_run=False)

# Use the ilias_kit helper to synchronize an ILIAS course
pferd.ilias_kit(
    # The directory that all of the downloaded files should be placed in
    "My_cool_course/",
    # The course ID (found in the URL when on the course page in ILIAS)
    "course id",
    # A path to a cookie jar. If you synchronize multiple ILIAS courses,
    # setting this to a common value requires you to only log in once.
    cookies=Path("ilias_cookies.txt"),
    # A transform can rename, move or filter out certain files
    transform=transform_course,
    # A crawl filter limits what paths the crawler searches
    dir_filter=filter_course,
)
```

@@ -1,131 +0,0 @@
import argparse
from pathlib import Path, PurePath
from PFERD import Pferd
from PFERD.ilias import IliasElementType
from PFERD.transform import (attempt, do, glob, keep, move, move_dir,
optionally, re_move, re_rename)
tf_ss_2020_numerik = attempt(
re_move(r"Übungsblätter/(\d+)\. Übungsblatt/.*", "Blätter/Blatt_{1:0>2}.pdf"),
keep,
)
tf_ss_2020_db = attempt(
move_dir("Begrüßungsvideo/", "Vorlesung/Videos/"),
do(
move_dir("Vorlesungsmaterial/Vorlesungsvideos/", "Vorlesung/Videos/"),
optionally(re_rename("(.*).m4v.mp4", "{1}.mp4")),
optionally(re_rename("(?i)dbs-(.+)", "{1}")),
),
move_dir("Vorlesungsmaterial/", "Vorlesung/"),
keep,
)
tf_ss_2020_rechnernetze = attempt(
re_move(r"Vorlesungsmaterial/.*/(.+?)\.mp4", "Vorlesung/Videos/{1}.mp4"),
move_dir("Vorlesungsmaterial/", "Vorlesung/"),
keep,
)
tf_ss_2020_sicherheit = attempt(
move_dir("Vorlesungsvideos/", "Vorlesung/Videos/"),
move_dir("Übungsvideos/", "Übung/Videos/"),
re_move(r"VL(.*)\.pdf", "Vorlesung/{1}.pdf"),
re_move(r"Übungsblatt (\d+)\.pdf", "Blätter/Blatt_{1:0>2}.pdf"),
move("Chiffrat.txt", "Blätter/Blatt_01_Chiffrat.txt"),
keep,
)
tf_ss_2020_pg = attempt(
move_dir("Vorlesungsaufzeichnungen/", "Vorlesung/Videos/"),
move_dir("Vorlesungsmaterial/", "Vorlesung/"),
re_move(r"Übungen/uebungsblatt(\d+).pdf", "Blätter/Blatt_{1:0>2}.pdf"),
keep,
)
def df_ss_2020_or1(path: PurePath, _type: IliasElementType) -> bool:
if glob("Tutorien/")(path):
return True
if glob("Tutorien/Tutorium 10, dienstags 15:45 Uhr/")(path):
return True
if glob("Tutorien/*")(path):
return False
return True
tf_ss_2020_or1 = attempt(
move_dir("Vorlesung/Unbeschriebene Folien/", "Vorlesung/Folien/"),
move_dir("Video zur Organisation/", "Vorlesung/Videos/"),
keep,
)
def main() -> None:
parser = argparse.ArgumentParser()
parser.add_argument("--test-run", action="store_true")
parser.add_argument("synchronizers", nargs="*")
args = parser.parse_args()
pferd = Pferd(Path(__file__).parent, test_run=args.test_run)
pferd.enable_logging()
if not args.synchronizers or "numerik" in args.synchronizers:
pferd.ilias_kit(
target="Numerik",
course_id="1083036",
transform=tf_ss_2020_numerik,
cookies="ilias_cookies.txt",
)
if not args.synchronizers or "db" in args.synchronizers:
pferd.ilias_kit(
target="DB",
course_id="1101554",
transform=tf_ss_2020_db,
cookies="ilias_cookies.txt",
)
if not args.synchronizers or "rechnernetze" in args.synchronizers:
pferd.ilias_kit(
target="Rechnernetze",
course_id="1099996",
transform=tf_ss_2020_rechnernetze,
cookies="ilias_cookies.txt",
)
if not args.synchronizers or "sicherheit" in args.synchronizers:
pferd.ilias_kit(
target="Sicherheit",
course_id="1101980",
transform=tf_ss_2020_sicherheit,
cookies="ilias_cookies.txt",
)
if not args.synchronizers or "pg" in args.synchronizers:
pferd.ilias_kit(
target="PG",
course_id="1106095",
transform=tf_ss_2020_pg,
cookies="ilias_cookies.txt",
)
if not args.synchronizers or "or1" in args.synchronizers:
pferd.ilias_kit(
target="OR1",
course_id="1105941",
dir_filter=df_ss_2020_or1,
transform=tf_ss_2020_or1,
cookies="ilias_cookies.txt",
)
# Prints a summary listing all new, modified or deleted files
pferd.print_summary()
if __name__ == "__main__":
main()

@@ -1,38 +0,0 @@
"""
This is a small config that just crawls the ILIAS Personal Desktop.
It does not filter or rename anything, it just gobbles up everything it can find.
Note that this still includes a test-run switch, so you can see what it *would* download.
You can enable that with the "--test-run" command line switch,
i. e. "python3 example_config_minimal.py --test-run".
"""
import argparse
from pathlib import Path
from PFERD import Pferd
def main() -> None:
# Parse command line arguments
parser = argparse.ArgumentParser()
parser.add_argument("--test-run", action="store_true")
args = parser.parse_args()
# Create the Pferd helper instance
pferd = Pferd(Path(__file__).parent, test_run=args.test_run)
pferd.enable_logging()
# Synchronize the personal desktop into the "ILIAS" directory.
# It saves the cookies, so you only need to log in again when the ILIAS cookies expire.
pferd.ilias_kit_personal_desktop(
"ILIAS",
cookies="ilias_cookies.txt",
)
# Prints a summary listing all new, modified or deleted files
pferd.print_summary()
if __name__ == "__main__":
main()

flake.lock (new generated file)
@@ -0,0 +1,27 @@
{
"nodes": {
"nixpkgs": {
"locked": {
"lastModified": 1694499547,
"narHash": "sha256-R7xMz1Iia6JthWRHDn36s/E248WB1/je62ovC/dUVKI=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "e5f018cf150e29aac26c61dac0790ea023c46b24",
"type": "github"
},
"original": {
"owner": "NixOS",
"ref": "nixos-23.05",
"repo": "nixpkgs",
"type": "github"
}
},
"root": {
"inputs": {
"nixpkgs": "nixpkgs"
}
}
},
"root": "root",
"version": 7
}

flake.nix (new file)
@@ -0,0 +1,41 @@
{
description = "Tool for downloading course-related files from ILIAS";
inputs = {
nixpkgs.url = "github:NixOS/nixpkgs/nixos-23.05";
};
outputs = { self, nixpkgs }:
let
# Helper function to generate an attrset '{ x86_64-linux = f "x86_64-linux"; ... }'.
forAllSystems = nixpkgs.lib.genAttrs nixpkgs.lib.systems.flakeExposed;
in
{
packages = forAllSystems (system:
let pkgs = import nixpkgs { inherit system; };
in
rec {
default = pkgs.python3Packages.buildPythonApplication rec {
pname = "pferd";
# Performing black magic
# Don't worry, I sacrificed enough goats for the next few years
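# (This works because PFERD/version.py only contains plain NAME and VERSION
# string assignments, which importTOML happens to parse as valid TOML.)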
version = (pkgs.lib.importTOML ./PFERD/version.py).VERSION;
format = "pyproject";
src = ./.;
nativeBuildInputs = with pkgs.python3Packages; [
setuptools
];
propagatedBuildInputs = with pkgs.python3Packages; [
aiohttp
beautifulsoup4
rich
keyring
certifi
];
};
});
};
}

@@ -1,7 +1,11 @@
[mypy]
disallow_any_generics = True
disallow_untyped_defs = True
disallow_incomplete_defs = True
no_implicit_optional = True
warn_unused_ignores = True
warn_unreachable = True
show_error_context = True

[mypy-rich.*,bs4,keyring]
ignore_missing_imports = True

pferd.py (new file)
@@ -0,0 +1,6 @@
# File used by pyinstaller to create the executable
from PFERD.__main__ import main

if __name__ == "__main__":
    main()

pyproject.toml (new file)
@@ -0,0 +1,3 @@
[build-system]
requires = ["setuptools", "wheel"]
build-backend = "setuptools.build_meta"

@@ -1,4 +0,0 @@
requests>=2.21.0
beautifulsoup4>=4.7.1
rich>=2.1.0
keyring>=21.5.0

scripts/build (new executable file)
@@ -0,0 +1,5 @@
#!/usr/bin/env bash
set -e
pyinstaller --onefile pferd.py

scripts/bump-version (new executable file)
@@ -0,0 +1,111 @@
#!/usr/bin/env python3

import argparse
import time
import re
from subprocess import run


def load_changelog():
    with open("CHANGELOG.md") as f:
        return list(f)


def extract_changes(lines):
    lines = iter(lines)
    changes = []

    # Find "Unreleased" section
    for line in lines:
        if line.strip() == "## Unreleased":
            break
    next(lines)

    # Read all lines from that section
    for line in lines:
        if line.startswith("## "):
            # Found the beginning of the next section
            break
        elif line.startswith("### "):
            # Found a heading in the current section
            # Remove "#" symbols so git doesn't interpret the line as a comment later
            changes.append(line[4:])
        else:
            changes.append(line)

    # Remove trailing empty lines
    while changes and not changes[-1].strip():
        changes.pop()

    return changes


def update_version(version):
    with open("PFERD/version.py") as f:
        text = f.read()

    text = re.sub(r'VERSION = ".*"', f'VERSION = "{version}"', text)

    with open("PFERD/version.py", "w") as f:
        f.write(text)


def update_changelog(lines, version, date):
    lines = iter(lines)
    new_lines = []

    # Find "Unreleased" section
    for line in lines:
        new_lines.append(line)
        if line.strip() == "## Unreleased":
            break

    # Add new heading below that
    new_lines.append("\n")
    new_lines.append(f"## {version} - {date}\n")

    # Add remaining lines
    for line in lines:
        new_lines.append(line)

    with open("CHANGELOG.md", "w") as f:
        f.write("".join(new_lines))


def commit_changes(version):
    run(["git", "add", "CHANGELOG.md", "PFERD/version.py"])
    run(["git", "commit", "-m", f"Bump version to {version}"])


def create_tag(version, annotation):
    run(["git", "tag", "-am", annotation, f"v{version}"])


def fastforward_latest():
    run(["git", "branch", "-f", "latest", "HEAD"])


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("version")
    args = parser.parse_args()
    version = args.version
    date = time.strftime("%Y-%m-%d")

    changelog = load_changelog()
    changes = extract_changes(changelog)
    annotation = f"Version {version} - {date}\n\n{''.join(changes)}"

    update_version(version)
    update_changelog(changelog, version, date)
    commit_changes(version)
    create_tag(version, annotation)
    fastforward_latest()

    print()
    print("Now the only thing left is to publish the changes:")
    print(f" $ git push origin master latest v{version}")


if __name__ == "__main__":
    main()
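For illustration, a small sketch of what `extract_changes` above returns for a minimal changelog (the changelog content below is made up):

```python
sample = """# Changelog

## Unreleased

### Added
- New crawler option

## 3.4.3 - 2022-11-29
""".splitlines(keepends=True)

print("".join(extract_changes(sample)))
# Prints:
# Added
# - New crawler option
```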

scripts/check (new executable file)
@@ -0,0 +1,6 @@
#!/usr/bin/env bash
set -e
mypy PFERD
flake8 PFERD

scripts/format (new executable file)
@@ -0,0 +1,6 @@
#!/usr/bin/env bash
set -e
autopep8 --recursive --in-place PFERD
isort PFERD

scripts/setup (new executable file)
@@ -0,0 +1,17 @@
#!/usr/bin/env bash
set -e
# Updating pip and setuptools because some older versions don't recognize the
# project setup correctly
if [[ $1 != '--no-pip' ]]; then
pip install --upgrade pip
fi
pip install --upgrade setuptools
# Installing PFERD itself
pip install --editable .
# Installing tools and type hints
pip install --upgrade mypy flake8 autopep8 isort pyinstaller
pip install --upgrade types-chardet types-certifi

setup.cfg (new file)
@@ -0,0 +1,23 @@
[metadata]
name = PFERD
version = attr: PFERD.version.VERSION

[options]
packages = find:
python_requires = >=3.9
install_requires =
    aiohttp>=3.8.1
    beautifulsoup4>=4.10.0
    rich>=11.0.0
    keyring>=23.5.0
    certifi>=2021.10.8

[options.entry_points]
console_scripts =
    pferd = PFERD.__main__:main

[flake8]
max_line_length = 110

[isort]
line_length = 110

@@ -1,17 +0,0 @@
from setuptools import find_packages, setup

setup(
    name="PFERD",
    version="2.6.1",
    packages=find_packages(),
    install_requires=[
        "requests>=2.21.0",
        "beautifulsoup4>=4.7.1",
        "rich>=2.1.0",
        "keyring>=21.5.0"
    ],
)

# When updating the version, also:
# - update the README.md installation instructions
# - set a tag on the update commit

@@ -1,160 +0,0 @@
#!/usr/bin/env python
"""
A simple script to download a course by name from ILIAS.
"""
import argparse
import logging
import sys
from pathlib import Path, PurePath
from typing import Optional
from urllib.parse import urlparse
from PFERD import Pferd
from PFERD.authenticators import KeyringAuthenticator, UserPassAuthenticator
from PFERD.cookie_jar import CookieJar
from PFERD.ilias import (IliasCrawler, IliasElementType,
KitShibbolethAuthenticator)
from PFERD.logging import PrettyLogger, enable_logging
from PFERD.organizer import (ConflictType, FileConflictResolution,
FileConflictResolver, resolve_prompt_user)
from PFERD.transform import sanitize_windows_path
from PFERD.utils import to_path
_LOGGER = logging.getLogger("sync_url")
_PRETTY = PrettyLogger(_LOGGER)
def _extract_credentials(file_path: Optional[str]) -> UserPassAuthenticator:
if not file_path:
return UserPassAuthenticator("KIT ILIAS Shibboleth", None, None)
if not Path(file_path).exists():
_PRETTY.error("Credential file does not exist")
sys.exit(1)
with open(file_path, "r") as file:
first_line = file.read().splitlines()[0]
read_name, *read_password = first_line.split(":", 1)
name = read_name if read_name else None
password = read_password[0] if read_password else None
return UserPassAuthenticator("KIT ILIAS Shibboleth", username=name, password=password)
def _resolve_remote_first(_path: PurePath, _conflict: ConflictType) -> FileConflictResolution:
return FileConflictResolution.DESTROY_EXISTING
def _resolve_local_first(_path: PurePath, _conflict: ConflictType) -> FileConflictResolution:
return FileConflictResolution.KEEP_EXISTING
def _resolve_no_delete(_path: PurePath, conflict: ConflictType) -> FileConflictResolution:
# Update files
if conflict == ConflictType.FILE_OVERWRITTEN:
return FileConflictResolution.DESTROY_EXISTING
if conflict == ConflictType.MARKED_FILE_OVERWRITTEN:
return FileConflictResolution.DESTROY_EXISTING
# But do not delete them
return FileConflictResolution.KEEP_EXISTING
def main() -> None:
enable_logging(name="sync_url")
parser = argparse.ArgumentParser()
parser.add_argument("--test-run", action="store_true")
parser.add_argument('-c', '--cookies', nargs='?', default=None, help="File to store cookies in")
parser.add_argument('-u', '--username', nargs='?', default=None, help="Username for Ilias")
parser.add_argument('-p', '--password', nargs='?', default=None, help="Password for Ilias")
parser.add_argument('--credential-file', nargs='?', default=None,
help="Path to a file containing credentials for Ilias. The file must have "
"one line in the following format: '<user>:<password>'")
parser.add_argument("-k", "--keyring", action="store_true",
help="Use the system keyring service for authentication")
parser.add_argument('--no-videos', action="store_true", help="Don't download videos")
parser.add_argument('--local-first', action="store_true",
help="Don't prompt for confirmation, keep existing files")
parser.add_argument('--remote-first', action="store_true",
help="Don't prompt for confirmation, delete and overwrite local files")
parser.add_argument('--no-delete', action="store_true",
help="Don't prompt for confirmation, overwrite local files, don't delete")
parser.add_argument('url', help="URL to the course page")
parser.add_argument('folder', nargs='?', default=None, help="Folder to put stuff into")
args = parser.parse_args()
cookie_jar = CookieJar(to_path(args.cookies) if args.cookies else None)
session = cookie_jar.create_session()
if args.keyring:
if not args.username:
_PRETTY.error("Keyring auth selected but no --username passed!")
return
inner_auth: UserPassAuthenticator = KeyringAuthenticator(
"KIT ILIAS Shibboleth", username=args.username, password=args.password
)
else:
inner_auth = _extract_credentials(args.credential_file)
username, password = inner_auth.get_credentials()
authenticator = KitShibbolethAuthenticator(inner_auth)
url = urlparse(args.url)
crawler = IliasCrawler(url.scheme + '://' + url.netloc, session,
authenticator, lambda x, y: True)
cookie_jar.load_cookies()
if args.folder is None:
element_name = crawler.find_element_name(args.url)
if not element_name:
print("Error, could not get element name. Please specify a folder yourself.")
return
folder = sanitize_windows_path(Path(element_name.replace("/", "-").replace("\\", "-")))
cookie_jar.save_cookies()
else:
folder = Path(args.folder)
# files may not escape the pferd_root with relative paths
# note: Path(Path.cwd, Path(folder)) == Path(folder) if it is an absolute path
pferd_root = Path(Path.cwd(), Path(folder)).parent
# Folder might be a *PurePath* at this point
target = Path(folder).resolve().name
pferd = Pferd(pferd_root, test_run=args.test_run)
def dir_filter(_: Path, element: IliasElementType) -> bool:
if args.no_videos:
return element not in [IliasElementType.VIDEO_FILE, IliasElementType.VIDEO_FOLDER]
return True
if args.local_first:
file_confilict_resolver: FileConflictResolver = _resolve_local_first
elif args.no_delete:
file_confilict_resolver = _resolve_no_delete
elif args.remote_first:
file_confilict_resolver = _resolve_remote_first
else:
file_confilict_resolver = resolve_prompt_user
pferd.enable_logging()
# fetch
pferd.ilias_kit_folder(
target=target,
full_url=args.url,
cookies=args.cookies,
dir_filter=dir_filter,
username=username,
password=password,
file_conflict_resolver=file_confilict_resolver,
transform=sanitize_windows_path
)
pferd.print_summary()
if __name__ == "__main__":
main()