mirror of
				https://github.com/Garmelon/PFERD.git
				synced 2025-10-22 09:42:31 +02:00 
			
		
		
		
	Compare commits
	
		
			1 Commits
		
	
	
		
			v3.8.3
			...
			debug/mtim
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|   | 03efa17cf1 | 
							
								
								
									
										10
									
								
								.github/dependabot.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										10
									
								
								.github/dependabot.yml
									
									
									
									
										vendored
									
									
								
							| @@ -1,10 +0,0 @@ | ||||
| version: 2 | ||||
| updates: | ||||
|   - package-ecosystem: github-actions | ||||
|     directory: / | ||||
|     schedule: | ||||
|       interval: monthly | ||||
|     groups: | ||||
|       gh-actions: | ||||
|         patterns: | ||||
|           - "*" | ||||
							
								
								
									
										31
									
								
								.github/workflows/build-and-release.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										31
									
								
								.github/workflows/build-and-release.yml
									
									
									
									
										vendored
									
									
								
							| @@ -1,6 +1,6 @@ | ||||
| name: build-and-release | ||||
|  | ||||
| on: [push, pull_request] | ||||
| on: push | ||||
|  | ||||
| defaults: | ||||
|   run: | ||||
| @@ -13,12 +13,13 @@ jobs: | ||||
|     strategy: | ||||
|       fail-fast: false | ||||
|       matrix: | ||||
|         os: [ubuntu-latest, windows-latest, macos-13, macos-latest] | ||||
|         python: ["3.11"] | ||||
|         os: [ubuntu-latest, windows-latest, macos-latest] | ||||
|         python: ["3.9"] | ||||
|     steps: | ||||
|       - uses: actions/checkout@v4 | ||||
|  | ||||
|       - uses: actions/setup-python@v5 | ||||
|       - uses: actions/checkout@v3 | ||||
|  | ||||
|       - uses: actions/setup-python@v4 | ||||
|         with: | ||||
|           python-version: ${{ matrix.python }} | ||||
|  | ||||
| @@ -33,12 +34,7 @@ jobs: | ||||
|         run: ./scripts/setup --no-pip | ||||
|  | ||||
|       - name: Run checks | ||||
|         run: | | ||||
|           ./scripts/check | ||||
|           ./scripts/format | ||||
|  | ||||
|       - name: Assert no changes | ||||
|         run: git diff --exit-code | ||||
|         run: ./scripts/check | ||||
|  | ||||
|       - name: Build | ||||
|         run: ./scripts/build | ||||
| @@ -49,9 +45,9 @@ jobs: | ||||
|         run: mv dist/pferd* dist/pferd-${{ matrix.os }} | ||||
|  | ||||
|       - name: Upload binary | ||||
|         uses: actions/upload-artifact@v4 | ||||
|         uses: actions/upload-artifact@v3 | ||||
|         with: | ||||
|           name: pferd-${{ matrix.os }} | ||||
|           name: Binaries | ||||
|           path: dist/pferd-${{ matrix.os }} | ||||
|  | ||||
|   release: | ||||
| @@ -61,20 +57,18 @@ jobs: | ||||
|     steps: | ||||
|  | ||||
|       - name: Download binaries | ||||
|         uses: actions/download-artifact@v4 | ||||
|         uses: actions/download-artifact@v3 | ||||
|         with: | ||||
|           pattern: pferd-* | ||||
|           merge-multiple: true | ||||
|           name: Binaries | ||||
|  | ||||
|       - name: Rename binaries | ||||
|         run: | | ||||
|           mv pferd-ubuntu-latest pferd-linux | ||||
|           mv pferd-windows-latest pferd-windows.exe | ||||
|           mv pferd-macos-13 pferd-mac-x86_64 | ||||
|           mv pferd-macos-latest pferd-mac | ||||
|  | ||||
|       - name: Create release | ||||
|         uses: softprops/action-gh-release@v2 | ||||
|         uses: softprops/action-gh-release@v1 | ||||
|         env: | ||||
|           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} | ||||
|         with: | ||||
| @@ -82,4 +76,3 @@ jobs: | ||||
|             pferd-linux | ||||
|             pferd-windows.exe | ||||
|             pferd-mac | ||||
|             pferd-mac-x86_64 | ||||
|   | ||||
							
								
								
									
										94
									
								
								CHANGELOG.md
									
									
									
									
									
								
							
							
						
						
									
										94
									
								
								CHANGELOG.md
									
									
									
									
									
								
							| @@ -22,100 +22,6 @@ ambiguous situations. | ||||
|  | ||||
| ## Unreleased | ||||
|  | ||||
| ## 3.8.3 - 2025-07-01 | ||||
|  | ||||
| ### Added | ||||
| - Support for link collections.   | ||||
|   In "fancy" mode, a single HTML file with multiple links is generated. | ||||
|   In all other modes, PFERD creates a folder for the collection and a new file | ||||
|   for every link inside. | ||||
|  | ||||
| ### Fixed | ||||
| - Crawling of exercises with instructions | ||||
| - Don't download unavailable elements.   | ||||
|   Elements that are unavailable (for example, because their availability is | ||||
|   time restricted) will not download the HTML for the info page anymore. | ||||
| - `base_url` argument for `ilias-web` crawler causing crashes | ||||
|  | ||||
| ## 3.8.2 - 2025-04-29 | ||||
|  | ||||
| ### Changed | ||||
| - Explicitly mention that wikis are not supported at the moment and ignore them | ||||
|  | ||||
| ### Fixed | ||||
| - Ilias-native login | ||||
| - Exercise crawling | ||||
|  | ||||
| ## 3.8.1 - 2025-04-17 | ||||
|  | ||||
| ### Fixed | ||||
| - Description html files now specify a UTF-8 encoding | ||||
| - Images in descriptions now always have a white background | ||||
|  | ||||
| ## 3.8.0 - 2025-04-16 | ||||
|  | ||||
| ### Added | ||||
| - Support for ILIAS 9 | ||||
|  | ||||
| ### Changed | ||||
| - Added prettier CSS to forum threads | ||||
| - Downloaded forum threads now link to the forum instead of the ILIAS thread | ||||
| - Increase minimum supported Python version to 3.11 | ||||
| - Do not crawl nested courses (courses linked in other courses) | ||||
|  | ||||
| ### Fixed | ||||
| - File links in report on Windows | ||||
| - TOTP authentication in KIT Shibboleth | ||||
| - Forum crawling only considering the first 20 entries | ||||
|  | ||||
| ## 3.7.0 - 2024-11-13 | ||||
|  | ||||
| ### Added | ||||
| - Support for MOB videos in page descriptions | ||||
| - Clickable links in the report to directly open new/modified/not-deleted files | ||||
| - Support for non KIT shibboleth login | ||||
|  | ||||
| ### Changed | ||||
| - Remove videos from description pages | ||||
| - Perform ILIAS cycle detection after processing the transform to allow | ||||
|   ignoring duplicated elements | ||||
| - Parse headings (h1-h3) as folders in kit-ipd crawler | ||||
|  | ||||
| ### Fixed | ||||
| - Personal desktop/dashboard/favorites crawling | ||||
| - Crawling of nested courses | ||||
| - Downloading of links with no target URL | ||||
| - Handle row flex on description pages | ||||
| - Add `<!DOCTYPE html>` heading to forum threads to fix mime type detection | ||||
| - Handle groups in cards | ||||
|  | ||||
| ## 3.6.0 - 2024-10-23 | ||||
|  | ||||
| ### Added | ||||
| - Generic `ilias-web` crawler and `ilias-web` CLI command | ||||
| - Support for the course overview page. Using this URL as a target might cause | ||||
|   duplication warnings, as subgroups are listed separately. | ||||
| - Support for named capture groups in regex transforms | ||||
| - Crawl custom item groups as folders | ||||
|  | ||||
| ### Fixed | ||||
| - Normalization of meeting names in cards | ||||
| - Sanitization of slashes in exercise container names | ||||
|  | ||||
| ## 3.5.2 - 2024-04-14 | ||||
|  | ||||
| ### Fixed | ||||
| - Crawling of personal desktop with ILIAS 8 | ||||
| - Crawling of empty personal desktops | ||||
|  | ||||
| ## 3.5.1 - 2024-04-09 | ||||
|  | ||||
| ### Added | ||||
| - Support for ILIAS 8 | ||||
|  | ||||
| ### Fixed | ||||
| - Video name deduplication | ||||
|  | ||||
| ## 3.5.0 - 2023-09-13 | ||||
|  | ||||
| ### Added | ||||
|   | ||||
							
								
								
									
										85
									
								
								CONFIG.md
									
									
									
									
									
								
							
							
						
						
									
										85
									
								
								CONFIG.md
									
									
									
									
									
								
							| @@ -4,11 +4,11 @@ A config file consists of sections. A section begins with a `[section]` header, | ||||
| which is followed by a list of `key = value` pairs. Comments must be on their | ||||
| own line and start with `#`. Multiline values must be indented beyond their key. | ||||
| Boolean values can be `yes` or `no`. For more details and some examples on the | ||||
| format, see the [configparser documentation][cp-file] | ||||
| ([interpolation][cp-interp] is disabled). | ||||
| format, see the [configparser documentation][1] ([interpolation][2] is | ||||
| disabled). | ||||
|  | ||||
| [cp-file]: <https://docs.python.org/3/library/configparser.html#supported-ini-file-structure> "Supported INI File Structure" | ||||
| [cp-interp]: <https://docs.python.org/3/library/configparser.html#interpolation-of-values> "Interpolation of values" | ||||
| [1]: <https://docs.python.org/3/library/configparser.html#supported-ini-file-structure> "Supported INI File Structure" | ||||
| [2]: <https://docs.python.org/3/library/configparser.html#interpolation-of-values> "Interpolation of values" | ||||
|  | ||||
| ## The `DEFAULT` section | ||||
|  | ||||
| @@ -146,7 +146,7 @@ crawler simulate a slower, network-based crawler. | ||||
|  | ||||
| This crawler crawls a KIT-IPD page by url. The root page can be crawled from | ||||
| outside the KIT network so you will be informed about any new/deleted files, | ||||
| but downloading files requires you to be within. Adding a short delay between | ||||
| but downloading files requires you to be within. Adding a show delay between | ||||
| requests is likely a good idea. | ||||
|  | ||||
| - `target`: URL to a KIT-IPD page | ||||
| @@ -154,64 +154,6 @@ requests is likely a good idea. | ||||
|   matches, the given link is downloaded as a file. This is used to extract | ||||
|   files from KIT-IPD pages. (Default: `^.*?[^/]+\.(pdf|zip|c|cpp|java)$`) | ||||
|  | ||||
| ### The `ilias-web` crawler | ||||
|  | ||||
| This crawler crawls a generic ILIAS instance. | ||||
|  | ||||
| Inspired by [this ILIAS downloader][ilias-dl], the following configurations should work | ||||
| out of the box for the corresponding universities: | ||||
|  | ||||
| [ilias-dl]: https://github.com/V3lop5/ilias-downloader/blob/main/configs "ilias-downloader configs" | ||||
|  | ||||
| | University      | `base_url`                              | `login_type` | `client_id`   | | ||||
| |-----------------|-----------------------------------------|--------------|---------------| | ||||
| | FH Aachen       | https://www.ili.fh-aachen.de            | local        | elearning     | | ||||
| | Uni Köln        | https://www.ilias.uni-koeln.de/ilias    | local        | uk            | | ||||
| | Uni Konstanz    | https://ilias.uni-konstanz.de           | local        | ILIASKONSTANZ | | ||||
| | Uni Stuttgart   | https://ilias3.uni-stuttgart.de         | local        | Uni_Stuttgart | | ||||
| | Uni Tübingen    | https://ovidius.uni-tuebingen.de/ilias3 | shibboleth   |               | | ||||
| | KIT ILIAS Pilot | https://pilot.ilias.studium.kit.edu     | shibboleth   | pilot         | | ||||
|  | ||||
| If your university isn't listed, try navigating to your instance's login page. | ||||
| Assuming no custom login service is used, the URL will look something like this: | ||||
|  | ||||
| ```jinja | ||||
| {{ base_url }}/login.php?client_id={{ client_id }}&cmd=force_login&lang= | ||||
| ``` | ||||
|  | ||||
| If the values work, feel free to submit a PR and add them to the table above. | ||||
|  | ||||
| - `base_url`: The URL where the ILIAS instance is located. (Required) | ||||
| - `login_type`: How you authenticate. (Required) | ||||
|     - `local`: Use `client_id` for authentication. | ||||
|     - `shibboleth`: Use shibboleth for authentication. | ||||
| - `client_id`: An ID used for authentication if `login_type` is `local`. It is | ||||
|   ignored if `login_type` is `shibboleth`. | ||||
| - `target`: The ILIAS element to crawl. (Required) | ||||
|     - `desktop`: Crawl your personal desktop / dashboard | ||||
|     - `<course id>`: Crawl the course with the given id | ||||
|     - `<url>`: Crawl a given element by URL (preferably the permanent URL linked | ||||
|       at the bottom of its ILIAS page).   | ||||
|       This also supports the "My Courses" overview page to download *all* | ||||
|       courses. Note that this might produce confusing local directory layouts | ||||
|       and duplication warnings if you are a member of an ILIAS group. The | ||||
|       `desktop` target is generally preferable. | ||||
| - `auth`: Name of auth section to use for login. (Required) | ||||
| - `tfa_auth`: Name of auth section to use for two-factor authentication. Only | ||||
|   uses the auth section's password. (Default: Anonymous `tfa` authenticator) | ||||
| - `links`: How to represent external links. (Default: `fancy`) | ||||
|     - `ignore`: Don't download links. | ||||
|     - `plaintext`: A text file containing only the URL. | ||||
|     - `fancy`: A HTML file looking like the ILIAS link element. | ||||
|     - `internet-shortcut`: An internet shortcut file (`.url` file). | ||||
| - `link_redirect_delay`: Time (in seconds) until `fancy` link files will | ||||
|   redirect to the actual URL. Set to a negative value to disable the automatic | ||||
|   redirect. (Default: `-1`) | ||||
| - `videos`: Whether to download videos. (Default: `no`) | ||||
| - `forums`: Whether to download forum threads. (Default: `no`) | ||||
| - `http_timeout`: The timeout (in seconds) for all HTTP requests. (Default: | ||||
|   `20.0`) | ||||
|  | ||||
| ### The `kit-ilias-web` crawler | ||||
|  | ||||
| This crawler crawls the KIT ILIAS instance. | ||||
| @@ -290,10 +232,10 @@ is stored in the keyring. | ||||
|  | ||||
| ### The `pass` authenticator | ||||
|  | ||||
| This authenticator queries the [`pass` password manager][pass] for a username | ||||
| and password. It tries to be mostly compatible with [browserpass][browserpass] | ||||
| and [passff][passff], so see those links for an overview of the format. If PFERD | ||||
| fails to load your password, you can use the `--explain` flag to see why. | ||||
| This authenticator queries the [`pass` password manager][3] for a username and | ||||
| password. It tries to be mostly compatible with [browserpass][4] and | ||||
| [passff][5], so see those links for an overview of the format. If PFERD fails | ||||
| to load your password, you can use the `--explain` flag to see why. | ||||
|  | ||||
| - `passname`: The name of the password to use (Required) | ||||
| - `username_prefixes`: A comma-separated list of username line prefixes | ||||
| @@ -301,9 +243,9 @@ fails to load your password, you can use the `--explain` flag to see why. | ||||
| - `password_prefixes`: A comma-separated list of password line prefixes | ||||
|   (Default: `password,pass,secret`) | ||||
|  | ||||
| [pass]: <https://www.passwordstore.org/> "Pass: The Standard Unix Password Manager" | ||||
| [browserpass]: <https://github.com/browserpass/browserpass-extension#organizing-password-store> "Organizing password store" | ||||
| [passff]: <https://github.com/passff/passff#multi-line-format> "Multi-line format" | ||||
| [3]: <https://www.passwordstore.org/> "Pass: The Standard Unix Password Manager" | ||||
| [4]: <https://github.com/browserpass/browserpass-extension#organizing-password-store> "Organizing password store" | ||||
| [5]: <https://github.com/passff/passff#multi-line-format> "Multi-line format" | ||||
|  | ||||
| ### The `tfa` authenticator | ||||
|  | ||||
| @@ -402,8 +344,7 @@ matches `SOURCE`, the output path is created using `TARGET` as template. | ||||
| be referred to as `{g<n>}` (e.g. `{g3}`). `{g0}` refers to the original path. | ||||
| If capturing group *n*'s contents are a valid integer, the integer value is | ||||
| available as `{i<n>}` (e.g. `{i3}`). If capturing group *n*'s contents are a | ||||
| valid float, the float value is available as `{f<n>}` (e.g. `{f3}`). Named capture | ||||
| groups (e.g. `(?P<name>)`) are available by their name (e.g. `{name}`). If a | ||||
| valid float, the float value is available as `{f<n>}` (e.g. `{f3}`). If a | ||||
| capturing group is not present (e.g. when matching the string `cd` with the | ||||
| regex `(ab)?cd`), the corresponding variables are not defined. | ||||
|  | ||||
|   | ||||
							
								
								
									
										4
									
								
								LICENSE
									
									
									
									
									
								
							
							
						
						
									
										4
									
								
								LICENSE
									
									
									
									
									
								
							| @@ -1,6 +1,6 @@ | ||||
| Copyright 2019-2024 Garmelon, I-Al-Istannen, danstooamerican, pavelzw, | ||||
| Copyright 2019-2021 Garmelon, I-Al-Istannen, danstooamerican, pavelzw, | ||||
|                     TheChristophe, Scriptim, thelukasprobst, Toorero, | ||||
|                     Mr-Pine, p-fruck, PinieP | ||||
|                     Mr-Pine | ||||
|  | ||||
| Permission is hereby granted, free of charge, to any person obtaining a copy of | ||||
| this software and associated documentation files (the "Software"), to deal in | ||||
|   | ||||
| @@ -1,4 +1,4 @@ | ||||
| from typing import Optional, Tuple, cast | ||||
| from typing import Optional, Tuple | ||||
|  | ||||
| import keyring | ||||
|  | ||||
| @@ -13,7 +13,7 @@ class KeyringAuthSection(AuthSection): | ||||
|         return self.s.get("username") | ||||
|  | ||||
|     def keyring_name(self) -> str: | ||||
|         return cast(str, self.s.get("keyring_name", fallback=NAME)) | ||||
|         return self.s.get("keyring_name", fallback=NAME) | ||||
|  | ||||
|  | ||||
| class KeyringAuthenticator(Authenticator): | ||||
|   | ||||
| @@ -8,7 +8,6 @@ | ||||
| # well. | ||||
|  | ||||
| from . import command_local  # noqa: F401 imported but unused | ||||
| from . import command_ilias_web  # noqa: F401 imported but unused | ||||
| from . import command_kit_ilias_web  # noqa: F401 imported but unused | ||||
| from . import command_kit_ipd  # noqa: F401 imported but unused | ||||
| from .parser import PARSER, ParserLoadError, load_default_section  # noqa: F401 imported but unused | ||||
|   | ||||
| @@ -1,56 +0,0 @@ | ||||
| import argparse | ||||
| import configparser | ||||
|  | ||||
| from ..logging import log | ||||
| from .common_ilias_args import configure_common_group_args, load_common | ||||
| from .parser import CRAWLER_PARSER, SUBPARSERS, load_crawler | ||||
|  | ||||
| COMMAND_NAME = "ilias-web" | ||||
|  | ||||
| SUBPARSER = SUBPARSERS.add_parser( | ||||
|     COMMAND_NAME, | ||||
|     parents=[CRAWLER_PARSER], | ||||
| ) | ||||
|  | ||||
| GROUP = SUBPARSER.add_argument_group( | ||||
|     title=f"{COMMAND_NAME} crawler arguments", | ||||
|     description=f"arguments for the '{COMMAND_NAME}' crawler", | ||||
| ) | ||||
|  | ||||
| GROUP.add_argument( | ||||
|     "--base-url", | ||||
|     type=str, | ||||
|     metavar="BASE_URL", | ||||
|     help="The base url of the ilias instance" | ||||
| ) | ||||
|  | ||||
| GROUP.add_argument( | ||||
|     "--client-id", | ||||
|     type=str, | ||||
|     metavar="CLIENT_ID", | ||||
|     help="The client id of the ilias instance" | ||||
| ) | ||||
|  | ||||
| configure_common_group_args(GROUP) | ||||
|  | ||||
|  | ||||
| def load( | ||||
|         args: argparse.Namespace, | ||||
|         parser: configparser.ConfigParser, | ||||
| ) -> None: | ||||
|     log.explain(f"Creating config for command '{COMMAND_NAME}'") | ||||
|  | ||||
|     parser["crawl:ilias"] = {} | ||||
|     section = parser["crawl:ilias"] | ||||
|     load_crawler(args, section) | ||||
|  | ||||
|     section["type"] = COMMAND_NAME | ||||
|     if args.base_url is not None: | ||||
|         section["base_url"] = args.base_url | ||||
|     if args.client_id is not None: | ||||
|         section["client_id"] = args.client_id | ||||
|  | ||||
|     load_common(section, args, parser) | ||||
|  | ||||
|  | ||||
| SUBPARSER.set_defaults(command=load) | ||||
| @@ -1,37 +1,120 @@ | ||||
| import argparse | ||||
| import configparser | ||||
| from pathlib import Path | ||||
|  | ||||
| from ..crawl.ilias.file_templates import Links | ||||
| from ..logging import log | ||||
| from .common_ilias_args import configure_common_group_args, load_common | ||||
| from .parser import CRAWLER_PARSER, SUBPARSERS, load_crawler | ||||
|  | ||||
| COMMAND_NAME = "kit-ilias-web" | ||||
| from .parser import (CRAWLER_PARSER, SUBPARSERS, BooleanOptionalAction, ParserLoadError, load_crawler, | ||||
|                      show_value_error) | ||||
|  | ||||
| SUBPARSER = SUBPARSERS.add_parser( | ||||
|     COMMAND_NAME, | ||||
|     "kit-ilias-web", | ||||
|     parents=[CRAWLER_PARSER], | ||||
| ) | ||||
|  | ||||
| GROUP = SUBPARSER.add_argument_group( | ||||
|     title=f"{COMMAND_NAME} crawler arguments", | ||||
|     description=f"arguments for the '{COMMAND_NAME}' crawler", | ||||
|     title="kit-ilias-web crawler arguments", | ||||
|     description="arguments for the 'kit-ilias-web' crawler", | ||||
| ) | ||||
| GROUP.add_argument( | ||||
|     "target", | ||||
|     type=str, | ||||
|     metavar="TARGET", | ||||
|     help="course id, 'desktop', or ILIAS URL to crawl" | ||||
| ) | ||||
| GROUP.add_argument( | ||||
|     "output", | ||||
|     type=Path, | ||||
|     metavar="OUTPUT", | ||||
|     help="output directory" | ||||
| ) | ||||
| GROUP.add_argument( | ||||
|     "--username", "-u", | ||||
|     type=str, | ||||
|     metavar="USERNAME", | ||||
|     help="user name for authentication" | ||||
| ) | ||||
| GROUP.add_argument( | ||||
|     "--keyring", | ||||
|     action=BooleanOptionalAction, | ||||
|     help="use the system keyring to store and retrieve passwords" | ||||
| ) | ||||
| GROUP.add_argument( | ||||
|     "--credential-file", | ||||
|     type=Path, | ||||
|     metavar="PATH", | ||||
|     help="read username and password from a credential file" | ||||
| ) | ||||
| GROUP.add_argument( | ||||
|     "--links", | ||||
|     type=show_value_error(Links.from_string), | ||||
|     metavar="OPTION", | ||||
|     help="how to represent external links" | ||||
| ) | ||||
| GROUP.add_argument( | ||||
|     "--link-redirect-delay", | ||||
|     type=int, | ||||
|     metavar="SECONDS", | ||||
|     help="time before 'fancy' links redirect to to their target (-1 to disable)" | ||||
| ) | ||||
| GROUP.add_argument( | ||||
|     "--videos", | ||||
|     action=BooleanOptionalAction, | ||||
|     help="crawl and download videos" | ||||
| ) | ||||
| GROUP.add_argument( | ||||
|     "--forums", | ||||
|     action=BooleanOptionalAction, | ||||
|     help="crawl and download forum posts" | ||||
| ) | ||||
| GROUP.add_argument( | ||||
|     "--http-timeout", "-t", | ||||
|     type=float, | ||||
|     metavar="SECONDS", | ||||
|     help="timeout for all HTTP requests" | ||||
| ) | ||||
|  | ||||
| configure_common_group_args(GROUP) | ||||
|  | ||||
|  | ||||
| def load( | ||||
|         args: argparse.Namespace, | ||||
|         parser: configparser.ConfigParser, | ||||
| ) -> None: | ||||
|     log.explain(f"Creating config for command '{COMMAND_NAME}'") | ||||
|     log.explain("Creating config for command 'kit-ilias-web'") | ||||
|  | ||||
|     parser["crawl:ilias"] = {} | ||||
|     section = parser["crawl:ilias"] | ||||
|     load_crawler(args, section) | ||||
|  | ||||
|     section["type"] = COMMAND_NAME | ||||
|     load_common(section, args, parser) | ||||
|     section["type"] = "kit-ilias-web" | ||||
|     section["target"] = str(args.target) | ||||
|     section["output_dir"] = str(args.output) | ||||
|     section["auth"] = "auth:ilias" | ||||
|     if args.links is not None: | ||||
|         section["links"] = str(args.links.value) | ||||
|     if args.link_redirect_delay is not None: | ||||
|         section["link_redirect_delay"] = str(args.link_redirect_delay) | ||||
|     if args.videos is not None: | ||||
|         section["videos"] = "yes" if args.videos else "no" | ||||
|     if args.forums is not None: | ||||
|         section["forums"] = "yes" if args.forums else "no" | ||||
|     if args.http_timeout is not None: | ||||
|         section["http_timeout"] = str(args.http_timeout) | ||||
|  | ||||
|     parser["auth:ilias"] = {} | ||||
|     auth_section = parser["auth:ilias"] | ||||
|     if args.credential_file is not None: | ||||
|         if args.username is not None: | ||||
|             raise ParserLoadError("--credential-file and --username can't be used together") | ||||
|         if args.keyring: | ||||
|             raise ParserLoadError("--credential-file and --keyring can't be used together") | ||||
|         auth_section["type"] = "credential-file" | ||||
|         auth_section["path"] = str(args.credential_file) | ||||
|     elif args.keyring: | ||||
|         auth_section["type"] = "keyring" | ||||
|     else: | ||||
|         auth_section["type"] = "simple" | ||||
|     if args.username is not None: | ||||
|         auth_section["username"] = args.username | ||||
|  | ||||
|  | ||||
| SUBPARSER.set_defaults(command=load) | ||||
|   | ||||
| @@ -1,104 +0,0 @@ | ||||
| import argparse | ||||
| import configparser | ||||
| from pathlib import Path | ||||
|  | ||||
| from ..crawl.ilias.file_templates import Links | ||||
| from .parser import BooleanOptionalAction, ParserLoadError, show_value_error | ||||
|  | ||||
|  | ||||
| def configure_common_group_args(group: argparse._ArgumentGroup) -> None: | ||||
|     """These arguments are shared between the KIT and generic Ilias web command.""" | ||||
|     group.add_argument( | ||||
|         "target", | ||||
|         type=str, | ||||
|         metavar="TARGET", | ||||
|         help="course id, 'desktop', or ILIAS URL to crawl" | ||||
|     ) | ||||
|     group.add_argument( | ||||
|         "output", | ||||
|         type=Path, | ||||
|         metavar="OUTPUT", | ||||
|         help="output directory" | ||||
|     ) | ||||
|     group.add_argument( | ||||
|         "--username", "-u", | ||||
|         type=str, | ||||
|         metavar="USERNAME", | ||||
|         help="user name for authentication" | ||||
|     ) | ||||
|     group.add_argument( | ||||
|         "--keyring", | ||||
|         action=BooleanOptionalAction, | ||||
|         help="use the system keyring to store and retrieve passwords" | ||||
|     ) | ||||
|     group.add_argument( | ||||
|         "--credential-file", | ||||
|         type=Path, | ||||
|         metavar="PATH", | ||||
|         help="read username and password from a credential file" | ||||
|     ) | ||||
|     group.add_argument( | ||||
|         "--links", | ||||
|         type=show_value_error(Links.from_string), | ||||
|         metavar="OPTION", | ||||
|         help="how to represent external links" | ||||
|     ) | ||||
|     group.add_argument( | ||||
|         "--link-redirect-delay", | ||||
|         type=int, | ||||
|         metavar="SECONDS", | ||||
|         help="time before 'fancy' links redirect to to their target (-1 to disable)" | ||||
|     ) | ||||
|     group.add_argument( | ||||
|         "--videos", | ||||
|         action=BooleanOptionalAction, | ||||
|         help="crawl and download videos" | ||||
|     ) | ||||
|     group.add_argument( | ||||
|         "--forums", | ||||
|         action=BooleanOptionalAction, | ||||
|         help="crawl and download forum posts" | ||||
|     ) | ||||
|     group.add_argument( | ||||
|         "--http-timeout", "-t", | ||||
|         type=float, | ||||
|         metavar="SECONDS", | ||||
|         help="timeout for all HTTP requests" | ||||
|     ) | ||||
|  | ||||
|  | ||||
| def load_common( | ||||
|     section: configparser.SectionProxy, | ||||
|     args: argparse.Namespace, | ||||
|     parser: configparser.ConfigParser, | ||||
| ) -> None: | ||||
|     """Load common config between generic and KIT ilias web command""" | ||||
|     section["target"] = str(args.target) | ||||
|     section["output_dir"] = str(args.output) | ||||
|     section["auth"] = "auth:ilias" | ||||
|     if args.links is not None: | ||||
|         section["links"] = str(args.links.value) | ||||
|     if args.link_redirect_delay is not None: | ||||
|         section["link_redirect_delay"] = str(args.link_redirect_delay) | ||||
|     if args.videos is not None: | ||||
|         section["videos"] = "yes" if args.videos else "no" | ||||
|     if args.forums is not None: | ||||
|         section["forums"] = "yes" if args.forums else "no" | ||||
|     if args.http_timeout is not None: | ||||
|         section["http_timeout"] = str(args.http_timeout) | ||||
|  | ||||
|     parser["auth:ilias"] = {} | ||||
|     auth_section = parser["auth:ilias"] | ||||
|     if args.credential_file is not None: | ||||
|         if args.username is not None: | ||||
|             raise ParserLoadError("--credential-file and --username can't be used together") | ||||
|         if args.keyring: | ||||
|             raise ParserLoadError("--credential-file and --keyring can't be used together") | ||||
|         auth_section["type"] = "credential-file" | ||||
|         auth_section["path"] = str(args.credential_file) | ||||
|     elif args.keyring: | ||||
|         auth_section["type"] = "keyring" | ||||
|     else: | ||||
|         auth_section["type"] = "simple" | ||||
|     if args.username is not None: | ||||
|         auth_section["username"] = args.username | ||||
| @@ -4,7 +4,7 @@ from typing import Callable, Dict | ||||
| from ..auth import Authenticator | ||||
| from ..config import Config | ||||
| from .crawler import Crawler, CrawlError, CrawlerSection  # noqa: F401 | ||||
| from .ilias import IliasWebCrawler, IliasWebCrawlerSection, KitIliasWebCrawler, KitIliasWebCrawlerSection | ||||
| from .ilias import KitIliasWebCrawler, KitIliasWebCrawlerSection | ||||
| from .kit_ipd_crawler import KitIpdCrawler, KitIpdCrawlerSection | ||||
| from .local_crawler import LocalCrawler, LocalCrawlerSection | ||||
|  | ||||
| @@ -18,8 +18,6 @@ CrawlerConstructor = Callable[[ | ||||
| CRAWLERS: Dict[str, CrawlerConstructor] = { | ||||
|     "local": lambda n, s, c, a: | ||||
|         LocalCrawler(n, LocalCrawlerSection(s), c), | ||||
|     "ilias-web": lambda n, s, c, a: | ||||
|         IliasWebCrawler(n, IliasWebCrawlerSection(s), c, a), | ||||
|     "kit-ilias-web": lambda n, s, c, a: | ||||
|         KitIliasWebCrawler(n, KitIliasWebCrawlerSection(s), c, a), | ||||
|     "kit-ipd": lambda n, s, c, a: | ||||
|   | ||||
| @@ -149,7 +149,9 @@ class CrawlerSection(Section): | ||||
|         return self.s.getboolean("skip", fallback=False) | ||||
|  | ||||
|     def output_dir(self, name: str) -> Path: | ||||
|         name = name.removeprefix("crawl:") | ||||
|         # TODO Use removeprefix() after switching to 3.9 | ||||
|         if name.startswith("crawl:"): | ||||
|             name = name[len("crawl:"):] | ||||
|         return Path(self.s.get("output_dir", name)).expanduser() | ||||
|  | ||||
|     def redownload(self) -> Redownload: | ||||
| @@ -256,10 +258,6 @@ class Crawler(ABC): | ||||
|     def prev_report(self) -> Optional[Report]: | ||||
|         return self._output_dir.prev_report | ||||
|  | ||||
|     @property | ||||
|     def output_dir(self) -> OutputDirectory: | ||||
|         return self._output_dir | ||||
|  | ||||
|     @staticmethod | ||||
|     async def gather(awaitables: Sequence[Awaitable[Any]]) -> List[Any]: | ||||
|         """ | ||||
| @@ -292,40 +290,9 @@ class Crawler(ABC): | ||||
|         log.explain("Answer: Yes") | ||||
|         return CrawlToken(self._limiter, path) | ||||
|  | ||||
|     def should_try_download( | ||||
|             self, | ||||
|             path: PurePath, | ||||
|             *, | ||||
|             etag_differs: Optional[bool] = None, | ||||
|             mtime: Optional[datetime] = None, | ||||
|             redownload: Optional[Redownload] = None, | ||||
|             on_conflict: Optional[OnConflict] = None, | ||||
|     ) -> bool: | ||||
|         log.explain_topic(f"Decision: Should Download {fmt_path(path)}") | ||||
|  | ||||
|         if self._transformer.transform(path) is None: | ||||
|             log.explain("Answer: No (ignored)") | ||||
|             return False | ||||
|  | ||||
|         should_download = self._output_dir.should_try_download( | ||||
|             path, | ||||
|             etag_differs=etag_differs, | ||||
|             mtime=mtime, | ||||
|             redownload=redownload, | ||||
|             on_conflict=on_conflict | ||||
|         ) | ||||
|         if should_download: | ||||
|             log.explain("Answer: Yes") | ||||
|             return True | ||||
|         else: | ||||
|             log.explain("Answer: No") | ||||
|             return False | ||||
|  | ||||
|     async def download( | ||||
|             self, | ||||
|             path: PurePath, | ||||
|             *, | ||||
|             etag_differs: Optional[bool] = None, | ||||
|             mtime: Optional[datetime] = None, | ||||
|             redownload: Optional[Redownload] = None, | ||||
|             on_conflict: Optional[OnConflict] = None, | ||||
| @@ -340,14 +307,7 @@ class Crawler(ABC): | ||||
|             log.status("[bold bright_black]", "Ignored", fmt_path(path)) | ||||
|             return None | ||||
|  | ||||
|         fs_token = await self._output_dir.download( | ||||
|             path, | ||||
|             transformed_path, | ||||
|             etag_differs=etag_differs, | ||||
|             mtime=mtime, | ||||
|             redownload=redownload, | ||||
|             on_conflict=on_conflict | ||||
|         ) | ||||
|         fs_token = await self._output_dir.download(path, transformed_path, mtime, redownload, on_conflict) | ||||
|         if fs_token is None: | ||||
|             log.explain("Answer: No") | ||||
|             return None | ||||
|   | ||||
| @@ -1,14 +1,12 @@ | ||||
| import asyncio | ||||
| import http.cookies | ||||
| import ssl | ||||
| from datetime import datetime | ||||
| from pathlib import Path, PurePath | ||||
| from typing import Any, Dict, List, Optional, Tuple, cast | ||||
| from typing import Any, Dict, List, Optional | ||||
|  | ||||
| import aiohttp | ||||
| import certifi | ||||
| from aiohttp.client import ClientTimeout | ||||
| from bs4 import Tag | ||||
|  | ||||
| from ..auth import Authenticator | ||||
| from ..config import Config | ||||
| @@ -17,12 +15,10 @@ from ..utils import fmt_real_path | ||||
| from ..version import NAME, VERSION | ||||
| from .crawler import Crawler, CrawlerSection | ||||
|  | ||||
| ETAGS_CUSTOM_REPORT_VALUE_KEY = "etags" | ||||
|  | ||||
|  | ||||
| class HttpCrawlerSection(CrawlerSection): | ||||
|     def http_timeout(self) -> float: | ||||
|         return self.s.getfloat("http_timeout", fallback=30) | ||||
|         return self.s.getfloat("http_timeout", fallback=20) | ||||
|  | ||||
|  | ||||
| class HttpCrawler(Crawler): | ||||
| @@ -173,79 +169,6 @@ class HttpCrawler(Crawler): | ||||
|             log.warn(f"Failed to save cookies to {fmt_real_path(self._cookie_jar_path)}") | ||||
|             log.warn(str(e)) | ||||
|  | ||||
|     @staticmethod | ||||
|     def get_folder_structure_from_heading_hierarchy(file_link: Tag, drop_h1: bool = False) -> PurePath: | ||||
|         """ | ||||
|         Retrieves the hierarchy of headings associated with the give file link and constructs a folder | ||||
|         structure from them. | ||||
|  | ||||
|         <h1> level headings usually only appear once and serve as the page title, so they would introduce | ||||
|         redundant nesting. To avoid this, <h1> headings are ignored via the drop_h1 parameter. | ||||
|         """ | ||||
|  | ||||
|         def find_associated_headings(tag: Tag, level: int) -> PurePath: | ||||
|             if level == 0 or (level == 1 and drop_h1): | ||||
|                 return PurePath() | ||||
|  | ||||
|             level_heading = cast(Optional[Tag], tag.find_previous(name=f"h{level}")) | ||||
|  | ||||
|             if level_heading is None: | ||||
|                 return find_associated_headings(tag, level - 1) | ||||
|  | ||||
|             folder_name = level_heading.get_text().strip() | ||||
|             return find_associated_headings(level_heading, level - 1) / folder_name | ||||
|  | ||||
|         # start at level <h3> because paragraph-level headings are usually too granular for folder names | ||||
|         return find_associated_headings(file_link, 3) | ||||
|  | ||||
|     def _get_previous_etag_from_report(self, path: PurePath) -> Optional[str]: | ||||
|         """ | ||||
|         If available, retrieves the entity tag for a given path which was stored in the previous report. | ||||
|         """ | ||||
|         if not self._output_dir.prev_report: | ||||
|             return None | ||||
|  | ||||
|         etags = self._output_dir.prev_report.get_custom_value(ETAGS_CUSTOM_REPORT_VALUE_KEY) or {} | ||||
|         return etags.get(str(path)) | ||||
|  | ||||
|     def _add_etag_to_report(self, path: PurePath, etag: Optional[str]) -> None: | ||||
|         """ | ||||
|         Adds an entity tag for a given path to the report's custom values. | ||||
|         """ | ||||
|         if not etag: | ||||
|             return | ||||
|  | ||||
|         etags = self._output_dir.report.get_custom_value(ETAGS_CUSTOM_REPORT_VALUE_KEY) or {} | ||||
|         etags[str(path)] = etag | ||||
|         self._output_dir.report.add_custom_value(ETAGS_CUSTOM_REPORT_VALUE_KEY, etags) | ||||
|  | ||||
|     async def _request_resource_version(self, resource_url: str) -> Tuple[Optional[str], Optional[datetime]]: | ||||
|         """ | ||||
|         Requests the ETag and Last-Modified headers of a resource via a HEAD request. | ||||
|         If no entity tag / modification date can be obtained, the according value will be None. | ||||
|         """ | ||||
|         try: | ||||
|             async with self.session.head(resource_url) as resp: | ||||
|                 if resp.status != 200: | ||||
|                     return None, None | ||||
|  | ||||
|                 etag_header = resp.headers.get("ETag") | ||||
|                 last_modified_header = resp.headers.get("Last-Modified") | ||||
|                 last_modified = None | ||||
|  | ||||
|                 if last_modified_header: | ||||
|                     try: | ||||
|                         # https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Last-Modified#directives | ||||
|                         datetime_format = "%a, %d %b %Y %H:%M:%S GMT" | ||||
|                         last_modified = datetime.strptime(last_modified_header, datetime_format) | ||||
|                     except ValueError: | ||||
|                         # last_modified remains None | ||||
|                         pass | ||||
|  | ||||
|                 return etag_header, last_modified | ||||
|         except aiohttp.ClientError: | ||||
|             return None, None | ||||
|  | ||||
|     async def run(self) -> None: | ||||
|         self._request_count = 0 | ||||
|         self._cookie_jar = aiohttp.CookieJar() | ||||
| @@ -263,12 +186,7 @@ class HttpCrawler(Crawler): | ||||
|                     connect=self._http_timeout, | ||||
|                     sock_connect=self._http_timeout, | ||||
|                     sock_read=self._http_timeout, | ||||
|                 ), | ||||
|                 # See https://github.com/aio-libs/aiohttp/issues/6626 | ||||
|                 # Without this aiohttp will mangle the redirect header from Shibboleth, invalidating the | ||||
|                 # passed signature. Shibboleth will not accept the broken signature and authentication will | ||||
|                 # fail. | ||||
|                 requote_redirect_url=False | ||||
|                 ) | ||||
|         ) as session: | ||||
|             self.session = session | ||||
|             try: | ||||
|   | ||||
| @@ -1,9 +1,3 @@ | ||||
| from .kit_ilias_web_crawler import (IliasWebCrawler, IliasWebCrawlerSection, KitIliasWebCrawler, | ||||
|                                     KitIliasWebCrawlerSection) | ||||
| from .kit_ilias_web_crawler import KitIliasWebCrawler, KitIliasWebCrawlerSection | ||||
|  | ||||
| __all__ = [ | ||||
|     "IliasWebCrawler", | ||||
|     "IliasWebCrawlerSection", | ||||
|     "KitIliasWebCrawler", | ||||
|     "KitIliasWebCrawlerSection", | ||||
| ] | ||||
| __all__ = ["KitIliasWebCrawler", "KitIliasWebCrawlerSection"] | ||||
|   | ||||
| @@ -1,40 +0,0 @@ | ||||
| import asyncio | ||||
| from typing import Any, Callable, Optional | ||||
|  | ||||
| import aiohttp | ||||
|  | ||||
| from ...logging import log | ||||
| from ..crawler import AWrapped, CrawlError, CrawlWarning | ||||
|  | ||||
|  | ||||
| def _iorepeat(attempts: int, name: str, failure_is_error: bool = False) -> Callable[[AWrapped], AWrapped]: | ||||
|     def decorator(f: AWrapped) -> AWrapped: | ||||
|         async def wrapper(*args: Any, **kwargs: Any) -> Optional[Any]: | ||||
|             last_exception: Optional[BaseException] = None | ||||
|             for round in range(attempts): | ||||
|                 try: | ||||
|                     return await f(*args, **kwargs) | ||||
|                 except aiohttp.ContentTypeError:  # invalid content type | ||||
|                     raise CrawlWarning("ILIAS returned an invalid content type") | ||||
|                 except aiohttp.TooManyRedirects: | ||||
|                     raise CrawlWarning("Got stuck in a redirect loop") | ||||
|                 except aiohttp.ClientPayloadError as e:  # encoding or not enough bytes | ||||
|                     last_exception = e | ||||
|                 except aiohttp.ClientConnectionError as e:  # e.g. timeout, disconnect, resolve failed, etc. | ||||
|                     last_exception = e | ||||
|                 except asyncio.exceptions.TimeoutError as e:  # explicit http timeouts in HttpCrawler | ||||
|                     last_exception = e | ||||
|                 log.explain_topic(f"Retrying operation {name}. Retries left: {attempts - 1 - round}") | ||||
|                 log.explain(f"Last exception: {last_exception!r}") | ||||
|  | ||||
|             if last_exception: | ||||
|                 message = f"Error in I/O Operation: {last_exception!r}" | ||||
|                 if failure_is_error: | ||||
|                     raise CrawlError(message) from last_exception | ||||
|                 else: | ||||
|                     raise CrawlWarning(message) from last_exception | ||||
|             raise CrawlError("Impossible return in ilias _iorepeat") | ||||
|  | ||||
|         return wrapper  # type: ignore | ||||
|  | ||||
|     return decorator | ||||
| @@ -1,7 +1,5 @@ | ||||
| import dataclasses | ||||
| import re | ||||
| from enum import Enum | ||||
| from typing import Optional, cast | ||||
| from typing import Optional | ||||
|  | ||||
| import bs4 | ||||
|  | ||||
| @@ -14,9 +12,7 @@ _link_template_fancy = """ | ||||
|     <head> | ||||
|         <meta charset="UTF-8"> | ||||
|         <title>ILIAS - Link: {{name}}</title> | ||||
|         <!-- REPEAT REMOVE START --> | ||||
|         <meta http-equiv = "refresh" content = "{{redirect_delay}}; url = {{link}}" /> | ||||
|         <!-- REPEAT REMOVE END --> | ||||
|     </head> | ||||
|  | ||||
|     <style> | ||||
| @@ -27,8 +23,6 @@ _link_template_fancy = """ | ||||
|         display: flex; | ||||
|         align-items: center; | ||||
|         justify-content: center; | ||||
|         flex-direction: column; | ||||
|         gap: 4px; | ||||
|     } | ||||
|     body { | ||||
|         padding: 0; | ||||
| @@ -37,15 +31,10 @@ _link_template_fancy = """ | ||||
|         font-family: "Open Sans", Verdana, Arial, Helvetica, sans-serif; | ||||
|         height: 100vh; | ||||
|     } | ||||
|     .column { | ||||
|         min-width: 500px; | ||||
|         max-width: 90vw; | ||||
|         display: flex; | ||||
|         flex-direction: column; | ||||
|         row-gap: 5px; | ||||
|     } | ||||
|     .row { | ||||
|         background-color: white; | ||||
|         min-width: 500px; | ||||
|         max-width: 90vw; | ||||
|         display: flex; | ||||
|         padding: 1em; | ||||
|     } | ||||
| @@ -86,23 +75,19 @@ _link_template_fancy = """ | ||||
|     } | ||||
|     </style> | ||||
|     <body class="center-flex"> | ||||
|         <div class="column"> | ||||
|         <!-- REPEAT START --> | ||||
|             <div class="row"> | ||||
|                 <div class="logo center-flex"> | ||||
|                     <svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24"> | ||||
|                         <path d="M12 0c-6.627 0-12 5.373-12 12s5.373 12 12 12 12-5.373 12-12-5.373-12-12-12zm9.567 9.098c-.059-.058-.127-.108-.206-.138-.258-.101-1.35.603-1.515.256-.108-.231-.327.148-.578.008-.121-.067-.459-.52-.611-.465-.312.112.479.974.694 1.087.203-.154.86-.469 1.002-.039.271.812-.745 1.702-1.264 2.171-.775.702-.63-.454-1.159-.86-.277-.213-.274-.667-.555-.824-.125-.071-.7-.732-.694-.821l-.017.167c-.095.072-.297-.27-.319-.325 0 .298.485.772.646 1.011.273.409.42 1.005.756 1.339.179.18.866.923 1.045.908l.921-.437c.649.154-1.531 3.237-1.738 3.619-.171.321.139 1.112.114 1.49-.029.437-.374.579-.7.817-.35.255-.268.752-.562.934-.521.321-.897 1.366-1.639 1.361-.219-.001-1.151.364-1.273.007-.095-.258-.223-.455-.356-.71-.131-.25-.015-.51-.175-.731-.11-.154-.479-.502-.513-.684-.002-.157.118-.632.283-.715.231-.118.044-.462.016-.663-.048-.357-.27-.652-.535-.859-.393-.302-.189-.542-.098-.974 0-.206-.126-.476-.402-.396-.57.166-.396-.445-.812-.417-.299.021-.543.211-.821.295-.349.104-.707-.083-1.053-.126-1.421-.179-1.885-1.804-1.514-2.976.037-.192-.115-.547-.048-.696.159-.352.485-.752.768-1.021.16-.152.365-.113.553-.231.29-.182.294-.558.578-.789.404-.328.956-.321 1.482-.392.281-.037 1.35-.268 1.518-.06 0 .039.193.611-.019.578.438.023 1.061.756 1.476.585.213-.089.135-.744.573-.427.265.19 1.45.275 1.696.07.152-.125.236-.939.053-1.031.117.116-.618.125-.686.099-.122-.044-.235.115-.43.025.117.055-.651-.358-.22-.674-.181.132-.349-.037-.544.109-.135.109.062.181-.13.277-.305.155-.535-.53-.649-.607-.118-.077-1.024-.713-.777-.298l.797.793c-.04.026-.209-.289-.209-.059.053-.136.02.585-.105.35-.056-.09.091-.14.006-.271 0-.085-.23-.169-.275-.228-.126-.157-.462-.502-.644-.585-.05-.024-.771.088-.832.111-.071.099-.131.203-.181.314-.149.055-.29.127-.423.216l-.159.356c-.068.061-.772.294-.776.303.03-.076-.492-.172-.457-.324.038-.167.215-.687.169-.877-.048-.199 1.085.287 1.158-.238.029-.227.047-.492-.316-.531.069.008.702-.249.807-.364.148-.169.486-.447.731-.447.286 0 
.225-.417.356-.622.133.053-.071.38.088.512-.01-.104.45.057.494.033.105-.056.691-.023.601-.299-.101-.28.052-.197.183-.255-.02.008.248-.458.363-.456-.104-.089-.398.112-.516.103-.308-.024-.177-.525-.061-.672.09-.116-.246-.258-.25-.036-.006.332-.314.633-.243 1.075.109.666-.743-.161-.816-.115-.283.172-.515-.216-.368-.449.149-.238.51-.226.659-.48.104-.179.227-.389.388-.524.541-.454.689-.091 1.229-.042.526.048.178.125.105.327-.07.192.289.261.413.1.071-.092.232-.326.301-.499.07-.175.578-.2.527-.365 2.72 1.148 4.827 3.465 5.694 6.318zm-11.113-3.779l.068-.087.073-.019c.042-.034.086-.118.151-.104.043.009.146.095.111.148-.037.054-.066-.049-.081.101-.018.169-.188.167-.313.222-.087.037-.175-.018-.09-.104l.088-.108-.007-.049zm.442.245c.046-.045.138-.008.151-.094.014-.084.078-.178-.008-.335-.022-.042.116-.082.051-.137l-.109.032s.155-.668.364-.366l-.089.103c.135.134.172.47.215.687.127.066.324.078.098.192.117-.02-.618.314-.715.178-.072-.083.317-.139.307-.173-.004-.011-.317-.02-.265-.087zm1.43-3.547l-.356.326c-.36.298-1.28.883-1.793.705-.524-.18-1.647.667-1.826.673-.067.003.002-.641.36-.689-.141.021.993-.575 1.185-.805.678-.146 1.381-.227 2.104-.227l.326.017zm-5.086 1.19c.07.082.278.092-.026.288-.183.11-.377.809-.548.809-.51.223-.542-.439-1.109.413-.078.115-.395.158-.644.236.685-.688 1.468-1.279 2.327-1.746zm-5.24 8.793c0-.541.055-1.068.139-1.586l.292.185c.113.135.113.719.169.911.139.482.484.751.748 1.19.155.261.414.923.332 1.197.109-.179 1.081.824 1.259 1.033.418.492.74 1.088.061 1.574-.219.158.334 1.14.049 1.382l-.365.094c-.225.138-.235.397-.166.631-1.562-1.765-2.518-4.076-2.518-6.611zm14.347-5.823c.083-.01-.107.167-.107.167.033.256.222.396.581.527.437.157.038.455-.213.385-.139-.039-.854-.255-.879.025 0 .167-.679.001-.573-.175.073-.119.05-.387.186-.562.193-.255.38-.116.386.032-.001.394.398-.373.619-.399z"/> | ||||
|                     </svg> | ||||
|                 </div> | ||||
|                 <div class="tile"> | ||||
|                     <div class="top-row"> | ||||
|                         <a href="{{link}}">{{name}}</a> | ||||
|                     </div> | ||||
|                     <div class="bottom-row">{{description}}</div> | ||||
|                 </div> | ||||
|                 <div class="menu-button center-flex"> ⯆ </div> | ||||
|         <div class="row"> | ||||
|             <div class="logo center-flex"> | ||||
|                 <svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24"> | ||||
|                     <path d="M12 0c-6.627 0-12 5.373-12 12s5.373 12 12 12 12-5.373 12-12-5.373-12-12-12zm9.567 9.098c-.059-.058-.127-.108-.206-.138-.258-.101-1.35.603-1.515.256-.108-.231-.327.148-.578.008-.121-.067-.459-.52-.611-.465-.312.112.479.974.694 1.087.203-.154.86-.469 1.002-.039.271.812-.745 1.702-1.264 2.171-.775.702-.63-.454-1.159-.86-.277-.213-.274-.667-.555-.824-.125-.071-.7-.732-.694-.821l-.017.167c-.095.072-.297-.27-.319-.325 0 .298.485.772.646 1.011.273.409.42 1.005.756 1.339.179.18.866.923 1.045.908l.921-.437c.649.154-1.531 3.237-1.738 3.619-.171.321.139 1.112.114 1.49-.029.437-.374.579-.7.817-.35.255-.268.752-.562.934-.521.321-.897 1.366-1.639 1.361-.219-.001-1.151.364-1.273.007-.095-.258-.223-.455-.356-.71-.131-.25-.015-.51-.175-.731-.11-.154-.479-.502-.513-.684-.002-.157.118-.632.283-.715.231-.118.044-.462.016-.663-.048-.357-.27-.652-.535-.859-.393-.302-.189-.542-.098-.974 0-.206-.126-.476-.402-.396-.57.166-.396-.445-.812-.417-.299.021-.543.211-.821.295-.349.104-.707-.083-1.053-.126-1.421-.179-1.885-1.804-1.514-2.976.037-.192-.115-.547-.048-.696.159-.352.485-.752.768-1.021.16-.152.365-.113.553-.231.29-.182.294-.558.578-.789.404-.328.956-.321 1.482-.392.281-.037 1.35-.268 1.518-.06 0 .039.193.611-.019.578.438.023 1.061.756 1.476.585.213-.089.135-.744.573-.427.265.19 1.45.275 1.696.07.152-.125.236-.939.053-1.031.117.116-.618.125-.686.099-.122-.044-.235.115-.43.025.117.055-.651-.358-.22-.674-.181.132-.349-.037-.544.109-.135.109.062.181-.13.277-.305.155-.535-.53-.649-.607-.118-.077-1.024-.713-.777-.298l.797.793c-.04.026-.209-.289-.209-.059.053-.136.02.585-.105.35-.056-.09.091-.14.006-.271 0-.085-.23-.169-.275-.228-.126-.157-.462-.502-.644-.585-.05-.024-.771.088-.832.111-.071.099-.131.203-.181.314-.149.055-.29.127-.423.216l-.159.356c-.068.061-.772.294-.776.303.03-.076-.492-.172-.457-.324.038-.167.215-.687.169-.877-.048-.199 1.085.287 1.158-.238.029-.227.047-.492-.316-.531.069.008.702-.249.807-.364.148-.169.486-.447.731-.447.286 0 
.225-.417.356-.622.133.053-.071.38.088.512-.01-.104.45.057.494.033.105-.056.691-.023.601-.299-.101-.28.052-.197.183-.255-.02.008.248-.458.363-.456-.104-.089-.398.112-.516.103-.308-.024-.177-.525-.061-.672.09-.116-.246-.258-.25-.036-.006.332-.314.633-.243 1.075.109.666-.743-.161-.816-.115-.283.172-.515-.216-.368-.449.149-.238.51-.226.659-.48.104-.179.227-.389.388-.524.541-.454.689-.091 1.229-.042.526.048.178.125.105.327-.07.192.289.261.413.1.071-.092.232-.326.301-.499.07-.175.578-.2.527-.365 2.72 1.148 4.827 3.465 5.694 6.318zm-11.113-3.779l.068-.087.073-.019c.042-.034.086-.118.151-.104.043.009.146.095.111.148-.037.054-.066-.049-.081.101-.018.169-.188.167-.313.222-.087.037-.175-.018-.09-.104l.088-.108-.007-.049zm.442.245c.046-.045.138-.008.151-.094.014-.084.078-.178-.008-.335-.022-.042.116-.082.051-.137l-.109.032s.155-.668.364-.366l-.089.103c.135.134.172.47.215.687.127.066.324.078.098.192.117-.02-.618.314-.715.178-.072-.083.317-.139.307-.173-.004-.011-.317-.02-.265-.087zm1.43-3.547l-.356.326c-.36.298-1.28.883-1.793.705-.524-.18-1.647.667-1.826.673-.067.003.002-.641.36-.689-.141.021.993-.575 1.185-.805.678-.146 1.381-.227 2.104-.227l.326.017zm-5.086 1.19c.07.082.278.092-.026.288-.183.11-.377.809-.548.809-.51.223-.542-.439-1.109.413-.078.115-.395.158-.644.236.685-.688 1.468-1.279 2.327-1.746zm-5.24 8.793c0-.541.055-1.068.139-1.586l.292.185c.113.135.113.719.169.911.139.482.484.751.748 1.19.155.261.414.923.332 1.197.109-.179 1.081.824 1.259 1.033.418.492.74 1.088.061 1.574-.219.158.334 1.14.049 1.382l-.365.094c-.225.138-.235.397-.166.631-1.562-1.765-2.518-4.076-2.518-6.611zm14.347-5.823c.083-.01-.107.167-.107.167.033.256.222.396.581.527.437.157.038.455-.213.385-.139-.039-.854-.255-.879.025 0 .167-.679.001-.573-.175.073-.119.05-.387.186-.562.193-.255.38-.116.386.032-.001.394.398-.373.619-.399z"/> | ||||
|                 </svg> | ||||
|             </div> | ||||
|         <!-- REPEAT END --> | ||||
|             <div class="tile"> | ||||
|                 <div class="top-row"> | ||||
|                     <a href="{{link}}">{{name}}</a> | ||||
|                 </div> | ||||
|                 <div class="bottom-row">{{description}}</div> | ||||
|             </div> | ||||
|             <div class="menu-button center-flex"> ⯆ </div> | ||||
|         </div> | ||||
|     </body> | ||||
| </html> | ||||
| @@ -141,88 +126,6 @@ _learning_module_template = """ | ||||
| </html> | ||||
| """ | ||||
|  | ||||
| _forum_thread_template = """ | ||||
| <!DOCTYPE html> | ||||
| <html lang="en"> | ||||
|     <head> | ||||
|         <meta charset="UTF-8"> | ||||
|         <title>ILIAS - Forum: {{name}}</title> | ||||
|         <style> | ||||
|             * { | ||||
|                 box-sizing: border-box; | ||||
|             } | ||||
|             body { | ||||
|                 font-family: 'Open Sans', Verdana, Arial, Helvetica, sans-serif; | ||||
|                 padding: 8px; | ||||
|             } | ||||
|             ul, ol, p { | ||||
|                 margin: 1.2em 0; | ||||
|             } | ||||
|             p { | ||||
|                 margin-top: 8px; | ||||
|                 margin-bottom: 8px; | ||||
|             } | ||||
|             a { | ||||
|                 color: #00876c; | ||||
|                 text-decoration: none; | ||||
|                 cursor: pointer; | ||||
|             } | ||||
|             a:hover { | ||||
|                 text-decoration: underline; | ||||
|             } | ||||
|             body > p:first-child > span:first-child { | ||||
|                 font-size: 1.6em; | ||||
|             } | ||||
|             body > p:first-child > span:first-child ~ span.default { | ||||
|                 display: inline-block; | ||||
|                 font-size: 1.2em; | ||||
|                 padding-bottom: 8px; | ||||
|             } | ||||
|             .ilFrmPostContent { | ||||
|                 margin-top: 8px; | ||||
|                 max-width: 64em; | ||||
|             } | ||||
|             .ilFrmPostContent > *:first-child { | ||||
|                 margin-top: 0px; | ||||
|             } | ||||
|             .ilFrmPostTitle { | ||||
|                 margin-top: 24px; | ||||
|                 color: #00876c; | ||||
|                 font-weight: bold; | ||||
|             } | ||||
|             #ilFrmPostList { | ||||
|                 list-style: none; | ||||
|                 padding-left: 0; | ||||
|             } | ||||
|             li.ilFrmPostRow { | ||||
|                 padding: 3px 0 3px 3px; | ||||
|                 margin-bottom: 24px; | ||||
|                 border-left: 6px solid #dddddd; | ||||
|             } | ||||
|             .ilFrmPostRow > div { | ||||
|                 display: flex; | ||||
|             } | ||||
|             .ilFrmPostImage img { | ||||
|                 margin: 0 !important; | ||||
|                 padding: 6px 9px 9px 6px; | ||||
|             } | ||||
|             .ilUserIcon { | ||||
|                 width: 115px; | ||||
|             } | ||||
|             .small { | ||||
|                 text-decoration: none; | ||||
|                 font-size: 0.75rem; | ||||
|                 color: #6f6f6f; | ||||
|             } | ||||
|         </style> | ||||
|     </head> | ||||
|     <body> | ||||
|     {{heading}} | ||||
|     {{content}} | ||||
|     </body> | ||||
| </html> | ||||
| """.strip()  # noqa: E501 line too long | ||||
|  | ||||
|  | ||||
| def learning_module_template(body: bs4.Tag, name: str, prev: Optional[str], next: Optional[str]) -> str: | ||||
|     # Seems to be comments, ignore those. | ||||
| @@ -236,13 +139,13 @@ def learning_module_template(body: bs4.Tag, name: str, prev: Optional[str], next | ||||
|         </div> | ||||
|     """ | ||||
|     if prev and body.select_one(".ilc_page_lnav_LeftNavigation"): | ||||
|         text = cast(bs4.Tag, body.select_one(".ilc_page_lnav_LeftNavigation")).get_text().strip() | ||||
|         text = body.select_one(".ilc_page_lnav_LeftNavigation").getText().strip() | ||||
|         left = f'<a href="{prev}">{text}</a>' | ||||
|     else: | ||||
|         left = "<span></span>" | ||||
|  | ||||
|     if next and body.select_one(".ilc_page_rnav_RightNavigation"): | ||||
|         text = cast(bs4.Tag, body.select_one(".ilc_page_rnav_RightNavigation")).get_text().strip() | ||||
|         text = body.select_one(".ilc_page_rnav_RightNavigation").getText().strip() | ||||
|         right = f'<a href="{next}">{text}</a>' | ||||
|     else: | ||||
|         right = "<span></span>" | ||||
| @@ -257,24 +160,8 @@ def learning_module_template(body: bs4.Tag, name: str, prev: Optional[str], next | ||||
|             "{{left}}", left).replace("{{right}}", right).encode()) | ||||
|         ) | ||||
|  | ||||
|     body_str = cast(str, body.prettify()) | ||||
|     return _learning_module_template.replace("{{body}}", body_str).replace("{{name}}", name) | ||||
|  | ||||
|  | ||||
| def forum_thread_template(name: str, url: str, heading: bs4.Tag, content: bs4.Tag) -> str: | ||||
|     if title := cast(Optional[bs4.Tag], heading.find(name="b")): | ||||
|         title.wrap(bs4.Tag(name="a", attrs={"href": url})) | ||||
|     return _forum_thread_template \ | ||||
|         .replace("{{name}}", name) \ | ||||
|         .replace("{{heading}}", cast(str, heading.prettify())) \ | ||||
|         .replace("{{content}}", cast(str, content.prettify())) | ||||
|  | ||||
|  | ||||
| @dataclasses.dataclass | ||||
| class LinkData: | ||||
|     name: str | ||||
|     url: str | ||||
|     description: str | ||||
|     body = body.prettify() | ||||
|     return _learning_module_template.replace("{{body}}", body).replace("{{name}}", name) | ||||
|  | ||||
|  | ||||
| class Links(Enum): | ||||
| @@ -294,11 +181,6 @@ class Links(Enum): | ||||
|             return None | ||||
|         raise ValueError("Missing switch case") | ||||
|  | ||||
|     def collection_as_one(self) -> bool: | ||||
|         if self == Links.FANCY: | ||||
|             return True | ||||
|         return False | ||||
|  | ||||
|     def extension(self) -> Optional[str]: | ||||
|         if self == Links.FANCY: | ||||
|             return ".html" | ||||
| @@ -310,48 +192,10 @@ class Links(Enum): | ||||
|             return None | ||||
|         raise ValueError("Missing switch case") | ||||
|  | ||||
|     def interpolate(self, redirect_delay: int, collection_name: str, links: list[LinkData]) -> str: | ||||
|         template = self.template() | ||||
|         if template is None: | ||||
|             raise ValueError("Cannot interpolate ignored links") | ||||
|  | ||||
|         if len(links) == 1: | ||||
|             link = links[0] | ||||
|             content = template | ||||
|             content = content.replace("{{link}}", link.url) | ||||
|             content = content.replace("{{name}}", link.name) | ||||
|             content = content.replace("{{description}}", link.description) | ||||
|             content = content.replace("{{redirect_delay}}", str(redirect_delay)) | ||||
|             return content | ||||
|         if self == Links.PLAINTEXT or self == Links.INTERNET_SHORTCUT: | ||||
|             return "\n".join(f"{link.url}" for link in links) | ||||
|  | ||||
|         # All others get coerced to fancy | ||||
|         content = cast(str, Links.FANCY.template()) | ||||
|         repeated_content = cast( | ||||
|             re.Match[str], | ||||
|             re.search(r"<!-- REPEAT START -->([\s\S]+)<!-- REPEAT END -->", content) | ||||
|         ).group(1) | ||||
|  | ||||
|         parts = [] | ||||
|         for link in links: | ||||
|             instance = repeated_content | ||||
|             instance = instance.replace("{{link}}", link.url) | ||||
|             instance = instance.replace("{{name}}", link.name) | ||||
|             instance = instance.replace("{{description}}", link.description) | ||||
|             instance = instance.replace("{{redirect_delay}}", str(redirect_delay)) | ||||
|             parts.append(instance) | ||||
|  | ||||
|         content = content.replace(repeated_content, "\n".join(parts)) | ||||
|         content = content.replace("{{name}}", collection_name) | ||||
|         content = re.sub(r"<!-- REPEAT REMOVE START -->[\s\S]+<!-- REPEAT REMOVE END -->", "", content) | ||||
|  | ||||
|         return content | ||||
|  | ||||
|     @staticmethod | ||||
|     def from_string(string: str) -> "Links": | ||||
|         try: | ||||
|             return Links(string) | ||||
|         except ValueError: | ||||
|             options = [f"'{option.value}'" for option in Links] | ||||
|             raise ValueError(f"must be one of {', '.join(options)}") | ||||
|             raise ValueError("must be one of 'ignore', 'plaintext'," | ||||
|                              " 'html', 'internet-shortcut'") | ||||
|   | ||||
| @@ -1,5 +1,3 @@ | ||||
| from typing import cast | ||||
|  | ||||
| from bs4 import BeautifulSoup, Comment, Tag | ||||
|  | ||||
| _STYLE_TAG_CONTENT = """ | ||||
| @@ -14,13 +12,6 @@ _STYLE_TAG_CONTENT = """ | ||||
|       font-weight: bold; | ||||
|     } | ||||
|  | ||||
|     .row-flex { | ||||
|       display: flex; | ||||
|     } | ||||
|     .row-flex-wrap { | ||||
|       flex-wrap: wrap; | ||||
|     } | ||||
|  | ||||
|     .accordion-head { | ||||
|       background-color: #f5f7fa; | ||||
|       padding: 0.5rem 0; | ||||
| @@ -39,10 +30,6 @@ _STYLE_TAG_CONTENT = """ | ||||
|       margin: 0.5rem 0; | ||||
|     } | ||||
|  | ||||
|     img { | ||||
|         background-color: white; | ||||
|     } | ||||
|  | ||||
|     body { | ||||
|       padding: 1em; | ||||
|       grid-template-columns: 1fr min(60rem, 90%) 1fr; | ||||
| @@ -60,11 +47,12 @@ _ARTICLE_WORTHY_CLASSES = [ | ||||
| def insert_base_markup(soup: BeautifulSoup) -> BeautifulSoup: | ||||
|     head = soup.new_tag("head") | ||||
|     soup.insert(0, head) | ||||
|     # Force UTF-8 encoding | ||||
|     head.append(soup.new_tag("meta", charset="utf-8")) | ||||
|  | ||||
|     simplecss_link: Tag = soup.new_tag("link") | ||||
|     # <link rel="stylesheet" href="https://cdn.simplecss.org/simple.css"> | ||||
|     head.append(soup.new_tag("link", rel="stylesheet", href="https://cdn.simplecss.org/simple.css")) | ||||
|     simplecss_link["rel"] = "stylesheet" | ||||
|     simplecss_link["href"] = "https://cdn.simplecss.org/simple.css" | ||||
|     head.append(simplecss_link) | ||||
|  | ||||
|     # Basic style tags for compat | ||||
|     style: Tag = soup.new_tag("style") | ||||
| @@ -75,18 +63,18 @@ def insert_base_markup(soup: BeautifulSoup) -> BeautifulSoup: | ||||
|  | ||||
|  | ||||
| def clean(soup: BeautifulSoup) -> BeautifulSoup: | ||||
|     for block in cast(list[Tag], soup.find_all(class_=lambda x: x in _ARTICLE_WORTHY_CLASSES)): | ||||
|     for block in soup.find_all(class_=lambda x: x in _ARTICLE_WORTHY_CLASSES): | ||||
|         block.name = "article" | ||||
|  | ||||
|     for block in cast(list[Tag], soup.find_all("h3")): | ||||
|     for block in soup.find_all("h3"): | ||||
|         block.name = "div" | ||||
|  | ||||
|     for block in cast(list[Tag], soup.find_all("h1")): | ||||
|     for block in soup.find_all("h1"): | ||||
|         block.name = "h3" | ||||
|  | ||||
|     for block in cast(list[Tag], soup.find_all(class_="ilc_va_ihcap_VAccordIHeadCap")): | ||||
|     for block in soup.find_all(class_="ilc_va_ihcap_VAccordIHeadCap"): | ||||
|         block.name = "h3" | ||||
|         block["class"] += ["accordion-head"]  # type: ignore | ||||
|         block["class"] += ["accordion-head"] | ||||
|  | ||||
|     for dummy in soup.select(".ilc_text_block_Standard.ilc_Paragraph"): | ||||
|         children = list(dummy.children) | ||||
| @@ -97,12 +85,7 @@ def clean(soup: BeautifulSoup) -> BeautifulSoup: | ||||
|         if isinstance(type(children[0]), Comment): | ||||
|             dummy.decompose() | ||||
|  | ||||
|     # Delete video figures, as they cannot be internalized anyway | ||||
|     for video in soup.select(".ilc_media_cont_MediaContainerHighlighted .ilPageVideo"): | ||||
|         if figure := video.find_parent("figure"): | ||||
|             figure.decompose() | ||||
|  | ||||
|     for hrule_imposter in cast(list[Tag], soup.find_all(class_="ilc_section_Separator")): | ||||
|     for hrule_imposter in soup.find_all(class_="ilc_section_Separator"): | ||||
|         hrule_imposter.insert(0, soup.new_tag("hr")) | ||||
|  | ||||
|     return soup | ||||
|   | ||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @@ -1,129 +0,0 @@ | ||||
| from typing import Any, Optional, cast | ||||
|  | ||||
| import aiohttp | ||||
| import yarl | ||||
| from bs4 import BeautifulSoup, Tag | ||||
|  | ||||
| from ...auth import Authenticator, TfaAuthenticator | ||||
| from ...logging import log | ||||
| from ...utils import soupify | ||||
| from ..crawler import CrawlError | ||||
|  | ||||
|  | ||||
| class ShibbolethLogin: | ||||
|     """ | ||||
|     Login via shibboleth system. | ||||
|     """ | ||||
|  | ||||
|     def __init__( | ||||
|         self, ilias_url: str, authenticator: Authenticator, tfa_authenticator: Optional[Authenticator] | ||||
|     ) -> None: | ||||
|         self._ilias_url = ilias_url | ||||
|         self._auth = authenticator | ||||
|         self._tfa_auth = tfa_authenticator | ||||
|  | ||||
|     async def login(self, sess: aiohttp.ClientSession) -> None: | ||||
|         """ | ||||
|         Performs the ILIAS Shibboleth authentication dance and saves the login | ||||
|         cookies it receives. | ||||
|  | ||||
|         This function should only be called whenever it is detected that you're | ||||
|         not logged in. The cookies obtained should be good for a few minutes, | ||||
|         maybe even an hour or two. | ||||
|         """ | ||||
|  | ||||
|         # Equivalent: Click on "Mit KIT-Account anmelden" button in | ||||
|         # https://ilias.studium.kit.edu/login.php | ||||
|         url = f"{self._ilias_url}/shib_login.php" | ||||
|         async with sess.get(url) as response: | ||||
|             shib_url = response.url | ||||
|             if str(shib_url).startswith(self._ilias_url): | ||||
|                 log.explain( | ||||
|                     "ILIAS recognized our shib token and logged us in in the background, returning" | ||||
|                 ) | ||||
|                 return | ||||
|             soup: BeautifulSoup = soupify(await response.read()) | ||||
|  | ||||
|         # Attempt to login using credentials, if necessary | ||||
|         while not self._login_successful(soup): | ||||
|             # Searching the form here so that this fails before asking for | ||||
|             # credentials rather than after asking. | ||||
|             form = cast(Tag, soup.find("form", {"method": "post"})) | ||||
|             action = cast(str, form["action"]) | ||||
|  | ||||
|             # Equivalent: Enter credentials in | ||||
|             # https://idp.scc.kit.edu/idp/profile/SAML2/Redirect/SSO | ||||
|             url = str(shib_url.origin()) + action | ||||
|             username, password = await self._auth.credentials() | ||||
|             data = { | ||||
|                 "_eventId_proceed": "", | ||||
|                 "j_username": username, | ||||
|                 "j_password": password, | ||||
|                 "fudis_web_authn_assertion_input": "", | ||||
|             } | ||||
|             if csrf_token_input := form.find("input", {"name": "csrf_token"}): | ||||
|                 data["csrf_token"] = csrf_token_input["value"]  # type: ignore | ||||
|             soup = await _post(sess, url, data) | ||||
|  | ||||
|             if soup.find(id="attributeRelease"): | ||||
|                 raise CrawlError( | ||||
|                     "ILIAS Shibboleth entitlements changed! " | ||||
|                     "Please log in once in your browser and review them" | ||||
|                 ) | ||||
|  | ||||
|             if self._tfa_required(soup): | ||||
|                 soup = await self._authenticate_tfa(sess, soup, shib_url) | ||||
|  | ||||
|             if not self._login_successful(soup): | ||||
|                 self._auth.invalidate_credentials() | ||||
|  | ||||
|         # Equivalent: Being redirected via JS automatically | ||||
|         # (or clicking "Continue" if you have JS disabled) | ||||
|         relay_state = cast(Tag, soup.find("input", {"name": "RelayState"})) | ||||
|         saml_response = cast(Tag, soup.find("input", {"name": "SAMLResponse"})) | ||||
|         url = form = soup.find("form", {"method": "post"})["action"]  # type: ignore | ||||
|         data = {  # using the info obtained in the while loop above | ||||
|             "RelayState": cast(str, relay_state["value"]), | ||||
|             "SAMLResponse": cast(str, saml_response["value"]), | ||||
|         } | ||||
|         await sess.post(cast(str, url), data=data) | ||||
|  | ||||
|     async def _authenticate_tfa( | ||||
|         self, session: aiohttp.ClientSession, soup: BeautifulSoup, shib_url: yarl.URL | ||||
|     ) -> BeautifulSoup: | ||||
|         if not self._tfa_auth: | ||||
|             self._tfa_auth = TfaAuthenticator("ilias-anon-tfa") | ||||
|  | ||||
|         tfa_token = await self._tfa_auth.password() | ||||
|  | ||||
|         # Searching the form here so that this fails before asking for | ||||
|         # credentials rather than after asking. | ||||
|         form = cast(Tag, soup.find("form", {"method": "post"})) | ||||
|         action = cast(str, form["action"]) | ||||
|  | ||||
|         # Equivalent: Enter token in | ||||
|         # https://idp.scc.kit.edu/idp/profile/SAML2/Redirect/SSO | ||||
|         url = str(shib_url.origin()) + action | ||||
|         username, password = await self._auth.credentials() | ||||
|         data = { | ||||
|             "_eventId_proceed": "", | ||||
|             "fudis_otp_input": tfa_token, | ||||
|         } | ||||
|         if csrf_token_input := form.find("input", {"name": "csrf_token"}): | ||||
|             data["csrf_token"] = csrf_token_input["value"]  # type: ignore | ||||
|         return await _post(session, url, data) | ||||
|  | ||||
|     @staticmethod | ||||
|     def _login_successful(soup: BeautifulSoup) -> bool: | ||||
|         relay_state = soup.find("input", {"name": "RelayState"}) | ||||
|         saml_response = soup.find("input", {"name": "SAMLResponse"}) | ||||
|         return relay_state is not None and saml_response is not None | ||||
|  | ||||
|     @staticmethod | ||||
|     def _tfa_required(soup: BeautifulSoup) -> bool: | ||||
|         return soup.find(id="fudiscr-form") is not None | ||||
|  | ||||
|  | ||||
| async def _post(session: aiohttp.ClientSession, url: str, data: Any) -> BeautifulSoup: | ||||
|     async with session.post(url, data=data) as response: | ||||
|         return soupify(await response.read()) | ||||
| @@ -1,9 +1,8 @@ | ||||
| import os | ||||
| import re | ||||
| from dataclasses import dataclass | ||||
| from datetime import datetime | ||||
| from pathlib import PurePath | ||||
| from typing import Any, Awaitable, Generator, Iterable, List, Optional, Pattern, Tuple, Union, cast | ||||
| from typing import Awaitable, List, Optional, Pattern, Set, Tuple, Union | ||||
| from urllib.parse import urljoin | ||||
|  | ||||
| from bs4 import BeautifulSoup, Tag | ||||
| @@ -32,24 +31,24 @@ class KitIpdCrawlerSection(HttpCrawlerSection): | ||||
|         return re.compile(regex) | ||||
|  | ||||
|  | ||||
| @dataclass | ||||
| @dataclass(unsafe_hash=True) | ||||
| class KitIpdFile: | ||||
|     name: str | ||||
|     url: str | ||||
|  | ||||
|     def explain(self) -> None: | ||||
|         log.explain(f"File {self.name!r} (href={self.url!r})") | ||||
|  | ||||
|  | ||||
| @dataclass | ||||
| class KitIpdFolder: | ||||
|     name: str | ||||
|     entries: List[Union[KitIpdFile, "KitIpdFolder"]] | ||||
|     files: List[KitIpdFile] | ||||
|  | ||||
|     def explain(self) -> None: | ||||
|         log.explain_topic(f"Folder {self.name!r}") | ||||
|         for entry in self.entries: | ||||
|             entry.explain() | ||||
|         for file in self.files: | ||||
|             log.explain(f"File {file.name!r} (href={file.url!r})") | ||||
|  | ||||
|     def __hash__(self) -> int: | ||||
|         return self.name.__hash__() | ||||
|  | ||||
|  | ||||
| class KitIpdCrawler(HttpCrawler): | ||||
| @@ -73,96 +72,81 @@ class KitIpdCrawler(HttpCrawler): | ||||
|  | ||||
|         async with maybe_cl: | ||||
|             for item in await self._fetch_items(): | ||||
|                 item.explain() | ||||
|                 if isinstance(item, KitIpdFolder): | ||||
|                     tasks.append(self._crawl_folder(PurePath("."), item)) | ||||
|                     tasks.append(self._crawl_folder(item)) | ||||
|                 else: | ||||
|                     log.explain_topic(f"Orphan file {item.name!r} (href={item.url!r})") | ||||
|                     log.explain("Attributing it to root folder") | ||||
|                     # do this here to at least be sequential and not parallel (rate limiting is hard, as the | ||||
|                     # crawl abstraction does not hold for these requests) | ||||
|                     etag, mtime = await self._request_resource_version(item.url) | ||||
|                     tasks.append(self._download_file(PurePath("."), item, etag, mtime)) | ||||
|                     # Orphan files are placed in the root folder | ||||
|                     tasks.append(self._download_file(PurePath("."), item)) | ||||
|  | ||||
|         await self.gather(tasks) | ||||
|  | ||||
|     async def _crawl_folder(self, parent: PurePath, folder: KitIpdFolder) -> None: | ||||
|         path = parent / folder.name | ||||
|     async def _crawl_folder(self, folder: KitIpdFolder) -> None: | ||||
|         path = PurePath(folder.name) | ||||
|         if not await self.crawl(path): | ||||
|             return | ||||
|  | ||||
|         tasks = [] | ||||
|         for entry in folder.entries: | ||||
|             if isinstance(entry, KitIpdFolder): | ||||
|                 tasks.append(self._crawl_folder(path, entry)) | ||||
|             else: | ||||
|                 # do this here to at least be sequential and not parallel (rate limiting is hard, as the crawl | ||||
|                 # abstraction does not hold for these requests) | ||||
|                 etag, mtime = await self._request_resource_version(entry.url) | ||||
|                 tasks.append(self._download_file(path, entry, etag, mtime)) | ||||
|         tasks = [self._download_file(path, file) for file in folder.files] | ||||
|  | ||||
|         await self.gather(tasks) | ||||
|  | ||||
|     async def _download_file( | ||||
|         self, | ||||
|         parent: PurePath, | ||||
|         file: KitIpdFile, | ||||
|         etag: Optional[str], | ||||
|         mtime: Optional[datetime] | ||||
|     ) -> None: | ||||
|     async def _download_file(self, parent: PurePath, file: KitIpdFile) -> None: | ||||
|         element_path = parent / file.name | ||||
|  | ||||
|         prev_etag = self._get_previous_etag_from_report(element_path) | ||||
|         etag_differs = None if prev_etag is None else prev_etag != etag | ||||
|  | ||||
|         maybe_dl = await self.download(element_path, etag_differs=etag_differs, mtime=mtime) | ||||
|         maybe_dl = await self.download(element_path) | ||||
|         if not maybe_dl: | ||||
|             # keep storing the known file's etag | ||||
|             if prev_etag: | ||||
|                 self._add_etag_to_report(element_path, prev_etag) | ||||
|             return | ||||
|  | ||||
|         async with maybe_dl as (bar, sink): | ||||
|             await self._stream_from_url(file.url, element_path, sink, bar) | ||||
|             await self._stream_from_url(file.url, sink, bar) | ||||
|  | ||||
|     async def _fetch_items(self) -> Iterable[Union[KitIpdFile, KitIpdFolder]]: | ||||
|     async def _fetch_items(self) -> Set[Union[KitIpdFile, KitIpdFolder]]: | ||||
|         page, url = await self.get_page() | ||||
|         elements: List[Tag] = self._find_file_links(page) | ||||
|         items: Set[Union[KitIpdFile, KitIpdFolder]] = set() | ||||
|  | ||||
|         # do not add unnecessary nesting for a single <h1> heading | ||||
|         drop_h1: bool = len(page.find_all(name="h1")) <= 1 | ||||
|  | ||||
|         folder_tree: KitIpdFolder = KitIpdFolder(".", []) | ||||
|         for element in elements: | ||||
|             parent = HttpCrawler.get_folder_structure_from_heading_hierarchy(element, drop_h1) | ||||
|             file = self._extract_file(element, url) | ||||
|             folder_label = self._find_folder_label(element) | ||||
|             if folder_label: | ||||
|                 folder = self._extract_folder(folder_label, url) | ||||
|                 if folder not in items: | ||||
|                     items.add(folder) | ||||
|                     folder.explain() | ||||
|             else: | ||||
|                 file = self._extract_file(element, url) | ||||
|                 items.add(file) | ||||
|                 log.explain_topic(f"Orphan file {file.name!r} (href={file.url!r})") | ||||
|                 log.explain("Attributing it to root folder") | ||||
|  | ||||
|             current_folder: KitIpdFolder = folder_tree | ||||
|             for folder_name in parent.parts: | ||||
|                 # helps the type checker to verify that current_folder is indeed a folder | ||||
|                 def subfolders() -> Generator[KitIpdFolder, Any, None]: | ||||
|                     return (entry for entry in current_folder.entries if isinstance(entry, KitIpdFolder)) | ||||
|         return items | ||||
|  | ||||
|                 if not any(entry.name == folder_name for entry in subfolders()): | ||||
|                     current_folder.entries.append(KitIpdFolder(folder_name, [])) | ||||
|                 current_folder = next(entry for entry in subfolders() if entry.name == folder_name) | ||||
|     def _extract_folder(self, folder_tag: Tag, url: str) -> KitIpdFolder: | ||||
|         files: List[KitIpdFile] = [] | ||||
|         name = folder_tag.getText().strip() | ||||
|  | ||||
|             current_folder.entries.append(file) | ||||
|         container: Tag = folder_tag.findNextSibling(name="table") | ||||
|         for link in self._find_file_links(container): | ||||
|             files.append(self._extract_file(link, url)) | ||||
|  | ||||
|         return folder_tree.entries | ||||
|         return KitIpdFolder(name, files) | ||||
|  | ||||
|     @staticmethod | ||||
|     def _find_folder_label(file_link: Tag) -> Optional[Tag]: | ||||
|         enclosing_table: Tag = file_link.findParent(name="table") | ||||
|         if enclosing_table is None: | ||||
|             return None | ||||
|         return enclosing_table.findPreviousSibling(name=re.compile("^h[1-6]$")) | ||||
|  | ||||
|     def _extract_file(self, link: Tag, url: str) -> KitIpdFile: | ||||
|         url = self._abs_url_from_link(url, link) | ||||
|         name = os.path.basename(url) | ||||
|         return KitIpdFile(name, url) | ||||
|  | ||||
|     def _find_file_links(self, tag: Union[Tag, BeautifulSoup]) -> list[Tag]: | ||||
|         return cast(list[Tag], tag.find_all(name="a", attrs={"href": self._file_regex})) | ||||
|     def _find_file_links(self, tag: Union[Tag, BeautifulSoup]) -> List[Tag]: | ||||
|         return tag.findAll(name="a", attrs={"href": self._file_regex}) | ||||
|  | ||||
|     def _abs_url_from_link(self, url: str, link_tag: Tag) -> str: | ||||
|         return urljoin(url, cast(str, link_tag.get("href"))) | ||||
|         return urljoin(url, link_tag.get("href")) | ||||
|  | ||||
|     async def _stream_from_url(self, url: str, path: PurePath, sink: FileSink, bar: ProgressBar) -> None: | ||||
|     async def _stream_from_url(self, url: str, sink: FileSink, bar: ProgressBar) -> None: | ||||
|         async with self.session.get(url, allow_redirects=False) as resp: | ||||
|             if resp.status == 403: | ||||
|                 raise CrawlError("Received a 403. Are you within the KIT network/VPN?") | ||||
| @@ -175,8 +159,6 @@ class KitIpdCrawler(HttpCrawler): | ||||
|  | ||||
|             sink.done() | ||||
|  | ||||
|             self._add_etag_to_report(path, resp.headers.get("ETag")) | ||||
|  | ||||
|     async def get_page(self) -> Tuple[BeautifulSoup, str]: | ||||
|         async with self.session.get(self._url) as request: | ||||
|             # The web page for Algorithmen für Routenplanung contains some | ||||
|   | ||||
| @@ -1,8 +1,9 @@ | ||||
| import asyncio | ||||
| import sys | ||||
| import traceback | ||||
| from contextlib import AbstractContextManager, asynccontextmanager, contextmanager | ||||
| from typing import AsyncIterator, Iterator, List, Optional | ||||
| from contextlib import asynccontextmanager, contextmanager | ||||
| # TODO In Python 3.9 and above, ContextManager is deprecated | ||||
| from typing import AsyncIterator, ContextManager, Iterator, List, Optional | ||||
|  | ||||
| from rich.console import Console, Group | ||||
| from rich.live import Live | ||||
| @@ -260,7 +261,7 @@ directly or as a GitHub issue: https://github.com/Garmelon/PFERD/issues/new | ||||
|             action: str, | ||||
|             text: str, | ||||
|             total: Optional[float] = None, | ||||
|     ) -> AbstractContextManager[ProgressBar]: | ||||
|     ) -> ContextManager[ProgressBar]: | ||||
|         """ | ||||
|         Allows markup in the "style" argument which will be applied to the | ||||
|         "action" string. | ||||
| @@ -276,7 +277,7 @@ directly or as a GitHub issue: https://github.com/Garmelon/PFERD/issues/new | ||||
|             action: str, | ||||
|             text: str, | ||||
|             total: Optional[float] = None, | ||||
|     ) -> AbstractContextManager[ProgressBar]: | ||||
|     ) -> ContextManager[ProgressBar]: | ||||
|         """ | ||||
|         Allows markup in the "style" argument which will be applied to the | ||||
|         "action" string. | ||||
|   | ||||
| @@ -57,7 +57,6 @@ class OnConflict(Enum): | ||||
|  | ||||
| @dataclass | ||||
| class Heuristics: | ||||
|     etag_differs: Optional[bool] | ||||
|     mtime: Optional[datetime] | ||||
|  | ||||
|  | ||||
| @@ -234,16 +233,8 @@ class OutputDirectory: | ||||
|  | ||||
|         remote_newer = None | ||||
|  | ||||
|         # ETag should be a more reliable indicator than mtime, so we check it first | ||||
|         if heuristics.etag_differs is not None: | ||||
|             remote_newer = heuristics.etag_differs | ||||
|             if remote_newer: | ||||
|                 log.explain("Remote file's entity tag differs") | ||||
|             else: | ||||
|                 log.explain("Remote file's entity tag is the same") | ||||
|  | ||||
|         # Python on Windows crashes when faced with timestamps around the Unix epoch | ||||
|         if remote_newer is None and heuristics.mtime and (os.name != "nt" or heuristics.mtime.year > 1970): | ||||
|         if heuristics.mtime and (os.name != "nt" or heuristics.mtime.year > 1970): | ||||
|             mtime = heuristics.mtime | ||||
|             remote_newer = mtime.timestamp() > stat.st_mtime | ||||
|             if remote_newer: | ||||
| @@ -371,28 +362,10 @@ class OutputDirectory: | ||||
|  | ||||
|         raise OutputDirError("Failed to create temporary file") | ||||
|  | ||||
|     def should_try_download( | ||||
|         self, | ||||
|         path: PurePath, | ||||
|         *, | ||||
|         etag_differs: Optional[bool] = None, | ||||
|         mtime: Optional[datetime] = None, | ||||
|         redownload: Optional[Redownload] = None, | ||||
|         on_conflict: Optional[OnConflict] = None, | ||||
|     ) -> bool: | ||||
|         heuristics = Heuristics(etag_differs, mtime) | ||||
|         redownload = self._redownload if redownload is None else redownload | ||||
|         on_conflict = self._on_conflict if on_conflict is None else on_conflict | ||||
|         local_path = self.resolve(path) | ||||
|  | ||||
|         return self._should_download(local_path, heuristics, redownload, on_conflict) | ||||
|  | ||||
|     async def download( | ||||
|             self, | ||||
|             remote_path: PurePath, | ||||
|             path: PurePath, | ||||
|             *, | ||||
|             etag_differs: Optional[bool] = None, | ||||
|             mtime: Optional[datetime] = None, | ||||
|             redownload: Optional[Redownload] = None, | ||||
|             on_conflict: Optional[OnConflict] = None, | ||||
| @@ -402,7 +375,7 @@ class OutputDirectory: | ||||
|         MarkConflictError. | ||||
|         """ | ||||
|  | ||||
|         heuristics = Heuristics(etag_differs, mtime) | ||||
|         heuristics = Heuristics(mtime) | ||||
|         redownload = self._redownload if redownload is None else redownload | ||||
|         on_conflict = self._on_conflict if on_conflict is None else on_conflict | ||||
|         local_path = self.resolve(path) | ||||
| @@ -442,6 +415,7 @@ class OutputDirectory: | ||||
|  | ||||
|     def _update_metadata(self, info: DownloadInfo) -> None: | ||||
|         if mtime := info.heuristics.mtime: | ||||
|             log.explain(f"Setting mtime to {mtime}") | ||||
|             mtimestamp = mtime.timestamp() | ||||
|             os.utime(info.local_path, times=(mtimestamp, mtimestamp)) | ||||
|  | ||||
|   | ||||
| @@ -1,4 +1,4 @@ | ||||
| from pathlib import Path, PurePath | ||||
| from pathlib import Path | ||||
| from typing import Dict, List, Optional | ||||
|  | ||||
| from rich.markup import escape | ||||
| @@ -168,24 +168,19 @@ class Pferd: | ||||
|             log.report("") | ||||
|             log.report(f"[bold bright_cyan]Report[/] for {escape(name)}") | ||||
|  | ||||
|             def fmt_path_link(relative_path: PurePath) -> str: | ||||
|                 # We need to URL-encode the path because it might contain spaces or special characters | ||||
|                 link = crawler.output_dir.resolve(relative_path).absolute().as_uri() | ||||
|                 return f"[link={link}]{fmt_path(relative_path)}[/link]" | ||||
|  | ||||
|             something_changed = False | ||||
|             for path in sorted(crawler.report.added_files): | ||||
|                 something_changed = True | ||||
|                 log.report(f"  [bold bright_green]Added[/] {fmt_path_link(path)}") | ||||
|                 log.report(f"  [bold bright_green]Added[/] {fmt_path(path)}") | ||||
|             for path in sorted(crawler.report.changed_files): | ||||
|                 something_changed = True | ||||
|                 log.report(f"  [bold bright_yellow]Changed[/] {fmt_path_link(path)}") | ||||
|                 log.report(f"  [bold bright_yellow]Changed[/] {fmt_path(path)}") | ||||
|             for path in sorted(crawler.report.deleted_files): | ||||
|                 something_changed = True | ||||
|                 log.report(f"  [bold bright_magenta]Deleted[/] {fmt_path(path)}") | ||||
|             for path in sorted(crawler.report.not_deleted_files): | ||||
|                 something_changed = True | ||||
|                 log.report_not_deleted(f"  [bold bright_magenta]Not deleted[/] {fmt_path_link(path)}") | ||||
|                 log.report_not_deleted(f"  [bold bright_magenta]Not deleted[/] {fmt_path(path)}") | ||||
|  | ||||
|             for warning in crawler.report.encountered_warnings: | ||||
|                 something_changed = True | ||||
|   | ||||
| @@ -34,6 +34,15 @@ class MarkConflictError(Exception): | ||||
|         self.collides_with = collides_with | ||||
|  | ||||
|  | ||||
| # TODO Use PurePath.is_relative_to when updating to 3.9 | ||||
| def is_relative_to(a: PurePath, b: PurePath) -> bool: | ||||
|     try: | ||||
|         a.relative_to(b) | ||||
|         return True | ||||
|     except ValueError: | ||||
|         return False | ||||
|  | ||||
|  | ||||
| class Report: | ||||
|     """ | ||||
|     A report of a synchronization. Includes all files found by the crawler, as | ||||
| @@ -164,7 +173,7 @@ class Report: | ||||
|             if path == other: | ||||
|                 raise MarkDuplicateError(path) | ||||
|  | ||||
|             if path.is_relative_to(other) or other.is_relative_to(path): | ||||
|             if is_relative_to(path, other) or is_relative_to(other, path): | ||||
|                 raise MarkConflictError(path, other) | ||||
|  | ||||
|         self.known_files.add(path) | ||||
|   | ||||
| @@ -110,10 +110,6 @@ class ExactReTf(Transformation): | ||||
|             except ValueError: | ||||
|                 pass | ||||
|  | ||||
|         named_groups: Dict[str, str] = match.groupdict() | ||||
|         for name, capture in named_groups.items(): | ||||
|             locals_dir[name] = capture | ||||
|  | ||||
|         result = eval(f"f{right!r}", {}, locals_dir) | ||||
|         return Transformed(PurePath(result)) | ||||
|  | ||||
|   | ||||
| @@ -1,2 +1,2 @@ | ||||
| NAME = "PFERD" | ||||
| VERSION = "3.8.3" | ||||
| VERSION = "3.5.0" | ||||
|   | ||||
							
								
								
									
										13
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										13
									
								
								README.md
									
									
									
									
									
								
							| @@ -17,7 +17,7 @@ Binaries for Linux, Windows and Mac can be downloaded directly from the | ||||
|  | ||||
| ### With pip | ||||
|  | ||||
| Ensure you have at least Python 3.11 installed. Run the following command to | ||||
| Ensure you have at least Python 3.9 installed. Run the following command to | ||||
| install PFERD or upgrade it to the latest version: | ||||
|  | ||||
| ``` | ||||
| @@ -56,17 +56,6 @@ Also, you can download most ILIAS pages directly like this: | ||||
| $ pferd kit-ilias-web <url> <output_directory> | ||||
| ``` | ||||
|  | ||||
| PFERD supports other ILIAS instances as well, using the `ilias-web` crawler (see | ||||
| the [config section on `ilias-web`](CONFIG.md#the-ilias-web-crawler) for more | ||||
| detail on the `base-url` and `client-id` parameters): | ||||
|  | ||||
| ``` | ||||
| $ pferd ilias-web \ | ||||
|     --base-url https://ilias.my-university.example \ | ||||
|     --client-id My_University desktop \ | ||||
|     <output_directory> | ||||
| ``` | ||||
|  | ||||
| However, the CLI only lets you download a single thing at a time, and the | ||||
| resulting command can grow long quite quickly. Because of this, PFERD can also | ||||
| be used with a config file. | ||||
|   | ||||
							
								
								
									
										8
									
								
								flake.lock
									
									
									
										generated
									
									
									
								
							
							
						
						
									
										8
									
								
								flake.lock
									
									
									
										generated
									
									
									
								
							| @@ -2,16 +2,16 @@ | ||||
|   "nodes": { | ||||
|     "nixpkgs": { | ||||
|       "locked": { | ||||
|         "lastModified": 1744440957, | ||||
|         "narHash": "sha256-FHlSkNqFmPxPJvy+6fNLaNeWnF1lZSgqVCl/eWaJRc4=", | ||||
|         "lastModified": 1694499547, | ||||
|         "narHash": "sha256-R7xMz1Iia6JthWRHDn36s/E248WB1/je62ovC/dUVKI=", | ||||
|         "owner": "NixOS", | ||||
|         "repo": "nixpkgs", | ||||
|         "rev": "26d499fc9f1d567283d5d56fcf367edd815dba1d", | ||||
|         "rev": "e5f018cf150e29aac26c61dac0790ea023c46b24", | ||||
|         "type": "github" | ||||
|       }, | ||||
|       "original": { | ||||
|         "owner": "NixOS", | ||||
|         "ref": "nixos-24.11", | ||||
|         "ref": "nixos-23.05", | ||||
|         "repo": "nixpkgs", | ||||
|         "type": "github" | ||||
|       } | ||||
|   | ||||
| @@ -2,7 +2,7 @@ | ||||
|   description = "Tool for downloading course-related files from ILIAS"; | ||||
|  | ||||
|   inputs = { | ||||
|     nixpkgs.url = "github:NixOS/nixpkgs/nixos-24.11"; | ||||
|     nixpkgs.url = "github:NixOS/nixpkgs/nixos-23.05"; | ||||
|   }; | ||||
|  | ||||
|   outputs = { self, nixpkgs }: | ||||
|   | ||||
							
								
								
									
										11
									
								
								mypy.ini
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										11
									
								
								mypy.ini
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,11 @@ | ||||
| [mypy] | ||||
| disallow_any_generics = True | ||||
| disallow_untyped_defs = True | ||||
| disallow_incomplete_defs = True | ||||
| no_implicit_optional = True | ||||
| warn_unused_ignores = True | ||||
| warn_unreachable = True | ||||
| show_error_context = True | ||||
|  | ||||
| [mypy-rich.*,bs4,keyring] | ||||
| ignore_missing_imports = True | ||||
| @@ -1,42 +1,3 @@ | ||||
| [build-system] | ||||
| requires = ["setuptools", "wheel"] | ||||
| build-backend = "setuptools.build_meta" | ||||
|  | ||||
| [project] | ||||
| name = "PFERD" | ||||
| dependencies = [ | ||||
|   "aiohttp>=3.8.1", | ||||
|   "beautifulsoup4>=4.10.0", | ||||
|   "rich>=11.0.0", | ||||
|   "keyring>=23.5.0", | ||||
|   "certifi>=2021.10.8" | ||||
| ] | ||||
| dynamic = ["version"] | ||||
| requires-python = ">=3.11" | ||||
|  | ||||
| [project.scripts] | ||||
| pferd = "PFERD.__main__:main" | ||||
|  | ||||
| [tool.setuptools.dynamic] | ||||
| version = {attr = "PFERD.version.VERSION"} | ||||
|  | ||||
| [tool.flake8] | ||||
| max-line-length = 110 | ||||
|  | ||||
| [tool.isort] | ||||
| line_length = 110 | ||||
|  | ||||
| [tool.autopep8] | ||||
| max_line_length = 110 | ||||
| in-place = true | ||||
| recursive = true | ||||
|  | ||||
| [tool.mypy] | ||||
| disallow_any_generics = true | ||||
| disallow_untyped_defs = true | ||||
| disallow_incomplete_defs = true | ||||
| no_implicit_optional = true | ||||
| warn_unused_ignores = true | ||||
| warn_unreachable = true | ||||
| show_error_context = true | ||||
| ignore_missing_imports = true | ||||
|   | ||||
| @@ -1,8 +1,8 @@ | ||||
| #!/usr/bin/env python3 | ||||
|  | ||||
| import argparse | ||||
| import re | ||||
| import time | ||||
| import re | ||||
| from subprocess import run | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -2,5 +2,5 @@ | ||||
|  | ||||
| set -e | ||||
|  | ||||
| mypy . | ||||
| mypy PFERD | ||||
| flake8 PFERD | ||||
|   | ||||
| @@ -2,5 +2,5 @@ | ||||
|  | ||||
| set -e | ||||
|  | ||||
| autopep8 . | ||||
| isort . | ||||
| autopep8 --recursive --in-place PFERD | ||||
| isort PFERD | ||||
|   | ||||
| @@ -13,5 +13,5 @@ pip install --upgrade setuptools | ||||
| pip install --editable . | ||||
|  | ||||
| # Installing tools and type hints | ||||
| pip install --upgrade mypy flake8 flake8-pyproject autopep8 isort pyinstaller | ||||
| pip install --upgrade mypy flake8 autopep8 isort pyinstaller | ||||
| pip install --upgrade types-chardet types-certifi | ||||
|   | ||||
							
								
								
									
										23
									
								
								setup.cfg
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										23
									
								
								setup.cfg
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,23 @@ | ||||
| [metadata] | ||||
| name = PFERD | ||||
| version = attr: PFERD.version.VERSION | ||||
|  | ||||
| [options] | ||||
| packages = find: | ||||
| python_requires = >=3.9 | ||||
| install_requires = | ||||
|   aiohttp>=3.8.1 | ||||
|   beautifulsoup4>=4.10.0 | ||||
|   rich>=11.0.0 | ||||
|   keyring>=23.5.0 | ||||
|   certifi>=2021.10.8 | ||||
|  | ||||
| [options.entry_points] | ||||
| console_scripts = | ||||
|   pferd = PFERD.__main__:main | ||||
|  | ||||
| [flake8] | ||||
| max_line_length = 110 | ||||
|  | ||||
| [isort] | ||||
| line_length = 110 | ||||
		Reference in New Issue
	
	Block a user