Check for new versions at startup

2026-01-11 14:52:30 +01:00 · 2022-10-24 17:31:34 +02:00
18 changed files with 126 additions and 494 deletions
--- a/.github/workflows/build-and-release.yml
+++ b/.github/workflows/build-and-release.yml
@@ -17,9 +17,9 @@ jobs:
        python: ["3.9"]
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v2
-      - uses: actions/setup-python@v4
+      - uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python }}
@@ -45,7 +45,7 @@ jobs:
        run: mv dist/pferd* dist/pferd-${{ matrix.os }}
      - name: Upload binary
-        uses: actions/upload-artifact@v3
+        uses: actions/upload-artifact@v2
        with:
          name: Binaries
          path: dist/pferd-${{ matrix.os }}
@@ -57,7 +57,7 @@ jobs:
    steps:
      - name: Download binaries
-        uses: actions/download-artifact@v3
+        uses: actions/download-artifact@v2
        with:
          name: Binaries
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -23,42 +23,8 @@ ambiguous situations.
 ## Unreleased
 ### Fixed
- Crawling of courses with the timeline view as the default tab
+- Forum crawling crashing when parsing empty (= 0 messages) threads
 - Crawling of file and custom opencast cards
 - Crawling of button cards without descriptions
 - Abort crawling when encountering an unexpected ilias root page redirect
 ### Added
 - `no-delete-prompt-overwrite` conflict resolution strategy
 - support for ILIAS learning modules
 - `show_not_deleted` option to stop printing the "Not Deleted" status or report
  message. This combines nicely with the `no-delete-prompt-overwrite` strategy,
  causing PFERD to mostly ignore local-only files.
 ## 3.4.3 - 2022-11-29
 ### Added
 - Missing documentation for `forums` option
 ### Changed
 - Clear up error message shown when multiple paths are found to an element
 ### Fixed
 - IPD crawler unnecessarily appending trailing slashes
 - Crawling opencast when ILIAS is set to English
 ## 3.4.2 - 2022-10-26
 ### Added
 - Recognize and crawl content pages in cards
 - Recognize and ignore surveys
 ### Fixed
 - Forum crawling crashing when a thread has no messages at all
 - Forum crawling crashing when a forum has no threads at all
 - Ilias login failing in some cases
 - Crawling of paginated future meetings
 - IPD crawler handling of URLs without trailing slash
 ## 3.4.1 - 2022-08-17
--- a/CONFIG.md
+++ b/CONFIG.md
@@ -26,9 +26,6 @@ default values for the other sections.
  `Added ...`) while running a crawler. (Default: `yes`)
 - `report`: Whether PFERD should print a report of added, changed and deleted
   local files for all crawlers before exiting. (Default: `yes`)
 - `show_not_deleted`: Whether PFERD should print messages in status and report
   when a local-only file wasn't deleted. Combines nicely with the
   `no-delete-prompt-overwrite` conflict resolution strategy.
 - `share_cookies`: Whether crawlers should share cookies where applicable. For
  example, some crawlers share cookies if they crawl the same website using the
  same account. (Default: `yes`)
@@ -78,9 +75,6 @@ common to all crawlers:
      using `prompt` and always choosing "yes".
    - `no-delete`: Never delete local files, but overwrite local files if the
      remote file is different.
    - `no-delete-prompt-overwrite`: Never delete local files, but prompt to
      overwrite local files if the remote file is different. Combines nicely
      with the `show_not_deleted` option.
 - `transform`: Rules for renaming and excluding certain files and directories.
  For more details, see [this section](#transformation-rules). (Default: empty)
 - `tasks`: The maximum number of concurrent tasks (such as crawling or
@@ -92,9 +86,6 @@ common to all crawlers:
  load for the crawl target. (Default: `0.0`)
 - `windows_paths`: Whether PFERD should find alternative names for paths that
  are invalid on Windows. (Default: `yes` on Windows, `no` otherwise)
 - `aliases`: List of strings that are considered as an alias when invoking with
  the `--crawler` or `-C` flag. If there is more than one crawl section with
  the same aliases all are selected. Thereby, you can group different crawlers.
 Some crawlers may also require credentials for authentication. To configure how
 the crawler obtains its credentials, the `auth` option is used. It is set to the
@@ -109,7 +100,6 @@ username = foo
 password = bar
 [crawl:something]
 aliases = [sth, some]
 type = some-complex-crawler
 auth = auth:example
 on_conflict = no-delete
@@ -191,7 +181,6 @@ script once per day should be fine.
  redirect to the actual URL. Set to a negative value to disable the automatic
  redirect. (Default: `-1`)
 - `videos`: Whether to download videos. (Default: `no`)
 - `forums`: Whether to download forum threads. (Default: `no`)
 - `http_timeout`: The timeout (in seconds) for all HTTP requests. (Default:
  `20.0`)
@@ -300,7 +289,7 @@ path matches `SOURCE`, it is renamed to `TARGET`.
 Example: `foo/bar --> baz`
 - Doesn't match `foo`, `a/foo/bar` or `foo/baz`
 - Converts `foo/bar` into `baz`
- Converts `foo/bar/wargl` into `baz/wargl`
+- Converts `foo/bar/wargl` into `baz/wargl`
 Example: `foo/bar --> !`
 - Doesn't match `foo`, `a/foo/bar` or `foo/baz`
--- a/LICENSE
+++ b/LICENSE
@@ -1,6 +1,5 @@
 Copyright 2019-2021 Garmelon, I-Al-Istannen, danstooamerican, pavelzw,
-                    TheChristophe, Scriptim, thelukasprobst, Toorero,
+                    TheChristophe, Scriptim, thelukasprobst, Toorero,
                    Mr-Pine
 Permission is hereby granted, free of charge, to any person obtaining a copy of
 this software and associated documentation files (the "Software"), to deal in
--- a/PFERD/main.py
+++ b/PFERD/main.py
@@ -5,6 +5,8 @@ import os
 import sys
 from pathlib import Path
 from PFERD.update import check_for_updates
 from .auth import AuthLoadError
 from .cli import PARSER, ParserLoadError, load_default_section
 from .config import Config, ConfigDumpError, ConfigLoadError, ConfigOptionError
@@ -47,8 +49,6 @@ def configure_logging_from_args(args: argparse.Namespace) -> None:
        log.output_explain = args.explain
    if args.status is not None:
        log.output_status = args.status
    if args.show_not_deleted is not None:
        log.output_not_deleted = args.show_not_deleted
    if args.report is not None:
        log.output_report = args.report
@@ -74,8 +74,6 @@ def configure_logging_from_config(args: argparse.Namespace, config: Config) -> N
            log.output_status = config.default_section.status()
        if args.report is None:
            log.output_report = config.default_section.report()
        if args.show_not_deleted is None:
            log.output_not_deleted = config.default_section.show_not_deleted()
    except ConfigOptionError as e:
        log.error(str(e))
        sys.exit(1)
@@ -138,6 +136,11 @@ def main() -> None:
            loop.run_until_complete(asyncio.sleep(1))
            loop.close()
        else:
            log.explain_topic("Checking for updates")
            if not args.skip_update_check:
                asyncio.run(check_for_updates())
            else:
                log.explain("Update check skipped due to configuration option")
            asyncio.run(pferd.run(args.debug_transforms))
    except (ConfigOptionError, AuthLoadError) as e:
        log.unlock()
--- a/PFERD/cli/parser.py
+++ b/PFERD/cli/parser.py
@@ -151,6 +151,11 @@ PARSER.add_argument(
    action="version",
    version=f"{NAME} {VERSION} (https://github.com/Garmelon/PFERD)",
 )
 PARSER.add_argument(
    "--skip-update-check",
    action="store_true",
    help="disable automatic update checks at startup"
 )
 PARSER.add_argument(
    "--config", "-c",
    type=Path,
@@ -215,11 +220,6 @@ PARSER.add_argument(
    action=BooleanOptionalAction,
    help="whether crawlers should share cookies where applicable"
 )
 PARSER.add_argument(
    "--show-not-deleted",
    action=BooleanOptionalAction,
    help="print messages in status and report when PFERD did not delete a local only file"
 )
 def load_default_section(
@@ -238,7 +238,6 @@ def load_default_section(
        section["report"] = "yes" if args.report else "no"
    if args.share_cookies is not None:
        section["share_cookies"] = "yes" if args.share_cookies else "no"
-    if args.show_not_deleted is not None:
+
        section["show_not_deleted"] = "yes" if args.show_not_deleted else "no"
 SUBPARSERS = PARSER.add_subparsers(title="crawlers")
--- a/PFERD/config.py
+++ b/PFERD/config.py
@@ -82,9 +82,6 @@ class DefaultSection(Section):
    def report(self) -> bool:
        """Return the boolean ``report`` option, defaulting to ``True`` when unset."""
        enabled = self.s.getboolean("report", fallback=True)
        return enabled
    def show_not_deleted(self) -> bool:
        """Return the boolean ``show_not_deleted`` option, defaulting to ``True`` when unset."""
        enabled = self.s.getboolean("show_not_deleted", fallback=True)
        return enabled
    def share_cookies(self) -> bool:
        """Return the boolean ``share_cookies`` option, defaulting to ``True`` when unset."""
        enabled = self.s.getboolean("share_cookies", fallback=True)
        return enabled
--- a/PFERD/crawl/ilias/file_templates.py
+++ b/PFERD/crawl/ilias/file_templates.py
@@ -1,10 +1,6 @@
 from enum import Enum
 from typing import Optional
 import bs4
 from PFERD.utils import soupify
 _link_template_plain = "{{link}}"
 _link_template_fancy = """
 <!DOCTYPE html>
@@ -98,71 +94,6 @@ _link_template_internet_shortcut = """
 URL={{link}}
 """.strip()
 # HTML skeleton for one exported learning-module page. The "{{name}}" and
 # "{{body}}" placeholders are filled in by learning_module_template(); the
 # ".nav" rule styles the navigation block that function inserts.
 _learning_module_template = """
 <!DOCTYPE html>
 <html lang="en">
    <head>
        <meta charset="UTF-8">
        <title>{{name}}</title>
    </head>
    <style>
    * {
        box-sizing: border-box;
    }
    .center-flex {
        display: flex;
        align-items: center;
        justify-content: center;
    }
    .nav {
        display: flex;
        justify-content: space-between;
    }
    </style>
    <body class="center-flex">
 {{body}}
    </body>
 </html>
 """
 def learning_module_template(body: bs4.Tag, name: str, prev: Optional[str], next: Optional[str]) -> str:
    """
    Render a learning-module page body into a standalone HTML document.

    Elements matching ".il-copg-mob-fullscreen-modal" are removed from `body`,
    the top and bottom navigation elements are replaced by a block linking to
    `prev` and `next`, and the prettified result is substituted into the
    module page skeleton together with `name`. `body` is modified in place.
    """
    # Seems to be comments, ignore those.
    for modal in body.select(".il-copg-mob-fullscreen-modal"):
        modal.decompose()

    nav_template = """
        <div class="nav">
            {{left}}
            {{right}}
        </div>
    """

    def nav_entry(selector: str, target: Optional[str]) -> str:
        # A link is only emitted when there is a target AND the page itself
        # carries the matching navigation element (its text becomes the label).
        source = body.select_one(selector) if target else None
        if not source:
            return "<span></span>"
        label = source.getText().strip()
        return f'<a href="{target}">{label}</a>'

    left = nav_entry(".ilc_page_lnav_LeftNavigation", prev)
    right = nav_entry(".ilc_page_rnav_RightNavigation", next)
    nav_markup = nav_template.replace("{{left}}", left).replace("{{right}}", right).encode()

    # Each replaced element needs its own freshly parsed navigation block.
    if top_nav := body.select_one(".ilc_page_tnav_TopNavigation"):
        top_nav.replace_with(soupify(nav_markup))

    if bot_nav := body.select_one(".ilc_page_bnav_BottomNavigation"):
        bot_nav.replace_with(soupify(nav_markup))

    rendered = body.prettify()
    return _learning_module_template.replace("{{body}}", rendered).replace("{{name}}", name)
 class Links(Enum):
    IGNORE = "ignore"
@@ -171,24 +102,24 @@ class Links(Enum):
    INTERNET_SHORTCUT = "internet-shortcut"
    def template(self) -> Optional[str]:
-        if self == Links.FANCY:
+        if self == self.FANCY:
            return _link_template_fancy
-        elif self == Links.PLAINTEXT:
+        elif self == self.PLAINTEXT:
            return _link_template_plain
-        elif self == Links.INTERNET_SHORTCUT:
+        elif self == self.INTERNET_SHORTCUT:
            return _link_template_internet_shortcut
-        elif self == Links.IGNORE:
+        elif self == self.IGNORE:
            return None
        raise ValueError("Missing switch case")
    def extension(self) -> Optional[str]:
-        if self == Links.FANCY:
+        if self == self.FANCY:
            return ".html"
-        elif self == Links.PLAINTEXT:
+        elif self == self.PLAINTEXT:
            return ".txt"
-        elif self == Links.INTERNET_SHORTCUT:
+        elif self == self.INTERNET_SHORTCUT:
            return ".url"
-        elif self == Links.IGNORE:
+        elif self == self.IGNORE:
            return None
        raise ValueError("Missing switch case")
--- a/PFERD/crawl/ilias/ilias_html_cleaner.py
+++ b/PFERD/crawl/ilias/ilias_html_cleaner.py
@@ -82,7 +82,7 @@ def clean(soup: BeautifulSoup) -> BeautifulSoup:
            dummy.decompose()
        if len(children) > 1:
            continue
-        if isinstance(type(children[0]), Comment):
+        if type(children[0]) == Comment:
            dummy.decompose()
    for hrule_imposter in soup.find_all(class_="ilc_section_Separator"):
--- a/PFERD/crawl/ilias/kit_ilias_html.py
+++ b/PFERD/crawl/ilias/kit_ilias_html.py
@@ -22,10 +22,8 @@ class IliasElementType(Enum):
    FOLDER = "folder"
    FORUM = "forum"
    LINK = "link"
    LEARNING_MODULE = "learning_module"
    BOOKING = "booking"
    MEETING = "meeting"
    SURVEY = "survey"
    VIDEO = "video"
    VIDEO_PLAYER = "video_player"
    VIDEO_FOLDER = "video_folder"
@@ -72,14 +70,6 @@ class IliasForumThread:
    mtime: Optional[datetime]
# A single page of an ILIAS learning module, as extracted by
# IliasPage.get_learning_module_data().
@dataclass
 class IliasLearningModulePage:
    # Stripped text of the page's ".ilc_page_title_PageTitle" element.
    title: str
    # The "#ilLMPageContent" element of the page.
    content: Tag
    # Absolute URL of the following page, or None if no such link was found.
    next_url: Optional[str]
    # Absolute URL of the preceding page, or None if no such link was found.
    previous_url: Optional[str]
 class IliasPage:
    def __init__(self, soup: BeautifulSoup, _page_url: str, source_element: Optional[IliasPageElement]):
@@ -88,16 +78,6 @@ class IliasPage:
        self._page_type = source_element.type if source_element else None
        self._source_name = source_element.name if source_element else ""
    @staticmethod
    def is_root_page(soup: BeautifulSoup) -> bool:
        permalink = soup.find(id="current_perma_link")
        if permalink is None:
            return False
        value = permalink.attrs.get("value")
        if value is None:
            return False
        return "goto.php?target=root_" in value
    def get_child_elements(self) -> List[IliasPageElement]:
        """
        Return all child page elements you can find here.
@@ -145,34 +125,6 @@ class IliasPage:
        return BeautifulSoup(raw_html, "html.parser")
    def get_learning_module_data(self) -> Optional[IliasLearningModulePage]:
        """
        Extract title, content and neighbour URLs of a learning module page.

        Returns None when this page is not a learning module page.
        """
        if self._is_learning_module_page():
            page_content = self._soup.select_one("#ilLMPageContent")
            page_title = self._soup.select_one(".ilc_page_title_PageTitle").getText().strip()
            return IliasLearningModulePage(
                title=page_title,
                content=page_content,
                next_url=self._find_learning_module_next(),
                previous_url=self._find_learning_module_prev()
            )

        return None
    def _find_learning_module_next(self) -> Optional[str]:
        for link in self._soup.select("a.ilc_page_rnavlink_RightNavigationLink"):
            url = self._abs_url_from_link(link)
            if "baseClass=ilLMPresentationGUI" not in url:
                continue
            return url
        return None
    def _find_learning_module_prev(self) -> Optional[str]:
        for link in self._soup.select("a.ilc_page_lnavlink_LeftNavigationLink"):
            url = self._abs_url_from_link(link)
            if "baseClass=ilLMPresentationGUI" not in url:
                continue
            return url
        return None
    def get_download_forum_data(self) -> Optional[IliasDownloadForumData]:
        form = self._soup.find("form", attrs={"action": lambda x: x and "fallbackCmd=showThreads" in x})
        if not form:
@@ -181,7 +133,7 @@ class IliasPage:
        thread_ids = [f["value"] for f in form.find_all(attrs={"name": "thread_ids[]"})]
-        form_data: Dict[str, Union[str, List[str]]] = {
+        form_data: Dict[str, Union[str, List[str]]] = {
            "thread_ids[]": thread_ids,
            "selected_cmd2": "html",
            "select_cmd2": "Ausführen",
@@ -205,8 +157,6 @@ class IliasPage:
        if self._contains_collapsed_future_meetings():
            log.explain("Requesting *all* future meetings")
            return self._uncollapse_future_meetings_url()
        if not self._is_content_tab_selected():
            return self._select_content_page_url()
        return None
    def _is_forum_page(self) -> bool:
@@ -259,12 +209,6 @@ class IliasPage:
            return False
        return "target=copa_" in link.get("value")
    def _is_learning_module_page(self) -> bool:
        link = self._soup.find(id="current_perma_link")
        if not link:
            return False
        return "target=pg_" in link.get("value")
    def _contains_collapsed_future_meetings(self) -> bool:
        return self._uncollapse_future_meetings_url() is not None
@@ -275,27 +219,6 @@ class IliasPage:
        link = self._abs_url_from_link(element)
        return IliasPageElement(IliasElementType.FOLDER, link, "show all meetings")
    def _is_content_tab_selected(self) -> bool:
        return self._select_content_page_url() is None
    def _select_content_page_url(self) -> Optional[IliasPageElement]:
        tab = self._soup.find(
            id="tab_view_content",
            attrs={"class": lambda x: x is not None and "active" not in x}
        )
        # Already selected (or not found)
        if not tab:
            return None
        link = tab.find("a")
        if link:
            link = self._abs_url_from_link(link)
            return IliasPageElement(IliasElementType.FOLDER, link, "select content page")
        _unexpected_html_warning()
        log.warn_contd(f"Could not find content tab URL on {self._page_url!r}.")
        log.warn_contd("PFERD might not find content on the course's main page.")
        return None
    def _player_to_video(self) -> List[IliasPageElement]:
        # Fetch the actual video page. This is a small wrapper page initializing a javascript
        # player. Sadly we can not execute that JS. The actual video stream url is nowhere
@@ -442,7 +365,7 @@ class IliasPage:
        """
        # Video start links are marked with an "Abspielen" link
        video_links: List[Tag] = self._soup.findAll(
-            name="a", text=re.compile(r"\s*(Abspielen|Play)\s*")
+            name="a", text=re.compile(r"\s*Abspielen\s*")
        )
        results: List[IliasPageElement] = []
@@ -761,11 +684,7 @@ class IliasPage:
                "div",
                attrs={"class": lambda x: x and "caption" in x},
            )
-            caption_container = caption_parent.find_next_sibling("div")
+            description = caption_parent.find_next_sibling("div").getText().strip()
            if caption_container:
                description = caption_container.getText().strip()
            else:
                description = None
            if not type:
                _unexpected_html_warning()
@@ -795,7 +714,7 @@ class IliasPage:
        icon: Tag = card_root.select_one(".il-card-repository-head .icon")
-        if "opencast" in icon["class"] or "xoct" in icon["class"]:
+        if "opencast" in icon["class"]:
            return IliasElementType.VIDEO_FOLDER_MAYBE_PAGINATED
        if "exc" in icon["class"]:
            return IliasElementType.EXERCISE
@@ -811,12 +730,6 @@ class IliasPage:
            return IliasElementType.TEST
        if "fold" in icon["class"]:
            return IliasElementType.FOLDER
        if "copa" in icon["class"]:
            return IliasElementType.FOLDER
        if "svy" in icon["class"]:
            return IliasElementType.SURVEY
        if "file" in icon["class"]:
            return IliasElementType.FILE
        _unexpected_html_warning()
        log.warn_contd(f"Could not extract type from {icon} for card title {card_title}")
@@ -855,9 +768,6 @@ class IliasPage:
        if "cmdClass=ilobjtestgui" in parsed_url.query:
            return IliasElementType.TEST
        if "baseClass=ilLMPresentationGUI" in parsed_url.query:
            return IliasElementType.LEARNING_MODULE
        # Booking and Meeting can not be detected based on the link. They do have a ref_id though, so
        # try to guess it from the image.
--- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py
+++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py
@@ -1,11 +1,8 @@
 import asyncio
 import base64
 import os
 import re
 from collections.abc import Awaitable, Coroutine
 from pathlib import PurePath
-from typing import Any, Callable, Dict, List, Literal, Optional, Set, Union, cast
+from typing import Any, Callable, Dict, List, Optional, Set, Union, cast
 from urllib.parse import urljoin
 import aiohttp
 import yarl
@@ -19,10 +16,10 @@ from ...output_dir import FileSink, Redownload
 from ...utils import fmt_path, soupify, url_set_query_param
 from ..crawler import AWrapped, CrawlError, CrawlToken, CrawlWarning, DownloadToken, anoncritical
 from ..http_crawler import HttpCrawler, HttpCrawlerSection
-from .file_templates import Links, learning_module_template
+from .file_templates import Links
 from .ilias_html_cleaner import clean, insert_base_markup
-from .kit_ilias_html import (IliasElementType, IliasForumThread, IliasLearningModulePage, IliasPage,
+from .kit_ilias_html import (IliasElementType, IliasForumThread, IliasPage, IliasPageElement,
-                             IliasPageElement, _sanitize_path_name, parse_ilias_forum_export)
+                             _sanitize_path_name, parse_ilias_forum_export)
 TargetType = Union[str, int]
@@ -197,7 +194,7 @@ instance's greatest bottleneck.
        self._links = section.links()
        self._videos = section.videos()
        self._forums = section.forums()
-        self._visited_urls: Dict[str, PurePath] = dict()
+        self._visited_urls: Set[str] = set()
    async def _run(self) -> None:
        if isinstance(self._target, int):
@@ -242,7 +239,7 @@ instance's greatest bottleneck.
                # Duplicated code, but the root page is special - we want to avoid fetching it twice!
                while next_stage_url:
-                    soup = await self._get_page(next_stage_url, root_page_allowed=True)
+                    soup = await self._get_page(next_stage_url)
                    if current_parent is None and expected_id is not None:
                        perma_link_element: Tag = soup.find(id="current_perma_link")
@@ -351,11 +348,9 @@ instance's greatest bottleneck.
    ) -> Optional[Coroutine[Any, Any, None]]:
        if element.url in self._visited_urls:
            raise CrawlWarning(
-                f"Found second path to element {element.name!r} at {element.url!r}. "
+                f"Found second path to element {element.name!r} at {element.url!r}. Aborting subpath"
                + f"First path: {fmt_path(self._visited_urls[element.url])}. "
                + f"Second path: {fmt_path(parent_path)}."
            )
-        self._visited_urls[element.url] = parent_path
+        self._visited_urls.add(element.url)
        element_path = PurePath(parent_path, element.name)
@@ -382,23 +377,10 @@ instance's greatest bottleneck.
                return None
            return await self._handle_forum(element, element_path)
        elif element.type == IliasElementType.TEST:
-            log.status(
+            log.explain_topic(f"Decision: Crawl {fmt_path(element_path)}")
-                "[bold bright_black]",
+            log.explain("Tests contain no relevant files")
-                "Ignored",
+            log.explain("Answer: No")
                fmt_path(element_path),
                "[bright_black](tests contain no relevant data)"
            )
            return None
        elif element.type == IliasElementType.SURVEY:
            log.status(
                "[bold bright_black]",
                "Ignored",
                fmt_path(element_path),
                "[bright_black](surveys contain no relevant data)"
            )
            return None
        elif element.type == IliasElementType.LEARNING_MODULE:
            return await self._handle_learning_module(element, element_path)
        elif element.type == IliasElementType.LINK:
            return await self._handle_link(element, element_path)
        elif element.type == IliasElementType.BOOKING:
@@ -744,141 +726,12 @@ instance's greatest bottleneck.
            sink.file.write(content.encode("utf-8"))
            sink.done()
-    async def _handle_learning_module(
+    async def _get_page(self, url: str) -> BeautifulSoup:
        self,
        element: IliasPageElement,
        element_path: PurePath,
    ) -> Optional[Coroutine[Any, Any, None]]:
        maybe_cl = await self.crawl(element_path)
        if not maybe_cl:
            return None
        return self._crawl_learning_module(element, maybe_cl)
    @_iorepeat(3, "crawling learning module")
    @anoncritical
    async def _crawl_learning_module(self, element: IliasPageElement, cl: CrawlToken) -> None:
        """
        Crawl all pages of a learning module and download each of them.

        Starting from the page at `element.url`, the module is walked to the
        left and to the right. The collected pages get an index prefix in
        their title so file names reflect the reading order, and every page is
        downloaded with the titles of its neighbours for navigation links.
        """
        elements: List[IliasLearningModulePage] = []

        async with cl:
            log.explain_topic(f"Parsing initial HTML page for {fmt_path(cl.path)}")
            log.explain(f"URL: {element.url}")
            soup = await self._get_page(element.url)
            page = IliasPage(soup, element.url, None)
            if current := page.get_learning_module_data():
                # The leftward walk returns the nearest page first. Reverse it
                # so `elements` is in reading order; otherwise index prefixes
                # and prev/next links are wrong whenever the entry page is not
                # the first page of the module.
                left_pages = await self._crawl_learning_module_direction(
                    cl.path, current.previous_url, "left"
                )
                elements.extend(reversed(left_pages))
                elements.append(current)
                elements.extend(await self._crawl_learning_module_direction(
                    cl.path, current.next_url, "right"
                ))

        # Reflect their natural ordering in the file names
        for index, lm_element in enumerate(elements):
            lm_element.title = f"{index:02}_{lm_element.title}"

        tasks: List[Awaitable[None]] = []
        last_index = len(elements) - 1
        for index, elem in enumerate(elements):
            # Neighbours are passed by (already prefixed) title, not by URL.
            prev_title = elements[index - 1].title if index > 0 else None
            next_title = elements[index + 1].title if index < last_index else None
            tasks.append(asyncio.create_task(
                self._download_learning_module_page(cl.path, elem, prev_title, next_title)
            ))

        # And execute them
        await self.gather(tasks)
    async def _crawl_learning_module_direction(
        self,
        path: PurePath,
        start_url: Optional[str],
        dir: Union[Literal["left"], Literal["right"]]
    ) -> List[IliasLearningModulePage]:
        """
        Follow learning module pages in one direction, starting at `start_url`.

        Pages are returned in the order they were visited, i.e. nearest to the
        starting point first. An empty list is returned when `start_url` is
        empty. The walk ends when a page has no further link in direction
        `dir`, or when a fetched page yields no learning module data.
        """
        elements: List[IliasLearningModulePage] = []

        if not start_url:
            return elements

        next_element_url: Optional[str] = start_url
        counter = 0

        while next_element_url:
            log.explain_topic(f"Parsing HTML page for {fmt_path(path)} ({dir}-{counter})")
            log.explain(f"URL: {next_element_url}")
            soup = await self._get_page(next_element_url)
            page = IliasPage(soup, next_element_url, None)
            page_data = page.get_learning_module_data()
            if not page_data:
                # No data means no successor URL. Without this exit the loop
                # would refetch the same URL forever.
                break
            elements.append(page_data)
            if dir == "left":
                next_element_url = page_data.previous_url
            else:
                next_element_url = page_data.next_url
            counter += 1

        return elements
    @anoncritical
    @_iorepeat(3, "saving learning module page")
    async def _download_learning_module_page(
        self,
        parent_path: PurePath,
        element: IliasLearningModulePage,
        prev: Optional[str],
        next: Optional[str]
    ) -> None:
        """
        Write one learning module page as an HTML file below `parent_path`.

        `prev` and `next` are the titles of the neighbouring pages. They are
        turned into paths relative to this page's transformed location, or
        dropped when the transformer yields no path for the neighbour.
        """
        path = parent_path / (_sanitize_path_name(element.title) + ".html")
        maybe_dl = await self.download(path)
        if not maybe_dl:
            return
        my_path = self._transformer.transform(maybe_dl.path)
        if not my_path:
            return

        def sibling_link(title: Optional[str]) -> Optional[str]:
            # Empty or missing titles are passed through untouched.
            if not title:
                return title
            sibling = self._transformer.transform(parent_path / (_sanitize_path_name(title) + ".html"))
            if not sibling:
                return None
            return os.path.relpath(sibling, my_path.parent)

        prev_link = sibling_link(prev)
        next_link = sibling_link(next)

        async with maybe_dl as (bar, sink):
            content = await self.internalize_images(element.content)
            document = learning_module_template(content, maybe_dl.path.name, prev_link, next_link)
            sink.file.write(document.encode("utf-8"))
            sink.done()
    async def internalize_images(self, tag: Tag) -> Tag:
        """
        Tries to fetch ILIAS images and embed them as base64 data.

        Only images whose resolved URL starts with the ILIAS base URL are
        embedded. Iframe sources starting with "//" get an "https:" prefix.
        The given tag is modified in place and returned.
        """
        log.explain_topic("Internalizing images")
        for node in tag.find_all(recursive=True):
            if not isinstance(node, Tag):
                continue

            if node.name == "img":
                source = node.attrs.get("src", None)
                if source:
                    url = urljoin(_ILIAS_URL, source)
                    if url.startswith(_ILIAS_URL):
                        log.explain(f"Internalizing {url!r}")
                        img = await self._get_authenticated(url)
                        node.attrs["src"] = "data:;base64," + base64.b64encode(img).decode()
            elif node.name == "iframe" and node.attrs.get("src", "").startswith("//"):
                # For unknown reasons the protocol seems to be stripped.
                node.attrs["src"] = "https:" + node.attrs["src"]
        return tag
    async def _get_page(self, url: str, root_page_allowed: bool = False) -> BeautifulSoup:
        auth_id = await self._current_auth_id()
        async with self.session.get(url) as request:
            soup = soupify(await request.read())
            if self._is_logged_in(soup):
-                return self._verify_page(soup, url, root_page_allowed)
+                return soup
        # We weren't authenticated, so try to do that
        await self.authenticate(auth_id)
@@ -887,26 +740,14 @@ instance's greatest bottleneck.
        async with self.session.get(url) as request:
            soup = soupify(await request.read())
            if self._is_logged_in(soup):
                return self._verify_page(soup, url, root_page_allowed)
        raise CrawlError("get_page failed even after authenticating")
    def _verify_page(self, soup: BeautifulSoup, url: str, root_page_allowed: bool) -> BeautifulSoup:
        if IliasPage.is_root_page(soup) and not root_page_allowed:
            raise CrawlError(
                "Unexpectedly encountered ILIAS root page. "
                "This usually happens because the ILIAS instance is broken. "
                "If so, wait a day or two and try again. "
                "It could also happen because a crawled element links to the ILIAS root page. "
                "If so, use a transform with a ! as target to ignore the particular element. "
                f"The redirect came from {url}"
            )
                return soup
        raise CrawlError("get_page failed even after authenticating")
    async def _post_authenticated(
        self,
        url: str,
        data: dict[str, Union[str, List[str]]]
-    ) -> bytes:
+    ) -> BeautifulSoup:
        auth_id = await self._current_auth_id()
        form_data = aiohttp.FormData()
@@ -926,22 +767,6 @@ instance's greatest bottleneck.
                return await request.read()
        raise CrawlError("post_authenticated failed even after authenticating")
    async def _get_authenticated(self, url: str) -> bytes:
        """
        Fetch `url` without following redirects and return the response body.

        Any status other than 200 on the first attempt triggers one
        authentication followed by a single retry.

        Raises:
            CrawlError: if the retry does not answer with status 200 either.
        """
        auth_id = await self._current_auth_id()

        for is_retry in (False, True):
            if is_retry:
                # We weren't authenticated, so try to do that
                await self.authenticate(auth_id)

            # On the retry pass: if this fails, we will die.
            async with self.session.get(url, allow_redirects=False) as request:
                if request.status == 200:
                    return await request.read()

        raise CrawlError("get_authenticated failed even after authenticating")
    # We repeat this as the login method in shibboleth doesn't handle I/O errors.
    # Shibboleth is quite reliable as well, the repeat is likely not critical here.
    @ _iorepeat(3, "Login", failure_is_error=True)
--- a/PFERD/crawl/kit_ipd_crawler.py
+++ b/PFERD/crawl/kit_ipd_crawler.py
@@ -2,7 +2,7 @@ import os
 import re
 from dataclasses import dataclass
 from pathlib import PurePath
-from typing import Awaitable, List, Optional, Pattern, Set, Tuple, Union
+from typing import Awaitable, List, Optional, Pattern, Set, Union
 from urllib.parse import urljoin
 from bs4 import BeautifulSoup, Tag
@@ -99,32 +99,32 @@ class KitIpdCrawler(HttpCrawler):
            await self._stream_from_url(file.url, sink, bar)
    async def _fetch_items(self) -> Set[Union[KitIpdFile, KitIpdFolder]]:
-        page, url = await self.get_page()
+        page = await self.get_page()
        elements: List[Tag] = self._find_file_links(page)
        items: Set[Union[KitIpdFile, KitIpdFolder]] = set()
        for element in elements:
            folder_label = self._find_folder_label(element)
            if folder_label:
-                folder = self._extract_folder(folder_label, url)
+                folder = self._extract_folder(folder_label)
                if folder not in items:
                    items.add(folder)
                    folder.explain()
            else:
-                file = self._extract_file(element, url)
+                file = self._extract_file(element)
                items.add(file)
                log.explain_topic(f"Orphan file {file.name!r} (href={file.url!r})")
                log.explain("Attributing it to root folder")
        return items
-    def _extract_folder(self, folder_tag: Tag, url: str) -> KitIpdFolder:
+    def _extract_folder(self, folder_tag: Tag) -> KitIpdFolder:
        files: List[KitIpdFile] = []
        name = folder_tag.getText().strip()
        container: Tag = folder_tag.findNextSibling(name="table")
        for link in self._find_file_links(container):
-            files.append(self._extract_file(link, url))
+            files.append(self._extract_file(link))
        return KitIpdFolder(name, files)
@@ -135,16 +135,16 @@ class KitIpdCrawler(HttpCrawler):
            return None
        return enclosing_table.findPreviousSibling(name=re.compile("^h[1-6]$"))
-    def _extract_file(self, link: Tag, url: str) -> KitIpdFile:
+    def _extract_file(self, link: Tag) -> KitIpdFile:
-        url = self._abs_url_from_link(url, link)
+        url = self._abs_url_from_link(link)
        name = os.path.basename(url)
        return KitIpdFile(name, url)
    def _find_file_links(self, tag: Union[Tag, BeautifulSoup]) -> List[Tag]:
        return tag.findAll(name="a", attrs={"href": self._file_regex})
-    def _abs_url_from_link(self, url: str, link_tag: Tag) -> str:
+    def _abs_url_from_link(self, link_tag: Tag) -> str:
-        return urljoin(url, link_tag.get("href"))
+        return urljoin(self._url, link_tag.get("href"))
    async def _stream_from_url(self, url: str, sink: FileSink, bar: ProgressBar) -> None:
        async with self.session.get(url, allow_redirects=False) as resp:
@@ -159,7 +159,7 @@ class KitIpdCrawler(HttpCrawler):
            sink.done()
-    async def get_page(self) -> Tuple[BeautifulSoup, str]:
+    async def get_page(self) -> BeautifulSoup:
        async with self.session.get(self._url) as request:
            # The web page for Algorithmen für Routenplanung contains some
            # weird comments that beautifulsoup doesn't parse correctly. This
@@ -167,4 +167,4 @@ class KitIpdCrawler(HttpCrawler):
            # cause issues on other pages.
            content = (await request.read()).decode("utf-8")
            content = re.sub(r"<!--.*?-->", "", content)
-            return soupify(content.encode("utf-8")), str(request.url)
+            return soupify(content.encode("utf-8"))
--- a/PFERD/logging.py
+++ b/PFERD/logging.py
@@ -59,7 +59,6 @@ class Log:
        # Whether different parts of the output are enabled or disabled
        self.output_explain = False
        self.output_status = True
        self.output_not_deleted = True
        self.output_report = True
    def _update_live(self) -> None:
@@ -208,17 +207,6 @@ directly or as a GitHub issue: https://github.com/Garmelon/PFERD/issues/new
            action = escape(f"{action:<{self.STATUS_WIDTH}}")
            self.print(f"{style}{action}[/] {escape(text)} {suffix}")
    def not_deleted(self, style: str, action: str, text: str, suffix: str = "") -> None:
        """
        Print a status line for a local-only file that was kept (not deleted)
        while crawling.

        The "style" argument may contain markup; it is applied to the "action"
        string, which is padded to STATUS_WIDTH. "action" and "text" are
        escaped before printing, "suffix" is printed as given.

        Nothing is printed unless both output_status and output_not_deleted
        are enabled.
        """
        if not (self.output_status and self.output_not_deleted):
            return

        padded_action = escape(f"{action:<{self.STATUS_WIDTH}}")
        self.print(f"{style}{padded_action}[/] {escape(text)} {suffix}")
    def report(self, text: str) -> None:
        """
        Print a report after crawling. Allows markup.
@@ -227,14 +215,6 @@ directly or as a GitHub issue: https://github.com/Garmelon/PFERD/issues/new
        if self.output_report:
            self.print(text)
    def report_not_deleted(self, text: str) -> None:
        """
        Print a report line for a local-only file that wasn't deleted after
        crawling. Allows markup.

        Nothing is printed unless both output_report and output_not_deleted
        are enabled.
        """
        if not self.output_report:
            return
        if not self.output_not_deleted:
            return
        self.print(text)
    @contextmanager
    def _bar(
            self,
--- a/PFERD/output_dir.py
+++ b/PFERD/output_dir.py
@@ -44,7 +44,6 @@ class OnConflict(Enum):
    LOCAL_FIRST = "local-first"
    REMOTE_FIRST = "remote-first"
    NO_DELETE = "no-delete"
    NO_DELETE_PROMPT_OVERWRITE = "no-delete-prompt-overwrite"
    @staticmethod
    def from_string(string: str) -> "OnConflict":
@@ -52,7 +51,7 @@ class OnConflict(Enum):
            return OnConflict(string)
        except ValueError:
            raise ValueError("must be one of 'prompt', 'local-first',"
-                             " 'remote-first', 'no-delete', 'no-delete-prompt-overwrite'")
+                             " 'remote-first', 'no-delete'")
@dataclass
@@ -265,7 +264,7 @@ class OutputDirectory:
            on_conflict: OnConflict,
            path: PurePath,
    ) -> bool:
-        if on_conflict in {OnConflict.PROMPT, OnConflict.NO_DELETE_PROMPT_OVERWRITE}:
+        if on_conflict == OnConflict.PROMPT:
            async with log.exclusive_output():
                prompt = f"Replace {fmt_path(path)} with remote file?"
                return await prompt_yes_no(prompt, default=False)
@@ -284,7 +283,7 @@ class OutputDirectory:
            on_conflict: OnConflict,
            path: PurePath,
    ) -> bool:
-        if on_conflict in {OnConflict.PROMPT, OnConflict.NO_DELETE_PROMPT_OVERWRITE}:
+        if on_conflict == OnConflict.PROMPT:
            async with log.exclusive_output():
                prompt = f"Recursively delete {fmt_path(path)} and replace with remote file?"
                return await prompt_yes_no(prompt, default=False)
@@ -304,7 +303,7 @@ class OutputDirectory:
            path: PurePath,
            parent: PurePath,
    ) -> bool:
-        if on_conflict in {OnConflict.PROMPT, OnConflict.NO_DELETE_PROMPT_OVERWRITE}:
+        if on_conflict == OnConflict.PROMPT:
            async with log.exclusive_output():
                prompt = f"Delete {fmt_path(parent)} so remote file {fmt_path(path)} can be downloaded?"
                return await prompt_yes_no(prompt, default=False)
@@ -331,7 +330,7 @@ class OutputDirectory:
            return False
        elif on_conflict == OnConflict.REMOTE_FIRST:
            return True
-        elif on_conflict in {OnConflict.NO_DELETE, OnConflict.NO_DELETE_PROMPT_OVERWRITE}:
+        elif on_conflict == OnConflict.NO_DELETE:
            return False
        # This should never be reached
@@ -496,7 +495,7 @@ class OutputDirectory:
            except OSError:
                pass
        else:
-            log.not_deleted("[bold bright_magenta]", "Not deleted", fmt_path(pure))
+            log.status("[bold bright_magenta]", "Not deleted", fmt_path(pure))
            self._report.not_delete_file(pure)
    def load_prev_report(self) -> None:
--- a/PFERD/pferd.py
+++ b/PFERD/pferd.py
@@ -1,5 +1,5 @@
 from pathlib import Path
-from typing import Dict, List, Optional, Set
+from typing import Dict, List, Optional
 from rich.markup import escape
@@ -43,24 +43,16 @@ class Pferd:
        crawl_sections = [name for name, _ in config.crawl_sections()]
-        crawlers_to_run = set()  # With crawl: prefix
+        crawlers_to_run = []  # With crawl: prefix
        unknown_names = []  # Without crawl: prefix
        for name in cli_crawlers:
            section_name = f"crawl:{name}"
            if section_name in crawl_sections:
                log.explain(f"Crawler section named {section_name!r} exists")
-                crawlers_to_run.add(section_name)
+                crawlers_to_run.append(section_name)
-            # interprete name as alias of a crawler
+            else:
-            alias_names = self._find_crawlers_by_alias(name, config)
+                log.explain(f"There's no crawler section named {section_name!r}")
            if alias_names:
                crawlers_to_run.update(alias_names)
                log.explain_topic(f"Crawler alias {name!r} found corresponding crawler sections:")
                for alias_name in alias_names:
                    log.explain(f"Crawler section named {alias_name!r} with alias {name!r} exists")
            if not section_name in crawl_sections and not alias_names:
                log.explain(f"There's neither a crawler section named {section_name!r} nor does a crawler with alias {name!r} exist.")
                unknown_names.append(name)
        if unknown_names:
@@ -73,14 +65,6 @@ class Pferd:
        return crawlers_to_run
    def _find_crawlers_by_alias(self, alias: str, config: Config) -> Set[str]:
        """
        Collect the names of all crawl sections that list ``alias`` among
        their "aliases".

        Returns an empty set if no section declares the alias.
        """
        # NOTE(review): if a section's "aliases" value is a plain string rather
        # than a list, the "in" test below is a substring match - confirm what
        # section.get() returns here.
        return {
            section_name
            for section_name, section in config.crawl_sections()
            if alias in section.get("aliases", [])
        }
    def _find_crawlers_to_run(
            self,
            config: Config,
@@ -196,7 +180,7 @@ class Pferd:
                log.report(f"  [bold bright_magenta]Deleted[/] {fmt_path(path)}")
            for path in sorted(crawler.report.not_deleted_files):
                something_changed = True
-                log.report_not_deleted(f"  [bold bright_magenta]Not deleted[/] {fmt_path(path)}")
+                log.report(f"  [bold bright_magenta]Not deleted[/] {fmt_path(path)}")
            for warning in crawler.report.encountered_warnings:
                something_changed = True
--- a/PFERD/update.py
+++ b/PFERD/update.py
@@ -0,0 +1,53 @@
 from dataclasses import dataclass
 import ssl
 from typing import Optional
 import aiohttp
 import certifi
 from .version import NAME, VERSION
 from .logging import log
@dataclass
 class PferdUpdate:
    """
    A release found on GitHub whose version differs from the running one,
    as returned by get_newer_version().
    """

    # Taken from the "html_url" field of the release information.
    release_url: str
    # The release's tag name with a leading "v" removed.
    version: str
 def _build_session() -> aiohttp.ClientSession:
    """
    Create the HTTP session used for the update check.

    The session identifies itself as "<NAME>/<VERSION>", verifies TLS
    certificates against the CA bundle located by certifi and applies fixed
    timeouts to the whole request as well as to connecting and reading.
    """
    user_agent = {"User-Agent": f"{NAME}/{VERSION}"}

    tls_context = ssl.create_default_context(cafile=certifi.where())
    connector = aiohttp.TCPConnector(ssl=tls_context)

    limits = aiohttp.ClientTimeout(
        total=15 * 60,
        connect=10,
        sock_connect=10,
        sock_read=10,
    )

    return aiohttp.ClientSession(headers=user_agent, connector=connector, timeout=limits)
 async def check_for_updates() -> None:
    """
    Look up the latest release and tell the user about it.

    Emits a warning with the running version, the version found on GitHub and
    the download link if get_newer_version() returns an update; otherwise
    only an explain message is logged.
    """
    update = await get_newer_version()
    if not update:
        log.explain("No update found")
        return

    log.warn(
        f"{NAME} version out of date. "
        f"You are running version {VERSION!r} but {update.version!r} was found on GitHub."
    )
    log.warn_contd(f"You can download it on GitHub: {update.release_url}")
 async def get_newer_version() -> Optional[PferdUpdate]:
    """
    Ask GitHub for the latest release and return it if it is newer than the
    running version.

    Returns a PferdUpdate when the latest release has a higher version than
    VERSION. Returns None when the running version is up to date (or ahead of
    the latest release), or when GitHub's answer can't be used: a status other
    than 200 (e.g. because the request was rate limited) or release
    information without a tag name or URL.
    """
    async with _build_session() as session:
        async with session.get(
            "https://api.github.com/repos/Garmelon/Pferd/releases/latest",
            headers={"Accept": "application/vnd.github+json"}
        ) as response:
            if response.status != 200:
                # Error answers carry no release fields; don't try to parse them.
                log.explain(f"Update check failed: GitHub answered with status {response.status}")
                return None
            release_information = await response.json()

    if not isinstance(release_information, dict):
        log.explain("Update check failed: unexpected release information format")
        return None

    tag_name = release_information.get("tag_name")
    release_url = release_information.get("html_url")
    if not isinstance(tag_name, str) or not isinstance(release_url, str):
        log.explain("Update check failed: release information lacks a tag name or URL")
        return None

    version = tag_name.removeprefix("v")
    if not _is_newer_version(version, VERSION):
        return None
    return PferdUpdate(release_url=release_url, version=version)

 def _is_newer_version(candidate: str, current: str) -> bool:
    """
    Return True if the dotted version ``candidate`` is higher than ``current``.

    Versions are compared component-wise as integers, so "3.10.0" is newer
    than "3.9.0" and a running version ahead of the latest release is not
    reported as outdated.
    """
    try:
        candidate_parts = tuple(int(part) for part in candidate.split("."))
        current_parts = tuple(int(part) for part in current.split("."))
    except ValueError:
        # Not a plain dotted number: fall back to treating any difference as
        # an update, which is what a simple equality check would do.
        return candidate != current
    return candidate_parts > current_parts
--- a/PFERD/version.py
+++ b/PFERD/version.py
@@ -1,2 +1,2 @@
 NAME = "PFERD"
-VERSION = "3.4.3"
+VERSION = "3.4.1"
--- a/README.md
+++ b/README.md
@@ -30,10 +30,7 @@ The use of [venv](https://docs.python.org/3/library/venv.html) is recommended.
 Unofficial packages are available for:
 - [AUR](https://aur.archlinux.org/packages/pferd)
 - [brew](https://formulae.brew.sh/formula/pferd)
 - [conda-forge](https://github.com/conda-forge/pferd-feedstock)
 - [nixpkgs](https://github.com/NixOS/nixpkgs/blob/master/pkgs/tools/misc/pferd/default.nix)
 - [PyPi](https://pypi.org/project/pferd)
 See also PFERD's [repology page](https://repology.org/project/pferd/versions).
`@@ -1,2 +1,2 @@`
	`NAME = "PFERD"`	`NAME = "PFERD"`
	`VERSION = "3.4.3"`	`VERSION = "3.4.1"`