Merge 77c1f1516c into e41a22149e

Add default show-not-deleted option
If set to `no`, PFERD won't print status or report messages for not deleted files
2025-12-16 12:02:27 +01:00 · 2023-08-26 17:16:00 +02:00 · 2023-08-26 17:13:45 +02:00 · 2023-08-02 13:34:54 +02:00 · 2023-07-29 18:36:33 +02:00 · 2023-07-29 18:36:33 +02:00
13 changed files with 388 additions and 32 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -26,6 +26,14 @@ ambiguous situations.
 - Crawling of courses with the timeline view as the default tab
 - Crawling of file and custom opencast cards
 - Crawling of button cards without descriptions
 - Abort crawling when encountering an unexpected ilias root page redirect
 ### Added
 - `no-delete-prompt-override` conflict resolution strategy
 - support for ILIAS learning modules
 - `show_not_deleted` option to stop printing the "Not Deleted" status or report
  message. This combines nicely with the `no-delete-prompt-override` strategy,
  causing PFERD to mostly ignore local-only files.
 ## 3.4.3 - 2022-11-29
--- a/CONFIG.md
+++ b/CONFIG.md
@@ -26,6 +26,9 @@ default values for the other sections.
  `Added ...`) while running a crawler. (Default: `yes`)
 - `report`: Whether PFERD should print a report of added, changed and deleted
   local files for all crawlers before exiting. (Default: `yes`)
 - `show_not_deleted`: Whether PFERD should print messages in status and report
   when a local-only file wasn't deleted. Combines nicely with the
   `no-delete-prompt-override` conflict resolution strategy.
 - `share_cookies`: Whether crawlers should share cookies where applicable. For
  example, some crawlers share cookies if they crawl the same website using the
  same account. (Default: `yes`)
@@ -75,6 +78,9 @@ common to all crawlers:
      using `prompt` and always choosing "yes".
    - `no-delete`: Never delete local files, but overwrite local files if the
      remote file is different.
    - `no-delete-prompt-overwrite`: Never delete local files, but prompt to
      overwrite local files if the remote file is different. Combines nicely
      with the `show_not_deleted` option.
 - `transform`: Rules for renaming and excluding certain files and directories.
  For more details, see [this section](#transformation-rules). (Default: empty)
 - `tasks`: The maximum number of concurrent tasks (such as crawling or
@@ -86,6 +92,9 @@ common to all crawlers:
  load for the crawl target. (Default: `0.0`)
 - `windows_paths`: Whether PFERD should find alternative names for paths that
  are invalid on Windows. (Default: `yes` on Windows, `no` otherwise)
 - `aliases`: List of strings that are considered as an alias when invoking with
  the `--crawler` or `-C` flag. If there is more than one crawl section with
  the same aliases all are selected. Thereby, you can group different crawlers.
 Some crawlers may also require credentials for authentication. To configure how
 the crawler obtains its credentials, the `auth` option is used. It is set to the
@@ -100,6 +109,7 @@ username = foo
 password = bar
 [crawl:something]
 aliases = [sth, some]
 type = some-complex-crawler
 auth = auth:example
 on_conflict = no-delete
--- a/3
+++ b/3
@@ -1,5 +1,6 @@
 Copyright 2019-2021 Garmelon, I-Al-Istannen, danstooamerican, pavelzw,
-                    TheChristophe, Scriptim, thelukasprobst, Toorero
+                    TheChristophe, Scriptim, thelukasprobst, Toorero,
                    Mr-Pine
 Permission is hereby granted, free of charge, to any person obtaining a copy of
 this software and associated documentation files (the "Software"), to deal in
--- a/PFERD/main.py
+++ b/PFERD/main.py
@@ -47,6 +47,8 @@ def configure_logging_from_args(args: argparse.Namespace) -> None:
        log.output_explain = args.explain
    if args.status is not None:
        log.output_status = args.status
    if args.show_not_deleted is not None:
        log.output_not_deleted = args.show_not_deleted
    if args.report is not None:
        log.output_report = args.report
@@ -72,6 +74,8 @@ def configure_logging_from_config(args: argparse.Namespace, config: Config) -> N
            log.output_status = config.default_section.status()
        if args.report is None:
            log.output_report = config.default_section.report()
        if args.show_not_deleted is None:
            log.output_not_deleted = config.default_section.show_not_deleted()
    except ConfigOptionError as e:
        log.error(str(e))
        sys.exit(1)
--- a/PFERD/cli/parser.py
+++ b/PFERD/cli/parser.py
@@ -215,6 +215,11 @@ PARSER.add_argument(
    action=BooleanOptionalAction,
    help="whether crawlers should share cookies where applicable"
 )
 PARSER.add_argument(
    "--show-not-deleted",
    action=BooleanOptionalAction,
    help="print messages in status and report when PFERD did not delete a local only file"
 )
 def load_default_section(
@@ -233,6 +238,7 @@ def load_default_section(
        section["report"] = "yes" if args.report else "no"
    if args.share_cookies is not None:
        section["share_cookies"] = "yes" if args.share_cookies else "no"
-
+    if args.show_not_deleted is not None:
        section["show_not_deleted"] = "yes" if args.show_not_deleted else "no"
 SUBPARSERS = PARSER.add_subparsers(title="crawlers")
--- a/PFERD/config.py
+++ b/PFERD/config.py
@@ -82,6 +82,9 @@ class DefaultSection(Section):
    def report(self) -> bool:
        return self.s.getboolean("report", fallback=True)
    def show_not_deleted(self) -> bool:
        return self.s.getboolean("show_not_deleted", fallback=True)
    def share_cookies(self) -> bool:
        return self.s.getboolean("share_cookies", fallback=True)
--- a/PFERD/crawl/ilias/file_templates.py
+++ b/PFERD/crawl/ilias/file_templates.py
@@ -1,6 +1,10 @@
 from enum import Enum
 from typing import Optional
 import bs4
 from PFERD.utils import soupify
 _link_template_plain = "{{link}}"
 _link_template_fancy = """
 <!DOCTYPE html>
@@ -94,6 +98,71 @@ _link_template_internet_shortcut = """
 URL={{link}}
 """.strip()
 _learning_module_template = """
 <!DOCTYPE html>
 <html lang="en">
    <head>
        <meta charset="UTF-8">
        <title>{{name}}</title>
    </head>
    <style>
    * {
        box-sizing: border-box;
    }
    .center-flex {
        display: flex;
        align-items: center;
        justify-content: center;
    }
    .nav {
        display: flex;
        justify-content: space-between;
    }
    </style>
    <body class="center-flex">
 {{body}}
    </body>
 </html>
 """
 def learning_module_template(body: bs4.Tag, name: str, prev: Optional[str], next: Optional[str]) -> str:
    # Seems to be comments, ignore those.
    for elem in body.select(".il-copg-mob-fullscreen-modal"):
        elem.decompose()
    nav_template = """
        <div class="nav">
            {{left}}
            {{right}}
        </div>
    """
    if prev and body.select_one(".ilc_page_lnav_LeftNavigation"):
        text = body.select_one(".ilc_page_lnav_LeftNavigation").getText().strip()
        left = f'<a href="{prev}">{text}</a>'
    else:
        left = "<span></span>"
    if next and body.select_one(".ilc_page_rnav_RightNavigation"):
        text = body.select_one(".ilc_page_rnav_RightNavigation").getText().strip()
        right = f'<a href="{next}">{text}</a>'
    else:
        right = "<span></span>"
    if top_nav := body.select_one(".ilc_page_tnav_TopNavigation"):
        top_nav.replace_with(
            soupify(nav_template.replace("{{left}}", left).replace("{{right}}", right).encode())
        )
    if bot_nav := body.select_one(".ilc_page_bnav_BottomNavigation"):
        bot_nav.replace_with(soupify(nav_template.replace(
            "{{left}}", left).replace("{{right}}", right).encode())
        )
    body = body.prettify()
    return _learning_module_template.replace("{{body}}", body).replace("{{name}}", name)
 class Links(Enum):
    IGNORE = "ignore"
@@ -102,24 +171,24 @@ class Links(Enum):
    INTERNET_SHORTCUT = "internet-shortcut"
    def template(self) -> Optional[str]:
-        if self == self.FANCY:
+        if self == Links.FANCY:
            return _link_template_fancy
-        elif self == self.PLAINTEXT:
+        elif self == Links.PLAINTEXT:
            return _link_template_plain
-        elif self == self.INTERNET_SHORTCUT:
+        elif self == Links.INTERNET_SHORTCUT:
            return _link_template_internet_shortcut
-        elif self == self.IGNORE:
+        elif self == Links.IGNORE:
            return None
        raise ValueError("Missing switch case")
    def extension(self) -> Optional[str]:
-        if self == self.FANCY:
+        if self == Links.FANCY:
            return ".html"
-        elif self == self.PLAINTEXT:
+        elif self == Links.PLAINTEXT:
            return ".txt"
-        elif self == self.INTERNET_SHORTCUT:
+        elif self == Links.INTERNET_SHORTCUT:
            return ".url"
-        elif self == self.IGNORE:
+        elif self == Links.IGNORE:
            return None
        raise ValueError("Missing switch case")
--- a/PFERD/crawl/ilias/ilias_html_cleaner.py
+++ b/PFERD/crawl/ilias/ilias_html_cleaner.py
@@ -82,7 +82,7 @@ def clean(soup: BeautifulSoup) -> BeautifulSoup:
            dummy.decompose()
        if len(children) > 1:
            continue
-        if type(children[0]) == Comment:
+        if isinstance(type(children[0]), Comment):
            dummy.decompose()
    for hrule_imposter in soup.find_all(class_="ilc_section_Separator"):
--- a/PFERD/crawl/ilias/kit_ilias_html.py
+++ b/PFERD/crawl/ilias/kit_ilias_html.py
@@ -22,6 +22,7 @@ class IliasElementType(Enum):
    FOLDER = "folder"
    FORUM = "forum"
    LINK = "link"
    LEARNING_MODULE = "learning_module"
    BOOKING = "booking"
    MEETING = "meeting"
    SURVEY = "survey"
@@ -71,6 +72,14 @@ class IliasForumThread:
    mtime: Optional[datetime]
@dataclass
 class IliasLearningModulePage:
    title: str
    content: Tag
    next_url: Optional[str]
    previous_url: Optional[str]
 class IliasPage:
    def __init__(self, soup: BeautifulSoup, _page_url: str, source_element: Optional[IliasPageElement]):
@@ -79,6 +88,16 @@ class IliasPage:
        self._page_type = source_element.type if source_element else None
        self._source_name = source_element.name if source_element else ""
    @staticmethod
    def is_root_page(soup: BeautifulSoup) -> bool:
        permalink = soup.find(id="current_perma_link")
        if permalink is None:
            return False
        value = permalink.attrs.get("value")
        if value is None:
            return False
        return "goto.php?target=root_" in value
    def get_child_elements(self) -> List[IliasPageElement]:
        """
        Return all child page elements you can find here.
@@ -126,6 +145,34 @@ class IliasPage:
        return BeautifulSoup(raw_html, "html.parser")
    def get_learning_module_data(self) -> Optional[IliasLearningModulePage]:
        if not self._is_learning_module_page():
            return None
        content = self._soup.select_one("#ilLMPageContent")
        title = self._soup.select_one(".ilc_page_title_PageTitle").getText().strip()
        return IliasLearningModulePage(
            title=title,
            content=content,
            next_url=self._find_learning_module_next(),
            previous_url=self._find_learning_module_prev()
        )
    def _find_learning_module_next(self) -> Optional[str]:
        for link in self._soup.select("a.ilc_page_rnavlink_RightNavigationLink"):
            url = self._abs_url_from_link(link)
            if "baseClass=ilLMPresentationGUI" not in url:
                continue
            return url
        return None
    def _find_learning_module_prev(self) -> Optional[str]:
        for link in self._soup.select("a.ilc_page_lnavlink_LeftNavigationLink"):
            url = self._abs_url_from_link(link)
            if "baseClass=ilLMPresentationGUI" not in url:
                continue
            return url
        return None
    def get_download_forum_data(self) -> Optional[IliasDownloadForumData]:
        form = self._soup.find("form", attrs={"action": lambda x: x and "fallbackCmd=showThreads" in x})
        if not form:
@@ -212,6 +259,12 @@ class IliasPage:
            return False
        return "target=copa_" in link.get("value")
    def _is_learning_module_page(self) -> bool:
        link = self._soup.find(id="current_perma_link")
        if not link:
            return False
        return "target=pg_" in link.get("value")
    def _contains_collapsed_future_meetings(self) -> bool:
        return self._uncollapse_future_meetings_url() is not None
@@ -802,6 +855,9 @@ class IliasPage:
        if "cmdClass=ilobjtestgui" in parsed_url.query:
            return IliasElementType.TEST
        if "baseClass=ilLMPresentationGUI" in parsed_url.query:
            return IliasElementType.LEARNING_MODULE
        # Booking and Meeting can not be detected based on the link. They do have a ref_id though, so
        # try to guess it from the image.
--- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py
+++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py
@@ -1,8 +1,11 @@
 import asyncio
 import base64
 import os
 import re
 from collections.abc import Awaitable, Coroutine
 from pathlib import PurePath
-from typing import Any, Callable, Dict, List, Optional, Set, Union, cast
+from typing import Any, Callable, Dict, List, Literal, Optional, Set, Union, cast
 from urllib.parse import urljoin
 import aiohttp
 import yarl
@@ -16,10 +19,10 @@ from ...output_dir import FileSink, Redownload
 from ...utils import fmt_path, soupify, url_set_query_param
 from ..crawler import AWrapped, CrawlError, CrawlToken, CrawlWarning, DownloadToken, anoncritical
 from ..http_crawler import HttpCrawler, HttpCrawlerSection
-from .file_templates import Links
+from .file_templates import Links, learning_module_template
 from .ilias_html_cleaner import clean, insert_base_markup
-from .kit_ilias_html import (IliasElementType, IliasForumThread, IliasPage, IliasPageElement,
+from .kit_ilias_html import (IliasElementType, IliasForumThread, IliasLearningModulePage, IliasPage,
-                             _sanitize_path_name, parse_ilias_forum_export)
+                             IliasPageElement, _sanitize_path_name, parse_ilias_forum_export)
 TargetType = Union[str, int]
@@ -239,7 +242,7 @@ instance's greatest bottleneck.
                # Duplicated code, but the root page is special - we want to avoid fetching it twice!
                while next_stage_url:
-                    soup = await self._get_page(next_stage_url)
+                    soup = await self._get_page(next_stage_url, root_page_allowed=True)
                    if current_parent is None and expected_id is not None:
                        perma_link_element: Tag = soup.find(id="current_perma_link")
@@ -394,6 +397,8 @@ instance's greatest bottleneck.
                "[bright_black](surveys contain no relevant data)"
            )
            return None
        elif element.type == IliasElementType.LEARNING_MODULE:
            return await self._handle_learning_module(element, element_path)
        elif element.type == IliasElementType.LINK:
            return await self._handle_link(element, element_path)
        elif element.type == IliasElementType.BOOKING:
@@ -739,12 +744,141 @@ instance's greatest bottleneck.
            sink.file.write(content.encode("utf-8"))
            sink.done()
-    async def _get_page(self, url: str) -> BeautifulSoup:
+    async def _handle_learning_module(
        self,
        element: IliasPageElement,
        element_path: PurePath,
    ) -> Optional[Coroutine[Any, Any, None]]:
        maybe_cl = await self.crawl(element_path)
        if not maybe_cl:
            return None
        return self._crawl_learning_module(element, maybe_cl)
    @_iorepeat(3, "crawling learning module")
    @anoncritical
    async def _crawl_learning_module(self, element: IliasPageElement, cl: CrawlToken) -> None:
        elements: List[IliasLearningModulePage] = []
        async with cl:
            log.explain_topic(f"Parsing initial HTML page for {fmt_path(cl.path)}")
            log.explain(f"URL: {element.url}")
            soup = await self._get_page(element.url)
            page = IliasPage(soup, element.url, None)
            if next := page.get_learning_module_data():
                elements.extend(await self._crawl_learning_module_direction(
                    cl.path, next.previous_url, "left"
                ))
                elements.append(next)
                elements.extend(await self._crawl_learning_module_direction(
                    cl.path, next.next_url, "right"
                ))
        # Reflect their natural ordering in the file names
        for index, lm_element in enumerate(elements):
            lm_element.title = f"{index:02}_{lm_element.title}"
        tasks: List[Awaitable[None]] = []
        for index, elem in enumerate(elements):
            prev_url = elements[index - 1].title if index > 0 else None
            next_url = elements[index + 1].title if index < len(elements) - 1 else None
            tasks.append(asyncio.create_task(
                self._download_learning_module_page(cl.path, elem, prev_url, next_url)
            ))
        # And execute them
        await self.gather(tasks)
    async def _crawl_learning_module_direction(
        self,
        path: PurePath,
        start_url: Optional[str],
        dir: Union[Literal["left"], Literal["right"]]
    ) -> List[IliasLearningModulePage]:
        elements: List[IliasLearningModulePage] = []
        if not start_url:
            return elements
        next_element_url: Optional[str] = start_url
        counter = 0
        while next_element_url:
            log.explain_topic(f"Parsing HTML page for {fmt_path(path)} ({dir}-{counter})")
            log.explain(f"URL: {next_element_url}")
            soup = await self._get_page(next_element_url)
            page = IliasPage(soup, next_element_url, None)
            if next := page.get_learning_module_data():
                elements.append(next)
                if dir == "left":
                    next_element_url = next.previous_url
                else:
                    next_element_url = next.next_url
            counter += 1
        return elements
    @anoncritical
    @_iorepeat(3, "saving learning module page")
    async def _download_learning_module_page(
        self,
        parent_path: PurePath,
        element: IliasLearningModulePage,
        prev: Optional[str],
        next: Optional[str]
    ) -> None:
        path = parent_path / (_sanitize_path_name(element.title) + ".html")
        maybe_dl = await self.download(path)
        if not maybe_dl:
            return
        my_path = self._transformer.transform(maybe_dl.path)
        if not my_path:
            return
        if prev:
            prev_p = self._transformer.transform(parent_path / (_sanitize_path_name(prev) + ".html"))
            if prev_p:
                prev = os.path.relpath(prev_p, my_path.parent)
            else:
                prev = None
        if next:
            next_p = self._transformer.transform(parent_path / (_sanitize_path_name(next) + ".html"))
            if next_p:
                next = os.path.relpath(next_p, my_path.parent)
            else:
                next = None
        async with maybe_dl as (bar, sink):
            content = element.content
            content = await self.internalize_images(content)
            sink.file.write(learning_module_template(content, maybe_dl.path.name, prev, next).encode("utf-8"))
            sink.done()
    async def internalize_images(self, tag: Tag) -> Tag:
        """
        Tries to fetch ILIAS images and embed them as base64 data.
        """
        log.explain_topic("Internalizing images")
        for elem in tag.find_all(recursive=True):
            if not isinstance(elem, Tag):
                continue
            if elem.name == "img":
                if src := elem.attrs.get("src", None):
                    url = urljoin(_ILIAS_URL, src)
                    if not url.startswith(_ILIAS_URL):
                        continue
                    log.explain(f"Internalizing {url!r}")
                    img = await self._get_authenticated(url)
                    elem.attrs["src"] = "data:;base64," + base64.b64encode(img).decode()
            if elem.name == "iframe" and elem.attrs.get("src", "").startswith("//"):
                # For unknown reasons the protocol seems to be stripped.
                elem.attrs["src"] = "https:" + elem.attrs["src"]
        return tag
    async def _get_page(self, url: str, root_page_allowed: bool = False) -> BeautifulSoup:
        auth_id = await self._current_auth_id()
        async with self.session.get(url) as request:
            soup = soupify(await request.read())
            if self._is_logged_in(soup):
-                return soup
+                return self._verify_page(soup, url, root_page_allowed)
        # We weren't authenticated, so try to do that
        await self.authenticate(auth_id)
@@ -753,14 +887,26 @@ instance's greatest bottleneck.
        async with self.session.get(url) as request:
            soup = soupify(await request.read())
            if self._is_logged_in(soup):
-                return soup
+                return self._verify_page(soup, url, root_page_allowed)
        raise CrawlError("get_page failed even after authenticating")
    def _verify_page(self, soup: BeautifulSoup, url: str, root_page_allowed: bool) -> BeautifulSoup:
        if IliasPage.is_root_page(soup) and not root_page_allowed:
            raise CrawlError(
                "Unexpectedly encountered ILIAS root page. "
                "This usually happens because the ILIAS instance is broken. "
                "If so, wait a day or two and try again. "
                "It could also happen because a crawled element links to the ILIAS root page. "
                "If so, use a transform with a ! as target to ignore the particular element. "
                f"The redirect came from {url}"
            )
        return soup
    async def _post_authenticated(
        self,
        url: str,
        data: dict[str, Union[str, List[str]]]
-    ) -> BeautifulSoup:
+    ) -> bytes:
        auth_id = await self._current_auth_id()
        form_data = aiohttp.FormData()
@@ -780,6 +926,22 @@ instance's greatest bottleneck.
                return await request.read()
        raise CrawlError("post_authenticated failed even after authenticating")
    async def _get_authenticated(self, url: str) -> bytes:
        auth_id = await self._current_auth_id()
        async with self.session.get(url, allow_redirects=False) as request:
            if request.status == 200:
                return await request.read()
        # We weren't authenticated, so try to do that
        await self.authenticate(auth_id)
        # Retry once after authenticating. If this fails, we will die.
        async with self.session.get(url, allow_redirects=False) as request:
            if request.status == 200:
                return await request.read()
        raise CrawlError("get_authenticated failed even after authenticating")
    # We repeat this as the login method in shibboleth doesn't handle I/O errors.
    # Shibboleth is quite reliable as well, the repeat is likely not critical here.
    @ _iorepeat(3, "Login", failure_is_error=True)
--- a/PFERD/logging.py
+++ b/PFERD/logging.py
@@ -59,6 +59,7 @@ class Log:
        # Whether different parts of the output are enabled or disabled
        self.output_explain = False
        self.output_status = True
        self.output_not_deleted = True
        self.output_report = True
    def _update_live(self) -> None:
@@ -207,6 +208,17 @@ directly or as a GitHub issue: https://github.com/Garmelon/PFERD/issues/new
            action = escape(f"{action:<{self.STATUS_WIDTH}}")
            self.print(f"{style}{action}[/] {escape(text)} {suffix}")
    def not_deleted(self, style: str, action: str, text: str, suffix: str = "") -> None:
        """
        Print a message for a local only file that wasn't
        deleted while crawling. Allows markup in the "style"
        argument which will be applied to the "action" string.
        """
        if self.output_status and self.output_not_deleted:
            action = escape(f"{action:<{self.STATUS_WIDTH}}")
            self.print(f"{style}{action}[/] {escape(text)} {suffix}")
    def report(self, text: str) -> None:
        """
        Print a report after crawling. Allows markup.
@@ -215,6 +227,14 @@ directly or as a GitHub issue: https://github.com/Garmelon/PFERD/issues/new
        if self.output_report:
            self.print(text)
    def report_not_deleted(self, text: str) -> None:
        """
        Print a report for a local only file that wasn't deleted after crawling. Allows markup.
        """
        if self.output_report and self.output_not_deleted:
            self.print(text)
    @contextmanager
    def _bar(
            self,
--- a/PFERD/output_dir.py
+++ b/PFERD/output_dir.py
@@ -44,6 +44,7 @@ class OnConflict(Enum):
    LOCAL_FIRST = "local-first"
    REMOTE_FIRST = "remote-first"
    NO_DELETE = "no-delete"
    NO_DELETE_PROMPT_OVERWRITE = "no-delete-prompt-overwrite"
    @staticmethod
    def from_string(string: str) -> "OnConflict":
@@ -51,7 +52,7 @@ class OnConflict(Enum):
            return OnConflict(string)
        except ValueError:
            raise ValueError("must be one of 'prompt', 'local-first',"
-                             " 'remote-first', 'no-delete'")
+                             " 'remote-first', 'no-delete', 'no-delete-prompt-overwrite'")
@dataclass
@@ -264,7 +265,7 @@ class OutputDirectory:
            on_conflict: OnConflict,
            path: PurePath,
    ) -> bool:
-        if on_conflict == OnConflict.PROMPT:
+        if on_conflict in {OnConflict.PROMPT, OnConflict.NO_DELETE_PROMPT_OVERWRITE}:
            async with log.exclusive_output():
                prompt = f"Replace {fmt_path(path)} with remote file?"
                return await prompt_yes_no(prompt, default=False)
@@ -283,7 +284,7 @@ class OutputDirectory:
            on_conflict: OnConflict,
            path: PurePath,
    ) -> bool:
-        if on_conflict == OnConflict.PROMPT:
+        if on_conflict in {OnConflict.PROMPT, OnConflict.NO_DELETE_PROMPT_OVERWRITE}:
            async with log.exclusive_output():
                prompt = f"Recursively delete {fmt_path(path)} and replace with remote file?"
                return await prompt_yes_no(prompt, default=False)
@@ -303,7 +304,7 @@ class OutputDirectory:
            path: PurePath,
            parent: PurePath,
    ) -> bool:
-        if on_conflict == OnConflict.PROMPT:
+        if on_conflict in {OnConflict.PROMPT, OnConflict.NO_DELETE_PROMPT_OVERWRITE}:
            async with log.exclusive_output():
                prompt = f"Delete {fmt_path(parent)} so remote file {fmt_path(path)} can be downloaded?"
                return await prompt_yes_no(prompt, default=False)
@@ -330,7 +331,7 @@ class OutputDirectory:
            return False
        elif on_conflict == OnConflict.REMOTE_FIRST:
            return True
-        elif on_conflict == OnConflict.NO_DELETE:
+        elif on_conflict in {OnConflict.NO_DELETE, OnConflict.NO_DELETE_PROMPT_OVERWRITE}:
            return False
        # This should never be reached
@@ -495,7 +496,7 @@ class OutputDirectory:
            except OSError:
                pass
        else:
-            log.status("[bold bright_magenta]", "Not deleted", fmt_path(pure))
+            log.not_deleted("[bold bright_magenta]", "Not deleted", fmt_path(pure))
            self._report.not_delete_file(pure)
    def load_prev_report(self) -> None:
--- a/PFERD/pferd.py
+++ b/PFERD/pferd.py
@@ -1,5 +1,5 @@
 from pathlib import Path
-from typing import Dict, List, Optional
+from typing import Dict, List, Optional, Set
 from rich.markup import escape
@@ -43,16 +43,24 @@ class Pferd:
        crawl_sections = [name for name, _ in config.crawl_sections()]
-        crawlers_to_run = []  # With crawl: prefix
+        crawlers_to_run = set()  # With crawl: prefix
        unknown_names = []  # Without crawl: prefix
        for name in cli_crawlers:
            section_name = f"crawl:{name}"
            if section_name in crawl_sections:
                log.explain(f"Crawler section named {section_name!r} exists")
-                crawlers_to_run.append(section_name)
+                crawlers_to_run.add(section_name)
-            else:
+            # interprete name as alias of a crawler
-                log.explain(f"There's no crawler section named {section_name!r}")
+            alias_names = self._find_crawlers_by_alias(name, config)
            if alias_names:
                crawlers_to_run.update(alias_names)
                log.explain_topic(f"Crawler alias {name!r} found corresponding crawler sections:")
                for alias_name in alias_names:
                    log.explain(f"Crawler section named {alias_name!r} with alias {name!r} exists")
            if not section_name in crawl_sections and not alias_names:
                log.explain(f"There's neither a crawler section named {section_name!r} nor does a crawler with alias {name!r} exist.")
                unknown_names.append(name)
        if unknown_names:
@@ -65,6 +73,14 @@ class Pferd:
        return crawlers_to_run
    def _find_crawlers_by_alias(self, alias: str, config: Config) -> Set[str]:
        alias_names = set()
        for (section_name, section) in config.crawl_sections():
            section_aliases = section.get("aliases", [])
            if alias in section_aliases:
                alias_names.add(section_name)
        return alias_names
    def _find_crawlers_to_run(
            self,
            config: Config,
@@ -180,7 +196,7 @@ class Pferd:
                log.report(f"  [bold bright_magenta]Deleted[/] {fmt_path(path)}")
            for path in sorted(crawler.report.not_deleted_files):
                something_changed = True
-                log.report(f"  [bold bright_magenta]Not deleted[/] {fmt_path(path)}")
+                log.report_not_deleted(f"  [bold bright_magenta]Not deleted[/] {fmt_path(path)}")
            for warning in crawler.report.encountered_warnings:
                something_changed = True
Author	SHA1	Message	Date
Julius Rüberg	29251fa003	Merge `77c1f1516c` into `e41a22149e`	2023-08-26 17:16:00 +02:00
Mr. Pine	e41a22149e	Add default `show-not-deleted` option If set to `no`, PFERD won't print status or report messages for not deleted files	2023-08-26 17:13:45 +02:00
I-Al-Istannen	68c398f1fe	Add support for ILIAS learning modules	2023-08-02 13:34:54 +02:00
I-Al-Istannen	123a57beec	Fix mypy unreachable error in file_templates	2023-07-29 18:36:33 +02:00
I-Al-Istannen	d204dac8ce	Detect unexpected root page redirects and abort operation	2023-07-29 18:36:33 +02:00
Mr. Pine	443f7fe839	Add `no-delete-prompt-overwrite` crawler conflict resolution option (#75 )	2023-07-29 18:36:33 +02:00
Julius Rüberg	77c1f1516c	Used proper plural	2021-11-02 12:41:40 +01:00
Julius Rüberg	9e12e96d90	Added alias functionality	2021-11-02 03:42:08 +01:00