Merge pull request #8 from pavelzw/master

Fix version number
2023-12-21 10:23:01 +01:00 · 2020-06-26 17:52:05 +02:00 · 2020-06-26 17:49:47 +02:00 · 2020-06-26 17:35:03 +02:00 · 2020-06-26 17:30:45 +02:00 · 2020-06-26 17:24:36 +02:00
13 changed files with 457 additions and 188 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,5 +1,8 @@
 __pycache__/
 .venv/
+venv/
+.idea/
+build/
 .mypy_cache/
 .tmp/
 .env
--- a/PFERD/download_summary.py
+++ b/PFERD/download_summary.py
@ -0,0 +1,69 @@
+"""
+Provides a summary that keeps track of new, modified or deleted files.
+"""
+from pathlib import Path
+from typing import List
+
+
+class DownloadSummary:
+    """
+    Collects the paths of files that were added, changed or removed and reports on them.
+    """
+
+    def __init__(self) -> None:
+        self._new_files: List[Path] = []
+        self._modified_files: List[Path] = []
+        self._deleted_files: List[Path] = []
+
+    @property
+    def new_files(self) -> List[Path]:
+        """
+        A copy of the list of files registered as new.
+        """
+        return list(self._new_files)
+
+    @property
+    def modified_files(self) -> List[Path]:
+        """
+        A copy of the list of files registered as modified.
+        """
+        return list(self._modified_files)
+
+    @property
+    def deleted_files(self) -> List[Path]:
+        """
+        A copy of the list of files registered as deleted.
+        """
+        return list(self._deleted_files)
+
+    def add_new_file(self, path: Path) -> None:
+        """
+        Records the given path as a new file.
+        """
+        self._new_files.append(path)
+
+    def add_modified_file(self, path: Path) -> None:
+        """
+        Records the given path as a changed file.
+        """
+        self._modified_files.append(path)
+
+    def add_deleted_file(self, path: Path) -> None:
+        """
+        Records the given path as a deleted file.
+        """
+        self._deleted_files.append(path)
+
+    def merge(self, summary: 'DownloadSummary') -> None:
+        """
+        Appends all entries of the passed summary to this one. The passed summary is not modified.
+        """
+        self._new_files.extend(summary.new_files)
+        self._modified_files.extend(summary.modified_files)
+        self._deleted_files.extend(summary.deleted_files)
+
+    def has_updates(self) -> bool:
+        """
+        Tells whether at least one new, modified or deleted file was registered.
+        """
+        return any((self._new_files, self._modified_files, self._deleted_files))
--- a/PFERD/ilias/__init__.py
+++ b/PFERD/ilias/__init__.py
@ -3,7 +3,8 @@ Synchronizing files from ILIAS instances (https://www.ilias.de/).
 """

 from .authenticators import IliasAuthenticator, KitShibbolethAuthenticator
-from .crawler import IliasCrawler, IliasDirectoryFilter, IliasDirectoryType
+from .crawler import (IliasCrawler, IliasCrawlerEntry, IliasDirectoryFilter,
+                      IliasElementType)
 from .downloader import (IliasDownloader, IliasDownloadInfo,
                         IliasDownloadStrategy, download_everything,
                         download_modified_or_new)
--- a/PFERD/ilias/crawler.py
+++ b/PFERD/ilias/crawler.py
@ -8,7 +8,7 @@ import logging
 import re
 from enum import Enum
 from pathlib import Path
-from typing import Any, Callable, Dict, List, Optional
+from typing import Any, Callable, Dict, List, Optional, Union
 from urllib.parse import (parse_qs, urlencode, urljoin, urlparse, urlsplit,
                          urlunsplit)

@ -26,16 +26,52 @@ LOGGER = logging.getLogger(__name__)
 PRETTY = PrettyLogger(LOGGER)


-class IliasDirectoryType(Enum):
+class IliasElementType(Enum):
    """
-    The type of an ilias directory.
+    The type of an ilias element.
    """
-    FOLDER = "FOLDER"
-    VIDEO = "VIDEO"
-    EXERCISE = "EXERCISE"
+    REGULAR_FOLDER = "REGULAR_FOLDER"
+    VIDEO_FOLDER = "VIDEO_FOLDER"
+    EXERCISE_FOLDER = "EXERCISE_FOLDER"
+    REGULAR_FILE = "REGULAR_FILE"
+    VIDEO_FILE = "VIDEO_FILE"
+    FORUM = "FORUM"
+    EXTERNAL_LINK = "EXTERNAL_LINK"


-IliasDirectoryFilter = Callable[[Path, IliasDirectoryType], bool]
+IliasDirectoryFilter = Callable[[Path, IliasElementType], bool]
+
+
+class IliasCrawlerEntry:
+    # pylint: disable=too-few-public-methods
+    """
+    Internal record for a single element found while crawling ILIAS. It carries the
+    element's path, a callable resolving its url, its type and its modification date.
+    """
+
+    def __init__(
+            self,
+            path: Path,
+            url: Union[str, Callable[[], Optional[str]]],
+            entry_type: IliasElementType,
+            modification_date: Optional[datetime.datetime]
+    ):
+        self.path = path
+        self.entry_type = entry_type
+        self.modification_date = modification_date
+
+        # The url is always stored as a callable, plain strings are wrapped in one
+        if isinstance(url, str):
+            constant_url: str = url
+
+            def resolve() -> Optional[str]:
+                return constant_url
+
+            self.url: Callable[[], Optional[str]] = resolve
+        else:
+            self.url = url
+
+    def to_download_info(self) -> Optional[IliasDownloadInfo]:
+        """
+        Builds an IliasDownloadInfo from this entry.
+        Returns None unless the entry is a regular file or a video file.
+        """
+        file_types = (IliasElementType.REGULAR_FILE, IliasElementType.VIDEO_FILE)
+        if self.entry_type not in file_types:
+            return None
+        return IliasDownloadInfo(self.path, self.url, self.modification_date)


 class IliasCrawler:
@ -62,12 +98,6 @@ class IliasCrawler:
        self._authenticator = authenticator
        self.dir_filter = dir_filter

-    def _abs_url_from_link(self, link_tag: bs4.Tag) -> str:
-        """
-        Create an absolute url from an <a> tag.
-        """
-        return urljoin(self._base_url, link_tag.get("href"))
-
    @staticmethod
    def _url_set_query_param(url: str, param: str, value: str) -> str:
        """
@ -102,7 +132,8 @@ class IliasCrawler:
            )

        # And treat it as a folder
-        return self._crawl_folder(Path(""), root_url)
+        entries: List[IliasCrawlerEntry] = self._crawl_folder(Path(""), root_url)
+        return self._iterate_entries_to_download_infos(entries)

    def _is_course_id_valid(self, root_url: str, course_id: str) -> bool:
        response: requests.Response = self._session.get(root_url)
@ -115,14 +146,92 @@ class IliasCrawler:
        Raises:
            FatalException: if an unrecoverable error occurs
        """
-        return self._crawl_folder(Path(""), self._base_url + "?baseClass=ilPersonalDesktopGUI")
+        entries: List[IliasCrawlerEntry] = self._crawl_folder(
+            Path(""), self._base_url + "?baseClass=ilPersonalDesktopGUI"
+        )
+        return self._iterate_entries_to_download_infos(entries)

-    def _switch_on_crawled_type(
+    def _iterate_entries_to_download_infos(
            self,
+            entries: List[IliasCrawlerEntry]
+    ) -> List[IliasDownloadInfo]:
+        """
+        Turns crawler entries into download infos, crawling folder-like entries on the way.
+
+        The passed list is copied and used as a work list. Entries are taken from its end,
+        and everything found inside a folder-like entry is appended to it again.
+        External links, forums and entries rejected by the directory filter are skipped.
+        """
+        result: List[IliasDownloadInfo] = []
+        entries_to_process: List[IliasCrawlerEntry] = entries.copy()
+        while len(entries_to_process) > 0:
+            # pop() takes the last element, so the most recently added entry comes first
+            entry = entries_to_process.pop()
+
+            if entry.entry_type == IliasElementType.EXTERNAL_LINK:
+                PRETTY.not_searching(entry.path, "external link")
+                continue
+            if entry.entry_type == IliasElementType.FORUM:
+                PRETTY.not_searching(entry.path, "forum")
+                continue
+
+            # The filter is consulted for every remaining entry, files included
+            if not self.dir_filter(entry.path, entry.entry_type):
+                PRETTY.not_searching(entry.path, "user filter")
+                continue
+
+            # File entries are finished here, their url is not resolved in this method
+            download_info = entry.to_download_info()
+            if download_info is not None:
+                result.append(download_info)
+                continue
+
+            url = entry.url()
+
+            if url is None:
+                PRETTY.warning(f"Could not find url for {str(entry.path)!r}, skipping it")
+                continue
+
+            PRETTY.searching(entry.path)
+
+            # Folder-like entries are crawled and their children queued for processing
+            if entry.entry_type == IliasElementType.EXERCISE_FOLDER:
+                entries_to_process += self._crawl_exercises(entry.path, url)
+                continue
+            if entry.entry_type == IliasElementType.REGULAR_FOLDER:
+                entries_to_process += self._crawl_folder(entry.path, url)
+                continue
+            if entry.entry_type == IliasElementType.VIDEO_FOLDER:
+                entries_to_process += self._crawl_video_directory(entry.path, url)
+                continue
+
+        return result
+
+    def _crawl_folder(self, folder_path: Path, url: str) -> List[IliasCrawlerEntry]:
+        """
+        Collect a crawler entry for every item listed on a folder-like page.
+
+        Regular files are handed to the file crawler, every other typed element becomes
+        an entry without modification date. Elements without a type only cause a warning.
+        """
+        page = self._get_page(url, {})
+        entries: List[IliasCrawlerEntry] = []
+
+        # Every item title link on the page is classified individually
+        for anchor in page.select("a.il_ContainerItemTitle"):
+            target_url = self._abs_url_from_link(anchor)
+            child_path = Path(folder_path, anchor.getText().strip())
+            child_type = self._find_type_from_link(child_path, anchor, target_url)
+
+            if child_type is None:
+                PRETTY.warning(f"Found element without a type at {str(child_path)!r}")
+                continue
+
+            if child_type == IliasElementType.REGULAR_FILE:
+                entries.extend(self._crawl_file(folder_path, anchor, target_url))
+                continue
+
+            entries.append(IliasCrawlerEntry(child_path, target_url, child_type, None))
+
+        return entries
+
+    def _abs_url_from_link(self, link_tag: bs4.Tag) -> str:
+        """
+        Resolve the href of an <a> tag against the base url of this crawler.
+        """
+        href = link_tag.get("href")
+        return urljoin(self._base_url, href)
+
+    @staticmethod
+    def _find_type_from_link(
            path: Path,
            link_element: bs4.Tag,
            url: str
-    ) -> List[IliasDownloadInfo]:
+    ) -> Optional[IliasElementType]:
        """
        Decides which sub crawler to use for a given top level element.
        """
@ -131,28 +240,64 @@ class IliasCrawler:

        # file URLs contain "target=file"
        if "target=file_" in parsed_url.query:
-            LOGGER.debug("Interpreted as file.")
-            return self._crawl_file(path, link_element, url)
+            return IliasElementType.REGULAR_FILE

        # Skip forums
        if "cmd=showThreads" in parsed_url.query:
-            LOGGER.debug("Skipping forum %r", url)
-            return []
+            return IliasElementType.FORUM

        # Everything with a ref_id can *probably* be opened to reveal nested things
        # video groups, directories, exercises, etc
        if "ref_id=" in parsed_url.query:
-            LOGGER.debug("Processing folder-like...")
-            return self._switch_on_folder_like(path, link_element, url)
+            return IliasCrawler._find_type_from_folder_like(link_element, url)

        PRETTY.warning(
-            "Got unkwarning element type in switch. I am not sure what horror I found on the"
+            "Got unknown element type in switch. I am not sure what horror I found on the"
            f" ILIAS page. The element was at {str(path)!r} and it is {link_element!r})"
        )
-        return []
+        return None

    @staticmethod
-    def _crawl_file(path: Path, link_element: bs4.Tag, url: str) -> List[IliasDownloadInfo]:
+    def _find_type_from_folder_like(link_element: bs4.Tag, url: str) -> Optional[IliasElementType]:
+        """
+        Determines the element type of a link that looks like a folder.
+
+        The decision is based on the icon found inside the surrounding
+        "ilContainerListItemOuter" element. If that element or its icon can not be
+        found, a warning is printed and None is returned. Anything that is not
+        recognized as video folder, exercise, external link or forum is treated as a
+        regular folder.
+        """
+        # pylint: disable=too-many-return-statements
+
+        # Initialise explicitly: otherwise the name stays unbound when no parent matches
+        # and the None check below raises instead of printing the warning.
+        found_parent: Optional[bs4.Tag] = None
+
+        # We look for the outer div of our inner link, to find information around it
+        # (mostly the icon)
+        for parent in link_element.parents:
+            # Use .get, as indexing a parent without a class attribute raises a KeyError
+            if "ilContainerListItemOuter" in parent.get("class", []):
+                found_parent = parent
+                break
+
+        if found_parent is None:
+            PRETTY.warning(f"Could not find element icon for {url!r}")
+            return None
+
+        # Find the small descriptive icon to figure out the type
+        img_tag: Optional[bs4.Tag] = found_parent.select_one("img.ilListItemIcon")
+
+        if img_tag is None:
+            PRETTY.warning(f"Could not find image tag for {url!r}")
+            return None
+
+        if "opencast" in str(img_tag["alt"]).lower():
+            return IliasElementType.VIDEO_FOLDER
+
+        if str(img_tag["src"]).endswith("icon_exc.svg"):
+            return IliasElementType.EXERCISE_FOLDER
+
+        if str(img_tag["src"]).endswith("icon_webr.svg"):
+            return IliasElementType.EXTERNAL_LINK
+
+        if str(img_tag["src"]).endswith("frm.svg"):
+            return IliasElementType.FORUM
+
+        return IliasElementType.REGULAR_FOLDER
+
+    @staticmethod
+    def _crawl_file(path: Path, link_element: bs4.Tag, url: str) -> List[IliasCrawlerEntry]:
        """
        Crawls a file.
        """
@ -183,80 +328,11 @@ class IliasCrawler:
        name = link_element.getText()
        full_path = Path(path, name + "." + file_type)

-        return [IliasDownloadInfo(full_path, url, modification_date)]
+        return [
+            IliasCrawlerEntry(full_path, url, IliasElementType.REGULAR_FILE, modification_date)
+        ]

-    def _switch_on_folder_like(
-            self,
-            parent_path: Path,
-            link_element: bs4.Tag,
-            url: str
-    ) -> List[IliasDownloadInfo]:
-        """
-        Try crawling something that looks like a folder.
-        """
-        # pylint: disable=too-many-return-statements
-
-        element_path = Path(parent_path, link_element.getText().strip())
-
-        found_parent: Optional[bs4.Tag] = None
-
-        # We look for the outer div of our inner link, to find information around it
-        # (mostly the icon)
-        for parent in link_element.parents:
-            if "ilContainerListItemOuter" in parent["class"]:
-                found_parent = parent
-                break
-
-        if found_parent is None:
-            PRETTY.warning(f"Could not find element icon for {url!r}")
-            return []
-
-        # Find the small descriptive icon to figure out the type
-        img_tag: Optional[bs4.Tag] = found_parent.select_one("img.ilListItemIcon")
-
-        if img_tag is None:
-            PRETTY.warning(f"Could not find image tag for {url!r}")
-            return []
-
-        directory_type = IliasDirectoryType.FOLDER
-
-        if "opencast" in str(img_tag["alt"]).lower():
-            directory_type = IliasDirectoryType.VIDEO
-
-        if str(img_tag["src"]).endswith("icon_exc.svg"):
-            directory_type = IliasDirectoryType.EXERCISE
-
-        if not self.dir_filter(element_path, directory_type):
-            PRETTY.not_searching(element_path, "user filter")
-            return []
-
-        PRETTY.searching(element_path)
-
-        # A forum
-        if str(img_tag["src"]).endswith("frm.svg"):
-            LOGGER.debug("Skipping forum at %r", url)
-            PRETTY.not_searching(element_path, "forum")
-            return []
-
-        # An exercise
-        if directory_type == IliasDirectoryType.EXERCISE:
-            LOGGER.debug("Crawling exercises at %r", url)
-            return self._crawl_exercises(element_path, url)
-
-        if str(img_tag["src"]).endswith("icon_webr.svg"):
-            LOGGER.debug("Skipping external link at %r", url)
-            PRETTY.not_searching(element_path, "external link")
-            return []
-
-        # Match the opencast video plugin
-        if directory_type == IliasDirectoryType.VIDEO:
-            LOGGER.debug("Found video site: %r", url)
-            return self._crawl_video_directory(element_path, url)
-
-        # Assume it is a folder
-        return self._crawl_folder(element_path, self._abs_url_from_link(link_element))
-
-    def _crawl_video_directory(self, video_dir_path: Path, url: str) -> List[IliasDownloadInfo]:
+    def _crawl_video_directory(self, video_dir_path: Path, url: str) -> List[IliasCrawlerEntry]:
        """
        Crawl the video overview site.
        """
@ -272,6 +348,71 @@ class IliasCrawler:
            {"limit": 800, "cmd": "asyncGetTableGUI", "cmdMode": "asynch"}
        )

+        # If we find a page selected, we probably need to respect pagination
+        if self._is_paginated_video_page(video_list_soup):
+            second_stage_url = self._abs_url_from_link(content_link)
+
+            return self._crawl_paginated_video_directory(
+                video_dir_path, video_list_soup, second_stage_url
+            )
+
+        return self._crawl_video_directory_second_stage(video_dir_path, video_list_soup)
+
+    @staticmethod
+    def _is_paginated_video_page(soup: bs4.BeautifulSoup) -> bool:
+        """
+        Tell whether the page contains an element whose id matches "tab_page_sel.+".
+        """
+        page_selector = soup.find(id=re.compile(r"tab_page_sel.+"))
+        return page_selector is not None
+
+    def _crawl_paginated_video_directory(
+            self,
+            video_dir_path: Path,
+            paged_video_list_soup: bs4.BeautifulSoup,
+            second_stage_url: str
+    ) -> List[IliasCrawlerEntry]:
+        """
+        Crawls a paginated video listing by requesting it again with 800 rows.
+
+        The id of the "tbl_xoct_..." table on the paginated page is used to build the
+        row count parameter for the request to the second stage url. If the table or
+        its id can not be found, the passed (paginated) page is crawled as it is.
+        """
+        LOGGER.info("Found paginated video page, trying 800 elements")
+
+        # Try to find the table id. This can be used to build the query parameter indicating
+        # you want 800 elements
+
+        table_element: Optional[bs4.Tag] = paged_video_list_soup.find(
+            name="table", id=re.compile(r"tbl_xoct_.+")
+        )
+        if table_element is None:
+            PRETTY.warning(
+                "Could not increase elements per page (table not found)."
+                " Some might not be crawled!"
+            )
+            return self._crawl_video_directory_second_stage(video_dir_path, paged_video_list_soup)
+
+        # The part after the "tbl_xoct_" prefix is the table id used in the parameter name
+        match = re.match(r"tbl_xoct_(.+)", table_element.attrs["id"])
+        if match is None:
+            PRETTY.warning(
+                "Could not increase elements per page (table id not found)."
+                " Some might not be crawled!"
+            )
+            return self._crawl_video_directory_second_stage(video_dir_path, paged_video_list_soup)
+        table_id = match.group(1)
+
+        extended_video_page = self._get_page(
+            second_stage_url,
+            {f"tbl_xoct_{table_id}_trows": 800, "cmd": "asyncGetTableGUI", "cmdMode": "asynch"}
+        )
+
+        # Still paginated: only warn, the page we got is crawled regardless
+        if self._is_paginated_video_page(extended_video_page):
+            PRETTY.warning(
+                "800 elements do not seem to be enough (or I failed to fetch that many)."
+                " I will miss elements."
+            )
+
+        return self._crawl_video_directory_second_stage(video_dir_path, extended_video_page)
+
+    def _crawl_video_directory_second_stage(
+            self,
+            video_dir_path: Path,
+            video_list_soup: bs4.BeautifulSoup
+    ) -> List[IliasCrawlerEntry]:
+        """
+        Crawls the "second stage" video page. This page contains the actual video urls.
+        """
        direct_download_links: List[bs4.Tag] = video_list_soup.findAll(
            name="a", text=re.compile(r"\s*Download\s*")
        )
@ -281,10 +422,11 @@ class IliasCrawler:
            name="a", text=re.compile(r"\s*Abspielen\s*")
        )

-        results: List[IliasDownloadInfo] = []
+        results: List[IliasCrawlerEntry] = []

        # We can download everything directly!
-        if len(direct_download_links) == len(video_links):
+        # FIXME: Sadly the download button is currently broken, so never do that
+        if False and len(direct_download_links) == len(video_links):
            for link in direct_download_links:
                results += self._crawl_single_video(video_dir_path, link, True)
        else:
@ -298,7 +440,7 @@ class IliasCrawler:
            parent_path: Path,
            link: bs4.Tag,
            direct_download: bool
-    ) -> List[IliasDownloadInfo]:
+    ) -> List[IliasCrawlerEntry]:
        """
        Crawl a single video based on its "Abspielen" link from the video listing.
        """
@ -316,46 +458,54 @@ class IliasCrawler:

        video_path: Path = Path(parent_path, title)

+        video_url = self._abs_url_from_link(link)
+
        # The video had a direct download button we can use instead
        if direct_download:
            LOGGER.debug("Using direct download for video %r", str(video_path))
-            return [IliasDownloadInfo(
+            return [IliasCrawlerEntry(
+                video_path, video_url, IliasElementType.VIDEO_FILE, modification_time
+            )]
+
+        return [IliasCrawlerEntry(
            video_path,
-                self._abs_url_from_link(link),
+            self._crawl_video_url_from_play_link(video_url),
+            IliasElementType.VIDEO_FILE,
            modification_time
        )]

+    def _crawl_video_url_from_play_link(self, play_url: str) -> Callable[[], Optional[str]]:
+        def inner() -> Optional[str]:
            # Fetch the actual video page. This is a small wrapper page initializing a javscript
            # player. Sadly we can not execute that JS. The actual video stream url is nowhere
            # on the page, but defined in a JS object inside a script tag, passed to the player
            # library.
            # We do the impossible and RegEx the stream JSON object out of the page's HTML source
-        video_page_url = self._abs_url_from_link(link)
-        video_page_soup = self._get_page(video_page_url, {})
+            video_page_soup = soupify(self._session.get(play_url))
            regex: re.Pattern = re.compile(
                r"({\"streams\"[\s\S]+?),\s*{\"paella_config_file", re.IGNORECASE
            )
            json_match = regex.search(str(video_page_soup))

            if json_match is None:
-            PRETTY.warning(f"Could not find json stream info for {video_page_url!r}")
-            return []
+                PRETTY.warning(f"Could not find json stream info for {play_url!r}")
+                return None
            json_str = json_match.group(1)

            # parse it
            json_object = json.loads(json_str)
            # and fetch the video url!
            video_url = json_object["streams"][0]["sources"]["mp4"][0]["src"]
+            return video_url
+        return inner

-        return [IliasDownloadInfo(video_path, video_url, modification_time)]
-
-    def _crawl_exercises(self, element_path: Path, url: str) -> List[IliasDownloadInfo]:
+    def _crawl_exercises(self, element_path: Path, url: str) -> List[IliasCrawlerEntry]:
        """
        Crawl files offered for download in exercises.
        """
        soup = self._get_page(url, {})

-        results: List[IliasDownloadInfo] = []
+        results: List[IliasCrawlerEntry] = []

        # Each assignment is in an accordion container
        assignment_containers: List[bs4.Tag] = soup.select(".il_VAccordionInnerContainer")
@ -382,30 +532,15 @@ class IliasCrawler:

                LOGGER.debug("Found file %r at %r", file_name, url)

-                results.append(IliasDownloadInfo(
+                results.append(IliasCrawlerEntry(
                    Path(element_path, container_name, file_name),
                    url,
+                    IliasElementType.REGULAR_FILE,
                    None  # We do not have any timestamp
                ))

        return results

-    def _crawl_folder(self, folder_path: Path, url: str) -> List[IliasDownloadInfo]:
-        """
-        Crawl all files in a folder-like element.
-        """
-        soup = self._get_page(url, {})
-
-        result: List[IliasDownloadInfo] = []
-
-        # Fetch all links and throw them to the general interpreter
-        links: List[bs4.Tag] = soup.select("a.il_ContainerItemTitle")
-        for link in links:
-            abs_url = self._abs_url_from_link(link)
-            result += self._switch_on_crawled_type(folder_path, link, abs_url)
-
-        return result
-
    def _get_page(self, url: str, params: Dict[str, Any]) -> bs4.BeautifulSoup:
        """
        Fetches a page from ILIAS, authenticating when needed.
@ -416,8 +551,10 @@ class IliasCrawler:
        content_type = response.headers["content-type"]

        if not content_type.startswith("text/html"):
-            # TODO: Correct exception type
-            raise Exception(f"Invalid content type {content_type}")
+            # Each fragment of the implicitly concatenated message needs its own f prefix,
+            # otherwise "{url!r}" and "{params!r}" end up literally in the message.
+            raise FatalException(
+                f"Invalid content type {content_type} when crawling ilias page"
+                f" {url!r} with {params!r}"
+            )

        soup = soupify(response)

--- a/PFERD/ilias/downloader.py
+++ b/PFERD/ilias/downloader.py
@ -2,9 +2,8 @@

 import datetime
 import logging
-from dataclasses import dataclass
-from pathlib import Path
-from typing import Callable, List, Optional
+from pathlib import Path, PurePath
+from typing import Callable, List, Optional, Union

 import bs4
 import requests
@ -24,15 +23,24 @@ class ContentTypeException(Exception):
    """Thrown when the content type of the ilias element can not be handled."""


-@dataclass
 class IliasDownloadInfo(Transformable):
    """
    This class describes a single file to be downloaded.
    """

-    url: str
-    modification_date: Optional[datetime.datetime]
-    # parameters: Dict[str, Any] = field(default_factory=dict)
+    def __init__(
+            self,
+            path: PurePath,
+            url: Union[str, Callable[[], Optional[str]]],
+            modifcation_date: Optional[datetime.datetime]
+    ):
+        """
+        Creates a download info for the file at the given path.
+
+        The url is either a plain string or a callable returning the url (or None).
+        Plain strings are wrapped, so `self.url` is always a callable.
+        """
+        super().__init__(path)
+        # NOTE: the parameter name is misspelt, it is kept as it is part of the signature
+        self.modification_date = modifcation_date
+
+        if not isinstance(url, str):
+            self.url: Callable[[], Optional[str]] = url
+            return
+
+        string_url = url
+        self.url = lambda: string_url


 IliasDownloadStrategy = Callable[[Organizer, IliasDownloadInfo], bool]
@ -108,16 +116,21 @@ class IliasDownloader:
        tmp_file = self._tmp_dir.new_path()

        while not self._try_download(info, tmp_file):
+            LOGGER.info("Retrying download: %r", info)
            self._authenticator.authenticate(self._session)

        self._organizer.accept_file(tmp_file, info.path)

    def _try_download(self, info: IliasDownloadInfo, target: Path) -> bool:
-        with self._session.get(info.url, stream=True) as response:
+        url = info.url()
+        if url is None:
+            PRETTY.warning(f"Could not download {str(info.path)!r} as I got no URL :/")
+            return True
+
+        with self._session.get(url, stream=True) as response:
            content_type = response.headers["content-type"]

            if content_type.startswith("text/html"):
-                # Dangit, we're probably not logged in.
                if self._is_logged_in(soupify(response)):
                    raise ContentTypeException("Attempting to download a web page, not a file")

--- a/PFERD/logging.py
+++ b/PFERD/logging.py
@ -3,14 +3,18 @@ Contains a few logger utility functions and implementations.
 """

 import logging
-from typing import Optional
+from pathlib import Path
+from typing import List, Optional

+from rich import print as rich_print
 from rich._log_render import LogRender
 from rich.console import Console
+from rich.panel import Panel
 from rich.style import Style
 from rich.text import Text
 from rich.theme import Theme

+from .download_summary import DownloadSummary
 from .utils import PathLike, to_path

 STYLE = "{"
@ -111,6 +115,15 @@ class PrettyLogger:
            f"[bold green]Created {self._format_path(path)}.[/bold green]"
        )

+    def deleted_file(self, path: PathLike) -> None:
+        """
+        Log, in bold red, that the file at the given path was deleted.
+        """
+
+        formatted_path = self._format_path(path)
+        self.logger.info(f"[bold red]Deleted {formatted_path}.[/bold red]")
+
    def ignored_file(self, path: PathLike, reason: str) -> None:
        """
        File was not downloaded or modified.
@ -138,6 +151,23 @@ class PrettyLogger:
            f"([/dim]{reason}[dim]).[/dim]"
        )

+    def summary(self, download_summary: DownloadSummary) -> None:
+        """
+        Log a report listing the new, modified and deleted files of the given summary.
+        """
+        self.logger.info("")
+        self.logger.info("[bold cyan]Download Summary[/bold cyan]")
+
+        if download_summary.has_updates():
+            for path in download_summary.new_files:
+                self.new_file(path)
+            for path in download_summary.modified_files:
+                self.modified_file(path)
+            for path in download_summary.deleted_files:
+                self.deleted_file(path)
+        else:
+            self.logger.info("[bold dim]Nothing changed![/bold dim]")
+
+
    def starting_synchronizer(
            self,
            target_directory: PathLike,
--- a/PFERD/organizer.py
+++ b/PFERD/organizer.py
@ -9,6 +9,7 @@ import shutil
 from pathlib import Path, PurePath
 from typing import List, Set

+from .download_summary import DownloadSummary
 from .location import Location
 from .logging import PrettyLogger
 from .utils import prompt_yes_no
@ -32,6 +33,8 @@ class Organizer(Location):
        # Keep the root dir
        self._known_files.add(path.resolve())

+        self.download_summary = DownloadSummary()
+
    def accept_file(self, src: Path, dst: PurePath) -> None:
        """Move a file to this organizer and mark it."""
        src_absolute = src.resolve()
@ -65,10 +68,14 @@ class Organizer(Location):
                # Bail out, nothing more to do
                PRETTY.ignored_file(dst_absolute, "same file contents")
                self.mark(dst)
+                # Touch it to update the timestamp
+                dst_absolute.touch()
                return

+            self.download_summary.add_modified_file(dst_absolute)
            PRETTY.modified_file(dst_absolute)
        else:
+            self.download_summary.add_new_file(dst_absolute)
            PRETTY.new_file(dst_absolute)

        # Create parent dir if needed
@ -115,9 +122,9 @@ class Organizer(Location):
        if start_dir.resolve() not in self._known_files and dir_empty:
            start_dir.rmdir()

-    @staticmethod
-    def _delete_file_if_confirmed(path: Path) -> None:
+    def _delete_file_if_confirmed(self, path: Path) -> None:
+        """
+        Asks the user whether the given file should be deleted and deletes it if confirmed.
+
+        A confirmed deletion is also registered in the download summary of this organizer.
+        """
        prompt = f"Do you want to delete {path}"

        if prompt_yes_no(prompt, False):
+            # Registered before unlinking, so the entry is kept even if unlink() raises
+            self.download_summary.add_deleted_file(path)
            path.unlink()
--- a/PFERD/pferd.py
+++ b/PFERD/pferd.py
@ -9,6 +9,7 @@ from typing import Callable, List, Optional, Union
 from .cookie_jar import CookieJar
 from .diva import (DivaDownloader, DivaDownloadStrategy, DivaPlaylistCrawler,
                   diva_download_new)
+from .download_summary import DownloadSummary
 from .errors import FatalException, swallow_and_print_errors
 from .ilias import (IliasAuthenticator, IliasCrawler, IliasDirectoryFilter,
                    IliasDownloader, IliasDownloadInfo, IliasDownloadStrategy,
@ -42,10 +43,10 @@ class Pferd(Location):
    ):
        super().__init__(Path(base_dir))

+        self._download_summary = DownloadSummary()
        self._tmp_dir = TmpDir(self.resolve(tmp_dir))
        self._test_run = test_run

-
    @staticmethod
    def enable_logging() -> None:
        """
@ -54,7 +55,6 @@ class Pferd(Location):

        enable_logging()

-
    @staticmethod
    def _print_transformables(transformables: List[TF]) -> None:
        LOGGER.info("")
@ -141,7 +141,8 @@ class Pferd(Location):
        # This authenticator only works with the KIT ilias instance.
        authenticator = KitShibbolethAuthenticator(username=username, password=password)
        PRETTY.starting_synchronizer(target, "ILIAS", course_id)
-        return self._ilias(
+
+        organizer = self._ilias(
            target=target,
            base_url="https://ilias.studium.kit.edu/",
            crawl_function=lambda crawler: crawler.crawl_course(course_id),
@ -153,6 +154,16 @@ class Pferd(Location):
            clean=clean,
        )

+        self._download_summary.merge(organizer.download_summary)
+
+        return organizer
+
+    def print_summary(self) -> None:
+        """
+        Hands the download summary accumulated by this instance to the pretty logger.
+        """
+        accumulated = self._download_summary
+        PRETTY.summary(accumulated)
+
    @swallow_and_print_errors
    def ilias_kit_personal_desktop(
            self,
--- a/PFERD/progress.py
+++ b/PFERD/progress.py
@ -7,8 +7,7 @@ from types import TracebackType
 from typing import Optional, Type

 import requests
-from rich.console import Console, ConsoleOptions, Control, RenderResult
-from rich.live_render import LiveRender
+from rich.console import Console
 from rich.progress import (BarColumn, DownloadColumn, Progress, TaskID,
                           TextColumn, TimeRemainingColumn,
                           TransferSpeedColumn)
@ -23,7 +22,8 @@ _progress: Progress = Progress(
    TransferSpeedColumn(),
    "•",
    TimeRemainingColumn(),
-    console=Console(file=sys.stdout)
+    console=Console(file=sys.stdout),
+    transient=True
 )


@ -61,18 +61,6 @@ def progress_for(settings: Optional[ProgressSettings]) -> 'ProgressContextManage
    return ProgressContextManager(settings)


-class _OneLineUp(LiveRender):
-    """
-    Render a control code for moving one line upwards.
-    """
-
-    def __init__(self) -> None:
-        super().__init__("not rendered")
-
-    def __console__(self, console: Console, options: ConsoleOptions) -> RenderResult:
-        yield Control(f"\r\x1b[1A")
-
-
 class ProgressContextManager:
    """
    A context manager used for displaying progress.
@ -113,9 +101,6 @@ class ProgressContextManager:
            _progress.stop()
            _progress.refresh()

-            # And we existed, so remove the line above (remove_task leaves one behind)
-            Console().print(_OneLineUp())
-
        return None

    def advance(self, amount: float) -> None:
--- a/README.md
+++ b/README.md
@ -9,11 +9,19 @@ Ensure that you have at least Python 3.8 installed.
 To install PFERD or update your installation to the latest version, run this
 wherever you want to install/have installed PFERD:
 ```
-$ pip install git+https://github.com/Garmelon/PFERD@v2.0.0
+$ pip install git+https://github.com/Garmelon/PFERD@v2.1.2
 ```

 The use of [venv](https://docs.python.org/3/library/venv.html) is recommended.

+### Upgrading from 2.0.0 to 2.1.0+
+
+The `IliasDirectoryType` type was renamed to `IliasElementType` and is now far
+more detailed.  
+The new values are: REGULAR_FOLDER, VIDEO_FOLDER,
+EXERCISE_FOLDER, REGULAR_FILE, VIDEO_FILE, FORUM, EXTERNAL_LINK.  
+Forums and external links are skipped automatically if you use the `kit_ilias` helper.
+
 ## Example setup

 In this example, `python3` refers to at least Python 3.8.
@ -29,8 +37,8 @@ $ mkdir Vorlesungen
 $ cd Vorlesungen
 $ python3 -m venv .venv
 $ .venv/bin/activate
-$ pip install git+https://github.com/Garmelon/PFERD@v2.0.0
-$ curl -O https://raw.githubusercontent.com/Garmelon/PFERD/v2.0.0/example_config.py
+$ pip install git+https://github.com/Garmelon/PFERD@v2.1.2
+$ curl -O https://raw.githubusercontent.com/Garmelon/PFERD/v2.1.2/example_config.py
 $ python3 example_config.py
 $ deactivate
 ```
--- a/example_config.py
+++ b/example_config.py
@ -2,7 +2,7 @@ import argparse
 from pathlib import Path, PurePath

 from PFERD import Pferd
-from PFERD.ilias import IliasDirectoryType
+from PFERD.ilias import IliasElementType
 from PFERD.transform import (attempt, do, glob, keep, move, move_dir,
                             optionally, re_move, re_rename)

@ -49,7 +49,7 @@ tf_ss_2020_pg = attempt(
 )


-def df_ss_2020_or1(path: PurePath, _type: IliasDirectoryType) -> bool:
+def df_ss_2020_or1(path: PurePath, _type: IliasElementType) -> bool:
    if glob("Tutorien/")(path):
        return True
    if glob("Tutorien/Tutorium 10, dienstags 15:45 Uhr/")(path):
@ -124,6 +124,8 @@ def main() -> None:
            cookies="ilias_cookies.txt",
        )

+    # Prints a summary listing all new, modified or deleted files
+    pferd.print_summary()

 if __name__ == "__main__":
    main()
--- a/example_config_personal_desktop.py
+++ b/example_config_personal_desktop.py
@ -30,6 +30,9 @@ def main() -> None:
        cookies="ilias_cookies.txt",
    )

+    # Prints a summary listing all new, modified or deleted files
+    pferd.print_summary()
+

 if __name__ == "__main__":
    main()
--- a/setup.py
+++ b/setup.py
@ -2,12 +2,12 @@ from setuptools import find_packages, setup

 setup(
    name="PFERD",
-    version="2.0.0",
+    version="2.1.2",
    packages=find_packages(),
    install_requires=[
        "requests>=2.21.0",
        "beautifulsoup4>=4.7.1",
-        "rich>=1.0.0"
+        "rich>=2.1.0"
    ],
 )
Author	SHA1	Message	Date
I-Al-Istannen	ecaedea709	Merge pull request #8 from pavelzw/master: Fix version number	2020-06-26 17:52:05 +02:00
Pavel Zwerschke	f05d1b1261	Fix version number	2020-06-26 17:49:47 +02:00
I-Al-Istannen	6aaa3071f9	Update README with new version	2020-06-26 17:35:03 +02:00
I-Al-Istannen	c26c9352f1	Make DownloadSummary private, provide property accessors	2020-06-26 17:30:45 +02:00
I-Al-Istannen	d9ea688145	Use pretty logger for summaries	2020-06-26 17:24:36 +02:00
I-Al-Istannen	e8be6e498e	Add summary to example_config_personal_desktop	2020-06-26 17:24:36 +02:00
I-Al-Istannen	e4b1fac045	Satisfy pylint	2020-06-26 15:38:22 +02:00
Joscha	402ae81335	Fix type hints	2020-06-26 13:17:44 +00:00
Daniel Augustin	52f31e2783	Add type hints to DownloadSummary	2020-06-26 13:02:37 +02:00
Daniel Augustin	739522a151	Move download summary into a separate class	2020-06-25 23:07:11 +02:00
Daniel Augustin	6c034209b6	Add deleted files to summary	2020-06-25 22:00:28 +02:00
Daniel Augustin	f6fbd5e4bb	Add download summary	2020-06-25 19:19:34 +02:00
I-Al-Istannen	7024db1f13	Use transient progress bar. This will ensure no pesky newline ends up in the output, even on Windows.	2020-06-25 18:03:12 +02:00
I-Al-Istannen	23bfa42a0d	Never use the direct download button, as it is currently broken	2020-06-11 13:31:01 +02:00
I-Al-Istannen	fdb57884ed	Touch files with same content to update timestamps	2020-05-31 20:27:15 +02:00
I-Al-Istannen	f614b95a00	Adjust version in setup.py	2020-05-30 19:07:02 +02:00
I-Al-Istannen	8198c9ecaa	Reorder methods a bit	2020-05-30 19:06:36 +02:00
I-Al-Istannen	086b15d10f	Crawl a bit more iteratively	2020-05-30 15:47:15 +02:00
I-Al-Istannen	9d6ce331a5	Use IliasCrawlerEntry entries in the ilias scraper	2020-05-30 15:20:51 +02:00
I-Al-Istannen	821c7ade26	Move video url extraction logic to crawler	2020-05-30 00:22:31 +02:00
I-Al-Istannen	b969a1854a	Remove unneeded whitespace	2020-05-30 00:22:31 +02:00
I-Al-Istannen	62535b4452	Unpack videos in ILIAS downloader	2020-05-21 22:12:52 +02:00
I-Al-Istannen	c0056e5669	Correctly crawl video pages with multiple pages	2020-05-21 21:38:07 +02:00