Allow crawling the ILIAS Personal Desktop

I-Al-Istannen 2020-05-10 12:16:42 +02:00
parent 9950144e97
commit 9850ab1d73
2 changed files with 79 additions and 15 deletions


@@ -38,7 +38,6 @@ class IliasCrawler:
     def __init__(
             self,
             base_url: str,
-            course_id: str,
             session: requests.Session,
             authenticator: IliasAuthenticator,
             dir_filter: IliasDirectoryFilter
@@ -48,7 +47,6 @@ class IliasCrawler:
         """
         self._base_url = base_url
-        self._course_id = course_id
         self._session = session
         self._authenticator = authenticator
         self.dir_filter = dir_filter
@@ -71,17 +69,23 @@ class IliasCrawler:
         return urlunsplit((scheme, netloc, path, new_query_string, fragment))
 
-    def crawl(self) -> List[IliasDownloadInfo]:
+    def crawl_course(self, course_id: str) -> List[IliasDownloadInfo]:
         """
-        Starts the crawl process, yielding a list of elements to (potentially) download.
+        Starts the crawl process for a course, yielding a list of elements to (potentially)
+        download.
+
+        Arguments:
+            course_id {str} -- the course id
 
         Raises:
             FatalException: if an unrecoverable error occurs or the course id is not valid
         """
         # Start crawling at the given course
         root_url = self._url_set_query_param(
-            self._base_url + "/goto.php", "target", f"crs_{self._course_id}"
+            self._base_url + "/goto.php", "target", f"crs_{course_id}"
         )
 
-        if not self._is_course_id_valid(root_url):
+        if not self._is_course_id_valid(root_url, course_id):
             raise FatalException(
                 "Invalid course id? The URL the server returned did not contain my id."
             )
@@ -89,9 +93,18 @@
         # And treat it as a folder
         return self._crawl_folder(Path(""), root_url)
 
-    def _is_course_id_valid(self, root_url: str) -> bool:
+    def _is_course_id_valid(self, root_url: str, course_id: str) -> bool:
         response: requests.Response = self._session.get(root_url)
-        return self._course_id in response.url
+        return course_id in response.url
+
+    def crawl_personal_desktop(self) -> List[IliasDownloadInfo]:
+        """
+        Crawls the ILIAS personal desktop (and every subelement that can be reached from there).
+
+        Raises:
+            FatalException: if an unrecoverable error occurs
+        """
+        return self._crawl_folder(Path(""), self._base_url + "?baseClass=ilPersonalDesktopGUI")
 
     def _switch_on_crawled_type(
             self,
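The crawler half of this commit thus moves the course id from constructor state to a per-call argument, so a single IliasCrawler instance can serve several entry points. A minimal usage sketch, assuming the package is importable as PFERD; the session/authenticator setup and the course id "1234567" are illustration, not part of this diff:

    import requests

    from PFERD.ilias import IliasCrawler, KitShibbolethAuthenticator  # assumed import path

    session = requests.Session()
    # Passing None for both credentials makes the authenticator prompt the user,
    # per the defaults documented below.
    authenticator = KitShibbolethAuthenticator(username=None, password=None)

    crawler = IliasCrawler(
        "https://ilias.studium.kit.edu",
        session,
        authenticator,
        lambda directory: True,  # dir_filter: accept every directory
    )

    course_infos = crawler.crawl_course("1234567")    # hypothetical course id
    desktop_infos = crawler.crawl_personal_desktop()  # same instance, new entry point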


@@ -4,14 +4,14 @@ Convenience functions for using PFERD.
 import logging
 from pathlib import Path
-from typing import List, Optional, Union
+from typing import Callable, List, Optional, Union
 
 from .cookie_jar import CookieJar
 from .diva import (DivaDownloader, DivaDownloadStrategy, DivaPlaylistCrawler,
                    diva_download_new)
 from .errors import FatalException, swallow_and_print_errors
 from .ilias import (IliasAuthenticator, IliasCrawler, IliasDirectoryFilter,
-                    IliasDownloader, IliasDownloadStrategy,
+                    IliasDownloader, IliasDownloadInfo, IliasDownloadStrategy,
                     KitShibbolethAuthenticator, download_modified_or_new)
 from .location import Location
 from .logging import PrettyLogger
@@ -56,7 +56,7 @@ class Pferd(Location):
             self,
             target: PathLike,
             base_url: str,
-            course_id: str,
+            crawl_function: Callable[[IliasCrawler], List[IliasDownloadInfo]],
             authenticator: IliasAuthenticator,
             cookies: Optional[PathLike],
             dir_filter: IliasDirectoryFilter,
@@ -70,11 +70,11 @@ class Pferd(Location):
         tmp_dir = self._tmp_dir.new_subdir()
         organizer = Organizer(self.resolve(to_path(target)))
 
-        crawler = IliasCrawler(base_url, course_id, session, authenticator, dir_filter)
+        crawler = IliasCrawler(base_url, session, authenticator, dir_filter)
         downloader = IliasDownloader(tmp_dir, organizer, session, authenticator, download_strategy)
 
         cookie_jar.load_cookies()
-        info = crawler.crawl()
+        info = crawl_function(crawler)
         cookie_jar.save_cookies()
 
         transformed = apply_transform(transform, info)
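The _ilias pipeline itself no longer knows about course ids: the crawl entry point arrives as a callable, and course sync and desktop sync differ only in the lambda handed in. A sketch of the pattern; run_crawl is a hypothetical stand-in for the relevant lines of _ilias:

    from typing import Callable, List

    from PFERD.ilias import IliasCrawler, IliasDownloadInfo  # assumed import path

    def run_crawl(
            crawler: IliasCrawler,
            crawl_function: Callable[[IliasCrawler], List[IliasDownloadInfo]],
    ) -> List[IliasDownloadInfo]:
        # Exactly the shape of the change above: invoke whichever entry point
        # the caller injected instead of a hard-coded crawler.crawl().
        return crawl_function(crawler)

    # Two syncs, two callables, one code path (the course id is made up):
    # run_crawl(crawler, lambda c: c.crawl_course("1234567"))
    # run_crawl(crawler, lambda c: c.crawl_personal_desktop())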
@@ -134,7 +134,58 @@ class Pferd(Location):
         return self._ilias(
             target=target,
             base_url="https://ilias.studium.kit.edu/",
-            course_id=course_id,
+            crawl_function=lambda crawler: crawler.crawl_course(course_id),
             authenticator=authenticator,
             cookies=cookies,
             dir_filter=dir_filter,
             transform=transform,
             download_strategy=download_strategy,
             clean=clean,
         )
 
+    @swallow_and_print_errors
+    def ilias_kit_personal_desktop(
+            self,
+            target: PathLike,
+            dir_filter: IliasDirectoryFilter = lambda x: True,
+            transform: Transform = lambda x: x,
+            cookies: Optional[PathLike] = None,
+            username: Optional[str] = None,
+            password: Optional[str] = None,
+            download_strategy: IliasDownloadStrategy = download_modified_or_new,
+            clean: bool = True,
+    ) -> Organizer:
+        """
+        Synchronizes a folder with the ILIAS instance of the KIT. This method will crawl the
+        ILIAS "personal desktop" instead of a single course.
+
+        Arguments:
+            target {Path} -- the target path to write the data to
+
+        Keyword Arguments:
+            dir_filter {IliasDirectoryFilter} -- A filter for directories. Applied on the
+                crawler level; filtered directories and all of their contents are skipped.
+                (default: {lambda x: True})
+            transform {Transform} -- A transformation function for the output paths. Return None
+                to ignore a file. (default: {lambda x: x})
+            cookies {Optional[Path]} -- The path to store and load cookies from.
+                (default: {None})
+            username {Optional[str]} -- The SCC username. If none is given, it will prompt
+                the user. (default: {None})
+            password {Optional[str]} -- The SCC password. If none is given, it will prompt
+                the user. (default: {None})
+            download_strategy {DownloadStrategy} -- A function to determine which files need to
+                be downloaded. Can save bandwidth and reduce the number of requests.
+                (default: {download_modified_or_new})
+            clean {bool} -- Whether to clean up when the method finishes.
+        """
+        # This authenticator only works with the KIT ilias instance.
+        authenticator = KitShibbolethAuthenticator(username=username, password=password)
+        PRETTY.starting_synchronizer(target, "ILIAS", "Personal Desktop")
+        return self._ilias(
+            target=target,
+            base_url="https://ilias.studium.kit.edu/",
+            crawl_function=lambda crawler: crawler.crawl_personal_desktop(),
+            authenticator=authenticator,
+            cookies=cookies,
+            dir_filter=dir_filter,
+            transform=transform,
+            download_strategy=download_strategy,
+            clean=clean,
+        )
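From the user's side, the new method is a drop-in sibling of ilias_kit, just without a course id. An end-to-end sketch, assuming Pferd is importable and constructed with a base path as in the project's example configs; the target and cookie file names are made up:

    from pathlib import Path

    from PFERD import Pferd  # assumed import path

    # Assumed: Pferd takes a base directory against which targets are resolved.
    pferd = Pferd(Path(__file__).parent)

    # Mirror everything reachable from the ILIAS personal desktop into "Desktop/",
    # keeping the login session in a cookie file between runs.
    pferd.ilias_kit_personal_desktop(
        "Desktop",
        cookies="ilias_cookies.txt",
    )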