From 9850ab1d73f3de722c1d5af0b3736333c31b5b72 Mon Sep 17 00:00:00 2001
From: I-Al-Istannen <i-al-istannen@users.noreply.github.com>
Date: Sun, 10 May 2020 12:16:42 +0200
Subject: [PATCH] Allow crawling the ILIAS Personal Desktop

---
 PFERD/ilias/crawler.py | 31 +++++++++++++++------
 PFERD/pferd.py         | 63 ++++++++++++++++++++++++++++++++++++++----
 2 files changed, 79 insertions(+), 15 deletions(-)

diff --git a/PFERD/ilias/crawler.py b/PFERD/ilias/crawler.py
index 0f7d4f6..8f48973 100644
--- a/PFERD/ilias/crawler.py
+++ b/PFERD/ilias/crawler.py
@@ -38,7 +38,6 @@ class IliasCrawler:
     def __init__(
             self,
             base_url: str,
-            course_id: str,
             session: requests.Session,
             authenticator: IliasAuthenticator,
             dir_filter: IliasDirectoryFilter
@@ -48,7 +47,6 @@ class IliasCrawler:
         """
 
         self._base_url = base_url
-        self._course_id = course_id
         self._session = session
         self._authenticator = authenticator
         self.dir_filter = dir_filter
@@ -71,17 +69,23 @@ class IliasCrawler:
 
         return urlunsplit((scheme, netloc, path, new_query_string, fragment))
 
-    def crawl(self) -> List[IliasDownloadInfo]:
-        """
-        Starts the crawl process, yielding a list of elements to (potentially) download.
+    def crawl_course(self, course_id: str) -> List[IliasDownloadInfo]:
         """
+        Starts the crawl process for a course, yielding a list of elements to (potentially)
+        download.
 
+        Arguments:
+            course_id {str} -- the course id
+
+        Raises:
+            FatalException: if an unrecoverable error occurs or the course id is not valid
+        """
         # Start crawling at the given course
         root_url = self._url_set_query_param(
-            self._base_url + "/goto.php", "target", f"crs_{self._course_id}"
+            self._base_url + "/goto.php", "target", f"crs_{course_id}"
         )
 
-        if not self._is_course_id_valid(root_url):
+        if not self._is_course_id_valid(root_url, course_id):
             raise FatalException(
                 "Invalid course id? The URL the server returned did not contain my id."
             )
@@ -89,9 +93,18 @@ class IliasCrawler:
         # And treat it as a folder
         return self._crawl_folder(Path(""), root_url)
 
-    def _is_course_id_valid(self, root_url: str) -> bool:
+    def _is_course_id_valid(self, root_url: str, course_id: str) -> bool:
         response: requests.Response = self._session.get(root_url)
-        return self._course_id in response.url
+        return course_id in response.url
+
+    def crawl_personal_desktop(self) -> List[IliasDownloadInfo]:
+        """
+        Crawls the ILIAS personal desktop (and every subelement that can be reached from there).
+
+        Raises:
+            FatalException: if an unrecoverable error occurs
+        """
+        return self._crawl_folder(Path(""), self._base_url + "?baseClass=ilPersonalDesktopGUI")
 
     def _switch_on_crawled_type(
             self,
diff --git a/PFERD/pferd.py b/PFERD/pferd.py
index b2e1cb4..79d6e4f 100644
--- a/PFERD/pferd.py
+++ b/PFERD/pferd.py
@@ -4,14 +4,14 @@ Convenience functions for using PFERD.
 
 import logging
 from pathlib import Path
-from typing import List, Optional, Union
+from typing import Callable, List, Optional, Union
 
 from .cookie_jar import CookieJar
 from .diva import (DivaDownloader, DivaDownloadStrategy, DivaPlaylistCrawler,
                    diva_download_new)
 from .errors import FatalException, swallow_and_print_errors
 from .ilias import (IliasAuthenticator, IliasCrawler, IliasDirectoryFilter,
-                    IliasDownloader, IliasDownloadStrategy,
+                    IliasDownloader, IliasDownloadInfo, IliasDownloadStrategy,
                     KitShibbolethAuthenticator, download_modified_or_new)
 from .location import Location
 from .logging import PrettyLogger
@@ -56,7 +56,7 @@ class Pferd(Location):
             self,
             target: PathLike,
             base_url: str,
-            course_id: str,
+            crawl_function: Callable[[IliasCrawler], List[IliasDownloadInfo]],
             authenticator: IliasAuthenticator,
             cookies: Optional[PathLike],
             dir_filter: IliasDirectoryFilter,
@@ -70,11 +70,11 @@ class Pferd(Location):
         tmp_dir = self._tmp_dir.new_subdir()
         organizer = Organizer(self.resolve(to_path(target)))
 
-        crawler = IliasCrawler(base_url, course_id, session, authenticator, dir_filter)
+        crawler = IliasCrawler(base_url, session, authenticator, dir_filter)
         downloader = IliasDownloader(tmp_dir, organizer, session, authenticator, download_strategy)
 
         cookie_jar.load_cookies()
-        info = crawler.crawl()
+        info = crawl_function(crawler)
         cookie_jar.save_cookies()
 
         transformed = apply_transform(transform, info)
@@ -134,7 +134,58 @@ class Pferd(Location):
         return self._ilias(
             target=target,
             base_url="https://ilias.studium.kit.edu/",
-            course_id=course_id,
+            crawl_function=lambda crawler: crawler.crawl_course(course_id),
+            authenticator=authenticator,
+            cookies=cookies,
+            dir_filter=dir_filter,
+            transform=transform,
+            download_strategy=download_strategy,
+            clean=clean,
+        )
+
+    @swallow_and_print_errors
+    def ilias_kit_personal_desktop(
+            self,
+            target: PathLike,
+            dir_filter: IliasDirectoryFilter = lambda x: True,
+            transform: Transform = lambda x: x,
+            cookies: Optional[PathLike] = None,
+            username: Optional[str] = None,
+            password: Optional[str] = None,
+            download_strategy: IliasDownloadStrategy = download_modified_or_new,
+            clean: bool = True,
+    ) -> Organizer:
+        """
+        Synchronizes a folder with the ILIAS instance of the KIT. This method will crawl the ILIAS
+        "personal desktop" instead of a single course.
+
+        Arguments:
+            target {Path} -- the target path to write the data to
+
+        Keyword Arguments:
+            dir_filter {IliasDirectoryFilter} -- A filter for directories. Will be applied on the
+                crawler level; these directories and all of their content are skipped.
+                (default: {lambda x: True})
+            transform {Transform} -- A transformation function for the output paths. Return None
+                to ignore a file. (default: {lambda x: x})
+            cookies {Optional[Path]} -- The path to store and load cookies from.
+                (default: {None})
+            username {Optional[str]} -- The SCC username. If none is given, it will prompt
+                the user. (default: {None})
+            password {Optional[str]} -- The SCC password. If none is given, it will prompt
+                the user. (default: {None})
+            download_strategy {DownloadStrategy} -- A function to determine which files need to
+                be downloaded. Can save bandwidth and reduce the number of requests.
+                (default: {download_modified_or_new})
+            clean {bool} -- Whether to clean up when the method finishes. (default: {True})
+        """
+        # This authenticator only works with the KIT ilias instance.
+        authenticator = KitShibbolethAuthenticator(username=username, password=password)
+        PRETTY.starting_synchronizer(target, "ILIAS", "Personal Desktop")
+        return self._ilias(
+            target=target,
+            base_url="https://ilias.studium.kit.edu/",
+            crawl_function=lambda crawler: crawler.crawl_personal_desktop(),
             authenticator=authenticator,
             cookies=cookies,
             dir_filter=dir_filter,