pferd/PFERD/pferd.py

"""
Convenience functions for using PFERD.
"""

import logging
from pathlib import Path
from typing import Callable, List, Optional, Union

from .cookie_jar import CookieJar
from .diva import (DivaDownloader, DivaDownloadStrategy, DivaPlaylistCrawler,
                   diva_download_new)
from .download_summary import DownloadSummary
from .errors import FatalException, swallow_and_print_errors
from .ilias import (IliasAuthenticator, IliasCrawler, IliasDirectoryFilter,
                    IliasDownloader, IliasDownloadInfo, IliasDownloadStrategy,
                    KitShibbolethAuthenticator, download_modified_or_new)
from .location import Location
from .logging import PrettyLogger, enable_logging
from .organizer import Organizer
from .tmp_dir import TmpDir
from .transform import TF, Transform, apply_transform
from .utils import PathLike, to_path

# TODO save known-good cookies as soon as possible


LOGGER = logging.getLogger(__name__)
PRETTY = PrettyLogger(LOGGER)


class Pferd(Location):
    # pylint: disable=too-many-arguments
    """
    The main entrypoint in your Pferd usage: This class combines a number of
    useful shortcuts for running synchronizers in a single interface.
    """

    def __init__(
            self,
            base_dir: Path,
            tmp_dir: Path = Path(".tmp"),
            test_run: bool = False
    ):
        """
        Creates a new Pferd instance rooted at the given base directory.

        Arguments:
            base_dir {Path} -- the base directory; it is wrapped in Path(...) and handed to
                the Location constructor

        Keyword Arguments:
            tmp_dir {Path} -- the directory for temporary files. It is passed through
                self.resolve before use. (default: {Path(".tmp")})
            test_run {bool} -- if set, the synchronizers only log the crawled and transformed
                elements instead of downloading them. (default: {False})
        """
        root = Path(base_dir)
        super().__init__(root)

        self._test_run = test_run
        # Accumulates the results of all synchronizers run through this instance.
        self._download_summary = DownloadSummary()
        # self.resolve is only usable after the Location constructor has run.
        resolved_tmp = self.resolve(tmp_dir)
        self._tmp_dir = TmpDir(resolved_tmp)

    @staticmethod
    def enable_logging() -> None:
        """
        Turn on logging output by delegating to the module-level enable_logging helper.
        """
        # Inside the method body the name resolves to the imported module-level function,
        # not to this static method, so this is not a recursive call.
        enable_logging()

    @staticmethod
    def _print_transformables(transformables: List[TF]) -> None:
        """
        Logs the path of every given transformable, preceded by an empty line and a header.

        Arguments:
            transformables {List[TF]} -- the elements whose paths should be logged
        """
        for header_line in ("", "Results of the test run:"):
            LOGGER.info(header_line)

        for entry in transformables:
            LOGGER.info(entry.path)

    def _ilias(
            self,
            target: PathLike,
            base_url: str,
            crawl_function: Callable[[IliasCrawler], List[IliasDownloadInfo]],
            authenticator: IliasAuthenticator,
            cookies: Optional[PathLike],
            dir_filter: IliasDirectoryFilter,
            transform: Transform,
            download_strategy: IliasDownloadStrategy,
            timeout: int,
            clean: bool = True,
    ) -> Organizer:
        # pylint: disable=too-many-locals
        """
        Shared implementation behind the public ILIAS synchronizers: crawl, transform and
        (unless this is a test run) download and clean up.

        Arguments:
            target {PathLike} -- the target path, resolved relative to this location
            base_url {str} -- the base url handed to the IliasCrawler
            crawl_function {Callable} -- receives the crawler and returns the elements to
                download
            authenticator {IliasAuthenticator} -- shared by the crawler and the downloader
            cookies {Optional[PathLike]} -- the path of the cookie file; a falsy value means
                the CookieJar is created without a path
            dir_filter {IliasDirectoryFilter} -- the directory filter handed to the crawler
            transform {Transform} -- applied to the crawled elements before downloading
            download_strategy {IliasDownloadStrategy} -- handed to the downloader
            timeout {int} -- handed to the downloader

        Keyword Arguments:
            clean {bool} -- whether to call organizer.cleanup() after downloading
                (default: {True})

        Returns:
            Organizer -- the organizer created for the target directory
        """
        cookie_path = to_path(cookies) if cookies else None
        cookie_jar = CookieJar(cookie_path)
        session = cookie_jar.create_session()
        tmp_dir = self._tmp_dir.new_subdir()
        target_dir = self.resolve(to_path(target))
        organizer = Organizer(target_dir)

        crawler = IliasCrawler(base_url, session, authenticator, dir_filter)
        downloader = IliasDownloader(
            tmp_dir, organizer, session, authenticator, download_strategy, timeout
        )

        cookie_jar.load_cookies()
        crawled = crawl_function(crawler)
        # Persist the cookies right after crawling, before any download is attempted.
        cookie_jar.save_cookies()

        transformed = apply_transform(transform, crawled)
        if not self._test_run:
            downloader.download_all(transformed)
            cookie_jar.save_cookies()
            if clean:
                organizer.cleanup()
        else:
            # Test run: only report what would be processed, download nothing.
            self._print_transformables(transformed)

        return organizer

    @swallow_and_print_errors
    def ilias_kit(
            self,
            target: PathLike,
            course_id: str,
            dir_filter: IliasDirectoryFilter = lambda x, y: True,
            transform: Transform = lambda x: x,
            cookies: Optional[PathLike] = None,
            username: Optional[str] = None,
            password: Optional[str] = None,
            download_strategy: IliasDownloadStrategy = download_modified_or_new,
            clean: bool = True,
            timeout: int = 5,
    ) -> Organizer:
        """
        Synchronizes a local folder with a single course on the KIT ILIAS instance.

        Arguments:
            target {Path} -- the target path to write the data to
            course_id {str} -- the id of the main course page (found in the URL after ref_id
                when opening the course homepage)

        Keyword Arguments:
            dir_filter {IliasDirectoryFilter} -- A filter for directories. It is applied on
                the crawler level: filtered directories and all of their content are skipped.
                (default: {lambda x, y: True})
            transform {Transform} -- A transformation function for the output paths. Return
                None to ignore a file. (default: {lambda x: x})
            cookies {Optional[Path]} -- The path to store and load cookies from.
                (default: {None})
            username {Optional[str]} -- The SCC username. If none is given, it will prompt
                the user. (default: {None})
            password {Optional[str]} -- The SCC password. If none is given, it will prompt
                the user. (default: {None})
            download_strategy {DownloadStrategy} -- A function to determine which files need
                to be downloaded. Can save bandwidth and reduce the number of requests.
                (default: {download_modified_or_new})
            clean {bool} -- Whether to clean up when the method finishes. (default: {True})
            timeout {int} -- The download timeout for opencast videos. Sadly needed due to a
                requests bug. (default: {5})

        Returns:
            Organizer -- the organizer used for the target directory
        """
        def crawl_course(crawler: IliasCrawler) -> List[IliasDownloadInfo]:
            return crawler.crawl_course(course_id)

        # This authenticator only works with the KIT ilias instance.
        authenticator = KitShibbolethAuthenticator(username=username, password=password)
        PRETTY.starting_synchronizer(target, "ILIAS", course_id)

        organizer = self._ilias(
            target=target,
            base_url="https://ilias.studium.kit.edu/",
            crawl_function=crawl_course,
            authenticator=authenticator,
            cookies=cookies,
            dir_filter=dir_filter,
            transform=transform,
            download_strategy=download_strategy,
            timeout=timeout,
            clean=clean,
        )

        # Record this run in the summary accumulated over all synchronizers.
        self._download_summary.merge(organizer.download_summary)
        return organizer

    def print_summary(self) -> None:
        """
        Outputs the download summary accumulated by this instance via the pretty logger.
        """
        accumulated = self._download_summary
        PRETTY.summary(accumulated)

    @swallow_and_print_errors
    def ilias_kit_personal_desktop(
            self,
            target: PathLike,
            dir_filter: IliasDirectoryFilter = lambda x, y: True,
            transform: Transform = lambda x: x,
            cookies: Optional[PathLike] = None,
            username: Optional[str] = None,
            password: Optional[str] = None,
            download_strategy: IliasDownloadStrategy = download_modified_or_new,
            clean: bool = True,
            timeout: int = 5,
    ) -> Organizer:
        """
        Synchronizes a local folder with the KIT ILIAS instance, crawling the ILIAS
        "personal desktop" rather than a single course.

        Arguments:
            target {Path} -- the target path to write the data to

        Keyword Arguments:
            dir_filter {IliasDirectoryFilter} -- A filter for directories. It is applied on
                the crawler level: filtered directories and all of their content are skipped.
                (default: {lambda x, y: True})
            transform {Transform} -- A transformation function for the output paths. Return
                None to ignore a file. (default: {lambda x: x})
            cookies {Optional[Path]} -- The path to store and load cookies from.
                (default: {None})
            username {Optional[str]} -- The SCC username. If none is given, it will prompt
                the user. (default: {None})
            password {Optional[str]} -- The SCC password. If none is given, it will prompt
                the user. (default: {None})
            download_strategy {DownloadStrategy} -- A function to determine which files need
                to be downloaded. Can save bandwidth and reduce the number of requests.
                (default: {download_modified_or_new})
            clean {bool} -- Whether to clean up when the method finishes. (default: {True})
            timeout {int} -- The download timeout for opencast videos. Sadly needed due to a
                requests bug. (default: {5})

        Returns:
            Organizer -- the organizer used for the target directory
        """
        def crawl_desktop(crawler: IliasCrawler) -> List[IliasDownloadInfo]:
            return crawler.crawl_personal_desktop()

        # This authenticator only works with the KIT ilias instance.
        authenticator = KitShibbolethAuthenticator(username=username, password=password)
        PRETTY.starting_synchronizer(target, "ILIAS", "Personal Desktop")

        organizer = self._ilias(
            target=target,
            base_url="https://ilias.studium.kit.edu/",
            crawl_function=crawl_desktop,
            authenticator=authenticator,
            cookies=cookies,
            dir_filter=dir_filter,
            transform=transform,
            download_strategy=download_strategy,
            timeout=timeout,
            clean=clean,
        )

        # Record this run in the summary accumulated over all synchronizers.
        self._download_summary.merge(organizer.download_summary)
        return organizer

    @swallow_and_print_errors
    def ilias_kit_folder(
            self,
            target: PathLike,
            full_url: str,
            dir_filter: IliasDirectoryFilter = lambda x, y: True,
            transform: Transform = lambda x: x,
            cookies: Optional[PathLike] = None,
            username: Optional[str] = None,
            password: Optional[str] = None,
            download_strategy: IliasDownloadStrategy = download_modified_or_new,
            clean: bool = True,
            timeout: int = 5,
    ) -> Organizer:
        """
        Synchronizes a folder with a given folder on the ILIAS instance of the KIT.

        Arguments:
            target {Path}  -- the target path to write the data to
            full_url {str} -- the full url of the folder/videos/course to crawl. It must be an
                https url whose host is exactly ilias.studium.kit.edu.

        Keyword Arguments:
            dir_filter {IliasDirectoryFilter} -- A filter for directories. Will be applied on the
                crawler level, these directories and all of their content is skipped.
                (default: {lambda x, y: True})
            transform {Transform} -- A transformation function for the output paths. Return None
                to ignore a file. (default: {lambda x: x})
            cookies {Optional[Path]} -- The path to store and load cookies from.
                (default: {None})
            username {Optional[str]} -- The SCC username. If none is given, it will prompt
                the user. (default: {None})
            password {Optional[str]} -- The SCC password. If none is given, it will prompt
                the user. (default: {None})
            download_strategy {DownloadStrategy} -- A function to determine which files need to
                be downloaded. Can save bandwidth and reduce the number of requests.
                (default: {download_modified_or_new})
            clean {bool} -- Whether to clean up when the method finishes. (default: {True})
            timeout {int} -- The download timeout for opencast videos. Sadly needed due to a
                requests bug. (default: {5})

        Returns:
            Organizer -- the organizer used for the target directory

        Raises:
            FatalException -- if full_url does not point to the KIT ILIAS instance.
                NOTE(review): the swallow_and_print_errors decorator presumably catches
                this before it reaches the caller -- confirm in errors.py.
        """
        # Local import keeps this block self-contained; the module imports are untouched.
        # pylint: disable=import-outside-toplevel
        from urllib.parse import urlsplit

        kit_host = "ilias.studium.kit.edu"

        # This authenticator only works with the KIT ilias instance.
        authenticator = KitShibbolethAuthenticator(username=username, password=password)
        PRETTY.starting_synchronizer(target, "ILIAS", "An ILIAS element by url")

        # A plain prefix check also matches look-alike hosts such as
        # "https://ilias.studium.kit.edu.evil.example/" or
        # "https://ilias.studium.kit.edu@evil.example/". Keep the prefix check, so nothing
        # that was rejected before is accepted now, and additionally require the parsed
        # hostname to be exactly the KIT ILIAS host.
        try:
            parsed_host = urlsplit(full_url).hostname
        except ValueError:
            # urlsplit rejects some malformed urls (e.g. broken IPv6 literals).
            parsed_host = None

        if not full_url.startswith(f"https://{kit_host}") or parsed_host != kit_host:
            raise FatalException("Not a valid KIT ILIAS URL")

        organizer = self._ilias(
            target=target,
            base_url="https://ilias.studium.kit.edu/",
            crawl_function=lambda crawler: crawler.recursive_crawl_url(full_url),
            authenticator=authenticator,
            cookies=cookies,
            dir_filter=dir_filter,
            transform=transform,
            download_strategy=download_strategy,
            clean=clean,
            timeout=timeout
        )

        self._download_summary.merge(organizer.download_summary)

        return organizer

    @swallow_and_print_errors
    def diva_kit(
            self,
            target: Union[PathLike, Organizer],
            playlist_location: str,
            transform: Transform = lambda x: x,
            download_strategy: DivaDownloadStrategy = diva_download_new,
            clean: bool = True
    ) -> Organizer:
        """
        Synchronizes a folder with a DIVA playlist.

        Arguments:
            target {Union[PathLike, Organizer]} -- The target path to write the data to, or an
                existing organizer to use as-is.
            playlist_location {str} -- the playlist id or the playlist URL
              in the format 'https://mediaservice.bibliothek.kit.edu/#/details/DIVA-2019-271'

        Keyword Arguments:
            transform {Transform} -- A transformation function for the output paths. Return None
                to ignore a file. (default: {lambda x: x})
            download_strategy {DivaDownloadStrategy} -- A function to determine which files need to
                be downloaded. Can save bandwidth and reduce the number of requests.
                (default: {diva_download_new})
            clean {bool} -- Whether to clean up when the method finishes. (default: {True})

        Returns:
            Organizer -- the organizer used for the target directory

        Raises:
            FatalException -- if target is None.
                NOTE(review): the swallow_and_print_errors decorator presumably catches
                this before it reaches the caller -- confirm in errors.py.
        """
        tmp_dir = self._tmp_dir.new_subdir()

        # Anything starting with "http" is treated as a playlist link that still has to be
        # turned into an id; everything else is taken as the id itself.
        if playlist_location.startswith("http"):
            playlist_id = DivaPlaylistCrawler.fetch_id(playlist_link=playlist_location)
        else:
            playlist_id = playlist_location

        if target is None:
            PRETTY.starting_synchronizer("None", "DIVA", playlist_id)
            raise FatalException("Got 'None' as target directory, aborting")

        if isinstance(target, Organizer):
            organizer = target
        else:
            organizer = Organizer(self.resolve(to_path(target)))

        PRETTY.starting_synchronizer(organizer.path, "DIVA", playlist_id)

        crawler = DivaPlaylistCrawler(playlist_id)
        downloader = DivaDownloader(tmp_dir, organizer, download_strategy)

        info = crawler.crawl()

        transformed = apply_transform(transform, info)
        if self._test_run:
            # Test run: only report what would be processed, download nothing.
            self._print_transformables(transformed)
            return organizer

        downloader.download_all(transformed)

        if clean:
            organizer.cleanup()

        # Record this run in the accumulated summary, as the ILIAS synchronizers do;
        # otherwise DIVA downloads never show up in print_summary().
        self._download_summary.merge(organizer.download_summary)

        return organizer
Satisfy pyling. Useful docstrings? Not quite sure. 2020-04-23 20:31:32 +02:00			`"""`
			`Convenience functions for using PFERD.`
			`"""`

Log when starting to synchronize 2020-04-23 19:55:37 +02:00			`import logging`
Add Pferd class 2020-04-23 11:44:13 +02:00			`from pathlib import Path`
Allow crawling the ILIAS Personal Desktop 2020-05-10 12:16:42 +02:00			`from typing import Callable, List, Optional, Union`
Add Pferd class 2020-04-23 11:44:13 +02:00
			`from .cookie_jar import CookieJar`
Added a diva playlist downloader 2020-04-30 16:24:38 +02:00			`from .diva import (DivaDownloader, DivaDownloadStrategy, DivaPlaylistCrawler,`
			`diva_download_new)`
Move download summary into a separate class 2020-06-25 21:55:08 +02:00			`from .download_summary import DownloadSummary`
Move FatalException to errors.py 2020-05-09 00:00:21 +02:00			`from .errors import FatalException, swallow_and_print_errors`
Rename and implement IliasDirectoryFilter 2020-04-23 12:33:38 +02:00			`from .ilias import (IliasAuthenticator, IliasCrawler, IliasDirectoryFilter,`
Allow crawling the ILIAS Personal Desktop 2020-05-10 12:16:42 +02:00			`IliasDownloader, IliasDownloadInfo, IliasDownloadStrategy,`
Move download strategies to downloader Also fixes an issue where the downloader didn't mark files that were not downloaded due to the strategy used. 2020-04-24 16:26:20 +02:00			`KitShibbolethAuthenticator, download_modified_or_new)`
Extract Location into separate file 2020-04-23 19:38:28 +02:00			`from .location import Location`
Avoid logging import in config 2020-05-12 20:19:23 +02:00			`from .logging import PrettyLogger, enable_logging`
Add Pferd class 2020-04-23 11:44:13 +02:00			`from .organizer import Organizer`
			`from .tmp_dir import TmpDir`
Satisfy pylint and add todo 2020-04-24 20:26:28 +02:00			`from .transform import TF, Transform, apply_transform`
Move logging into its own file 2020-04-25 19:59:58 +02:00			`from .utils import PathLike, to_path`
Add Pferd class 2020-04-23 11:44:13 +02:00
Move around TODOs 2020-04-23 12:49:01 +02:00			`# TODO save known-good cookies as soon as possible`
Log when starting to synchronize 2020-04-23 19:55:37 +02:00

			`LOGGER = logging.getLogger(__name__)`
			`PRETTY = PrettyLogger(LOGGER)`
Move around TODOs 2020-04-23 12:49:01 +02:00

Add Pferd class 2020-04-23 11:44:13 +02:00			`class Pferd(Location):`
			`# pylint: disable=too-many-arguments`
Satisfy pyling. Useful docstrings? Not quite sure. 2020-04-23 20:31:32 +02:00			`"""`
			`The main entrypoint in your Pferd usage: This class combines a number of`
			`useful shortcuts for running synchronizers in a single interface.`
			`"""`
Add Pferd class 2020-04-23 11:44:13 +02:00
Add test run option to PFERD 2020-04-24 20:00:21 +02:00			`def __init__(`
			`self,`
			`base_dir: Path,`
			`tmp_dir: Path = Path(".tmp"),`
			`test_run: bool = False`
			`):`
Add Pferd class 2020-04-23 11:44:13 +02:00			`super().__init__(Path(base_dir))`

Move download summary into a separate class 2020-06-25 21:55:08 +02:00			`self._download_summary = DownloadSummary()`
Add Pferd class 2020-04-23 11:44:13 +02:00			`self._tmp_dir = TmpDir(self.resolve(tmp_dir))`
Add test run option to PFERD 2020-04-24 20:00:21 +02:00			`self._test_run = test_run`

Avoid logging import in config 2020-05-12 20:19:23 +02:00			`@staticmethod`
			`def enable_logging() -> None:`
			`"""`
			`Enable and configure logging via the logging module.`
			`"""`

			`enable_logging()`

Satisfy pylint and add todo 2020-04-24 20:26:28 +02:00			`@staticmethod`
			`def _print_transformables(transformables: List[TF]) -> None:`
Add test run option to PFERD 2020-04-24 20:00:21 +02:00			`LOGGER.info("")`
			`LOGGER.info("Results of the test run:")`
			`for transformable in transformables:`
			`LOGGER.info(transformable.path)`
Add Pferd class 2020-04-23 11:44:13 +02:00
			`def _ilias(`
			`self,`
Use PathLike everywhere 2020-04-24 20:39:30 +02:00			`target: PathLike,`
Add Pferd class 2020-04-23 11:44:13 +02:00			`base_url: str,`
Allow crawling the ILIAS Personal Desktop 2020-05-10 12:16:42 +02:00			`crawl_function: Callable[[IliasCrawler], List[IliasDownloadInfo]],`
Add Pferd class 2020-04-23 11:44:13 +02:00			`authenticator: IliasAuthenticator,`
Use PathLike everywhere 2020-04-24 20:39:30 +02:00			`cookies: Optional[PathLike],`
Rename and implement IliasDirectoryFilter 2020-04-23 12:33:38 +02:00			`dir_filter: IliasDirectoryFilter,`
Add Pferd class 2020-04-23 11:44:13 +02:00			`transform: Transform,`
Move download strategies to downloader Also fixes an issue where the downloader didn't mark files that were not downloaded due to the strategy used. 2020-04-24 16:26:20 +02:00			`download_strategy: IliasDownloadStrategy,`
Add timeout to video downloads to work around requests IPv6 bug 2020-08-11 14:40:13 +02:00			`timeout: int,`
			`clean: bool = True,`
Added a diva playlist downloader 2020-04-30 16:24:38 +02:00			`) -> Organizer:`
Satisfy pyling. Useful docstrings? Not quite sure. 2020-04-23 20:31:32 +02:00			`# pylint: disable=too-many-locals`
Use PathLike everywhere 2020-04-24 20:39:30 +02:00			`cookie_jar = CookieJar(to_path(cookies) if cookies else None)`
Add Pferd class 2020-04-23 11:44:13 +02:00			`session = cookie_jar.create_session()`
			`tmp_dir = self._tmp_dir.new_subdir()`
Use PathLike everywhere 2020-04-24 20:39:30 +02:00			`organizer = Organizer(self.resolve(to_path(target)))`
Add Pferd class 2020-04-23 11:44:13 +02:00
Allow crawling the ILIAS Personal Desktop 2020-05-10 12:16:42 +02:00			`crawler = IliasCrawler(base_url, session, authenticator, dir_filter)`
Add timeout to video downloads to work around requests IPv6 bug 2020-08-11 14:40:13 +02:00			`downloader = IliasDownloader(tmp_dir, organizer, session,`
			`authenticator, download_strategy, timeout)`
Add Pferd class 2020-04-23 11:44:13 +02:00
			`cookie_jar.load_cookies()`
Allow crawling the ILIAS Personal Desktop 2020-05-10 12:16:42 +02:00			`info = crawl_function(crawler)`
Add Pferd class 2020-04-23 11:44:13 +02:00			`cookie_jar.save_cookies()`
Add test run option to PFERD 2020-04-24 20:00:21 +02:00
			`transformed = apply_transform(transform, info)`
			`if self._test_run:`
			`self._print_transformables(transformed)`
Added a diva playlist downloader 2020-04-30 16:24:38 +02:00			`return organizer`
Add test run option to PFERD 2020-04-24 20:00:21 +02:00
			`downloader.download_all(transformed)`
Add Pferd class 2020-04-23 11:44:13 +02:00			`cookie_jar.save_cookies()`
Added a diva playlist downloader 2020-04-30 16:24:38 +02:00
			`if clean:`
			`organizer.cleanup()`

			`return organizer`
Add Pferd class 2020-04-23 11:44:13 +02:00
Swallow and print errors instead of crashing 2020-05-08 23:35:25 +02:00			`@swallow_and_print_errors`
Add Pferd class 2020-04-23 11:44:13 +02:00			`def ilias_kit(`
			`self,`
Use PathLike everywhere 2020-04-24 20:39:30 +02:00			`target: PathLike,`
Add Pferd class 2020-04-23 11:44:13 +02:00			`course_id: str,`
Pass element type to ilias directory filter 2020-05-12 14:38:25 +02:00			`dir_filter: IliasDirectoryFilter = lambda x, y: True,`
Add Pferd class 2020-04-23 11:44:13 +02:00			`transform: Transform = lambda x: x,`
Use PathLike everywhere 2020-04-24 20:39:30 +02:00			`cookies: Optional[PathLike] = None,`
Add Pferd class 2020-04-23 11:44:13 +02:00			`username: Optional[str] = None,`
			`password: Optional[str] = None,`
Move download strategies to downloader Also fixes an issue where the downloader didn't mark files that were not downloaded due to the strategy used. 2020-04-24 16:26:20 +02:00			`download_strategy: IliasDownloadStrategy = download_modified_or_new,`
Added a diva playlist downloader 2020-04-30 16:24:38 +02:00			`clean: bool = True,`
Add timeout to video downloads to work around requests IPv6 bug 2020-08-11 14:40:13 +02:00			`timeout: int = 5,`
Added a diva playlist downloader 2020-04-30 16:24:38 +02:00			`) -> Organizer:`
Satisfy pyling. Useful docstrings? Not quite sure. 2020-04-23 20:31:32 +02:00			`"""`
			`Synchronizes a folder with the ILIAS instance of the KIT.`

			`Arguments:`
			`target {Path} -- the target path to write the data to`
			`course_id {str} -- the id of the main course page (found in the URL after ref_id`
			`when opening the course homepage)`

			`Keyword Arguments:`
			`dir_filter {IliasDirectoryFilter} -- A filter for directories. Will be applied on the`
			`crawler level, these directories and all of their content is skipped.`
			`(default: {lambdax:True})`
			`transform {Transform} -- A transformation function for the output paths. Return None`
			`to ignore a file. (default: {lambdax:x})`
			`cookies {Optional[Path]} -- The path to store and load cookies from.`
			`(default: {None})`
			`username {Optional[str]} -- The SCC username. If none is given, it will prompt`
			`the user. (default: {None})`
			`password {Optional[str]} -- The SCC password. If none is given, it will prompt`
			`the user. (default: {None})`
			`download_strategy {DownloadStrategy} -- A function to determine which files need to`
			`be downloaded. Can save bandwidth and reduce the number of requests.`
Use download_modified_or_new as default strategy 2020-04-24 13:48:06 +02:00			`(default: {download_modified_or_new})`
Added a diva playlist downloader 2020-04-30 16:24:38 +02:00			`clean {bool} -- Whether to clean up when the method finishes.`
Add timeout to video downloads to work around requests IPv6 bug 2020-08-11 14:40:13 +02:00			`timeout {int} -- The download timeout for opencast videos. Sadly needed due to a`
			`requests bug.`
Satisfy pyling. Useful docstrings? Not quite sure. 2020-04-23 20:31:32 +02:00			`"""`
Add Pferd class 2020-04-23 11:44:13 +02:00			`# This authenticator only works with the KIT ilias instance.`
			`authenticator = KitShibbolethAuthenticator(username=username, password=password)`
Log when starting to synchronize 2020-04-23 19:55:37 +02:00			`PRETTY.starting_synchronizer(target, "ILIAS", course_id)`
Add download summary 2020-06-25 15:41:58 +02:00
			`organizer = self._ilias(`
Add Pferd class 2020-04-23 11:44:13 +02:00			`target=target,`
			`base_url="https://ilias.studium.kit.edu/",`
Allow crawling the ILIAS Personal Desktop 2020-05-10 12:16:42 +02:00			`crawl_function=lambda crawler: crawler.crawl_course(course_id),`
			`authenticator=authenticator,`
			`cookies=cookies,`
			`dir_filter=dir_filter,`
			`transform=transform,`
			`download_strategy=download_strategy,`
			`clean=clean,`
Add timeout to video downloads to work around requests IPv6 bug 2020-08-11 14:40:13 +02:00			`timeout=timeout`
Allow crawling the ILIAS Personal Desktop 2020-05-10 12:16:42 +02:00			`)`

Move download summary into a separate class 2020-06-25 21:55:08 +02:00			`self._download_summary.merge(organizer.download_summary)`
Add download summary 2020-06-25 15:41:58 +02:00
			`return organizer`

Fix type hints 2020-06-26 15:17:44 +02:00			`def print_summary(self) -> None:`
Use pretty logger for summaries 2020-06-26 15:52:07 +02:00			`"""`
			`Prints the accumulated download summary.`
			`"""`
			`PRETTY.summary(self._download_summary)`
Add deleted files to summary 2020-06-25 21:30:03 +02:00
Allow crawling the ILIAS Personal Desktop 2020-05-10 12:16:42 +02:00			`@swallow_and_print_errors`
			`def ilias_kit_personal_desktop(`
			`self,`
			`target: PathLike,`
Pass element type to ilias directory filter 2020-05-12 14:38:25 +02:00			`dir_filter: IliasDirectoryFilter = lambda x, y: True,`
Allow crawling the ILIAS Personal Desktop 2020-05-10 12:16:42 +02:00			`transform: Transform = lambda x: x,`
			`cookies: Optional[PathLike] = None,`
			`username: Optional[str] = None,`
			`password: Optional[str] = None,`
			`download_strategy: IliasDownloadStrategy = download_modified_or_new,`
			`clean: bool = True,`
Add timeout to video downloads to work around requests IPv6 bug 2020-08-11 14:40:13 +02:00			`timeout: int = 5,`
Allow crawling the ILIAS Personal Desktop 2020-05-10 12:16:42 +02:00			`) -> Organizer:`
			`"""`
			`Synchronizes a folder with the ILIAS instance of the KIT. This method will crawl the ILIAS`
			`"personal desktop" instead of a single course.`

			`Arguments:`
			`target {Path} -- the target path to write the data to`

			`Keyword Arguments:`
			`dir_filter {IliasDirectoryFilter} -- A filter for directories. Will be applied on the`
			`crawler level, these directories and all of their content is skipped.`
			`(default: {lambdax:True})`
			`transform {Transform} -- A transformation function for the output paths. Return None`
			`to ignore a file. (default: {lambdax:x})`
			`cookies {Optional[Path]} -- The path to store and load cookies from.`
			`(default: {None})`
			`username {Optional[str]} -- The SCC username. If none is given, it will prompt`
			`the user. (default: {None})`
			`password {Optional[str]} -- The SCC password. If none is given, it will prompt`
			`the user. (default: {None})`
			`download_strategy {DownloadStrategy} -- A function to determine which files need to`
			`be downloaded. Can save bandwidth and reduce the number of requests.`
			`(default: {download_modified_or_new})`
			`clean {bool} -- Whether to clean up when the method finishes.`
Add timeout to video downloads to work around requests IPv6 bug 2020-08-11 14:40:13 +02:00			`timeout {int} -- The download timeout for opencast videos. Sadly needed due to a`
			`requests bug.`
Allow crawling the ILIAS Personal Desktop 2020-05-10 12:16:42 +02:00			`"""`
			`# This authenticator only works with the KIT ilias instance.`
			`authenticator = KitShibbolethAuthenticator(username=username, password=password)`
			`PRETTY.starting_synchronizer(target, "ILIAS", "Personal Desktop")`
Also add personal_desktop to download summary 2020-07-15 22:47:28 +02:00
			`organizer = self._ilias(`
Allow crawling the ILIAS Personal Desktop 2020-05-10 12:16:42 +02:00			`target=target,`
			`base_url="https://ilias.studium.kit.edu/",`
			`crawl_function=lambda crawler: crawler.crawl_personal_desktop(),`
Add Pferd class 2020-04-23 11:44:13 +02:00			`authenticator=authenticator,`
			`cookies=cookies,`
Rename and implement IliasDirectoryFilter 2020-04-23 12:33:38 +02:00			`dir_filter=dir_filter,`
Add Pferd class 2020-04-23 11:44:13 +02:00			`transform=transform,`
Add download strategies to save bandwith Only download files that are newer than the local version. 2020-04-23 18:29:20 +02:00			`download_strategy=download_strategy,`
Added a diva playlist downloader 2020-04-30 16:24:38 +02:00			`clean=clean,`
Add timeout to video downloads to work around requests IPv6 bug 2020-08-11 14:40:13 +02:00			`timeout=timeout`
Add Pferd class 2020-04-23 11:44:13 +02:00			`)`
Added a diva playlist downloader 2020-04-30 16:24:38 +02:00
Also add personal_desktop to download summary 2020-07-15 22:47:28 +02:00			`self._download_summary.merge(organizer.download_summary)`

			`return organizer`

Allow crawling courses or folders with sync_url Video folders do not work, if they are passed directly. Their containing folder must be specified instead. 2020-09-28 20:00:01 +02:00			`@swallow_and_print_errors`
			`def ilias_kit_folder(`
			`self,`
			`target: PathLike,`
			`full_url: str,`
			`dir_filter: IliasDirectoryFilter = lambda x, y: True,`
			`transform: Transform = lambda x: x,`
			`cookies: Optional[PathLike] = None,`
			`username: Optional[str] = None,`
			`password: Optional[str] = None,`
			`download_strategy: IliasDownloadStrategy = download_modified_or_new,`
			`clean: bool = True,`
			`timeout: int = 5,`
			`) -> Organizer:`
			`"""`
			`Synchronizes a folder with a given folder on the ILIAS instance of the KIT.`

			`Arguments:`
			`target {Path} -- the target path to write the data to`
			`full_url {str} -- the full url of the folder/videos/course to crawl`

			`Keyword Arguments:`
			`dir_filter {IliasDirectoryFilter} -- A filter for directories. Will be applied on the`
			`crawler level, these directories and all of their content is skipped.`
			`(default: {lambdax:True})`
			`transform {Transform} -- A transformation function for the output paths. Return None`
			`to ignore a file. (default: {lambdax:x})`
			`cookies {Optional[Path]} -- The path to store and load cookies from.`
			`(default: {None})`
			`username {Optional[str]} -- The SCC username. If none is given, it will prompt`
			`the user. (default: {None})`
			`password {Optional[str]} -- The SCC password. If none is given, it will prompt`
			`the user. (default: {None})`
			`download_strategy {DownloadStrategy} -- A function to determine which files need to`
			`be downloaded. Can save bandwidth and reduce the number of requests.`
			`(default: {download_modified_or_new})`
			`clean {bool} -- Whether to clean up when the method finishes.`
			`timeout {int} -- The download timeout for opencast videos. Sadly needed due to a`
			`requests bug.`
			`"""`
			`# This authenticator only works with the KIT ilias instance.`
			`authenticator = KitShibbolethAuthenticator(username=username, password=password)`
			`PRETTY.starting_synchronizer(target, "ILIAS", "An ILIAS element by url")`

			`if not full_url.startswith("https://ilias.studium.kit.edu"):`
			`raise FatalException("Not a valid KIT ILIAS URL")`

			`organizer = self._ilias(`
			`target=target,`
			`base_url="https://ilias.studium.kit.edu/",`
			`crawl_function=lambda crawler: crawler.recursive_crawl_url(full_url),`
			`authenticator=authenticator,`
			`cookies=cookies,`
			`dir_filter=dir_filter,`
			`transform=transform,`
			`download_strategy=download_strategy,`
			`clean=clean,`
			`timeout=timeout`
			`)`

			`self._download_summary.merge(organizer.download_summary)`

			`return organizer`

Swallow and print errors instead of crashing 2020-05-08 23:35:25 +02:00			`@swallow_and_print_errors`
Added a diva playlist downloader 2020-04-30 16:24:38 +02:00			`def diva_kit(`
			`self,`
			`target: Union[PathLike, Organizer],`
Allow passing a playlist URL to diva instead of an id 2020-05-10 11:11:28 +02:00			`playlist_location: str,`
Added a diva playlist downloader 2020-04-30 16:24:38 +02:00			`transform: Transform = lambda x: x,`
			`download_strategy: DivaDownloadStrategy = diva_download_new,`
			`clean: bool = True`
			`) -> Organizer:`
			`"""`
			`Synchronizes a folder with a DIVA playlist.`

			`Arguments:`
			`organizer {Organizer} -- The organizer to use.`
Allow passing a playlist URL to diva instead of an id 2020-05-10 11:11:28 +02:00			`playlist_location {str} -- the playlist id or the playlist URL`
			`in the format 'https://mediaservice.bibliothek.kit.edu/#/details/DIVA-2019-271'`
Added a diva playlist downloader 2020-04-30 16:24:38 +02:00
			`Keyword Arguments:`
			`transform {Transform} -- A transformation function for the output paths. Return None`
			`to ignore a file. (default: {lambdax:x})`
			`download_strategy {DivaDownloadStrategy} -- A function to determine which files need to`
			`be downloaded. Can save bandwidth and reduce the number of requests.`
			`(default: {diva_download_new})`
			`clean {bool} -- Whether to clean up when the method finishes.`
			`"""`
			`tmp_dir = self._tmp_dir.new_subdir()`
Fail on invalid ILIAS course ids 2020-05-08 23:47:05 +02:00
Allow passing a playlist URL to diva instead of an id 2020-05-10 11:11:28 +02:00			`if playlist_location.startswith("http"):`
			`playlist_id = DivaPlaylistCrawler.fetch_id(playlist_link=playlist_location)`
			`else:`
			`playlist_id = playlist_location`

Fail on invalid ILIAS course ids 2020-05-08 23:47:05 +02:00			`if target is None:`
			`PRETTY.starting_synchronizer("None", "DIVA", playlist_id)`
			`raise FatalException("Got 'None' as target directory, aborting")`

Added a diva playlist downloader 2020-04-30 16:24:38 +02:00			`if isinstance(target, Organizer):`
			`organizer = target`
			`else:`
			`organizer = Organizer(self.resolve(to_path(target)))`

Fail on invalid ILIAS course ids 2020-05-08 23:47:05 +02:00			`PRETTY.starting_synchronizer(organizer.path, "DIVA", playlist_id)`

Added a diva playlist downloader 2020-04-30 16:24:38 +02:00			`crawler = DivaPlaylistCrawler(playlist_id)`
			`downloader = DivaDownloader(tmp_dir, organizer, download_strategy)`

			`info = crawler.crawl()`

			`transformed = apply_transform(transform, info)`
			`if self._test_run:`
			`self._print_transformables(transformed)`
			`return organizer`

			`downloader.download_all(transformed)`

			`if clean:`
			`organizer.cleanup()`

			`return organizer`