Rename and implement IliasDirectoryFilter

2025-12-27 16:52:29 +01:00 · 2020-04-23 12:33:38 +02:00
parent ea005517cf
commit bef210ae77
4 changed files with 45 additions and 26 deletions
--- a/PFERD/ilias/init.py
+++ b/PFERD/ilias/init.py
@@ -3,5 +3,5 @@ Synchronizing files from ILIAS instances (https://www.ilias.de/).
 """
 from .authenticators import IliasAuthenticator, KitShibbolethAuthenticator
-from .crawler import IliasCrawler, IliasFilter
+from .crawler import IliasCrawler, IliasDirectoryFilter
 from .downloader import IliasDownloader
--- a/PFERD/ilias/crawler.py
+++ b/PFERD/ilias/crawler.py
@@ -15,15 +15,15 @@ import bs4
 import requests
 from ..cookie_jar import CookieJar
-from ..utils import soupify
+from ..utils import PrettyLogger, soupify
 from .authenticators import IliasAuthenticator
 from .date_demangler import demangle_date
 from .downloader import IliasDownloadInfo
 LOGGER = logging.getLogger(__name__)
 PRETTY = PrettyLogger(LOGGER)
-
+IliasDirectoryFilter = Callable[[Path], bool]
 IliasFilter = Callable[[Path], bool]
 class IliasCrawler:
@@ -36,13 +36,14 @@ class IliasCrawler:
    A crawler for ILIAS.
    """
    # pylint: disable=too-many-arguments
    def __init__(
            self,
            base_url: str,
            course_id: str,
            session: requests.Session,
            authenticator: IliasAuthenticator,
-            filter_: IliasFilter
+            dir_filter: IliasDirectoryFilter
    ):
        """
        Create a new ILIAS crawler.
@@ -52,7 +53,7 @@ class IliasCrawler:
        self._course_id = course_id
        self._session = session
        self._authenticator = authenticator
-        self._filter = filter_
+        self.dir_filter = dir_filter
    def _abs_url_from_link(self, link_tag: bs4.Tag) -> str:
        """
@@ -153,13 +154,23 @@ class IliasCrawler:
    def _switch_on_folder_like(
            self,
-            path: Path,
+            parent_path: Path,
            link_element: bs4.Tag,
            url: str
    ) -> List[IliasDownloadInfo]:
        """
        Try crawling something that looks like a folder.
        """
        # pylint: disable=too-many-return-statements
        element_path = Path(parent_path, link_element.getText().strip())
        if not self.dir_filter(element_path):
            PRETTY.filtered_path(element_path)
            return []
        LOGGER.info("Searching %r", str(element_path))
        found_parent: Optional[bs4.Tag] = None
        # We look for the outer div of our inner link, to find information around it
@@ -185,8 +196,6 @@ class IliasCrawler:
            LOGGER.debug("Skipping forum at %r", url)
            return []
        element_path = Path(path, link_element.getText().strip())
        # An exercise
        if str(img_tag["src"]).endswith("icon_exc.svg"):
            LOGGER.debug("Crawling exercises at %r", url)
@@ -200,7 +209,7 @@ class IliasCrawler:
        # Assume it is a folder
        return self._crawl_folder(element_path, self._abs_url_from_link(link_element))
-    def _crawl_video_directory(self, path: Path, url: str) -> List[IliasDownloadInfo]:
+    def _crawl_video_directory(self, video_dir_path: Path, url: str) -> List[IliasDownloadInfo]:
        """
        Crawl the video overview site.
        """
@@ -224,11 +233,11 @@ class IliasCrawler:
        results: List[IliasDownloadInfo] = []
        for link in video_links:
-            results += self._crawl_single_video(path, link)
+            results += self._crawl_single_video(video_dir_path, link)
        return results
-    def _crawl_single_video(self, path: Path, link: bs4.Tag) -> List[IliasDownloadInfo]:
+    def _crawl_single_video(self, parent_path: Path, link: bs4.Tag) -> List[IliasDownloadInfo]:
        """
        Crawl a single video based on its "Abspielen" link from the video listing.
        """
@@ -267,7 +276,7 @@ class IliasCrawler:
        # and fetch the video url!
        video_url = json_object["streams"][0]["sources"]["mp4"][0]["src"]
-        return [IliasDownloadInfo(Path(path, title), video_url, modification_time)]
+        return [IliasDownloadInfo(Path(parent_path, title), video_url, modification_time)]
    def _crawl_exercises(self, element_path: Path, url: str) -> List[IliasDownloadInfo]:
        """
@@ -318,7 +327,7 @@ class IliasCrawler:
        return results
-    def _crawl_folder(self, path: Path, url: str) -> List[IliasDownloadInfo]:
+    def _crawl_folder(self, folder_path: Path, url: str) -> List[IliasDownloadInfo]:
        """
        Crawl all files in a folder-like element.
        """
@@ -330,7 +339,7 @@ class IliasCrawler:
        links: List[bs4.Tag] = soup.select("a.il_ContainerItemTitle")
        for link in links:
            abs_url = self._abs_url_from_link(link)
-            result += self._switch_on_crawled_type(path, link, abs_url)
+            result += self._switch_on_crawled_type(folder_path, link, abs_url)
        return result
--- a/PFERD/pferd.py
+++ b/PFERD/pferd.py
@@ -2,8 +2,8 @@ from pathlib import Path
 from typing import Optional
 from .cookie_jar import CookieJar
-from .ilias import (IliasAuthenticator, IliasCrawler, IliasDownloader,
+from .ilias import (IliasAuthenticator, IliasCrawler, IliasDirectoryFilter,
-                    IliasFilter, KitShibbolethAuthenticator)
+                    IliasDownloader, KitShibbolethAuthenticator)
 from .organizer import Organizer
 from .tmp_dir import TmpDir
 from .transform import Transform, apply_transform
@@ -25,7 +25,7 @@ class Pferd(Location):
            course_id: str,
            authenticator: IliasAuthenticator,
            cookies: Optional[Path],
-            filter_: IliasFilter,
+            dir_filter: IliasDirectoryFilter,
            transform: Transform,
    ) -> None:
        cookie_jar = CookieJar(cookies)
@@ -33,7 +33,7 @@ class Pferd(Location):
        tmp_dir = self._tmp_dir.new_subdir()
        organizer = Organizer(self.resolve(target))
-        crawler = IliasCrawler(base_url, course_id, session, authenticator, filter_)
+        crawler = IliasCrawler(base_url, course_id, session, authenticator, dir_filter)
        downloader = IliasDownloader(tmp_dir, organizer, session, authenticator)
        cookie_jar.load_cookies()
@@ -46,7 +46,7 @@ class Pferd(Location):
            self,
            target: Path,
            course_id: str,
-            filter_: IliasFilter = lambda x: True,
+            dir_filter: IliasDirectoryFilter = lambda x: True,
            transform: Transform = lambda x: x,
            cookies: Optional[Path] = None,
            username: Optional[str] = None,
@@ -60,6 +60,6 @@ class Pferd(Location):
            course_id=course_id,
            authenticator=authenticator,
            cookies=cookies,
-            filter_=filter_,
+            dir_filter=dir_filter,
            transform=transform,
        )
--- a/PFERD/utils.py
+++ b/PFERD/utils.py
@@ -127,7 +127,8 @@ class PrettyLogger:
        """
        self.logger.info(
-            f"{Fore.MAGENTA}{Style.BRIGHT}Modified {file_name}.{Style.RESET_ALL}")
+            f"{Fore.MAGENTA}{Style.BRIGHT}Modified {str(file_name)!r}.{Style.RESET_ALL}"
        )
    def new_file(self, file_name: Path) -> None:
        """
@@ -135,14 +136,23 @@ class PrettyLogger:
        """
        self.logger.info(
-            f"{Fore.GREEN}{Style.BRIGHT}Created {file_name}.{Style.RESET_ALL}")
+            f"{Fore.GREEN}{Style.BRIGHT}Created {str(file_name)!r}.{Style.RESET_ALL}")
    def ignored_file(self, file_name: Path) -> None:
        """
-        Nothing in particular happened to this file.
+        Nothing in particular happened to this file or directory.
        """
-        self.logger.info(f"{Style.DIM}Ignored {file_name}.{Style.RESET_ALL}")
+        self.logger.info(f"{Style.DIM}Ignored {str(file_name)!r}.{Style.RESET_ALL}")
    def filtered_path(self, path: Path) -> None:
        """
        A crawler filter rejected the given path.

        Emits a single info-level message on the wrapped logger, rendered in
        dim style, stating that the path is not being considered due to
        filter rules. The path is converted to a string and formatted with
        ``!r`` so that it appears quoted in the log output.
        """
        self.logger.info(
            f"{Style.DIM}Not considering {str(path)!r} due to filter rules.{Style.RESET_ALL}"
        )
    def starting_synchronizer(
            self,
@@ -157,6 +167,6 @@ class PrettyLogger:
        subject_str = f"{subject} " if subject else ""
        self.logger.info("")
        self.logger.info((
-            f"{Fore.CYAN}{Style.BRIGHT}Synchronizing {subject_str}to {target_directory}"
+            f"{Fore.CYAN}{Style.BRIGHT}Synchronizing {subject_str}to {str(target_directory)!r}"
            f" using the {synchronizer_name} synchronizer.{Style.RESET_ALL}"
        ))