Garmelon/PFERD (mirror of https://github.com/Garmelon/PFERD.git)

Commit 8198c9ecaa (parent 086b15d10f): Reorder methods a bit
@@ -3,7 +3,8 @@ Synchronizing files from ILIAS instances (https://www.ilias.de/).
 """
 
 from .authenticators import IliasAuthenticator, KitShibbolethAuthenticator
-from .crawler import IliasCrawler, IliasDirectoryFilter, IliasElementType
+from .crawler import (IliasCrawler, IliasCrawlerEntry, IliasDirectoryFilter,
+                      IliasElementType)
 from .downloader import (IliasDownloader, IliasDownloadInfo,
                          IliasDownloadStrategy, download_everything,
                          download_modified_or_new)
@@ -28,7 +28,7 @@ PRETTY = PrettyLogger(LOGGER)
 
 class IliasElementType(Enum):
     """
-    The type of an ilias directory.
+    The type of an ilias element.
     """
     REGULAR_FOLDER = "REGULAR_FOLDER"
     VIDEO_FOLDER = "VIDEO_FOLDER"
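The docstring fix is small, but the enum itself is central: the type-alias context line of the next hunk (IliasDirectoryFilter = Callable[[Path, IliasElementType], bool]) shows that IliasElementType is exactly what directory filters receive. A minimal, hypothetical filter sketch, assuming the package layout PFERD.ilias implied by the import hunk above:

    from pathlib import Path

    from PFERD.ilias import IliasElementType  # assumed path, per the import added above

    def no_videos(path: Path, element_type: IliasElementType) -> bool:
        # Returning False prunes the element and its subtree from the crawl;
        # this example skips ILIAS video folders.
        return element_type != IliasElementType.VIDEO_FOLDER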
@@ -43,6 +43,7 @@ IliasDirectoryFilter = Callable[[Path, IliasElementType], bool]
 
+
 class IliasCrawlerEntry:
     # pylint: disable=too-few-public-methods
     """
     An ILIAS crawler entry used internally to find, catalogue and recursively crawl elements.
     """
@@ -97,12 +98,6 @@ class IliasCrawler:
         self._authenticator = authenticator
         self.dir_filter = dir_filter
 
-    def _abs_url_from_link(self, link_tag: bs4.Tag) -> str:
-        """
-        Create an absolute url from an <a> tag.
-        """
-        return urljoin(self._base_url, link_tag.get("href"))
-
     @staticmethod
     def _url_set_query_param(url: str, param: str, value: str) -> str:
         """
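The _abs_url_from_link removed here is only moved (it reappears in a later hunk); it is a thin wrapper around urllib.parse.urljoin. A self-contained sketch of the resolution behavior, using a placeholder base URL rather than a real ILIAS instance:

    from urllib.parse import urljoin

    base = "https://ilias.example.edu/"  # hypothetical base URL, for illustration only

    # A relative href is resolved against the base:
    print(urljoin(base, "goto.php?target=crs_123"))
    # -> https://ilias.example.edu/goto.php?target=crs_123

    # An absolute href passes through unchanged:
    print(urljoin(base, "https://other.example.edu/file.pdf"))
    # -> https://other.example.edu/file.pdf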
@@ -138,7 +133,7 @@ class IliasCrawler:
 
         # And treat it as a folder
         entries: List[IliasCrawlerEntry] = self._crawl_folder(Path(""), root_url)
-        return self._entries_to_download_infos(entries)
+        return self._iterate_entries_to_download_infos(entries)
 
     def _is_course_id_valid(self, root_url: str, course_id: str) -> bool:
         response: requests.Response = self._session.get(root_url)
@@ -154,9 +149,9 @@ class IliasCrawler:
         entries: List[IliasCrawlerEntry] = self._crawl_folder(
             Path(""), self._base_url + "?baseClass=ilPersonalDesktopGUI"
         )
-        return self._entries_to_download_infos(entries)
+        return self._iterate_entries_to_download_infos(entries)
 
-    def _entries_to_download_infos(
+    def _iterate_entries_to_download_infos(
             self,
             entries: List[IliasCrawlerEntry]
     ) -> List[IliasDownloadInfo]:
@@ -201,6 +196,36 @@ class IliasCrawler:
 
         return result
 
+    def _crawl_folder(self, folder_path: Path, url: str) -> List[IliasCrawlerEntry]:
+        """
+        Crawl all files in a folder-like element.
+        """
+        soup = self._get_page(url, {})
+
+        result: List[IliasCrawlerEntry] = []
+
+        # Fetch all links and throw them to the general interpreter
+        links: List[bs4.Tag] = soup.select("a.il_ContainerItemTitle")
+        for link in links:
+            abs_url = self._abs_url_from_link(link)
+            element_path = Path(folder_path, link.getText().strip())
+            element_type = self._find_type_from_link(element_path, link, abs_url)
+
+            if element_type == IliasElementType.REGULAR_FILE:
+                result += self._crawl_file(folder_path, link, abs_url)
+            elif element_type is not None:
+                result += [IliasCrawlerEntry(element_path, abs_url, element_type, None)]
+            else:
+                PRETTY.warning(f"Found element without a type at {str(element_path)!r}")
+
+        return result
+
+    def _abs_url_from_link(self, link_tag: bs4.Tag) -> str:
+        """
+        Create an absolute url from an <a> tag.
+        """
+        return urljoin(self._base_url, link_tag.get("href"))
+
     @staticmethod
     def _find_type_from_link(
             path: Path,
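The moved _crawl_folder leans on BeautifulSoup's CSS-selector support: soup.select("a.il_ContainerItemTitle") collects the title links ILIAS renders for each container item. A self-contained sketch with made-up markup (the class name matches the selector above; the href and title are placeholders):

    import bs4

    html = """
    <div class="il_ContainerListItem">
      <a class="il_ContainerItemTitle" href="goto.php?target=file_42">Lecture slides</a>
    </div>
    """
    soup = bs4.BeautifulSoup(html, "html.parser")

    for link in soup.select("a.il_ContainerItemTitle"):
        # getText() is the element title, get("href") the (often relative)
        # link target that _abs_url_from_link would resolve.
        print(link.getText().strip(), "->", link.get("href"))
    # Lecture slides -> goto.php?target=file_42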
@@ -515,30 +540,6 @@ class IliasCrawler:
 
         return results
 
-    def _crawl_folder(self, folder_path: Path, url: str) -> List[IliasCrawlerEntry]:
-        """
-        Crawl all files in a folder-like element.
-        """
-        soup = self._get_page(url, {})
-
-        result: List[IliasCrawlerEntry] = []
-
-        # Fetch all links and throw them to the general interpreter
-        links: List[bs4.Tag] = soup.select("a.il_ContainerItemTitle")
-        for link in links:
-            abs_url = self._abs_url_from_link(link)
-            element_path = Path(folder_path, link.getText().strip())
-            element_type = self._find_type_from_link(element_path, link, abs_url)
-
-            if element_type == IliasElementType.REGULAR_FILE:
-                result += self._crawl_file(folder_path, link, abs_url)
-            elif element_type is not None:
-                result += [IliasCrawlerEntry(element_path, abs_url, element_type, None)]
-            else:
-                PRETTY.warning(f"Found element without a type at {str(element_path)!r}")
-
-        return result
-
     def _get_page(self, url: str, params: Dict[str, Any]) -> bs4.BeautifulSoup:
         """
         Fetches a page from ILIAS, authenticating when needed.
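The closing context shows _get_page's signature: query parameters in, a parsed bs4.BeautifulSoup out, with authentication handled transparently per its docstring. A generic sketch of the requests-plus-BeautifulSoup pattern behind such a method, not PFERD's actual implementation:

    from typing import Any, Dict

    import bs4
    import requests

    def get_page(session: requests.Session, url: str, params: Dict[str, Any]) -> bs4.BeautifulSoup:
        # Fetch and parse; PFERD's real method additionally detects ILIAS
        # login pages and re-authenticates before retrying, per its docstring.
        response = session.get(url, params=params)
        return bs4.BeautifulSoup(response.text, "html.parser")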