Reorder methods a bit

I-Al-Istannen 2020-05-30 15:53:31 +02:00
parent 086b15d10f
commit 8198c9ecaa
2 changed files with 37 additions and 35 deletions

View File

@@ -3,7 +3,8 @@ Synchronizing files from ILIAS instances (https://www.ilias.de/).
 """
 from .authenticators import IliasAuthenticator, KitShibbolethAuthenticator
-from .crawler import IliasCrawler, IliasDirectoryFilter, IliasElementType
+from .crawler import (IliasCrawler, IliasCrawlerEntry, IliasDirectoryFilter,
+                      IliasElementType)
 from .downloader import (IliasDownloader, IliasDownloadInfo,
                          IliasDownloadStrategy, download_everything,
                          download_modified_or_new)
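With `IliasCrawlerEntry` now re-exported from the package root, consumers can import it alongside the other public names. A minimal sketch; the top-level package name `pferd` here is illustrative, not taken from this diff:

from pferd.ilias import (IliasCrawler, IliasCrawlerEntry,
                         IliasDirectoryFilter, IliasElementType)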

View File

@@ -28,7 +28,7 @@ PRETTY = PrettyLogger(LOGGER)
 class IliasElementType(Enum):
     """
-    The type of an ilias directory.
+    The type of an ilias element.
     """
     REGULAR_FOLDER = "REGULAR_FOLDER"
     VIDEO_FOLDER = "VIDEO_FOLDER"
@@ -43,6 +43,7 @@ IliasDirectoryFilter = Callable[[Path, IliasElementType], bool]
 class IliasCrawlerEntry:
+    # pylint: disable=too-few-public-methods
     """
     An ILIAS crawler entry used internally to find, catalogue and recursively crawl elements.
     """
@@ -97,12 +98,6 @@ class IliasCrawler:
         self._authenticator = authenticator
         self.dir_filter = dir_filter
 
-    def _abs_url_from_link(self, link_tag: bs4.Tag) -> str:
-        """
-        Create an absolute url from an <a> tag.
-        """
-        return urljoin(self._base_url, link_tag.get("href"))
-
     @staticmethod
     def _url_set_query_param(url: str, param: str, value: str) -> str:
         """
@@ -138,7 +133,7 @@ class IliasCrawler:
         # And treat it as a folder
         entries: List[IliasCrawlerEntry] = self._crawl_folder(Path(""), root_url)
 
-        return self._entries_to_download_infos(entries)
+        return self._iterate_entries_to_download_infos(entries)
 
     def _is_course_id_valid(self, root_url: str, course_id: str) -> bool:
         response: requests.Response = self._session.get(root_url)
@@ -154,9 +149,9 @@ class IliasCrawler:
         entries: List[IliasCrawlerEntry] = self._crawl_folder(
             Path(""), self._base_url + "?baseClass=ilPersonalDesktopGUI"
         )
-        return self._entries_to_download_infos(entries)
+        return self._iterate_entries_to_download_infos(entries)
 
-    def _entries_to_download_infos(
+    def _iterate_entries_to_download_infos(
         self,
         entries: List[IliasCrawlerEntry]
     ) -> List[IliasDownloadInfo]:
@@ -201,6 +196,36 @@ class IliasCrawler:
 
         return result
 
+    def _crawl_folder(self, folder_path: Path, url: str) -> List[IliasCrawlerEntry]:
+        """
+        Crawl all files in a folder-like element.
+        """
+        soup = self._get_page(url, {})
+
+        result: List[IliasCrawlerEntry] = []
+
+        # Fetch all links and throw them to the general interpreter
+        links: List[bs4.Tag] = soup.select("a.il_ContainerItemTitle")
+        for link in links:
+            abs_url = self._abs_url_from_link(link)
+            element_path = Path(folder_path, link.getText().strip())
+            element_type = self._find_type_from_link(element_path, link, abs_url)
+
+            if element_type == IliasElementType.REGULAR_FILE:
+                result += self._crawl_file(folder_path, link, abs_url)
+            elif element_type is not None:
+                result += [IliasCrawlerEntry(element_path, abs_url, element_type, None)]
+            else:
+                PRETTY.warning(f"Found element without a type at {str(element_path)!r}")
+
+        return result
+
+    def _abs_url_from_link(self, link_tag: bs4.Tag) -> str:
+        """
+        Create an absolute url from an <a> tag.
+        """
+        return urljoin(self._base_url, link_tag.get("href"))
+
     @staticmethod
     def _find_type_from_link(
         path: Path,
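The moved `_crawl_folder` and `_abs_url_from_link` work as a pair: the CSS selector collects the container item links, and `urljoin` resolves their (often relative) `href`s against the crawler's base URL. A self-contained demonstration of that mechanism; the HTML snippet and URLs are made up:

import bs4
from urllib.parse import urljoin

html = '<a class="il_ContainerItemTitle" href="goto.php?target=file_123">Slides</a>'
link = bs4.BeautifulSoup(html, "html.parser").select("a.il_ContainerItemTitle")[0]
print(link.getText().strip())
# -> Slides
print(urljoin("https://ilias.example.com/dashboard", link.get("href")))
# -> https://ilias.example.com/goto.php?target=file_123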
@@ -515,30 +540,6 @@ class IliasCrawler:
 
         return results
 
-    def _crawl_folder(self, folder_path: Path, url: str) -> List[IliasCrawlerEntry]:
-        """
-        Crawl all files in a folder-like element.
-        """
-        soup = self._get_page(url, {})
-
-        result: List[IliasCrawlerEntry] = []
-
-        # Fetch all links and throw them to the general interpreter
-        links: List[bs4.Tag] = soup.select("a.il_ContainerItemTitle")
-        for link in links:
-            abs_url = self._abs_url_from_link(link)
-            element_path = Path(folder_path, link.getText().strip())
-            element_type = self._find_type_from_link(element_path, link, abs_url)
-
-            if element_type == IliasElementType.REGULAR_FILE:
-                result += self._crawl_file(folder_path, link, abs_url)
-            elif element_type is not None:
-                result += [IliasCrawlerEntry(element_path, abs_url, element_type, None)]
-            else:
-                PRETTY.warning(f"Found element without a type at {str(element_path)!r}")
-
-        return result
-
     def _get_page(self, url: str, params: Dict[str, Any]) -> bs4.BeautifulSoup:
         """
         Fetches a page from ILIAS, authenticating when needed.
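Only `_get_page`'s signature and docstring appear in this hunk; "authenticating when needed" suggests a fetch, detect-login-page, authenticate, retry flow. A rough sketch under that assumption (the login-page check is a hypothetical helper, and the authenticator's method name is assumed):

import bs4

def get_page(session, authenticator, url, params):
    # Fetch the page; if ILIAS served a login mask instead,
    # authenticate and fetch once more.
    response = session.get(url, params=params)
    soup = bs4.BeautifulSoup(response.text, "html.parser")
    if looks_like_login_page(soup):          # hypothetical helper
        authenticator.authenticate(session)  # assumed authenticator API
        response = session.get(url, params=params)
        soup = bs4.BeautifulSoup(response.text, "html.parser")
    return soup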