mirror of
https://github.com/Garmelon/PFERD.git
synced 2023-12-21 10:23:01 +01:00
Reorder methods a bit
This commit is contained in:
parent
086b15d10f
commit
8198c9ecaa
@ -3,7 +3,8 @@ Synchronizing files from ILIAS instances (https://www.ilias.de/).
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
from .authenticators import IliasAuthenticator, KitShibbolethAuthenticator
|
from .authenticators import IliasAuthenticator, KitShibbolethAuthenticator
|
||||||
from .crawler import IliasCrawler, IliasDirectoryFilter, IliasElementType
|
from .crawler import (IliasCrawler, IliasCrawlerEntry, IliasDirectoryFilter,
|
||||||
|
IliasElementType)
|
||||||
from .downloader import (IliasDownloader, IliasDownloadInfo,
|
from .downloader import (IliasDownloader, IliasDownloadInfo,
|
||||||
IliasDownloadStrategy, download_everything,
|
IliasDownloadStrategy, download_everything,
|
||||||
download_modified_or_new)
|
download_modified_or_new)
|
||||||
|
@ -28,7 +28,7 @@ PRETTY = PrettyLogger(LOGGER)
|
|||||||
|
|
||||||
class IliasElementType(Enum):
|
class IliasElementType(Enum):
|
||||||
"""
|
"""
|
||||||
The type of an ilias directory.
|
The type of an ilias element.
|
||||||
"""
|
"""
|
||||||
REGULAR_FOLDER = "REGULAR_FOLDER"
|
REGULAR_FOLDER = "REGULAR_FOLDER"
|
||||||
VIDEO_FOLDER = "VIDEO_FOLDER"
|
VIDEO_FOLDER = "VIDEO_FOLDER"
|
||||||
@ -43,6 +43,7 @@ IliasDirectoryFilter = Callable[[Path, IliasElementType], bool]
|
|||||||
|
|
||||||
|
|
||||||
class IliasCrawlerEntry:
|
class IliasCrawlerEntry:
|
||||||
|
# pylint: disable=too-few-public-methods
|
||||||
"""
|
"""
|
||||||
An ILIAS crawler entry used internally to find, catalogue and recursively crawl elements.
|
An ILIAS crawler entry used internally to find, catalogue and recursively crawl elements.
|
||||||
"""
|
"""
|
||||||
@ -97,12 +98,6 @@ class IliasCrawler:
|
|||||||
self._authenticator = authenticator
|
self._authenticator = authenticator
|
||||||
self.dir_filter = dir_filter
|
self.dir_filter = dir_filter
|
||||||
|
|
||||||
def _abs_url_from_link(self, link_tag: bs4.Tag) -> str:
|
|
||||||
"""
|
|
||||||
Create an absolute url from an <a> tag.
|
|
||||||
"""
|
|
||||||
return urljoin(self._base_url, link_tag.get("href"))
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _url_set_query_param(url: str, param: str, value: str) -> str:
|
def _url_set_query_param(url: str, param: str, value: str) -> str:
|
||||||
"""
|
"""
|
||||||
@ -138,7 +133,7 @@ class IliasCrawler:
|
|||||||
|
|
||||||
# And treat it as a folder
|
# And treat it as a folder
|
||||||
entries: List[IliasCrawlerEntry] = self._crawl_folder(Path(""), root_url)
|
entries: List[IliasCrawlerEntry] = self._crawl_folder(Path(""), root_url)
|
||||||
return self._entries_to_download_infos(entries)
|
return self._iterate_entries_to_download_infos(entries)
|
||||||
|
|
||||||
def _is_course_id_valid(self, root_url: str, course_id: str) -> bool:
|
def _is_course_id_valid(self, root_url: str, course_id: str) -> bool:
|
||||||
response: requests.Response = self._session.get(root_url)
|
response: requests.Response = self._session.get(root_url)
|
||||||
@ -154,9 +149,9 @@ class IliasCrawler:
|
|||||||
entries: List[IliasCrawlerEntry] = self._crawl_folder(
|
entries: List[IliasCrawlerEntry] = self._crawl_folder(
|
||||||
Path(""), self._base_url + "?baseClass=ilPersonalDesktopGUI"
|
Path(""), self._base_url + "?baseClass=ilPersonalDesktopGUI"
|
||||||
)
|
)
|
||||||
return self._entries_to_download_infos(entries)
|
return self._iterate_entries_to_download_infos(entries)
|
||||||
|
|
||||||
def _entries_to_download_infos(
|
def _iterate_entries_to_download_infos(
|
||||||
self,
|
self,
|
||||||
entries: List[IliasCrawlerEntry]
|
entries: List[IliasCrawlerEntry]
|
||||||
) -> List[IliasDownloadInfo]:
|
) -> List[IliasDownloadInfo]:
|
||||||
@ -201,6 +196,36 @@ class IliasCrawler:
|
|||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
def _crawl_folder(self, folder_path: Path, url: str) -> List[IliasCrawlerEntry]:
|
||||||
|
"""
|
||||||
|
Crawl all files in a folder-like element.
|
||||||
|
"""
|
||||||
|
soup = self._get_page(url, {})
|
||||||
|
|
||||||
|
result: List[IliasCrawlerEntry] = []
|
||||||
|
|
||||||
|
# Fetch all links and throw them to the general interpreter
|
||||||
|
links: List[bs4.Tag] = soup.select("a.il_ContainerItemTitle")
|
||||||
|
for link in links:
|
||||||
|
abs_url = self._abs_url_from_link(link)
|
||||||
|
element_path = Path(folder_path, link.getText().strip())
|
||||||
|
element_type = self._find_type_from_link(element_path, link, abs_url)
|
||||||
|
|
||||||
|
if element_type == IliasElementType.REGULAR_FILE:
|
||||||
|
result += self._crawl_file(folder_path, link, abs_url)
|
||||||
|
elif element_type is not None:
|
||||||
|
result += [IliasCrawlerEntry(element_path, abs_url, element_type, None)]
|
||||||
|
else:
|
||||||
|
PRETTY.warning(f"Found element without a type at {str(element_path)!r}")
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
|
def _abs_url_from_link(self, link_tag: bs4.Tag) -> str:
|
||||||
|
"""
|
||||||
|
Create an absolute url from an <a> tag.
|
||||||
|
"""
|
||||||
|
return urljoin(self._base_url, link_tag.get("href"))
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _find_type_from_link(
|
def _find_type_from_link(
|
||||||
path: Path,
|
path: Path,
|
||||||
@ -515,30 +540,6 @@ class IliasCrawler:
|
|||||||
|
|
||||||
return results
|
return results
|
||||||
|
|
||||||
def _crawl_folder(self, folder_path: Path, url: str) -> List[IliasCrawlerEntry]:
|
|
||||||
"""
|
|
||||||
Crawl all files in a folder-like element.
|
|
||||||
"""
|
|
||||||
soup = self._get_page(url, {})
|
|
||||||
|
|
||||||
result: List[IliasCrawlerEntry] = []
|
|
||||||
|
|
||||||
# Fetch all links and throw them to the general interpreter
|
|
||||||
links: List[bs4.Tag] = soup.select("a.il_ContainerItemTitle")
|
|
||||||
for link in links:
|
|
||||||
abs_url = self._abs_url_from_link(link)
|
|
||||||
element_path = Path(folder_path, link.getText().strip())
|
|
||||||
element_type = self._find_type_from_link(element_path, link, abs_url)
|
|
||||||
|
|
||||||
if element_type == IliasElementType.REGULAR_FILE:
|
|
||||||
result += self._crawl_file(folder_path, link, abs_url)
|
|
||||||
elif element_type is not None:
|
|
||||||
result += [IliasCrawlerEntry(element_path, abs_url, element_type, None)]
|
|
||||||
else:
|
|
||||||
PRETTY.warning(f"Found element without a type at {str(element_path)!r}")
|
|
||||||
|
|
||||||
return result
|
|
||||||
|
|
||||||
def _get_page(self, url: str, params: Dict[str, Any]) -> bs4.BeautifulSoup:
|
def _get_page(self, url: str, params: Dict[str, Any]) -> bs4.BeautifulSoup:
|
||||||
"""
|
"""
|
||||||
Fetches a page from ILIAS, authenticating when needed.
|
Fetches a page from ILIAS, authenticating when needed.
|
||||||
|
Loading…
Reference in New Issue
Block a user