diff --git a/PFERD/ilias/crawler.py b/PFERD/ilias/crawler.py
index b6c65b2..a10db3d 100644
--- a/PFERD/ilias/crawler.py
+++ b/PFERD/ilias/crawler.py
@@ -116,6 +116,19 @@ class IliasCrawler:
 
         return urlunsplit((scheme, netloc, path, new_query_string, fragment))
 
+    def recursive_crawl_url(self, url: str) -> List[IliasDownloadInfo]:
+        """
+        Crawls a given url *and all reachable elements in it*.
+
+        Args:
+            url {str} -- the *full* url to crawl
+
+        Returns:
+            List[IliasDownloadInfo] -- the download infos computed from the crawled entries
+        """
+        start_entries: List[IliasCrawlerEntry] = self._crawl_folder(Path(""), url)
+        return self._iterate_entries_to_download_infos(start_entries)
+
     def crawl_course(self, course_id: str) -> List[IliasDownloadInfo]:
         """
         Starts the crawl process for a course, yielding a list of elements to (potentially)
diff --git a/PFERD/pferd.py b/PFERD/pferd.py
index 0b25151..042dd93 100644
--- a/PFERD/pferd.py
+++ b/PFERD/pferd.py
@@ -230,6 +230,82 @@ class Pferd(Location):
 
         return organizer
 
+    @swallow_and_print_errors
+    def ilias_kit_folder(
+            self,
+            target: PathLike,
+            full_url: str,
+            dir_filter: IliasDirectoryFilter = lambda x, y: True,
+            transform: Transform = lambda x: x,
+            cookies: Optional[PathLike] = None,
+            username: Optional[str] = None,
+            password: Optional[str] = None,
+            download_strategy: IliasDownloadStrategy = download_modified_or_new,
+            clean: bool = True,
+            timeout: int = 5,
+    ) -> Organizer:
+        """
+        Synchronizes a folder with a given folder on the ILIAS instance of the KIT.
+
+        Arguments:
+            target {Path} -- the target path to write the data to
+            full_url {str} -- the full url of the folder/videos/course to crawl. Has to
+                start with "https://ilias.studium.kit.edu/".
+
+        Keyword Arguments:
+            dir_filter {IliasDirectoryFilter} -- A filter for directories. Will be applied on the
+                crawler level, these directories and all of their content is skipped.
+                (default: {lambda x, y: True})
+            transform {Transform} -- A transformation function for the output paths. Return None
+                to ignore a file. (default: {lambda x: x})
+            cookies {Optional[Path]} -- The path to store and load cookies from.
+                (default: {None})
+            username {Optional[str]} -- The SCC username. If none is given, it will prompt
+                the user. (default: {None})
+            password {Optional[str]} -- The SCC password. If none is given, it will prompt
+                the user. (default: {None})
+            download_strategy {DownloadStrategy} -- A function to determine which files need to
+                be downloaded. Can save bandwidth and reduce the number of requests.
+                (default: {download_modified_or_new})
+            clean {bool} -- Whether to clean up when the method finishes. (default: {True})
+            timeout {int} -- The download timeout for opencast videos. Sadly needed due to a
+                requests bug. (default: {5})
+
+        Raises:
+            FatalException -- if full_url does not start with the KIT ILIAS base url. It is
+                raised inside this method; whether @swallow_and_print_errors lets it reach
+                the caller is not visible here (TODO confirm).
+        """
+        # Single source of truth for the instance root. The trailing slash is what makes the
+        # prefix check below reject look-alike hosts.
+        base_url = "https://ilias.studium.kit.edu/"
+
+        # This authenticator only works with the KIT ilias instance.
+        authenticator = KitShibbolethAuthenticator(username=username, password=password)
+        PRETTY.starting_synchronizer(target, "ILIAS", "An ILIAS element by url")
+
+        # A bare "https://ilias.studium.kit.edu" prefix would also match foreign hosts such as
+        # "https://ilias.studium.kit.edu.example.org/", so compare including the slash.
+        if not full_url.startswith(base_url):
+            raise FatalException("Not a valid KIT ILIAS URL")
+
+        organizer = self._ilias(
+            target=target,
+            base_url=base_url,
+            crawl_function=lambda crawler: crawler.recursive_crawl_url(full_url),
+            authenticator=authenticator,
+            cookies=cookies,
+            dir_filter=dir_filter,
+            transform=transform,
+            download_strategy=download_strategy,
+            clean=clean,
+            timeout=timeout
+        )
+
+        self._download_summary.merge(organizer.download_summary)
+
+        return organizer
+
     @swallow_and_print_errors
     def diva_kit(
             self,
diff --git a/sync_url.py b/sync_url.py
index d486ce1..64c742b 100755
--- a/sync_url.py
+++ b/sync_url.py
@@ -23,10 +23,15 @@ def main() -> None:
     parser.add_argument('folder', nargs='?', default=None, help="Folder to put stuff into")
     args = parser.parse_args()
 
-    # parse provided course URL
     url = urlparse(args.url)
-    query = parse_qs(url.query)
-    course_id = query['ref_id'][0]
+
+    cookie_jar = CookieJar(to_path(args.cookies) if args.cookies else None)
+    session = cookie_jar.create_session()
+    authenticator = KitShibbolethAuthenticator()
+    crawler = IliasCrawler(url.scheme + '://' + url.netloc, session,
+                           authenticator, lambda x, y: True)
+
+    cookie_jar.load_cookies()
 
     if args.folder is not None:
         folder = args.folder
@@ -36,13 +41,6 @@ def main() -> None:
         pferd = Pferd(Path(Path(__file__).parent, folder).parent, test_run=args.test_run)
     else:
         # fetch course name from ilias
-        cookie_jar = CookieJar(to_path(args.cookies) if args.cookies else None)
-        session = cookie_jar.create_session()
-        authenticator = KitShibbolethAuthenticator()
-        crawler = IliasCrawler(url.scheme + '://' + url.netloc, session,
-                               authenticator, lambda x, y: True)
-
-        cookie_jar.load_cookies()
         folder = crawler.find_element_name(args.url)
         cookie_jar.save_cookies()
 
@@ -51,7 +49,7 @@ def main() -> None:
     pferd.enable_logging()
 
     # fetch
-    pferd.ilias_kit(target=folder, course_id=course_id, cookies=args.cookies)
+    pferd.ilias_kit_folder(target=folder, full_url=args.url, cookies=args.cookies)
 
 
 if __name__ == "__main__":