2020-09-28 17:49:36 +02:00
|
|
|
#!/usr/bin/env python
|
|
|
|
|
2020-09-28 19:42:59 +02:00
|
|
|
"""
|
|
|
|
A simple script to download a course by name from ILIAS.
|
|
|
|
"""
|
|
|
|
|
2020-09-28 17:49:36 +02:00
|
|
|
import argparse
|
2020-11-10 15:27:12 +01:00
|
|
|
import os
|
|
|
|
import re
|
|
|
|
from pathlib import Path, PurePath
|
|
|
|
from typing import Optional
|
2020-10-06 17:15:10 +02:00
|
|
|
from urllib.parse import urlparse
|
2020-09-28 17:49:36 +02:00
|
|
|
|
|
|
|
from PFERD import Pferd
|
|
|
|
from PFERD.cookie_jar import CookieJar
|
2020-10-06 17:15:10 +02:00
|
|
|
from PFERD.ilias import (IliasCrawler, IliasElementType,
|
|
|
|
KitShibbolethAuthenticator)
|
2020-11-10 15:27:12 +01:00
|
|
|
from PFERD.transform import re_rename
|
2020-09-28 19:42:59 +02:00
|
|
|
from PFERD.utils import to_path
|
|
|
|
|
2020-09-28 17:49:36 +02:00
|
|
|
|
2020-11-10 15:27:12 +01:00
|
|
|
def sanitize_path(path: PurePath) -> Optional[PurePath]:
    """
    Remove characters that are illegal in Windows file names from a path.

    On every platform other than Windows (``os.name != 'nt'``) the path is
    returned unchanged. On Windows each path component is cleaned on its own,
    so the directory structure survives and only the offending characters
    inside the individual names are dropped. A drive/root anchor, if the path
    has one, is passed through untouched.

    :param path: the path to sanitize
    :return: the sanitized path
    """
    if os.name != 'nt':
        return path

    # Clean component by component: substituting over str(path) would also
    # delete the separators and collapse "dir\file" into "dirfile".
    anchor = path.anchor
    names = path.parts[1:] if anchor else path.parts
    # '*' is reserved on Windows as well, so strip it together with the rest.
    cleaned = [re.sub(r'[<>:"/\\|?*]', "", name) for name in names]

    if anchor:
        return PurePath(anchor, *cleaned)
    return PurePath(*cleaned)
|
|
|
|
|
|
|
|
|
2020-09-28 17:49:36 +02:00
|
|
|
def main() -> None:
    """
    Parse the command line and download one ILIAS course with PFERD.

    Positional arguments are the course URL and an optional target folder.
    When no folder is given, the folder name is looked up via
    ``IliasCrawler.find_element_name`` and PFERD is rooted at the directory
    containing this script. When a folder is given, PFERD is rooted at the
    parent of that folder.

    Options:
        --test-run      forwarded to ``Pferd`` as ``test_run``
        -c, --cookies   file to store cookies in
        --no-videos     skip video files and video folders
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--test-run", action="store_true")
    parser.add_argument('-c', '--cookies', nargs='?', default=None, help="File to store cookies in")
    # This must be a plain switch. Declared with nargs='?', a bare "--no-videos"
    # evaluated to None (so the filter below never triggered) and the option
    # could swallow the positional URL that follows it.
    parser.add_argument('--no-videos', action="store_true", help="Don't download videos")
    parser.add_argument('url', help="URL to the course page")
    parser.add_argument('folder', nargs='?', default=None, help="Folder to put stuff into")
    args = parser.parse_args()

    url = urlparse(args.url)

    cookie_jar = CookieJar(to_path(args.cookies) if args.cookies else None)
    session = cookie_jar.create_session()
    authenticator = KitShibbolethAuthenticator()
    # The crawler only needs the base URL (scheme + host) of the ILIAS instance;
    # the filter passed here accepts every element.
    crawler = IliasCrawler(url.scheme + '://' + url.netloc, session,
                           authenticator, lambda x, y: True)

    cookie_jar.load_cookies()

    if args.folder is not None:
        folder = args.folder
        # Initialize pferd at the *parent of the passed folder*
        # This is needed so Pferd's internal protections against escaping the working directory
        # do not trigger (e.g. if somebody names a file in ILIAS '../../bad thing.txt')
        pferd = Pferd(Path(Path(__file__).parent, folder).parent, test_run=args.test_run)
    else:
        # fetch course name from ilias
        folder = crawler.find_element_name(args.url)
        # Persist cookies obtained while looking up the name.
        cookie_jar.save_cookies()

        # Initialize pferd at the location of the script
        pferd = Pferd(Path(__file__).parent, test_run=args.test_run)

    def dir_filter(_: Path, element: IliasElementType) -> bool:
        """Accept every element, except videos when --no-videos was passed."""
        if args.no_videos:
            return element not in [IliasElementType.VIDEO_FILE, IliasElementType.VIDEO_FOLDER]
        return True

    pferd.enable_logging()

    # fetch
    pferd.ilias_kit_folder(
        target=folder,
        full_url=args.url,
        cookies=args.cookies,
        dir_filter=dir_filter,
        transform=sanitize_path
    )
|
2020-09-28 19:42:59 +02:00
|
|
|
|
2020-09-28 17:49:36 +02:00
|
|
|
|
|
|
|
# Script entry point: run the downloader only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|