diff --git a/PFERD/diva.py b/PFERD/diva.py
index f7f606e..d5c9240 100644
--- a/PFERD/diva.py
+++ b/PFERD/diva.py
@@ -1,4 +1,5 @@
 import logging
+import re
 from dataclasses import dataclass
 from pathlib import Path
 from typing import Any, Callable, List, Optional
@@ -44,16 +45,48 @@ class DivaPlaylistCrawler:
     A crawler for DIVA playlists.
     """
 
-    _BASE_URL = "https://mediaservice.bibliothek.kit.edu/asset/collection.json"
+    _PLAYLIST_BASE_URL = "https://mediaservice.bibliothek.kit.edu/asset/detail/"
+    _COLLECTION_BASE_URL = "https://mediaservice.bibliothek.kit.edu/asset/collection.json"
 
     def __init__(self, playlist_id: str):
         self._id = playlist_id
 
+    @classmethod
+    def fetch_id(cls, playlist_link: str) -> str:
+        """
+        Fetches the ID for a playlist, given the base link
+        (e.g. https://mediaservice.bibliothek.kit.edu/#/details/DIVA-2019-271).
+
+        Raises a FatalException if the link has an unexpected format, the
+        server answers with a non-200 status code or reports an error.
+        """
+        match = re.match(r".+#/details/(.+)", playlist_link)
+        if match is None:
+            raise FatalException(
+                "DIVA: Invalid playlist link format, could not extract details."
+            )
+        base_name = match.group(1)
+
+        response = requests.get(cls._PLAYLIST_BASE_URL + base_name + ".json")
+
+        if response.status_code != 200:
+            raise FatalException(
+                f"DIVA: Got non-200 status code ({response.status_code}) "
+                f"when requesting {response.url!r}!"
+            )
+
+        body = response.json()
+
+        if body["error"]:
+            raise FatalException(f"DIVA: Server returned error {body['error']!r}.")
+
+        return body["result"]["id"]
+
     def crawl(self) -> List[DivaDownloadInfo]:
         """
         Crawls the playlist given in the constructor.
         """
-        response = requests.get(self._BASE_URL, params={"collection": self._id})
+        response = requests.get(self._COLLECTION_BASE_URL, params={"collection": self._id})
         if response.status_code != 200:
             raise FatalException(f"Server returned status {response.status_code}.")
 
diff --git a/PFERD/pferd.py b/PFERD/pferd.py
index d13d9d1..b2e1cb4 100644
--- a/PFERD/pferd.py
+++ b/PFERD/pferd.py
@@ -147,7 +147,7 @@ class Pferd(Location):
     def diva_kit(
             self,
             target: Union[PathLike, Organizer],
-            playlist_id: str,
+            playlist_location: str,
             transform: Transform = lambda x: x,
             download_strategy: DivaDownloadStrategy = diva_download_new,
             clean: bool = True
@@ -157,7 +157,8 @@ class Pferd(Location):
 
         Arguments:
             organizer {Organizer} -- The organizer to use.
-            playlist_id {str} -- the playlist id
+            playlist_location {str} -- the playlist id or the playlist URL
+              in the format 'https://mediaservice.bibliothek.kit.edu/#/details/DIVA-2019-271'
 
         Keyword Arguments:
             transform {Transform} -- A transformation function for the output paths. Return None
@@ -169,6 +170,11 @@ class Pferd(Location):
         """
         tmp_dir = self._tmp_dir.new_subdir()
 
+        if playlist_location.startswith("http"):
+            playlist_id = DivaPlaylistCrawler.fetch_id(playlist_link=playlist_location)
+        else:
+            playlist_id = playlist_location
+
         if target is None:
             PRETTY.starting_synchronizer("None", "DIVA", playlist_id)
             raise FatalException("Got 'None' as target directory, aborting")