Allow passing a playlist URL to diva instead of an id

2025-07-20 01:42:37 +02:00 · 2020-05-10 11:11:28 +02:00
parent f6faacabb0
commit 9950144e97
2 changed files with 42 additions and 4 deletions
--- a/PFERD/diva.py
+++ b/PFERD/diva.py
@@ -1,4 +1,5 @@
 import logging
+import re
 from dataclasses import dataclass
 from pathlib import Path
 from typing import Any, Callable, List, Optional
@@ -44,16 +45,47 @@ class DivaPlaylistCrawler:
    A crawler for DIVA playlists.
    """

-    _BASE_URL = "https://mediaservice.bibliothek.kit.edu/asset/collection.json"
+    _PLAYLIST_BASE_URL = "https://mediaservice.bibliothek.kit.edu/asset/detail/"
+    _COLLECTION_BASE_URL = "https://mediaservice.bibliothek.kit.edu/asset/collection.json"

    def __init__(self, playlist_id: str):
        self._id = playlist_id

+    @classmethod
+    def fetch_id(cls, playlist_link: str) -> str:
+        """
+        Fetches the ID for a playlist, given the base link
+        (e.g. https://mediaservice.bibliothek.kit.edu/#/details/DIVA-2019-271).
+
+        Raises a FatalException if the id cannot be resolved.
+        """
+        match = re.match(r".+#/details/(.+)", playlist_link)
+        if match is None:
+            raise FatalException(
+                "DIVA: Invalid playlist link format, could not extract details."
+            )
+        base_name = match.group(1)
+
+        response = requests.get(cls._PLAYLIST_BASE_URL + base_name + ".json")
+
+        if response.status_code != 200:
+            raise FatalException(
+                f"DIVA: Got non-200 status code ({response.status_code}) "
+                f"when requesting {response.url!r}!"
+            )
+
+        body = response.json()
+
+        if body["error"]:
+            raise FatalException(f"DIVA: Server returned error {body['error']!r}.")
+
+        return body["result"]["id"]
+
    def crawl(self) -> List[DivaDownloadInfo]:
        """
        Crawls the playlist given in the constructor.
        """
-        response = requests.get(self._BASE_URL, params={"collection": self._id})
+        response = requests.get(self._COLLECTION_BASE_URL, params={"collection": self._id})
        if response.status_code != 200:
            raise FatalException(f"Server returned status {response.status_code}.")

--- a/PFERD/pferd.py
+++ b/PFERD/pferd.py
@@ -147,7 +147,7 @@ class Pferd(Location):
    def diva_kit(
            self,
            target: Union[PathLike, Organizer],
-            playlist_id: str,
+            playlist_location: str,
            transform: Transform = lambda x: x,
            download_strategy: DivaDownloadStrategy = diva_download_new,
            clean: bool = True
@@ -157,7 +157,8 @@ class Pferd(Location):

        Arguments:
            organizer {Organizer} -- The organizer to use.
-            playlist_id {str} -- the playlist id
+            playlist_location {str} -- the playlist id or the playlist URL
+              in the format 'https://mediaservice.bibliothek.kit.edu/#/details/DIVA-2019-271'

        Keyword Arguments:
            transform {Transform} -- A transformation function for the output paths. Return None
@@ -169,6 +170,11 @@ class Pferd(Location):
        """
        tmp_dir = self._tmp_dir.new_subdir()

+        if playlist_location.startswith("http"):
+            playlist_id = DivaPlaylistCrawler.fetch_id(playlist_link=playlist_location)
+        else:
+            playlist_id = playlist_location
+
        if target is None:
            PRETTY.starting_synchronizer("None", "DIVA", playlist_id)
            raise FatalException("Got 'None' as target directory, aborting")