Support v.redd.it

2021-01-01 21:44:40 +00:00 · 2021-01-01 21:44:40 +00:00 · 3da0877616
commit 3da0877616
parent a1704c1d52
2 changed files with 69 additions and 35 deletions
--- a/requirements.txt
+++ b/requirements.txt
@ -2,3 +2,4 @@ praw
 requests
 youtube_dl
 tqdm
 redvid
--- a/utilities.py
+++ b/utilities.py
@ -1,6 +1,7 @@
 import os
 import praw
 import requests
 from redvid import Downloader
 import youtube_dl
 import re
 from datetime import datetime
@ -9,9 +10,11 @@ from secrets import REDDIT_CLIENT_ID, REDDIT_SECRET
 IMAGE_EXTENSIONS = ["jpg", "jpeg", "png", "gif", "gifv"]
 VIDEO_EXTENSIONS = ["mp4"]
-PLATFORMS = ["redgifs.com", "gfycat.com", "imgur.com"]
+PLATFORMS = ["redgifs.com", "gfycat.com", "imgur.com", "youtube.com"]
 def make_client():
    """Creates a PRAW client with the details in the secrets.py file."""
    return praw.Reddit(
        username=REDDIT_USERNAME,
        password=REDDIT_PASSWORD,
@ -22,13 +25,17 @@ def make_client():
 def get_saved_posts(client, limit=None):
     """Gets a list of posts that the user has saved.

     Saved items whose class is not named "Submission" (for example saved
     comments) are skipped.

     Args:
         client: Object whose ``user.me().saved(limit=...)`` call yields the
             saved items (presumably the PRAW client from make_client -
             confirm against callers).
         limit: Value forwarded unchanged to ``saved()``. Defaults to None
             rather than a hard-coded small number, so a quick test run has
             to ask for a short listing explicitly.

     Returns:
         A list of the saved items that are submissions, in listing order.
     """
     return [
         item for item in client.user.me().saved(limit=limit)
         if item.__class__.__name__ == "Submission"
     ]
 def get_upvoted_posts(client):
    """Gets a list of posts that the user has saved."""
    return [
        upvoted for upvoted in client.user.me().upvoted(limit=None)
        if saved.__class__.__name__ == "Submission"
@ -36,6 +43,9 @@ def get_upvoted_posts(client):
 def get_post_html(post):
    """Takes a post object and creates a HTML for it - but not including the
    preview HTML."""
    with open(os.path.join("html", "post.html")) as f:
        html = f.read()
    dt = datetime.utcfromtimestamp(post.created_utc)
@ -51,46 +61,69 @@ def get_post_html(post):
 def save_media(post, location):
     """Takes a post object and tries to download any image/video it might be
     associated with. If it can, it will return the filename.

     The strategies are tried in order:
       1. direct download, when the URL ends in a known media extension and
          the server answers with an image/video Content-Type;
       2. redvid, when the URL's domain is redd.it;
       3. youtube_dl, when the domain is listed in PLATFORMS (gfycat pages
          are first scanned for a direct .mp4 link).

     Args:
         post: Object exposing ``url``, ``permalink`` and ``id`` attributes
             (presumably a PRAW submission - confirm against callers).
         location: Directory containing the ``media`` folder to save into.

     Returns:
         The name of the saved file inside ``<location>/media``, or None if
         nothing could be downloaded.
     """
     import shutil  # local import: only the redd.it branch needs it

     url = post.url
     # A URL ending in the post's own permalink carries no external media.
     if url.endswith(post.permalink):
         return None

     # What is the key information?
     media_dir = os.path.join(location, "media")
     extension = url.split("?")[0].split(".")[-1].lower()
     domain = ".".join(url.split("/")[2].split(".")[-2:])
     readable_name = list(filter(bool, post.permalink.split("/")))[-1]
     base_name = f"{readable_name}_{post.id}"

     # Can the media be obtained directly?
     if extension in IMAGE_EXTENSIONS + VIDEO_EXTENSIONS:
         response = requests.get(url)
         media_type = response.headers.get("Content-Type", "")
         # Create the file only once the response is known to be media:
         # opening it first would leave an empty file behind on a mismatch,
         # and the prefix search at the end could then return that file.
         if media_type.startswith(("image", "video")):
             filename = f"{base_name}.{extension}"
             with open(os.path.join(media_dir, filename), "wb") as f:
                 f.write(response.content)
             return filename

     # Is this a v.redd.it link?
     if domain == "redd.it":
         downloader = Downloader(max_q=True, log=False)
         downloader.url = url
         name = downloader.download()
         # NOTE(review): only a string result is treated as the path of a
         # downloaded file; anything else is assumed to mean that no file
         # was produced - confirm against the redvid documentation.
         if not isinstance(name, str):
             return None
         extension = name.split(".")[-1]
         filename = f"{base_name}.{extension}"
         # shutil.move still works if the media folder is on another
         # filesystem than the one the file was downloaded to.
         shutil.move(name, os.path.join(media_dir, filename))
         return filename

     # Is it a gfycat link that redirects? Update the URL if possible
     if domain == "gfycat.com":
         html = requests.get(url).content
         if len(html) < 50000:
             # The pattern is pure ASCII, so undecodable bytes can be
             # replaced instead of aborting the whole download.
             match = re.search(
                 r"http([\dA-Za-z\+\:\/\.]+)\.mp4",
                 html.decode(errors="replace"),
             )
             if match is None:
                 return None
             url = match.group()

     # Try to use youtube_dl if it's one of the possible domains
     if domain in PLATFORMS:
         options = {
             "nocheckcertificate": True, "quiet": True, "no_warnings": True,
             "ignoreerrors": True,
             "outtmpl": os.path.join(media_dir, base_name + ".%(ext)s"),
         }
         with youtube_dl.YoutubeDL(options) as ydl:
             try:
                 ydl.download([url])
             except Exception:
                 # Best effort: a failed download simply means no media.
                 # Exception (not a bare except) lets Ctrl-C through.
                 pass
         # youtube_dl picks the extension, so look the file up by prefix.
         for candidate in os.listdir(media_dir):
             if candidate.startswith(base_name):
                 return candidate

     return None
 def add_media_preview_to_html(post_html, media):
    """Takes post HTML and returns a modified version with the preview
    inserted."""
    extension = media.split(".")[-1]
    location = "/".join(["media", media])
    if extension in IMAGE_EXTENSIONS: