Handle gfycat links

2020-12-31 02:18:48 +00:00 · 2020-12-31 02:18:48 +00:00 · 4b21a2cb2b
commit 4b21a2cb2b
parent 80474bc74e
3 changed files with 27 additions and 11 deletions
--- a/requirements.txt
+++ b/requirements.txt
@ -1,3 +1,4 @@
 praw
 requests
 youtube_dl
+tqdm
--- a/save.py
+++ b/save.py
@ -2,6 +2,7 @@

 import argparse
 import os
+from tqdm import tqdm
 from utilities import *

 # Get arguments
@ -33,7 +34,8 @@ if not os.path.exists(os.path.join(location, "media")):

 posts_html = []

-for post in get_posts(client):
+posts = get_posts(client)
+for post in tqdm(posts):
    post_html = get_post_html(post)
    media = save_media(post, location)
    if media:
--- a/utilities.py
+++ b/utilities.py
@ -2,13 +2,14 @@ import os
 import praw
 import requests
 import youtube_dl
+import re
 from datetime import datetime
-from secrets import REDDIT_USERNAME, REDDIT_PASSWORD
-from secrets import REDDIT_CLIENT_ID, REDDIT_SECRET
+from secrets2 import REDDIT_USERNAME, REDDIT_PASSWORD
+from secrets2 import REDDIT_CLIENT_ID, REDDIT_SECRET

 IMAGE_EXTENSIONS = ["jpg", "jpeg", "png", "gif"]
 VIDEO_EXTENSIONS = ["mp4"]
-PLATFORMS = ["redgifs.com"]
+PLATFORMS = ["redgifs.com", "gfycat.com"]

 def make_client():
    return praw.Reddit(
@ -21,15 +22,17 @@ def make_client():


 def get_saved_posts(client):
-    for saved in client.user.me().saved(limit=10):
-        if saved.__class__.__name__ == "Submission":
-            yield saved
+    return [
+        saved for saved in client.user.me().saved(limit=10)
+        if saved.__class__.__name__ == "Submission"
+    ]


 def get_upvoted_posts(client):
-    for upvoted in client.user.me().upvoted(limit=None):
-        if upvoted.__class__.__name__ == "Submission":
-            yield upvoted
+    return [
+        upvoted for upvoted in client.user.me().upvoted(limit=None)
+        if upvoted.__class__.__name__ == "Submission"  # test the loop variable
+    ]


 def get_post_html(post):
@ -59,6 +62,16 @@ def save_media(post, location):
    else:
        domain = ".".join(post.url.split("/")[2].split(".")[-2:])
        if domain in PLATFORMS:
+            url = post.url
+            if domain == "gfycat.com":
+                html = requests.get(post.url).content
+                if len(html) < 50000:
+                    match = re.search(
+                        r"http([\dA-Za-z\+\:\/\.]+)\.mp4", html.decode()
+                    )
+                    if match:
+                        url = match.group()
+                    else: return None
            options = {
                "nocheckcertificate": True, "quiet": True, "no_warnings": True,
                "outtmpl": os.path.join(
@ -66,7 +79,7 @@ def save_media(post, location):
                )
            }
            with youtube_dl.YoutubeDL(options) as ydl:
-                ydl.download([post.url])
+                ydl.download([url])
            for f in os.listdir(os.path.join(location, "media")):
                if f.startswith(f"{readable_name}_{post.id}"):
                    return f