Handle gfycat links

Sam Ireland 2020-12-31 02:18:48 +00:00
parent 80474bc74e
commit 4b21a2cb2b
3 changed files with 27 additions and 11 deletions

View File

@@ -1,3 +1,4 @@
 praw
 requests
 youtube_dl
+tqdm

View File

@@ -2,6 +2,7 @@
 import argparse
 import os
+from tqdm import tqdm
 from utilities import *

 # Get arguments
@@ -33,7 +34,8 @@ if not os.path.exists(os.path.join(location, "media")):
 posts_html = []
-for post in get_posts(client):
+posts = get_posts(client)
+for post in tqdm(posts):
     post_html = get_post_html(post)
     media = save_media(post, location)
     if media:
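Note on this change: wrapping the loop in tqdm is what adds the progress bar, and materialising the posts first (posts = get_posts(client)) presumably lets tqdm show a total and an ETA, since it can call len() on a list but not on a generator. A minimal, self-contained sketch of that pattern, with a plain list standing in for the real posts:

    from tqdm import tqdm

    def process_all(items):
        # tqdm wraps any iterable; given a list it can also display a total
        # and an ETA because it knows len(items) up front.
        done = []
        for item in tqdm(items, desc="Saving posts"):
            done.append(item)  # the real loop builds HTML and saves media here
        return done

    process_all(list(range(100)))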

View File

@@ -2,13 +2,14 @@ import os
 import praw
 import requests
 import youtube_dl
+import re
 from datetime import datetime
-from secrets import REDDIT_USERNAME, REDDIT_PASSWORD
-from secrets import REDDIT_CLIENT_ID, REDDIT_SECRET
+from secrets2 import REDDIT_USERNAME, REDDIT_PASSWORD
+from secrets2 import REDDIT_CLIENT_ID, REDDIT_SECRET

 IMAGE_EXTENSIONS = ["jpg", "jpeg", "png", "gif"]
 VIDEO_EXTENSIONS = ["mp4"]
-PLATFORMS = ["redgifs.com"]
+PLATFORMS = ["redgifs.com", "gfycat.com"]

 def make_client():
     return praw.Reddit(
@@ -21,15 +22,17 @@ def make_client():

 def get_saved_posts(client):
-    for saved in client.user.me().saved(limit=10):
-        if saved.__class__.__name__ == "Submission":
-            yield saved
+    return [
+        saved for saved in client.user.me().saved(limit=10)
+        if saved.__class__.__name__ == "Submission"
+    ]

 def get_upvoted_posts(client):
-    for upvoted in client.user.me().upvoted(limit=None):
-        if upvoted.__class__.__name__ == "Submission":
-            yield upvoted
+    return [
+        upvoted for upvoted in client.user.me().upvoted(limit=None)
+        if upvoted.__class__.__name__ == "Submission"
+    ]

 def get_post_html(post):
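Aside on the filter used in both helpers: saved and upvoted listings from PRAW can contain both submissions and comments, and the class-name check keeps only submissions. A hedged alternative sketch using isinstance instead of the class name (equivalent in effect, not what the repository does; it needs an authenticated praw.Reddit client such as the one make_client() returns):

    from praw.models import Submission

    def get_saved_submissions(client, limit=10):
        # Saved items may be Submission or Comment objects; keep submissions only.
        return [
            item for item in client.user.me().saved(limit=limit)
            if isinstance(item, Submission)
        ]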
@@ -59,6 +62,16 @@ def save_media(post, location):
     else:
         domain = ".".join(post.url.split("/")[2].split(".")[-2:])
         if domain in PLATFORMS:
+            url = post.url
+            if domain == "gfycat.com":
+                html = requests.get(post.url).content
+                if len(html) < 50000:
+                    match = re.search(
+                        r"http([\dA-Za-z\+\:\/\.]+)\.mp4", html.decode()
+                    )
+                    if match:
+                        url = match.group()
+                    else: return None
             options = {
                 "nocheckcertificate": True, "quiet": True, "no_warnings": True,
                 "outtmpl": os.path.join(
@@ -66,7 +79,7 @@ def save_media(post, location):
                 )
             }
             with youtube_dl.YoutubeDL(options) as ydl:
-                ydl.download([post.url])
+                ydl.download([url])
             for f in os.listdir(os.path.join(location, "media")):
                 if f.startswith(f"{readable_name}_{post.id}"):
                     return f
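To see the new gfycat handling in isolation: the page at the gfycat URL is fetched, the first direct .mp4 link is pulled out of the HTML with a regex, and that link (rather than the original post URL) is handed to youtube_dl. A minimal sketch of that resolution step, with the 50000-byte guard and the regex copied from the diff and everything else illustrative:

    import re
    import requests

    def resolve_gfycat_url(url):
        # Fetch the gfycat page and look for a direct .mp4 link in its HTML.
        html = requests.get(url).content
        if len(html) >= 50000:
            # Unexpectedly large page: fall back to the original URL,
            # matching the diff, which only searches small responses.
            return url
        match = re.search(r"http([\dA-Za-z\+\:\/\.]+)\.mp4", html.decode())
        return match.group() if match else None  # None mirrors the "else: return None" path

    # e.g. resolve_gfycat_url("https://gfycat.com/some-clip")  # hypothetical URL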