reddit-save/utilities.py

106 lines
3.7 KiB
Python
Raw Normal View History

2020-12-30 23:59:55 +01:00
import os
import praw
2020-12-31 01:33:30 +01:00
import requests
2020-12-31 02:32:06 +01:00
import youtube_dl
2020-12-31 03:18:48 +01:00
import re
2020-12-30 23:59:55 +01:00
from datetime import datetime
2020-12-31 03:58:31 +01:00
from secrets import REDDIT_USERNAME, REDDIT_PASSWORD
from secrets import REDDIT_CLIENT_ID, REDDIT_SECRET
2020-12-30 23:59:55 +01:00
2020-12-31 03:58:31 +01:00
# File extensions treated as images. save_media downloads direct links ending
# in one of these (except imgur ".gifv" links), and add_media_preview_to_html
# embeds files with these extensions using an <img> tag.
IMAGE_EXTENSIONS = ["jpg", "jpeg", "png", "gif", "gifv"]
2020-12-31 01:33:30 +01:00
# File extensions treated as video. save_media downloads direct links ending
# in one of these, and add_media_preview_to_html embeds such files using a
# <video> tag.
VIDEO_EXTENSIONS = ["mp4"]
2020-12-31 03:58:31 +01:00
# Registrable domains (last two labels of the host name) whose links save_media
# hands to youtube_dl when the URL is not a direct media file.
PLATFORMS = ["redgifs.com", "gfycat.com", "imgur.com"]
2020-12-31 01:33:30 +01:00
2020-12-30 23:59:55 +01:00
def make_client():
    """Create the PRAW client used by the rest of the module.

    The client is configured from the ``REDDIT_*`` credentials imported at the
    top of this file and identifies itself with the "reddit-save" user agent.

    Returns:
        A ``praw.Reddit`` instance.
    """
    credentials = {
        "username": REDDIT_USERNAME,
        "password": REDDIT_PASSWORD,
        "client_id": REDDIT_CLIENT_ID,
        "client_secret": REDDIT_SECRET,
        "user_agent": "reddit-save",
    }
    return praw.Reddit(**credentials)
def get_saved_posts(client):
    """Collect the submissions saved by the client's own account.

    Every item yielded by ``client.user.me().saved(limit=None)`` is inspected
    and only those whose class is named "Submission" are kept, so other saved
    item types are dropped.

    Args:
        client: object exposing ``user.me().saved(limit=...)``, e.g. the
            client returned by ``make_client``.

    Returns:
        A list of the saved submissions, in the order they were yielded.
    """
    submissions = []
    for item in client.user.me().saved(limit=None):
        # Compare by class name, as the rest of the module does, rather than
        # importing the PRAW model class.
        if item.__class__.__name__ != "Submission":
            continue
        submissions.append(item)
    return submissions
2020-12-30 23:59:55 +01:00
2020-12-31 00:05:45 +01:00
def get_upvoted_posts(client):
    """Return the submissions upvoted by the client's own account.

    Every item yielded by ``client.user.me().upvoted(limit=None)`` is
    inspected and only those whose class is named "Submission" are kept.

    Args:
        client: object exposing ``user.me().upvoted(limit=...)``, e.g. the
            client returned by ``make_client``.

    Returns:
        A list of the upvoted submissions, in the order they were yielded.
    """
    return [
        upvoted for upvoted in client.user.me().upvoted(limit=None)
        # The filter must test the loop variable itself; the previous code
        # referenced an undefined name ``saved`` and raised NameError.
        if upvoted.__class__.__name__ == "Submission"
    ]
2020-12-31 00:05:45 +01:00
2020-12-30 23:59:55 +01:00
def get_post_html(post):
    """Render a post into the ``html/post.html`` template.

    The template is read from ``html/post.html`` relative to the current
    working directory and each ``<!--name-->`` placeholder is replaced with
    the matching value from ``post``.

    Args:
        post: object exposing ``title``, ``subreddit``, ``author`` (falsy when
            the account is gone, otherwise with a ``name``), ``permalink``,
            ``url``, ``selftext_html`` (may be ``None``) and ``created_utc``
            (a POSIX timestamp).

    Returns:
        The template text with all placeholders substituted.

    Raises:
        OSError: if the template file cannot be read.
    """
    # Imported locally because the module only imports ``datetime`` from the
    # datetime package and does not import ``html`` at all.
    from datetime import timezone
    from html import escape

    # Read the template as UTF-8 explicitly so the result does not depend on
    # the platform's default encoding.
    with open(os.path.join("html", "post.html"), encoding="utf-8") as f:
        page = f.read()

    # Same naive-UTC value the deprecated datetime.utcfromtimestamp() gave, so
    # str(dt) carries no "+00:00" suffix.
    dt = datetime.fromtimestamp(post.created_utc, tz=timezone.utc)
    dt = dt.replace(tzinfo=None)

    author = f"/u/{post.author.name}" if post.author else "[deleted]"
    substitutions = (
        # The title is free text from Reddit: escape it so it cannot inject
        # markup or smuggle in one of the placeholders replaced further down.
        ("<!--title-->", escape(post.title)),
        ("<!--subreddit-->", f"/r/{str(post.subreddit)}"),
        ("<!--user-->", author),
        ("<!--link-->", f"https://reddit.com{post.permalink}"),
        ("<!--content-link-->", post.url),
        # selftext_html is already HTML, so it is inserted verbatim.
        ("<!--body-->", post.selftext_html or ""),
        ("<!--timestamp-->", str(dt)),
        ("<!--date-->", dt.strftime("%d %B, %Y")),
    )
    for placeholder, value in substitutions:
        page = page.replace(placeholder, value)
    return page
2020-12-31 02:32:06 +01:00
def save_media(post, location):
    """Download the media a post links to into ``<location>/media``.

    Two strategies are used:

    * If the URL ends in a known image/video extension (and is not an imgur
      ``.gifv`` link), the URL is fetched directly with ``requests``.
    * Otherwise, if the URL's domain is listed in ``PLATFORMS``, the download
      is delegated to ``youtube_dl``. For gfycat links the page is fetched
      first and a direct ``.mp4`` URL is extracted from it when possible.

    Args:
        post: object exposing ``url``, ``permalink`` and ``id``.
        location: directory containing an existing ``media`` subdirectory.

    Returns:
        The file name (not the full path) of the saved media inside
        ``<location>/media``, or ``None`` when nothing was saved.
    """
    media_extensions = IMAGE_EXTENSIONS + VIDEO_EXTENSIONS
    extension = post.url.split("?")[0].split(".")[-1].lower()
    # Last non-empty path segment of the permalink, used as a readable stem.
    readable_name = list(filter(bool, post.permalink.split("/")))[-1]
    # Last two labels of the host name, e.g. "i.imgur.com" -> "imgur.com".
    domain = ".".join(post.url.split("/")[2].split(".")[-2:])
    media_dir = os.path.join(location, "media")
    stem = f"{readable_name}_{post.id}"

    # imgur ".gifv" URLs are not handled as direct files; they go through the
    # platform branch below instead.
    is_imgur_gifv = extension == "gifv" and domain == "imgur.com"
    if extension in media_extensions and not is_imgur_gifv:
        # Fetch before opening the destination so a failed request cannot
        # leave an empty file behind, and skip error responses rather than
        # storing an error page under a media file name.
        response = requests.get(post.url)
        if not response.ok:
            return None
        filename = f"{stem}.{extension}"
        with open(os.path.join(media_dir, filename), "wb") as f:
            f.write(response.content)
        return filename

    if domain not in PLATFORMS:
        return None

    url = post.url
    if domain == "gfycat.com":
        html = requests.get(post.url).content
        # NOTE(review): the 50000-byte threshold is undocumented; presumably it
        # distinguishes a particular kind of gfycat page - confirm.
        if len(html) < 50000:
            match = re.search(
                r"http([\dA-Za-z\+\:\/\.]+)\.mp4", html.decode()
            )
            if match:
                url = match.group()
            else:
                return None

    options = {
        "nocheckcertificate": True,
        "quiet": True,
        "no_warnings": True,
        "ignoreerrors": True,
        "outtmpl": os.path.join(media_dir, stem + ".%(ext)s"),
    }
    with youtube_dl.YoutubeDL(options) as ydl:
        try:
            ydl.download([url])
        except Exception:
            # Best effort: a failed download just means no media for this
            # post. Unlike a bare ``except``, this still lets
            # KeyboardInterrupt and SystemExit propagate.
            pass

    # youtube_dl picks the extension, so look the file up by its stem. The
    # trailing dot stops "<name>_<id>" from matching "<name>_<id>x.ext"
    # belonging to a different post.
    for name in os.listdir(media_dir):
        if name.startswith(stem + "."):
            return name
    return None
2020-12-31 01:33:30 +01:00
# gfycat, v.reddit, imgur, redgifs
def add_media_preview_to_html(post_html, media):
    """Insert a preview of a saved media file into a post's HTML.

    The ``<!--preview-->`` placeholder in ``post_html`` is replaced with an
    ``<img>`` tag for image extensions or a ``<video>`` tag for video
    extensions, pointing at ``media/<media>``.

    Args:
        post_html: HTML text containing the ``<!--preview-->`` placeholder.
        media: file name of the media inside the ``media`` directory, or a
            falsy value (``None``/``""``) when no media was saved.

    Returns:
        The HTML with the preview inserted, or ``post_html`` unchanged when
        there is no media or its extension is neither image nor video.
    """
    # save_media returns None when nothing was downloaded; treat that as
    # "no preview" instead of failing on None.split().
    if not media:
        return post_html

    # Compare case-insensitively: the extension lists are all lowercase.
    extension = media.split(".")[-1].lower()
    location = "/".join(["media", media])
    if extension in IMAGE_EXTENSIONS:
        preview = f'<img src="{location}">'
    elif extension in VIDEO_EXTENSIONS:
        preview = f'<video controls><source src="{location}"></video>'
    else:
        return post_html
    return post_html.replace("<!--preview-->", preview)