reddit-save/save.py

74 lines
2.3 KiB
Python
Raw Normal View History

2020-12-30 23:28:50 +01:00
#!/usr/bin/env python
import argparse
import os
2021-01-02 23:33:47 +01:00
import re
2020-12-31 03:18:48 +01:00
from tqdm import tqdm
2020-12-30 23:59:55 +01:00
from utilities import *
2020-12-30 23:28:50 +01:00
# Parse the command line: which listing to archive and where to write it.
parser = argparse.ArgumentParser(description="Save reddit posts to file.")
parser.add_argument(
    "mode",
    type=str,
    choices=["saved", "upvoted"],
    help="Which posts to save: your saved posts or your upvoted posts.",
)
parser.add_argument("location", type=str, help="The path to save to.")
args = parser.parse_args()
# Without nargs, argparse stores each positional as a plain string, so no
# one-element list has to be unwrapped.
mode = args.mode
location = args.location
# Abort when the target is not an existing directory. Carrying on would only
# fail later, when the media/posts sub-directories are created inside it.
if not os.path.isdir(location):
    # SystemExit with a string argument prints it to stderr and exits with
    # status 1.
    raise SystemExit(f"{location} is not a directory")
# Build the client object that is handed to the post fetcher further down
client = make_client()

# Pick the output page name and the matching fetch function for this mode
html_file, get_posts = (
    ("saved.html", get_saved_posts)
    if mode == "saved"
    else ("upvoted.html", get_upvoted_posts)
)
2021-01-02 23:46:36 +01:00
# Ensure the media and posts sub-directories exist inside the target location
for subdir in ("media", "posts"):
    subdir_path = os.path.join(location, subdir)
    if os.path.exists(subdir_path):
        continue
    os.mkdir(subdir_path)
2020-12-31 01:33:30 +01:00
2021-01-02 23:33:47 +01:00
# Recover the ids and HTML fragments of posts already present in the output
# page, if the page exists; otherwise start from empty lists.
post_ids = []
posts_html = []
existing_page = os.path.join(location, html_file)
if os.path.exists(existing_page):
    with open(existing_page) as existing_file:
        current_html = existing_file.read()
    # Every id="..." attribute value found in the page
    post_ids = re.findall(r'id="(.+?)"', current_html)
    # Every post <div>, up to and including its <!--postend--> marker
    posts_html = re.findall(
        r'(<div class="post"[\S\n\t\v ]+?<!--postend--><\/div>)',
        current_html
    )
2020-12-30 23:59:55 +01:00
2021-01-02 23:33:47 +01:00
# Fetch the listing and keep only posts whose id is not already on the page.
# The ids go into a set once so each membership test is O(1) instead of a
# scan over the whole list of known ids.
known_ids = set(post_ids)
posts = [p for p in get_posts(client) if p.id not in known_ids]
if not posts:
    # Name the active mode so the message is also right for "upvoted".
    print(f"No new {mode} posts")
else:
    for post in tqdm(posts):
        post_html = get_post_html(post)
        media = save_media(post, location)
        if media:
            # save_media returned something truthy: add a preview to the HTML
            post_html = add_media_preview_to_html(post_html, media)
        posts_html.append(post_html)

        # Each new post also gets its own page at posts/<id>.html
        page_html = create_post_page_html(post, post_html)
        with open(os.path.join(location, "posts", f"{post.id}.html"), "w") as f:
            f.write(page_html)
2020-12-30 23:59:55 +01:00
2021-01-02 23:33:47 +01:00
# Assemble the main page: load the template for this mode, inline the
# stylesheet, insert all post fragments, then write it to the target location.
# NOTE(review): the template paths are relative to the current working
# directory - confirm the script is always launched from its own folder.
template_path = os.path.join("html", html_file)
stylesheet_path = os.path.join("html", "style.css")

with open(template_path) as template_file:
    html = template_file.read()
with open(stylesheet_path) as stylesheet_file:
    stylesheet = stylesheet_file.read()

# The stylesheet goes in first, then the posts, in that order
html = html.replace("<style></style>", f"<style>\n{stylesheet}\n</style>")
html = html.replace("<!--posts-->", "\n".join(posts_html))

with open(os.path.join(location, html_file), "w") as output_file:
    output_file.write(html)
2021-01-02 23:46:36 +01:00