#!/usr/bin/env python import argparse import os import re from tqdm import tqdm from utilities import * # Get arguments parser = argparse.ArgumentParser(description="Save reddit posts to file.") parser.add_argument("mode", type=str, nargs=1, choices=["saved", "upvoted"], help="The file to convert.") if os.getenv("DOCKER", "0") != "1": parser.add_argument("location", type=str, nargs=1, help="The path to save to.") args = parser.parse_args() mode = args.mode[0] location = "./archive/" if os.getenv("DOCKER", "0") == "1" else args.location[0] # Is location specified a directory? if not os.path.isdir(location): print(location, "is not a directory") # Make a client object client = make_client() # Saved posts or upvoted posts? if mode == "saved": html_file = "saved.html" get_posts = get_saved_posts get_comments = get_saved_comments else: html_file = "upvoted.html" get_posts = get_upvoted_posts get_comments = lambda client: [] # Make directory for media and posts if not os.path.exists(os.path.join(location, "media")): os.mkdir(os.path.join(location, "media")) if not os.path.exists(os.path.join(location, "posts")): os.mkdir(os.path.join(location, "posts")) # Are there any posts already? post_ids, existing_posts_html = [], [] if os.path.exists(os.path.join(location, html_file)): with open(os.path.join(location, html_file)) as f: current_html = f.read() post_ids = re.findall(r'id="(.+?)"', current_html) existing_posts_html = re.findall( r'(