# Provenance (recovered from the git patch this file was extracted from):
#   commit:  2154e7d9b5c06682fc117b55efc93602700114b9
#   author:  sys-nyx
#   date:    Thu, 26 Dec 2024 20:15:49 -0800
#   subject: [PATCH] moved dumps.py to redarch.py. modified script to import
#            get_redd_object function from watchful.py. Now supports building
#            straight from zst files
#   change:  redarch.py | 77 insertions(+), new file (index 0000000..8db9644)
"""Rebuild threads from post/comment dumps and hand them to the HTML writer.

Usage: ``python redarch.py CONFIG``

CONFIG is an INI file read with ``configparser``. Every section must provide
a ``posts`` and a ``comments`` entry; both values are passed to
``watchful.return_redd_objects`` to load the objects. Comments are attached
to the post whose permalink path they share, and the result is passed to
``write_html.generate_html`` keyed by the lower-cased section name.
"""

import os
import json
import sys
import argparse
import configparser
from urllib.parse import urlparse

from write_html import generate_html
from watchful import return_redd_objects


def _strip_trailing_slash(path):
    """Return *path* without its final ``/``; at most one slash is removed."""
    return path[:-1] if path.endswith('/') else path


def _group_comments_by_thread(raw_comments):
    """Bucket comments under the URL path of the thread they belong to.

    Args:
        raw_comments: iterable of dict-like comment objects.

    Returns:
        A ``(comments_by_thread, missing_count)`` tuple. The first item maps
        a thread path to the list of its comments (in input order); the
        second counts the comments skipped because they have no
        ``'permalink'`` key.
    """
    comments_by_thread = {}
    missing_count = 0
    for comment in raw_comments:
        if 'permalink' not in comment:
            missing_count += 1
            continue

        # Keep only the first six '/'-separated pieces of the path so the
        # comment-specific tail is dropped.
        # NOTE(review): presumably this yields '/r/<sub>/comments/<id>/<slug>'
        # -- confirm against the permalink format of the dumps.
        segments = urlparse(comment['permalink']).path.split('/')[:6]
        thread_path = _strip_trailing_slash('/'.join(segments))
        comments_by_thread.setdefault(thread_path, []).append(comment)
    return comments_by_thread, missing_count


def _attach_comments(raw_posts, comments_by_thread):
    """Set ``post['comments']`` on every post and return the posts as a list.

    A post gets the comment list stored under its own permalink path (minus
    one trailing slash), or a fresh empty list when there is none. Posts are
    modified in place.
    """
    threads = []
    for post in raw_posts:
        post_path = _strip_trailing_slash(urlparse(post['permalink']).path)
        post['comments'] = comments_by_thread.get(post_path, [])
        threads.append(post)
    return threads


def main():
    """Parse the command line, rebuild every configured section, write HTML.

    Exits with status 1 when the configuration file is missing or cannot be
    read.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('config', type=str, help='Path to configuration file.')
    args = parser.parse_args()

    config = configparser.ConfigParser()
    # ConfigParser.read() silently skips files it cannot open and returns the
    # list of files it did parse, so an empty result means nothing was loaded.
    if not args.config or not config.read(args.config):
        print("No config file found")
        sys.exit(1)

    subreddits = {}

    for section in config.sections():
        posts_path = config[section]['posts']
        comments_path = config[section]['comments']

        print(f"loading from {posts_path}")
        raw_posts = return_redd_objects(posts_path)
        print('done')
        print(f"loading from {comments_path}")
        raw_comments = return_redd_objects(comments_path)
        print('done')

        comments_by_thread, missing_count = _group_comments_by_thread(raw_comments)
        complete_threads = _attach_comments(raw_posts, comments_by_thread)
        subreddits[section.lower()] = complete_threads

        print("Total threads: ", len(raw_posts))
        print("Total comments: ", len(raw_comments))
        print("Comments missing permalinks: ", missing_count)
        print("Comment chains found: ", len(comments_by_thread))
        print("Threads rebuilt: ", len(complete_threads))

    generate_html([name.lower() for name in config.sections()], subreddits)


if __name__ == "__main__":
    main()