From 439ab0108e48c57b9a86d65774ed7e97b4ceea33 Mon Sep 17 00:00:00 2001 From: Watchful1 Date: Sat, 29 Mar 2025 17:08:12 -0700 Subject: [PATCH] Add note --- scripts/find_overlapping_users.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/find_overlapping_users.py b/scripts/find_overlapping_users.py index 9fa35a3..14f6b48 100644 --- a/scripts/find_overlapping_users.py +++ b/scripts/find_overlapping_users.py @@ -8,6 +8,7 @@ import zstandard import json # IMPORTANT SETUP INSTRUCTIONS +# get the subreddit files from here: https://www.reddit.com/r/pushshift/comments/1itme1k/separate_dump_files_for_the_top_40k_subreddits/ # change the folder line to the folder where the files are stored # change the subreddits to the list of subreddits, one per line. The case must exactly match, ie, for r/AskReddit, put "AskReddit" # the files in the folder must match the format from the torrent, subreddit_type.zst, like AskReddit_comments.zst