mirror of
https://github.com/Watchful1/PushshiftDumps.git
synced 2025-07-23 23:00:40 -04:00
Didn't mean to commit that
This commit is contained in:
parent
ef186b7bd7
commit
ec977a76b2
2 changed files with 14 additions and 6 deletions
|
@ -7,10 +7,13 @@ import zstandard
|
|||
import json
|
||||
|
||||
input_files = [
|
||||
r"\\MYCLOUDPR4100\Public\reddit\subreddits23\baseballcards_comments.zst",
|
||||
r"\\MYCLOUDPR4100\Public\reddit\subreddits23\classicwow_comments.zst",
|
||||
r"\\MYCLOUDPR4100\Public\reddit\subreddits23\trading212_comments.zst",
|
||||
r"\\MYCLOUDPR4100\Public\reddit\subreddits23\Fire_comments.zst",
|
||||
r"\\MYCLOUDPR4100\Public\reddit\subreddits23\IAmTheMainCharacter_comments.zst",
|
||||
r"\\MYCLOUDPR4100\Public\reddit\subreddits23\BrightonHoveAlbion_comments.zst",
|
||||
]
|
||||
ignored_users = ['[deleted]', 'automoderator']
|
||||
ignored_users = {'[deleted]', 'automoderator'}
|
||||
ignored_users_file = "ignored.txt"
|
||||
min_comments_per_sub = 1
|
||||
file_name = "users.txt"
|
||||
require_first_subreddit = False  # if True, print users that occur in the first subreddit and in at least one of the following ones; otherwise find the users with the most overlap across all subs
|
||||
|
@ -64,6 +67,11 @@ def read_lines_zst(file_name):
|
|||
|
||||
|
||||
if __name__ == "__main__":
|
||||
if os.path.exists(ignored_users_file):
|
||||
with open(ignored_users_file) as fh:
|
||||
for user in fh.readlines():
|
||||
ignored_users.add(user.strip().lower())
|
||||
|
||||
commenterSubreddits = defaultdict(int)
|
||||
is_first = True
|
||||
total_lines = 0
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue