From 8a0256285f6b4d89cf6caeea67cfc95ed1d7d37c Mon Sep 17 00:00:00 2001 From: Watchful1 Date: Tue, 22 Aug 2023 22:13:37 -0700 Subject: [PATCH] Update comment --- personal/test_file.py | 2 +- scripts/filter_file.py | 2 +- scripts/find_overlapping_users.py | 5 ++--- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/personal/test_file.py b/personal/test_file.py index 6795e0a..45f3d18 100644 --- a/personal/test_file.py +++ b/personal/test_file.py @@ -8,7 +8,7 @@ log = discord_logging.init_logging() if __name__ == "__main__": - input_path = r"\\MYCLOUDPR4100\Public\reddit\requests\wallstreetbets_comments.zst" + input_path = r"\\MYCLOUDPR4100\Public\reddit\subreddits\NoStupidQuestions_comments.zst" input_file_paths = [] if os.path.isdir(input_path): diff --git a/scripts/filter_file.py b/scripts/filter_file.py index 647b18c..0f5850e 100644 --- a/scripts/filter_file.py +++ b/scripts/filter_file.py @@ -8,7 +8,7 @@ import logging.handlers # put the path to the input file, or a folder of files to process all of input_file = r"\\MYCLOUDPR4100\Public\reddit_test" -# put the name or path to the output file. The file extension from below will be added automatically +# put the name or path to the output file. The file extension from below will be added automatically. If the input file is a folder, the output will be treated as a folder as well output_file = r"\\MYCLOUDPR4100\Public\output" # the format to output in, pick from the following options # zst: same as the input, a zstandard compressed ndjson file. Can be read by the other scripts in the repo diff --git a/scripts/find_overlapping_users.py b/scripts/find_overlapping_users.py index f32bbbf..acded30 100644 --- a/scripts/find_overlapping_users.py +++ b/scripts/find_overlapping_users.py @@ -7,9 +7,8 @@ import zstandard import json input_files = [ - r"\\MYCLOUDPR4100\Public\reddit\subreddits\redditdev_comments.zst", - r"\\MYCLOUDPR4100\Public\reddit\subreddits\announcements_comments.zst", - r"\\MYCLOUDPR4100\Public\reddit\subreddits\modnews_comments.zst", + r"\\MYCLOUDPR4100\Public\reddit\subreddits\collapse_comments.zst", + r"\\MYCLOUDPR4100\Public\reddit\subreddits\Slovakia_comments.zst", ] ignored_users = ['[deleted]', 'automoderator'] min_comments_per_sub = 1