mirror of
https://github.com/Watchful1/PushshiftDumps.git
synced 2025-07-25 15:45:19 -04:00
Update comment
This commit is contained in:
parent
d7beff9a08
commit
8a0256285f
3 changed files with 4 additions and 5 deletions
|
@ -8,7 +8,7 @@ log = discord_logging.init_logging()
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
input_path = r"\\MYCLOUDPR4100\Public\reddit\requests\wallstreetbets_comments.zst"
|
input_path = r"\\MYCLOUDPR4100\Public\reddit\subreddits\NoStupidQuestions_comments.zst"
|
||||||
|
|
||||||
input_file_paths = []
|
input_file_paths = []
|
||||||
if os.path.isdir(input_path):
|
if os.path.isdir(input_path):
|
||||||
|
|
|
@ -8,7 +8,7 @@ import logging.handlers
|
||||||
|
|
||||||
# put the path to the input file, or a folder of files to process all of
|
# put the path to the input file, or a folder of files to process all of
|
||||||
input_file = r"\\MYCLOUDPR4100\Public\reddit_test"
|
input_file = r"\\MYCLOUDPR4100\Public\reddit_test"
|
||||||
# put the name or path to the output file. The file extension from below will be added automatically
|
# put the name or path to the output file. The file extension from below will be added automatically. If the input file is a folder, the output will be treated as a folder as well
|
||||||
output_file = r"\\MYCLOUDPR4100\Public\output"
|
output_file = r"\\MYCLOUDPR4100\Public\output"
|
||||||
# the format to output in, pick from the following options
|
# the format to output in, pick from the following options
|
||||||
# zst: same as the input, a zstandard compressed ndjson file. Can be read by the other scripts in the repo
|
# zst: same as the input, a zstandard compressed ndjson file. Can be read by the other scripts in the repo
|
||||||
|
|
|
@ -7,9 +7,8 @@ import zstandard
|
||||||
import json
|
import json
|
||||||
|
|
||||||
input_files = [
|
input_files = [
|
||||||
r"\\MYCLOUDPR4100\Public\reddit\subreddits\redditdev_comments.zst",
|
r"\\MYCLOUDPR4100\Public\reddit\subreddits\collapse_comments.zst",
|
||||||
r"\\MYCLOUDPR4100\Public\reddit\subreddits\announcements_comments.zst",
|
r"\\MYCLOUDPR4100\Public\reddit\subreddits\Slovakia_comments.zst",
|
||||||
r"\\MYCLOUDPR4100\Public\reddit\subreddits\modnews_comments.zst",
|
|
||||||
]
|
]
|
||||||
ignored_users = ['[deleted]', 'automoderator']
|
ignored_users = ['[deleted]', 'automoderator']
|
||||||
min_comments_per_sub = 1
|
min_comments_per_sub = 1
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue