mirror of
https://github.com/Watchful1/PushshiftDumps.git
synced 2025-07-25 15:45:19 -04:00
Merge remote-tracking branch 'origin/master'
This commit is contained in:
commit
d442ef991c
1 changed file with 6 additions and 1 deletion
|
@@ -12,7 +12,7 @@ import json
|
||||||
# change the subreddits to the list of subreddits, one per line. The case must exactly match, ie, for r/AskReddit, put "AskReddit"
|
# change the subreddits to the list of subreddits, one per line. The case must exactly match, ie, for r/AskReddit, put "AskReddit"
|
||||||
# the files in the folder must match the format from the torrent, subreddit_type.zst, like AskReddit_comments.zst
|
# the files in the folder must match the format from the torrent, subreddit_type.zst, like AskReddit_comments.zst
|
||||||
# the script will look for both comments and submissions files for each subreddit
|
# the script will look for both comments and submissions files for each subreddit
|
||||||
folder = r"\\MYCLOUDPR4100\Public\reddit\subreddits23"
|
folder = r"C:\Users\user\Downloads"
|
||||||
subreddits = [
|
subreddits = [
|
||||||
"demolitionranch",
|
"demolitionranch",
|
||||||
"politics",
|
"politics",
|
||||||
|
@@ -98,6 +98,9 @@ def get_commenters_from_file(subreddit_file, subreddit_commenters, total_lines):
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
log.info(f"Subreddit's folder: {folder}")
|
log.info(f"Subreddit's folder: {folder}")
|
||||||
|
if not os.path.exists(folder):
|
||||||
|
log.error(f"Subreddit's folder either doesn't exist or the script doesn't have access to it: {folder}")
|
||||||
|
sys.exit()
|
||||||
if len(subreddits) <= 10:
|
if len(subreddits) <= 10:
|
||||||
log.info(f"Finding overlapping users in {', '.join(subreddits)}")
|
log.info(f"Finding overlapping users in {', '.join(subreddits)}")
|
||||||
else:
|
else:
|
||||||
|
@@ -128,6 +131,8 @@ if __name__ == "__main__":
|
||||||
total_lines = get_commenters_from_file(subreddit_file, commenters, total_lines)
|
total_lines = get_commenters_from_file(subreddit_file, commenters, total_lines)
|
||||||
if not subreddit_exists:
|
if not subreddit_exists:
|
||||||
log.error(f"Subreddit {subreddit} has no files, aborting")
|
log.error(f"Subreddit {subreddit} has no files, aborting")
|
||||||
|
file_count = len(list(os.listdir(folder)))
|
||||||
|
log.error(f"The script can see {file_count} files in the folder, but not the ones requested: {folder}")
|
||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
|
|
||||||
for commenter in commenters:
|
for commenter in commenters:
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue