From fe8fef722fcf3dc94246a67ea5ac82083cb24fd8 Mon Sep 17 00:00:00 2001 From: Watchful1 Date: Mon, 19 Feb 2024 21:20:37 -0800 Subject: [PATCH] Fix csv script for comments without a permalink --- scripts/find_overlapping_users.py | 2 +- scripts/to_csv.py | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/scripts/find_overlapping_users.py b/scripts/find_overlapping_users.py index 7e737d7..8ee9812 100644 --- a/scripts/find_overlapping_users.py +++ b/scripts/find_overlapping_users.py @@ -7,7 +7,7 @@ import zstandard import json input_files = [ - r"\\MYCLOUDPR4100\Public\reddit\subreddits23\StyleCoordinators_comments.zst", + r"\\MYCLOUDPR4100\Public\reddit\subreddits23\baseballcards_comments.zst", r"\\MYCLOUDPR4100\Public\reddit\subreddits23\classicwow_comments.zst", ] ignored_users = ['[deleted]', 'automoderator'] diff --git a/scripts/to_csv.py b/scripts/to_csv.py index f4d561a..6486500 100644 --- a/scripts/to_csv.py +++ b/scripts/to_csv.py @@ -17,9 +17,9 @@ import logging.handlers # put the path to the input file -input_file_path = r"\\MYCLOUDPR4100\Public\reddit\subreddits\CryptoCurrency_submissions.zst" +input_file_path = r"\\MYCLOUDPR4100\Public\reddit\subreddits\intel_comments.zst" # put the path to the output file, with the csv extension -output_file_path = r"\\MYCLOUDPR4100\Public\CryptoCurrency_submissions.csv" +output_file_path = r"\\MYCLOUDPR4100\Public\intel_comments.csv" # if you want a custom set of fields, put them in the following list. If you leave it empty the script will use a default set of fields fields = [] @@ -86,7 +86,10 @@ if __name__ == "__main__": if field == "created": value = datetime.fromtimestamp(int(obj['created_utc'])).strftime("%Y-%m-%d %H:%M") elif field == "link": - value = f"https://www.reddit.com{obj['permalink']}" + if 'permalink' in obj: + value = f"https://www.reddit.com{obj['permalink']}" + else: + value = f"https://www.reddit.com/r/{obj['subreddit']}/comments/{obj['link_id'][3:]}/_/{obj['id']}/" elif field == "author": value = f"u/{obj['author']}" elif field == "text":