mirror of
https://github.com/Watchful1/PushshiftDumps.git
synced 2025-07-25 15:45:19 -04:00
Fix csv script for comments without a permalink
This commit is contained in:
parent
ef12dc5694
commit
fe8fef722f
2 changed files with 7 additions and 4 deletions
|
@@ -7,7 +7,7 @@ import zstandard
|
||||||
import json
|
import json
|
||||||
|
|
||||||
input_files = [
|
input_files = [
|
||||||
r"\\MYCLOUDPR4100\Public\reddit\subreddits23\StyleCoordinators_comments.zst",
|
r"\\MYCLOUDPR4100\Public\reddit\subreddits23\baseballcards_comments.zst",
|
||||||
r"\\MYCLOUDPR4100\Public\reddit\subreddits23\classicwow_comments.zst",
|
r"\\MYCLOUDPR4100\Public\reddit\subreddits23\classicwow_comments.zst",
|
||||||
]
|
]
|
||||||
ignored_users = ['[deleted]', 'automoderator']
|
ignored_users = ['[deleted]', 'automoderator']
|
||||||
|
|
|
@@ -17,9 +17,9 @@ import logging.handlers
|
||||||
|
|
||||||
|
|
||||||
# put the path to the input file
|
# put the path to the input file
|
||||||
input_file_path = r"\\MYCLOUDPR4100\Public\reddit\subreddits\CryptoCurrency_submissions.zst"
|
input_file_path = r"\\MYCLOUDPR4100\Public\reddit\subreddits\intel_comments.zst"
|
||||||
# put the path to the output file, with the csv extension
|
# put the path to the output file, with the csv extension
|
||||||
output_file_path = r"\\MYCLOUDPR4100\Public\CryptoCurrency_submissions.csv"
|
output_file_path = r"\\MYCLOUDPR4100\Public\intel_comments.csv"
|
||||||
# if you want a custom set of fields, put them in the following list. If you leave it empty the script will use a default set of fields
|
# if you want a custom set of fields, put them in the following list. If you leave it empty the script will use a default set of fields
|
||||||
fields = []
|
fields = []
|
||||||
|
|
||||||
|
@@ -86,7 +86,10 @@ if __name__ == "__main__":
|
||||||
if field == "created":
|
if field == "created":
|
||||||
value = datetime.fromtimestamp(int(obj['created_utc'])).strftime("%Y-%m-%d %H:%M")
|
value = datetime.fromtimestamp(int(obj['created_utc'])).strftime("%Y-%m-%d %H:%M")
|
||||||
elif field == "link":
|
elif field == "link":
|
||||||
|
if 'permalink' in obj:
|
||||||
value = f"https://www.reddit.com{obj['permalink']}"
|
value = f"https://www.reddit.com{obj['permalink']}"
|
||||||
|
else:
|
||||||
|
value = f"https://www.reddit.com/r/{obj['subreddit']}/comments/{obj['link_id'][3:]}/_/{obj['id']}/"
|
||||||
elif field == "author":
|
elif field == "author":
|
||||||
value = f"u/{obj['author']}"
|
value = f"u/{obj['author']}"
|
||||||
elif field == "text":
|
elif field == "text":
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue