mirror of
https://github.com/Watchful1/PushshiftDumps.git
synced 2025-07-25 23:55:18 -04:00
Updates
This commit is contained in:
parent
4e0d382bee
commit
9169b9b5ac
5 changed files with 13 additions and 14 deletions
|
@ -8,7 +8,7 @@ log = discord_logging.init_logging()
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
day = None
|
day = None
|
||||||
day_comments = 0
|
day_comments = 0
|
||||||
for comment in utils.read_obj_zst(r"\\MYCLOUDPR4100\Public\reddit\subreddits23\antiwork_comments.zst"):
|
for comment in utils.read_obj_zst(r"\\MYCLOUDPR4100\Public\reddit\subreddits24\NewTubers_comments.zst"):
|
||||||
created_day = datetime.utcfromtimestamp(int(comment['created_utc'])).strftime("%y-%m-%d")
|
created_day = datetime.utcfromtimestamp(int(comment['created_utc'])).strftime("%y-%m-%d")
|
||||||
if day is None:
|
if day is None:
|
||||||
day = created_day
|
day = created_day
|
||||||
|
|
|
@ -8,7 +8,7 @@ log = discord_logging.init_logging()
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
input_path = r"\\MYCLOUDPR4100\Public\reddit\submissions\RS_2023-04.zst"
|
input_path = r"\\MYCLOUDPR4100\Public\reddit\submissions\RS_2025-05.zst"
|
||||||
|
|
||||||
input_file_paths = []
|
input_file_paths = []
|
||||||
if os.path.isdir(input_path):
|
if os.path.isdir(input_path):
|
||||||
|
|
|
@ -15,14 +15,8 @@ import json
|
||||||
# the script will look for both comments and submissions files for each subreddit
|
# the script will look for both comments and submissions files for each subreddit
|
||||||
folder = r"\\MYCLOUDPR4100\Public\reddit\subreddits24"
|
folder = r"\\MYCLOUDPR4100\Public\reddit\subreddits24"
|
||||||
subreddits_string = """
|
subreddits_string = """
|
||||||
askcarsales
|
fragranceswap
|
||||||
Denton
|
MTB
|
||||||
relationship_advice
|
|
||||||
Dallas
|
|
||||||
askdfw
|
|
||||||
AskMen
|
|
||||||
rolex
|
|
||||||
lego
|
|
||||||
"""
|
"""
|
||||||
ignored_users = {'[deleted]', 'automoderator'}
|
ignored_users = {'[deleted]', 'automoderator'}
|
||||||
# this is a list of users to ignore when doing the comparison. Most popular bots post in many subreddits and aren't the person you're looking for
|
# this is a list of users to ignore when doing the comparison. Most popular bots post in many subreddits and aren't the person you're looking for
|
||||||
|
|
|
@ -174,3 +174,8 @@ could-of-bot
|
||||||
mentionhelper
|
mentionhelper
|
||||||
RossGellerBot
|
RossGellerBot
|
||||||
the_timezone_bot
|
the_timezone_bot
|
||||||
|
TitleLinkHelperBot
|
||||||
|
timee_bot
|
||||||
|
notthebottest
|
||||||
|
colorsbot
|
||||||
|
ComeOnMisspellingBot
|
||||||
|
|
|
@ -17,9 +17,9 @@ import logging.handlers
|
||||||
|
|
||||||
|
|
||||||
# put the path to the input file
|
# put the path to the input file
|
||||||
input_file_path = r"\\MYCLOUDPR4100\Public\tools\PushshiftDumps\Straight-Wrap-172_submissions.zst"
|
input_file_path = r"\\MYCLOUDPR4100\Public\reddit\subreddits24\StockMarket_submissions.zst"
|
||||||
# put the path to the output file, with the csv extension
|
# put the path to the output file, with the csv extension
|
||||||
output_file_path = r"\\MYCLOUDPR4100\Public\Straight-Wrap-172_submissions.csv"
|
output_file_path = r"\\MYCLOUDPR4100\Public\StockMarket_submissions.csv"
|
||||||
# if you want a custom set of fields, put them in the following list. If you leave it empty the script will use a default set of fields
|
# if you want a custom set of fields, put them in the following list. If you leave it empty the script will use a default set of fields
|
||||||
fields = []
|
fields = []
|
||||||
|
|
||||||
|
@ -94,7 +94,7 @@ if __name__ == "__main__":
|
||||||
value = f"u/{obj['author']}"
|
value = f"u/{obj['author']}"
|
||||||
elif field == "text":
|
elif field == "text":
|
||||||
if 'selftext' in obj:
|
if 'selftext' in obj:
|
||||||
value = obj['selftext']
|
value = obj['selftext']#[:32000] # remove first # if the subreddit has very large text posts and you want to open this in excel
|
||||||
else:
|
else:
|
||||||
value = ""
|
value = ""
|
||||||
else:
|
else:
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue