This commit is contained in:
Watchful1 2025-06-17 20:53:12 -07:00
parent 4e0d382bee
commit 9169b9b5ac
5 changed files with 13 additions and 14 deletions

View file

@ -8,7 +8,7 @@ log = discord_logging.init_logging()
if __name__ == "__main__": if __name__ == "__main__":
day = None day = None
day_comments = 0 day_comments = 0
for comment in utils.read_obj_zst(r"\\MYCLOUDPR4100\Public\reddit\subreddits23\antiwork_comments.zst"): for comment in utils.read_obj_zst(r"\\MYCLOUDPR4100\Public\reddit\subreddits24\NewTubers_comments.zst"):
created_day = datetime.utcfromtimestamp(int(comment['created_utc'])).strftime("%y-%m-%d") created_day = datetime.utcfromtimestamp(int(comment['created_utc'])).strftime("%y-%m-%d")
if day is None: if day is None:
day = created_day day = created_day

View file

@ -8,7 +8,7 @@ log = discord_logging.init_logging()
if __name__ == "__main__": if __name__ == "__main__":
input_path = r"\\MYCLOUDPR4100\Public\reddit\submissions\RS_2023-04.zst" input_path = r"\\MYCLOUDPR4100\Public\reddit\submissions\RS_2025-05.zst"
input_file_paths = [] input_file_paths = []
if os.path.isdir(input_path): if os.path.isdir(input_path):

View file

@ -15,14 +15,8 @@ import json
# the script will look for both comments and submissions files for each subreddit # the script will look for both comments and submissions files for each subreddit
folder = r"\\MYCLOUDPR4100\Public\reddit\subreddits24" folder = r"\\MYCLOUDPR4100\Public\reddit\subreddits24"
subreddits_string = """ subreddits_string = """
askcarsales fragranceswap
Denton MTB
relationship_advice
Dallas
askdfw
AskMen
rolex
lego
""" """
ignored_users = {'[deleted]', 'automoderator'} ignored_users = {'[deleted]', 'automoderator'}
# this is a list of users to ignore when doing the comparison. Most popular bots post in many subreddits and aren't the person you're looking for # this is a list of users to ignore when doing the comparison. Most popular bots post in many subreddits and aren't the person you're looking for

View file

@ -174,3 +174,8 @@ could-of-bot
mentionhelper mentionhelper
RossGellerBot RossGellerBot
the_timezone_bot the_timezone_bot
TitleLinkHelperBot
timee_bot
notthebottest
colorsbot
ComeOnMisspellingBot

View file

@ -17,9 +17,9 @@ import logging.handlers
# put the path to the input file # put the path to the input file
input_file_path = r"\\MYCLOUDPR4100\Public\tools\PushshiftDumps\Straight-Wrap-172_submissions.zst" input_file_path = r"\\MYCLOUDPR4100\Public\reddit\subreddits24\StockMarket_submissions.zst"
# put the path to the output file, with the csv extension # put the path to the output file, with the csv extension
output_file_path = r"\\MYCLOUDPR4100\Public\Straight-Wrap-172_submissions.csv" output_file_path = r"\\MYCLOUDPR4100\Public\StockMarket_submissions.csv"
# if you want a custom set of fields, put them in the following list. If you leave it empty the script will use a default set of fields # if you want a custom set of fields, put them in the following list. If you leave it empty the script will use a default set of fields
fields = [] fields = []
@ -94,7 +94,7 @@ if __name__ == "__main__":
value = f"u/{obj['author']}" value = f"u/{obj['author']}"
elif field == "text": elif field == "text":
if 'selftext' in obj: if 'selftext' in obj:
value = obj['selftext'] value = obj['selftext']#[:32000] # remove first # if the subreddit has very large text posts and you want to open this in excel
else: else:
value = "" value = ""
else: else: