diff --git a/personal/diagnostic/comments_per_day.py b/personal/diagnostic/comments_per_day.py index 854f839..54f9439 100644 --- a/personal/diagnostic/comments_per_day.py +++ b/personal/diagnostic/comments_per_day.py @@ -8,7 +8,7 @@ log = discord_logging.init_logging() if __name__ == "__main__": day = None day_comments = 0 - for comment in utils.read_obj_zst(r"\\MYCLOUDPR4100\Public\reddit\subreddits23\antiwork_comments.zst"): + for comment in utils.read_obj_zst(r"\\MYCLOUDPR4100\Public\reddit\subreddits24\NewTubers_comments.zst"): created_day = datetime.utcfromtimestamp(int(comment['created_utc'])).strftime("%y-%m-%d") if day is None: day = created_day diff --git a/personal/diagnostic/test_file.py b/personal/diagnostic/test_file.py index abb178c..dfcb42e 100644 --- a/personal/diagnostic/test_file.py +++ b/personal/diagnostic/test_file.py @@ -8,7 +8,7 @@ log = discord_logging.init_logging() if __name__ == "__main__": - input_path = r"\\MYCLOUDPR4100\Public\reddit\submissions\RS_2023-04.zst" + input_path = r"\\MYCLOUDPR4100\Public\reddit\submissions\RS_2025-05.zst" input_file_paths = [] if os.path.isdir(input_path): diff --git a/scripts/find_overlapping_users.py b/scripts/find_overlapping_users.py index 6240740..25768ed 100644 --- a/scripts/find_overlapping_users.py +++ b/scripts/find_overlapping_users.py @@ -15,14 +15,8 @@ import json # the script will look for both comments and submissions files for each subreddit folder = r"\\MYCLOUDPR4100\Public\reddit\subreddits24" subreddits_string = """ - askcarsales - Denton - relationship_advice - Dallas - askdfw - AskMen - rolex - lego + fragranceswap + MTB """ ignored_users = {'[deleted]', 'automoderator'} # this is a list of users to ignore when doing the comparison. Most popular bots post in many subreddits and aren't the person you're looking for diff --git a/scripts/ignored.txt b/scripts/ignored.txt index ce1cbfc..58893d5 100644 --- a/scripts/ignored.txt +++ b/scripts/ignored.txt @@ -173,4 +173,9 @@ HippoBot9000 could-of-bot mentionhelper RossGellerBot -the_timezone_bot \ No newline at end of file +the_timezone_bot +TitleLinkHelperBot +timee_bot +notthebottest +colorsbot +ComeOnMisspellingBot diff --git a/scripts/to_csv.py b/scripts/to_csv.py index bbe48c6..977249f 100644 --- a/scripts/to_csv.py +++ b/scripts/to_csv.py @@ -17,9 +17,9 @@ import logging.handlers # put the path to the input file -input_file_path = r"\\MYCLOUDPR4100\Public\tools\PushshiftDumps\Straight-Wrap-172_submissions.zst" +input_file_path = r"\\MYCLOUDPR4100\Public\reddit\subreddits24\StockMarket_submissions.zst" # put the path to the output file, with the csv extension -output_file_path = r"\\MYCLOUDPR4100\Public\Straight-Wrap-172_submissions.csv" +output_file_path = r"\\MYCLOUDPR4100\Public\StockMarket_submissions.csv" # if you want a custom set of fields, put them in the following list. If you leave it empty the script will use a default set of fields fields = [] @@ -94,7 +94,7 @@ if __name__ == "__main__": value = f"u/{obj['author']}" elif field == "text": if 'selftext' in obj: - value = obj['selftext'] + value = obj['selftext']#[:32000] # remove first # if the subreddit has very large text posts and you want to open this in excel else: value = "" else: