diff --git a/personal/combine/merge_and_backfill.py b/personal/combine/merge_and_backfill.py index b7408da..4517cb9 100644 --- a/personal/combine/merge_and_backfill.py +++ b/personal/combine/merge_and_backfill.py @@ -103,6 +103,7 @@ def query_pushshift(ids, bearer, object_type, pushshift_token_function): time.sleep(2) if response.status_code != 200: log.warning(f"4 requests failed with status code {response.status_code}") + discord_logging.flush_discord() sys.exit(1) return response.json()['data'], bearer diff --git a/personal/process_month.py b/personal/process_month.py index 1d482ae..db8cf8e 100644 --- a/personal/process_month.py +++ b/personal/process_month.py @@ -151,7 +151,8 @@ def process(queue, base_folder, month, file_type, type_stages, reddit_username, log.info(f"{file_type}: Writing to: {split_folder}") split_blocks_by_minutes.split_by_minutes(split_file, split_folder) - log.info(f"{file_type}: {file_type} split complete") + log.warning(f"{file_type}: {file_type} split complete") + discord_logging.flush_discord() queue.put((file_type, "split", True)) start_date = datetime.strptime(month, "%y-%m") @@ -188,7 +189,8 @@ def process(queue, base_folder, month, file_type, type_stages, reddit_username, ) start_date = end_of_day(start_date) queue.put((file_type, "merge", start_date)) - log.info(f"{file_type}: {file_type} merge complete") + log.warning(f"{file_type}: {file_type} merge complete") + discord_logging.flush_discord() if not type_stages["build"]: log.info(f"{file_type}: Starting {file_type} build") @@ -206,9 +208,11 @@ def process(queue, base_folder, month, file_type, type_stages, reddit_username, compression_level ) queue.put((file_type, "build", True)) - log.info(f"{file_type}: {file_type} build complete") + log.warning(f"{file_type}: {file_type} build complete") + discord_logging.flush_discord() - log.info(f"{file_type}: {file_type} all steps complete") + log.warning(f"{file_type}: {file_type} all steps complete") + discord_logging.flush_discord() # for stage, status in type_stages.items(): # log.info(f"{file_type} {stage}: {status}") diff --git a/scripts/find_overlapping_users.py b/scripts/find_overlapping_users.py index d0e6803..f9d4f7e 100644 --- a/scripts/find_overlapping_users.py +++ b/scripts/find_overlapping_users.py @@ -14,8 +14,9 @@ import json # the script will look for both comments and submissions files for each subreddit folder = r"\\MYCLOUDPR4100\Public\reddit\subreddits23" subreddits = [ - "JEENEETards", - "TwoXIndia", + "demolitionranch", + "politics", + "pittsburgh", ] ignored_users = {'[deleted]', 'automoderator'} # this is a list of users to ignore when doing the comparison. Most popular bots post in many subreddits and aren't the person you're looking for