mirror of
https://github.com/Watchful1/PushshiftDumps.git
synced 2025-07-03 10:56:40 -04:00
Update logging a bit
This commit is contained in:
parent
bda7a4c8d6
commit
99ae19f690
3 changed files with 12 additions and 6 deletions
|
@@ -103,6 +103,7 @@ def query_pushshift(ids, bearer, object_type, pushshift_token_function):
|
|||
time.sleep(2)
|
||||
if response.status_code != 200:
|
||||
log.warning(f"4 requests failed with status code {response.status_code}")
|
||||
discord_logging.flush_discord()
|
||||
sys.exit(1)
|
||||
return response.json()['data'], bearer
|
||||
|
||||
|
|
|
@@ -151,7 +151,8 @@ def process(queue, base_folder, month, file_type, type_stages, reddit_username,
|
|||
log.info(f"{file_type}: Writing to: {split_folder}")
|
||||
split_blocks_by_minutes.split_by_minutes(split_file, split_folder)
|
||||
|
||||
log.info(f"{file_type}: {file_type} split complete")
|
||||
log.warning(f"{file_type}: {file_type} split complete")
|
||||
discord_logging.flush_discord()
|
||||
queue.put((file_type, "split", True))
|
||||
|
||||
start_date = datetime.strptime(month, "%y-%m")
|
||||
|
@@ -188,7 +189,8 @@ def process(queue, base_folder, month, file_type, type_stages, reddit_username,
|
|||
)
|
||||
start_date = end_of_day(start_date)
|
||||
queue.put((file_type, "merge", start_date))
|
||||
log.info(f"{file_type}: {file_type} merge complete")
|
||||
log.warning(f"{file_type}: {file_type} merge complete")
|
||||
discord_logging.flush_discord()
|
||||
|
||||
if not type_stages["build"]:
|
||||
log.info(f"{file_type}: Starting {file_type} build")
|
||||
|
@@ -206,9 +208,11 @@ def process(queue, base_folder, month, file_type, type_stages, reddit_username,
|
|||
compression_level
|
||||
)
|
||||
queue.put((file_type, "build", True))
|
||||
log.info(f"{file_type}: {file_type} build complete")
|
||||
log.warning(f"{file_type}: {file_type} build complete")
|
||||
discord_logging.flush_discord()
|
||||
|
||||
log.info(f"{file_type}: {file_type} all steps complete")
|
||||
log.warning(f"{file_type}: {file_type} all steps complete")
|
||||
discord_logging.flush_discord()
|
||||
|
||||
# for stage, status in type_stages.items():
|
||||
# log.info(f"{file_type} {stage}: {status}")
|
||||
|
|
|
@@ -14,8 +14,9 @@ import json
|
|||
# the script will look for both comments and submissions files for each subreddit
|
||||
folder = r"\\MYCLOUDPR4100\Public\reddit\subreddits23"
|
||||
subreddits = [
|
||||
"JEENEETards",
|
||||
"TwoXIndia",
|
||||
"demolitionranch",
|
||||
"politics",
|
||||
"pittsburgh",
|
||||
]
|
||||
ignored_users = {'[deleted]', 'automoderator'}
|
||||
# this is a list of users to ignore when doing the comparison. Most popular bots post in many subreddits and aren't the person you're looking for
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue