mirror of
https://github.com/Watchful1/PushshiftDumps.git
synced 2025-07-03 19:06:39 -04:00
Fix last month of year
This commit is contained in:
parent
82966bf7f6
commit
6114afb53f
3 changed files with 8 additions and 7 deletions
|
@ -60,7 +60,10 @@ if __name__ == "__main__":
|
|||
total_objects = 0
|
||||
total_bytes = 0
|
||||
minute_iterator = month
|
||||
# Compute the exclusive upper bound of the month being processed.
# `month` is a date/datetime-like object (it exposes `.month` and `.replace`),
# so the rollover check must look at `month.month`; comparing the object
# itself to the integer 12 is never true, which left December falling
# through to `replace(month=13)`.
if month.month == 12:
    # December rolls over into January of the following year.
    end_time = month.replace(year=month.year + 1, month=1)
else:
    end_time = month.replace(month=month.month + 1)
|
||||
while minute_iterator < end_time:
|
||||
minute_file_path = os.path.join(args.input, args.type, minute_iterator.strftime('%y-%m-%d'), f"{prefix}_{minute_iterator.strftime('%y-%m-%d_%H-%M')}.zst")
|
||||
for obj, line, _ in utils.read_obj_zst_meta(minute_file_path):
|
||||
|
|
|
@ -8,8 +8,8 @@ log = discord_logging.init_logging()
|
|||
if __name__ == "__main__":
|
||||
day = None
|
||||
day_comments = 0
|
||||
for comment in utils.read_obj_zst(r"\\MYCLOUDPR4100\Public\reddit_final\wallstreetbets_comments.zst"):
|
||||
created_day = datetime.utcfromtimestamp(int(comment['created_utc'])).strftime("%m/%d/%y")
|
||||
for comment in utils.read_obj_zst(r"C:\Users\greg\Desktop\Drive\pushshift\haley0530\chatbots_submissions.zst"):
|
||||
created_day = datetime.utcfromtimestamp(int(comment['created_utc'])).strftime("%y-%m-%d")
|
||||
if day is None:
|
||||
day = created_day
|
||||
if day != created_day:
|
||||
|
|
|
@ -7,10 +7,8 @@ import zstandard
|
|||
import json
|
||||
|
||||
input_files = [
|
||||
r"\\MYCLOUDPR4100\Public\reddit\subreddits\PersonalFinanceCanada_comments.zst",
|
||||
r"\\MYCLOUDPR4100\Public\reddit\subreddits\hacking_comments.zst",
|
||||
r"\\MYCLOUDPR4100\Public\reddit\subreddits\alberta_comments.zst",
|
||||
r"\\MYCLOUDPR4100\Public\reddit\subreddits\GothGirls_comments.zst",
|
||||
r"\\MYCLOUDPR4100\Public\reddit\subreddits\srilanka_comments.zst",
|
||||
r"\\MYCLOUDPR4100\Public\reddit\subreddits\Warthunder_comments.zst",
|
||||
]
|
||||
ignored_users = ['[deleted]', 'automoderator']
|
||||
min_comments_per_sub = 1
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue