mirror of
https://github.com/Watchful1/PushshiftDumps.git
synced 2025-12-17 09:14:04 -05:00
Try a fix
This commit is contained in:
parent
712e3aaf01
commit
c848480ed9
2 changed files with 6 additions and 2 deletions
|
|
@ -255,10 +255,10 @@ class ObjectDict:
|
|||
def get_missing_ids_by_minutes(self, start_minute, end_minute, ignore_ids):
|
||||
start_id = self.by_minute[start_minute].min_id
|
||||
end_id = self.by_minute[end_minute].max_id
|
||||
if start_id is None:
|
||||
if start_id is None or end_id is None:
|
||||
log.warning(f"Unable to get start id for start minute {start_minute} : {self.by_minute[start_minute]}")
|
||||
if end_id is None:
|
||||
log.warning(f"Unable to get end id for end minute {end_minute} : {self.by_minute[end_minute]}")
|
||||
return [], None, None
|
||||
missing_ids = []
|
||||
count_ignored_ids = 0
|
||||
for int_id in range(start_id, end_id + 1):
|
||||
|
|
|
|||
|
|
@ -189,6 +189,10 @@ def build_day(day_to_process, input_folders, output_folder, object_type, reddit,
|
|||
else:
|
||||
working_highest_minute = minute_iterator - timedelta(minutes=1)
|
||||
missing_ids, start_id, end_id = objects.get_missing_ids_by_minutes(working_lowest_minute, working_highest_minute, ignore_ids)
|
||||
if start_id is None or end_id is None:
|
||||
log.warning(f"Unable to get start or end id for minute {minute_iterator} : {working_lowest_minute} : {working_highest_minute}")
|
||||
minute_iterator += timedelta(minutes=1)
|
||||
continue
|
||||
log.debug(
|
||||
f"{file_type}: Backfilling from: {working_lowest_minute.strftime('%y-%m-%d_%H-%M')} ({utils.base36encode(start_id)}|{start_id}) to "
|
||||
f"{working_highest_minute.strftime('%y-%m-%d_%H-%M')} ({utils.base36encode(end_id)}|{end_id}) with {len(missing_ids)} ({end_id - start_id}) ids")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue