mirror of
https://github.com/Watchful1/PushshiftDumps.git
synced 2025-07-28 00:55:17 -04:00
Log on bad lines too
This commit is contained in:
parent
4110374fe8
commit
f7146593a0
1 changed file with 9 additions and 0 deletions
|
@@ -26,6 +26,8 @@ single_field = None
|
|||
# the fields in the file are different depending on whether it has comments or submissions. If we're writing a csv, we need to know which fields to write.
|
||||
# The filename from the torrent has which type it is, but you'll need to change this if you removed that from the filename
|
||||
is_submission = "submission" in input_file
|
||||
# set this to true to write out to the log every time there's a bad line, set to false if you're expecting only some of the lines to match the key
|
||||
write_bad_lines = True
|
||||
|
||||
# only output items between these two dates
|
||||
from_date = datetime.strptime("2005-01-01", "%Y-%m-%d")
|
||||
|
@@ -259,6 +261,13 @@ if __name__ == "__main__":
|
|||
log.info(f"Something went wrong, invalid output format {output_format}")
|
||||
except (KeyError, json.JSONDecodeError) as err:
|
||||
bad_lines += 1
|
||||
if write_bad_lines:
|
||||
if isinstance(err, KeyError):
|
||||
log.warning(f"Key {field} is not in the object: {err}")
|
||||
elif isinstance(err, json.JSONDecodeError):
|
||||
log.warning(f"Line decoding failed: {err}")
|
||||
log.warning(line)
|
||||
|
||||
|
||||
handle.close()
|
||||
log.info(f"Complete : {total_lines:,} : {matched_lines:,} : {bad_lines:,}")
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue