mirror of
https://github.com/Watchful1/PushshiftDumps.git
synced 2025-07-25 23:55:18 -04:00
Needs to be an int
This commit is contained in:
parent
5b95fc7e7d
commit
18e36b1b81
2 changed files with 4 additions and 2 deletions
|
@@ -8,7 +8,7 @@ log = discord_logging.init_logging()
|
|||
if __name__ == "__main__":
|
||||
day = None
|
||||
day_comments = 0
|
||||
for comment in utils.read_obj_zst(r"\\MYCLOUDPR4100\Public\reddit\subreddits24\NewTubers_comments.zst"):
|
||||
for comment in utils.read_obj_zst(r"\\MYCLOUDPR4100\Public\reddit\subreddits24\recruitinghell_submissions.zst"):
|
||||
created_day = datetime.utcfromtimestamp(int(comment['created_utc'])).strftime("%y-%m-%d")
|
||||
if day is None:
|
||||
day = created_day
|
||||
|
|
|
@@ -134,6 +134,8 @@ def process_file(file, queue, field):
|
|||
try:
|
||||
obj = json.loads(line)
|
||||
observed = obj[field].lower()
|
||||
if observed is None or observed == "":
|
||||
continue
|
||||
if output_file is None:
|
||||
output_file = open(file.output_path, 'w', encoding="utf-8")
|
||||
output_file.write(observed)
|
||||
|
@@ -342,7 +344,7 @@ if __name__ == '__main__':
|
|||
for line in input_handle:
|
||||
try:
|
||||
field, count = line.strip().split("\t")
|
||||
field_counts[field] = count
|
||||
field_counts[field] = int(count)
|
||||
except Exception as err:
|
||||
log.info(f"Line failed in file {file.count_file_path}: {line}")
|
||||
raise
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue