mirror of
https://github.com/Watchful1/PushshiftDumps.git
synced 2025-07-26 16:15:37 -04:00
Needs to be an int
This commit is contained in:
parent
5b95fc7e7d
commit
18e36b1b81
2 changed files with 4 additions and 2 deletions
|
@ -8,7 +8,7 @@ log = discord_logging.init_logging()
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
day = None
|
day = None
|
||||||
day_comments = 0
|
day_comments = 0
|
||||||
for comment in utils.read_obj_zst(r"\\MYCLOUDPR4100\Public\reddit\subreddits24\NewTubers_comments.zst"):
|
for comment in utils.read_obj_zst(r"\\MYCLOUDPR4100\Public\reddit\subreddits24\recruitinghell_submissions.zst"):
|
||||||
created_day = datetime.utcfromtimestamp(int(comment['created_utc'])).strftime("%y-%m-%d")
|
created_day = datetime.utcfromtimestamp(int(comment['created_utc'])).strftime("%y-%m-%d")
|
||||||
if day is None:
|
if day is None:
|
||||||
day = created_day
|
day = created_day
|
||||||
|
|
|
@ -134,6 +134,8 @@ def process_file(file, queue, field):
|
||||||
try:
|
try:
|
||||||
obj = json.loads(line)
|
obj = json.loads(line)
|
||||||
observed = obj[field].lower()
|
observed = obj[field].lower()
|
||||||
|
if observed is None or observed == "":
|
||||||
|
continue
|
||||||
if output_file is None:
|
if output_file is None:
|
||||||
output_file = open(file.output_path, 'w', encoding="utf-8")
|
output_file = open(file.output_path, 'w', encoding="utf-8")
|
||||||
output_file.write(observed)
|
output_file.write(observed)
|
||||||
|
@ -342,7 +344,7 @@ if __name__ == '__main__':
|
||||||
for line in input_handle:
|
for line in input_handle:
|
||||||
try:
|
try:
|
||||||
field, count = line.strip().split("\t")
|
field, count = line.strip().split("\t")
|
||||||
field_counts[field] = count
|
field_counts[field] = int(count)
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
log.info(f"Line failed in file {file.count_file_path}: {line}")
|
log.info(f"Line failed in file {file.count_file_path}: {line}")
|
||||||
raise
|
raise
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue