Short script to sort the subreddit counts

This commit is contained in:
Watchful1 2023-01-24 20:51:39 -08:00
parent 2358bf555b
commit 52d65e3c8d
2 changed files with 22 additions and 2 deletions

View file

@@ -127,7 +127,6 @@ def read_lines_zst(file_name):
# information back to the parent via a queue
def process_file(file, queue, field):
output_file = None
log.debug(f"Starting file: {file.input_path} : {file.file_size:,}")
try:
for line, file_bytes_processed in read_lines_zst(file.input_path):
try:
@@ -149,7 +148,6 @@ def process_file(file, queue, field):
file.complete = True
file.bytes_processed = file.file_size
log.debug(f"Finished file: {file.input_path} : {file.file_size:,}")
except Exception as err:
file.error_message = str(err)
queue.put(file)

View file

@@ -0,0 +1,22 @@
def load_counts(lines, min_count=10000):
    """Parse ``name<TAB>count`` lines and keep names whose count exceeds *min_count*.

    Args:
        lines: Iterable of text lines (an open file handle works), each
            holding a name and an integer count separated by a single tab.
        min_count: Only names with a count strictly greater than this
            value are kept.

    Returns:
        A dict mapping each kept name to its integer count. If a name
        appears more than once, the last qualifying line wins.

    Raises:
        ValueError: If a non-blank line does not have exactly two
            tab-separated fields, or its count is not an integer. The
            message includes the 1-based line number.
    """
    counts = {}
    for line_number, line in enumerate(lines, start=1):
        stripped = line.strip()
        if not stripped:
            # Blank lines (e.g. a trailing empty line at the end of the
            # file) carry no data; skip them instead of failing to unpack.
            continue
        parts = stripped.split("\t")
        if len(parts) != 2:
            raise ValueError(
                f"Line {line_number}: expected 'name<TAB>count', got {stripped!r}"
            )
        name, count_string = parts
        try:
            count = int(count_string)
        except ValueError as err:
            raise ValueError(
                f"Line {line_number}: invalid count {count_string!r}"
            ) from err
        if count > min_count:
            counts[name] = count
    return counts


def top_names(counts, limit=20000):
    """Return at most *limit* names from *counts*, highest count first.

    Args:
        counts: Dict mapping name to integer count.
        limit: Maximum number of names to return.

    Returns:
        A list of names ordered by descending count. Names with equal
        counts keep their insertion order because ``sorted`` is stable.
    """
    ranked = sorted(counts, key=counts.get, reverse=True)
    return ranked[:limit]


if __name__ == '__main__':
    input_file = r"\\MYCLOUDPR4100\Public\field_counts.txt"
    output_file = r"\\MYCLOUDPR4100\Public\field_counts_sorted.txt"

    with open(input_file, 'r') as input_handle:
        subreddits = load_counts(input_handle)

    # Report how many names passed the count threshold.
    print(f"{len(subreddits)}")

    with open(output_file, 'w') as output_handle:
        # One name per line, highest count first; the counts themselves
        # are not written.
        output_handle.writelines(f"{subreddit}\n" for subreddit in top_names(subreddits))