mirror of
https://github.com/Watchful1/PushshiftDumps.git
synced 2025-07-03 19:06:39 -04:00
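Change the pool chunksize to 1 to reduce parallelization (with chunksize=1, files are presumably handed to the worker pool one at a time rather than in batches)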
This commit is contained in:
parent
52d65e3c8d
commit
87d2b22a73
2 changed files with 2 additions and 2 deletions
|
@@ -233,7 +233,7 @@ if __name__ == '__main__':
|
|||
log.info(f"Processing file: {file.input_path}")
|
||||
# start the workers
|
||||
with multiprocessing.Pool(processes=min(args.processes, len(files_to_process))) as pool:
|
||||
workers = pool.starmap_async(process_file, [(file, queue, args.field) for file in files_to_process], error_callback=log.info)
|
||||
workers = pool.starmap_async(process_file, [(file, queue, args.field) for file in files_to_process], chunksize=1, error_callback=log.info)
|
||||
while not workers.ready():
|
||||
# loop until the workers are all done, pulling in status messages as they are sent
|
||||
file_update = queue.get()
|
||||
|
|
|
@@ -295,7 +295,7 @@ if __name__ == '__main__':
|
|||
log.info(f"Processing file: {file.input_path}")
|
||||
# start the workers
|
||||
with multiprocessing.Pool(processes=min(args.processes, len(files_to_process))) as pool:
|
||||
workers = pool.starmap_async(process_file, [(file, queue, args.field, value, values, args.case_sensitive) for file in files_to_process], error_callback=log.info)
|
||||
workers = pool.starmap_async(process_file, [(file, queue, args.field, value, values, args.case_sensitive) for file in files_to_process], chunksize=1, error_callback=log.info)
|
||||
while not workers.ready():
|
||||
# loop until the workers are all done, pulling in status messages as they are sent
|
||||
file_update = queue.get()
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue