# CodeT5/CodeT5+/humaneval/process_preds.py

from human_eval.data import read_problems, write_jsonl, stream_jsonl
import glob
from tqdm import tqdm
import argparse

# Command-line interface. Both path options are required: the script
# concatenates onto --path right after parsing and hands --out_path to
# write_jsonl at the end, so omitting either could only fail later with a
# less helpful error.
parser = argparse.ArgumentParser(
    description="Merge the records of every *.jsonl file in a directory "
                "into a single JSONL file.")

# Inputs
parser.add_argument(
    '--path',
    type=str,
    required=True,
    help="Directory whose *.jsonl files are read.")
parser.add_argument(
    '--out_path',
    type=str,
    required=True,
    help="File the merged records are written to.")
parser.add_argument(
    '--add_prompt',
    action='store_true',
    help="For each record whose completion contains 'def', drop the "
         "completion text up to the end of that line and store the "
         "problem prompt plus the remainder under 'all_code'.")

args = parser.parse_args()


def _strip_def_header(completion):
    """Return the part of *completion* after the first line containing 'def'.

    The text up to and including the newline that ends that line is
    dropped. Returns ``None`` when 'def' does not occur at all, and an
    empty string when no newline follows the 'def'.
    """
    # NOTE(review): this is a plain substring match, so identifiers such as
    # 'default' or 'undefined' also count as a 'def' -- confirm whether a
    # stricter match is wanted.
    def_start = completion.find('def')
    if def_start == -1:
        return None
    # partition() returns '' for the remainder when there is no newline,
    # where index('\n') would raise ValueError and abort the whole run.
    _, _, body = completion[def_start:].partition('\n')
    return body


# Sorted so records are merged in a deterministic file order.
files = sorted(glob.glob(args.path + '/*.jsonl'))
print("{} files in {}".format(len(files), args.path))

# Problems are indexed by task_id below to fetch each record's 'prompt'.
# The path is relative, so it is resolved against the current working
# directory.
problems = read_problems('data/HumanEval.jsonl.gz')

output = []
for code_file in tqdm(files, total=len(files)):
    codes = list(stream_jsonl(code_file))
    if args.add_prompt:
        for code in codes:
            prompt = problems[code['task_id']]['prompt']
            body = _strip_def_header(code['completion'])
            # NOTE(review): records whose completion has no 'def' are kept
            # but get no 'all_code' key -- confirm downstream handles that.
            if body is not None:
                code['all_code'] = prompt + body

    output.extend(codes)

print("save to {}".format(args.out_path))
write_jsonl(args.out_path, output)