fix: drop rows where the prompt is only 1 character long

This commit is contained in:
Zach Nussbaum 2023-04-04 20:47:21 +00:00
parent 2e3e35c7a2
commit 2e2e9f4339

View File

@@ -64,6 +64,7 @@ for file in glob.glob(os.path.join(prompt_generation_dir, "*.jsonl")):
df = df.dropna(subset=['prompt', 'response']) df = df.dropna(subset=['prompt', 'response'])
df = df[df['prompt'] != ''] df = df[df['prompt'] != '']
df = df[df['response'] != ''] df = df[df['response'] != '']
df = df[df["prompt"].str.len() > 1]
curr_len = len(df) curr_len = len(df)
print(f"Removed {prev_len - curr_len} rows") print(f"Removed {prev_len - curr_len} rows")