From 2e2e9f43393303fbbefc65f58309630992628af0 Mon Sep 17 00:00:00 2001 From: Zach Nussbaum Date: Tue, 4 Apr 2023 20:47:21 +0000 Subject: [PATCH] fix: clean where prompt is randomly 1 char --- clean.py | 1 + 1 file changed, 1 insertion(+) diff --git a/clean.py b/clean.py index 4712820b..6d1cec81 100644 --- a/clean.py +++ b/clean.py @@ -64,6 +64,7 @@ for file in glob.glob(os.path.join(prompt_generation_dir, "*.jsonl")): df = df.dropna(subset=['prompt', 'response']) df = df[df['prompt'] != ''] df = df[df['response'] != ''] + df = df[df["prompt"].str.len() > 1] curr_len = len(df) print(f"Removed {prev_len - curr_len} rows")