mirror of
https://github.com/nomic-ai/gpt4all.git
synced 2024-09-19 23:35:41 +00:00
fix: clean out rows where the prompt is only 1 char long
This commit is contained in:
parent
2e3e35c7a2
commit
2e2e9f4339
1
clean.py
1
clean.py
@@ -64,6 +64,7 @@ for file in glob.glob(os.path.join(prompt_generation_dir, "*.jsonl")):
|
||||
df = df.dropna(subset=['prompt', 'response'])
|
||||
df = df[df['prompt'] != '']
|
||||
df = df[df['response'] != '']
|
||||
df = df[df["prompt"].str.len() > 1]
|
||||
curr_len = len(df)
|
||||
|
||||
print(f"Removed {prev_len - curr_len} rows")
|
||||
|
Loading…
Reference in New Issue
Block a user