Don't crash when prompt is too large.
commit a3d97fa009
parent fbce5f2078

llm.cpp (7 changes)
@@ -252,6 +252,13 @@ bool LLMObject::handleResponse(int32_t token, const std::string &response)
     fflush(stdout);
 #endif

+    // check for error
+    if (token < 0) {
+        m_response.append(response);
+        emit responseChanged();
+        return false;
+    }
+
     // Save the token to our prompt ctxt
     if (s_ctx.tokens.size() == s_ctx.n_ctx)
         s_ctx.tokens.erase(s_ctx.tokens.begin());
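This hunk makes handleResponse treat a negative token id as an error signal: the response string then carries an error message, which is appended to the visible output before generation stops. A minimal compilable sketch of that sentinel convention, with illustrative names rather than the project's API:

#include <cstdint>
#include <string>

// Negative token ids mean "error"; `text` then carries the error
// message instead of model output. Returning false stops the
// caller's generation loop.
static bool handleToken(int32_t token, const std::string &text,
                        std::string &accumulated)
{
    accumulated.append(text);  // error message or generated token text
    return token >= 0;
}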
@@ -707,6 +707,13 @@ void GPTJ::prompt(const std::string &prompt,
     // save the context size
     promptCtx.n_ctx = d_ptr->model.hparams.n_ctx;

+    if ((int) embd_inp.size() > promptCtx.n_ctx - 4) {
+        response(-1, "ERROR: The prompt size exceeds the context window size and cannot be processed.");
+        std::cerr << "GPT-J ERROR: The prompt is " << embd_inp.size() <<
+            " tokens and the context window is " << promptCtx.n_ctx << "!\n";
+        return;
+    }
+
     promptCtx.n_predict = std::min(promptCtx.n_predict, promptCtx.n_ctx - (int) embd_inp.size());
     promptCtx.n_past = std::min(promptCtx.n_past, promptCtx.n_ctx);
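GPTJ::prompt now rejects prompts that cannot fit in the model's context window, reporting through the same response callback (token id -1) instead of crashing later; the check keeps 4 tokens of headroom (n_ctx - 4). A standalone sketch of that guard, using hypothetical helper and parameter names:

#include <cstdio>
#include <vector>

// Guard in the spirit of the hunk above (hypothetical helper, not
// part of the project): refuse prompts that leave fewer than 4
// tokens of headroom in the context window.
static bool promptFits(const std::vector<int> &promptTokens, int n_ctx)
{
    if ((int) promptTokens.size() > n_ctx - 4) {
        std::fprintf(stderr,
                     "ERROR: prompt is %zu tokens, context window is %d\n",
                     promptTokens.size(), n_ctx);
        return false;  // caller reports via its callback and returns early
    }
    return true;
}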
@@ -102,7 +102,9 @@ void LLamaModel::prompt(const std::string &prompt,
     promptCtx.n_ctx = llama_n_ctx(d_ptr->ctx);

     if ((int) embd_inp.size() > promptCtx.n_ctx - 4) {
-        std::cerr << "LLAMA ERROR: prompt is too long\n";
+        response(-1, "The prompt size exceeds the context window size and cannot be processed.");
+        std::cerr << "LLAMA ERROR: The prompt is " << embd_inp.size() <<
+            " tokens and the context window is " << promptCtx.n_ctx << "!\n";
         return;
     }
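The LLaMA backend already had the size check; this hunk upgrades its bare stderr line so the error also reaches the user through response(-1, ...). Combined with the handleResponse change in the first hunk, an over-long prompt now surfaces as a message instead of a crash. A hypothetical end-to-end sketch of the whole pattern, with illustrative names throughout:

#include <cstdint>
#include <functional>
#include <iostream>
#include <string>
#include <vector>

// End-to-end sketch of the pattern this commit installs: the
// prompt-size guard reports through the same callback that delivers
// tokens, and a negative token id tells the consumer to append the
// message and stop.
using ResponseCallback = std::function<bool(int32_t, const std::string &)>;

static void promptSketch(const std::vector<int> &promptTokens, int n_ctx,
                         const ResponseCallback &response)
{
    if ((int) promptTokens.size() > n_ctx - 4) {
        response(-1, "ERROR: The prompt size exceeds the context window "
                     "size and cannot be processed.");
        return;  // bail out instead of overrunning the context buffer
    }
    // ... evaluate the prompt and generate tokens as usual ...
}

int main()
{
    std::string out;
    auto cb = [&](int32_t token, const std::string &text) {
        out.append(text);   // error message or generated text
        return token >= 0;  // false halts generation on error
    };
    promptSketch(std::vector<int>(4096, 0), /*n_ctx=*/2048, cb);
    std::cout << out << '\n';  // prints the error message, no crash
}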