From ea1ade86680c8930109b2616a4bcf6252092112a Mon Sep 17 00:00:00 2001
From: AT
Date: Fri, 27 Sep 2024 12:29:22 -0400
Subject: [PATCH] Use different language for prompt size too large. (#3004)

Signed-off-by: Adam Treat
Signed-off-by: Jared Van Bortel
Co-authored-by: Jared Van Bortel
---
 gpt4all-backend/src/llmodel_shared.cpp | 4 +++-
 gpt4all-bindings/python/CHANGELOG.md   | 1 +
 gpt4all-chat/CHANGELOG.md              | 1 +
 gpt4all-chat/src/chatllm.cpp           | 3 +++
 4 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/gpt4all-backend/src/llmodel_shared.cpp b/gpt4all-backend/src/llmodel_shared.cpp
index 214c0ff7..b0c31d11 100644
--- a/gpt4all-backend/src/llmodel_shared.cpp
+++ b/gpt4all-backend/src/llmodel_shared.cpp
@@ -161,7 +161,9 @@ bool LLModel::decodePrompt(std::function<bool(int32_t)> promptCallback,
                            std::vector<Token> embd_inp,
                            bool isResponse) {
     if ((int) embd_inp.size() > promptCtx.n_ctx - 4) {
-        responseCallback(-1, "ERROR: The prompt size exceeds the context window size and cannot be processed.");
+        // FIXME: (Adam) We should find a way to bubble these strings to the UI level to allow for
+        // translation
+        responseCallback(-1, "Your message was too long and could not be processed. Please try again with something shorter.");
         std::cerr << implementation().modelType() << " ERROR: The prompt is " << embd_inp.size() <<
             " tokens and the context window is " << promptCtx.n_ctx << "!\n";
         return false;
diff --git a/gpt4all-bindings/python/CHANGELOG.md b/gpt4all-bindings/python/CHANGELOG.md
index 28d9ce4b..a2948702 100644
--- a/gpt4all-bindings/python/CHANGELOG.md
+++ b/gpt4all-bindings/python/CHANGELOG.md
@@ -11,6 +11,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).

 ### Changed
 - Rebase llama.cpp on latest upstream as of September 26th ([#2998](https://github.com/nomic-ai/gpt4all/pull/2998))
+- Change the error message when a message is too long ([#3004](https://github.com/nomic-ai/gpt4all/pull/3004))

 ## [2.8.2] - 2024-08-14
diff --git a/gpt4all-chat/CHANGELOG.md b/gpt4all-chat/CHANGELOG.md
index 37862826..0df051c8 100644
--- a/gpt4all-chat/CHANGELOG.md
+++ b/gpt4all-chat/CHANGELOG.md
@@ -11,6 +11,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).

 ### Changed
 - Rebase llama.cpp on latest upstream as of September 26th ([#2998](https://github.com/nomic-ai/gpt4all/pull/2998))
+- Change the error message when a message is too long ([#3004](https://github.com/nomic-ai/gpt4all/pull/3004))

 ### Fixed
 - Fix a crash when attempting to continue a chat loaded from disk ([#2995](https://github.com/nomic-ai/gpt4all/pull/2995))
diff --git a/gpt4all-chat/src/chatllm.cpp b/gpt4all-chat/src/chatllm.cpp
index c79bfbb6..aded57b0 100644
--- a/gpt4all-chat/src/chatllm.cpp
+++ b/gpt4all-chat/src/chatllm.cpp
@@ -706,6 +706,9 @@ bool ChatLLM::handleResponse(int32_t token, const std::string &response)
 #endif

     // check for error
+    // FIXME (Adam) The error messages should not be treated as a model response or part of the
+    // normal conversation. They should be serialized along with the conversation, but the strings
+    // are separate and we should preserve info that these are error messages and not actual model responses.
     if (token < 0) {
         m_response.append(response);
         m_trimmedResponse = remove_leading_whitespace(m_response);
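
Note (not part of the patch above): the FIXME added in llmodel_shared.cpp says these strings should eventually be bubbled up to the UI level so they can be translated. A minimal sketch of that idea, assuming a hypothetical PromptError code and a UI-side promptErrorText() helper rather than any existing GPT4All API:

#include <cstdint>
#include <string>

// Hypothetical error code the backend could report instead of a hard-coded English string.
enum class PromptError : int32_t {
    None = 0,
    PromptTooLong,
};

// Hypothetical UI-side mapping; in the Qt chat client this string could go through tr()
// so it is translated like any other UI text.
std::string promptErrorText(PromptError err)
{
    switch (err) {
    case PromptError::PromptTooLong:
        return "Your message was too long and could not be processed. "
               "Please try again with something shorter.";
    default:
        return {};
    }
}

Under this sketch the backend would report only PromptError::PromptTooLong through its callback, and the UI layer alone would own (and translate) the user-facing wording.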