server: use configured system prompt, ignore system messages (#2921)

Signed-off-by: Adam Treat <treat.adam@gmail.com>
Signed-off-by: Jared Van Bortel <jared@nomic.ai>
Co-authored-by: Jared Van Bortel <jared@nomic.ai>
Authored by AT on 2024-08-29 12:59:13 -04:00; committed by GitHub
parent 82491fe154
commit e1d49d970f
3 changed files with 9 additions and 3 deletions
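
As an overview before the per-file diffs: with this commit, the built-in OpenAI-compatible server uses the system prompt configured in the model settings, and any message in the request whose role is not "user" is ignored. Below is a hedged illustration of a request body such a handler might receive; the model name and message contents are made up for the example, and only the two "user" entries end up contributing to the prompt.

    #include <QJsonArray>
    #include <QJsonDocument>
    #include <QJsonObject>

    // Hypothetical request body for the chat completions endpoint.
    // With this commit, the "system" entry is dropped in favour of the
    // system prompt configured in the model settings, and the "assistant"
    // entry is skipped as well; only the "user" entries feed the prompt.
    QByteArray exampleRequestBody()
    {
        QJsonArray messages {
            QJsonObject {{"role", "system"},    {"content", "You are a pirate."}},
            QJsonObject {{"role", "user"},      {"content", "Hello!"}},
            QJsonObject {{"role", "assistant"}, {"content", "Ahoy!"}},
            QJsonObject {{"role", "user"},      {"content", "What is 2 + 2?"}},
        };
        QJsonObject body {
            {"model", "Llama 3 8B Instruct"},   // illustrative model name
            {"messages", messages},
        };
        return QJsonDocument(body).toJson();
    }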


@@ -8,6 +8,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
 ### Added
 - Use greedy sampling when temperature is set to zero ([#2854](https://github.com/nomic-ai/gpt4all/pull/2854))
+- Use configured system prompt in server mode and ignore system messages ([#2921](https://github.com/nomic-ai/gpt4all/pull/2921))
 ### Changed
 - Smaller default window size, dynamic minimum size, and scaling tweaks ([#2904](https://github.com/nomic-ai/gpt4all/pull/2904))


@@ -719,8 +719,6 @@ bool ChatLLM::prompt(const QList<QString> &collectionList, const QString &prompt
         processRestoreStateFromText();
     }
 
-    if (!m_processedSystemPrompt)
-        processSystemPrompt();
     const QString promptTemplate = MySettings::globalInstance()->modelPromptTemplate(m_modelInfo);
     const int32_t n_predict = MySettings::globalInstance()->modelMaxLength(m_modelInfo);
     const int32_t top_k = MySettings::globalInstance()->modelTopK(m_modelInfo);
@@ -741,6 +739,9 @@ bool ChatLLM::promptInternal(const QList<QString> &collectionList, const QString
     if (!isModelLoaded())
         return false;
 
+    if (!m_processedSystemPrompt)
+        processSystemPrompt();
+
     QList<ResultInfo> databaseResults;
     const int retrievalSize = MySettings::globalInstance()->localDocsRetrievalSize();
     if (!collectionList.isEmpty()) {
@@ -1206,7 +1207,7 @@ void ChatLLM::restoreState()
 void ChatLLM::processSystemPrompt()
 {
     Q_ASSERT(isModelLoaded());
-    if (!isModelLoaded() || m_processedSystemPrompt || m_restoreStateFromText || m_isServer)
+    if (!isModelLoaded() || m_processedSystemPrompt || m_restoreStateFromText)
         return;
 
     const std::string systemPrompt = MySettings::globalInstance()->modelSystemPrompt(m_modelInfo).toStdString();
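
Taken together, the three hunks above move the lazy system-prompt step from the UI-facing prompt() into promptInternal() and drop the m_isServer early return, so the first request from either the chat UI or the local API server now feeds the configured system prompt to the model. (That reading of the call flow, i.e. that server requests reach promptInternal() without passing through prompt(), is inferred from the diff rather than stated in it.) A minimal, self-contained sketch of the pattern, using a hypothetical class rather than the real ChatLLM:

    #include <QDebug>
    #include <QString>

    // Hypothetical engine, not gpt4all's ChatLLM: the configured system
    // prompt is processed exactly once, the first time any prompt reaches
    // the shared internal entry point, regardless of which front end sent it.
    class EngineSketch
    {
    public:
        bool promptInternal(const QString &userPrompt)
        {
            if (!m_processedSystemPrompt)
                processSystemPrompt();              // runs once, UI and server alike
            qDebug() << "prompting model with:" << userPrompt;
            return true;
        }

    private:
        void processSystemPrompt()
        {
            qDebug() << "feeding configured system prompt:" << m_configuredSystemPrompt;
            m_processedSystemPrompt = true;
        }

        QString m_configuredSystemPrompt = QStringLiteral("You are a helpful assistant.");
        bool m_processedSystemPrompt = false;
    };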


@@ -340,6 +340,10 @@ QHttpServerResponse Server::handleCompletionRequest(const QHttpServerRequest &re
     QList<QString> chats;
     for (int i = 0; i < messages.count(); ++i) {
         QJsonValue v = messages.at(i);
+        // FIXME: Deal with system messages correctly
+        QString role = v.toObject()["role"].toString();
+        if (role != "user")
+            continue;
         QString content = v.toObject()["content"].toString();
         if (!content.endsWith("\n") && i < messages.count() - 1)
             content += "\n";
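
The new role check means the loop only accumulates user turns; system messages (and, as the FIXME notes, assistant messages too) are silently skipped. A simplified sketch of what the filtered loop collects, with the newline handling left out:

    #include <QJsonArray>
    #include <QJsonObject>
    #include <QJsonValue>
    #include <QStringList>

    // Simplified restatement of the loop above (a sketch, not the server code):
    // collect only the "user" messages from an OpenAI-style messages array.
    QStringList userMessagesOnly(const QJsonArray &messages)
    {
        QStringList chats;
        for (const QJsonValue &v : messages) {
            const QJsonObject msg = v.toObject();
            if (msg["role"].toString() != "user")
                continue;   // system and assistant entries never reach the prompt
            chats << msg["content"].toString();
        }
        return chats;
    }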