server: use configured system prompt, ignore system messages (#2921)

Signed-off-by: Adam Treat <treat.adam@gmail.com>
Signed-off-by: Jared Van Bortel <jared@nomic.ai>
Co-authored-by: Jared Van Bortel <jared@nomic.ai>
Authored by AT on 2024-08-29 12:59:13 -04:00; committed by GitHub
parent 82491fe154
commit e1d49d970f
3 changed files with 9 additions and 3 deletions
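
As an overview before the per-file diffs: with this commit, the built-in OpenAI-compatible server uses the system prompt configured in the model settings, and any message in the request whose role is not "user" is ignored. Below is a hedged illustration of a request body such a handler might receive; the model name and message contents are made up for the example, and only the two "user" entries end up contributing to the prompt.

    #include <QJsonArray>
    #include <QJsonDocument>
    #include <QJsonObject>

    // Hypothetical request body for the chat completions endpoint.
    // With this commit, the "system" entry is dropped in favour of the
    // system prompt configured in the model settings, and the "assistant"
    // entry is skipped as well; only the "user" entries feed the prompt.
    QByteArray exampleRequestBody()
    {
        QJsonArray messages {
            QJsonObject {{"role", "system"},    {"content", "You are a pirate."}},
            QJsonObject {{"role", "user"},      {"content", "Hello!"}},
            QJsonObject {{"role", "assistant"}, {"content", "Ahoy!"}},
            QJsonObject {{"role", "user"},      {"content", "What is 2 + 2?"}},
        };
        QJsonObject body {
            {"model", "Llama 3 8B Instruct"},   // illustrative model name
            {"messages", messages},
        };
        return QJsonDocument(body).toJson();
    }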


@@ -8,6 +8,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
 ### Added
 - Use greedy sampling when temperature is set to zero ([#2854](https://github.com/nomic-ai/gpt4all/pull/2854))
+- Use configured system prompt in server mode and ignore system messages ([#2921](https://github.com/nomic-ai/gpt4all/pull/2921))
 ### Changed
 - Smaller default window size, dynamic minimum size, and scaling tweaks ([#2904](https://github.com/nomic-ai/gpt4all/pull/2904))


@@ -719,8 +719,6 @@ bool ChatLLM::prompt(const QList<QString> &collectionList, const QString &prompt
         processRestoreStateFromText();
     }
 
-    if (!m_processedSystemPrompt)
-        processSystemPrompt();
     const QString promptTemplate = MySettings::globalInstance()->modelPromptTemplate(m_modelInfo);
     const int32_t n_predict = MySettings::globalInstance()->modelMaxLength(m_modelInfo);
     const int32_t top_k = MySettings::globalInstance()->modelTopK(m_modelInfo);
@@ -741,6 +739,9 @@ bool ChatLLM::promptInternal(const QList<QString> &collectionList, const QString
     if (!isModelLoaded())
         return false;
 
+    if (!m_processedSystemPrompt)
+        processSystemPrompt();
+
     QList<ResultInfo> databaseResults;
     const int retrievalSize = MySettings::globalInstance()->localDocsRetrievalSize();
     if (!collectionList.isEmpty()) {
@@ -1206,7 +1207,7 @@ void ChatLLM::restoreState()
 void ChatLLM::processSystemPrompt()
 {
     Q_ASSERT(isModelLoaded());
-    if (!isModelLoaded() || m_processedSystemPrompt || m_restoreStateFromText || m_isServer)
+    if (!isModelLoaded() || m_processedSystemPrompt || m_restoreStateFromText)
         return;
 
     const std::string systemPrompt = MySettings::globalInstance()->modelSystemPrompt(m_modelInfo).toStdString();
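
Taken together, the three hunks above move the lazy system-prompt step from the UI-facing prompt() into promptInternal() and drop the m_isServer early return, so the first request from either the chat UI or the local API server now feeds the configured system prompt to the model. (That reading of the call flow, i.e. that server requests reach promptInternal() without passing through prompt(), is inferred from the diff rather than stated in it.) A minimal, self-contained sketch of the pattern, using a hypothetical class rather than the real ChatLLM:

    #include <QDebug>
    #include <QString>

    // Hypothetical engine, not gpt4all's ChatLLM: the configured system
    // prompt is processed exactly once, the first time any prompt reaches
    // the shared internal entry point, regardless of which front end sent it.
    class EngineSketch
    {
    public:
        bool promptInternal(const QString &userPrompt)
        {
            if (!m_processedSystemPrompt)
                processSystemPrompt();              // runs once, UI and server alike
            qDebug() << "prompting model with:" << userPrompt;
            return true;
        }

    private:
        void processSystemPrompt()
        {
            qDebug() << "feeding configured system prompt:" << m_configuredSystemPrompt;
            m_processedSystemPrompt = true;
        }

        QString m_configuredSystemPrompt = QStringLiteral("You are a helpful assistant.");
        bool m_processedSystemPrompt = false;
    };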


@@ -340,6 +340,10 @@ QHttpServerResponse Server::handleCompletionRequest(const QHttpServerRequest &re
     QList<QString> chats;
     for (int i = 0; i < messages.count(); ++i) {
         QJsonValue v = messages.at(i);
+        // FIXME: Deal with system messages correctly
+        QString role = v.toObject()["role"].toString();
+        if (role != "user")
+            continue;
         QString content = v.toObject()["content"].toString();
         if (!content.endsWith("\n") && i < messages.count() - 1)
             content += "\n";
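
The new role check means the loop only accumulates user turns; system messages (and, as the FIXME notes, assistant messages too) are silently skipped. A simplified sketch of what the filtered loop collects, with the newline handling left out:

    #include <QJsonArray>
    #include <QJsonObject>
    #include <QJsonValue>
    #include <QStringList>

    // Simplified restatement of the loop above (a sketch, not the server code):
    // collect only the "user" messages from an OpenAI-style messages array.
    QStringList userMessagesOnly(const QJsonArray &messages)
    {
        QStringList chats;
        for (const QJsonValue &v : messages) {
            const QJsonObject msg = v.toObject();
            if (msg["role"].toString() != "user")
                continue;   // system and assistant entries never reach the prompt
            chats << msg["content"].toString();
        }
        return chats;
    }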