Mirror of https://github.com/nomic-ai/gpt4all.git (synced 2024-10-01 01:06:10 -04:00)

Merge branch 'nomic-ai:main' into translation_zh_TW

Commit 4f13e44ba0
@@ -16,4 +16,3 @@ workflows:
      gpt4all-bindings/python/.* run-python-workflow true
      gpt4all-bindings/typescript/.* run-ts-workflow true
      gpt4all-chat/.* run-chat-workflow true
      .* run-default-workflow true
File diff suppressed because it is too large

10  .gitmodules (vendored)
@@ -1,7 +1,13 @@
 [submodule "llama.cpp-mainline"]
-	path = gpt4all-backend/llama.cpp-mainline
+	path = gpt4all-backend/deps/llama.cpp-mainline
 	url = https://github.com/nomic-ai/llama.cpp.git
 	branch = master
 [submodule "gpt4all-chat/usearch"]
-	path = gpt4all-chat/usearch
+	path = gpt4all-chat/deps/usearch
 	url = https://github.com/nomic-ai/usearch.git
+[submodule "gpt4all-chat/deps/SingleApplication"]
+	path = gpt4all-chat/deps/SingleApplication
+	url = https://github.com/nomic-ai/SingleApplication.git
+[submodule "gpt4all-chat/deps/fmt"]
+	path = gpt4all-chat/deps/fmt
+	url = https://github.com/fmtlib/fmt.git
@ -1,7 +1,7 @@
|
||||
<h1 align="center">GPT4All</h1>
|
||||
|
||||
<p align="center">
|
||||
<a href="https://gpt4all.io">Website</a> • <a href="https://docs.gpt4all.io">Documentation</a> • <a href="https://discord.gg/mGZE39AS3e">Discord</a>
|
||||
<a href="https://www.nomic.ai/gpt4all">Website</a> • <a href="https://docs.gpt4all.io">Documentation</a> • <a href="https://discord.gg/mGZE39AS3e">Discord</a> • <a href="https://www.youtube.com/watch?v=gQcZDXRVJok">YouTube Tutorial</a>
|
||||
</p>
|
||||
|
||||
<p align="center">
|
||||
@ -50,6 +50,9 @@ GPT4All is made possible by our compute partner <a href="https://www.paperspace.
|
||||
<p>
|
||||
macOS requires Monterey 12.6 or newer. Best results with Apple Silicon M-series processors.
|
||||
</p>
|
||||
|
||||
See the full [System Requirements](gpt4all-chat/system_requirements.md) for more details.
|
||||
|
||||
<br/>
|
||||
<br/>
|
||||
<p>
|
||||
@ -89,7 +92,7 @@ with model.chat_session():
|
||||
- Improved user workflow for LocalDocs
|
||||
- Expanded access to more model architectures
|
||||
- **October 19th, 2023**: GGUF Support Launches with Support for:
|
||||
- Mistral 7b base model, an updated model gallery on [gpt4all.io](https://gpt4all.io), several new local code models including Rift Coder v1.5
|
||||
- Mistral 7b base model, an updated model gallery on our website, several new local code models including Rift Coder v1.5
|
||||
- [Nomic Vulkan](https://blog.nomic.ai/posts/gpt4all-gpu-inference-with-vulkan) support for Q4\_0 and Q4\_1 quantizations in GGUF.
|
||||
- Offline build support for running old versions of the GPT4All Local LLM Chat Client.
|
||||
- **September 18th, 2023**: [Nomic Vulkan](https://blog.nomic.ai/posts/gpt4all-gpu-inference-with-vulkan) launches supporting local LLM inference on NVIDIA and AMD GPUs.
|
||||
|
@ -1,4 +1,4 @@
|
||||
cmake_minimum_required(VERSION 3.21) # for PROJECT_IS_TOP_LEVEL
|
||||
cmake_minimum_required(VERSION 3.23) # for FILE_SET
|
||||
set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
|
||||
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
||||
|
||||
@ -33,7 +33,7 @@ set(LLMODEL_VERSION_PATCH 0)
|
||||
set(LLMODEL_VERSION "${LLMODEL_VERSION_MAJOR}.${LLMODEL_VERSION_MINOR}.${LLMODEL_VERSION_PATCH}")
|
||||
project(llmodel VERSION ${LLMODEL_VERSION} LANGUAGES CXX C)
|
||||
|
||||
set(CMAKE_CXX_STANDARD 20)
|
||||
set(CMAKE_CXX_STANDARD 23)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})
|
||||
set(BUILD_SHARED_LIBS ON)
|
||||
@ -47,7 +47,7 @@ else()
|
||||
message(STATUS "Interprocedural optimization support detected")
|
||||
endif()
|
||||
|
||||
set(DIRECTORY llama.cpp-mainline)
|
||||
set(DIRECTORY deps/llama.cpp-mainline)
|
||||
include(llama.cpp.cmake)
|
||||
|
||||
set(BUILD_VARIANTS)
|
||||
@ -128,6 +128,10 @@ foreach(BUILD_VARIANT IN LISTS BUILD_VARIANTS)
|
||||
# Include GGML
|
||||
include_ggml(-mainline-${BUILD_VARIANT})
|
||||
|
||||
if (BUILD_VARIANT MATCHES metal)
|
||||
set(GGML_METALLIB "${GGML_METALLIB}" PARENT_SCOPE)
|
||||
endif()
|
||||
|
||||
# Function for preparing individual implementations
|
||||
function(prepare_target TARGET_NAME BASE_LIB)
|
||||
set(TARGET_NAME ${TARGET_NAME}-${BUILD_VARIANT})
|
||||
@ -146,9 +150,12 @@ foreach(BUILD_VARIANT IN LISTS BUILD_VARIANTS)
|
||||
|
||||
# Add each individual implementations
|
||||
add_library(llamamodel-mainline-${BUILD_VARIANT} SHARED
|
||||
llamamodel.cpp llmodel_shared.cpp)
|
||||
src/llamamodel.cpp src/llmodel_shared.cpp)
|
||||
target_compile_definitions(llamamodel-mainline-${BUILD_VARIANT} PRIVATE
|
||||
LLAMA_VERSIONS=>=3 LLAMA_DATE=999999)
|
||||
target_include_directories(llamamodel-mainline-${BUILD_VARIANT} PRIVATE
|
||||
src include/gpt4all-backend
|
||||
)
|
||||
prepare_target(llamamodel-mainline llama-mainline)
|
||||
|
||||
if (NOT PROJECT_IS_TOP_LEVEL AND BUILD_VARIANT STREQUAL cuda)
|
||||
@ -157,11 +164,19 @@ foreach(BUILD_VARIANT IN LISTS BUILD_VARIANTS)
|
||||
endforeach()
|
||||
|
||||
add_library(llmodel
|
||||
llmodel.h llmodel.cpp llmodel_shared.cpp
|
||||
llmodel_c.h llmodel_c.cpp
|
||||
dlhandle.cpp
|
||||
src/dlhandle.cpp
|
||||
src/llmodel.cpp
|
||||
src/llmodel_c.cpp
|
||||
src/llmodel_shared.cpp
|
||||
)
|
||||
target_sources(llmodel PUBLIC
|
||||
FILE_SET public_headers TYPE HEADERS BASE_DIRS include
|
||||
FILES include/gpt4all-backend/llmodel.h
|
||||
include/gpt4all-backend/llmodel_c.h
|
||||
include/gpt4all-backend/sysinfo.h
|
||||
)
|
||||
target_compile_definitions(llmodel PRIVATE LIB_FILE_EXT="${CMAKE_SHARED_LIBRARY_SUFFIX}")
|
||||
target_include_directories(llmodel PRIVATE src include/gpt4all-backend)
|
||||
|
||||
set_target_properties(llmodel PROPERTIES
|
||||
VERSION ${PROJECT_VERSION}
|
||||
|
@@ -27,7 +27,7 @@ Unfortunately, no for three reasons:

# What is being done to make them more compatible?

-A few things. Number one, we are maintaining compatibility with our current model zoo by way of the submodule pinning. However, we are also exploring how we can update to newer versions of llama.cpp without breaking our current models. This might involve an additional magic header check or it could possibly involve keeping the currently pinned submodule and also adding a new submodule with later changes and differienting them with namespaces or some other manner. Investigations continue.
+A few things. Number one, we are maintaining compatibility with our current model zoo by way of the submodule pinning. However, we are also exploring how we can update to newer versions of llama.cpp without breaking our current models. This might involve an additional magic header check or it could possibly involve keeping the currently pinned submodule and also adding a new submodule with later changes and differentiating them with namespaces or some other manner. Investigations continue.
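The "magic header check" mentioned above could look roughly like the sketch below. This is illustrative only: the enum, function name, and fallback behaviour are assumptions rather than code from the repository; only the idea of dispatching on the file's leading magic bytes comes from the FAQ text.

```cpp
#include <cstring>
#include <fstream>
#include <string>

// Hypothetical sketch: decide which pinned llama.cpp submodule should load a
// model file by inspecting its leading magic bytes. Names are illustrative.
enum class BackendChoice { Mainline, LegacyPinned, Unknown };

BackendChoice chooseBackend(const std::string &modelPath) {
    std::ifstream in(modelPath, std::ios::binary);
    char magic[4] = {};
    if (!in.read(magic, sizeof(magic)))
        return BackendChoice::Unknown;
    if (std::memcmp(magic, "GGUF", 4) == 0)
        return BackendChoice::Mainline;   // newer file format -> newer submodule
    return BackendChoice::LegacyPinned;   // anything else -> currently pinned submodule
}
```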

# What about GPU inference?
1  gpt4all-backend/deps/llama.cpp-mainline (submodule)
@@ -0,0 +1 @@
+Subproject commit ced74fbad4b258507f3ec06e77eec9445583511a
@@ -162,7 +162,7 @@ public:
                        bool allowContextShift,
                        PromptContext &ctx,
                        bool special = false,
-                       std::string *fakeReply = nullptr);
+                       std::optional<std::string_view> fakeReply = {});

    using EmbedCancelCallback = bool(unsigned *batchSizes, unsigned nBatch, const char *backend);

@@ -212,7 +212,7 @@ public:
protected:
    // These are pure virtual because subclasses need to implement as the default implementation of
    // 'prompt' above calls these functions
-   virtual std::vector<Token> tokenize(PromptContext &ctx, const std::string &str, bool special = false) = 0;
+   virtual std::vector<Token> tokenize(PromptContext &ctx, std::string_view str, bool special = false) = 0;
    virtual bool isSpecialToken(Token id) const = 0;
    virtual std::string tokenToString(Token id) const = 0;
    virtual Token sampleToken(PromptContext &ctx) const = 0;

@@ -249,7 +249,8 @@ protected:
                    std::function<bool(int32_t, const std::string&)> responseCallback,
                    bool allowContextShift,
                    PromptContext &promptCtx,
-                   std::vector<Token> embd_inp);
+                   std::vector<Token> embd_inp,
+                   bool isResponse = false);
    void generateResponse(std::function<bool(int32_t, const std::string&)> responseCallback,
                          bool allowContextShift,
                          PromptContext &promptCtx);
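As a rough illustration of the new `fakeReply` parameter, a caller that wants to replay a saved assistant reply instead of generating one might now do something like the following. This is a sketch only: the prompt template string, the callbacks, and the surrounding wiring are placeholders, and only the parameter order visible in this diff is assumed.

```cpp
#include <optional>
#include <string>
#include <string_view>
// #include "llmodel.h"  // assumed: provides LLModel and LLModel::PromptContext

// Hypothetical helper: feed a stored reply back through the model so it is
// tokenized into the context without any new generation happening.
void replaySavedTurn(LLModel &model, LLModel::PromptContext &ctx,
                     const std::string &userMessage, std::string_view savedReply) {
    model.prompt(userMessage, "%1",                                  // placeholder prompt template
                 [](int32_t) { return true; },                       // prompt token callback
                 [](int32_t, const std::string &) { return true; },  // response token callback
                 /*allowContextShift*/ true, ctx,
                 /*special*/ false,
                 /*fakeReply*/ std::make_optional(savedReply));
}
```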
@@ -1 +0,0 @@
-Subproject commit 443665aec4721ecf57df8162e7e093a0cd674a76
@ -811,7 +811,8 @@ function(include_ggml SUFFIX)
|
||||
list(APPEND XC_FLAGS -std=${GGML_METAL_STD})
|
||||
endif()
|
||||
|
||||
set(GGML_METALLIB ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib)
|
||||
set(GGML_METALLIB "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib")
|
||||
set(GGML_METALLIB "${GGML_METALLIB}" PARENT_SCOPE)
|
||||
add_custom_command(
|
||||
OUTPUT ${GGML_METALLIB}
|
||||
COMMAND xcrun -sdk macosx metal ${XC_FLAGS} -c ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal -o ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.air
|
||||
@ -822,7 +823,6 @@ function(include_ggml SUFFIX)
|
||||
DEPENDS ${DIRECTORY}/ggml/src/ggml-metal.metal ${DIRECTORY}/ggml/src/ggml-common.h
|
||||
COMMENT "Compiling Metal kernels"
|
||||
)
|
||||
set_source_files_properties(${GGML_METALLIB} DIRECTORY ${CMAKE_SOURCE_DIR} PROPERTIES GENERATED ON)
|
||||
|
||||
add_custom_target(
|
||||
ggml-metal ALL
|
||||
|
@@ -536,13 +536,13 @@ size_t LLamaModel::restoreState(const uint8_t *src)
     return llama_set_state_data(d_ptr->ctx, const_cast<uint8_t*>(src));
 }

-std::vector<LLModel::Token> LLamaModel::tokenize(PromptContext &ctx, const std::string &str, bool special)
+std::vector<LLModel::Token> LLamaModel::tokenize(PromptContext &ctx, std::string_view str, bool special)
 {
     bool atStart = m_tokenize_last_token == -1;
     bool insertSpace = atStart || isSpecialToken(m_tokenize_last_token);
     std::vector<LLModel::Token> fres(str.length() + 4);
     int32_t fres_len = llama_tokenize_gpt4all(
-        d_ptr->model, str.c_str(), str.length(), fres.data(), fres.size(), /*add_special*/ atStart,
+        d_ptr->model, str.data(), str.length(), fres.data(), fres.size(), /*add_special*/ atStart,
         /*parse_special*/ special, /*insert_space*/ insertSpace
     );
     fres.resize(fres_len);
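The switch from `str.c_str()` to `str.data()` follows directly from the new `std::string_view` parameter: a view is not guaranteed to be null-terminated, so the callee has to rely on the explicit length that is already being passed. A small standalone illustration of that caveat (not project code):

```cpp
#include <string>
#include <string_view>

int main() {
    std::string full = "hello world";
    std::string_view word = std::string_view(full).substr(0, 5); // "hello"

    // word.data() still points into "hello world"; only word.size() bytes
    // belong to the view, so it must be consumed as (pointer, length) --
    // which is what the (str.data(), str.length()) pair above does.
    return word.size() == 5 ? 0 : 1;
}
```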
@ -8,6 +8,7 @@
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <vector>
|
||||
|
||||
struct LLamaPrivate;
|
||||
@ -52,7 +53,7 @@ private:
|
||||
bool m_supportsCompletion = false;
|
||||
|
||||
protected:
|
||||
std::vector<Token> tokenize(PromptContext &ctx, const std::string &str, bool special) override;
|
||||
std::vector<Token> tokenize(PromptContext &ctx, std::string_view str, bool special) override;
|
||||
bool isSpecialToken(Token id) const override;
|
||||
std::string tokenToString(Token id) const override;
|
||||
Token sampleToken(PromptContext &ctx) const override;
|
@ -12,6 +12,7 @@
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <vector>
|
||||
|
||||
struct LLModelWrapper {
|
||||
@ -130,13 +131,10 @@ void llmodel_prompt(llmodel_model model, const char *prompt,
|
||||
wrapper->promptContext.repeat_last_n = ctx->repeat_last_n;
|
||||
wrapper->promptContext.contextErase = ctx->context_erase;
|
||||
|
||||
std::string fake_reply_str;
|
||||
if (fake_reply) { fake_reply_str = fake_reply; }
|
||||
auto *fake_reply_p = fake_reply ? &fake_reply_str : nullptr;
|
||||
|
||||
// Call the C++ prompt method
|
||||
wrapper->llModel->prompt(prompt, prompt_template, prompt_callback, response_func, allow_context_shift,
|
||||
wrapper->promptContext, special, fake_reply_p);
|
||||
wrapper->promptContext, special,
|
||||
fake_reply ? std::make_optional<std::string_view>(fake_reply) : std::nullopt);
|
||||
|
||||
// Update the C context by giving access to the wrappers raw pointers to std::vector data
|
||||
// which involves no copies
|
@ -11,6 +11,7 @@
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <vector>
|
||||
|
||||
namespace ranges = std::ranges;
|
||||
@ -45,7 +46,7 @@ void LLModel::prompt(const std::string &prompt,
|
||||
bool allowContextShift,
|
||||
PromptContext &promptCtx,
|
||||
bool special,
|
||||
std::string *fakeReply)
|
||||
std::optional<std::string_view> fakeReply)
|
||||
{
|
||||
if (!isModelLoaded()) {
|
||||
std::cerr << implementation().modelType() << " ERROR: prompt won't work with an unloaded model!\n";
|
||||
@ -129,11 +130,11 @@ void LLModel::prompt(const std::string &prompt,
|
||||
return; // error
|
||||
|
||||
// decode the assistant's reply, either generated or spoofed
|
||||
if (fakeReply == nullptr) {
|
||||
if (!fakeReply) {
|
||||
generateResponse(responseCallback, allowContextShift, promptCtx);
|
||||
} else {
|
||||
embd_inp = tokenize(promptCtx, *fakeReply, false);
|
||||
if (!decodePrompt(promptCallback, responseCallback, allowContextShift, promptCtx, embd_inp))
|
||||
if (!decodePrompt(promptCallback, responseCallback, allowContextShift, promptCtx, embd_inp, true))
|
||||
return; // error
|
||||
}
|
||||
|
||||
@ -157,7 +158,8 @@ bool LLModel::decodePrompt(std::function<bool(int32_t)> promptCallback,
|
||||
std::function<bool(int32_t, const std::string&)> responseCallback,
|
||||
bool allowContextShift,
|
||||
PromptContext &promptCtx,
|
||||
std::vector<Token> embd_inp) {
|
||||
std::vector<Token> embd_inp,
|
||||
bool isResponse) {
|
||||
if ((int) embd_inp.size() > promptCtx.n_ctx - 4) {
|
||||
responseCallback(-1, "ERROR: The prompt size exceeds the context window size and cannot be processed.");
|
||||
std::cerr << implementation().modelType() << " ERROR: The prompt is " << embd_inp.size() <<
|
||||
@ -196,7 +198,9 @@ bool LLModel::decodePrompt(std::function<bool(int32_t)> promptCallback,
|
||||
for (size_t t = 0; t < tokens; ++t) {
|
||||
promptCtx.tokens.push_back(batch.at(t));
|
||||
promptCtx.n_past += 1;
|
||||
if (!promptCallback(batch.at(t)))
|
||||
Token tok = batch.at(t);
|
||||
bool res = isResponse ? responseCallback(tok, tokenToString(tok)) : promptCallback(tok);
|
||||
if (!res)
|
||||
return false;
|
||||
}
|
||||
i = batch_end;
|
@ -4,6 +4,11 @@ All notable changes to this project will be documented in this file.
|
||||
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
|
||||
|
||||
## [Unreleased]
|
||||
|
||||
### Added
|
||||
- Warn on Windows if the Microsoft Visual C++ runtime libraries are not found ([#2920](https://github.com/nomic-ai/gpt4all/pull/2920))
|
||||
|
||||
## [2.8.2] - 2024-08-14
|
||||
|
||||
### Fixed
|
||||
@ -56,6 +61,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
|
||||
- Restore leading space removal logic that was incorrectly removed in [#2694](https://github.com/nomic-ai/gpt4all/pull/2694)
|
||||
- CUDA: Cherry-pick llama.cpp DMMV cols requirement fix that caused a crash with long conversations since [#2694](https://github.com/nomic-ai/gpt4all/pull/2694)
|
||||
|
||||
[Unreleased]: https://github.com/nomic-ai/gpt4all/compare/python-v2.8.2...HEAD
|
||||
[2.8.2]: https://github.com/nomic-ai/gpt4all/compare/python-v2.8.1...python-v2.8.2
|
||||
[2.8.1]: https://github.com/nomic-ai/gpt4all/compare/python-v2.8.0...python-v2.8.1
|
||||
[2.8.0]: https://github.com/nomic-ai/gpt4all/compare/python-v2.7.0...python-v2.8.0
|
||||
|
@@ -4,6 +4,8 @@ The GPT4All Desktop Application allows you to download and run large language mo

 With GPT4All, you can chat with models, turn your local files into information sources for models [(LocalDocs)](localdocs.md), or browse models available online to download onto your device.

+[Official Video Tutorial](https://www.youtube.com/watch?v=gQcZDXRVJok)
+
 ## Quickstart

 !!! note "Quickstart"
@ -4,7 +4,7 @@
|
||||
|
||||
It is possible you are trying to load a model from HuggingFace whose weights are not compatible with our [backend](https://github.com/nomic-ai/gpt4all/tree/main/gpt4all-bindings).
|
||||
|
||||
Try downloading one of the officially supported models mentioned our [website](https://gpt4all.io/). If the problem persists, please share your experience on our [Discord](https://discord.com/channels/1076964370942267462).
|
||||
Try downloading one of the officially supported models listed on the main models page in the application. If the problem persists, please share your experience on our [Discord](https://discord.com/channels/1076964370942267462).
|
||||
|
||||
## Bad Responses
|
||||
|
||||
@ -24,4 +24,4 @@ Including information in a prompt is not a guarantee that it will be used correc
|
||||
|
||||
### LocalDocs Issues
|
||||
|
||||
Occasionally a model - particularly a smaller or overall weaker LLM - may not use the relevant text snippets from the files that were referenced via LocalDocs. If you are seeing this, it can help to use phrases like "in the docs" or "from the provided files" when prompting your model.
|
||||
Occasionally a model - particularly a smaller or overall weaker LLM - may not use the relevant text snippets from the files that were referenced via LocalDocs. If you are seeing this, it can help to use phrases like "in the docs" or "from the provided files" when prompting your model.
|
||||
|
@ -37,7 +37,20 @@ if platform.system() == "Darwin" and platform.processor() == "i386":
|
||||
raise RuntimeError(textwrap.dedent("""\
|
||||
Running GPT4All under Rosetta is not supported due to CPU feature requirements.
|
||||
Please install GPT4All in an environment that uses a native ARM64 Python interpreter.
|
||||
"""))
|
||||
""").strip())
|
||||
|
||||
# Check for C++ runtime libraries
|
||||
if platform.system() == "Windows":
|
||||
try:
|
||||
ctypes.CDLL("msvcp140.dll")
|
||||
ctypes.CDLL("vcruntime140.dll")
|
||||
ctypes.CDLL("vcruntime140_1.dll")
|
||||
except OSError as e:
|
||||
print(textwrap.dedent(f"""\
|
||||
{e!r}
|
||||
The Microsoft Visual C++ runtime libraries were not found. Please install them from
|
||||
https://aka.ms/vs/17/release/vc_redist.x64.exe
|
||||
"""), file=sys.stderr)
|
||||
|
||||
|
||||
def _load_cuda(rtver: str, blasver: str) -> None:
|
||||
|
@ -357,7 +357,7 @@ class GPT4All:
|
||||
expected_md5: str | None = None,
|
||||
) -> str | os.PathLike[str]:
|
||||
"""
|
||||
Download model from https://gpt4all.io.
|
||||
Download model from gpt4all.io.
|
||||
|
||||
Args:
|
||||
model_filename: Filename of model (with .gguf extension).
|
||||
|
@ -68,13 +68,13 @@ def get_long_description():
|
||||
|
||||
setup(
|
||||
name=package_name,
|
||||
version="2.8.2",
|
||||
version="2.8.3.dev0",
|
||||
description="Python bindings for GPT4All",
|
||||
long_description=get_long_description(),
|
||||
long_description_content_type="text/markdown",
|
||||
author="Nomic and the Open Source Community",
|
||||
author_email="support@nomic.ai",
|
||||
url="https://gpt4all.io/",
|
||||
url="https://www.nomic.ai/gpt4all",
|
||||
project_urls={
|
||||
"Documentation": "https://docs.gpt4all.io/gpt4all_python.html",
|
||||
"Source code": "https://github.com/nomic-ai/gpt4all/tree/main/gpt4all-bindings/python",
|
||||
|
@ -4,21 +4,34 @@ All notable changes to this project will be documented in this file.
|
||||
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
|
||||
|
||||
## [Unreleased]
|
||||
## [3.3.0] - 2024-09-20
|
||||
|
||||
### Added
|
||||
- Use greedy sampling when temperature is set to zero ([#2854](https://github.com/nomic-ai/gpt4all/pull/2854))
|
||||
- Use configured system prompt in server mode and ignore system messages ([#2921](https://github.com/nomic-ai/gpt4all/pull/2921), [#2924](https://github.com/nomic-ai/gpt4all/pull/2924))
|
||||
- Add more system information to anonymous usage stats ([#2939](https://github.com/nomic-ai/gpt4all/pull/2939))
|
||||
- Check for unsupported Ubuntu and macOS versions at install time ([#2940](https://github.com/nomic-ai/gpt4all/pull/2940))
|
||||
|
||||
### Changed
|
||||
- The offline update button now directs users to the offline installer releases page. (by [@3Simplex](https://github.com/3Simplex) in [#2888](https://github.com/nomic-ai/gpt4all/pull/2888))
|
||||
- Change the website link on the home page to point to the new URL ([#2915](https://github.com/nomic-ai/gpt4all/pull/2915))
|
||||
- Smaller default window size, dynamic minimum size, and scaling tweaks ([#2904](https://github.com/nomic-ai/gpt4all/pull/2904))
|
||||
- Only allow a single instance of program to be run at a time ([#2923](https://github.com/nomic-ai/gpt4all/pull/2923]))
|
||||
|
||||
### Fixed
|
||||
- Bring back "Auto" option for Embeddings Device as "Application default," which went missing in v3.1.0 ([#2873](https://github.com/nomic-ai/gpt4all/pull/2873))
|
||||
- Correct a few strings in the Italian translation (by [@Harvester62](https://github.com/Harvester62) in [#2872](https://github.com/nomic-ai/gpt4all/pull/2872))
|
||||
- Correct a few strings in the Italian translation (by [@Harvester62](https://github.com/Harvester62) in [#2872](https://github.com/nomic-ai/gpt4all/pull/2872) and [#2909](https://github.com/nomic-ai/gpt4all/pull/2909))
|
||||
- Correct typos in Traditional Chinese translation (by [@supersonictw](https://github.com/supersonictw) in [#2852](https://github.com/nomic-ai/gpt4all/pull/2852))
|
||||
- Set the window icon on Linux ([#2880](https://github.com/nomic-ai/gpt4all/pull/2880))
|
||||
- Corrections to the Romanian translation (by [@SINAPSA-IC](https://github.com/SINAPSA-IC) in [#2890](https://github.com/nomic-ai/gpt4all/pull/2890))
|
||||
- Fix singular/plural forms of LocalDocs "x Sources" (by [@cosmic-snow](https://github.com/cosmic-snow) in [#2885](https://github.com/nomic-ai/gpt4all/pull/2885))
|
||||
|
||||
### Changed
|
||||
- The offline update button now directs users to the offline installer releases page. (by [@3Simplex](https://github.com/3Simplex) in [#2888](https://github.com/nomic-ai/gpt4all/pull/2888))
|
||||
- Fix a typo in Model Settings (by [@3Simplex](https://github.com/3Simplex) in [#2916](https://github.com/nomic-ai/gpt4all/pull/2916))
|
||||
- Fix the antenna icon tooltip when using the local server ([#2922](https://github.com/nomic-ai/gpt4all/pull/2922))
|
||||
- Fix a few issues with locating files and handling errors when loading remote models on startup ([#2875](https://github.com/nomic-ai/gpt4all/pull/2875))
|
||||
- Significantly improve API server request parsing and response correctness ([#2929](https://github.com/nomic-ai/gpt4all/pull/2929))
|
||||
- Remove unnecessary dependency on Qt WaylandCompositor module ([#2949](https://github.com/nomic-ai/gpt4all/pull/2949))
|
||||
- Update translations ([#2970](https://github.com/nomic-ai/gpt4all/pull/2970))
|
||||
- Fix macOS installer and remove extra installed copy of Nomic Embed ([#2973](https://github.com/nomic-ai/gpt4all/pull/2973))
|
||||
|
||||
## [3.2.1] - 2024-08-13
|
||||
|
||||
@ -106,7 +119,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
|
||||
- Fix several Vulkan resource management issues ([#2694](https://github.com/nomic-ai/gpt4all/pull/2694))
|
||||
- Fix crash/hang when some models stop generating, by showing special tokens ([#2701](https://github.com/nomic-ai/gpt4all/pull/2701))
|
||||
|
||||
[Unreleased]: https://github.com/nomic-ai/gpt4all/compare/v3.2.1...HEAD
|
||||
[3.3.0]: https://github.com/nomic-ai/gpt4all/compare/v3.2.1...v3.3.0
|
||||
[3.2.1]: https://github.com/nomic-ai/gpt4all/compare/v3.2.0...v3.2.1
|
||||
[3.2.0]: https://github.com/nomic-ai/gpt4all/compare/v3.1.1...v3.2.0
|
||||
[3.1.1]: https://github.com/nomic-ai/gpt4all/compare/v3.1.0...v3.1.1
|
||||
|
@ -1,8 +1,12 @@
|
||||
cmake_minimum_required(VERSION 3.16)
|
||||
cmake_minimum_required(VERSION 3.25) # for try_compile SOURCE_FROM_VAR
|
||||
|
||||
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
||||
set(CMAKE_CXX_STANDARD 20)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
set(APP_VERSION_MAJOR 3)
|
||||
set(APP_VERSION_MINOR 3)
|
||||
set(APP_VERSION_PATCH 0)
|
||||
set(APP_VERSION_BASE "${APP_VERSION_MAJOR}.${APP_VERSION_MINOR}.${APP_VERSION_PATCH}")
|
||||
set(APP_VERSION "${APP_VERSION_BASE}")
|
||||
|
||||
project(gpt4all VERSION ${APP_VERSION_BASE} LANGUAGES CXX C)
|
||||
|
||||
if(APPLE)
|
||||
option(BUILD_UNIVERSAL "Build a Universal binary on macOS" OFF)
|
||||
@ -16,37 +20,57 @@ if(APPLE)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
set(APP_VERSION_MAJOR 3)
|
||||
set(APP_VERSION_MINOR 2)
|
||||
set(APP_VERSION_PATCH 2)
|
||||
set(APP_VERSION_BASE "${APP_VERSION_MAJOR}.${APP_VERSION_MINOR}.${APP_VERSION_PATCH}")
|
||||
set(APP_VERSION "${APP_VERSION_BASE}-dev0")
|
||||
option(GPT4ALL_LOCALHOST "Build installer for localhost repo" OFF)
|
||||
option(GPT4ALL_OFFLINE_INSTALLER "Build an offline installer" OFF)
|
||||
option(GPT4ALL_SIGN_INSTALL "Sign installed binaries and installers (requires signing identities)" OFF)
|
||||
|
||||
|
||||
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
||||
set(CMAKE_CXX_STANDARD 23)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
|
||||
|
||||
# conftests
function(check_cpp_feature FEATURE_NAME MIN_VALUE)
    message(CHECK_START "Checking for ${FEATURE_NAME} >= ${MIN_VALUE}")
    string(CONCAT SRC
        "#include <version>\n"
        "#if !defined(${FEATURE_NAME}) || ${FEATURE_NAME} < ${MIN_VALUE}\n"
        "# error \"${FEATURE_NAME} is not defined or less than ${MIN_VALUE}\"\n"
        "#endif\n"
        "int main() { return 0; }\n"
    )
    try_compile(HAS_FEATURE SOURCE_FROM_VAR "test_${FEATURE_NAME}.cpp" SRC)
    if (NOT HAS_FEATURE)
        message(CHECK_FAIL "fail")
        message(FATAL_ERROR
            "The C++ compiler\n  \"${CMAKE_CXX_COMPILER}\"\n"
            "is too old to support ${FEATURE_NAME} >= ${MIN_VALUE}.\n"
            "Please specify a newer compiler via -DCMAKE_C_COMPILER/-DCMAKE_CXX_COMPILER."
        )
    endif()
    message(CHECK_PASS "pass")
endfunction()

# check for monadic operations in std::optional (e.g. transform)
check_cpp_feature("__cpp_lib_optional" "202110L")
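For reference, the `check_cpp_feature("__cpp_lib_optional" "202110L")` call above generates and test-compiles a translation unit equivalent to the following, reconstructed from the string literals in the function (the temporary file name comes from the `test_${FEATURE_NAME}.cpp` pattern):

```cpp
// test___cpp_lib_optional.cpp -- conftest generated by check_cpp_feature
#include <version>
#if !defined(__cpp_lib_optional) || __cpp_lib_optional < 202110L
# error "__cpp_lib_optional is not defined or less than 202110L"
#endif
int main() { return 0; }
```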
||||
|
||||
|
||||
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/cmake/Modules")
|
||||
|
||||
# Include the binary directory for the generated header file
|
||||
include_directories("${CMAKE_CURRENT_BINARY_DIR}")
|
||||
|
||||
project(gpt4all VERSION ${APP_VERSION_BASE} LANGUAGES CXX C)
|
||||
|
||||
set(CMAKE_AUTOMOC ON)
|
||||
set(CMAKE_AUTORCC ON)
|
||||
|
||||
option(GPT4ALL_LOCALHOST "Build installer for localhost repo" OFF)
|
||||
option(GPT4ALL_OFFLINE_INSTALLER "Build an offline installer" OFF)
|
||||
option(GPT4ALL_SIGN_INSTALL "Sign installed binaries and installers (requires signing identities)" OFF)
|
||||
|
||||
# Generate a header file with the version number
|
||||
configure_file(
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/cmake/config.h.in"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/config.h"
|
||||
)
|
||||
|
||||
if(LINUX)
|
||||
find_package(Qt6 6.4 COMPONENTS Core Quick WaylandCompositor QuickDialogs2 Svg HttpServer Sql Pdf LinguistTools REQUIRED)
|
||||
else()
|
||||
find_package(Qt6 6.4 COMPONENTS Core Quick QuickDialogs2 Svg HttpServer Sql Pdf LinguistTools REQUIRED)
|
||||
endif()
|
||||
find_package(Qt6 6.4 COMPONENTS Core HttpServer LinguistTools Pdf Quick QuickDialogs2 Sql Svg REQUIRED)
|
||||
|
||||
# Get the Qt6Core target properties
|
||||
get_target_property(Qt6Core_INCLUDE_DIRS Qt6::Core INTERFACE_INCLUDE_DIRECTORIES)
|
||||
@ -64,13 +88,19 @@ message(STATUS "Qt 6 root directory: ${Qt6_ROOT_DIR}")
|
||||
|
||||
set (CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
|
||||
|
||||
set(FMT_INSTALL OFF)
|
||||
set(BUILD_SHARED_LIBS_SAVED "${BUILD_SHARED_LIBS}")
|
||||
set(BUILD_SHARED_LIBS OFF)
|
||||
add_subdirectory(deps/fmt)
|
||||
set(BUILD_SHARED_LIBS "${BUILD_SHARED_LIBS_SAVED}")
|
||||
|
||||
add_subdirectory(../gpt4all-backend llmodel)
|
||||
|
||||
set(CHAT_EXE_RESOURCES)
|
||||
|
||||
# Metal shader library
|
||||
if (APPLE)
|
||||
list(APPEND CHAT_EXE_RESOURCES "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib")
|
||||
list(APPEND CHAT_EXE_RESOURCES "${GGML_METALLIB}")
|
||||
endif()
|
||||
|
||||
# App icon
|
||||
@ -84,8 +114,6 @@ elseif (APPLE)
|
||||
|
||||
# And the following tells CMake where to find and install the file itself.
|
||||
set(APP_ICON_RESOURCE "${CMAKE_CURRENT_SOURCE_DIR}/resources/gpt4all.icns")
|
||||
set_source_files_properties(${APP_ICON_RESOURCE} PROPERTIES
|
||||
MACOSX_PACKAGE_LOCATION "Resources")
|
||||
list(APPEND CHAT_EXE_RESOURCES "${APP_ICON_RESOURCE}")
|
||||
endif()
|
||||
|
||||
@ -105,24 +133,35 @@ if (APPLE)
|
||||
list(APPEND CHAT_EXE_RESOURCES "${LOCAL_EMBEDDING_MODEL_PATH}")
|
||||
endif()
|
||||
|
||||
set(QAPPLICATION_CLASS QGuiApplication)
|
||||
add_subdirectory(deps/SingleApplication)
|
||||
|
||||
if (DEFINED GGML_METALLIB)
|
||||
set_source_files_properties("${GGML_METALLIB}" PROPERTIES GENERATED ON)
|
||||
endif()
|
||||
if (APPLE)
|
||||
set_source_files_properties(${CHAT_EXE_RESOURCES} PROPERTIES MACOSX_PACKAGE_LOCATION Resources)
|
||||
endif()
|
||||
|
||||
qt_add_executable(chat
|
||||
main.cpp
|
||||
chat.h chat.cpp
|
||||
chatllm.h chatllm.cpp
|
||||
chatmodel.h chatlistmodel.h chatlistmodel.cpp
|
||||
chatapi.h chatapi.cpp
|
||||
chatviewtextprocessor.h chatviewtextprocessor.cpp
|
||||
database.h database.cpp
|
||||
download.h download.cpp
|
||||
embllm.cpp embllm.h
|
||||
localdocs.h localdocs.cpp localdocsmodel.h localdocsmodel.cpp
|
||||
llm.h llm.cpp
|
||||
modellist.h modellist.cpp
|
||||
mysettings.h mysettings.cpp
|
||||
network.h network.cpp
|
||||
server.h server.cpp
|
||||
logger.h logger.cpp
|
||||
${APP_ICON_RESOURCE}
|
||||
src/main.cpp
|
||||
src/chat.cpp src/chat.h
|
||||
src/chatapi.cpp src/chatapi.h
|
||||
src/chatlistmodel.cpp src/chatlistmodel.h
|
||||
src/chatllm.cpp src/chatllm.h
|
||||
src/chatmodel.h
|
||||
src/chatviewtextprocessor.cpp src/chatviewtextprocessor.h
|
||||
src/database.cpp src/database.h
|
||||
src/download.cpp src/download.h
|
||||
src/embllm.cpp src/embllm.h
|
||||
src/llm.cpp src/llm.h
|
||||
src/localdocs.cpp src/localdocs.h
|
||||
src/localdocsmodel.cpp src/localdocsmodel.h
|
||||
src/logger.cpp src/logger.h
|
||||
src/modellist.cpp src/modellist.h
|
||||
src/mysettings.cpp src/mysettings.h
|
||||
src/network.cpp src/network.h
|
||||
src/server.cpp src/server.h
|
||||
${CHAT_EXE_RESOURCES}
|
||||
)
|
||||
|
||||
@ -255,7 +294,6 @@ if (APPLE)
|
||||
MACOSX_BUNDLE_GUI_IDENTIFIER gpt4all
|
||||
MACOSX_BUNDLE_BUNDLE_VERSION ${PROJECT_VERSION}
|
||||
MACOSX_BUNDLE_SHORT_VERSION_STRING ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}
|
||||
RESOURCE "${CHAT_EXE_RESOURCES}"
|
||||
OUTPUT_NAME gpt4all
|
||||
)
|
||||
add_dependencies(chat ggml-metal)
|
||||
@ -286,21 +324,18 @@ endif()
|
||||
target_compile_definitions(chat
|
||||
PRIVATE $<$<OR:$<CONFIG:Debug>,$<CONFIG:RelWithDebInfo>>:QT_QML_DEBUG>)
|
||||
|
||||
target_include_directories(chat PRIVATE src)
|
||||
|
||||
# usearch uses the identifier 'slots' which conflicts with Qt's 'slots' keyword
|
||||
target_compile_definitions(chat PRIVATE QT_NO_SIGNALS_SLOTS_KEYWORDS)
|
||||
|
||||
target_include_directories(chat PRIVATE usearch/include
|
||||
usearch/fp16/include)
|
||||
target_include_directories(chat PRIVATE deps/usearch/include
|
||||
deps/usearch/fp16/include)
|
||||
|
||||
if(LINUX)
|
||||
target_link_libraries(chat
|
||||
PRIVATE Qt6::Quick Qt6::Svg Qt6::HttpServer Qt6::Sql Qt6::Pdf Qt6::WaylandCompositor)
|
||||
else()
|
||||
target_link_libraries(chat
|
||||
PRIVATE Qt6::Quick Qt6::Svg Qt6::HttpServer Qt6::Sql Qt6::Pdf)
|
||||
endif()
|
||||
target_link_libraries(chat
|
||||
PRIVATE llmodel)
|
||||
PRIVATE Qt6::Core Qt6::HttpServer Qt6::Pdf Qt6::Quick Qt6::Sql Qt6::Svg)
|
||||
target_link_libraries(chat
|
||||
PRIVATE llmodel SingleApplication fmt::fmt)
|
||||
|
||||
|
||||
# -- install --
|
||||
@ -384,7 +419,7 @@ if (LLMODEL_CUDA)
|
||||
endif()
|
||||
|
||||
if (NOT APPLE)
|
||||
install(FILES "${CMAKE_BINARY_DIR}/resources/${LOCAL_EMBEDDING_MODEL}"
|
||||
install(FILES "${LOCAL_EMBEDDING_MODEL_PATH}"
|
||||
DESTINATION resources
|
||||
COMPONENT ${COMPONENT_NAME_MAIN})
|
||||
endif()
|
||||
@ -427,7 +462,7 @@ set(CPACK_PACKAGE_INSTALL_DIRECTORY ${COMPONENT_NAME_MAIN})
|
||||
set(CPACK_PACKAGE_VERSION_MAJOR ${PROJECT_VERSION_MAJOR})
|
||||
set(CPACK_PACKAGE_VERSION_MINOR ${PROJECT_VERSION_MINOR})
|
||||
SET(CPACK_PACKAGE_VERSION_PATCH ${PROJECT_VERSION_PATCH})
|
||||
set(CPACK_PACKAGE_HOMEPAGE_URL "https://gpt4all.io")
|
||||
set(CPACK_PACKAGE_HOMEPAGE_URL "https://www.nomic.ai/gpt4all")
|
||||
set(CPACK_PACKAGE_ICON "${CMAKE_CURRENT_SOURCE_DIR}/icons/gpt4all-48.png")
|
||||
set(CPACK_RESOURCE_FILE_LICENSE ${CMAKE_CURRENT_SOURCE_DIR}/LICENSE)
|
||||
set(CPACK_RESOURCE_FILE_README ${CMAKE_CURRENT_SOURCE_DIR}/README.md)
|
||||
@ -436,11 +471,12 @@ set(CPACK_CREATE_DESKTOP_LINKS "GPT4All")
|
||||
set(CPACK_IFW_PACKAGE_NAME "GPT4All")
|
||||
set(CPACK_IFW_PACKAGE_TITLE "GPT4All Installer")
|
||||
set(CPACK_IFW_PACKAGE_PUBLISHER "Nomic, Inc.")
|
||||
set(CPACK_IFW_PRODUCT_URL "https://gpt4all.io")
|
||||
set(CPACK_IFW_PRODUCT_URL "https://www.nomic.ai/gpt4all")
|
||||
set(CPACK_IFW_PACKAGE_WIZARD_STYLE "Aero")
|
||||
set(CPACK_IFW_PACKAGE_LOGO "${CMAKE_CURRENT_SOURCE_DIR}/icons/gpt4all-48.png")
|
||||
set(CPACK_IFW_PACKAGE_WINDOW_ICON "${CMAKE_CURRENT_SOURCE_DIR}/icons/gpt4all-32.png")
|
||||
set(CPACK_IFW_PACKAGE_WIZARD_SHOW_PAGE_LIST OFF)
|
||||
set(CPACK_IFW_PACKAGE_CONTROL_SCRIPT "${CMAKE_CURRENT_SOURCE_DIR}/cmake/installer_control.qs")
|
||||
|
||||
include(InstallRequiredSystemLibraries)
|
||||
include(CPack)
|
||||
@ -453,7 +489,7 @@ endif()
|
||||
cpack_ifw_configure_component(${COMPONENT_NAME_MAIN} ESSENTIAL FORCED_INSTALLATION)
|
||||
cpack_ifw_configure_component(${COMPONENT_NAME_MAIN} VERSION ${APP_VERSION})
|
||||
cpack_ifw_configure_component(${COMPONENT_NAME_MAIN} LICENSES "MIT LICENSE" ${CPACK_RESOURCE_FILE_LICENSE})
|
||||
cpack_ifw_configure_component(${COMPONENT_NAME_MAIN} SCRIPT "${CMAKE_CURRENT_SOURCE_DIR}/cmake/installerscript.qs")
|
||||
cpack_ifw_configure_component(${COMPONENT_NAME_MAIN} SCRIPT "${CMAKE_CURRENT_SOURCE_DIR}/cmake/installer_component.qs")
|
||||
cpack_ifw_configure_component(${COMPONENT_NAME_MAIN} REPLACES "gpt4all-chat") #Was used in very earliest prototypes
|
||||
|
||||
if (GPT4ALL_LOCALHOST)
|
||||
|
@ -11,7 +11,7 @@ GPT-J model by following build instructions below.
|
||||
|
||||
## Install
|
||||
|
||||
One click installers for macOS, Linux, and Windows at https://gpt4all.io
|
||||
One click installers for macOS, Linux, and Windows at https://www.nomic.ai/gpt4all
|
||||
|
||||
## Features
|
||||
|
||||
|
@ -1,109 +1,106 @@
|
||||
# Building gpt4all-chat from source
|
||||
|
||||
Depending upon your operating system, there are many ways that Qt is distributed.
|
||||
Here is the recommended method for getting the Qt dependency installed to setup and build
|
||||
gpt4all-chat from source.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
You will need a compiler. On Windows, you should install Visual Studio with the C++ Development components. On macOS, you will need the full version of Xcode—Xcode Command Line Tools lacks certain required tools. On Linux, you will need a GCC or Clang toolchain with C++ support.
|
||||
|
||||
On Windows and Linux, building GPT4All with full GPU support requires the [Vulkan SDK](https://vulkan.lunarg.com/sdk/home) and the latest [CUDA Toolkit](https://developer.nvidia.com/cuda-downloads).
|
||||
|
||||
## Note for Linux users
|
||||
|
||||
Linux users may install Qt via their distro's official packages instead of using the Qt installer. You need at least Qt 6.5, with support for QPdf and the Qt HTTP Server. It should be straightforward to build with just cmake and make, but you may continue to follow these instructions to build with Qt Creator.
|
||||
|
||||
On Arch Linux, this looks like:
|
||||
```
|
||||
sudo pacman -S --needed base-devel qt6-base qt6-declarative qt6-wayland qt6-svg qt6-httpserver qt6-webengine qt6-5compat qt6-shadertools qtcreator cmake ninja
|
||||
```
|
||||
|
||||
On Ubuntu 23.04, this looks like:
|
||||
```
|
||||
sudo apt install build-essential qt6-base-dev qt6-declarative-dev qt6-wayland-dev qt6-svg-dev qt6-httpserver-dev qt6-webengine-dev libqt6core5compat6 qml6-module-qt5compat-graphicaleffects libqt6shadertools6 qtcreator cmake ninja-build
|
||||
```
|
||||
|
||||
On Fedora 39, this looks like:
|
||||
```
|
||||
sudo dnf install make gcc gcc-c++ qt6-qtbase-devel qt6-qtdeclarative-devel qt6-qtwayland-devel qt6-qtsvg-devel qt6-qthttpserver-devel qt6-qtwebengine-devel qt6-qt5compat qt5-qtgraphicaleffects qt6-qtshadertools qt-creator cmake ninja-build
|
||||
```
|
||||
|
||||
## Download Qt
|
||||
|
||||
- Go to https://login.qt.io/register to create a free Qt account.
|
||||
- Download the Qt Online Installer for your OS from here: https://www.qt.io/download-qt-installer-oss
|
||||
- Sign into the installer.
|
||||
- Agree to the terms of the (L)GPL 3 license.
|
||||
- Select whether you would like to send anonymous usage statistics to Qt.
|
||||
- On the Installation Folder page, leave the default installation path, and select "Custom Installation".
|
||||
|
||||
## Customize the installation
|
||||
|
||||
![image](https://github.com/nomic-ai/gpt4all-chat/assets/10168/c6e999e5-cc8a-4dfc-8065-b59139e8c7ae)
|
||||
|
||||
Under "Qt", find the latest Qt 6.x release.
|
||||
|
||||
Under this release (e.g. Qt 6.5.0), select the target platform:
|
||||
- On macOS, it is just called "macOS".
|
||||
- On Windows, it is called "MSVC 2019 64-bit" (for 64-bit x86 CPUs). MinGW has not been tested.
|
||||
|
||||
Under this release, select the following additional components:
|
||||
- Qt Quick 3D
|
||||
- Qt Wayland Compositor (for Linux only)
|
||||
- Qt 5 Compatibility Module
|
||||
- Qt Shader Tools
|
||||
- Additional Libraries:
|
||||
- Qt HTTP Server
|
||||
- Qt PDF
|
||||
- Qt Debug information Files
|
||||
|
||||
Under Developer and Designer Tools, select the following components:
|
||||
- Qt Creator
|
||||
- Qt Creator CDB Debugger Support (for Windows only)
|
||||
- Debugging Tools for Windows (for Windows only)
|
||||
- CMake
|
||||
- Ninja
|
||||
|
||||
Agree to the license and complete the installation.
|
||||
|
||||
## Download the source code
|
||||
|
||||
You must use git to download the source code for gpt4all:
|
||||
```
|
||||
git clone --recurse-submodules https://github.com/nomic-ai/gpt4all
|
||||
```
|
||||
|
||||
Note the use of --recurse-submodules, which makes sure the necessary dependencies are downloaded inside the repo. This is why you cannot simply download a zip archive.
|
||||
|
||||
Windows users: To install git for Windows, see https://git-scm.com/downloads. Once it is installed, you should be able to shift-right click in any folder, "Open PowerShell window here" (or similar, depending on the version of Windows), and run the above command.
|
||||
|
||||
## Open gpt4all-chat in Qt Creator
|
||||
|
||||
Open Qt Creator. Navigate to File > Open File or Project, find the "gpt4all-chat" folder inside the freshly cloned repository, and select CMakeLists.txt.
|
||||
|
||||
![image](https://github.com/nomic-ai/gpt4all-chat/assets/10168/3d3e2743-2a1d-43d6-9e55-62f7f4306de7)
|
||||
|
||||
## Configure project
|
||||
|
||||
You can now expand the "Details" section next to the build kit. It is best to uncheck all but one build configuration, e.g. "Release", which will produce optimized binaries that are not useful for debugging.
|
||||
|
||||
Click "Configure Project", and wait for it to complete.
|
||||
|
||||
![image](https://github.com/nomic-ai/gpt4all-chat/assets/10168/44d5aafb-a95d-434b-ba2a-a3138c0e49a0)
|
||||
|
||||
## Build project
|
||||
|
||||
Now that the project has been configured, click the hammer button on the left sidebar to build the project.
|
||||
|
||||
![image](https://github.com/nomic-ai/gpt4all-chat/assets/10168/43cd7b42-32f0-4efa-9612-d51f85637103)
|
||||
|
||||
## Run project
|
||||
|
||||
Click the play button on the left sidebar to run the Chat UI.
|
||||
|
||||
![image](https://github.com/nomic-ai/gpt4all-chat/assets/10168/611ea795-bdcd-4feb-a466-eb1c2e936e7e)
|
||||
|
||||
## Updating the downloaded source code
|
||||
|
||||
You do not need to make a fresh clone of the source code every time. To update it, you may open a terminal/command prompt in the repository, run `git pull`, and then `git submodule update --init --recursive`.
|
||||
# Building gpt4all-chat from source
|
||||
|
||||
Depending upon your operating system, there are many ways that Qt is distributed.
|
||||
Here is the recommended method for getting the Qt dependency installed to setup and build
|
||||
gpt4all-chat from source.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
You will need a compiler. On Windows, you should install Visual Studio with the C++ Development components. On macOS, you will need the full version of Xcode—Xcode Command Line Tools lacks certain required tools. On Linux, you will need a GCC or Clang toolchain with C++ support.
|
||||
|
||||
On Windows and Linux, building GPT4All with full GPU support requires the [Vulkan SDK](https://vulkan.lunarg.com/sdk/home) and the latest [CUDA Toolkit](https://developer.nvidia.com/cuda-downloads).
|
||||
|
||||
## Note for Linux users
|
||||
|
||||
Linux users may install Qt via their distro's official packages instead of using the Qt installer. You need at least Qt 6.5, with support for QPdf and the Qt HTTP Server. You may build from the CLI using CMake and Ninja, or with Qt Creator as described later in this document.
|
||||
|
||||
On Arch Linux, this looks like:
|
||||
```
|
||||
sudo pacman -S --needed cmake gcc ninja qt6-5compat qt6-base qt6-declarative qt6-httpserver qt6-svg qtcreator
|
||||
```
|
||||
|
||||
On Ubuntu 23.04, this looks like:
|
||||
```
|
||||
sudo apt install cmake g++ libgl-dev libqt6core5compat6 ninja-build qml6-module-qt5compat-graphicaleffects qt6-base-dev qt6-declarative-dev qt6-httpserver-dev qt6-svg-dev qtcreator
|
||||
```
|
||||
|
||||
On Fedora 39, this looks like:
|
||||
```
|
||||
sudo dnf install cmake gcc-c++ ninja-build qt-creator qt5-qtgraphicaleffects qt6-qt5compat qt6-qtbase-devel qt6-qtdeclarative-devel qt6-qthttpserver-devel qt6-qtsvg-devel
|
||||
```
|
||||
|
||||
## Download Qt
|
||||
|
||||
- Go to https://login.qt.io/register to create a free Qt account.
|
||||
- Download the Qt Online Installer for your OS from here: https://www.qt.io/download-qt-installer-oss
|
||||
- Sign into the installer.
|
||||
- Agree to the terms of the (L)GPL 3 license.
|
||||
- Select whether you would like to send anonymous usage statistics to Qt.
|
||||
- On the Installation Folder page, leave the default installation path, and select "Custom Installation".
|
||||
|
||||
## Customize the installation
|
||||
|
||||
![image](https://github.com/nomic-ai/gpt4all-chat/assets/10168/c6e999e5-cc8a-4dfc-8065-b59139e8c7ae)
|
||||
|
||||
Under "Qt", find the latest Qt 6.x release.
|
||||
|
||||
Under this release (e.g. Qt 6.5.0), select the target platform:
|
||||
- On macOS, it is just called "macOS".
|
||||
- On Windows, it is called "MSVC 2019 64-bit" (for 64-bit x86 CPUs). MinGW has not been tested.
|
||||
|
||||
Under this release, select the following additional components:
|
||||
- Qt 5 Compatibility Module
|
||||
- Additional Libraries:
|
||||
- Qt HTTP Server
|
||||
- Qt PDF
|
||||
- Qt Debug information Files
|
||||
|
||||
Under Developer and Designer Tools, select the following components:
|
||||
- Qt Creator
|
||||
- Qt Creator CDB Debugger Support (for Windows only)
|
||||
- Debugging Tools for Windows (for Windows only)
|
||||
- CMake
|
||||
- Ninja
|
||||
|
||||
Agree to the license and complete the installation.
|
||||
|
||||
## Download the source code
|
||||
|
||||
You must use git to download the source code for gpt4all:
|
||||
```
|
||||
git clone --recurse-submodules https://github.com/nomic-ai/gpt4all
|
||||
```
|
||||
|
||||
Note the use of --recurse-submodules, which makes sure the necessary dependencies are downloaded inside the repo. This is why you cannot simply download a zip archive.
|
||||
|
||||
Windows users: To install git for Windows, see https://git-scm.com/downloads. Once it is installed, you should be able to shift-right click in any folder, "Open PowerShell window here" (or similar, depending on the version of Windows), and run the above command.
|
||||
|
||||
## Open gpt4all-chat in Qt Creator
|
||||
|
||||
Open Qt Creator. Navigate to File > Open File or Project, find the "gpt4all-chat" folder inside the freshly cloned repository, and select CMakeLists.txt.
|
||||
|
||||
![image](https://github.com/nomic-ai/gpt4all-chat/assets/10168/3d3e2743-2a1d-43d6-9e55-62f7f4306de7)
|
||||
|
||||
## Configure project
|
||||
|
||||
You can now expand the "Details" section next to the build kit. It is best to uncheck all but one build configuration, e.g. "Release", which will produce optimized binaries that are not useful for debugging.
|
||||
|
||||
Click "Configure Project", and wait for it to complete.
|
||||
|
||||
![image](https://github.com/nomic-ai/gpt4all-chat/assets/10168/44d5aafb-a95d-434b-ba2a-a3138c0e49a0)
|
||||
|
||||
## Build project
|
||||
|
||||
Now that the project has been configured, click the hammer button on the left sidebar to build the project.
|
||||
|
||||
![image](https://github.com/nomic-ai/gpt4all-chat/assets/10168/43cd7b42-32f0-4efa-9612-d51f85637103)
|
||||
|
||||
## Run project
|
||||
|
||||
Click the play button on the left sidebar to run the Chat UI.
|
||||
|
||||
![image](https://github.com/nomic-ai/gpt4all-chat/assets/10168/611ea795-bdcd-4feb-a466-eb1c2e936e7e)
|
||||
|
||||
## Updating the downloaded source code
|
||||
|
||||
You do not need to make a fresh clone of the source code every time. To update it, you may open a terminal/command prompt in the repository, run `git pull`, and then `git submodule update --init --recursive`.
|
||||
|
@ -3,7 +3,7 @@ function(sign_target_windows tgt)
|
||||
add_custom_command(TARGET ${tgt}
|
||||
POST_BUILD
|
||||
COMMAND AzureSignTool.exe sign
|
||||
-du "https://gpt4all.io/index.html"
|
||||
-du "https://www.nomic.ai/gpt4all"
|
||||
-kvu https://gpt4all.vault.azure.net
|
||||
-kvi "$Env{AZSignGUID}"
|
||||
-kvs "$Env{AZSignPWD}"
|
||||
@ -14,4 +14,4 @@ function(sign_target_windows tgt)
|
||||
$<TARGET_FILE:${tgt}>
|
||||
)
|
||||
endif()
|
||||
endfunction()
|
||||
endfunction()
|
||||
|
@ -6,8 +6,7 @@ Component.prototype.beginInstallation = function() {
|
||||
targetDirectory = installer.value("TargetDir");
|
||||
};
|
||||
|
||||
Component.prototype.createOperations = function()
|
||||
{
|
||||
Component.prototype.createOperations = function() {
|
||||
try {
|
||||
// call the base create operations function
|
||||
component.createOperations();
|
||||
@ -30,7 +29,7 @@ Component.prototype.createOperations = function()
|
||||
"workingDirectory=" + targetDirectory + "/bin",
|
||||
"iconPath=" + targetDirectory + "/gpt4all.ico",
|
||||
"iconId=0", "description=Open GPT4All");
|
||||
} else if (systemInfo.productType === "macos" || systemInfo.productType === "osx") {
|
||||
} else if (systemInfo.productType === "macos") {
|
||||
var gpt4allAppPath = targetDirectory + "/bin/gpt4all.app";
|
||||
var symlinkPath = targetDirectory + "/../GPT4All.app";
|
||||
// Remove the symlink if it already exists
|
||||
@ -56,7 +55,7 @@ Component.prototype.createOperationsForArchive = function(archive)
|
||||
{
|
||||
component.createOperationsForArchive(archive);
|
||||
|
||||
if (systemInfo.productType === "macos" || systemInfo.productType === "osx") {
|
||||
if (systemInfo.productType === "macos") {
|
||||
var uninstallTargetDirectory = installer.value("TargetDir");
|
||||
var symlinkPath = uninstallTargetDirectory + "/../GPT4All.app";
|
||||
|
44  gpt4all-chat/cmake/installer_control.qs (new file)
@@ -0,0 +1,44 @@
|
||||
var finishedText = null;
|
||||
|
||||
function cancelInstaller(message) {
|
||||
installer.setDefaultPageVisible(QInstaller.Introduction, false);
|
||||
installer.setDefaultPageVisible(QInstaller.TargetDirectory, false);
|
||||
installer.setDefaultPageVisible(QInstaller.ComponentSelection, false);
|
||||
installer.setDefaultPageVisible(QInstaller.ReadyForInstallation, false);
|
||||
installer.setDefaultPageVisible(QInstaller.StartMenuSelection, false);
|
||||
installer.setDefaultPageVisible(QInstaller.PerformInstallation, false);
|
||||
installer.setDefaultPageVisible(QInstaller.LicenseCheck, false);
|
||||
finishedText = message;
|
||||
installer.setCanceled();
|
||||
}
|
||||
|
||||
function vercmp(a, b) {
|
||||
return a.localeCompare(b, undefined, { numeric: true, sensitivity: "base" });
|
||||
}
|
||||
|
||||
function Controller() {
|
||||
}
|
||||
|
||||
Controller.prototype.TargetDirectoryPageCallback = function() {
|
||||
var failedReq = null;
|
||||
if (systemInfo.productType === "ubuntu" && vercmp(systemInfo.productVersion, "22.04") < 0) {
|
||||
failedReq = "Ubuntu 22.04 LTS";
|
||||
} else if (systemInfo.productType === "macos" && vercmp(systemInfo.productVersion, "12.6") < 0) {
|
||||
failedReq = "macOS Monterey 12.6";
|
||||
}
|
||||
|
||||
if (failedReq !== null) {
|
||||
cancelInstaller(
|
||||
"Installation cannot continue because GPT4All does not support your operating system: " +
|
||||
`${systemInfo.prettyProductName}<br/><br/>` +
|
||||
`GPT4All requires ${failedReq} or newer.`
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
Controller.prototype.FinishedPageCallback = function() {
|
||||
const widget = gui.currentPageWidget();
|
||||
if (widget != null && finishedText != null) {
|
||||
widget.MessageLabel.setText(finishedText);
|
||||
}
|
||||
}
|
1  gpt4all-chat/deps/SingleApplication (submodule)
@@ -0,0 +1 @@
+Subproject commit 21bdef01eddcbd78044eea1d50b9dee08d218ff2
1  gpt4all-chat/deps/fmt (submodule)
@@ -0,0 +1 @@
+Subproject commit 0c9fce2ffefecfdce794e1859584e25877b7b592
@ -32,7 +32,7 @@
|
||||
<image>https://raw.githubusercontent.com/nomic-ai/gpt4all/main/gpt4all-chat/flatpak-manifest/screenshots/model.png</image>
|
||||
</screenshot>
|
||||
</screenshots>
|
||||
<url type="homepage">https://gpt4all.io</url>
|
||||
<url type="homepage">https://www.nomic.ai/gpt4all</url>
|
||||
<url type="bugtracker">https://github.com/nomic-ai/gpt4all/issues</url>
|
||||
<url type="vcs-browser">https://github.com/nomic-ai/gpt4all</url>
|
||||
<releases>
|
||||
@ -46,4 +46,4 @@
|
||||
<content_attribute id="language-humor">moderate</content_attribute>
|
||||
<content_attribute id="language-discrimination">mild</content_attribute>
|
||||
</content_rating>
|
||||
</component>
|
||||
</component>
|
||||
|
@ -15,10 +15,10 @@ import mysettings
|
||||
|
||||
Window {
|
||||
id: window
|
||||
width: 1920
|
||||
height: 1080
|
||||
minimumWidth: 1280
|
||||
minimumHeight: 720
|
||||
width: 1440
|
||||
height: 810
|
||||
minimumWidth: 658 + 470 * theme.fontScale
|
||||
minimumHeight: 384 + 160 * theme.fontScale
|
||||
visible: true
|
||||
title: qsTr("GPT4All v%1").arg(Qt.application.version)
|
||||
|
||||
@ -422,7 +422,7 @@ Window {
|
||||
return qsTr("The datalake is enabled")
|
||||
else if (currentChat.modelInfo.isOnline)
|
||||
return qsTr("Using a network model")
|
||||
else if (currentChat.modelInfo.isOnline)
|
||||
else if (currentChat.isServer)
|
||||
return qsTr("Server mode is enabled")
|
||||
return ""
|
||||
}
|
||||
|
@ -1,10 +1,9 @@
|
||||
## Latest News
|
||||
|
||||
Version 3.2.1 has now been released which fixes an issue with poor quality responses on NVIDIA GPUs in 3.2.0. The new 3.2 minor version brings:
|
||||
GPT4All v3.3.0 was released on September 23rd. Changes include:
|
||||
|
||||
* **Official Language Translations**: Translations for Simplified Chinese, Traditional Chinese, Italian, Portuguese, Romanian, and Spanish.<br/>
|
||||
Go to Settings > Language and Locale to change the application language.
|
||||
* **Context Window Improvements**: Significantly faster context recalculation when context runs out
|
||||
* **Bugfixes**: Models no longer stop generating when they run out of context
|
||||
|
||||
Also, Qwen2-1.5B-Instruct was recently added to the model list, which has good Chinese support.
|
||||
* **UI Improvements**: The minimum window size now adapts to the font size. A few labels and links have been fixed. The Embeddings Device selection of "Auto"/"Application default" works again. The window icon is now set on Linux. The antenna icon now displays when the API server is listening.
|
||||
* **Single Instance**: Only one instance of GPT4All can be opened at a time. This is now enforced.
|
||||
* **Greedy Sampling**: Set temperature to zero to enable greedy sampling.
|
||||
* **API Server Changes**: The built-in API server now responds correctly to both legacy completions, and chats with message history. Also, it now uses the system prompt configured in the UI.
|
||||
* **Translation Improvements**: The Italian, Romanian, and Traditional Chinese translations have been updated.
|
||||
|
File diff suppressed because it is too large
@ -32,15 +32,15 @@ MySettingsTab {
|
||||
anchors.centerIn: parent
|
||||
modal: false
|
||||
padding: 20
|
||||
width: 40 + 400 * theme.fontScale
|
||||
Text {
|
||||
anchors.fill: parent
|
||||
horizontalAlignment: Text.AlignJustify
|
||||
text: qsTr("ERROR: Update system could not find the MaintenanceTool used<br>
|
||||
to check for updates!<br><br>
|
||||
Did you install this application using the online installer? If so,<br>
|
||||
the MaintenanceTool executable should be located one directory<br>
|
||||
above where this application resides on your filesystem.<br><br>
|
||||
If you can't start it manually, then I'm afraid you'll have to<br>
|
||||
reinstall.")
|
||||
text: qsTr("ERROR: Update system could not find the MaintenanceTool used to check for updates!<br/><br/>"
|
||||
+ "Did you install this application using the online installer? If so, the MaintenanceTool "
|
||||
+ "executable should be located one directory above where this application resides on your "
|
||||
+ "filesystem.<br/><br/>If you can't start it manually, then I'm afraid you'll have to reinstall.")
|
||||
wrapMode: Text.WordWrap
|
||||
color: theme.textErrorColor
|
||||
font.pixelSize: theme.fontSizeLarge
|
||||
Accessible.role: Accessible.Dialog
|
||||
@ -502,7 +502,7 @@ MySettingsTab {
|
||||
}
|
||||
MySettingsLabel {
|
||||
id: serverChatLabel
|
||||
text: qsTr("Enable Local Server")
|
||||
text: qsTr("Enable Local API Server")
|
||||
helpText: qsTr("Expose an OpenAI-Compatible server to localhost. WARNING: Results in increased resource usage.")
|
||||
Layout.row: 13
|
||||
Layout.column: 0
|
||||
|
@ -76,8 +76,8 @@ Rectangle {
|
||||
|
||||
MyWelcomeButton {
|
||||
Layout.fillWidth: true
|
||||
Layout.maximumWidth: 500
|
||||
Layout.preferredHeight: 150
|
||||
Layout.maximumWidth: 150 + 200 * theme.fontScale
|
||||
Layout.preferredHeight: 40 + 90 * theme.fontScale
|
||||
text: qsTr("Start Chatting")
|
||||
description: qsTr("Chat with any LLM")
|
||||
imageSource: "qrc:/gpt4all/icons/chat.svg"
|
||||
@ -87,8 +87,8 @@ Rectangle {
|
||||
}
|
||||
MyWelcomeButton {
|
||||
Layout.fillWidth: true
|
||||
Layout.maximumWidth: 500
|
||||
Layout.preferredHeight: 150
|
||||
Layout.maximumWidth: 150 + 200 * theme.fontScale
|
||||
Layout.preferredHeight: 40 + 90 * theme.fontScale
|
||||
text: qsTr("LocalDocs")
|
||||
description: qsTr("Chat with your local files")
|
||||
imageSource: "qrc:/gpt4all/icons/db.svg"
|
||||
@ -98,8 +98,8 @@ Rectangle {
|
||||
}
|
||||
MyWelcomeButton {
|
||||
Layout.fillWidth: true
|
||||
Layout.maximumWidth: 500
|
||||
Layout.preferredHeight: 150
|
||||
Layout.maximumWidth: 150 + 200 * theme.fontScale
|
||||
Layout.preferredHeight: 40 + 90 * theme.fontScale
|
||||
text: qsTr("Find Models")
|
||||
description: qsTr("Explore and download models")
|
||||
imageSource: "qrc:/gpt4all/icons/models.svg"
|
||||
@ -254,9 +254,9 @@ Rectangle {
|
||||
spacing: 40
|
||||
|
||||
MyFancyLink {
|
||||
text: qsTr("GPT4All.io")
|
||||
text: qsTr("nomic.ai")
|
||||
imageSource: "qrc:/gpt4all/icons/globe.svg"
|
||||
onClicked: { Qt.openUrlExternally("https://gpt4all.io") }
|
||||
onClicked: { Qt.openUrlExternally("https://www.nomic.ai/gpt4all") }
|
||||
rightPadding: 15
|
||||
}
|
||||
}
|
||||
|
@ -456,7 +456,7 @@ MySettingsTab {
|
||||
MySettingsLabel {
|
||||
id: topPLabel
|
||||
text: qsTr("Top-P")
|
||||
helpText: qsTr("Nucleus Sampling factor. Lower -> more predicatable.")
|
||||
helpText: qsTr("Nucleus Sampling factor. Lower -> more predictable.")
|
||||
Layout.row: 2
|
||||
Layout.column: 0
|
||||
Layout.maximumWidth: 300 * theme.fontScale
|
||||
|
@ -52,11 +52,18 @@ MyDialog {
|
||||
MyTextArea {
|
||||
id: textOptIn
|
||||
width: 1024 - 40
|
||||
text: qsTr("By enabling this feature, you will be able to participate in the democratic process of training a large language model by contributing data for future model improvements.
|
||||
|
||||
When a GPT4All model responds to you and you have opted-in, your conversation will be sent to the GPT4All Open Source Datalake. Additionally, you can like/dislike its response. If you dislike a response, you can suggest an alternative response. This data will be collected and aggregated in the GPT4All Datalake.
|
||||
|
||||
NOTE: By turning on this feature, you will be sending your data to the GPT4All Open Source Datalake. You should have no expectation of chat privacy when this feature is enabled. You should; however, have an expectation of an optional attribution if you wish. Your chat data will be openly available for anyone to download and will be used by Nomic AI to improve future GPT4All models. Nomic AI will retain all attribution information attached to your data and you will be credited as a contributor to any GPT4All model release that uses your data!")
|
||||
text: qsTr("By enabling this feature, you will be able to participate in the democratic process of "
|
||||
+ "training a large language model by contributing data for future model improvements.\n\n"
|
||||
+ "When a GPT4All model responds to you and you have opted-in, your conversation will be sent to "
|
||||
+ "the GPT4All Open Source Datalake. Additionally, you can like/dislike its response. If you "
|
||||
+ "dislike a response, you can suggest an alternative response. This data will be collected and "
|
||||
+ "aggregated in the GPT4All Datalake.\n\n"
|
||||
+ "NOTE: By turning on this feature, you will be sending your data to the GPT4All Open Source "
|
||||
+ "Datalake. You should have no expectation of chat privacy when this feature is enabled. You "
|
||||
+ "should; however, have an expectation of an optional attribution if you wish. Your chat data "
|
||||
+ "will be openly available for anyone to download and will be used by Nomic AI to improve "
|
||||
+ "future GPT4All models. Nomic AI will retain all attribution information attached to your data "
|
||||
+ "and you will be credited as a contributor to any GPT4All model release that uses your data!")
|
||||
focus: false
|
||||
readOnly: true
|
||||
Accessible.role: Accessible.Paragraph
|
||||
|
@ -64,7 +64,7 @@ MyDialog {
|
||||
id: welcome
|
||||
width: 1024 - 40
|
||||
textFormat: TextEdit.MarkdownText
|
||||
text: qsTr("### Release notes\n%1### Contributors\n%2").arg(Download.releaseInfo.notes).arg(Download.releaseInfo.contributors)
|
||||
text: qsTr("### Release Notes\n%1<br/>\n### Contributors\n%2").arg(Download.releaseInfo.notes).arg(Download.releaseInfo.contributors)
|
||||
focus: false
|
||||
readOnly: true
|
||||
Accessible.role: Accessible.Paragraph
|
||||
|
@ -1,468 +0,0 @@
|
||||
#include "server.h"
|
||||
|
||||
#include "chat.h"
|
||||
#include "modellist.h"
|
||||
#include "mysettings.h"
|
||||
|
||||
#include <QByteArray>
|
||||
#include <QDateTime>
|
||||
#include <QDebug>
|
||||
#include <QHostAddress>
|
||||
#include <QHttpServer>
|
||||
#include <QHttpServerResponder>
|
||||
#include <QJsonArray>
|
||||
#include <QJsonDocument>
|
||||
#include <QJsonObject>
|
||||
#include <QJsonValue>
|
||||
#include <QPair>
|
||||
#include <Qt>
|
||||
#include <QtLogging>
|
||||
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <type_traits>
|
||||
#include <utility>
|
||||
|
||||
using namespace Qt::Literals::StringLiterals;
|
||||
|
||||
//#define DEBUG
|
||||
|
||||
static inline QJsonObject modelToJson(const ModelInfo &info)
|
||||
{
|
||||
QJsonObject model;
|
||||
model.insert("id", info.name());
|
||||
model.insert("object", "model");
|
||||
model.insert("created", 0);
|
||||
model.insert("owned_by", "humanity");
|
||||
model.insert("root", info.name());
|
||||
model.insert("parent", QJsonValue::Null);
|
||||
|
||||
QJsonArray permissions;
|
||||
QJsonObject permissionObj;
|
||||
permissionObj.insert("id", "foobarbaz");
|
||||
permissionObj.insert("object", "model_permission");
|
||||
permissionObj.insert("created", 0);
|
||||
permissionObj.insert("allow_create_engine", false);
|
||||
permissionObj.insert("allow_sampling", false);
|
||||
permissionObj.insert("allow_logprobs", false);
|
||||
permissionObj.insert("allow_search_indices", false);
|
||||
permissionObj.insert("allow_view", true);
|
||||
permissionObj.insert("allow_fine_tuning", false);
|
||||
permissionObj.insert("organization", "*");
|
||||
permissionObj.insert("group", QJsonValue::Null);
|
||||
permissionObj.insert("is_blocking", false);
|
||||
permissions.append(permissionObj);
|
||||
model.insert("permissions", permissions);
|
||||
return model;
|
||||
}
|
||||
|
||||
static inline QJsonObject resultToJson(const ResultInfo &info)
|
||||
{
|
||||
QJsonObject result;
|
||||
result.insert("file", info.file);
|
||||
result.insert("title", info.title);
|
||||
result.insert("author", info.author);
|
||||
result.insert("date", info.date);
|
||||
result.insert("text", info.text);
|
||||
result.insert("page", info.page);
|
||||
result.insert("from", info.from);
|
||||
result.insert("to", info.to);
|
||||
return result;
|
||||
}
|
||||
|
||||
Server::Server(Chat *chat)
|
||||
: ChatLLM(chat, true /*isServer*/)
|
||||
, m_chat(chat)
|
||||
, m_server(nullptr)
|
||||
{
|
||||
connect(this, &Server::threadStarted, this, &Server::start);
|
||||
connect(this, &Server::databaseResultsChanged, this, &Server::handleDatabaseResultsChanged);
|
||||
connect(chat, &Chat::collectionListChanged, this, &Server::handleCollectionListChanged, Qt::QueuedConnection);
|
||||
}
|
||||
|
||||
Server::~Server()
|
||||
{
|
||||
}
|
||||
|
||||
void Server::start()
|
||||
{
|
||||
m_server = new QHttpServer(this);
|
||||
if (!m_server->listen(QHostAddress::LocalHost, MySettings::globalInstance()->networkPort())) {
|
||||
qWarning() << "ERROR: Unable to start the server";
|
||||
return;
|
||||
}
|
||||
|
||||
m_server->route("/v1/models", QHttpServerRequest::Method::Get,
|
||||
[](const QHttpServerRequest &request) {
|
||||
if (!MySettings::globalInstance()->serverChat())
|
||||
return QHttpServerResponse(QHttpServerResponder::StatusCode::Unauthorized);
|
||||
|
||||
const QList<ModelInfo> modelList = ModelList::globalInstance()->selectableModelList();
|
||||
QJsonObject root;
|
||||
root.insert("object", "list");
|
||||
QJsonArray data;
|
||||
for (const ModelInfo &info : modelList) {
|
||||
Q_ASSERT(info.installed);
|
||||
if (!info.installed)
|
||||
continue;
|
||||
data.append(modelToJson(info));
|
||||
}
|
||||
root.insert("data", data);
|
||||
return QHttpServerResponse(root);
|
||||
}
|
||||
);
|
||||
|
||||
m_server->route("/v1/models/<arg>", QHttpServerRequest::Method::Get,
|
||||
[](const QString &model, const QHttpServerRequest &request) {
|
||||
if (!MySettings::globalInstance()->serverChat())
|
||||
return QHttpServerResponse(QHttpServerResponder::StatusCode::Unauthorized);
|
||||
|
||||
const QList<ModelInfo> modelList = ModelList::globalInstance()->selectableModelList();
|
||||
QJsonObject object;
|
||||
for (const ModelInfo &info : modelList) {
|
||||
Q_ASSERT(info.installed);
|
||||
if (!info.installed)
|
||||
continue;
|
||||
|
||||
if (model == info.name()) {
|
||||
object = modelToJson(info);
|
||||
break;
|
||||
}
|
||||
}
|
||||
return QHttpServerResponse(object);
|
||||
}
|
||||
);
|
||||
|
||||
m_server->route("/v1/completions", QHttpServerRequest::Method::Post,
|
||||
[this](const QHttpServerRequest &request) {
|
||||
if (!MySettings::globalInstance()->serverChat())
|
||||
return QHttpServerResponse(QHttpServerResponder::StatusCode::Unauthorized);
|
||||
return handleCompletionRequest(request, false);
|
||||
}
|
||||
);
|
||||
|
||||
m_server->route("/v1/chat/completions", QHttpServerRequest::Method::Post,
|
||||
[this](const QHttpServerRequest &request) {
|
||||
if (!MySettings::globalInstance()->serverChat())
|
||||
return QHttpServerResponse(QHttpServerResponder::StatusCode::Unauthorized);
|
||||
return handleCompletionRequest(request, true);
|
||||
}
|
||||
);
|
||||
|
||||
// Respond with code 405 to wrong HTTP methods:
|
||||
m_server->route("/v1/models", QHttpServerRequest::Method::Post,
|
||||
[](const QHttpServerRequest &request) {
|
||||
if (!MySettings::globalInstance()->serverChat())
|
||||
return QHttpServerResponse(QHttpServerResponder::StatusCode::Unauthorized);
|
||||
return QHttpServerResponse(
|
||||
QJsonDocument::fromJson("{\"error\": {\"message\": \"Not allowed to POST on /v1/models."
|
||||
" (HINT: Perhaps you meant to use a different HTTP method?)\","
|
||||
" \"type\": \"invalid_request_error\", \"param\": null, \"code\": null}}").object(),
|
||||
QHttpServerResponder::StatusCode::MethodNotAllowed);
|
||||
}
|
||||
);
|
||||
|
||||
m_server->route("/v1/models/<arg>", QHttpServerRequest::Method::Post,
|
||||
[](const QString &model, const QHttpServerRequest &request) {
|
||||
if (!MySettings::globalInstance()->serverChat())
|
||||
return QHttpServerResponse(QHttpServerResponder::StatusCode::Unauthorized);
|
||||
return QHttpServerResponse(
|
||||
QJsonDocument::fromJson("{\"error\": {\"message\": \"Not allowed to POST on /v1/models/*."
|
||||
" (HINT: Perhaps you meant to use a different HTTP method?)\","
|
||||
" \"type\": \"invalid_request_error\", \"param\": null, \"code\": null}}").object(),
|
||||
QHttpServerResponder::StatusCode::MethodNotAllowed);
|
||||
}
|
||||
);
|
||||
|
||||
m_server->route("/v1/completions", QHttpServerRequest::Method::Get,
|
||||
[](const QHttpServerRequest &request) {
|
||||
if (!MySettings::globalInstance()->serverChat())
|
||||
return QHttpServerResponse(QHttpServerResponder::StatusCode::Unauthorized);
|
||||
return QHttpServerResponse(
|
||||
QJsonDocument::fromJson("{\"error\": {\"message\": \"Only POST requests are accepted.\","
|
||||
" \"type\": \"invalid_request_error\", \"param\": null, \"code\": \"method_not_supported\"}}").object(),
|
||||
QHttpServerResponder::StatusCode::MethodNotAllowed);
|
||||
}
|
||||
);
|
||||
|
||||
m_server->route("/v1/chat/completions", QHttpServerRequest::Method::Get,
|
||||
[](const QHttpServerRequest &request) {
|
||||
if (!MySettings::globalInstance()->serverChat())
|
||||
return QHttpServerResponse(QHttpServerResponder::StatusCode::Unauthorized);
|
||||
return QHttpServerResponse(
|
||||
QJsonDocument::fromJson("{\"error\": {\"message\": \"Only POST requests are accepted.\","
|
||||
" \"type\": \"invalid_request_error\", \"param\": null, \"code\": \"method_not_supported\"}}").object(),
|
||||
QHttpServerResponder::StatusCode::MethodNotAllowed);
|
||||
}
|
||||
);
|
||||
|
||||
m_server->afterRequest([] (QHttpServerResponse &&resp) {
|
||||
resp.addHeader("Access-Control-Allow-Origin", "*");
|
||||
return std::move(resp);
|
||||
});
|
||||
|
||||
connect(this, &Server::requestServerNewPromptResponsePair, m_chat,
|
||||
&Chat::serverNewPromptResponsePair, Qt::BlockingQueuedConnection);
|
||||
}
|
||||
|
||||
QHttpServerResponse Server::handleCompletionRequest(const QHttpServerRequest &request, bool isChat)
|
||||
{
|
||||
// We've been asked to do a completion...
|
||||
QJsonParseError err;
|
||||
const QJsonDocument document = QJsonDocument::fromJson(request.body(), &err);
|
||||
if (err.error || !document.isObject()) {
|
||||
std::cerr << "ERROR: invalid json in completions body" << std::endl;
|
||||
return QHttpServerResponse(QHttpServerResponder::StatusCode::NoContent);
|
||||
}
|
||||
#if defined(DEBUG)
|
||||
printf("/v1/completions %s\n", qPrintable(document.toJson(QJsonDocument::Indented)));
|
||||
fflush(stdout);
|
||||
#endif
|
||||
const QJsonObject body = document.object();
|
||||
if (!body.contains("model")) { // required
|
||||
std::cerr << "ERROR: completions contains no model" << std::endl;
|
||||
return QHttpServerResponse(QHttpServerResponder::StatusCode::NoContent);
|
||||
}
|
||||
QJsonArray messages;
|
||||
if (isChat) {
|
||||
if (!body.contains("messages")) {
|
||||
std::cerr << "ERROR: chat completions contains no messages" << std::endl;
|
||||
return QHttpServerResponse(QHttpServerResponder::StatusCode::NoContent);
|
||||
}
|
||||
messages = body["messages"].toArray();
|
||||
}
|
||||
|
||||
const QString modelRequested = body["model"].toString();
|
||||
ModelInfo modelInfo = ModelList::globalInstance()->defaultModelInfo();
|
||||
const QList<ModelInfo> modelList = ModelList::globalInstance()->selectableModelList();
|
||||
for (const ModelInfo &info : modelList) {
|
||||
Q_ASSERT(info.installed);
|
||||
if (!info.installed)
|
||||
continue;
|
||||
if (modelRequested == info.name() || modelRequested == info.filename()) {
|
||||
modelInfo = info;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// We only support one prompt for now
|
||||
QList<QString> prompts;
|
||||
if (body.contains("prompt")) {
|
||||
QJsonValue promptValue = body["prompt"];
|
||||
if (promptValue.isString())
|
||||
prompts.append(promptValue.toString());
|
||||
else {
|
||||
QJsonArray array = promptValue.toArray();
|
||||
for (const QJsonValue &v : array)
|
||||
prompts.append(v.toString());
|
||||
}
|
||||
} else
|
||||
prompts.append(" ");
|
||||
|
||||
int max_tokens = 16;
|
||||
if (body.contains("max_tokens"))
|
||||
max_tokens = body["max_tokens"].toInt();
|
||||
|
||||
float temperature = 1.f;
|
||||
if (body.contains("temperature"))
|
||||
temperature = body["temperature"].toDouble();
|
||||
|
||||
float top_p = 1.f;
|
||||
if (body.contains("top_p"))
|
||||
top_p = body["top_p"].toDouble();
|
||||
|
||||
float min_p = 0.f;
|
||||
if (body.contains("min_p"))
|
||||
min_p = body["min_p"].toDouble();
|
||||
|
||||
int n = 1;
|
||||
if (body.contains("n"))
|
||||
n = body["n"].toInt();
|
||||
|
||||
int logprobs = -1; // supposed to be null by default??
|
||||
if (body.contains("logprobs"))
|
||||
logprobs = body["logprobs"].toInt();
|
||||
|
||||
bool echo = false;
|
||||
if (body.contains("echo"))
|
||||
echo = body["echo"].toBool();
|
||||
|
||||
// We currently don't support any of the following...
|
||||
#if 0
|
||||
// FIXME: Need configurable reverse prompts
|
||||
QList<QString> stop;
|
||||
if (body.contains("stop")) {
|
||||
QJsonValue stopValue = body["stop"];
|
||||
if (stopValue.isString())
|
||||
stop.append(stopValue.toString());
|
||||
else {
|
||||
QJsonArray array = stopValue.toArray();
|
||||
for (QJsonValue v : array)
|
||||
stop.append(v.toString());
|
||||
}
|
||||
}
|
||||
|
||||
// FIXME: QHttpServer doesn't support server-sent events
|
||||
bool stream = false;
|
||||
if (body.contains("stream"))
|
||||
stream = body["stream"].toBool();
|
||||
|
||||
// FIXME: What does this do?
|
||||
QString suffix;
|
||||
if (body.contains("suffix"))
|
||||
suffix = body["suffix"].toString();
|
||||
|
||||
// FIXME: We don't support
|
||||
float presence_penalty = 0.f;
|
||||
if (body.contains("presence_penalty"))
|
||||
top_p = body["presence_penalty"].toDouble();
|
||||
|
||||
// FIXME: We don't support
|
||||
float frequency_penalty = 0.f;
|
||||
if (body.contains("frequency_penalty"))
|
||||
top_p = body["frequency_penalty"].toDouble();
|
||||
|
||||
// FIXME: We don't support
|
||||
int best_of = 1;
|
||||
if (body.contains("best_of"))
|
||||
logprobs = body["best_of"].toInt();
|
||||
|
||||
// FIXME: We don't need
|
||||
QString user;
|
||||
if (body.contains("user"))
|
||||
suffix = body["user"].toString();
|
||||
#endif
|
||||
|
||||
QString actualPrompt = prompts.first();
|
||||
|
||||
// if we're a chat completion we have messages which means we need to prepend these to the prompt
|
||||
if (!messages.isEmpty()) {
|
||||
QList<QString> chats;
|
||||
for (int i = 0; i < messages.count(); ++i) {
|
||||
QJsonValue v = messages.at(i);
|
||||
QString content = v.toObject()["content"].toString();
|
||||
if (!content.endsWith("\n") && i < messages.count() - 1)
|
||||
content += "\n";
|
||||
chats.append(content);
|
||||
}
|
||||
actualPrompt.prepend(chats.join("\n"));
|
||||
}
|
||||
|
||||
// adds prompt/response items to GUI
|
||||
emit requestServerNewPromptResponsePair(actualPrompt); // blocks
|
||||
|
||||
// load the new model if necessary
|
||||
setShouldBeLoaded(true);
|
||||
|
||||
if (modelInfo.filename().isEmpty()) {
|
||||
std::cerr << "ERROR: couldn't load default model " << modelRequested.toStdString() << std::endl;
|
||||
return QHttpServerResponse(QHttpServerResponder::StatusCode::BadRequest);
|
||||
} else if (!loadModel(modelInfo)) {
|
||||
std::cerr << "ERROR: couldn't load model " << modelInfo.name().toStdString() << std::endl;
|
||||
return QHttpServerResponse(QHttpServerResponder::StatusCode::InternalServerError);
|
||||
}
|
||||
|
||||
// don't remember any context
|
||||
resetContext();
|
||||
|
||||
const QString promptTemplate = modelInfo.promptTemplate();
|
||||
const float top_k = modelInfo.topK();
|
||||
const int n_batch = modelInfo.promptBatchSize();
|
||||
const float repeat_penalty = modelInfo.repeatPenalty();
|
||||
const int repeat_last_n = modelInfo.repeatPenaltyTokens();
|
||||
|
||||
int promptTokens = 0;
|
||||
int responseTokens = 0;
|
||||
QList<QPair<QString, QList<ResultInfo>>> responses;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
if (!promptInternal(
|
||||
m_collections,
|
||||
actualPrompt,
|
||||
promptTemplate,
|
||||
max_tokens /*n_predict*/,
|
||||
top_k,
|
||||
top_p,
|
||||
min_p,
|
||||
temperature,
|
||||
n_batch,
|
||||
repeat_penalty,
|
||||
repeat_last_n)) {
|
||||
|
||||
std::cerr << "ERROR: couldn't prompt model " << modelInfo.name().toStdString() << std::endl;
|
||||
return QHttpServerResponse(QHttpServerResponder::StatusCode::InternalServerError);
|
||||
}
|
||||
QString echoedPrompt = actualPrompt;
|
||||
if (!echoedPrompt.endsWith("\n"))
|
||||
echoedPrompt += "\n";
|
||||
responses.append(qMakePair((echo ? u"%1\n"_s.arg(actualPrompt) : QString()) + response(), m_databaseResults));
|
||||
if (!promptTokens)
|
||||
promptTokens += m_promptTokens;
|
||||
responseTokens += m_promptResponseTokens - m_promptTokens;
|
||||
if (i != n - 1)
|
||||
resetResponse();
|
||||
}
|
||||
|
||||
QJsonObject responseObject;
|
||||
responseObject.insert("id", "foobarbaz");
|
||||
responseObject.insert("object", "text_completion");
|
||||
responseObject.insert("created", QDateTime::currentSecsSinceEpoch());
|
||||
responseObject.insert("model", modelInfo.name());
|
||||
|
||||
QJsonArray choices;
|
||||
|
||||
if (isChat) {
|
||||
int index = 0;
|
||||
for (const auto &r : responses) {
|
||||
QString result = r.first;
|
||||
QList<ResultInfo> infos = r.second;
|
||||
QJsonObject choice;
|
||||
choice.insert("index", index++);
|
||||
choice.insert("finish_reason", responseTokens == max_tokens ? "length" : "stop");
|
||||
QJsonObject message;
|
||||
message.insert("role", "assistant");
|
||||
message.insert("content", result);
|
||||
choice.insert("message", message);
|
||||
if (MySettings::globalInstance()->localDocsShowReferences()) {
|
||||
QJsonArray references;
|
||||
for (const auto &ref : infos)
|
||||
references.append(resultToJson(ref));
|
||||
choice.insert("references", references);
|
||||
}
|
||||
choices.append(choice);
|
||||
}
|
||||
} else {
|
||||
int index = 0;
|
||||
for (const auto &r : responses) {
|
||||
QString result = r.first;
|
||||
QList<ResultInfo> infos = r.second;
|
||||
QJsonObject choice;
|
||||
choice.insert("text", result);
|
||||
choice.insert("index", index++);
|
||||
choice.insert("logprobs", QJsonValue::Null); // We don't support
|
||||
choice.insert("finish_reason", responseTokens == max_tokens ? "length" : "stop");
|
||||
if (MySettings::globalInstance()->localDocsShowReferences()) {
|
||||
QJsonArray references;
|
||||
for (const auto &ref : infos)
|
||||
references.append(resultToJson(ref));
|
||||
choice.insert("references", references);
|
||||
}
|
||||
choices.append(choice);
|
||||
}
|
||||
}
|
||||
|
||||
responseObject.insert("choices", choices);
|
||||
|
||||
QJsonObject usage;
|
||||
usage.insert("prompt_tokens", int(promptTokens));
|
||||
usage.insert("completion_tokens", int(responseTokens));
|
||||
usage.insert("total_tokens", int(promptTokens + responseTokens));
|
||||
responseObject.insert("usage", usage);
|
||||
|
||||
#if defined(DEBUG)
|
||||
QJsonDocument newDoc(responseObject);
|
||||
printf("/v1/completions %s\n", qPrintable(newDoc.toJson(QJsonDocument::Indented)));
|
||||
fflush(stdout);
|
||||
#endif
|
||||
|
||||
return QHttpServerResponse(responseObject);
|
||||
}
|
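The handler above closes out the old single-file server implementation; it accepts legacy text-completion requests with a `prompt` field plus the sampling fields parsed earlier in the function. A minimal sketch of such a request, under the same assumptions as the earlier example (local server enabled, default port 4891, placeholder model name):

```python
# Minimal sketch: legacy /v1/completions request handled by the code above.
# Field names (model, prompt, max_tokens, temperature, n, echo) are taken from
# the parsing logic in handleCompletionRequest; the values are illustrative.
import json
import urllib.request

payload = {
    "model": "Llama 3 8B Instruct",   # placeholder model name
    "prompt": "The capital of France is",
    "max_tokens": 16,
    "temperature": 0,
    "n": 1,
    "echo": False,                    # True would prepend the prompt to the returned text
}
req = urllib.request.Request(
    "http://localhost:4891/v1/completions",
    data=json.dumps(payload).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(req) as resp:
    reply = json.load(resp)
print(reply["choices"][0]["text"])
print(reply["usage"])                 # prompt_tokens / completion_tokens / total_tokens, as built above
```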
@ -74,7 +74,6 @@ void Chat::connectLLM()
|
||||
connect(this, &Chat::promptRequested, m_llmodel, &ChatLLM::prompt, Qt::QueuedConnection);
|
||||
connect(this, &Chat::modelChangeRequested, m_llmodel, &ChatLLM::modelChangeRequested, Qt::QueuedConnection);
|
||||
connect(this, &Chat::loadDefaultModelRequested, m_llmodel, &ChatLLM::loadDefaultModel, Qt::QueuedConnection);
|
||||
connect(this, &Chat::loadModelRequested, m_llmodel, &ChatLLM::loadModel, Qt::QueuedConnection);
|
||||
connect(this, &Chat::generateNameRequested, m_llmodel, &ChatLLM::generateName, Qt::QueuedConnection);
|
||||
connect(this, &Chat::regenerateResponseRequested, m_llmodel, &ChatLLM::regenerateResponse, Qt::QueuedConnection);
|
||||
connect(this, &Chat::resetResponseRequested, m_llmodel, &ChatLLM::resetResponse, Qt::QueuedConnection);
|
||||
@ -240,16 +239,17 @@ void Chat::newPromptResponsePair(const QString &prompt)
|
||||
resetResponseState();
|
||||
m_chatModel->updateCurrentResponse(m_chatModel->count() - 1, false);
|
||||
m_chatModel->appendPrompt("Prompt: ", prompt);
|
||||
m_chatModel->appendResponse("Response: ", prompt);
|
||||
m_chatModel->appendResponse("Response: ", QString());
|
||||
emit resetResponseRequested();
|
||||
}
|
||||
|
||||
// the server needs to block until response is reset, so it calls resetResponse on its own m_llmThread
|
||||
void Chat::serverNewPromptResponsePair(const QString &prompt)
|
||||
{
|
||||
resetResponseState();
|
||||
m_chatModel->updateCurrentResponse(m_chatModel->count() - 1, false);
|
||||
m_chatModel->appendPrompt("Prompt: ", prompt);
|
||||
m_chatModel->appendResponse("Response: ", prompt);
|
||||
m_chatModel->appendResponse("Response: ", QString());
|
||||
}
|
||||
|
||||
bool Chat::restoringFromText() const
|
@ -146,7 +146,6 @@ Q_SIGNALS:
|
||||
void modelInfoChanged();
|
||||
void restoringFromTextChanged();
|
||||
void loadDefaultModelRequested();
|
||||
void loadModelRequested(const ModelInfo &modelInfo);
|
||||
void generateNameRequested();
|
||||
void modelLoadingErrorChanged();
|
||||
void isServerChanged();
|
@ -1,6 +1,6 @@
|
||||
#include "chatapi.h"
|
||||
|
||||
#include "../gpt4all-backend/llmodel.h"
|
||||
#include <gpt4all-backend/llmodel.h>
|
||||
|
||||
#include <QCoreApplication>
|
||||
#include <QGuiApplication>
|
||||
@ -93,7 +93,7 @@ void ChatAPI::prompt(const std::string &prompt,
|
||||
bool allowContextShift,
|
||||
PromptContext &promptCtx,
|
||||
bool special,
|
||||
std::string *fakeReply) {
|
||||
std::optional<std::string_view> fakeReply) {
|
||||
|
||||
Q_UNUSED(promptCallback);
|
||||
Q_UNUSED(allowContextShift);
|
||||
@ -121,7 +121,7 @@ void ChatAPI::prompt(const std::string &prompt,
|
||||
if (fakeReply) {
|
||||
promptCtx.n_past += 1;
|
||||
m_context.append(formattedPrompt);
|
||||
m_context.append(QString::fromStdString(*fakeReply));
|
||||
m_context.append(QString::fromUtf8(fakeReply->data(), fakeReply->size()));
|
||||
return;
|
||||
}
|
||||
|
@ -1,7 +1,7 @@
|
||||
#ifndef CHATAPI_H
|
||||
#define CHATAPI_H
|
||||
|
||||
#include "../gpt4all-backend/llmodel.h"
|
||||
#include <gpt4all-backend/llmodel.h>
|
||||
|
||||
#include <QByteArray>
|
||||
#include <QNetworkReply>
|
||||
@ -12,9 +12,10 @@
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <stdexcept>
|
||||
#include <functional>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <vector>
|
||||
|
||||
class QNetworkAccessManager;
|
||||
@ -72,7 +73,7 @@ public:
|
||||
bool allowContextShift,
|
||||
PromptContext &ctx,
|
||||
bool special,
|
||||
std::string *fakeReply) override;
|
||||
std::optional<std::string_view> fakeReply) override;
|
||||
|
||||
void setThreadCount(int32_t n_threads) override;
|
||||
int32_t threadCount() const override;
|
||||
@ -97,7 +98,7 @@ protected:
|
||||
// them as they are only called from the default implementation of 'prompt' which we override and
|
||||
// completely replace
|
||||
|
||||
std::vector<Token> tokenize(PromptContext &ctx, const std::string &str, bool special) override
|
||||
std::vector<Token> tokenize(PromptContext &ctx, std::string_view str, bool special) override
|
||||
{
|
||||
(void)ctx;
|
||||
(void)str;
|
@ -249,9 +249,11 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
|
||||
// and what the type and name of that model is. I've tried to comment extensively in this method
|
||||
// to provide an overview of what we're doing here.
|
||||
|
||||
// We're already loaded with this model
|
||||
if (isModelLoaded() && this->modelInfo() == modelInfo)
|
||||
return true;
|
||||
if (isModelLoaded() && this->modelInfo() == modelInfo) {
|
||||
// already acquired -> keep it and reset
|
||||
resetContext();
|
||||
return true; // already loaded
|
||||
}
|
||||
|
||||
// reset status
|
||||
emit modelLoadingPercentageChanged(std::numeric_limits<float>::min()); // small non-zero positive value
|
||||
@ -624,16 +626,16 @@ void ChatLLM::regenerateResponse()
|
||||
m_ctx.tokens.erase(m_ctx.tokens.end() - m_promptResponseTokens, m_ctx.tokens.end());
|
||||
m_promptResponseTokens = 0;
|
||||
m_promptTokens = 0;
|
||||
m_response = std::string();
|
||||
emit responseChanged(QString::fromStdString(m_response));
|
||||
m_response = m_trimmedResponse = std::string();
|
||||
emit responseChanged(QString::fromStdString(m_trimmedResponse));
|
||||
}
|
||||
|
||||
void ChatLLM::resetResponse()
|
||||
{
|
||||
m_promptTokens = 0;
|
||||
m_promptResponseTokens = 0;
|
||||
m_response = std::string();
|
||||
emit responseChanged(QString::fromStdString(m_response));
|
||||
m_response = m_trimmedResponse = std::string();
|
||||
emit responseChanged(QString::fromStdString(m_trimmedResponse));
|
||||
}
|
||||
|
||||
void ChatLLM::resetContext()
|
||||
@ -643,9 +645,12 @@ void ChatLLM::resetContext()
|
||||
m_ctx = LLModel::PromptContext();
|
||||
}
|
||||
|
||||
QString ChatLLM::response() const
|
||||
QString ChatLLM::response(bool trim) const
|
||||
{
|
||||
return QString::fromStdString(remove_leading_whitespace(m_response));
|
||||
std::string resp = m_response;
|
||||
if (trim)
|
||||
resp = remove_leading_whitespace(resp);
|
||||
return QString::fromStdString(resp);
|
||||
}
|
||||
|
||||
ModelInfo ChatLLM::modelInfo() const
|
||||
@ -659,20 +664,25 @@ void ChatLLM::setModelInfo(const ModelInfo &modelInfo)
|
||||
emit modelInfoChanged(modelInfo);
|
||||
}
|
||||
|
||||
void ChatLLM::acquireModel() {
|
||||
void ChatLLM::acquireModel()
|
||||
{
|
||||
m_llModelInfo = LLModelStore::globalInstance()->acquireModel();
|
||||
emit loadedModelInfoChanged();
|
||||
}
|
||||
|
||||
void ChatLLM::resetModel() {
|
||||
void ChatLLM::resetModel()
|
||||
{
|
||||
m_llModelInfo = {};
|
||||
emit loadedModelInfoChanged();
|
||||
}
|
||||
|
||||
void ChatLLM::modelChangeRequested(const ModelInfo &modelInfo)
|
||||
{
|
||||
m_shouldBeLoaded = true;
|
||||
loadModel(modelInfo);
|
||||
// ignore attempts to switch to the same model twice
|
||||
if (!isModelLoaded() || this->modelInfo() != modelInfo) {
|
||||
m_shouldBeLoaded = true;
|
||||
loadModel(modelInfo);
|
||||
}
|
||||
}
|
||||
|
||||
bool ChatLLM::handlePrompt(int32_t token)
|
||||
@ -698,7 +708,8 @@ bool ChatLLM::handleResponse(int32_t token, const std::string &response)
|
||||
// check for error
|
||||
if (token < 0) {
|
||||
m_response.append(response);
|
||||
emit responseChanged(QString::fromStdString(remove_leading_whitespace(m_response)));
|
||||
m_trimmedResponse = remove_leading_whitespace(m_response);
|
||||
emit responseChanged(QString::fromStdString(m_trimmedResponse));
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -708,7 +719,8 @@ bool ChatLLM::handleResponse(int32_t token, const std::string &response)
|
||||
m_timer->inc();
|
||||
Q_ASSERT(!response.empty());
|
||||
m_response.append(response);
|
||||
emit responseChanged(QString::fromStdString(remove_leading_whitespace(m_response)));
|
||||
m_trimmedResponse = remove_leading_whitespace(m_response);
|
||||
emit responseChanged(QString::fromStdString(m_trimmedResponse));
|
||||
return !m_stopGenerating;
|
||||
}
|
||||
|
||||
@ -719,8 +731,6 @@ bool ChatLLM::prompt(const QList<QString> &collectionList, const QString &prompt
|
||||
processRestoreStateFromText();
|
||||
}
|
||||
|
||||
if (!m_processedSystemPrompt)
|
||||
processSystemPrompt();
|
||||
const QString promptTemplate = MySettings::globalInstance()->modelPromptTemplate(m_modelInfo);
|
||||
const int32_t n_predict = MySettings::globalInstance()->modelMaxLength(m_modelInfo);
|
||||
const int32_t top_k = MySettings::globalInstance()->modelTopK(m_modelInfo);
|
||||
@ -736,14 +746,17 @@ bool ChatLLM::prompt(const QList<QString> &collectionList, const QString &prompt
|
||||
|
||||
bool ChatLLM::promptInternal(const QList<QString> &collectionList, const QString &prompt, const QString &promptTemplate,
|
||||
int32_t n_predict, int32_t top_k, float top_p, float min_p, float temp, int32_t n_batch, float repeat_penalty,
|
||||
int32_t repeat_penalty_tokens)
|
||||
int32_t repeat_penalty_tokens, std::optional<QString> fakeReply)
|
||||
{
|
||||
if (!isModelLoaded())
|
||||
return false;
|
||||
|
||||
if (!m_processedSystemPrompt)
|
||||
processSystemPrompt();
|
||||
|
||||
QList<ResultInfo> databaseResults;
|
||||
const int retrievalSize = MySettings::globalInstance()->localDocsRetrievalSize();
|
||||
if (!collectionList.isEmpty()) {
|
||||
if (!fakeReply && !collectionList.isEmpty()) {
|
||||
emit requestRetrieveFromDB(collectionList, prompt, retrievalSize, &databaseResults); // blocks
|
||||
emit databaseResultsChanged(databaseResults);
|
||||
}
|
||||
@ -789,7 +802,8 @@ bool ChatLLM::promptInternal(const QList<QString> &collectionList, const QString
|
||||
m_ctx.n_predict = old_n_predict; // now we are ready for a response
|
||||
}
|
||||
m_llModelInfo.model->prompt(prompt.toStdString(), promptTemplate.toStdString(), promptFunc, responseFunc,
|
||||
/*allowContextShift*/ true, m_ctx);
|
||||
/*allowContextShift*/ true, m_ctx, false,
|
||||
fakeReply.transform(std::mem_fn(&QString::toStdString)));
|
||||
#if defined(DEBUG)
|
||||
printf("\n");
|
||||
fflush(stdout);
|
||||
@ -797,9 +811,9 @@ bool ChatLLM::promptInternal(const QList<QString> &collectionList, const QString
|
||||
m_timer->stop();
|
||||
qint64 elapsed = totalTime.elapsed();
|
||||
std::string trimmed = trim_whitespace(m_response);
|
||||
if (trimmed != m_response) {
|
||||
m_response = trimmed;
|
||||
emit responseChanged(QString::fromStdString(m_response));
|
||||
if (trimmed != m_trimmedResponse) {
|
||||
m_trimmedResponse = trimmed;
|
||||
emit responseChanged(QString::fromStdString(m_trimmedResponse));
|
||||
}
|
||||
|
||||
SuggestionMode mode = MySettings::globalInstance()->suggestionMode();
|
||||
@ -1070,6 +1084,7 @@ bool ChatLLM::deserialize(QDataStream &stream, int version, bool deserializeKV,
|
||||
QString response;
|
||||
stream >> response;
|
||||
m_response = response.toStdString();
|
||||
m_trimmedResponse = trim_whitespace(m_response);
|
||||
QString nameResponse;
|
||||
stream >> nameResponse;
|
||||
m_nameResponse = nameResponse.toStdString();
|
||||
@ -1206,7 +1221,7 @@ void ChatLLM::restoreState()
|
||||
void ChatLLM::processSystemPrompt()
|
||||
{
|
||||
Q_ASSERT(isModelLoaded());
|
||||
if (!isModelLoaded() || m_processedSystemPrompt || m_restoreStateFromText || m_isServer)
|
||||
if (!isModelLoaded() || m_processedSystemPrompt || m_restoreStateFromText)
|
||||
return;
|
||||
|
||||
const std::string systemPrompt = MySettings::globalInstance()->modelSystemPrompt(m_modelInfo).toStdString();
|
||||
@ -1298,10 +1313,9 @@ void ChatLLM::processRestoreStateFromText()
|
||||
|
||||
auto &response = *it++;
|
||||
Q_ASSERT(response.first != "Prompt: ");
|
||||
auto responseText = response.second.toStdString();
|
||||
|
||||
m_llModelInfo.model->prompt(prompt.second.toStdString(), promptTemplate.toStdString(), promptFunc, nullptr,
|
||||
/*allowContextShift*/ true, m_ctx, false, &responseText);
|
||||
/*allowContextShift*/ true, m_ctx, false, response.second.toUtf8().constData());
|
||||
}
|
||||
|
||||
if (!m_stopGenerating) {
|
@ -4,7 +4,7 @@
|
||||
#include "database.h" // IWYU pragma: keep
|
||||
#include "modellist.h"
|
||||
|
||||
#include "../gpt4all-backend/llmodel.h"
|
||||
#include <gpt4all-backend/llmodel.h>
|
||||
|
||||
#include <QByteArray>
|
||||
#include <QElapsedTimer>
|
||||
@ -116,7 +116,7 @@ public:
|
||||
void setForceUnloadModel(bool b) { m_forceUnloadModel = b; }
|
||||
void setMarkedForDeletion(bool b) { m_markedForDeletion = b; }
|
||||
|
||||
QString response() const;
|
||||
QString response(bool trim = true) const;
|
||||
|
||||
ModelInfo modelInfo() const;
|
||||
void setModelInfo(const ModelInfo &info);
|
||||
@ -198,7 +198,7 @@ Q_SIGNALS:
|
||||
protected:
|
||||
bool promptInternal(const QList<QString> &collectionList, const QString &prompt, const QString &promptTemplate,
|
||||
int32_t n_predict, int32_t top_k, float top_p, float min_p, float temp, int32_t n_batch, float repeat_penalty,
|
||||
int32_t repeat_penalty_tokens);
|
||||
int32_t repeat_penalty_tokens, std::optional<QString> fakeReply = {});
|
||||
bool handlePrompt(int32_t token);
|
||||
bool handleResponse(int32_t token, const std::string &response);
|
||||
bool handleNamePrompt(int32_t token);
|
||||
@ -221,6 +221,7 @@ private:
|
||||
bool loadNewModel(const ModelInfo &modelInfo, QVariantMap &modelLoadProps);
|
||||
|
||||
std::string m_response;
|
||||
std::string m_trimmedResponse;
|
||||
std::string m_nameResponse;
|
||||
QString m_questionResponse;
|
||||
LLModelInfo m_llModelInfo;
|
@ -396,8 +396,9 @@ void Download::parseReleaseJsonFile(const QByteArray &jsonData)
|
||||
QJsonObject obj = value.toObject();
|
||||
|
||||
QString version = obj["version"].toString();
|
||||
QString notes = obj["notes"].toString();
|
||||
QString contributors = obj["contributors"].toString();
|
||||
// "notes" field intentionally has a trailing newline for compatibility
|
||||
QString notes = obj["notes"].toString().trimmed();
|
||||
QString contributors = obj["contributors"].toString().trimmed();
|
||||
ReleaseInfo releaseInfo;
|
||||
releaseInfo.version = version;
|
||||
releaseInfo.notes = notes;
|
@ -3,7 +3,7 @@
|
||||
#include "modellist.h"
|
||||
#include "mysettings.h"
|
||||
|
||||
#include "../gpt4all-backend/llmodel.h"
|
||||
#include <gpt4all-backend/llmodel.h>
|
||||
|
||||
#include <QCoreApplication>
|
||||
#include <QDebug>
|
@ -1,7 +1,7 @@
|
||||
#include "llm.h"
|
||||
|
||||
#include "../gpt4all-backend/llmodel.h"
|
||||
#include "../gpt4all-backend/sysinfo.h"
|
||||
#include <gpt4all-backend/llmodel.h>
|
||||
#include <gpt4all-backend/sysinfo.h>
|
||||
|
||||
#include <QCoreApplication>
|
||||
#include <QDebug>
|
@ -20,24 +20,25 @@ class LocalDocsCollectionsModel : public QSortFilterProxyModel
|
||||
Q_OBJECT
|
||||
Q_PROPERTY(int count READ count NOTIFY countChanged)
|
||||
Q_PROPERTY(int updatingCount READ updatingCount NOTIFY updatingCountChanged)
|
||||
|
||||
public:
|
||||
explicit LocalDocsCollectionsModel(QObject *parent);
|
||||
int count() const { return rowCount(); }
|
||||
int updatingCount() const;
|
||||
|
||||
public Q_SLOTS:
|
||||
int count() const { return rowCount(); }
|
||||
void setCollections(const QList<QString> &collections);
|
||||
int updatingCount() const;
|
||||
|
||||
Q_SIGNALS:
|
||||
void countChanged();
|
||||
void updatingCountChanged();
|
||||
|
||||
private Q_SLOT:
|
||||
void maybeTriggerUpdatingCountChanged();
|
||||
|
||||
protected:
|
||||
bool filterAcceptsRow(int sourceRow, const QModelIndex &sourceParent) const override;
|
||||
|
||||
private Q_SLOTS:
|
||||
void maybeTriggerUpdatingCountChanged();
|
||||
|
||||
private:
|
||||
QList<QString> m_collections;
|
||||
int m_updatingCount = 0;
|
@ -8,16 +8,15 @@
|
||||
#include "mysettings.h"
|
||||
#include "network.h"
|
||||
|
||||
#include "../gpt4all-backend/llmodel.h"
|
||||
#include <gpt4all-backend/llmodel.h>
|
||||
#include <singleapplication.h>
|
||||
|
||||
#include <QCoreApplication>
|
||||
#include <QGuiApplication>
|
||||
#include <QObject>
|
||||
#include <QQmlApplicationEngine>
|
||||
#include <QQmlEngine>
|
||||
#include <QQuickWindow>
|
||||
#include <QSettings>
|
||||
#include <QString>
|
||||
#include <QTranslator>
|
||||
#include <QUrl>
|
||||
#include <Qt>
|
||||
|
||||
@ -25,6 +24,29 @@
|
||||
# include <QIcon>
|
||||
#endif
|
||||
|
||||
#ifdef Q_OS_WINDOWS
|
||||
# include <windows.h>
|
||||
#endif
|
||||
|
||||
using namespace Qt::Literals::StringLiterals;
|
||||
|
||||
|
||||
static void raiseWindow(QWindow *window)
|
||||
{
|
||||
#ifdef Q_OS_WINDOWS
|
||||
HWND hwnd = HWND(window->winId());
|
||||
|
||||
// check if window is minimized to Windows task bar
|
||||
if (IsIconic(hwnd))
|
||||
ShowWindow(hwnd, SW_RESTORE);
|
||||
|
||||
SetForegroundWindow(hwnd);
|
||||
#else
|
||||
window->show();
|
||||
window->raise();
|
||||
window->requestActivate();
|
||||
#endif
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
@ -36,7 +58,15 @@ int main(int argc, char *argv[])
|
||||
|
||||
Logger::globalInstance();
|
||||
|
||||
QGuiApplication app(argc, argv);
|
||||
SingleApplication app(argc, argv, true /*allowSecondary*/);
|
||||
if (app.isSecondary()) {
|
||||
#ifdef Q_OS_WINDOWS
|
||||
AllowSetForegroundWindow(DWORD(app.primaryPid()));
|
||||
#endif
|
||||
app.sendMessage("RAISE_WINDOW");
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef Q_OS_LINUX
|
||||
app.setWindowIcon(QIcon(":/gpt4all/icons/gpt4all.svg"));
|
||||
#endif
|
||||
@ -77,7 +107,7 @@ int main(int argc, char *argv[])
|
||||
qmlRegisterSingletonInstance("localdocs", 1, 0, "LocalDocs", LocalDocs::globalInstance());
|
||||
qmlRegisterUncreatableMetaObject(MySettingsEnums::staticMetaObject, "mysettingsenums", 1, 0, "MySettingsEnums", "Error: only enums");
|
||||
|
||||
const QUrl url(u"qrc:/gpt4all/main.qml"_qs);
|
||||
const QUrl url(u"qrc:/gpt4all/main.qml"_s);
|
||||
|
||||
QObject::connect(&engine, &QQmlApplicationEngine::objectCreated,
|
||||
&app, [url](QObject *obj, const QUrl &objUrl) {
|
||||
@ -86,6 +116,13 @@ int main(int argc, char *argv[])
|
||||
}, Qt::QueuedConnection);
|
||||
engine.load(url);
|
||||
|
||||
QObject *rootObject = engine.rootObjects().first();
|
||||
QQuickWindow *windowObject = qobject_cast<QQuickWindow *>(rootObject);
|
||||
Q_ASSERT(windowObject);
|
||||
if (windowObject)
|
||||
QObject::connect(&app, &SingleApplication::receivedMessage,
|
||||
windowObject, [windowObject] () { raiseWindow(windowObject); } );
|
||||
|
||||
#if 0
|
||||
QDirIterator it("qrc:", QDirIterator::Subdirectories);
|
||||
while (it.hasNext()) {
|
@ -4,7 +4,7 @@
|
||||
#include "mysettings.h"
|
||||
#include "network.h"
|
||||
|
||||
#include "../gpt4all-backend/llmodel.h"
|
||||
#include <gpt4all-backend/llmodel.h>
|
||||
|
||||
#include <QChar>
|
||||
#include <QCoreApplication>
|
||||
@ -1208,132 +1208,139 @@ bool ModelList::modelExists(const QString &modelFilename) const
|
||||
return false;
|
||||
}
|
||||
|
||||
void ModelList::updateOldRemoteModels(const QString &path)
|
||||
{
|
||||
QDirIterator it(path, QDir::Files, QDirIterator::Subdirectories);
|
||||
while (it.hasNext()) {
|
||||
QFileInfo info = it.nextFileInfo();
|
||||
QString filename = it.fileName();
|
||||
if (!filename.startsWith("chatgpt-") || !filename.endsWith(".txt"))
|
||||
continue;
|
||||
|
||||
QString apikey;
|
||||
QString modelname(filename);
|
||||
modelname.chop(4); // strip ".txt" extension
|
||||
modelname.remove(0, 8); // strip "chatgpt-" prefix
|
||||
QFile file(info.filePath());
|
||||
if (!file.open(QIODevice::ReadOnly)) {
|
||||
qWarning().noquote() << tr("cannot open \"%1\": %2").arg(file.fileName(), file.errorString());
|
||||
continue;
|
||||
}
|
||||
|
||||
{
|
||||
QTextStream in(&file);
|
||||
apikey = in.readAll();
|
||||
file.close();
|
||||
}
|
||||
|
||||
QFile newfile(u"%1/gpt4all-%2.rmodel"_s.arg(info.dir().path(), modelname));
|
||||
if (!newfile.open(QIODevice::ReadWrite)) {
|
||||
qWarning().noquote() << tr("cannot create \"%1\": %2").arg(newfile.fileName(), file.errorString());
|
||||
continue;
|
||||
}
|
||||
|
||||
QJsonObject obj {
|
||||
{ "apiKey", apikey },
|
||||
{ "modelName", modelname },
|
||||
};
|
||||
|
||||
QTextStream out(&newfile);
|
||||
out << QJsonDocument(obj).toJson();
|
||||
newfile.close();
|
||||
|
||||
file.remove();
|
||||
}
|
||||
}
|
||||
|
||||
void ModelList::processModelDirectory(const QString &path)
|
||||
{
|
||||
QDirIterator it(path, QDir::Files, QDirIterator::Subdirectories);
|
||||
while (it.hasNext()) {
|
||||
QFileInfo info = it.nextFileInfo();
|
||||
|
||||
QString filename = it.fileName();
|
||||
if (filename.startsWith("incomplete") || FILENAME_BLACKLIST.contains(filename))
|
||||
continue;
|
||||
if (!filename.endsWith(".gguf") && !filename.endsWith(".rmodel"))
|
||||
continue;
|
||||
|
||||
bool isOnline(filename.endsWith(".rmodel"));
|
||||
bool isCompatibleApi(filename.endsWith("-capi.rmodel"));
|
||||
|
||||
QString name;
|
||||
QString description;
|
||||
if (isCompatibleApi) {
|
||||
QJsonObject obj;
|
||||
{
|
||||
QFile file(info.filePath());
|
||||
if (!file.open(QIODeviceBase::ReadOnly)) {
|
||||
qWarning().noquote() << tr("cannot open \"%1\": %2").arg(file.fileName(), file.errorString());
|
||||
continue;
|
||||
}
|
||||
QJsonDocument doc = QJsonDocument::fromJson(file.readAll());
|
||||
obj = doc.object();
|
||||
}
|
||||
{
|
||||
QString apiKey(obj["apiKey"].toString());
|
||||
QString baseUrl(obj["baseUrl"].toString());
|
||||
QString modelName(obj["modelName"].toString());
|
||||
apiKey = apiKey.length() < 10 ? "*****" : apiKey.left(5) + "*****";
|
||||
name = tr("%1 (%2)").arg(modelName, baseUrl);
|
||||
description = tr("<strong>OpenAI-Compatible API Model</strong><br>"
|
||||
"<ul><li>API Key: %1</li>"
|
||||
"<li>Base URL: %2</li>"
|
||||
"<li>Model Name: %3</li></ul>")
|
||||
.arg(apiKey, baseUrl, modelName);
|
||||
}
|
||||
}
|
||||
|
||||
QVector<QString> modelsById;
|
||||
{
|
||||
QMutexLocker locker(&m_mutex);
|
||||
for (ModelInfo *info : m_models)
|
||||
if (info->filename() == filename)
|
||||
modelsById.append(info->id());
|
||||
}
|
||||
|
||||
if (modelsById.isEmpty()) {
|
||||
if (!contains(filename))
|
||||
addModel(filename);
|
||||
modelsById.append(filename);
|
||||
}
|
||||
|
||||
for (const QString &id : modelsById) {
|
||||
QVector<QPair<int, QVariant>> data {
|
||||
{ InstalledRole, true },
|
||||
{ FilenameRole, filename },
|
||||
{ OnlineRole, isOnline },
|
||||
{ CompatibleApiRole, isCompatibleApi },
|
||||
{ DirpathRole, info.dir().absolutePath() + "/" },
|
||||
{ FilesizeRole, toFileSize(info.size()) },
|
||||
};
|
||||
if (isCompatibleApi) {
|
||||
// The data will be saved to "GPT4All.ini".
|
||||
data.append({ NameRole, name });
|
||||
// The description is hard-coded into "GPT4All.ini" due to performance issue.
|
||||
// If the description goes to be dynamic from its .rmodel file, it will get high I/O usage while using the ModelList.
|
||||
data.append({ DescriptionRole, description });
|
||||
// Prompt template should be clear while using ChatML format which is using in most of OpenAI-Compatible API server.
|
||||
data.append({ PromptTemplateRole, "%1" });
|
||||
}
|
||||
updateData(id, data);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void ModelList::updateModelsFromDirectory()
|
||||
{
|
||||
const QString exePath = QCoreApplication::applicationDirPath() + QDir::separator();
|
||||
const QString localPath = MySettings::globalInstance()->modelPath();
|
||||
|
||||
auto updateOldRemoteModels = [&](const QString& path) {
|
||||
QDirIterator it(path, QDirIterator::Subdirectories);
|
||||
while (it.hasNext()) {
|
||||
it.next();
|
||||
if (!it.fileInfo().isDir()) {
|
||||
QString filename = it.fileName();
|
||||
if (filename.startsWith("chatgpt-") && filename.endsWith(".txt")) {
|
||||
QString apikey;
|
||||
QString modelname(filename);
|
||||
modelname.chop(4); // strip ".txt" extension
|
||||
modelname.remove(0, 8); // strip "chatgpt-" prefix
|
||||
QFile file(path + filename);
|
||||
if (file.open(QIODevice::ReadWrite)) {
|
||||
QTextStream in(&file);
|
||||
apikey = in.readAll();
|
||||
file.close();
|
||||
}
|
||||
|
||||
QJsonObject obj;
|
||||
obj.insert("apiKey", apikey);
|
||||
obj.insert("modelName", modelname);
|
||||
QJsonDocument doc(obj);
|
||||
|
||||
auto newfilename = u"gpt4all-%1.rmodel"_s.arg(modelname);
|
||||
QFile newfile(path + newfilename);
|
||||
if (newfile.open(QIODevice::ReadWrite)) {
|
||||
QTextStream out(&newfile);
|
||||
out << doc.toJson();
|
||||
newfile.close();
|
||||
}
|
||||
file.remove();
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
auto processDirectory = [&](const QString& path) {
|
||||
QDirIterator it(path, QDir::Files, QDirIterator::Subdirectories);
|
||||
while (it.hasNext()) {
|
||||
it.next();
|
||||
|
||||
QString filename = it.fileName();
|
||||
if (filename.startsWith("incomplete") || FILENAME_BLACKLIST.contains(filename))
|
||||
continue;
|
||||
if (!filename.endsWith(".gguf") && !filename.endsWith(".rmodel"))
|
||||
continue;
|
||||
|
||||
QVector<QString> modelsById;
|
||||
{
|
||||
QMutexLocker locker(&m_mutex);
|
||||
for (ModelInfo *info : m_models)
|
||||
if (info->filename() == filename)
|
||||
modelsById.append(info->id());
|
||||
}
|
||||
|
||||
if (modelsById.isEmpty()) {
|
||||
if (!contains(filename))
|
||||
addModel(filename);
|
||||
modelsById.append(filename);
|
||||
}
|
||||
|
||||
QFileInfo info = it.fileInfo();
|
||||
|
||||
bool isOnline(filename.endsWith(".rmodel"));
|
||||
bool isCompatibleApi(filename.endsWith("-capi.rmodel"));
|
||||
|
||||
QString name;
|
||||
QString description;
|
||||
if (isCompatibleApi) {
|
||||
QJsonObject obj;
|
||||
{
|
||||
QFile file(path + filename);
|
||||
bool success = file.open(QIODeviceBase::ReadOnly);
|
||||
(void)success;
|
||||
Q_ASSERT(success);
|
||||
QJsonDocument doc = QJsonDocument::fromJson(file.readAll());
|
||||
obj = doc.object();
|
||||
}
|
||||
{
|
||||
QString apiKey(obj["apiKey"].toString());
|
||||
QString baseUrl(obj["baseUrl"].toString());
|
||||
QString modelName(obj["modelName"].toString());
|
||||
apiKey = apiKey.length() < 10 ? "*****" : apiKey.left(5) + "*****";
|
||||
name = tr("%1 (%2)").arg(modelName, baseUrl);
|
||||
description = tr("<strong>OpenAI-Compatible API Model</strong><br>"
|
||||
"<ul><li>API Key: %1</li>"
|
||||
"<li>Base URL: %2</li>"
|
||||
"<li>Model Name: %3</li></ul>")
|
||||
.arg(apiKey, baseUrl, modelName);
|
||||
}
|
||||
}
|
||||
|
||||
for (const QString &id : modelsById) {
|
||||
QVector<QPair<int, QVariant>> data {
|
||||
{ InstalledRole, true },
|
||||
{ FilenameRole, filename },
|
||||
{ OnlineRole, isOnline },
|
||||
{ CompatibleApiRole, isCompatibleApi },
|
||||
{ DirpathRole, info.dir().absolutePath() + "/" },
|
||||
{ FilesizeRole, toFileSize(info.size()) },
|
||||
};
|
||||
if (isCompatibleApi) {
|
||||
// The data will be saved to "GPT4All.ini".
|
||||
data.append({ NameRole, name });
|
||||
// The description is hard-coded into "GPT4All.ini" due to performance issue.
|
||||
// If the description goes to be dynamic from its .rmodel file, it will get high I/O usage while using the ModelList.
|
||||
data.append({ DescriptionRole, description });
|
||||
// Prompt template should be clear while using ChatML format which is using in most of OpenAI-Compatible API server.
|
||||
data.append({ PromptTemplateRole, "%1" });
|
||||
}
|
||||
updateData(id, data);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
updateOldRemoteModels(exePath);
|
||||
processDirectory(exePath);
|
||||
processModelDirectory(exePath);
|
||||
if (localPath != exePath) {
|
||||
updateOldRemoteModels(localPath);
|
||||
processDirectory(localPath);
|
||||
processModelDirectory(localPath);
|
||||
}
|
||||
}
|
||||
|
@ -18,10 +18,12 @@
|
||||
#include <QVector>
|
||||
#include <Qt>
|
||||
#include <QtGlobal>
|
||||
#include <QtQml>
|
||||
|
||||
#include <utility>
|
||||
|
||||
using namespace Qt::Literals::StringLiterals;
|
||||
|
||||
|
||||
struct ModelInfo {
|
||||
Q_GADGET
|
||||
Q_PROPERTY(QString id READ id WRITE setId)
|
||||
@ -502,6 +504,8 @@ private:
|
||||
void parseModelsJsonFile(const QByteArray &jsonData, bool save);
|
||||
void parseDiscoveryJsonFile(const QByteArray &jsonData);
|
||||
QString uniqueModelName(const ModelInfo &model) const;
|
||||
void updateOldRemoteModels(const QString &path);
|
||||
void processModelDirectory(const QString &path);
|
||||
|
||||
private:
|
||||
mutable QMutex m_mutex;
|
||||
@ -521,7 +525,7 @@ private:
|
||||
|
||||
protected:
|
||||
explicit ModelList();
|
||||
~ModelList() { for (auto *model: m_models) { delete model; } }
|
||||
~ModelList() override { for (auto *model: std::as_const(m_models)) { delete model; } }
|
||||
friend class MyModelList;
|
||||
};
|
||||
|
@ -1,6 +1,6 @@
|
||||
#include "mysettings.h"
|
||||
|
||||
#include "../gpt4all-backend/llmodel.h"
|
||||
#include <gpt4all-backend/llmodel.h>
|
||||
|
||||
#include <QDebug>
|
||||
#include <QDir>
|
@ -8,6 +8,7 @@
|
||||
#include <QSettings>
|
||||
#include <QString>
|
||||
#include <QStringList>
|
||||
#include <QTranslator>
|
||||
#include <QVector>
|
||||
|
||||
#include <cstdint>
|
@ -9,7 +9,7 @@
|
||||
#include "modellist.h"
|
||||
#include "mysettings.h"
|
||||
|
||||
#include "../gpt4all-backend/llmodel.h"
|
||||
#include <gpt4all-backend/llmodel.h>
|
||||
|
||||
#include <QCoreApplication>
|
||||
#include <QDateTime>
|
||||
@ -19,6 +19,7 @@
|
||||
#include <QJsonArray>
|
||||
#include <QJsonDocument>
|
||||
#include <QJsonObject>
|
||||
#include <QLibraryInfo>
|
||||
#include <QNetworkRequest>
|
||||
#include <QScreen>
|
||||
#include <QSettings>
|
||||
@ -36,23 +37,52 @@
|
||||
#include <cstring>
|
||||
#include <utility>
|
||||
|
||||
#ifdef __GLIBC__
|
||||
# include <gnu/libc-version.h>
|
||||
#endif
|
||||
|
||||
using namespace Qt::Literals::StringLiterals;
|
||||
|
||||
//#define DEBUG
|
||||
|
||||
#define STR_(x) #x
|
||||
#define STR(x) STR_(x)
|
||||
|
||||
static const char MIXPANEL_TOKEN[] = "ce362e568ddaee16ed243eaffb5860a2";
|
||||
|
||||
#ifdef __clang__
|
||||
#ifdef __apple_build_version__
|
||||
static const char COMPILER_NAME[] = "Apple Clang";
|
||||
#else
|
||||
static const char COMPILER_NAME[] = "LLVM Clang";
|
||||
#endif
|
||||
static const char COMPILER_VER[] = STR(__clang_major__) "." STR(__clang_minor__) "." STR(__clang_patchlevel__);
|
||||
#elif defined(_MSC_VER)
|
||||
static const char COMPILER_NAME[] = "MSVC";
|
||||
static const char COMPILER_VER[] = STR(_MSC_VER) " (" STR(_MSC_FULL_VER) ")";
|
||||
#elif defined(__GNUC__)
|
||||
static const char COMPILER_NAME[] = "GCC";
|
||||
static const char COMPILER_VER[] = STR(__GNUC__) "." STR(__GNUC_MINOR__) "." STR(__GNUC_PATCHLEVEL__);
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(Q_OS_MAC)
|
||||
|
||||
#include <sys/sysctl.h>
|
||||
static QString getCPUModel()
|
||||
static std::optional<QString> getSysctl(const char *name)
|
||||
{
|
||||
char buffer[256];
|
||||
char buffer[256] = "";
|
||||
size_t bufferlen = sizeof(buffer);
|
||||
sysctlbyname("machdep.cpu.brand_string", &buffer, &bufferlen, NULL, 0);
|
||||
return buffer;
|
||||
if (sysctlbyname(name, &buffer, &bufferlen, NULL, 0) < 0) {
|
||||
int err = errno;
|
||||
qWarning().nospace() << "sysctlbyname(\"" << name << "\") failed: " << strerror(err);
|
||||
return std::nullopt;
|
||||
}
|
||||
return std::make_optional<QString>(buffer);
|
||||
}
|
||||
|
||||
static QString getCPUModel() { return getSysctl("machdep.cpu.brand_string").value_or(u"(unknown)"_s); }
|
||||
|
||||
#elif defined(__x86_64__) || defined(__i386__) || defined(_M_X64) || defined(_M_IX86)
|
||||
|
||||
#ifndef _MSC_VER
|
||||
@ -286,12 +316,36 @@ void Network::sendStartup()
|
||||
|
||||
const auto *display = QGuiApplication::primaryScreen();
|
||||
trackEvent("startup", {
|
||||
{"$screen_dpi", std::round(display->physicalDotsPerInch())},
|
||||
{"display", u"%1x%2"_s.arg(display->size().width()).arg(display->size().height())},
|
||||
{"ram", LLM::globalInstance()->systemTotalRAMInGB()},
|
||||
{"cpu", getCPUModel()},
|
||||
{"cpu_supports_avx2", LLModel::Implementation::cpuSupportsAVX2()},
|
||||
{"datalake_active", mySettings->networkIsActive()},
|
||||
// Build info
|
||||
{ "build_compiler", COMPILER_NAME },
|
||||
{ "build_compiler_ver", COMPILER_VER },
|
||||
{ "build_abi", QSysInfo::buildAbi() },
|
||||
{ "build_cpu_arch", QSysInfo::buildCpuArchitecture() },
|
||||
#ifdef __GLIBC__
|
||||
{ "build_glibc_ver", QStringLiteral(STR(__GLIBC__) "." STR(__GLIBC_MINOR__)) },
|
||||
#endif
|
||||
{ "qt_version", QLibraryInfo::version().toString() },
|
||||
{ "qt_debug" , QLibraryInfo::isDebugBuild() },
|
||||
{ "qt_shared", QLibraryInfo::isSharedBuild() },
|
||||
// System info
|
||||
{ "runtime_cpu_arch", QSysInfo::currentCpuArchitecture() },
|
||||
#ifdef __GLIBC__
|
||||
{ "runtime_glibc_ver", gnu_get_libc_version() },
|
||||
#endif
|
||||
{ "sys_kernel_type", QSysInfo::kernelType() },
|
||||
{ "sys_kernel_ver", QSysInfo::kernelVersion() },
|
||||
{ "sys_product_type", QSysInfo::productType() },
|
||||
{ "sys_product_ver", QSysInfo::productVersion() },
|
||||
#ifdef Q_OS_MAC
|
||||
{ "sys_hw_model", getSysctl("hw.model").value_or(u"(unknown)"_s) },
|
||||
#endif
|
||||
{ "$screen_dpi", std::round(display->physicalDotsPerInch()) },
|
||||
{ "display", u"%1x%2"_s.arg(display->size().width()).arg(display->size().height()) },
|
||||
{ "ram", LLM::globalInstance()->systemTotalRAMInGB() },
|
||||
{ "cpu", getCPUModel() },
|
||||
{ "cpu_supports_avx2", LLModel::Implementation::cpuSupportsAVX2() },
|
||||
// Datalake status
|
||||
{ "datalake_active", mySettings->networkIsActive() },
|
||||
});
|
||||
sendIpify();
|
||||
|
||||
@ -321,7 +375,6 @@ void Network::trackEvent(const QString &ev, const QVariantMap &props)
|
||||
if (!m_sendUsageStats)
|
||||
return;
|
||||
|
||||
Q_ASSERT(ChatListModel::globalInstance()->currentChat());
|
||||
QJsonObject properties;
|
||||
|
||||
properties.insert("token", MIXPANEL_TOKEN);
|
gpt4all-chat/src/server.cpp (new file, 883 lines)
@ -0,0 +1,883 @@
|
||||
#include "server.h"
|
||||
|
||||
#include "chat.h"
|
||||
#include "modellist.h"
|
||||
#include "mysettings.h"
|
||||
|
||||
#include <fmt/base.h>
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include <QByteArray>
|
||||
#include <QCborArray>
|
||||
#include <QCborMap>
|
||||
#include <QCborValue>
|
||||
#include <QDateTime>
|
||||
#include <QDebug>
|
||||
#include <QHostAddress>
|
||||
#include <QHttpServer>
|
||||
#include <QHttpServerResponder>
|
||||
#include <QJsonArray>
|
||||
#include <QJsonDocument>
|
||||
#include <QJsonObject>
|
||||
#include <QJsonValue>
|
||||
#include <QLatin1StringView>
|
||||
#include <QPair>
|
||||
#include <QVariant>
|
||||
#include <Qt>
|
||||
#include <QtCborCommon>
|
||||
#include <QtLogging>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstdint>
|
||||
#include <iostream>
|
||||
#include <optional>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <type_traits>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
|
||||
namespace ranges = std::ranges;
|
||||
using namespace std::string_literals;
|
||||
using namespace Qt::Literals::StringLiterals;
|
||||
|
||||
//#define DEBUG
|
||||
|
||||
|
||||
#define MAKE_FORMATTER(type, conversion) \
|
||||
template <> \
|
||||
struct fmt::formatter<type, char>: fmt::formatter<std::string, char> { \
|
||||
template <typename FmtContext> \
|
||||
FmtContext::iterator format(const type &value, FmtContext &ctx) const \
|
||||
{ \
|
||||
return formatter<std::string, char>::format(conversion, ctx); \
|
||||
} \
|
||||
}
|
||||
|
||||
MAKE_FORMATTER(QString, value.toStdString() );
|
||||
MAKE_FORMATTER(QVariant, value.toString().toStdString());
|
||||
|
||||
namespace {
|
||||
|
||||
class InvalidRequestError: public std::invalid_argument {
|
||||
using std::invalid_argument::invalid_argument;
|
||||
|
||||
public:
|
||||
QHttpServerResponse asResponse() const
|
||||
{
|
||||
QJsonObject error {
|
||||
{ "message", what(), },
|
||||
{ "type", u"invalid_request_error"_s, },
|
||||
{ "param", QJsonValue::Null },
|
||||
{ "code", QJsonValue::Null },
|
||||
};
|
||||
return { QJsonObject {{ "error", error }},
|
||||
QHttpServerResponder::StatusCode::BadRequest };
|
||||
}
|
||||
|
||||
private:
|
||||
Q_DISABLE_COPY_MOVE(InvalidRequestError)
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
static inline QJsonObject modelToJson(const ModelInfo &info)
|
||||
{
|
||||
QJsonObject model;
|
||||
model.insert("id", info.name());
|
||||
model.insert("object", "model");
|
||||
model.insert("created", 0);
|
||||
model.insert("owned_by", "humanity");
|
||||
model.insert("root", info.name());
|
||||
model.insert("parent", QJsonValue::Null);
|
||||
|
||||
QJsonArray permissions;
|
||||
QJsonObject permissionObj;
|
||||
permissionObj.insert("id", "placeholder");
|
||||
permissionObj.insert("object", "model_permission");
|
||||
permissionObj.insert("created", 0);
|
||||
permissionObj.insert("allow_create_engine", false);
|
||||
permissionObj.insert("allow_sampling", false);
|
||||
permissionObj.insert("allow_logprobs", false);
|
||||
permissionObj.insert("allow_search_indices", false);
|
||||
permissionObj.insert("allow_view", true);
|
||||
permissionObj.insert("allow_fine_tuning", false);
|
||||
permissionObj.insert("organization", "*");
|
||||
permissionObj.insert("group", QJsonValue::Null);
|
||||
permissionObj.insert("is_blocking", false);
|
||||
permissions.append(permissionObj);
|
||||
model.insert("permissions", permissions);
|
||||
return model;
|
||||
}
|
||||
|
||||
static inline QJsonObject resultToJson(const ResultInfo &info)
|
||||
{
|
||||
QJsonObject result;
|
||||
result.insert("file", info.file);
|
||||
result.insert("title", info.title);
|
||||
result.insert("author", info.author);
|
||||
result.insert("date", info.date);
|
||||
result.insert("text", info.text);
|
||||
result.insert("page", info.page);
|
||||
result.insert("from", info.from);
|
||||
result.insert("to", info.to);
|
||||
return result;
|
||||
}
|
||||
|
||||
class BaseCompletionRequest {
public:
    QString model; // required
    // NB: some parameters are not supported yet
    int32_t max_tokens = 16;
    qint64 n = 1;
    float temperature = 1.f;
    float top_p = 1.f;
    float min_p = 0.f;

    BaseCompletionRequest() = default;
    virtual ~BaseCompletionRequest() = default;

    virtual BaseCompletionRequest &parse(QCborMap request)
    {
        parseImpl(request);
        if (!request.isEmpty())
            throw InvalidRequestError(fmt::format(
                "Unrecognized request argument supplied: {}", request.keys().constFirst().toString()
            ));
        return *this;
    }

protected:
    virtual void parseImpl(QCborMap &request)
    {
        using enum Type;

        auto reqValue = [&request](auto &&...args) { return takeValue(request, args...); };
        QCborValue value;

        this->model = reqValue("model", String, /*required*/ true).toString();

        value = reqValue("frequency_penalty", Number, false, /*min*/ -2, /*max*/ 2);
        if (value.isDouble() || value.toInteger() != 0)
            throw InvalidRequestError("'frequency_penalty' is not supported");

        value = reqValue("max_tokens", Integer, false, /*min*/ 1);
        if (!value.isNull())
            this->max_tokens = int32_t(qMin(value.toInteger(), INT32_MAX));

        value = reqValue("n", Integer, false, /*min*/ 1);
        if (!value.isNull())
            this->n = value.toInteger();

        value = reqValue("presence_penalty", Number);
        if (value.isDouble() || value.toInteger() != 0)
            throw InvalidRequestError("'presence_penalty' is not supported");

        value = reqValue("seed", Integer);
        if (!value.isNull())
            throw InvalidRequestError("'seed' is not supported");

        value = reqValue("stop");
        if (!value.isNull())
            throw InvalidRequestError("'stop' is not supported");

        value = reqValue("stream", Boolean);
        if (value.isTrue())
            throw InvalidRequestError("'stream' is not supported");

        value = reqValue("stream_options", Object);
        if (!value.isNull())
            throw InvalidRequestError("'stream_options' is not supported");

        value = reqValue("temperature", Number, false, /*min*/ 0, /*max*/ 2);
        if (!value.isNull())
            this->temperature = float(value.toDouble());

        value = reqValue("top_p", Number, /*min*/ 0, /*max*/ 1);
        if (!value.isNull())
            this->top_p = float(value.toDouble());

        value = reqValue("min_p", Number, /*min*/ 0, /*max*/ 1);
        if (!value.isNull())
            this->min_p = float(value.toDouble());

        reqValue("user", String); // validate but don't use
    }

    enum class Type : uint8_t {
        Boolean,
        Integer,
        Number,
        String,
        Array,
        Object,
    };

    static const std::unordered_map<Type, const char *> s_typeNames;

    static bool typeMatches(const QCborValue &value, Type type) noexcept {
        using enum Type;
        switch (type) {
            case Boolean: return value.isBool();
            case Integer: return value.isInteger();
            case Number: return value.isInteger() || value.isDouble();
            case String: return value.isString();
            case Array: return value.isArray();
            case Object: return value.isMap();
        }
        Q_UNREACHABLE();
    }

    static QCborValue takeValue(
        QCborMap &obj, const char *key, std::optional<Type> type = {}, bool required = false,
        std::optional<qint64> min = {}, std::optional<qint64> max = {}
    ) {
        auto value = obj.take(QLatin1StringView(key));
        if (value.isUndefined())
            value = QCborValue(QCborSimpleType::Null);
        if (required && value.isNull())
            throw InvalidRequestError(fmt::format("you must provide a {} parameter", key));
        if (type && !value.isNull() && !typeMatches(value, *type))
            throw InvalidRequestError(fmt::format("'{}' is not of type '{}' - '{}'",
                                                  value.toVariant(), s_typeNames.at(*type), key));
        if (!value.isNull()) {
            double num = value.toDouble();
            if (min && num < double(*min))
                throw InvalidRequestError(fmt::format("{} is less than the minimum of {} - '{}'", num, *min, key));
            if (max && num > double(*max))
                throw InvalidRequestError(fmt::format("{} is greater than the maximum of {} - '{}'", num, *max, key));
        }
        return value;
    }

private:
    Q_DISABLE_COPY_MOVE(BaseCompletionRequest)
};

class CompletionRequest : public BaseCompletionRequest {
public:
    QString prompt; // required
    // some parameters are not supported yet - these ones are
    bool echo = false;

    CompletionRequest &parse(QCborMap request) override
    {
        BaseCompletionRequest::parse(std::move(request));
        return *this;
    }

protected:
    void parseImpl(QCborMap &request) override
    {
        using enum Type;

        auto reqValue = [&request](auto &&...args) { return takeValue(request, args...); };
        QCborValue value;

        BaseCompletionRequest::parseImpl(request);

        this->prompt = reqValue("prompt", String, /*required*/ true).toString();

        value = reqValue("best_of", Integer);
        {
            qint64 bof = value.toInteger(1);
            if (this->n > bof)
                throw InvalidRequestError(fmt::format(
                    "You requested that the server return more choices than it will generate (HINT: you must set 'n' "
                    "(currently {}) to be at most 'best_of' (currently {}), or omit either parameter if you don't "
                    "specifically want to use them.)",
                    this->n, bof
                ));
            if (bof > this->n)
                throw InvalidRequestError("'best_of' is not supported");
        }

        value = reqValue("echo", Boolean);
        if (value.isBool())
            this->echo = value.toBool();

        // we don't bother deeply typechecking unsupported subobjects for now
        value = reqValue("logit_bias", Object);
        if (!value.isNull())
            throw InvalidRequestError("'logit_bias' is not supported");

        value = reqValue("logprobs", Integer, false, /*min*/ 0);
        if (!value.isNull())
            throw InvalidRequestError("'logprobs' is not supported");

        value = reqValue("suffix", String);
        if (!value.isNull() && !value.toString().isEmpty())
            throw InvalidRequestError("'suffix' is not supported");
    }
};

const std::unordered_map<BaseCompletionRequest::Type, const char *> BaseCompletionRequest::s_typeNames = {
    { BaseCompletionRequest::Type::Boolean, "boolean" },
    { BaseCompletionRequest::Type::Integer, "integer" },
    { BaseCompletionRequest::Type::Number, "number" },
    { BaseCompletionRequest::Type::String, "string" },
    { BaseCompletionRequest::Type::Array, "array" },
    { BaseCompletionRequest::Type::Object, "object" },
};

class ChatRequest : public BaseCompletionRequest {
public:
    struct Message {
        enum class Role : uint8_t {
            User,
            Assistant,
        };
        Role role;
        QString content;
    };

    QList<Message> messages; // required

    ChatRequest &parse(QCborMap request) override
    {
        BaseCompletionRequest::parse(std::move(request));
        return *this;
    }

protected:
    void parseImpl(QCborMap &request) override
    {
        using enum Type;

        auto reqValue = [&request](auto &&...args) { return takeValue(request, args...); };
        QCborValue value;

        BaseCompletionRequest::parseImpl(request);

        value = reqValue("messages", std::nullopt, /*required*/ true);
        if (!value.isArray() || value.toArray().isEmpty())
            throw InvalidRequestError(fmt::format(
                "Invalid type for 'messages': expected a non-empty array of objects, but got '{}' instead.",
                value.toVariant()
            ));

        this->messages.clear();
        {
            QCborArray arr = value.toArray();
            Message::Role nextRole = Message::Role::User;
            for (qsizetype i = 0; i < arr.size(); i++) {
                const auto &elem = arr[i];
                if (!elem.isMap())
                    throw InvalidRequestError(fmt::format(
                        "Invalid type for 'messages[{}]': expected an object, but got '{}' instead.",
                        i, elem.toVariant()
                    ));
                QCborMap msg = elem.toMap();
                Message res;
                QString role = takeValue(msg, "role", String, /*required*/ true).toString();
                if (role == u"system"_s)
                    continue; // FIXME(jared): don't ignore these
                if (role == u"user"_s) {
                    res.role = Message::Role::User;
                } else if (role == u"assistant"_s) {
                    res.role = Message::Role::Assistant;
                } else {
                    throw InvalidRequestError(fmt::format(
                        "Invalid 'messages[{}].role': expected one of 'system', 'assistant', or 'user', but got '{}'"
                        " instead.",
                        i, role.toStdString()
                    ));
                }
                res.content = takeValue(msg, "content", String, /*required*/ true).toString();
                if (res.role != nextRole)
                    throw InvalidRequestError(fmt::format(
                        "Invalid 'messages[{}].role': did not expect '{}' here", i, role
                    ));
                this->messages.append(res);
                nextRole = res.role == Message::Role::User ? Message::Role::Assistant
                                                           : Message::Role::User;

                if (!msg.isEmpty())
                    throw InvalidRequestError(fmt::format(
                        "Invalid 'messages[{}]': unrecognized key: '{}'", i, msg.keys().constFirst().toString()
                    ));
            }
        }

        // we don't bother deeply typechecking unsupported subobjects for now
        value = reqValue("logit_bias", Object);
        if (!value.isNull())
            throw InvalidRequestError("'logit_bias' is not supported");

        value = reqValue("logprobs", Boolean);
        if (value.isTrue())
            throw InvalidRequestError("'logprobs' is not supported");

        value = reqValue("top_logprobs", Integer, false, /*min*/ 0);
        if (!value.isNull())
            throw InvalidRequestError("The 'top_logprobs' parameter is only allowed when 'logprobs' is enabled.");

        value = reqValue("response_format", Object);
        if (!value.isNull())
            throw InvalidRequestError("'response_format' is not supported");

        reqValue("service_tier", String); // validate but don't use

        value = reqValue("tools", Array);
        if (!value.isNull())
            throw InvalidRequestError("'tools' is not supported");

        value = reqValue("tool_choice");
        if (!value.isNull())
            throw InvalidRequestError("'tool_choice' is not supported");

        // validate but don't use
        reqValue("parallel_tool_calls", Boolean);

        value = reqValue("function_call");
        if (!value.isNull())
            throw InvalidRequestError("'function_call' is not supported");

        value = reqValue("functions", Array);
        if (!value.isNull())
            throw InvalidRequestError("'functions' is not supported");
    }
};

template <typename T>
T &parseRequest(T &request, QJsonObject &&obj)
{
    // lossless conversion to CBOR exposes more type information
    return request.parse(QCborMap::fromJsonObject(obj));
}

Server::Server(Chat *chat)
    : ChatLLM(chat, true /*isServer*/)
    , m_chat(chat)
    , m_server(nullptr)
{
    connect(this, &Server::threadStarted, this, &Server::start);
    connect(this, &Server::databaseResultsChanged, this, &Server::handleDatabaseResultsChanged);
    connect(chat, &Chat::collectionListChanged, this, &Server::handleCollectionListChanged, Qt::QueuedConnection);
}

static QJsonObject requestFromJson(const QByteArray &request)
{
    QJsonParseError err;
    const QJsonDocument document = QJsonDocument::fromJson(request, &err);
    if (err.error || !document.isObject())
        throw InvalidRequestError(fmt::format(
            "error parsing request JSON: {}",
            err.error ? err.errorString().toStdString() : "not an object"s
        ));
    return document.object();
}

void Server::start()
{
    m_server = std::make_unique<QHttpServer>(this);
    if (!m_server->listen(QHostAddress::LocalHost, MySettings::globalInstance()->networkPort())) {
        qWarning() << "ERROR: Unable to start the server";
        return;
    }

    m_server->route("/v1/models", QHttpServerRequest::Method::Get,
        [](const QHttpServerRequest &) {
            if (!MySettings::globalInstance()->serverChat())
                return QHttpServerResponse(QHttpServerResponder::StatusCode::Unauthorized);

            const QList<ModelInfo> modelList = ModelList::globalInstance()->selectableModelList();
            QJsonObject root;
            root.insert("object", "list");
            QJsonArray data;
            for (const ModelInfo &info : modelList) {
                Q_ASSERT(info.installed);
                if (!info.installed)
                    continue;
                data.append(modelToJson(info));
            }
            root.insert("data", data);
            return QHttpServerResponse(root);
        }
    );

    m_server->route("/v1/models/<arg>", QHttpServerRequest::Method::Get,
        [](const QString &model, const QHttpServerRequest &) {
            if (!MySettings::globalInstance()->serverChat())
                return QHttpServerResponse(QHttpServerResponder::StatusCode::Unauthorized);

            const QList<ModelInfo> modelList = ModelList::globalInstance()->selectableModelList();
            QJsonObject object;
            for (const ModelInfo &info : modelList) {
                Q_ASSERT(info.installed);
                if (!info.installed)
                    continue;

                if (model == info.name()) {
                    object = modelToJson(info);
                    break;
                }
            }
            return QHttpServerResponse(object);
        }
    );

    m_server->route("/v1/completions", QHttpServerRequest::Method::Post,
        [this](const QHttpServerRequest &request) {
            if (!MySettings::globalInstance()->serverChat())
                return QHttpServerResponse(QHttpServerResponder::StatusCode::Unauthorized);

            try {
                auto reqObj = requestFromJson(request.body());
#if defined(DEBUG)
                qDebug().noquote() << "/v1/completions request" << QJsonDocument(reqObj).toJson(QJsonDocument::Indented);
#endif
                CompletionRequest req;
                parseRequest(req, std::move(reqObj));
                auto [resp, respObj] = handleCompletionRequest(req);
#if defined(DEBUG)
                if (respObj)
                    qDebug().noquote() << "/v1/completions reply" << QJsonDocument(*respObj).toJson(QJsonDocument::Indented);
#endif
                return std::move(resp);
            } catch (const InvalidRequestError &e) {
                return e.asResponse();
            }
        }
    );

    m_server->route("/v1/chat/completions", QHttpServerRequest::Method::Post,
        [this](const QHttpServerRequest &request) {
            if (!MySettings::globalInstance()->serverChat())
                return QHttpServerResponse(QHttpServerResponder::StatusCode::Unauthorized);

            try {
                auto reqObj = requestFromJson(request.body());
#if defined(DEBUG)
                qDebug().noquote() << "/v1/chat/completions request" << QJsonDocument(reqObj).toJson(QJsonDocument::Indented);
#endif
                ChatRequest req;
                parseRequest(req, std::move(reqObj));
                auto [resp, respObj] = handleChatRequest(req);
                (void)respObj;
#if defined(DEBUG)
                if (respObj)
                    qDebug().noquote() << "/v1/chat/completions reply" << QJsonDocument(*respObj).toJson(QJsonDocument::Indented);
#endif
                return std::move(resp);
            } catch (const InvalidRequestError &e) {
                return e.asResponse();
            }
        }
    );

    // Respond with code 405 to wrong HTTP methods:
    m_server->route("/v1/models", QHttpServerRequest::Method::Post,
        [] {
            if (!MySettings::globalInstance()->serverChat())
                return QHttpServerResponse(QHttpServerResponder::StatusCode::Unauthorized);
            return QHttpServerResponse(
                QJsonDocument::fromJson("{\"error\": {\"message\": \"Not allowed to POST on /v1/models."
                                        " (HINT: Perhaps you meant to use a different HTTP method?)\","
                                        " \"type\": \"invalid_request_error\", \"param\": null, \"code\": null}}").object(),
                QHttpServerResponder::StatusCode::MethodNotAllowed);
        }
    );

    m_server->route("/v1/models/<arg>", QHttpServerRequest::Method::Post,
        [](const QString &model) {
            (void)model;
            if (!MySettings::globalInstance()->serverChat())
                return QHttpServerResponse(QHttpServerResponder::StatusCode::Unauthorized);
            return QHttpServerResponse(
                QJsonDocument::fromJson("{\"error\": {\"message\": \"Not allowed to POST on /v1/models/*."
                                        " (HINT: Perhaps you meant to use a different HTTP method?)\","
                                        " \"type\": \"invalid_request_error\", \"param\": null, \"code\": null}}").object(),
                QHttpServerResponder::StatusCode::MethodNotAllowed);
        }
    );

    m_server->route("/v1/completions", QHttpServerRequest::Method::Get,
        [] {
            if (!MySettings::globalInstance()->serverChat())
                return QHttpServerResponse(QHttpServerResponder::StatusCode::Unauthorized);
            return QHttpServerResponse(
                QJsonDocument::fromJson("{\"error\": {\"message\": \"Only POST requests are accepted.\","
                                        " \"type\": \"invalid_request_error\", \"param\": null, \"code\": \"method_not_supported\"}}").object(),
                QHttpServerResponder::StatusCode::MethodNotAllowed);
        }
    );

    m_server->route("/v1/chat/completions", QHttpServerRequest::Method::Get,
        [] {
            if (!MySettings::globalInstance()->serverChat())
                return QHttpServerResponse(QHttpServerResponder::StatusCode::Unauthorized);
            return QHttpServerResponse(
                QJsonDocument::fromJson("{\"error\": {\"message\": \"Only POST requests are accepted.\","
                                        " \"type\": \"invalid_request_error\", \"param\": null, \"code\": \"method_not_supported\"}}").object(),
                QHttpServerResponder::StatusCode::MethodNotAllowed);
        }
    );

    m_server->afterRequest([] (QHttpServerResponse &&resp) {
        resp.addHeader("Access-Control-Allow-Origin", "*");
        return std::move(resp);
    });

    connect(this, &Server::requestServerNewPromptResponsePair, m_chat,
            &Chat::serverNewPromptResponsePair, Qt::BlockingQueuedConnection);
}

static auto makeError(auto &&...args) -> std::pair<QHttpServerResponse, std::optional<QJsonObject>>
{
    return {QHttpServerResponse(args...), std::nullopt};
}

auto Server::handleCompletionRequest(const CompletionRequest &request)
    -> std::pair<QHttpServerResponse, std::optional<QJsonObject>>
{
    ModelInfo modelInfo = ModelList::globalInstance()->defaultModelInfo();
    const QList<ModelInfo> modelList = ModelList::globalInstance()->selectableModelList();
    for (const ModelInfo &info : modelList) {
        Q_ASSERT(info.installed);
        if (!info.installed)
            continue;
        if (request.model == info.name() || request.model == info.filename()) {
            modelInfo = info;
            break;
        }
    }

    // adds prompt/response items to GUI
    emit requestServerNewPromptResponsePair(request.prompt); // blocks
    resetResponse();

    // load the new model if necessary
    setShouldBeLoaded(true);

    if (modelInfo.filename().isEmpty()) {
        std::cerr << "ERROR: couldn't load default model " << request.model.toStdString() << std::endl;
        return makeError(QHttpServerResponder::StatusCode::InternalServerError);
    }

    // NB: this resets the context, regardless of whether this model is already loaded
    if (!loadModel(modelInfo)) {
        std::cerr << "ERROR: couldn't load model " << modelInfo.name().toStdString() << std::endl;
        return makeError(QHttpServerResponder::StatusCode::InternalServerError);
    }

    // FIXME(jared): taking parameters from the UI inhibits reproducibility of results
    const int top_k = modelInfo.topK();
    const int n_batch = modelInfo.promptBatchSize();
    const auto repeat_penalty = float(modelInfo.repeatPenalty());
    const int repeat_last_n = modelInfo.repeatPenaltyTokens();

    int promptTokens = 0;
    int responseTokens = 0;
    QList<QPair<QString, QList<ResultInfo>>> responses;
    for (int i = 0; i < request.n; ++i) {
        if (!promptInternal(
                m_collections,
                request.prompt,
                /*promptTemplate*/ u"%1"_s,
                request.max_tokens,
                top_k,
                request.top_p,
                request.min_p,
                request.temperature,
                n_batch,
                repeat_penalty,
                repeat_last_n)) {

            std::cerr << "ERROR: couldn't prompt model " << modelInfo.name().toStdString() << std::endl;
            return makeError(QHttpServerResponder::StatusCode::InternalServerError);
        }
        QString resp = response(/*trim*/ false);
        if (request.echo)
            resp = request.prompt + resp;
        responses.append({resp, m_databaseResults});
        if (!promptTokens)
            promptTokens = m_promptTokens;
        responseTokens += m_promptResponseTokens - m_promptTokens;
        if (i < request.n - 1)
            resetResponse();
    }

    QJsonObject responseObject {
        { "id", "placeholder" },
        { "object", "text_completion" },
        { "created", QDateTime::currentSecsSinceEpoch() },
        { "model", modelInfo.name() },
    };

    QJsonArray choices;
    {
        int index = 0;
        for (const auto &r : responses) {
            QString result = r.first;
            QList<ResultInfo> infos = r.second;
            QJsonObject choice {
                { "text", result },
                { "index", index++ },
                { "logprobs", QJsonValue::Null },
                { "finish_reason", responseTokens == request.max_tokens ? "length" : "stop" },
            };
            if (MySettings::globalInstance()->localDocsShowReferences()) {
                QJsonArray references;
                for (const auto &ref : infos)
                    references.append(resultToJson(ref));
                choice.insert("references", references.isEmpty() ? QJsonValue::Null : QJsonValue(references));
            }
            choices.append(choice);
        }
    }

    responseObject.insert("choices", choices);
    responseObject.insert("usage", QJsonObject {
        { "prompt_tokens", promptTokens },
        { "completion_tokens", responseTokens },
        { "total_tokens", promptTokens + responseTokens },
    });

    return {QHttpServerResponse(responseObject), responseObject};
}

auto Server::handleChatRequest(const ChatRequest &request)
    -> std::pair<QHttpServerResponse, std::optional<QJsonObject>>
{
    ModelInfo modelInfo = ModelList::globalInstance()->defaultModelInfo();
    const QList<ModelInfo> modelList = ModelList::globalInstance()->selectableModelList();
    for (const ModelInfo &info : modelList) {
        Q_ASSERT(info.installed);
        if (!info.installed)
            continue;
        if (request.model == info.name() || request.model == info.filename()) {
            modelInfo = info;
            break;
        }
    }

    // load the new model if necessary
    setShouldBeLoaded(true);

    if (modelInfo.filename().isEmpty()) {
        std::cerr << "ERROR: couldn't load default model " << request.model.toStdString() << std::endl;
        return makeError(QHttpServerResponder::StatusCode::InternalServerError);
    }

    // NB: this resets the context, regardless of whether this model is already loaded
    if (!loadModel(modelInfo)) {
        std::cerr << "ERROR: couldn't load model " << modelInfo.name().toStdString() << std::endl;
        return makeError(QHttpServerResponder::StatusCode::InternalServerError);
    }

    const QString promptTemplate = modelInfo.promptTemplate();
    const int top_k = modelInfo.topK();
    const int n_batch = modelInfo.promptBatchSize();
    const auto repeat_penalty = float(modelInfo.repeatPenalty());
    const int repeat_last_n = modelInfo.repeatPenaltyTokens();

    int promptTokens = 0;
    int responseTokens = 0;
    QList<QPair<QString, QList<ResultInfo>>> responses;
    Q_ASSERT(!request.messages.isEmpty());
    Q_ASSERT(request.messages.size() % 2 == 1);
    for (int i = 0; i < request.messages.size() - 2; i += 2) {
        using enum ChatRequest::Message::Role;
        auto &user = request.messages[i];
        auto &assistant = request.messages[i + 1];
        Q_ASSERT(user.role == User);
        Q_ASSERT(assistant.role == Assistant);

        // adds prompt/response items to GUI
        emit requestServerNewPromptResponsePair(user.content); // blocks
        resetResponse();

        if (!promptInternal(
                {},
                user.content,
                promptTemplate,
                request.max_tokens,
                top_k,
                request.top_p,
                request.min_p,
                request.temperature,
                n_batch,
                repeat_penalty,
                repeat_last_n,
                assistant.content)
        ) {
            std::cerr << "ERROR: couldn't prompt model " << modelInfo.name().toStdString() << std::endl;
            return makeError(QHttpServerResponder::StatusCode::InternalServerError);
        }
        promptTokens += m_promptResponseTokens; // previous responses are part of current prompt
    }

    QString lastMessage = request.messages.last().content;
    // adds prompt/response items to GUI
    emit requestServerNewPromptResponsePair(lastMessage); // blocks
    resetResponse();

    for (int i = 0; i < request.n; ++i) {
        if (!promptInternal(
                m_collections,
                lastMessage,
                promptTemplate,
                request.max_tokens,
                top_k,
                request.top_p,
                request.min_p,
                request.temperature,
                n_batch,
                repeat_penalty,
                repeat_last_n)
        ) {
            std::cerr << "ERROR: couldn't prompt model " << modelInfo.name().toStdString() << std::endl;
            return makeError(QHttpServerResponder::StatusCode::InternalServerError);
        }
        responses.append({response(), m_databaseResults});
        // FIXME(jared): these are UI counts and do not include framing tokens, which they should
        if (i == 0)
            promptTokens += m_promptTokens;
        responseTokens += m_promptResponseTokens - m_promptTokens;
        if (i != request.n - 1)
            resetResponse();
    }

    QJsonObject responseObject {
        { "id", "placeholder" },
        { "object", "chat.completion" },
        { "created", QDateTime::currentSecsSinceEpoch() },
        { "model", modelInfo.name() },
    };

    QJsonArray choices;
    {
        int index = 0;
        for (const auto &r : responses) {
            QString result = r.first;
            QList<ResultInfo> infos = r.second;
            QJsonObject message {
                { "role", "assistant" },
                { "content", result },
            };
            QJsonObject choice {
                { "index", index++ },
                { "message", message },
                { "finish_reason", responseTokens == request.max_tokens ? "length" : "stop" },
                { "logprobs", QJsonValue::Null },
            };
            if (MySettings::globalInstance()->localDocsShowReferences()) {
                QJsonArray references;
                for (const auto &ref : infos)
                    references.append(resultToJson(ref));
                choice.insert("references", references.isEmpty() ? QJsonValue::Null : QJsonValue(references));
            }
            choices.append(choice);
        }
    }

    responseObject.insert("choices", choices);
    responseObject.insert("usage", QJsonObject {
        { "prompt_tokens", promptTokens },
        { "completion_tokens", responseTokens },
        { "total_tokens", promptTokens + responseTokens },
    });

    return {QHttpServerResponse(responseObject), responseObject};
}
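
To make the new endpoints concrete, here is a minimal client sketch in Qt/C++ (the same stack as the code above). It assumes the API server is enabled in the chat settings and reachable on localhost at the configured port; 4891 is used here purely as an illustration, since the real value comes from MySettings::networkPort(), and "Llama 3 8B Instruct" stands in for whatever model name or filename is actually installed.

// Hypothetical standalone client; the URL, port, and model name are assumptions, not part of this diff.
#include <QCoreApplication>
#include <QDebug>
#include <QJsonArray>
#include <QJsonDocument>
#include <QJsonObject>
#include <QNetworkAccessManager>
#include <QNetworkReply>
#include <QNetworkRequest>
#include <QObject>
#include <QUrl>

int main(int argc, char *argv[])
{
    QCoreApplication app(argc, argv);

    // Request body uses fields ChatRequest accepts: model, messages, max_tokens, temperature.
    QJsonObject body {
        { "model", "Llama 3 8B Instruct" },   // assumed name of an installed model
        { "messages", QJsonArray {
            QJsonObject { { "role", "user" }, { "content", "Hello!" } },
        } },
        { "max_tokens", 64 },
        { "temperature", 0.7 },
    };

    QNetworkRequest req(QUrl("http://localhost:4891/v1/chat/completions"));
    req.setHeader(QNetworkRequest::ContentTypeHeader, "application/json");

    QNetworkAccessManager nam;
    QNetworkReply *reply = nam.post(req, QJsonDocument(body).toJson());
    QObject::connect(reply, &QNetworkReply::finished, [&] {
        // On success the body is the JSON built in handleChatRequest(): "choices" and "usage".
        qInfo().noquote() << reply->readAll();
        reply->deleteLater();
        app.quit();
    });
    return app.exec();
}

The reply mirrors what handleChatRequest() constructs: each choice carries an assistant "message" plus optional LocalDocs "references", and "usage" reports prompt, completion, and total token counts.
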
gpt4all-chat/src/server.h
@ -4,22 +4,29 @@
#include "chatllm.h"
#include "database.h"

#include <QHttpServerRequest>
#include <QHttpServer>
#include <QHttpServerResponse>
#include <QObject>
#include <QJsonObject>
#include <QList>
#include <QObject>
#include <QString>

#include <memory>
#include <optional>
#include <utility>

class Chat;
class QHttpServer;
class ChatRequest;
class CompletionRequest;


class Server : public ChatLLM
{
    Q_OBJECT

public:
    Server(Chat *parent);
    virtual ~Server();
    explicit Server(Chat *chat);
    ~Server() override = default;

public Q_SLOTS:
    void start();
@ -27,14 +34,17 @@ public Q_SLOTS:
Q_SIGNALS:
    void requestServerNewPromptResponsePair(const QString &prompt);

private:
    auto handleCompletionRequest(const CompletionRequest &request) -> std::pair<QHttpServerResponse, std::optional<QJsonObject>>;
    auto handleChatRequest(const ChatRequest &request) -> std::pair<QHttpServerResponse, std::optional<QJsonObject>>;

private Q_SLOTS:
    QHttpServerResponse handleCompletionRequest(const QHttpServerRequest &request, bool isChat);
    void handleDatabaseResultsChanged(const QList<ResultInfo> &results) { m_databaseResults = results; }
    void handleCollectionListChanged(const QList<QString> &collectionList) { m_collections = collectionList; }

private:
    Chat *m_chat;
    QHttpServer *m_server;
    std::unique_ptr<QHttpServer> m_server;
    QList<ResultInfo> m_databaseResults;
    QList<QString> m_collections;
};
19
gpt4all-chat/system_requirements.md
Normal file
@ -0,0 +1,19 @@
Below are the recommended and minimum system requirements for GPT4All.

### **Recommended System Requirements**
| **Component** | **PC (Windows/Linux)** | **Apple** |
|---------------|-------------------------------------------------------|----------------------------|
| **CPU** | Ryzen 5 3600 or Intel Core i7-10700, or better | M2 Pro |
| **RAM** | 16GB | 16GB |
| **GPU** | NVIDIA GTX 1080 Ti/RTX 2080 or better, with 8GB+ VRAM | M2 Pro (integrated GPU) |
| **OS** | At least Windows 10 or Ubuntu 24.04 LTS | macOS Sonoma 14.5 or newer |

### **Minimum System Requirements**
| **Component** | **PC (Windows/Linux)** | **Apple** |
|---------------|-----------------------------------------------------------------|---------------------|
| **CPU** | Intel Core: i3-2100, Pentium: 7505, Celeron: 6305; AMD: FX-4100 | M1 |
| **RAM** | 16GB (8GB for 3B LLMs) | 16GB |
| **GPU** | Anything Direct3D 11/12 or OpenGL 2.1 capable | M1 (integrated GPU) |
| **OS** | Windows 10, Ubuntu 22.04 LTS, or other compatible Linux | macOS Monterey 12.6 |

Note that Windows and Linux PCs with ARM CPUs are not currently supported.