From 5be5314ace988bd4821bdb0d0fc3d3478140d0b8 Mon Sep 17 00:00:00 2001
From: Jared Van Bortel <jared@nomic.ai>
Date: Wed, 7 Aug 2024 17:44:34 -0400
Subject: [PATCH] rename LLModel -> ModelBackend, EmbLLModel -> EmbCapableBackend

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
---
 gpt4all-backend/CMakeLists.txt            |  2 +-
 gpt4all-backend/llamacpp_backend.h        |  4 ++--
 gpt4all-backend/llamacpp_backend_impl.cpp | 22 +++++++++----------
 gpt4all-backend/llmodel_c.cpp             |  4 ++--
 .../{llmodel.h => model_backend.h}        | 12 +++++-----
 gpt4all-bindings/python/setup.py          |  2 +-
 gpt4all-bindings/typescript/index.h       |  2 +-
 gpt4all-bindings/typescript/prompt.h      |  2 +-
 gpt4all-chat/chatapi.cpp                  |  4 ++--
 gpt4all-chat/chatapi.h                    | 10 ++++-----
 gpt4all-chat/chatllm.cpp                  | 12 +++++-----
 gpt4all-chat/chatllm.h                    | 10 ++++-----
 gpt4all-chat/embllm.cpp                   |  4 ++--
 13 files changed, 45 insertions(+), 45 deletions(-)
 rename gpt4all-backend/{llmodel.h => model_backend.h} (91%)

diff --git a/gpt4all-backend/CMakeLists.txt b/gpt4all-backend/CMakeLists.txt
index 14fdbf44..f10d5d94 100644
--- a/gpt4all-backend/CMakeLists.txt
+++ b/gpt4all-backend/CMakeLists.txt
@@ -138,7 +138,7 @@ foreach(BUILD_VARIANT IN LISTS BUILD_VARIANTS)
 endforeach()
 
 add_library(llmodel
-    llmodel.h
+    model_backend.h
     llamacpp_backend.h llamacpp_backend.cpp
     llamacpp_backend_manager.h llamacpp_backend_manager.cpp
     llmodel_c.h llmodel_c.cpp
diff --git a/gpt4all-backend/llamacpp_backend.h b/gpt4all-backend/llamacpp_backend.h
index 86bac0ec..b319c473 100644
--- a/gpt4all-backend/llamacpp_backend.h
+++ b/gpt4all-backend/llamacpp_backend.h
@@ -1,6 +1,6 @@
 #pragma once
 
-#include "llmodel.h"
+#include "model_backend.h"
 
 #include
 #include
@@ -17,7 +17,7 @@ using namespace std::string_literals;
 
 class LlamaCppBackendManager;
 
-class LlamaCppBackend : public EmbLLModel {
+class LlamaCppBackend : public EmbCapableBackend {
 public:
     struct GPUDevice {
         const char *backend;
diff --git a/gpt4all-backend/llamacpp_backend_impl.cpp b/gpt4all-backend/llamacpp_backend_impl.cpp
index 0ace53bb..cd92b15e 100644
--- a/gpt4all-backend/llamacpp_backend_impl.cpp
+++ b/gpt4all-backend/llamacpp_backend_impl.cpp
@@ -1,7 +1,7 @@
 #define LLAMACPP_BACKEND_H_I_KNOW_WHAT_I_AM_DOING_WHEN_INCLUDING_THIS_FILE
 #include "llamacpp_backend_impl.h"
 
-#include "llmodel.h"
+#include "model_backend.h"
 
 #include
 #include
@@ -242,7 +242,7 @@ struct LlamaPrivate {
     llama_model_params model_params;
     llama_context_params ctx_params;
     int64_t n_threads = 0;
-    std::vector<LLModel::Token> end_tokens;
+    std::vector<ModelBackend::Token> end_tokens;
     const char *backend_name = nullptr;
 };
 
@@ -528,11 +528,11 @@ size_t LlamaCppBackendImpl::restoreState(const uint8_t *src)
     return llama_set_state_data(d_ptr->ctx, const_cast<uint8_t *>(src));
 }
 
-std::vector<LLModel::Token> LlamaCppBackendImpl::tokenize(PromptContext &ctx, const std::string &str, bool special)
+std::vector<ModelBackend::Token> LlamaCppBackendImpl::tokenize(PromptContext &ctx, const std::string &str, bool special)
 {
     bool atStart = m_tokenize_last_token == -1;
     bool insertSpace = atStart || isSpecialToken(m_tokenize_last_token);
-    std::vector<LLModel::Token> fres(str.length() + 4);
+    std::vector<ModelBackend::Token> fres(str.length() + 4);
     int32_t fres_len = llama_tokenize_gpt4all(
         d_ptr->model, str.c_str(), str.length(), fres.data(), fres.size(), /*add_special*/ atStart,
         /*parse_special*/ special, /*insert_space*/ insertSpace
     );
@@ -565,7 +565,7 @@ std::string LlamaCppBackendImpl::tokenToString(Token id) const
     return std::string(result.data(), result.size());
 }
 
-LLModel::Token LlamaCppBackendImpl::sampleToken(PromptContext &promptCtx) const
+ModelBackend::Token LlamaCppBackendImpl::sampleToken(PromptContext &promptCtx) const
 {
     const size_t n_prev_toks = std::min((size_t) promptCtx.repeat_last_n, promptCtx.tokens.size());
     return llama_sample_top_p_top_k(d_ptr->ctx,
@@ -627,7 +627,7 @@ int32_t LlamaCppBackendImpl::contextLength() const
     return llama_n_ctx(d_ptr->ctx);
 }
 
-const std::vector<LLModel::Token> &LlamaCppBackendImpl::endTokens() const
+const std::vector<ModelBackend::Token> &LlamaCppBackendImpl::endTokens() const
 {
     return d_ptr->end_tokens;
 }
@@ -825,7 +825,7 @@ void llama_batch_add(
     batch.n_tokens++;
 }
 
-static void batch_add_seq(llama_batch &batch, const std::vector<LLModel::Token> &tokens, int seq_id)
+static void batch_add_seq(llama_batch &batch, const std::vector<ModelBackend::Token> &tokens, int seq_id)
 {
     for (unsigned i = 0; i < tokens.size(); i++) {
         llama_batch_add(batch, tokens[i], i, { seq_id }, i == tokens.size() - 1);
@@ -909,7 +909,7 @@ void LlamaCppBackendImpl::embed(
 
 void LlamaCppBackendImpl::embed(
     const std::vector<std::string> &texts, float *embeddings, std::optional<std::string> prefix, int dimensionality,
-    size_t *tokenCount, bool doMean, bool atlas, EmbLLModel::EmbedCancelCallback *cancelCb
+    size_t *tokenCount, bool doMean, bool atlas, EmbedCancelCallback *cancelCb
 ) {
     if (!d_ptr->model)
         throw std::logic_error("no model is loaded");
@@ -967,9 +967,9 @@ double getL2NormScale(T *start, T *end)
 
 void LlamaCppBackendImpl::embedInternal(
     const std::vector<std::string> &texts, float *embeddings, std::string prefix, int dimensionality,
-    size_t *tokenCount, bool doMean, bool atlas, EmbLLModel::EmbedCancelCallback *cancelCb, const EmbModelSpec *spec
+    size_t *tokenCount, bool doMean, bool atlas, EmbedCancelCallback *cancelCb, const EmbModelSpec *spec
 ) {
-    typedef std::vector<LLModel::Token> TokenString;
+    typedef std::vector<ModelBackend::Token> TokenString;
     static constexpr int32_t atlasMaxLength = 8192;
     static constexpr int chunkOverlap = 8; // Atlas overlaps chunks of input by 8 tokens
 
@@ -1217,7 +1217,7 @@ DLL_EXPORT bool is_arch_supported(const char *arch)
     return std::find(KNOWN_ARCHES.begin(), KNOWN_ARCHES.end(), std::string(arch)) < KNOWN_ARCHES.end();
 }
 
-DLL_EXPORT LLModel *construct()
+DLL_EXPORT LlamaCppBackend *construct()
 {
     llama_log_set(llama_log_callback, nullptr);
 #ifdef GGML_USE_CUDA
diff --git a/gpt4all-backend/llmodel_c.cpp b/gpt4all-backend/llmodel_c.cpp
index edeac477..18b59899 100644
--- a/gpt4all-backend/llmodel_c.cpp
+++ b/gpt4all-backend/llmodel_c.cpp
@@ -2,7 +2,7 @@
 
 #include "llamacpp_backend.h"
 #include "llamacpp_backend_manager.h"
-#include "llmodel.h"
+#include "model_backend.h"
 
 #include
 #include
@@ -18,7 +18,7 @@
 
 struct LLModelWrapper {
     LlamaCppBackend *llModel = nullptr;
-    LLModel::PromptContext promptContext;
+    ModelBackend::PromptContext promptContext;
 
     ~LLModelWrapper() { delete llModel; }
 };
diff --git a/gpt4all-backend/llmodel.h b/gpt4all-backend/model_backend.h
similarity index 91%
rename from gpt4all-backend/llmodel.h
rename to gpt4all-backend/model_backend.h
index 4067b353..467c4e83 100644
--- a/gpt4all-backend/llmodel.h
+++ b/gpt4all-backend/model_backend.h
@@ -10,7 +10,7 @@
 
 #define LLMODEL_MAX_PROMPT_BATCH 128
 
-class LLModel {
+class ModelBackend {
 public:
     using Token = int32_t;
 
@@ -29,7 +29,7 @@ public:
         float contextErase = 0.5f; // percent of context to erase if we exceed the context window
     };
 
-    virtual ~LLModel() {}
+    virtual ~ModelBackend() {}
 
     virtual bool supportsCompletion() const { return true; }
     virtual bool loadModel(const std::string &modelPath, int n_ctx, int ngl) = 0;
@@ -50,13 +50,13 @@ public:
                         std::string *fakeReply = nullptr) = 0;
 
 protected:
-    explicit LLModel() {}
+    explicit ModelBackend() {}
 };
 
-class EmbLLModel: virtual public LLModel {
-public:
-    using EmbedCancelCallback = bool(unsigned *batchSizes, unsigned nBatch, const char *backend);
+using EmbedCancelCallback = bool(unsigned *batchSizes, unsigned nBatch, const char *backend);
+
+class EmbCapableBackend : virtual public ModelBackend {
+public:
     virtual bool supportsCompletion() const = 0;
     virtual bool supportsEmbedding() const = 0;
     virtual size_t embeddingSize() const = 0;
diff --git a/gpt4all-bindings/python/setup.py b/gpt4all-bindings/python/setup.py
index e92fba61..ed6f4071 100644
--- a/gpt4all-bindings/python/setup.py
+++ b/gpt4all-bindings/python/setup.py
@@ -55,7 +55,7 @@ def copy_prebuilt_C_lib(src_dir, dest_dir, dest_build_dir):
 
 
 # NOTE: You must provide correct path to the prebuilt llmodel C library.
-# Specifically, the llmodel.h and C shared library are needed.
+# Specifically, the model_backend.h and C shared library are needed.
 copy_prebuilt_C_lib(SRC_CLIB_DIRECTORY,
                     DEST_CLIB_DIRECTORY,
                     DEST_CLIB_BUILD_DIRECTORY)
diff --git a/gpt4all-bindings/typescript/index.h b/gpt4all-bindings/typescript/index.h
index db3ef11e..7726e8cf 100644
--- a/gpt4all-bindings/typescript/index.h
+++ b/gpt4all-bindings/typescript/index.h
@@ -1,4 +1,4 @@
-#include "llmodel.h"
+#include "model_backend.h"
 #include "llmodel_c.h"
 #include "prompt.h"
 #include
diff --git a/gpt4all-bindings/typescript/prompt.h b/gpt4all-bindings/typescript/prompt.h
index 49c43620..e1d0a550 100644
--- a/gpt4all-bindings/typescript/prompt.h
+++ b/gpt4all-bindings/typescript/prompt.h
@@ -1,7 +1,7 @@
 #ifndef PREDICT_WORKER_H
 #define PREDICT_WORKER_H
 
-#include "llmodel.h"
+#include "model_backend.h"
 #include "llmodel_c.h"
 #include "napi.h"
 #include
diff --git a/gpt4all-chat/chatapi.cpp b/gpt4all-chat/chatapi.cpp
index ada33325..41fb7f5b 100644
--- a/gpt4all-chat/chatapi.cpp
+++ b/gpt4all-chat/chatapi.cpp
@@ -1,6 +1,6 @@
 #include "chatapi.h"
 
-#include "../gpt4all-backend/llmodel.h"
+#include "../gpt4all-backend/model_backend.h"
 
 #include
 #include
@@ -170,7 +170,7 @@ bool ChatAPI::callResponse(int32_t token, const std::string& string)
 }
 
 void ChatAPIWorker::request(const QString &apiKey,
-                            LLModel::PromptContext *promptCtx,
+                            ModelBackend::PromptContext *promptCtx,
                             const QByteArray &array)
 {
     m_ctx = promptCtx;
diff --git a/gpt4all-chat/chatapi.h b/gpt4all-chat/chatapi.h
index 0decb642..45d50fe1 100644
--- a/gpt4all-chat/chatapi.h
+++ b/gpt4all-chat/chatapi.h
@@ -1,7 +1,7 @@
 #ifndef CHATAPI_H
 #define CHATAPI_H
 
-#include "../gpt4all-backend/llmodel.h"
+#include "../gpt4all-backend/model_backend.h"
 
 #include
 #include
@@ -33,7 +33,7 @@ public:
     QString currentResponse() const { return m_currentResponse; }
 
     void request(const QString &apiKey,
-                 LLModel::PromptContext *promptCtx,
+                 ModelBackend::PromptContext *promptCtx,
                  const QByteArray &array);
 
 Q_SIGNALS:
@@ -46,12 +46,12 @@ private Q_SLOTS:
 
 private:
     ChatAPI *m_chat;
-    LLModel::PromptContext *m_ctx;
+    ModelBackend::PromptContext *m_ctx;
     QNetworkAccessManager *m_networkManager;
     QString m_currentResponse;
 };
 
-class ChatAPI : public QObject, public LLModel {
+class ChatAPI : public QObject, public ModelBackend {
     Q_OBJECT
 public:
     ChatAPI();
@@ -83,7 +83,7 @@ public:
 
 Q_SIGNALS:
     void request(const QString &apiKey,
-                 LLModel::PromptContext *ctx,
+                 ModelBackend::PromptContext *ctx,
                  const QByteArray &array);
 
 private:
diff --git a/gpt4all-chat/chatllm.cpp b/gpt4all-chat/chatllm.cpp
index f4599684..104d020e 100644
--- a/gpt4all-chat/chatllm.cpp
+++ b/gpt4all-chat/chatllm.cpp
@@ -94,7 +94,7 @@ void LLModelStore::destroy()
     m_availableModel.reset();
 }
 
-void LLModelInfo::resetModel(ChatLLM *cllm, LLModel *model) {
+void LLModelInfo::resetModel(ChatLLM *cllm, ModelBackend *model) {
     this->model.reset(model);
     fallbackReason.reset();
     emit cllm->loadedModelInfoChanged();
@@ -647,7 +647,7 @@ void ChatLLM::resetContext()
 {
     resetResponse();
     m_processedSystemPrompt = false;
-    m_ctx = LLModel::PromptContext();
+    m_ctx = ModelBackend::PromptContext();
 }
 
 QString ChatLLM::response() const
@@ -902,7 +902,7 @@ void ChatLLM::generateName()
     auto promptTemplate = MySettings::globalInstance()->modelPromptTemplate(m_modelInfo);
     auto promptFunc = std::bind(&ChatLLM::handleNamePrompt, this, std::placeholders::_1);
     auto responseFunc = std::bind(&ChatLLM::handleNameResponse, this, std::placeholders::_1, std::placeholders::_2);
-    LLModel::PromptContext ctx = m_ctx;
+    ModelBackend::PromptContext ctx = m_ctx;
     m_llModelInfo.model->prompt(chatNamePrompt.toStdString(), promptTemplate.toStdString(), promptFunc, responseFunc,
                                 /*allowContextShift*/ false, ctx);
     std::string trimmed = trim_whitespace(m_nameResponse);
@@ -998,7 +998,7 @@ void ChatLLM::generateQuestions(qint64 elapsed)
     auto promptTemplate = MySettings::globalInstance()->modelPromptTemplate(m_modelInfo);
     auto promptFunc = std::bind(&ChatLLM::handleQuestionPrompt, this, std::placeholders::_1);
     auto responseFunc = std::bind(&ChatLLM::handleQuestionResponse, this, std::placeholders::_1, std::placeholders::_2);
-    LLModel::PromptContext ctx = m_ctx;
+    ModelBackend::PromptContext ctx = m_ctx;
     QElapsedTimer totalTime;
     totalTime.start();
     m_llModelInfo.model->prompt(suggestedFollowUpPrompt, promptTemplate.toStdString(), promptFunc, responseFunc,
@@ -1225,7 +1225,7 @@ void ChatLLM::processSystemPrompt()
 
     // Start with a whole new context
     m_stopGenerating = false;
-    m_ctx = LLModel::PromptContext();
+    m_ctx = ModelBackend::PromptContext();
 
     auto promptFunc = std::bind(&ChatLLM::handleSystemPrompt, this, std::placeholders::_1);
 
@@ -1278,7 +1278,7 @@ void ChatLLM::processRestoreStateFromText()
     emit restoringFromTextChanged();
 
     m_stopGenerating = false;
-    m_ctx = LLModel::PromptContext();
+    m_ctx = ModelBackend::PromptContext();
 
     auto promptFunc = std::bind(&ChatLLM::handleRestoreStateFromTextPrompt, this, std::placeholders::_1);
 
diff --git a/gpt4all-chat/chatllm.h b/gpt4all-chat/chatllm.h
index 18ccb897..c721095e 100644
--- a/gpt4all-chat/chatllm.h
+++ b/gpt4all-chat/chatllm.h
@@ -4,7 +4,7 @@
 #include "modellist.h"
 
 #include "../gpt4all-backend/llamacpp_backend.h"
-#include "../gpt4all-backend/llmodel.h"
+#include "../gpt4all-backend/model_backend.h"
 
 #include
 #include
@@ -39,14 +39,14 @@ enum LLModelType {
 };
 
 struct LLModelInfo {
-    std::unique_ptr<LLModel> model;
+    std::unique_ptr<ModelBackend> model;
     QFileInfo fileInfo;
     std::optional<QString> fallbackReason;
 
     // NOTE: This does not store the model type or name on purpose as this is left for ChatLLM which
     // must be able to serialize the information even if it is in the unloaded state
 
-    void resetModel(ChatLLM *cllm, LLModel *model = nullptr);
+    void resetModel(ChatLLM *cllm, ModelBackend *model = nullptr);
 };
 
 class TokenTimer : public QObject {
@@ -218,7 +218,7 @@ private:
     bool loadNewModel(const ModelInfo &modelInfo, QVariantMap &modelLoadProps);
 
 protected:
-    LLModel::PromptContext m_ctx;
+    ModelBackend::PromptContext m_ctx;
     quint32 m_promptTokens;
     quint32 m_promptResponseTokens;
 
@@ -243,7 +243,7 @@ private:
     bool m_processedSystemPrompt;
     bool m_restoreStateFromText;
     // m_pristineLoadedState is set if saveSate is unnecessary, either because:
-    // - an unload was queued during LLModel::restoreState()
+    // - an unload was queued during ModelBackend::restoreState()
     // - the chat will be restored from text and hasn't been interacted with yet
     bool m_pristineLoadedState = false;
     QVector<QPair<QString, QString>> m_stateFromText;
diff --git a/gpt4all-chat/embllm.cpp b/gpt4all-chat/embllm.cpp
index 1b3f5e1c..57f5f3a8 100644
--- a/gpt4all-chat/embllm.cpp
+++ b/gpt4all-chat/embllm.cpp
@@ -193,7 +193,7 @@ std::vector<float> EmbeddingLLMWorker::generateQueryEmbedding(const QString &tex
     try {
         m_model->embed({text.toStdString()}, embedding.data(), /*isRetrieval*/ true);
     } catch (const std::exception &e) {
-        qWarning() << "WARNING: LLModel::embed failed:" << e.what();
+        qWarning() << "WARNING: LlamaCppBackend::embed failed:" << e.what();
         return {};
     }
 
@@ -287,7 +287,7 @@ void EmbeddingLLMWorker::docEmbeddingsRequested(const QVector<EmbeddingChunk> &c
         try {
             m_model->embed(batchTexts, result.data() + j * m_model->embeddingSize(), /*isRetrieval*/ false);
         } catch (const std::exception &e) {
-            qWarning() << "WARNING: LLModel::embed failed:" << e.what();
+            qWarning() << "WARNING: LlamaCppBackend::embed failed:" << e.what();
             return;
         }
     }