From 5be5314ace988bd4821bdb0d0fc3d3478140d0b8 Mon Sep 17 00:00:00 2001
From: Jared Van Bortel <jared@nomic.ai>
Date: Wed, 7 Aug 2024 17:44:34 -0400
Subject: [PATCH] rename LLModel -> ModelBackend, EmbLLModel -> EmbCapableBackend

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
---
 gpt4all-backend/CMakeLists.txt            |  2 +-
 gpt4all-backend/llamacpp_backend.h        |  4 ++--
 gpt4all-backend/llamacpp_backend_impl.cpp | 22 +++++++++----------
 gpt4all-backend/llmodel_c.cpp             |  4 ++--
 .../{llmodel.h => model_backend.h}        | 12 +++++-----
 gpt4all-bindings/python/setup.py          |  2 +-
 gpt4all-bindings/typescript/index.h       |  2 +-
 gpt4all-bindings/typescript/prompt.h      |  2 +-
 gpt4all-chat/chatapi.cpp                  |  4 ++--
 gpt4all-chat/chatapi.h                    | 10 ++++-----
 gpt4all-chat/chatllm.cpp                  | 12 +++++-----
 gpt4all-chat/chatllm.h                    | 10 ++++-----
 gpt4all-chat/embllm.cpp                   |  4 ++--
 13 files changed, 45 insertions(+), 45 deletions(-)
 rename gpt4all-backend/{llmodel.h => model_backend.h} (91%)

diff --git a/gpt4all-backend/CMakeLists.txt b/gpt4all-backend/CMakeLists.txt
index 14fdbf44..f10d5d94 100644
--- a/gpt4all-backend/CMakeLists.txt
+++ b/gpt4all-backend/CMakeLists.txt
@@ -138,7 +138,7 @@ foreach(BUILD_VARIANT IN LISTS BUILD_VARIANTS)
 endforeach()
 
 add_library(llmodel
-    llmodel.h
+    model_backend.h
     llamacpp_backend.h llamacpp_backend.cpp
     llamacpp_backend_manager.h llamacpp_backend_manager.cpp
     llmodel_c.h llmodel_c.cpp
diff --git a/gpt4all-backend/llamacpp_backend.h b/gpt4all-backend/llamacpp_backend.h
index 86bac0ec..b319c473 100644
--- a/gpt4all-backend/llamacpp_backend.h
+++ b/gpt4all-backend/llamacpp_backend.h
@@ -1,6 +1,6 @@
 #pragma once
 
-#include "llmodel.h"
+#include "model_backend.h"
 
 #include
 #include
@@ -17,7 +17,7 @@ using namespace std::string_literals;
 
 class LlamaCppBackendManager;
 
-class LlamaCppBackend : public EmbLLModel {
+class LlamaCppBackend : public EmbCapableBackend {
 public:
     struct GPUDevice {
         const char *backend;
diff --git a/gpt4all-backend/llamacpp_backend_impl.cpp b/gpt4all-backend/llamacpp_backend_impl.cpp
index 0ace53bb..cd92b15e 100644
--- a/gpt4all-backend/llamacpp_backend_impl.cpp
+++ b/gpt4all-backend/llamacpp_backend_impl.cpp
@@ -1,7 +1,7 @@
 #define LLAMACPP_BACKEND_H_I_KNOW_WHAT_I_AM_DOING_WHEN_INCLUDING_THIS_FILE
 #include "llamacpp_backend_impl.h"
 
-#include "llmodel.h"
+#include "model_backend.h"
 
 #include
 #include
@@ -242,7 +242,7 @@ struct LlamaPrivate {
     llama_model_params model_params;
     llama_context_params ctx_params;
     int64_t n_threads = 0;
-    std::vector<LLModel::Token> end_tokens;
+    std::vector<ModelBackend::Token> end_tokens;
     const char *backend_name = nullptr;
 };
 
@@ -528,11 +528,11 @@ size_t LlamaCppBackendImpl::restoreState(const uint8_t *src)
     return llama_set_state_data(d_ptr->ctx, const_cast<uint8_t *>(src));
 }
 
-std::vector<LLModel::Token> LlamaCppBackendImpl::tokenize(PromptContext &ctx, const std::string &str, bool special)
+std::vector<ModelBackend::Token> LlamaCppBackendImpl::tokenize(PromptContext &ctx, const std::string &str, bool special)
 {
     bool atStart = m_tokenize_last_token == -1;
     bool insertSpace = atStart || isSpecialToken(m_tokenize_last_token);
-    std::vector<LLModel::Token> fres(str.length() + 4);
+    std::vector<ModelBackend::Token> fres(str.length() + 4);
     int32_t fres_len = llama_tokenize_gpt4all(
         d_ptr->model, str.c_str(), str.length(), fres.data(), fres.size(), /*add_special*/ atStart,
         /*parse_special*/ special, /*insert_space*/ insertSpace
     );
@@ -565,7 +565,7 @@ std::string LlamaCppBackendImpl::tokenToString(Token id) const
     return std::string(result.data(), result.size());
 }
 
-LLModel::Token LlamaCppBackendImpl::sampleToken(PromptContext &promptCtx) const
+ModelBackend::Token LlamaCppBackendImpl::sampleToken(PromptContext &promptCtx) const
 {
     const size_t n_prev_toks = std::min((size_t) promptCtx.repeat_last_n, promptCtx.tokens.size());
     return llama_sample_top_p_top_k(d_ptr->ctx,
@@ -627,7 +627,7 @@ int32_t LlamaCppBackendImpl::contextLength() const
     return llama_n_ctx(d_ptr->ctx);
 }
 
-const std::vector<LLModel::Token> &LlamaCppBackendImpl::endTokens() const
+const std::vector<ModelBackend::Token> &LlamaCppBackendImpl::endTokens() const
 {
     return d_ptr->end_tokens;
 }
@@ -825,7 +825,7 @@ void llama_batch_add(
     batch.n_tokens++;
 }
 
-static void batch_add_seq(llama_batch &batch, const std::vector<LLModel::Token> &tokens, int seq_id)
+static void batch_add_seq(llama_batch &batch, const std::vector<ModelBackend::Token> &tokens, int seq_id)
 {
     for (unsigned i = 0; i < tokens.size(); i++) {
         llama_batch_add(batch, tokens[i], i, { seq_id }, i == tokens.size() - 1);
@@ -909,7 +909,7 @@ void LlamaCppBackendImpl::embed(
 
 void LlamaCppBackendImpl::embed(
     const std::vector<std::string> &texts, float *embeddings, std::optional<std::string> prefix, int dimensionality,
-    size_t *tokenCount, bool doMean, bool atlas, EmbLLModel::EmbedCancelCallback *cancelCb
+    size_t *tokenCount, bool doMean, bool atlas, EmbedCancelCallback *cancelCb
 ) {
     if (!d_ptr->model)
         throw std::logic_error("no model is loaded");
@@ -967,9 +967,9 @@ double getL2NormScale(T *start, T *end)
 
 void LlamaCppBackendImpl::embedInternal(
     const std::vector<std::string> &texts, float *embeddings, std::string prefix, int dimensionality,
-    size_t *tokenCount, bool doMean, bool atlas, EmbLLModel::EmbedCancelCallback *cancelCb, const EmbModelSpec *spec
+    size_t *tokenCount, bool doMean, bool atlas, EmbedCancelCallback *cancelCb, const EmbModelSpec *spec
 ) {
-    typedef std::vector<LLModel::Token> TokenString;
+    typedef std::vector<ModelBackend::Token> TokenString;
     static constexpr int32_t atlasMaxLength = 8192;
     static constexpr int chunkOverlap = 8; // Atlas overlaps chunks of input by 8 tokens
 
@@ -1217,7 +1217,7 @@ DLL_EXPORT bool is_arch_supported(const char *arch)
     return std::find(KNOWN_ARCHES.begin(), KNOWN_ARCHES.end(), std::string(arch)) < KNOWN_ARCHES.end();
 }
 
-DLL_EXPORT LLModel *construct()
+DLL_EXPORT LlamaCppBackend *construct()
 {
     llama_log_set(llama_log_callback, nullptr);
 #ifdef GGML_USE_CUDA
diff --git a/gpt4all-backend/llmodel_c.cpp b/gpt4all-backend/llmodel_c.cpp
index edeac477..18b59899 100644
--- a/gpt4all-backend/llmodel_c.cpp
+++ b/gpt4all-backend/llmodel_c.cpp
@@ -2,7 +2,7 @@
 
 #include "llamacpp_backend.h"
 #include "llamacpp_backend_manager.h"
-#include "llmodel.h"
+#include "model_backend.h"
 
 #include
 #include
@@ -18,7 +18,7 @@
 
 struct LLModelWrapper {
     LlamaCppBackend *llModel = nullptr;
-    LLModel::PromptContext promptContext;
+    ModelBackend::PromptContext promptContext;
 
     ~LLModelWrapper() { delete llModel; }
 };
diff --git a/gpt4all-backend/llmodel.h b/gpt4all-backend/model_backend.h
similarity index 91%
rename from gpt4all-backend/llmodel.h
rename to gpt4all-backend/model_backend.h
index 4067b353..467c4e83 100644
--- a/gpt4all-backend/llmodel.h
+++ b/gpt4all-backend/model_backend.h
@@ -10,7 +10,7 @@
 
 #define LLMODEL_MAX_PROMPT_BATCH 128
 
-class LLModel {
+class ModelBackend {
 public:
     using Token = int32_t;
 
@@ -29,7 +29,7 @@ public:
         float contextErase = 0.5f; // percent of context to erase if we exceed the context window
     };
 
-    virtual ~LLModel() {}
+    virtual ~ModelBackend() {}
 
     virtual bool supportsCompletion() const { return true; }
     virtual bool loadModel(const std::string &modelPath, int n_ctx, int ngl) = 0;
@@ -50,13 +50,13 @@ public:
                         std::string *fakeReply = nullptr) = 0;
 
 protected:
-    explicit LLModel() {}
+    explicit ModelBackend() {}
 };
 
-class EmbLLModel: virtual public LLModel {
-public:
-    using EmbedCancelCallback = bool(unsigned *batchSizes, unsigned nBatch, const char *backend);
+using EmbedCancelCallback = bool(unsigned *batchSizes, unsigned nBatch, const char *backend);
+
+class EmbCapableBackend : virtual public ModelBackend {
+public:
     virtual bool supportsCompletion() const = 0;
     virtual bool supportsEmbedding() const = 0;
     virtual size_t embeddingSize() const = 0;
diff --git a/gpt4all-bindings/python/setup.py b/gpt4all-bindings/python/setup.py
index e92fba61..ed6f4071 100644
--- a/gpt4all-bindings/python/setup.py
+++ b/gpt4all-bindings/python/setup.py
@@ -55,7 +55,7 @@ def copy_prebuilt_C_lib(src_dir, dest_dir, dest_build_dir):
 
 
 # NOTE: You must provide correct path to the prebuilt llmodel C library.
-# Specifically, the llmodel.h and C shared library are needed.
+# Specifically, the model_backend.h and C shared library are needed.
 copy_prebuilt_C_lib(SRC_CLIB_DIRECTORY,
                     DEST_CLIB_DIRECTORY,
                     DEST_CLIB_BUILD_DIRECTORY)
diff --git a/gpt4all-bindings/typescript/index.h b/gpt4all-bindings/typescript/index.h
index db3ef11e..7726e8cf 100644
--- a/gpt4all-bindings/typescript/index.h
+++ b/gpt4all-bindings/typescript/index.h
@@ -1,4 +1,4 @@
-#include "llmodel.h"
+#include "model_backend.h"
 #include "llmodel_c.h"
 #include "prompt.h"
 #include
diff --git a/gpt4all-bindings/typescript/prompt.h b/gpt4all-bindings/typescript/prompt.h
index 49c43620..e1d0a550 100644
--- a/gpt4all-bindings/typescript/prompt.h
+++ b/gpt4all-bindings/typescript/prompt.h
@@ -1,7 +1,7 @@
 #ifndef PREDICT_WORKER_H
 #define PREDICT_WORKER_H
 
-#include "llmodel.h"
+#include "model_backend.h"
 #include "llmodel_c.h"
 #include "napi.h"
 #include
diff --git a/gpt4all-chat/chatapi.cpp b/gpt4all-chat/chatapi.cpp
index ada33325..41fb7f5b 100644
--- a/gpt4all-chat/chatapi.cpp
+++ b/gpt4all-chat/chatapi.cpp
@@ -1,6 +1,6 @@
 #include "chatapi.h"
 
-#include "../gpt4all-backend/llmodel.h"
+#include "../gpt4all-backend/model_backend.h"
 
 #include
 #include
@@ -170,7 +170,7 @@ bool ChatAPI::callResponse(int32_t token, const std::string& string)
 }
 
 void ChatAPIWorker::request(const QString &apiKey,
-                            LLModel::PromptContext *promptCtx,
+                            ModelBackend::PromptContext *promptCtx,
                             const QByteArray &array)
 {
     m_ctx = promptCtx;
diff --git a/gpt4all-chat/chatapi.h b/gpt4all-chat/chatapi.h
index 0decb642..45d50fe1 100644
--- a/gpt4all-chat/chatapi.h
+++ b/gpt4all-chat/chatapi.h
@@ -1,7 +1,7 @@
 #ifndef CHATAPI_H
 #define CHATAPI_H
 
-#include "../gpt4all-backend/llmodel.h"
+#include "../gpt4all-backend/model_backend.h"
 
 #include
 #include
@@ -33,7 +33,7 @@ public:
     QString currentResponse() const { return m_currentResponse; }
 
     void request(const QString &apiKey,
-                 LLModel::PromptContext *promptCtx,
+                 ModelBackend::PromptContext *promptCtx,
                  const QByteArray &array);
 
 Q_SIGNALS:
@@ -46,12 +46,12 @@ private Q_SLOTS:
 
 private:
     ChatAPI *m_chat;
-    LLModel::PromptContext *m_ctx;
+    ModelBackend::PromptContext *m_ctx;
     QNetworkAccessManager *m_networkManager;
     QString m_currentResponse;
 };
 
-class ChatAPI : public QObject, public LLModel {
+class ChatAPI : public QObject, public ModelBackend {
     Q_OBJECT
 public:
     ChatAPI();
@@ -83,7 +83,7 @@ public:
 
 Q_SIGNALS:
     void request(const QString &apiKey,
-                 LLModel::PromptContext *ctx,
+                 ModelBackend::PromptContext *ctx,
                  const QByteArray &array);
 
 private:
diff --git a/gpt4all-chat/chatllm.cpp b/gpt4all-chat/chatllm.cpp
index f4599684..104d020e 100644
--- a/gpt4all-chat/chatllm.cpp
+++ b/gpt4all-chat/chatllm.cpp
@@ -94,7 +94,7 @@ void LLModelStore::destroy()
     m_availableModel.reset();
 }
 
-void LLModelInfo::resetModel(ChatLLM *cllm, LLModel *model) {
+void LLModelInfo::resetModel(ChatLLM *cllm, ModelBackend *model) {
     this->model.reset(model);
     fallbackReason.reset();
     emit cllm->loadedModelInfoChanged();
@@ -647,7 +647,7 @@ void ChatLLM::resetContext()
 {
     resetResponse();
     m_processedSystemPrompt = false;
-    m_ctx = LLModel::PromptContext();
+    m_ctx = ModelBackend::PromptContext();
 }
 
 QString ChatLLM::response() const
@@ -902,7 +902,7 @@ void ChatLLM::generateName()
     auto promptTemplate = MySettings::globalInstance()->modelPromptTemplate(m_modelInfo);
     auto promptFunc = std::bind(&ChatLLM::handleNamePrompt, this, std::placeholders::_1);
     auto responseFunc = std::bind(&ChatLLM::handleNameResponse, this, std::placeholders::_1, std::placeholders::_2);
-    LLModel::PromptContext ctx = m_ctx;
+    ModelBackend::PromptContext ctx = m_ctx;
     m_llModelInfo.model->prompt(chatNamePrompt.toStdString(), promptTemplate.toStdString(), promptFunc, responseFunc,
                                 /*allowContextShift*/ false, ctx);
     std::string trimmed = trim_whitespace(m_nameResponse);
@@ -998,7 +998,7 @@ void ChatLLM::generateQuestions(qint64 elapsed)
     auto promptTemplate = MySettings::globalInstance()->modelPromptTemplate(m_modelInfo);
     auto promptFunc = std::bind(&ChatLLM::handleQuestionPrompt, this, std::placeholders::_1);
     auto responseFunc = std::bind(&ChatLLM::handleQuestionResponse, this, std::placeholders::_1, std::placeholders::_2);
-    LLModel::PromptContext ctx = m_ctx;
+    ModelBackend::PromptContext ctx = m_ctx;
     QElapsedTimer totalTime;
     totalTime.start();
     m_llModelInfo.model->prompt(suggestedFollowUpPrompt, promptTemplate.toStdString(), promptFunc, responseFunc,
@@ -1225,7 +1225,7 @@ void ChatLLM::processSystemPrompt()
 
     // Start with a whole new context
     m_stopGenerating = false;
-    m_ctx = LLModel::PromptContext();
+    m_ctx = ModelBackend::PromptContext();
 
     auto promptFunc = std::bind(&ChatLLM::handleSystemPrompt, this, std::placeholders::_1);
 
@@ -1278,7 +1278,7 @@ void ChatLLM::processRestoreStateFromText()
     emit restoringFromTextChanged();
 
     m_stopGenerating = false;
-    m_ctx = LLModel::PromptContext();
+    m_ctx = ModelBackend::PromptContext();
 
     auto promptFunc = std::bind(&ChatLLM::handleRestoreStateFromTextPrompt, this, std::placeholders::_1);
 
diff --git a/gpt4all-chat/chatllm.h b/gpt4all-chat/chatllm.h
index 18ccb897..c721095e 100644
--- a/gpt4all-chat/chatllm.h
+++ b/gpt4all-chat/chatllm.h
@@ -4,7 +4,7 @@
 #include "modellist.h"
 
 #include "../gpt4all-backend/llamacpp_backend.h"
-#include "../gpt4all-backend/llmodel.h"
+#include "../gpt4all-backend/model_backend.h"
 
 #include
 #include
@@ -39,14 +39,14 @@ enum LLModelType {
 };
 
 struct LLModelInfo {
-    std::unique_ptr<LLModel> model;
+    std::unique_ptr<ModelBackend> model;
     QFileInfo fileInfo;
     std::optional<QString> fallbackReason;
 
     // NOTE: This does not store the model type or name on purpose as this is left for ChatLLM which
     // must be able to serialize the information even if it is in the unloaded state
 
-    void resetModel(ChatLLM *cllm, LLModel *model = nullptr);
+    void resetModel(ChatLLM *cllm, ModelBackend *model = nullptr);
 };
 
 class TokenTimer : public QObject {
@@ -218,7 +218,7 @@ private:
     bool loadNewModel(const ModelInfo &modelInfo, QVariantMap &modelLoadProps);
 
 protected:
-    LLModel::PromptContext m_ctx;
+    ModelBackend::PromptContext m_ctx;
     quint32 m_promptTokens;
     quint32 m_promptResponseTokens;
 
@@ -243,7 +243,7 @@ private:
     bool m_processedSystemPrompt;
     bool m_restoreStateFromText;
     // m_pristineLoadedState is set if saveSate is unnecessary, either because:
-    // - an unload was queued during LLModel::restoreState()
+    // - an unload was queued during ModelBackend::restoreState()
     // - the chat will be restored from text and hasn't been interacted with yet
     bool m_pristineLoadedState = false;
     QVector<QPair<QString, QString>> m_stateFromText;
diff --git a/gpt4all-chat/embllm.cpp b/gpt4all-chat/embllm.cpp
index 1b3f5e1c..57f5f3a8 100644
--- a/gpt4all-chat/embllm.cpp
+++ b/gpt4all-chat/embllm.cpp
@@ -193,7 +193,7 @@ std::vector<float> EmbeddingLLMWorker::generateQueryEmbedding(const QString &tex
     try {
         m_model->embed({text.toStdString()}, embedding.data(), /*isRetrieval*/ true);
     } catch (const std::exception &e) {
-        qWarning() << "WARNING: LLModel::embed failed:" << e.what();
+        qWarning() << "WARNING: LlamaCppBackend::embed failed:" << e.what();
         return {};
     }
 
@@ -287,7 +287,7 @@ void EmbeddingLLMWorker::docEmbeddingsRequested(const QVector<EmbeddingChunk> &c
         try {
             m_model->embed(batchTexts, result.data() + j * m_model->embeddingSize(), /*isRetrieval*/ false);
         } catch (const std::exception &e) {
-            qWarning() << "WARNING: LLModel::embed failed:" << e.what();
+            qWarning() << "WARNING: LlamaCppBackend::embed failed:" << e.what();
             return;
         }
     }