From 9808be5e73268afb5f59879ae9ed4c230c237f5b Mon Sep 17 00:00:00 2001
From: Jared Van Bortel
Date: Tue, 6 Aug 2024 16:47:58 -0400
Subject: [PATCH] rename LLamaModel to LlamaCppBackendImpl

Signed-off-by: Jared Van Bortel
---
 .gitmodules                                   |  2 +-
 gpt4all-backend/CMakeLists.txt                | 11 ++-
 .../{llama.cpp-mainline => llama.cpp}         |  0
 ...amamodel.cpp => llamacpp_backend_impl.cpp} | 76 +++++++++----------
 ...mamodel_impl.h => llamacpp_backend_impl.h} | 20 ++---
 gpt4all-backend/llmodel.cpp                   |  7 +-
 gpt4all-chat/CMakeLists.txt                   | 16 ++--
 7 files changed, 67 insertions(+), 65 deletions(-)
 rename gpt4all-backend/{llama.cpp-mainline => llama.cpp} (100%)
 rename gpt4all-backend/{llamamodel.cpp => llamacpp_backend_impl.cpp} (94%)
 rename gpt4all-backend/{llamamodel_impl.h => llamacpp_backend_impl.h} (83%)

diff --git a/.gitmodules b/.gitmodules
index 98c9a214..1ac9606d 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,5 +1,5 @@
 [submodule "llama.cpp-mainline"]
-	path = gpt4all-backend/llama.cpp-mainline
+	path = gpt4all-backend/llama.cpp
 	url = https://github.com/nomic-ai/llama.cpp.git
 	branch = master
 [submodule "gpt4all-chat/usearch"]
diff --git a/gpt4all-backend/CMakeLists.txt b/gpt4all-backend/CMakeLists.txt
index e6210d74..aaa5e210 100644
--- a/gpt4all-backend/CMakeLists.txt
+++ b/gpt4all-backend/CMakeLists.txt
@@ -47,7 +47,7 @@ else()
     message(STATUS "Interprocedural optimization support detected")
 endif()
 
-set(DIRECTORY llama.cpp-mainline)
+set(DIRECTORY llama.cpp)
 include(llama.cpp.cmake)
 
 set(BUILD_VARIANTS)
@@ -108,7 +108,7 @@ foreach(BUILD_VARIANT IN LISTS BUILD_VARIANTS)
     endif()
 
     # Include GGML
-    include_ggml(-mainline-${BUILD_VARIANT})
+    include_ggml(-${BUILD_VARIANT})
 
     # Function for preparing individual implementations
     function(prepare_target TARGET_NAME BASE_LIB)
@@ -127,11 +127,10 @@ foreach(BUILD_VARIANT IN LISTS BUILD_VARIANTS)
     endfunction()
 
     # Add each individual implementations
-    add_library(llamamodel-mainline-${BUILD_VARIANT} SHARED
-        llamamodel.cpp llmodel_shared.cpp)
-    target_compile_definitions(llamamodel-mainline-${BUILD_VARIANT} PRIVATE
+    add_library(llamacpp-${BUILD_VARIANT} SHARED llamacpp_backend_impl.cpp)
+    target_compile_definitions(llamacpp-${BUILD_VARIANT} PRIVATE
         LLAMA_VERSIONS=>=3 LLAMA_DATE=999999)
-    prepare_target(llamamodel-mainline llama-mainline)
+    prepare_target(llamacpp llama)
 
     if (NOT PROJECT_IS_TOP_LEVEL AND BUILD_VARIANT STREQUAL cuda)
         set(CUDAToolkit_BIN_DIR ${CUDAToolkit_BIN_DIR} PARENT_SCOPE)
diff --git a/gpt4all-backend/llama.cpp-mainline b/gpt4all-backend/llama.cpp
similarity index 100%
rename from gpt4all-backend/llama.cpp-mainline
rename to gpt4all-backend/llama.cpp
diff --git a/gpt4all-backend/llamamodel.cpp b/gpt4all-backend/llamacpp_backend_impl.cpp
similarity index 94%
rename from gpt4all-backend/llamamodel.cpp
rename to gpt4all-backend/llamacpp_backend_impl.cpp
index f07a05e8..aece51c6 100644
--- a/gpt4all-backend/llamamodel.cpp
+++ b/gpt4all-backend/llamacpp_backend_impl.cpp
@@ -1,5 +1,5 @@
-#define LLAMAMODEL_H_I_KNOW_WHAT_I_AM_DOING_WHEN_INCLUDING_THIS_FILE
-#include "llamamodel_impl.h"
+#define LLAMACPP_BACKEND_H_I_KNOW_WHAT_I_AM_DOING_WHEN_INCLUDING_THIS_FILE
+#include "llamacpp_backend_impl.h"
 
 #include "llmodel.h"
 
@@ -232,7 +232,7 @@ cleanup:
     return value;
 }
 
-struct LLamaPrivate {
+struct LlamaPrivate {
     const std::string modelPath;
     bool modelLoaded = false;
     int device = -1;
@@ -246,8 +246,8 @@ struct LLamaPrivate {
     const char *backend_name = nullptr;
 };
 
-LLamaModel::LLamaModel()
-    : d_ptr(new LLamaPrivate) {}
+LlamaCppBackendImpl::LlamaCppBackendImpl()
+    : d_ptr(new LlamaPrivate) {}
 
 // default hparams (LLaMA 7B)
 struct llama_file_hparams {
@@ -260,7 +260,7 @@ struct llama_file_hparams {
     enum llama_ftype ftype = LLAMA_FTYPE_MOSTLY_F16;
 };
 
-size_t LLamaModel::requiredMem(const std::string &modelPath, int n_ctx, int ngl)
+size_t LlamaCppBackendImpl::requiredMem(const std::string &modelPath, int n_ctx, int ngl)
 {
     // TODO(cebtenzzre): update to GGUF
     (void)ngl; // FIXME(cetenzzre): use this value
@@ -285,7 +285,7 @@ size_t LLamaModel::requiredMem(const std::string &modelPath, int n_ctx, int ngl)
     return filesize + est_kvcache_size;
 }
 
-bool LLamaModel::isModelBlacklisted(const std::string &modelPath) const
+bool LlamaCppBackendImpl::isModelBlacklisted(const std::string &modelPath) const
 {
     auto * ctx = load_gguf(modelPath.c_str());
     if (!ctx) {
@@ -322,7 +322,7 @@ bool LLamaModel::isModelBlacklisted(const std::string &modelPath) const
     return res;
 }
 
-bool LLamaModel::isEmbeddingModel(const std::string &modelPath) const
+bool LlamaCppBackendImpl::isEmbeddingModel(const std::string &modelPath) const
 {
     bool result = false;
     std::string arch;
@@ -346,7 +346,7 @@ cleanup:
     return result;
 }
 
-bool LLamaModel::loadModel(const std::string &modelPath, int n_ctx, int ngl)
+bool LlamaCppBackendImpl::loadModel(const std::string &modelPath, int n_ctx, int ngl)
 {
     d_ptr->modelLoaded = false;
 
@@ -488,18 +488,18 @@ bool LLamaModel::loadModel(const std::string &modelPath, int n_ctx, int ngl)
     return true;
 }
 
-void LLamaModel::setThreadCount(int32_t n_threads)
+void LlamaCppBackendImpl::setThreadCount(int32_t n_threads)
 {
     d_ptr->n_threads = n_threads;
     llama_set_n_threads(d_ptr->ctx, n_threads, n_threads);
 }
 
-int32_t LLamaModel::threadCount() const
+int32_t LlamaCppBackendImpl::threadCount() const
 {
     return d_ptr->n_threads;
 }
 
-LLamaModel::~LLamaModel()
+LlamaCppBackendImpl::~LlamaCppBackendImpl()
 {
     if (d_ptr->ctx) {
         llama_free(d_ptr->ctx);
@@ -507,28 +507,28 @@ LLamaModel::~LLamaModel()
     llama_free_model(d_ptr->model);
 }
 
-bool LLamaModel::isModelLoaded() const
+bool LlamaCppBackendImpl::isModelLoaded() const
 {
     return d_ptr->modelLoaded;
 }
 
-size_t LLamaModel::stateSize() const
+size_t LlamaCppBackendImpl::stateSize() const
 {
     return llama_get_state_size(d_ptr->ctx);
 }
 
-size_t LLamaModel::saveState(uint8_t *dest) const
+size_t LlamaCppBackendImpl::saveState(uint8_t *dest) const
 {
     return llama_copy_state_data(d_ptr->ctx, dest);
 }
 
-size_t LLamaModel::restoreState(const uint8_t *src)
+size_t LlamaCppBackendImpl::restoreState(const uint8_t *src)
 {
     // const_cast is required, see: https://github.com/ggerganov/llama.cpp/pull/1540
     return llama_set_state_data(d_ptr->ctx, const_cast<uint8_t *>(src));
 }
 
-std::vector<LLModel::Token> LLamaModel::tokenize(PromptContext &ctx, const std::string &str, bool special)
+std::vector<LLModel::Token> LlamaCppBackendImpl::tokenize(PromptContext &ctx, const std::string &str, bool special)
 {
     bool atStart = m_tokenize_last_token == -1;
     bool insertSpace = atStart || isSpecialToken(m_tokenize_last_token);
@@ -543,13 +543,13 @@ std::vector<LLModel::Token> LLamaModel::tokenize(PromptContext &ctx, const std::
     return fres;
 }
 
-bool LLamaModel::isSpecialToken(Token id) const
+bool LlamaCppBackendImpl::isSpecialToken(Token id) const
 {
     return llama_token_get_attr(d_ptr->model, id)
         & (LLAMA_TOKEN_ATTR_CONTROL | LLAMA_TOKEN_ATTR_USER_DEFINED | LLAMA_TOKEN_ATTR_UNKNOWN);
 }
 
-std::string LLamaModel::tokenToString(Token id) const
+std::string LlamaCppBackendImpl::tokenToString(Token id) const
 {
     std::vector<char> result(8, 0);
     const int n_tokens = llama_token_to_piece(d_ptr->model, id, result.data(), result.size(), 0, true);
@@ -565,7 +565,7 @@ std::string LLamaModel::tokenToString(Token id) const
     return std::string(result.data(), result.size());
 }
 
-LLModel::Token LLamaModel::sampleToken(PromptContext &promptCtx) const
+LLModel::Token LlamaCppBackendImpl::sampleToken(PromptContext &promptCtx) const
 {
     const size_t n_prev_toks = std::min((size_t) promptCtx.repeat_last_n, promptCtx.tokens.size());
     return llama_sample_top_p_top_k(d_ptr->ctx,
@@ -574,7 +574,7 @@ LLModel::Token LLamaModel::sampleToken(PromptContext &promptCtx) const
             promptCtx.repeat_penalty);
 }
 
-bool LLamaModel::evalTokens(PromptContext &ctx, const std::vector<int32_t> &tokens) const
+bool LlamaCppBackendImpl::evalTokens(PromptContext &ctx, const std::vector<int32_t> &tokens) const
 {
     llama_kv_cache_seq_rm(d_ptr->ctx, 0, ctx.n_past, -1);
 
@@ -598,7 +598,7 @@ bool LLamaModel::evalTokens(PromptContext &ctx, const std::vector<int32_t> &toke
     return res == 0;
 }
 
-void LLamaModel::shiftContext(PromptContext &promptCtx)
+void LlamaCppBackendImpl::shiftContext(PromptContext &promptCtx)
 {
     // infinite text generation via context shifting
 
@@ -622,27 +622,27 @@ void LLamaModel::shiftContext(PromptContext &promptCtx)
     promptCtx.n_past = promptCtx.tokens.size();
 }
 
-int32_t LLamaModel::contextLength() const
+int32_t LlamaCppBackendImpl::contextLength() const
 {
     return llama_n_ctx(d_ptr->ctx);
 }
 
-const std::vector<LLModel::Token> &LLamaModel::endTokens() const
+const std::vector<LLModel::Token> &LlamaCppBackendImpl::endTokens() const
 {
     return d_ptr->end_tokens;
 }
 
-bool LLamaModel::shouldAddBOS() const
+bool LlamaCppBackendImpl::shouldAddBOS() const
 {
     return llama_add_bos_token(d_ptr->model);
 }
 
-int32_t LLamaModel::maxContextLength(std::string const &modelPath) const
+int32_t LlamaCppBackendImpl::maxContextLength(std::string const &modelPath) const
 {
     return get_arch_key_u32(modelPath, "context_length");
 }
 
-int32_t LLamaModel::layerCount(std::string const &modelPath) const
+int32_t LlamaCppBackendImpl::layerCount(std::string const &modelPath) const
 {
     return get_arch_key_u32(modelPath, "block_count");
 }
@@ -659,7 +659,7 @@ static const char *getVulkanVendorName(uint32_t vendorID)
 }
 #endif
 
-std::vector<LLModel::GPUDevice> LLamaModel::availableGPUDevices(size_t memoryRequired) const
+std::vector<LLModel::GPUDevice> LlamaCppBackendImpl::availableGPUDevices(size_t memoryRequired) const
 {
 #if defined(GGML_USE_KOMPUTE) || defined(GGML_USE_VULKAN) || defined(GGML_USE_CUDA)
     size_t count = 0;
@@ -724,7 +724,7 @@ std::vector<LLModel::GPUDevice> LLamaModel::availableGPUDevices(size_t memoryReq
     return {};
 }
 
-bool LLamaModel::initializeGPUDevice(size_t memoryRequired, const std::string &name) const
+bool LlamaCppBackendImpl::initializeGPUDevice(size_t memoryRequired, const std::string &name) const
 {
 #if defined(GGML_USE_VULKAN) || defined(GGML_USE_CUDA)
     auto devices = availableGPUDevices(memoryRequired);
@@ -761,7 +761,7 @@ bool LLamaModel::initializeGPUDevice(size_t memoryRequired, const std::string &n
     return false;
 }
 
-bool LLamaModel::initializeGPUDevice(int device, std::string *unavail_reason) const
+bool LlamaCppBackendImpl::initializeGPUDevice(int device, std::string *unavail_reason) const
 {
 #if defined(GGML_USE_KOMPUTE) || defined(GGML_USE_VULKAN) || defined(GGML_USE_CUDA)
     (void)unavail_reason;
@@ -779,7 +779,7 @@ bool LLamaModel::initializeGPUDevice(int device, std::string *unavail_reason) co
 #endif
 }
 
-bool LLamaModel::usingGPUDevice() const
+bool LlamaCppBackendImpl::usingGPUDevice() const
 {
     if (!d_ptr->model)
         return false;
@@ -791,12 +791,12 @@ bool LLamaModel::usingGPUDevice() const
     return usingGPU;
 }
 
-const char *LLamaModel::backendName() const
+const char *LlamaCppBackendImpl::backendName() const
 {
     return d_ptr->backend_name;
 }
 
-const char *LLamaModel::gpuDeviceName() const
+const char *LlamaCppBackendImpl::gpuDeviceName() const
 {
     if (usingGPUDevice()) {
 #if defined(GGML_USE_KOMPUTE) || defined(GGML_USE_VULKAN) || defined(GGML_USE_CUDA)
@@ -832,7 +832,7 @@ static void batch_add_seq(llama_batch &batch, const std::vector<LLModel::Token>
     }
 }
 
-size_t LLamaModel::embeddingSize() const
+size_t LlamaCppBackendImpl::embeddingSize() const
 {
     return llama_n_embd(d_ptr->model);
 }
@@ -895,7 +895,7 @@ static const EmbModelSpec *getEmbedSpec(const std::string &modelName) {
     return it < std::end(specs) ? &it->spec : nullptr;
 }
 
-void LLamaModel::embed(
+void LlamaCppBackendImpl::embed(
     const std::vector<std::string> &texts, float *embeddings, bool isRetrieval, int dimensionality,
     size_t *tokenCount, bool doMean, bool atlas
 ) {
@@ -907,7 +907,7 @@ void LLamaModel::embed(
     embed(texts, embeddings, prefix, dimensionality, tokenCount, doMean, atlas);
 }
 
-void LLamaModel::embed(
+void LlamaCppBackendImpl::embed(
     const std::vector<std::string> &texts, float *embeddings, std::optional<std::string> prefix, int dimensionality,
     size_t *tokenCount, bool doMean, bool atlas, LLModel::EmbedCancelCallback *cancelCb
 ) {
@@ -965,7 +965,7 @@ double getL2NormScale(T *start, T *end)
     return 1.0 / std::max(magnitude, 1e-12);
 }
 
-void LLamaModel::embedInternal(
+void LlamaCppBackendImpl::embedInternal(
     const std::vector<std::string> &texts, float *embeddings, std::string prefix, int dimensionality,
     size_t *tokenCount, bool doMean, bool atlas, LLModel::EmbedCancelCallback *cancelCb, const EmbModelSpec *spec
 ) {
@@ -1223,6 +1223,6 @@ DLL_EXPORT LLModel *construct()
 #ifdef GGML_USE_CUDA
     ggml_backend_cuda_log_set_callback(cuda_log_callback, nullptr);
 #endif
-    return new LLamaModel;
+    return new LlamaCppBackendImpl;
 }
 }
diff --git a/gpt4all-backend/llamamodel_impl.h b/gpt4all-backend/llamacpp_backend_impl.h
similarity index 83%
rename from gpt4all-backend/llamamodel_impl.h
rename to gpt4all-backend/llamacpp_backend_impl.h
index 7c698ffa..5923572f 100644
--- a/gpt4all-backend/llamamodel_impl.h
+++ b/gpt4all-backend/llamacpp_backend_impl.h
@@ -1,8 +1,8 @@
-#ifndef LLAMAMODEL_H_I_KNOW_WHAT_I_AM_DOING_WHEN_INCLUDING_THIS_FILE
-#error This file is NOT meant to be included outside of llamamodel.cpp. Doing so is DANGEROUS. Be sure to know what you are doing before proceeding to #define LLAMAMODEL_H_I_KNOW_WHAT_I_AM_DOING_WHEN_INCLUDING_THIS_FILE
+#ifndef LLAMACPP_BACKEND_H_I_KNOW_WHAT_I_AM_DOING_WHEN_INCLUDING_THIS_FILE
+#error This file is NOT meant to be included outside of llamacpp_backend_impl.cpp. Doing so is DANGEROUS. Be sure to know what you are doing before proceeding to #define LLAMACPP_BACKEND_H_I_KNOW_WHAT_I_AM_DOING_WHEN_INCLUDING_THIS_FILE
 #endif
-#ifndef LLAMAMODEL_H
-#define LLAMAMODEL_H
+#ifndef LLAMACPP_BACKEND_IMPL_H
+#define LLAMACPP_BACKEND_IMPL_H
 
 #include "llmodel.h"
 
@@ -10,13 +10,13 @@
 #include <string>
 #include <vector>
 
-struct LLamaPrivate;
+struct LlamaPrivate;
 struct EmbModelSpec;
 
-class LLamaModel : public LLModel {
+class LlamaCppBackendImpl : public LLModel {
 public:
-    LLamaModel();
-    ~LLamaModel();
+    LlamaCppBackendImpl();
+    ~LlamaCppBackendImpl();
 
     bool supportsEmbedding() const override { return m_supportsEmbedding; }
     bool supportsCompletion() const override { return m_supportsCompletion; }
@@ -47,7 +47,7 @@ public:
                size_t *tokenCount = nullptr, bool doMean = true, bool atlas = false) override;
 
 private:
-    std::unique_ptr<LLamaPrivate> d_ptr;
+    std::unique_ptr<LlamaPrivate> d_ptr;
     bool m_supportsEmbedding = false;
     bool m_supportsCompletion = false;
 
@@ -69,4 +69,4 @@ protected:
                        const EmbModelSpec *spec);
 };
 
-#endif // LLAMAMODEL_H
+#endif // LLAMACPP_BACKEND_IMPL_H
diff --git a/gpt4all-backend/llmodel.cpp b/gpt4all-backend/llmodel.cpp
index 1acf0642..7b18004a 100644
--- a/gpt4all-backend/llmodel.cpp
+++ b/gpt4all-backend/llmodel.cpp
@@ -130,7 +130,7 @@ const std::vector<LLModel::Implementation> &LLModel::Implementation::implementat
 
         addCudaSearchPath();
 
-        std::string impl_name_re = "llamamodel-mainline-(cpu|metal|kompute|vulkan|cuda)";
+        std::string impl_name_re = "llamacpp-(cpu|metal|kompute|vulkan|cuda)";
         if (cpu_supports_avx2() == 0) {
             impl_name_re += "-avxonly";
         }
@@ -146,7 +146,10 @@ const std::vector<LLModel::Implementation> &LLModel::Implementation::implementat
             const fs::path &p = f.path();
 
             if (p.extension() != LIB_FILE_EXT) continue;
-            if (!std::regex_search(p.stem().string(), re)) continue;
+            if (!std::regex_search(p.stem().string(), re)) {
+                std::cerr << "did not match regex: " << p.stem().string() << "\n";
+                continue;
+            }
 
             // Add to list if model implementation
             Dlhandle dl;
diff --git a/gpt4all-chat/CMakeLists.txt b/gpt4all-chat/CMakeLists.txt
index 07acef15..bada68c3 100644
--- a/gpt4all-chat/CMakeLists.txt
+++ b/gpt4all-chat/CMakeLists.txt
@@ -326,18 +326,18 @@ install(
 # to the this component's dir for the finicky qt installer to work
 if (LLMODEL_KOMPUTE)
     set(MODEL_IMPL_TARGETS
-        llamamodel-mainline-kompute
-        llamamodel-mainline-kompute-avxonly
+        llamacpp-kompute
+        llamacpp-kompute-avxonly
     )
 else()
     set(MODEL_IMPL_TARGETS
-        llamamodel-mainline-cpu
-        llamamodel-mainline-cpu-avxonly
+        llamacpp-cpu
+        llamacpp-cpu-avxonly
     )
 endif()
 
 if (APPLE)
-    list(APPEND MODEL_IMPL_TARGETS llamamodel-mainline-metal)
+    list(APPEND MODEL_IMPL_TARGETS llamacpp-metal)
 endif()
 
 install(
@@ -365,12 +365,12 @@ if(WIN32 AND GPT4ALL_SIGN_INSTALL)
 endif()
 
 if (LLMODEL_CUDA)
-    set_property(TARGET llamamodel-mainline-cuda llamamodel-mainline-cuda-avxonly
+    set_property(TARGET llamacpp-cuda llamacpp-cuda-avxonly
                  APPEND PROPERTY INSTALL_RPATH "$ORIGIN")
 
     install(
-        TARGETS llamamodel-mainline-cuda
-                llamamodel-mainline-cuda-avxonly
+        TARGETS llamacpp-cuda
+                llamacpp-cuda-avxonly
         RUNTIME_DEPENDENCY_SET llama-cuda-deps
         LIBRARY DESTINATION lib COMPONENT ${COMPONENT_NAME_MAIN} # .so/.dylib
         RUNTIME DESTINATION lib COMPONENT ${COMPONENT_NAME_MAIN} # .dll