#include "index.h" #include "napi.h" Napi::Function NodeModelWrapper::GetClass(Napi::Env env) { Napi::Function self = DefineClass(env, "LLModel", {InstanceMethod("type", &NodeModelWrapper::GetType), InstanceMethod("isModelLoaded", &NodeModelWrapper::IsModelLoaded), InstanceMethod("name", &NodeModelWrapper::GetName), InstanceMethod("stateSize", &NodeModelWrapper::StateSize), InstanceMethod("infer", &NodeModelWrapper::Infer), InstanceMethod("setThreadCount", &NodeModelWrapper::SetThreadCount), InstanceMethod("embed", &NodeModelWrapper::GenerateEmbedding), InstanceMethod("threadCount", &NodeModelWrapper::ThreadCount), InstanceMethod("getLibraryPath", &NodeModelWrapper::GetLibraryPath), InstanceMethod("initGpuByString", &NodeModelWrapper::InitGpuByString), InstanceMethod("hasGpuDevice", &NodeModelWrapper::HasGpuDevice), InstanceMethod("listGpu", &NodeModelWrapper::GetGpuDevices), InstanceMethod("memoryNeeded", &NodeModelWrapper::GetRequiredMemory), InstanceMethod("dispose", &NodeModelWrapper::Dispose)}); // Keep a static reference to the constructor // Napi::FunctionReference *constructor = new Napi::FunctionReference(); *constructor = Napi::Persistent(self); env.SetInstanceData(constructor); return self; } Napi::Value NodeModelWrapper::GetRequiredMemory(const Napi::CallbackInfo &info) { auto env = info.Env(); return Napi::Number::New( env, static_cast(llmodel_required_mem(GetInference(), full_model_path.c_str(), nCtx, nGpuLayers))); } Napi::Value NodeModelWrapper::GetGpuDevices(const Napi::CallbackInfo &info) { auto env = info.Env(); int num_devices = 0; auto mem_size = llmodel_required_mem(GetInference(), full_model_path.c_str(), nCtx, nGpuLayers); llmodel_gpu_device *all_devices = llmodel_available_gpu_devices(mem_size, &num_devices); if (all_devices == nullptr) { Napi::Error::New(env, "Unable to retrieve list of all GPU devices").ThrowAsJavaScriptException(); return env.Undefined(); } auto js_array = Napi::Array::New(env, num_devices); for (int i = 0; i < num_devices; ++i) { auto gpu_device = all_devices[i]; /* * * struct llmodel_gpu_device { int index = 0; int type = 0; // same as VkPhysicalDeviceType size_t heapSize = 0; const char * name; const char * vendor; }; * */ Napi::Object js_gpu_device = Napi::Object::New(env); js_gpu_device["index"] = uint32_t(gpu_device.index); js_gpu_device["type"] = uint32_t(gpu_device.type); js_gpu_device["heapSize"] = static_cast(gpu_device.heapSize); js_gpu_device["name"] = gpu_device.name; js_gpu_device["vendor"] = gpu_device.vendor; js_array[i] = js_gpu_device; } return js_array; } Napi::Value NodeModelWrapper::GetType(const Napi::CallbackInfo &info) { if (type.empty()) { return info.Env().Undefined(); } return Napi::String::New(info.Env(), type); } Napi::Value NodeModelWrapper::InitGpuByString(const Napi::CallbackInfo &info) { auto env = info.Env(); size_t memory_required = static_cast(info[0].As().Uint32Value()); std::string gpu_device_identifier = info[1].As(); size_t converted_value; if (memory_required <= std::numeric_limits::max()) { converted_value = static_cast(memory_required); } else { Napi::Error::New(env, "invalid number for memory size. 
Napi::Value NodeModelWrapper::InitGpuByString(const Napi::CallbackInfo &info)
{
    auto env = info.Env();
    size_t memory_required = static_cast<size_t>(info[0].As<Napi::Number>().Uint32Value());

    std::string gpu_device_identifier = info[1].As<Napi::String>();

    size_t converted_value;
    if (memory_required <= std::numeric_limits<size_t>::max())
    {
        converted_value = static_cast<size_t>(memory_required);
    }
    else
    {
        Napi::Error::New(env, "invalid number for memory size. Exceeded bounds for memory.")
            .ThrowAsJavaScriptException();
        return env.Undefined();
    }

    auto result =
        llmodel_gpu_init_gpu_device_by_string(GetInference(), converted_value, gpu_device_identifier.c_str());
    return Napi::Boolean::New(env, result);
}

Napi::Value NodeModelWrapper::HasGpuDevice(const Napi::CallbackInfo &info)
{
    return Napi::Boolean::New(info.Env(), llmodel_has_gpu_device(GetInference()));
}

NodeModelWrapper::NodeModelWrapper(const Napi::CallbackInfo &info) : Napi::ObjectWrap<NodeModelWrapper>(info)
{
    auto env = info.Env();
    auto config_object = info[0].As<Napi::Object>();

    // sets the directory where the backend implementation libraries are searched
    llmodel_set_implementation_search_path(
        config_object.Has("library_path") ? config_object.Get("library_path").As<Napi::String>().Utf8Value().c_str()
                                          : ".");

    std::string model_name = config_object.Get("model_name").As<Napi::String>();
    fs::path model_path = config_object.Get("model_path").As<Napi::String>().Utf8Value();
    std::string full_weight_path = (model_path / fs::path(model_name)).string();

    name = model_name.empty() ? model_path.filename().string() : model_name;
    full_model_path = full_weight_path;
    nCtx = config_object.Get("nCtx").As<Napi::Number>().Int32Value();
    nGpuLayers = config_object.Get("ngl").As<Napi::Number>().Int32Value();

    const char *e;
    inference_ = llmodel_model_create2(full_weight_path.c_str(), "auto", &e);
    if (!inference_)
    {
        Napi::Error::New(env, e).ThrowAsJavaScriptException();
        return;
    }
    if (GetInference() == nullptr)
    {
        std::cerr << "Tried searching libraries in \"" << llmodel_get_implementation_search_path() << "\"" << std::endl;
        std::cerr << "Tried searching for model weight in \"" << full_weight_path << "\"" << std::endl;
        std::cerr << "Do you have runtime libraries installed?" << std::endl;
        Napi::Error::New(env, "Had an issue creating llmodel object, inference is null").ThrowAsJavaScriptException();
        return;
    }

    std::string device = config_object.Get("device").As<Napi::String>();
    if (device != "cpu")
    {
        size_t mem = llmodel_required_mem(GetInference(), full_weight_path.c_str(), nCtx, nGpuLayers);

        auto success = llmodel_gpu_init_gpu_device_by_string(GetInference(), mem, device.c_str());
        if (!success)
        {
            // https://github.com/nomic-ai/gpt4all/blob/3acbef14b7c2436fe033cae9036e695d77461a16/gpt4all-bindings/python/gpt4all/pyllmodel.py#L215
            // Haven't implemented this fallback yet, but it is still open to contribution
            std::cout << "WARNING: Failed to init GPU\n";
        }
    }

    auto success = llmodel_loadModel(GetInference(), full_weight_path.c_str(), nCtx, nGpuLayers);
    if (!success)
    {
        Napi::Error::New(env, "Failed to load model at given path").ThrowAsJavaScriptException();
        return;
    }

    // optional
    if (config_object.Has("model_type"))
    {
        type = config_object.Get("model_type").As<Napi::String>();
    }
};

// NodeModelWrapper::~NodeModelWrapper()
// {
//     if (GetInference() != nullptr)
//     {
//         std::cout << "Debug: deleting model\n";
//         llmodel_model_destroy(inference_);
//         std::cout << (inference_ == nullptr);
//     }
// }

// void NodeModelWrapper::Finalize(Napi::Env env)
// {
//     if (inference_ != nullptr)
//     {
//         std::cout << "Debug: deleting model\n";
//     }
// }

Napi::Value NodeModelWrapper::IsModelLoaded(const Napi::CallbackInfo &info)
{
    return Napi::Boolean::New(info.Env(), llmodel_isModelLoaded(GetInference()));
}

Napi::Value NodeModelWrapper::StateSize(const Napi::CallbackInfo &info)
{
    // Implement the binding for the stateSize method
    return Napi::Number::New(info.Env(), static_cast<int64_t>(llmodel_get_state_size(GetInference())));
}
Napi::Array ChunkedFloatPtr(float *embedding_ptr, int embedding_size, int text_len, Napi::Env const &env)
{
    auto n_embd = embedding_size / text_len;
    // std::cout << "Embedding size: " << embedding_size << std::endl;
    // std::cout << "Text length: " << text_len << std::endl;
    // std::cout << "Chunk size (n_embd): " << n_embd << std::endl;
    Napi::Array result = Napi::Array::New(env, text_len);
    auto count = 0;
    for (int i = 0; i < embedding_size; i += n_embd)
    {
        int end = std::min(i + n_embd, embedding_size); // possible bounds error?
        // Constructs a container with as many elements as the range [first, last), with each element
        // emplace-constructed from its corresponding element in that range, in the same order.
        std::vector<float> chunk(embedding_ptr + i, embedding_ptr + end);
        Napi::Float32Array fltarr = Napi::Float32Array::New(env, chunk.size());
        // There is a way to build the Float32Array directly over the raw float pointer, but copying
        // element by element avoids any ownership questions; this is good enough.
        for (size_t j = 0; j < chunk.size(); j++)
        {
            fltarr.Set(j, chunk[j]);
        }
        result.Set(count++, fltarr);
    }
    return result;
}
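// Note: the element-by-element copy above is deliberately conservative. node-addon-api also
// exposes the typed array's backing store via Napi::TypedArrayOf<float>::Data(), so each chunk
// could be filled with a single bulk copy. A minimal sketch (illustrative only, not used by
// this binding):
//
//   Napi::Float32Array fltarr = Napi::Float32Array::New(env, end - i);
//   std::copy(embedding_ptr + i, embedding_ptr + end, fltarr.Data());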
Napi::Value NodeModelWrapper::GenerateEmbedding(const Napi::CallbackInfo &info)
{
    auto env = info.Env();

    auto prefix = info[1];
    auto dimensionality = info[2].As<Napi::Number>().Int32Value();
    auto do_mean = info[3].As<Napi::Boolean>().Value();
    auto atlas = info[4].As<Napi::Boolean>().Value();

    size_t embedding_size;
    size_t token_count = 0;

    // This procedure could be optimized; it currently goes through a few intermediary structures.
    std::vector<std::string> text_arr;
    bool is_single_text = false;
    if (info[0].IsString())
    {
        is_single_text = true;
        text_arr.push_back(info[0].As<Napi::String>().Utf8Value());
    }
    else
    {
        auto jsarr = info[0].As<Napi::Array>();
        size_t len = jsarr.Length();
        text_arr.reserve(len);
        for (size_t i = 0; i < len; ++i)
        {
            std::string str = jsarr.Get(i).As<Napi::String>().Utf8Value();
            text_arr.push_back(str);
        }
    }

    // llmodel_embed expects a null-terminated array of C strings
    std::vector<const char *> str_ptrs;
    str_ptrs.reserve(text_arr.size() + 1);
    for (size_t i = 0; i < text_arr.size(); ++i)
        str_ptrs.push_back(text_arr[i].c_str());
    str_ptrs.push_back(nullptr);

    const char *_err = nullptr;
    float *embeds = llmodel_embed(GetInference(), str_ptrs.data(), &embedding_size,
                                  prefix.IsUndefined() ? nullptr : prefix.As<Napi::String>().Utf8Value().c_str(),
                                  dimensionality, &token_count, do_mean, atlas, nullptr, &_err);
    if (!embeds)
    {
        // copy the C error string so it can be compared and rethrown
        std::string err(_err);
        Napi::Error::New(env, err == "(unknown error)" ? "Unknown error: sorry bud" : err).ThrowAsJavaScriptException();
        return env.Undefined();
    }

    auto embedmat = ChunkedFloatPtr(embeds, embedding_size, text_arr.size(), env);

    llmodel_free_embedding(embeds);

    auto res = Napi::Object::New(env);
    res.Set("n_prompt_tokens", token_count);
    if (is_single_text)
    {
        res.Set("embeddings", embedmat.Get(static_cast<uint32_t>(0)));
    }
    else
    {
        res.Set("embeddings", embedmat);
    }

    return res;
}

/**
 * Generate a response using the model.
 * @param prompt A string representing the input prompt.
 * @param options Inference options.
 */
Napi::Value NodeModelWrapper::Infer(const Napi::CallbackInfo &info)
{
    auto env = info.Env();
    std::string prompt;
    if (info[0].IsString())
    {
        prompt = info[0].As<Napi::String>().Utf8Value();
    }
    else
    {
        Napi::Error::New(info.Env(), "invalid string argument").ThrowAsJavaScriptException();
        return info.Env().Undefined();
    }
    if (!info[1].IsObject())
    {
        Napi::Error::New(info.Env(), "Missing Prompt Options").ThrowAsJavaScriptException();
        return info.Env().Undefined();
    }
    // defaults copied from python bindings
    llmodel_prompt_context promptContext = {.logits = nullptr,
                                            .tokens = nullptr,
                                            .n_past = 0,
                                            .n_ctx = nCtx,
                                            .n_predict = 4096,
                                            .top_k = 40,
                                            .top_p = 0.9f,
                                            .min_p = 0.0f,
                                            .temp = 0.1f,
                                            .n_batch = 8,
                                            .repeat_penalty = 1.2f,
                                            .repeat_last_n = 10,
                                            .context_erase = 0.75};

    PromptWorkerConfig promptWorkerConfig;

    auto inputObject = info[1].As<Napi::Object>();

    if (inputObject.Has("logits") || inputObject.Has("tokens"))
    {
        Napi::Error::New(info.Env(), "Invalid input: 'logits' or 'tokens' properties are not allowed")
            .ThrowAsJavaScriptException();
        return info.Env().Undefined();
    }

    // Assign the remaining properties
    if (inputObject.Has("nPast") && inputObject.Get("nPast").IsNumber())
    {
        promptContext.n_past = inputObject.Get("nPast").As<Napi::Number>().Int32Value();
    }
    if (inputObject.Has("nPredict") && inputObject.Get("nPredict").IsNumber())
    {
        promptContext.n_predict = inputObject.Get("nPredict").As<Napi::Number>().Int32Value();
    }
    if (inputObject.Has("topK") && inputObject.Get("topK").IsNumber())
    {
        promptContext.top_k = inputObject.Get("topK").As<Napi::Number>().Int32Value();
    }
    if (inputObject.Has("topP") && inputObject.Get("topP").IsNumber())
    {
        promptContext.top_p = inputObject.Get("topP").As<Napi::Number>().FloatValue();
    }
    if (inputObject.Has("minP") && inputObject.Get("minP").IsNumber())
    {
        promptContext.min_p = inputObject.Get("minP").As<Napi::Number>().FloatValue();
    }
    if (inputObject.Has("temp") && inputObject.Get("temp").IsNumber())
    {
        promptContext.temp = inputObject.Get("temp").As<Napi::Number>().FloatValue();
    }
    if (inputObject.Has("nBatch") && inputObject.Get("nBatch").IsNumber())
    {
        promptContext.n_batch = inputObject.Get("nBatch").As<Napi::Number>().Int32Value();
    }
    if (inputObject.Has("repeatPenalty") && inputObject.Get("repeatPenalty").IsNumber())
    {
        promptContext.repeat_penalty = inputObject.Get("repeatPenalty").As<Napi::Number>().FloatValue();
    }
    if (inputObject.Has("repeatLastN") && inputObject.Get("repeatLastN").IsNumber())
    {
        promptContext.repeat_last_n = inputObject.Get("repeatLastN").As<Napi::Number>().Int32Value();
    }
    if (inputObject.Has("contextErase") && inputObject.Get("contextErase").IsNumber())
    {
        promptContext.context_erase = inputObject.Get("contextErase").As<Napi::Number>().FloatValue();
    }
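    // The repeated Has()/IsNumber() checks above could be collapsed into a small helper.
    // A minimal sketch (illustrative only; `readNumber` is not part of this binding):
    //
    //   auto readNumber = [&inputObject](const char *key, auto &target) {
    //       if (inputObject.Has(key) && inputObject.Get(key).IsNumber())
    //           target = static_cast<std::remove_reference_t<decltype(target)>>(
    //               inputObject.Get(key).As<Napi::Number>().DoubleValue());
    //   };
    //   readNumber("topK", promptContext.top_k);
    //   readNumber("temp", promptContext.temp);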
    if (inputObject.Has("onPromptToken") && inputObject.Get("onPromptToken").IsFunction())
    {
        promptWorkerConfig.promptCallback = inputObject.Get("onPromptToken").As<Napi::Function>();
        promptWorkerConfig.hasPromptCallback = true;
    }

    if (inputObject.Has("onResponseToken") && inputObject.Get("onResponseToken").IsFunction())
    {
        promptWorkerConfig.responseCallback = inputObject.Get("onResponseToken").As<Napi::Function>();
        promptWorkerConfig.hasResponseCallback = true;
    }

    // copy to protect llmodel resources when splitting to a new thread
    // llmodel_prompt_context copiedPrompt = promptContext;

    promptWorkerConfig.context = promptContext;
    promptWorkerConfig.model = GetInference();
    promptWorkerConfig.mutex = &inference_mutex;
    promptWorkerConfig.prompt = prompt;
    promptWorkerConfig.result = "";

    promptWorkerConfig.promptTemplate = inputObject.Get("promptTemplate").As<Napi::String>();

    if (inputObject.Has("special"))
    {
        promptWorkerConfig.special = inputObject.Get("special").As<Napi::Boolean>();
    }

    if (inputObject.Has("fakeReply"))
    {
        // this will be deleted in the worker
        promptWorkerConfig.fakeReply = new std::string(inputObject.Get("fakeReply").As<Napi::String>().Utf8Value());
    }

    auto worker = new PromptWorker(env, promptWorkerConfig);

    worker->Queue();

    return worker->GetPromise();
}

void NodeModelWrapper::Dispose(const Napi::CallbackInfo &info)
{
    llmodel_model_destroy(inference_);
}

void NodeModelWrapper::SetThreadCount(const Napi::CallbackInfo &info)
{
    if (info[0].IsNumber())
    {
        llmodel_setThreadCount(GetInference(), info[0].As<Napi::Number>().Int64Value());
    }
    else
    {
        Napi::Error::New(info.Env(), "Could not set thread count: argument 1 is NaN").ThrowAsJavaScriptException();
        return;
    }
}

Napi::Value NodeModelWrapper::GetName(const Napi::CallbackInfo &info)
{
    return Napi::String::New(info.Env(), name);
}

Napi::Value NodeModelWrapper::ThreadCount(const Napi::CallbackInfo &info)
{
    return Napi::Number::New(info.Env(), llmodel_threadCount(GetInference()));
}

Napi::Value NodeModelWrapper::GetLibraryPath(const Napi::CallbackInfo &info)
{
    return Napi::String::New(info.Env(), llmodel_get_implementation_search_path());
}

llmodel_model NodeModelWrapper::GetInference()
{
    return inference_;
}

// Exports Bindings
Napi::Object Init(Napi::Env env, Napi::Object exports)
{
    exports["LLModel"] = NodeModelWrapper::GetClass(env);
    return exports;
}

NODE_API_MODULE(NODE_GYP_MODULE_NAME, Init)