mirror of https://github.com/nomic-ai/gpt4all.git (synced 2024-10-01 01:06:10 -04:00)

fix/macm1ts (#1746)

* make runtime library backend universal searchable
* corepack enable
* fix
* pass tests
* simpler
* add more jsdoc
* fix tests
* fix up circle ci
* bump version
* remove false positive warning
* add disclaimer
* update readme
* revert
* update ts docs

---------

Co-authored-by: Matthew Nguyen <matthewpnguyen@Matthews-MacBook-Pro-7.local>

This commit is contained in:
parent 3acbef14b7
commit a1f27072c2
@@ -854,6 +854,7 @@ jobs:
           install-yarn: true
           node-version: "18.16"
       - run: node --version
+      - run: corepack enable
       - node/install-packages:
           app-dir: gpt4all-bindings/typescript
           pkg-manager: yarn
@@ -884,6 +885,7 @@ jobs:
           install-yarn: true
           node-version: "18.16"
       - run: node --version
+      - run: corepack enable
       - node/install-packages:
           app-dir: gpt4all-bindings/typescript
           pkg-manager: yarn
@@ -896,14 +898,14 @@ jobs:
           name: "Persisting all necessary things to workspace"
           command: |
             mkdir -p gpt4all-backend/prebuilds/darwin-x64
             mkdir -p gpt4all-backend/runtimes/darwin-x64
             cp /tmp/gpt4all-backend/runtimes/osx-x64/*-*.* gpt4all-backend/runtimes/darwin-x64
             mkdir -p gpt4all-backend/runtimes/darwin
             cp /tmp/gpt4all-backend/runtimes/osx-x64/*-*.* gpt4all-backend/runtimes/darwin
             cp gpt4all-bindings/typescript/prebuilds/darwin-x64/*.node gpt4all-backend/prebuilds/darwin-x64
       - persist_to_workspace:
           root: gpt4all-backend
           paths:
             - prebuilds/darwin-x64/*.node
             - runtimes/darwin-x64/*-*.*
             - runtimes/darwin/*-*.*

   build-nodejs-windows:
     executor:
@@ -925,6 +927,7 @@ jobs:
             nvm install 18.16.0
             nvm use 18.16.0
       - run: node --version
+      - run: corepack enable
       - run:
           command: |
             npm install -g yarn
@@ -958,6 +961,7 @@ jobs:
           install-yarn: true
           node-version: "18.16"
       - run: node --version
+      - run: corepack enable
       - run:
           command: |
             cd gpt4all-bindings/typescript
@@ -972,9 +976,12 @@ jobs:
             cp /tmp/gpt4all-backend/runtimes/linux-x64/*-*.so runtimes/linux-x64/native/
             cp /tmp/gpt4all-backend/prebuilds/linux-x64/*.node prebuilds/linux-x64/

             mkdir -p runtimes/darwin-x64/native
+            # darwin has universal runtime libraries
+            mkdir -p runtimes/darwin/native
             mkdir -p prebuilds/darwin-x64/
             cp /tmp/gpt4all-backend/runtimes/darwin-x64/*-*.* runtimes/darwin-x64/native/

+            cp /tmp/gpt4all-backend/runtimes/darwin/*-*.* runtimes/darwin/native/

             cp /tmp/gpt4all-backend/prebuilds/darwin-x64/*.node prebuilds/darwin-x64/

             # Fallback build if user is not on above prebuilds
@@ -1,11 +1,14 @@
# GPT4All Node.js API

Native Node.js LLM bindings for all.

```sh
-yarn add gpt4all@alpha
-npm install gpt4all@alpha
-pnpm install gpt4all@alpha
+yarn add gpt4all@latest
+
+npm install gpt4all@latest
+
+pnpm install gpt4all@latest
```

The original [GPT4All typescript bindings](https://github.com/nomic-ai/gpt4all-ts) are now out of date.
@@ -15,12 +18,12 @@ The original [GPT4All typescript bindings](https://github.com/nomic-ai/gpt4all-t
* Everything should work out of the box.
* See [API Reference](#api-reference)

-### Chat Completion (alpha)
+### Chat Completion

```js
import { createCompletion, loadModel } from '../src/gpt4all.js'

-const model = await loadModel('ggml-vicuna-7b-1.1-q4_2', { verbose: true });
+const model = await loadModel('mistral-7b-openorca.Q4_0.gguf', { verbose: true });

const response = await createCompletion(model, [
    { role : 'system', content: 'You are meant to be annoying and unhelpful.' },
@@ -29,7 +32,7 @@ const response = await createCompletion(model, [

```

-### Embedding (alpha)
+### Embedding

```js
import { createEmbedding, loadModel } from '../src/gpt4all.js'
@@ -82,8 +85,6 @@ yarn
git submodule update --init --depth 1 --recursive
```

-**AS OF NEW BACKEND** to build the backend,
-
```sh
yarn build:backend
```
@@ -152,13 +153,16 @@ This package is in active development, and breaking changes may happen until the

##### Table of Contents

* [ModelType](#modeltype)
* [ModelFile](#modelfile)
  * [gptj](#gptj)
  * [llama](#llama)
  * [mpt](#mpt)
  * [replit](#replit)
* [type](#type)
+* [InferenceModel](#inferencemodel)
+  * [dispose](#dispose)
+* [EmbeddingModel](#embeddingmodel)
+  * [dispose](#dispose-1)
* [LLModel](#llmodel)
  * [constructor](#constructor)
    * [Parameters](#parameters)
@@ -176,12 +180,20 @@ This package is in active development, and breaking changes may happen until the
  * [setLibraryPath](#setlibrarypath)
    * [Parameters](#parameters-4)
  * [getLibraryPath](#getlibrarypath)
+  * [initGpuByString](#initgpubystring)
+    * [Parameters](#parameters-5)
+  * [hasGpuDevice](#hasgpudevice)
+  * [listGpu](#listgpu)
+  * [dispose](#dispose-2)
+* [GpuDevice](#gpudevice)
+  * [type](#type-2)
+* [LoadModelOptions](#loadmodeloptions)
* [loadModel](#loadmodel)
-  * [Parameters](#parameters-5)
+  * [Parameters](#parameters-6)
* [createCompletion](#createcompletion)
-  * [Parameters](#parameters-6)
+  * [Parameters](#parameters-7)
* [createEmbedding](#createembedding)
-  * [Parameters](#parameters-7)
+  * [Parameters](#parameters-8)
* [CompletionOptions](#completionoptions)
  * [verbose](#verbose)
  * [systemPromptTemplate](#systemprompttemplate)
@@ -214,14 +226,14 @@ This package is in active development, and breaking changes may happen until the
  * [repeatLastN](#repeatlastn)
  * [contextErase](#contexterase)
* [createTokenStream](#createtokenstream)
-  * [Parameters](#parameters-8)
+  * [Parameters](#parameters-9)
* [DEFAULT\_DIRECTORY](#default_directory)
* [DEFAULT\_LIBRARIES\_DIRECTORY](#default_libraries_directory)
* [DEFAULT\_MODEL\_CONFIG](#default_model_config)
-* [DEFAULT\_PROMT\_CONTEXT](#default_promt_context)
+* [DEFAULT\_PROMPT\_CONTEXT](#default_prompt_context)
* [DEFAULT\_MODEL\_LIST\_URL](#default_model_list_url)
* [downloadModel](#downloadmodel)
-  * [Parameters](#parameters-9)
+  * [Parameters](#parameters-10)
  * [Examples](#examples)
* [DownloadModelOptions](#downloadmodeloptions)
  * [modelPath](#modelpath)
@@ -232,16 +244,10 @@ This package is in active development, and breaking changes may happen until the
  * [cancel](#cancel)
  * [promise](#promise)

-#### ModelType
-
-Type of the model
-
-Type: (`"gptj"` | `"llama"` | `"mpt"` | `"replit"`)
-
#### ModelFile

Full list of models available
-@deprecated These model names are outdated and this type will not be maintained, please use a string literal instead
+DEPRECATED!! These model names are outdated and this type will not be maintained, please use a string literal instead

##### gptj
@@ -271,7 +277,27 @@ Type: `"ggml-replit-code-v1-3b.bin"`

Model architecture. This argument currently does not have any functionality and is just used as descriptive identifier for user.

-Type: [ModelType](#modeltype)
+Type: ModelType

+#### InferenceModel
+
+InferenceModel represents an LLM which can make chat predictions, similar to GPT transformers.
+
+##### dispose
+
+delete and cleanup the native model
+
+Returns **void**
+
+#### EmbeddingModel
+
+EmbeddingModel represents an LLM which can create embeddings, which are float arrays
+
+##### dispose
+
+delete and cleanup the native model
+
+Returns **void**
+
#### LLModel
@@ -294,7 +320,7 @@ Initialize a new LLModel.

either 'gpt', 'mpt', or 'llama' or undefined

-Returns **([ModelType](#modeltype) | [undefined](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/undefined))**
+Returns **(ModelType | [undefined](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/undefined))**

##### name
@@ -376,6 +402,52 @@ Where to get the pluggable backend libraries

Returns **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)**

+##### initGpuByString
+
+Initiate a GPU by a string identifier.
+
+###### Parameters
+
+* `memory_required` **[number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)** Should be in the range size\_t or will throw
+* `device_name` **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** 'amd' | 'nvidia' | 'intel' | 'gpu' | gpu name.
+  read LoadModelOptions.device for more information
+
+Returns **[boolean](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Boolean)**
+
+##### hasGpuDevice
+
+From C documentation
+
+Returns **[boolean](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Boolean)** True if a GPU device is successfully initialized, false otherwise.
+
+##### listGpu
+
+GPUs that are usable for this LLModel
+
+* Throws **any** if hasGpuDevice returns false (i think)
+
+Returns **[Array](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Array)<[GpuDevice](#gpudevice)>**
+
+##### dispose
+
+delete and cleanup the native model
+
+Returns **void**
+
+#### GpuDevice
+
+an object that contains gpu data on this machine.
+
+##### type
+
+same as VkPhysicalDeviceType
+
+Type: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)
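For illustration, a hedged sketch of the GPU surface added above. It assumes a loaded model whose underlying `LLModel` is exposed as `model.llm` (per the d.ts changes later in this commit); note the supported way to select a GPU is the `device` option of `loadModel`, so whether an init after loading takes effect is backend-dependent:

```js
import { loadModel } from 'gpt4all'

const model = await loadModel('mistral-7b-openorca.Q4_0.gguf');
const llmodel = model.llm;

// memory_required must fit in size_t; ~2 GiB here.
// 'nvidia' could also be 'amd', 'intel', 'gpu', or a specific gpu name.
const ok = llmodel.initGpuByString(2 * 1024 * 1024 * 1024, 'nvidia');

if (ok && llmodel.hasGpuDevice()) {
    // GpuDevice[]: objects with index, type (VkPhysicalDeviceType) and vendor
    console.log(llmodel.listGpu());
}
```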
+#### LoadModelOptions
+
+Options that configure a model's behavior.
+
#### loadModel

Loads a machine learning model with the specified name. The de facto way to create a model.
@@ -384,9 +456,9 @@ By default this will download a model from the official GPT4ALL website, if a mo

##### Parameters

* `modelName` **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** The name of the model to load.
-* `options` **(LoadModelOptions | [undefined](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/undefined))?** (Optional) Additional options for loading the model.
+* `options` **([LoadModelOptions](#loadmodeloptions) | [undefined](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/undefined))?** (Optional) Additional options for loading the model.

-Returns **[Promise](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Promise)<(InferenceModel | EmbeddingModel)>** A promise that resolves to an instance of the loaded LLModel.
+Returns **[Promise](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Promise)<([InferenceModel](#inferencemodel) | [EmbeddingModel](#embeddingmodel))>** A promise that resolves to an instance of the loaded LLModel.

#### createCompletion
@@ -394,7 +466,7 @@ The nodejs equivalent to python binding's chat\_completion

##### Parameters

-* `model` **InferenceModel** The language model object.
+* `model` **[InferenceModel](#inferencemodel)** The language model object.
* `messages` **[Array](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Array)<[PromptMessage](#promptmessage)>** The array of messages for the conversation.
* `options` **[CompletionOptions](#completionoptions)** The options for creating the completion.
@@ -407,7 +479,7 @@ meow

##### Parameters

-* `model` **EmbeddingModel** The language model object.
+* `model` **[EmbeddingModel](#embeddingmodel)** The language model object.
* `text` **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** text to embed

Returns **[Float32Array](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Float32Array)** The completion result.
@@ -652,7 +724,7 @@ Default model configuration.

Type: ModelConfig

-#### DEFAULT\_PROMT\_CONTEXT
+#### DEFAULT\_PROMPT\_CONTEXT

Default prompt context.
@@ -1,11 +1,14 @@
# GPT4All Node.js API

Native Node.js LLM bindings for all.

```sh
yarn add gpt4all@latest

npm install gpt4all@latest

pnpm install gpt4all@latest
```

The original [GPT4All typescript bindings](https://github.com/nomic-ai/gpt4all-ts) are now out of date.
@@ -20,7 +23,7 @@ The original [GPT4All typescript bindings](https://github.com/nomic-ai/gpt4all-t
```js
import { createCompletion, loadModel } from '../src/gpt4all.js'

-const model = await loadModel('ggml-vicuna-7b-1.1-q4_2', { verbose: true });
+const model = await loadModel('mistral-7b-openorca.Q4_0.gguf', { verbose: true });

const response = await createCompletion(model, [
    { role : 'system', content: 'You are meant to be annoying and unhelpful.' },
@@ -144,587 +147,3 @@ This package is in active development, and breaking changes may happen until the

* \[ ] createChatSession (the python equivalent to create\_chat\_session)

### API Reference

<!-- Generated by documentation.js. Update this documentation by updating the source code. -->

##### Table of Contents
* [ModelType](#modeltype)
* [ModelFile](#modelfile)
  * [gptj](#gptj)
  * [llama](#llama)
  * [mpt](#mpt)
  * [replit](#replit)
* [type](#type)
* [LLModel](#llmodel)
  * [constructor](#constructor)
    * [Parameters](#parameters)
  * [type](#type-1)
  * [name](#name)
  * [stateSize](#statesize)
  * [threadCount](#threadcount)
  * [setThreadCount](#setthreadcount)
    * [Parameters](#parameters-1)
  * [raw\_prompt](#raw_prompt)
    * [Parameters](#parameters-2)
  * [embed](#embed)
    * [Parameters](#parameters-3)
  * [isModelLoaded](#ismodelloaded)
  * [setLibraryPath](#setlibrarypath)
    * [Parameters](#parameters-4)
  * [getLibraryPath](#getlibrarypath)
* [loadModel](#loadmodel)
  * [Parameters](#parameters-5)
* [createCompletion](#createcompletion)
  * [Parameters](#parameters-6)
* [createEmbedding](#createembedding)
  * [Parameters](#parameters-7)
* [CompletionOptions](#completionoptions)
  * [verbose](#verbose)
  * [systemPromptTemplate](#systemprompttemplate)
  * [promptTemplate](#prompttemplate)
  * [promptHeader](#promptheader)
  * [promptFooter](#promptfooter)
* [PromptMessage](#promptmessage)
  * [role](#role)
  * [content](#content)
* [prompt\_tokens](#prompt_tokens)
* [completion\_tokens](#completion_tokens)
* [total\_tokens](#total_tokens)
* [CompletionReturn](#completionreturn)
  * [model](#model)
  * [usage](#usage)
  * [choices](#choices)
* [CompletionChoice](#completionchoice)
  * [message](#message)
* [LLModelPromptContext](#llmodelpromptcontext)
  * [logitsSize](#logitssize)
  * [tokensSize](#tokenssize)
  * [nPast](#npast)
  * [nCtx](#nctx)
  * [nPredict](#npredict)
  * [topK](#topk)
  * [topP](#topp)
  * [temp](#temp)
  * [nBatch](#nbatch)
  * [repeatPenalty](#repeatpenalty)
  * [repeatLastN](#repeatlastn)
  * [contextErase](#contexterase)
* [createTokenStream](#createtokenstream)
  * [Parameters](#parameters-8)
* [DEFAULT\_DIRECTORY](#default_directory)
* [DEFAULT\_LIBRARIES\_DIRECTORY](#default_libraries_directory)
* [DEFAULT\_MODEL\_CONFIG](#default_model_config)
* [DEFAULT\_PROMT\_CONTEXT](#default_promt_context)
* [DEFAULT\_MODEL\_LIST\_URL](#default_model_list_url)
* [downloadModel](#downloadmodel)
  * [Parameters](#parameters-9)
  * [Examples](#examples)
* [DownloadModelOptions](#downloadmodeloptions)
  * [modelPath](#modelpath)
  * [verbose](#verbose-1)
  * [url](#url)
  * [md5sum](#md5sum)
* [DownloadController](#downloadcontroller)
  * [cancel](#cancel)
  * [promise](#promise)
#### ModelType

Type of the model

Type: (`"gptj"` | `"llama"` | `"mpt"` | `"replit"`)

#### ModelFile

Full list of models available
@deprecated These model names are outdated and this type will not be maintained, please use a string literal instead

##### gptj

List of GPT-J Models

Type: (`"ggml-gpt4all-j-v1.3-groovy.bin"` | `"ggml-gpt4all-j-v1.2-jazzy.bin"` | `"ggml-gpt4all-j-v1.1-breezy.bin"` | `"ggml-gpt4all-j.bin"`)

##### llama

List Llama Models

Type: (`"ggml-gpt4all-l13b-snoozy.bin"` | `"ggml-vicuna-7b-1.1-q4_2.bin"` | `"ggml-vicuna-13b-1.1-q4_2.bin"` | `"ggml-wizardLM-7B.q4_2.bin"` | `"ggml-stable-vicuna-13B.q4_2.bin"` | `"ggml-nous-gpt4-vicuna-13b.bin"` | `"ggml-v3-13b-hermes-q5_1.bin"`)

##### mpt

List of MPT Models

Type: (`"ggml-mpt-7b-base.bin"` | `"ggml-mpt-7b-chat.bin"` | `"ggml-mpt-7b-instruct.bin"`)

##### replit

List of Replit Models

Type: `"ggml-replit-code-v1-3b.bin"`

#### type

Model architecture. This argument currently does not have any functionality and is just used as descriptive identifier for user.

Type: [ModelType](#modeltype)
#### LLModel

LLModel class representing a language model.
This is a base class that provides common functionality for different types of language models.

##### constructor

Initialize a new LLModel.

###### Parameters

* `path` **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** Absolute path to the model file.

<!---->

* Throws **[Error](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Error)** If the model file does not exist.

##### type

either 'gpt', 'mpt', or 'llama' or undefined

Returns **([ModelType](#modeltype) | [undefined](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/undefined))**

##### name

The name of the model.

Returns **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)**

##### stateSize

Get the size of the internal state of the model.
NOTE: This state data is specific to the type of model you have created.

Returns **[number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)** the size in bytes of the internal state of the model

##### threadCount

Get the number of threads used for model inference.
The default is the number of physical cores your computer has.

Returns **[number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)** The number of threads used for model inference.
##### setThreadCount

Set the number of threads used for model inference.

###### Parameters

* `newNumber` **[number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)** The new number of threads.

Returns **void**
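A minimal usage sketch, not from the commit itself: it assumes an `InferenceModel` from `loadModel`, which exposes the underlying `LLModel` as its `llm` property.

```js
import { loadModel } from 'gpt4all'

const model = await loadModel('mistral-7b-openorca.Q4_0.gguf');
console.log(model.llm.threadCount()); // defaults to the number of physical cores
model.llm.setThreadCount(4);          // trade speed for a lighter CPU load
```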
##### raw\_prompt

Prompt the model with a given input and optional parameters.
This is the raw output from model.
Use the prompt function exported for a value

###### Parameters

* `q` **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** The prompt input.
* `params` **Partial<[LLModelPromptContext](#llmodelpromptcontext)>** Optional parameters for the prompt context.
* `callback` **function (res: [string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)): void**

Returns **void** The result of the model prompt.
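A hedged sketch of streaming tokens through `raw_prompt`, reusing `model` from the sketch above; the callback shape is the one documented here, everything else is an assumption:

```js
// prints tokens as they are generated; raw_prompt itself returns void
model.llm.raw_prompt('Tell me a short joke.', { temp: 0.7, nPredict: 64 }, (token) => {
    process.stdout.write(token);
});
```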
##### embed

Embed text with the model. Keep in mind that
not all models can embed text (only bert can embed as of 07/16/2023 (mm/dd/yyyy)).
Use the prompt function exported for a value

###### Parameters

* `text` **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)**
* `q` The prompt input.
* `params` Optional parameters for the prompt context.

Returns **[Float32Array](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Float32Array)** The result of the model prompt.

##### isModelLoaded

Whether the model is loaded or not.

Returns **[boolean](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Boolean)**

##### setLibraryPath

Where to search for the pluggable backend libraries

###### Parameters

* `s` **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)**

Returns **void**

##### getLibraryPath

Where to get the pluggable backend libraries

Returns **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)**
#### loadModel

Loads a machine learning model with the specified name. The de facto way to create a model.
By default this will download a model from the official GPT4ALL website, if a model is not present at given path.

##### Parameters

* `modelName` **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** The name of the model to load.
* `options` **(LoadModelOptions | [undefined](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/undefined))?** (Optional) Additional options for loading the model.

Returns **[Promise](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Promise)<(InferenceModel | EmbeddingModel)>** A promise that resolves to an instance of the loaded LLModel.
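A minimal sketch under the defaults described above (download on first use, then reuse from `modelPath`); the option names are the documented LoadModelOptions fields:

```js
import { loadModel } from 'gpt4all'

// fetched from the official GPT4All website if missing at modelPath
const model = await loadModel('mistral-7b-openorca.Q4_0.gguf', {
    modelPath: './models',
    verbose: true,
});
```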
#### createCompletion

The nodejs equivalent to python binding's chat\_completion

##### Parameters

* `model` **InferenceModel** The language model object.
* `messages` **[Array](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Array)<[PromptMessage](#promptmessage)>** The array of messages for the conversation.
* `options` **[CompletionOptions](#completionoptions)** The options for creating the completion.

Returns **[CompletionReturn](#completionreturn)** The completion result.
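A sketch mirroring the README example; `choices` and `usage` follow the CompletionReturn shape documented below:

```js
import { createCompletion, loadModel } from 'gpt4all'

const model = await loadModel('mistral-7b-openorca.Q4_0.gguf');

const completion = await createCompletion(model, [
    { role: 'system', content: 'You are a concise assistant.' },
    { role: 'user', content: 'What is a monorepo?' },
]);

console.log(completion.choices[0].message.content); // CompletionChoice -> PromptMessage
console.log(completion.usage);                      // prompt_tokens / completion_tokens / total_tokens
```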
#### createEmbedding

The nodejs moral equivalent to python binding's Embed4All().embed()
meow

##### Parameters

* `model` **EmbeddingModel** The language model object.
* `text` **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** text to embed

Returns **[Float32Array](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Float32Array)** The completion result.
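A sketch assuming an embedding-capable model (per `embed` above, only bert-style models embed as of this writing); the model file name here is an assumption:

```js
import { createEmbedding, loadModel } from 'gpt4all'

const embedder = await loadModel('ggml-all-MiniLM-L6-v2-f16.bin');
const vector = createEmbedding(embedder, 'The quick brown fox jumps over the lazy dog');
console.log(vector.length); // Float32Array, one entry per embedding dimension
```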
#### CompletionOptions

**Extends Partial\<LLModelPromptContext>**

The options for creating the completion.

##### verbose

Indicates if verbose logging is enabled.

Type: [boolean](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Boolean)

##### systemPromptTemplate

Template for the system message. Will be put before the conversation with %1 being replaced by all system messages.
Note that if this is not defined, system messages will not be included in the prompt.

Type: [string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)

##### promptTemplate

Template for user messages, with %1 being replaced by the message.

Type: [boolean](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Boolean)

##### promptHeader

The initial instruction for the model, on top of the prompt

Type: [string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)

##### promptFooter

The last instruction for the model, appended to the end of the prompt.

Type: [string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)
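A hedged sketch combining these options, assuming `model` and `messages` from the earlier sketches; the `%1` placeholders follow the convention documented above, and the concrete template strings are illustrative assumptions, not a required format:

```js
const completion = await createCompletion(model, messages, {
    verbose: true,
    systemPromptTemplate: '### System:\n%1\n',        // %1 receives all system messages
    promptTemplate: '### User:\n%1\n### Response:\n', // %1 receives the user message
    promptFooter: 'Answer in one sentence.',          // appended to the end of the prompt
});
```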
#### PromptMessage

A message in the conversation, identical to OpenAI's chat message.

##### role

The role of the message.

Type: (`"system"` | `"assistant"` | `"user"`)

##### content

The message content.

Type: [string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)

#### prompt\_tokens

The number of tokens used in the prompt.

Type: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)

#### completion\_tokens

The number of tokens used in the completion.

Type: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)

#### total\_tokens

The total number of tokens used.

Type: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)

#### CompletionReturn

The result of the completion, similar to OpenAI's format.

##### model

The model used for the completion.

Type: [string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)

##### usage

Token usage report.

Type: {prompt\_tokens: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number), completion\_tokens: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number), total\_tokens: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)}

##### choices

The generated completions.

Type: [Array](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Array)<[CompletionChoice](#completionchoice)>

#### CompletionChoice

A completion choice, similar to OpenAI's format.

##### message

Response message

Type: [PromptMessage](#promptmessage)
#### LLModelPromptContext

Model inference arguments for generating completions.

##### logitsSize

The size of the raw logits vector.

Type: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)

##### tokensSize

The size of the raw tokens vector.

Type: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)

##### nPast

The number of tokens in the past conversation.

Type: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)

##### nCtx

The number of tokens possible in the context window.

Type: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)

##### nPredict

The number of tokens to predict.

Type: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)

##### topK

The top-k logits to sample from.
Top-K sampling selects the next token only from the top K most likely tokens predicted by the model.
It helps reduce the risk of generating low-probability or nonsensical tokens, but it may also limit
the diversity of the output. A higher value for top-K (e.g., 100) will consider more tokens and lead
to more diverse text, while a lower value (e.g., 10) will focus on the most probable tokens and generate
more conservative text. 30 - 60 is a good range for most tasks.

Type: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)

##### topP

The nucleus sampling probability threshold.
Top-P limits the selection of the next token to a subset of tokens with a cumulative probability
above a threshold P. This method, also known as nucleus sampling, finds a balance between diversity
and quality by considering both token probabilities and the number of tokens available for sampling.
When using a higher value for top-P (e.g., 0.95), the generated text becomes more diverse.
On the other hand, a lower value (e.g., 0.1) produces more focused and conservative text.
The default value is 0.4, which is aimed to be the middle ground between focus and diversity, but
for more creative tasks a higher top-p value will be beneficial, about 0.5-0.9 is a good range for that.

Type: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)

##### temp

The temperature to adjust the model's output distribution.
Temperature is like a knob that adjusts how creative or focused the output becomes. Higher temperatures
(e.g., 1.2) increase randomness, resulting in more imaginative and diverse text. Lower temperatures (e.g., 0.5)
make the output more focused, predictable, and conservative. When the temperature is set to 0, the output
becomes completely deterministic, always selecting the most probable next token and producing identical results
each time. A safe range would be around 0.6 - 0.85, but you are free to search what value fits best for you.

Type: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)
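Because CompletionOptions extends `Partial<LLModelPromptContext>` (noted above), these sampling knobs pass straight through `createCompletion`; a sketch using the ranges suggested above, with `model` and `messages` assumed:

```js
const completion = await createCompletion(model, messages, {
    temp: 0.7, // inside the suggested 0.6 - 0.85 band
    topK: 40,  // inside the suggested 30 - 60 band
    topP: 0.4, // the documented default; raise toward 0.9 for more creative output
});
```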
##### nBatch

The number of predictions to generate in parallel.
By splitting the prompt every N tokens, prompt-batch-size reduces RAM usage during processing. However,
this can increase the processing time as a trade-off. If the N value is set too low (e.g., 10), long prompts
with 500+ tokens will be most affected, requiring numerous processing runs to complete the prompt processing.
To ensure optimal performance, setting the prompt-batch-size to 2048 allows processing of all tokens in a single run.

Type: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)

##### repeatPenalty

The penalty factor for repeated tokens.
Repeat-penalty can help penalize tokens based on how frequently they occur in the text, including the input prompt.
A token that has already appeared five times is penalized more heavily than a token that has appeared only one time.
A value of 1 means that there is no penalty and values larger than 1 discourage repeated tokens.

Type: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)

##### repeatLastN

The number of last tokens to penalize.
The repeat-penalty-tokens N option controls the number of tokens in the history to consider for penalizing repetition.
A larger value will look further back in the generated text to prevent repetitions, while a smaller value will only
consider recent tokens.

Type: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)

##### contextErase

The percentage of context to erase if the context window is exceeded.

Type: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)

#### createTokenStream

TODO: Help wanted to implement this

##### Parameters

* `llmodel` **[LLModel](#llmodel)**
* `messages` **[Array](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Array)<[PromptMessage](#promptmessage)>**
* `options` **[CompletionOptions](#completionoptions)**

Returns **function (ll: [LLModel](#llmodel)): AsyncGenerator<[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)>**
#### DEFAULT\_DIRECTORY

From python api:
models will be stored in (homedir)/.cache/gpt4all/

Type: [string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)

#### DEFAULT\_LIBRARIES\_DIRECTORY

From python api:
The default path for dynamic libraries to be stored.
You may separate paths by a semicolon to search in multiple areas.
This searches DEFAULT\_DIRECTORY/libraries, cwd/libraries, and finally cwd.

Type: [string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)

#### DEFAULT\_MODEL\_CONFIG

Default model configuration.

Type: ModelConfig

#### DEFAULT\_PROMT\_CONTEXT

Default prompt context.

Type: [LLModelPromptContext](#llmodelpromptcontext)

#### DEFAULT\_MODEL\_LIST\_URL

Default model list url.

Type: [string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)

#### downloadModel

Initiates the download of a model file.
By default this downloads without waiting. Use the controller returned to alter this behavior.

##### Parameters

* `modelName` **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** The model to be downloaded.
* `options` **DownloadOptions** to pass into the downloader. Default is { location: (cwd), verbose: false }.

##### Examples

```javascript
const download = downloadModel('ggml-gpt4all-j-v1.3-groovy.bin')
download.promise.then(() => console.log('Downloaded!'))
```

* Throws **[Error](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Error)** If the model already exists in the specified location.
* Throws **[Error](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Error)** If the model cannot be found at the specified url.

Returns **[DownloadController](#downloadcontroller)** object that allows controlling the download process.

#### DownloadModelOptions

Options for the model download process.

##### modelPath

location to download the model.
Default is process.cwd(), or the current working directory

Type: [string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)

##### verbose

Debug mode -- check how long it took to download in seconds

Type: [boolean](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Boolean)

##### url

Remote download url. Defaults to `https://gpt4all.io/models/gguf/<modelName>`

Type: [string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)

##### md5sum

MD5 sum of the model file. If this is provided, the downloaded file will be checked against this sum.
If the sums do not match, an error will be thrown and the file will be deleted.

Type: [string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)
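A sketch of a verified download using the options above; the md5 value is a placeholder, not a real checksum:

```js
import { downloadModel } from 'gpt4all'

const download = downloadModel('mistral-7b-openorca.Q4_0.gguf', {
    modelPath: './models', // defaults to process.cwd()
    verbose: true,         // reports how long the download took
    md5sum: '00000000000000000000000000000000', // placeholder; a mismatch deletes the file and throws
});
```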
#### DownloadController

Model download controller.

##### cancel

Cancel the request to download if this is called.

Type: function (): void

##### promise

A promise resolving to the downloaded model's config once the download is done

Type: [Promise](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Promise)\<ModelConfig>
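A sketch of `cancel` and `promise` in combination; the ten-minute budget is an arbitrary assumption:

```js
const download = downloadModel('mistral-7b-openorca.Q4_0.gguf');

// give up if the download runs longer than ten minutes
const timer = setTimeout(() => download.cancel(), 10 * 60 * 1000);

const modelConfig = await download.promise; // resolves to the model's config
clearTimeout(timer);
console.log(modelConfig.path);
```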
@@ -81,7 +81,7 @@ Napi::Value NodeModelWrapper::GetRequiredMemory(const Napi::CallbackInfo& info)
Napi::Value NodeModelWrapper::InitGpuByString(const Napi::CallbackInfo& info)
{
    auto env = info.Env();
-   uint32_t memory_required = info[0].As<Napi::Number>();
+   size_t memory_required = static_cast<size_t>(info[0].As<Napi::Number>().Uint32Value());

    std::string gpu_device_identifier = info[1].As<Napi::String>();
@@ -149,16 +149,14 @@ Napi::Value NodeModelWrapper::GetRequiredMemory(const Napi::CallbackInfo& info)
    }
    if(device != "cpu") {
        size_t mem = llmodel_required_mem(GetInference(), full_weight_path.c_str());
        if(mem == 0) {
            std::cout << "WARNING: no memory needed. does this model support gpu?\n";
        }
        std::cout << "Initiating GPU\n";
        std::cout << "Memory required estimation: " << mem << "\n";

        auto success = llmodel_gpu_init_gpu_device_by_string(GetInference(), mem, device.c_str());
        if(success) {
            std::cout << "GPU init successfully\n";
        } else {
            //https://github.com/nomic-ai/gpt4all/blob/3acbef14b7c2436fe033cae9036e695d77461a16/gpt4all-bindings/python/gpt4all/pyllmodel.py#L215
            //Haven't implemented this but it is still open to contribution
            std::cout << "WARNING: Failed to init GPU\n";
        }
    }
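On the JS side this code path is reached through the `device` load option that `initGpuByString`'s docs reference; a hedged sketch:

```js
// any value other than 'cpu' routes through llmodel_gpu_init_gpu_device_by_string above
const model = await loadModel('mistral-7b-openorca.Q4_0.gguf', {
    device: 'gpu', // or 'amd' | 'nvidia' | 'intel' | a specific gpu name
    verbose: true, // surfaces the "Initiating GPU" log and memory estimate printed above
});
```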
@@ -1,6 +1,6 @@
{
  "name": "gpt4all",
- "version": "3.0.0",
+ "version": "3.1.0",
  "packageManager": "yarn@3.6.1",
  "main": "src/gpt4all.js",
  "repository": "nomic-ai/gpt4all",
@@ -9,9 +9,7 @@
    "test": "jest",
    "build:backend": "node scripts/build.js",
    "build": "node-gyp-build",
-   "predocs:build": "node scripts/docs.js",
-   "docs:build": "documentation readme ./src/gpt4all.d.ts --parse-extension js d.ts --format md --section \"API Reference\" --readme-file ../python/docs/gpt4all_typescript.md",
-   "postdocs:build": "documentation readme ./src/gpt4all.d.ts --parse-extension js d.ts --format md --section \"API Reference\" --readme-file README.md"
+   "docs:build": "node scripts/docs.js && documentation readme ./src/gpt4all.d.ts --parse-extension js d.ts --format md --section \"API Reference\" --readme-file ../python/docs/gpt4all_nodejs.md"
  },
  "files": [
    "src/**/*",
@@ -2,7 +2,11 @@

const fs = require('fs');

-const newPath = '../python/docs/gpt4all_typescript.md';
-const filepath = 'README.md';
-const data = fs.readFileSync(filepath);
-fs.writeFileSync(newPath, data);
+const newPath = '../python/docs/gpt4all_nodejs.md';
+const filepath = './README.md';
+const intro = fs.readFileSync(filepath);
+
+fs.writeFileSync(
+    newPath, intro
+);
@@ -9,7 +9,13 @@ const librarySearchPaths = [
    path.resolve(
        __dirname,
        "..",
-       `runtimes/${process.platform}-${process.arch}/native`
+       `runtimes/${process.platform}-${process.arch}/native`,
    ),
+   //for darwin. This is hardcoded for now but it should work
+   path.resolve(
+       __dirname,
+       "..",
+       `runtimes/${process.platform}/native`,
+   ),
    process.cwd(),
];
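These resolved directories feed `DEFAULT_LIBRARIES_DIRECTORY`, which the API reference above describes as a semicolon-separated search list; a hedged sketch of prepending a custom build directory (assuming the constant is re-exported by the package, as the reference documents):

```js
import { loadModel, DEFAULT_LIBRARIES_DIRECTORY } from 'gpt4all'

// entries are ';'-separated; loadModel filters them for existence (see the src change below)
const librariesPath = ['./gpt4all-backend/build', DEFAULT_LIBRARIES_DIRECTORY].join(';');

const model = await loadModel('mistral-7b-openorca.Q4_0.gguf', { librariesPath });
```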
gpt4all-bindings/typescript/src/gpt4all.d.ts (vendored, 21 changes)
@@ -1,13 +1,12 @@
/// <reference types="node" />
declare module "gpt4all";

-/** Type of the model */
type ModelType = "gptj" | "llama" | "mpt" | "replit";

+// NOTE: "deprecated" tag in below comment breaks the doc generator https://github.com/documentationjs/documentation/issues/1596
/**
 * Full list of models available
- * @deprecated These model names are outdated and this type will not be maintained, please use a string literal instead
+ * DEPRECATED!! These model names are outdated and this type will not be maintained, please use a string literal instead
 */
interface ModelFile {
    /** List of GPT-J Models */
@@ -34,7 +33,6 @@ interface ModelFile {
    replit: "ggml-replit-code-v1-3b.bin";
}

-//mirrors py options
interface LLModelOptions {
    /**
     * Model architecture. This argument currently does not have any functionality and is just used as descriptive identifier for user.
@@ -51,7 +49,11 @@
    path: string;
    url?: string;
}

+/**
+ *
+ * InferenceModel represents an LLM which can make chat predictions, similar to GPT transformers.
+ *
+ */
declare class InferenceModel {
    constructor(llm: LLModel, config: ModelConfig);
    llm: LLModel;
@@ -68,6 +70,9 @@ declare class InferenceModel {
    dispose(): void
}

+/**
+ * EmbeddingModel represents an LLM which can create embeddings, which are float arrays
+ */
declare class EmbeddingModel {
    constructor(llm: LLModel, config: ModelConfig);
    llm: LLModel;
@@ -171,6 +176,7 @@ declare class LLModel {
    hasGpuDevice(): boolean
    /**
     * GPUs that are usable for this LLModel
+    * @throws if hasGpuDevice returns false (i think)
     * @returns
     */
    listGpu() : GpuDevice[]
@@ -181,8 +187,8 @@ declare class LLModel {
    dispose(): void
}
/**
 * an object that contains gpu data on this machine.
 */
interface GpuDevice {
    index: number;
    /**
@@ -194,6 +200,9 @@ interface GpuDevice {
    vendor: string;
}

+/**
+ * Options that configure a model's behavior.
+ */
interface LoadModelOptions {
    modelPath?: string;
    librariesPath?: string;
@@ -18,6 +18,7 @@ const {
    DEFAULT_MODEL_LIST_URL,
} = require("./config.js");
const { InferenceModel, EmbeddingModel } = require("./models.js");
+const assert = require("assert");

/**
 * Loads a machine learning model with the specified name. The defacto way to create a model.
@@ -45,23 +46,17 @@ async function loadModel(modelName, options = {}) {
        verbose: loadOptions.verbose,
    });

-   const libSearchPaths = loadOptions.librariesPath.split(";");
+   assert.ok(typeof loadOptions.librariesPath === 'string');
+   const existingPaths = loadOptions.librariesPath
+       .split(";")
+       .filter(existsSync)
+       .join(';');
+   console.log("Passing these paths into runtime library search:", existingPaths)

-   let libPath = null;
-
-   for (const searchPath of libSearchPaths) {
-       if (existsSync(searchPath)) {
-           libPath = searchPath;
-           break;
-       }
-   }
-   if (!libPath) {
-       throw Error("Could not find a valid path from " + libSearchPaths);
-   }
    const llmOptions = {
        model_name: appendBinSuffixIfMissing(modelName),
        model_path: loadOptions.modelPath,
-       library_path: libPath,
+       library_path: existingPaths,
        device: loadOptions.device,
    };
@@ -35,6 +35,11 @@ describe("config", () => {
            "..",
            `runtimes/${process.platform}-${process.arch}/native`
        ),
+       path.resolve(
+           __dirname,
+           "..",
+           `runtimes/${process.platform}/native`,
+       ),
        process.cwd(),
    ];
    expect(typeof DEFAULT_LIBRARIES_DIRECTORY).toBe("string");