Mirror of https://github.com/nomic-ai/gpt4all.git
bindings: replace references to GGMLv3 models with GGUF (#1547)
parent c25dc51935
commit 37b007603a
@@ -40,5 +40,5 @@ directory, if necessary.
 If you have already saved a model beforehand, specify its path with the `-m`/`--model` argument,
 for example:
 ```shell
-python app.py repl --model /home/user/my-gpt4all-models/GPT4All-13B-snoozy.ggmlv3.q4_0.bin
+python app.py repl --model /home/user/my-gpt4all-models/gpt4all-13b-snoozy-q4_0.gguf
 ```
@@ -50,7 +50,7 @@ Test it out! In a Python script or console:

 ```python
 from gpt4all import GPT4All
-model = GPT4All("orca-mini-3b.ggmlv3.q4_0.bin")
+model = GPT4All("orca-mini-3b-gguf2-q4_0.gguf")
 output = model.generate("The capital of France is ", max_tokens=3)
 print(output)
 ```
@@ -59,7 +59,7 @@ print(output)
 GPU Usage
 ```python
 from gpt4all import GPT4All
-model = GPT4All("orca-mini-3b.ggmlv3.q4_0.bin", device='gpu') # device='amd', device='intel'
+model = GPT4All("orca-mini-3b-gguf2-q4_0.gguf", device='gpu') # device='amd', device='intel'
 output = model.generate("The capital of France is ", max_tokens=3)
 print(output)
 ```
@@ -166,7 +166,7 @@ If you want to use a different model, you can do so with the `-m`/`--model` para
 model file name is provided, it will again check in `.cache/gpt4all/` and might start downloading.
 If instead given a path to an existing model, the command could for example look like this:
 ```shell
-python app.py repl --model /home/user/my-gpt4all-models/GPT4All-13B-snoozy.ggmlv3.q4_0.bin
+python app.py repl --model /home/user/my-gpt4all-models/gpt4all-13b-snoozy-q4_0.gguf
 ```

 When you're done and want to end a session, simply type `/exit`.
@@ -11,7 +11,7 @@ pip install gpt4all
 === "GPT4All Example"
 ``` py
 from gpt4all import GPT4All
-model = GPT4All("orca-mini-3b.ggmlv3.q4_0.bin")
+model = GPT4All("orca-mini-3b-gguf2-q4_0.gguf")
 output = model.generate("The capital of France is ", max_tokens=3)
 print(output)
 ```
@@ -35,7 +35,7 @@ Use the GPT4All `chat_session` context manager to hold chat conversations with t

 === "GPT4All Example"
 ``` py
-model = GPT4All(model_name='orca-mini-3b.ggmlv3.q4_0.bin')
+model = GPT4All(model_name='orca-mini-3b-gguf2-q4_0.gguf')
 with model.chat_session():
     response1 = model.generate(prompt='hello', temp=0)
     response2 = model.generate(prompt='write me a short poem', temp=0)
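Not part of the diff: a minimal sketch, assuming only what the hunk above already shows, of how the truncated `chat_session` example continues with the new GGUF file name. The trailing `print` calls are added here for illustration.

```python
from gpt4all import GPT4All

# Sketch only: continues the truncated docs example above with the new
# GGUF model name; the print calls are an assumed way to inspect the output.
model = GPT4All(model_name='orca-mini-3b-gguf2-q4_0.gguf')
with model.chat_session():
    response1 = model.generate(prompt='hello', temp=0)
    response2 = model.generate(prompt='write me a short poem', temp=0)
print(response1)
print(response2)
```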
@@ -89,7 +89,7 @@ To interact with GPT4All responses as the model generates, use the `streaming=Tr
 === "GPT4All Streaming Example"
 ``` py
 from gpt4all import GPT4All
-model = GPT4All("orca-mini-3b.ggmlv3.q4_0.bin")
+model = GPT4All("orca-mini-3b-gguf2-q4_0.gguf")
 tokens = []
 for token in model.generate("The capital of France is", max_tokens=20, streaming=True):
     tokens.append(token)
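Not part of the diff: the streaming hunk ends at `tokens.append()`; a minimal sketch of the complete loop, where joining the collected tokens at the end is an assumption added for illustration.

```python
from gpt4all import GPT4All

# Sketch only: with streaming=True, generate() yields tokens one at a time,
# as in the hunk above; joining them afterwards is one typical way to use it.
model = GPT4All("orca-mini-3b-gguf2-q4_0.gguf")
tokens = []
for token in model.generate("The capital of France is", max_tokens=20, streaming=True):
    tokens.append(token)
print(''.join(tokens))
```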
@@ -135,7 +135,7 @@ is the same as if it weren't provided; that is, `~/.cache/gpt4all/` is the defau
 ``` py
 from pathlib import Path
 from gpt4all import GPT4All
-model = GPT4All(model_name='orca-mini-3b.ggmlv3.q4_0.bin',
+model = GPT4All(model_name='orca-mini-3b-gguf2-q4_0.gguf',
                 model_path=(Path.home() / '.cache' / 'gpt4all'),
                 allow_download=False)
 response = model.generate('my favorite 3 fruits are:', temp=0)
@@ -152,7 +152,7 @@ If you want to point it at the chat GUI's default folder, it should be:
 from pathlib import Path
 from gpt4all import GPT4All

-model_name = 'orca-mini-3b.ggmlv3.q4_0.bin'
+model_name = 'orca-mini-3b-gguf2-q4_0.gguf'
 model_path = Path.home() / 'Library' / 'Application Support' / 'nomic.ai' / 'GPT4All'
 model = GPT4All(model_name, model_path)
 ```
@@ -161,7 +161,7 @@ If you want to point it at the chat GUI's default folder, it should be:
 from pathlib import Path
 from gpt4all import GPT4All
 import os
-model_name = 'orca-mini-3b.ggmlv3.q4_0.bin'
+model_name = 'orca-mini-3b-gguf2-q4_0.gguf'
 model_path = Path(os.environ['LOCALAPPDATA']) / 'nomic.ai' / 'GPT4All'
 model = GPT4All(model_name, model_path)
 ```
@@ -170,7 +170,7 @@ If you want to point it at the chat GUI's default folder, it should be:
 from pathlib import Path
 from gpt4all import GPT4All

-model_name = 'orca-mini-3b.ggmlv3.q4_0.bin'
+model_name = 'orca-mini-3b-gguf2-q4_0.gguf'
 model_path = Path.home() / '.local' / 'share' / 'nomic.ai' / 'GPT4All'
 model = GPT4All(model_name, model_path)
 ```
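Not part of the diff: a sketch that combines the three platform-specific snippets above into one script; the `sys.platform` dispatch is an addition for illustration, while the paths and model name come straight from the hunks.

```python
import os
import sys
from pathlib import Path
from gpt4all import GPT4All

# Pick the chat GUI's default model folder per platform, mirroring the
# macOS, Windows, and Linux snippets shown in the documentation diff above.
if sys.platform == 'darwin':
    model_path = Path.home() / 'Library' / 'Application Support' / 'nomic.ai' / 'GPT4All'
elif sys.platform == 'win32':
    model_path = Path(os.environ['LOCALAPPDATA']) / 'nomic.ai' / 'GPT4All'
else:
    model_path = Path.home() / '.local' / 'share' / 'nomic.ai' / 'GPT4All'

model = GPT4All('orca-mini-3b-gguf2-q4_0.gguf', model_path)
```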
@@ -182,7 +182,7 @@ from pathlib import Path
 import gpt4all.gpt4all
 gpt4all.gpt4all.DEFAULT_MODEL_DIRECTORY = Path.home() / 'my' / 'models-directory'
 from gpt4all import GPT4All
-model = GPT4All('orca-mini-3b.ggmlv3.q4_0.bin')
+model = GPT4All('orca-mini-3b-gguf2-q4_0.gguf')
 ...
 ```
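Not part of the diff: a sketch expanding the `...` in the hunk above. The override of the package-wide default directory is taken from the hunk; the trailing `generate()` call is an assumed continuation, not a line from this commit.

```python
from pathlib import Path
import gpt4all.gpt4all

# Sketch only: set the default model directory before the GPT4All class is used.
gpt4all.gpt4all.DEFAULT_MODEL_DIRECTORY = Path.home() / 'my' / 'models-directory'

from gpt4all import GPT4All
model = GPT4All('orca-mini-3b-gguf2-q4_0.gguf')
output = model.generate('The capital of France is ', max_tokens=3)  # assumed continuation
print(output)
```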
@@ -193,7 +193,7 @@ Session templates can be customized when starting a `chat_session` context:
 === "GPT4All Custom Session Templates Example"
 ``` py
 from gpt4all import GPT4All
-model = GPT4All('ggml-Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_1.bin')
+model = GPT4All('wizardlm-13b-v1.2.Q4_0.gguf')
 system_template = 'A chat between a curious user and an artificial intelligence assistant.'
 # many models use triple hash '###' for keywords, Vicunas are simpler:
 prompt_template = 'USER: {0}\nASSISTANT: '
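Not part of the diff: a sketch showing how the templates defined in the hunk above would be passed into a session. The positional `(system prompt, prompt template)` call mirrors the logging example later in this diff; the `generate()` prompt is an assumption.

```python
from gpt4all import GPT4All

# Sketch only: custom session templates applied when opening a chat session.
model = GPT4All('wizardlm-13b-v1.2.Q4_0.gguf')
system_template = 'A chat between a curious user and an artificial intelligence assistant.'
prompt_template = 'USER: {0}\nASSISTANT: '
with model.chat_session(system_template, prompt_template):
    response = model.generate('why is the grass green?')
    print(response)
```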
@@ -222,7 +222,7 @@ To do the same outside a session, the input has to be formatted manually. For ex

 === "GPT4All Templates Outside a Session Example"
 ``` py
-model = GPT4All('ggml-Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_1.bin')
+model = GPT4All('wizardlm-13b-v1.2.Q4_0.gguf')
 system_template = 'A chat between a curious user and an artificial intelligence assistant.'
 prompt_template = 'USER: {0}\nASSISTANT: '
 prompts = ['name 3 colors', 'now name 3 fruits', 'what were the 3 colors in your earlier response?']
@@ -285,7 +285,7 @@ customized in a subclass. As an example:
 ```
 === "GPT4All Custom Subclass Example"
 ``` py
-model = RotatingTemplateGPT4All('ggml-Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_1.bin')
+model = RotatingTemplateGPT4All('wizardlm-13b-v1.2.Q4_0.gguf')
 with model.chat_session():  # starting a session is optional in this example
     response1 = model.generate("hi, who are you?")
     print(response1)
@@ -345,7 +345,7 @@ logging infrastructure offers [many more customization options][py-logging-cookb
 import logging
 from gpt4all import GPT4All
 logging.basicConfig(level=logging.INFO)
-model = GPT4All('nous-hermes-13b.ggmlv3.q4_0.bin')
+model = GPT4All('nous-hermes-llama2-13b.Q4_0.gguf')
 with model.chat_session('You are a geography expert.\nBe terse.',
                         '### Instruction:\n{0}\n### Response:\n'):
     response = model.generate('who are you?', temp=0)
@@ -414,7 +414,7 @@ If you know exactly when a model should stop responding, you can add a custom ca
 === "GPT4All Custom Stop Callback"
 ``` py
 from gpt4all import GPT4All
-model = GPT4All('orca-mini-3b.ggmlv3.q4_0.bin')
+model = GPT4All('orca-mini-3b-gguf2-q4_0.gguf')

 def stop_on_token_callback(token_id, token_string):
     # one sentence is enough:
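Not part of the diff: a sketch of one plausible body for the callback that the hunk above truncates. The `callback=` keyword on `generate()` and the convention that returning `False` stops generation are assumptions about the bindings of this era, labeled as such below.

```python
from gpt4all import GPT4All

# Sketch only: stop generating after the first sentence-ending period.
model = GPT4All('orca-mini-3b-gguf2-q4_0.gguf')

def stop_on_token_callback(token_id, token_string):
    # one sentence is enough:
    if '.' in token_string:
        return False  # assumed: returning False tells generate() to stop
    return True

# assumed keyword argument for registering the callback
response = model.generate('Blue Whales are the biggest animal ever.',
                          temp=0, callback=stop_on_token_callback)
print(response)
```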
@@ -9,7 +9,7 @@ GPT4All software is optimized to run inference of 3-13 billion parameter large l
 === "GPT4All Example"
 ``` py
 from gpt4all import GPT4All
-model = GPT4All("orca-mini-3b.ggmlv3.q4_0.bin")
+model = GPT4All("orca-mini-3b-gguf2-q4_0.gguf")
 output = model.generate("The capital of France is ", max_tokens=3)
 print(output)
 ```
@@ -8,7 +8,7 @@ import pytest


 def test_inference():
-    model = GPT4All(model_name='orca-mini-3b.ggmlv3.q4_0.bin')
+    model = GPT4All(model_name='orca-mini-3b-gguf2-q4_0.gguf')
     output_1 = model.generate('hello', top_k=1)

     with model.chat_session():
@@ -47,49 +47,44 @@ def do_long_input(model):


 def test_inference_long_orca_3b():
-    model = GPT4All(model_name="orca-mini-3b.ggmlv3.q4_0.bin")
+    model = GPT4All(model_name="orca-mini-3b-gguf2-q4_0.gguf")
     do_long_input(model)


 def test_inference_long_falcon():
-    model = GPT4All(model_name='ggml-model-gpt4all-falcon-q4_0.bin')
+    model = GPT4All(model_name='gpt4all-falcon-q4_0.gguf')
     do_long_input(model)


 def test_inference_long_llama_7b():
-    model = GPT4All(model_name="orca-mini-7b.ggmlv3.q4_0.bin")
+    model = GPT4All(model_name="mistral-7b-openorca.Q4_0.gguf")
     do_long_input(model)


 def test_inference_long_llama_13b():
-    model = GPT4All(model_name='ggml-nous-hermes-13b.ggmlv3.q4_0.bin')
+    model = GPT4All(model_name='nous-hermes-llama2-13b.Q4_0.gguf')
     do_long_input(model)


 def test_inference_long_mpt():
-    model = GPT4All(model_name='ggml-mpt-7b-chat.bin')
+    model = GPT4All(model_name='mpt-7b-chat-q4_0.gguf')
     do_long_input(model)


 def test_inference_long_replit():
-    model = GPT4All(model_name='ggml-replit-code-v1-3b.bin')
-    do_long_input(model)
-
-
-def test_inference_long_groovy():
-    model = GPT4All(model_name='ggml-gpt4all-j-v1.3-groovy.bin')
+    model = GPT4All(model_name='replit-code-v1_5-3b-q4_0.gguf')
     do_long_input(model)


 def test_inference_hparams():
-    model = GPT4All(model_name='orca-mini-3b.ggmlv3.q4_0.bin')
+    model = GPT4All(model_name='orca-mini-3b-gguf2-q4_0.gguf')

     output = model.generate("The capital of france is ", max_tokens=3)
     assert 'Paris' in output


 def test_inference_falcon():
-    model = GPT4All(model_name='ggml-model-gpt4all-falcon-q4_0.bin')
+    model = GPT4All(model_name='gpt4all-falcon-q4_0.gguf')
     prompt = 'hello'
     output = model.generate(prompt)
     assert isinstance(output, str)
@@ -97,7 +92,7 @@ def test_inference_falcon():


 def test_inference_mpt():
-    model = GPT4All(model_name='ggml-mpt-7b-chat.bin')
+    model = GPT4All(model_name='mpt-7b-chat-q4_0.gguf')
     prompt = 'hello'
     output = model.generate(prompt)
     assert isinstance(output, str)
@@ -1,7 +1,7 @@
 import { LLModel, createCompletion, DEFAULT_DIRECTORY, DEFAULT_LIBRARIES_DIRECTORY, loadModel } from '../src/gpt4all.js'

 const model = await loadModel(
-    'orca-mini-3b.ggmlv3.q4_0.bin',
+    'orca-mini-3b-gguf2-q4_0.gguf',
     { verbose: true }
 );
 const ll = model.llm;