bindings: replace references to GGMLv3 models with GGUF (#1547)

cebtenzzre 2023-10-22 11:58:28 -04:00 committed by GitHub
parent c25dc51935
commit 37b007603a
7 changed files with 29 additions and 34 deletions

View File

@ -40,5 +40,5 @@ directory, if necessary.
If you have already saved a model beforehand, specify its path with the `-m`/`--model` argument,
for example:
```shell
-python app.py repl --model /home/user/my-gpt4all-models/GPT4All-13B-snoozy.ggmlv3.q4_0.bin
+python app.py repl --model /home/user/my-gpt4all-models/gpt4all-13b-snoozy-q4_0.gguf
```
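
For comparison, the same local file can also be loaded directly through the Python bindings; a minimal sketch, assuming the directory layout above (`model_name`, `model_path`, and `allow_download` are the parameters documented in the Python bindings):
```python
from gpt4all import GPT4All

# load a local GGUF file without contacting the download server (sketch)
model = GPT4All(model_name='gpt4all-13b-snoozy-q4_0.gguf',
                model_path='/home/user/my-gpt4all-models',
                allow_download=False)
print(model.generate('hello', max_tokens=8))
```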

View File

@ -50,7 +50,7 @@ Test it out! In a Python script or console:
```python
from gpt4all import GPT4All
model = GPT4All("orca-mini-3b.ggmlv3.q4_0.bin")
model = GPT4All("orca-mini-3b-gguf2-q4_0.gguf")
output = model.generate("The capital of France is ", max_tokens=3)
print(output)
```
@ -59,7 +59,7 @@ print(output)
GPU Usage
```python
from gpt4all import GPT4All
model = GPT4All("orca-mini-3b.ggmlv3.q4_0.bin", device='gpu') # device='amd', device='intel'
model = GPT4All("orca-mini-3b-gguf2-q4_0.gguf", device='gpu') # device='amd', device='intel'
output = model.generate("The capital of France is ", max_tokens=3)
print(output)
```
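
If GPU initialization fails, one might fall back to the CPU default; a minimal sketch (the try/except around the constructor is an assumption, not a documented pattern of the bindings):
```python
from gpt4all import GPT4All

# prefer the GPU, fall back to the CPU default if it can't be initialized (sketch)
try:
    model = GPT4All("orca-mini-3b-gguf2-q4_0.gguf", device='gpu')
except Exception:
    model = GPT4All("orca-mini-3b-gguf2-q4_0.gguf")
output = model.generate("The capital of France is ", max_tokens=3)
print(output)
```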

View File

@ -166,7 +166,7 @@ If you want to use a different model, you can do so with the `-m`/`--model` para
model file name is provided, it will again check in `.cache/gpt4all/` and might start downloading.
If instead given a path to an existing model, the command could for example look like this:
```shell
-python app.py repl --model /home/user/my-gpt4all-models/GPT4All-13B-snoozy.ggmlv3.q4_0.bin
+python app.py repl --model /home/user/my-gpt4all-models/gpt4all-13b-snoozy-q4_0.gguf
```
When you're done and want to end a session, simply type `/exit`.

View File

@ -11,7 +11,7 @@ pip install gpt4all
=== "GPT4All Example"
``` py
from gpt4all import GPT4All
model = GPT4All("orca-mini-3b.ggmlv3.q4_0.bin")
model = GPT4All("orca-mini-3b-gguf2-q4_0.gguf")
output = model.generate("The capital of France is ", max_tokens=3)
print(output)
```
@ -35,7 +35,7 @@ Use the GPT4All `chat_session` context manager to hold chat conversations with t
=== "GPT4All Example"
``` py
-model = GPT4All(model_name='orca-mini-3b.ggmlv3.q4_0.bin')
+model = GPT4All(model_name='orca-mini-3b-gguf2-q4_0.gguf')
with model.chat_session():
    response1 = model.generate(prompt='hello', temp=0)
    response2 = model.generate(prompt='write me a short poem', temp=0)
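    # the transcript so far is recorded on the model while the session is open;
    # a sketch, assuming the bindings' current_chat_session attribute:
    print(model.current_chat_session)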
@ -89,7 +89,7 @@ To interact with GPT4All responses as the model generates, use the `streaming=Tr
=== "GPT4All Streaming Example"
``` py
from gpt4all import GPT4All
model = GPT4All("orca-mini-3b.ggmlv3.q4_0.bin")
model = GPT4All("orca-mini-3b-gguf2-q4_0.gguf")
tokens = []
for token in model.generate("The capital of France is", max_tokens=20, streaming=True):
tokens.append(token)
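# alternatively (sketch), print each token as it arrives instead of collecting them:
#     for token in model.generate("The capital of France is", max_tokens=20, streaming=True):
#         print(token, end='', flush=True)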
@ -135,7 +135,7 @@ is the same as if it weren't provided; that is, `~/.cache/gpt4all/` is the defau
``` py
from pathlib import Path
from gpt4all import GPT4All
-model = GPT4All(model_name='orca-mini-3b.ggmlv3.q4_0.bin',
+model = GPT4All(model_name='orca-mini-3b-gguf2-q4_0.gguf',
                model_path=(Path.home() / '.cache' / 'gpt4all'),
                allow_download=False)
response = model.generate('my favorite 3 fruits are:', temp=0)
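# note (assumption about behavior): with allow_download=False, a missing model
# file raises an error instead of triggering a download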
@ -152,7 +152,7 @@ If you want to point it at the chat GUI's default folder, it should be:
from pathlib import Path
from gpt4all import GPT4All
-model_name = 'orca-mini-3b.ggmlv3.q4_0.bin'
+model_name = 'orca-mini-3b-gguf2-q4_0.gguf'
model_path = Path.home() / 'Library' / 'Application Support' / 'nomic.ai' / 'GPT4All'
model = GPT4All(model_name, model_path)
```
@ -161,7 +161,7 @@ If you want to point it at the chat GUI's default folder, it should be:
from pathlib import Path
from gpt4all import GPT4All
import os
-model_name = 'orca-mini-3b.ggmlv3.q4_0.bin'
+model_name = 'orca-mini-3b-gguf2-q4_0.gguf'
model_path = Path(os.environ['LOCALAPPDATA']) / 'nomic.ai' / 'GPT4All'
model = GPT4All(model_name, model_path)
```
@ -170,7 +170,7 @@ If you want to point it at the chat GUI's default folder, it should be:
from pathlib import Path
from gpt4all import GPT4All
-model_name = 'orca-mini-3b.ggmlv3.q4_0.bin'
+model_name = 'orca-mini-3b-gguf2-q4_0.gguf'
model_path = Path.home() / '.local' / 'share' / 'nomic.ai' / 'GPT4All'
model = GPT4All(model_name, model_path)
```
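
The three platform-specific snippets above can be combined into one runtime check; a minimal sketch using only the paths shown above:
```python
import os
import platform
from pathlib import Path

from gpt4all import GPT4All

# pick the chat GUI's default model folder for the current OS (sketch)
system = platform.system()
if system == 'Darwin':
    model_path = Path.home() / 'Library' / 'Application Support' / 'nomic.ai' / 'GPT4All'
elif system == 'Windows':
    model_path = Path(os.environ['LOCALAPPDATA']) / 'nomic.ai' / 'GPT4All'
else:  # Linux
    model_path = Path.home() / '.local' / 'share' / 'nomic.ai' / 'GPT4All'

model = GPT4All('orca-mini-3b-gguf2-q4_0.gguf', model_path)
```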
@ -182,7 +182,7 @@ from pathlib import Path
import gpt4all.gpt4all
gpt4all.gpt4all.DEFAULT_MODEL_DIRECTORY = Path.home() / 'my' / 'models-directory'
from gpt4all import GPT4All
-model = GPT4All('orca-mini-3b.ggmlv3.q4_0.bin')
+model = GPT4All('orca-mini-3b-gguf2-q4_0.gguf')
...
```
@ -193,7 +193,7 @@ Session templates can be customized when starting a `chat_session` context:
=== "GPT4All Custom Session Templates Example"
``` py
from gpt4all import GPT4All
-model = GPT4All('ggml-Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_1.bin')
+model = GPT4All('wizardlm-13b-v1.2.Q4_0.gguf')
system_template = 'A chat between a curious user and an artificial intelligence assistant.'
# many models use triple hash '###' for keywords, Vicunas are simpler:
prompt_template = 'USER: {0}\nASSISTANT: '
@ -222,7 +222,7 @@ To do the same outside a session, the input has to be formatted manually. For ex
=== "GPT4All Templates Outside a Session Example"
``` py
-model = GPT4All('ggml-Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_1.bin')
+model = GPT4All('wizardlm-13b-v1.2.Q4_0.gguf')
system_template = 'A chat between a curious user and an artificial intelligence assistant.'
prompt_template = 'USER: {0}\nASSISTANT: '
prompts = ['name 3 colors', 'now name 3 fruits', 'what were the 3 colors in your earlier response?']
@ -285,7 +285,7 @@ customized in a subclass. As an example:
```
=== "GPT4All Custom Subclass Example"
``` py
-model = RotatingTemplateGPT4All('ggml-Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_1.bin')
+model = RotatingTemplateGPT4All('wizardlm-13b-v1.2.Q4_0.gguf')
with model.chat_session():  # starting a session is optional in this example
    response1 = model.generate("hi, who are you?")
    print(response1)
@ -345,7 +345,7 @@ logging infrastructure offers [many more customization options][py-logging-cookb
import logging
from gpt4all import GPT4All
logging.basicConfig(level=logging.INFO)
-model = GPT4All('nous-hermes-13b.ggmlv3.q4_0.bin')
+model = GPT4All('nous-hermes-llama2-13b.Q4_0.gguf')
with model.chat_session('You are a geography expert.\nBe terse.',
                        '### Instruction:\n{0}\n### Response:\n'):
    response = model.generate('who are you?', temp=0)
@ -414,7 +414,7 @@ If you know exactly when a model should stop responding, you can add a custom ca
=== "GPT4All Custom Stop Callback"
``` py
from gpt4all import GPT4All
-model = GPT4All('orca-mini-3b.ggmlv3.q4_0.bin')
+model = GPT4All('orca-mini-3b-gguf2-q4_0.gguf')

def stop_on_token_callback(token_id, token_string):
    # one sentence is enough:
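    # (sketch of the idea: returning False from the callback stops generation)
    #     if '.' in token_string:
    #         return False
    #     return True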

View File

@ -9,7 +9,7 @@ GPT4All software is optimized to run inference of 3-13 billion parameter large l
=== "GPT4All Example"
``` py
from gpt4all import GPT4All
model = GPT4All("orca-mini-3b.ggmlv3.q4_0.bin")
model = GPT4All("orca-mini-3b-gguf2-q4_0.gguf")
output = model.generate("The capital of France is ", max_tokens=3)
print(output)
```

View File

@ -8,7 +8,7 @@ import pytest
def test_inference():
-    model = GPT4All(model_name='orca-mini-3b.ggmlv3.q4_0.bin')
+    model = GPT4All(model_name='orca-mini-3b-gguf2-q4_0.gguf')
    output_1 = model.generate('hello', top_k=1)
    with model.chat_session():
@ -47,49 +47,44 @@ def do_long_input(model):
def test_inference_long_orca_3b():
-    model = GPT4All(model_name="orca-mini-3b.ggmlv3.q4_0.bin")
+    model = GPT4All(model_name="orca-mini-3b-gguf2-q4_0.gguf")
    do_long_input(model)

def test_inference_long_falcon():
-    model = GPT4All(model_name='ggml-model-gpt4all-falcon-q4_0.bin')
+    model = GPT4All(model_name='gpt4all-falcon-q4_0.gguf')
    do_long_input(model)

def test_inference_long_llama_7b():
-    model = GPT4All(model_name="orca-mini-7b.ggmlv3.q4_0.bin")
+    model = GPT4All(model_name="mistral-7b-openorca.Q4_0.gguf")
    do_long_input(model)

def test_inference_long_llama_13b():
-    model = GPT4All(model_name='ggml-nous-hermes-13b.ggmlv3.q4_0.bin')
+    model = GPT4All(model_name='nous-hermes-llama2-13b.Q4_0.gguf')
    do_long_input(model)

def test_inference_long_mpt():
-    model = GPT4All(model_name='ggml-mpt-7b-chat.bin')
+    model = GPT4All(model_name='mpt-7b-chat-q4_0.gguf')
    do_long_input(model)

def test_inference_long_replit():
-    model = GPT4All(model_name='ggml-replit-code-v1-3b.bin')
-    do_long_input(model)
-
-def test_inference_long_groovy():
-    model = GPT4All(model_name='ggml-gpt4all-j-v1.3-groovy.bin')
+    model = GPT4All(model_name='replit-code-v1_5-3b-q4_0.gguf')
    do_long_input(model)

def test_inference_hparams():
-    model = GPT4All(model_name='orca-mini-3b.ggmlv3.q4_0.bin')
+    model = GPT4All(model_name='orca-mini-3b-gguf2-q4_0.gguf')
    output = model.generate("The capital of france is ", max_tokens=3)
    assert 'Paris' in output

def test_inference_falcon():
-    model = GPT4All(model_name='ggml-model-gpt4all-falcon-q4_0.bin')
+    model = GPT4All(model_name='gpt4all-falcon-q4_0.gguf')
    prompt = 'hello'
    output = model.generate(prompt)
    assert isinstance(output, str)
@ -97,7 +92,7 @@ def test_inference_falcon():
def test_inference_mpt():
-    model = GPT4All(model_name='ggml-mpt-7b-chat.bin')
+    model = GPT4All(model_name='mpt-7b-chat-q4_0.gguf')
    prompt = 'hello'
    output = model.generate(prompt)
    assert isinstance(output, str)
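
The per-model long-input tests above all share one shape; as a hypothetical refactor (not part of this commit), they could be collapsed with `pytest.mark.parametrize`:
```python
import pytest
from gpt4all import GPT4All

# hypothetical refactor: one parametrized test instead of one function per model;
# model names are those used in the tests above, do_long_input as defined in this file
@pytest.mark.parametrize('model_name', [
    'orca-mini-3b-gguf2-q4_0.gguf',
    'gpt4all-falcon-q4_0.gguf',
    'mistral-7b-openorca.Q4_0.gguf',
    'nous-hermes-llama2-13b.Q4_0.gguf',
    'mpt-7b-chat-q4_0.gguf',
    'replit-code-v1_5-3b-q4_0.gguf',
])
def test_inference_long(model_name):
    model = GPT4All(model_name=model_name)
    do_long_input(model)
```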

View File

@ -1,7 +1,7 @@
import { LLModel, createCompletion, DEFAULT_DIRECTORY, DEFAULT_LIBRARIES_DIRECTORY, loadModel } from '../src/gpt4all.js'

const model = await loadModel(
-    'orca-mini-3b.ggmlv3.q4_0.bin',
+    'orca-mini-3b-gguf2-q4_0.gguf',
    { verbose: true }
);

const ll = model.llm;