Mirror of https://github.com/nomic-ai/gpt4all.git
bindings: replace references to GGMLv3 models with GGUF (#1547)
parent c25dc51935
commit 37b007603a
@@ -40,5 +40,5 @@ directory, if necessary.
 If you have already saved a model beforehand, specify its path with the `-m`/`--model` argument,
 for example:
 ```shell
-python app.py repl --model /home/user/my-gpt4all-models/GPT4All-13B-snoozy.ggmlv3.q4_0.bin
+python app.py repl --model /home/user/my-gpt4all-models/gpt4all-13b-snoozy-q4_0.gguf
 ```
@@ -50,7 +50,7 @@ Test it out! In a Python script or console:

 ```python
 from gpt4all import GPT4All
-model = GPT4All("orca-mini-3b.ggmlv3.q4_0.bin")
+model = GPT4All("orca-mini-3b-gguf2-q4_0.gguf")
 output = model.generate("The capital of France is ", max_tokens=3)
 print(output)
 ```
@@ -59,7 +59,7 @@ print(output)
 GPU Usage
 ```python
 from gpt4all import GPT4All
-model = GPT4All("orca-mini-3b.ggmlv3.q4_0.bin", device='gpu') # device='amd', device='intel'
+model = GPT4All("orca-mini-3b-gguf2-q4_0.gguf", device='gpu') # device='amd', device='intel'
 output = model.generate("The capital of France is ", max_tokens=3)
 print(output)
 ```
@@ -166,7 +166,7 @@ If you want to use a different model, you can do so with the `-m`/`--model` para
 model file name is provided, it will again check in `.cache/gpt4all/` and might start downloading.
 If instead given a path to an existing model, the command could for example look like this:
 ```shell
-python app.py repl --model /home/user/my-gpt4all-models/GPT4All-13B-snoozy.ggmlv3.q4_0.bin
+python app.py repl --model /home/user/my-gpt4all-models/gpt4all-13b-snoozy-q4_0.gguf
 ```

 When you're done and want to end a session, simply type `/exit`.
@@ -11,7 +11,7 @@ pip install gpt4all
 === "GPT4All Example"
 ``` py
 from gpt4all import GPT4All
-model = GPT4All("orca-mini-3b.ggmlv3.q4_0.bin")
+model = GPT4All("orca-mini-3b-gguf2-q4_0.gguf")
 output = model.generate("The capital of France is ", max_tokens=3)
 print(output)
 ```
@@ -35,7 +35,7 @@ Use the GPT4All `chat_session` context manager to hold chat conversations with t

 === "GPT4All Example"
 ``` py
-model = GPT4All(model_name='orca-mini-3b.ggmlv3.q4_0.bin')
+model = GPT4All(model_name='orca-mini-3b-gguf2-q4_0.gguf')
 with model.chat_session():
     response1 = model.generate(prompt='hello', temp=0)
     response2 = model.generate(prompt='write me a short poem', temp=0)
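Not part of the diff: a minimal sketch, assuming only what the hunk above already shows, of how the truncated `chat_session` example continues with the new GGUF file name. The trailing `print` calls are added here for illustration.

```python
from gpt4all import GPT4All

# Sketch only: continues the truncated docs example above with the new
# GGUF model name; the print calls are an assumed way to inspect the output.
model = GPT4All(model_name='orca-mini-3b-gguf2-q4_0.gguf')
with model.chat_session():
    response1 = model.generate(prompt='hello', temp=0)
    response2 = model.generate(prompt='write me a short poem', temp=0)
print(response1)
print(response2)
```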
@@ -89,7 +89,7 @@ To interact with GPT4All responses as the model generates, use the `streaming=Tr
 === "GPT4All Streaming Example"
 ``` py
 from gpt4all import GPT4All
-model = GPT4All("orca-mini-3b.ggmlv3.q4_0.bin")
+model = GPT4All("orca-mini-3b-gguf2-q4_0.gguf")
 tokens = []
 for token in model.generate("The capital of France is", max_tokens=20, streaming=True):
     tokens.append(token)
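Not part of the diff: the streaming hunk ends at `tokens.append()`; a minimal sketch of the complete loop, where joining the collected tokens at the end is an assumption added for illustration.

```python
from gpt4all import GPT4All

# Sketch only: with streaming=True, generate() yields tokens one at a time,
# as in the hunk above; joining them afterwards is one typical way to use it.
model = GPT4All("orca-mini-3b-gguf2-q4_0.gguf")
tokens = []
for token in model.generate("The capital of France is", max_tokens=20, streaming=True):
    tokens.append(token)
print(''.join(tokens))
```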
@@ -135,7 +135,7 @@ is the same as if it weren't provided; that is, `~/.cache/gpt4all/` is the defau
 ``` py
 from pathlib import Path
 from gpt4all import GPT4All
-model = GPT4All(model_name='orca-mini-3b.ggmlv3.q4_0.bin',
+model = GPT4All(model_name='orca-mini-3b-gguf2-q4_0.gguf',
                 model_path=(Path.home() / '.cache' / 'gpt4all'),
                 allow_download=False)
 response = model.generate('my favorite 3 fruits are:', temp=0)
@@ -152,7 +152,7 @@ If you want to point it at the chat GUI's default folder, it should be:
 from pathlib import Path
 from gpt4all import GPT4All

-model_name = 'orca-mini-3b.ggmlv3.q4_0.bin'
+model_name = 'orca-mini-3b-gguf2-q4_0.gguf'
 model_path = Path.home() / 'Library' / 'Application Support' / 'nomic.ai' / 'GPT4All'
 model = GPT4All(model_name, model_path)
 ```
@@ -161,7 +161,7 @@ If you want to point it at the chat GUI's default folder, it should be:
 from pathlib import Path
 from gpt4all import GPT4All
 import os
-model_name = 'orca-mini-3b.ggmlv3.q4_0.bin'
+model_name = 'orca-mini-3b-gguf2-q4_0.gguf'
 model_path = Path(os.environ['LOCALAPPDATA']) / 'nomic.ai' / 'GPT4All'
 model = GPT4All(model_name, model_path)
 ```
@@ -170,7 +170,7 @@ If you want to point it at the chat GUI's default folder, it should be:
 from pathlib import Path
 from gpt4all import GPT4All

-model_name = 'orca-mini-3b.ggmlv3.q4_0.bin'
+model_name = 'orca-mini-3b-gguf2-q4_0.gguf'
 model_path = Path.home() / '.local' / 'share' / 'nomic.ai' / 'GPT4All'
 model = GPT4All(model_name, model_path)
 ```
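Not part of the diff: a sketch that combines the three platform-specific snippets above into one script; the `sys.platform` dispatch is an addition for illustration, while the paths and model name come straight from the hunks.

```python
import os
import sys
from pathlib import Path
from gpt4all import GPT4All

# Pick the chat GUI's default model folder per platform, mirroring the
# macOS, Windows, and Linux snippets shown in the documentation diff above.
if sys.platform == 'darwin':
    model_path = Path.home() / 'Library' / 'Application Support' / 'nomic.ai' / 'GPT4All'
elif sys.platform == 'win32':
    model_path = Path(os.environ['LOCALAPPDATA']) / 'nomic.ai' / 'GPT4All'
else:
    model_path = Path.home() / '.local' / 'share' / 'nomic.ai' / 'GPT4All'

model = GPT4All('orca-mini-3b-gguf2-q4_0.gguf', model_path)
```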
@@ -182,7 +182,7 @@ from pathlib import Path
 import gpt4all.gpt4all
 gpt4all.gpt4all.DEFAULT_MODEL_DIRECTORY = Path.home() / 'my' / 'models-directory'
 from gpt4all import GPT4All
-model = GPT4All('orca-mini-3b.ggmlv3.q4_0.bin')
+model = GPT4All('orca-mini-3b-gguf2-q4_0.gguf')
 ...
 ```
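Not part of the diff: a sketch expanding the `...` in the hunk above. The override of the package-wide default directory is taken from the hunk; the trailing `generate()` call is an assumed continuation, not a line from this commit.

```python
from pathlib import Path
import gpt4all.gpt4all

# Sketch only: set the default model directory before the GPT4All class is used.
gpt4all.gpt4all.DEFAULT_MODEL_DIRECTORY = Path.home() / 'my' / 'models-directory'

from gpt4all import GPT4All
model = GPT4All('orca-mini-3b-gguf2-q4_0.gguf')
output = model.generate('The capital of France is ', max_tokens=3)  # assumed continuation
print(output)
```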
@@ -193,7 +193,7 @@ Session templates can be customized when starting a `chat_session` context:
 === "GPT4All Custom Session Templates Example"
 ``` py
 from gpt4all import GPT4All
-model = GPT4All('ggml-Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_1.bin')
+model = GPT4All('wizardlm-13b-v1.2.Q4_0.gguf')
 system_template = 'A chat between a curious user and an artificial intelligence assistant.'
 # many models use triple hash '###' for keywords, Vicunas are simpler:
 prompt_template = 'USER: {0}\nASSISTANT: '
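Not part of the diff: a sketch showing how the templates defined in the hunk above would be passed into a session. The positional `(system prompt, prompt template)` call mirrors the logging example later in this diff; the `generate()` prompt is an assumption.

```python
from gpt4all import GPT4All

# Sketch only: custom session templates applied when opening a chat session.
model = GPT4All('wizardlm-13b-v1.2.Q4_0.gguf')
system_template = 'A chat between a curious user and an artificial intelligence assistant.'
prompt_template = 'USER: {0}\nASSISTANT: '
with model.chat_session(system_template, prompt_template):
    response = model.generate('why is the grass green?')
    print(response)
```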
@@ -222,7 +222,7 @@ To do the same outside a session, the input has to be formatted manually. For ex

 === "GPT4All Templates Outside a Session Example"
 ``` py
-model = GPT4All('ggml-Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_1.bin')
+model = GPT4All('wizardlm-13b-v1.2.Q4_0.gguf')
 system_template = 'A chat between a curious user and an artificial intelligence assistant.'
 prompt_template = 'USER: {0}\nASSISTANT: '
 prompts = ['name 3 colors', 'now name 3 fruits', 'what were the 3 colors in your earlier response?']
@@ -285,7 +285,7 @@ customized in a subclass. As an example:
 ```
 === "GPT4All Custom Subclass Example"
 ``` py
-model = RotatingTemplateGPT4All('ggml-Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_1.bin')
+model = RotatingTemplateGPT4All('wizardlm-13b-v1.2.Q4_0.gguf')
 with model.chat_session():  # starting a session is optional in this example
     response1 = model.generate("hi, who are you?")
     print(response1)
@@ -345,7 +345,7 @@ logging infrastructure offers [many more customization options][py-logging-cookb
 import logging
 from gpt4all import GPT4All
 logging.basicConfig(level=logging.INFO)
-model = GPT4All('nous-hermes-13b.ggmlv3.q4_0.bin')
+model = GPT4All('nous-hermes-llama2-13b.Q4_0.gguf')
 with model.chat_session('You are a geography expert.\nBe terse.',
                         '### Instruction:\n{0}\n### Response:\n'):
     response = model.generate('who are you?', temp=0)
@@ -414,7 +414,7 @@ If you know exactly when a model should stop responding, you can add a custom ca
 === "GPT4All Custom Stop Callback"
 ``` py
 from gpt4all import GPT4All
-model = GPT4All('orca-mini-3b.ggmlv3.q4_0.bin')
+model = GPT4All('orca-mini-3b-gguf2-q4_0.gguf')

 def stop_on_token_callback(token_id, token_string):
     # one sentence is enough:
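Not part of the diff: a sketch of one plausible body for the callback that the hunk above truncates. The `callback=` keyword on `generate()` and the convention that returning `False` stops generation are assumptions about the bindings of this era, labeled as such below.

```python
from gpt4all import GPT4All

# Sketch only: stop generating after the first sentence-ending period.
model = GPT4All('orca-mini-3b-gguf2-q4_0.gguf')

def stop_on_token_callback(token_id, token_string):
    # one sentence is enough:
    if '.' in token_string:
        return False  # assumed: returning False tells generate() to stop
    return True

# assumed keyword argument for registering the callback
response = model.generate('Blue Whales are the biggest animal ever.',
                          temp=0, callback=stop_on_token_callback)
print(response)
```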
@@ -9,7 +9,7 @@ GPT4All software is optimized to run inference of 3-13 billion parameter large l
 === "GPT4All Example"
 ``` py
 from gpt4all import GPT4All
-model = GPT4All("orca-mini-3b.ggmlv3.q4_0.bin")
+model = GPT4All("orca-mini-3b-gguf2-q4_0.gguf")
 output = model.generate("The capital of France is ", max_tokens=3)
 print(output)
 ```
@@ -8,7 +8,7 @@ import pytest


 def test_inference():
-    model = GPT4All(model_name='orca-mini-3b.ggmlv3.q4_0.bin')
+    model = GPT4All(model_name='orca-mini-3b-gguf2-q4_0.gguf')
     output_1 = model.generate('hello', top_k=1)

     with model.chat_session():
@@ -47,49 +47,44 @@ def do_long_input(model):


 def test_inference_long_orca_3b():
-    model = GPT4All(model_name="orca-mini-3b.ggmlv3.q4_0.bin")
+    model = GPT4All(model_name="orca-mini-3b-gguf2-q4_0.gguf")
     do_long_input(model)


 def test_inference_long_falcon():
-    model = GPT4All(model_name='ggml-model-gpt4all-falcon-q4_0.bin')
+    model = GPT4All(model_name='gpt4all-falcon-q4_0.gguf')
     do_long_input(model)


 def test_inference_long_llama_7b():
-    model = GPT4All(model_name="orca-mini-7b.ggmlv3.q4_0.bin")
+    model = GPT4All(model_name="mistral-7b-openorca.Q4_0.gguf")
     do_long_input(model)


 def test_inference_long_llama_13b():
-    model = GPT4All(model_name='ggml-nous-hermes-13b.ggmlv3.q4_0.bin')
+    model = GPT4All(model_name='nous-hermes-llama2-13b.Q4_0.gguf')
     do_long_input(model)


 def test_inference_long_mpt():
-    model = GPT4All(model_name='ggml-mpt-7b-chat.bin')
+    model = GPT4All(model_name='mpt-7b-chat-q4_0.gguf')
     do_long_input(model)


 def test_inference_long_replit():
-    model = GPT4All(model_name='ggml-replit-code-v1-3b.bin')
-    do_long_input(model)
-
-
-def test_inference_long_groovy():
-    model = GPT4All(model_name='ggml-gpt4all-j-v1.3-groovy.bin')
+    model = GPT4All(model_name='replit-code-v1_5-3b-q4_0.gguf')
     do_long_input(model)


 def test_inference_hparams():
-    model = GPT4All(model_name='orca-mini-3b.ggmlv3.q4_0.bin')
+    model = GPT4All(model_name='orca-mini-3b-gguf2-q4_0.gguf')

     output = model.generate("The capital of france is ", max_tokens=3)
     assert 'Paris' in output


 def test_inference_falcon():
-    model = GPT4All(model_name='ggml-model-gpt4all-falcon-q4_0.bin')
+    model = GPT4All(model_name='gpt4all-falcon-q4_0.gguf')
     prompt = 'hello'
     output = model.generate(prompt)
     assert isinstance(output, str)
@@ -97,7 +92,7 @@ def test_inference_falcon():


 def test_inference_mpt():
-    model = GPT4All(model_name='ggml-mpt-7b-chat.bin')
+    model = GPT4All(model_name='mpt-7b-chat-q4_0.gguf')
     prompt = 'hello'
     output = model.generate(prompt)
     assert isinstance(output, str)
@@ -1,7 +1,7 @@
 import { LLModel, createCompletion, DEFAULT_DIRECTORY, DEFAULT_LIBRARIES_DIRECTORY, loadModel } from '../src/gpt4all.js'

 const model = await loadModel(
-    'orca-mini-3b.ggmlv3.q4_0.bin',
+    'orca-mini-3b-gguf2-q4_0.gguf',
     { verbose: true }
 );
 const ll = model.llm;