diff --git a/gpt4all-bindings/cli/README.md b/gpt4all-bindings/cli/README.md
index 8b2d08e8..228253cd 100644
--- a/gpt4all-bindings/cli/README.md
+++ b/gpt4all-bindings/cli/README.md
@@ -40,5 +40,5 @@ directory, if necessary.
 
 If you have already saved a model beforehand, specify its path with the `-m`/`--model` argument, for example:
 ```shell
-python app.py repl --model /home/user/my-gpt4all-models/GPT4All-13B-snoozy.ggmlv3.q4_0.bin
+python app.py repl --model /home/user/my-gpt4all-models/gpt4all-13b-snoozy-q4_0.gguf
 ```
diff --git a/gpt4all-bindings/python/README.md b/gpt4all-bindings/python/README.md
index 0e7aeae0..0c72008e 100644
--- a/gpt4all-bindings/python/README.md
+++ b/gpt4all-bindings/python/README.md
@@ -50,7 +50,7 @@ Test it out! In a Python script or console:
 
 ```python
 from gpt4all import GPT4All
-model = GPT4All("orca-mini-3b.ggmlv3.q4_0.bin")
+model = GPT4All("orca-mini-3b-gguf2-q4_0.gguf")
 output = model.generate("The capital of France is ", max_tokens=3)
 print(output)
 ```
@@ -59,7 +59,7 @@ print(output)
 GPU Usage
 ```python
 from gpt4all import GPT4All
-model = GPT4All("orca-mini-3b.ggmlv3.q4_0.bin", device='gpu') # device='amd', device='intel'
+model = GPT4All("orca-mini-3b-gguf2-q4_0.gguf", device='gpu') # device='amd', device='intel'
 output = model.generate("The capital of France is ", max_tokens=3)
 print(output)
 ```
diff --git a/gpt4all-bindings/python/docs/gpt4all_cli.md b/gpt4all-bindings/python/docs/gpt4all_cli.md
index f644057c..799a587d 100644
--- a/gpt4all-bindings/python/docs/gpt4all_cli.md
+++ b/gpt4all-bindings/python/docs/gpt4all_cli.md
@@ -166,7 +166,7 @@ If you want to use a different model, you can do so with the `-m`/`--model` para
 model file name is provided, it will again check in `.cache/gpt4all/` and might start downloading.
 If instead given a path to an existing model, the command could for example look like this:
 ```shell
-python app.py repl --model /home/user/my-gpt4all-models/GPT4All-13B-snoozy.ggmlv3.q4_0.bin
+python app.py repl --model /home/user/my-gpt4all-models/gpt4all-13b-snoozy-q4_0.gguf
 ```
 
 When you're done and want to end a session, simply type `/exit`.
diff --git a/gpt4all-bindings/python/docs/gpt4all_python.md b/gpt4all-bindings/python/docs/gpt4all_python.md
index 2e6ba863..dd4f6d7f 100644
--- a/gpt4all-bindings/python/docs/gpt4all_python.md
+++ b/gpt4all-bindings/python/docs/gpt4all_python.md
@@ -11,7 +11,7 @@ pip install gpt4all
 === "GPT4All Example"
     ``` py
     from gpt4all import GPT4All
-    model = GPT4All("orca-mini-3b.ggmlv3.q4_0.bin")
+    model = GPT4All("orca-mini-3b-gguf2-q4_0.gguf")
     output = model.generate("The capital of France is ", max_tokens=3)
     print(output)
     ```
@@ -35,7 +35,7 @@ Use the GPT4All `chat_session` context manager to hold chat conversations with t
 
 === "GPT4All Example"
     ``` py
-    model = GPT4All(model_name='orca-mini-3b.ggmlv3.q4_0.bin')
+    model = GPT4All(model_name='orca-mini-3b-gguf2-q4_0.gguf')
     with model.chat_session():
         response1 = model.generate(prompt='hello', temp=0)
         response2 = model.generate(prompt='write me a short poem', temp=0)
@@ -89,7 +89,7 @@ To interact with GPT4All responses as the model generates, use the `streaming=Tr
 === "GPT4All Streaming Example"
     ``` py
     from gpt4all import GPT4All
-    model = GPT4All("orca-mini-3b.ggmlv3.q4_0.bin")
+    model = GPT4All("orca-mini-3b-gguf2-q4_0.gguf")
     tokens = []
     for token in model.generate("The capital of France is", max_tokens=20, streaming=True):
         tokens.append(token)
@@ -135,7 +135,7 @@ is the same as if it weren't provided; that is, `~/.cache/gpt4all/` is the defau
     ``` py
     from pathlib import Path
     from gpt4all import GPT4All
-    model = GPT4All(model_name='orca-mini-3b.ggmlv3.q4_0.bin',
+    model = GPT4All(model_name='orca-mini-3b-gguf2-q4_0.gguf',
                     model_path=(Path.home() / '.cache' / 'gpt4all'),
                     allow_download=False)
     response = model.generate('my favorite 3 fruits are:', temp=0)
@@ -152,7 +152,7 @@ If you want to point it at the chat GUI's default folder, it should be:
     from pathlib import Path
     from gpt4all import GPT4All
 
-    model_name = 'orca-mini-3b.ggmlv3.q4_0.bin'
+    model_name = 'orca-mini-3b-gguf2-q4_0.gguf'
     model_path = Path.home() / 'Library' / 'Application Support' / 'nomic.ai' / 'GPT4All'
     model = GPT4All(model_name, model_path)
     ```
@@ -161,7 +161,7 @@ If you want to point it at the chat GUI's default folder, it should be:
     from pathlib import Path
     from gpt4all import GPT4All
     import os
-    model_name = 'orca-mini-3b.ggmlv3.q4_0.bin'
+    model_name = 'orca-mini-3b-gguf2-q4_0.gguf'
     model_path = Path(os.environ['LOCALAPPDATA']) / 'nomic.ai' / 'GPT4All'
     model = GPT4All(model_name, model_path)
     ```
@@ -170,7 +170,7 @@ If you want to point it at the chat GUI's default folder, it should be:
     from pathlib import Path
     from gpt4all import GPT4All
 
-    model_name = 'orca-mini-3b.ggmlv3.q4_0.bin'
+    model_name = 'orca-mini-3b-gguf2-q4_0.gguf'
    model_path = Path.home() / '.local' / 'share' / 'nomic.ai' / 'GPT4All'
     model = GPT4All(model_name, model_path)
     ```
@@ -182,7 +182,7 @@ from pathlib import Path
 import gpt4all.gpt4all
 gpt4all.gpt4all.DEFAULT_MODEL_DIRECTORY = Path.home() / 'my' / 'models-directory'
 from gpt4all import GPT4All
-model = GPT4All('orca-mini-3b.ggmlv3.q4_0.bin')
+model = GPT4All('orca-mini-3b-gguf2-q4_0.gguf')
 ...
 ```
 
@@ -193,7 +193,7 @@ Session templates can be customized when starting a `chat_session` context:
 === "GPT4All Custom Session Templates Example"
     ``` py
     from gpt4all import GPT4All
-    model = GPT4All('ggml-Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_1.bin')
+    model = GPT4All('wizardlm-13b-v1.2.Q4_0.gguf')
     system_template = 'A chat between a curious user and an artificial intelligence assistant.'
     # many models use triple hash '###' for keywords, Vicunas are simpler:
     prompt_template = 'USER: {0}\nASSISTANT: '
@@ -222,7 +222,7 @@ To do the same outside a session, the input has to be formatted manually. For ex
 
 === "GPT4All Templates Outside a Session Example"
     ``` py
-    model = GPT4All('ggml-Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_1.bin')
+    model = GPT4All('wizardlm-13b-v1.2.Q4_0.gguf')
     system_template = 'A chat between a curious user and an artificial intelligence assistant.'
     prompt_template = 'USER: {0}\nASSISTANT: '
     prompts = ['name 3 colors', 'now name 3 fruits', 'what were the 3 colors in your earlier response?']
@@ -285,7 +285,7 @@ customized in a subclass. As an example:
 ```
 === "GPT4All Custom Subclass Example"
     ``` py
-    model = RotatingTemplateGPT4All('ggml-Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_1.bin')
+    model = RotatingTemplateGPT4All('wizardlm-13b-v1.2.Q4_0.gguf')
     with model.chat_session():  # starting a session is optional in this example
         response1 = model.generate("hi, who are you?")
         print(response1)
@@ -345,7 +345,7 @@ logging infrastructure offers [many more customization options][py-logging-cookb
     import logging
     from gpt4all import GPT4All
     logging.basicConfig(level=logging.INFO)
-    model = GPT4All('nous-hermes-13b.ggmlv3.q4_0.bin')
+    model = GPT4All('nous-hermes-llama2-13b.Q4_0.gguf')
     with model.chat_session('You are a geography expert.\nBe terse.',
                             '### Instruction:\n{0}\n### Response:\n'):
         response = model.generate('who are you?', temp=0)
@@ -414,7 +414,7 @@ If you know exactly when a model should stop responding, you can add a custom ca
 === "GPT4All Custom Stop Callback"
     ``` py
     from gpt4all import GPT4All
-    model = GPT4All('orca-mini-3b.ggmlv3.q4_0.bin')
+    model = GPT4All('orca-mini-3b-gguf2-q4_0.gguf')
 
     def stop_on_token_callback(token_id, token_string):
         # one sentence is enough:
diff --git a/gpt4all-bindings/python/docs/index.md b/gpt4all-bindings/python/docs/index.md
index d0ebe45e..9fabf321 100644
--- a/gpt4all-bindings/python/docs/index.md
+++ b/gpt4all-bindings/python/docs/index.md
@@ -9,7 +9,7 @@ GPT4All software is optimized to run inference of 3-13 billion parameter large l
 === "GPT4All Example"
     ``` py
     from gpt4all import GPT4All
-    model = GPT4All("orca-mini-3b.ggmlv3.q4_0.bin")
+    model = GPT4All("orca-mini-3b-gguf2-q4_0.gguf")
     output = model.generate("The capital of France is ", max_tokens=3)
     print(output)
     ```
diff --git a/gpt4all-bindings/python/gpt4all/tests/test_gpt4all.py b/gpt4all-bindings/python/gpt4all/tests/test_gpt4all.py
index 74a3214d..5b3c3fba 100644
--- a/gpt4all-bindings/python/gpt4all/tests/test_gpt4all.py
+++ b/gpt4all-bindings/python/gpt4all/tests/test_gpt4all.py
@@ -8,7 +8,7 @@ import pytest
 
 
 def test_inference():
-    model = GPT4All(model_name='orca-mini-3b.ggmlv3.q4_0.bin')
+    model = GPT4All(model_name='orca-mini-3b-gguf2-q4_0.gguf')
     output_1 = model.generate('hello', top_k=1)
 
     with model.chat_session():
@@ -47,49 +47,44 @@ def do_long_input(model):
 
 
 def test_inference_long_orca_3b():
-    model = GPT4All(model_name="orca-mini-3b.ggmlv3.q4_0.bin")
+    model = GPT4All(model_name="orca-mini-3b-gguf2-q4_0.gguf")
     do_long_input(model)
 
 
 def test_inference_long_falcon():
-    model = GPT4All(model_name='ggml-model-gpt4all-falcon-q4_0.bin')
+    model = GPT4All(model_name='gpt4all-falcon-q4_0.gguf')
     do_long_input(model)
 
 
 def test_inference_long_llama_7b():
-    model = GPT4All(model_name="orca-mini-7b.ggmlv3.q4_0.bin")
+    model = GPT4All(model_name="mistral-7b-openorca.Q4_0.gguf")
     do_long_input(model)
 
 
 def test_inference_long_llama_13b():
-    model = GPT4All(model_name='ggml-nous-hermes-13b.ggmlv3.q4_0.bin')
+    model = GPT4All(model_name='nous-hermes-llama2-13b.Q4_0.gguf')
     do_long_input(model)
 
 
 def test_inference_long_mpt():
-    model = GPT4All(model_name='ggml-mpt-7b-chat.bin')
+    model = GPT4All(model_name='mpt-7b-chat-q4_0.gguf')
     do_long_input(model)
 
 
 def test_inference_long_replit():
-    model = GPT4All(model_name='ggml-replit-code-v1-3b.bin')
-    do_long_input(model)
-
-
-def test_inference_long_groovy():
-    model = GPT4All(model_name='ggml-gpt4all-j-v1.3-groovy.bin')
+    model = GPT4All(model_name='replit-code-v1_5-3b-q4_0.gguf')
     do_long_input(model)
 
 
 def test_inference_hparams():
-    model = GPT4All(model_name='orca-mini-3b.ggmlv3.q4_0.bin')
+    model = GPT4All(model_name='orca-mini-3b-gguf2-q4_0.gguf')
     output = model.generate("The capital of france is ", max_tokens=3)
     assert 'Paris' in output
 
 
 def test_inference_falcon():
-    model = GPT4All(model_name='ggml-model-gpt4all-falcon-q4_0.bin')
+    model = GPT4All(model_name='gpt4all-falcon-q4_0.gguf')
     prompt = 'hello'
     output = model.generate(prompt)
     assert isinstance(output, str)
 
@@ -97,7 +92,7 @@ def test_inference_falcon():
 
 
 def test_inference_mpt():
-    model = GPT4All(model_name='ggml-mpt-7b-chat.bin')
+    model = GPT4All(model_name='mpt-7b-chat-q4_0.gguf')
     prompt = 'hello'
     output = model.generate(prompt)
     assert isinstance(output, str)
diff --git a/gpt4all-bindings/typescript/spec/chat.mjs b/gpt4all-bindings/typescript/spec/chat.mjs
index ee893646..1e08ea0e 100644
--- a/gpt4all-bindings/typescript/spec/chat.mjs
+++ b/gpt4all-bindings/typescript/spec/chat.mjs
@@ -1,7 +1,7 @@
 import { LLModel, createCompletion, DEFAULT_DIRECTORY, DEFAULT_LIBRARIES_DIRECTORY, loadModel } from '../src/gpt4all.js'
 
 const model = await loadModel(
-    'orca-mini-3b.ggmlv3.q4_0.bin',
+    'orca-mini-3b-gguf2-q4_0.gguf',
     { verbose: true }
 );
 const ll = model.llm;