diff --git a/gpt4all-bindings/cli/README.md b/gpt4all-bindings/cli/README.md
index 8b2d08e8..228253cd 100644
--- a/gpt4all-bindings/cli/README.md
+++ b/gpt4all-bindings/cli/README.md
@@ -40,5 +40,5 @@ directory, if necessary.
 
 If you have already saved a model beforehand, specify its path with the `-m`/`--model` argument, for example:
 ```shell
-python app.py repl --model /home/user/my-gpt4all-models/GPT4All-13B-snoozy.ggmlv3.q4_0.bin
+python app.py repl --model /home/user/my-gpt4all-models/gpt4all-13b-snoozy-q4_0.gguf
 ```
diff --git a/gpt4all-bindings/python/README.md b/gpt4all-bindings/python/README.md
index 0e7aeae0..0c72008e 100644
--- a/gpt4all-bindings/python/README.md
+++ b/gpt4all-bindings/python/README.md
@@ -50,7 +50,7 @@ Test it out! In a Python script or console:
 
 ```python
 from gpt4all import GPT4All
-model = GPT4All("orca-mini-3b.ggmlv3.q4_0.bin")
+model = GPT4All("orca-mini-3b-gguf2-q4_0.gguf")
 output = model.generate("The capital of France is ", max_tokens=3)
 print(output)
 ```
@@ -59,7 +59,7 @@ print(output)
 GPU Usage
 ```python
 from gpt4all import GPT4All
-model = GPT4All("orca-mini-3b.ggmlv3.q4_0.bin", device='gpu') # device='amd', device='intel'
+model = GPT4All("orca-mini-3b-gguf2-q4_0.gguf", device='gpu') # device='amd', device='intel'
 output = model.generate("The capital of France is ", max_tokens=3)
 print(output)
 ```
diff --git a/gpt4all-bindings/python/docs/gpt4all_cli.md b/gpt4all-bindings/python/docs/gpt4all_cli.md
index f644057c..799a587d 100644
--- a/gpt4all-bindings/python/docs/gpt4all_cli.md
+++ b/gpt4all-bindings/python/docs/gpt4all_cli.md
@@ -166,7 +166,7 @@ If you want to use a different model, you can do so with the `-m`/`--model` para
 model file name is provided, it will again check in `.cache/gpt4all/` and might start downloading.
 If instead given a path to an existing model, the command could for example look like this:
 ```shell
-python app.py repl --model /home/user/my-gpt4all-models/GPT4All-13B-snoozy.ggmlv3.q4_0.bin
+python app.py repl --model /home/user/my-gpt4all-models/gpt4all-13b-snoozy-q4_0.gguf
 ```
 
 When you're done and want to end a session, simply type `/exit`.
diff --git a/gpt4all-bindings/python/docs/gpt4all_python.md b/gpt4all-bindings/python/docs/gpt4all_python.md
index 2e6ba863..dd4f6d7f 100644
--- a/gpt4all-bindings/python/docs/gpt4all_python.md
+++ b/gpt4all-bindings/python/docs/gpt4all_python.md
@@ -11,7 +11,7 @@ pip install gpt4all
 === "GPT4All Example"
     ``` py
     from gpt4all import GPT4All
-    model = GPT4All("orca-mini-3b.ggmlv3.q4_0.bin")
+    model = GPT4All("orca-mini-3b-gguf2-q4_0.gguf")
     output = model.generate("The capital of France is ", max_tokens=3)
     print(output)
     ```
@@ -35,7 +35,7 @@ Use the GPT4All `chat_session` context manager to hold chat conversations with t
 
 === "GPT4All Example"
     ``` py
-    model = GPT4All(model_name='orca-mini-3b.ggmlv3.q4_0.bin')
+    model = GPT4All(model_name='orca-mini-3b-gguf2-q4_0.gguf')
     with model.chat_session():
         response1 = model.generate(prompt='hello', temp=0)
         response2 = model.generate(prompt='write me a short poem', temp=0)
@@ -89,7 +89,7 @@ To interact with GPT4All responses as the model generates, use the `streaming=Tr
 === "GPT4All Streaming Example"
     ``` py
     from gpt4all import GPT4All
-    model = GPT4All("orca-mini-3b.ggmlv3.q4_0.bin")
+    model = GPT4All("orca-mini-3b-gguf2-q4_0.gguf")
     tokens = []
     for token in model.generate("The capital of France is", max_tokens=20, streaming=True):
         tokens.append(token)
@@ -135,7 +135,7 @@ is the same as if it weren't provided; that is, `~/.cache/gpt4all/` is the defau
     ``` py
     from pathlib import Path
     from gpt4all import GPT4All
-    model = GPT4All(model_name='orca-mini-3b.ggmlv3.q4_0.bin',
+    model = GPT4All(model_name='orca-mini-3b-gguf2-q4_0.gguf',
                     model_path=(Path.home() / '.cache' / 'gpt4all'),
                     allow_download=False)
     response = model.generate('my favorite 3 fruits are:', temp=0)
@@ -152,7 +152,7 @@ If you want to point it at the chat GUI's default folder, it should be:
     from pathlib import Path
     from gpt4all import GPT4All
 
-    model_name = 'orca-mini-3b.ggmlv3.q4_0.bin'
+    model_name = 'orca-mini-3b-gguf2-q4_0.gguf'
     model_path = Path.home() / 'Library' / 'Application Support' / 'nomic.ai' / 'GPT4All'
     model = GPT4All(model_name, model_path)
     ```
@@ -161,7 +161,7 @@ If you want to point it at the chat GUI's default folder, it should be:
     from pathlib import Path
     from gpt4all import GPT4All
     import os
-    model_name = 'orca-mini-3b.ggmlv3.q4_0.bin'
+    model_name = 'orca-mini-3b-gguf2-q4_0.gguf'
     model_path = Path(os.environ['LOCALAPPDATA']) / 'nomic.ai' / 'GPT4All'
     model = GPT4All(model_name, model_path)
     ```
@@ -170,7 +170,7 @@ If you want to point it at the chat GUI's default folder, it should be:
     from pathlib import Path
     from gpt4all import GPT4All
 
-    model_name = 'orca-mini-3b.ggmlv3.q4_0.bin'
+    model_name = 'orca-mini-3b-gguf2-q4_0.gguf'
    model_path = Path.home() / '.local' / 'share' / 'nomic.ai' / 'GPT4All'
     model = GPT4All(model_name, model_path)
     ```
@@ -182,7 +182,7 @@ from pathlib import Path
 import gpt4all.gpt4all
 gpt4all.gpt4all.DEFAULT_MODEL_DIRECTORY = Path.home() / 'my' / 'models-directory'
 from gpt4all import GPT4All
-model = GPT4All('orca-mini-3b.ggmlv3.q4_0.bin')
+model = GPT4All('orca-mini-3b-gguf2-q4_0.gguf')
 ...
 ```
 
@@ -193,7 +193,7 @@ Session templates can be customized when starting a `chat_session` context:
 === "GPT4All Custom Session Templates Example"
     ``` py
     from gpt4all import GPT4All
-    model = GPT4All('ggml-Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_1.bin')
+    model = GPT4All('wizardlm-13b-v1.2.Q4_0.gguf')
     system_template = 'A chat between a curious user and an artificial intelligence assistant.'
     # many models use triple hash '###' for keywords, Vicunas are simpler:
     prompt_template = 'USER: {0}\nASSISTANT: '
@@ -222,7 +222,7 @@ To do the same outside a session, the input has to be formatted manually. For ex
 
 === "GPT4All Templates Outside a Session Example"
     ``` py
-    model = GPT4All('ggml-Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_1.bin')
+    model = GPT4All('wizardlm-13b-v1.2.Q4_0.gguf')
     system_template = 'A chat between a curious user and an artificial intelligence assistant.'
     prompt_template = 'USER: {0}\nASSISTANT: '
     prompts = ['name 3 colors', 'now name 3 fruits', 'what were the 3 colors in your earlier response?']
@@ -285,7 +285,7 @@ customized in a subclass. As an example:
 ```
 === "GPT4All Custom Subclass Example"
     ``` py
-    model = RotatingTemplateGPT4All('ggml-Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_1.bin')
+    model = RotatingTemplateGPT4All('wizardlm-13b-v1.2.Q4_0.gguf')
     with model.chat_session():  # starting a session is optional in this example
         response1 = model.generate("hi, who are you?")
         print(response1)
@@ -345,7 +345,7 @@ logging infrastructure offers [many more customization options][py-logging-cookb
     import logging
     from gpt4all import GPT4All
     logging.basicConfig(level=logging.INFO)
-    model = GPT4All('nous-hermes-13b.ggmlv3.q4_0.bin')
+    model = GPT4All('nous-hermes-llama2-13b.Q4_0.gguf')
     with model.chat_session('You are a geography expert.\nBe terse.',
                             '### Instruction:\n{0}\n### Response:\n'):
         response = model.generate('who are you?', temp=0)
@@ -414,7 +414,7 @@ If you know exactly when a model should stop responding, you can add a custom ca
 === "GPT4All Custom Stop Callback"
     ``` py
     from gpt4all import GPT4All
-    model = GPT4All('orca-mini-3b.ggmlv3.q4_0.bin')
+    model = GPT4All('orca-mini-3b-gguf2-q4_0.gguf')
 
     def stop_on_token_callback(token_id, token_string):
         # one sentence is enough:
diff --git a/gpt4all-bindings/python/docs/index.md b/gpt4all-bindings/python/docs/index.md
index d0ebe45e..9fabf321 100644
--- a/gpt4all-bindings/python/docs/index.md
+++ b/gpt4all-bindings/python/docs/index.md
@@ -9,7 +9,7 @@ GPT4All software is optimized to run inference of 3-13 billion parameter large l
 === "GPT4All Example"
     ``` py
     from gpt4all import GPT4All
-    model = GPT4All("orca-mini-3b.ggmlv3.q4_0.bin")
+    model = GPT4All("orca-mini-3b-gguf2-q4_0.gguf")
     output = model.generate("The capital of France is ", max_tokens=3)
     print(output)
     ```
diff --git a/gpt4all-bindings/python/gpt4all/tests/test_gpt4all.py b/gpt4all-bindings/python/gpt4all/tests/test_gpt4all.py
index 74a3214d..5b3c3fba 100644
--- a/gpt4all-bindings/python/gpt4all/tests/test_gpt4all.py
+++ b/gpt4all-bindings/python/gpt4all/tests/test_gpt4all.py
@@ -8,7 +8,7 @@ import pytest
 
 
 def test_inference():
-    model = GPT4All(model_name='orca-mini-3b.ggmlv3.q4_0.bin')
+    model = GPT4All(model_name='orca-mini-3b-gguf2-q4_0.gguf')
     output_1 = model.generate('hello', top_k=1)
 
     with model.chat_session():
@@ -47,49 +47,44 @@ def do_long_input(model):
 
 
 def test_inference_long_orca_3b():
-    model = GPT4All(model_name="orca-mini-3b.ggmlv3.q4_0.bin")
+    model = GPT4All(model_name="orca-mini-3b-gguf2-q4_0.gguf")
     do_long_input(model)
 
 
 def test_inference_long_falcon():
-    model = GPT4All(model_name='ggml-model-gpt4all-falcon-q4_0.bin')
+    model = GPT4All(model_name='gpt4all-falcon-q4_0.gguf')
     do_long_input(model)
 
 
 def test_inference_long_llama_7b():
-    model = GPT4All(model_name="orca-mini-7b.ggmlv3.q4_0.bin")
+    model = GPT4All(model_name="mistral-7b-openorca.Q4_0.gguf")
     do_long_input(model)
 
 
 def test_inference_long_llama_13b():
-    model = GPT4All(model_name='ggml-nous-hermes-13b.ggmlv3.q4_0.bin')
+    model = GPT4All(model_name='nous-hermes-llama2-13b.Q4_0.gguf')
     do_long_input(model)
 
 
 def test_inference_long_mpt():
-    model = GPT4All(model_name='ggml-mpt-7b-chat.bin')
+    model = GPT4All(model_name='mpt-7b-chat-q4_0.gguf')
     do_long_input(model)
 
 
 def test_inference_long_replit():
-    model = GPT4All(model_name='ggml-replit-code-v1-3b.bin')
-    do_long_input(model)
-
-
-def test_inference_long_groovy():
-    model = GPT4All(model_name='ggml-gpt4all-j-v1.3-groovy.bin')
+    model = GPT4All(model_name='replit-code-v1_5-3b-q4_0.gguf')
     do_long_input(model)
 
 
 def test_inference_hparams():
-    model = GPT4All(model_name='orca-mini-3b.ggmlv3.q4_0.bin')
+    model = GPT4All(model_name='orca-mini-3b-gguf2-q4_0.gguf')
     output = model.generate("The capital of france is ", max_tokens=3)
     assert 'Paris' in output
 
 
 def test_inference_falcon():
-    model = GPT4All(model_name='ggml-model-gpt4all-falcon-q4_0.bin')
+    model = GPT4All(model_name='gpt4all-falcon-q4_0.gguf')
     prompt = 'hello'
     output = model.generate(prompt)
     assert isinstance(output, str)
 
@@ -97,7 +92,7 @@ def test_inference_falcon():
 
 
 def test_inference_mpt():
-    model = GPT4All(model_name='ggml-mpt-7b-chat.bin')
+    model = GPT4All(model_name='mpt-7b-chat-q4_0.gguf')
     prompt = 'hello'
     output = model.generate(prompt)
     assert isinstance(output, str)
diff --git a/gpt4all-bindings/typescript/spec/chat.mjs b/gpt4all-bindings/typescript/spec/chat.mjs
index ee893646..1e08ea0e 100644
--- a/gpt4all-bindings/typescript/spec/chat.mjs
+++ b/gpt4all-bindings/typescript/spec/chat.mjs
@@ -1,7 +1,7 @@
 import { LLModel, createCompletion, DEFAULT_DIRECTORY, DEFAULT_LIBRARIES_DIRECTORY, loadModel } from '../src/gpt4all.js'
 
 const model = await loadModel(
-    'orca-mini-3b.ggmlv3.q4_0.bin',
+    'orca-mini-3b-gguf2-q4_0.gguf',
     { verbose: true }
 );
 const ll = model.llm;