Merge branch 'main' into modal_labs_python_docs

Signed-off-by: Andriy Mulyar <andriy.mulyar@gmail.com>
Andriy Mulyar 2023-05-13 12:00:39 -04:00 committed by GitHub
commit aa500eb67d
12 changed files with 126 additions and 53 deletions


@@ -170,13 +170,25 @@ workflows:
branches:
only:
- main
#build-py-deploy:
# jobs:
#- build-py-linux
#- build-py-macos
#- build-py-windows
#- store-and-upload-wheels:
# requires:
# - build-py-windows
# - build-py-linux
# - build-py-macos
# build-py-deploy:
# jobs:
# - build-py-linux:
# filters:
# branches:
# only:
# - build-py-macos:
# filters:
# branches:
# only:
# - build-py-windows:
# filters:
# branches:
# only:
# - store-and-upload-wheels:
# filters:
# branches:
# only:
# requires:
# - build-py-windows
# - build-py-linux
# - build-py-macos


@@ -42,69 +42,52 @@
<a href="https://python.langchain.com/en/latest/modules/models/llms/integrations/gpt4all.html">🦜️🔗 Official Langchain Backend</a>
</p>
<p align="center">
GPT4All is made possible by our compute partner <a href="https://www.paperspace.com/">Paperspace</a>.
</p>
<p align="center">
<img width="600" height="365" src="https://user-images.githubusercontent.com/13879686/231876409-e3de1934-93bb-4b4b-9013-b491a969ebbc.gif">
</p>
<p align="center">
Run on an M1 Mac (not sped up!)
</p>
## GPT4All: An ecosystem of open-source on-edge large language models.
![gpt4all-j-demo](https://user-images.githubusercontent.com/13879686/231876409-e3de1934-93bb-4b4b-9013-b491a969ebbc.gif)
GPT4All is an ecosystem to train and deploy **powerful** and **customized** large language models that run locally on consumer-grade CPUs.
Run on an M1 Mac (not sped up!)
The goal is simple: be the best instruction-tuned, assistant-style language model that any person or enterprise can freely use, distribute, and build on.
## Contributing
GPT4All welcomes contributions, involvement, and discussion from the open source community!
Please see CONTRIBUTING.md and follow the issue, bug report, and PR markdown templates.
Check the project Discord, reach out to project owners, or look through existing issues/PRs to avoid duplicate work.
Please make sure to tag all of the above with relevant project identifiers, or your contribution could get lost.
Example tags: `backend`, `bindings`, `python-bindings`, `documentation`, etc.
A GPT4All model is a 3GB - 8GB file that you can download and plug into the GPT4All open-source ecosystem software. **Nomic AI** supports and maintains this software ecosystem to enforce quality and security alongside spearheading the effort to allow any person or enterprise to easily train and deploy their own on-edge large language models.
### Chat Client
Run any GPT4All model natively on your home desktop with the auto-updating desktop chat client. See the website for an exhaustive list of models.
<p align="center">
<a href="https://gpt4all.io">GPT4All Website</a>
</p>
Run any GPT4All model natively on your home desktop with the auto-updating desktop chat client. See <a href="https://gpt4all.io">GPT4All Website</a> for a full list of open-source models you can run with this powerful desktop application.
Direct Installer Links:
[Mac/OSX](https://gpt4all.io/installers/gpt4all-installer-darwin.dmg)
* [Mac/OSX](https://gpt4all.io/installers/gpt4all-installer-darwin.dmg)
[Windows](https://gpt4all.io/installers/gpt4all-installer-win64.exe)
* [Windows](https://gpt4all.io/installers/gpt4all-installer-win64.exe)
[Ubuntu](https://gpt4all.io/installers/gpt4all-installer-linux.run)
* [Ubuntu](https://gpt4all.io/installers/gpt4all-installer-linux.run)
If you have older hardware that only supports AVX and not AVX2, you can use these instead:
[Mac/OSX - avx-only](https://gpt4all.io/installers/gpt4all-installer-darwin-avx-only.dmg)
* [Mac/OSX - avx-only](https://gpt4all.io/installers/gpt4all-installer-darwin-avx-only.dmg)
[Windows - avx-only](https://gpt4all.io/installers/gpt4all-installer-win64-avx-only.exe)
* [Windows - avx-only](https://gpt4all.io/installers/gpt4all-installer-win64-avx-only.exe)
[Ubuntu - avx-only](https://gpt4all.io/installers/gpt4all-installer-linux-avx-only.run)
* [Ubuntu - avx-only](https://gpt4all.io/installers/gpt4all-installer-linux-avx-only.run)
Find the most up-to-date information on the [GPT4All Website](https://gpt4all.io/).
### Python Bindings
### Bindings
```bash
pip install gpt4all
```
* <a href="https://github.com/nomic-ai/gpt4all/tree/main/gpt4all-bindings/python/README.md">:snake: Official Python Bindings</a>
* <a href="https://github.com/nomic-ai/gpt4all-ts">:computer: Official Typescript Bindings</a>
```python
import gpt4all
gptj = gpt4all.GPT4All("ggml-gpt4all-j-v1.3-groovy")
messages = [{"role": "user", "content": "Name 3 colors"}]
gptj.chat_completion(messages)
```
## Training GPT4All-J
@@ -139,7 +122,16 @@ model = AutoModelForCausalLM.from_pretrained("nomic-ai/gpt4all-j-prompt-generati
```bash
accelerate launch --dynamo_backend=inductor --num_processes=8 --num_machines=1 --machine_rank=0 --deepspeed_multinode_launcher standard --mixed_precision=bf16 --use_deepspeed --deepspeed_config_file=configs/deepspeed/ds_config_gptj.json train.py --config configs/train/finetune_gptj.yaml
```
## Contributing
GPT4All welcomes contributions, involvement, and discussion from the open source community!
Please see CONTRIBUTING.md and follow the issue, bug report, and PR markdown templates.
Check the project Discord, reach out to project owners, or look through existing issues/PRs to avoid duplicate work.
Please make sure to tag all of the above with relevant project identifiers, or your contribution could get lost.
Example tags: `backend`, `bindings`, `python-bindings`, `documentation`, etc.
## Citation
If you utilize this repository, models or data in a downstream project, please consider citing it with:


@@ -17,7 +17,7 @@ pip install gpt4all
```
git clone --recurse-submodules https://github.com/nomic-ai/gpt4all
cd gpt4all-backend/llmodel/
cd gpt4all/gpt4all-backend/llmodel/
mkdir build
cd build
cmake ..


@@ -29,7 +29,7 @@ class GPT4All():
model_name: Name of GPT4All or custom model. Including ".bin" file extension is optional but encouraged.
model_path: Path to directory containing model file or, if file does not exist, where to download model.
Default is None, in which case models will be stored in `~/.cache/gpt4all/`.
model_type: Model architecture to use - currently, only options are 'llama' or 'gptj'. Only required if model
model_type: Model architecture to use - currently, options are 'llama', 'gptj', or 'mpt'. Only required if model
is custom. Note that these models still must be built from llama.cpp or GPTJ ggml architecture.
Default is None.
allow_download: Allow API to download models from gpt4all.io. Default is True.
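A minimal usage sketch based on the documented constructor arguments above; the model filename and directory are hypothetical and not part of this diff.

```python
# Sketch: constructing GPT4All with a custom local model. The filename and
# directory are hypothetical; model_type is only needed for custom models.
import gpt4all

model = gpt4all.GPT4All(
    model_name="my-llama-model.bin",   # hypothetical custom ggml model file
    model_path="/path/to/models",      # directory containing the .bin file
    model_type="llama",                # 'llama', 'gptj', or 'mpt'
    allow_download=False,              # don't try to fetch custom models from gpt4all.io
)
```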
@@ -169,7 +169,8 @@ class GPT4All():
messages: List[Dict],
default_prompt_header: bool = True,
default_prompt_footer: bool = True,
verbose: bool = True) -> str:
verbose: bool = True,
**generate_kwargs) -> str:
"""
Format list of message dictionaries into a prompt and call model
generate on prompt. Returns a response dictionary with metadata and
@@ -185,6 +186,7 @@ class GPT4All():
before user/assistant role messages.
default_prompt_footer: If True (default), add default footer at end of prompt.
verbose: If True (default), print full prompt and generated response.
**generate_kwargs: Optional kwargs to pass to prompt context.
Returns:
Response dictionary with:
@@ -201,7 +203,7 @@ class GPT4All():
if verbose:
print(full_prompt)
response = self.model.generate(full_prompt)
response = self.model.generate(full_prompt, **generate_kwargs)
if verbose:
print(response)
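With this change, extra keyword arguments given to `chat_completion` are forwarded to `model.generate`. A minimal sketch of how a caller might use this; the specific sampling parameters shown (`n_predict`, `temp`) are assumed to be accepted by the underlying generate call and are not taken from this diff.

```python
# Sketch: forwarding generation parameters through chat_completion.
# n_predict and temp are assumed prompt-context kwargs; exact names depend on
# the pyllmodel generate signature.
import gpt4all

gptj = gpt4all.GPT4All("ggml-gpt4all-j-v1.3-groovy")
messages = [{"role": "user", "content": "Name 3 colors"}]
response = gptj.chat_completion(messages, verbose=False, n_predict=64, temp=0.7)
```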
@@ -263,6 +265,8 @@ class GPT4All():
return pyllmodel.GPTJModel()
elif model_type == "llama":
return pyllmodel.LlamaModel()
elif model_type == "mpt":
return pyllmodel.MPTModel()
else:
raise ValueError(f"No corresponding model for model_type: {model_type}")
@@ -286,13 +290,22 @@ class GPT4All():
"ggml-vicuna-7b-1.1-q4_2.bin",
"ggml-vicuna-13b-1.1-q4_2.bin",
"ggml-wizardLM-7B.q4_2.bin",
"ggml-stable-vicuna-13B.q4_2.bin"
"ggml-stable-vicuna-13B.q4_2.bin",
"ggml-nous-gpt4-vicuna-13b.bin"
]
MPT_MODELS = [
"ggml-mpt-7b-base.bin",
"ggml-mpt-7b-chat.bin",
"ggml-mpt-7b-instruct.bin"
]
if model_name in GPTJ_MODELS:
return pyllmodel.GPTJModel()
elif model_name in LLAMA_MODELS:
return pyllmodel.LlamaModel()
elif model_name in MPT_MODELS:
return pyllmodel.MPTModel()
else:
err_msg = f"""No corresponding model for provided filename {model_name}.
If this is a custom model, make sure to specify a valid model_type.
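Because the MPT filenames are now listed in `MPT_MODELS`, loading one of them infers the model type from the filename alone. A minimal sketch, assuming the file can be downloaded from gpt4all.io or is already in the local cache:

```python
# Sketch: no model_type needed, since "ggml-mpt-7b-chat.bin" is in MPT_MODELS
# and the backend is inferred from the filename.
import gpt4all

mpt = gpt4all.GPT4All("ggml-mpt-7b-chat.bin")
```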


@@ -46,6 +46,9 @@ llmodel.llmodel_gptj_create.restype = ctypes.c_void_p
llmodel.llmodel_gptj_destroy.argtypes = [ctypes.c_void_p]
llmodel.llmodel_llama_create.restype = ctypes.c_void_p
llmodel.llmodel_llama_destroy.argtypes = [ctypes.c_void_p]
llmodel.llmodel_mpt_create.restype = ctypes.c_void_p
llmodel.llmodel_mpt_destroy.argtypes = [ctypes.c_void_p]
llmodel.llmodel_loadModel.argtypes = [ctypes.c_void_p, ctypes.c_char_p]
llmodel.llmodel_loadModel.restype = ctypes.c_bool
@@ -236,3 +239,17 @@ class LlamaModel(LLModel):
if self.model is not None:
llmodel.llmodel_llama_destroy(self.model)
super().__del__()
class MPTModel(LLModel):
model_type = "mpt"
def __init__(self):
super().__init__()
self.model = llmodel.llmodel_mpt_create()
def __del__(self):
if self.model is not None:
llmodel.llmodel_mpt_destroy(self.model)
super().__del__()
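For completeness, a low-level sketch using the new `MPTModel` binding directly; the weights path is hypothetical, and `load_model`/`prompt` are assumed from the existing `LLModel` interface rather than taken from this diff.

```python
# Sketch: driving the ctypes-backed MPT model without the GPT4All wrapper.
from gpt4all import pyllmodel

mpt = pyllmodel.MPTModel()
mpt.load_model("/path/to/ggml-mpt-7b-chat.bin")  # hypothetical local weights path
mpt.prompt("Hello there")                        # prints the generated response
```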


@@ -61,7 +61,7 @@ copy_prebuilt_C_lib(SRC_CLIB_DIRECtORY,
setup(
name=package_name,
version="0.2.0",
version="0.2.1",
description="Python bindings for GPT4All",
author="Richard Guo",
author_email="richard@nomic.ai",


@@ -14,6 +14,24 @@ def test_create_llama():
llama = pyllmodel.LlamaModel()
assert llama.model_type == "llama"
def test_create_mpt():
mpt = pyllmodel.MPTModel()
assert mpt.model_type == "mpt"
def prompt_unloaded_mpt():
mpt = pyllmodel.MPTModel()
old_stdout = sys.stdout
collect_response = StringIO()
sys.stdout = collect_response
mpt.prompt("hello there")
response = collect_response.getvalue()
sys.stdout = old_stdout
response = response.strip()
assert response == "MPT ERROR: prompt won't work with an unloaded model!"
def prompt_unloaded_gptj():
gptj = pyllmodel.GPTJModel()
old_stdout = sys.stdout


@@ -11,6 +11,17 @@ Chat::Chat(QObject *parent)
, m_responseInProgress(false)
, m_creationDate(QDateTime::currentSecsSinceEpoch())
, m_llmodel(new ChatLLM(this))
{
connectLLM();
}
Chat::~Chat()
{
delete m_llmodel;
m_llmodel = nullptr;
}
void Chat::connectLLM()
{
// Should be in same thread
connect(Download::globalInstance(), &Download::modelListChanged, this, &Chat::modelListChanged, Qt::DirectConnection);


@@ -25,6 +25,8 @@ class Chat : public QObject
public:
explicit Chat(QObject *parent = nullptr);
virtual ~Chat();
void connectLLM();
QString id() const { return m_id; }
QString name() const { return m_userName.isEmpty() ? m_name : m_userName; }


@@ -53,6 +53,13 @@ ChatLLM::ChatLLM(Chat *parent)
m_llmThread.start();
}
ChatLLM::~ChatLLM()
{
m_llmThread.quit();
m_llmThread.wait();
delete m_llmodel;
}
bool ChatLLM::loadDefaultModel()
{
const QList<QString> models = m_chat->modelList();


@@ -24,6 +24,7 @@ public:
};
ChatLLM(Chat *parent);
virtual ~ChatLLM();
bool isModelLoaded() const;
void regenerateResponse();