python bindings should be quiet by default

* disable llama.cpp logging unless the GPT4ALL_VERBOSE_LLAMACPP envvar is nonempty
* make the verbose flag for retrieve_model default to false (but also be overridable via the gpt4all constructor)

You should be able to run a basic test:

```python
import gpt4all
model = gpt4all.GPT4All('/Users/aaron/Downloads/rift-coder-v0-7b-q4_0.gguf')
print(model.generate('def fib(n):'))
```

and see no non-model output when successful.
2024-10-01 01:06:10 -04:00 · 2023-10-10 11:10:25 -07:00 · 2023-10-10 11:10:25 -07:00 · afaa291eab
commit afaa291eab
parent 7b611b49f2
2 changed files with 20 additions and 3 deletions
--- a/gpt4all-backend/llamamodel.cpp
+++ b/gpt4all-backend/llamamodel.cpp
@ -36,6 +36,17 @@ namespace {
 const char *modelType_ = "LLaMA";
 }

+static void null_log_callback(enum ggml_log_level level, const char* text, void* userdata) { // No-op log sink: discards every message it is handed.
+    (void)level;    // Intentionally unused; the cast marks the parameter as deliberately ignored.
+    (void)text;     // Intentionally unused: the message text is dropped.
+    (void)userdata; // Intentionally unused.
+}
+
+static bool llama_verbose() { // Returns true only when the GPT4ALL_VERBOSE_LLAMACPP environment variable is set to a non-empty value.
+    const char* var = getenv("GPT4ALL_VERBOSE_LLAMACPP"); // getenv yields a null pointer when the variable is not set.
+    return var && *var; // A variable that is set but empty ("") still counts as not verbose: the first character must be non-NUL.
+}
+
 struct gpt_params {
    int32_t seed          = -1;   // RNG seed
    int32_t n_keep        = 0;    // number of tokens to keep from initial prompt
@ -144,7 +155,9 @@ bool LLamaModel::loadModel(const std::string &modelPath)
    d_ptr->params.use_mlock  = params.use_mlock;
 #endif
 #ifdef GGML_USE_METAL
-    std::cerr << "llama.cpp: using Metal" << std::endl;
+    if (llama_verbose()) {
+        std::cerr << "llama.cpp: using Metal" << std::endl;
+    }
    // metal always runs the whole model if n_gpu_layers is not 0, at least
    // currently
    d_ptr->params.n_gpu_layers = 1;
@ -390,6 +403,9 @@ DLL_EXPORT bool magic_match(const char * fname) {
 }

 DLL_EXPORT LLModel *construct() { // Exported factory: returns a newly allocated LLamaModel as a raw LLModel pointer.
+    if (!llama_verbose()) { // Quiet by default; logging is left untouched only when GPT4ALL_VERBOSE_LLAMACPP is non-empty.
+        llama_log_set(null_log_callback, nullptr); // Hand llama.cpp the no-op callback with no user data. NOTE(review): presumably a process-wide setting — confirm against llama.h.
+    }
    return new LLamaModel;
 }
 }
--- a/gpt4all-bindings/python/gpt4all/gpt4all.py
+++ b/gpt4all-bindings/python/gpt4all/gpt4all.py
@ -67,6 +67,7 @@ class GPT4All:
        allow_download: bool = True,
        n_threads: Optional[int] = None,
        device: Optional[str] = "cpu",
+        verbose: bool = False,
    ):
        """
        Constructor
@ -91,7 +92,7 @@ class GPT4All:
        self.model_type = model_type
        self.model = pyllmodel.LLModel()
        # Retrieve model and download if allowed
-        self.config: ConfigType = self.retrieve_model(model_name, model_path=model_path, allow_download=allow_download)
+        self.config: ConfigType = self.retrieve_model(model_name, model_path=model_path, allow_download=allow_download, verbose=verbose)
        if device is not None:
            if device != "cpu":
                self.model.init_gpu(model_path=self.config["path"], device=device)
@ -119,7 +120,7 @@ class GPT4All:
        model_name: str,
        model_path: Optional[Union[str, os.PathLike[str]]] = None,
        allow_download: bool = True,
-        verbose: bool = True,
+        verbose: bool = False,
    ) -> ConfigType:
        """
        Find model file, and if it doesn't exist, download the model.