embllm: fix use of llama ctx before loading (#2465)

This fixes a regression in PR #2396.

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
2024-10-01 01:06:10 -04:00 · 2024-06-25 11:04:01 -04:00 · 2024-06-25 11:04:01 -04:00 · 1a00882276
commit 1a00882276
parent 9273b49b62
1 changed file with 5 additions and 4 deletions
--- a/gpt4all-chat/embllm.cpp
+++ b/gpt4all-chat/embllm.cpp
@@ -84,10 +84,6 @@ bool EmbeddingLLMWorker::loadModel()
        return false;
    }

-    // FIXME(jared): the user may want this to take effect without having to restart
-    int n_threads = MySettings::globalInstance()->threadCount();
-    m_model->setThreadCount(n_threads);
-
    // NOTE: explicitly loads model on CPU to avoid GPU OOM
    // TODO(cebtenzzre): support GPU-accelerated embeddings
    bool success = m_model->loadModel(filePath.toStdString(), 2048, 0);
@@ -104,6 +100,11 @@ bool EmbeddingLLMWorker::loadModel()
        m_model = nullptr;
        return false;
    }
+
+    // FIXME(jared): the user may want this to take effect without having to restart
+    int n_threads = MySettings::globalInstance()->threadCount();
+    m_model->setThreadCount(n_threads);
+
    return true;
 }