Persistent thread count setting

The thread count is now stored on the Settings object and is reapplied after a model switch.
2024-10-01 01:06:10 -04:00 · 2023-04-24 12:24:55 -07:00 · 2023-04-24 12:24:55 -07:00 · 29e3e04fcf
commit 29e3e04fcf
parent 1b0eac2870
3 changed files with 29 additions and 6 deletions
--- a/llm.cpp
+++ b/llm.cpp
@ -101,8 +101,10 @@ bool LLMObject::loadModelPrivate(const QString &modelName)
 }

 void LLMObject::setThreadCount(int32_t n_threads) {
-    m_llmodel->setThreadCount(n_threads);
-    emit threadCountChanged();
+    if (m_llmodel->threadCount() != n_threads) {
+        m_llmodel->setThreadCount(n_threads);
+        emit threadCountChanged();
+    }
 }

 int32_t LLMObject::threadCount() {
@ -297,6 +299,7 @@ LLM::LLM()
    connect(m_llmodel, &LLMObject::modelNameChanged, this, &LLM::modelNameChanged, Qt::QueuedConnection);
    connect(m_llmodel, &LLMObject::modelListChanged, this, &LLM::modelListChanged, Qt::QueuedConnection);
    connect(m_llmodel, &LLMObject::threadCountChanged, this, &LLM::threadCountChanged, Qt::QueuedConnection);
+    connect(m_llmodel, &LLMObject::threadCountChanged, this, &LLM::syncThreadCount, Qt::QueuedConnection);


    connect(this, &LLM::promptRequested, m_llmodel, &LLMObject::prompt, Qt::QueuedConnection);
@ -375,8 +378,16 @@ QList<QString> LLM::modelList() const
    return m_llmodel->modelList();
 }

+void LLM::syncThreadCount() {
+    emit setThreadCountRequested(m_desiredThreadCount);
+}
+
 void LLM::setThreadCount(int32_t n_threads) {
-    emit setThreadCountRequested(n_threads);
+    if (n_threads <= 0) {
+        n_threads = std::max(1, std::min(4, (int32_t) std::thread::hardware_concurrency())); // hardware_concurrency() may return 0; never request 0 threads
+    }
+    m_desiredThreadCount = n_threads;
+    syncThreadCount();
 }

 int32_t LLM::threadCount() {
--- a/llm.h
+++ b/llm.h
@ -82,6 +82,7 @@ public:
    Q_INVOKABLE void resetResponse();
    Q_INVOKABLE void resetContext();
    Q_INVOKABLE void stopGenerating();
+    Q_INVOKABLE void syncThreadCount();
    Q_INVOKABLE void setThreadCount(int32_t n_threads);
    Q_INVOKABLE int32_t threadCount();

@ -116,6 +117,7 @@ private Q_SLOTS:

 private:
    LLMObject *m_llmodel;
+    int32_t m_desiredThreadCount;
    bool m_responseInProgress;

 private:
--- a/qml/SettingsDialog.qml
+++ b/qml/SettingsDialog.qml
@ -31,6 +31,7 @@ Dialog {
    property int defaultTopK: 40
    property int defaultMaxLength: 4096
    property int defaultPromptBatchSize: 9
+    property int defaultThreadCount: 0
    property string defaultPromptTemplate: "The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response.
 ### Prompt:
 %1
@ -42,6 +43,7 @@ Dialog {
    property alias maxLength: settings.maxLength
    property alias promptBatchSize: settings.promptBatchSize
    property alias promptTemplate: settings.promptTemplate
+    property alias threadCount: settings.threadCount

    Settings {
        id: settings
@ -50,6 +52,7 @@ Dialog {
        property int topK: settingsDialog.defaultTopK
        property int maxLength: settingsDialog.defaultMaxLength
        property int promptBatchSize: settingsDialog.defaultPromptBatchSize
+        property int threadCount: settingsDialog.defaultThreadCount
        property string promptTemplate: settingsDialog.defaultPromptTemplate
    }

@ -60,7 +63,13 @@ Dialog {
        settings.maxLength = defaultMaxLength;
        settings.promptBatchSize = defaultPromptBatchSize;
        settings.promptTemplate = defaultPromptTemplate;
+        settings.threadCount = defaultThreadCount
        settings.sync()
+        LLM.threadCount = settings.threadCount;
+    }
+
+    Component.onCompleted: {
+        LLM.threadCount = settings.threadCount;
    }

    Component.onDestruction: {
@ -264,7 +273,7 @@ Dialog {
             Layout.column: 0
         }
         TextField {
-             text: LLM.threadCount.toString()
+             text: settingsDialog.threadCount.toString()
             color: theme.textColor
             background: Rectangle {
                implicitWidth: 150
@ -272,7 +281,7 @@ Dialog {
                radius: 10
             }
             padding: 10
-             ToolTip.text: qsTr("Amount of processing threads to use")
+             ToolTip.text: qsTr("Amount of processing threads to use, a setting of 0 will use the lesser of 4 or your number of CPU threads")
             ToolTip.visible: hovered
             Layout.row: 5
             Layout.column: 1
@ -280,10 +289,11 @@ Dialog {
             onAccepted: {
                 var val = parseInt(text)
                 if (!isNaN(val)) {
+                     settingsDialog.threadCount = val
                     LLM.threadCount = val
                     focus = false
                 } else {
-                     text = settingsDialog.nThreads.toString()
+                     text = settingsDialog.threadCount.toString()
                 }
             }
            Accessible.role: Accessible.EditableText