gpt4all/llm.h

#ifndef LLM_H
#define LLM_H

#include <atomic>
#include <cstdint>
#include <string>

#include <QList>
#include <QObject>
#include <QString>
#include <QThread>

#include "llmodel/gptj.h"
#include "llmodel/llamamodel.h"

/**
 * QObject wrapper around a single LLModel backend instance.
 *
 * The class publishes the model list, the loaded state, the response text,
 * the model name, the thread count and the recalculation flag as Qt
 * properties, and announces changes through the signals declared below.
 * Only declarations (plus two trivial inline members) live in this header;
 * everything else is defined in the implementation file.
 *
 * NOTE(review): the QThread member suggests the object does its work on a
 * dedicated thread -- confirm against the constructor in the implementation.
 */
class LLMObject : public QObject
{
    Q_OBJECT
    Q_PROPERTY(QList<QString> modelList READ modelList NOTIFY modelListChanged)
    Q_PROPERTY(bool isModelLoaded READ isModelLoaded NOTIFY isModelLoadedChanged)
    Q_PROPERTY(QString response READ response NOTIFY responseChanged)
    Q_PROPERTY(QString modelName READ modelName WRITE setModelName NOTIFY modelNameChanged)
    Q_PROPERTY(int32_t threadCount READ threadCount WRITE setThreadCount NOTIFY threadCountChanged)
    Q_PROPERTY(bool isRecalc READ isRecalc NOTIFY recalcChanged)

public:

    LLMObject();

    /// READ accessor for the isModelLoaded property.
    bool isModelLoaded() const;
    void regenerateResponse();
    void resetResponse();
    void resetContext();
    /// Raises the atomic stop flag; it does nothing else by itself.
    void stopGenerating() { m_stopGenerating = true; }
    /// WRITE accessor for the threadCount property.
    void setThreadCount(int32_t n_threads);
    /// READ accessor for the threadCount property.
    /// NOTE(review): not const-qualified, unlike the other getters; changing
    /// that would also require touching the out-of-line definition.
    int32_t threadCount();

    /// READ accessor for the response property.
    QString response() const;
    /// READ accessor for the modelName property.
    QString modelName() const;

    /// READ accessor for the modelList property.
    QList<QString> modelList() const;
    /// WRITE accessor for the modelName property.
    void setModelName(const QString &modelName);

    /// READ accessor for the isRecalc property; returns the stored flag as is.
    bool isRecalc() const { return m_isRecalc; }

public Q_SLOTS:
    /// Slot taking the prompt text, its template and the generation settings.
    /// The meaning of the bool result is defined by the implementation.
    bool prompt(const QString &prompt, const QString &prompt_template, int32_t n_predict, int32_t top_k, float top_p,
                float temp, int32_t n_batch, float repeat_penalty, int32_t repeat_penalty_tokens);
    bool loadModel();
    void modelNameChangeRequested(const QString &modelName);

Q_SIGNALS:
    // NOTIFY signals for the properties declared at the top of the class.
    void isModelLoadedChanged();
    void responseChanged();
    void responseStarted();
    void responseStopped();
    void modelNameChanged();
    void modelListChanged();
    void threadCountChanged();
    void recalcChanged();
    // NOTE(review): the three send* signals look like event notifications for
    // an external listener -- confirm what connects to them.
    void sendStartup();
    void sendModelLoaded();
    void sendResetContext();

private:
    void resetContextPrivate();
    bool loadModelPrivate(const QString &modelName);
    // NOTE(review): the handle* members are presumably the callbacks handed to
    // the backend while a prompt is processed -- verify in the implementation.
    bool handlePrompt(int32_t token);
    bool handleResponse(int32_t token, const std::string &response);
    bool handleRecalculate(bool isRecalc);

private:
    // Every scalar/pointer member carries a default member initializer so that
    // no inline accessor can observe an indeterminate value, whatever the
    // constructor's initializer list does or does not mention. Initializers in
    // the constructor still take precedence over these defaults.
    LLModel *m_llmodel = nullptr;
    std::string m_response;
    quint32 m_promptResponseTokens = 0;
    quint32 m_responseLogits = 0;
    QString m_modelName;
    QThread m_llmThread;
    // Before C++20 a default-constructed std::atomic holds an indeterminate
    // value, hence the explicit false.
    std::atomic<bool> m_stopGenerating{false};
    bool m_isRecalc = false;
};

/**
 * QObject exposing the language-model operations as properties and
 * Q_INVOKABLE members.
 *
 * The class holds a pointer to an LLMObject and declares a set of
 * "...Requested" signals whose names and parameters mirror LLMObject's
 * operations. Construction and destruction are private; instances are
 * obtained through globalInstance().
 *
 * NOTE(review): the "...Requested" signals are presumably connected to the
 * LLMObject so requests are forwarded to it -- confirm in the implementation.
 */
class LLM : public QObject
{
    Q_OBJECT
    Q_PROPERTY(QList<QString> modelList READ modelList NOTIFY modelListChanged)
    Q_PROPERTY(bool isModelLoaded READ isModelLoaded NOTIFY isModelLoadedChanged)
    Q_PROPERTY(QString response READ response NOTIFY responseChanged)
    Q_PROPERTY(QString modelName READ modelName WRITE setModelName NOTIFY modelNameChanged)
    Q_PROPERTY(bool responseInProgress READ responseInProgress NOTIFY responseInProgressChanged)
    Q_PROPERTY(int32_t threadCount READ threadCount WRITE setThreadCount NOTIFY threadCountChanged)
    Q_PROPERTY(bool isRecalc READ isRecalc NOTIFY recalcChanged)

public:

    /// Access point for the LLM instance; the constructor itself is private.
    static LLM *globalInstance();

    /// READ accessor for the isModelLoaded property.
    Q_INVOKABLE bool isModelLoaded() const;
    /// Takes the prompt text, its template and the generation settings; the
    /// parameter list matches the promptRequested() signal below.
    Q_INVOKABLE void prompt(const QString &prompt, const QString &prompt_template, int32_t n_predict, int32_t top_k, float top_p,
                            float temp, int32_t n_batch, float repeat_penalty, int32_t repeat_penalty_tokens);
    Q_INVOKABLE void regenerateResponse();
    Q_INVOKABLE void resetResponse();
    Q_INVOKABLE void resetContext();
    Q_INVOKABLE void stopGenerating();
    Q_INVOKABLE void syncThreadCount();
    /// WRITE accessor for the threadCount property.
    Q_INVOKABLE void setThreadCount(int32_t n_threads);
    /// READ accessor for the threadCount property.
    /// NOTE(review): not const-qualified, unlike the other getters.
    Q_INVOKABLE int32_t threadCount();

    /// READ accessor for the response property.
    QString response() const;
    /// READ accessor for the responseInProgress property; returns the stored flag.
    bool responseInProgress() const { return m_responseInProgress; }

    /// READ accessor for the modelList property.
    QList<QString> modelList() const;

    /// READ accessor for the modelName property.
    QString modelName() const;
    /// WRITE accessor for the modelName property.
    void setModelName(const QString &modelName);

    Q_INVOKABLE bool checkForUpdates() const;

    /// READ accessor for the isRecalc property.
    bool isRecalc() const;

Q_SIGNALS:
    // NOTIFY signals for the properties above, interleaved with the
    // "...Requested" signals that carry the arguments of the matching call.
    void isModelLoadedChanged();
    void responseChanged();
    void responseInProgressChanged();
    void promptRequested(const QString &prompt, const QString &prompt_template, int32_t n_predict, int32_t top_k, float top_p,
                         float temp, int32_t n_batch, float repeat_penalty, int32_t repeat_penalty_tokens);
    void regenerateResponseRequested();
    void resetResponseRequested();
    void resetContextRequested();
    void modelNameChangeRequested(const QString &modelName);
    void modelNameChanged();
    void modelListChanged();
    void threadCountChanged();
    void setThreadCountRequested(int32_t threadCount);
    void recalcChanged();

private Q_SLOTS:
    // Same names as LLMObject's responseStarted()/responseStopped() signals.
    // NOTE(review): presumably connected to them to maintain
    // m_responseInProgress -- confirm in the implementation.
    void responseStarted();
    void responseStopped();

private:
    // Default member initializers keep the inline responseInProgress() getter
    // from ever reading an indeterminate value; initializers in the
    // constructor's own list still take precedence over these defaults.
    LLMObject *m_llmodel = nullptr;
    int32_t m_desiredThreadCount = 0;
    bool m_responseInProgress = false;

private:
    // Private construction/destruction: only LLM itself and the MyLLM friend
    // can create or destroy an instance.
    explicit LLM();
    ~LLM() {}
    friend class MyLLM;
};

#endif // LLM_H
Initial commit. 2023-04-08 23:28:39 -04:00			`#ifndef LLM_H`
			`#define LLM_H`

			`#include <QObject>`
			`#include <QThread>`
Move the backend code into own subdirectory and make it a shared library. Begin fleshing out the C api wrapper that bindings can use. 2023-04-25 19:16:45 -04:00			`#include "llmodel/gptj.h"`
			`#include "llmodel/llamamodel.h"`
Initial commit. 2023-04-08 23:28:39 -04:00
Add an abstraction around gpt-j that will allow other arch models to be loaded in ui. 2023-04-13 22:15:40 -04:00			`class LLMObject : public QObject`
Initial commit. 2023-04-08 23:28:39 -04:00			`{`
			`Q_OBJECT`
Allow unloading/loading/changing of models. 2023-04-18 11:42:16 -04:00			`Q_PROPERTY(QList<QString> modelList READ modelList NOTIFY modelListChanged)`
Initial commit. 2023-04-08 23:28:39 -04:00			`Q_PROPERTY(bool isModelLoaded READ isModelLoaded NOTIFY isModelLoadedChanged)`
			`Q_PROPERTY(QString response READ response NOTIFY responseChanged)`
Allow unloading/loading/changing of models. 2023-04-18 11:42:16 -04:00			`Q_PROPERTY(QString modelName READ modelName WRITE setModelName NOTIFY modelNameChanged)`
Add thread count setting 2023-04-18 09:46:03 -04:00			`Q_PROPERTY(int32_t threadCount READ threadCount WRITE setThreadCount NOTIFY threadCountChanged)`
Infinite context window through trimming. 2023-04-25 11:20:51 -04:00			`Q_PROPERTY(bool isRecalc READ isRecalc NOTIFY recalcChanged)`
Initial commit. 2023-04-08 23:28:39 -04:00
			`public:`

Add an abstraction around gpt-j that will allow other arch models to be loaded in ui. 2023-04-13 22:15:40 -04:00			`LLMObject();`
Initial commit. 2023-04-08 23:28:39 -04:00
			`bool isModelLoaded() const;`
Fix the context. 2023-04-17 14:11:41 -04:00			`void regenerateResponse();`
Initial commit. 2023-04-08 23:28:39 -04:00			`void resetResponse();`
Add a reset context feature to clear the chat history and the context for now. 2023-04-10 17:13:22 -04:00			`void resetContext();`
Initial commit. 2023-04-08 23:28:39 -04:00			`void stopGenerating() { m_stopGenerating = true; }`
Add thread count setting 2023-04-18 09:46:03 -04:00			`void setThreadCount(int32_t n_threads);`
			`int32_t threadCount();`
Initial commit. 2023-04-08 23:28:39 -04:00
			`QString response() const;`
Programmatically get the model name from the LLM. The LLM now searches for applicable models in the directory of the executable given a pattern match and then loads the first one it finds. Also, add a busy indicator for model loading. 2023-04-11 08:29:55 -04:00			`QString modelName() const;`
Initial commit. 2023-04-08 23:28:39 -04:00
Allow unloading/loading/changing of models. 2023-04-18 11:42:16 -04:00			`QList<QString> modelList() const;`
			`void setModelName(const QString &modelName);`

Infinite context window through trimming. 2023-04-25 11:20:51 -04:00			`bool isRecalc() const { return m_isRecalc; }`

Initial commit. 2023-04-08 23:28:39 -04:00			`public Q_SLOTS:`
use the settings dialog settings when generating 2023-04-16 01:51:28 -04:00			`bool prompt(const QString &prompt, const QString &prompt_template, int32_t n_predict, int32_t top_k, float top_p,`
new settings (model path, repeat penalty) w/ tabs 2023-04-25 10:57:40 -04:00			`float temp, int32_t n_batch, float repeat_penalty, int32_t repeat_penalty_tokens);`
Allow unloading/loading/changing of models. 2023-04-18 11:42:16 -04:00			`bool loadModel();`
			`void modelNameChangeRequested(const QString &modelName);`
Initial commit. 2023-04-08 23:28:39 -04:00
			`Q_SIGNALS:`
			`void isModelLoadedChanged();`
			`void responseChanged();`
			`void responseStarted();`
			`void responseStopped();`
Programmatically get the model name from the LLM. The LLM now searches for applicable models in the directory of the executable given a pattern match and then loads the first one it finds. Also, add a busy indicator for model loading. 2023-04-11 08:29:55 -04:00			`void modelNameChanged();`
Allow unloading/loading/changing of models. 2023-04-18 11:42:16 -04:00			`void modelListChanged();`
Add thread count setting 2023-04-18 09:46:03 -04:00			`void threadCountChanged();`
Infinite context window through trimming. 2023-04-25 11:20:51 -04:00			`void recalcChanged();`
Initial support for opt-in telemetry. 2023-04-26 22:05:56 -04:00			`void sendStartup();`
			`void sendModelLoaded();`
			`void sendResetContext();`
Initial commit. 2023-04-08 23:28:39 -04:00
			`private:`
Initial support for opt-in telemetry. 2023-04-26 22:05:56 -04:00			`void resetContextPrivate();`
Allow unloading/loading/changing of models. 2023-04-18 11:42:16 -04:00			`bool loadModelPrivate(const QString &modelName);`
Move the promptCallback to own function. 2023-04-27 11:08:15 -04:00			`bool handlePrompt(int32_t token);`
Implement repeat penalty for both llama and gptj in gui. 2023-04-25 08:38:29 -04:00			`bool handleResponse(int32_t token, const std::string &response);`
Infinite context window through trimming. 2023-04-25 11:20:51 -04:00			`bool handleRecalculate(bool isRecalc);`
Initial commit. 2023-04-08 23:28:39 -04:00
			`private:`
Add an abstraction around gpt-j that will allow other arch models to be loaded in ui. 2023-04-13 22:15:40 -04:00			`LLModel *m_llmodel;`
Initial commit. 2023-04-08 23:28:39 -04:00			`std::string m_response;`
Move the promptCallback to own function. 2023-04-27 11:08:15 -04:00			`quint32 m_promptResponseTokens;`
Erase the correct amount of logits when regenerating which is not the same as the number of tokens. 2023-04-15 09:19:06 -04:00			`quint32 m_responseLogits;`
Programmatically get the model name from the LLM. The LLM now searches for applicable models in the directory of the executable given a pattern match and then loads the first one it finds. Also, add a busy indicator for model loading. 2023-04-11 08:29:55 -04:00			`QString m_modelName;`
Initial commit. 2023-04-08 23:28:39 -04:00			`QThread m_llmThread;`
			`std::atomic<bool> m_stopGenerating;`
Infinite context window through trimming. 2023-04-25 11:20:51 -04:00			`bool m_isRecalc;`
Initial commit. 2023-04-08 23:28:39 -04:00			`};`

			`class LLM : public QObject`
			`{`
			`Q_OBJECT`
Allow unloading/loading/changing of models. 2023-04-18 11:42:16 -04:00			`Q_PROPERTY(QList<QString> modelList READ modelList NOTIFY modelListChanged)`
Initial commit. 2023-04-08 23:28:39 -04:00			`Q_PROPERTY(bool isModelLoaded READ isModelLoaded NOTIFY isModelLoadedChanged)`
			`Q_PROPERTY(QString response READ response NOTIFY responseChanged)`
Allow unloading/loading/changing of models. 2023-04-18 11:42:16 -04:00			`Q_PROPERTY(QString modelName READ modelName WRITE setModelName NOTIFY modelNameChanged)`
Initial commit. 2023-04-08 23:28:39 -04:00			`Q_PROPERTY(bool responseInProgress READ responseInProgress NOTIFY responseInProgressChanged)`
Add thread count setting 2023-04-18 09:46:03 -04:00			`Q_PROPERTY(int32_t threadCount READ threadCount WRITE setThreadCount NOTIFY threadCountChanged)`
Infinite context window through trimming. 2023-04-25 11:20:51 -04:00			`Q_PROPERTY(bool isRecalc READ isRecalc NOTIFY recalcChanged)`

Initial commit. 2023-04-08 23:28:39 -04:00			`public:`

			`static LLM *globalInstance();`

			`Q_INVOKABLE bool isModelLoaded() const;`
use the settings dialog settings when generating 2023-04-16 01:51:28 -04:00			`Q_INVOKABLE void prompt(const QString &prompt, const QString &prompt_template, int32_t n_predict, int32_t top_k, float top_p,`
new settings (model path, repeat penalty) w/ tabs 2023-04-25 10:57:40 -04:00			`float temp, int32_t n_batch, float repeat_penalty, int32_t repeat_penalty_tokens);`
Fix the context. 2023-04-17 14:11:41 -04:00			`Q_INVOKABLE void regenerateResponse();`
Initial commit. 2023-04-08 23:28:39 -04:00			`Q_INVOKABLE void resetResponse();`
Fix the context. 2023-04-17 14:11:41 -04:00			`Q_INVOKABLE void resetContext();`
Initial commit. 2023-04-08 23:28:39 -04:00			`Q_INVOKABLE void stopGenerating();`
persistent threadcount setting threadcount is now on the Settings object and gets reapplied after a model switch 2023-04-24 15:24:55 -04:00			`Q_INVOKABLE void syncThreadCount();`
Add thread count setting 2023-04-18 09:46:03 -04:00			`Q_INVOKABLE void setThreadCount(int32_t n_threads);`
			`Q_INVOKABLE int32_t threadCount();`
Initial commit. 2023-04-08 23:28:39 -04:00
			`QString response() const;`
			`bool responseInProgress() const { return m_responseInProgress; }`

Allow unloading/loading/changing of models. 2023-04-18 11:42:16 -04:00			`QList<QString> modelList() const;`

Programmatically get the model name from the LLM. The LLM now searches for applicable models in the directory of the executable given a pattern match and then loads the first one it finds. Also, add a busy indicator for model loading. 2023-04-11 08:29:55 -04:00			`QString modelName() const;`
Allow unloading/loading/changing of models. 2023-04-18 11:42:16 -04:00			`void setModelName(const QString &modelName);`
Programmatically get the model name from the LLM. The LLM now searches for applicable models in the directory of the executable given a pattern match and then loads the first one it finds. Also, add a busy indicator for model loading. 2023-04-11 08:29:55 -04:00
Big updates to the UI. 2023-04-10 23:34:34 -04:00			`Q_INVOKABLE bool checkForUpdates() const;`

Infinite context window through trimming. 2023-04-25 11:20:51 -04:00			`bool isRecalc() const;`

Initial commit. 2023-04-08 23:28:39 -04:00			`Q_SIGNALS:`
			`void isModelLoadedChanged();`
			`void responseChanged();`
			`void responseInProgressChanged();`
use the settings dialog settings when generating 2023-04-16 01:51:28 -04:00			`void promptRequested(const QString &prompt, const QString &prompt_template, int32_t n_predict, int32_t top_k, float top_p,`
new settings (model path, repeat penalty) w/ tabs 2023-04-25 10:57:40 -04:00			`float temp, int32_t n_batch, float repeat_penalty, int32_t repeat_penalty_tokens);`
Fix the context. 2023-04-17 14:11:41 -04:00			`void regenerateResponseRequested();`
Initial commit. 2023-04-08 23:28:39 -04:00			`void resetResponseRequested();`
Add a reset context feature to clear the chat history and the context for now. 2023-04-10 17:13:22 -04:00			`void resetContextRequested();`
Allow unloading/loading/changing of models. 2023-04-18 11:42:16 -04:00			`void modelNameChangeRequested(const QString &modelName);`
Programmatically get the model name from the LLM. The LLM now searches for applicable models in the directory of the executable given a pattern match and then loads the first one it finds. Also, add a busy indicator for model loading. 2023-04-11 08:29:55 -04:00			`void modelNameChanged();`
Allow unloading/loading/changing of models. 2023-04-18 11:42:16 -04:00			`void modelListChanged();`
Add thread count setting 2023-04-18 09:46:03 -04:00			`void threadCountChanged();`
			`void setThreadCountRequested(int32_t threadCount);`
Infinite context window through trimming. 2023-04-25 11:20:51 -04:00			`void recalcChanged();`
Initial commit. 2023-04-08 23:28:39 -04:00
			`private Q_SLOTS:`
			`void responseStarted();`
			`void responseStopped();`

			`private:`
Add an abstraction around gpt-j that will allow other arch models to be loaded in ui. 2023-04-13 22:15:40 -04:00			`LLMObject *m_llmodel;`
persistent threadcount setting threadcount is now on the Settings object and gets reapplied after a model switch 2023-04-24 15:24:55 -04:00			`int32_t m_desiredThreadCount;`
Initial commit. 2023-04-08 23:28:39 -04:00			`bool m_responseInProgress;`

			`private:`
			`explicit LLM();`
			`~LLM() {}`
			`friend class MyLLM;`
			`};`

			`#endif // LLM_H`