gpt4all/chatllm.h

#ifndef CHATLLM_H
#define CHATLLM_H

#include <atomic>
#include <cstdint>
#include <string>

#include <QByteArray>
#include <QObject>
#include <QString>
#include <QThread>

#include "llmodel/llmodel.h"

class Chat;

// QObject that holds an LLModel pointer, its prompt context and the text
// produced for one Chat. Load state, response text, model name, the
// "recalculating" flag and a generated name are exposed as Qt properties.
// All non-inline members are defined outside this header.
class ChatLLM : public QObject
{
    Q_OBJECT
    Q_PROPERTY(bool isModelLoaded READ isModelLoaded NOTIFY isModelLoadedChanged)
    Q_PROPERTY(QString response READ response NOTIFY responseChanged)
    Q_PROPERTY(QString modelName READ modelName WRITE setModelName NOTIFY modelNameChanged)
    Q_PROPERTY(bool isRecalc READ isRecalc NOTIFY recalcChanged)
    Q_PROPERTY(QString generatedName READ generatedName NOTIFY generatedNameChanged)

public:
    // Model families this class distinguishes between. The trailing underscore
    // is part of the enumerator names and must be kept for existing users.
    enum ModelType {
        MPT_,
        GPTJ_,
        LLAMA_
    };

    // NOTE(review): the parameter is named `parent`, but whether it is used as
    // the QObject parent or only stored in m_chat is decided in the .cpp.
    ChatLLM(Chat *parent);

    bool isModelLoaded() const;
    void regenerateResponse();
    void resetResponse();
    void resetContext();

    // Raises the stop flag. The flag is atomic, so this may be called from a
    // different thread than the one that reads it.
    void stopGenerating() { m_stopGenerating = true; }

    QString response() const;
    QString modelName() const;

    void setModelName(const QString &modelName);

    // Current value of the "recalculating" flag backing the isRecalc property.
    bool isRecalc() const { return m_isRecalc; }

    // The accumulated name response converted to a QString.
    QString generatedName() const { return QString::fromStdString(m_nameResponse); }

    // Write/read this object's persisted data to/from `stream`. `version`
    // presumably selects the on-disk format revision -- confirm in the .cpp.
    bool serialize(QDataStream &stream, int version);
    bool deserialize(QDataStream &stream, int version);

public Q_SLOTS:
    bool prompt(const QString &prompt, const QString &prompt_template, int32_t n_predict,
        int32_t top_k, float top_p, float temp, int32_t n_batch, float repeat_penalty, int32_t repeat_penalty_tokens,
        int32_t n_threads);
    bool loadDefaultModel();
    bool loadModel(const QString &modelName);
    void modelNameChangeRequested(const QString &modelName);
    void unloadModel();
    void reloadModel(const QString &modelName);
    void generateName();
    void handleChatIdChanged();

Q_SIGNALS:
    void isModelLoadedChanged();
    void responseChanged();
    void responseStarted();
    void responseStopped();
    void modelNameChanged();
    void recalcChanged();
    void sendStartup();
    void sendModelLoaded();
    void sendResetContext();
    void generatedNameChanged();
    void stateChanged();

private:
    void resetContextPrivate();

    // NOTE(review): these look like the per-token callbacks handed to the
    // model for a normal prompt and for name generation -- confirm in the .cpp.
    bool handlePrompt(int32_t token);
    bool handleResponse(int32_t token, const std::string &response);
    bool handleRecalculate(bool isRecalc);
    bool handleNamePrompt(int32_t token);
    bool handleNameResponse(int32_t token, const std::string &response);
    bool handleNameRecalculate(bool isRecalc);

    void saveState();
    void restoreState();

private:
    // Every scalar/pointer member carries a default member initializer so that
    // no member is ever read with an indeterminate value, whichever members the
    // constructor chooses to set explicitly (a constructor mem-initializer
    // still takes precedence over these defaults). Declaration order is kept
    // unchanged because it fixes construction and destruction order.
    LLModel::PromptContext m_ctx;
    LLModel *m_llmodel = nullptr;
    std::string m_response;
    std::string m_nameResponse;             // source of generatedName()
    quint32 m_promptResponseTokens = 0;
    quint32 m_responseLogits = 0;
    QString m_modelName;
    // Value-initialised (i.e. MPT_, the first enumerator) purely to avoid an
    // indeterminate read; the real type is presumably assigned when a model is
    // loaded -- TODO confirm.
    ModelType m_modelType{};
    Chat *m_chat = nullptr;
    QByteArray m_state;
    QThread m_llmThread;
    // Before C++20 a default-constructed std::atomic<bool> is uninitialised,
    // so start explicitly in the "not stopped" state.
    std::atomic<bool> m_stopGenerating{false};
    bool m_isRecalc = false;
};

#endif // CHATLLM_H
Major refactor in prep for multiple conversations. 2023-05-01 09:10:05 -04:00			`#ifndef CHATLLM_H`
			`#define CHATLLM_H`

			`#include <QObject>`
			`#include <QThread>`

			`#include "llmodel/llmodel.h"`

First attempt at providing a persistent chat list experience. Limitations: 1) Context is not restored for gpt-j models 2) When you switch between different model types in an existing chat the context and all the conversation is lost 3) The settings are not chat or conversation specific 4) The sizes of the chat persisted files are very large due to how much data the llama.cpp backend tries to persist. Need to investigate how we can shrink this. 2023-05-04 15:31:41 -04:00			`class Chat;`
Major refactor in prep for multiple conversations. 2023-05-01 09:10:05 -04:00			`class ChatLLM : public QObject`
			`{`
			`Q_OBJECT`
			`Q_PROPERTY(bool isModelLoaded READ isModelLoaded NOTIFY isModelLoadedChanged)`
			`Q_PROPERTY(QString response READ response NOTIFY responseChanged)`
			`Q_PROPERTY(QString modelName READ modelName WRITE setModelName NOTIFY modelNameChanged)`
			`Q_PROPERTY(bool isRecalc READ isRecalc NOTIFY recalcChanged)`
Generate names via llm. 2023-05-02 11:19:17 -04:00			`Q_PROPERTY(QString generatedName READ generatedName NOTIFY generatedNameChanged)`
Major refactor in prep for multiple conversations. 2023-05-01 09:10:05 -04:00
			`public:`
Fix gptj to have lower memory requirements for kv cache and add versioning to the internal state to smoothly handle such a fix in the future. 2023-05-08 17:23:02 -04:00			`enum ModelType {`
			`MPT_,`
			`GPTJ_,`
			`LLAMA_`
			`};`

First attempt at providing a persistent chat list experience. Limitations: 1) Context is not restored for gpt-j models 2) When you switch between different model types in an existing chat the context and all the conversation is lost 3) The settings are not chat or conversation specific 4) The sizes of the chat persisted files are very large due to how much data the llama.cpp backend tries to persist. Need to investigate how we can shrink this. 2023-05-04 15:31:41 -04:00			`ChatLLM(Chat *parent);`
Major refactor in prep for multiple conversations. 2023-05-01 09:10:05 -04:00
			`bool isModelLoaded() const;`
			`void regenerateResponse();`
			`void resetResponse();`
			`void resetContext();`

			`void stopGenerating() { m_stopGenerating = true; }`

			`QString response() const;`
			`QString modelName() const;`

			`void setModelName(const QString &modelName);`

			`bool isRecalc() const { return m_isRecalc; }`

Generate names via llm. 2023-05-02 11:19:17 -04:00			`QString generatedName() const { return QString::fromStdString(m_nameResponse); }`

Convert the old format properly. 2023-05-08 05:52:57 -04:00			`bool serialize(QDataStream &stream, int version);`
			`bool deserialize(QDataStream &stream, int version);`
First attempt at providing a persistent chat list experience. Limitations: 1) Context is not restored for gpt-j models 2) When you switch between different model types in an existing chat the context and all the conversation is lost 3) The settings are not chat or conversation specific 4) The sizes of the chat persisted files are very large due to how much data the llama.cpp backend tries to persist. Need to investigate how we can shrink this. 2023-05-04 15:31:41 -04:00
Major refactor in prep for multiple conversations. 2023-05-01 09:10:05 -04:00			`public Q_SLOTS:`
First attempt at providing a persistent chat list experience. Limitations: 1) Context is not restored for gpt-j models 2) When you switch between different model types in an existing chat the context and all the conversation is lost 3) The settings are not chat or conversation specific 4) The sizes of the chat persisted files are very large due to how much data the llama.cpp backend tries to persist. Need to investigate how we can shrink this. 2023-05-04 15:31:41 -04:00			`bool prompt(const QString &prompt, const QString &prompt_template, int32_t n_predict,`
			`int32_t top_k, float top_p, float temp, int32_t n_batch, float repeat_penalty, int32_t repeat_penalty_tokens,`
			`int32_t n_threads);`
			`bool loadDefaultModel();`
			`bool loadModel(const QString &modelName);`
Major refactor in prep for multiple conversations. 2023-05-01 09:10:05 -04:00			`void modelNameChangeRequested(const QString &modelName);`
First attempt at providing a persistent chat list experience. Limitations: 1) Context is not restored for gpt-j models 2) When you switch between different model types in an existing chat the context and all the conversation is lost 3) The settings are not chat or conversation specific 4) The sizes of the chat persisted files are very large due to how much data the llama.cpp backend tries to persist. Need to investigate how we can shrink this. 2023-05-04 15:31:41 -04:00			`void unloadModel();`
			`void reloadModel(const QString &modelName);`
Generate names via llm. 2023-05-02 11:19:17 -04:00			`void generateName();`
First attempt at providing a persistent chat list experience. Limitations: 1) Context is not restored for gpt-j models 2) When you switch between different model types in an existing chat the context and all the conversation is lost 3) The settings are not chat or conversation specific 4) The sizes of the chat persisted files are very large due to how much data the llama.cpp backend tries to persist. Need to investigate how we can shrink this. 2023-05-04 15:31:41 -04:00			`void handleChatIdChanged();`
Major refactor in prep for multiple conversations. 2023-05-01 09:10:05 -04:00
			`Q_SIGNALS:`
			`void isModelLoadedChanged();`
			`void responseChanged();`
			`void responseStarted();`
			`void responseStopped();`
			`void modelNameChanged();`
			`void recalcChanged();`
			`void sendStartup();`
			`void sendModelLoaded();`
			`void sendResetContext();`
Generate names via llm. 2023-05-02 11:19:17 -04:00			`void generatedNameChanged();`
First attempt at providing a persistent chat list experience. Limitations: 1) Context is not restored for gpt-j models 2) When you switch between different model types in an existing chat the context and all the conversation is lost 3) The settings are not chat or conversation specific 4) The sizes of the chat persisted files are very large due to how much data the llama.cpp backend tries to persist. Need to investigate how we can shrink this. 2023-05-04 15:31:41 -04:00			`void stateChanged();`
Major refactor in prep for multiple conversations. 2023-05-01 09:10:05 -04:00
			`private:`
			`void resetContextPrivate();`
			`bool handlePrompt(int32_t token);`
			`bool handleResponse(int32_t token, const std::string &response);`
			`bool handleRecalculate(bool isRecalc);`
Generate names via llm. 2023-05-02 11:19:17 -04:00			`bool handleNamePrompt(int32_t token);`
			`bool handleNameResponse(int32_t token, const std::string &response);`
			`bool handleNameRecalculate(bool isRecalc);`
First attempt at providing a persistent chat list experience. Limitations: 1) Context is not restored for gpt-j models 2) When you switch between different model types in an existing chat the context and all the conversation is lost 3) The settings are not chat or conversation specific 4) The sizes of the chat persisted files are very large due to how much data the llama.cpp backend tries to persist. Need to investigate how we can shrink this. 2023-05-04 15:31:41 -04:00			`void saveState();`
			`void restoreState();`
Major refactor in prep for multiple conversations. 2023-05-01 09:10:05 -04:00
			`private:`
			`LLModel::PromptContext m_ctx;`
			`LLModel *m_llmodel;`
			`std::string m_response;`
Generate names via llm. 2023-05-02 11:19:17 -04:00			`std::string m_nameResponse;`
Major refactor in prep for multiple conversations. 2023-05-01 09:10:05 -04:00			`quint32 m_promptResponseTokens;`
			`quint32 m_responseLogits;`
			`QString m_modelName;`
Fix gptj to have lower memory requirements for kv cache and add versioning to the internal state to smoothly handle such a fix in the future. 2023-05-08 17:23:02 -04:00			`ModelType m_modelType;`
First attempt at providing a persistent chat list experience. Limitations: 1) Context is not restored for gpt-j models 2) When you switch between different model types in an existing chat the context and all the conversation is lost 3) The settings are not chat or conversation specific 4) The sizes of the chat persisted files are very large due to how much data the llama.cpp backend tries to persist. Need to investigate how we can shrink this. 2023-05-04 15:31:41 -04:00			`Chat *m_chat;`
			`QByteArray m_state;`
Major refactor in prep for multiple conversations. 2023-05-01 09:10:05 -04:00			`QThread m_llmThread;`
			`std::atomic<bool> m_stopGenerating;`
			`bool m_isRecalc;`
			`};`

			`#endif // CHATLLM_H`