// Source: gpt4all/gpt4all-chat/chatllm.h (126 lines, 3.7 KiB)

#ifndef CHATLLM_H
#define CHATLLM_H
#include <atomic>
#include <cstdint>
#include <string>

#include <QFileInfo>
#include <QObject>
#include <QThread>
// (repository blame timestamp: 2023-06-01 14:13:12 -04:00)
#include "localdocs.h"
#include "../gpt4all-backend/llmodel.h"
/// Tag describing which kind of model is in use; ChatLLM keeps one in
/// ChatLLM::m_modelType.
///
/// The numeric values are written out explicitly. They are exactly the
/// values the enumerators receive from implicit numbering (0 through 4).
enum LLModelType {
    MPT_     = 0,
    GPTJ_    = 1,
    LLAMA_   = 2,
    CHATGPT_ = 3,
    REPLIT_  = 4
};
/// Bundles a backend model pointer with a QFileInfo.
///
/// Deliberately carries neither the model type nor the model name: ChatLLM
/// keeps those itself because it has to be able to serialize them even while
/// it is in the unloaded state.
struct LLModelInfo {
    /// Backend model instance; null unless something assigns it.
    LLModel *model{nullptr};

    /// File information kept alongside the model
    /// (presumably the model file on disk -- confirm against the loader).
    QFileInfo fileInfo;
};
class Chat;
class ChatLLM : public QObject
{
Q_OBJECT
Q_PROPERTY(bool isModelLoaded READ isModelLoaded NOTIFY isModelLoadedChanged)
Q_PROPERTY(QString response READ response NOTIFY responseChanged)
Q_PROPERTY(QString modelName READ modelName WRITE setModelName NOTIFY modelNameChanged)
Q_PROPERTY(bool isRecalc READ isRecalc NOTIFY recalcChanged)
2023-05-02 11:19:17 -04:00
Q_PROPERTY(QString generatedName READ generatedName NOTIFY generatedNameChanged)
public:
ChatLLM(Chat *parent, bool isServer = false);
2023-05-12 14:06:03 -04:00
virtual ~ChatLLM();
bool isModelLoaded() const;
void regenerateResponse();
void resetResponse();
void resetContext();
2023-06-01 16:12:21 -04:00
QList<ResultInfo> databaseResults() const { return m_databaseResults; }
void stopGenerating() { m_stopGenerating = true; }
bool shouldBeLoaded() const { return m_shouldBeLoaded; }
void setShouldBeLoaded(bool b);
QString response() const;
QString modelName() const;
void setModelName(const QString &modelName);
bool isRecalc() const { return m_isRecalc; }
2023-05-02 11:19:17 -04:00
QString generatedName() const { return QString::fromStdString(m_nameResponse); }
2023-05-08 05:52:57 -04:00
bool serialize(QDataStream &stream, int version);
bool deserialize(QDataStream &stream, int version);
public Q_SLOTS:
bool prompt(const QString &prompt, const QString &prompt_template, int32_t n_predict,
int32_t top_k, float top_p, float temp, int32_t n_batch, float repeat_penalty, int32_t repeat_penalty_tokens,
int32_t n_threads);
bool loadDefaultModel();
bool loadModel(const QString &modelName);
void modelNameChangeRequested(const QString &modelName);
void forceUnloadModel();
void unloadModel();
void reloadModel();
2023-05-02 11:19:17 -04:00
void generateName();
void handleChatIdChanged();
void handleShouldBeLoadedChanged();
Q_SIGNALS:
void isModelLoadedChanged();
void modelLoadingError(const QString &error);
void responseChanged();
void promptProcessing();
void responseStopped();
void modelNameChanged();
void recalcChanged();
void sendStartup();
void sendModelLoaded();
2023-05-02 11:19:17 -04:00
void generatedNameChanged();
void stateChanged();
2023-05-11 16:46:25 -04:00
void threadStarted();
void shouldBeLoadedChanged();
2023-06-01 14:13:12 -04:00
void requestRetrieveFromDB(const QList<QString> &collections, const QString &text, int retrievalSize, QList<ResultInfo> *results);
2023-05-11 16:46:25 -04:00
protected:
bool handlePrompt(int32_t token);
bool handleResponse(int32_t token, const std::string &response);
bool handleRecalculate(bool isRecalc);
2023-05-02 11:19:17 -04:00
bool handleNamePrompt(int32_t token);
bool handleNameResponse(int32_t token, const std::string &response);
bool handleNameRecalculate(bool isRecalc);
void saveState();
void restoreState();
protected:
LLModel::PromptContext m_ctx;
quint32 m_promptTokens;
quint32 m_promptResponseTokens;
LLModelInfo m_modelInfo;
LLModelType m_modelType;
std::string m_response;
2023-05-02 11:19:17 -04:00
std::string m_nameResponse;
quint32 m_responseLogits;
QString m_modelName;
Chat *m_chat;
QByteArray m_state;
QThread m_llmThread;
std::atomic<bool> m_stopGenerating;
std::atomic<bool> m_shouldBeLoaded;
2023-06-01 16:12:21 -04:00
QList<ResultInfo> m_databaseResults;
bool m_isRecalc;
bool m_isServer;
bool m_isChatGPT;
};
#endif // CHATLLM_H