2023-05-01 09:10:05 -04:00
|
|
|
|
#ifndef CHATLLM_H
|
|
|
|
|
#define CHATLLM_H
|
|
|
|
|
|
2024-06-06 11:59:28 -04:00
|
|
|
|
#include "database.h" // IWYU pragma: keep
|
2024-06-04 14:47:11 -04:00
|
|
|
|
#include "modellist.h"
|
|
|
|
|
|
|
|
|
|
#include "../gpt4all-backend/llmodel.h"
|
|
|
|
|
|
|
|
|
|
#include <QByteArray>
|
|
|
|
|
#include <QElapsedTimer>
|
|
|
|
|
#include <QFileInfo>
|
2024-06-06 11:59:28 -04:00
|
|
|
|
#include <QList>
|
2023-05-01 09:10:05 -04:00
|
|
|
|
#include <QObject>
|
2024-06-04 14:47:11 -04:00
|
|
|
|
#include <QPair>
|
|
|
|
|
#include <QString>
|
2023-05-01 09:10:05 -04:00
|
|
|
|
#include <QThread>
|
2024-06-28 12:57:57 -04:00
|
|
|
|
#include <QVariantMap>
|
2024-06-04 14:47:11 -04:00
|
|
|
|
#include <QVector>
|
|
|
|
|
#include <QtGlobal>
|
2023-05-01 09:10:05 -04:00
|
|
|
|
|
2024-06-04 14:47:11 -04:00
|
|
|
|
#include <atomic>
|
|
|
|
|
#include <cstdint>
|
2024-05-15 14:07:03 -04:00
|
|
|
|
#include <memory>
|
2024-06-26 15:26:27 -04:00
|
|
|
|
#include <optional>
|
2024-06-04 14:47:11 -04:00
|
|
|
|
#include <string>
|
2024-05-15 14:07:03 -04:00
|
|
|
|
|
2024-06-24 18:49:23 -04:00
|
|
|
|
using namespace Qt::Literals::StringLiterals;
|
|
|
|
|
|
2024-06-04 14:47:11 -04:00
|
|
|
|
class QDataStream;
|
2023-05-01 09:10:05 -04:00
|
|
|
|
|
2023-05-13 19:05:35 -04:00
|
|
|
|
// Identifies which backend implementation a loaded model uses.
// NOTE(review): trailing underscores presumably avoid clashes with backend
// macros/identifiers; values look like they may be persisted (see
// ChatLLM::serialize) — do not reorder or renumber without confirming.
enum LLModelType {
    GPTJ_,  // GPT-J style local model
    LLAMA_, // llama.cpp-backed local model
    API_,   // remote, API-backed model (no local inference)
};
|
|
|
|
|
|
2024-06-26 15:26:27 -04:00
|
|
|
|
class ChatLLM;
|
|
|
|
|
|
2023-05-13 19:05:35 -04:00
|
|
|
|
// Bundles the live backend model instance with where it was loaded from and
// why (if at all) it fell back from the requested compute device.
struct LLModelInfo {
    std::unique_ptr<LLModel> model; // owning handle to the backend model; null while unloaded
    QFileInfo fileInfo;             // on-disk model file this instance was loaded from
    // Unset when no device fallback occurred; otherwise a human-readable reason.
    std::optional<QString> fallbackReason;

    // NOTE: This does not store the model type or name on purpose as this is left for ChatLLM which
    // must be able to serialize the information even if it is in the unloaded state

    // Replaces the owned model (default: clears it) and notifies the owning
    // ChatLLM; defined out-of-line.
    void resetModel(ChatLLM *cllm, LLModel *model = nullptr);
};
|
|
|
|
|
|
2023-06-19 14:34:53 -04:00
|
|
|
|
class TokenTimer : public QObject {
|
|
|
|
|
Q_OBJECT
|
|
|
|
|
public:
|
|
|
|
|
explicit TokenTimer(QObject *parent)
|
|
|
|
|
: QObject(parent)
|
|
|
|
|
, m_elapsed(0) {}
|
|
|
|
|
|
|
|
|
|
static int rollingAverage(int oldAvg, int newNumber, int n)
|
|
|
|
|
{
|
|
|
|
|
// i.e. to calculate the new average after then nth number,
|
|
|
|
|
// you multiply the old average by n−1, add the new number, and divide the total by n.
|
|
|
|
|
return qRound(((float(oldAvg) * (n - 1)) + newNumber) / float(n));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void start() { m_tokens = 0; m_elapsed = 0; m_time.invalidate(); }
|
|
|
|
|
void stop() { handleTimeout(); }
|
|
|
|
|
void inc() {
|
|
|
|
|
if (!m_time.isValid())
|
|
|
|
|
m_time.start();
|
|
|
|
|
++m_tokens;
|
|
|
|
|
if (m_time.elapsed() > 999)
|
|
|
|
|
handleTimeout();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Q_SIGNALS:
|
|
|
|
|
void report(const QString &speed);
|
|
|
|
|
|
|
|
|
|
private Q_SLOTS:
|
|
|
|
|
void handleTimeout()
|
|
|
|
|
{
|
|
|
|
|
m_elapsed += m_time.restart();
|
2024-06-24 18:49:23 -04:00
|
|
|
|
emit report(u"%1 tokens/sec"_s.arg(m_tokens / float(m_elapsed / 1000.0f), 0, 'g', 2));
|
2023-06-19 14:34:53 -04:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
private:
|
|
|
|
|
QElapsedTimer m_time;
|
|
|
|
|
qint64 m_elapsed;
|
|
|
|
|
quint32 m_tokens;
|
|
|
|
|
};
|
|
|
|
|
|
2023-05-04 15:31:41 -04:00
|
|
|
|
class Chat;
|
2023-05-01 09:10:05 -04:00
|
|
|
|
// Drives a single chat's LLM backend on its own worker thread (m_llmThread):
// loading/unloading models, streaming prompt responses, generating chat names
// and follow-up questions, and (de)serializing conversation state.
// Communication with the UI-side Chat object is via signals/slots.
class ChatLLM : public QObject
{
    Q_OBJECT
    Q_PROPERTY(bool isRecalc READ isRecalc NOTIFY recalcChanged)
    Q_PROPERTY(QString deviceBackend READ deviceBackend NOTIFY loadedModelInfoChanged)
    Q_PROPERTY(QString device READ device NOTIFY loadedModelInfoChanged)
    Q_PROPERTY(QString fallbackReason READ fallbackReason NOTIFY loadedModelInfoChanged)
public:
    ChatLLM(Chat *parent, bool isServer = false);
    virtual ~ChatLLM();

    void destroy();
    static void destroyStore();
    bool isModelLoaded() const;
    void regenerateResponse();
    void resetResponse();
    void resetContext();

    // Cooperative cancellation flag polled by the generation loop (atomic, so
    // safe to set from another thread).
    void stopGenerating() { m_stopGenerating = true; }

    bool shouldBeLoaded() const { return m_shouldBeLoaded; }
    void setShouldBeLoaded(bool b);
    void requestTrySwitchContext();
    void setForceUnloadModel(bool b) { m_forceUnloadModel = b; }
    void setMarkedForDeletion(bool b) { m_markedForDeletion = b; }

    QString response() const;

    ModelInfo modelInfo() const;
    void setModelInfo(const ModelInfo &info);

    bool isRecalc() const { return m_isRecalc; }

    void acquireModel();
    void resetModel();

    // Human-readable name of the compute backend (e.g. as mapped by
    // LLModel::GPUDevice::backendIdToName); empty when no model is loaded.
    QString deviceBackend() const
    {
        if (!isModelLoaded()) return QString();
        std::string name = LLModel::GPUDevice::backendIdToName(m_llModelInfo.model->backendName());
        return QString::fromStdString(name);
    }

    // GPU device name, or "CPU" when the backend reports no GPU device;
    // empty when no model is loaded.
    QString device() const
    {
        if (!isModelLoaded()) return QString();
        const char *name = m_llModelInfo.model->gpuDeviceName();
        return name ? QString(name) : u"CPU"_s;
    }

    // not loaded -> QString(), no fallback -> QString("")
    QString fallbackReason() const
    {
        if (!isModelLoaded()) return QString();
        return m_llModelInfo.fallbackReason.value_or(u""_s);
    }

    // Name generated for the chat by generateName(); empty until generated.
    QString generatedName() const { return QString::fromStdString(m_nameResponse); }

    // serializeKV/deserializeKV control whether the model's KV-cache state is
    // included; discardKV reads past a stored KV state without keeping it.
    bool serialize(QDataStream &stream, int version, bool serializeKV);
    bool deserialize(QDataStream &stream, int version, bool deserializeKV, bool discardKV);
    void setStateFromText(const QVector<QPair<QString, QString>> &stateFromText) { m_stateFromText = stateFromText; }

public Q_SLOTS:
    // Runs a full prompt/response cycle, optionally retrieving context from
    // the given localdocs collections first.
    bool prompt(const QList<QString> &collectionList, const QString &prompt);
    bool loadDefaultModel();
    void trySwitchContextOfLoadedModel(const ModelInfo &modelInfo);
    bool loadModel(const ModelInfo &modelInfo);
    void modelChangeRequested(const ModelInfo &modelInfo);
    void unloadModel();
    void reloadModel();
    void generateName();
    void generateQuestions(qint64 elapsed);
    void handleChatIdChanged(const QString &id);
    void handleShouldBeLoadedChanged();
    void handleThreadStarted();
    void handleForceMetalChanged(bool forceMetal);
    void handleDeviceChanged();
    void processSystemPrompt();
    void processRestoreStateFromText();

Q_SIGNALS:
    void recalcChanged();
    void loadedModelInfoChanged();
    void modelLoadingPercentageChanged(float);
    void modelLoadingError(const QString &error);
    void modelLoadingWarning(const QString &warning);
    void responseChanged(const QString &response);
    void promptProcessing();
    void generatingQuestions();
    void responseStopped(qint64 promptResponseMs);
    void generatedNameChanged(const QString &name);
    void generatedQuestionFinished(const QString &generatedQuestion);
    void stateChanged();
    void threadStarted();
    void shouldBeLoadedChanged();
    void trySwitchContextRequested(const ModelInfo &modelInfo);
    void trySwitchContextOfLoadedModelCompleted(int value);
    // Cross-thread request to the database component; results are written
    // into *results by the receiver.
    void requestRetrieveFromDB(const QList<QString> &collections, const QString &text, int retrievalSize, QList<ResultInfo> *results);
    void reportSpeed(const QString &speed);
    void reportDevice(const QString &device);
    void reportFallbackReason(const QString &fallbackReason);
    void databaseResultsChanged(const QList<ResultInfo>&);
    void modelInfoChanged(const ModelInfo &modelInfo);

protected:
    // Core generation entry point shared by prompt(), generateName(), etc.;
    // sampling parameters mirror the backend's PromptContext fields.
    bool promptInternal(const QList<QString> &collectionList, const QString &prompt, const QString &promptTemplate,
        int32_t n_predict, int32_t top_k, float top_p, float min_p, float temp, int32_t n_batch, float repeat_penalty,
        int32_t repeat_penalty_tokens);
    // Backend callbacks for the main response; returning false aborts generation.
    bool handlePrompt(int32_t token);
    bool handleResponse(int32_t token, const std::string &response);
    bool handleRecalculate(bool isRecalc);
    // Callback set used while generating the chat name.
    bool handleNamePrompt(int32_t token);
    bool handleNameResponse(int32_t token, const std::string &response);
    bool handleNameRecalculate(bool isRecalc);
    // Callback set used while processing the system prompt.
    bool handleSystemPrompt(int32_t token);
    bool handleSystemResponse(int32_t token, const std::string &response);
    bool handleSystemRecalculate(bool isRecalc);
    // Callback set used while replaying a chat restored from text.
    bool handleRestoreStateFromTextPrompt(int32_t token);
    bool handleRestoreStateFromTextResponse(int32_t token, const std::string &response);
    bool handleRestoreStateFromTextRecalculate(bool isRecalc);
    // Callback set used while generating suggested follow-up questions.
    bool handleQuestionPrompt(int32_t token);
    bool handleQuestionResponse(int32_t token, const std::string &response);
    bool handleQuestionRecalculate(bool isRecalc);
    void saveState();
    void restoreState();

protected:
    LLModel::PromptContext m_ctx;
    quint32 m_promptTokens;
    quint32 m_promptResponseTokens;

private:
    bool loadNewModel(const ModelInfo &modelInfo, QVariantMap &modelLoadProps);

    std::string m_response;         // accumulated response text for the current prompt
    std::string m_nameResponse;     // accumulated generated chat name
    QString m_questionResponse;     // accumulated suggested follow-up question
    LLModelInfo m_llModelInfo;      // loaded backend model + file/device info
    LLModelType m_llModelType;      // backend type of the loaded model
    ModelInfo m_modelInfo;          // model-list metadata for the selected model
    TokenTimer *m_timer;            // tokens/sec reporter (QObject-parented)
    QByteArray m_state;             // serialized backend state blob
    QThread m_llmThread;            // worker thread this object's work runs on
    // Atomics: written from other threads while the worker is generating.
    std::atomic<bool> m_stopGenerating;
    std::atomic<bool> m_shouldBeLoaded;
    std::atomic<bool> m_isRecalc;
    std::atomic<bool> m_forceUnloadModel;
    std::atomic<bool> m_markedForDeletion;
    bool m_isServer;
    bool m_forceMetal;
    bool m_reloadingToChangeVariant;
    bool m_processedSystemPrompt;
    bool m_restoreStateFromText;
    // m_pristineLoadedState is set if saveState() is unnecessary, either because:
    // - an unload was queued during LLModel::restoreState()
    // - the chat will be restored from text and hasn't been interacted with yet
    bool m_pristineLoadedState = false;
    QVector<QPair<QString, QString>> m_stateFromText;
};
|
|
|
|
|
|
|
|
|
|
#endif // CHATLLM_H
|