mirror of
https://github.com/nomic-ai/gpt4all.git
synced 2024-10-01 01:06:10 -04:00
f291853e51
Limitations: 1) Context is not restored for gpt-j models. 2) When you switch between different model types in an existing chat, the context and the entire conversation are lost. 3) The settings are not chat- or conversation-specific. 4) The persisted chat files are very large because of how much data the llama.cpp backend tries to persist; we need to investigate how to shrink them.
36 lines
1007 B
C++
36 lines
1007 B
C++
#ifndef LLAMAMODEL_H
|
|
#define LLAMAMODEL_H
|
|
|
|
#include <string>
|
|
#include <functional>
|
|
#include <vector>
|
|
#include "llmodel.h"
|
|
|
|
// Forward declaration only: this header refers to LLamaPrivate solely through
// a pointer, so the full definition is not required here.
class LLamaPrivate;
|
|
class LLamaModel : public LLModel {
|
|
public:
|
|
LLamaModel();
|
|
~LLamaModel();
|
|
|
|
bool loadModel(const std::string &modelPath) override;
|
|
bool isModelLoaded() const override;
|
|
size_t stateSize() const override;
|
|
size_t saveState(uint8_t *dest) const override;
|
|
size_t restoreState(const uint8_t *src) override;
|
|
void prompt(const std::string &prompt,
|
|
std::function<bool(int32_t)> promptCallback,
|
|
std::function<bool(int32_t, const std::string&)> responseCallback,
|
|
std::function<bool(bool)> recalculateCallback,
|
|
PromptContext &ctx) override;
|
|
void setThreadCount(int32_t n_threads) override;
|
|
int32_t threadCount() override;
|
|
|
|
protected:
|
|
void recalculateContext(PromptContext &promptCtx,
|
|
std::function<bool(bool)> recalculate) override;
|
|
|
|
private:
|
|
LLamaPrivate *d_ptr;
|
|
};
|
|
|
|
#endif // LLAMAMODEL_H
|