Mirror of https://github.com/nomic-ai/gpt4all.git (synced 2024-10-01 01:06:10 -04:00)
llmodel: use std::optional for fakeReply for better ergonomics
Signed-off-by: Jared Van Bortel <jared@nomic.ai>
parent e55564debe
commit f7f36e6305
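The change replaces the nullable out-parameter std::string *fakeReply = nullptr with std::optional<std::string_view> fakeReply = {}. The optional expresses "value or nothing" in the type itself, and the string_view accepts literals, std::string contents, or C buffers without a copy. A minimal sketch of the call-site ergonomics (hypothetical function names, not the gpt4all API):

    #include <iostream>
    #include <optional>
    #include <string>
    #include <string_view>

    // Old style: a nullable pointer forces callers to materialize an
    // addressable std::string somewhere.
    void promptOld(const std::string *fakeReply = nullptr) {
        if (fakeReply) std::cout << "spoofed: " << *fakeReply << '\n';
    }

    // New style: literals and std::strings convert straight to the optional
    // view, with no allocation at the call site.
    void promptNew(std::optional<std::string_view> fakeReply = {}) {
        if (fakeReply) std::cout << "spoofed: " << *fakeReply << '\n';
    }

    int main() {
        std::string s = "from a std::string";
        promptNew();                // no fake reply
        promptNew("canned answer"); // literal, no temporary std::string
        promptNew(s);               // views the existing buffer

        std::string tmp = "canned answer"; // the old style needed this dance
        promptOld(&tmp);
    }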
@@ -33,7 +33,7 @@ set(LLMODEL_VERSION_PATCH 0)
 set(LLMODEL_VERSION "${LLMODEL_VERSION_MAJOR}.${LLMODEL_VERSION_MINOR}.${LLMODEL_VERSION_PATCH}")
 project(llmodel VERSION ${LLMODEL_VERSION} LANGUAGES CXX C)
 
-set(CMAKE_CXX_STANDARD 20)
+set(CMAKE_CXX_STANDARD 23)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
 set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})
 set(BUILD_SHARED_LIBS ON)
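Both CMakeLists.txt changes bump the language standard from C++20 to C++23. The likely reason (an inference from this diff, not stated in the commit message) is that std::optional's monadic operations such as transform, used in the ChatLLM::promptInternal hunk below, are C++23 additions. A minimal sketch assuming a C++23 toolchain:

    #include <optional>

    int main() {
        std::optional<int> n = 21;
        // optional::transform is new in C++23; under -std=c++20 this call
        // does not compile, hence the standard bump in both CMakeLists.
        auto doubled = n.transform([](int v) { return v * 2; });
        return doubled.value_or(0) == 42 ? 0 : 1;
    }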
@@ -162,7 +162,7 @@ public:
                         bool allowContextShift,
                         PromptContext &ctx,
                         bool special = false,
-                        std::string *fakeReply = nullptr);
+                        std::optional<std::string_view> fakeReply = {});
 
     using EmbedCancelCallback = bool(unsigned *batchSizes, unsigned nBatch, const char *backend);
 
@@ -212,7 +212,7 @@ public:
 protected:
     // These are pure virtual because subclasses need to implement as the default implementation of
     // 'prompt' above calls these functions
-    virtual std::vector<Token> tokenize(PromptContext &ctx, const std::string &str, bool special = false) = 0;
+    virtual std::vector<Token> tokenize(PromptContext &ctx, std::string_view str, bool special = false) = 0;
     virtual bool isSpecialToken(Token id) const = 0;
     virtual std::string tokenToString(Token id) const = 0;
     virtual Token sampleToken(PromptContext &ctx) const = 0;
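The virtual tokenize interface moves from const std::string & to std::string_view, so implementations can be handed a literal or a slice of a larger buffer without a temporary std::string being constructed. A standalone sketch of the benefit (the tokenizer body is a stand-in, not llama.cpp's):

    #include <string>
    #include <string_view>
    #include <vector>

    // Stand-in tokenizer: one token per byte, just to exercise the signature.
    std::vector<int> tokenize(std::string_view str) {
        std::vector<int> toks;
        toks.reserve(str.size());
        for (unsigned char c : str) toks.push_back(c);
        return toks;
    }

    int main() {
        std::string prompt = "### Human: hi";
        auto a = tokenize(prompt);                                // no copy
        auto b = tokenize("literal");                             // no std::string temporary
        auto c = tokenize(std::string_view(prompt).substr(4, 5)); // a slice, still no copy
        return (a.size() && b.size() && c.size()) ? 0 : 1;
    }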
@@ -536,13 +536,13 @@ size_t LLamaModel::restoreState(const uint8_t *src)
     return llama_set_state_data(d_ptr->ctx, const_cast<uint8_t*>(src));
 }
 
-std::vector<LLModel::Token> LLamaModel::tokenize(PromptContext &ctx, const std::string &str, bool special)
+std::vector<LLModel::Token> LLamaModel::tokenize(PromptContext &ctx, std::string_view str, bool special)
 {
     bool atStart = m_tokenize_last_token == -1;
     bool insertSpace = atStart || isSpecialToken(m_tokenize_last_token);
     std::vector<LLModel::Token> fres(str.length() + 4);
     int32_t fres_len = llama_tokenize_gpt4all(
-        d_ptr->model, str.c_str(), str.length(), fres.data(), fres.size(), /*add_special*/ atStart,
+        d_ptr->model, str.data(), str.length(), fres.data(), fres.size(), /*add_special*/ atStart,
         /*parse_special*/ special, /*insert_space*/ insertSpace
     );
     fres.resize(fres_len);
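Inside LLamaModel::tokenize the companion change is str.c_str() becoming str.data(): std::string_view deliberately has no c_str(), because a view need not be null-terminated. That is safe here since llama_tokenize_gpt4all already takes an explicit length. A small illustration:

    #include <string_view>

    int main() {
        std::string_view whole = "hello world";
        std::string_view word  = whole.substr(0, 5); // "hello", no terminator after it
        // The byte after the view is ' ', not '\0', so any consumer of word
        // must use (pointer, length) rather than assume a C string.
        return word.data()[word.size()] == ' ' ? 0 : 1;
    }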
@@ -8,6 +8,7 @@
 
 #include <memory>
 #include <string>
+#include <string_view>
 #include <vector>
 
 struct LLamaPrivate;
@@ -52,7 +53,7 @@ private:
     bool m_supportsCompletion = false;
 
 protected:
-    std::vector<Token> tokenize(PromptContext &ctx, const std::string &str, bool special) override;
+    std::vector<Token> tokenize(PromptContext &ctx, std::string_view str, bool special) override;
     bool isSpecialToken(Token id) const override;
     std::string tokenToString(Token id) const override;
     Token sampleToken(PromptContext &ctx) const override;
@@ -12,6 +12,7 @@
 #include <memory>
+#include <optional>
 #include <string>
 #include <string_view>
 #include <vector>
 
 struct LLModelWrapper {
@@ -130,13 +131,10 @@ void llmodel_prompt(llmodel_model model, const char *prompt,
     wrapper->promptContext.repeat_last_n = ctx->repeat_last_n;
     wrapper->promptContext.contextErase = ctx->context_erase;
 
-    std::string fake_reply_str;
-    if (fake_reply) { fake_reply_str = fake_reply; }
-    auto *fake_reply_p = fake_reply ? &fake_reply_str : nullptr;
-
     // Call the C++ prompt method
     wrapper->llModel->prompt(prompt, prompt_template, prompt_callback, response_func, allow_context_shift,
-                             wrapper->promptContext, special, fake_reply_p);
+                             wrapper->promptContext, special,
+                             fake_reply ? std::make_optional<std::string_view>(fake_reply) : std::nullopt);
 
     // Update the C context by giving access to the wrappers raw pointers to std::vector data
     // which involves no copies
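At the C boundary, the old code copied the nullable const char * into a std::string and then passed either its address or nullptr. The new code maps the pointer onto the optional in a single expression. A sketch of the pattern (the shim name is illustrative, not the exact C API):

    #include <cstdio>
    #include <optional>
    #include <string_view>

    static void cppPrompt(std::optional<std::string_view> fakeReply) {
        std::printf("%s\n", fakeReply ? "spoofed reply" : "generate normally");
    }

    // In the spirit of llmodel_prompt: NULL becomes nullopt, anything else
    // becomes a view over the caller's buffer, with no copy.
    extern "C" void shim_prompt(const char *fake_reply) {
        cppPrompt(fake_reply ? std::make_optional<std::string_view>(fake_reply)
                             : std::nullopt);
    }

    int main() {
        shim_prompt(nullptr);
        shim_prompt("canned answer");
    }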
@@ -11,6 +11,7 @@
 #include <sstream>
 #include <stdexcept>
 #include <string>
+#include <string_view>
 #include <vector>
 
 namespace ranges = std::ranges;
@@ -45,7 +46,7 @@ void LLModel::prompt(const std::string &prompt,
                      bool allowContextShift,
                      PromptContext &promptCtx,
                      bool special,
-                     std::string *fakeReply)
+                     std::optional<std::string_view> fakeReply)
 {
     if (!isModelLoaded()) {
         std::cerr << implementation().modelType() << " ERROR: prompt won't work with an unloaded model!\n";
@@ -129,7 +130,7 @@ void LLModel::prompt(const std::string &prompt,
         return; // error
 
     // decode the assistant's reply, either generated or spoofed
-    if (fakeReply == nullptr) {
+    if (!fakeReply) {
         generateResponse(responseCallback, allowContextShift, promptCtx);
     } else {
         embd_inp = tokenize(promptCtx, *fakeReply, false);
@@ -1,7 +1,7 @@
 cmake_minimum_required(VERSION 3.16)
 
 set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
-set(CMAKE_CXX_STANDARD 20)
+set(CMAKE_CXX_STANDARD 23)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
 
 if(APPLE)
@@ -93,7 +93,7 @@ void ChatAPI::prompt(const std::string &prompt,
                      bool allowContextShift,
                      PromptContext &promptCtx,
                      bool special,
-                     std::string *fakeReply) {
+                     std::optional<std::string_view> fakeReply) {
 
     Q_UNUSED(promptCallback);
     Q_UNUSED(allowContextShift);
@@ -121,7 +121,7 @@ void ChatAPI::prompt(const std::string &prompt,
     if (fakeReply) {
         promptCtx.n_past += 1;
         m_context.append(formattedPrompt);
-        m_context.append(QString::fromStdString(*fakeReply));
+        m_context.append(QString::fromUtf8(fakeReply->data(), fakeReply->size()));
         return;
     }
 
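QString::fromStdString requires a std::string, which a string_view can only provide by copying, so ChatAPI::prompt switches to the pointer-plus-size QString::fromUtf8 overload. A sketch of the conversion (assumes Qt 6 headers; not self-contained without them):

    #include <QString>
    #include <string_view>

    // Builds a QString from a UTF-8 view without an intermediate std::string.
    QString toQString(std::string_view sv) {
        return QString::fromUtf8(sv.data(), static_cast<qsizetype>(sv.size()));
    }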
@@ -12,9 +12,10 @@
 
 #include <cstddef>
 #include <cstdint>
-#include <stdexcept>
 #include <functional>
+#include <stdexcept>
 #include <string>
+#include <string_view>
 #include <vector>
 
 class QNetworkAccessManager;
@@ -72,7 +73,7 @@ public:
                 bool allowContextShift,
                 PromptContext &ctx,
                 bool special,
-                std::string *fakeReply) override;
+                std::optional<std::string_view> fakeReply) override;
 
     void setThreadCount(int32_t n_threads) override;
     int32_t threadCount() const override;
@@ -97,7 +98,7 @@ protected:
     // them as they are only called from the default implementation of 'prompt' which we override and
     // completely replace
 
-    std::vector<Token> tokenize(PromptContext &ctx, const std::string &str, bool special) override
+    std::vector<Token> tokenize(PromptContext &ctx, std::string_view str, bool special) override
     {
         (void)ctx;
         (void)str;
@@ -801,14 +801,9 @@ bool ChatLLM::promptInternal(const QList<QString> &collectionList, const QString
                                     /*allowContextShift*/ true, m_ctx);
         m_ctx.n_predict = old_n_predict; // now we are ready for a response
     }
-    std::string fakeReplyStr;
-    std::string *fakeReplyP = nullptr;
-    if (fakeReply) {
-        fakeReplyStr = fakeReply->toStdString();
-        fakeReplyP = &fakeReplyStr;
-    }
     m_llModelInfo.model->prompt(prompt.toStdString(), promptTemplate.toStdString(), promptFunc, responseFunc,
-                                /*allowContextShift*/ true, m_ctx, false, fakeReplyP);
+                                /*allowContextShift*/ true, m_ctx, false,
+                                fakeReply.transform(std::mem_fn(&QString::toStdString)));
 #if defined(DEBUG)
     printf("\n");
     fflush(stdout);
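The ChatLLM::promptInternal site is where C++23 pays off: optional<QString>::transform applies QString::toStdString only when a value is present, and the resulting optional<std::string> converts implicitly to the optional<string_view> parameter. The temporary optional<std::string> lives until the end of the full expression, so the view remains valid for the whole prompt() call. A Qt-free model of the same pattern (FakeQString stands in for QString):

    #include <functional>
    #include <optional>
    #include <string>
    #include <string_view>

    struct FakeQString { // stands in for QString
        std::string s;
        std::string toStdString() const { return s; }
    };

    static std::size_t promptLike(std::optional<std::string_view> fakeReply) {
        return fakeReply ? fakeReply->size() : 0;
    }

    int main() {
        std::optional<FakeQString> fakeReply = FakeQString{"spoofed"};
        // transform (C++23): nullopt stays nullopt; otherwise the member
        // function runs, yielding optional<std::string>, which converts to
        // the optional<string_view> parameter for the duration of the call.
        auto n = promptLike(fakeReply.transform(std::mem_fn(&FakeQString::toStdString)));
        return n == 7 ? 0 : 1;
    }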
@@ -1318,10 +1313,9 @@ void ChatLLM::processRestoreStateFromText()
 
         auto &response = *it++;
         Q_ASSERT(response.first != "Prompt: ");
-        auto responseText = response.second.toStdString();
 
         m_llModelInfo.model->prompt(prompt.second.toStdString(), promptTemplate.toStdString(), promptFunc, nullptr,
-                                    /*allowContextShift*/ true, m_ctx, false, &responseText);
+                                    /*allowContextShift*/ true, m_ctx, false, response.second.toUtf8().constData());
     }
 
     if (!m_stopGenerating) {
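One lifetime subtlety in the ChatLLM::processRestoreStateFromText hunk: toUtf8() returns a temporary QByteArray and constData() points into it, yet this is sound because temporaries persist until the end of the full expression, i.e. through the entire prompt() call. The same rule in miniature, without Qt:

    #include <optional>
    #include <string>
    #include <string_view>

    static std::size_t use(std::optional<std::string_view> sv) {
        return sv ? sv->size() : 0;
    }

    std::string toUtf8Like() { return "reply"; } // stands in for QString::toUtf8()

    int main() {
        // The temporary string (like the QByteArray in the diff) is destroyed
        // only after use() returns, so the pointer stays valid for the call.
        return use(toUtf8Like().c_str()) == 5 ? 0 : 1;
    }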