Aligned llama implementation style to other implementations

Authored by niansa, 2023-05-20 00:49:00 +02:00; committed by AT
parent 99b8e3c250
commit 071d42dd4e
2 changed files with 12 additions and 10 deletions
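
For context, the hunks below bring the LLaMA backend in line with the layout the other backends use: a file-local modelType_ constant in an anonymous namespace, and the private LLamaPrivate state struct moved below the sampling helper. The following is a minimal sketch of that layout, assuming only what is visible in the diff; the trimmed member list, the default initializers, the destructor, and main() are illustrative, not the actual file contents.

#include <cstdint>
#include <string>

// File-local constant: the anonymous namespace gives it internal linkage,
// so each backend translation unit can define its own modelType_.
namespace {
const char *modelType_ = "LLaMA";
}

// Private implementation state, kept out of the public header.
struct LLamaPrivate {
    const std::string modelPath;
    bool modelLoaded = false;
    int64_t n_threads = 0;
};

// Hypothetical, trimmed-down model class showing how the pieces fit together;
// the real LLamaModel exposes a much larger interface.
class LLamaModel {
public:
    LLamaModel() : d_ptr(new LLamaPrivate) { modelType = modelType_; }
    ~LLamaModel() { delete d_ptr; } // sketch only: real code would also manage copies

private:
    const char *modelType = nullptr;
    LLamaPrivate *d_ptr;
};

int main() {
    LLamaModel model; // constructs the private state and records the model type
}
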

View File

@@ -28,6 +28,7 @@
#include <llama.h>
#include <ggml.h>
namespace {
const char *modelType_ = "LLaMA";
}
@@ -48,16 +49,7 @@ struct gpt_params {
    bool use_mlock = false; // use mlock to keep model in memory
};
struct LLamaPrivate {
    const std::string modelPath;
    bool modelLoaded;
    llama_context *ctx = nullptr;
    llama_context_params params;
    int64_t n_threads = 0;
    bool empty = true;
};
static int llama_sample_top_p_top_k(
int llama_sample_top_p_top_k(
        llama_context *ctx,
        const llama_token *last_n_tokens_data,
        int last_n_tokens_size,
@@ -85,6 +77,15 @@ static int llama_sample_top_p_top_k(
    return llama_sample_token(ctx, &candidates_p);
}
struct LLamaPrivate {
    const std::string modelPath;
    bool modelLoaded;
    llama_context *ctx = nullptr;
    llama_context_params params;
    int64_t n_threads = 0;
    bool empty = true;
};
LLamaModel::LLamaModel()
    : d_ptr(new LLamaPrivate) {
    modelType = modelType_;

View File

@@ -28,6 +28,7 @@
#include <llama.h>
#include <ggml.h>
namespace {
const char *modelType_ = "LLaMA";
}
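
The llama_sample_top_p_top_k helper touched above drives llama.cpp's sampling calls and ends with the llama_sample_token(ctx, &candidates_p) call visible in the diff. As a reference for what top-k / top-p (nucleus) sampling actually computes, here is a self-contained sketch that is deliberately independent of the llama.cpp API; the function name, parameter list, and use of a plain logit vector are illustrative, not the gpt4all implementation.

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <random>
#include <utility>
#include <vector>

// Sample a token id from `logits` after temperature scaling, top-k truncation,
// and top-p (nucleus) truncation. Assumes temp > 0, 0 < top_p <= 1, top_k >= 1.
int sample_top_p_top_k(const std::vector<float> &logits,
                       int top_k, float top_p, float temp, std::mt19937 &rng) {
    // Pair each scaled logit with its token id so sorting keeps the indices.
    std::vector<std::pair<float, int>> cand;
    cand.reserve(logits.size());
    for (int i = 0; i < (int)logits.size(); ++i)
        cand.emplace_back(logits[i] / temp, i);

    // Top-k: keep only the k highest-scoring candidates.
    top_k = std::min(top_k, (int)cand.size());
    std::partial_sort(cand.begin(), cand.begin() + top_k, cand.end(),
                      [](const auto &a, const auto &b) { return a.first > b.first; });
    cand.resize(top_k);

    // Softmax over the surviving candidates (shifted by the max for stability).
    std::vector<float> probs(cand.size());
    float sum = 0.0f;
    for (std::size_t i = 0; i < cand.size(); ++i) {
        probs[i] = std::exp(cand[i].first - cand.front().first);
        sum += probs[i];
    }
    for (float &p : probs)
        p /= sum;

    // Top-p: keep the smallest prefix whose cumulative probability reaches top_p.
    float cum = 0.0f;
    std::size_t keep = probs.size();
    for (std::size_t i = 0; i < probs.size(); ++i) {
        cum += probs[i];
        if (cum >= top_p) { keep = i + 1; break; }
    }
    probs.resize(keep);

    // Draw from the filtered distribution and return the original token id.
    std::discrete_distribution<int> dist(probs.begin(), probs.end());
    return cand[dist(rng)].second;
}

The real helper applies the same two filters through llama.cpp's own sampling calls rather than by hand, so this sketch only mirrors the math, not the call sequence.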