mirror of https://github.com/nomic-ai/gpt4all.git
synced 2024-10-01 01:06:10 -04:00
remove old llama.cpp submodules
This commit is contained in:
parent cc6db61c93
commit d87573ea75
.gitmodules (vendored): 6 changes
@@ -1,9 +1,3 @@
-[submodule "llama.cpp-230519"]
-	path = gpt4all-backend/llama.cpp-230519
-	url = https://github.com/ggerganov/llama.cpp.git
-[submodule "llama.cpp-230511"]
-	path = gpt4all-backend/llama.cpp-230511
-	url = https://github.com/nomic-ai/llama.cpp
 [submodule "llama.cpp-mainline"]
 	path = gpt4all-backend/llama.cpp-mainline
 	url = https://github.com/nomic-ai/llama.cpp.git
@@ -1 +0,0 @@
-Subproject commit f826aac617e1c5847ecb5115f75433aff82f759a
@@ -1 +0,0 @@
-Subproject commit 5ea43392731040b454c293123839b90e159cbb99
@@ -39,15 +39,10 @@ const char *modelType_ = "LLaMA";
 struct gpt_params {
     int32_t seed = -1; // RNG seed
     int32_t n_keep = 0; // number of tokens to keep from initial prompt
-#if LLAMA_DATE <= 230511
-    int32_t n_parts = -1; // amount of model parts (-1 = determine from model dimensions)
-#endif
 
-#if LLAMA_DATE >= 230519
     // sampling parameters
     float tfs_z = 1.0f; // 1.0 = disabled
     float typical_p = 1.0f; // 1.0 = disabled
-#endif
 
     std::string prompt = "";
 
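Net effect of the hunk above: the LLAMA_DATE version guards are gone and gpt_params keeps only the unconditional fields. The following is a sketch assembled solely from the context lines visible in this diff; fields the diff does not show are elided, and the exact spacing and ordering in the real header may differ.

    struct gpt_params {
        int32_t seed = -1;  // RNG seed
        int32_t n_keep = 0; // number of tokens to keep from initial prompt

        // sampling parameters
        float tfs_z = 1.0f;     // 1.0 = disabled
        float typical_p = 1.0f; // 1.0 = disabled

        std::string prompt = "";

        // ... remaining fields not shown in this diff ...
    };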
@@ -57,7 +52,6 @@ struct gpt_params {
     bool use_mlock = false; // use mlock to keep model in memory
 };
 
-#if LLAMA_DATE >= 230519
 static int llama_sample_top_p_top_k(
     llama_context *ctx,
     const llama_token *last_n_tokens_data,
@@ -85,7 +79,6 @@ static int llama_sample_top_p_top_k(
     llama_sample_temperature(ctx, &candidates_p, temp);
     return llama_sample_token(ctx, &candidates_p);
 }
-#endif
 
 struct LLamaPrivate {
     const std::string modelPath;
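With its #if LLAMA_DATE >= 230519 / #endif guard removed, llama_sample_top_p_top_k() is now compiled unconditionally against the newer sampling API. The diff only shows the first and last lines of the function, so the sketch below is a hedged reconstruction of how such a wrapper is typically written with the mid-2023 llama.cpp candidate-based API; everything beyond the first three parameters, and the whole candidate-building body apart from the two calls visible as diff context, is an assumption rather than gpt4all's exact code.

    #include <vector>
    #include "llama.h" // mid-2023 llama.cpp C API, assumed to be on the include path

    // Sketch: build a candidate list from the current logits, apply repetition
    // penalty, top-k/top-p filtering and temperature, then sample one token.
    static int llama_sample_top_p_top_k(
        llama_context *ctx,
        const llama_token *last_n_tokens_data,
        int last_n_tokens_size,                                    // assumed parameter
        int top_k, float top_p, float temp, float repeat_penalty)  // assumed parameters
    {
        const float *logits = llama_get_logits(ctx);
        const int n_vocab = llama_n_vocab(ctx);

        std::vector<llama_token_data> candidates;
        candidates.reserve(n_vocab);
        for (llama_token id = 0; id < n_vocab; ++id) {
            candidates.push_back(llama_token_data{id, logits[id], 0.0f});
        }
        llama_token_data_array candidates_p = { candidates.data(), candidates.size(), false };

        llama_sample_repetition_penalty(ctx, &candidates_p,
                                        last_n_tokens_data, last_n_tokens_size, repeat_penalty);
        llama_sample_top_k(ctx, &candidates_p, top_k, 1);
        llama_sample_top_p(ctx, &candidates_p, top_p, 1);
        llama_sample_temperature(ctx, &candidates_p, temp); // visible as context above
        return llama_sample_token(ctx, &candidates_p);      // visible as context above
    }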
@@ -150,9 +143,6 @@ bool LLamaModel::loadModel(const std::string &modelPath)
 #else
     d_ptr->params.use_mlock = params.use_mlock;
 #endif
-#if LLAMA_DATE <= 230511
-    d_ptr->params.n_parts = params.n_parts;
-#endif
 #ifdef GGML_USE_METAL
     std::cerr << "llama.cpp: using Metal" << std::endl;
     // metal always runs the whole model if n_gpu_layers is not 0, at least
@@ -80,7 +80,6 @@ struct llm_kv_cache {
     }
 };
 
-#if LLAMA_DATE >= 230519
 inline void ggml_graph_compute_g4a(llm_buffer& buf, ggml_cgraph * graph, int n_threads) {
     struct ggml_cplan plan = ggml_graph_plan(graph, n_threads);
     if (plan.work_size > 0) {
@@ -89,4 +88,3 @@ inline void ggml_graph_compute_g4a(llm_buffer& buf, ggml_cgraph * graph, int n_t
     }
     ggml_graph_compute(graph, &plan);
 }
-#endif
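Similarly, ggml_graph_compute_g4a() is now always built. Pieced together from the context lines of the last two hunks, it follows ggml's plan-then-compute pattern; the two middle lines marked as assumptions (the work-buffer resize and the llm_buffer member names) are not visible in this diff and are only a plausible reconstruction.

    // Plan the graph for n_threads, hand ggml a scratch buffer if the plan
    // requires one, then execute the graph.
    inline void ggml_graph_compute_g4a(llm_buffer& buf, ggml_cgraph * graph, int n_threads) {
        struct ggml_cplan plan = ggml_graph_plan(graph, n_threads);
        if (plan.work_size > 0) {
            buf.resize(plan.work_size); // assumed: llm_buffer exposes a resize() helper
            plan.work_data = buf.addr;  // assumed: llm_buffer exposes its base pointer as addr
        }
        ggml_graph_compute(graph, &plan);
    }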