mirror of
https://github.com/nomic-ai/gpt4all.git
synced 2024-10-01 01:06:10 -04:00
Use F16 for kv cache on mpt.
This commit is contained in:
parent
dc559c1575
commit
eb77d5157b
@ -347,7 +347,7 @@ bool mpt_model_load(const std::string &fname, std::istream &fin, mpt_model & mod
|
||||
const int n_mem = n_layer*n_ctx;
|
||||
const int n_elements = n_embd*n_mem;
|
||||
|
||||
if (!kv_cache_init(hparams, model.kv_self, GGML_TYPE_F32, model.hparams.n_ctx)) {
|
||||
if (!kv_cache_init(hparams, model.kv_self, GGML_TYPE_F16, model.hparams.n_ctx)) {
|
||||
fprintf(stderr, "%s: kv_cache_init() failed for self-attention cache\n", __func__);
|
||||
ggml_free(ctx);
|
||||
return false;
|
||||
|
Loading…
Reference in New Issue
Block a user