Mirror of https://github.com/nomic-ai/gpt4all.git (synced 2024-10-01 01:06:10 -04:00)
Fix VRAM leak when model loading fails (#1901)
Signed-off-by: Jared Van Bortel <jared@nomic.ai>
parent e1eac00ee0
commit 10e3f7bbf5
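
The diffs below bump the llama.cpp submodule and make two fixes around model loading: LLamaModel::loadModel now frees any llama_model / llama_context left over from a previous call before loading again, and frees the freshly loaded model if context creation fails; ChatLLM::loadModel additionally returns early after reporting that a previous load attempt crashed. As a minimal sketch of that cleanup pattern against the llama.cpp C API of this era (the Loader struct, file path, and error message here are illustrative, not gpt4all code):

#include <llama.h>

#include <iostream>
#include <string>

// Minimal sketch of the leak-fix pattern: release stale allocations before
// reloading, and release the model if context creation fails.
struct Loader {
    llama_model   *model = nullptr;
    llama_context *ctx   = nullptr;

    bool load(const std::string &path) {
        // Free whatever a previous load() left behind before allocating again;
        // otherwise the old model's (V)RAM stays resident behind an overwritten pointer.
        if (model) { llama_free_model(model); model = nullptr; }
        if (ctx)   { llama_free(ctx);         ctx   = nullptr; }

        model = llama_load_model_from_file(path.c_str(), llama_model_default_params());
        if (!model)
            return false;

        ctx = llama_new_context_with_model(model, llama_context_default_params());
        if (!ctx) {
            // Context creation failed: free the model we just loaded so its
            // VRAM is returned instead of leaking.
            std::cerr << "failed to init context for " << path << '\n';
            llama_free_model(model);
            model = nullptr;
            return false;
        }
        return true;
    }

    ~Loader() {
        if (ctx)   llama_free(ctx);
        if (model) llama_free_model(model);
    }
};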
@@ -1 +1 @@
-Subproject commit 47aec1bcc09e090f0b8f196dc0a4e43b89507e4a
+Subproject commit cd1b5a104b9d3e211a50b9f6c261aced3bf09834
@@ -150,7 +150,15 @@ size_t LLamaModel::requiredMem(const std::string &modelPath, int n_ctx, int ngl)
 
 bool LLamaModel::loadModel(const std::string &modelPath, int n_ctx, int ngl)
 {
-    gpt_params params;
+    // clean up after previous loadModel()
+    if (d_ptr->model) {
+        llama_free_model(d_ptr->model);
+        d_ptr->model = nullptr;
+    }
+    if (d_ptr->ctx) {
+        llama_free(d_ptr->ctx);
+        d_ptr->ctx = nullptr;
+    }
 
     if (n_ctx < 8) {
         std::cerr << "warning: minimum context size is 8, using minimum size.\n";
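
A note on the design choice: doing this cleanup at the top of loadModel(), rather than in every error path of the previous call, makes a reload safe regardless of how the last attempt ended. Illustrative call sequence (file name and parameters hypothetical):

LLamaModel llm;
// First attempt fails partway (e.g. the requested context does not fit in VRAM):
// the error path now frees the model instead of leaking it.
if (!llm.loadModel("model.gguf", /*n_ctx=*/32768, /*ngl=*/100))
    // Retry with a smaller context; any leftover state is cleared first.
    llm.loadModel("model.gguf", /*n_ctx=*/2048, /*ngl=*/100);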
@@ -159,6 +167,8 @@ bool LLamaModel::loadModel(const std::string &modelPath, int n_ctx, int ngl)
 
     // -- load the model --
 
+    gpt_params params;
+
     d_ptr->model_params = llama_model_default_params();
 
     d_ptr->model_params.use_mmap = params.use_mmap;
@@ -215,8 +225,10 @@ bool LLamaModel::loadModel(const std::string &modelPath, int n_ctx, int ngl)
 
     d_ptr->ctx = llama_new_context_with_model(d_ptr->model, d_ptr->ctx_params);
     if (!d_ptr->ctx) {
-        d_ptr->device = -1;
         std::cerr << "LLAMA ERROR: failed to init context for model " << modelPath << std::endl;
+        llama_free_model(d_ptr->model);
+        d_ptr->model = nullptr;
+        d_ptr->device = -1;
         return false;
     }
 
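
For comparison only, the same guarantee could be expressed with RAII instead of manual frees. A hypothetical alternative sketch (not what this diff does, which keeps raw pointers in the private struct):

#include <llama.h>
#include <memory>

// Owning wrappers whose deleters call the llama.cpp free functions, so an
// early return cannot leak the model or the context.
using model_ptr = std::unique_ptr<llama_model,   decltype(&llama_free_model)>;
using ctx_ptr   = std::unique_ptr<llama_context, decltype(&llama_free)>;

static model_ptr make_model(const char *path) {
    return {llama_load_model_from_file(path, llama_model_default_params()), &llama_free_model};
}

static ctx_ptr make_ctx(llama_model *model) {
    return {llama_new_context_with_model(model, llama_context_default_params()), &llama_free};
}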
@@ -228,6 +228,7 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
         LLModelStore::globalInstance()->releaseModel(m_llModelInfo); // release back into the store
         m_llModelInfo = LLModelInfo();
         emit modelLoadingError(QString("Previous attempt to load model resulted in crash for `%1` most likely due to insufficient memory. You should either remove this model or decrease your system RAM usage by closing other applications.").arg(modelInfo.filename()));
+        return false;
     }
 
     if (fileInfo.exists()) {
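
Without the added return false;, control would fall past the closing brace of this error branch into the if (fileInfo.exists()) path below and attempt to load the model again, even after reporting that the previous attempt crashed.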