diff --git a/gpt4all-backend/llama.cpp-mainline b/gpt4all-backend/llama.cpp-mainline
index e3c4f65d..a3f03b7e 160000
--- a/gpt4all-backend/llama.cpp-mainline
+++ b/gpt4all-backend/llama.cpp-mainline
@@ -1 +1 @@
-Subproject commit e3c4f65d786d26f1daa7aebfb1b67cd6c31ea082
+Subproject commit a3f03b7e793ee611c4918235d4532ee535a9530d
diff --git a/gpt4all-chat/chatllm.cpp b/gpt4all-chat/chatllm.cpp
index a0ab7fb6..a6af93e0 100644
--- a/gpt4all-chat/chatllm.cpp
+++ b/gpt4all-chat/chatllm.cpp
@@ -7,6 +7,16 @@
 #include "mysettings.h"
 #include "../gpt4all-backend/llmodel.h"
 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
 #include 
 
 //#define DEBUG
@@ -344,20 +354,36 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
                     return m_shouldBeLoaded;
                 });
 
-                // Pick the best match for the device
-                QString actualDevice = m_llModelInfo.model->implementation().buildVariant() == "metal" ? "Metal" : "CPU";
-                const QString requestedDevice = MySettings::globalInstance()->device();
-                if (requestedDevice == "CPU") {
-                    emit reportFallbackReason(""); // fallback not applicable
-                } else {
-                    const size_t requiredMemory = m_llModelInfo.model->requiredMem(filePath.toStdString(), n_ctx, ngl);
-                    std::vector<LLModel::GPUDevice> availableDevices = m_llModelInfo.model->availableGPUDevices(requiredMemory);
-                    LLModel::GPUDevice *device = nullptr;
+                emit reportFallbackReason(""); // no fallback yet
 
-                    if (!availableDevices.empty() && requestedDevice == "Auto" && availableDevices.front().type == 2 /*a discrete gpu*/) {
-                        device = &availableDevices.front();
-                    } else {
-                        for (LLModel::GPUDevice &d : availableDevices) {
+                auto approxDeviceMemGB = [](const LLModel::GPUDevice *dev) {
+                    float memGB = dev->heapSize / float(1024 * 1024 * 1024);
+                    return std::floor(memGB * 10.f) / 10.f; // truncate to 1 decimal place
+                };
+
+                std::vector<LLModel::GPUDevice> availableDevices;
+                const LLModel::GPUDevice *defaultDevice = nullptr;
+                {
+                    const size_t requiredMemory = m_llModelInfo.model->requiredMem(filePath.toStdString(), n_ctx, ngl);
+                    availableDevices = m_llModelInfo.model->availableGPUDevices(requiredMemory);
+                    if (!availableDevices.empty() && availableDevices.front().type == 2 /*a discrete gpu*/) {
+                        defaultDevice = &availableDevices.front();
+                        float memGB = defaultDevice->heapSize / float(1024 * 1024 * 1024);
+                        memGB = std::floor(memGB * 10.f) / 10.f; // truncate to 1 decimal place
+                        modelLoadProps.insert("default_device", QString::fromStdString(defaultDevice->name));
+                        modelLoadProps.insert("default_device_mem", approxDeviceMemGB(defaultDevice));
+                    }
+                }
+
+                const QString requestedDevice = MySettings::globalInstance()->device();
+                bool isMetal = m_llModelInfo.model->implementation().buildVariant() == "metal";
+
+                // Pick the best match for the device
+                QString actualDevice = isMetal ? "Metal" : "CPU";
+                if (!isMetal && requestedDevice != "CPU") {
+                    const auto *device = defaultDevice;
+                    if (requestedDevice != "Auto") {
+                        for (const LLModel::GPUDevice &d : availableDevices) {
                             if (QString::fromStdString(d.name) == requestedDevice) {
                                 device = &d;
                                 break;
@@ -365,7 +391,6 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
                         }
                     }
 
-                    emit reportFallbackReason(""); // no fallback yet
                     std::string unavail_reason;
                     if (!device) {
                         // GPU not available
@@ -373,6 +398,7 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
                         emit reportFallbackReason(QString::fromStdString("<br>" + unavail_reason));
                     } else {
                         actualDevice = QString::fromStdString(device->name);
+                        modelLoadProps.insert("requested_device_mem", approxDeviceMemGB(device));
                     }
                 }