differentiate between init failure and unsupported models

Cebtenzzre 2023-10-04 15:51:46 -04:00 committed by Adam Treat
parent a5b93cf095
commit 672cb850f9
4 changed files with 24 additions and 8 deletions

View File

@@ -301,8 +301,9 @@ bool LLamaModel::initializeGPUDevice(size_t memoryRequired, const std::string& device)
 #endif
 }
 
-bool LLamaModel::initializeGPUDevice(const LLModel::GPUDevice &device)
+bool LLamaModel::initializeGPUDevice(const LLModel::GPUDevice &device, std::string *unavail_reason)
 {
+    bool result = false;
 #if defined(GGML_USE_KOMPUTE)
     ggml_vk_device vkDevice;
     vkDevice.index = device.index;
@@ -310,10 +311,16 @@
     vkDevice.heapSize = device.heapSize;
     vkDevice.name = device.name;
     vkDevice.vendor = device.vendor;
-    return ggml_vk_init_device(vkDevice);
+    result = ggml_vk_init_device(vkDevice);
+    if (!result && unavail_reason) {
+        *unavail_reason = "failed to init device";
+    }
 #else
-    return false;
+    if (unavail_reason) {
+        *unavail_reason = "built without kompute";
+    }
 #endif
+    return result;
 }
 
 bool LLamaModel::initializeGPUDevice(int device)
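
A minimal caller sketch, assuming code that already holds an LLModel* and a chosen GPUDevice (the helper name tryGPU is hypothetical, not part of this commit): with the new out-parameter, a failed GPU init can surface which reason applied instead of a generic failure.

    #include <iostream>
    #include <string>
    // Assumes the llmodel.h declarations from this commit are in scope.

    void tryGPU(LLModel *model, const LLModel::GPUDevice &device) {
        std::string unavail_reason;
        if (!model->initializeGPUDevice(device, &unavail_reason)) {
            // unavail_reason now distinguishes "failed to init device",
            // "built without kompute", and the base-class default
            // "unsupported model type".
            std::cerr << "falling back to CPU: " << unavail_reason << '\n';
        }
    }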

View File

@@ -27,7 +27,7 @@ public:
     int32_t threadCount() const override;
     std::vector<GPUDevice> availableGPUDevices(size_t memoryRequired) override;
     bool initializeGPUDevice(size_t memoryRequired, const std::string& device) override;
-    bool initializeGPUDevice(const GPUDevice &device) override;
+    bool initializeGPUDevice(const GPUDevice &device, std::string *unavail_reason) override;
     bool initializeGPUDevice(int device) override;
     bool hasGPUDevice() override;
     bool usingGPUDevice() override;

View File

@@ -97,7 +97,12 @@ public:
     virtual std::vector<GPUDevice> availableGPUDevices(size_t /*memoryRequired*/) { return std::vector<GPUDevice>(); }
     virtual bool initializeGPUDevice(size_t /*memoryRequired*/, const std::string& /*device*/) { return false; }
-    virtual bool initializeGPUDevice(const GPUDevice &/*device*/) { return false; }
+    virtual bool initializeGPUDevice(const GPUDevice &/*device*/, std::string *unavail_reason = nullptr) {
+        if (unavail_reason) {
+            *unavail_reason = "unsupported model type";
+        }
+        return false;
+    }
     virtual bool initializeGPUDevice(int /*device*/) { return false; }
     virtual bool hasGPUDevice() { return false; }
     virtual bool usingGPUDevice() { return false; }
 
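
Because the new parameter defaults to nullptr in this base-class declaration, existing call sites compile unchanged, and the override above need not repeat the default (default arguments bind to the static type). A sketch of both call forms, with a hypothetical model pointer:

    model->initializeGPUDevice(device);            // old form: reason discarded
    std::string reason;
    model->initializeGPUDevice(device, &reason);   // new form: reason captured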

View File

@@ -282,11 +282,14 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
             }
         }
 
-        if (!device || !m_llModelInfo.model->initializeGPUDevice(*device)) {
-            emit reportFallbackReason("<br>Using CPU: failed to init device");
+        emit reportFallbackReason(""); // no fallback yet
+        std::string unavail_reason;
+        if (!device) {
+            // GPU not available
+        } else if (!m_llModelInfo.model->initializeGPUDevice(*device, &unavail_reason)) {
+            emit reportFallbackReason(QString::fromStdString("<br>Using CPU: " + unavail_reason));
         } else {
             actualDevice = QString::fromStdString(device->name);
-            emit reportFallbackReason(""); // no fallback yet
         }
     }
 
@@ -306,6 +309,7 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
         // We might have had to fallback to CPU after load if the model is not possible to accelerate
         // for instance if the quantization method is not supported on Vulkan yet
         emit reportDevice("CPU");
+        // TODO(cebtenzzre): report somewhere if llamamodel decided the model was not supported
         emit reportFallbackReason("<br>Using CPU: unsupported quantization type");
         }
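
The TODO marks the gap this commit leaves open: when the model falls back to CPU only after loading (for example, an unsupported quantization type), the fallback reason is still a hard-coded string rather than one threaded through from llamamodel like the init-time reasons above.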