From 4a4725182256a9649157430238668a1825be5836 Mon Sep 17 00:00:00 2001 From: James Ravenscroft Date: Mon, 21 Aug 2023 20:40:17 +0100 Subject: [PATCH] update for gpu build --- CMakeLists.txt | 18 +++++++++++++++--- include/turbopilot/model.hpp | 3 ++- src/gptneox.cpp | 3 ++- src/starcoder.cpp | 2 +- 4 files changed, 20 insertions(+), 6 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 45a9fe7..a05c924 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,6 +15,11 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS "on") set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib") +option(GGML_CLBLAST "ggml: use clBLAST" OFF) +option(GGML_CUBLAS "ggml: use cuBLAS" OFF) + + + if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm" OR ${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64") message(STATUS "ARM detected") if (MSVC) @@ -49,12 +54,19 @@ if (GGML_STATIC) SET(BUILD_SHARED_LIBS OFF) SET(CMAKE_EXE_LINKER_FLAGS "-static") - # if(GGML_OPENBLAS) - # set(BLA_STATIC ON) - # endif() +if (GGML_CUBLAS) + cmake_minimum_required(VERSION 3.17) + + find_package(CUDAToolkit) + if (CUDAToolkit_FOUND) + add_compile_definitions(GGML_USE_CUBLAS) + else() + message(WARNING "cuBLAS not found") + endif() endif() + add_subdirectory(src) set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) \ No newline at end of file diff --git a/include/turbopilot/model.hpp b/include/turbopilot/model.hpp index 134b9f4..be0d9fd 100644 --- a/include/turbopilot/model.hpp +++ b/include/turbopilot/model.hpp @@ -68,4 +68,5 @@ protected: std::mutex model_lock; }; -#endif //__TURBOPILOT_MODEL_H \ No newline at end of file + +#endif //__TURBOPILOT_MODEL_H diff --git a/src/gptneox.cpp b/src/gptneox.cpp index 0da58f4..95bec1b 100644 --- a/src/gptneox.cpp +++ b/src/gptneox.cpp @@ -626,10 +626,11 @@ bool GPTNEOXModel::load_model(std::string fname) { printf("inside ggml clblast check\n"); + if(config.n_gpu_layers > 0){ size_t vram_total = 0; int gpu_layers = std::min(config.n_gpu_layers, model->hparams.n_layer); - spdlog::info("Attempting to offload %d layers to GPU", gpu_layers); + spdlog::info("Attempting to offload {} layers to GPU", gpu_layers); for(int i=0; i < gpu_layers; i++) { diff --git a/src/starcoder.cpp b/src/starcoder.cpp index 65d2420..b6c8faa 100644 --- a/src/starcoder.cpp +++ b/src/starcoder.cpp @@ -691,7 +691,7 @@ bool StarcoderModel::load_model(std::string fname) { if(config.n_gpu_layers > 0){ size_t vram_total = 0; int gpu_layers = std::min(config.n_gpu_layers, model->hparams.n_layer); - spdlog::info("Attempting to offload %d layers to GPU", gpu_layers); + spdlog::info("Attempting to offload {} layers to GPU", gpu_layers); for(int i=0; i < gpu_layers; i++) {