From 5f5e9f90be74465363c757a916eac9a5f74eea8f Mon Sep 17 00:00:00 2001 From: James Ravenscroft Date: Mon, 21 Aug 2023 20:40:17 +0100 Subject: [PATCH] update for gpu build --- CMakeLists.txt | 16 ++++++++++++++++ include/turbopilot/model.hpp | 3 ++- src/gptneox.cpp | 3 ++- src/starcoder.cpp | 2 +- 4 files changed, 21 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 22c174d..3a0a675 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,6 +5,11 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS "on") set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib") +option(GGML_CLBLAST "ggml: use clBLAST" OFF) +option(GGML_CUBLAS "ggml: use cuBLAS" OFF) + + + if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm" OR ${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64") message(STATUS "ARM detected") if (MSVC) @@ -37,6 +42,17 @@ if (GGML_STATIC) SET(CMAKE_EXE_LINKER_FLAGS "-static") endif() +if (GGML_CUBLAS) + cmake_minimum_required(VERSION 3.17) + + find_package(CUDAToolkit) + if (CUDAToolkit_FOUND) + add_compile_definitions(GGML_USE_CUBLAS) + else() + message(WARNING "cuBLAS not found") + endif() +endif() + add_subdirectory(src) diff --git a/include/turbopilot/model.hpp b/include/turbopilot/model.hpp index e5afca7..2753313 100644 --- a/include/turbopilot/model.hpp +++ b/include/turbopilot/model.hpp @@ -62,4 +62,5 @@ protected: std::mt19937 &rng; }; -#endif //__TURBOPILOT_MODEL_H \ No newline at end of file + +#endif //__TURBOPILOT_MODEL_H diff --git a/src/gptneox.cpp b/src/gptneox.cpp index 40558a9..696003f 100644 --- a/src/gptneox.cpp +++ b/src/gptneox.cpp @@ -620,10 +620,11 @@ bool GPTNEOXModel::load_model(std::string fname) { printf("inside ggml clblast check\n"); + if(config.n_gpu_layers > 0){ size_t vram_total = 0; int gpu_layers = std::min(config.n_gpu_layers, model->hparams.n_layer); - spdlog::info("Attempting to offload %d layers to GPU", gpu_layers); + spdlog::info("Attempting to offload {} layers to GPU", gpu_layers); for(int i=0; i < gpu_layers; i++) { diff --git a/src/starcoder.cpp b/src/starcoder.cpp index 5d16c43..daf7a4e 100644 --- a/src/starcoder.cpp +++ b/src/starcoder.cpp @@ -691,7 +691,7 @@ bool StarcoderModel::load_model(std::string fname) { if(config.n_gpu_layers > 0){ size_t vram_total = 0; int gpu_layers = std::min(config.n_gpu_layers, model->hparams.n_layer); - spdlog::info("Attempting to offload %d layers to GPU", gpu_layers); + spdlog::info("Attempting to offload {} layers to GPU", gpu_layers); for(int i=0; i < gpu_layers; i++) {