update for gpu build

2024-10-01 01:06:01 -04:00 · 2023-08-21 20:40:17 +01:00 · 2023-08-21 20:40:17 +01:00 · 5f5e9f90be
commit 5f5e9f90be
parent 68760434b2
4 changed files with 21 additions and 3 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -5,6 +5,11 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS "on")
 set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
 set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib")

+option(GGML_CLBLAST                 "ggml: use clBLAST"                  OFF)  # opt-in, default OFF; NOTE(review): no consumer visible in this hunk - presumably read by a subdirectory, verify
+option(GGML_CUBLAS                  "ggml: use cuBLAS"                   OFF)  # opt-in, default OFF; gates the CUDAToolkit lookup further down in this file
+
+
+
 if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm" OR ${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64")
    message(STATUS "ARM detected")
    if (MSVC)
@ -37,6 +42,17 @@ if (GGML_STATIC)
    SET(CMAKE_EXE_LINKER_FLAGS "-static")
 endif()

+if (GGML_CUBLAS)  # Optional cuBLAS support: runs only when the GGML_CUBLAS option is ON.
+    cmake_minimum_required(VERSION 3.17)  # FindCUDAToolkit needs CMake >= 3.17. NOTE(review): calling this mid-file also resets policy settings - confirm that is intended.
+
+    find_package(CUDAToolkit)  # Not REQUIRED: a missing toolkit does not abort configuration.
+    if (CUDAToolkit_FOUND)
+        add_compile_definitions(GGML_USE_CUBLAS)  # Directory-scoped define; also reaches subdirectories added after this point.
+    else()
+        message(WARNING "cuBLAS not found")  # Warn only: configuration continues without GGML_USE_CUBLAS defined.
+    endif()
+endif()
+


 add_subdirectory(src)
--- a/include/turbopilot/model.hpp
+++ b/include/turbopilot/model.hpp
@ -62,4 +62,5 @@ protected:
    std::mt19937 &rng;
 };

+
 #endif //__TURBOPILOT_MODEL_H
--- a/src/gptneox.cpp
+++ b/src/gptneox.cpp
@ -620,10 +620,11 @@ bool GPTNEOXModel::load_model(std::string fname) {

    printf("inside ggml clblast check\n");

+
    if(config.n_gpu_layers > 0){
        size_t vram_total = 0;
        int gpu_layers = std::min(config.n_gpu_layers, model->hparams.n_layer);
-        spdlog::info("Attempting to offload %d layers to GPU", gpu_layers);
+        spdlog::info("Attempting to offload {} layers to GPU", gpu_layers);


        for(int i=0; i < gpu_layers; i++) {
--- a/src/starcoder.cpp
+++ b/src/starcoder.cpp
@ -691,7 +691,7 @@ bool StarcoderModel::load_model(std::string fname) {
    if(config.n_gpu_layers > 0){
        size_t vram_total = 0;
        int gpu_layers = std::min(config.n_gpu_layers, model->hparams.n_layer);
-        spdlog::info("Attempting to offload %d layers to GPU", gpu_layers);
+        spdlog::info("Attempting to offload {} layers to GPU", gpu_layers);


        for(int i=0; i < gpu_layers; i++) {