update for gpu build

2024-10-01 01:06:01 -04:00 · 2023-08-21 20:40:17 +01:00 · 2023-08-21 20:40:17 +01:00 · 4a47251822
commit 4a47251822
parent b2b4a1480f
4 changed files with 20 additions and 6 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -15,6 +15,11 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS "on")
 set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
 set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib")

+option(GGML_CLBLAST                 "ggml: use clBLAST"                  OFF)  # clBLAST switch; off unless set at configure time
+option(GGML_CUBLAS                  "ggml: use cuBLAS"                   OFF)  # cuBLAS switch; gates the CUDAToolkit lookup further down
+
+
+
 if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm" OR ${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64")
    message(STATUS "ARM detected")
    if (MSVC)
@ -49,12 +54,19 @@ if (GGML_STATIC)
    SET(BUILD_SHARED_LIBS OFF)
    SET(CMAKE_EXE_LINKER_FLAGS "-static")

-    # if(GGML_OPENBLAS)
-    #     set(BLA_STATIC ON)
-    # endif()
+endif()  # terminate if (GGML_STATIC) here; without this the cuBLAS block swallows its endif() and the file has an unclosed if()
+if (GGML_CUBLAS)  # opt-in GPU build; the option is declared OFF by default near the top of this file
+    cmake_minimum_required(VERSION 3.17)  # the CUDAToolkit find module first shipped with CMake 3.17
+    find_package(CUDAToolkit)  # intentionally not REQUIRED: a missing toolkit downgrades to a warning below
+    if (CUDAToolkit_FOUND)
+        add_compile_definitions(GGML_USE_CUBLAS)  # directory-scoped, so it also applies to add_subdirectory(src) below
+    else()
+        message(WARNING "cuBLAS not found")
+    endif()
 endif()


+
 add_subdirectory(src)

 set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
--- a/include/turbopilot/model.hpp
+++ b/include/turbopilot/model.hpp
@ -68,4 +68,5 @@ protected:
    std::mutex model_lock;
 };

-#endif //__TURBOPILOT_MODEL_H
+
+#endif //__TURBOPILOT_MODEL_H
--- a/src/gptneox.cpp
+++ b/src/gptneox.cpp
@ -626,10 +626,11 @@ bool GPTNEOXModel::load_model(std::string fname) {

    printf("inside ggml clblast check\n");

+
    if(config.n_gpu_layers > 0){
        size_t vram_total = 0;
        int gpu_layers = std::min(config.n_gpu_layers, model->hparams.n_layer);
-        spdlog::info("Attempting to offload %d layers to GPU", gpu_layers);
+        spdlog::info("Attempting to offload {} layers to GPU", gpu_layers);


        for(int i=0; i < gpu_layers; i++) {
--- a/src/starcoder.cpp
+++ b/src/starcoder.cpp
@ -691,7 +691,7 @@ bool StarcoderModel::load_model(std::string fname) {
    if(config.n_gpu_layers > 0){
        size_t vram_total = 0;
        int gpu_layers = std::min(config.n_gpu_layers, model->hparams.n_layer);
-        spdlog::info("Attempting to offload %d layers to GPU", gpu_layers);
+        spdlog::info("Attempting to offload {} layers to GPU", gpu_layers);


        for(int i=0; i < gpu_layers; i++) {