Update for GPU build

James Ravenscroft 2023-08-21 20:40:17 +01:00
parent b2b4a1480f
commit 4a47251822
4 changed files with 20 additions and 6 deletions


@@ -15,6 +15,11 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS "on")
 set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
 set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib")
+option(GGML_CLBLAST "ggml: use clBLAST" OFF)
+option(GGML_CUBLAS "ggml: use cuBLAS" OFF)
 if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm" OR ${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64")
     message(STATUS "ARM detected")
     if (MSVC)
@@ -49,10 +54,17 @@ if (GGML_STATIC)
     SET(BUILD_SHARED_LIBS OFF)
     SET(CMAKE_EXE_LINKER_FLAGS "-static")
-    # if(GGML_OPENBLAS)
-    #     set(BLA_STATIC ON)
-    # endif()
+if (GGML_CUBLAS)
+    cmake_minimum_required(VERSION 3.17)
+
+    find_package(CUDAToolkit)
+    if (CUDAToolkit_FOUND)
+        add_compile_definitions(GGML_USE_CUBLAS)
+    else()
+        message(WARNING "cuBLAS not found")
+    endif()
 endif()
+endif()
 
 add_subdirectory(src)
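
The hunk above wires in a new GGML_CUBLAS switch: enabling it at configure time (cmake -DGGML_CUBLAS=ON) looks for the CUDA toolkit and, if found, defines GGML_USE_CUBLAS for the whole build. As a minimal sketch of how such a compile definition is consumed downstream (this program is illustrative only, not TurboPilot's actual source):

#include <cstdio>

int main() {
#ifdef GGML_USE_CUBLAS
    // Configured with -DGGML_CUBLAS=ON and a CUDA toolkit present:
    // GPU-specific code paths are compiled in.
    std::printf("built with cuBLAS support\n");
#else
    // CPU-only configuration: the definition is absent.
    std::printf("CPU-only build\n");
#endif
    return 0;
}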


@@ -68,4 +68,5 @@ protected:
     std::mutex model_lock;
 };
 
 #endif //__TURBOPILOT_MODEL_H
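
For context, a std::mutex member like model_lock is normally taken with a std::lock_guard so that concurrent requests serialize access to the model. A minimal sketch, with hypothetical class and method names:

#include <mutex>

class Model {
public:
    void predict() {
        // Hold the lock for the duration of inference so only one
        // request touches the model state at a time.
        std::lock_guard<std::mutex> guard(model_lock);
        // ... run inference ...
    }
protected:
    std::mutex model_lock;
};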


@@ -626,10 +626,11 @@ bool GPTNEOXModel::load_model(std::string fname) {
     printf("inside ggml clblast check\n");
     if(config.n_gpu_layers > 0){
         size_t vram_total = 0;
         int gpu_layers = std::min(config.n_gpu_layers, model->hparams.n_layer);
-        spdlog::info("Attempting to offload %d layers to GPU", gpu_layers);
+        spdlog::info("Attempting to offload {} layers to GPU", gpu_layers);
         for(int i=0; i < gpu_layers; i++) {
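
The fix here is that spdlog formats messages with fmt-style {} placeholders rather than printf-style %d, so the old call would have logged the literal "%d" without substituting gpu_layers. Assuming spdlog is available, the corrected call behaves like this:

#include <spdlog/spdlog.h>

int main() {
    int gpu_layers = 12;
    // fmt-style "{}" substitutes the argument; a printf-style "%d"
    // would be printed verbatim by spdlog's fmt backend.
    spdlog::info("Attempting to offload {} layers to GPU", gpu_layers);
    return 0;
}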


@@ -691,7 +691,7 @@ bool StarcoderModel::load_model(std::string fname) {
     if(config.n_gpu_layers > 0){
         size_t vram_total = 0;
         int gpu_layers = std::min(config.n_gpu_layers, model->hparams.n_layer);
-        spdlog::info("Attempting to offload %d layers to GPU", gpu_layers);
+        spdlog::info("Attempting to offload {} layers to GPU", gpu_layers);
         for(int i=0; i < gpu_layers; i++) {
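
Both load_model hunks clamp the requested offload count with std::min so a caller cannot ask for more layers than the model has. A standalone illustration, with hypothetical values:

#include <algorithm>
#include <cstdio>

int main() {
    int n_gpu_layers = 100;  // hypothetical value from user config
    int n_layer = 24;        // hypothetical model depth
    // Never offload more layers than the model actually contains.
    int gpu_layers = std::min(n_gpu_layers, n_layer);
    std::printf("offloading %d layers\n", gpu_layers);  // prints 24
    return 0;
}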