update for gpu build

This commit is contained in:
James Ravenscroft 2023-08-21 20:40:17 +01:00
parent 68760434b2
commit 5f5e9f90be
4 changed files with 21 additions and 3 deletions

View File

@ -5,6 +5,11 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS "on")
# Collect built runtime artifacts (executables) in <build-dir>/bin.
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
# Run-time library search path embedded in installed binaries: <install-prefix>/lib.
set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib")
# Optional GPU acceleration backends. Both default to OFF and are enabled at
# configure time, e.g. -DGGML_CUBLAS=ON.
option(GGML_CLBLAST "ggml: use clBLAST" OFF)
option(GGML_CUBLAS "ggml: use cuBLAS" OFF)
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm" OR ${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64")
message(STATUS "ARM detected")
if (MSVC)
@ -37,6 +42,17 @@ if (GGML_STATIC)
SET(CMAKE_EXE_LINKER_FLAGS "-static")
endif()
# cuBLAS support: only evaluated when the GGML_CUBLAS option is ON.
if (GGML_CUBLAS)
# The CUDAToolkit find module used below was introduced in CMake 3.17.
# NOTE(review): calling cmake_minimum_required() here, rather than once at the
# top of the file, also re-applies policy settings from this point on --
# confirm that is intended.
cmake_minimum_required(VERSION 3.17)
# Not REQUIRED: a missing toolkit is handled by the else() branch below.
find_package(CUDAToolkit)
if (CUDAToolkit_FOUND)
# Directory-scoped definition: GGML_USE_CUBLAS is added for targets created in
# this directory and in subdirectories added after this point.
add_compile_definitions(GGML_USE_CUBLAS)
else()
# Configuration continues without GGML_USE_CUBLAS being defined; the user only
# sees this warning.
message(WARNING "cuBLAS not found")
endif()
endif()
add_subdirectory(src)

View File

@ -62,4 +62,5 @@ protected:
std::mt19937 &rng;
};
#endif //__TURBOPILOT_MODEL_H

View File

@ -620,10 +620,11 @@ bool GPTNEOXModel::load_model(std::string fname) {
printf("inside ggml clblast check\n");
if(config.n_gpu_layers > 0){
size_t vram_total = 0;
int gpu_layers = std::min(config.n_gpu_layers, model->hparams.n_layer);
spdlog::info("Attempting to offload %d layers to GPU", gpu_layers);
spdlog::info("Attempting to offload {} layers to GPU", gpu_layers);
for(int i=0; i < gpu_layers; i++) {

View File

@ -691,7 +691,7 @@ bool StarcoderModel::load_model(std::string fname) {
if(config.n_gpu_layers > 0){
size_t vram_total = 0;
int gpu_layers = std::min(config.n_gpu_layers, model->hparams.n_layer);
spdlog::info("Attempting to offload %d layers to GPU", gpu_layers);
spdlog::info("Attempting to offload {} layers to GPU", gpu_layers);
for(int i=0; i < gpu_layers; i++) {