mirror of
https://github.com/ravenscroftj/turbopilot.git
synced 2024-10-01 01:06:01 -04:00
update for gpu build
This commit is contained in:
parent
b2b4a1480f
commit
4a47251822
@ -15,6 +15,11 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS "on")
|
||||
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
|
||||
set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib")
|
||||
|
||||
option(GGML_CLBLAST "ggml: use clBLAST" OFF)
|
||||
option(GGML_CUBLAS "ggml: use cuBLAS" OFF)
|
||||
|
||||
|
||||
|
||||
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm" OR ${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64")
|
||||
message(STATUS "ARM detected")
|
||||
if (MSVC)
|
||||
@ -49,12 +54,19 @@ if (GGML_STATIC)
|
||||
SET(BUILD_SHARED_LIBS OFF)
|
||||
SET(CMAKE_EXE_LINKER_FLAGS "-static")
|
||||
|
||||
# if(GGML_OPENBLAS)
|
||||
# set(BLA_STATIC ON)
|
||||
# endif()
|
||||
if (GGML_CUBLAS)
|
||||
cmake_minimum_required(VERSION 3.17)
|
||||
|
||||
find_package(CUDAToolkit)
|
||||
if (CUDAToolkit_FOUND)
|
||||
add_compile_definitions(GGML_USE_CUBLAS)
|
||||
else()
|
||||
message(WARNING "cuBLAS not found")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
|
||||
|
||||
add_subdirectory(src)
|
||||
|
||||
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
|
@ -68,4 +68,5 @@ protected:
|
||||
std::mutex model_lock;
|
||||
};
|
||||
|
||||
#endif //__TURBOPILOT_MODEL_H
|
||||
|
||||
#endif //__TURBOPILOT_MODEL_H
|
||||
|
@ -626,10 +626,11 @@ bool GPTNEOXModel::load_model(std::string fname) {
|
||||
|
||||
printf("inside ggml clblast check\n");
|
||||
|
||||
|
||||
if(config.n_gpu_layers > 0){
|
||||
size_t vram_total = 0;
|
||||
int gpu_layers = std::min(config.n_gpu_layers, model->hparams.n_layer);
|
||||
spdlog::info("Attempting to offload %d layers to GPU", gpu_layers);
|
||||
spdlog::info("Attempting to offload {} layers to GPU", gpu_layers);
|
||||
|
||||
|
||||
for(int i=0; i < gpu_layers; i++) {
|
||||
|
@ -691,7 +691,7 @@ bool StarcoderModel::load_model(std::string fname) {
|
||||
if(config.n_gpu_layers > 0){
|
||||
size_t vram_total = 0;
|
||||
int gpu_layers = std::min(config.n_gpu_layers, model->hparams.n_layer);
|
||||
spdlog::info("Attempting to offload %d layers to GPU", gpu_layers);
|
||||
spdlog::info("Attempting to offload {} layers to GPU", gpu_layers);
|
||||
|
||||
|
||||
for(int i=0; i < gpu_layers; i++) {
|
||||
|
Loading…
Reference in New Issue
Block a user