mirror of
https://github.com/ravenscroftj/turbopilot.git
synced 2024-10-01 01:06:01 -04:00
update for gpu build
This commit is contained in:
parent
b2b4a1480f
commit
4a47251822
@ -15,6 +15,11 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS "on")
|
|||||||
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
|
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
|
||||||
set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib")
|
set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib")
|
||||||
|
|
||||||
|
option(GGML_CLBLAST "ggml: use clBLAST" OFF)
|
||||||
|
option(GGML_CUBLAS "ggml: use cuBLAS" OFF)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm" OR ${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64")
|
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm" OR ${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64")
|
||||||
message(STATUS "ARM detected")
|
message(STATUS "ARM detected")
|
||||||
if (MSVC)
|
if (MSVC)
|
||||||
@ -49,10 +54,17 @@ if (GGML_STATIC)
|
|||||||
SET(BUILD_SHARED_LIBS OFF)
|
SET(BUILD_SHARED_LIBS OFF)
|
||||||
SET(CMAKE_EXE_LINKER_FLAGS "-static")
|
SET(CMAKE_EXE_LINKER_FLAGS "-static")
|
||||||
|
|
||||||
# if(GGML_OPENBLAS)
|
if (GGML_CUBLAS)
|
||||||
# set(BLA_STATIC ON)
|
cmake_minimum_required(VERSION 3.17)
|
||||||
# endif()
|
|
||||||
|
find_package(CUDAToolkit)
|
||||||
|
if (CUDAToolkit_FOUND)
|
||||||
|
add_compile_definitions(GGML_USE_CUBLAS)
|
||||||
|
else()
|
||||||
|
message(WARNING "cuBLAS not found")
|
||||||
endif()
|
endif()
|
||||||
|
endif()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
add_subdirectory(src)
|
add_subdirectory(src)
|
||||||
|
@ -68,4 +68,5 @@ protected:
|
|||||||
std::mutex model_lock;
|
std::mutex model_lock;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
#endif //__TURBOPILOT_MODEL_H
|
#endif //__TURBOPILOT_MODEL_H
|
@ -626,10 +626,11 @@ bool GPTNEOXModel::load_model(std::string fname) {
|
|||||||
|
|
||||||
printf("inside ggml clblast check\n");
|
printf("inside ggml clblast check\n");
|
||||||
|
|
||||||
|
|
||||||
if(config.n_gpu_layers > 0){
|
if(config.n_gpu_layers > 0){
|
||||||
size_t vram_total = 0;
|
size_t vram_total = 0;
|
||||||
int gpu_layers = std::min(config.n_gpu_layers, model->hparams.n_layer);
|
int gpu_layers = std::min(config.n_gpu_layers, model->hparams.n_layer);
|
||||||
spdlog::info("Attempting to offload %d layers to GPU", gpu_layers);
|
spdlog::info("Attempting to offload {} layers to GPU", gpu_layers);
|
||||||
|
|
||||||
|
|
||||||
for(int i=0; i < gpu_layers; i++) {
|
for(int i=0; i < gpu_layers; i++) {
|
||||||
|
@ -691,7 +691,7 @@ bool StarcoderModel::load_model(std::string fname) {
|
|||||||
if(config.n_gpu_layers > 0){
|
if(config.n_gpu_layers > 0){
|
||||||
size_t vram_total = 0;
|
size_t vram_total = 0;
|
||||||
int gpu_layers = std::min(config.n_gpu_layers, model->hparams.n_layer);
|
int gpu_layers = std::min(config.n_gpu_layers, model->hparams.n_layer);
|
||||||
spdlog::info("Attempting to offload %d layers to GPU", gpu_layers);
|
spdlog::info("Attempting to offload {} layers to GPU", gpu_layers);
|
||||||
|
|
||||||
|
|
||||||
for(int i=0; i < gpu_layers; i++) {
|
for(int i=0; i < gpu_layers; i++) {
|
||||||
|
Loading…
Reference in New Issue
Block a user