mirror of
https://github.com/ravenscroftj/turbopilot.git
synced 2024-10-01 01:06:01 -04:00
update for gpu build
This commit is contained in:
parent
68760434b2
commit
5f5e9f90be
@ -5,6 +5,11 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS "on")
|
|||||||
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
|
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
|
||||||
set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib")
|
set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib")
|
||||||
|
|
||||||
|
option(GGML_CLBLAST "ggml: use clBLAST" OFF)
|
||||||
|
option(GGML_CUBLAS "ggml: use cuBLAS" OFF)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm" OR ${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64")
|
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm" OR ${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64")
|
||||||
message(STATUS "ARM detected")
|
message(STATUS "ARM detected")
|
||||||
if (MSVC)
|
if (MSVC)
|
||||||
@ -37,6 +42,17 @@ if (GGML_STATIC)
|
|||||||
SET(CMAKE_EXE_LINKER_FLAGS "-static")
|
SET(CMAKE_EXE_LINKER_FLAGS "-static")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
if (GGML_CUBLAS)
|
||||||
|
cmake_minimum_required(VERSION 3.17)
|
||||||
|
|
||||||
|
find_package(CUDAToolkit)
|
||||||
|
if (CUDAToolkit_FOUND)
|
||||||
|
add_compile_definitions(GGML_USE_CUBLAS)
|
||||||
|
else()
|
||||||
|
message(WARNING "cuBLAS not found")
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
add_subdirectory(src)
|
add_subdirectory(src)
|
||||||
|
@ -62,4 +62,5 @@ protected:
|
|||||||
std::mt19937 &rng;
|
std::mt19937 &rng;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
#endif //__TURBOPILOT_MODEL_H
|
#endif //__TURBOPILOT_MODEL_H
|
@ -620,10 +620,11 @@ bool GPTNEOXModel::load_model(std::string fname) {
|
|||||||
|
|
||||||
printf("inside ggml clblast check\n");
|
printf("inside ggml clblast check\n");
|
||||||
|
|
||||||
|
|
||||||
if(config.n_gpu_layers > 0){
|
if(config.n_gpu_layers > 0){
|
||||||
size_t vram_total = 0;
|
size_t vram_total = 0;
|
||||||
int gpu_layers = std::min(config.n_gpu_layers, model->hparams.n_layer);
|
int gpu_layers = std::min(config.n_gpu_layers, model->hparams.n_layer);
|
||||||
spdlog::info("Attempting to offload %d layers to GPU", gpu_layers);
|
spdlog::info("Attempting to offload {} layers to GPU", gpu_layers);
|
||||||
|
|
||||||
|
|
||||||
for(int i=0; i < gpu_layers; i++) {
|
for(int i=0; i < gpu_layers; i++) {
|
||||||
|
@ -691,7 +691,7 @@ bool StarcoderModel::load_model(std::string fname) {
|
|||||||
if(config.n_gpu_layers > 0){
|
if(config.n_gpu_layers > 0){
|
||||||
size_t vram_total = 0;
|
size_t vram_total = 0;
|
||||||
int gpu_layers = std::min(config.n_gpu_layers, model->hparams.n_layer);
|
int gpu_layers = std::min(config.n_gpu_layers, model->hparams.n_layer);
|
||||||
spdlog::info("Attempting to offload %d layers to GPU", gpu_layers);
|
spdlog::info("Attempting to offload {} layers to GPU", gpu_layers);
|
||||||
|
|
||||||
|
|
||||||
for(int i=0; i < gpu_layers; i++) {
|
for(int i=0; i < gpu_layers; i++) {
|
||||||
|
Loading…
Reference in New Issue
Block a user