diff --git a/src/gptneox.cpp b/src/gptneox.cpp index 95bec1b..d80d1dc 100644 --- a/src/gptneox.cpp +++ b/src/gptneox.cpp @@ -624,8 +624,6 @@ bool GPTNEOXModel::load_model(std::string fname) { #if defined(GGML_USE_CLBLAST) || defined(GGML_USE_CUBLAS) - printf("inside ggml clblast check\n"); - if(config.n_gpu_layers > 0){ size_t vram_total = 0; @@ -653,7 +651,7 @@ bool GPTNEOXModel::load_model(std::string fname) { #endif } - fprintf(stderr, "%s: [GPU] total VRAM used: %zu MB\n", __func__, vram_total / 1024 / 1024); + spdlog::info("{}: [GPU] total VRAM used: {} MB", __func__, vram_total / 1024 / 1024); } #endif // defined(GGML_USE_CLBLAST) || defined(GGML_USE_CUBLAS) diff --git a/src/starcoder.cpp b/src/starcoder.cpp index b6c8faa..9ffe3e6 100644 --- a/src/starcoder.cpp +++ b/src/starcoder.cpp @@ -686,8 +686,6 @@ bool StarcoderModel::load_model(std::string fname) { #if defined(GGML_USE_CLBLAST) || defined(GGML_USE_CUBLAS) - printf("inside ggml clblast check\n"); - if(config.n_gpu_layers > 0){ size_t vram_total = 0; int gpu_layers = std::min(config.n_gpu_layers, model->hparams.n_layer); @@ -714,7 +712,7 @@ bool StarcoderModel::load_model(std::string fname) { #endif } - fprintf(stderr, "%s: [GPU] total VRAM used: %zu MB\n", __func__, vram_total / 1024 / 1024); + spdlog::info("{}: [GPU] total VRAM used: {} MB", __func__, vram_total / 1024 / 1024); } #endif // defined(GGML_USE_CLBLAST) || defined(GGML_USE_CUBLAS)