From cd9baacdd7aa47ad6a3614cd33212fad9a869243 Mon Sep 17 00:00:00 2001
From: James Ravenscroft
Date: Wed, 23 Aug 2023 14:56:36 +0000
Subject: [PATCH] add debug logs to stablecode

---
Reviewer notes (text in this section is ignored when the patch is applied):
 * Line breaks and indentation inside the hunks were re-flowed from a
   whitespace-collapsed copy; hunk line counts match the @@ headers, but
   blank context lines and indent widths should be checked against the tree.
 * `program.get("--debug")` now spells out `<bool>`: argparse's `get`
   defaults to std::string, which neither matches the bool stored by
   default_value(false)/implicit_value(true) nor works as an `if` condition.

 src/gptneox.cpp | 16 +++++++++++++++-
 src/main.cpp    | 16 +++++++++++++++-
 2 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/src/gptneox.cpp b/src/gptneox.cpp
index fc3fcf0..b7fca3b 100644
--- a/src/gptneox.cpp
+++ b/src/gptneox.cpp
@@ -638,6 +638,12 @@ std::stringstream GPTNEOXModel::predict(std::string prompt, int max_length, bool

     gpt_neox_eval((*model), config.n_threads, 0, { 0, 1, 2, 3 }, logits, mem_per_token);

+    const int64_t t_start_us = ggml_time_us();
+
+    int64_t t_prompt_us = 0;
+
+    int64_t t_response_us = 0;
+
     for (int i = embd.size(); i < embd_inp.size() + n_predict; i++) {
         // predict
         if (embd.size() > 0) {
@@ -694,7 +700,8 @@ std::stringstream GPTNEOXModel::predict(std::string prompt, int max_length, bool
             }
             i += embd.size() - 1;
         }
-
+
+

         // end of text token
         //if (embd.back() == 50256) {
@@ -702,6 +709,13 @@ std::stringstream GPTNEOXModel::predict(std::string prompt, int max_length, bool
             break;
         }
     }
+
+    t_response_us = ggml_time_us() - t_start_us - t_prompt_us;
+
+    spdlog::debug("{}: sample time = {:8.2f} ms\n", __func__, t_sample_us/1000.0f);
+    spdlog::debug("{}: predict time = {:8.2f} ms / {:.2f} ms per token\n", __func__, t_predict_us/1000.0f, t_predict_us/1000.0f/n_past);
+
+
     return result;
 }

diff --git a/src/main.cpp b/src/main.cpp
index 9d04be5..c6ef84c 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -14,10 +14,18 @@
 #include "turbopilot/gptneox.hpp"
 #include "turbopilot/server.hpp"

+
+#define TURBOPILOT_VERSION "1.1.0"
+
 int main(int argc, char **argv)
 {

-    argparse::ArgumentParser program("turbopilot");
+    argparse::ArgumentParser program("turbopilot", TURBOPILOT_VERSION);
+
+    program.add_argument("--debug")
+        .default_value(false)
+        .help("Output verbose logs and timings")
+        .implicit_value(true);

     program.add_argument("-f", "--model-file")
         .help("Path to the model that turbopilot should serve")
@@ -56,6 +64,7 @@ int main(int argc, char **argv)

     program.add_argument("prompt").remaining();

+
     try
     {
         program.parse_args(argc, argv);
@@ -67,6 +76,11 @@ int main(int argc, char **argv)
         return 1;
     }

+    if(program.get<bool>("--debug")){
+        spdlog::set_level(spdlog::level::level_enum::debug);
+        spdlog::debug("debug logging enabled");
+    }
+
     ggml_time_init();

     const int64_t t_main_start_us = ggml_time_us();