From cd9baacdd7aa47ad6a3614cd33212fad9a869243 Mon Sep 17 00:00:00 2001 From: James Ravenscroft Date: Wed, 23 Aug 2023 14:56:36 +0000 Subject: [PATCH 1/3] add debug logs to stablecode --- src/gptneox.cpp | 16 +++++++++++++++- src/main.cpp | 16 +++++++++++++++- 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/src/gptneox.cpp b/src/gptneox.cpp index fc3fcf0..b7fca3b 100644 --- a/src/gptneox.cpp +++ b/src/gptneox.cpp @@ -638,6 +638,12 @@ std::stringstream GPTNEOXModel::predict(std::string prompt, int max_length, bool gpt_neox_eval((*model), config.n_threads, 0, { 0, 1, 2, 3 }, logits, mem_per_token); + const int64_t t_start_us = ggml_time_us(); + + int64_t t_prompt_us = 0; + + int64_t t_response_us = 0; + for (int i = embd.size(); i < embd_inp.size() + n_predict; i++) { // predict if (embd.size() > 0) { @@ -694,7 +700,8 @@ std::stringstream GPTNEOXModel::predict(std::string prompt, int max_length, bool } i += embd.size() - 1; } - + + // end of text token //if (embd.back() == 50256) { @@ -702,6 +709,13 @@ std::stringstream GPTNEOXModel::predict(std::string prompt, int max_length, bool break; } } + + t_response_us = ggml_time_us() - t_start_us - t_prompt_us; + + spdlog::debug("{}: sample time = {:8.2f} ms\n", __func__, t_sample_us/1000.0f); + spdlog::debug("{}: predict time = {:8.2f} ms / {:.2f} ms per token\n", __func__, t_predict_us/1000.0f, t_predict_us/1000.0f/n_past); + + return result; } diff --git a/src/main.cpp b/src/main.cpp index 9d04be5..c6ef84c 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -14,10 +14,18 @@ #include "turbopilot/gptneox.hpp" #include "turbopilot/server.hpp" + +#define TURBOPILOT_VERSION "1.1.0" + int main(int argc, char **argv) { - argparse::ArgumentParser program("turbopilot"); + argparse::ArgumentParser program("turbopilot", TURBOPILOT_VERSION); + + program.add_argument("--debug") + .default_value(false) + .help("Output verbose logs and timings") + .implicit_value(true); program.add_argument("-f", "--model-file") .help("Path 
to the model that turbopilot should serve") @@ -56,6 +64,7 @@ int main(int argc, char **argv) program.add_argument("prompt").remaining(); + try { program.parse_args(argc, argv); @@ -67,6 +76,11 @@ int main(int argc, char **argv) return 1; } + if(program.get<bool>("--debug")){ + spdlog::set_level(spdlog::level::level_enum::debug); + spdlog::debug("debug logging enabled"); + } + ggml_time_init(); const int64_t t_main_start_us = ggml_time_us(); From bad53ad19001e142c5c7c03f972e1e87dae8564e Mon Sep 17 00:00:00 2001 From: James Ravenscroft Date: Wed, 23 Aug 2023 14:57:31 +0000 Subject: [PATCH 2/3] add debug logs to codegen --- src/gptj.cpp | 4 ++++ src/gptneox.cpp | 2 -- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/gptj.cpp b/src/gptj.cpp index 64ebd69..f00e107 100644 --- a/src/gptj.cpp +++ b/src/gptj.cpp @@ -642,5 +642,9 @@ std::stringstream GPTJModel::predict(std::string prompt, int max_length, bool in } } + spdlog::debug("{}: sample time = {:8.2f} ms\n", __func__, t_sample_us/1000.0f); + spdlog::debug("{}: predict time = {:8.2f} ms / {:.2f} ms per token\n", __func__, t_predict_us/1000.0f, t_predict_us/1000.0f/n_past); + + return result; } diff --git a/src/gptneox.cpp b/src/gptneox.cpp index b7fca3b..51665c7 100644 --- a/src/gptneox.cpp +++ b/src/gptneox.cpp @@ -709,8 +709,6 @@ std::stringstream GPTNEOXModel::predict(std::string prompt, int max_length, bool break; } } - - t_response_us = ggml_time_us() - t_start_us - t_prompt_us; spdlog::debug("{}: sample time = {:8.2f} ms\n", __func__, t_sample_us/1000.0f); spdlog::debug("{}: predict time = {:8.2f} ms / {:.2f} ms per token\n", __func__, t_predict_us/1000.0f, t_predict_us/1000.0f/n_past); From 6a72e32dab4fda1f06b96ec47616f89625a122a6 Mon Sep 17 00:00:00 2001 From: James Ravenscroft Date: Wed, 23 Aug 2023 15:53:38 +0000 Subject: [PATCH 3/3] add debug output to starcoder --- src/starcoder.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/starcoder.cpp b/src/starcoder.cpp index 74aaea8..c196f3c 
100644 --- a/src/starcoder.cpp +++ b/src/starcoder.cpp @@ -766,6 +766,10 @@ std::stringstream StarcoderModel::predict(std::string prompt, int max_length, bo break; } } + spdlog::debug("{}: sample time = {:8.2f} ms\n", __func__, t_sample_us/1000.0f); + spdlog::debug("{}: predict time = {:8.2f} ms / {:.2f} ms per token\n", __func__, t_predict_us/1000.0f, t_predict_us/1000.0f/n_past); + + return result; }