From cd9baacdd7aa47ad6a3614cd33212fad9a869243 Mon Sep 17 00:00:00 2001
From: James Ravenscroft <ravenscroftj@gmail.com>
Date: Wed, 23 Aug 2023 14:56:36 +0000
Subject: [PATCH 1/3] add --debug flag, version string and debug timing logs to stablecode

---
 src/gptneox.cpp | 16 +++++++++++++++-
 src/main.cpp    | 16 +++++++++++++++-
 2 files changed, 30 insertions(+), 2 deletions(-)
diff --git a/src/gptneox.cpp b/src/gptneox.cpp
index fc3fcf0..b7fca3b 100644
--- a/src/gptneox.cpp
+++ b/src/gptneox.cpp
@@ -638,6 +638,12 @@ std::stringstream GPTNEOXModel::predict(std::string prompt, int max_length, bool
 
     gpt_neox_eval((*model), config.n_threads, 0, { 0, 1, 2, 3 }, logits, mem_per_token);
 
+    const int64_t t_start_us = ggml_time_us();
+
+    int64_t t_prompt_us = 0;
+    
+    int64_t t_response_us = 0;
+
     for (int i = embd.size(); i < embd_inp.size() + n_predict; i++) {
         // predict
         if (embd.size() > 0) {
@@ -694,7 +700,8 @@ std::stringstream GPTNEOXModel::predict(std::string prompt, int max_length, bool
             }
             i += embd.size() - 1;
         }
-        
+
+
 
         // end of text token
         //if (embd.back() == 50256) {
@@ -702,6 +709,13 @@ std::stringstream GPTNEOXModel::predict(std::string prompt, int max_length, bool
             break;
         }
     }
+    
+    t_response_us = ggml_time_us() - t_start_us - t_prompt_us;
+
+    spdlog::debug("{}:   sample time = {:8.2f} ms\n", __func__, t_sample_us/1000.0f);
+    spdlog::debug("{}:  predict time = {:8.2f} ms / {:.2f} ms per token\n", __func__, t_predict_us/1000.0f, t_predict_us/1000.0f/n_past);
+    
+    
 
     return result;
 }
diff --git a/src/main.cpp b/src/main.cpp
index 9d04be5..c6ef84c 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -14,10 +14,18 @@
 #include "turbopilot/gptneox.hpp"
 #include "turbopilot/server.hpp"
 
+
+#define TURBOPILOT_VERSION "1.1.0"
+
 int main(int argc, char **argv)
 {
 
-    argparse::ArgumentParser program("turbopilot");
+    argparse::ArgumentParser program("turbopilot", TURBOPILOT_VERSION);
+
+    program.add_argument("--debug")
+        .default_value(false)
+        .help("Output verbose logs and timings")
+        .implicit_value(true);
 
     program.add_argument("-f", "--model-file")
         .help("Path to the model that turbopilot should serve")
@@ -56,6 +64,7 @@ int main(int argc, char **argv)
     program.add_argument("prompt").remaining();
 
 
+
     try
     {
         program.parse_args(argc, argv);
@@ -67,6 +76,11 @@ int main(int argc, char **argv)
         return 1;
     }
 
+    if(program.get<bool>("--debug")){
+        spdlog::set_level(spdlog::level::level_enum::debug);
+        spdlog::debug("debug logging enabled");
+    }
+
     ggml_time_init();
 
     const int64_t t_main_start_us = ggml_time_us();

From bad53ad19001e142c5c7c03f972e1e87dae8564e Mon Sep 17 00:00:00 2001
From: James Ravenscroft <ravenscroftj@gmail.com>
Date: Wed, 23 Aug 2023 14:57:31 +0000
Subject: [PATCH 2/3] add debug logs to codegen; drop t_response_us computation from gptneox

---
 src/gptj.cpp    | 4 ++++
 src/gptneox.cpp | 2 --
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/gptj.cpp b/src/gptj.cpp
index 64ebd69..f00e107 100644
--- a/src/gptj.cpp
+++ b/src/gptj.cpp
@@ -642,5 +642,9 @@ std::stringstream GPTJModel::predict(std::string prompt, int max_length, bool in
         }
     }
 
+    spdlog::debug("{}:   sample time = {:8.2f} ms\n", __func__, t_sample_us/1000.0f);
+    spdlog::debug("{}:  predict time = {:8.2f} ms / {:.2f} ms per token\n", __func__, t_predict_us/1000.0f, t_predict_us/1000.0f/n_past);
+    
+
     return result;
 }
diff --git a/src/gptneox.cpp b/src/gptneox.cpp
index b7fca3b..51665c7 100644
--- a/src/gptneox.cpp
+++ b/src/gptneox.cpp
@@ -709,8 +709,6 @@ std::stringstream GPTNEOXModel::predict(std::string prompt, int max_length, bool
             break;
         }
     }
-    
-    t_response_us = ggml_time_us() - t_start_us - t_prompt_us;
 
     spdlog::debug("{}:   sample time = {:8.2f} ms\n", __func__, t_sample_us/1000.0f);
     spdlog::debug("{}:  predict time = {:8.2f} ms / {:.2f} ms per token\n", __func__, t_predict_us/1000.0f, t_predict_us/1000.0f/n_past);

From 6a72e32dab4fda1f06b96ec47616f89625a122a6 Mon Sep 17 00:00:00 2001
From: James Ravenscroft <ravenscroftj@gmail.com>
Date: Wed, 23 Aug 2023 15:53:38 +0000
Subject: [PATCH 3/3] add debug output to starcoder

---
 src/starcoder.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/starcoder.cpp b/src/starcoder.cpp
index 74aaea8..c196f3c 100644
--- a/src/starcoder.cpp
+++ b/src/starcoder.cpp
@@ -766,6 +766,10 @@ std::stringstream StarcoderModel::predict(std::string prompt, int max_length, bo
             break;
         }
     }
+    spdlog::debug("{}:   sample time = {:8.2f} ms\n", __func__, t_sample_us/1000.0f);
+    spdlog::debug("{}:  predict time = {:8.2f} ms / {:.2f} ms per token\n", __func__, t_predict_us/1000.0f, t_predict_us/1000.0f/n_past);
+    
+    
 
     return result;
 }