mirror of
https://github.com/ravenscroftj/turbopilot.git
synced 2024-09-29 20:25:56 +00:00
Merge pull request #54 from ravenscroftj/feature/debug-timing-logs
Implemented debug log level and added timings to model outputs
This commit is contained in:
commit
11f385066a
@ -642,5 +642,9 @@ std::stringstream GPTJModel::predict(std::string prompt, int max_length, bool in
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
spdlog::debug("{}: sample time = {:8.2f} ms\n", __func__, t_sample_us/1000.0f);
|
||||||
|
spdlog::debug("{}: predict time = {:8.2f} ms / {:.2f} ms per token\n", __func__, t_predict_us/1000.0f, t_predict_us/1000.0f/n_past);
|
||||||
|
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
@ -638,6 +638,12 @@ std::stringstream GPTNEOXModel::predict(std::string prompt, int max_length, bool
|
|||||||
|
|
||||||
gpt_neox_eval((*model), config.n_threads, 0, { 0, 1, 2, 3 }, logits, mem_per_token);
|
gpt_neox_eval((*model), config.n_threads, 0, { 0, 1, 2, 3 }, logits, mem_per_token);
|
||||||
|
|
||||||
|
const int64_t t_start_us = ggml_time_us();
|
||||||
|
|
||||||
|
int64_t t_prompt_us = 0;
|
||||||
|
|
||||||
|
int64_t t_response_us = 0;
|
||||||
|
|
||||||
for (int i = embd.size(); i < embd_inp.size() + n_predict; i++) {
|
for (int i = embd.size(); i < embd_inp.size() + n_predict; i++) {
|
||||||
// predict
|
// predict
|
||||||
if (embd.size() > 0) {
|
if (embd.size() > 0) {
|
||||||
@ -696,6 +702,7 @@ std::stringstream GPTNEOXModel::predict(std::string prompt, int max_length, bool
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// end of text token
|
// end of text token
|
||||||
//if (embd.back() == 50256) {
|
//if (embd.back() == 50256) {
|
||||||
if(embd.back() == END_TOKEN_ID){
|
if(embd.back() == END_TOKEN_ID){
|
||||||
@ -703,5 +710,10 @@ std::stringstream GPTNEOXModel::predict(std::string prompt, int max_length, bool
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
spdlog::debug("{}: sample time = {:8.2f} ms\n", __func__, t_sample_us/1000.0f);
|
||||||
|
spdlog::debug("{}: predict time = {:8.2f} ms / {:.2f} ms per token\n", __func__, t_predict_us/1000.0f, t_predict_us/1000.0f/n_past);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
16
src/main.cpp
16
src/main.cpp
@ -14,10 +14,18 @@
|
|||||||
#include "turbopilot/gptneox.hpp"
|
#include "turbopilot/gptneox.hpp"
|
||||||
#include "turbopilot/server.hpp"
|
#include "turbopilot/server.hpp"
|
||||||
|
|
||||||
|
|
||||||
|
#define TURBOPILOT_VERSION "1.1.0"
|
||||||
|
|
||||||
int main(int argc, char **argv)
|
int main(int argc, char **argv)
|
||||||
{
|
{
|
||||||
|
|
||||||
argparse::ArgumentParser program("turbopilot");
|
argparse::ArgumentParser program("turbopilot", TURBOPILOT_VERSION);
|
||||||
|
|
||||||
|
program.add_argument("--debug")
|
||||||
|
.default_value(false)
|
||||||
|
.help("Output verbose logs and timings")
|
||||||
|
.implicit_value(true);
|
||||||
|
|
||||||
program.add_argument("-f", "--model-file")
|
program.add_argument("-f", "--model-file")
|
||||||
.help("Path to the model that turbopilot should serve")
|
.help("Path to the model that turbopilot should serve")
|
||||||
@ -56,6 +64,7 @@ int main(int argc, char **argv)
|
|||||||
program.add_argument("prompt").remaining();
|
program.add_argument("prompt").remaining();
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
program.parse_args(argc, argv);
|
program.parse_args(argc, argv);
|
||||||
@ -67,6 +76,11 @@ int main(int argc, char **argv)
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if(program.get<bool>("--debug")){
|
||||||
|
spdlog::set_level(spdlog::level::level_enum::debug);
|
||||||
|
spdlog::debug("debug logging enabled");
|
||||||
|
}
|
||||||
|
|
||||||
ggml_time_init();
|
ggml_time_init();
|
||||||
|
|
||||||
const int64_t t_main_start_us = ggml_time_us();
|
const int64_t t_main_start_us = ggml_time_us();
|
||||||
|
@ -766,6 +766,10 @@ std::stringstream StarcoderModel::predict(std::string prompt, int max_length, bo
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
spdlog::debug("{}: sample time = {:8.2f} ms\n", __func__, t_sample_us/1000.0f);
|
||||||
|
spdlog::debug("{}: predict time = {:8.2f} ms / {:.2f} ms per token\n", __func__, t_predict_us/1000.0f, t_predict_us/1000.0f/n_past);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user