mirror of
https://github.com/ravenscroftj/turbopilot.git
synced 2024-10-01 01:06:01 -04:00
add huggingface request handler and refactor old req handler
This commit is contained in:
parent
6ee2d3dc66
commit
f7f1991e2c
@ -6,8 +6,9 @@
|
|||||||
|
|
||||||
#include "crow_all.h"
|
#include "crow_all.h"
|
||||||
|
|
||||||
crow::response serve_response(TurbopilotModel *model, const crow::request& req);
|
crow::response handle_openai_request(TurbopilotModel *model, const crow::request& req);
|
||||||
|
|
||||||
|
crow::response handle_hf_request(TurbopilotModel *model, const crow::request& req);
|
||||||
|
|
||||||
|
|
||||||
#endif // __TURBOPILOT_SERVER_H
|
#endif // __TURBOPILOT_SERVER_H
|
||||||
|
12
src/main.cpp
12
src/main.cpp
@ -117,22 +117,28 @@ int main(int argc, char **argv)
|
|||||||
return res;
|
return res;
|
||||||
});
|
});
|
||||||
|
|
||||||
|
//huggingface code compatible endpoint
|
||||||
|
CROW_ROUTE(app, "/api/generate").methods(crow::HTTPMethod::Post)
|
||||||
|
([&model](const crow::request& req) {
|
||||||
|
return handle_hf_request(model, req);
|
||||||
|
});
|
||||||
|
|
||||||
CROW_ROUTE(app, "/v1/completions").methods(crow::HTTPMethod::Post)
|
CROW_ROUTE(app, "/v1/completions").methods(crow::HTTPMethod::Post)
|
||||||
([&model](const crow::request& req) {
|
([&model](const crow::request& req) {
|
||||||
return serve_response(model, req);
|
return handle_openai_request(model, req);
|
||||||
});
|
});
|
||||||
|
|
||||||
CROW_ROUTE(app, "/v1/engines/codegen/completions").methods(crow::HTTPMethod::Post)
|
CROW_ROUTE(app, "/v1/engines/codegen/completions").methods(crow::HTTPMethod::Post)
|
||||||
([&model](const crow::request& req) {
|
([&model](const crow::request& req) {
|
||||||
return serve_response(model, req);
|
return handle_openai_request(model, req);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
||||||
CROW_ROUTE(app, "/v1/engines/copilot-codex/completions").methods(crow::HTTPMethod::Post)
|
CROW_ROUTE(app, "/v1/engines/copilot-codex/completions").methods(crow::HTTPMethod::Post)
|
||||||
([&model](const crow::request& req) {
|
([&model](const crow::request& req) {
|
||||||
return serve_response(model, req);
|
return handle_openai_request(model, req);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
||||||
app.port(program.get<int>("--port")).multithreaded().run();
|
app.port(program.get<int>("--port")).multithreaded().run();
|
||||||
|
|
||||||
|
@ -7,11 +7,51 @@
|
|||||||
#include <boost/uuid/uuid_generators.hpp> // generators
|
#include <boost/uuid/uuid_generators.hpp> // generators
|
||||||
#include <boost/uuid/uuid_io.hpp> // streaming operators etc.
|
#include <boost/uuid/uuid_io.hpp> // streaming operators etc.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This function serves requests for autocompletion from crow
|
* This function serves requests for autocompletion from crow
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
crow::response serve_response(TurbopilotModel *model, const crow::request& req){
|
crow::response handle_hf_request(TurbopilotModel *model, const crow::request& req){
    // Serves a Hugging Face style text-generation request.
    //
    // Request body (JSON object):
    //   "inputs"                     - required string; the prompt handed to model->predict
    //   "max_tokens"                 - optional number; token budget (default 200)
    //   "parameters.max_new_tokens"  - optional number; used only when "max_tokens" is absent
    //
    // Returns 200 with {"generated_text": ...} on success, or 400 with
    // {"message": ...} when the body is malformed or a field has the wrong type.

    // Builds a JSON 400 response; the message is serialised through wvalue so it is escaped.
    const auto bad_request = [](const std::string& message) {
        crow::response res;
        res.code = 400;
        res.set_header("Content-Type", "application/json");
        crow::json::wvalue body = {
            {"message", message},
        };
        res.body = body.dump();
        return res;
    };

    crow::json::rvalue data = crow::json::load(req.body);

    // load() yields an invalid rvalue on a parse error; reject that (and any
    // non-object payload) before calling has()/operator[] on it.
    if(!data || data.t() != crow::json::type::Object){
        return bad_request("request body must be a JSON object");
    }

    if(!data.has("inputs")){
        return bad_request("you must specify inputs field");
    }

    // .s() throws on a non-string value, so check the type up front.
    if(data["inputs"].t() != crow::json::type::String){
        return bad_request("inputs field must be a string");
    }

    int maxTokens = 200;
    if(data.has("max_tokens")){
        // .i() throws on a non-numeric value, so check the type up front.
        if(data["max_tokens"].t() != crow::json::type::Number){
            return bad_request("max_tokens must be a number");
        }
        maxTokens = static_cast<int>(data["max_tokens"].i());
    }
    else if(data.has("parameters")
            && data["parameters"].t() == crow::json::type::Object
            && data["parameters"].has("max_new_tokens")
            && data["parameters"]["max_new_tokens"].t() == crow::json::type::Number){
        // Hugging Face clients put generation options under "parameters".
        maxTokens = static_cast<int>(data["parameters"]["max_new_tokens"].i());
    }

    auto result = model->predict(data["inputs"].s(), maxTokens, false);

    crow::json::wvalue response = {
        {"generated_text", result.str()},
    };

    crow::response res;
    res.code = 200;
    res.set_header("Content-Type", "application/json");
    res.body = response.dump();
    return res;
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This function serves requests for autocompletion from crow
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
crow::response handle_openai_request(TurbopilotModel *model, const crow::request& req){
|
||||||
|
|
||||||
crow::json::rvalue data = crow::json::load(req.body);
|
crow::json::rvalue data = crow::json::load(req.body);
|
||||||
|
|
||||||
@ -23,24 +63,6 @@ crow::response serve_response(TurbopilotModel *model, const crow::request& req){
|
|||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// tokenize the prompt
|
|
||||||
// std::vector<gpt_vocab::id> embd_inp;
|
|
||||||
|
|
||||||
// if (data.has("prompt")) {
|
|
||||||
// std::string prompt = data["prompt"].s();
|
|
||||||
// embd_inp = ::gpt_tokenize(vocab, prompt);
|
|
||||||
// }
|
|
||||||
// else {
|
|
||||||
// crow::json::rvalue input_ids = data["input_ids"];
|
|
||||||
// for (auto id : input_ids.lo()) {
|
|
||||||
// embd_inp.push_back(id.i());
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
|
|
||||||
|
|
||||||
// std::string suffix = data["suffix"].s();
|
// std::string suffix = data["suffix"].s();
|
||||||
int maxTokens = 200;
|
int maxTokens = 200;
|
||||||
if(data.has("max_tokens")){
|
if(data.has("max_tokens")){
|
||||||
@ -64,14 +86,6 @@ crow::response serve_response(TurbopilotModel *model, const crow::request& req){
|
|||||||
crow::json::wvalue::list choices = {choice};
|
crow::json::wvalue::list choices = {choice};
|
||||||
|
|
||||||
|
|
||||||
// crow::json::wvalue usage = {
|
|
||||||
// {"completion_tokens", n_past},
|
|
||||||
// // {"prompt_tokens", static_cast<std::uint64_t>(embd_inp.size())},
|
|
||||||
// {"prompt_tokens", 0},
|
|
||||||
// {"total_tokens", static_cast<std::uint64_t>(n_past - embd_inp.size())}
|
|
||||||
// };
|
|
||||||
|
|
||||||
|
|
||||||
crow::json::wvalue usage = {
|
crow::json::wvalue usage = {
|
||||||
{"completion_tokens", 0},
|
{"completion_tokens", 0},
|
||||||
// {"prompt_tokens", static_cast<std::uint64_t>(embd_inp.size())},
|
// {"prompt_tokens", static_cast<std::uint64_t>(embd_inp.size())},
|
||||||
|
Loading…
Reference in New Issue
Block a user