diff --git a/include/turbopilot/gptj.hpp b/include/turbopilot/gptj.hpp
index 9f1798b..c2acc39 100644
--- a/include/turbopilot/gptj.hpp
+++ b/include/turbopilot/gptj.hpp
@@ -71,7 +71,7 @@ public:
     }
     virtual ~GPTJModel();
     bool load_model(std::string path);
-    virtual std::stringstream predict(std::string prompt, int max_length, bool include_prompt);
+    virtual std::stringstream predict_impl(std::string prompt, int max_length, bool include_prompt);
 
 private:
     gptj_model *model = NULL;
diff --git a/include/turbopilot/gptneox.hpp b/include/turbopilot/gptneox.hpp
index 66aeb95..78c3ed1 100644
--- a/include/turbopilot/gptneox.hpp
+++ b/include/turbopilot/gptneox.hpp
@@ -75,7 +75,7 @@ public:
     }
     virtual ~GPTNEOXModel();
     bool load_model(std::string path);
-    virtual std::stringstream predict(std::string prompt, int max_length, bool include_prompt);
+    virtual std::stringstream predict_impl(std::string prompt, int max_length, bool include_prompt);
 
 private:
     gpt_neox_model *model = NULL;
diff --git a/include/turbopilot/model.hpp b/include/turbopilot/model.hpp
index 2849d08..3fcf28a 100644
--- a/include/turbopilot/model.hpp
+++ b/include/turbopilot/model.hpp
@@ -7,6 +7,7 @@
 #include <string>
 #include <sstream>
 #include <random>
+#include <boost/thread.hpp>
 
 typedef void (*offload_func_t)(struct ggml_tensor * tensor);
 void ggml_nop(struct ggml_tensor * tensor);
@@ -54,11 +55,16 @@ public:
         rng(rng) {}
 
     virtual bool load_model(std::string model_path) = 0;
-    virtual std::stringstream predict(std::string prompt, int max_length, bool include_prompt) = 0;
+    std::stringstream predict(std::string prompt, int max_length, bool include_prompt);
+    void lock();
+    void unlock();
+
+protected:
+    virtual std::stringstream predict_impl(std::string prompt, int max_length, bool include_prompt) = 0;
 
     ModelConfig config;
     std::mt19937 &rng;
+    boost::mutex model_lock;
 };
 
 #endif //__TURBOPILOT_MODEL_H
\ No newline at end of file
diff --git a/include/turbopilot/server.hpp b/include/turbopilot/server.hpp
index 7000544..18d30bd 100644
--- a/include/turbopilot/server.hpp
+++ b/include/turbopilot/server.hpp
@@ -2,6 +2,8 @@
 #define __TURBOPILOT_SERVER_H
 
+#include <spdlog/spdlog.h>
+
 #include "turbopilot/model.hpp"
 #include "crow_all.h"
 
@@ -10,6 +12,46 @@ crow::response handle_openai_request(TurbopilotModel *model, const crow::request
 
 crow::response handle_hf_request(TurbopilotModel *model, const crow::request& req);
 
+class TBPLogger : public crow::ILogHandler {
+public:
+    TBPLogger() {}
+
+    void log(std::string message, crow::LogLevel crow_level) {
+        // "message" doesn't carry the timestamp and log-level prefix
+        // that the default logger adds, and it doesn't end
+        // in a newline.
+
+        spdlog::level::level_enum level = spdlog::level::info;
+
+        switch (crow_level) {
+            case crow::LogLevel::Critical:
+                level = spdlog::level::critical;
+                break;
+
+            case crow::LogLevel::Error:
+                level = spdlog::level::err;
+                break;
+
+            case crow::LogLevel::Warning:
+                level = spdlog::level::warn;
+                break;
+
+            case crow::LogLevel::Info:
+                level = spdlog::level::info;
+                break;
+
+            case crow::LogLevel::Debug:
+                level = spdlog::level::debug;
+                break;
+
+            default:
+                // if the level is not a known value, assume the worst
+                level = spdlog::level::critical;
+        }
+
+        spdlog::log(level, message);
+    }
+};
+
 #endif // __TURBOPILOT_SERVER_H
diff --git a/include/turbopilot/starcoder.hpp b/include/turbopilot/starcoder.hpp
index f5b7344..1ee94a0 100644
--- a/include/turbopilot/starcoder.hpp
+++ b/include/turbopilot/starcoder.hpp
@@ -68,7 +68,7 @@ public:
     }
     virtual ~StarcoderModel();
     bool load_model(std::string path);
-    virtual std::stringstream predict(std::string prompt, int max_length, bool include_prompt);
+    virtual std::stringstream predict_impl(std::string prompt, int max_length, bool include_prompt);
 
 private:
     starcoder_model *model = NULL;
diff --git a/src/common.cpp b/src/common.cpp
index 2439c56..6435e48 100644
--- a/src/common.cpp
+++ b/src/common.cpp
@@ -4,6 +4,22 @@
 #include <cmath>
 #include <random>
 
+
+void TurbopilotModel::lock(){
+    this->model_lock.lock();
+}
+
+void TurbopilotModel::unlock(){
+    this->model_lock.unlock();
+}
+
+std::stringstream TurbopilotModel::predict(std::string prompt, int max_length, bool include_prompt){
+    lock();
+    auto result = predict_impl(prompt, max_length, include_prompt);
+    unlock();
+    return result;
+}
+
 void llama_nop(struct ggml_tensor * tensor) { // don't offload by default
     (void) tensor;
 }
@@ -163,4 +179,6 @@ gpt_vocab::id gpt_sample_top_k_top_p(
     int idx = dist(rng);
 
     return logits_id[idx].second;
-}
\ No newline at end of file
+}
+
+
diff --git a/src/gptj.cpp b/src/gptj.cpp
index f00e107..80aaeac 100644
--- a/src/gptj.cpp
+++ b/src/gptj.cpp
@@ -556,7 +556,7 @@ bool GPTJModel::load_model(std::string fname) {
     return true;
 }
 
-std::stringstream GPTJModel::predict(std::string prompt, int max_length, bool include_prompt) {
+std::stringstream GPTJModel::predict_impl(std::string prompt, int max_length, bool include_prompt) {
     std::stringstream result;
 
     // tokenize the prompt
diff --git a/src/gptneox.cpp b/src/gptneox.cpp
index 51665c7..fa7b08b 100644
--- a/src/gptneox.cpp
+++ b/src/gptneox.cpp
@@ -91,6 +91,7 @@ bool gpt_neox_eval(
         const size_t buf_size_new = 1.1*(mem_per_token*N); // add 10% to account for ggml object overhead
         //printf("\n%s: reallocating buffer from %zu to %zu bytes\n", __func__, buf_size, buf_size_new);
 
+        // reallocate
         buf_size = buf_size_new;
         buf = realloc(buf, buf_size);
 
@@ -98,6 +99,8 @@
             fprintf(stderr, "%s: failed to allocate %zu bytes\n", __func__, buf_size);
             return false;
         }
+
+        spdlog::debug("{}: reallocating context buffer {} -> {} bytes, tokens in prompt = {}", __func__, buf_size, buf_size_new, N);
     }
 
     struct ggml_init_params params = {
@@ -283,6 +286,7 @@
     // ggml_graph_print (&gf);
     // ggml_graph_dump_dot(&gf, NULL, "gpt-2.dot");
     //}
+
 
     //embd_w.resize(n_vocab*N);
     //memcpy(embd_w.data(), ggml_get_data(inpL), sizeof(float)*n_vocab*N);
@@ -293,7 +297,9 @@
 
     if (mem_per_token == 0) {
         mem_per_token = ggml_used_mem(ctx0)/N;
+
     }
 
+    spdlog::debug("used_mem = {}", ggml_used_mem(ctx0));
     //printf("used_mem = %zu\n", ggml_used_mem(ctx0));
 
     ggml_free(ctx0);
@@ -612,7 +618,7 @@ bool GPTNEOXModel::load_model(std::string fname) {
     return true;
 }
 
-std::stringstream GPTNEOXModel::predict(std::string prompt, int max_length, bool include_prompt) {
+std::stringstream GPTNEOXModel::predict_impl(std::string prompt, int max_length, bool include_prompt) {
     std::stringstream result;
 
     // tokenize the prompt
@@ -631,6 +637,8 @@ std::stringstream GPTNEOXModel::predict(std::string prompt, int max_length, bool
 
     std::vector<gpt_vocab::id> embd;
 
+
+
     // determine the required inference memory per token:
     size_t mem_per_token = 0;
 
@@ -717,3 +725,4 @@ std::stringstream GPTNEOXModel::predict(std::string prompt, int max_length, bool
 
     return result;
 }
+
diff --git a/src/main.cpp b/src/main.cpp
index c6ef84c..04c1fb3 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -121,12 +121,17 @@ int main(int argc, char **argv)
     }
 
     t_load_us = ggml_time_us() - t_start_us;
 
+    spdlog::info("Loaded model in {:0.2f}ms", t_load_us/1000.0f);
 
     crow::SimpleApp app;
 
+    TBPLogger logger;
+
+    crow::logger::setHandler(&logger);
+
     CROW_ROUTE(app, "/")([](){
         return "Hello world";
     });
 
@@ -168,5 +173,7 @@ int main(int argc, char **argv)
 
     app.port(program.get<int>("--port")).multithreaded().run();
 
+
+    delete model;
 }
\ No newline at end of file
diff --git a/src/server.cpp b/src/server.cpp
index b373137..80a4235 100644
--- a/src/server.cpp
+++ b/src/server.cpp
@@ -37,7 +37,6 @@ crow::response handle_hf_request(TurbopilotModel *model, const crow::request& re
 
     crow::json::wvalue response = {
         {"generated_text", result.str()},
     };
-
 
     crow::response res;
diff --git a/src/starcoder.cpp b/src/starcoder.cpp
index c196f3c..5cbd425 100644
--- a/src/starcoder.cpp
+++ b/src/starcoder.cpp
@@ -44,13 +44,13 @@ bool starcoder_eval(
     if (mem_per_token > 0 && mem_per_token*N > buf_size) {
         const size_t buf_size_new = 1.1*(mem_per_token*N); // add 10% to account for ggml object overhead
-        //printf("\n%s: reallocating buffer from %zu to %zu bytes\n", __func__, buf_size, buf_size_new);
+        spdlog::debug("{}: reallocating buffer from {} to {} bytes", __func__, buf_size, buf_size_new);
 
         // reallocate
         buf_size = buf_size_new;
         buf = realloc(buf, buf_size);
         if (buf == nullptr) {
-            fprintf(stderr, "%s: failed to allocate %zu bytes\n", __func__, buf_size);
+            spdlog::error("{}: failed to allocate {} bytes", __func__, buf_size);
             return false;
         }
     }
@@ -681,7 +681,7 @@ bool StarcoderModel::load_model(std::string fname) {
 
 }
 
-std::stringstream StarcoderModel::predict(std::string prompt, int max_length, bool include_prompt) {
+std::stringstream StarcoderModel::predict_impl(std::string prompt, int max_length, bool include_prompt) {
     std::stringstream result;
 
     // tokenize the prompt
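
Note on the locking pattern above: TurbopilotModel::predict() takes model_lock with explicit lock()/unlock() calls around the virtual predict_impl(), so if predict_impl() ever throws, the mutex is never released and every later request blocks. Below is a minimal, self-contained sketch of the same template-method-plus-mutex pattern using RAII locking instead. This is illustrative only, not the patch's code: it substitutes std::mutex and std::lock_guard for the patch's boost::mutex (boost::lock_guard behaves the same way), and EchoModel is a hypothetical stand-in for GPTJModel/GPTNEOXModel/StarcoderModel.

#include <iostream>
#include <mutex>
#include <sstream>
#include <string>

class Model {
public:
    virtual ~Model() = default;

    // Public entry point: serializes access to the single shared model.
    // The lock_guard releases model_lock on every exit path, including
    // an exception thrown by predict_impl().
    std::stringstream predict(std::string prompt, int max_length, bool include_prompt) {
        std::lock_guard<std::mutex> guard(model_lock);
        return predict_impl(prompt, max_length, include_prompt);
    }

protected:
    // Backends implement the actual inference here, as in the patch.
    virtual std::stringstream predict_impl(std::string prompt, int max_length, bool include_prompt) = 0;

private:
    std::mutex model_lock;
};

// Hypothetical toy backend, standing in for the real model classes.
class EchoModel : public Model {
protected:
    std::stringstream predict_impl(std::string prompt, int max_length, bool include_prompt) override {
        std::stringstream result;
        if (include_prompt) result << prompt;
        result << " /* up to " << max_length << " generated tokens */";
        return result;
    }
};

int main() {
    EchoModel model;
    // Concurrent callers of predict() are serialized by model_lock.
    std::cout << model.predict("int main() {", 64, true).str() << std::endl;
    return 0;
}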