diff --git a/modules/llamacpp_model.py b/modules/llamacpp_model.py
index 7167301f..f4743d40 100644
--- a/modules/llamacpp_model.py
+++ b/modules/llamacpp_model.py
@@ -64,7 +64,8 @@ class LlamaCppModel:
             else:
                 cache_capacity = int(shared.args.cache_capacity)
 
-        logger.info("Cache capacity is " + str(cache_capacity) + " bytes")
+        if cache_capacity > 0:
+            logger.info("Cache capacity is " + str(cache_capacity) + " bytes")
 
         if shared.args.tensor_split is None or shared.args.tensor_split.strip() == '':
             tensor_split_list = None
@@ -118,9 +119,7 @@ class LlamaCppModel:
                 self.grammar = None
 
     def generate(self, prompt, state, callback=None):
-        LogitsProcessorList = llama_cpp_lib().LogitsProcessorList
-
         prompt = prompt if type(prompt) is str else prompt.decode()
 
         # Handle truncation
         prompt = self.encode(prompt)
@@ -163,6 +162,7 @@ class LlamaCppModel:
         for completion_chunk in completion_chunks:
             if shared.stop_everything:
                 break
+
             text = completion_chunk['choices'][0]['text']
             output += text
             if callback: