From 08c622df2e26811440f6b3311dff3553ba20dc86 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Mon, 24 Jul 2023 15:26:29 -0700
Subject: [PATCH] Autodetect rms_norm_eps and n_gqa for llama-2-70b

---
 models/config.yaml | 3 +++
 server.py          | 2 ++
 2 files changed, 5 insertions(+)

diff --git a/models/config.yaml b/models/config.yaml
index 1cc5605c..86d7293f 100644
--- a/models/config.yaml
+++ b/models/config.yaml
@@ -277,3 +277,6 @@ TheBloke_WizardLM-30B-GPTQ:
 .*llama-(2|v2).*chat:
   mode: 'instruct'
   instruction_template: 'Llama-v2'
+.*llama.*70b.*ggml.*\.bin:
+  n_gqa: 8
+  rms_norm_eps: 1.0e-5
diff --git a/server.py b/server.py
index 97eb5509..6075dd65 100644
--- a/server.py
+++ b/server.py
@@ -1104,6 +1104,8 @@ if __name__ == "__main__":
         'skip_special_tokens': shared.settings['skip_special_tokens'],
         'custom_stopping_strings': shared.settings['custom_stopping_strings'],
         'truncation_length': shared.settings['truncation_length'],
+        'n_gqa': 0,
+        'rms_norm_eps': 0,
     }
 
     shared.model_config.move_to_end('.*', last=False)  # Move to the beginning