From 08c622df2e26811440f6b3311dff3553ba20dc86 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 24 Jul 2023 15:26:29 -0700 Subject: [PATCH] Autodetect rms_norm_eps and n_gqa for llama-2-70b --- models/config.yaml | 3 +++ server.py | 2 ++ 2 files changed, 5 insertions(+) diff --git a/models/config.yaml b/models/config.yaml index 1cc5605c..86d7293f 100644 --- a/models/config.yaml +++ b/models/config.yaml @@ -277,3 +277,6 @@ TheBloke_WizardLM-30B-GPTQ: .*llama-(2|v2).*chat: mode: 'instruct' instruction_template: 'Llama-v2' +.*llama.*70b.*ggml.*\.bin: + n_gqa: 8 + rms_norm_eps: 1.0e-5 diff --git a/server.py b/server.py index 97eb5509..6075dd65 100644 --- a/server.py +++ b/server.py @@ -1104,6 +1104,8 @@ if __name__ == "__main__": 'skip_special_tokens': shared.settings['skip_special_tokens'], 'custom_stopping_strings': shared.settings['custom_stopping_strings'], 'truncation_length': shared.settings['truncation_length'], + 'n_gqa': 0, + 'rms_norm_eps': 0, } shared.model_config.move_to_end('.*', last=False) # Move to the beginning