Change rms_norm_eps to 5e-6 for llama-2-70b ggml

Based on https://github.com/ggerganov/llama.cpp/pull/2384
oobabooga 2023-07-25 14:54:57 -07:00
parent ef8637e32d
commit 7bc408b472
2 changed files with 2 additions and 2 deletions
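
For context, rms_norm_eps is the small constant added under the square root in RMSNorm for numerical stability, so halving it from 1e-5 to 5e-6 subtly shifts every normalization layer in the model. A minimal sketch of RMSNorm in Python (illustrative only, not llama.cpp's implementation):

import numpy as np

def rms_norm(x, weight, eps=5e-6):
    # RMSNorm: scale x by the reciprocal root-mean-square of its last
    # axis, with eps added under the square root for numerical stability.
    rms = np.sqrt(np.mean(x * x, axis=-1, keepdims=True) + eps)
    return x / rms * weight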

models/config.yaml

@@ -279,4 +279,4 @@ TheBloke_WizardLM-30B-GPTQ:
   instruction_template: 'Llama-v2'
 .*llama.*70b.*ggml.*\.bin:
   n_gqa: 8
-  rms_norm_eps: 1.0e-5
+  rms_norm_eps: 5.0e-6
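
The top-level keys in this YAML file are regular expressions matched against the model name, so any Llama 70B GGML file now picks up n_gqa: 8 together with the new epsilon. A rough sketch of that lookup, assuming later patterns override earlier ones (settings_for_model is a hypothetical helper, not the webui's actual function):

import re
import yaml

def settings_for_model(model_name, config_path="models/config.yaml"):
    # Hypothetical lookup: every top-level key is a regex; collect the
    # values of each pattern that matches, with later entries winning.
    with open(config_path) as f:
        config = yaml.safe_load(f)
    settings = {}
    for pattern, values in config.items():
        if re.match(pattern.lower(), model_name.lower()):
            settings.update(values)
    return settings

# settings_for_model("llama-2-70b.ggmlv3.q4_K_M.bin")
# -> includes {'n_gqa': 8, 'rms_norm_eps': 5e-06}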

server.py

@@ -220,7 +220,7 @@ def create_model_menus():
     shared.gradio['n_gpu_layers'] = gr.Slider(label="n-gpu-layers", minimum=0, maximum=128, value=shared.args.n_gpu_layers)
     shared.gradio['n_ctx'] = gr.Slider(minimum=0, maximum=16384, step=256, label="n_ctx", value=shared.args.n_ctx)
     shared.gradio['n_gqa'] = gr.Slider(minimum=0, maximum=16, step=1, label="n_gqa", value=shared.args.n_gqa, info='grouped-query attention. Must be 8 for llama2 70b.')
-    shared.gradio['rms_norm_eps'] = gr.Slider(minimum=0, maximum=1e-5, step=1e-6, label="rms_norm_eps", value=shared.args.n_gqa, info='Must be 1e-5 for llama2 70b.')
+    shared.gradio['rms_norm_eps'] = gr.Slider(minimum=0, maximum=1e-5, step=1e-6, label="rms_norm_eps", value=shared.args.rms_norm_eps, info='5e-6 is a good value for llama2 70b.')
     shared.gradio['wbits'] = gr.Dropdown(label="wbits", choices=["None", 1, 2, 3, 4, 8], value=str(shared.args.wbits) if shared.args.wbits > 0 else "None")
     shared.gradio['groupsize'] = gr.Dropdown(label="groupsize", choices=["None", 32, 64, 128, 1024], value=str(shared.args.groupsize) if shared.args.groupsize > 0 else "None")
     shared.gradio['model_type'] = gr.Dropdown(label="model_type", choices=["None", "llama", "opt", "gptj"], value=shared.args.model_type or "None")
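
In the GGML loader these settings ultimately reach llama.cpp. A sketch of the resulting load call, assuming a llama-cpp-python build from this period whose Llama() constructor still accepted the short-lived n_gqa and rms_norm_eps keyword arguments (both an assumption about that specific pre-GGUF version; the model path is hypothetical):

from llama_cpp import Llama

# Assumed constructor signature for mid-2023 llama-cpp-python builds;
# n_gqa and rms_norm_eps were temporary pre-GGUF parameters.
llm = Llama(
    model_path="models/llama-2-70b.ggmlv3.q4_K_M.bin",  # hypothetical path
    n_ctx=4096,
    n_gqa=8,            # grouped-query attention; must be 8 for Llama 2 70B
    rms_norm_eps=5e-6,  # the default this commit sets
)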