diff --git a/models/config.yaml b/models/config.yaml
index 86d7293f..7343a0b1 100644
--- a/models/config.yaml
+++ b/models/config.yaml
@@ -279,4 +279,4 @@ TheBloke_WizardLM-30B-GPTQ:
   instruction_template: 'Llama-v2'
 .*llama.*70b.*ggml.*\.bin:
   n_gqa: 8
-  rms_norm_eps: 1.0e-5
+  rms_norm_eps: 5.0e-6
diff --git a/server.py b/server.py
index 6075dd65..9c86d2dc 100644
--- a/server.py
+++ b/server.py
@@ -220,7 +220,7 @@ def create_model_menus():
                     shared.gradio['n_gpu_layers'] = gr.Slider(label="n-gpu-layers", minimum=0, maximum=128, value=shared.args.n_gpu_layers)
                     shared.gradio['n_ctx'] = gr.Slider(minimum=0, maximum=16384, step=256, label="n_ctx", value=shared.args.n_ctx)
                     shared.gradio['n_gqa'] = gr.Slider(minimum=0, maximum=16, step=1, label="n_gqa", value=shared.args.n_gqa, info='grouped-query attention. Must be 8 for llama2 70b.')
-                    shared.gradio['rms_norm_eps'] = gr.Slider(minimum=0, maximum=1e-5, step=1e-6, label="rms_norm_eps", value=shared.args.n_gqa, info='Must be 1e-5 for llama2 70b.')
+                    shared.gradio['rms_norm_eps'] = gr.Slider(minimum=0, maximum=1e-5, step=1e-6, label="rms_norm_eps", value=shared.args.n_gqa, info='5e-6 is a good value for llama2 70b.')
                     shared.gradio['wbits'] = gr.Dropdown(label="wbits", choices=["None", 1, 2, 3, 4, 8], value=str(shared.args.wbits) if shared.args.wbits > 0 else "None")
                     shared.gradio['groupsize'] = gr.Dropdown(label="groupsize", choices=["None", 32, 64, 128, 1024], value=str(shared.args.groupsize) if shared.args.groupsize > 0 else "None")
                     shared.gradio['model_type'] = gr.Dropdown(label="model_type", choices=["None", "llama", "opt", "gptj"], value=shared.args.model_type or "None")
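For context: the two settings changed here are ultimately forwarded to llama.cpp when a matching GGML model is loaded. A minimal sketch of that call, assuming a llama-cpp-python build from the GGML era whose Llama constructor still accepts the temporary n_gqa and rms_norm_eps keyword arguments; the model filename below is hypothetical and not taken from this patch.

# Illustrative sketch only: assumes a llama-cpp-python build that still
# exposes the temporary n_gqa / rms_norm_eps options for GGML LLaMA-2 70B.
from llama_cpp import Llama

llm = Llama(
    model_path="models/llama-2-70b.ggmlv3.q4_K_M.bin",  # hypothetical filename
    n_ctx=4096,
    n_gqa=8,            # grouped-query attention; must be 8 for llama2 70b
    rms_norm_eps=5e-6,  # matches the new default set above for 70b GGML models
)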