diff --git a/modules/llamacpp_hf.py b/modules/llamacpp_hf.py
index bcb537fa..a2dcb34b 100644
--- a/modules/llamacpp_hf.py
+++ b/modules/llamacpp_hf.py
@@ -203,10 +203,15 @@ class LlamacppHF(PreTrainedModel):
             'rope_freq_base': RoPE.get_rope_freq_base(shared.args.alpha_value, shared.args.rope_freq_base),
             'tensor_split': tensor_split_list,
             'rope_freq_scale': 1.0 / shared.args.compress_pos_emb,
-            'n_gqa': shared.args.n_gqa or None,
-            'rms_norm_eps': shared.args.rms_norm_eps or None,
             'logits_all': True,
         }
+
+        if not is_gguf(model_file):
+            ggml_params = {
+                'n_gqa': shared.args.n_gqa or None,
+                'rms_norm_eps': shared.args.rms_norm_eps or None,
+            }
+            params = params | ggml_params
 
         Llama = llama_cpp_lib(model_file).Llama
         model = Llama(**params)
diff --git a/modules/llamacpp_model.py b/modules/llamacpp_model.py
index c3c41541..4908ecb7 100644
--- a/modules/llamacpp_model.py
+++ b/modules/llamacpp_model.py
@@ -92,9 +92,14 @@ class LlamaCppModel:
             'rope_freq_base': RoPE.get_rope_freq_base(shared.args.alpha_value, shared.args.rope_freq_base),
             'tensor_split': tensor_split_list,
             'rope_freq_scale': 1.0 / shared.args.compress_pos_emb,
-            'n_gqa': shared.args.n_gqa or None,
-            'rms_norm_eps': shared.args.rms_norm_eps or None,
         }
+
+        if not is_gguf(str(path)):
+            ggml_params = {
+                'n_gqa': shared.args.n_gqa or None,
+                'rms_norm_eps': shared.args.rms_norm_eps or None,
+            }
+            params = params | ggml_params
 
         result.model = Llama(**params)
         if cache_capacity > 0:
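Both hunks depend on an `is_gguf` helper whose definition is not part of this diff. A minimal sketch of what such a helper could look like, assuming it only needs to distinguish GGUF files from older GGML-era containers by reading the file's magic bytes (GGUF files start with the 4-byte sequence `b'GGUF'`):

```python
from pathlib import Path
from typing import Union


def is_gguf(path: Union[str, Path]) -> bool:
    # GGUF files begin with the 4-byte magic b'GGUF'; anything else
    # (e.g. the older GGML/GGJT containers) is treated as pre-GGUF.
    # Hypothetical sketch; the actual helper used by the diff may differ.
    with open(Path(path), 'rb') as f:
        return f.read(4) == b'GGUF'
```

Gating `n_gqa` and `rms_norm_eps` on the file format makes sense because GGUF carries this metadata inside the file itself, so the manual overrides are only meaningful for legacy GGML models. Note also that the dict-merge operator in `params = params | ggml_params` requires Python 3.9 or later.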