diff --git a/modules/exllama.py b/modules/exllama.py
index 7df1d321..12c4c02c 100644
--- a/modules/exllama.py
+++ b/modules/exllama.py
@@ -56,9 +56,11 @@ class ExllamaModel:
             config.set_auto_map(shared.args.gpu_split)
             config.gpu_peer_fix = True
 
-        if shared.args.alpha_value > 1 or shared.args.rope_freq_base > 0:
-            config.alpha_value = RoPE.get_alpha_value(shared.args.alpha_value, shared.args.rope_freq_base)
+        if shared.args.alpha_value > 1 and shared.args.rope_freq_base == 0:
+            config.alpha_value = shared.args.alpha_value
             config.calculate_rotary_embedding_base()
+        elif shared.args.rope_freq_base > 0:
+            config.rotary_embedding_base = shared.args.rope_freq_base
 
         if torch_version.hip:
             config.rmsnorm_no_half2 = True
diff --git a/modules/exllama_hf.py b/modules/exllama_hf.py
index eab92644..81ba1cb7 100644
--- a/modules/exllama_hf.py
+++ b/modules/exllama_hf.py
@@ -134,9 +134,11 @@ class ExllamaHF(PreTrainedModel):
             config.set_auto_map(shared.args.gpu_split)
             config.gpu_peer_fix = True
 
-        if shared.args.alpha_value > 1 or shared.args.rope_freq_base > 0:
-            config.alpha_value = RoPE.get_alpha_value(shared.args.alpha_value, shared.args.rope_freq_base)
+        if shared.args.alpha_value > 1 and shared.args.rope_freq_base == 0:
+            config.alpha_value = shared.args.alpha_value
             config.calculate_rotary_embedding_base()
+        elif shared.args.rope_freq_base > 0:
+            config.rotary_embedding_base = shared.args.rope_freq_base
 
         if torch.version.hip:
             config.rmsnorm_no_half2 = True