Do not expose alpha_value to llama.cpp & rope_freq_base to transformers

To avoid confusion
oobabooga 2024-06-23 22:09:24 -07:00
parent b48ab482f8
commit 536f8d58d4
5 changed files with 6 additions and 27 deletions

modules/RoPE.py (deleted)

@@ -1,18 +0,0 @@
-def get_alpha_value(alpha, base):
-    '''
-    Gets alpha_value from alpha_value and rope_freq_base
-    '''
-    if base > 0:
-        return (base / 10000.) ** (63 / 64.)
-    else:
-        return alpha
-
-
-def get_rope_freq_base(alpha, base):
-    '''
-    Gets rope_freq_base from alpha_value and rope_freq_base
-    '''
-    if base > 0:
-        return base
-    else:
-        return 10000 * alpha ** (64 / 63.)
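With modules/RoPE.py deleted, the conversion between the two settings is no longer applied automatically: the llama.cpp loaders read rope_freq_base as-is and the Transformers loader reads alpha_value as-is. For translating an old setting by hand, here is a minimal sketch of the equivalence the deleted helpers implemented; the function names below are hypothetical, only the 10000 * alpha ** (64 / 63) relationship comes from the code above:

# Sketch of the alpha_value <-> rope_freq_base relationship from the deleted module.
# Helper names are hypothetical; only the formula is taken from the diff above.

def alpha_to_rope_freq_base(alpha_value: float) -> float:
    # rope_freq_base implied by an NTK alpha_value
    return 10000 * alpha_value ** (64 / 63.)


def rope_freq_base_to_alpha(rope_freq_base: float) -> float:
    # NTK alpha_value implied by a rope_freq_base
    return (rope_freq_base / 10000.) ** (63 / 64.)


print(alpha_to_rope_freq_base(2.0))    # ~20221.0
print(rope_freq_base_to_alpha(20221))  # ~2.0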

modules/llamacpp_hf.py

@@ -7,7 +7,7 @@ from torch.nn import CrossEntropyLoss
 from transformers import GenerationConfig, PretrainedConfig, PreTrainedModel
 from transformers.modeling_outputs import CausalLMOutputWithPast
 
-from modules import RoPE, llama_cpp_python_hijack, shared
+from modules import llama_cpp_python_hijack, shared
 from modules.logging_colors import logger
 
 try:
@@ -212,7 +212,7 @@ class LlamacppHF(PreTrainedModel):
             'mul_mat_q': not shared.args.no_mul_mat_q,
             'numa': shared.args.numa,
             'n_gpu_layers': shared.args.n_gpu_layers,
-            'rope_freq_base': RoPE.get_rope_freq_base(shared.args.alpha_value, shared.args.rope_freq_base),
+            'rope_freq_base': shared.args.rope_freq_base,
             'tensor_split': tensor_split_list,
             'rope_freq_scale': 1.0 / shared.args.compress_pos_emb,
             'logits_all': shared.args.logits_all,

modules/llamacpp_model.py

@@ -4,7 +4,7 @@ from functools import partial
 import numpy as np
 import torch
 
-from modules import RoPE, llama_cpp_python_hijack, shared
+from modules import llama_cpp_python_hijack, shared
 from modules.callbacks import Iteratorize
 from modules.logging_colors import logger
 from modules.text_generation import get_max_prompt_length
@@ -92,7 +92,7 @@ class LlamaCppModel:
             'mul_mat_q': not shared.args.no_mul_mat_q,
             'numa': shared.args.numa,
             'n_gpu_layers': shared.args.n_gpu_layers,
-            'rope_freq_base': RoPE.get_rope_freq_base(shared.args.alpha_value, shared.args.rope_freq_base),
+            'rope_freq_base': shared.args.rope_freq_base,
             'tensor_split': tensor_split_list,
             'rope_freq_scale': 1.0 / shared.args.compress_pos_emb,
             'offload_kqv': not shared.args.no_offload_kqv,
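For the llama.cpp and llamacpp_HF loaders the parameter is now passed straight through, so a configuration that previously relied on alpha_value needs an explicit rope_freq_base instead. A quick sketch of that manual translation, assuming the --alpha_value / --rope_freq_base flag names mirror the shared.args fields in the diff:

# If you previously launched a GGUF model with something like --alpha_value 2.0,
# compute the rope_freq_base to pass explicitly now (formula from the deleted RoPE.py).
old_alpha_value = 2.0
rope_freq_base = 10000 * old_alpha_value ** (64 / 63.)
print(f"--rope_freq_base {rope_freq_base:.0f}")  # -> --rope_freq_base 20221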

modules/loaders.py

@@ -22,7 +22,6 @@ loaders_and_params = OrderedDict({
         'no_use_fast',
         'use_flash_attention_2',
         'alpha_value',
-        'rope_freq_base',
         'compress_pos_emb',
         'disable_exllama',
         'disable_exllamav2',
@@ -38,7 +37,6 @@ loaders_and_params = OrderedDict({
         'no_mmap',
         'mlock',
         'no_mul_mat_q',
-        'alpha_value',
         'rope_freq_base',
         'compress_pos_emb',
         'cpu',
@@ -60,7 +58,6 @@ loaders_and_params = OrderedDict({
         'no_mmap',
         'mlock',
         'no_mul_mat_q',
-        'alpha_value',
         'rope_freq_base',
         'compress_pos_emb',
         'cpu',
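The loaders.py edits leave each backend with only the RoPE control it actually consumes. A rough sketch of the resulting split; the loader names are inferred from the surrounding hunks and only RoPE-related entries are listed:

# Assumed mapping after this commit; other loader parameters omitted.
rope_options_by_loader = {
    'Transformers': ['alpha_value', 'compress_pos_emb'],    # rope_freq_base removed
    'llama.cpp': ['rope_freq_base', 'compress_pos_emb'],    # alpha_value removed
    'llamacpp_HF': ['rope_freq_base', 'compress_pos_emb'],  # alpha_value removed
}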

modules/models.py

@@ -25,7 +25,7 @@ from transformers import (
 )
 
 import modules.shared as shared
-from modules import RoPE, sampler_hijack
+from modules import sampler_hijack
 from modules.logging_colors import logger
 from modules.models_settings import get_model_metadata
 
@@ -248,7 +248,7 @@ def huggingface_loader(model_name):
        if shared.args.compress_pos_emb > 1:
            params['rope_scaling'] = {'type': 'linear', 'factor': shared.args.compress_pos_emb}
        elif shared.args.alpha_value > 1:
-           params['rope_scaling'] = {'type': 'dynamic', 'factor': RoPE.get_alpha_value(shared.args.alpha_value, shared.args.rope_freq_base)}
+           params['rope_scaling'] = {'type': 'dynamic', 'factor': shared.args.alpha_value}
 
    logger.info("TRANSFORMERS_PARAMS=")
    pprint.PrettyPrinter(indent=4, sort_dicts=False).pprint(params)
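On the Transformers side, rope_freq_base no longer feeds into rope_scaling; the dynamic-NTK factor is now alpha_value verbatim. A minimal sketch of the resulting branch, assuming the same shared.args fields shown in the diff (the helper name is hypothetical):

# Sketch of the rope_scaling selection after this commit (simplified from the hunk above).
def build_rope_scaling(compress_pos_emb: float, alpha_value: float):
    if compress_pos_emb > 1:
        return {'type': 'linear', 'factor': compress_pos_emb}
    elif alpha_value > 1:
        # rope_freq_base is intentionally ignored here; it only applies to llama.cpp loaders now.
        return {'type': 'dynamic', 'factor': alpha_value}
    return None  # default: no rope_scaling entry is added to params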