Use ExLlama_HF for GPTQ models by default

oobabooga 2023-10-21 20:45:38 -07:00
parent 619093483e
commit 09f807af83


```diff
@@ -110,7 +110,7 @@ def infer_loader(model_name, model_settings):
     if not path_to_model.exists():
         loader = None
     elif (path_to_model / 'quantize_config.json').exists() or ('wbits' in model_settings and type(model_settings['wbits']) is int and model_settings['wbits'] > 0):
-        loader = 'AutoGPTQ'
+        loader = 'ExLlama_HF'
     elif (path_to_model / 'quant_config.json').exists() or re.match(r'.*-awq', model_name.lower()):
         loader = 'AutoAWQ'
     elif len(list(path_to_model.glob('*.gguf'))) > 0:
```
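For context, here is a minimal standalone sketch of the detection order this hunk changes. The function name, `models_dir` parameter, GGUF-to-llama.cpp mapping, and the trailing fallback are assumptions added for illustration, not the webui's exact code; the commit itself only changes the GPTQ branch's default from `'AutoGPTQ'` to `'ExLlama_HF'`.

```python
import re
from pathlib import Path
from typing import Optional

def infer_loader_sketch(model_name: str, model_settings: dict,
                        models_dir: str = 'models') -> Optional[str]:
    # Hypothetical standalone version of infer_loader(), reduced to the
    # branches visible in this hunk; the real function handles more loaders.
    path_to_model = Path(models_dir) / model_name
    if not path_to_model.exists():
        loader = None
    # GPTQ model: a quantize_config.json, or an explicit integer wbits > 0
    elif (path_to_model / 'quantize_config.json').exists() or (
            'wbits' in model_settings
            and isinstance(model_settings['wbits'], int)
            and model_settings['wbits'] > 0):
        loader = 'ExLlama_HF'  # was 'AutoGPTQ' before this commit
    # AWQ model: a quant_config.json, or a '-awq' suffix in the model name
    elif (path_to_model / 'quant_config.json').exists() or re.match(
            r'.*-awq', model_name.lower()):
        loader = 'AutoAWQ'
    # Folder contains GGUF files: assumed to map to the llama.cpp loader
    # (this branch's body is truncated in the hunk above)
    elif len(list(path_to_model.glob('*.gguf'))) > 0:
        loader = 'llama.cpp'
    else:
        loader = 'Transformers'  # assumed fallback; further branches elided
    return loader
```

With a GPTQ model folder that contains a quantize_config.json (or settings carrying `wbits: 4`), the sketch now resolves to `'ExLlama_HF'` where it previously resolved to `'AutoGPTQ'`; the other branches are untouched.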