Mirror of https://github.com/oobabooga/text-generation-webui.git (synced 2024-10-01 01:26:03 -04:00)
Load llamacpp before quantized model (#1307)

Commit: 07de7d0426
Parent: 3961f49524
@@ -99,6 +99,16 @@ def load_model(model_name):
 
         return model, tokenizer
 
+    # llamacpp model
+    elif shared.is_llamacpp:
+        from modules.llamacpp_model_alternative import LlamaCppModel
+
+        model_file = list(Path(f'{shared.args.model_dir}/{model_name}').glob('ggml*.bin'))[0]
+        print(f"llama.cpp weights detected: {model_file}\n")
+
+        model, tokenizer = LlamaCppModel.from_pretrained(model_file)
+        return model, tokenizer
+
     # Quantized model
     elif shared.args.wbits > 0:
 
@@ -116,16 +126,6 @@ def load_model(model_name):
 
         model = load_quantized(model_name)
 
-    # llamacpp model
-    elif shared.is_llamacpp:
-        from modules.llamacpp_model_alternative import LlamaCppModel
-
-        model_file = list(Path(f'{shared.args.model_dir}/{model_name}').glob('ggml*.bin'))[0]
-        print(f"llama.cpp weights detected: {model_file}\n")
-
-        model, tokenizer = LlamaCppModel.from_pretrained(model_file)
-        return model, tokenizer
-
     # Custom
     else:
         params = {"low_cpu_mem_usage": True}
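
The point of the reorder: load_model returns from the first branch that matches, so checking shared.is_llamacpp before shared.args.wbits > 0 lets ggml weights take priority over the GPTQ path when both conditions could apply. Below is a minimal sketch of the resulting dispatch order, assuming the repo's shared module and load_quantized helper; the import path for load_quantized and the elided tokenizer handling in the quantized branch are assumptions, not part of this diff.

from pathlib import Path

from modules import shared
from modules.GPTQ_loader import load_quantized  # import path assumed

def load_model(model_name):
    # llama.cpp branch is checked first after this commit, so ggml
    # weights are picked up even if --wbits is also set
    if shared.is_llamacpp:
        from modules.llamacpp_model_alternative import LlamaCppModel

        model_file = list(Path(f'{shared.args.model_dir}/{model_name}').glob('ggml*.bin'))[0]
        print(f"llama.cpp weights detected: {model_file}\n")

        model, tokenizer = LlamaCppModel.from_pretrained(model_file)
        return model, tokenizer

    # Quantized (GPTQ) model now comes second
    elif shared.args.wbits > 0:
        model = load_quantized(model_name)
        tokenizer = None  # tokenizer loading elided in this sketch
        return model, tokenizer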