Load llamacpp before quantized model (#1307)

catalpaaa 2023-04-17 06:47:26 -07:00 committed by GitHub
parent 3961f49524
commit 07de7d0426

@@ -99,6 +99,16 @@ def load_model(model_name):
 
         return model, tokenizer
 
+    # llamacpp model
+    elif shared.is_llamacpp:
+        from modules.llamacpp_model_alternative import LlamaCppModel
+
+        model_file = list(Path(f'{shared.args.model_dir}/{model_name}').glob('ggml*.bin'))[0]
+        print(f"llama.cpp weights detected: {model_file}\n")
+
+        model, tokenizer = LlamaCppModel.from_pretrained(model_file)
+        return model, tokenizer
+
     # Quantized model
     elif shared.args.wbits > 0:
@@ -116,16 +126,6 @@ def load_model(model_name):
 
         model = load_quantized(model_name)
 
-    # llamacpp model
-    elif shared.is_llamacpp:
-        from modules.llamacpp_model_alternative import LlamaCppModel
-
-        model_file = list(Path(f'{shared.args.model_dir}/{model_name}').glob('ggml*.bin'))[0]
-        print(f"llama.cpp weights detected: {model_file}\n")
-
-        model, tokenizer = LlamaCppModel.from_pretrained(model_file)
-        return model, tokenizer
-
     # Custom
     else:
         params = {"low_cpu_mem_usage": True}
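
For context, here is a minimal, self-contained sketch of the dispatch order this commit establishes in load_model(): the llama.cpp branch is now checked before the quantized (--wbits) branch, so a model directory containing ggml*.bin weights is routed to llama.cpp even when --wbits is set. The args object, return values, and branch bodies below are illustrative stand-ins, not the upstream modules code.

# Sketch of the branch order after this commit; `args`, `load_model`, and the
# return values are hypothetical stand-ins, not the real loaders.
from pathlib import Path
from types import SimpleNamespace

args = SimpleNamespace(model_dir="models", wbits=4)  # hypothetical CLI settings

def load_model(model_name):
    model_path = Path(args.model_dir) / model_name
    ggml_files = list(model_path.glob('ggml*.bin'))

    # llama.cpp model: checked first, so ggml weights win even when --wbits is
    # set. Before this commit the wbits branch below ran first and shadowed it.
    if ggml_files:
        print(f"llama.cpp weights detected: {ggml_files[0]}\n")
        return "llamacpp", ggml_files[0]

    # Quantized (GPTQ) model
    elif args.wbits > 0:
        return "quantized", model_path

    # Custom / default transformers path
    else:
        return "transformers", model_path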