Lora fixes for AutoGPTQ (#2818)

2024-09-20 00:07:06 +00:00 · 2023-07-09 04:03:43 +00:00 · 2023-07-09 04:03:43 +00:00 · 74ea7522a0
commit 74ea7522a0
parent 70b088843d
1 changed file with 6 additions and 5 deletions
--- a/modules/LoRA.py
+++ b/modules/LoRA.py
@@ -9,9 +9,9 @@ from modules.models import reload_model


 def add_lora_to_model(lora_names):
-    if 'GPTQForCausalLM' in shared.model.__class__.__name__:
+    if 'GPTQForCausalLM' in shared.model.__class__.__name__ or shared.args.loader == 'AutoGPTQ':
        add_lora_autogptq(lora_names)
-    elif shared.model.__class__.__name__ in ['ExllamaModel', 'ExllamaHF']:
+    elif shared.model.__class__.__name__ in ['ExllamaModel', 'ExllamaHF'] or shared.args.loader == 'ExLlama':
        add_lora_exllama(lora_names)
    else:
        add_lora_transformers(lora_names)
@@ -66,15 +66,16 @@ def add_lora_autogptq(lora_names):
        logger.error("This version of AutoGPTQ does not support LoRA. You need to install from source or wait for a new release.")
        return

-    if len(lora_names) == 0:
-        if len(shared.lora_names) > 0:
-            reload_model()
+    if len(lora_names) == 0:        
+        reload_model()

        shared.lora_names = []
        return
    else:
        if len(lora_names) > 1:
            logger.warning('AutoGPTQ can only work with 1 LoRA at the moment. Only the first one in the list will be loaded.')
+        if not shared.args.no_inject_fused_attention:
+            logger.warning('Fused Atttention + AutoGPTQ may break Lora loading. Disable it.')

        peft_config = GPTQLoraConfig(
            inference_mode=True,