Monkey patch fixes

oobabooga 2023-04-25 21:20:26 -03:00
parent da812600f4
commit 9b272bc8e5
3 changed files with 13 additions and 6 deletions

View File

@@ -127,6 +127,8 @@ cd text-generation-webui/repositories
 git clone https://github.com/johnsmith0031/alpaca_lora_4bit
 ```
+⚠️ I have tested it with the following commit specifically: `9fe5ab364280325f77da15f3541960960961d144`
 3. Install https://github.com/sterlind/GPTQ-for-LLaMa with this command:
 ```

View File

@@ -7,23 +7,24 @@ import modules.shared as shared
 def add_lora_to_model(lora_names):
-    shared.lora_names = list(lora_names)
     prior_set = set(shared.lora_names)
     added_set = set(lora_names) - prior_set
     removed_set = prior_set - set(lora_names)
+    shared.lora_names = list(lora_names)
-    # Nothing to do = skip.
+    # If no LoRA needs to be added or removed, exit
     if len(added_set) == 0 and len(removed_set) == 0:
         return
-    # Only adding, and already peft? Do it the easy way.
+    # Add a LoRA when another LoRA is already present
     if len(removed_set) == 0 and len(prior_set) > 0:
         print(f"Adding the LoRA(s) named {added_set} to the model...")
         for lora in added_set:
             shared.model.load_adapter(Path(f"{shared.args.lora_dir}/{lora}"), lora)
         return
-    # If removing anything, disable all and re-add.
+    # If any LoRA needs to be removed, start over
     if len(removed_set) > 0:
         shared.model.disable_adapter()
@@ -43,8 +44,7 @@ def add_lora_to_model(lora_names):
             shared.model.load_adapter(Path(f"{shared.args.lora_dir}/{lora}"), lora)
     if not shared.args.load_in_8bit and not shared.args.cpu:
-        if not shared.args.monkey_patch:
-            shared.model.half()
+        shared.model.half()
         if not hasattr(shared.model, "hf_device_map"):
             if torch.has_mps:
                 device = torch.device('mps')
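The reordering in this file's first hunk moves `shared.lora_names = list(lora_names)` below the three set computations: `prior_set` has to reflect the LoRAs that were loaded *before* the call, otherwise `added_set` and `removed_set` always come out empty and the early-return branch fires. A minimal standalone sketch of that set arithmetic (the variable names here are hypothetical stand-ins, not webui code):

```python
# Standalone sketch of the set arithmetic above; names are hypothetical.
currently_loaded = ["alpaca-lora", "vicuna-lora"]   # plays the role of the old shared.lora_names
newly_selected = ["alpaca-lora", "wizard-lora"]     # plays the role of the lora_names argument

prior_set = set(currently_loaded)
added_set = set(newly_selected) - prior_set         # {'wizard-lora'}: needs load_adapter()
removed_set = prior_set - set(newly_selected)       # {'vicuna-lora'}: forces disable_adapter() + reload

# Only after the diff is computed is the shared list overwritten; doing this
# first would make both sets empty and the function would return early.
currently_loaded = list(newly_selected)

print(sorted(added_set), sorted(removed_set))       # ['wizard-lora'] ['vicuna-lora']
```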

View File

@@ -6,6 +6,7 @@ from pathlib import Path
 sys.path.insert(0, str(Path("repositories/alpaca_lora_4bit")))
 import autograd_4bit
+from amp_wrapper import AMPWrapper
 from autograd_4bit import (Autograd4bitQuantLinear,
                            load_llama_model_4bit_low_ram)
 from monkeypatch.peft_tuners_lora_monkey_patch import (
@@ -31,6 +32,10 @@ def load_model_llama(model_name):
     autograd_4bit.use_new = True
     autograd_4bit.auto_switch = True
+    model.half()
+    wrapper = AMPWrapper(model)
+    wrapper.apply_generate()
     try:
         tokenizer.eos_token_id = 2
         tokenizer.bos_token_id = 1
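The other half of the fix lives in the 4-bit loader: after `autograd_4bit` is configured, the model is cast to half precision and its `generate` method is wrapped by `AMPWrapper` from the alpaca_lora_4bit repository, which appears to run generation under automatic mixed precision. As a rough illustration of that pattern only, not the library's actual implementation, wrapping `generate` with PyTorch autocast might look like this:

```python
# Illustration of the generate-wrapping pattern using plain PyTorch autocast.
# This is NOT alpaca_lora_4bit's AMPWrapper; it only shows the general idea.
import functools

import torch


def wrap_generate_with_autocast(model, dtype=torch.float16):
    """Patch model.generate so it runs under CUDA automatic mixed precision."""
    original_generate = model.generate

    @functools.wraps(original_generate)
    def generate(*args, **kwargs):
        with torch.autocast(device_type="cuda", dtype=dtype):
            return original_generate(*args, **kwargs)

    # Shadow the bound method on the instance so callers pick up the wrapped version.
    model.generate = generate
    return model
```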