diff --git a/docs/GPTQ-models-(4-bit-mode).md b/docs/GPTQ-models-(4-bit-mode).md
index 679cabee..dd9a82ab 100644
--- a/docs/GPTQ-models-(4-bit-mode).md
+++ b/docs/GPTQ-models-(4-bit-mode).md
@@ -127,6 +127,8 @@ cd text-generation-webui/repositories
 git clone https://github.com/johnsmith0031/alpaca_lora_4bit
 ```
 
+⚠️ I have tested it with the following commit specifically: `9fe5ab364280325f77da15f3541960960961d144`
+
 3. Install https://github.com/sterlind/GPTQ-for-LLaMa with this command:
 
 ```
diff --git a/modules/LoRA.py b/modules/LoRA.py
index ef1e88aa..a4ebe208 100644
--- a/modules/LoRA.py
+++ b/modules/LoRA.py
@@ -7,23 +7,24 @@ import modules.shared as shared
 
 
 def add_lora_to_model(lora_names):
+    shared.lora_names = list(lora_names)
     prior_set = set(shared.lora_names)
     added_set = set(lora_names) - prior_set
     removed_set = prior_set - set(lora_names)
-    shared.lora_names = list(lora_names)
 
-    # Nothing to do = skip.
+    # If no LoRA needs to be added or removed, exit
     if len(added_set) == 0 and len(removed_set) == 0:
         return
 
-    # Only adding, and already peft? Do it the easy way.
+    # Add a LoRA when another LoRA is already present
     if len(removed_set) == 0 and len(prior_set) > 0:
         print(f"Adding the LoRA(s) named {added_set} to the model...")
         for lora in added_set:
             shared.model.load_adapter(Path(f"{shared.args.lora_dir}/{lora}"), lora)
+
         return
 
-    # If removing anything, disable all and re-add.
+    # If any LoRA needs to be removed, start over
     if len(removed_set) > 0:
         shared.model.disable_adapter()
 
@@ -43,8 +44,7 @@ def add_lora_to_model(lora_names):
             shared.model.load_adapter(Path(f"{shared.args.lora_dir}/{lora}"), lora)
 
         if not shared.args.load_in_8bit and not shared.args.cpu:
-            if not shared.args.monkey_patch:
-                shared.model.half()
+            shared.model.half()
             if not hasattr(shared.model, "hf_device_map"):
                 if torch.has_mps:
                     device = torch.device('mps')
diff --git a/modules/monkey_patch_gptq_lora.py b/modules/monkey_patch_gptq_lora.py
index 3e591b52..872f7ce3 100644
--- a/modules/monkey_patch_gptq_lora.py
+++ b/modules/monkey_patch_gptq_lora.py
@@ -6,6 +6,7 @@ from pathlib import Path
 sys.path.insert(0, str(Path("repositories/alpaca_lora_4bit")))
 
 import autograd_4bit
+from amp_wrapper import AMPWrapper
 from autograd_4bit import (Autograd4bitQuantLinear,
                            load_llama_model_4bit_low_ram)
 from monkeypatch.peft_tuners_lora_monkey_patch import (
@@ -31,6 +32,10 @@ def load_model_llama(model_name):
     autograd_4bit.use_new = True
     autograd_4bit.auto_switch = True
 
+    model.half()
+    wrapper = AMPWrapper(model)
+    wrapper.apply_generate()
+
     try:
         tokenizer.eos_token_id = 2
         tokenizer.bos_token_id = 1
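
The last hunk converts the 4-bit model with `model.half()` and then applies alpaca_lora_4bit's `AMPWrapper` to `generate`. The sketch below is not the actual `amp_wrapper` implementation; it only illustrates, under that assumption, the general monkey-patching technique involved: swapping `model.generate` for a closure that runs the original call inside `torch.amp.autocast`. The class name `AutocastGenerateWrapper` is hypothetical.

```python
# Illustrative sketch only -- a hypothetical stand-in, not amp_wrapper.AMPWrapper itself.
# Technique: replace model.generate with a wrapper that runs the original generate
# inside an autocast region, so a model converted with model.half() can generate
# in mixed precision.
import torch


class AutocastGenerateWrapper:
    def __init__(self, model, device_type="cuda"):
        self.model = model
        self.device_type = device_type

    def apply_generate(self):
        original_generate = self.model.generate

        def generate_with_autocast(*args, **kwargs):
            # Run the unmodified generate call under automatic mixed precision.
            with torch.amp.autocast(device_type=self.device_type):
                return original_generate(*args, **kwargs)

        # Monkey-patch the bound method on this model instance only.
        self.model.generate = generate_with_autocast
```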