Monkey patch fixes

oobabooga 2023-04-25 21:20:26 -03:00
parent da812600f4
commit 9b272bc8e5
3 changed files with 13 additions and 6 deletions

View File

@@ -127,6 +127,8 @@ cd text-generation-webui/repositories
 git clone https://github.com/johnsmith0031/alpaca_lora_4bit
 ```
+⚠️ I have tested it with the following commit specifically: `9fe5ab364280325f77da15f3541960960961d144`
 3. Install https://github.com/sterlind/GPTQ-for-LLaMa with this command:
 ```

View File

@@ -7,23 +7,24 @@ import modules.shared as shared
 def add_lora_to_model(lora_names):
-    shared.lora_names = list(lora_names)
     prior_set = set(shared.lora_names)
     added_set = set(lora_names) - prior_set
     removed_set = prior_set - set(lora_names)
+    shared.lora_names = list(lora_names)
-    # Nothing to do = skip.
+    # If no LoRA needs to be added or removed, exit
     if len(added_set) == 0 and len(removed_set) == 0:
         return
-    # Only adding, and already peft? Do it the easy way.
+    # Add a LoRA when another LoRA is already present
     if len(removed_set) == 0 and len(prior_set) > 0:
         print(f"Adding the LoRA(s) named {added_set} to the model...")
         for lora in added_set:
             shared.model.load_adapter(Path(f"{shared.args.lora_dir}/{lora}"), lora)
         return
-    # If removing anything, disable all and re-add.
+    # If any LoRA needs to be removed, start over
     if len(removed_set) > 0:
         shared.model.disable_adapter()
@@ -43,8 +44,7 @@ def add_lora_to_model(lora_names):
             shared.model.load_adapter(Path(f"{shared.args.lora_dir}/{lora}"), lora)
     if not shared.args.load_in_8bit and not shared.args.cpu:
-        if not shared.args.monkey_patch:
-            shared.model.half()
+        shared.model.half()
         if not hasattr(shared.model, "hf_device_map"):
             if torch.has_mps:
                 device = torch.device('mps')
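The reordering in this file's first hunk moves `shared.lora_names = list(lora_names)` below the three set computations: `prior_set` has to reflect the LoRAs that were loaded *before* the call, otherwise `added_set` and `removed_set` always come out empty and the early-return branch fires. A minimal standalone sketch of that set arithmetic (the variable names here are hypothetical stand-ins, not webui code):

```python
# Standalone sketch of the set arithmetic above; names are hypothetical.
currently_loaded = ["alpaca-lora", "vicuna-lora"]   # plays the role of the old shared.lora_names
newly_selected = ["alpaca-lora", "wizard-lora"]     # plays the role of the lora_names argument

prior_set = set(currently_loaded)
added_set = set(newly_selected) - prior_set         # {'wizard-lora'}: needs load_adapter()
removed_set = prior_set - set(newly_selected)       # {'vicuna-lora'}: forces disable_adapter() + reload

# Only after the diff is computed is the shared list overwritten; doing this
# first would make both sets empty and the function would return early.
currently_loaded = list(newly_selected)

print(sorted(added_set), sorted(removed_set))       # ['wizard-lora'] ['vicuna-lora']
```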

View File

@@ -6,6 +6,7 @@ from pathlib import Path
 sys.path.insert(0, str(Path("repositories/alpaca_lora_4bit")))
 import autograd_4bit
+from amp_wrapper import AMPWrapper
 from autograd_4bit import (Autograd4bitQuantLinear,
                            load_llama_model_4bit_low_ram)
 from monkeypatch.peft_tuners_lora_monkey_patch import (
@@ -31,6 +32,10 @@ def load_model_llama(model_name):
     autograd_4bit.use_new = True
     autograd_4bit.auto_switch = True
+    model.half()
+    wrapper = AMPWrapper(model)
+    wrapper.apply_generate()
     try:
         tokenizer.eos_token_id = 2
         tokenizer.bos_token_id = 1
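The other half of the fix lives in the 4-bit loader: after `autograd_4bit` is configured, the model is cast to half precision and its `generate` method is wrapped by `AMPWrapper` from the alpaca_lora_4bit repository, which appears to run generation under automatic mixed precision. As a rough illustration of that pattern only, not the library's actual implementation, wrapping `generate` with PyTorch autocast might look like this:

```python
# Illustration of the generate-wrapping pattern using plain PyTorch autocast.
# This is NOT alpaca_lora_4bit's AMPWrapper; it only shows the general idea.
import functools

import torch


def wrap_generate_with_autocast(model, dtype=torch.float16):
    """Patch model.generate so it runs under CUDA automatic mixed precision."""
    original_generate = model.generate

    @functools.wraps(original_generate)
    def generate(*args, **kwargs):
        with torch.autocast(device_type="cuda", dtype=dtype):
            return original_generate(*args, **kwargs)

    # Shadow the bound method on the instance so callers pick up the wrapped version.
    model.generate = generate
    return model
```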