mirror of
https://github.com/oobabooga/text-generation-webui.git
synced 2024-10-01 01:26:03 -04:00
Monkey patch fixes
This commit is contained in:
parent
da812600f4
commit
9b272bc8e5
@ -127,6 +127,8 @@ cd text-generation-webui/repositories
|
||||
git clone https://github.com/johnsmith0031/alpaca_lora_4bit
|
||||
```
|
||||
|
||||
⚠️ I have tested it specifically with the following commit of `alpaca_lora_4bit`: `9fe5ab364280325f77da15f3541960960961d144`
|
||||
|
||||
3. Install https://github.com/sterlind/GPTQ-for-LLaMa with this command:
|
||||
|
||||
```
|
||||
|
@ -7,23 +7,24 @@ import modules.shared as shared
|
||||
|
||||
|
||||
def add_lora_to_model(lora_names):
|
||||
shared.lora_names = list(lora_names)
|
||||
prior_set = set(shared.lora_names)
|
||||
added_set = set(lora_names) - prior_set
|
||||
removed_set = prior_set - set(lora_names)
|
||||
shared.lora_names = list(lora_names)
|
||||
|
||||
# Nothing to do = skip.
|
||||
# If no LoRA needs to be added or removed, exit
|
||||
if len(added_set) == 0 and len(removed_set) == 0:
|
||||
return
|
||||
|
||||
# Only adding, and already peft? Do it the easy way.
|
||||
# Add a LoRA when another LoRA is already present
|
||||
if len(removed_set) == 0 and len(prior_set) > 0:
|
||||
print(f"Adding the LoRA(s) named {added_set} to the model...")
|
||||
for lora in added_set:
|
||||
shared.model.load_adapter(Path(f"{shared.args.lora_dir}/{lora}"), lora)
|
||||
|
||||
return
|
||||
|
||||
# If removing anything, disable all and re-add.
|
||||
# If any LoRA needs to be removed, start over
|
||||
if len(removed_set) > 0:
|
||||
shared.model.disable_adapter()
|
||||
|
||||
@ -43,7 +44,6 @@ def add_lora_to_model(lora_names):
|
||||
shared.model.load_adapter(Path(f"{shared.args.lora_dir}/{lora}"), lora)
|
||||
|
||||
if not shared.args.load_in_8bit and not shared.args.cpu:
|
||||
if not shared.args.monkey_patch:
|
||||
shared.model.half()
|
||||
if not hasattr(shared.model, "hf_device_map"):
|
||||
if torch.has_mps:
|
||||
|
@ -6,6 +6,7 @@ from pathlib import Path
|
||||
sys.path.insert(0, str(Path("repositories/alpaca_lora_4bit")))
|
||||
|
||||
import autograd_4bit
|
||||
from amp_wrapper import AMPWrapper
|
||||
from autograd_4bit import (Autograd4bitQuantLinear,
|
||||
load_llama_model_4bit_low_ram)
|
||||
from monkeypatch.peft_tuners_lora_monkey_patch import (
|
||||
@ -31,6 +32,10 @@ def load_model_llama(model_name):
|
||||
autograd_4bit.use_new = True
|
||||
autograd_4bit.auto_switch = True
|
||||
|
||||
model.half()
|
||||
wrapper = AMPWrapper(model)
|
||||
wrapper.apply_generate()
|
||||
|
||||
try:
|
||||
tokenizer.eos_token_id = 2
|
||||
tokenizer.bos_token_id = 1
|
||||
|
Loading…
Reference in New Issue
Block a user