Merge remote-tracking branch 'refs/remotes/origin/main'

This commit is contained in:
oobabooga 2023-07-06 22:25:43 -07:00
commit de994331a4
3 changed files with 21 additions and 10 deletions

View File

@@ -114,11 +114,12 @@ def add_lora_transformers(lora_names):
if len(lora_names) > 0: if len(lora_names) > 0:
params = {} params = {}
if not shared.args.cpu: if not shared.args.cpu:
params['dtype'] = shared.model.dtype if shared.args.load_in_4bit or shared.args.load_in_8bit:
if hasattr(shared.model, "hf_device_map"): params['peft_type'] = shared.model.dtype
params['device_map'] = {"base_model.model." + k: v for k, v in shared.model.hf_device_map.items()} else:
elif shared.args.load_in_8bit: params['dtype'] = shared.model.dtype
params['device_map'] = {'': 0} if hasattr(shared.model, "hf_device_map"):
params['device_map'] = {"base_model.model." + k: v for k, v in shared.model.hf_device_map.items()}
logger.info("Applying the following LoRAs to {}: {}".format(shared.model_name, ', '.join(lora_names))) logger.info("Applying the following LoRAs to {}: {}".format(shared.model_name, ', '.join(lora_names)))
shared.model = PeftModel.from_pretrained(shared.model, Path(f"{shared.args.lora_dir}/{lora_names[0]}"), adapter_name=lora_names[0], **params) shared.model = PeftModel.from_pretrained(shared.model, Path(f"{shared.args.lora_dir}/{lora_names[0]}"), adapter_name=lora_names[0], **params)

View File

@@ -95,11 +95,18 @@ def load_tokenizer(model_name, model):
if any(s in model_name.lower() for s in ['gpt-4chan', 'gpt4chan']) and Path(f"{shared.args.model_dir}/gpt-j-6B/").exists(): if any(s in model_name.lower() for s in ['gpt-4chan', 'gpt4chan']) and Path(f"{shared.args.model_dir}/gpt-j-6B/").exists():
tokenizer = AutoTokenizer.from_pretrained(Path(f"{shared.args.model_dir}/gpt-j-6B/")) tokenizer = AutoTokenizer.from_pretrained(Path(f"{shared.args.model_dir}/gpt-j-6B/"))
elif path_to_model.exists(): elif path_to_model.exists():
tokenizer = AutoTokenizer.from_pretrained( try:
path_to_model, tokenizer = AutoTokenizer.from_pretrained(
trust_remote_code=shared.args.trust_remote_code, path_to_model,
use_fast=False trust_remote_code=shared.args.trust_remote_code,
) use_fast=False
)
except ValueError:
tokenizer = AutoTokenizer.from_pretrained(
path_to_model,
trust_remote_code=shared.args.trust_remote_code,
use_fast=True
)
if tokenizer.__class__.__name__ == 'LlamaTokenizer': if tokenizer.__class__.__name__ == 'LlamaTokenizer':
pairs = [ pairs = [

View File

@@ -0,0 +1,3 @@
{
"instruction,output": "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.\n\nUSER: %instruction%\n\nASSISTANT: %output%"
}