import os
# os.environ["CUDA_VISIBLE_DEVICES"] = "0"
import torch
import torch.nn as nn
import bitsandbytes as bnb
from datasets import load_dataset
import transformers
from transformers import AutoTokenizer, AutoConfig, LLaMAForCausalLM, LLaMATokenizer
from peft import prepare_model_for_int8_training, LoraConfig, get_peft_model

model = LLaMAForCausalLM.from_pretrained(
" decapoda-research/llama-7b-hf " ,
    load_in_8bit=True,
    device_map="auto",
)
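# Note (added, not in the original script): load_in_8bit quantizes the frozen base
# weights through bitsandbytes, and device_map="auto" lets accelerate place the layers
# on whatever GPU(s) are visible.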
tokenizer = LLaMATokenizer.from_pretrained(
    "decapoda-research/llama-7b-hf", add_eos_token=True
)
model = prepare_model_for_int8_training(model)

config = LoraConfig(
    r=4,
    lora_alpha=16,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, config)
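# Hedged aside (an addition): with r=4 adapters on q_proj/v_proj only a tiny fraction
# of the 7B parameters is trainable; peft can report the exact counts.
model.print_trainable_parameters()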
tokenizer.pad_token_id = 0  # unk. we want this to be different from the eos token
data = load_dataset("json", data_files="alpaca_data.json")
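# Each record in alpaca_data.json carries "instruction", "input", and "output" fields;
# generate_prompt() below stitches them into the Alpaca prompt template.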
def generate_prompt(data_point):
    # sorry about the formatting disaster gotta move fast
    if data_point["input"]:
return f """ Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
### Instruction:
{ data_point [ " instruction " ] }
### Input:
{ data_point [ " input " ] }
### Response:
{data_point["output"]}"""
    else:
        return f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
### Instruction:
{data_point["instruction"]}
### Response:
{data_point["output"]}"""
# optimized for RTX 4090. for larger GPUs, increase some of these?
MICRO_BATCH_SIZE = 4 # this could actually be 5 but i like powers of 2
BATCH_SIZE = 128
GRADIENT_ACCUMULATION_STEPS = BATCH_SIZE // MICRO_BATCH_SIZE
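# with the defaults above this is 128 // 4 = 32: gradients from 32 micro-batches are
# accumulated before each optimizer step, for an effective batch size of 128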
EPOCHS = 3 # we don't need 3 tbh
LEARNING_RATE = 2e-5 # from the original paper
CUTOFF_LEN = 256 # 256 accounts for about 96% of the data
data = data.shuffle().map(
    lambda data_point: tokenizer(
        generate_prompt(data_point),
        truncation=True,
        max_length=CUTOFF_LEN,
padding = " max_length " ,
)
)
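# Note (added): the map above only produces input_ids/attention_mask. Labels are built
# per batch by DataCollatorForLanguageModeling(mlm=False) below, which copies input_ids
# into labels and masks pad positions (id 0 here) to -100 so padding is ignored by the loss.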
trainer = transformers.Trainer(
    model=model,
    train_dataset=data["train"],
    args=transformers.TrainingArguments(
        per_device_train_batch_size=MICRO_BATCH_SIZE,
        gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
        warmup_steps=100,
        num_train_epochs=EPOCHS,
        learning_rate=LEARNING_RATE,
        fp16=True,
        logging_steps=1,
        output_dir="lora-alpaca",
        save_total_limit=3,
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)
model.config.use_cache = False
trainer.train(resume_from_checkpoint=False)
model.save_pretrained("lora-alpaca")
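# Optional, hedged sanity check (an addition, not part of the original recipe): generate
# once from the in-memory model to eyeball the adapter's output; the sample record is
# hypothetical. Remove this block if you only want the training run.
model.config.use_cache = True  # re-enable the KV cache that was disabled for training
sample = {"instruction": "Name three primary colors.", "input": "", "output": ""}
prompt = generate_prompt(sample)
input_ids = tokenizer(prompt, return_tensors="pt")["input_ids"].to(model.device)
# add_eos_token=True appends the EOS token to the prompt; drop it so generation continues
if input_ids[0, -1].item() == tokenizer.eos_token_id:
    input_ids = input_ids[:, :-1]
with torch.no_grad():
    output = model.generate(input_ids=input_ids, max_new_tokens=64)
print(tokenizer.decode(output[0]))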