import os
# os.environ["CUDA_VISIBLE_DEVICES"] = "0"
import torch
import torch.nn as nn
import bitsandbytes as bnb
from datasets import load_dataset
import transformers
from transformers import AutoTokenizer, AutoConfig, LLaMAForCausalLM, LLaMATokenizer
from peft import prepare_model_for_int8_training, LoraConfig, get_peft_model
# optimized for RTX 4090. for larger GPUs, increase some of these?
MICRO_BATCH_SIZE = 4 # this could actually be 5 but i like powers of 2
BATCH_SIZE = 128
GRADIENT_ACCUMULATION_STEPS = BATCH_SIZE // MICRO_BATCH_SIZE
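# 128 // 4 = 32 micro-batches are accumulated per optimizer step, so the
# effective batch size stays at BATCH_SIZE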
EPOCHS = 3 # we don't need 3 tbh
LEARNING_RATE = 3e-4 # the Karpathy constant
CUTOFF_LEN = 256 # 256 accounts for about 96% of the data
LORA_R = 8
LORA_ALPHA = 16
LORA_DROPOUT = 0.05
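# load the frozen base model in 8 bit so the 7B weights fit comfortably on a 24 GB card;
# load_in_8bit goes through bitsandbytes, and device_map="auto" lets accelerate place the layers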
model = LLaMAForCausalLM.from_pretrained(
    "decapoda-research/llama-7b-hf",
    load_in_8bit=True,
    device_map="auto",
)
tokenizer = LLaMATokenizer.from_pretrained(
    "decapoda-research/llama-7b-hf", add_eos_token=True
)
model = prepare_model_for_int8_training(model)

config = LoraConfig(
    r=LORA_R,
    lora_alpha=LORA_ALPHA,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=LORA_DROPOUT,
    bias="none",
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, config)
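# optional sanity check: PEFT models expose print_trainable_parameters(), which
# reports how few parameters the LoRA adapters leave trainable
# model.print_trainable_parameters()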
tokenizer.pad_token_id = 0  # unk. we want this to be different from the eos token

data = load_dataset("json", data_files="alpaca_data.json")
def generate_prompt(data_point):
    # sorry about the formatting disaster gotta move fast
    if data_point["input"]:
        return f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{data_point["instruction"]}

### Input:
{data_point["input"]}

### Response:
{data_point["output"]}"""
    else:
        return f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
{data_point["instruction"]}

### Response:
{data_point["output"]}"""
def tokenize(prompt):
    # there's probably a way to do this with the tokenizer settings
    # but again, gotta move fast
    result = tokenizer(
        prompt,
        truncation=True,
        # tokenize/pad to CUTOFF_LEN + 1 and drop the last token below, so every
        # example comes out exactly CUTOFF_LEN tokens long
        max_length=CUTOFF_LEN + 1,
        padding="max_length",
    )
    return {
        "input_ids": result["input_ids"][:-1],
        "attention_mask": result["attention_mask"][:-1],
    }
data = data.shuffle().map(lambda x: tokenize(generate_prompt(x)))
trainer = transformers.Trainer(
    model=model,
    train_dataset=data["train"],
    args=transformers.TrainingArguments(
        per_device_train_batch_size=MICRO_BATCH_SIZE,
        gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
        warmup_steps=100,
        num_train_epochs=EPOCHS,
        learning_rate=LEARNING_RATE,
        fp16=True,
        logging_steps=20,
        output_dir="lora-alpaca",
        save_total_limit=3,
    ),
    # mlm=False makes the collator copy input_ids into labels (padding masked to -100),
    # so the loss is computed over the whole prompt + response
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)
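# the KV cache is useless during training and triggers warnings when gradient
# checkpointing is on (which prepare_model_for_int8_training typically enables);
# re-enable use_cache for generation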
model.config.use_cache = False
trainer.train(resume_from_checkpoint=False)
model.save_pretrained("lora-alpaca")
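
# a minimal sketch (not part of this training run) of how the saved adapter could be
# reloaded for inference later, assuming the same base checkpoint is available:
#
# from peft import PeftModel
#
# base = LLaMAForCausalLM.from_pretrained(
#     "decapoda-research/llama-7b-hf", load_in_8bit=True, device_map="auto"
# )
# model = PeftModel.from_pretrained(base, "lora-alpaca")
# model.config.use_cache = True  # re-enable the cache for generation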