# alpaca-lora/finetune.py

import os
# os.environ["CUDA_VISIBLE_DEVICES"] = "0"
import torch
import torch.nn as nn
import bitsandbytes as bnb
from datasets import load_dataset
import transformers
from transformers import AutoTokenizer, AutoConfig, LLaMAForCausalLM, LLaMATokenizer
from peft import prepare_model_for_int8_training, LoraConfig, get_peft_model
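
# Load the base LLaMA-7B checkpoint with its weights quantized to 8-bit via
# bitsandbytes; device_map="auto" lets Accelerate place layers on the available
# GPU(s)/CPU automatically.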
model = LLaMAForCausalLM.from_pretrained(
    "decapoda-research/llama-7b-hf",
    load_in_8bit=True,
    device_map="auto",
)
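
# Matching tokenizer; add_eos_token=True appends the </s> token to every
# encoded example so the model learns where a response should stop.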
tokenizer = LLaMATokenizer.from_pretrained(
    "decapoda-research/llama-7b-hf", add_eos_token=True
)
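
# peft helper for int8 fine-tuning: it freezes the quantized base weights and
# (roughly speaking) casts layer norms and the output head to fp32 for stability.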
model = prepare_model_for_int8_training(model)
config = LoraConfig(
    r=4,
    lora_alpha=16,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, config)
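# Only the rank-4 LoRA adapters injected into the q_proj/v_proj attention
# matrices are trainable from here on; the 7B base weights stay frozen.
# Optional sanity check:
# model.print_trainable_parameters()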
tokenizer.pad_token_id = 0 # unk. we want this to be different from the eos token
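
# alpaca_data.json is expected to hold Alpaca-style records with "instruction",
# "input" and "output" fields, which generate_prompt() below consumes.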
data = load_dataset("json", data_files="alpaca_data.json")


def generate_prompt(data_point):
    # sorry about the formatting disaster gotta move fast
    if data_point["input"]:
        return f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{data_point["instruction"]}

### Input:
{data_point["input"]}

### Response:
{data_point["output"]}"""
    else:
        return f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
{data_point["instruction"]}

### Response:
{data_point["output"]}"""
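

# For illustration, a hypothetical no-input record such as
#   {"instruction": "Name three primary colors.", "input": "", "output": "Red, yellow, blue."}
# takes the else-branch above and becomes the task preamble, "### Instruction:"
# with the instruction text, then "### Response:" followed by the target output.
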
# optimized for RTX 4090. for larger GPUs, increase some of these?
MICRO_BATCH_SIZE = 4 # this could actually be 5 but i like powers of 2
BATCH_SIZE = 128
GRADIENT_ACCUMULATION_STEPS = BATCH_SIZE // MICRO_BATCH_SIZE
EPOCHS = 3 # we don't need 3 tbh
LEARNING_RATE = 2e-5 # from the original paper
CUTOFF_LEN = 256 # 256 accounts for about 96% of the data
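
# Effective batch size is MICRO_BATCH_SIZE * GRADIENT_ACCUMULATION_STEPS
# = 4 * 32 = 128 examples per optimizer step.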
data = data.shuffle().map(
    lambda data_point: tokenizer(
        generate_prompt(data_point),
        truncation=True,
        max_length=CUTOFF_LEN,
        padding="max_length",
    )
)
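
# Prompts are tokenized once up front, truncated/padded to CUTOFF_LEN, so every
# training example is a fixed-length 256-token sequence.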
trainer = transformers.Trainer(
    model=model,
    train_dataset=data["train"],
    args=transformers.TrainingArguments(
        per_device_train_batch_size=MICRO_BATCH_SIZE,
        gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
        warmup_steps=100,
        num_train_epochs=EPOCHS,
        learning_rate=LEARNING_RATE,
        fp16=True,
        logging_steps=1,
        output_dir="lora-alpaca",
        save_total_limit=3,
    ),
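    # mlm=False gives plain causal-LM batches: the collator copies input_ids into
    # labels (masking pad positions) and the model applies the shift internally.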
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)
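
# The generation-time key/value cache isn't needed while training and can
# trigger warnings (e.g. if gradient checkpointing is enabled), so turn it off.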
model.config.use_cache = False
trainer.train(resume_from_checkpoint=False)
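
# For a PEFT model, save_pretrained() writes only the small LoRA adapter
# weights and config to lora-alpaca/, not the full 7B base model.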
model.save_pretrained("lora-alpaca")