2023-03-15 14:11:26 -04:00
import torch
2023-03-13 20:23:29 -04:00
from peft import PeftModel
2023-03-16 15:08:13 -04:00
import transformers
# Fail fast, with reinstall instructions, when the installed transformers
# build does not ship LLaMA support.
# NOTE(review): `_import_structure` is a private transformers attribute;
# `.get` is used so that a build with no "models.llama" entry at all reaches
# the helpful message below instead of dying with a bare KeyError.
assert "LlamaTokenizer" in transformers._import_structure.get(
    "models.llama", ()
), (
    "LLaMA is now in HuggingFace's main branch.\n"
    "Please reinstall it: pip uninstall transformers && pip install git+https://github.com/huggingface/transformers.git"
)
2023-03-16 10:34:33 -04:00
from transformers import LlamaTokenizer , LlamaForCausalLM , GenerationConfig
2023-03-13 18:00:05 -04:00
2023-03-16 10:34:33 -04:00
# Hub ids of the base checkpoint and of the LoRA adapter applied on top of it.
# The tokenizer and the base model must come from the same checkpoint, so the
# id is spelled once and shared.
BASE_MODEL = "decapoda-research/llama-7b-hf"
LORA_WEIGHTS = "tloen/alpaca-lora-7b"

tokenizer = LlamaTokenizer.from_pretrained(BASE_MODEL)

# Load the base model with 8-bit weights and let `device_map="auto"` decide
# where the layers are placed.
model = LlamaForCausalLM.from_pretrained(
    BASE_MODEL,
    load_in_8bit=True,
    torch_dtype=torch.float16,
    device_map="auto",
)
# Wrap the base model with the LoRA adapter weights.
model = PeftModel.from_pretrained(
    model,
    LORA_WEIGHTS,
    torch_dtype=torch.float16,
)
2023-03-13 18:00:05 -04:00
2023-03-15 00:33:07 -04:00
def generate_prompt(instruction, input=None):
    """Build the Alpaca-style prompt for an instruction.

    Args:
        instruction: Task description placed under ``### Instruction:``.
        input: Optional extra context placed under ``### Input:``. Any falsy
            value (``None``, ``""``) selects the template without an input
            section.

    Returns:
        The prompt text. It ends exactly with ``### Response:`` so that
        whatever follows that marker is the answer.
    """
    # NOTE: `input` shadows the builtin, but it is part of the public
    # signature and is therefore kept.
    # The templates are assembled from explicit "\n" separators so that the
    # blank lines between sections do not depend on source-file layout.
    if input:
        return (
            "Below is an instruction that describes a task, paired with an "
            "input that provides further context. Write a response that "
            "appropriately completes the request.\n"
            "\n"
            "### Instruction:\n"
            f"{instruction}\n"
            "\n"
            "### Input:\n"
            f"{input}\n"
            "\n"
            "### Response:"
        )
    return (
        "Below is an instruction that describes a task. Write a response "
        "that appropriately completes the request.\n"
        "\n"
        "### Instruction:\n"
        f"{instruction}\n"
        "\n"
        "### Response:"
    )
2023-03-15 20:22:22 -04:00
# Put the model into evaluation mode before any generation is run.
model.eval()
2023-03-15 00:33:07 -04:00
def evaluate(instruction, input=None, **kwargs):
    """Generate a response to an instruction and return only the answer text.

    Args:
        instruction: Task description passed to ``generate_prompt``.
        input: Optional extra context passed to ``generate_prompt``.
        **kwargs: Generation settings. ``max_new_tokens`` (default 2048) is
            handed to ``model.generate``; every other key is forwarded to
            ``GenerationConfig`` and overrides the defaults used here
            (``temperature=0.1``, ``top_p=0.75``, ``num_beams=4``).

    Returns:
        The text that follows the first ``### Response:`` marker in the
        decoded first returned sequence, stripped of surrounding whitespace.
    """
    prompt = generate_prompt(instruction, input)
    inputs = tokenizer(prompt, return_tensors="pt")
    input_ids = inputs["input_ids"].cuda()

    # Pull the length limit out of kwargs so a caller-supplied value is
    # honoured instead of being overridden by the hard-coded default.
    max_new_tokens = kwargs.pop("max_new_tokens", 2048)

    # Merge caller overrides over the defaults. Passing both as keywords
    # (`temperature=0.1, **kwargs`) raises TypeError as soon as a caller
    # repeats one of the default names.
    settings = {"temperature": 0.1, "top_p": 0.75, "num_beams": 4, **kwargs}
    generation_config = GenerationConfig(**settings)

    generation_output = model.generate(
        input_ids=input_ids,
        generation_config=generation_config,
        return_dict_in_generate=True,
        output_scores=True,
        max_new_tokens=max_new_tokens,
    )
    s = generation_output.sequences[0]
    output = tokenizer.decode(s)
    # NOTE(review): this relies on the decoded sequence containing the prompt
    # (and therefore the marker) ahead of the generated text - confirm.
    return output.split("### Response:")[1].strip()
if __name__ == "__main__":
    # testing code for readme
    # Each instruction is run through `evaluate` without an extra input and
    # printed alongside the model's response.
    for instruction in [
        "Tell me about alpacas.",
        "Tell me about the president of Mexico in 2019.",
        "Tell me about the king of France in 2019.",
        "List all Canadian provinces in alphabetical order.",
        "Write a Python program that prints the first 10 Fibonacci numbers.",
        "Write a program that prints the numbers from 1 to 100. But for multiples of three print 'Fizz' instead of the number and for the multiples of five print 'Buzz'. For numbers which are multiples of both three and five print 'FizzBuzz'.",
        "Tell me five words that rhyme with 'shock'.",
        "Translate the sentence 'I have no mouth but I must scream' into Spanish.",
        "Count up from 1 to 500.",
    ]:
        print("Instruction:", instruction)
        print("Response:", evaluate(instruction))
        print()