import argparse
import base64
import copy
import gc
import glob
import io
import json
import os
import re
import sys
import time
import warnings
import zipfile
from datetime import datetime
from pathlib import Path

import gradio as gr
import numpy as np
import torch
import transformers
from PIL import Image
from tqdm import tqdm
from transformers import AutoConfig
from transformers import AutoModelForCausalLM
from transformers import AutoTokenizer
from io import BytesIO

from modules.html_generator import *
from modules.stopping_criteria import _SentinelTokenStoppingCriteria
from modules.ui import *

transformers.logging.set_verbosity_error()

parser = argparse.ArgumentParser(formatter_class=lambda prog: argparse.HelpFormatter(prog, max_help_position=54))
parser.add_argument('--model', type=str, help='Name of the model to load by default.')
parser.add_argument('--notebook', action='store_true', help='Launch the web UI in notebook mode, where the output is written to the same text box as the input.')
parser.add_argument('--chat', action='store_true', help='Launch the web UI in chat mode.')
parser.add_argument('--cai-chat', action='store_true', help='Launch the web UI in chat mode with a style similar to Character.AI\'s. If the file img_bot.png or img_bot.jpg exists in the same folder as server.py, this image will be used as the bot\'s profile picture. Similarly, img_me.png or img_me.jpg will be used as your profile picture.')
parser.add_argument('--picture', action='store_true', help='Adds an ability to send pictures in chat UI modes. Captions are generated by BLIP.')
parser.add_argument('--cpu', action='store_true', help='Use the CPU to generate text.')
parser.add_argument('--load-in-8bit', action='store_true', help='Load the model with 8-bit precision.')
parser.add_argument('--bf16', action='store_true', help='Load the model with bfloat16 precision. Requires NVIDIA Ampere GPU.')
parser.add_argument('--auto-devices', action='store_true', help='Automatically split the model across the available GPU(s) and CPU.')
parser.add_argument('--disk', action='store_true', help='If the model is too large for your GPU(s) and CPU combined, send the remaining layers to the disk.')
parser.add_argument('--disk-cache-dir', type=str, help='Directory to save the disk cache to. Defaults to "cache/".')
parser.add_argument('--gpu-memory', type=int, help='Maximum GPU memory in GiB to allocate. This is useful if you get out of memory errors while trying to generate text. Must be an integer number.')
parser.add_argument('--cpu-memory', type=int, help='Maximum CPU memory in GiB to allocate for offloaded weights. Must be an integer number. Defaults to 99.')
parser.add_argument('--flexgen', action='store_true', help='Enable the use of FlexGen offloading.')
parser.add_argument('--percent', nargs="+", type=int, default=[0, 100, 100, 0, 100, 0], help='FlexGen: allocation percentages. Must be 6 numbers separated by spaces.')
parser.add_argument('--deepspeed', action='store_true', help='Enable the use of DeepSpeed ZeRO-3 for inference via the Transformers integration.')
parser.add_argument('--nvme-offload-dir', type=str, help='DeepSpeed: Directory to use for ZeRO-3 NVME offloading.')
parser.add_argument('--local_rank', type=int, default=0, help='DeepSpeed: Optional argument for distributed setups.')
parser.add_argument('--no-stream', action='store_true', help='Don\'t stream the text output in real time. This improves the text generation performance.')
parser.add_argument('--settings', type=str, help='Load the default interface settings from this json file. See settings-template.json for an example.')
parser.add_argument('--extensions', type=str, help='The list of extensions to load. If you want to load more than one extension, write the names separated by commas and between quotation marks, "like,this".')
parser.add_argument('--listen', action='store_true', help='Make the web UI reachable from your local network.')
parser.add_argument('--listen-port', type=int, help='The listening port that the server will use.')
parser.add_argument('--share', action='store_true', help='Create a public URL. This is useful for running the web UI on Google Colab or similar.')
parser.add_argument('--verbose', action='store_true', help='Print the prompts to the terminal.')
args = parser.parse_args()
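# Example invocations (illustrative only; combine the flags that match your setup):
#   python server.py --model <model-name> --no-stream
#   python server.py --cai-chat --load-in-8bit --gpu-memory 10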
if (args.chat or args.cai_chat) and not args.no_stream:
    print("Warning: chat mode currently becomes somewhat slower with text streaming on.\nConsider starting the web UI with the --no-stream option.\n")

settings = {
    'max_new_tokens': 200,
    'max_new_tokens_min': 1,
    'max_new_tokens_max': 2000,
    'preset': 'NovelAI-Sphinx Moth',
    'name1': 'Person 1',
    'name2': 'Person 2',
    'context': 'This is a conversation between two people.',
    'prompt': 'Common sense questions and answers\n\nQuestion: \nFactual answer:',
    'prompt_gpt4chan': '-----\n--- 865467536\nInput text\n--- 865467537\n',
    'stop_at_newline': True,
    'chat_prompt_size': 2048,
    'chat_prompt_size_min': 0,
    'chat_prompt_size_max': 2048,
    'preset_pygmalion': 'Pygmalion',
    'name1_pygmalion': 'You',
    'name2_pygmalion': 'Kawaii',
    'context_pygmalion': "Kawaii's persona: Kawaii is a cheerful person who loves to make others smile. She is an optimist who loves to spread happiness and positivity wherever she goes.\n<START>",
    'stop_at_newline_pygmalion': False,
}

if args.settings is not None and Path(args.settings).exists():
    new_settings = json.loads(open(Path(args.settings), 'r').read())
    for item in new_settings:
        settings[item] = new_settings[item]
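# The --settings file is a plain JSON object whose keys override the defaults above,
# e.g. (illustrative): {"max_new_tokens": 400, "name1": "Alice", "stop_at_newline": false}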
if args.flexgen:
    from flexgen.flex_opt import (Policy, OptLM, TorchDevice, TorchDisk, TorchMixedDevice, CompressionConfig, Env, Task, get_opt_config)

if args.deepspeed:
    import deepspeed
    from transformers.deepspeed import HfDeepSpeedConfig, is_deepspeed_zero3_enabled

    from modules.deepspeed_parameters import generate_ds_config

    # Distributed setup
    local_rank = args.local_rank if args.local_rank is not None else int(os.getenv("LOCAL_RANK", "0"))
    world_size = int(os.getenv("WORLD_SIZE", "1"))
    torch.cuda.set_device(local_rank)
    deepspeed.init_distributed()
    ds_config = generate_ds_config(args.bf16, 1 * world_size, args.nvme_offload_dir)
    dschf = HfDeepSpeedConfig(ds_config) # Keep this object alive for the Transformers integration

if args.picture and (args.cai_chat or args.chat):
    import modules.bot_picture as bot_picture

def load_model(model_name):
    print(f"Loading {model_name}...")
    t0 = time.time()

    # Default settings
    if not (args.cpu or args.load_in_8bit or args.auto_devices or args.disk or args.gpu_memory is not None or args.cpu_memory is not None or args.deepspeed or args.flexgen):
        if any(size in model_name.lower() for size in ('13b', '20b', '30b')):
            model = AutoModelForCausalLM.from_pretrained(Path(f"models/{model_name}"), device_map='auto', load_in_8bit=True)
        else:
            model = AutoModelForCausalLM.from_pretrained(Path(f"models/{model_name}"), low_cpu_mem_usage=True, torch_dtype=torch.bfloat16 if args.bf16 else torch.float16).cuda()

    # FlexGen
    elif args.flexgen:
        gpu = TorchDevice("cuda:0")
        cpu = TorchDevice("cpu")
        disk = TorchDisk("cache")
        env = Env(gpu=gpu, cpu=cpu, disk=disk, mixed=TorchMixedDevice([gpu, cpu, disk]))

        # Offloading policy
        policy = Policy(1, 1,
                        args.percent[0], args.percent[1],
                        args.percent[2], args.percent[3],
                        args.percent[4], args.percent[5],
                        overlap=True, sep_layer=True, pin_weight=True,
                        cpu_cache_compute=False, attn_sparsity=1.0,
                        compress_weight=False,
                        comp_weight_config=CompressionConfig(
                            num_bits=4, group_size=64,
                            group_dim=0, symmetric=False),
                        compress_cache=False,
                        comp_cache_config=CompressionConfig(
                            num_bits=4, group_size=64,
                            group_dim=2, symmetric=False))

        opt_config = get_opt_config(f"facebook/{model_name}")
        model = OptLM(opt_config, env, "models", policy)
        model.init_all_weights()

    # DeepSpeed ZeRO-3
    elif args.deepspeed:
        model = AutoModelForCausalLM.from_pretrained(Path(f"models/{model_name}"), torch_dtype=torch.bfloat16 if args.bf16 else torch.float16)
        model = deepspeed.initialize(model=model, config_params=ds_config, model_parameters=None, optimizer=None, lr_scheduler=None)[0]
        model.module.eval() # Inference
        print(f"DeepSpeed ZeRO-3 is enabled: {is_deepspeed_zero3_enabled()}")
    # Custom
    else:
        command = "AutoModelForCausalLM.from_pretrained"
        params = ["low_cpu_mem_usage=True"]
        if not args.cpu and not torch.cuda.is_available():
            print("Warning: no GPU has been detected.\nFalling back to CPU mode.\n")
            args.cpu = True

        if args.cpu:
            params.append("low_cpu_mem_usage=True")
            params.append("torch_dtype=torch.float32")
        else:
            params.append("device_map='auto'")
            params.append("load_in_8bit=True" if args.load_in_8bit else "torch_dtype=torch.bfloat16" if args.bf16 else "torch_dtype=torch.float16")

            if args.gpu_memory:
                params.append(f"max_memory={{0: '{args.gpu_memory or '99'}GiB', 'cpu': '{args.cpu_memory or '99'}GiB'}}")
            elif not args.load_in_8bit:
                total_mem = (torch.cuda.get_device_properties(0).total_memory/(1024*1024))
                suggestion = round((total_mem-1000)/1000)*1000
                if total_mem-suggestion < 800:
                    suggestion -= 1000
                suggestion = int(round(suggestion/1000))
                print(f"\033[1;32;1mAuto-assigning --gpu-memory {suggestion} for your GPU to try to prevent out-of-memory errors.\nYou can manually set other values.\033[0;37;0m")
                params.append(f"max_memory={{0: '{suggestion}GiB', 'cpu': '{args.cpu_memory or '99'}GiB'}}")
            if args.disk:
                params.append(f"offload_folder='{args.disk_cache_dir or 'cache'}'")

        command = f"{command}(Path(f'models/{model_name}'), {','.join(set(params))})"
        model = eval(command)
    # Loading the tokenizer
    if model_name.lower().startswith(('gpt4chan', 'gpt-4chan', '4chan')) and Path(f"models/gpt-j-6B/").exists():
        tokenizer = AutoTokenizer.from_pretrained(Path("models/gpt-j-6B/"))
    else:
        tokenizer = AutoTokenizer.from_pretrained(Path(f"models/{model_name}/"))
    tokenizer.truncation_side = 'left'

    print(f"Loaded the model in {(time.time()-t0):.2f} seconds.")
    return model, tokenizer
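
# Soft prompts are distributed as .zip archives containing a 'tensor.npy' file (the learned
# embeddings) and a 'meta.json' file; the 'name' field of meta.json is used when uploading.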
def load_soft_prompt(name):
    global soft_prompt, soft_prompt_tensor

    if name == 'None':
        soft_prompt = False
        soft_prompt_tensor = None
    else:
        with zipfile.ZipFile(Path(f'softprompts/{name}.zip')) as zf:
            zf.extract('tensor.npy')
            zf.extract('meta.json')
            j = json.loads(open('meta.json', 'r').read())
            print(f"\nLoading the softprompt \"{name}\".")
            for field in j:
                if field != 'name':
                    if type(j[field]) is list:
                        print(f"{field}: {', '.join(j[field])}")
                    else:
                        print(f"{field}: {j[field]}")
            print()
            tensor = np.load('tensor.npy')
            Path('tensor.npy').unlink()
            Path('meta.json').unlink()
            tensor = torch.Tensor(tensor).to(device=model.device, dtype=model.dtype)
            tensor = torch.reshape(tensor, (1, tensor.shape[0], tensor.shape[1]))

        soft_prompt = True
        soft_prompt_tensor = tensor

    return name

def upload_soft_prompt(file):
    with zipfile.ZipFile(io.BytesIO(file)) as zf:
        zf.extract('meta.json')
        j = json.loads(open('meta.json', 'r').read())
        name = j['name']
        Path('meta.json').unlink()

    with open(Path(f'softprompts/{name}.zip'), 'wb') as f:
        f.write(file)

    return name

def load_model_wrapper(selected_model):
    global model_name, model, tokenizer

    if selected_model != model_name:
        model_name = selected_model
        model = tokenizer = None
        if not args.cpu:
            gc.collect()
            torch.cuda.empty_cache()
        model, tokenizer = load_model(model_name)

    return selected_model
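
# Presets are plain-text files in presets/ with one "key=value" pair per line (trailing
# commas are ignored, and a 'tokens' key is skipped), e.g. (illustrative):
#   do_sample=True,
#   temperature=0.7,
#   top_p=0.9,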
def load_preset_values(preset_menu, return_dict=False):
    generate_params = {
        'do_sample': True,
        'temperature': 1,
        'top_p': 1,
        'typical_p': 1,
        'repetition_penalty': 1,
        'top_k': 50,
        'num_beams': 1,
        'penalty_alpha': 0,
        'min_length': 0,
        'length_penalty': 1,
        'no_repeat_ngram_size': 0,
        'early_stopping': False,
    }
    with open(Path(f'presets/{preset_menu}.txt'), 'r') as infile:
        preset = infile.read()
    for i in preset.splitlines():
        i = i.rstrip(',').strip().split('=')
        if len(i) == 2 and i[0].strip() != 'tokens':
            generate_params[i[0].strip()] = eval(i[1].strip())

    generate_params['temperature'] = min(1.99, generate_params['temperature'])

    if return_dict:
        return generate_params
    else:
        return generate_params['do_sample'], generate_params['temperature'], generate_params['top_p'], generate_params['typical_p'], generate_params['repetition_penalty'], generate_params['top_k'], generate_params['min_length'], generate_params['no_repeat_ngram_size'], generate_params['num_beams'], generate_params['penalty_alpha'], generate_params['length_penalty'], generate_params['early_stopping']

# Removes empty replies from gpt4chan outputs
def fix_gpt4chan(s):
    for i in range(10):
        s = re.sub("--- [0-9]*\n>>[0-9]*\n---", "---", s)
        s = re.sub("--- [0-9]*\n*\n---", "---", s)
        s = re.sub("--- [0-9]*\n\n\n---", "---", s)
    return s

# Fix the LaTeX equations in galactica
def fix_galactica(s):
    s = s.replace(r'\[', r'$')
    s = s.replace(r'\]', r'$')
    s = s.replace(r'\(', r'$')
    s = s.replace(r'\)', r'$')
    s = s.replace(r'$$', r'$')
    s = re.sub(r'\n', r'\n\n', s)
    s = re.sub(r"\n{3,}", "\n\n", s)
    return s
def get_max_prompt_length(tokens):
    global soft_prompt, soft_prompt_tensor
    max_length = 2048-tokens
    if soft_prompt:
        max_length -= soft_prompt_tensor.shape[1]
    return max_length

def encode(prompt, tokens_to_generate=0, add_special_tokens=True):
    input_ids = tokenizer.encode(str(prompt), return_tensors='pt', truncation=True, max_length=get_max_prompt_length(tokens_to_generate), add_special_tokens=add_special_tokens)
    if args.cpu or args.flexgen:
        return input_ids
    elif args.deepspeed:
        return input_ids.to(device=local_rank)
    else:
        return input_ids.cuda()

def decode(output_ids):
    reply = tokenizer.decode(output_ids, skip_special_tokens=True)
    reply = reply.replace(r'<|endoftext|>', '')
    return reply

def formatted_outputs(reply, model_name):
    if not (args.chat or args.cai_chat):
        if model_name.lower().startswith('galactica'):
            reply = fix_galactica(reply)
            return reply, reply, generate_basic_html(reply)
        elif model_name.lower().startswith(('gpt4chan', 'gpt-4chan', '4chan')):
            reply = fix_gpt4chan(reply)
            return reply, 'Only applicable for GALACTICA models.', generate_4chan_html(reply)
        else:
            return reply, 'Only applicable for GALACTICA models.', generate_basic_html(reply)
    else:
        return reply

def generate_softprompt_input_tensors(input_ids):
    inputs_embeds = model.transformer.wte(input_ids)
    inputs_embeds = torch.cat((soft_prompt_tensor, inputs_embeds), dim=1)
    filler_input_ids = torch.zeros((1, inputs_embeds.shape[1]), dtype=input_ids.dtype).to(model.device)
    filler_input_ids += model.config.bos_token_id # setting dummy input_ids to bos tokens
    return inputs_embeds, filler_input_ids
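
# generate_reply() streams text by repeatedly calling model.generate() for 8 tokens at a
# time and feeding the output back in as the new prompt; with --no-stream the whole reply
# is produced in a single call instead.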
def generate_reply(question, tokens, do_sample, max_new_tokens, temperature, top_p, typical_p, repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams, penalty_alpha, length_penalty, early_stopping, eos_token=None, stopping_string=None):
    global model_name, model, tokenizer, soft_prompt, soft_prompt_tensor

    original_question = question
    if not (args.chat or args.cai_chat):
        question = apply_extensions(question, "input")
    if args.verbose:
        print(f"\n\n{question}\n--------------------\n")

    input_ids = encode(question, tokens)
    cuda = "" if (args.cpu or args.deepspeed or args.flexgen) else ".cuda()"
    n = tokenizer.eos_token_id if eos_token is None else tokenizer.encode(eos_token, return_tensors='pt')[0][-1]

    if stopping_string is not None:
        # The stopping_criteria code below was copied from
        # https://github.com/PygmalionAI/gradio-ui/blob/master/src/model.py
        t = encode(stopping_string, 0, add_special_tokens=False)
        stopping_criteria_list = transformers.StoppingCriteriaList([
            _SentinelTokenStoppingCriteria(
                sentinel_token_ids=t,
                starting_idx=len(input_ids[0])
            )
        ])
    else:
        stopping_criteria_list = None

    if not args.flexgen:
        generate_params = [
            f"eos_token_id={n}",
            f"stopping_criteria=stopping_criteria_list",
            f"do_sample={do_sample}",
            f"temperature={temperature}",
            f"top_p={top_p}",
            f"typical_p={typical_p}",
            f"repetition_penalty={repetition_penalty}",
            f"top_k={top_k}",
            f"min_length={min_length if args.no_stream else 0}",
            f"no_repeat_ngram_size={no_repeat_ngram_size}",
            f"num_beams={num_beams}",
            f"penalty_alpha={penalty_alpha}",
            f"length_penalty={length_penalty}",
            f"early_stopping={early_stopping}",
        ]
    else:
        generate_params = [
            f"do_sample={do_sample}",
            f"temperature={temperature}",
        ]

    if args.deepspeed:
        generate_params.append("synced_gpus=True")
    if args.no_stream:
        generate_params.append(f"max_new_tokens=tokens")
    else:
        generate_params.append(f"max_new_tokens=8")

    if soft_prompt:
        inputs_embeds, filler_input_ids = generate_softprompt_input_tensors(input_ids)
        generate_params.insert(0, "inputs_embeds=inputs_embeds")
        generate_params.insert(0, "filler_input_ids")
    else:
        generate_params.insert(0, "input_ids")

    # Generate the entire reply at once
    if args.no_stream:
        t0 = time.time()
        with torch.no_grad():
            output = eval(f"model.generate({','.join(generate_params)}){cuda}")[0]
            if soft_prompt:
                output = torch.cat((input_ids[0], output[filler_input_ids.shape[1]:]))

        reply = decode(output)
        if not (args.chat or args.cai_chat):
            reply = original_question + apply_extensions(reply[len(question):], "output")
        yield formatted_outputs(reply, model_name)

        t1 = time.time()
        print(f"Output generated in {(t1-t0):.2f} seconds ({(len(output)-len(input_ids[0]))/(t1-t0)/8:.2f} it/s, {len(output)-len(input_ids[0])} tokens)")

    # Generate the reply 8 tokens at a time
    else:
        yield formatted_outputs(original_question, model_name)
        for i in tqdm(range(tokens//8+1)):
            with torch.no_grad():
                output = eval(f"model.generate({','.join(generate_params)}){cuda}")[0]
                if soft_prompt:
                    output = torch.cat((input_ids[0], output[filler_input_ids.shape[1]:]))

            reply = decode(output)
            if not (args.chat or args.cai_chat):
                reply = original_question + apply_extensions(reply[len(question):], "output")
            yield formatted_outputs(reply, model_name)

            if not args.flexgen:
                input_ids = torch.reshape(output, (1, output.shape[0]))
            else:
                input_ids = np.reshape(output, (1, output.shape[0]))
            if soft_prompt:
                inputs_embeds, filler_input_ids = generate_softprompt_input_tensors(input_ids)

            if output[-1] == n:
                break
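
# Extensions are regular Python modules under extensions/<name>/script.py; they may define
# input_modifier(), output_modifier() and bot_prefix_modifier() functions plus a 'params'
# dict, which the helpers below call and expose in the UI.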
def apply_extensions(text, typ):
    global available_extensions, extension_state
    for ext in sorted(extension_state, key=lambda x: extension_state[x][1]):
        if extension_state[ext][0] == True:
            ext_string = f"extensions.{ext}.script"
            if typ == "input" and hasattr(eval(ext_string), "input_modifier"):
                text = eval(f"{ext_string}.input_modifier(text)")
            elif typ == "output" and hasattr(eval(ext_string), "output_modifier"):
                text = eval(f"{ext_string}.output_modifier(text)")
            elif typ == "bot_prefix" and hasattr(eval(ext_string), "bot_prefix_modifier"):
                text = eval(f"{ext_string}.bot_prefix_modifier(text)")
    return text

def update_extensions_parameters(*kwargs):
    i = 0
    for ext in sorted(extension_state, key=lambda x: extension_state[x][1]):
        if extension_state[ext][0] == True:
            params = eval(f"extensions.{ext}.script.params")
            for param in params:
                if len(kwargs) >= i+1:
                    params[param] = eval(f"kwargs[{i}]")
                    i += 1

def get_available_models():
    return sorted([item.name for item in list(Path('models/').glob('*')) if not item.name.endswith('.txt')], key=str.lower)

def get_available_presets():
    return sorted(set(map(lambda x: '.'.join(str(x.name).split('.')[:-1]), Path('presets').glob('*.txt'))), key=str.lower)

def get_available_characters():
    return ["None"] + sorted(set(map(lambda x: '.'.join(str(x.name).split('.')[:-1]), Path('characters').glob('*.json'))), key=str.lower)

def get_available_extensions():
    return sorted(set(map(lambda x: x.parts[1], Path('extensions').glob('*/script.py'))), key=str.lower)

def get_available_softprompts():
    return ["None"] + sorted(set(map(lambda x: '.'.join(str(x.name).split('.')[:-1]), Path('softprompts').glob('*.zip'))), key=str.lower)

def create_extensions_block():
    extensions_ui_elements = []
    default_values = []
    if not (args.chat or args.cai_chat):
        gr.Markdown('## Extensions parameters')
    for ext in sorted(extension_state, key=lambda x: extension_state[x][1]):
        if extension_state[ext][0] == True:
            params = eval(f"extensions.{ext}.script.params")
            for param in params:
                _id = f"{ext}-{param}"
                default_value = settings[_id] if _id in settings else params[param]
                default_values.append(default_value)
                if type(params[param]) == str:
                    extensions_ui_elements.append(gr.Textbox(value=default_value, label=f"{ext}-{param}"))
                elif type(params[param]) in [int, float]:
                    extensions_ui_elements.append(gr.Number(value=default_value, label=f"{ext}-{param}"))
                elif type(params[param]) == bool:
                    extensions_ui_elements.append(gr.Checkbox(value=default_value, label=f"{ext}-{param}"))

    update_extensions_parameters(*default_values)
    btn_extensions = gr.Button("Apply")
    btn_extensions.click(update_extensions_parameters, [*extensions_ui_elements], [])

def create_settings_menus():
    generate_params = load_preset_values(settings[f'preset{suffix}'], return_dict=True)

    with gr.Row():
        with gr.Column():
            with gr.Row():
                model_menu = gr.Dropdown(choices=available_models, value=model_name, label='Model')
                create_refresh_button(model_menu, lambda: None, lambda: {"choices": get_available_models()}, "refresh-button")
        with gr.Column():
            with gr.Row():
                preset_menu = gr.Dropdown(choices=available_presets, value=settings[f'preset{suffix}'], label='Generation parameters preset')
                create_refresh_button(preset_menu, lambda: None, lambda: {"choices": get_available_presets()}, "refresh-button")

    with gr.Accordion("Custom generation parameters", open=False, elem_id="accordion"):
        with gr.Row():
            do_sample = gr.Checkbox(value=generate_params['do_sample'], label="do_sample")
            temperature = gr.Slider(0.01, 1.99, value=generate_params['temperature'], step=0.01, label="temperature")
        with gr.Row():
            top_k = gr.Slider(0, 200, value=generate_params['top_k'], step=1, label="top_k")
            top_p = gr.Slider(0.0, 1.0, value=generate_params['top_p'], step=0.01, label="top_p")
        with gr.Row():
            repetition_penalty = gr.Slider(1.0, 4.99, value=generate_params['repetition_penalty'], step=0.01, label="repetition_penalty")
            no_repeat_ngram_size = gr.Slider(0, 20, step=1, value=generate_params["no_repeat_ngram_size"], label="no_repeat_ngram_size")
        with gr.Row():
            typical_p = gr.Slider(0.0, 1.0, value=generate_params['typical_p'], step=0.01, label="typical_p")
            min_length = gr.Slider(0, 2000, step=1, value=generate_params["min_length"] if args.no_stream else 0, label="min_length", interactive=args.no_stream)

        gr.Markdown("Contrastive search:")
        penalty_alpha = gr.Slider(0, 5, value=generate_params["penalty_alpha"], label="penalty_alpha")

        gr.Markdown("Beam search (uses a lot of VRAM):")
        with gr.Row():
            num_beams = gr.Slider(1, 20, step=1, value=generate_params["num_beams"], label="num_beams")
            length_penalty = gr.Slider(-5, 5, value=generate_params["length_penalty"], label="length_penalty")
        early_stopping = gr.Checkbox(value=generate_params["early_stopping"], label="early_stopping")

    with gr.Accordion("Soft prompt", open=False, elem_id="accordion"):
        with gr.Row():
            softprompts_menu = gr.Dropdown(choices=available_softprompts, value="None", label='Soft prompt')
            create_refresh_button(softprompts_menu, lambda: None, lambda: {"choices": get_available_softprompts()}, "refresh-button")

        gr.Markdown('Upload a soft prompt (.zip format):')
        with gr.Row():
            upload_softprompt = gr.File(type='binary', file_types=[".zip"])

    model_menu.change(load_model_wrapper, [model_menu], [model_menu], show_progress=True)
    preset_menu.change(load_preset_values, [preset_menu], [do_sample, temperature, top_p, typical_p, repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams, penalty_alpha, length_penalty, early_stopping])
    softprompts_menu.change(load_soft_prompt, [softprompts_menu], [softprompts_menu], show_progress=True)
    upload_softprompt.upload(upload_soft_prompt, [upload_softprompt], [softprompts_menu])
    return preset_menu, do_sample, temperature, top_p, typical_p, repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams, penalty_alpha, length_penalty, early_stopping

# This gets the new line characters right.
def clean_chat_message(text):
    text = text.replace('\n', '\n\n')
    text = re.sub(r"\n{3,}", "\n\n", text)
    text = text.strip()
    return text
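
# The chat prompt is built backwards: starting from the persona/context, past exchanges are
# inserted until the token budget (the smaller of get_max_prompt_length() and the chat
# prompt size slider) is exhausted, then the new user message and the bot prefix are appended.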
def generate_chat_prompt(text, tokens, name1, name2, context, chat_prompt_size, impersonate=False):
    global soft_prompt, soft_prompt_tensor

    text = clean_chat_message(text)
    rows = [f"{context.strip()}\n"]
    i = len(history['internal'])-1
    count = 0

    if soft_prompt:
        chat_prompt_size -= soft_prompt_tensor.shape[1]
    max_length = min(get_max_prompt_length(tokens), chat_prompt_size)

    while i >= 0 and len(encode(''.join(rows), tokens)[0]) < max_length:
        rows.insert(1, f"{name2}: {history['internal'][i][1].strip()}\n")
        count += 1
        if not (history['internal'][i][0] == '<|BEGIN-VISIBLE-CHAT|>'):
            rows.insert(1, f"{name1}: {history['internal'][i][0].strip()}\n")
            count += 1
        i -= 1

    if not impersonate:
        rows.append(f"{name1}: {text}\n")
        rows.append(apply_extensions(f"{name2}:", "bot_prefix"))
        limit = 3
    else:
        rows.append(f"{name1}:")
        limit = 2

    while len(rows) > limit and len(encode(''.join(rows), tokens)[0]) >= max_length:
        rows.pop(1)
        rows.pop(1)

    question = ''.join(rows)
    return question
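
# extract_message_from_reply() isolates the bot's newly generated message from the raw
# model output by locating the last "<name>:" header and trimming anything that belongs
# to the next speaker.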
def extract_message_from_reply(question, reply, current, other, check, extensions=False):
    next_character_found = False
    substring_found = False

    previous_idx = [m.start() for m in re.finditer(f"(^|\n){re.escape(current)}:", question)]
    idx = [m.start() for m in re.finditer(f"(^|\n){re.escape(current)}:", reply)]
    idx = idx[len(previous_idx)-1]

    if extensions:
        reply = reply[idx + 1 + len(apply_extensions(f"{current}:", "bot_prefix")):]
    else:
        reply = reply[idx + 1 + len(f"{current}:"):]

    if check:
        reply = reply.split('\n')[0].strip()
    else:
        idx = reply.find(f"\n{other}:")
        if idx != -1:
            reply = reply[:idx]
            next_character_found = True
        reply = clean_chat_message(reply)

        # Detect if something like "\nYo" is generated just before
        # "\nYou:" is completed
        tmp = f"\n{other}:"
        for j in range(1, len(tmp)):
            if reply[-j:] == tmp[:j]:
                substring_found = True

    return reply, next_character_found, substring_found
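
# When --picture is enabled, an uploaded image is captioned with BLIP (via
# modules.bot_picture) and that caption is what the model actually sees, while the chat
# log displays the image itself as an inline base64 <img> tag.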
def generate_chat_picture(picture, name1, name2):
    text = f'*{name1} sends {name2} a picture that contains the following: "{bot_picture.caption_image(picture)}"*'
    buffer = BytesIO()
    picture.save(buffer, format="JPEG")
    img_str = base64.b64encode(buffer.getvalue()).decode('utf-8')
    visible_text = f'<img src="data:image/jpeg;base64,{img_str}">'
    return text, visible_text

def stop_everything_event():
    global stop_everything
    stop_everything = True
def chatbot_wrapper(text, tokens, do_sample, max_new_tokens, temperature, top_p, typical_p, repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams, penalty_alpha, length_penalty, early_stopping, name1, name2, context, check, chat_prompt_size, picture=None):
    global stop_everything
    stop_everything = False

    if 'pygmalion' in model_name.lower():
        name1 = "You"

    if args.picture and picture is not None:
        text, visible_text = generate_chat_picture(picture, name1, name2)
    else:
        visible_text = text
    if args.chat:
        visible_text = visible_text.replace('\n', '<br>')

    text = apply_extensions(text, "input")
    question = generate_chat_prompt(text, tokens, name1, name2, context, chat_prompt_size)
    eos_token = '\n' if check else None
    first = True
    for reply in generate_reply(question, tokens, do_sample, max_new_tokens, temperature, top_p, typical_p, repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams, penalty_alpha, length_penalty, early_stopping, eos_token=eos_token, stopping_string=f"\n{name1}:"):
        reply, next_character_found, substring_found = extract_message_from_reply(question, reply, name2, name1, check, extensions=True)
        visible_reply = apply_extensions(reply, "output")
        if args.chat:
            visible_reply = visible_reply.replace('\n', '<br>')

        # We need this global variable to handle the Stop event,
        # otherwise gradio gets confused
        if stop_everything:
            return history['visible']

        if first:
            first = False
            history['internal'].append(['', ''])
            history['visible'].append(['', ''])

        history['internal'][-1] = [text, reply]
        history['visible'][-1] = [visible_text, visible_reply]
        if not substring_found:
            yield history['visible']
        if next_character_found:
            break
    yield history['visible']

def impersonate_wrapper(text, tokens, do_sample, max_new_tokens, temperature, top_p, typical_p, repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams, penalty_alpha, length_penalty, early_stopping, name1, name2, context, check, chat_prompt_size, picture=None):
    if 'pygmalion' in model_name.lower():
        name1 = "You"

    question = generate_chat_prompt(text, tokens, name1, name2, context, chat_prompt_size, impersonate=True)
    eos_token = '\n' if check else None
    for reply in generate_reply(question, tokens, do_sample, max_new_tokens, temperature, top_p, typical_p, repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams, penalty_alpha, length_penalty, early_stopping, eos_token=eos_token, stopping_string=f"\n{name2}:"):
        reply, next_character_found, substring_found = extract_message_from_reply(question, reply, name1, name2, check, extensions=False)
        if not substring_found:
            yield reply
        if next_character_found:
            break
    yield reply

def cai_chatbot_wrapper(text, tokens, do_sample, max_new_tokens, temperature, top_p, typical_p, repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams, penalty_alpha, length_penalty, early_stopping, name1, name2, context, check, chat_prompt_size, picture=None):
    for _history in chatbot_wrapper(text, tokens, do_sample, max_new_tokens, temperature, top_p, typical_p, repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams, penalty_alpha, length_penalty, early_stopping, name1, name2, context, check, chat_prompt_size, picture):
        yield generate_chat_html(_history, name1, name2, character)

def regenerate_wrapper(text, tokens, do_sample, max_new_tokens, temperature, top_p, typical_p, repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams, penalty_alpha, length_penalty, early_stopping, name1, name2, context, check, chat_prompt_size, picture=None):
    if character is not None and len(history['visible']) == 1:
        if args.cai_chat:
            yield generate_chat_html(history['visible'], name1, name2, character)
        else:
            yield history['visible']
    else:
        last_visible = history['visible'].pop()
        last_internal = history['internal'].pop()

        for _history in chatbot_wrapper(last_internal[0], tokens, do_sample, max_new_tokens, temperature, top_p, typical_p, repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams, penalty_alpha, length_penalty, early_stopping, name1, name2, context, check, chat_prompt_size, picture):
            if args.cai_chat:
                history['visible'][-1] = [last_visible[0], _history[-1][1]]
                yield generate_chat_html(history['visible'], name1, name2, character)
            else:
                history['visible'][-1] = (last_visible[0], _history[-1][1])
                yield history['visible']

def remove_last_message(name1, name2):
    if not history['internal'][-1][0] == '<|BEGIN-VISIBLE-CHAT|>':
        last = history['visible'].pop()
        history['internal'].pop()
    else:
        last = ['', '']
    if args.cai_chat:
        return generate_chat_html(history['visible'], name1, name2, character), last[0]
    else:
        return history['visible'], last[0]

def send_last_reply_to_input():
    if len(history['internal']) > 0:
        return history['internal'][-1][1]
    else:
        return ''

def replace_last_reply(text, name1, name2):
    if len(history['visible']) > 0:
        if args.cai_chat:
            history['visible'][-1][1] = text
        else:
            history['visible'][-1] = (history['visible'][-1][0], text)
        history['internal'][-1][1] = apply_extensions(text, "input")

    if args.cai_chat:
        return generate_chat_html(history['visible'], name1, name2, character)
    else:
        return history['visible']

def clear_html():
    return generate_chat_html([], "", "", character)

def clear_chat_log(_character, name1, name2):
    global history
    if _character != 'None':
        for i in range(len(history['internal'])):
            if '<|BEGIN-VISIBLE-CHAT|>' in history['internal'][i][0]:
                history['visible'] = [['', history['internal'][i][1]]]
                history['internal'] = history['internal'][:i+1]
                break
    else:
        history['internal'] = []
        history['visible'] = []
    if args.cai_chat:
        return generate_chat_html(history['visible'], name1, name2, character)
    else:
        return history['visible']
def redraw_html(name1, name2):
    global history
    return generate_chat_html(history['visible'], name1, name2, character)
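
# tokenize_dialogue() parses example dialogues written as alternating "You:"/"<bot name>:"
# lines (with optional <START> separators) into [user, bot] pairs for the chat history.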
def tokenize_dialogue(dialogue, name1, name2):
    _history = []

    dialogue = re.sub('<START>', '', dialogue)
    dialogue = re.sub('<start>', '', dialogue)
    dialogue = re.sub('(\n|^)[Aa]non:', '\\1You:', dialogue)
    dialogue = re.sub('(\n|^)\[CHARACTER\]:', f'\\g<1>{name2}:', dialogue)
    idx = [m.start() for m in re.finditer(f"(^|\n)({re.escape(name1)}|{re.escape(name2)}):", dialogue)]
    if len(idx) == 0:
        return _history

    messages = []
    for i in range(len(idx)-1):
        messages.append(dialogue[idx[i]:idx[i+1]].strip())
    messages.append(dialogue[idx[-1]:].strip())

    entry = ['', '']
    for i in messages:
        if i.startswith(f'{name1}:'):
            entry[0] = i[len(f'{name1}:'):].strip()
        elif i.startswith(f'{name2}:'):
            entry[1] = i[len(f'{name2}:'):].strip()
            if not (len(entry[0]) == 0 and len(entry[1]) == 0):
                _history.append(entry)
            entry = ['', '']

    print(f"\033[1;32;1m\nDialogue tokenized to:\033[0;37;0m\n", end='')
    for row in _history:
        for column in row:
            print("\n")
            for line in column.strip().split('\n'):
                print("| "+line+"\n")
            print("|\n")
        print("------------------------------")

    return _history
def save_history(timestamp=True):
    if timestamp:
        fname = f"{character or ''}{'_' if character else ''}{datetime.now().strftime('%Y%m%d-%H%M%S')}.json"
    else:
        fname = f"{character or ''}{'_' if character else ''}persistent.json"
    if not Path('logs').exists():
        Path('logs').mkdir()
    with open(Path(f'logs/{fname}'), 'w') as f:
        f.write(json.dumps({'data': history['internal'], 'data_visible': history['visible']}, indent=2))
    return Path(f'logs/{fname}')
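
# load_history() accepts either this web UI's own log format ('data'/'data_visible' keys)
# or Pygmalion's official web UI export ('chat' key); anything else is treated as a raw
# text dialogue and re-tokenized.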
def load_history(file, name1, name2):
    global history
    file = file.decode('utf-8')
    try:
        j = json.loads(file)
        if 'data' in j:
            history['internal'] = j['data']
            if 'data_visible' in j:
                history['visible'] = j['data_visible']
            else:
                history['visible'] = copy.deepcopy(history['internal'])
        # Compatibility with Pygmalion AI's official web UI
        elif 'chat' in j:
            history['internal'] = [':'.join(x.split(':')[1:]).strip() for x in j['chat']]
            if len(j['chat']) > 0 and j['chat'][0].startswith(f'{name2}:'):
                history['internal'] = [['<|BEGIN-VISIBLE-CHAT|>', history['internal'][0]]] + [[history['internal'][i], history['internal'][i+1]] for i in range(1, len(history['internal'])-1, 2)]
                history['visible'] = copy.deepcopy(history['internal'])
                history['visible'][0][0] = ''
            else:
                history['internal'] = [[history['internal'][i], history['internal'][i+1]] for i in range(0, len(history['internal'])-1, 2)]
                history['visible'] = copy.deepcopy(history['internal'])
    except:
        history['internal'] = tokenize_dialogue(file, name1, name2)
        history['visible'] = copy.deepcopy(history['internal'])
def load_character(_character, name1, name2):
    global history, character
    context = ""
    history['internal'] = []
    history['visible'] = []
    if _character != 'None':
        character = _character
        data = json.loads(open(Path(f'characters/{_character}.json'), 'r').read())
        name2 = data['char_name']
        if 'char_persona' in data and data['char_persona'] != '':
            context += f"{data['char_name']}'s Persona: {data['char_persona']}\n"
        if 'world_scenario' in data and data['world_scenario'] != '':
            context += f"Scenario: {data['world_scenario']}\n"
        context = f"{context.strip()}\n<START>\n"
        if 'example_dialogue' in data and data['example_dialogue'] != '':
            history['internal'] = tokenize_dialogue(data['example_dialogue'], name1, name2)
        if 'char_greeting' in data and len(data['char_greeting'].strip()) > 0:
            history['internal'] += [['<|BEGIN-VISIBLE-CHAT|>', data['char_greeting']]]
            history['visible'] += [['', apply_extensions(data['char_greeting'], "output")]]
        else:
            history['internal'] += [['<|BEGIN-VISIBLE-CHAT|>', "Hello there!"]]
            history['visible'] += [['', "Hello there!"]]
    else:
        character = None
        context = settings['context_pygmalion']
        name2 = settings['name2_pygmalion']

    if Path(f'logs/{character}_persistent.json').exists():
        load_history(open(Path(f'logs/{character}_persistent.json'), 'rb').read(), name1, name2)

    if args.cai_chat:
        return name2, context, generate_chat_html(history['visible'], name1, name2, character)
    else:
        return name2, context, history['visible']

def upload_character(json_file, img, tavern=False):
    json_file = json_file if type(json_file) == str else json_file.decode('utf-8')
    data = json.loads(json_file)
    outfile_name = data["char_name"]
    i = 1
    while Path(f'characters/{outfile_name}.json').exists():
        outfile_name = f'{data["char_name"]}_{i:03d}'
        i += 1
    if tavern:
        outfile_name = f'TavernAI-{outfile_name}'
    with open(Path(f'characters/{outfile_name}.json'), 'w') as f:
        f.write(json_file)
    if img is not None:
        img = Image.open(io.BytesIO(img))
        img.save(Path(f'characters/{outfile_name}.png'))
    print(f'New character saved to "characters/{outfile_name}.json".')
    return outfile_name

def upload_tavern_character(img, name1, name2):
    _img = Image.open(io.BytesIO(img))
    _img.getexif()
    decoded_string = base64.b64decode(_img.info['chara'])
    _json = json.loads(decoded_string)
    _json = {"char_name": _json['name'], "char_persona": _json['description'], "char_greeting": _json["first_mes"], "example_dialogue": _json['mes_example'], "world_scenario": _json['scenario']}
    _json['example_dialogue'] = _json['example_dialogue'].replace('{{user}}', name1).replace('{{char}}', _json['char_name'])
    return upload_character(json.dumps(_json), img, tavern=True)

def upload_your_profile_picture(img):
    img = Image.open(io.BytesIO(img))
    img.save(Path(f'img_me.png'))
    print(f'Profile picture saved to "img_me.png"')

# Global variables
available_models = get_available_models()
available_presets = get_available_presets()
available_characters = get_available_characters()
available_extensions = get_available_extensions()
available_softprompts = get_available_softprompts()

extension_state = {}
if args.extensions is not None:
    for i, ext in enumerate(args.extensions.split(',')):
        if ext in available_extensions:
            print(f'Loading the extension "{ext}"... ', end='')
            ext_string = f"extensions.{ext}.script"
            exec(f"import {ext_string}")
            extension_state[ext] = [True, i]
            print(f'Ok.')
# Choosing the default model
if args.model is not None:
    model_name = args.model
else:
    if len(available_models) == 0:
        print("No models are available! Please download at least one.")
        sys.exit(0)
    elif len(available_models) == 1:
        i = 0
    else:
        print("The following models are available:\n")
        for i, model in enumerate(available_models):
            print(f"{i+1}. {model}")
        print(f"\nWhich one do you want to load? 1-{len(available_models)}\n")
        i = int(input())-1
        print()
    model_name = available_models[i]

model, tokenizer = load_model(model_name)
loaded_preset = None
soft_prompt_tensor = None
soft_prompt = False
stop_everything = False

# UI settings
if model_name.lower().startswith(('gpt4chan', 'gpt-4chan', '4chan')):
    default_text = settings['prompt_gpt4chan']
elif re.match('(rosey|chip|joi)_.*_instruct.*', model_name.lower()) is not None:
    default_text = 'User: \n'
else:
    default_text = settings['prompt']
description = f"\n\n# Text generation lab\nGenerate text using Large Language Models.\n"

suffix = '_pygmalion' if 'pygmalion' in model_name.lower() else ''
buttons = {}
gen_events = []
history = {'internal': [], 'visible': []}
character = None
2023-02-07 20:08:21 -05:00
if args . chat or args . cai_chat :
2023-02-15 09:30:38 -05:00
if Path ( f ' logs/persistent.json ' ) . exists ( ) :
load_history ( open ( Path ( f ' logs/persistent.json ' ) , ' rb ' ) . read ( ) , settings [ f ' name1 { suffix } ' ] , settings [ f ' name2 { suffix } ' ] )
    with gr.Blocks(css=css + chat_css, analytics_enabled=False) as interface:
        if args.cai_chat:
            display = gr.HTML(value=generate_chat_html(history['visible'], settings[f'name1{suffix}'], settings[f'name2{suffix}'], character))
        else:
            display = gr.Chatbot(value=history['visible'])
        textbox = gr.Textbox(label='Input')
        with gr.Row():
            buttons["Stop"] = gr.Button("Stop")
            buttons["Generate"] = gr.Button("Generate")
            buttons["Regenerate"] = gr.Button("Regenerate")
        with gr.Row():
            buttons["Impersonate"] = gr.Button("Impersonate")
            buttons["Remove last"] = gr.Button("Remove last")
            buttons["Clear history"] = gr.Button("Clear history")
        with gr.Row():
            buttons["Send last reply to input"] = gr.Button("Send last reply to input")
            buttons["Replace last reply"] = gr.Button("Replace last reply")
        if args.picture:
            with gr.Row():
                picture_select = gr.Image(label="Send a picture", type='pil')
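        # Chat settings tab: participant names, context, character selection, and history/character uploads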
        with gr.Tab("Chat settings"):
            name1 = gr.Textbox(value=settings[f'name1{suffix}'], lines=1, label='Your name')
            name2 = gr.Textbox(value=settings[f'name2{suffix}'], lines=1, label='Bot\'s name')
            context = gr.Textbox(value=settings[f'context{suffix}'], lines=2, label='Context')
            with gr.Row():
                character_menu = gr.Dropdown(choices=available_characters, value="None", label='Character')
                create_refresh_button(character_menu, lambda: None, lambda: {"choices": get_available_characters()}, "refresh-button")

            with gr.Row():
                check = gr.Checkbox(value=settings[f'stop_at_newline{suffix}'], label='Stop generating at new line character?')
            with gr.Row():
                with gr.Tab('Chat history'):
                    with gr.Row():
                        with gr.Column():
                            gr.Markdown('Upload')
                            upload_chat_history = gr.File(type='binary', file_types=[".json", ".txt"])
                        with gr.Column():
                            gr.Markdown('Download')
                            download = gr.File()
                            buttons["Download"] = gr.Button(value="Click me")
                with gr.Tab('Upload character'):
                    with gr.Row():
                        with gr.Column():
                            gr.Markdown('1. Select the JSON file')
                            upload_char = gr.File(type='binary', file_types=[".json"])
                        with gr.Column():
                            gr.Markdown('2. Select your character\'s profile picture (optional)')
                            upload_img = gr.File(type='binary', file_types=["image"])
                    buttons["Upload character"] = gr.Button(value="Submit")
                with gr.Tab('Upload your profile picture'):
                    upload_img_me = gr.File(type='binary', file_types=["image"])
                with gr.Tab('Upload TavernAI Character Card'):
                    upload_img_tavern = gr.File(type='binary', file_types=["image"])
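        # Generation settings tab: token limits plus the shared sampling parameter menus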
        with gr.Tab("Generation settings"):
            with gr.Row():
                with gr.Column():
                    max_new_tokens = gr.Slider(minimum=settings['max_new_tokens_min'], maximum=settings['max_new_tokens_max'], step=1, label='max_new_tokens', value=settings['max_new_tokens'])
                with gr.Column():
                    chat_prompt_size_slider = gr.Slider(minimum=settings['chat_prompt_size_min'], maximum=settings['chat_prompt_size_max'], step=1, label='Maximum prompt size in tokens', value=settings['chat_prompt_size'])

            preset_menu, do_sample, temperature, top_p, typical_p, repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams, penalty_alpha, length_penalty, early_stopping = create_settings_menus()
        if args.extensions is not None:
            with gr.Tab("Extensions"):
                create_extensions_block()
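        # Inputs passed positionally to the chat wrapper functions; the order must match their parameter lists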
        input_params = [textbox, max_new_tokens, do_sample, max_new_tokens, temperature, top_p, typical_p, repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams, penalty_alpha, length_penalty, early_stopping, name1, name2, context, check, chat_prompt_size_slider]
        if args.picture:
            input_params.append(picture_select)

        function_call = "cai_chatbot_wrapper" if args.cai_chat else "chatbot_wrapper"
        gen_events.append(buttons["Generate"].click(eval(function_call), input_params, display, show_progress=args.no_stream, api_name="textgen"))
        gen_events.append(textbox.submit(eval(function_call), input_params, display, show_progress=args.no_stream))
        if args.picture:
            picture_select.upload(eval(function_call), input_params, display, show_progress=args.no_stream)
        gen_events.append(buttons["Regenerate"].click(regenerate_wrapper, input_params, display, show_progress=args.no_stream))
        gen_events.append(buttons["Impersonate"].click(impersonate_wrapper, input_params, textbox, show_progress=args.no_stream))
        buttons["Stop"].click(stop_everything_event, [], [], cancels=gen_events)
        buttons["Send last reply to input"].click(send_last_reply_to_input, [], textbox, show_progress=args.no_stream)
        buttons["Replace last reply"].click(replace_last_reply, [textbox, name1, name2], display, show_progress=args.no_stream)
        buttons["Clear history"].click(clear_chat_log, [character_menu, name1, name2], display)
        buttons["Remove last"].click(remove_last_message, [name1, name2], [display, textbox], show_progress=False)
        buttons["Download"].click(save_history, inputs=[], outputs=[download])
        buttons["Upload character"].click(upload_character, [upload_char, upload_img], [character_menu])

        # Clearing stuff and saving the history
        for i in ["Generate", "Regenerate", "Replace last reply"]:
            buttons[i].click(lambda x: "", textbox, textbox, show_progress=False)
            buttons[i].click(lambda: save_history(timestamp=False), [], [], show_progress=False)
        buttons["Clear history"].click(lambda: save_history(timestamp=False), [], [], show_progress=False)
        textbox.submit(lambda x: "", textbox, textbox, show_progress=False)
        textbox.submit(lambda: save_history(timestamp=False), [], [], show_progress=False)
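        # Character selection and file upload handlers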
        character_menu.change(load_character, [character_menu, name1, name2], [name2, context, display])
        upload_chat_history.upload(load_history, [upload_chat_history, name1, name2], [])
        upload_img_tavern.upload(upload_tavern_character, [upload_img_tavern, name1, name2], [character_menu])
        upload_img_me.upload(upload_your_profile_picture, [upload_img_me], [])

        if args.picture:
            picture_select.upload(lambda: None, [], [picture_select], show_progress=False)
        if args.cai_chat:
            upload_chat_history.upload(redraw_html, [name1, name2], [display])
            upload_img_me.upload(redraw_html, [name1, name2], [display])
        else:
            upload_chat_history.upload(lambda: history['visible'], [], [display])
            upload_img_me.upload(lambda: history['visible'], [], [display])
elif args.notebook:
    with gr.Blocks(css=css, analytics_enabled=False) as interface:
        gr.Markdown(description)
        with gr.Tab('Raw'):
            textbox = gr.Textbox(value=default_text, lines=23)
        with gr.Tab('Markdown'):
            markdown = gr.Markdown()
        with gr.Tab('HTML'):
            html = gr.HTML()

        buttons["Generate"] = gr.Button("Generate")
        buttons["Stop"] = gr.Button("Stop")

        max_new_tokens = gr.Slider(minimum=settings['max_new_tokens_min'], maximum=settings['max_new_tokens_max'], step=1, label='max_new_tokens', value=settings['max_new_tokens'])
        preset_menu, do_sample, temperature, top_p, typical_p, repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams, penalty_alpha, length_penalty, early_stopping = create_settings_menus()

        if args.extensions is not None:
            create_extensions_block()

        gen_events.append(buttons["Generate"].click(generate_reply, [textbox, max_new_tokens, do_sample, max_new_tokens, temperature, top_p, typical_p, repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams, penalty_alpha, length_penalty, early_stopping], [textbox, markdown, html], show_progress=args.no_stream, api_name="textgen"))
        gen_events.append(textbox.submit(generate_reply, [textbox, max_new_tokens, do_sample, max_new_tokens, temperature, top_p, typical_p, repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams, penalty_alpha, length_penalty, early_stopping], [textbox, markdown, html], show_progress=args.no_stream))
        buttons["Stop"].click(None, None, None, cancels=gen_events)
else:
    with gr.Blocks(css=css, analytics_enabled=False) as interface:
        gr.Markdown(description)
        with gr.Row():
            with gr.Column():
                textbox = gr.Textbox(value=default_text, lines=15, label='Input')
                max_new_tokens = gr.Slider(minimum=settings['max_new_tokens_min'], maximum=settings['max_new_tokens_max'], step=1, label='max_new_tokens', value=settings['max_new_tokens'])
                buttons["Generate"] = gr.Button("Generate")
                with gr.Row():
                    with gr.Column():
                        buttons["Continue"] = gr.Button("Continue")
                    with gr.Column():
                        buttons["Stop"] = gr.Button("Stop")

                preset_menu, do_sample, temperature, top_p, typical_p, repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams, penalty_alpha, length_penalty, early_stopping = create_settings_menus()
                if args.extensions is not None:
                    create_extensions_block()

            with gr.Column():
                with gr.Tab('Raw'):
                    output_textbox = gr.Textbox(lines=15, label='Output')
                with gr.Tab('Markdown'):
                    markdown = gr.Markdown()
                with gr.Tab('HTML'):
                    html = gr.HTML()

        gen_events.append(buttons["Generate"].click(generate_reply, [textbox, max_new_tokens, do_sample, max_new_tokens, temperature, top_p, typical_p, repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams, penalty_alpha, length_penalty, early_stopping], [output_textbox, markdown, html], show_progress=args.no_stream, api_name="textgen"))
        gen_events.append(textbox.submit(generate_reply, [textbox, max_new_tokens, do_sample, max_new_tokens, temperature, top_p, typical_p, repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams, penalty_alpha, length_penalty, early_stopping], [output_textbox, markdown, html], show_progress=args.no_stream))
        gen_events.append(buttons["Continue"].click(generate_reply, [output_textbox, max_new_tokens, do_sample, max_new_tokens, temperature, top_p, typical_p, repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams, penalty_alpha, length_penalty, early_stopping], [output_textbox, markdown, html], show_progress=args.no_stream))
        buttons["Stop"].click(None, None, None, cancels=gen_events)
interface.queue()
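# Launch the Gradio server. --listen binds to 0.0.0.0 so the UI is reachable from other machines,
# e.g. `python server.py --chat --listen --listen-port 7860` (the port value is illustrative).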
if args.listen:
    interface.launch(prevent_thread_lock=True, share=args.share, server_name="0.0.0.0", server_port=args.listen_port)
else:
    interface.launch(prevent_thread_lock=True, share=args.share, server_port=args.listen_port)

# Keep the main thread alive: launch() returns immediately because of prevent_thread_lock=True
while True:
    time.sleep(0.5)