From 2c1fd0d72bf96bc7603fa5ab0d9cde098b339d41 Mon Sep 17 00:00:00 2001 From: SeanScripts <64337075+SeanScripts@users.noreply.github.com> Date: Sun, 20 Aug 2023 18:28:14 -0500 Subject: [PATCH] Add probability dropdown to perplexity_colors extension (#3148) --- extensions/perplexity_colors/script.py | 335 ++++++++++++++++++------- 1 file changed, 241 insertions(+), 94 deletions(-) diff --git a/extensions/perplexity_colors/script.py b/extensions/perplexity_colors/script.py index 84b62a30..d106fabd 100644 --- a/extensions/perplexity_colors/script.py +++ b/extensions/perplexity_colors/script.py @@ -1,17 +1,25 @@ +import re +import time + import gradio +import markdown +import numpy as np import torch from transformers import LogitsProcessor -import numpy as np -from modules import shared +from modules import html_generator, shared +from modules.html_generator import replace_blockquote params = { + 'active': True, 'color_by_perplexity': False, 'color_by_probability': False, - 'ppl_scale': 15.0, # No slider for this right now, because I don't think it really needs to be changed. Very large perplexity scores don't show up often. - #'probability_dropdown': False + 'ppl_scale': 15.0, # No slider for this right now, because I don't think it really needs to be changed. Very large perplexity scores don't show up often. + 'probability_dropdown': False, + 'verbose': False # For debugging mostly } + class PerplexityLogits(LogitsProcessor): def __init__(self, verbose=False): self.generated_token_ids = [] @@ -23,9 +31,10 @@ class PerplexityLogits(LogitsProcessor): self.verbose = verbose def __call__(self, input_ids, scores): + # t0 = time.time() probs = torch.softmax(scores, dim=-1, dtype=torch.float) - log_probs = torch.nan_to_num(torch.log(probs)) - entropy = -torch.sum(probs*log_probs) + log_probs = torch.nan_to_num(torch.log(probs)) # Note: This is to convert log(0) nan to 0, but probs*log_probs makes this 0 not affect the perplexity. + entropy = -torch.sum(probs * log_probs) entropy = entropy.cpu().numpy() perplexity = round(float(np.exp(entropy)), 4) self.perplexities_list.append(perplexity) @@ -36,25 +45,25 @@ class PerplexityLogits(LogitsProcessor): if len(self.selected_probs) > 0: # Is the selected token in the top tokens? if self.verbose: - print(shared.tokenizer.decode(last_token_id)) - print([shared.tokenizer.decode(token_id) for token_id in self.top_token_ids_list[-1]]) - print(self.top_probs_list[-1]) - if last_token_id in self.top_token_ids_list[-1]: - idx = self.top_token_ids_list[-1].index(last_token_id) - self.selected_probs.append(self.top_probs_list[-1][idx]) + print('Probs: Token after', shared.tokenizer.decode(last_token_id)) + print('Probs:', [shared.tokenizer.decode(token_id) for token_id in self.top_token_ids_list[-1][0]]) + print('Probs:', [round(float(prob), 4) for prob in self.top_probs_list[-1][0]]) + if last_token_id in self.top_token_ids_list[-1][0]: + idx = self.top_token_ids_list[-1][0].index(last_token_id) + self.selected_probs.append(self.top_probs_list[-1][0][idx]) else: - self.top_token_ids_list[-1].append(last_token_id) + self.top_token_ids_list[-1][0].append(last_token_id) last_prob = round(float(self.last_probs[last_token_id]), 4) - self.top_probs_list[-1].append(last_prob) + self.top_probs_list[-1][0].append(last_prob) self.selected_probs.append(last_prob) else: - self.selected_probs.append(1.0) # Placeholder for the last token of the prompt + self.selected_probs.append(1.0) # Placeholder for the last token of the prompt if self.verbose: pplbar = "-" if not np.isnan(perplexity): - pplbar = "*"*round(perplexity) - print(f"{last_token}\t{perplexity:.2f}\t{pplbar}") + pplbar = "*" * round(perplexity) + print(f"PPL: Token after {shared.tokenizer.decode(last_token_id)}\t{perplexity:.2f}\t{pplbar}") # Get top 5 probabilities top_tokens_and_probs = torch.topk(probs, 5) @@ -63,153 +72,291 @@ class PerplexityLogits(LogitsProcessor): self.top_token_ids_list.append(top_token_ids) self.top_probs_list.append(top_probs) - - probs = probs.cpu().numpy().flatten() - self.last_probs = probs # Need to keep this as a reference for top probs + probs = probs.cpu().numpy().flatten() + self.last_probs = probs # Need to keep this as a reference for top probs + + # t1 = time.time() + # print(f"PPL Processor: {(t1-t0):.3f} s") + # About 1 ms, though occasionally up to around 100 ms, not sure why... # Doesn't actually modify the logits! return scores + # Stores the perplexity and top probabilities ppl_logits_processor = None + def logits_processor_modifier(logits_processor_list, input_ids): global ppl_logits_processor - ppl_logits_processor = PerplexityLogits() - logits_processor_list.append(ppl_logits_processor) + if params['active']: + ppl_logits_processor = PerplexityLogits(verbose=params['verbose']) + logits_processor_list.append(ppl_logits_processor) + def output_modifier(text): global ppl_logits_processor + # t0 = time.time() + + if not params['active']: + return text # TODO: It's probably more efficient to do this above rather than modifying all these lists # Remove last element of perplexities_list, top_token_ids_list, top_tokens_list, top_probs_list since everything is off by one because this extension runs before generation perplexities = ppl_logits_processor.perplexities_list[:-1] top_token_ids_list = ppl_logits_processor.top_token_ids_list[:-1] - top_tokens_list = [[shared.tokenizer.decode(token_id) for token_id in top_token_ids] for top_token_ids in top_token_ids_list] + top_tokens_list = [[shared.tokenizer.decode(token_id) for token_id in top_token_ids[0]] for top_token_ids in top_token_ids_list] top_probs_list = ppl_logits_processor.top_probs_list[:-1] # Remove first element of generated_token_ids, generated_tokens, selected_probs because they are for the last token of the prompt gen_token_ids = ppl_logits_processor.generated_token_ids[1:] gen_tokens = [shared.tokenizer.decode(token_id) for token_id in gen_token_ids] sel_probs = ppl_logits_processor.selected_probs[1:] - end_part = '' # Helps with finding the index after replacing part of the text. - in_code = False # Since the tags mess up code blocks, avoid coloring while inside a code block, based on finding tokens with '`' in them + end_part = '' if params['probability_dropdown'] else '' # Helps with finding the index after replacing part of the text. + in_code = False # Since the tags mess up code blocks, avoid coloring while inside a code block, based on finding tokens with '`' in them - if params['color_by_probability'] and params['color_by_perplexity']: - i = 0 - for token, prob, ppl, top_tokens, top_probs in zip(gen_tokens, sel_probs, perplexities, top_tokens_list, top_probs_list): - if '`' in token: - in_code = not in_code - continue - if in_code: - continue + i = 0 + for token, prob, ppl, top_tokens, top_probs in zip(gen_tokens, sel_probs, perplexities, top_tokens_list, top_probs_list): + if '`' in token and not params['probability_dropdown']: + in_code = not in_code + continue + if in_code: + continue + color = 'ffffff' + if params['color_by_probability'] and params['color_by_perplexity']: color = probability_perplexity_color_scale(prob, ppl) - if token in text[i:]: - text = text[:i] + text[i:].replace(token, add_color_html(token, color), 1) - i += text[i:].find(end_part) + len(end_part) - elif params['color_by_perplexity']: - i = 0 - for token, ppl, top_tokens, top_probs in zip(gen_tokens, perplexities, top_tokens_list, top_probs_list): - if '`' in token: - in_code = not in_code - continue - if in_code: - continue + elif params['color_by_perplexity']: color = perplexity_color_scale(ppl) - if token in text[i:]: - text = text[:i] + text[i:].replace(token, add_color_html(token, color), 1) - i += text[i:].find(end_part) + len(end_part) - elif params['color_by_probability']: - i = 0 - for token, prob, top_tokens, top_probs in zip(gen_tokens, sel_probs, top_tokens_list, top_probs_list): - if '`' in token: - in_code = not in_code - continue - if in_code: - continue + elif params['color_by_probability']: color = probability_color_scale(prob) - if token in text[i:]: + if token in text[i:]: + if params['probability_dropdown']: + after_token_index = text[i:].find(token) + len(token) + whitespace = text[i:][after_token_index:(after_token_index + 1)] + if whitespace != ' ': + whitespace = '' + text = text[:i] + text[i:].replace(token, add_dropdown_html(token, color, top_tokens, top_probs[0], whitespace, ppl), 1) + else: text = text[:i] + text[i:].replace(token, add_color_html(token, color), 1) - i += text[i:].find(end_part) + len(end_part) + i += text[i:].find(end_part) + len(end_part) - print('Average perplexity:', round(np.mean(perplexities), 4)) + # Use full perplexity list for calculating the average here. + print('Average perplexity:', round(np.mean(ppl_logits_processor.perplexities_list[:-1]), 4)) + # Optional hacky workaround: Without this, spaces get added between every token. With this, there is a little extra whitespace at the top. + # This fixes the tokenization spaces, somehow. However, this also removes any paragraph breaks in the message. + # return '

' + text + '

' + # t1 = time.time() + # print(f"Modifier: {(t1-t0):.3f} s") + # About 50 ms return text -# Green-yellow-red color scale + def probability_color_scale(prob): + ''' + Green-yellow-red color scale + ''' + rv = 0 gv = 0 if prob <= 0.5: rv = 'ff' - gv = hex(int(255*prob*2))[2:] + gv = hex(int(255 * prob * 2))[2:] if len(gv) < 2: - gv = '0'*(2 - len(gv)) + gv + gv = '0' * (2 - len(gv)) + gv else: - rv = hex(int(255 - 255*(prob - 0.5)*2))[2:] + rv = hex(int(255 - 255 * (prob - 0.5) * 2))[2:] gv = 'ff' if len(rv) < 2: - rv = '0'*(2 - len(rv)) + rv + rv = '0' * (2 - len(rv)) + rv + return rv + gv + '00' -# Red component only, white for 0 perplexity (sorry if you're not in dark mode) + def perplexity_color_scale(ppl): - value = hex(max(int(255.0 - params['ppl_scale']*(float(ppl)-1.0)), 0))[2:] + ''' + Red component only, white for 0 perplexity (sorry if you're not in dark mode) + ''' + value = hex(max(int(255.0 - params['ppl_scale'] * (float(ppl) - 1.0)), 0))[2:] if len(value) < 2: - value = '0'*(2 - len(value)) + value + value = '0' * (2 - len(value)) + value + return 'ff' + value + value -# Green-yellow-red for probability and blue component for perplexity + def probability_perplexity_color_scale(prob, ppl): + ''' + Green-yellow-red for probability and blue component for perplexity + ''' + rv = 0 gv = 0 - bv = hex(min(max(int(params['ppl_scale']*(float(ppl)-1.0)), 0), 255))[2:] + bv = hex(min(max(int(params['ppl_scale'] * (float(ppl) - 1.0)), 0), 255))[2:] if len(bv) < 2: - bv = '0'*(2 - len(bv)) + bv + bv = '0' * (2 - len(bv)) + bv + if prob <= 0.5: rv = 'ff' - gv = hex(int(255*prob*2))[2:] + gv = hex(int(255 * prob * 2))[2:] if len(gv) < 2: - gv = '0'*(2 - len(gv)) + gv + gv = '0' * (2 - len(gv)) + gv else: - rv = hex(int(255 - 255*(prob - 0.5)*2))[2:] + rv = hex(int(255 - 255 * (prob - 0.5) * 2))[2:] gv = 'ff' if len(rv) < 2: - rv = '0'*(2 - len(rv)) + rv + rv = '0' * (2 - len(rv)) + rv + return rv + gv + bv + def add_color_html(token, color): return f'{token}' -""" -# This is still very broken at the moment, needs CSS too but I'm not very good at CSS (and neither is GPT-4 apparently) so I still need to figure that out. -def add_dropdown_html(token, color, top_tokens, top_probs): - html = f'{token}' - return html -""" + row_class = ' class="selected"' if token_option == token else '' + html += f'{token_option}{prob:.4f}' + if perplexity != 0: + ppl_color = perplexity_color_scale(perplexity) + html += f'Perplexity:{perplexity:.4f}' + html += '\n' # The newline would normally be added by markdown.markdown() but this is faster. + return html # About 750 characters per token... + + +def custom_css(): + return """ + .dropdown { + display: none; + position: absolute; + z-index: 50; + background-color: var(--block-background-fill); + box-shadow: 0px 8px 16px 0px rgba(0,0,0,0.2); + width: max-content; + overflow: visible; + padding: 5px; + border-radius: 10px; + border: 1px solid var(--border-color-primary); + } + + .dropdown-content { + border: none; + z-index: 50; + } + + .dropdown-content tr.selected { + background-color: var(--block-label-background-fill); + } + + .dropdown-content td { + color: var(--body-text-color); + } + + .hoverable { + color: var(--body-text-color); + position: relative; + display: inline-block; + overflow: visible; + font-size: 15px; + line-height: 1.75; + margin: 0; + padding: 0; + margin-right: -4px; + } + + .hoverable:hover .dropdown { + display: block; + } + + # TODO: This makes the hover menus extend outside the bounds of the chat area, which is good. + # However, it also makes the scrollbar disappear, which is bad. + # The scroll bar needs to still be present. So for now, we can't see dropdowns that extend past the edge of the chat area. + #.chat { + # overflow-y: auto; + #} + """ + +# Monkeypatch applied to html_generator.py +# This fixes an issue where the markdown conversion was causing a large slowdown in generation speeds if too many tokens had probability dropdowns added. +# I'd rather have a more long-term solution, since this really shouldn't be called on all messages for each token, but this works for now. +def convert_to_markdown(string): + # t0 = time.time() + # Blockquote + pattern = re.compile(r'\\begin{blockquote}(.*?)\\end{blockquote}', re.DOTALL) + string = pattern.sub(replace_blockquote, string) + + # Code + string = string.replace('\\begin{code}', '```') + string = string.replace('\\end{code}', '```') + string = re.sub(r"(.)```", r"\1\n```", string) + + result = '' + is_code = False + for line in string.split('\n'): + if line.lstrip(' ').startswith('```'): + is_code = not is_code + + result += line + if is_code or line.startswith('|'): # Don't add an extra \n for tables or code + result += '\n' + else: + result += '\n\n' + + if is_code: + result = result + '```' # Unfinished code block + + string = result.strip() + # t1 = time.time() + # print(len(string)) + # print(f"Pre markdown: {(t1-t0):.3f} s") + if params['probability_dropdown'] and '
' in string: + # Prevents all latency introduced by trying to convert the HTML to markdown when it's not even necessary + # print('Monkeypatched') + return string + else: + # t0 = time.time() + return markdown.markdown(string, extensions=['fenced_code', 'tables']) + # t1 = time.time() + # print(f"Markdown: {(t1-t0):.3f} s for string of length {len(string)}") + # print(string) + # print(res) + # return res + + +html_generator.convert_to_markdown = convert_to_markdown + def ui(): - color_by_ppl_check = gradio.Checkbox(value=False, label="Color by perplexity", info="Higher perplexity is more red. If also showing probability, higher perplexity has more blue component.") + def update_active_check(x): + params.update({'active': x}) + def update_color_by_ppl_check(x): params.update({'color_by_perplexity': x}) - color_by_ppl_check.change(update_color_by_ppl_check, color_by_ppl_check, None) - color_by_prob_check = gradio.Checkbox(value=False, label="Color by probability", info="Green-yellow-red linear scale, with 100% green, 50% yellow, 0% red.") def update_color_by_prob_check(x): params.update({'color_by_probability': x}) - color_by_prob_check.change(update_color_by_prob_check, color_by_prob_check, None) - # Doesn't work yet... - """ - prob_dropdown_check = gradio.Checkbox(value=False, label="Probability dropdown") def update_prob_dropdown_check(x): params.update({'probability_dropdown': x}) + + active_check = gradio.Checkbox(value=True, label="Compute probabilities and perplexity scores", info="Activate this extension. Note that this extension currently does not work with exllama or llama.cpp.") + color_by_ppl_check = gradio.Checkbox(value=False, label="Color by perplexity", info="Higher perplexity is more red. If also showing probability, higher perplexity has more blue component.") + color_by_prob_check = gradio.Checkbox(value=False, label="Color by probability", info="Green-yellow-red linear scale, with 100% green, 50% yellow, 0% red.") + prob_dropdown_check = gradio.Checkbox(value=False, label="Probability dropdown", info="Hover over a token to show a dropdown of top token probabilities. Currently slightly buggy with whitespace between tokens.") + + active_check.change(update_active_check, active_check, None) + color_by_ppl_check.change(update_color_by_ppl_check, color_by_ppl_check, None) + color_by_prob_check.change(update_color_by_prob_check, color_by_prob_check, None) prob_dropdown_check.change(update_prob_dropdown_check, prob_dropdown_check, None) - """