Add probability dropdown to perplexity_colors extension (#3148)

2024-10-01 01:26:03 -04:00 · 2023-08-20 18:28:14 -05:00 · 2023-08-20 18:28:14 -05:00 · 2c1fd0d72b
commit 2c1fd0d72b
parent 0dfd1a8b7d
1 changed file with 241 additions and 94 deletions
--- a/extensions/perplexity_colors/script.py
+++ b/extensions/perplexity_colors/script.py
@ -1,17 +1,25 @@
+import re
+import time
+
 import gradio
+import markdown
+import numpy as np
 import torch
 from transformers import LogitsProcessor
-import numpy as np

-from modules import shared
+from modules import html_generator, shared
+from modules.html_generator import replace_blockquote

 params = {
+    'active': True,
    'color_by_perplexity': False,
    'color_by_probability': False,
    'ppl_scale': 15.0,  # No slider for this right now, because I don't think it really needs to be changed. Very large perplexity scores don't show up often.
-    #'probability_dropdown': False
+    'probability_dropdown': False,
+    'verbose': False  # For debugging mostly
 }

+
 class PerplexityLogits(LogitsProcessor):
    def __init__(self, verbose=False):
        self.generated_token_ids = []
@ -23,9 +31,10 @@ class PerplexityLogits(LogitsProcessor):
        self.verbose = verbose

    def __call__(self, input_ids, scores):
+        # t0 = time.time()
        probs = torch.softmax(scores, dim=-1, dtype=torch.float)
-        log_probs = torch.nan_to_num(torch.log(probs))
-        entropy = -torch.sum(probs*log_probs)
+        log_probs = torch.nan_to_num(torch.log(probs))  # Note: log(0) is -inf; nan_to_num replaces it with the most negative finite float, so probs * log_probs is 0 (instead of nan) for zero-probability tokens and they do not affect the perplexity.
+        entropy = -torch.sum(probs * log_probs)
        entropy = entropy.cpu().numpy()
        perplexity = round(float(np.exp(entropy)), 4)
        self.perplexities_list.append(perplexity)
@ -36,16 +45,16 @@ class PerplexityLogits(LogitsProcessor):
        if len(self.selected_probs) > 0:
            # Is the selected token in the top tokens?
            if self.verbose:
-                print(shared.tokenizer.decode(last_token_id))
-                print([shared.tokenizer.decode(token_id) for token_id in self.top_token_ids_list[-1]])
-                print(self.top_probs_list[-1])
-            if last_token_id in self.top_token_ids_list[-1]:
-                idx = self.top_token_ids_list[-1].index(last_token_id)
-                self.selected_probs.append(self.top_probs_list[-1][idx])
+                print('Probs: Token after', shared.tokenizer.decode(last_token_id))
+                print('Probs:', [shared.tokenizer.decode(token_id) for token_id in self.top_token_ids_list[-1][0]])
+                print('Probs:', [round(float(prob), 4) for prob in self.top_probs_list[-1][0]])
+            if last_token_id in self.top_token_ids_list[-1][0]:
+                idx = self.top_token_ids_list[-1][0].index(last_token_id)
+                self.selected_probs.append(self.top_probs_list[-1][0][idx])
            else:
-                self.top_token_ids_list[-1].append(last_token_id)
+                self.top_token_ids_list[-1][0].append(last_token_id)
                last_prob = round(float(self.last_probs[last_token_id]), 4)
-                self.top_probs_list[-1].append(last_prob)
+                self.top_probs_list[-1][0].append(last_prob)
                self.selected_probs.append(last_prob)
        else:
            self.selected_probs.append(1.0)  # Placeholder for the last token of the prompt
@ -53,8 +62,8 @@ class PerplexityLogits(LogitsProcessor):
        if self.verbose:
            pplbar = "-"
            if not np.isnan(perplexity):
-                pplbar = "*"*round(perplexity)
-            print(f"{last_token}\t{perplexity:.2f}\t{pplbar}")
+                pplbar = "*" * round(perplexity)
+            print(f"PPL: Token after {shared.tokenizer.decode(last_token_id)}\t{perplexity:.2f}\t{pplbar}")

        # Get top 5 probabilities
        top_tokens_and_probs = torch.topk(probs, 5)
@ -67,149 +76,287 @@ class PerplexityLogits(LogitsProcessor):
        probs = probs.cpu().numpy().flatten()
        self.last_probs = probs  # Need to keep this as a reference for top probs

+        # t1 = time.time()
+        # print(f"PPL Processor: {(t1-t0):.3f} s")
+        # About 1 ms, though occasionally up to around 100 ms, not sure why...
        # Doesn't actually modify the logits!
        return scores

+
 # Stores the perplexity and top probabilities
 ppl_logits_processor = None

+
 def logits_processor_modifier(logits_processor_list, input_ids):
    global ppl_logits_processor
-    ppl_logits_processor = PerplexityLogits()
+    if params['active']:
+        ppl_logits_processor = PerplexityLogits(verbose=params['verbose'])
        logits_processor_list.append(ppl_logits_processor)

+
 def output_modifier(text):
    global ppl_logits_processor
+    # t0 = time.time()
+
+    if not params['active']:
+        return text

    # TODO: It's probably more efficient to do this above rather than modifying all these lists
    # Remove last element of perplexities_list, top_token_ids_list, top_tokens_list, top_probs_list since everything is off by one because this extension runs before generation
    perplexities = ppl_logits_processor.perplexities_list[:-1]
    top_token_ids_list = ppl_logits_processor.top_token_ids_list[:-1]
-    top_tokens_list = [[shared.tokenizer.decode(token_id) for token_id in top_token_ids] for top_token_ids in top_token_ids_list]
+    top_tokens_list = [[shared.tokenizer.decode(token_id) for token_id in top_token_ids[0]] for top_token_ids in top_token_ids_list]
    top_probs_list = ppl_logits_processor.top_probs_list[:-1]
    # Remove first element of generated_token_ids, generated_tokens, selected_probs because they are for the last token of the prompt
    gen_token_ids = ppl_logits_processor.generated_token_ids[1:]
    gen_tokens = [shared.tokenizer.decode(token_id) for token_id in gen_token_ids]
    sel_probs = ppl_logits_processor.selected_probs[1:]

-    end_part = '</span>' # Helps with finding the index after replacing part of the text.
+    end_part = '</div></div>' if params['probability_dropdown'] else '</span>'  # Helps with finding the index after replacing part of the text.
    in_code = False  # Since the <span> tags mess up code blocks, avoid coloring while inside a code block, based on finding tokens with '`' in them

-    if params['color_by_probability'] and params['color_by_perplexity']:
    i = 0
    for token, prob, ppl, top_tokens, top_probs in zip(gen_tokens, sel_probs, perplexities, top_tokens_list, top_probs_list):
-            if '`' in token:
+        if '`' in token and not params['probability_dropdown']:
            in_code = not in_code
            continue
        if in_code:
            continue
+        color = 'ffffff'
+        if params['color_by_probability'] and params['color_by_perplexity']:
            color = probability_perplexity_color_scale(prob, ppl)
-            if token in text[i:]:
-                text = text[:i] + text[i:].replace(token, add_color_html(token, color), 1)
-                i += text[i:].find(end_part) + len(end_part)
        elif params['color_by_perplexity']:
-        i = 0
-        for token, ppl, top_tokens, top_probs in zip(gen_tokens, perplexities, top_tokens_list, top_probs_list):
-            if '`' in token:
-                in_code = not in_code
-                continue
-            if in_code:
-                continue
            color = perplexity_color_scale(ppl)
-            if token in text[i:]:
-                text = text[:i] + text[i:].replace(token, add_color_html(token, color), 1)
-                i += text[i:].find(end_part) + len(end_part)
        elif params['color_by_probability']:
-        i = 0
-        for token, prob, top_tokens, top_probs in zip(gen_tokens, sel_probs, top_tokens_list, top_probs_list):
-            if '`' in token:
-                in_code = not in_code
-                continue
-            if in_code:
-                continue
            color = probability_color_scale(prob)
        if token in text[i:]:
+            if params['probability_dropdown']:
+                after_token_index = text[i:].find(token) + len(token)
+                whitespace = text[i:][after_token_index:(after_token_index + 1)]
+                if whitespace != ' ':
+                    whitespace = ''
+                text = text[:i] + text[i:].replace(token, add_dropdown_html(token, color, top_tokens, top_probs[0], whitespace, ppl), 1)
+            else:
                text = text[:i] + text[i:].replace(token, add_color_html(token, color), 1)
            i += text[i:].find(end_part) + len(end_part)

-    print('Average perplexity:', round(np.mean(perplexities), 4))
+    # Use full perplexity list for calculating the average here.
+    print('Average perplexity:', round(np.mean(ppl_logits_processor.perplexities_list[:-1]), 4))
+    # Optional hacky workaround: Without this, spaces get added between every token. With this, there is a little extra whitespace at the top.
+    # This fixes the tokenization spaces, somehow. However, this also removes any paragraph breaks in the message.
+    # return '<p>' + text + '</p>'
+    # t1 = time.time()
+    # print(f"Modifier: {(t1-t0):.3f} s")
+    # About 50 ms
    return text

-# Green-yellow-red color scale
+
 def probability_color_scale(prob):
+    '''
+    Green-yellow-red color scale
+    '''
+
    rv = 0
    gv = 0
    if prob <= 0.5:
        rv = 'ff'
-        gv = hex(int(255*prob*2))[2:]
+        gv = hex(int(255 * prob * 2))[2:]
        if len(gv) < 2:
-            gv = '0'*(2 - len(gv)) + gv
+            gv = '0' * (2 - len(gv)) + gv
    else:
-        rv = hex(int(255 - 255*(prob - 0.5)*2))[2:]
+        rv = hex(int(255 - 255 * (prob - 0.5) * 2))[2:]
        gv = 'ff'
        if len(rv) < 2:
-            rv = '0'*(2 - len(rv)) + rv
+            rv = '0' * (2 - len(rv)) + rv
+
    return rv + gv + '00'

-# Red component only, white for 0 perplexity (sorry if you're not in dark mode)
+
 def perplexity_color_scale(ppl):
-    value = hex(max(int(255.0 - params['ppl_scale']*(float(ppl)-1.0)), 0))[2:]
+    '''
+    Red color scale: white at the minimum perplexity of 1, fading to pure red as perplexity rises (sorry if you're not in dark mode)
+    '''
+    value = hex(max(int(255.0 - params['ppl_scale'] * (float(ppl) - 1.0)), 0))[2:]
    if len(value) < 2:
-        value = '0'*(2 - len(value)) + value
+        value = '0' * (2 - len(value)) + value
+
    return 'ff' + value + value

-# Green-yellow-red for probability and blue component for perplexity
+
 def probability_perplexity_color_scale(prob, ppl):
+    '''
+    Green-yellow-red for probability and blue component for perplexity
+    '''
+
    rv = 0
    gv = 0
-    bv = hex(min(max(int(params['ppl_scale']*(float(ppl)-1.0)), 0), 255))[2:]
+    bv = hex(min(max(int(params['ppl_scale'] * (float(ppl) - 1.0)), 0), 255))[2:]
    if len(bv) < 2:
-            bv = '0'*(2 - len(bv)) + bv
+        bv = '0' * (2 - len(bv)) + bv
+
    if prob <= 0.5:
        rv = 'ff'
-        gv = hex(int(255*prob*2))[2:]
+        gv = hex(int(255 * prob * 2))[2:]
        if len(gv) < 2:
-            gv = '0'*(2 - len(gv)) + gv
+            gv = '0' * (2 - len(gv)) + gv
    else:
-        rv = hex(int(255 - 255*(prob - 0.5)*2))[2:]
+        rv = hex(int(255 - 255 * (prob - 0.5) * 2))[2:]
        gv = 'ff'
        if len(rv) < 2:
-            rv = '0'*(2 - len(rv)) + rv
+            rv = '0' * (2 - len(rv)) + rv
+
    return rv + gv + bv

+
 def add_color_html(token, color):
    return f'<span style="color: #{color}">{token}</span>'

-"""
-# This is still very broken at the moment, needs CSS too but I'm not very good at CSS (and neither is GPT-4 apparently) so I still need to figure that out.
-def add_dropdown_html(token, color, top_tokens, top_probs):
-    html = f'<span class="hoverable" style="color: #{color}">{token}<div class="dropdown"><table class="dropdown-content">'
-    for token, prob in zip(top_tokens, top_probs):
-        # TODO: Background color? Bold for selected token?
-        # Bigger issue: Why is there a newline after the first token, and the dropdown fails there?
-        # The HTML ends up like <p><span>word</span></p><div>...</div>,
-        # even though for all other tokens it shows up correctly.
+
+# TODO: Major issue: Applying this to too many tokens will cause a permanent slowdown in generation speed until the messages are removed from the history.
+# I think the issue is from HTML elements taking up space in the visible history, and things like history deepcopy add latency proportional to the size of the history.
+# Potential solution is maybe to modify the main generation code to send just the internal text and not the visible history, to avoid moving too much around.
+# I wonder if we can also avoid using deepcopy here.
+# The whitespace fix here is not perfect -- it will remove whitespace of paragraph breaks and other particular cases.
+def add_dropdown_html(token, color, top_tokens, top_probs, whitespace='', perplexity=0):
+    if whitespace != '':
+        whitespace = '&nbsp;'
+    html = f'<div class="hoverable"><span style="color: #{color}">{token}{whitespace}</span><div class="dropdown"><table class="dropdown-content"><tbody>'
+    for token_option, prob in zip(top_tokens, top_probs):
+        # TODO: Bold for selected token?
+        # Using a div as the outer element avoids the issues caused by nesting divs inside spans.
+        # The remaining problem is that the divs are rendered with one space of whitespace between every token.
+        # There is probably some way to fix this in CSS that I don't know about.
        row_color = probability_color_scale(prob)
-        html += f'<tr><td style="color: #{row_color}">{token}</td><td style="color: #{row_color}">{prob}</td></tr>'
-    html += '</table></div></span>'
-    return html
-"""
+        row_class = ' class="selected"' if token_option == token else ''
+        html += f'<tr{row_class}><td style="color: #{row_color}">{token_option}</td><td style="color: #{row_color}">{prob:.4f}</td></tr>'
+    if perplexity != 0:
+        ppl_color = perplexity_color_scale(perplexity)
+        html += f'<tr><td>Perplexity:</td><td style="color: #{ppl_color}">{perplexity:.4f}</td></tr>'
+    html += '</tbody></table></div></div>\n'  # The newline would normally be added by markdown.markdown() but this is faster.
+    return html  # About 750 characters per token...
+
+
+def custom_css():
+    return """
+        .dropdown {
+            display: none;
+            position: absolute;
+            z-index: 50;
+            background-color: var(--block-background-fill);
+            box-shadow: 0px 8px 16px 0px rgba(0,0,0,0.2);
+            width: max-content;
+            overflow: visible;
+            padding: 5px;
+            border-radius: 10px;
+            border: 1px solid var(--border-color-primary);
+        }
+
+        .dropdown-content {
+            border: none;
+            z-index: 50;
+        }
+
+        .dropdown-content tr.selected {
+            background-color: var(--block-label-background-fill);
+        }
+
+        .dropdown-content td {
+            color: var(--body-text-color);
+        }
+
+        .hoverable {
+            color: var(--body-text-color);
+            position: relative;
+            display: inline-block;
+            overflow: visible;
+            font-size: 15px;
+            line-height: 1.75;
+            margin: 0;
+            padding: 0;
+            margin-right: -4px;
+        }
+
+        .hoverable:hover .dropdown {
+            display: block;
+        }
+
+        /* TODO: This makes the hover menus extend outside the bounds of the chat area, which is good.
+           However, it also makes the scrollbar disappear, which is bad.
+           The scroll bar needs to still be present. So for now, we can't see dropdowns that extend past the edge of the chat area.
+        .chat {
+            overflow-y: auto;
+        } */
+    """
+
+# Monkeypatch applied to html_generator.py
+# This fixes an issue where the markdown conversion was causing a large slowdown in generation speeds if too many tokens had probability dropdowns added.
+# I'd rather have a more long-term solution, since this really shouldn't be called on all messages for each token, but this works for now.
+def convert_to_markdown(string):
+    # t0 = time.time()
+    # Blockquote
+    pattern = re.compile(r'\\begin{blockquote}(.*?)\\end{blockquote}', re.DOTALL)
+    string = pattern.sub(replace_blockquote, string)
+
+    # Code
+    string = string.replace('\\begin{code}', '```')
+    string = string.replace('\\end{code}', '```')
+    string = re.sub(r"(.)```", r"\1\n```", string)
+
+    result = ''
+    is_code = False
+    for line in string.split('\n'):
+        if line.lstrip(' ').startswith('```'):
+            is_code = not is_code
+
+        result += line
+        if is_code or line.startswith('|'):  # Don't add an extra \n for tables or code
+            result += '\n'
+        else:
+            result += '\n\n'
+
+    if is_code:
+        result = result + '```'  # Unfinished code block
+
+    string = result.strip()
+    # t1 = time.time()
+    # print(len(string))
+    # print(f"Pre markdown: {(t1-t0):.3f} s")
+    if params['probability_dropdown'] and '<div class="hoverable">' in string:
+        # Prevents all latency introduced by trying to convert the HTML to markdown when it's not even necessary
+        # print('Monkeypatched')
+        return string
+    else:
+        # t0 = time.time()
+        return markdown.markdown(string, extensions=['fenced_code', 'tables'])
+        # t1 = time.time()
+        # print(f"Markdown: {(t1-t0):.3f} s for string of length {len(string)}")
+        # print(string)
+        # print(res)
+        # return res
+
+
+html_generator.convert_to_markdown = convert_to_markdown
+

 def ui():
-    color_by_ppl_check = gradio.Checkbox(value=False, label="Color by perplexity", info="Higher perplexity is more red. If also showing probability, higher perplexity has more blue component.")
+    def update_active_check(x):
+        params.update({'active': x})
+
    def update_color_by_ppl_check(x):
        params.update({'color_by_perplexity': x})
-    color_by_ppl_check.change(update_color_by_ppl_check, color_by_ppl_check, None)

-    color_by_prob_check = gradio.Checkbox(value=False, label="Color by probability", info="Green-yellow-red linear scale, with 100% green, 50% yellow, 0% red.")
    def update_color_by_prob_check(x):
        params.update({'color_by_probability': x})
-    color_by_prob_check.change(update_color_by_prob_check, color_by_prob_check, None)

-    # Doesn't work yet...
-    """
-    prob_dropdown_check = gradio.Checkbox(value=False, label="Probability dropdown")
    def update_prob_dropdown_check(x):
        params.update({'probability_dropdown': x})
+
+    active_check = gradio.Checkbox(value=True, label="Compute probabilities and perplexity scores", info="Activate this extension. Note that this extension currently does not work with exllama or llama.cpp.")
+    color_by_ppl_check = gradio.Checkbox(value=False, label="Color by perplexity", info="Higher perplexity is more red. If also showing probability, higher perplexity has more blue component.")
+    color_by_prob_check = gradio.Checkbox(value=False, label="Color by probability", info="Green-yellow-red linear scale, with 100% green, 50% yellow, 0% red.")
+    prob_dropdown_check = gradio.Checkbox(value=False, label="Probability dropdown", info="Hover over a token to show a dropdown of top token probabilities. Currently slightly buggy with whitespace between tokens.")
+
+    active_check.change(update_active_check, active_check, None)
+    color_by_ppl_check.change(update_color_by_ppl_check, color_by_ppl_check, None)
+    color_by_prob_check.change(update_color_by_prob_check, color_by_prob_check, None)
    prob_dropdown_check.change(update_prob_dropdown_check, prob_dropdown_check, None)
-    """