text-generation-webui/extensions/elevenlabs_tts/script.py

import re
from pathlib import Path

import elevenlabs
import gradio as gr

from modules import chat, shared
from modules.utils import gradio
from modules.logging_colors import logger

# Extension settings. The Gradio callbacks registered in ui() write the user's
# choices back into this dict, and the modifier hooks below read it on each call.
params = {
    'activate': True,  # master switch checked by the state/input/output modifiers
    'api_key': None,  # forwarded to elevenlabs.set_api_key() by update_api_key()
    # The string 'None' (not the None object) means "nothing chosen yet";
    # ui() replaces it with the first available voice.
    'selected_voice': 'None',
    'autoplay': False,  # adds the 'autoplay' attribute to the generated <audio> tag
    'show_text': True,  # append the original reply text below the audio player
    'model': 'eleven_monolingual_v1',  # offered choices are listed in LANG_MODELS
}

# Cached list of voice names; filled on the first ui() call.
voices = None
# Counter used to number the generated .mp3 files; incremented by
# output_modifier() after each successful synthesis.
wav_idx = 0
# Choices shown in the 'Language model' dropdown built by ui().
LANG_MODELS = ['eleven_monolingual_v1', 'eleven_multilingual_v1']


def update_api_key(key):
    """Store the ElevenLabs API key and hand it to the client library.

    The key is always recorded in ``params['api_key']``. It is forwarded to
    ``elevenlabs.set_api_key`` only when it is not ``None``.
    """
    params.update(api_key=key)
    if key is None:
        return

    elevenlabs.set_api_key(key)


def refresh_voices():
    """Return the names of the voices reported by ``elevenlabs.voices()`` as a list."""
    return [entry.name for entry in elevenlabs.voices()]


def refresh_voices_dd():
    """Re-fetch the voice list and build an update for the voice dropdown.

    Returns:
        A ``gr.Dropdown.update`` carrying the fresh choices. The currently
        selected voice (``params['selected_voice']``) is kept when it is still
        in the list; otherwise the first voice is selected. When no voices are
        returned at all the dropdown is emptied instead of raising IndexError.
    """
    all_voices = refresh_voices()
    if not all_voices:
        return gr.Dropdown.update(value=None, choices=[])

    # Keep the user's selection across a refresh, mirroring what ui() does
    # on start-up; fall back to the first voice only if it disappeared.
    current = params['selected_voice']
    value = current if current in all_voices else all_voices[0]
    return gr.Dropdown.update(value=value, choices=all_voices)


def remove_tts_from_history(history):
    """Replace every visible reply with the corresponding internal reply.

    The visible user message (element 0 of each visible pair) is kept; only
    the reply (element 1) is overwritten with the one from
    ``history['internal']``. The history dict is modified in place and returned.
    """
    for position, exchange in enumerate(history['internal']):
        shown = history['visible']
        shown[position] = [shown[position][0], exchange[1]]

    return history


def toggle_text_in_history(history):
    """Show or hide the reply text under each audio player in the visible history.

    Only visible replies that start with ``<audio`` are touched. Each of them
    is cut back to its ``<audio ...></audio>`` tag; when ``params['show_text']``
    is set, the matching internal reply is appended after a blank line.
    The history dict is modified in place and returned.
    """
    for position, pair in enumerate(history['visible']):
        shown_reply = pair[1]
        if not shown_reply.startswith('<audio'):
            continue

        # Everything up to (and including) the first closing tag is the player.
        player_tag = f"{shown_reply.split('</audio>')[0]}</audio>"
        if params['show_text']:
            raw_reply = history['internal'][position][1]
            new_reply = f"{player_tag}\n\n{raw_reply}"
        else:
            new_reply = player_tag

        history['visible'][position] = [history['visible'][position][0], new_reply]

    return history


def remove_surrounded_chars(string):
    """Strip asterisk-delimited spans (e.g. ``*waves*``) from ``string``.

    The pattern matches 'as few symbols as possible (0 upwards) between any
    asterisks' OR 'as few symbols as possible (0 upwards) between an asterisk
    and the end of the string', so an unclosed ``*...`` is removed up to the
    end of the text.

    Returns:
        The input with every such span (asterisks included) deleted.
    """
    # Raw string literal: '\*' in a normal string is an invalid escape
    # sequence, which newer Python versions warn about.
    return re.sub(r'\*[^\*]*?(\*|$)', '', string)


def state_modifier(state):
    """Set ``state['stream']`` to False while the extension is activated.

    When ``params['activate']`` is off the state is returned untouched.
    """
    if params['activate']:
        state['stream'] = False

    return state


def input_modifier(string):
    """Return the user input unchanged, updating the processing message.

    While the extension is activated, ``shared.processing_message`` is set to
    the "recording a voice message" text; the input string itself is never altered.
    """
    if params['activate']:
        shared.processing_message = "*Is recording a voice message...*"

    return string


def history_modifier(history):
    """Drop the ``autoplay`` attribute from the most recent visible reply.

    Nothing is changed when the internal history is empty. Otherwise the last
    visible pair is rebuilt with ``'controls autoplay>'`` replaced by
    ``'controls>'`` in its reply. The history dict is modified in place and
    returned.
    """
    if not history['internal']:
        return history

    last_user_text = history['visible'][-1][0]
    last_reply = history['visible'][-1][1]
    history['visible'][-1] = [last_user_text, last_reply.replace('controls autoplay>', 'controls>')]
    return history


def output_modifier(string):
    """Turn the model's reply into an ElevenLabs audio clip embedded as HTML.

    When the extension is deactivated the reply is returned unchanged.
    Otherwise the reply is cleaned up (asterisk-delimited spans, double quotes
    and newlines removed), synthesised with the selected voice and model, and
    saved as a numbered .mp3 under ``extensions/elevenlabs_tts/outputs``.

    Returns:
        An ``<audio>`` tag pointing at the saved file, or an error message if
        the ElevenLabs API call failed. When ``params['show_text']`` is set,
        the original reply text is appended after a blank line.
    """
    global wav_idx

    if not params['activate']:
        return string

    original_string = string

    # Clean the text that is sent to the TTS service.
    string = remove_surrounded_chars(string)
    # Strip straight quotes and both curly quotes (the closing one used to be
    # left in the text).
    for quote in ('"', '“', '”'):
        string = string.replace(quote, '')
    string = string.replace('\n', ' ')
    string = string.strip()
    if string == '':
        string = 'empty reply, try regenerating'

    # The f-string already formats the index; no extra .format() call needed.
    output_file = Path(f'extensions/elevenlabs_tts/outputs/{wav_idx:06d}.mp3')
    # Make sure the target directory exists so saving cannot fail on a missing folder.
    output_file.parent.mkdir(parents=True, exist_ok=True)
    print(f'Outputting audio to {str(output_file)}')
    try:
        audio = elevenlabs.generate(text=string, voice=params['selected_voice'], model=params['model'])
        elevenlabs.save(audio, str(output_file))

        autoplay = 'autoplay' if params['autoplay'] else ''
        string = f'<audio src="file/{output_file.as_posix()}" controls {autoplay}></audio>'
        # Only advance the counter after a clip was actually written.
        wav_idx += 1
    except elevenlabs.api.error.UnauthenticatedRateLimitError:
        string = "🤖 ElevenLabs Unauthenticated Rate Limit Reached - Please create an API key to continue\n\n"
    except elevenlabs.api.error.RateLimitError:
        string = "🤖 ElevenLabs API Tier Limit Reached\n\n"
    except elevenlabs.api.error.APIError as err:
        string = f"🤖 ElevenLabs Error: {err}\n\n"

    if params['show_text']:
        string += f'\n\n{original_string}'

    shared.processing_message = "*Is typing...*"
    return string


def ui():
    """Build the extension's Gradio controls and register their event handlers.

    On the first call (or whenever the cached voice list is empty) the voice
    list is fetched and ``params['selected_voice']`` is validated against it.
    The history-related handlers are only wired up when ``shared.is_chat()``
    is true.
    """
    global voices
    if not voices:
        # NOTE(review): the voices are fetched before the stored API key is
        # applied further down (update_api_key in the API-key row) - confirm
        # whether account-specific voices are expected to be listed here.
        voices = refresh_voices()
        selected = params['selected_voice']
        if not voices:
            # Nothing to fall back to; avoid indexing an empty list.
            logger.error('No ElevenLabs voices available')
        elif selected == 'None':
            params['selected_voice'] = voices[0]
        elif selected not in voices:
            logger.error(f'Selected voice {selected} not available, switching to {voices[0]}')
            params['selected_voice'] = voices[0]

    # Gradio elements
    with gr.Row():
        activate = gr.Checkbox(value=params['activate'], label='Activate TTS')
        autoplay = gr.Checkbox(value=params['autoplay'], label='Play TTS automatically')
        show_text = gr.Checkbox(value=params['show_text'], label='Show message text under audio player')

    with gr.Row():
        # With an empty voice list there is no valid selection to preset.
        voice = gr.Dropdown(value=params['selected_voice'] if voices else None, choices=voices, label='TTS Voice')
        refresh = gr.Button(value='Refresh')

    with gr.Row():
        if params['api_key']:
            api_key = gr.Textbox(value=params['api_key'], label='API Key')
            # Apply a key that is already present in params to the client library.
            update_api_key(params['api_key'])
        else:
            api_key = gr.Textbox(placeholder="Enter your API key.", label='API Key')

    with gr.Row():
        model = gr.Dropdown(value=params['model'], choices=LANG_MODELS, label='Language model')

    with gr.Row():
        convert = gr.Button('Permanently replace audios with the message texts')
        convert_cancel = gr.Button('Cancel', visible=False)
        convert_confirm = gr.Button('Confirm (cannot be undone)', variant="stop", visible=False)

    if shared.is_chat():
        # Convert history with confirmation: the first click swaps the button
        # for a Confirm/Cancel pair, and only Confirm rewrites the history.
        convert_arr = [convert_confirm, convert, convert_cancel]
        convert.click(lambda: [gr.update(visible=True), gr.update(visible=False), gr.update(visible=True)], None, convert_arr)
        convert_confirm.click(
            lambda: [gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)], None, convert_arr).then(
            remove_tts_from_history, gradio('history'), gradio('history')).then(
            chat.save_persistent_history, gradio('history', 'character_menu', 'mode'), None).then(
            chat.redraw_html, shared.reload_inputs, gradio('display'))

        convert_cancel.click(lambda: [gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)], None, convert_arr)

        # Toggle message text in history
        show_text.change(
            lambda x: params.update({"show_text": x}), show_text, None).then(
            toggle_text_in_history, gradio('history'), gradio('history')).then(
            chat.save_persistent_history, gradio('history', 'character_menu', 'mode'), None).then(
            chat.redraw_html, shared.reload_inputs, gradio('display'))

    # Event functions to update the parameters in the backend
    activate.change(lambda x: params.update({'activate': x}), activate, None)
    voice.change(lambda x: params.update({'selected_voice': x}), voice, None)
    api_key.change(update_api_key, api_key, None)
    model.change(lambda x: params.update({'model': x}), model, None)
    refresh.click(refresh_voices_dd, [], voice)
    autoplay.change(lambda x: params.update({"autoplay": x}), autoplay, None)
Minor changes 2023-03-22 14:55:03 -04:00			`import re`
Rename the folder 2023-03-06 17:38:36 -05:00			`from pathlib import Path`
Move new extension to a separate file 2023-03-06 17:28:53 -05:00
Elevenlabs Extension Improvement and migration to official API (#1830) 2023-05-06 09:56:31 -04:00			`import elevenlabs`
Minor fixes to elevenlabs_tts 2023-05-06 09:57:34 -04:00			`import gradio as gr`
Implement sessions + add basic multi-user support (#2991) 2023-07-03 23:03:30 -04:00
Elevenlabs Extension Improvement and migration to official API (#1830) 2023-05-06 09:56:31 -04:00			`from modules import chat, shared`
Implement sessions + add basic multi-user support (#2991) 2023-07-03 23:03:30 -04:00			`from modules.utils import gradio`
Elevenlabs tts fixes (#2959) * [Fixed] Keep setting option for the voice - It was always changed to the first available voice - Also added an error if the selected voice isn't valid * [Fixed] elevenlabs_tts API key handling - The one from the settings wasn't applied - We always got "Enter your API key", even when the settings specified an api_key * [Added] elevenlabs_tts model selection - Now we can also use the "eleven_multilingual_v1" model. Used for anything but english. 2023-07-11 18:00:37 -04:00			`from modules.logging_colors import logger`
Sort imports 2023-04-07 13:42:03 -04:00
Move new extension to a separate file 2023-03-06 17:28:53 -05:00			`params = {`
			`'activate': True,`
Elevenlabs Extension Improvement and migration to official API (#1830) 2023-05-06 09:56:31 -04:00			`'api_key': None,`
Move new extension to a separate file 2023-03-06 17:28:53 -05:00			`'selected_voice': 'None',`
Elevenlabs Extension Improvement and migration to official API (#1830) 2023-05-06 09:56:31 -04:00			`'autoplay': False,`
			`'show_text': True,`
Elevenlabs tts fixes (#2959) * [Fixed] Keep setting option for the voice - It was always changed to the first available voice - Also added an error if the selected voice isn't valid * [Fixed] elevenlabs_tts API key handling - The one from the settings wasn't applied - We always got "Enter your API key", even when the settings specified an api_key * [Added] elevenlabs_tts model selection - Now we can also use the "eleven_multilingual_v1" model. Used for anything but english. 2023-07-11 18:00:37 -04:00			`'model': 'eleven_monolingual_v1',`
Move new extension to a separate file 2023-03-06 17:28:53 -05:00			`}`
Improve readability 2023-03-06 17:46:46 -05:00
Elevenlabs Extension Improvement and migration to official API (#1830) 2023-05-06 09:56:31 -04:00			`voices = None`
Move new extension to a separate file 2023-03-06 17:28:53 -05:00			`wav_idx = 0`
Elevenlabs tts fixes (#2959) * [Fixed] Keep setting option for the voice - It was always changed to the first available voice - Also added an error if the selected voice isn't valid * [Fixed] elevenlabs_tts API key handling - The one from the settings wasn't applied - We always got "Enter your API key", even when the settings specified an api_key * [Added] elevenlabs_tts model selection - Now we can also use the "eleven_multilingual_v1" model. Used for anything but english. 2023-07-11 18:00:37 -04:00			`LANG_MODELS = ['eleven_monolingual_v1', 'eleven_multilingual_v1']`
Move new extension to a separate file 2023-03-06 17:28:53 -05:00

fix: elevenlabs cloned voices do not show up in webui after entering API key (#2107) 2023-05-16 19:21:36 -04:00			`def update_api_key(key):`
			`params['api_key'] = key`
			`if key is not None:`
			`elevenlabs.set_api_key(key)`


Elevenlabs Extension Improvement and migration to official API (#1830) 2023-05-06 09:56:31 -04:00			`def refresh_voices():`
			`global params`
fix: elevenlabs removed the need for the api key for refreshing voices (#2097) 2023-05-16 12:34:49 -04:00			`your_voices = elevenlabs.voices()`
Elevenlabs Extension Improvement and migration to official API (#1830) 2023-05-06 09:56:31 -04:00			`voice_names = [voice.name for voice in your_voices]`
			`return voice_names`
Make the code more like PEP8 for readability (#862) 2023-04-06 23:15:45 -04:00

Elevenlabs Extension Improvement and migration to official API (#1830) 2023-05-06 09:56:31 -04:00			`def refresh_voices_dd():`
			`all_voices = refresh_voices()`
			`return gr.Dropdown.update(value=all_voices[0], choices=all_voices)`

Make the code more like PEP8 for readability (#862) 2023-04-06 23:15:45 -04:00
Implement sessions + add basic multi-user support (#2991) 2023-07-03 23:03:30 -04:00			`def remove_tts_from_history(history):`
			`for i, entry in enumerate(history['internal']):`
			`history['visible'][i] = [history['visible'][i][0], entry[1]]`

			`return history`
Minor fixes to elevenlabs_tts 2023-05-06 09:57:34 -04:00
Elevenlabs Extension Improvement and migration to official API (#1830) 2023-05-06 09:56:31 -04:00
Implement sessions + add basic multi-user support (#2991) 2023-07-03 23:03:30 -04:00			`def toggle_text_in_history(history):`
			`for i, entry in enumerate(history['visible']):`
Elevenlabs Extension Improvement and migration to official API (#1830) 2023-05-06 09:56:31 -04:00			`visible_reply = entry[1]`
			`if visible_reply.startswith('<audio'):`
			`if params['show_text']:`
Implement sessions + add basic multi-user support (#2991) 2023-07-03 23:03:30 -04:00			`reply = history['internal'][i][1]`
			`history['visible'][i] = [history['visible'][i][0], f"{visible_reply.split('</audio>')[0]}</audio>\n\n{reply}"]`
Elevenlabs Extension Improvement and migration to official API (#1830) 2023-05-06 09:56:31 -04:00			`else:`
Implement sessions + add basic multi-user support (#2991) 2023-07-03 23:03:30 -04:00			`history['visible'][i] = [history['visible'][i][0], f"{visible_reply.split('</audio>')[0]}</audio>"]`

			`return history`
Minor fixes to elevenlabs_tts 2023-05-06 09:57:34 -04:00
Make the code more like PEP8 for readability (#862) 2023-04-06 23:15:45 -04:00
Move new extension to a separate file 2023-03-06 17:28:53 -05:00			`def remove_surrounded_chars(string):`
Extensions performance & memory optimisations Reworked remove_surrounded_chars() to use regular expression ( https://regexr.com/7alb5 ) instead of repeated string concatenations for elevenlab_tts, silero_tts, sd_api_pictures. This should be both faster and more robust in handling asterisks. Reduced the memory footprint of send_pictures and sd_api_pictures by scaling the images in the chat to 300 pixels max-side wise. (The user already has the original in case of the sent picture and there's an option to save the SD generation). This should fix history growing annoyingly large with multiple pictures present 2023-03-22 00:47:54 -04:00			`# this expression matches to 'as few symbols as possible (0 upwards) between any asterisks' OR`
			`# 'as few symbols as possible (0 upwards) between an asterisk and the end of the string'`
Make the code more like PEP8 for readability (#862) 2023-04-06 23:15:45 -04:00			`return re.sub('\[^\]?(\\|$)', '', string)`

Move new extension to a separate file 2023-03-06 17:28:53 -05:00
Refactor text_generation.py, add support for custom generation functions (#1817) 2023-05-05 17:53:03 -04:00			`def state_modifier(state):`
FIX silero_tts/elevenlabs_tts activation/deactivation (#2313) 2023-05-24 09:06:38 -04:00			`if not params['activate']:`
			`return state`

Refactor text_generation.py, add support for custom generation functions (#1817) 2023-05-05 17:53:03 -04:00			`state['stream'] = False`
			`return state`


Move new extension to a separate file 2023-03-06 17:28:53 -05:00			`def input_modifier(string):`
FIX silero_tts/elevenlabs_tts activation/deactivation (#2313) 2023-05-24 09:06:38 -04:00			`if not params['activate']:`
			`return string`
Fix elevenlabs_tts too 2023-05-21 13:11:46 -04:00
			`shared.processing_message = "Is recording a voice message..."`
			`return string`


			`def history_modifier(history):`
Elevenlabs Extension Improvement and migration to official API (#1830) 2023-05-06 09:56:31 -04:00			`# Remove autoplay from the last reply`
Fix elevenlabs_tts too 2023-05-21 13:11:46 -04:00			`if len(history['internal']) > 0:`
			`history['visible'][-1] = [`
			`history['visible'][-1][0],`
			`history['visible'][-1][1].replace('controls autoplay>', 'controls>')`
Elevenlabs Extension Improvement and migration to official API (#1830) 2023-05-06 09:56:31 -04:00			`]`
Minor fixes to elevenlabs_tts 2023-05-06 09:57:34 -04:00
Fix elevenlabs_tts too 2023-05-21 13:11:46 -04:00			`return history`
Move new extension to a separate file 2023-03-06 17:28:53 -05:00
Make the code more like PEP8 for readability (#862) 2023-04-06 23:15:45 -04:00
Move new extension to a separate file 2023-03-06 17:28:53 -05:00			`def output_modifier(string):`
Remove unused variable 2023-05-06 10:03:12 -04:00			`global params, wav_idx`
Make the code more like PEP8 for readability (#862) 2023-04-06 23:15:45 -04:00
			`if not params['activate']:`
Move new extension to a separate file 2023-03-06 17:28:53 -05:00			`return string`

Elevenlabs Extension Improvement and migration to official API (#1830) 2023-05-06 09:56:31 -04:00			`original_string = string`
Move new extension to a separate file 2023-03-06 17:28:53 -05:00			`string = remove_surrounded_chars(string)`
			`string = string.replace('"', '')`
			`string = string.replace('“', '')`
			`string = string.replace('\n', ' ')`
			`string = string.strip()`
			`if string == '':`
			`string = 'empty reply, try regenerating'`
Make the code more like PEP8 for readability (#862) 2023-04-06 23:15:45 -04:00
Elevenlabs Extension Improvement and migration to official API (#1830) 2023-05-06 09:56:31 -04:00			`output_file = Path(f'extensions/elevenlabs_tts/outputs/{wav_idx:06d}.mp3'.format(wav_idx))`
Fix elevenlabs_tts too 2023-05-21 13:11:46 -04:00			`print(f'Outputting audio to {str(output_file)}')`
Elevenlabs Extension Improvement and migration to official API (#1830) 2023-05-06 09:56:31 -04:00			`try:`
Elevenlabs tts fixes (#2959) * [Fixed] Keep setting option for the voice - It was always changed to the first available voice - Also added an error if the selected voice isn't valid * [Fixed] elevenlabs_tts API key handling - The one from the settings wasn't applied - We always got "Enter your API key", even when the settings specified an api_key * [Added] elevenlabs_tts model selection - Now we can also use the "eleven_multilingual_v1" model. Used for anything but english. 2023-07-11 18:00:37 -04:00			`audio = elevenlabs.generate(text=string, voice=params['selected_voice'], model=params['model'])`
Elevenlabs Extension Improvement and migration to official API (#1830) 2023-05-06 09:56:31 -04:00			`elevenlabs.save(audio, str(output_file))`

			`autoplay = 'autoplay' if params['autoplay'] else ''`
			`string = f'<audio src="file/{output_file.as_posix()}" controls {autoplay}></audio>'`
			`wav_idx += 1`
			`except elevenlabs.api.error.UnauthenticatedRateLimitError:`
			`string = "🤖 ElevenLabs Unauthenticated Rate Limit Reached - Please create an API key to continue\n\n"`
			`except elevenlabs.api.error.RateLimitError:`
			`string = "🤖 ElevenLabs API Tier Limit Reached\n\n"`
			`except elevenlabs.api.error.APIError as err:`
			`string = f"🤖 ElevenLabs Error: {err}\n\n"`

			`if params['show_text']:`
			`string += f'\n\n{original_string}'`

			`shared.processing_message = "Is typing..."`
Move new extension to a separate file 2023-03-06 17:28:53 -05:00			`return string`

Make the code more like PEP8 for readability (#862) 2023-04-06 23:15:45 -04:00
Move new extension to a separate file 2023-03-06 17:28:53 -05:00			`def ui():`
Elevenlabs Extension Improvement and migration to official API (#1830) 2023-05-06 09:56:31 -04:00			`global voices`
			`if not voices:`
			`voices = refresh_voices()`
Elevenlabs tts fixes (#2959) * [Fixed] Keep setting option for the voice - It was always changed to the first available voice - Also added an error if the selected voice isn't valid * [Fixed] elevenlabs_tts API key handling - The one from the settings wasn't applied - We always got "Enter your API key", even when the settings specified an api_key * [Added] elevenlabs_tts model selection - Now we can also use the "eleven_multilingual_v1" model. Used for anything but english. 2023-07-11 18:00:37 -04:00			`selected = params['selected_voice']`
			`if selected == 'None':`
			`params['selected_voice'] = voices[0]`
			`elif selected not in voices:`
			`logger.error(f'Selected voice {selected} not available, switching to {voices[0]}')`
			`params['selected_voice'] = voices[0]`
Minor fixes to elevenlabs_tts 2023-05-06 09:57:34 -04:00
Move new extension to a separate file 2023-03-06 17:28:53 -05:00			`# Gradio elements`
			`with gr.Row():`
			`activate = gr.Checkbox(value=params['activate'], label='Activate TTS')`
Elevenlabs Extension Improvement and migration to official API (#1830) 2023-05-06 09:56:31 -04:00			`autoplay = gr.Checkbox(value=params['autoplay'], label='Play TTS automatically')`
			`show_text = gr.Checkbox(value=params['show_text'], label='Show message text under audio player')`
Minor fixes to elevenlabs_tts 2023-05-06 09:57:34 -04:00
Elevenlabs Extension Improvement and migration to official API (#1830) 2023-05-06 09:56:31 -04:00			`with gr.Row():`
			`voice = gr.Dropdown(value=params['selected_voice'], choices=voices, label='TTS Voice')`
			`refresh = gr.Button(value='Refresh')`
Minor fixes to elevenlabs_tts 2023-05-06 09:57:34 -04:00
Move new extension to a separate file 2023-03-06 17:28:53 -05:00			`with gr.Row():`
Elevenlabs tts fixes (#2959) * [Fixed] Keep setting option for the voice - It was always changed to the first available voice - Also added an error if the selected voice isn't valid * [Fixed] elevenlabs_tts API key handling - The one from the settings wasn't applied - We always got "Enter your API key", even when the settings specified an api_key * [Added] elevenlabs_tts model selection - Now we can also use the "eleven_multilingual_v1" model. Used for anything but english. 2023-07-11 18:00:37 -04:00			`if params['api_key']:`
			`api_key = gr.Textbox(value=params['api_key'], label='API Key')`
			`update_api_key(params['api_key'])`
			`else:`
			`api_key = gr.Textbox(placeholder="Enter your API key.", label='API Key')`

			`with gr.Row():`
lint 2023-07-12 14:33:25 -04:00			`model = gr.Dropdown(value=params['model'], choices=LANG_MODELS, label='Language model')`
Minor fixes to elevenlabs_tts 2023-05-06 09:57:34 -04:00
Elevenlabs Extension Improvement and migration to official API (#1830) 2023-05-06 09:56:31 -04:00			`with gr.Row():`
			`convert = gr.Button('Permanently replace audios with the message texts')`
			`convert_cancel = gr.Button('Cancel', visible=False)`
			`convert_confirm = gr.Button('Confirm (cannot be undone)', variant="stop", visible=False)`

Implement sessions + add basic multi-user support (#2991) 2023-07-03 23:03:30 -04:00			`if shared.is_chat():`
			`# Convert history with confirmation`
			`convert_arr = [convert_confirm, convert, convert_cancel]`
			`convert.click(lambda: [gr.update(visible=True), gr.update(visible=False), gr.update(visible=True)], None, convert_arr)`
			`convert_confirm.click(`
			`lambda: [gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)], None, convert_arr).then(`
			`remove_tts_from_history, gradio('history'), gradio('history')).then(`
			`chat.save_persistent_history, gradio('history', 'character_menu', 'mode'), None).then(`
			`chat.redraw_html, shared.reload_inputs, gradio('display'))`

			`convert_cancel.click(lambda: [gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)], None, convert_arr)`

			`# Toggle message text in history`
			`show_text.change(`
			`lambda x: params.update({"show_text": x}), show_text, None).then(`
			`toggle_text_in_history, gradio('history'), gradio('history')).then(`
			`chat.save_persistent_history, gradio('history', 'character_menu', 'mode'), None).then(`
			`chat.redraw_html, shared.reload_inputs, gradio('display'))`
Minor fixes to elevenlabs_tts 2023-05-06 09:57:34 -04:00
Move new extension to a separate file 2023-03-06 17:28:53 -05:00			`# Event functions to update the parameters in the backend`
			`activate.change(lambda x: params.update({'activate': x}), activate, None)`
			`voice.change(lambda x: params.update({'selected_voice': x}), voice, None)`
fix: elevenlabs cloned voices do not show up in webui after entering API key (#2107) 2023-05-16 19:21:36 -04:00			`api_key.change(update_api_key, api_key, None)`
Elevenlabs tts fixes (#2959) * [Fixed] Keep setting option for the voice - It was always changed to the first available voice - Also added an error if the selected voice isn't valid * [Fixed] elevenlabs_tts API key handling - The one from the settings wasn't applied - We always got "Enter your API key", even when the settings specified an api_key * [Added] elevenlabs_tts model selection - Now we can also use the "eleven_multilingual_v1" model. Used for anything but english. 2023-07-11 18:00:37 -04:00			`model.change(lambda x: params.update({'model': x}), model, None)`
Elevenlabs Extension Improvement and migration to official API (#1830) 2023-05-06 09:56:31 -04:00			`# connect.click(check_valid_api, [], connection_status)`
			`refresh.click(refresh_voices_dd, [], voice)`
			`# Event functions to update the parameters in the backend`
			`autoplay.change(lambda x: params.update({"autoplay": x}), autoplay, None)`