mirror of
https://github.com/oobabooga/text-generation-webui.git
synced 2024-10-01 01:26:03 -04:00
Addressing Whisper STT issues (#5929)
This commit is contained in:
parent
5c6b9c610d
commit
cc825dd1f4
25
extensions/whisper_stt/script.js
Normal file
25
extensions/whisper_stt/script.js
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
// Adds a compact "Rec." button right after the "Generate" button. Clicking it
// forwards the click to the page's own record button, then moves the
// resulting stop button next to "Generate" and hides "Rec." until the stop
// button is clicked.
var generate_button = document.getElementById("Generate");
var recButton = document.getElementsByClassName("record-button")[0];

// Work on a deep copy so the page's own record button stays where it is.
// If the page has no record button there is nothing to clone.
recButton = recButton ? recButton.cloneNode(true) : null;

// Do nothing (instead of throwing at load time) when either anchor element
// is missing from the page.
if (generate_button && recButton) {
    generate_button.insertAdjacentElement("afterend", recButton);

    recButton.style.setProperty("margin-left", "-10px");
    recButton.innerText = "Rec.";

    recButton.addEventListener("click", function() {
        // The clone carries the "record-button" class too, so pick the first
        // element with that class that is not the clone itself rather than
        // relying on a fixed index in document order.
        var recordButtons = document.getElementsByClassName("record-button");
        var originalRecordButton = null;
        for (var i = 0; i < recordButtons.length; i++) {
            if (recordButtons[i] !== recButton) {
                originalRecordButton = recordButtons[i];
                break;
            }
        }
        if (!originalRecordButton) return;
        originalRecordButton.click();

        // getElementsByClassName returns a live collection: after removing
        // the first entry, index 0 refers to the next remaining stop button.
        var stopRecordButtons = document.getElementsByClassName("stop-button");
        // More than one stop button: drop the first one (presumably a
        // leftover from an earlier recording -- confirm). remove() detaches
        // it from whatever parent it has, so it cannot raise NotFoundError.
        if (stopRecordButtons.length > 1) stopRecordButtons[0].remove();

        var stopRecordButton = stopRecordButtons[0];
        // No stop button is present (yet): leave the "Rec." button visible.
        if (!stopRecordButton) return;

        generate_button.insertAdjacentElement("afterend", stopRecordButton);
        stopRecordButton.style.setProperty("padding-right", "10px");
        recButton.style.display = "none";

        // Show the "Rec." button again once the stop button is clicked.
        stopRecordButton.addEventListener("click", function() {
            recButton.style.display = "flex";
        });
    });
}
|
@ -1,5 +1,8 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
|
||||||
import gradio as gr
|
import gradio as gr
|
||||||
import speech_recognition as sr
|
import speech_recognition as sr
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
from modules import shared
|
from modules import shared
|
||||||
|
|
||||||
@ -45,6 +48,11 @@ def do_stt(audio, whipser_model, whipser_language):
|
|||||||
def auto_transcribe(audio, auto_submit, whipser_model, whipser_language):
|
def auto_transcribe(audio, auto_submit, whipser_model, whipser_language):
|
||||||
if audio is None:
|
if audio is None:
|
||||||
return "", ""
|
return "", ""
|
||||||
|
sample_rate, audio_data = audio
|
||||||
|
if not isinstance(audio_data[0], np.ndarray): # workaround for chrome audio. Mono?
|
||||||
|
# Convert to 2 channels, so each sample s_i consists of the same value in both channels [val_i, val_i]
|
||||||
|
audio_data = np.column_stack((audio_data, audio_data))
|
||||||
|
audio = (sample_rate, audio_data)
|
||||||
transcription = do_stt(audio, whipser_model, whipser_language)
|
transcription = do_stt(audio, whipser_model, whipser_language)
|
||||||
if auto_submit:
|
if auto_submit:
|
||||||
input_hijack.update({"state": True, "value": [transcription, transcription]})
|
input_hijack.update({"state": True, "value": [transcription, transcription]})
|
||||||
@ -55,7 +63,7 @@ def auto_transcribe(audio, auto_submit, whipser_model, whipser_language):
|
|||||||
def ui():
|
def ui():
|
||||||
with gr.Accordion("Whisper STT", open=True):
|
with gr.Accordion("Whisper STT", open=True):
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
audio = gr.Audio(source="microphone")
|
audio = gr.Audio(source="microphone", type="numpy")
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
with gr.Accordion("Settings", open=False):
|
with gr.Accordion("Settings", open=False):
|
||||||
auto_submit = gr.Checkbox(label='Submit the transcribed audio automatically', value=params['auto_submit'])
|
auto_submit = gr.Checkbox(label='Submit the transcribed audio automatically', value=params['auto_submit'])
|
||||||
@ -69,3 +77,13 @@ def ui():
|
|||||||
whipser_model.change(lambda x: params.update({"whipser_model": x}), whipser_model, None)
|
whipser_model.change(lambda x: params.update({"whipser_model": x}), whipser_model, None)
|
||||||
whipser_language.change(lambda x: params.update({"whipser_language": x}), whipser_language, None)
|
whipser_language.change(lambda x: params.update({"whipser_language": x}), whipser_language, None)
|
||||||
auto_submit.change(lambda x: params.update({"auto_submit": x}), auto_submit, None)
|
auto_submit.change(lambda x: params.update({"auto_submit": x}), auto_submit, None)
|
||||||
|
|
||||||
|
|
||||||
|
def custom_js():
    """
    Returns custom javascript as a string. It is applied whenever the web UI is
    loaded.

    The script is read from the ``script.js`` file located in the same
    directory as this module.

    :return: The full text of ``script.js``.
    :raises OSError: If ``script.js`` cannot be opened or read.
    """
    script_path = Path(__file__).parent.resolve() / "script.js"
    # Decode explicitly as UTF-8 instead of relying on the platform's default
    # locale encoding, which may not be UTF-8 (e.g. on Windows).
    return script_path.read_text(encoding="utf-8")
|
||||||
|
Loading…
Reference in New Issue
Block a user