From 4c72e43bcfb70102a7330a5748ca2ea0989f0ffb Mon Sep 17 00:00:00 2001
From: EliasVincent
Date: Thu, 9 Mar 2023 12:46:50 +0100
Subject: [PATCH] first implementation

---
 extensions/whisper_stt/requirements.txt |  5 ++++
 extensions/whisper_stt/script.py        | 48 +++++++++++++++++++++++++++++++++
 2 files changed, 53 insertions(+)
 create mode 100644 extensions/whisper_stt/requirements.txt
 create mode 100644 extensions/whisper_stt/script.py

diff --git a/extensions/whisper_stt/requirements.txt b/extensions/whisper_stt/requirements.txt
new file mode 100644
index 00000000..e6e3255f
--- /dev/null
+++ b/extensions/whisper_stt/requirements.txt
@@ -0,0 +1,5 @@
+git+https://github.com/Uberi/speech_recognition.git@010382b
+PyAudio
+openai-whisper
+soundfile
+ffmpeg
diff --git a/extensions/whisper_stt/script.py b/extensions/whisper_stt/script.py
new file mode 100644
index 00000000..287a1fdd
--- /dev/null
+++ b/extensions/whisper_stt/script.py
@@ -0,0 +1,48 @@
+import gradio as gr
+import speech_recognition as sr
+import modules.shared as shared
+
+# Hook read by the web UI: when 'state' is True, 'value' holds the
+# [visible_text, internal_text] pair to inject as the next user input.
+input_hijack = {
+    'state': False,
+    'value': ["", ""]
+}
+
+
+def input_modifier(string):
+    """Extension hook: return the user input string unchanged."""
+    return string
+
+
+def do_stt():
+    """Record from the default microphone, transcribe with Whisper, and
+    stage the transcription in input_hijack. Returns the transcribed
+    text (empty string if recognition failed)."""
+    transcription = ""
+    r = sr.Recognizer()
+    with sr.Microphone() as source:
+        print("Say something!")
+        # Calibrate the energy threshold against ambient noise before listening.
+        r.adjust_for_ambient_noise(source)
+        audio = r.listen(source)
+
+    # recognize speech using whisper
+    try:
+        transcription = r.recognize_whisper(audio, language="english", model="tiny.en")
+        print("Whisper thinks you said " + transcription)
+    except sr.UnknownValueError:
+        print("Whisper could not understand audio")
+    except sr.RequestError as e:
+        # Report the underlying error so failures are diagnosable.
+        print("Could not request results from Whisper: " + str(e))
+
+    input_hijack.update({"state": True, "value": [transcription, transcription]})
+    return transcription
+
+
+def ui():
+    """Build the extension's Gradio widgets and wire the STT button."""
+    speech_button = gr.Button(value="STT")
+    output_transcription = gr.Textbox(label="Speech Preview")
+    speech_button.click(do_stt, outputs=[output_transcription])