I assume you have already downloaded RealtimeSTT and understand some Python basics.
I use the following modified Python script, which feeds audio data through a pipe instead of reading it from the microphone:
def feed_audio_from_stream(stream, recorder, stop_event, chunk_size=1024):
    """Read raw audio bytes from *stream* and hand them to *recorder*.

    The loop ends when *stop_event* is set, when the stream reaches end of
    file, or when an exception is raised (which is reported and swallowed so
    the thread exits quietly).

    Args:
        stream: Binary file-like object with a ``read(n)`` method, e.g.
            ``sys.stdin.buffer``.
        recorder: Object exposing ``feed_audio(data)``.
        stop_event: ``threading.Event``-like object; the loop stops once it
            is set.
        chunk_size: Maximum number of bytes requested per ``read`` call.
    """
    try:
        print("Speak now")
        while not stop_event.is_set():
            # The bytes must already be in the format the recorder expects
            # (presumably 16 kHz mono signed 16-bit PCM -- confirm against
            # the command that produces the pipe).
            data = stream.read(chunk_size)
            if not data:
                # An empty read means the writer closed the pipe (EOF).
                # Without this check the loop would spin forever, feeding
                # empty chunks at full CPU.
                break
            recorder.feed_audio(data)
    except Exception as e:
        print(f"feed_audio_thread encountered an error: {e}")
    finally:
        print("Audio stream closed.")


def run_transcription_loop(recorder, stop_event):
    """Repeatedly ask *recorder* for transcribed text and print it.

    Args:
        recorder: Object exposing ``text(callback)``.
        stop_event: ``threading.Event``-like object; the loop stops once it
            is set (checked between ``text`` calls).
    """

    def process_text(full_sentence):
        """Callback invoked with each transcribed sentence."""
        print("Transcribed text:", full_sentence)

    try:
        while not stop_event.is_set():
            # Get transcribed text and process it using the callback.
            recorder.text(process_text)
    except Exception as e:
        print(f"transcription_thread encountered an error: {e}")
    finally:
        print("Transcription thread exiting.")


def main():
    """Transcribe audio piped to standard input until interrupted."""
    # Imports are kept local so that importing this module has no side
    # effects and does not require RealtimeSTT to be installed.
    import threading
    from sys import stdin

    from RealtimeSTT import AudioToTextRecorder

    # Audio is fed manually from stdin, so the built-in microphone capture
    # is disabled.
    recorder = AudioToTextRecorder(
        language='ja',
        debug_mode=True,
        use_microphone=False,  # Disable built-in microphone usage
        spinner=False,  # Disable spinner animation in the console
    )

    # Event to signal when to stop the threads.
    stop_event = threading.Event()

    try:
        # Non-daemon threads: the interpreter waits for them at exit instead
        # of killing them mid-work.
        audio_thread = threading.Thread(
            target=feed_audio_from_stream,
            args=(stdin.buffer, recorder, stop_event),
        )
        audio_thread.daemon = False
        audio_thread.start()

        transcription_thread = threading.Thread(
            target=run_transcription_loop,
            args=(recorder, stop_event),
        )
        transcription_thread.daemon = False
        transcription_thread.start()

        # Wait for both threads to finish. stop_event is intentionally not
        # set when the input reaches EOF, so audio that was already fed can
        # still be transcribed; press Ctrl+C to exit.
        audio_thread.join()
        transcription_thread.join()
    except KeyboardInterrupt:
        print("exiting...")
    finally:
        stop_event.set()
        recorder.shutdown()


if __name__ == "__main__":
    main()
Then feed the program with audio from a pipe:
ffmpeg -f pulse -i default.monitor -f s16le -ar 16k -acodec pcm_s16le -ac 1 -loglevel quiet - | python ./piped_stream.py
You can also use another sink if you want to play the audio in a separate, isolated process; see the GitHub discussion above for a how-to.
If you have problems with the NVIDIA CUDA libraries, try updating the LD_LIBRARY_PATH variable:
export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:<path-to-the-project>/RealtimeSTT/.venv/lib/python3.12/site-packages/nvidia/cudnn/lib/"