Play wav file in callback mode [PyAudio / Python]

I want to play a wav file using PyAudio in callback mode. I can play the wav file once, but then the stream stops. For example, if I want to play a wav file (a trumpet sound, etc.) every five seconds, how should I implement this? Also, please let me know how to play an array (wav data) in callback mode, just in case.
The code I've implemented so far is the following:
import pyaudio
import numpy as np
import wave
import time
FORMAT = pyaudio.paInt16
CHANNEL = 2
CHUNK = 1024
RATE = 44100
flag = True
cur_dt = 0
# reading wav file here
wf = wave.open('test_3.wav', 'rb')
# instantiate PyAudio (1)
p = pyaudio.PyAudio()
# define callback (2)
def callback(in_data, frame_count, time_info, status):
    global flag, cur_dt, wf
    if flag:
        wf = wave.open('test_3.wav', 'rb')
        flag = False
    data = np.zeros((frame_count,), dtype=np.int16)
    data = wf.readframes(frame_count)
    # convert bytes to int
    data_int = np.frombuffer(data, dtype=np.int16)
    # convert int to float
    data_float = data_int.astype(np.float32) / 32768.0
    # # convert float to int
    # data_int_again = (data_float*32768.0).astype(np.int16, order='C')
    # your processing...
    f1 = np.fft.fft(data_float)
    f2 = np.fft.ifft(f1)
    f3 = (f2*32768.0).astype(np.int16)
    # make it sound!
    data = f3
    # cur_dt += 1
    # print(cur_dt)
    return (data.tobytes(), pyaudio.paContinue)
# open stream using callback (3)
stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
                channels=wf.getnchannels(),
                rate=wf.getframerate(),
                output=True,
                stream_callback=callback)
# print(p.get_format_from_width(wf.getsampwidth())) # paInt16
# print(wf.getsampwidth()) # 3
# print(wf.getnchannels()) # 2
# print(wf.getframerate()) # 44100
stream.start_stream()
# wait for stream to finish (5)
while stream.is_active():
    time.sleep(5)
    flag = True
# stop stream (6)
stream.stop_stream()
stream.close()
wf.close()
# close PyAudio (7)
p.terminate()
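One approach that seems to work (just a sketch, not a definitive answer, assuming the same test_3.wav): pad the last chunk with silence so PortAudio never receives a short buffer (a short buffer ends the stream even with paContinue), and rewind the wave file whenever the main loop raises the flag. For an in-memory array you would do the same thing with a running index into the array instead of wf.readframes().
import pyaudio
import wave
import time

wf = wave.open('test_3.wav', 'rb')
p = pyaudio.PyAudio()
flag = False

def callback(in_data, frame_count, time_info, status):
    global flag
    if flag:
        wf.rewind()  # start the file over
        flag = False
    data = wf.readframes(frame_count)
    # bytes per frame = sample width * number of channels
    frame_size = wf.getsampwidth() * wf.getnchannels()
    # pad with silence when the file runs out, so the stream stays active
    if len(data) < frame_count * frame_size:
        data += b'\x00' * (frame_count * frame_size - len(data))
    return (data, pyaudio.paContinue)

stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
                channels=wf.getnchannels(),
                rate=wf.getframerate(),
                output=True,
                stream_callback=callback)
stream.start_stream()

try:
    while stream.is_active():
        time.sleep(5)
        flag = True  # trigger a replay every five seconds
finally:
    stream.stop_stream()
    stream.close()
    wf.close()
    p.terminate()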

Related

How do you save a recorded .wav file to a specific directory in python

I found a Python program on the internet that can record voice directly from a microphone. However, when the program finishes running, the resulting .wav file is stored in the directory where the Python program itself lives. So, how do you save the recorded files in a specific directory?
import pyaudio
import wave
form_1 = pyaudio.paInt16
chans = 1 # 1 channel
samp_rate = 48000
chunk = 1024
record_secs = 2
dev_index = 2
wav_output_filename = 'test1.wav' # name of .wav file
audio = pyaudio.PyAudio() # create pyaudio instantiation
# create pyaudio stream
stream = audio.open(format = form_1, rate = samp_rate, channels = chans,
                    input_device_index = dev_index, input = True,
                    frames_per_buffer = chunk)
print("recording")
frames = []
# loop through stream and append audio chunks to frame array
for ii in range(0,int((samp_rate/chunk)*record_secs)):
    data = stream.read(chunk)
    frames.append(data)
print("finished recording")
# stop the stream, close it, and terminate the pyaudio instantiation
stream.stop_stream()
stream.close()
audio.terminate()
# save the audio frames as .wav file
wavefile = wave.open(wav_output_filename,'wb')
wavefile.setnchannels(chans)
wavefile.setsampwidth(audio.get_sample_size(form_1))
wavefile.setframerate(samp_rate)
wavefile.writeframes(b''.join(frames))
wavefile.close()
On line 10, give the output filename an absolute path:
wav_output_filename = '/path/to/specific/directory/test1.wav'
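A slightly more defensive variant (just a sketch; the directory path is a placeholder) builds the path with os.path and creates the directory if it does not exist yet:
import os

output_dir = '/path/to/specific/directory'   # placeholder, use your own directory
os.makedirs(output_dir, exist_ok=True)       # create the directory if it is missing
wav_output_filename = os.path.join(output_dir, 'test1.wav')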

How to play input data from microphone directly

I want to play input data from a microphone directly, without a buffering gap. I tried, but there is buffering. Here is my code.
import pyaudio
import wave
import urllib.request
import struct
import numpy as np
import sounddevice as sd
import matplotlib.pyplot as plt
# Callback function---------------------------------
def callback(indata, outdata, frames, time, status):
    # if status:
    #     print(status)
    outdata[:] = indata
#---------------------------------------------------
# Parameters ----------------------------------------------
Window_Size = 22050 # Point
FORMAT_D = pyaudio.paFloat32; FORMAT_W = pyaudio.paInt32
CHANNELS = 1 # Mono
Sample_Rate = 22050 # Hz
dT = 1/Sample_Rate
RECORD_SECONDS = 20 # s
NOFFRAMES = int(Sample_Rate/Window_Size * RECORD_SECONDS)
WAVE_OUTPUT_FILENAME = "output.wav"
#-----------------------------------------------------------
p = pyaudio.PyAudio()
stream_D = p.open(format=FORMAT_D,
                  channels=CHANNELS,
                  rate=Sample_Rate,
                  input=True,
                  frames_per_buffer=Window_Size)
stream_W = p.open(format=FORMAT_W,
                  channels=CHANNELS,
                  rate=Sample_Rate,
                  input=True,
                  frames_per_buffer=Window_Size)
print("* recording")
frames = []
# "I think the problem appears from here"------------------------------
for i in range(0, int(Sample_Rate/Window_Size * RECORD_SECONDS)):
    data_D = stream_D.read(Window_Size)
    # data_W = stream_W.read(Window_Size)
    decoded = np.fromstring(data_D, 'Float32')
    # np.savetxt(str(i)+'ttt.txt',transform)
    sd.play(decoded,22050)
    # frames.append(data_W)
#-------------------------------------------------------
print("* done recording")
stream_D.stop_stream()
stream_D.close()
p.terminate()
#plt.plot(transform)
#plt.show()
# Save as a wave file---------------------------
#wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
#wf.setnchannels(CHANNELS)
#wf.setsampwidth(p.get_sample_size(FORMAT_W))
#wf.setframerate(Sample_Rate)
#wf.writeframes(b''.join(frames))
#wf.close()
#-------------------------------------------
This code saves input data from the microphone in 1 s blocks, converts the byte data to a NumPy array (np.fromstring()), and plays the data through the speaker (sd.play()). The code works, but there is a gap each time the for loop starts again. I want to play the sound from the microphone smoothly. When I first asked, someone recommended using a callback function, so I added one, but I don't know how to use it. How do I get rid of the buffering? Are there any examples? Should I use threads or multiprocessing?
The delay is due to the buffer size; you will get a negligible delay using a 1k buffer, as per:
# Window_Size = 22050 # Point
Window_Size = 1024 # Point
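A more complete fix (a sketch, not part of the original answer) is to use the callback already defined in the question with a full-duplex sounddevice stream, so the copying from input to output happens in a background thread and no manual read/play loop is needed:
import sounddevice as sd

def callback(indata, outdata, frames, time, status):
    if status:
        print(status)
    outdata[:] = indata          # echo the microphone straight to the speakers

# full-duplex stream: 22050 Hz mono, 1024-frame blocks, float32 samples
with sd.Stream(samplerate=22050, blocksize=1024, channels=1,
               dtype='float32', callback=callback):
    sd.sleep(20 * 1000)          # keep playing for 20 seconds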

Python - Record on loop, stop recording when silent

I am trying to write a Python script that will record 5-second segments of speech on a loop for as long as the user is speaking, and will stop after three wave files of pure silence. How would I go about this?
import pyaudio
import wave
CHUNK = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 2
RATE = 44100
RECORD_SECONDS = 5
WAVE_OUTPUT_FILENAME = "output.wav"
p = pyaudio.PyAudio()
stream = p.open(format = FORMAT,
                channels = CHANNELS,
                rate = RATE,
                input = True,
                frames_per_buffer = CHUNK)
print("* recording")
frames = []
for j in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
    data = stream.read(CHUNK)
    frames.append(data)
print("* done recording")
stream.stop_stream()
stream.close()
p.terminate()
wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
wf.setnchannels(CHANNELS)
wf.setsampwidth(p.get_sample_size(FORMAT))
wf.setframerate(RATE)
wf.writeframes(b''.join(frames))
wf.close()
I think you can use https://github.com/rhasspy/rhasspy-silence library
Here is a little code for you (not mine, but it works):
from rhasspysilence import WebRtcVadRecorder,VoiceCommand, VoiceCommandResult
import threading
import dataclasses
import typing
from queue import Queue
import json
import io
import os
from pathlib import Path
import shlex
import time
import wave
import sys
import subprocess
import pyaudio
pa = pyaudio.PyAudio()

# you can change the options (these are default settings)
vad_mode = 3
sample_rate = 16000
min_seconds = 1
max_seconds = 30
speech_seconds = 0.3
silence_seconds = 0.5
before_seconds = 0.2
chunk_size = 960
skip_seconds = 0
audio_source = None
channels = 1

def SpeechToText():
    recorder = WebRtcVadRecorder(vad_mode=vad_mode,)
    recorder.start()
    # file directory
    wav_sink = 'wavs/'
    wav_dir = None
    # file name
    wav_filename = 'tester'
    if wav_sink:
        wav_sink_path = Path(wav_sink)
        if wav_sink_path.is_dir():
            # Directory to write WAV files
            wav_dir = wav_sink_path
        else:
            # Single WAV file to write
            wav_sink = open(wav_sink, "wb")
    voice_command: typing.Optional[VoiceCommand] = None
    audio_source = pa.open(rate=sample_rate, format=pyaudio.paInt16, channels=channels,
                           input=True, frames_per_buffer=chunk_size)
    audio_source.start_stream()
    print("Ready", file=sys.stderr)

    def buffer_to_wav(buffer: bytes) -> bytes:
        """Wraps a buffer of raw audio data in a WAV"""
        rate = int(sample_rate)
        width = int(2)
        channels = int(1)
        with io.BytesIO() as wav_buffer:
            wav_file: wave.Wave_write = wave.open(wav_buffer, mode="wb")
            with wav_file:
                wav_file.setframerate(rate)
                wav_file.setsampwidth(width)
                wav_file.setnchannels(channels)
                wav_file.writeframesraw(buffer)
            return wav_buffer.getvalue()

    try:
        chunk = audio_source.read(chunk_size)
        while chunk:
            # Look for speech/silence
            voice_command = recorder.process_chunk(chunk)
            if voice_command:
                is_timeout = voice_command.result == VoiceCommandResult.FAILURE
                # Reset
                audio_data = recorder.stop()
                if wav_dir:
                    # Write WAV to directory
                    wav_path = (wav_dir / time.strftime(wav_filename)).with_suffix(".wav")
                    wav_bytes = buffer_to_wav(audio_data)
                    wav_path.write_bytes(wav_bytes)
                    print(wav_path)
                    print('file saved')
                    break
                elif wav_sink:
                    # Write to single WAV file (was core.buffer_to_wav in the original)
                    wav_bytes = buffer_to_wav(audio_data)
                    wav_sink.write(wav_bytes)
            # Next audio chunk
            chunk = audio_source.read(chunk_size)
    finally:
        try:
            audio_source.close()   # was close_stream(); PyAudio streams use close()
        except Exception:
            pass

# execute command
SpeechToText()
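If pulling in a VAD library is more than you need, a rough alternative (a sketch, not taken from the answer above; the silence threshold is a guess you would tune for your microphone) is to record 5-second segments in a loop, compute the RMS of each segment with NumPy, and stop after three quiet segments in a row:
import wave

import numpy as np
import pyaudio

CHUNK = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 44100
RECORD_SECONDS = 5
SILENCE_RMS = 500              # tune for your microphone and room

p = pyaudio.PyAudio()
stream = p.open(format=FORMAT, channels=CHANNELS, rate=RATE,
                input=True, frames_per_buffer=CHUNK)

silent_segments = 0
segment_index = 0
while silent_segments < 3:
    frames = []
    for _ in range(int(RATE / CHUNK * RECORD_SECONDS)):
        frames.append(stream.read(CHUNK))
    audio = b''.join(frames)

    # root-mean-square amplitude of the 16-bit samples
    samples = np.frombuffer(audio, dtype=np.int16).astype(np.float64)
    rms = np.sqrt(np.mean(samples ** 2))
    silent_segments = silent_segments + 1 if rms < SILENCE_RMS else 0

    # write every 5-second segment to its own file
    wf = wave.open("segment_%d.wav" % segment_index, 'wb')
    wf.setnchannels(CHANNELS)
    wf.setsampwidth(p.get_sample_size(FORMAT))
    wf.setframerate(RATE)
    wf.writeframes(audio)
    wf.close()
    segment_index += 1

stream.stop_stream()
stream.close()
p.terminate()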

Recording and playing byte list in pyaudio/wave

I want to record and play back my voice using the pyaudio and wave libraries, but I don't know how to do it, because wave requires a path to a file, and even when I try to hand it the list of bytes recorded a few seconds earlier, it still doesn't work, because 'read' can't be used on a list. Does someone have an idea? I want to make a looper like KORG gear, etc.
I want to play it immediately after recording stops, like a real looper, without saving the recording as a file.
Here is my code (Python 3.4):
def record(self):  # recording a voice
    # var for bytes from recording
    self.stream = self.player.open(format = self.FORMAT,
                                   channels = self.CHANNELS,
                                   rate = self.RATE,
                                   input = True,
                                   frames_per_buffer = self.CHUNK)
    print("Recording")
    self.frames = []  # byte list
    # recording for a few seconds (5 sec at this moment)
    for i in range(0, int(self.RATE / self.CHUNK * self.RECORD_SECONDS)):
        self.data = self.stream.read(self.CHUNK)  # read the stream into the data var
        self.frames.append(self.data)  # add bytes to the end of the list
    print("Stop recording")
    self.stopRecording()

def stopRecording(self):
    self.stream.stop_stream()
    self.stream.close()
    print("Recording has been stopped")
    self.play()

def play(self):  # playing a record
    print("Playing")
    f = wave.open(self.frames, "rb")  # this is the broken part: wave.open() expects a filename or file object, not a list
    # read data
    data = f.readframes(CHUNK)
    # play stream
    while data != '':
        self.stream.write(data)
        data = f.readframes(CHUNK)
    self.stopPlaying()
After you stop your recording you need to join the appended data, using data = b''.join(self.frames), and at the end build a loop (for, while) to stream the whole byte list. Here is how I did it:
import pyaudio
chunk = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 41000
RECORD_SECONDS = 5
p = pyaudio.PyAudio()
stream = p.open(format = FORMAT,
                channels = CHANNELS,
                rate = RATE,
                input = True,
                output = True,
                frames_per_buffer = chunk)
print("***Recording***")
all = []
for i in range(0, int(RATE / chunk * RECORD_SECONDS)):
    data = stream.read(chunk)
    all.append(data)
print("***Stop recording***")
print("***START PLAY***")
data = b''.join(all)   # the recorded chunks are bytes, so join with b''
for i in range(0, len(data), chunk):
    stream.write(data[i:i+chunk])
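If you do want to go through the wave module without touching disk (a sketch that is not part of the answer above; the helper name is made up), note that wave.open() also accepts a file-like object, so you can wrap the recorded bytes in an io.BytesIO buffer:
import io
import wave

def frames_to_wave(frames, channels, sample_width, rate):
    """Wrap raw recorded frames in an in-memory WAV and return a readable Wave_read."""
    buffer = io.BytesIO()
    with wave.open(buffer, 'wb') as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(sample_width)
        wf.setframerate(rate)
        wf.writeframes(b''.join(frames))
    buffer.seek(0)
    return wave.open(buffer, 'rb')   # no file on disk involved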

PyAudio Input overflowed

I'm trying to make real-time plotting sound in python. I need to get chunks from my microphone.
Using PyAudio, I try the following:
import pyaudio
import wave
import sys
chunk = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 44100
RECORD_SECONDS = 5
WAVE_OUTPUT_FILENAME = "output.wav"
p = pyaudio.PyAudio()
stream = p.open(format = FORMAT,
                channels = CHANNELS,
                rate = RATE,
                input = True,
                frames_per_buffer = chunk)
print "* recording"
all = []
for i in range(0, RATE / chunk * RECORD_SECONDS):
    data = stream.read(chunk)
    all.append(data)
print "* done recording"
stream.close()
p.terminate()
Afterwards, I get the following error:
* recording
Traceback (most recent call last):
File "gg.py", line 23, in <module>
data = stream.read(chunk)
File "/usr/lib64/python2.7/site-packages/pyaudio.py", line 564, in read
return pa.read_stream(self._stream, num_frames)
IOError: [Errno Input overflowed] -9981
I can't understand this buffer. I want to use blocking IO mode, so if chunks are not available, I want to wait for those chunks. But when I create a try/except block or sleep(0.1), I hear clicks, so this is not what I want. Please suggest the best solution for my problem.
pyaudio.Stream.read() has a keyword parameter exception_on_overflow; set this to False.
For your sample code that would look like:
import pyaudio
import wave
import sys
chunk = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 44100
RECORD_SECONDS = 5
WAVE_OUTPUT_FILENAME = "output.wav"
p = pyaudio.PyAudio()
stream = p.open(format = FORMAT,
                channels = CHANNELS,
                rate = RATE,
                input = True,
                frames_per_buffer = chunk)
print "* recording"
all = []
for i in range(0, RATE / chunk * RECORD_SECONDS):
    data = stream.read(chunk, exception_on_overflow = False)
    all.append(data)
print "* done recording"
stream.close()
p.terminate()
See the PyAudio documentation for more details.
It seems like a lot of people are encountering this issue. I dug a bit into it and I think it means that between the previous call to stream.read() and this current call, data from the stream was lost (i.e. the buffer filled up faster than you cleared it).
From the doc for Pa_ReadStream() (the PortAudio function that stream.read() eventually ends up calling):
@return On success PaNoError will be returned, or PaInputOverflowed if
input data was discarded by PortAudio after the previous call and
before this call.
(PaInputOverflowed then causes an IOError in the pyaudio wrapper).
If it's OK for you to not capture every single frame, then you may ignore this error. If it's absolutely critical for you to have every frame, then you'll need to find a way to increase the priority of your application. I'm not familiar enough with Python to know a pythonic way to do this, but it's worth trying a simple nice command, or changing the scheduling policy to SCHED_DEADLINE.
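For what it is worth, a small sketch of the priority idea on Linux (not from the original answer; negative nice values normally require root or CAP_SYS_NICE):
import os

try:
    os.nice(-10)        # lower niceness = higher scheduling priority
except PermissionError:
    print("run as root (or grant CAP_SYS_NICE) to raise the priority")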
Edit:
One issue right now is that when IOError is thrown, you lose all the frames collected in that call. To instead ignore the overflow and just return what we have, you can apply the patch below, which will cause stream.read() to ignore output underrun and input overflow errors from PortAudio (but still throw something if a different error occurred). A better way would be to make this behaviour (throw/no throw) customizable depending on your needs.
diff --git a/src/_portaudiomodule.c b/src/_portaudiomodule.c
index a8f053d..0878e74 100644
--- a/src/_portaudiomodule.c
+++ b/src/_portaudiomodule.c
@@ -2484,15 +2484,15 @@ pa_read_stream(PyObject *self, PyObject *args)
} else {
/* clean up */
_cleanup_Stream_object(streamObject);
+
+ /* free the string buffer */
+ Py_XDECREF(rv);
+
+ PyErr_SetObject(PyExc_IOError,
+ Py_BuildValue("(s,i)",
+ Pa_GetErrorText(err), err));
+ return NULL;
}
-
- /* free the string buffer */
- Py_XDECREF(rv);
-
- PyErr_SetObject(PyExc_IOError,
- Py_BuildValue("(s,i)",
- Pa_GetErrorText(err), err));
- return NULL;
}
return rv;
I got the same error when I ran your code. I looked at the default sample rate of my default audio device, my MacBook's internal microphone; it was 48000 Hz, not 44100 Hz.
p.get_device_info_by_index(0)['defaultSampleRate']
Out[12]: 48000.0
When I changed RATE to this value, it worked.
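A quick way to check this for every input device (a small sketch using the same PyAudio device query):
import pyaudio

p = pyaudio.PyAudio()
for i in range(p.get_device_count()):
    info = p.get_device_info_by_index(i)
    if info['maxInputChannels'] > 0:
        # name and default sample rate of each capture device
        print(i, info['name'], info['defaultSampleRate'])
p.terminate()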
I saw this on OS X 10.10: I got the same error while trying to get audio from the microphone on a SYBA USB card (C-Media chipset) and process it in real time with FFTs and more:
IOError: [Errno Input overflowed] -9981
The overflow was completely solved by using callback mode instead of blocking mode, as written by libbkmz (https://www.python.org/dev/peps/pep-0263/).
Based on that, the bit of the working code looked like this:
"""
Creating the audio stream from our mic
"""
rate=48000
self.chunk=2**12
width = 2
p = pyaudio.PyAudio()
# callback function to stream audio, another thread.
def callback(in_data,frame_count, time_info, status):
    self.audio = numpy.fromstring(in_data,dtype=numpy.int16)
    return (self.audio, pyaudio.paContinue)
#create a pyaudio object
self.inStream = p.open(format = p.get_format_from_width(width, unsigned=False),
                       channels=1,
                       rate=rate,
                       input=True,
                       frames_per_buffer=self.chunk,
                       stream_callback = callback)
"""
Setting up the array that will handle the timeseries of audio data from our input
"""
self.audio = numpy.empty((self.buffersize),dtype="int16")
self.inStream.start_stream()
while True:
    try:
        self.ANY_FUNCTION()  # any function to run parallel to the audio thread, running forever, until Ctrl+C is pressed.
    except KeyboardInterrupt:
        self.inStream.stop_stream()
        self.inStream.close()
        p.terminate()
        print("* Killed Process")
        quit()
This code will create a callback function, then create a stream object, start it and then loop in any function. A separate thread streams audio, and that stream is closed when the main loop is stopped. self.audio is used in any function. I also had problems with the thread running forever if not terminated.
Since PyAudio runs this stream in a separate thread, the audio stream stayed stable, whereas the blocking mode might have been saturating depending on the speed or timing of the rest of the processes in the script.
Note that the chunk size is 2^12, but smaller chunks work just as well. There are other parameters I considered and played around with to make sure they all made sense:
Chunk size larger or smaller (no effect)
Number and format of the bits for the words in the buffer, signed 16-bit in this case
Signedness of variables (tried with unsigned and got saturation patterns)
Nature of the mic input, and its selection as default in the system, gain, etc.
Hope that works for someone!
My other answer solved the problem in most cases. However, sometimes the error still occurred.
That was the reason why I scrapped PyAudio and switched to pyalsaaudio. My Raspberry Pi now smoothly records any sound.
import alsaaudio
import numpy as np
import array
# constants
CHANNELS = 1
INFORMAT = alsaaudio.PCM_FORMAT_FLOAT_LE
RATE = 44100
FRAMESIZE = 1024
# set up audio input
recorder=alsaaudio.PCM(type=alsaaudio.PCM_CAPTURE)
recorder.setchannels(CHANNELS)
recorder.setrate(RATE)
recorder.setformat(INFORMAT)
recorder.setperiodsize(FRAMESIZE)
buffer = array.array('f')
while <some condition>:
    buffer.frombytes(recorder.read()[1])  # array.fromstring() was removed in Python 3.9
    data = np.array(buffer, dtype='f')
FORMAT = pyaudio.paInt16
Make sure to set the correct format; my internal microphone was set to 24-bit (see the Audio MIDI Setup application).
I had the same issue on a really slow Raspberry Pi, but I was able to solve it (for most cases) by using the faster array module for storing the data.
import array
import pyaudio
FORMAT = pyaudio.paInt16
CHANNELS = 1
INPUT_CHANNEL = 2
RATE = 48000
CHUNK = 512
RECORD_SECONDS = 5   # missing in the original snippet; assumed value

p = pyaudio.PyAudio()
stream = p.open(format=FORMAT,
                channels=CHANNELS,
                rate=RATE,
                input=INPUT_CHANNEL,
                frames_per_buffer=CHUNK)

print("* recording")
try:
    data = array.array('h')
    for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
        data.frombytes(stream.read(CHUNK))  # array.fromstring() was removed in Python 3.9
finally:
    stream.stop_stream()
    stream.close()
    p.terminate()
print("* done recording")
The content of data is raw binary afterwards, but you can use numpy.array(data, dtype='i') to get a NumPy array of integers.
Instead of
chunk = 1024
use:
chunk = 4096
It worked for me on a USB microphone.
This was helpful for me:
input_ = stream.read(chunk, exception_on_overflow=False)
For me this helped: https://stackoverflow.com/a/46787874/5047984
I used multiprocessing to write the file in parallel with recording the audio. This is my code:
recordAudioSamples.py
import pyaudio
import wave
import datetime
import signal
import ftplib
import sys
import os
# configuration for assos_listen
import config
# run the audio capture and send sound sample processes
# in parallel
from multiprocessing import Process
# CONFIG
CHUNK = config.chunkSize
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = config.samplingRate
RECORD_SECONDS = config.sampleLength
# HELPER FUNCTIONS
# write to ftp
def uploadFile(filename):
    print("start uploading file: " + filename)
    # connect to container
    ftp = ftplib.FTP(config.ftp_server_ip, config.username, config.password)
    # write file
    ftp.storbinary('STOR ' + filename, open(filename, 'rb'))
    # close connection
    ftp.quit()
    print("finished uploading: " + filename)

# write to sd-card
def storeFile(filename, frames):
    print("start writing file: " + filename)
    wf = wave.open(filename, 'wb')
    wf.setnchannels(CHANNELS)
    wf.setsampwidth(p.get_sample_size(FORMAT))
    wf.setframerate(RATE)
    wf.writeframes(b''.join(frames))
    wf.close()
    print(filename + " written")

# abort the sampling process
def signal_handler(signal, frame):
    print('You pressed Ctrl+C!')
    # close stream and pyAudio
    stream.stop_stream()
    stream.close()
    p.terminate()
    sys.exit(0)

# MAIN FUNCTION
def recordAudio(p, stream):
    sampleNumber = 0
    while (True):
        print("* recording")
        sampleNumber = sampleNumber + 1
        frames = []
        startDateTimeStr = datetime.datetime.now().strftime("%Y_%m_%d_%I_%M_%S_%f")
        for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
            data = stream.read(CHUNK)
            frames.append(data)
        fileName = str(config.sensorID) + "_" + startDateTimeStr + ".wav"
        # create a store process to write the file in parallel
        storeProcess = Process(target=storeFile, args=(fileName, frames))
        storeProcess.start()
        if (config.upload == True):
            # since waiting for the upload to finish will take some time
            # and we do not want to have gaps in our sample
            # we start the upload process in parallel
            print("start uploading...")
            uploadProcess = Process(target=uploadFile, args=(fileName,))
            uploadProcess.start()

# ENTRYPOINT FROM CONSOLE
if __name__ == '__main__':
    p = pyaudio.PyAudio()
    stream = p.open(format=FORMAT,
                    channels=CHANNELS,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=CHUNK)
    # directory to write and read files from
    os.chdir(config.storagePath)
    # abort by pressing C
    signal.signal(signal.SIGINT, signal_handler)
    print('\n\n--------------------------\npress Ctrl+C to stop the recording')
    # start recording
    recordAudio(p, stream)
config.py
### configuration file for assos_listen
# upload
upload = False
# config for this sensor
sensorID = "al_01"
# sampling rate & chunk size
chunkSize = 8192
samplingRate = 44100 # 44100 needed for Aves sampling
# choices=[4000, 8000, 16000, 32000, 44100] :: default 16000
# sample length in seconds
sampleLength = 10
# configuration for assos_store container
ftp_server_ip = "192.168.0.157"
username = "sensor"
password = "sensor"
# storage on assos_listen device
storagePath = "/home/pi/assos_listen_pi/storage/"
