Python record audio on detected sound - python

I am looking to have a python script run in the background and use pyaudio to record sound files when the threshold of the microphone has reached a certain point. This is for a monitor on a two way radio network. So hence we only want to record transmitted audio.
Tasks in mind:
Record audio input on a n% gate threshold
stop recording after so many seconds of silence
keep recording for so many seconds after audio
Phase 2: input data into MySQL database to search the recordings
I am looking at a file structure of the similar
/home/Recodings/2013/8/23/12-33.wav would be a recording of the transmision on 23/08/2013 # 12:33.wav
I have used the code from
Detect and record a sound with python
I am at a bit of a loss where to go from here now and a little guidance would be greatly appreciated
thank you

The current top answer is a bit outdated and only works for python 2. Here is a version updated for python 3. It wraps the functions into classes and packages everything into one simple easy-to-use version. Note that there is one key difference between the top answer and my script:
The script at the top records for one file and then stops, while my script keeps recording whenever noise is detected and dumps the recordings into a directory as it goes.
The main idea for both scripts are pretty similar:
Step 1: 'Listen' until rms becomes greater than the threshold
Step 2: Start recording, set a timer for when to stop recording, == TIMEOUT_LENGTH
Step 3: If the rms breaks threshold again before the timer times out reset the timer
Step 4: Now that the timer is expired, write the recording to a directory and go back to step 1
import pyaudio
import math
import struct
import wave
import time
import os
Threshold = 10
SHORT_NORMALIZE = (1.0/32768.0)
chunk = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 16000
swidth = 2
TIMEOUT_LENGTH = 5
f_name_directory = r'C:\Users\Jason\PyCharmProjects\AutoRecorder\records'
class Recorder:
#staticmethod
def rms(frame):
count = len(frame) / swidth
format = "%dh" % (count)
shorts = struct.unpack(format, frame)
sum_squares = 0.0
for sample in shorts:
n = sample * SHORT_NORMALIZE
sum_squares += n * n
rms = math.pow(sum_squares / count, 0.5)
return rms * 1000
def __init__(self):
self.p = pyaudio.PyAudio()
self.stream = self.p.open(format=FORMAT,
channels=CHANNELS,
rate=RATE,
input=True,
output=True,
frames_per_buffer=chunk)
def record(self):
print('Noise detected, recording beginning')
rec = []
current = time.time()
end = time.time() + TIMEOUT_LENGTH
while current <= end:
data = self.stream.read(chunk)
if self.rms(data) >= Threshold: end = time.time() + TIMEOUT_LENGTH
current = time.time()
rec.append(data)
self.write(b''.join(rec))
def write(self, recording):
n_files = len(os.listdir(f_name_directory))
filename = os.path.join(f_name_directory, '{}.wav'.format(n_files))
wf = wave.open(filename, 'wb')
wf.setnchannels(CHANNELS)
wf.setsampwidth(self.p.get_sample_size(FORMAT))
wf.setframerate(RATE)
wf.writeframes(recording)
wf.close()
print('Written to file: {}'.format(filename))
print('Returning to listening')
def listen(self):
print('Listening beginning')
while True:
input = self.stream.read(chunk)
rms_val = self.rms(input)
if rms_val > Threshold:
self.record()
a = Recorder()
a.listen()

Some time ago I wrote some of the steps
Record audio input on a n% gate threshold
A: Start a Boolean variable type for "Silence" and you can calculate RMS to decide if Silence is true or False, Set one RMS Threshold
stop recording after so many seconds of silence
A: Do you need calculate one timeout, for it get the Frame Rate, Chunk Size and how many seconds do you want, to calculate your timeout make (FrameRate / chunk * Max_Seconds)
keep recording for so many seconds after audio
A: If Silence is false == (RMS > Threshold) get the last chunk of data of audio (LastBlock) and just keep record :-)
Phase 2: input data into MySQL database to search the recordings
A: This step is up to you
Source code:
import pyaudio
import math
import struct
import wave
#Assuming Energy threshold upper than 30 dB
Threshold = 30
SHORT_NORMALIZE = (1.0/32768.0)
chunk = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 16000
swidth = 2
Max_Seconds = 10
TimeoutSignal=((RATE / chunk * Max_Seconds) + 2)
silence = True
FileNameTmp = '/home/Recodings/2013/8/23/12-33.wav'
Time=0
all =[]
def GetStream(chunk):
return stream.read(chunk)
def rms(frame):
count = len(frame)/swidth
format = "%dh"%(count)
# short is 16 bit int
shorts = struct.unpack( format, frame )
sum_squares = 0.0
for sample in shorts:
n = sample * SHORT_NORMALIZE
sum_squares += n*n
# compute the rms
rms = math.pow(sum_squares/count,0.5);
return rms * 1000
def WriteSpeech(WriteData):
stream.stop_stream()
stream.close()
p.terminate()
wf = wave.open(FileNameTmp, 'wb')
wf.setnchannels(CHANNELS)
wf.setsampwidth(p.get_sample_size(FORMAT))
wf.setframerate(RATE)
wf.writeframes(WriteData)
wf.close()
def KeepRecord(TimeoutSignal, LastBlock):
all.append(LastBlock)
for i in range(0, TimeoutSignal):
try:
data = GetStream(chunk)
except:
continue
#I chage here (new Ident)
all.append(data)
print "end record after timeout";
data = ''.join(all)
print "write to File";
WriteSpeech(data)
silence = True
Time=0
listen(silence,Time)
def listen(silence,Time):
print "waiting for Speech"
while silence:
try:
input = GetStream(chunk)
except:
continue
rms_value = rms(input)
if (rms_value > Threshold):
silence=False
LastBlock=input
print "hello ederwander I'm Recording...."
KeepRecord(TimeoutSignal, LastBlock)
Time = Time + 1
if (Time > TimeoutSignal):
print "Time Out No Speech Detected"
sys.exit()
p = pyaudio.PyAudio()
stream = p.open(format = FORMAT,
channels = CHANNELS,
rate = RATE,
input = True,
output = True,
frames_per_buffer = chunk)
listen(silence,Time)

So you just need the getLevel(data) function?
A quick hack would be:
def getLevel(data):
sqrsum = 0
for b in data:
b = ord(b)
sqrsum+=b*b
return sqrsum
That should increase with volume. Set your threshold appropriately through trial and error.

For those who have problems installing pyaudio because of the missing portaudio.h, you can do that:
sudo apt-get install portaudio19-dev python-pyaudio python3-pyaudio
the answer is from: portaudio.h: No such file or directory

Related

How to record 2 audio feeds simultaneously?

I want to record a audio track and save it 2 diffrent .wav files. The audio tracks should be saved with a delay of ca. 6 seconds and each .wav should be 12 seconds long.
I tried to do it with multiprocessing and pyaudio, but i cant manage to get it working
Please note that i am a beginner in python and that this is my first post on stackoverflow!
def func1():
#Record and save a 12 seconds long .wav
def func2():
#Record and save a 12 seconds long .wav
if __name__ == '__main__':
p1 = Process(target=func1)
p1.start()
p2 = Process(target=func2)
p2.start()
p1.join()
p2.join()
#start func2 6 seconds after func1
I would expect a data structure like this:
|---1.wav---|---1.wav---|---1.wav---|
|---2.wav---|---2.wav---|---2.wav---|
6sec 12sec 18sec 24sec 30sec 36sec 42sec
EDIT:
I came up with a bit of code that seems to work kind of well. It has a delay of .144 seconds. I am happy about improvement od this code. This code uses threading instead of multiprocessing.
import pyaudio
import wave
from threading import Thread
import time
from datetime import datetime
FORMAT = pyaudio.paInt16
CHANNELS = 2
RATE = 44100
CHUNK = 1024
CHUNK1 = 1024
RECORD_SECONDS = 12
WAVE_OUTPUT_FILENAME1 = name = "outputs/output_1"+datetime.now().strftime("%m:%d:%Y-")
WAVE_OUTPUT_FILENAME2 = name = "outputs/output_2"+datetime.now().strftime("%m:%d:%Y-")
def func1():
while 1==1:
global FORMAT
global CHANNELS
global RATE
global CHUNK
global RECORD_SECONDS
global WAVE_OUTPUT_FILENAME1
WAVE_OUTPUT_FILENAME1 = name = "outputs/output1_"#+datetime.now().strftime("%m:%d:%Y-")
audio = pyaudio.PyAudio()
stream = audio.open(format=FORMAT, channels=CHANNELS,
rate=RATE, input=True,
frames_per_buffer=CHUNK)
print("recording...")
frames = []
WAVE_OUTPUT_FILENAME1 = WAVE_OUTPUT_FILENAME1+datetime.now().strftime("%H;%M;%S.%f--")
for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
data = stream.read(CHUNK)
frames.append(data)
WAVE_OUTPUT_FILENAME1 = WAVE_OUTPUT_FILENAME1 + datetime.now().strftime("%H;%M;%S.%f")+".wav"
print("finished recording")
# stop Recording
stream.stop_stream()
stream.close()
audio.terminate()
waveFile = wave.open(WAVE_OUTPUT_FILENAME1, 'wb')
waveFile.setnchannels(CHANNELS)
waveFile.setsampwidth(audio.get_sample_size(FORMAT))
waveFile.setframerate(RATE)
waveFile.writeframes(b''.join(frames))
waveFile.close()
def func2():
time.sleep(6)
while 1==1:
global FORMAT
global CHANNELS
global RATE
global CHUNK1
global RECORD_SECONDS
global WAVE_OUTPUT_FILENAME2
WAVE_OUTPUT_FILENAME2 = name = "outputs/output2_"#+datetime.now().strftime("%m:%d:%Y-")
audio = pyaudio.PyAudio()
stream = audio.open(format=FORMAT, channels=CHANNELS,
rate=RATE, input=True,
frames_per_buffer=CHUNK1)
print("recording...")
frames = []
WAVE_OUTPUT_FILENAME2 = WAVE_OUTPUT_FILENAME2+datetime.now().strftime("%H;%M;%S.%f--")
for i in range(0, int(RATE / CHUNK1 * RECORD_SECONDS)):
data = stream.read(CHUNK1)
frames.append(data)
WAVE_OUTPUT_FILENAME2 = WAVE_OUTPUT_FILENAME2 + datetime.now().strftime("%H;%M;%S.%f")+".wav"
print("finished recording")
# stop Recording
stream.stop_stream()
stream.close()
audio.terminate()
waveFile = wave.open(WAVE_OUTPUT_FILENAME2, 'wb')
waveFile.setnchannels(CHANNELS)
waveFile.setsampwidth(audio.get_sample_size(FORMAT))
waveFile.setframerate(RATE)
waveFile.writeframes(b''.join(frames))
waveFile.close()
if __name__ == '__main__':
Thread(target = func1).start()
Thread(target = func2).start()
why do you think you need multiprocessing? I think it just complicates things
how about just recording in 6 second (or smaller) chunks/frames and write the correct frames to each file.
I've got a bit carried away, and written a nice class to do this:
import pyaudio
import wave
import time
class OverlappedRecorder:
def __init__(
self, secs_per_file, secs_between_file, *,
num_channels=2, sample_rate=48000,
sample_format=pyaudio.paInt16,
):
# various constants needed later
self.num_channels = num_channels
self.sample_width = pyaudio.get_sample_size(sample_format)
self.sample_rate = sample_rate
self.frames_between_start = int(secs_between_file * sample_rate)
self.frames_per_file = int(secs_per_file * sample_rate)
# mutable state needed to keep everything going
self.files = []
self.frames_till_next_file = 0
self.pa = pyaudio.PyAudio()
self.stream = self.pa.open(
format=sample_format, channels=num_channels,
rate=sample_rate, frames_per_buffer=1024,
input=True, start=False,
stream_callback=self._callback,
)
def sleep_while_active(self):
while self.stream.is_active():
time.sleep(0.2)
def begin_wave_file(self):
"internal function to start a new WAV file"
path = time.strftime(
'recording-%Y-%m-%d-%H.%M.%S.wav',
time.localtime()
)
file = wave.open(path, 'wb')
file.setnchannels(self.num_channels)
file.setsampwidth(self.sample_width)
file.setframerate(self.sample_rate)
self.files.append(file)
# context manager stuff, recording starts when entered using "with"
def __enter__(self):
self.stream.start_stream()
return self
# exiting shuts everything down
def __exit__(self, exc_type, exc_val, exc_tb):
self.stream.stop_stream()
self.stream.close()
self.pa.terminate()
for file in self.files:
file.close()
# called by pyaudio when a new set of frames are ready
def _callback(self, data, frame_count, time_info, status):
self.frames_till_next_file -= frame_count
# see if we need to start a new file
if self.frames_till_next_file < 0:
self.frames_till_next_file += self.frames_between_start
self.begin_wave_file()
# can't remove from lists while iterating
# keep a list of files to close and remove later
done = []
for file in self.files:
remain = self.frames_per_file - file.getnframes()
# add appropriate amount of data to all open files
if frame_count < remain:
file.writeframesraw(data)
else:
remain *= self.sample_width * self.num_channels
file.writeframesraw(data[:remain])
done.append(file)
# close anything that finished
for file in done:
file.close()
self.files.remove(file)
# tell pyaudio to keep going
return (None, pyaudio.paContinue)
basic usage is: create an object, enter it using with and it'll start recording, and when you exit it'll stop and clean up.
rec = OverlappedRecorder(12, 6)
with rec:
time.sleep(30)
will let it run for 30 seconds, or you could do:
with OverlappedRecorder(12, 6) as rec:
rec.sleep_while_active()
to let it run until you hit Ctrl+C to kill the program, or you could put a call to input() in there to make it stop when you press enter, or whatever else you like.
a few comments on the code you posted:
you only need to declare global variables if you're going to modify them
why do you have seperate functions? why not just have a single function, and just delay start()ing the second Thread
why are you setting WAVE_OUTPUT_FILENAME1 so many times? just save the start_time and end_time, then format the string in one go
you don't have to read() in chunks, if you know it's going to fit in memory just read everything all in one go
you shouldn't need to keep starting and stopping recording, just open it once in each thread and if you're lucky samples will accumulate in the buffer while you're writing the wav file to disk
something like:
import pyaudio
import wave
import time
from datetime import datetime
from threading import Thread
FORMAT = pyaudio.paInt16
CHANNELS = 2
RATE = 44100
RECORD_SECONDS = 12
def recorder(prefix):
audio = pyaudio.PyAudio()
stream = audio.open(
format=FORMAT, channels=CHANNELS,
rate=RATE, input=True,
)
try:
while True:
start_time = datetime.now()
print("recording started", start_time)
data = stream.read(RATE * RECORD_SECONDS, False)
end_time = datetime.now()
print("finished", end_time)
name = f'{prefix}{start_time:%Y-%m-%d-%H-%M-%S.%f}-{end_time:%H-%M-%S.%f}.wav'
print("writing", name)
with wave.open(name, 'wb') as waveFile:
waveFile.setnchannels(CHANNELS)
waveFile.setsampwidth(audio.get_sample_size(FORMAT))
waveFile.setframerate(RATE)
waveFile.writeframes(data)
finally:
stream.stop_stream()
stream.close()
audio.terminate()
if __name__ == '__main__':
Thread(target=recorder, args=('outputs/output_1-',)).start()
time.sleep(6)
Thread(target=recorder, args=('outputs/output_2-',)).start()
a few differences:
the version using threading is much less code!
my version allows an arbitrary number of files without using up multiple OS threads for each file (there's the Python thread and pyaudio has an internal thread looking after audio buffers)
my version saves partial files
hope all the helps / makes sense!

Pyaudio record for x seconds while the input pass a treeshold

I have a Python function that start recording when it detect noise, when the sound is bigger than a treeshold, but I need it to record just for an specific amount of time (two seconds for example), but I don't know how to do it. I've tried using time.sleep , but it doesn't work. How can I do it? This is my function:
def stt(language, threshold=THRESHOLD, num_phrases=-1):
"""
Listens to Microphone, extracts phrases from it and sends it to
Google's TTS service and returns response. a "phrase" is sound
surrounded by silence (according to threshold). num_phrases controls
how many phrases to process before finishing the listening process
(-1 for infinite).
"""
#Open stream
p = pyaudio.PyAudio()
stream = p.open(format=FORMAT,
channels=CHANNELS,
rate=RATE,
input=True,
frames_per_buffer=CHUNK)
print "* Listening mic. "
audio2send = []
cur_data = '' # current chunk of audio data
rel = RATE/CHUNK
slid_win = deque(maxlen=SILENCE_LIMIT * rel)
#Prepend audio from 0.5 seconds before noise was detected
prev_audio = deque(maxlen=PREV_AUDIO * rel)
started = False
n = num_phrases
response = []
while (num_phrases == -1 or n > 0):
cur_data = stream.read(CHUNK)
slid_win.append(math.sqrt(abs(audioop.avg(cur_data, 4))))
#print slid_win[-1]
if(sum([x > THRESHOLD for x in slid_win]) > 0):
if(not started):
print "Starting record of phrase"
started = True
audio2send.append(cur_data)
elif (started is True):
print "Finished"
# The limit was reached, finish capture and deliver.
filename = save_speech(list(prev_audio) + audio2send, p)
# Send file to Google and get response
r = stt_google_wav(filename, language)
if num_phrases == -1:
print r
else:
response.append(r)
# Remove temp file. Comment line to review.
os.remove(filename)
# Reset all
started = False
slid_win = deque(maxlen=SILENCE_LIMIT * rel)
prev_audio = deque(maxlen=0.5 * rel)
audio2send = []
n -= 1
print "Listening ..."
else:
prev_audio.append(cur_data)
print "* Done recording"
stream.close()
p.terminate()
return response
Thanks a lot!

Recording and playing byte list in pyaudio/wave

I want to record and play my voice using pyaudio and wave lib but I don't know how to do it because wave lib requires a path to a file and even if I'm trying to set it as a variable with list of bytes recorded a few second ago, still doesn't work beacuse I can't use 'read' for a list. Does someone have some idea? I want to make a looper like KORG stuff, etc
I want to play it immediately after stopped recording, like real looper, without saving record as file.
There is my code (Python 3.4):
def record(self): #recording a voice
#var for bytes from recording
self.stream = self.player.open(format = self.FORMAT,
channels = self.CHANNELS,
rate = self.RATE,
input = True,
frames_per_buffer = self.CHUNK)
print("Recording")
self.frames = [] #byte list
#recoring for a few seconds (5sec at this moment)
for i in range(0, int(self.RATE / self.CHUNK * self.RECORD_SECONDS)):
self.data = self.stream.read(self.CHUNK) #sing stream do data var
self.frames.append(self.data) #add bytes to the end of a list
print("Stop recording")
self.stopRecording()
def stopRecording(self):
self.stream.stop_stream()
self.stream.close()
print("Recording has been stopped")
self.play()
def play(self): #playing a record
print("Playing")
f = wave.open(self.frames,"rb")
#read data
data = f.readframes(CHUNK)
#play stream
while data != '':
self.stream.write(data)
data = f.readframes(CHUNK)
self.stopPlaying()
After stop your record you need join your appended data, use data = ''.join(self.frames), and at the end build a loop (for, while) to stream all your byte list, here is how i did:
import pyaudio
chunk = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 41000
RECORD_SECONDS = 5
p = pyaudio.PyAudio()
stream = p.open(format = FORMAT,
channels = CHANNELS,
rate = RATE,
input = True,
output = True,
frames_per_buffer = chunk)
print ("***Recording***")
all = []
for i in range(0, RATE / chunk * RECORD_SECONDS):
data = stream.read(chunk)
all.append(data)
print("***Stop recording***")
print ("***START PLAY***")
data = ''.join(all)
for i in range(0, len(data), chunk):
stream.write(data[i:i+chunk])

Playing a single channel of audio using PyAudio or similar

I currently have a 2 channel wav file. I would like to have the ability to play one channel at a time. Using the examples from the PyAudio website where you read in a chunk of data through the wave module and then write to stream, I am unsure whether I need to somehow read in one channel of data and write to a one channel stream, or if there is some other solution. Thanks for the help.
I propose you alter the stream and overwrite the channel that you do not want to hear, with data from the channel you want to hear. Normally, channels are interleaved, so the following should do the trick.
import pyaudio
import wave
import sys
chunk = 1024
def convert(data, sampleSize = 4, channel = 0):
for i in range(0, len(data), 2*sampleSize):
for j in range(0, sampleSize):
data[i + j + sampleSize * channel] = data[i + j + sampleSize * (1 - channel)]
if len(sys.argv) < 2:
print "Plays a wave file.\n\n" +\
"Usage: %s filename.wav" % sys.argv[0]
sys.exit(-1)
wf = wave.open(sys.argv[1], 'rb')
p = pyaudio.PyAudio()
# open stream
stream = p.open(format =
p.get_format_from_width(wf.getsampwidth()),
channels = wf.getnchannels(),
rate = wf.getframerate(),
output = True)
# read data
data = wf.readframes(chunk)
convert(data)
# play stream
while data != '':
stream.write(data)
data = wf.readframes(chunk)
convert(data)
stream.close()
p.terminate()

PyAudio Input overflowed

I'm trying to make real-time plotting sound in python. I need to get chunks from my microphone.
Using PyAudio, try to use
import pyaudio
import wave
import sys
chunk = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 44100
RECORD_SECONDS = 5
WAVE_OUTPUT_FILENAME = "output.wav"
p = pyaudio.PyAudio()
stream = p.open(format = FORMAT,
channels = CHANNELS,
rate = RATE,
input = True,
frames_per_buffer = chunk)
print "* recording"
all = []
for i in range(0, RATE / chunk * RECORD_SECONDS):
data = stream.read(chunk)
all.append(data)
print "* done recording"
stream.close()
p.terminate()
After, I getting the followin error:
* recording
Traceback (most recent call last):
File "gg.py", line 23, in <module>
data = stream.read(chunk)
File "/usr/lib64/python2.7/site-packages/pyaudio.py", line 564, in read
return pa.read_stream(self._stream, num_frames)
IOError: [Errno Input overflowed] -9981
I can't understand this buffer. I want, to use blocking IO mode, so if chunks not available, i want to wait for those chunks. But when I creating try except segment or sleep(0.1), i hear clicks, so this is not what i want.
Please suggest the best solution for my ploblem?
pyaudio.Stream.read() has a keyword parameter exception_on_overflow, set this to False.
For your sample code that would look like:
import pyaudio
import wave
import sys
chunk = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 44100
RECORD_SECONDS = 5
WAVE_OUTPUT_FILENAME = "output.wav"
p = pyaudio.PyAudio()
stream = p.open(format = FORMAT,
channels = CHANNELS,
rate = RATE,
input = True,
frames_per_buffer = chunk)
print "* recording"
all = []
for i in range(0, RATE / chunk * RECORD_SECONDS):
data = stream.read(chunk, exception_on_overflow = False)
all.append(data)
print "* done recording"
stream.close()
p.terminate()
See the PyAudio documentation for more details.
It seems like a lot of people are encountering this issue. I dug a bit into it and I think it means that between the previous call to stream.read() and this current call, data from the stream was lost (i.e. the buffer filled up faster than you cleared it).
From the doc for Pa_ReadStream() (the PortAudio function that stream.read() eventually ends up calling):
#return On success PaNoError will be returned, or PaInputOverflowed if
input data was discarded by PortAudio after the previous call and
before this call.
(PaInputOverflowed then causes an IOError in the pyaudio wrapper).
If it's OK for you to not capture every single frame, then you may ignore this error. If it's absolutely critical for you to have every frame, then you'll need to find a way to increase the priority of your application. I'm not familiar enough with Python to know a pythonic way to do this, but it's worth trying a simple nice command, or changing the scheduling policy to SCHED_DEADLINE.
Edit:
One issue right now is that when IOError is thrown, you lose all the frames collected in that call. To instead ignore the overflow and just return what we have, you can apply the patch below, which will cause stream.read() to ignore output underrun and input overflow errors from PortAudio (but still throw something if a different error occurred). A better way would be to make this behaviour (throw/no throw) customizable depending on your needs.
diff --git a/src/_portaudiomodule.c b/src/_portaudiomodule.c
index a8f053d..0878e74 100644
--- a/src/_portaudiomodule.c
+++ b/src/_portaudiomodule.c
## -2484,15 +2484,15 ## pa_read_stream(PyObject *self, PyObject *args)
} else {
/* clean up */
_cleanup_Stream_object(streamObject);
+
+ /* free the string buffer */
+ Py_XDECREF(rv);
+
+ PyErr_SetObject(PyExc_IOError,
+ Py_BuildValue("(s,i)",
+ Pa_GetErrorText(err), err));
+ return NULL;
}
-
- /* free the string buffer */
- Py_XDECREF(rv);
-
- PyErr_SetObject(PyExc_IOError,
- Py_BuildValue("(s,i)",
- Pa_GetErrorText(err), err));
- return NULL;
}
return rv;
I got the same error when I ran your code. I looked at the default sample rate of my default audio device, my macbook's internal microphone, it was 48000Hz not 44100Hz.
p.get_device_info_by_index(0)['defaultSampleRate']
Out[12]: 48000.0
When I changed RATE to this value, it worked.
I worked this on OS X 10.10, Got the same error while trying to get audio from the microphone in a SYBA USB card (C Media chipset), and process it in real time with fft's and more:
IOError: [Errno Input overflowed] -9981
The overflow was completely solved when using a Callback Mode, instead of the Blocking Mode, as written by libbkmz.(https://www.python.org/dev/peps/pep-0263/)
Based on that, the bit of the working code looked like this:
"""
Creating the audio stream from our mic
"""
rate=48000
self.chunk=2**12
width = 2
p = pyaudio.PyAudio()
# callback function to stream audio, another thread.
def callback(in_data,frame_count, time_info, status):
self.audio = numpy.fromstring(in_data,dtype=numpy.int16)
return (self.audio, pyaudio.paContinue)
#create a pyaudio object
self.inStream = p.open(format = p.get_format_from_width(width, unsigned=False),
channels=1,
rate=rate,
input=True,
frames_per_buffer=self.chunk,
stream_callback = callback)
"""
Setting up the array that will handle the timeseries of audio data from our input
"""
self.audio = numpy.empty((self.buffersize),dtype="int16")
self.inStream.start_stream()
while True:
try:
self.ANY_FUNCTION() #any function to run parallel to the audio thread, running forever, until ctrl+C is pressed.
except KeyboardInterrupt:
self.inStream.stop_stream()
self.inStream.close()
p.terminate()
print("* Killed Process")
quit()
This code will create a callback function, then create a stream object, start it and then loop in any function. A separate thread streams audio, and that stream is closed when the main loop is stopped. self.audio is used in any function. I also had problems with the thread running forever if not terminated.
Since Pyaudio runs this stream in a separate thread, and this made the audio stream stable, the Blocking mode might have been saturating depending on the speed or timing of the rest of the processes in the script.
Note that the chunk size is 2^12, but smaller chunks work just as well. There are other parameters I considered and played around with to make sure they all made sense:
Chunk size larger or smaller(no effect)
Number and format of bits for the words in the buffer, signed 16 bit in this case.
signedness of variables(tried with unsigned and got saturation patterns)
Nature of mic input, and selection as default in the system, gain etc.
Hope that works for someone!
My other answer solved the problem in most cases. However sometimes the error still occurs.
That was the reason why I scrapped pyaudio and switched to pyalsaaudio. My Raspy now smoothly records any sound.
import alsaaudio
import numpy as np
import array
# constants
CHANNELS = 1
INFORMAT = alsaaudio.PCM_FORMAT_FLOAT_LE
RATE = 44100
FRAMESIZE = 1024
# set up audio input
recorder=alsaaudio.PCM(type=alsaaudio.PCM_CAPTURE)
recorder.setchannels(CHANNELS)
recorder.setrate(RATE)
recorder.setformat(INFORMAT)
recorder.setperiodsize(FRAMESIZE)
buffer = array.array('f')
while <some condition>:
buffer.fromstring(recorder.read()[1])
data = np.array(buffer, dtype='f')
FORMAT = pyaudio.paInt16
Make sure to set the correct format, my internal microphone was set to 24 Bit (see Audio-Midi-Setup application).
I had the same issue on the really slow raspberry pi, but I was able to solve it (for most cases) by using the faster array module for storing the data.
import array
import pyaudio
FORMAT = pyaudio.paInt16
CHANNELS = 1
INPUT_CHANNEL=2
RATE = 48000
CHUNK = 512
p = pyaudio.PyAudio()
stream = p.open(format=FORMAT,
channels=CHANNELS,
rate=RATE,
input=INPUT_CHANNEL,
frames_per_buffer =CHUNK)
print("* recording")
try:
data = array.array('h')
for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
data.fromstring(stream.read(CHUNK))
finally:
stream.stop_stream()
stream.close()
p.terminate()
print("* done recording")
The content of data is rather binary afterwards.
But you can use numpy.array(data, dtype='i') to get a numpy array of intergers.
Instead of
chunk = 1024
use:
chunk = 4096
It worked for me on a USB microphone.
This was helpful for me:
input_ = stream.read(chunk, exception_on_overflow=False)
exception_on_overflow = False
For me this helped: https://stackoverflow.com/a/46787874/5047984
I used multiprocessing to write the file in parallel to recording audio. This is my code:
recordAudioSamples.py
import pyaudio
import wave
import datetime
import signal
import ftplib
import sys
import os
# configuration for assos_listen
import config
# run the audio capture and send sound sample processes
# in parallel
from multiprocessing import Process
# CONFIG
CHUNK = config.chunkSize
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = config.samplingRate
RECORD_SECONDS = config.sampleLength
# HELPER FUNCTIONS
# write to ftp
def uploadFile(filename):
print("start uploading file: " + filename)
# connect to container
ftp = ftplib.FTP(config.ftp_server_ip, config.username, config.password)
# write file
ftp.storbinary('STOR '+filename, open(filename, 'rb'))
# close connection
ftp.quit()
print("finished uploading: " +filename)
# write to sd-card
def storeFile(filename,frames):
print("start writing file: " + filename)
wf = wave.open(filename, 'wb')
wf.setnchannels(CHANNELS)
wf.setsampwidth(p.get_sample_size(FORMAT))
wf.setframerate(RATE)
wf.writeframes(b''.join(frames))
wf.close()
print(filename + " written")
# abort the sampling process
def signal_handler(signal, frame):
print('You pressed Ctrl+C!')
# close stream and pyAudio
stream.stop_stream()
stream.close()
p.terminate()
sys.exit(0)
# MAIN FUNCTION
def recordAudio(p, stream):
sampleNumber = 0
while (True):
print("* recording")
sampleNumber = sampleNumber +1
frames = []
startDateTimeStr = datetime.datetime.now().strftime("%Y_%m_%d_%I_%M_%S_%f")
for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
data = stream.read(CHUNK)
frames.append(data)
fileName = str(config.sensorID) + "_" + startDateTimeStr + ".wav"
# create a store process to write the file in parallel
storeProcess = Process(target=storeFile, args=(fileName,frames))
storeProcess.start()
if (config.upload == True):
# since waiting for the upload to finish will take some time
# and we do not want to have gaps in our sample
# we start the upload process in parallel
print("start uploading...")
uploadProcess = Process(target=uploadFile, args=(fileName,))
uploadProcess.start()
# ENTRYPOINT FROM CONSOLE
if __name__ == '__main__':
p = pyaudio.PyAudio()
stream = p.open(format=FORMAT,
channels=CHANNELS,
rate=RATE,
input=True,
frames_per_buffer=CHUNK)
# directory to write and read files from
os.chdir(config.storagePath)
# abort by pressing C
signal.signal(signal.SIGINT, signal_handler)
print('\n\n--------------------------\npress Ctrl+C to stop the recording')
# start recording
recordAudio(p, stream)
config.py
### configuration file for assos_listen
# upload
upload = False
# config for this sensor
sensorID = "al_01"
# sampling rate & chunk size
chunkSize = 8192
samplingRate = 44100 # 44100 needed for Aves sampling
# choices=[4000, 8000, 16000, 32000, 44100] :: default 16000
# sample length in seconds
sampleLength = 10
# configuration for assos_store container
ftp_server_ip = "192.168.0.157"
username = "sensor"
password = "sensor"
# storage on assos_listen device
storagePath = "/home/pi/assos_listen_pi/storage/"

Categories

Resources