I need to capture audio clips as WAV files that I can then pass to another bit of python for processing. The problem is that I need to determine when there is audio present and then record it, stop when it goes silent and then pass that file to the processing module.
I'm thinking it should be possible with the wave module to detect when there is pure silence and discard it then as soon as something other than silence is detected start recording, then when the line goes silent again stop the recording.
I just can't quite get my head around it. Can anyone get me started with a basic example?
As a follow up to Nick Fortescue's answer, here's a more complete example of how to record from the microphone and process the resulting data:
from sys import byteorder
from array import array
from struct import pack
import pyaudio
import wave
THRESHOLD = 500
CHUNK_SIZE = 1024
FORMAT = pyaudio.paInt16
RATE = 44100
def is_silent(snd_data, threshold=None):
    """Return True when the chunk's peak amplitude is below the threshold.

    Args:
        snd_data: Iterable of signed sample values (e.g. ``array('h')``).
        threshold: Level below which a sample counts as silent; defaults
            to the module-level ``THRESHOLD``.

    Returns:
        True if every sample's absolute value is below ``threshold``.
        An empty chunk is treated as silent.
    """
    if threshold is None:
        threshold = THRESHOLD
    # Samples are signed: a loud negative swing must count as sound too,
    # so compare magnitudes rather than the raw maximum.
    return all(abs(sample) < threshold for sample in snd_data)
def normalize(snd_data):
    """Scale the samples so that the loudest one peaks at 16384.

    Args:
        snd_data: Iterable of signed 16-bit sample values.

    Returns:
        A new ``array('h')`` with every sample multiplied by the same gain.
        Empty or all-zero input is returned unchanged (as a copy), because
        there is no peak to scale against.
    """
    MAXIMUM = 16384
    peak = max([abs(i) for i in snd_data] or [0])
    if peak == 0:
        # Pure silence (or no data): avoid dividing by zero.
        return array('h', snd_data)

    times = float(MAXIMUM) / peak
    return array('h', (int(i * times) for i in snd_data))
def trim(snd_data, threshold=None):
    """Trim the quiet samples from the start and end of a recording.

    Args:
        snd_data: ``array('h')`` (or other indexable sequence) of samples.
        threshold: Magnitude a sample must exceed to count as sound;
            defaults to the module-level ``THRESHOLD``.

    Returns:
        A new ``array('h')`` spanning the first through the last sample
        whose magnitude exceeds ``threshold``; empty when none does.
    """
    if threshold is None:
        threshold = THRESHOLD

    # Locate the first loud sample scanning forwards ...
    first = next(
        (pos for pos, sample in enumerate(snd_data) if abs(sample) > threshold),
        None,
    )
    if first is None:
        return array('h')

    # ... and the last one scanning backwards; one exists because `first` does.
    last = next(
        pos for pos in range(len(snd_data) - 1, -1, -1)
        if abs(snd_data[pos]) > threshold
    )
    return array('h', snd_data[first:last + 1])
def add_silence(snd_data, seconds, rate=None):
    """Pad a recording with silence at both ends.

    Args:
        snd_data: Iterable of signed 16-bit sample values.
        seconds: Length of each silent pad, in seconds (float).
        rate: Samples per second; defaults to the module-level ``RATE``.

    Returns:
        A new ``array('h')``: ``int(seconds * rate)`` zero samples, then
        the data, then the same number of zero samples.
    """
    if rate is None:
        rate = RATE
    # Repeating a one-element array avoids building a large Python list.
    silence = array('h', [0]) * int(seconds * rate)
    r = array('h', silence)
    r.extend(snd_data)
    r.extend(silence)
    return r
def record():
    """Record a word or words from the microphone.

    Reads chunks until sound has started and more than 30 consecutive
    silent chunks follow it. The recording is then normalized, trimmed of
    leading/trailing silence, and padded with 0.5 seconds of blank sound
    so that players such as VLC do not chop it off.

    Returns:
        A ``(sample_width, data)`` tuple: the sample size in bytes for
        ``FORMAT`` and the samples as an ``array('h')``.
    """
    p = pyaudio.PyAudio()
    try:
        # Input only: nothing is ever written to the stream, and asking
        # for an output side fails on machines without a playback device.
        stream = p.open(format=FORMAT, channels=1, rate=RATE,
                        input=True,
                        frames_per_buffer=CHUNK_SIZE)
        try:
            num_silent = 0
            snd_started = False
            r = array('h')

            while True:
                # little endian, signed short
                snd_data = array('h', stream.read(CHUNK_SIZE))
                if byteorder == 'big':
                    snd_data.byteswap()
                r.extend(snd_data)

                silent = is_silent(snd_data)
                if not snd_started:
                    snd_started = not silent
                elif silent:
                    num_silent += 1
                    if num_silent > 30:
                        break
                else:
                    # Sound resumed: short pauses between words must not
                    # add up to the end-of-recording limit.
                    num_silent = 0

            sample_width = p.get_sample_size(FORMAT)
        finally:
            stream.stop_stream()
            stream.close()
    finally:
        # Release PortAudio even if opening or reading the stream failed.
        p.terminate()

    r = normalize(r)
    r = trim(r)
    r = add_silence(r, 0.5)
    return sample_width, r
def record_to_file(path):
    """Record from the microphone and write the result to 'path' as WAV.

    Args:
        path: Destination file name; written as mono at ``RATE`` Hz.
    """
    sample_width, data = record()
    # A counted format ("<Nh") keeps the format string short instead of
    # repeating one 'h' per sample.
    data = pack('<%dh' % len(data), *data)

    wf = wave.open(path, 'wb')
    try:
        wf.setnchannels(1)
        wf.setsampwidth(sample_width)
        wf.setframerate(RATE)
        wf.writeframes(data)
    finally:
        # Close even when writing fails so the handle is not leaked.
        wf.close()
if __name__ == '__main__':
    # Demo entry point: record one utterance and save it next to the script.
    print("please speak a word into the microphone")
    record_to_file('demo.wav')
    print("done - result written to demo.wav")
I believe the WAVE module does not support recording, just processing existing files. You might want to look at PyAudio for actually recording.
WAV is about the world's simplest file format. In paInt16 you just get a signed integer representing a level, and closer to 0 is quieter. I can't remember if WAV files are high byte first or low byte, but something like this ought to work (sorry, I'm not really a Python programmer):
from array import array
# You'll probably want to experiment with the threshold; the right value
# depends on how noisy the signal is.
threshold = 10
# NOTE(review): `data` is assumed to be the raw buffer of 16-bit samples
# read from the input stream; it is not defined in this snippet.
as_ints = array('h', data)
# Samples are signed, so compare the peak magnitude, not the raw maximum.
max_value = max([abs(sample) for sample in as_ints] or [0])
if max_value > threshold:
    # not silence
    pass
PyAudio code for recording kept for reference:
import pyaudio
import sys
# Record a fixed number of seconds from the default input device.
chunk = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 44100
RECORD_SECONDS = 5

p = pyaudio.PyAudio()

# Input only: this script never writes to the stream.
stream = p.open(format=FORMAT,
                channels=CHANNELS,
                rate=RATE,
                input=True,
                frames_per_buffer=chunk)

print("* recording")
# int(): true division yields a float on Python 3, which range() rejects.
for i in range(0, int(RATE / chunk * RECORD_SECONDS)):
    data = stream.read(chunk)
    # check for silence here by comparing the level with 0 (or some threshold) for
    # the contents of data.
    # then write data or not to a file

print("* done")

stream.stop_stream()
stream.close()
p.terminate()
Thanks to cryo for improved version that I based my tested code below:
#Instead of adding silence at start and end of recording (values=0) I add the original audio . This makes audio sound more natural as volume is >0. See trim()
#I also fixed issue with the previous code - accumulated silence counter needs to be cleared once recording is resumed.
from array import array
from struct import pack
from sys import byteorder
import copy
import pyaudio
import wave
THRESHOLD = 500  # audio levels not normalised.
CHUNK_SIZE = 1024
RATE = 44100
CHANNELS = 1
FORMAT = pyaudio.paInt16
FRAME_MAX_VALUE = 2 ** 15 - 1
NORMALIZE_MINUS_ONE_dB = 10 ** (-1.0 / 20)
# Derived from RATE and CHUNK_SIZE so it stays correct if either changes.
SILENT_CHUNKS = 3 * RATE / CHUNK_SIZE  # about 3sec
# Integer division: this value is used as a slice offset, and true division
# would make it a float on Python 3.
TRIM_APPEND = RATE // 4
def is_silent(data_chunk, threshold=None):
    """Return True when the chunk's peak amplitude is below the threshold.

    Args:
        data_chunk: Iterable of signed sample values (e.g. ``array('h')``).
        threshold: Level below which a sample counts as silent; defaults
            to the module-level ``THRESHOLD``.

    Returns:
        True if every sample's absolute value is below ``threshold``.
        An empty chunk is treated as silent.
    """
    if threshold is None:
        threshold = THRESHOLD
    # Compare magnitudes: a loud negative swing is sound, not silence.
    return all(abs(sample) < threshold for sample in data_chunk)
def normalize(data_all, target_peak=None):
    """Amplify the volume so that the loudest sample reaches the target peak.

    Args:
        data_all: Iterable of signed 16-bit sample values.
        target_peak: Magnitude the loudest sample is scaled to; defaults to
            ``NORMALIZE_MINUS_ONE_dB * FRAME_MAX_VALUE`` (max minus 1 dB).

    Returns:
        A new ``array('h')`` with every sample multiplied by the same gain.
        Empty or all-zero input is returned unchanged (as a copy).
    """
    if target_peak is None:
        target_peak = NORMALIZE_MINUS_ONE_dB * FRAME_MAX_VALUE

    peak = max([abs(i) for i in data_all] or [0])
    if peak == 0:
        # Pure silence (or no data): there is nothing to scale against.
        return array('h', data_all)

    normalize_factor = float(target_peak) / peak
    return array('h', (int(i * normalize_factor) for i in data_all))
def trim(data_all, threshold=None, pad=None):
    """Cut leading/trailing quiet audio, keeping some original audio as padding.

    Instead of padding with zeros, up to ``pad`` original samples are kept
    on each side of the detected sound.

    Args:
        data_all: ``array('h')`` of samples.
        threshold: Magnitude a sample must exceed to count as sound;
            defaults to the module-level ``THRESHOLD``.
        pad: Number of samples kept before the first and after the last
            loud sample; defaults to the module-level ``TRIM_APPEND``.

    Returns:
        A new array holding the selected span. When no sample exceeds the
        threshold the whole input is returned (as a copy).
    """
    if threshold is None:
        threshold = THRESHOLD
    # int(): slice bounds must be integers even if the constant is a float.
    pad = int(TRIM_APPEND if pad is None else pad)

    _from = 0
    _to = len(data_all) - 1
    for i, b in enumerate(data_all):
        if abs(b) > threshold:
            _from = max(0, i - pad)
            break

    for i, b in enumerate(reversed(data_all)):
        if abs(b) > threshold:
            _to = min(len(data_all) - 1, len(data_all) - 1 - i + pad)
            break

    # Slicing already produces an independent copy of the samples.
    return data_all[_from:(_to + 1)]
def record():
    """Record a word or words from the microphone.

    Reads chunks until audio has started and more than ``SILENT_CHUNKS``
    consecutive silent chunks follow it, then trims and normalizes.

    Returns:
        A ``(sample_width, data)`` tuple: the sample size in bytes for
        ``FORMAT`` and the samples as an array of signed shorts.
    """
    p = pyaudio.PyAudio()
    try:
        # Input only: nothing is ever written to the stream.
        stream = p.open(format=FORMAT, channels=CHANNELS, rate=RATE,
                        input=True, frames_per_buffer=CHUNK_SIZE)
        try:
            silent_chunks = 0
            audio_started = False
            data_all = array('h')

            while True:
                # little endian, signed short
                data_chunk = array('h', stream.read(CHUNK_SIZE))
                if byteorder == 'big':
                    data_chunk.byteswap()
                data_all.extend(data_chunk)

                silent = is_silent(data_chunk)

                if audio_started:
                    if silent:
                        silent_chunks += 1
                        if silent_chunks > SILENT_CHUNKS:
                            break
                    else:
                        # Sound resumed: restart the silence count.
                        silent_chunks = 0
                elif not silent:
                    audio_started = True

            sample_width = p.get_sample_size(FORMAT)
        finally:
            stream.stop_stream()
            stream.close()
    finally:
        # Release PortAudio even if opening or reading the stream failed.
        p.terminate()

    # Trim before normalizing: the threshold applies to the un-normalized
    # wave, the same scale is_silent() works on.
    data_all = trim(data_all)
    data_all = normalize(data_all)
    return sample_width, data_all
def record_to_file(path):
    """Record from the microphone and write the result to 'path' as WAV.

    Args:
        path: Destination file name; written with ``CHANNELS`` channels
            at ``RATE`` Hz.
    """
    sample_width, data = record()
    # A counted format ("<Nh") keeps the format string short instead of
    # repeating one 'h' per sample.
    data = pack('<%dh' % len(data), *data)

    wave_file = wave.open(path, 'wb')
    try:
        wave_file.setnchannels(CHANNELS)
        wave_file.setsampwidth(sample_width)
        wave_file.setframerate(RATE)
        wave_file.writeframes(data)
    finally:
        # Close even when writing fails so the handle is not leaked.
        wave_file.close()
if __name__ == '__main__':
    # Demo entry point: recording starts on the first sound and ends after
    # a sustained run of silence.
    print("Wait in silence to begin recording; wait in silence to terminate")
    record_to_file('demo.wav')
    print("done - result written to demo.wav")
import pyaudio
import wave
from array import array
# Record for a fixed time, keeping only the chunks that are not silent.
FORMAT = pyaudio.paInt16
CHANNELS = 2
RATE = 44100
CHUNK = 1024
RECORD_SECONDS = 15
FILE_NAME = "RECORDING.wav"

audio = pyaudio.PyAudio()  # instantiate the pyaudio
frames = []
try:
    # Query the sample width while PortAudio is still initialised.
    sample_width = audio.get_sample_size(FORMAT)

    # recording prerequisites
    stream = audio.open(format=FORMAT, channels=CHANNELS,
                        rate=RATE,
                        input=True,
                        frames_per_buffer=CHUNK)
    try:
        # starting recording
        for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
            data = stream.read(CHUNK)
            data_chunk = array('h', data)
            # Samples are signed, so use the peak magnitude.
            vol = max([abs(sample) for sample in data_chunk] or [0])
            if vol >= 500:
                print("something said")
                frames.append(data)
            else:
                print("nothing")
            print("\n")
    finally:
        # end of recording
        stream.stop_stream()
        stream.close()
finally:
    audio.terminate()

# writing to file
wavfile = wave.open(FILE_NAME, 'wb')
try:
    wavfile.setnchannels(CHANNELS)
    wavfile.setsampwidth(sample_width)
    wavfile.setframerate(RATE)
    wavfile.writeframes(b''.join(frames))  # append frames recorded to file
finally:
    wavfile.close()
I think this will help. It is a simple script which checks whether there is silence or not. If silence is detected it will not record; otherwise it will record.
The pyaudio website has many examples that are pretty short and clear:
http://people.csail.mit.edu/hubert/pyaudio/
Update 14th of December 2019 - Main example from the above linked website from 2017:
"""PyAudio Example: Play a WAVE file."""
import pyaudio
import wave
import sys
CHUNK = 1024

if len(sys.argv) < 2:
    print("Plays a wave file.\n\nUsage: %s filename.wav" % sys.argv[0])
    sys.exit(-1)

wf = wave.open(sys.argv[1], 'rb')

p = pyaudio.PyAudio()

# Open an output stream matching the file's sample width, channels and rate.
stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
                channels=wf.getnchannels(),
                rate=wf.getframerate(),
                output=True)

data = wf.readframes(CHUNK)

# readframes() returns an empty buffer at end of file. Test truthiness:
# on Python 3 the buffer is bytes, and b'' != '' is always true, which
# would make the loop spin forever.
while data:
    stream.write(data)
    data = wf.readframes(CHUNK)

stream.stop_stream()
stream.close()

p.terminate()
wf.close()
You might want to look at csounds, also. It has several API's, including Python. It might be able to interact with an A-D interface and gather sound samples.
Related
I'm trying to use PyAudio to record. I'm on a 2021 MacBook Pro (Apple Silicon) with MacOS Monterey 12.2.1, Python 3.9.10. However, when I try to run the following code (taken from this discussion: https://stackoverflow.com/a/6743593/18138581) I get the error below (Please note that the error stems from the "record" function). I have made sure that Terminal has access to the microphone and I tried installing portaudio with homebrew as some have suggested. However, none of it has worked. Does anyone have any clue how I could solve this? Thanks in advance.
from sys import byteorder
from array import array
from struct import pack
import pyaudio
import wave
THRESHOLD = 500
CHUNK_SIZE = 1024
FORMAT = pyaudio.paInt16
RATE = 44100
def is_silent(snd_data, threshold=None):
    """Return True when the chunk's peak amplitude is below the threshold.

    Args:
        snd_data: Iterable of signed sample values (e.g. ``array('h')``).
        threshold: Level below which a sample counts as silent; defaults
            to the module-level ``THRESHOLD``.

    Returns:
        True if every sample's absolute value is below ``threshold``.
        An empty chunk is treated as silent.
    """
    if threshold is None:
        threshold = THRESHOLD
    # Samples are signed: a loud negative swing must count as sound too,
    # so compare magnitudes rather than the raw maximum.
    return all(abs(sample) < threshold for sample in snd_data)
def normalize(snd_data):
    """Scale the samples so that the loudest one peaks at 16384.

    Args:
        snd_data: Iterable of signed 16-bit sample values.

    Returns:
        A new ``array('h')`` with every sample multiplied by the same gain.
        Empty or all-zero input is returned unchanged (as a copy), because
        there is no peak to scale against.
    """
    MAXIMUM = 16384
    peak = max([abs(i) for i in snd_data] or [0])
    if peak == 0:
        # Pure silence (or no data): avoid dividing by zero.
        return array('h', snd_data)

    times = float(MAXIMUM) / peak
    return array('h', (int(i * times) for i in snd_data))
def trim(snd_data, threshold=None):
    """Trim the quiet samples from the start and end of a recording.

    Args:
        snd_data: ``array('h')`` (or other indexable sequence) of samples.
        threshold: Magnitude a sample must exceed to count as sound;
            defaults to the module-level ``THRESHOLD``.

    Returns:
        A new ``array('h')`` spanning the first through the last sample
        whose magnitude exceeds ``threshold``; empty when none does.
    """
    if threshold is None:
        threshold = THRESHOLD

    # Locate the first loud sample scanning forwards ...
    first = next(
        (pos for pos, sample in enumerate(snd_data) if abs(sample) > threshold),
        None,
    )
    if first is None:
        return array('h')

    # ... and the last one scanning backwards; one exists because `first` does.
    last = next(
        pos for pos in range(len(snd_data) - 1, -1, -1)
        if abs(snd_data[pos]) > threshold
    )
    return array('h', snd_data[first:last + 1])
def add_silence(snd_data, seconds, rate=None):
    """Pad a recording with silence at both ends.

    Args:
        snd_data: Iterable of signed 16-bit sample values.
        seconds: Length of each silent pad, in seconds (float).
        rate: Samples per second; defaults to the module-level ``RATE``.

    Returns:
        A new ``array('h')``: ``int(seconds * rate)`` zero samples, then
        the data, then the same number of zero samples.
    """
    if rate is None:
        rate = RATE
    # Repeating a one-element array avoids building a large Python list.
    silence = array('h', [0]) * int(seconds * rate)
    r = array('h', silence)
    r.extend(snd_data)
    r.extend(silence)
    return r
def record():
    """Record a word or words from the microphone.

    Reads chunks until sound has started and more than 30 consecutive
    silent chunks follow it. The recording is then normalized, trimmed of
    leading/trailing silence, and padded with 0.5 seconds of blank sound
    so that players such as VLC do not chop it off.

    Returns:
        A ``(sample_width, data)`` tuple: the sample size in bytes for
        ``FORMAT`` and the samples as an ``array('h')``.
    """
    p = pyaudio.PyAudio()
    try:
        # Input only: nothing is ever written to the stream, so no output
        # device needs to be opened alongside the microphone.
        stream = p.open(format=FORMAT, channels=1, rate=RATE,
                        input=True,
                        frames_per_buffer=CHUNK_SIZE)
        try:
            num_silent = 0
            snd_started = False
            r = array('h')

            while True:
                # little endian, signed short
                snd_data = array('h', stream.read(CHUNK_SIZE))
                if byteorder == 'big':
                    snd_data.byteswap()
                r.extend(snd_data)

                silent = is_silent(snd_data)
                if not snd_started:
                    snd_started = not silent
                elif silent:
                    num_silent += 1
                    if num_silent > 30:
                        break
                else:
                    # Sound resumed: short pauses between words must not
                    # add up to the end-of-recording limit.
                    num_silent = 0

            sample_width = p.get_sample_size(FORMAT)
        finally:
            stream.stop_stream()
            stream.close()
    finally:
        # Release PortAudio even if opening or reading the stream failed.
        p.terminate()

    r = normalize(r)
    r = trim(r)
    r = add_silence(r, 0.5)
    return sample_width, r
def record_to_file(path):
    """Record from the microphone and write the result to 'path' as WAV.

    Args:
        path: Destination file name; written as mono at ``RATE`` Hz.
    """
    sample_width, data = record()
    # A counted format ("<Nh") keeps the format string short instead of
    # repeating one 'h' per sample.
    data = pack('<%dh' % len(data), *data)

    wf = wave.open(path, 'wb')
    try:
        wf.setnchannels(1)
        wf.setsampwidth(sample_width)
        wf.setframerate(RATE)
        wf.writeframes(data)
    finally:
        # Close even when writing fails so the handle is not leaked.
        wf.close()
if __name__ == '__main__':
    # Demo entry point: record one utterance and save it next to the script.
    print("please speak a word into the microphone")
    record_to_file('demo.wav')
    print("done - result written to demo.wav")
Output:
please speak a word into the microphone
||PaMacCore (AUHAL)|| AUHAL component not found.
---------------------------------------------------------------------------
OSError Traceback (most recent call last)
Input In [3], in <cell line: 118>()
118 if __name__ == '__main__':
119 print("please speak a word into the microphone")
--> 120 record_to_file('demo.wav')
121 print("done - result written to demo.wav")
Input In [3], in record_to_file(path)
106 def record_to_file(path):
107 "Records from the microphone and outputs the resulting data to 'path'"
--> 108 sample_width, data = record()
109 data = pack('<' + ('h'*len(data)), *data)
111 wf = wave.open(path, 'wb')
Input In [3], in record()
60 """
61 Record a word or words from the microphone and
62 return the data as an array of signed shorts.
(...)
67 it without getting chopped off.
68 """
69 p = pyaudio.PyAudio()
---> 70 stream = p.open(format=FORMAT, channels=1, rate=RATE,
71 input=True, output=True,
72 frames_per_buffer=CHUNK_SIZE)
74 num_silent = 0
75 snd_started = False
File ~/miniforge3/envs/tfenv/lib/python3.9/site-packages/pyaudio.py:750, in PyAudio.open(self, *args, **kwargs)
742 def open(self, *args, **kwargs):
743 """
744 Open a new stream. See constructor for
745 :py:func:`Stream.__init__` for parameter details.
746
747 :returns: A new :py:class:`Stream`
748 """
--> 750 stream = Stream(self, *args, **kwargs)
751 self._streams.add(stream)
752 return stream
File ~/miniforge3/envs/tfenv/lib/python3.9/site-packages/pyaudio.py:441, in Stream.__init__(self, PA_manager, rate, channels, format, input, output, input_device_index, output_device_index, frames_per_buffer, start, input_host_api_specific_stream_info, output_host_api_specific_stream_info, stream_callback)
438 arguments['stream_callback'] = stream_callback
440 # calling pa.open returns a stream object
--> 441 self._stream = pa.open(**arguments)
443 self._input_latency = self._stream.inputLatency
444 self._output_latency = self._stream.outputLatency
OSError: [Errno -9999] Unanticipated host error
I was wondering how to go about performing noise cancelation on two incoming audio streams of data that I put into two NumPy arrays. One of them contains noise from a USB mic and the other one is from two external microphones which I concatenated which captures the user's voice as well as noise. I have searched the web and found modules such as noisereduce and Padasip. But the problem is I'm working on a Raspberry Pi 4 and it doesn't support noisereduce and I'm having trouble getting Padasip to work. I'm really new to signal processing, so I was wondering which algorithm would be best? And how to implement it in my current situation? If you need more details I will gladly provide them.
Here is the code I'm currently using to capture the audio from the two mics which I concatenate and the noise from the usb mic. As well as my current noise cancelation function I'm trying to implement...
def capture(self, q, index):
    """Record from one input device and put the raw bytes on a queue.

    Args:
        q: Queue that receives a single list holding every captured byte
            as an unsigned integer (0-255).
        index: PyAudio input device index to record from.
    """
    print("Capturing audio...")
    frames = []
    p = pyaudio.PyAudio()
    try:
        stream = p.open(format=self.format,
                        channels=self.channels,
                        rate=self.rate,
                        input=True,
                        input_device_index=index,
                        frames_per_buffer=self.chunk)
        try:
            for i in range(0, int(self.rate / self.chunk * self.record_seconds)):
                data = stream.read(self.chunk)
                # Size the format from the buffer itself so it also works
                # when a frame is not exactly two bytes wide.
                data_int = list(struct.unpack('%dB' % len(data), data))
                frames.extend(data_int)
            print("Done capturing audio!!!")
        finally:
            stream.stop_stream()
            stream.close()
    finally:
        # Release PortAudio even if opening or reading the stream failed.
        p.terminate()
    q.put(frames)
def record(self):
    """Capture devices 2-4 in parallel, then hand the data to cancel().

    One process per device runs ``self.capture``. The first device's data
    is used as the noise reference and the other two are summed into the
    microphone signal before ``self.cancel`` is called.
    """
    print("Recording!!!")
    self.rled.on()

    # One queue per device. With a single shared queue the results arrive
    # in completion order, so the noise and microphone streams could be
    # swapped from run to run.
    workers = []
    for i in range(2, 5):
        q = Queue()
        p = Process(target=self.capture, args=(q, i,))
        p.start()
        workers.append((p, q))

    # Drain every queue before joining: a child that has put data on a
    # queue does not exit until that data has been consumed.
    # NOTE(review): presumably device 2 is the USB noise mic and devices
    # 3 and 4 are the two voice mics; verify against the hardware setup.
    nd, m1d, m2d = [q.get() for _, q in workers]

    for p, _ in workers:
        p.join()

    # NOTE(review): the captured values are 0-255 but dtype='b' is signed
    # 8-bit, and [::2] keeps every other byte; confirm this conversion is
    # what is intended before relying on the result.
    noise_data = np.array(nd, dtype='b')[::2] + 128
    bsmic_data = np.array(m1d, dtype='b')[::2] + 128
    jmic_data = np.array(m2d, dtype='b')[::2] + 128
    mic_data = np.add(bsmic_data, jmic_data)
    self.rled.off()
    self.cancel(noise_data, mic_data)
def cancel(self, noise, mic):
    """Placeholder for noise cancellation; currently only logs a message.

    Args:
        noise: Samples from the noise reference microphone.
        mic: Samples from the voice microphones (voice plus noise).
    """
    # eliminates as much noise as possible from original stream
    print("Peforming noise cancelation...")
I've been trying to record audio using pyaudio until silence is met in the input stream, but a segmentation fault happens while running it. I don't think anything is wrong with the pyaudio/portaudio installed on my Raspberry Pi, because the examples from the pyaudio docs run without any issue. I tried to debug it with pdb and
gdb; these are the results:
Recording: Setting up
Thread 1 "python" received signal SIGSEGV, Segmentation fault.
0x7652a298 in ?? ()
from /usr/lib/python2.7/dist-packages/_portaudio.arm-linux- gnueabihf.so
(gdb) backtrace
#0 0x7652a298 in ?? ()
from /usr/lib/python2.7/dist-packages/_portaudio.arm-linux- gnueabihf.so
#1 0x764f47b0 in Pa_GetDeviceInfo ()
from /usr/lib/arm-linux-gnueabihf/libportaudio.so.2
#2 0x7effe2c4 in ?? ()
Backtrace stopped: previous frame identical to this frame (corrupt stack?)
(gdb)
pyaudio callback function
def _callback(self, in_data, frame_count, time_info, status):  # pylint: disable=unused-argument
    """PyAudio stream callback: queue audio until the recording should end.

    Captured buffers are put on ``self._queue``; ``False`` is queued once
    to mark the end of the recording.

    Args:
        in_data: Buffer of captured audio for this callback.
        frame_count: Unused.
        time_info: Unused.
        status: Unused.

    Returns:
        ``(None, flag)`` where ``flag`` is ``pyaudio.paContinue`` while
        recording goes on, ``pyaudio.paComplete`` once it is finished, or
        ``pyaudio.paAbort`` when no data was delivered.
    """
    debug = logging.getLogger('alexapi').getEffectiveLevel() == logging.DEBUG

    if not in_data:
        self._queue.put(False)
        return None, pyaudio.paAbort

    # Voice-activity detection is skipped only when a forced recording is
    # configured and its second element is falsy.
    do_VAD = True
    if self._callback_data['force_record'] and not self._callback_data['force_record'][1]:
        do_VAD = False

    # do not count first 10 frames when doing VAD
    if do_VAD and (self._callback_data['frames'] < self._callback_data['throwaway_frames']):
        self._callback_data['frames'] += 1

    # now do VAD
    elif (self._callback_data['force_record'] and self._callback_data['force_record'][0]()) \
            or (do_VAD and (self._callback_data['thresholdSilenceMet'] is False)
                and ((time.time() - self._callback_data['start']) < self.MAX_RECORDING_LENGTH)):

        if do_VAD:

            # The VAD is only consulted for buffers of exactly
            # VAD_PERIOD 16-bit samples.
            if int(len(in_data) / 2) == self.VAD_PERIOD:
                isSpeech = self._vad.is_speech(in_data, self.VAD_SAMPLERATE)

                if not isSpeech:
                    self._callback_data['silenceRun'] += 1
                else:
                    self._callback_data['silenceRun'] = 0
                    self._callback_data['numSilenceRuns'] += 1

            # only count silence runs after the first one
            # (allow user to speak for total of max recording length if they haven't said anything yet)
            if (self._callback_data['numSilenceRuns'] != 0) \
                    and ((self._callback_data['silenceRun'] * self.VAD_FRAME_MS) > self.VAD_SILENCE_TIMEOUT):
                self._callback_data['thresholdSilenceMet'] = True

    else:
        # Neither still recording nor in the throwaway phase: finish.
        self._queue.put(False)
        return None, pyaudio.paComplete

    self._queue.put(in_data)
    if debug:
        self._callback_data['audio'] += in_data

    return None, pyaudio.paContinue
pyaudio
def _callback(self, in_data, frame_count, time_info, status):  # pylint: disable=unused-argument
    """PyAudio stream callback: queue audio until the recording should end.

    Captured buffers are put on ``self._queue``; ``False`` is queued once
    to mark the end of the recording.

    Args:
        in_data: Buffer of captured audio for this callback.
        frame_count: Unused.
        time_info: Unused.
        status: Unused.

    Returns:
        ``(None, flag)`` where ``flag`` is ``pyaudio.paContinue`` while
        recording goes on, ``pyaudio.paComplete`` once it is finished, or
        ``pyaudio.paAbort`` when no data was delivered.
    """
    debug = logging.getLogger('alexapi').getEffectiveLevel() == logging.DEBUG

    if not in_data:
        self._queue.put(False)
        return None, pyaudio.paAbort

    # Voice-activity detection is skipped only when a forced recording is
    # configured and its second element is falsy.
    do_VAD = True
    if self._callback_data['force_record'] and not self._callback_data['force_record'][1]:
        do_VAD = False

    # do not count first 10 frames when doing VAD
    if do_VAD and (self._callback_data['frames'] < self._callback_data['throwaway_frames']):
        self._callback_data['frames'] += 1

    # now do VAD
    elif (self._callback_data['force_record'] and self._callback_data['force_record'][0]()) \
            or (do_VAD and (self._callback_data['thresholdSilenceMet'] is False)
                and ((time.time() - self._callback_data['start']) < self.MAX_RECORDING_LENGTH)):

        if do_VAD:

            # The VAD is only consulted for buffers of exactly
            # VAD_PERIOD 16-bit samples.
            if int(len(in_data) / 2) == self.VAD_PERIOD:
                isSpeech = self._vad.is_speech(in_data, self.VAD_SAMPLERATE)

                if not isSpeech:
                    self._callback_data['silenceRun'] += 1
                else:
                    self._callback_data['silenceRun'] = 0
                    self._callback_data['numSilenceRuns'] += 1

            # only count silence runs after the first one
            # (allow user to speak for total of max recording length if they haven't said anything yet)
            if (self._callback_data['numSilenceRuns'] != 0) \
                    and ((self._callback_data['silenceRun'] * self.VAD_FRAME_MS) > self.VAD_SILENCE_TIMEOUT):
                self._callback_data['thresholdSilenceMet'] = True

    else:
        # Neither still recording nor in the throwaway phase: finish.
        self._queue.put(False)
        return None, pyaudio.paComplete

    self._queue.put(in_data)
    if debug:
        self._callback_data['audio'] += in_data

    return None, pyaudio.paContinue
These are actually adaptations of code that I found somewhere on the internet. I double-checked my device index and sample rate; there is nothing wrong with them.
Can someone help me sort it out?
complete code is here
pdb result
> /usr/lib/python2.7/dist-packages/pyaudio.py(438)__init__()
-> arguments['stream_callback'] = stream_callback
(Pdb) step
> /usr/lib/python2.7/dist-packages/pyaudio.py(441)__init__()
-> self._stream = pa.open(**arguments)
(Pdb) step
Segmentation fault
root#raspberrypi:/home/pi/Desktop# python -m pdb rp3test.py
I don't know, maybe it's just a bug in pyaudio and in every library that uses pyaudio, such as python sounddevice, because I also tried it with the sounddevice library. I finally made it work with this code:
def silence_listener(throwaway_frames, filename="recording.wav"):
    """Record from ALSA until enough silence is heard, then save the audio.

    Recording stops once speech has been heard and the following silence
    exceeds ``VAD_SILENCE_TIMEOUT``, or after ``MAX_RECORDING_LENGTH``
    seconds, whichever comes first.

    Args:
        throwaway_frames: Number of initial reads that are recorded but not
            counted towards silence detection.
        filename: File that receives the captured bytes.
            NOTE(review): the bytes are written as captured, with no WAV
            header added by this function; confirm the consumer expects that.
    """
    # Reenable reading microphone raw data
    inp = alsaaudio.PCM(alsaaudio.PCM_CAPTURE, alsaaudio.PCM_NORMAL, alsa_card)
    try:
        inp.setchannels(1)
        inp.setrate(VAD_SAMPLERATE)
        inp.setformat(alsaaudio.PCM_FORMAT_S16_LE)
        inp.setperiodsize(VAD_PERIOD)
        # Collect the buffers in a list and join once: the data is binary,
        # and repeated concatenation would copy the recording over and over.
        chunks = []

        # Buffer as long as we haven't heard enough silence or the total size is within max size
        thresholdSilenceMet = False
        frames = 0
        numSilenceRuns = 0
        silenceRun = 0
        start = time.time()

        # do not count first 10 frames when doing VAD
        while frames < throwaway_frames:  # VAD_THROWAWAY_FRAMES):
            l, data = inp.read()
            frames = frames + 1
            if l:
                chunks.append(data)
                vad.is_speech(data, VAD_SAMPLERATE)

        # now do VAD
        while (not thresholdSilenceMet) and ((time.time() - start) < MAX_RECORDING_LENGTH):
            l, data = inp.read()
            if l:
                chunks.append(data)

                if l == VAD_PERIOD:
                    isSpeech = vad.is_speech(data, VAD_SAMPLERATE)

                    if not isSpeech:
                        silenceRun = silenceRun + 1
                    else:
                        silenceRun = 0
                        numSilenceRuns = numSilenceRuns + 1

            # only count silence runs after the first one
            # (allow user to speak for total of max recording length if they haven't said anything yet)
            if (numSilenceRuns != 0) and ((silenceRun * VAD_FRAME_MS) > VAD_SILENCE_TIMEOUT):
                thresholdSilenceMet = True

        if debug:
            print("End recording")
    finally:
        # Release the capture device even if reading or the VAD failed.
        inp.close()

    # Binary mode: the captured audio is bytes, not text.
    with open(filename, 'wb') as rf:
        rf.write(b"".join(chunks))
I'm trying to record audio and simultaneously print the amplitude of the recorded signal. So I'm saving all datas in stream.read. But when I try to print them, I have a string of bytes and no integers. I would like to know how to convert these signs in order to get amplitude.
This is my code :
import pyaudio
import wave
# Record RECORD_SECONDS of audio and print each captured buffer as-is.
CHUNK = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 44100
RECORD_SECONDS = 5
WAVE_OUTPUT_FILENAME = "output.wav"

p = pyaudio.PyAudio()
frames = []
try:
    stream = p.open(format=FORMAT,
                    channels=CHANNELS,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=CHUNK)
    try:
        print("* recording")

        for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
            data = stream.read(CHUNK)
            frames.append(data)  # 2 bytes(16 bits) per channel

        print("* done recording")
    finally:
        stream.stop_stream()
        stream.close()
finally:
    # Release PortAudio even if opening or reading the stream failed.
    p.terminate()

# Each entry is the raw byte buffer returned by stream.read().
for data in frames:
    print(data)
And this is what I obtain :
����# ����
!$
�� ���� ��������������������������
������ �� ��
��
�� ������ ����������������������������
��
����
������������������������������������������������������������������
���������
���������������
% �� ��(��)����,����.����%����#��
�� �� �����������������������
You can certainly inspire yourself by the following code :
#!/usr/bin/python
# open a microphone in pyAudio and listen for taps
import pyaudio
import struct
import math
INITIAL_TAP_THRESHOLD = 0.010
FORMAT = pyaudio.paInt16
SHORT_NORMALIZE = (1.0/32768.0)
CHANNELS = 2
RATE = 44100
INPUT_BLOCK_TIME = 0.05
INPUT_FRAMES_PER_BLOCK = int(RATE*INPUT_BLOCK_TIME)
# if we get this many noisy blocks in a row, increase the threshold
OVERSENSITIVE = 15.0/INPUT_BLOCK_TIME
# if we get this many quiet blocks in a row, decrease the threshold
UNDERSENSITIVE = 120.0/INPUT_BLOCK_TIME
# if the noise was longer than this many blocks, it's not a 'tap'
MAX_TAP_BLOCKS = 0.15/INPUT_BLOCK_TIME
def get_rms( block ):
    """Return the RMS amplitude of a block of 16-bit samples.

    RMS amplitude is the square root of the mean over time of the square
    of the amplitude.

    Args:
        block: Byte string of signed 16-bit samples (two bytes each).

    Returns:
        The RMS of the samples after scaling each by ``SHORT_NORMALIZE``;
        ``0.0`` for a block too short to hold a sample.
    """
    # One short comes out for every two bytes; integer division keeps the
    # count an int on Python 3.
    count = len(block) // 2
    if count == 0:
        # Nothing to average: avoid dividing by zero.
        return 0.0
    shorts = struct.unpack("%dh" % count, block)

    # iterate over the block.
    sum_squares = 0.0
    for sample in shorts:
        # sample is a signed short in +/- 32768.
        # normalize it to 1.0
        n = sample * SHORT_NORMALIZE
        sum_squares += n * n

    return math.sqrt(sum_squares / count)
class TapTester(object):
    """Listen on a microphone and report short bursts of noise as taps.

    The detection threshold adapts: it is raised after a long run of noisy
    blocks and lowered after a long run of quiet ones.
    """

    def __init__(self):
        self.pa = pyaudio.PyAudio()
        self.stream = self.open_mic_stream()
        self.tap_threshold = INITIAL_TAP_THRESHOLD
        # Start above the tap limit so an initial quiet block is not
        # reported as a tap.
        self.noisycount = MAX_TAP_BLOCKS+1
        self.quietcount = 0
        self.errorcount = 0

    def stop(self):
        """Close the input stream and release PortAudio."""
        self.stream.close()
        self.pa.terminate()

    def find_input_device(self):
        """Return the index of the first device whose name looks like an input.

        Every device is printed as it is examined. Returns ``None`` when no
        device name contains "mic" or "input", which selects the default
        input device.
        """
        for i in range( self.pa.get_device_count() ):
            devinfo = self.pa.get_device_info_by_index(i)
            print( "Device %d: %s"%(i,devinfo["name"]) )

            for keyword in ["mic","input"]:
                if keyword in devinfo["name"].lower():
                    print( "Found an input: device %d - %s"% (i,devinfo["name"]) )
                    return i

        print( "No preferred input found; using default input device." )
        return None

    def open_mic_stream( self ):
        """Open and return an input stream on the detected device."""
        device_index = self.find_input_device()

        stream = self.pa.open( format = FORMAT,
                               channels = CHANNELS,
                               rate = RATE,
                               input = True,
                               input_device_index = device_index,
                               frames_per_buffer = INPUT_FRAMES_PER_BLOCK)

        return stream

    def tapDetected(self):
        """Report a detected tap."""
        print("Tap!")

    def listen(self):
        """Read one block, update the noise counters and detect taps."""
        try:
            block = self.stream.read(INPUT_FRAMES_PER_BLOCK)
        except IOError as e:
            # A failed read is counted and reported; the block is skipped.
            self.errorcount += 1
            print( "(%d) Error recording: %s"%(self.errorcount,e) )
            self.noisycount = 1
            return

        amplitude = get_rms( block )
        if amplitude > self.tap_threshold:
            # noisy block
            self.quietcount = 0
            self.noisycount += 1
            if self.noisycount > OVERSENSITIVE:
                # turn down the sensitivity
                self.tap_threshold *= 1.1
        else:
            # quiet block.
            # A tap is a noisy run that just ended and was short enough.
            if 1 <= self.noisycount <= MAX_TAP_BLOCKS:
                self.tapDetected()
            self.noisycount = 0
            self.quietcount += 1
            if self.quietcount > UNDERSENSITIVE:
                # turn up the sensitivity
                self.tap_threshold *= 0.9
if __name__ == "__main__":
    tt = TapTester()

    try:
        for i in range(1000):
            tt.listen()
    finally:
        # Close the input stream when done or on error.
        tt.stop()
It comes from this post: Detect tap with pyaudio from live mic
You can easily adapt it to put the RMS values in a table and plot the table.
PyAudio is giving you binary-encoded audio frames as bytes in a string. See the answer to this question for how to print a human-readable representation of your frames:
Get an audio sample as float number from pyaudio-stream
I guess the question is old and I stumbled over it while looking for other answers, but in my project I use something like this.
#Lets assume the constants are defined somewhere
import struct
import pyaudio
import numpy as np
self.input = pyaudio.PyAudio().open(
format=pyaudio.paInt16,
channels=1,
rate=44100,
input=True,
output=False,
frames_per_buffer=1024,
)
wf_data = self.input.read(self.CHUNK)
wf_data = struct.unpack(str(self.CHUNK) + 'h', wf_data)
wf_data = np.array(wf_data)
the paInt16 and the 'h' correspond. You can figure out what letter matches your pyaudio format here.
https://docs.python.org/3/library/struct.html
Credit goes to:
https://www.youtube.com/channel/UC2W0aQEPNpU6XrkFCYifRFQ
I think you could do this
from array import array

data = stream.read(CHUNK)
# Iterating the raw buffer yields single bytes (0-255), not amplitudes.
# Decode it into signed 16-bit samples first.
# NOTE(review): assumes the stream was opened with paInt16; confirm.
for each in array('h', data):
    print(each)
When dealing with audio you probably want the RMS (root mean squared) value of the signals buffer. I believe it offers a better 'view' of the overall power in an audio signal.
The Python standard library has a module called audioop, which provides a function called rms.
import pyaudio
import time
import audioop
def get_rms():
    """Yield the RMS level of successive chunks read from the microphone.

    This is an endless generator. The stream and PortAudio are released
    when the generator is closed or an error occurs.

    Yields:
        The ``audioop.rms`` value of each chunk of ``CHUNK`` frames.
    """
    CHUNK = 8
    WIDTH = 2
    CHANNELS = 1
    RATE = 44100

    p = pyaudio.PyAudio()
    try:
        stream = p.open(format=p.get_format_from_width(WIDTH),
                        channels=CHANNELS,
                        rate=RATE,
                        input=True,
                        output=False,
                        frames_per_buffer=CHUNK)
        try:
            # wait a second to allow the stream to be setup
            time.sleep(1)
            while True:
                # read the data
                data = stream.read(CHUNK, exception_on_overflow=False)
                # The width passed to rms() must match the stream's
                # sample width, otherwise the bytes are misread.
                yield audioop.rms(data, WIDTH)
        finally:
            # Stop and close the stream before shutting PortAudio down.
            stream.stop_stream()
            stream.close()
    finally:
        p.terminate()
You can use the function like this
# get_rms() is an endless generator; this loop runs until interrupted.
rms_values = get_rms()
for rms in rms_values:
    print(rms)
I'm trying to process an audio signal and divide an audio signal into N discrete samples and then I want to play that samples independently.
How can I do this using python?
import wave
import pygame
import time
def slice(infile, outfilename, start_ms, end_ms):
    """Copy the frames between two timestamps into a new WAV file.

    Args:
        infile: An open ``wave`` reader; its read position is changed.
        outfilename: Path of the WAV file to create.
        start_ms: Start of the slice, in milliseconds.
        end_ms: End of the slice, in milliseconds.
    """
    width = infile.getsampwidth()  # sample width in bytes
    rate = infile.getframerate()  # sampling frequency
    # Frame positions and counts must be integers, so use integer
    # arithmetic instead of a float "frames per millisecond" factor.
    start_index = int(start_ms * rate // 1000)
    length = max(0, int((end_ms - start_ms) * rate // 1000))

    out = wave.open(outfilename, "wb")
    try:
        out.setparams((infile.getnchannels(), width, rate, length,
                       infile.getcomptype(), infile.getcompname()))

        infile.rewind()  # back to the beginning of the audio stream
        anchor = infile.tell()  # position of the first frame
        infile.setpos(anchor + start_index)
        out.writeframes(infile.readframes(length))
    finally:
        # Closing flushes the data and finalises the header.
        out.close()
if __name__ == "__main__":
    # Cut 0.5 s - 5 s out of the song, then play the excerpt.
    infile = wave.open("song1.wav", "r")
    try:
        slice(infile, "out.wav", 500, 5000)
    finally:
        infile.close()

    pygame.mixer.init()
    pygame.mixer.music.load("out.wav")
    pygame.mixer.music.play()
    # Sleep between polls instead of spinning a CPU core while it plays.
    while pygame.mixer.music.get_busy():
        time.sleep(0.1)