PyAudio - Convert stream.read into int to get amplitude - python

I'm trying to record audio and simultaneously print the amplitude of the recorded signal. So I'm saving all datas in stream.read. But when I try to print them, I have a string of bytes and no integers. I would like to know how to convert these signs in order to get amplitude.
This is my code :
import pyaudio
import wave
CHUNK = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 44100
RECORD_SECONDS = 5
WAVE_OUTPUT_FILENAME = "output.wav"
p = pyaudio.PyAudio()
stream = p.open(format=FORMAT,
channels=CHANNELS,
rate=RATE,
input=True,
frames_per_buffer=CHUNK)
print("* recording")
frames = []
for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
data = stream.read(CHUNK)
frames.append(data) # 2 bytes(16 bits) per channel
print("* done recording")
stream.stop_stream()
stream.close()
p.terminate()
for data in frames:
print(data)
And this is what I obtain :
 ����# ����

!$

�� ���� ��������������������������
������ �� ��
��
�� ������ ����������������������������
��
����
������������������������������������������������������������������
���������
���������������
% �� ��(��)����,����.����%����#��
�� �� �����������������������

You can certainly inspire yourself by the following code : 
#!/usr/bin/python
# open a microphone in pyAudio and listen for taps
import pyaudio
import struct
import math
INITIAL_TAP_THRESHOLD = 0.010
FORMAT = pyaudio.paInt16
SHORT_NORMALIZE = (1.0/32768.0)
CHANNELS = 2
RATE = 44100
INPUT_BLOCK_TIME = 0.05
INPUT_FRAMES_PER_BLOCK = int(RATE*INPUT_BLOCK_TIME)
# if we get this many noisy blocks in a row, increase the threshold
OVERSENSITIVE = 15.0/INPUT_BLOCK_TIME
# if we get this many quiet blocks in a row, decrease the threshold
UNDERSENSITIVE = 120.0/INPUT_BLOCK_TIME
# if the noise was longer than this many blocks, it's not a 'tap'
MAX_TAP_BLOCKS = 0.15/INPUT_BLOCK_TIME
def get_rms( block ):
# RMS amplitude is defined as the square root of the
# mean over time of the square of the amplitude.
# so we need to convert this string of bytes into
# a string of 16-bit samples...
# we will get one short out for each
# two chars in the string.
count = len(block)/2
format = "%dh"%(count)
shorts = struct.unpack( format, block )
# iterate over the block.
sum_squares = 0.0
for sample in shorts:
# sample is a signed short in +/- 32768.
# normalize it to 1.0
n = sample * SHORT_NORMALIZE
sum_squares += n*n
return math.sqrt( sum_squares / count )
class TapTester(object):
def __init__(self):
self.pa = pyaudio.PyAudio()
self.stream = self.open_mic_stream()
self.tap_threshold = INITIAL_TAP_THRESHOLD
self.noisycount = MAX_TAP_BLOCKS+1
self.quietcount = 0
self.errorcount = 0
def stop(self):
self.stream.close()
def find_input_device(self):
device_index = None
for i in range( self.pa.get_device_count() ):
devinfo = self.pa.get_device_info_by_index(i)
print( "Device %d: %s"%(i,devinfo["name"]) )
for keyword in ["mic","input"]:
if keyword in devinfo["name"].lower():
print( "Found an input: device %d - %s"% (i,devinfo["name"]) )
device_index = i
return device_index
if device_index == None:
print( "No preferred input found; using default input device." )
return device_index
def open_mic_stream( self ):
device_index = self.find_input_device()
stream = self.pa.open( format = FORMAT,
channels = CHANNELS,
rate = RATE,
input = True,
input_device_index = device_index,
frames_per_buffer = INPUT_FRAMES_PER_BLOCK)
return stream
def tapDetected(self):
print "Tap!"
def listen(self):
try:
block = self.stream.read(INPUT_FRAMES_PER_BLOCK)
except IOError, e:
# dammit.
self.errorcount += 1
print( "(%d) Error recording: %s"%(self.errorcount,e) )
self.noisycount = 1
return
amplitude = get_rms( block )
if amplitude > self.tap_threshold:
# noisy block
self.quietcount = 0
self.noisycount += 1
if self.noisycount > OVERSENSITIVE:
# turn down the sensitivity
self.tap_threshold *= 1.1
else:
# quiet block.
if 1 <= self.noisycount <= MAX_TAP_BLOCKS:
self.tapDetected()
self.noisycount = 0
self.quietcount += 1
if self.quietcount > UNDERSENSITIVE:
# turn up the sensitivity
self.tap_threshold *= 0.9
if __name__ == "__main__":
tt = TapTester()
for i in range(1000):
tt.listen()
It come from this post: [Detect tap with pyaudio from live mic
You can easyly adapt it to put the RMS in a table and plot the table.

PyAudio is giving you binary-encoded audio frames as bytes in a string. See the answer to this question for how to print a human-readable representation of your frames:
Get an audio sample as float number from pyaudio-stream

I guess the question is old and I stumpled over it looking for other answers, but in my project I use something like this.
#Lets assume the constants are defined somewhere
import struct
import pyaudio
import numpy as np
self.input = pyaudio.PyAudio().open(
format=pyaudio.paInt16,
channels=1,
rate=44100,
input=True,
output=False,
frames_per_buffer=1024,
)
wf_data = self.input.read(self.CHUNK)
wf_data = struct.unpack(str(self.CHUNK) + 'h', wf_data)
wf_data = np.array(wf_data)
the paInt16 and the 'h' correspond. You can figure out what letter matches your pyaudio format here.
https://docs.python.org/3/library/struct.html
Credit goes to:
https://www.youtube.com/channel/UC2W0aQEPNpU6XrkFCYifRFQ

I think you could do this
data = stream.read(CHUNK)
for each in data:
print(each)

When dealing with audio you probably want the RMS (root mean squared) value of the signals buffer. I believe it offers a better 'view' of the overall power in an audio signal.
The python standard library as a module called audioop the module has a function called rms.
import pyaudio
import time
import audioop
def get_rms():
# Creates a generator that can iterate rms values
CHUNK = 8
WIDTH = 2
CHANNELS = 1
RATE = 44100
p = pyaudio.PyAudio()
try:
stream = p.open(format=p.get_format_from_width(WIDTH),
channels=CHANNELS,
rate=RATE,
input=True,
output=False,
frames_per_buffer=CHUNK)
# wait a second to allow the stream to be setup
time.sleep(1)
while True:
# read the data
data = stream.read(CHUNK, exception_on_overflow = False)
rms = audioop.rms(data, 1)
yield rms_scaled
finally:
p.terminate()
stream.stop_stream()
stream.close()
You can use the function like this
rms_values = get_rms()
for rms in rms_values:
print(rms)

Related

OSError: [Errno -9999] Unanticipated host error --- PaMacCore (AUHAL)|| AUHAL component not found

I'm trying to use PyAudio to record. I'm on a 2021 MacBook Pro (Apple Silicon) with MacOS Monterey 12.2.1, Python 3.9.10. However, when I try to run the following code (taken from this discussion: https://stackoverflow.com/a/6743593/18138581) I get the error below (Please note that the error stems from the "record" function). I have made sure that Terminal has access to the microphone and I tried installing portaudio with homebrew as some have suggested. However, none of it has worked. Does anyone have any clue how I could solve this? Thanks in advance.
from sys import byteorder
from array import array
from struct import pack
import pyaudio
import wave
THRESHOLD = 500
CHUNK_SIZE = 1024
FORMAT = pyaudio.paInt16
RATE = 44100
def is_silent(snd_data):
"Returns 'True' if below the 'silent' threshold"
return max(snd_data) < THRESHOLD
def normalize(snd_data):
"Average the volume out"
MAXIMUM = 16384
times = float(MAXIMUM)/max(abs(i) for i in snd_data)
r = array('h')
for i in snd_data:
r.append(int(i*times))
return r
def trim(snd_data):
"Trim the blank spots at the start and end"
def _trim(snd_data):
snd_started = False
r = array('h')
for i in snd_data:
if not snd_started and abs(i)>THRESHOLD:
snd_started = True
r.append(i)
elif snd_started:
r.append(i)
return r
# Trim to the left
snd_data = _trim(snd_data)
# Trim to the right
snd_data.reverse()
snd_data = _trim(snd_data)
snd_data.reverse()
return snd_data
def add_silence(snd_data, seconds):
"Add silence to the start and end of 'snd_data' of length 'seconds' (float)"
silence = [0] * int(seconds * RATE)
r = array('h', silence)
r.extend(snd_data)
r.extend(silence)
return r
def record():
"""
Record a word or words from the microphone and
return the data as an array of signed shorts.
Normalizes the audio, trims silence from the
start and end, and pads with 0.5 seconds of
blank sound to make sure VLC et al can play
it without getting chopped off.
"""
p = pyaudio.PyAudio()
stream = p.open(format=FORMAT, channels=1, rate=RATE,
input=True, output=True,
frames_per_buffer=CHUNK_SIZE)
num_silent = 0
snd_started = False
r = array('h')
while 1:
# little endian, signed short
snd_data = array('h', stream.read(CHUNK_SIZE))
if byteorder == 'big':
snd_data.byteswap()
r.extend(snd_data)
silent = is_silent(snd_data)
if silent and snd_started:
num_silent += 1
elif not silent and not snd_started:
snd_started = True
if snd_started and num_silent > 30:
break
sample_width = p.get_sample_size(FORMAT)
stream.stop_stream()
stream.close()
p.terminate()
r = normalize(r)
r = trim(r)
r = add_silence(r, 0.5)
return sample_width, r
def record_to_file(path):
"Records from the microphone and outputs the resulting data to 'path'"
sample_width, data = record()
data = pack('<' + ('h'*len(data)), *data)
wf = wave.open(path, 'wb')
wf.setnchannels(1)
wf.setsampwidth(sample_width)
wf.setframerate(RATE)
wf.writeframes(data)
wf.close()
if __name__ == '__main__':
print("please speak a word into the microphone")
record_to_file('demo.wav')
print("done - result written to demo.wav")
Output:
please speak a word into the microphone
||PaMacCore (AUHAL)|| AUHAL component not found.
---------------------------------------------------------------------------
OSError Traceback (most recent call last)
Input In [3], in <cell line: 118>()
118 if __name__ == '__main__':
119 print("please speak a word into the microphone")
--> 120 record_to_file('demo.wav')
121 print("done - result written to demo.wav")
Input In [3], in record_to_file(path)
106 def record_to_file(path):
107 "Records from the microphone and outputs the resulting data to 'path'"
--> 108 sample_width, data = record()
109 data = pack('<' + ('h'*len(data)), *data)
111 wf = wave.open(path, 'wb')
Input In [3], in record()
60 """
61 Record a word or words from the microphone and
62 return the data as an array of signed shorts.
(...)
67 it without getting chopped off.
68 """
69 p = pyaudio.PyAudio()
---> 70 stream = p.open(format=FORMAT, channels=1, rate=RATE,
71 input=True, output=True,
72 frames_per_buffer=CHUNK_SIZE)
74 num_silent = 0
75 snd_started = False
File ~/miniforge3/envs/tfenv/lib/python3.9/site-packages/pyaudio.py:750, in PyAudio.open(self, *args, **kwargs)
742 def open(self, *args, **kwargs):
743 """
744 Open a new stream. See constructor for
745 :py:func:`Stream.__init__` for parameter details.
746
747 :returns: A new :py:class:`Stream`
748 """
--> 750 stream = Stream(self, *args, **kwargs)
751 self._streams.add(stream)
752 return stream
File ~/miniforge3/envs/tfenv/lib/python3.9/site-packages/pyaudio.py:441, in Stream.__init__(self, PA_manager, rate, channels, format, input, output, input_device_index, output_device_index, frames_per_buffer, start, input_host_api_specific_stream_info, output_host_api_specific_stream_info, stream_callback)
438 arguments['stream_callback'] = stream_callback
440 # calling pa.open returns a stream object
--> 441 self._stream = pa.open(**arguments)
443 self._input_latency = self._stream.inputLatency
444 self._output_latency = self._stream.outputLatency
OSError: [Errno -9999] Unanticipated host error

How to perform noise cancelation in Python using two numpy arrays which are not predefined?

I was wondering how to go about performing noise cancelation on two incoming audio streams of data that I put into two NumPy arrays. One of them contains noise from a USB mic and the other one is from two external microphones which I concatenated which captures the user's voice as well as noise. I have searched the web and found modules such as noisereduce and Padasip. But the problem is I'm working on a Raspberry Pi 4 and it doesn't support noisereduce and I'm having trouble getting Padasip to work. I'm really new to signal processing, so I was wondering which algorithm would be best? And how to implement it in my current situation? If you need more details I will gladly provide them.
Here is the code I'm currently using to capture the audio from the two mics which I concatenate and the noise from the usb mic. As well as my current noise cancelation function I'm trying to implement...
def capture(self, q, index):
print("Capturing audio...")
frames = []
p = pyaudio.PyAudio()
stream = p.open(format=self.format,
channels=self.channels,
rate=self.rate,
input=True,
input_device_index=index,
frames_per_buffer=self.chunk)
for i in range(0, int(self.rate / self.chunk * self.record_seconds)):
data = stream.read(self.chunk)
data_int = list(struct.unpack(str(2 * self.chunk) + 'B', data))
frames.extend(data_int)
print("Done capturing audio!!!")
stream.stop_stream()
stream.close()
p.terminate()
q.put(frames)
def record(self):
print("Recording!!!")
self.rled.on()
q = Queue()
processes = []
nd = []
m1d = []
m2d = []
for i in range(2, 5):
p = Process(target=self.capture, args=(q, i,))
processes.append(p)
p.start()
count = 1
for p in processes:
count += 1
ret = q.get()
if count == 2: nd.extend(ret)
elif count == 3: m1d.extend(ret)
else: m2d.extend(ret)
for p in processes:
p.join()
noise_data = np.array(nd, dtype='b')[::2] + 128
bsmic_data = np.array(m1d, dtype='b')[::2] + 128
jmic_data = np.array(m2d, dtype='b')[::2] + 128
mic_data = np.add(bsmic_data, jmic_data)
self.rled.off()
self.cancel(noise_data, mic_data)
def cancel(self, noise, mic):
#eliminates as much noise as possible from original stream
print("Peforming noise cancelation...")

Sounddevice ValueError: could not broadcast input array from shape (2048) into shape (2048,1)

I know it may seems to be another one of those ValueError post but please hear me out and understand that I have tried Googling and browsing the StackOverflow community for a workable solution for my problem.
I am currently trying to pass my demodulated samples to the sounddevice module and playback it in realtime using its callback function.
Error:
File "rtl-fm-cont.py", line 120, in audio_callback
outdata[:] = data ValueError: could not broadcast input array from shape (2048) into shape (2048,1)
I have attached an full output of this code below:
#!/usr/bin/env python
# library imports ...
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument('-d', '--device', type=int_or_str,
help='output device (numeric ID or substring)')
parser.add_argument('-b', '--blocksize', type=int, default=2048,
help='block size (default: %(default)s)')
parser.add_argument(
'-q', '--buffersize', type=int, default=20,
help='number of blocks used for buffering (default: %(default)s)')
args = parser.parse_args()
if args.blocksize == 0:
parser.error('blocksize must not be zero')
if args.buffersize < 1:
parser.error('buffersize must be at least 1')
q = queue.Queue(maxsize=args.buffersize)
event = threading.Event()
device_index = RtlSdr.get_device_index_by_serial('00000001')
class fmDemodulator(object):
# Called for each updates
def __init__(self, sdr=None):
self.sdr = sdr if sdr else RtlSdr(device_index)
def Demod(self, *args):
Fs = self.sdr.sample_rate
# Fc = self.sdr.center_freq
# Read IQ samples
samples = self.sdr.read_samples(4*12*2048)
print ('Fetching {} IQ samples from SDR #{}'.format(len(samples), device_index))
# Convert sampled data into numpy array
x1 = np.array(samples).astype("complex64")
# Downmixed Baseband Signal (Adjust offset to be centered)
offsetFreq = 0 # already centered
fc1 = np.exp(-1.0j*2.0*np.pi* offsetFreq/Fs*np.arange(len(x1)))
x2 = x1 * fc1
# Filter and downsample the FM Radio Signal
bwFM = 200000 # approx. 170 kHz for a single channel
decRate = int(Fs/bwFM)
x3 = signal.decimate(x2, decRate)
newFs = Fs/decRate
### Demodulate 200kHz FM Signal
# Polar discriminator
y4 = x3[1:] * np.conj(x3[:-1])
x4 = np.angle(y4)
# The de-emphasis filter
# Given a signal 'x4' (in a numpy array) with sampling rate newFS
d = newFs * 75e-6 # Calculate the # of samples to hit the -3dB point
x = np.exp(-1/d) # Calculate the decay between each sample
b = [1-x] # Create the filter coefficients
a = [1,-x]
x5 = signal.lfilter(b,a,x4)
# Find a decimation rate to achieve audio sampling rate between 44-48 kHz
audioFreq = 44100
dec_audio = int(newFs/audioFreq)
audioFs = newFs/dec_audio
x6 = signal.decimate(x5, dec_audio)
# Scale audio to adjust volume
x6 *= 10000 / np.max(np.abs(x6))
# debug
print ('decRate: {}, newFs : {}, dec_audio: {}'.format(decRate, newFs, dec_audio))
print ('Output audio: {} samples, audioFreq: {}, audioFs: {}'.format(len(x6), audioFreq, audioFs))
return x6
# https://python-sounddevice.readthedocs.io/en/0.3.6/examples.html
def audio_callback(outdata, frames, time, status):
"""This is called (from a separate thread) for each audio block."""
assert frames == args.blocksize
if status.output_underflow:
print('Output underflow: increase blocksize?', file=sys.stderr)
raise sd.CallbackAbort
assert not status
try:
data = q.get_nowait()
print(data)
print(data.dtype)
except queue.Empty:
print('Buffer is empty: increase buffersize?', file=sys.stderr)
raise sd.CallbackAbort
if len(data) < len(outdata):
outdata[:len(data)] = data
outdata[len(data):] = b'\x00' * (len(outdata) - len(data))
raise sd.CallbackStop
else:
outdata[:] = data
def main():
sdr = RtlSdr(device_index)
fm = fmDemodulator(sdr)
# SDR Configurations
sdr.sample_rate = int(2.4e6) # Hz
sdr.center_freq = int(102e6) # Hz
sdr.freq_correction = 77 # PPM +- 20
sdr.gain = 'auto'
samplerate = 50000
channels = 1
try:
for _ in range(args.buffersize):
data = fm.Demod()
if not np.any(data):
break
q.put_nowait(data) # pre-fill queue
stream = sd.OutputStream(
samplerate=samplerate, blocksize=args.blocksize,
device=args.device, channels=channels, dtype='int16',
callback=audio_callback, finished_callback=event.set)
with stream:
timeout = args.blocksize * args.buffersize / samplerate
while np.any(data): # while data
data = fm.Demod()
q.put(data, timeout=timeout)
event.wait() # Wait until playback is finished
except KeyboardInterrupt:
parser.exit('\nInterrupted by user')
except queue.Full:
# A timeout occured, i.e. there was an error in the callback
parser.exit(1)
except Exception as e:
parser.exit(type(e).__name__ + ': ' + str(e))
Output:
Output audio: 2048 samples, audioFreq: 44100, audioFs: 50000.0
Fetching 98304 IQ samples from SDR #0
...
Fetching 98304 IQ samples from SDR #0
decRate: 12, newFs : 200000.0, dec_audio: 4
Output audio: 2048 samples, audioFreq: 44100, audioFs: 50000.0
Fetching 98304 IQ samples from SDR #0
[ 627.05045796 1835.36815837 3381.16496121 ... 401.43836645
-1156.07050642 -1291.0900775 ]
float64
From cffi callback <function _StreamBase.__init__.<locals>.callback_ptr at 0x10eabbea0>:
Traceback (most recent call last):
File "/Users/user/.local/lib/python3.6/site-packages/sounddevice.py", line 741, in callback_ptr
return _wrap_callback(callback, data, frames, time, status)
File "/Users/user/.local/lib/python3.6/site-packages/sounddevice.py", line 2517, in _wrap_callback
decRate: 12, newFs : 200000.0, dec_audio: 4
Output audio: 2048 samples, audioFreq: 44100, audioFs: 50000.0
callback(*args)
File "rtl-fm-cont.py", line 120, in audio_callback
outdata[:] = data
ValueError: could not broadcast input array from shape (2048) into shape (2048,1)
Fetching 98304 IQ samples from SDR #0
decRate: 12, newFs : 200000.0, dec_audio: 4
Output audio: 2048 samples, audioFreq: 44100, audioFs: 50000.0
Your help will highly be appreciated.
Thank you in advance.
Seems like a simple:
outdata[:] = data.reshape(2048,1)
numpy.reshape
Fixed the problem.

Devide audio signal into small samples

I'm trying to process an audio signal and divide an audio signal into N discrete samples and then I want to play that samples independently.
How can I do this using python?
import wave
import pygame
import time
def slice(infile, outfilename, start_ms, end_ms):
width = infile.getsampwidth() #Returns sample width in bytes
rate = infile.getframerate() #Returns sampling frequency
fpms = rate / 1000 # frames per ms
length = (end_ms - start_ms) * fpms
start_index = start_ms * fpms
out = wave.open(outfilename, "w")
out.setparams((infile.getnchannels(), width, rate, length, infile.getcomptype(), infile.getcompname()))
infile.rewind() #Rewind the file pointer to the beginning of the audio stream
anchor = infile.tell() #Return current file pointer position
infile.setpos(anchor + start_index) #Set the file pointer to the specified position
out.writeframes(infile.readframes(length)) #Write audio frames and make sure nframes is correct
if __name__ == "__main__":
slice(wave.open("song1.wav", "r"), "out.wav", 500, 5000)
pygame.mixer.init()
pygame.mixer.music.load("out.wav")
pygame.mixer.music.play()
while pygame.mixer.music.get_busy() == True:
continue

Detect & Record Audio in Python

I need to capture audio clips as WAV files that I can then pass to another bit of python for processing. The problem is that I need to determine when there is audio present and then record it, stop when it goes silent and then pass that file to the processing module.
I'm thinking it should be possible with the wave module to detect when there is pure silence and discard it then as soon as something other than silence is detected start recording, then when the line goes silent again stop the recording.
Just can't quite get my head around it, can anyone get me started with a basic example.
As a follow up to Nick Fortescue's answer, here's a more complete example of how to record from the microphone and process the resulting data:
from sys import byteorder
from array import array
from struct import pack
import pyaudio
import wave
THRESHOLD = 500
CHUNK_SIZE = 1024
FORMAT = pyaudio.paInt16
RATE = 44100
def is_silent(snd_data):
"Returns 'True' if below the 'silent' threshold"
return max(snd_data) < THRESHOLD
def normalize(snd_data):
"Average the volume out"
MAXIMUM = 16384
times = float(MAXIMUM)/max(abs(i) for i in snd_data)
r = array('h')
for i in snd_data:
r.append(int(i*times))
return r
def trim(snd_data):
"Trim the blank spots at the start and end"
def _trim(snd_data):
snd_started = False
r = array('h')
for i in snd_data:
if not snd_started and abs(i)>THRESHOLD:
snd_started = True
r.append(i)
elif snd_started:
r.append(i)
return r
# Trim to the left
snd_data = _trim(snd_data)
# Trim to the right
snd_data.reverse()
snd_data = _trim(snd_data)
snd_data.reverse()
return snd_data
def add_silence(snd_data, seconds):
"Add silence to the start and end of 'snd_data' of length 'seconds' (float)"
silence = [0] * int(seconds * RATE)
r = array('h', silence)
r.extend(snd_data)
r.extend(silence)
return r
def record():
"""
Record a word or words from the microphone and
return the data as an array of signed shorts.
Normalizes the audio, trims silence from the
start and end, and pads with 0.5 seconds of
blank sound to make sure VLC et al can play
it without getting chopped off.
"""
p = pyaudio.PyAudio()
stream = p.open(format=FORMAT, channels=1, rate=RATE,
input=True, output=True,
frames_per_buffer=CHUNK_SIZE)
num_silent = 0
snd_started = False
r = array('h')
while 1:
# little endian, signed short
snd_data = array('h', stream.read(CHUNK_SIZE))
if byteorder == 'big':
snd_data.byteswap()
r.extend(snd_data)
silent = is_silent(snd_data)
if silent and snd_started:
num_silent += 1
elif not silent and not snd_started:
snd_started = True
if snd_started and num_silent > 30:
break
sample_width = p.get_sample_size(FORMAT)
stream.stop_stream()
stream.close()
p.terminate()
r = normalize(r)
r = trim(r)
r = add_silence(r, 0.5)
return sample_width, r
def record_to_file(path):
"Records from the microphone and outputs the resulting data to 'path'"
sample_width, data = record()
data = pack('<' + ('h'*len(data)), *data)
wf = wave.open(path, 'wb')
wf.setnchannels(1)
wf.setsampwidth(sample_width)
wf.setframerate(RATE)
wf.writeframes(data)
wf.close()
if __name__ == '__main__':
print("please speak a word into the microphone")
record_to_file('demo.wav')
print("done - result written to demo.wav")
I believe the WAVE module does not support recording, just processing existing files. You might want to look at PyAudio for actually recording.
WAV is about the world's simplest file format. In paInt16 you just get a signed integer representing a level, and closer to 0 is quieter. I can't remember if WAV files are high byte first or low byte, but something like this ought to work (sorry, I'm not really a python programmer:
from array import array
# you'll probably want to experiment on threshold
# depends how noisy the signal
threshold = 10
max_value = 0
as_ints = array('h', data)
max_value = max(as_ints)
if max_value > threshold:
# not silence
PyAudio code for recording kept for reference:
import pyaudio
import sys
chunk = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 44100
RECORD_SECONDS = 5
p = pyaudio.PyAudio()
stream = p.open(format=FORMAT,
channels=CHANNELS,
rate=RATE,
input=True,
output=True,
frames_per_buffer=chunk)
print "* recording"
for i in range(0, 44100 / chunk * RECORD_SECONDS):
data = stream.read(chunk)
# check for silence here by comparing the level with 0 (or some threshold) for
# the contents of data.
# then write data or not to a file
print "* done"
stream.stop_stream()
stream.close()
p.terminate()
Thanks to cryo for improved version that I based my tested code below:
#Instead of adding silence at start and end of recording (values=0) I add the original audio . This makes audio sound more natural as volume is >0. See trim()
#I also fixed issue with the previous code - accumulated silence counter needs to be cleared once recording is resumed.
from array import array
from struct import pack
from sys import byteorder
import copy
import pyaudio
import wave
THRESHOLD = 500 # audio levels not normalised.
CHUNK_SIZE = 1024
SILENT_CHUNKS = 3 * 44100 / 1024 # about 3sec
FORMAT = pyaudio.paInt16
FRAME_MAX_VALUE = 2 ** 15 - 1
NORMALIZE_MINUS_ONE_dB = 10 ** (-1.0 / 20)
RATE = 44100
CHANNELS = 1
TRIM_APPEND = RATE / 4
def is_silent(data_chunk):
"""Returns 'True' if below the 'silent' threshold"""
return max(data_chunk) < THRESHOLD
def normalize(data_all):
"""Amplify the volume out to max -1dB"""
# MAXIMUM = 16384
normalize_factor = (float(NORMALIZE_MINUS_ONE_dB * FRAME_MAX_VALUE)
/ max(abs(i) for i in data_all))
r = array('h')
for i in data_all:
r.append(int(i * normalize_factor))
return r
def trim(data_all):
_from = 0
_to = len(data_all) - 1
for i, b in enumerate(data_all):
if abs(b) > THRESHOLD:
_from = max(0, i - TRIM_APPEND)
break
for i, b in enumerate(reversed(data_all)):
if abs(b) > THRESHOLD:
_to = min(len(data_all) - 1, len(data_all) - 1 - i + TRIM_APPEND)
break
return copy.deepcopy(data_all[_from:(_to + 1)])
def record():
"""Record a word or words from the microphone and
return the data as an array of signed shorts."""
p = pyaudio.PyAudio()
stream = p.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, output=True, frames_per_buffer=CHUNK_SIZE)
silent_chunks = 0
audio_started = False
data_all = array('h')
while True:
# little endian, signed short
data_chunk = array('h', stream.read(CHUNK_SIZE))
if byteorder == 'big':
data_chunk.byteswap()
data_all.extend(data_chunk)
silent = is_silent(data_chunk)
if audio_started:
if silent:
silent_chunks += 1
if silent_chunks > SILENT_CHUNKS:
break
else:
silent_chunks = 0
elif not silent:
audio_started = True
sample_width = p.get_sample_size(FORMAT)
stream.stop_stream()
stream.close()
p.terminate()
data_all = trim(data_all) # we trim before normalize as threshhold applies to un-normalized wave (as well as is_silent() function)
data_all = normalize(data_all)
return sample_width, data_all
def record_to_file(path):
"Records from the microphone and outputs the resulting data to 'path'"
sample_width, data = record()
data = pack('<' + ('h' * len(data)), *data)
wave_file = wave.open(path, 'wb')
wave_file.setnchannels(CHANNELS)
wave_file.setsampwidth(sample_width)
wave_file.setframerate(RATE)
wave_file.writeframes(data)
wave_file.close()
if __name__ == '__main__':
print("Wait in silence to begin recording; wait in silence to terminate")
record_to_file('demo.wav')
print("done - result written to demo.wav")
import pyaudio
import wave
from array import array
FORMAT=pyaudio.paInt16
CHANNELS=2
RATE=44100
CHUNK=1024
RECORD_SECONDS=15
FILE_NAME="RECORDING.wav"
audio=pyaudio.PyAudio() #instantiate the pyaudio
#recording prerequisites
stream=audio.open(format=FORMAT,channels=CHANNELS,
rate=RATE,
input=True,
frames_per_buffer=CHUNK)
#starting recording
frames=[]
for i in range(0,int(RATE/CHUNK*RECORD_SECONDS)):
data=stream.read(CHUNK)
data_chunk=array('h',data)
vol=max(data_chunk)
if(vol>=500):
print("something said")
frames.append(data)
else:
print("nothing")
print("\n")
#end of recording
stream.stop_stream()
stream.close()
audio.terminate()
#writing to file
wavfile=wave.open(FILE_NAME,'wb')
wavfile.setnchannels(CHANNELS)
wavfile.setsampwidth(audio.get_sample_size(FORMAT))
wavfile.setframerate(RATE)
wavfile.writeframes(b''.join(frames))#append frames recorded to file
wavfile.close()
I think this will help.It is a simple script which will check if there is a silence or not.If silence is detected it will not record otherwise it will record.
The pyaudio website has many examples that are pretty short and clear:
http://people.csail.mit.edu/hubert/pyaudio/
Update 14th of December 2019 - Main example from the above linked website from 2017:
"""PyAudio Example: Play a WAVE file."""
import pyaudio
import wave
import sys
CHUNK = 1024
if len(sys.argv) < 2:
print("Plays a wave file.\n\nUsage: %s filename.wav" % sys.argv[0])
sys.exit(-1)
wf = wave.open(sys.argv[1], 'rb')
p = pyaudio.PyAudio()
stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
channels=wf.getnchannels(),
rate=wf.getframerate(),
output=True)
data = wf.readframes(CHUNK)
while data != '':
stream.write(data)
data = wf.readframes(CHUNK)
stream.stop_stream()
stream.close()
p.terminate()
You might want to look at csounds, also. It has several API's, including Python. It might be able to interact with an A-D interface and gather sound samples.

Categories

Resources