How to get frequency of an audio signal python - python

I want to get the frequency using pyaudio and plot it in a diagram via matplotlib. Therefore I used pyaudio to get the data from my audio input, which works fine but I've no idea how to get the frequency out of a raw signal. I found this piece of code, which should do the job, but I don't know how to apply it to my code.
Here I set up the microphone and prepare for recording:
# Recording parameters
CHUNK = 1024 * 2          # frames fetched per read (2048)
FORMAT = pyaudio.paInt16  # 16-bit signed integer samples (2 bytes each)
CHANNELS = 1              # mono: a single microphone channel
RATE = 44100              # sampling rate in Hz

# PyAudio instance that owns the audio stream
mic = pyaudio.PyAudio()

# Input stream delivering microphone data in CHUNK-sized buffers
stream = mic.open(
    format=FORMAT,
    channels=CHANNELS,
    rate=RATE,
    input=True,
    frames_per_buffer=CHUNK,
)
This is the part of my code where I get the data from my mic:
# Read one buffer: CHUNK frames of 16-bit mono audio (2 * CHUNK bytes).
data = stream.read(CHUNK)
# Interpret the bytes as signed 16-bit samples, which is what paInt16
# delivers. Unpacking single bytes and keeping every second one plots
# only one byte of each sample (the low byte on little-endian machines),
# and casting values above 127 to int8 raises OverflowError on NumPy 2.
data_int = np.frombuffer(data, dtype=np.int16)
# Keep the high byte (-128..127) and shift it to 0..255 so that silence
# sits at 128, the middle of the plot's y-axis.
data_np = (data_int >> 8) + 128
I just put this in a while loop and plotted it as a live plot.
Here's the full code:
import pyaudio #for capturing the audio-signal
import struct #for converting the binary-data from the signal to integer
import matplotlib.pyplot as plt #for displaying the audio-signal
import numpy as np
#functions
def plot_setup():
    """Create the live waveform figure.

    Reads the module-level ``CHUNK`` to size the x-axis and the
    placeholder line.

    Returns:
        tuple: ``(fig, line)`` - the matplotlib figure and the line artist
        whose y-data is replaced on every update.
    """
    # create matplotlib figure and axes
    fig = plt.figure()
    ax = fig.add_subplot(111)
    # x positions 0, 2, 4, ... : exactly CHUNK points
    x = np.arange(0, 2 * CHUNK, 2)
    # Placeholder line at mid-scale. Its length is taken from x instead of
    # a hard-coded 2048, so changing CHUNK cannot cause a shape mismatch.
    line, = ax.plot(x, np.full(len(x), 128), '-')
    # basic formatting for the axes
    ax.set_title('AUDIO WAVEFORM')
    ax.set_xlabel('samples')
    ax.set_ylabel('volume')
    ax.set_ylim(0, 255)
    ax.set_xlim(0, 2 * CHUNK)
    plt.xticks([0, CHUNK, 2 * CHUNK])
    plt.yticks([0, 128, 255])
    # show the window without blocking so the caller can keep updating it
    plt.show(block=False)
    return fig, line
def measure():
    """Read one buffer from the microphone and redraw the waveform line.

    Uses the module-level ``stream``, ``CHUNK``, ``line`` and ``fig``.

    Returns:
        int or None: ``0`` when the canvas could not be redrawn, which the
        main loop treats as the signal to stop; ``None`` otherwise.
    """
    # raw bytes: CHUNK frames of 16-bit mono audio
    data = stream.read(CHUNK)
    # Decode as signed 16-bit samples. Unpacking single bytes and keeping
    # every second one shows only one byte of each sample (the low byte on
    # little-endian machines) and overflows int8 on NumPy 2.
    samples = np.frombuffer(data, dtype=np.int16)
    # High byte (-128..127) shifted to 0..255 to match the plot's y-axis.
    line.set_ydata((samples >> 8) + 128)
    try:
        fig.canvas.draw()
        fig.canvas.flush_events()
    except Exception:
        # Drawing failed (for example the window was closed). Unlike a
        # bare except, KeyboardInterrupt and SystemExit still propagate.
        return 0
    return None
# Recording parameters
CHUNK = 1024 * 2          # frames fetched per read (2048)
FORMAT = pyaudio.paInt16  # 16-bit signed integer samples (2 bytes each)
CHANNELS = 1              # mono: a single microphone channel
RATE = 44100              # sampling rate in Hz

# PyAudio instance that owns the audio stream
mic = pyaudio.PyAudio()

# Input stream delivering microphone data in CHUNK-sized buffers
stream = mic.open(
    format=FORMAT,
    channels=CHANNELS,
    rate=RATE,
    input=True,
    frames_per_buffer=CHUNK,
)
if __name__ == "__main__":
    fig, line = plot_setup()
    try:
        # measure() returns 0 once the figure can no longer be redrawn
        while measure() != 0:
            pass
    finally:
        # Release the audio device even if the loop ends with an error
        # or Ctrl-C; previously the stream was never closed.
        stream.stop_stream()
        stream.close()
        mic.terminate()
And this is the output I get:
The final diagram should look exactly the same, except that I want the frequency to be on the y-axis.

Related

Wave readframes do not have equal number with chunk* samplewidth

I am trying to find the frequency of an audio file with the pyaudio library, using the code below, which I found on the internet.
import pyaudio
import wave
import numpy as np
# Plays a WAV file chunk by chunk and prints the dominant frequency of
# each chunk, estimated from the peak of its power spectrum.
# NOTE(review): the indentation of this snippet was lost; presumably the
# statements from the first stream.write(data) down to the second
# wf.readframes(chunk) form the body of the while loop.
# number of frames requested per read (also the FFT window length)
chunk = 2048
# open up a wave
wf = wave.open('g1.wav', 'rb')
# bytes per sample for ONE channel
swidth = wf.getsampwidth()
RATE = wf.getframerate()
print(swidth,RATE)
# use a Blackman window
window = np.blackman(chunk)
# open stream
p = pyaudio.PyAudio()
stream = p.open(format = p.get_format_from_width(wf.getsampwidth()),channels = wf.getnchannels(),rate = RATE,output = True)
# read some data
# readframes(chunk) returns up to `chunk` frames; one frame holds one
# sample per channel, so a full read is
# chunk * swidth * wf.getnchannels() bytes long.
data = wf.readframes(chunk)
print('---')
print(len(data))
print(chunk*swidth)
print('---')
# play stream and find the frequency of each chunk
# NOTE(review): this condition ignores the channel count, so it only holds
# for mono files. A length of 8192 against chunk*swidth = 4096 is
# consistent with a 2-channel file - confirm with wf.getnchannels().
while len(data) == chunk*swidth:
# write data out to the audio stream
stream.write(data)
# unpack the data as 16-bit integers and multiply by the Blackman window
# NOTE(review): the "h" format assumes swidth == 2.
indata = np.array(wave.struct.unpack("%dh"%(len(data)/swidth),\
data))*window
# Take the fft and square each value
fftData=abs(np.fft.rfft(indata))**2
# find the maximum, skipping bin 0
which = fftData[1:].argmax() + 1
# use quadratic interpolation around the max
print(which)
# the interpolation needs a neighbour on each side, so the last bin is
# handled separately in the else branch
if which != len(fftData)-1:
y0,y1,y2 = np.log(fftData[which-1:which+2:])
x1 = (y2 - y0) * .5 / (2 * y1 - y2 - y0)
# find the frequency and output it
# bin index (plus the fractional offset x1) times RATE/chunk gives Hz
thefreq = (which+x1)*RATE/chunk
print ("The freq is %f Hz." % (thefreq))
else:
thefreq = which*RATE/chunk
print ("The freq is %f Hz." % (thefreq))
# read some more data
data = wf.readframes(chunk)
# play whatever is left over after the loop stops
if data:
stream.write(data)
stream.close()
p.terminate()
For one 3-second audio file I have, it works fine. The audio is played out loud and the frequency is found:
The freq is 260.053907 Hz.
But for other audio files (these are 3 seconds long too), the condition below fails for some reason.
while len(data) == chunk*swidth:
I tried printing the values:
sample width=2
frame rate=48000
len(data)=8192
chunk*swidth=4096
Why does the length of data differ from chunk*swidth? Kindly help me.

Time steps difference in spectrogram

I have an audio file of 10 seconds in length. If I generate the spectrogram using matplotlib, then I get a different number of timesteps as compared to the spectrogram generated by librosa.
Here is the code:
# Compares the number of time steps produced by matplotlib's specgram and
# by librosa's STFT for the same audio.
fs = 8000
nfft = 200
# number of samples shared by consecutive windows (matplotlib's meaning)
noverlap = 120
# step between consecutive windows (librosa's meaning)
hop_length = 120
# NOTE(review): the answer's version unpacks this call as
# `audio, fs = librosa.core.load(...)`; here the whole return value is
# bound to `audio`.
audio = librosa.core.load(path, sr=fs)
# Spectrogram generated using matplotlib
spec, freqs, bins, _ = plt.specgram(audio, nfft, fs, noverlap = noverlap)
print(spec.shape) # (101, 5511)
# Using librosa
spectrogram_librosa = np.abs(librosa.stft(audio,
# NOTE(review): `n_fft` is not defined in this snippet; presumably `nfft`
# was meant.
n_fft=n_fft,
hop_length=hop_length,
win_length=nfft,
window='hann')) ** 2
spectrogram_librosa_db = librosa.power_to_db(spectrogram_librosa, ref=np.max)
print(spectrogram_librosa_db.shape) # (101, 3676)
Can someone explain why there is such a huge difference in the number of time steps, and how to make sure that both generate the same output?
This is because the noverlap parameter of plt.specgram is the number of points by which consecutive audio segments overlap, whereas hop_length is the step between the starts of consecutive segments. With nfft = 200, an overlap of 120 points corresponds to a hop of 200 - 120 = 80 points.
That being said, there is still a 2-frame difference between the two results, but this is most likely due to how the boundaries are handled.
import numpy as np
import librosa
import matplotlib.pyplot as plt
path = librosa.util.example_audio_file()

# Analysis parameters shared by both libraries
fs = 8000
nfft = 200
noverlap = 120                 # samples shared by neighbouring windows
hop_length = nfft - noverlap   # step between windows: 200 - 120 = 80

audio, fs = librosa.core.load(path, sr=fs)

# Spectrogram computed by matplotlib, on a log scale
spec, freqs, bins, _ = plt.specgram(audio, NFFT=nfft, Fs=fs, noverlap=noverlap)
spec = np.log10(spec + 1e-14)
print(spec.shape)  # (101, 6144)

# Power spectrogram computed by librosa, converted to dB
stft_magnitude = np.abs(
    librosa.stft(
        audio,
        n_fft=nfft,
        hop_length=hop_length,
        win_length=nfft,
        window="hann",
    )
)
spectrogram_librosa = stft_magnitude ** 2
spectrogram_librosa_db = librosa.power_to_db(spectrogram_librosa, ref=np.max)
print(spectrogram_librosa_db.shape)  # (101, 6146)

# Show both results one above the other
fig, ax = plt.subplots(2)
ax[0].pcolorfast(spec)
ax[1].pcolorfast(spectrogram_librosa_db)
plt.show()
This outputs the following picture:

How do I stretch the x-axis of a matplotlib spectrogram?

Sorry if this is a really obvious question. I am using matplotlib to generate some spectrograms for use as training data in a machine learning model. The spectrograms are of short clips of music and I want to simulate speeding up or slowing down the song by a random amount to create variations in the data. I have shown my code below for generating each spectrogram. I have temporarily modified it to produce 2 images starting at the same point in the song, one with variation and one without, in order to compare them and see if it is working as intended.
from pydub import AudioSegment
import matplotlib.pyplot as plt
import numpy as np
# Largest fractional tempo change applied in either direction
# (0.2 gives a variation factor between 0.8 and 1.2)
BPM_VARIATION_AMOUNT = 0.2
# Frame rate (Hz) the audio is converted to before slicing
FRAME_RATE = 22050
# Length of the extracted clip, in seconds
CHUNK_SIZE = 2
# Number of samples (5 seconds' worth) kept clear at both ends of the
# track when choosing the clip's start
BUFFER = FRAME_RATE * 5
def generate_random_specgram(track):
    """Save two spectrogram images of a random clip from ``track``.

    The first image uses the clip as-is; the second uses a randomly
    scaled frame rate and clip length to imitate a tempo change. Both
    files go to ``specgrams/`` and are named after the track id, the start
    sample and the BPM.

    Args:
        track: object providing ``location``, ``trackid`` and ``bpm``.
    """
    def _save_specgram(clip, rate, tempo):
        # Render one borderless spectrogram and write it to disk.
        out_path = ('specgrams/%s-%s-%s.png' % (track.trackid, start, tempo))
        plt.figure(figsize=(2.56, 0.64), frameon=False).add_axes([0, 0, 1, 1])
        plt.axis('off')
        plt.specgram(clip, Fs=rate)
        plt.savefig(out_path)
        plt.close()

    # Load the track as mono audio at the common frame rate
    source = AudioSegment.from_file(track.location)
    mono = source.set_channels(1).set_frame_rate(FRAME_RATE)
    samples = mono.get_array_of_samples()

    # Random start that stays BUFFER samples away from both ends
    start = np.random.randint(BUFFER, len(samples) - BUFFER)

    # Reference image: unmodified clip
    _save_specgram(
        samples[start:start + int(CHUNK_SIZE * FRAME_RATE)],
        FRAME_RATE,
        track.bpm,
    )

    # Perform random variations to the BPM
    variation = 1 - BPM_VARIATION_AMOUNT + (
        np.random.random() * BPM_VARIATION_AMOUNT * 2)
    bpm = round(track.bpm * variation, 2)
    # I thought this next line should have been /= but that stretched the wrong way?
    frame_rate = FRAME_RATE * (bpm / track.bpm)

    # Varied image: clip length and Fs both follow the scaled frame rate
    _save_specgram(
        samples[start:start + int(CHUNK_SIZE * frame_rate)],
        frame_rate,
        bpm,
    )
I thought by changing the Fs parameter given to the specgram function this would stretch the data along the x-axis but instead it seems to be resizing the whole graph and introducing white space at the top of the image in strange and unpredictable ways. I'm sure I'm missing something but I can't see what it is. Below is an image to illustrate what I'm getting.
The framerate is a fixed number that only depends on your data, if you change it you will effectively "stretch" the x-axis but in the wrong way. For example, if you have 1000 data points that correspond to 1 second, your framerate (or better sampling frequency) will be 1000. If your signal is a simple 200Hz sine which slightly increases the frequency in time, the specgram will be:
# 1000 samples covering one second
t = np.linspace(0, 1, 1000)
# sine whose frequency increases slightly over time
signal = np.sin((200*2*np.pi + 200*t) * t)
# correct sampling frequency: 1000 samples per second
frame_rate = 1000
plt.specgram(signal, Fs=frame_rate);
If you change the framerate you will have a wrong x and y-axis scale. If you set the framerate to be 500 you will have:
# same signal as before: 1000 samples covering one second
t = np.linspace(0, 1, 1000)
signal = np.sin((200*2*np.pi + 200*t) * t)
# deliberately wrong: half of the true sampling frequency
frame_rate = 500
plt.specgram(signal, Fs=frame_rate);
The plot looks very similar, but this time it is wrong: you have almost 2 seconds on the x-axis, while you should only have 1; moreover, the starting frequency you read is 100Hz instead of 200Hz.
To conclude, the sampling frequency you set needs to be the correct one. If you want to stretch the plot you can use something like plt.xlim(0.2, 0.4). If you want to avoid the white band on top of the plot you can manually set the ylim to be half the frame rate:
# limit the y-axis to half the frame rate to remove the white band on top
plt.ylim(0, frame_rate/2)
This works because of simple properties of the Fourier transform and Nyquist-Shannon theorem.
The solution to my problem was to set the xlim and ylim of the plot. Here is the code from my testing file in which I finally got rid of all the odd whitespace:
from pydub import AudioSegment
import numpy as np
import matplotlib.pyplot as plt
# Seconds of audio kept clear at each end of the track when picking a clip
BUFFER = 5
# Frame rate (Hz) the audio is converted to
FRAME_RATE = 22050
# Nominal clip length in seconds (also the x-axis limit of the plot)
SAMPLE_LENGTH = 2
def plot(audio_file, bpm, variation=1):
    """Save a spectrogram of a random clip, optionally tempo-scaled.

    Args:
        audio_file: path of the audio file to read.
        bpm: tempo of the track; only used to build the output file name.
        variation: factor applied to the clip length, to the ``Fs`` value
            passed to ``specgram`` and to the y-axis limit (default 1).
    """
    segment = AudioSegment.from_file(audio_file)
    mono = segment.set_channels(1).set_frame_rate(FRAME_RATE)
    samples = mono.get_array_of_samples()

    # Clip length in samples, and the margin kept clear at both ends
    chunk_length = int(FRAME_RATE * SAMPLE_LENGTH * variation)
    margin = BUFFER * FRAME_RATE
    start = np.random.randint(margin, len(samples) - margin - chunk_length)
    chunk = samples[start:start + chunk_length]

    # Axes fill the whole figure so the image has no border
    figure = plt.figure(figsize=(5.12, 2.56))
    figure.add_axes([0, 0, 1, 1])
    plt.specgram(chunk, Fs=FRAME_RATE * variation)
    # Pin both axes to remove the stray white space
    plt.xlim(0, SAMPLE_LENGTH)
    plt.ylim(0, FRAME_RATE / 2 * variation)
    plt.savefig('specgram-%f.png' % (bpm * variation))
    plt.close()

scipy.io.wavfile.write no sound

I want to write a signal to a .wav file, but when I do this using
scipy.io.wavfile.write it just creates a .wav file without sound.
The .wav file has the right length, but there is no sound.
I looked for a solution to this problem but I couldn't find any help.
My code below :
import scipy as sp
import numpy as np
# Reads raw 32-bit integers from a binary file and writes them to a WAV
# file unchanged.
# NOTE(review): `filename` and `fS` are not defined in this snippet.
dt = np.dtype(np.int32)
# count=-1 reads the whole file; sep='' means binary (not text) input
sig = np.fromfile(filename, dtype=dt, count=-1, sep='')
# The samples are written as-is, without rescaling - see the answer below
# about checking sig.min() and sig.max().
sp.io.wavfile.write('sound.wav', int(fS), sig)
As a test, I also did a little function :
def write_wav_sin(name, fs, f):
    """Write a 10-second sine tone to a float32 WAV file and plot it.

    Args:
        name: path of the WAV file to create.
        fs: sampling rate in Hz (an integer; ``10 * fs`` samples are made).
        f: frequency of the tone in Hz.
    """
    # endpoint=False keeps the spacing at exactly 1/fs. Including the
    # endpoint spreads 10*fs samples over 10*fs - 1 intervals, which
    # slightly detunes the tone.
    x = np.linspace(0, 10, 10 * fs, endpoint=False)
    dt = np.dtype(np.float32)
    # np.pi replaces math.pi so the function needs no extra import
    sig = np.sin(2 * np.pi * f * x, dtype=dt)
    print(type(sig[0]))
    sp.io.wavfile.write(name, fs, sig)
    plt.plot(x, sig)
With this test it works, but with my other code it doesn't.
Does anyone know why I have this problem?
Check the range of values in sig by printing sig.min() and sig.max(). The values are not scaled by wavfile.write, so it might be that you have a file with values so low that you can't hear them.
Try scaling up the 32 bit integer values, or writing the data as normalized 32 bit floating point. For example, this converts sig to 32 bit floating point values in the range [-1, 1] before saving it:
# Convert to float64 first: np.abs on int32 data overflows for the most
# negative value, which would give a wrong (negative) peak.
sig_float = np.asarray(sig, dtype=np.float64)
m = np.max(np.abs(sig_float))
# An all-zero signal has no peak to normalise by; leave it unscaled
# instead of dividing by zero.
if m > 0:
    sig_float = sig_float / m
sigf32 = sig_float.astype(np.float32)
sp.io.wavfile.write('sound.wav', int(fS), sigf32)
In the end I divided my whole signal so that its maximum amplitude is much smaller (my signal sometimes had an amplitude of 500000; to write it to a WAV file I divided it by 250000).
With that trick I can listen to the sound, but there is something weird, like additional artifacts/noise (I compared it to a .wav file obtained with MATLAB from the same file).
The code I used is:
import scipy as sp
import numpy as np
# Same as the first attempt, but with the samples divided by 250000.
# NOTE(review): `filename` and `fS` are not defined in this snippet.
dt = np.dtype(np.int32)
sig = np.fromfile(filename, dtype=dt, count=-1, sep='')
# NOTE(review): dividing the int32 array yields floating-point data. With
# peaks around 500000 the result reaches about +/-2, outside the [-1, 1]
# range the answer above recommends - presumably the cause of the
# artifacts; verify by normalising to the peak instead.
sp.io.wavfile.write('sound.wav', int(fS), sig/250000)
Here's a commented example on how to generate a basic wave file with a set duration, frequency, volume and number of samples. Utilizing NumPy and Python's wave library.
import numpy as ny
import struct
import wave
class SoundFile:
    """Write a mono, 16-bit PCM WAV file named ``test.wav``.

    Args:
        signal: the samples to store. Either raw bytes that are already
            packed as 16-bit PCM, or an array-like of numbers that is
            rounded, clipped to the int16 range and packed on write.
    """

    def __init__(self, signal):
        # https://docs.python.org/3.6/library/wave.html#wave.open
        self.file = wave.open('test.wav', 'wb')
        self.signal = signal
        # sampling rate in Hz
        self.sr = 44100

    def write(self):
        """Write the signal to the file and close it."""
        if isinstance(self.signal, (bytes, bytearray)):
            # already packed by the caller
            frames = bytes(self.signal)
        else:
            # The header below declares 2-byte samples, so the data must be
            # int16. Writing a float array directly would dump its raw
            # 8-byte floats into the file and produce noise.
            pcm = ny.clip(ny.rint(ny.asarray(self.signal)), -32768, 32767)
            frames = pcm.astype('<i2').tobytes()
        try:
            # https://docs.python.org/3.6/library/wave.html#wave.Wave_write.setparams
            # (channels, sample width, frame rate, frame count, compression)
            self.file.setparams(
                (1, 2, self.sr, len(frames) // 2, 'NONE', 'noncompressed'))
            # https://docs.python.org/3.6/library/wave.html#wave.Wave_write.writeframes
            self.file.writeframes(frames)
        finally:
            # close the file even if writing fails
            self.file.close()
# signal settings
duration = 4  # duration in seconds
samplerate = 44100  # samples per second (Hz)
samples = duration * samplerate  # total number of samples
frequency = 440  # Hz
period = samplerate / float(frequency)  # samples per cycle
omega = ny.pi * 2 / period  # phase advance per sample, in radians
volume = 16384  # peak amplitude (16-bit full scale is 32767)
# create sin wave
# the builtin float replaces ny.float, an alias removed from NumPy
xaxis = ny.arange(samples, dtype=float)
ydata = volume * ny.sin(xaxis * omega)
# The wave module writes raw bytes and the file is declared as 16-bit
# PCM, so pack the samples as little-endian int16 before handing them over.
signal = ydata.astype('<i2').tobytes()
# create sound file
f = SoundFile(signal)
f.write()
print('sound file created')
I did my best to comment, update, and modify this source, which originally came from a random blogger.

How to program Power Spectrum of .wav file

So I am trying to calculate the power spectrum of noise I recorded from the sun from a .wav file it recorded to. So far my code is (NEW CODE FROM OLD POST):
import pyaudio
import sys
import struct
import numpy
from pylab import *
import wave
import pyfits
# Intended to load a recorded WAV file, save it as FITS, and plot the
# signal and its power spectrum.
# NOTE(review): `sundata` is only the file NAME (a string); nothing in
# this snippet opens or reads that file. The name is also spelled
# 'sun_noise_ouput.wav' here but 'sun_noise_output.wav' further down.
sundata = ('sun_noise_ouput.wav')
chunk = 1024
FORMAT = pyaudio.paInt16 # 16-bit integers
CHANNELS = 1
RATE = 25000
RECORD_SECONDS = 120
p = pyaudio.PyAudio()
# Convert to pair of bytes to numerical datatype
# NOTE(review): N is half the length of the file name, not of the audio
# data, so the loop below unpacks pairs of characters from the name.
N = len(sundata)/2
data = numpy.zeros(N,dtype=float)
for i in range(N) :
data[i] = struct.unpack('h',sundata[2*i:2*(i+1)])[0]
# store the values in a FITS table
column = pyfits.Column(name='integer data', array=data, format="J")
fitsoutput = pyfits.new_table([column])
fitsoutput.writeto('sun_noise_output.fits', clobber=True)
# NOTE(review): mode 'wb' creates or overwrites this file, and the frames
# written are the file-name string itself - presumably why the result
# plays as 0:00 seconds. Reading the recording needs wave.open(..., 'rb')
# followed by readframes().
wf = wave.open('sun_noise_output.wav', 'wb')
wf.setnchannels(CHANNELS)
wf.setsampwidth(p.get_sample_size(FORMAT))
wf.setframerate(RATE)
wf.writeframes(sundata)
wf.close()
# power spectrum: squared magnitude of the FFT
dataft = numpy.fft.fft(data)
powerspectrum = abs(dataft)**2
# first figure: the signal; second figure: its power spectrum
figure()
plot(range(N),data)
figure()
plot(range(N),powerspectrum)
show()
It may also help to note that when I try playing the file, there is no audio and the player says it has a length of 0:00 seconds.
Also, when I download a sample from NASA's homepage, there is no playback audio, and these are the graphs produced:

Categories

Resources