I am trying to find the frequency of an audio file with the pyaudio library, using the code below, which I found on the internet.
import pyaudio
import wave
import numpy as np
# Play a WAV file chunk by chunk and print the dominant frequency of each chunk.
chunk = 2048
# open up a wave
wf = wave.open('g1.wav', 'rb')
swidth = wf.getsampwidth()
RATE = wf.getframerate()
# use a Blackman window
window = np.blackman(chunk)
# open stream
p = pyaudio.PyAudio()
stream = p.open(format = p.get_format_from_width(wf.getsampwidth()),channels = wf.getnchannels(),rate = RATE,output = True)
# read some data
data = wf.readframes(chunk)
# play stream and find the frequency of each chunk
# NOTE(review): this condition only accounts for the sample width. If a frame
# holds one sample per channel, a full read of a multi-channel file would be
# chunk*swidth*getnchannels() bytes and never match -- confirm against the
# wave module documentation.
while len(data) == chunk*swidth:
# write data out to the audio stream
# unpack the data as 16-bit samples and multiply by the Blackman window created above
indata = np.array(wave.struct.unpack("%dh"%(len(data)/swidth),\
# Take the fft and square each value
# find the maximum (bin 0 is skipped, so the index is shifted back by one)
which = fftData[1:].argmax() + 1
# use quadratic interpolation around the max
# (not possible at the last bin, which has no right-hand neighbour)
if which != len(fftData)-1:
y0,y1,y2 = np.log(fftData[which-1:which+2:])
x1 = (y2 - y0) * .5 / (2 * y1 - y2 - y0)
# find the frequency and output it: convert the interpolated bin index to Hz
thefreq = (which+x1)*RATE/chunk
print ("The freq is %f Hz." % (thefreq))
# fallback without interpolation: use the raw bin index
thefreq = which*RATE/chunk
print ("The freq is %f Hz." % (thefreq))
# read some more data
data = wf.readframes(chunk)
# a final, shorter chunk may remain after the loop ends
if data:
For one 3-second audio file I have, it works fine. The audio is played out loud and the frequency is found:
The freq is 260.053907 Hz.
But for other audio files (these are also 3 seconds long), the condition below fails for some reason.
while len(data) == chunk*swidth:
i tried printing the values
sample width=2
frame rate=48000
Why does the length of data differ from chunk*swidth? Kindly help me.
I am trying to make a real-time high-pass and low-pass filter using Python. What I mean is that whenever I play any audio on my computer, I want the low-pass filter to filter the audio in real time and send it to the left channel of my audio amplifier (my subwoofer), and the high-pass filter to send audio to the right channel of my audio amplifier (my tweeter), so that both play simultaneously on the left and right channels.
I was successful in creating a high-pass filter using Python, but it plays on both channels, and the audio has to be saved to a WAV file before it is played. I want it to take the audio from my computer in real time and route the filtered result, also in real time, to the corresponding right or left audio channel.
Here is the code for the High-pass filter I found on stackoverflow:
import matplotlib.pyplot as plt
import numpy as np
import wave
import sys
import math
import contextlib
fname = 'Lil Nas X Industry Baby Lyrics ft Jack Harlow.wav'
outname = 'filtered.wav'
cutOffFrequency = 400.0
def running_mean(x, windowSize):
    """Return the simple moving average of *x* over a sliding window.

    Args:
        x: One-dimensional sequence or array of samples.
        windowSize: Number of consecutive samples averaged for each output
            value; must be a positive integer.

    Returns:
        Array with ``len(x) - windowSize + 1`` window means (empty when the
        window is longer than the input).

    Raises:
        ValueError: If ``windowSize`` is smaller than 1.
    """
    if windowSize < 1:
        # A zero or negative window would otherwise slice from the wrong end
        # and return meaningless values instead of failing.
        raise ValueError("windowSize must be a positive integer")
    # Prefix sums with a leading zero: the sum of any window is the
    # difference of two prefix sums, so no per-window loop is needed.
    cumsum = np.cumsum(np.insert(x, 0, 0))
    return (cumsum[windowSize:] - cumsum[:-windowSize]) / windowSize
def interpret_wav(raw_bytes, n_frames, n_channels, sample_width, interleaved = True):
    """Decode raw PCM bytes into an array of shape ``(n_channels, n_frames)``.

    Args:
        raw_bytes: Bytes as returned by ``wave.Wave_read.readframes``.
        n_frames: Number of frames contained in ``raw_bytes``.
        n_channels: Number of audio channels.
        sample_width: Bytes per sample; 1 (unsigned 8-bit) or 2 (signed 16-bit).
        interleaved: True when sample N of channel M follows sample N of
            channel M-1 in the raw data; False when all samples of one
            channel are stored before the next channel starts.

    Returns:
        Array with one row per channel.

    Raises:
        ValueError: If ``sample_width`` is unsupported, or the byte count
            does not match ``n_frames * n_channels`` samples.
    """
    if sample_width == 1:
        dtype = np.dtype(np.uint8)  # unsigned char
    elif sample_width == 2:
        dtype = np.dtype("<i2")  # signed 2-byte short, little-endian
    else:
        raise ValueError("Only supports 8 and 16 bit audio formats.")

    # frombuffer replaces the deprecated binary mode of np.fromstring; the
    # copy keeps the result writable, as fromstring's result was.
    samples = np.frombuffer(raw_bytes, dtype=dtype).copy()

    if interleaved:
        # One row per frame, then transpose so each row is a channel.
        return samples.reshape(n_frames, n_channels).T
    # Channels are stored back to back, so each row is already a channel.
    return samples.reshape(n_channels, n_frames)
# Read the input WAV, smooth its first channel with a running mean and start
# writing the result to a mono output file.
with contextlib.closing(wave.open(fname,'rb')) as spf:
sampleRate = spf.getframerate()
ampWidth = spf.getsampwidth()
nChannels = spf.getnchannels()
nFrames = spf.getnframes()
# NOTE(review): readframes presumably takes a frame count, so nFrames alone
# should be enough; nFrames*nChannels over-requests -- confirm.
signal = spf.readframes(nFrames*nChannels)
channels = interpret_wav(signal, nFrames, nChannels, ampWidth, True)
# Ratio of the cut-off frequency to the sample rate
freqRatio = (cutOffFrequency/sampleRate)
# Window length for the running mean, derived from that ratio
# NOTE(review): the origin of the constant 0.196196 is not shown here -- confirm.
N = int(math.sqrt(0.196196 + freqRatio**2)/freqRatio)
# Only the first channel is filtered; the result is cast back to the input sample type
filtered = running_mean(channels[0], N).astype(channels.dtype)
wav_file = wave.open(outname, "w")
# Output header: 1 channel, other parameters copied from the input file
wav_file.setparams((1, ampWidth, sampleRate, nFrames, spf.getcomptype(), spf.getcompname()))
I want to get the frequency using pyaudio and plot it in a diagram via matplotlib. Therefore I used pyaudio to get the data from my audio input, which works fine but I've no idea how to get the frequency out of a raw signal. I found this piece of code, which should do the job, but I don't know how to apply it to my code.
Here I set up the microphone and prepare for recording:
# constants
CHUNK = 1024 * 2 # samples per frame
FORMAT = pyaudio.paInt16 # audio format (bytes per sample?)
CHANNELS = 1 # single channel for microphone
RATE = 44100 # samples per second
# pyaudio class instance
mic = pyaudio.PyAudio()
# stream object to get data from microphone
stream = mic.open(
This is the part of my code where I get the data from my mic:
data = stream.read(CHUNK)
# convert data to integers, make np array, then offset it by 127
data_int = struct.unpack(str(2 * CHUNK) + 'B', data)
# create np array and offset by 128
data_np = np.array(data_int, dtype='b')[::2]
data_np = [i+127 for i in data_np]
I just put this in a while loop and plotted it in a live plot.
Here's the full code:
import pyaudio #for capturing the audio-signal
import struct #for converting the binary-data from the signal to integer
import matplotlib.pyplot as plt #for displaying the audio-signal
import numpy as np
def plot_setup():
# create matplotlib figure and axes
# variable for plotting
x = np.arange(0, 2 * CHUNK, 2)
# create a line object with random data
line, = ax.plot(x, [128 for i in range(2048)], '-')
# basic formatting for the axes
ax.set_title('AUDIO WAVEFORM')
ax.set_ylim(0, 255)
ax.set_xlim(0, 2 * CHUNK)
plt.xticks([0, CHUNK, 2 * CHUNK])
plt.yticks([0, 128, 255])
# show the plot
return fig, line
def measure():
# binary data
data = stream.read(CHUNK)
# convert data to integers, make np array, then offset it by 127
data_int = struct.unpack(str(2 * CHUNK) + 'B', data)
# create np array and offset by 128
data_np = np.array(data_int, dtype='b')[::2]
data_np = [i+127 for i in data_np]
return 0
# constants
CHUNK = 1024 * 2 # samples per frame
FORMAT = pyaudio.paInt16 # audio format (bytes per sample?)
CHANNELS = 1 # single channel for microphone
RATE = 44100 # samples per second
# pyaudio class instance
mic = pyaudio.PyAudio()
# stream object to get data from microphone
stream = mic.open(
if __name__=="__main__":
fig, line=plot_setup()
while True:
if m==0:
And this is the output I get:
The final diagram should look exactly the same, except that I want the frequency to be on the y-axis.
I have a wav file which contains a recorded chirp sound.
The Frequency sampling 44100
Number of Channels 1
Complete Samplings N 90405
secs 2.05
The chirp sound is only 50ms.
The image of the chirp:
The code I have so far to read the wav file and carry out some basic processing.
# Load the recording, reduce it to mono and compute its one-sided spectrum.
fs_rate, signal = wavfile.read("chirp.wav")
print ("Frequency sampling", fs_rate)
# Number of array dimensions: 1 for a mono file, 2 when there is a channel axis
l_audio = len(signal.shape)
print ("Channels", l_audio)
if l_audio == 2:
    # Average across the channel axis to get a single mono signal
    signal = signal.sum(axis=1) / 2
N = signal.shape[0]
print ("Complete Samplings N", N)
secs = N / float(fs_rate)
print ("secs", secs)
Ts = 1.0/fs_rate # sampling interval in time
print ("Timestep between samples Ts", Ts)
# Build the time axis from the sample count so it always has exactly N
# entries; a float-stepped arange(0, secs, Ts) can be off by one element.
t = np.arange(N) * Ts # time vector as numpy.ndarray
# scipy.arange and the callable scipy.fft were removed from SciPy; the NumPy
# equivalents compute the same values.
FFT = np.abs(np.fft.fft(signal))
FFT_side = FFT[:N // 2] # one side FFT range
freqs = np.fft.fftfreq(signal.size, Ts)
fft_freqs = np.array(freqs)
freqs_side = freqs[:N // 2] # one side frequency range
fft_freqs_side = np.array(freqs_side)
p1 = plt.plot(t, signal, "g") # plotting the signal
Using Python, how do I tell where the first sample point of the chirp is located in the audio file,
i.e. the first point at which the chirp was received?
The signal contains background noise. The result I am expecting
should say: this is where your chirp signal starts, and it is at a 2 kHz frequency.
PS: This is not a homework problem. I am learning DSP. Sort of self-study.
If you know the chirp sequence, you could correlate against that to get start of chirp in stream.
import numpy as np
import scipy.signal as sig

# Known chirp waveform, used as the template to search for.
h = np.asarray(chirp_sequence)
# 'valid' mode only evaluates positions where the template lies fully inside
# the recording, so output index k means "template starts at signal[k]".
# (The default 'full' mode would shift the peak by len(h) - 1 samples.)
# Assumes the recording is at least as long as the template.
rxy = sig.correlate(signal, h, mode='valid')
# The largest correlation magnitude marks the first sample of the chirp.
start_idx = np.argmax(np.abs(rxy))
"""Play a fixed frequency sound."""
from __future__ import division
import math
from pyaudio import PyAudio
def sine_tone(frequency, duration, volume=1, sample_rate=22050):
n_samples = int(sample_rate * duration)
restframes = n_samples % sample_rate
p = PyAudio()
stream = p.open(format=p.get_format_from_width(1), # 8bit
channels=1, # mono
s = lambda t: volume * math.sin(2 * math.pi * frequency * t / sample_rate)
samples = (int(s(t) * 0x7f + 0x80) for t in range(n_samples))
for buf in zip(*[samples]*sample_rate): # write several samples at a time
# fill remainder of frameset with silence
stream.write(b'\x80' * restframes)
def playScale(scale):
    """Play every frequency in *scale*, in order, as a one-second tone."""
    for tone_hz in scale:
        # Positional arguments follow sine_tone's (frequency, duration) order.
        sine_tone(tone_hz, 1, sample_rate=50000)
The playScale function accepts an array of frequencies and plays them using the sine_tone function. How do I save this series of sounds into .WAV file or a .MP3 file?
You should write all the audio data to one stream; then you can save this stream using the 'wave' library in Python, which is capable of manipulating wave files.
However, with your current code I'm not sure how it would work, as you are writing separate streams per sound / tone. You might want to pass a stream into that function so you can append to that stream and save it with a different function later, when all audio is rendered.
I'm looking for a way to find out the duration of an audio file (.wav) in Python. So far I have looked at the Python wave library, mutagen, pymedia and pymad, but I was not able to get the duration of the wav file. Pymad gave me the duration, but it's not consistent.
The duration is equal to the number of frames divided by the framerate (frames per second):
import wave
import contextlib
fname = '/tmp/test.wav'
# Duration in seconds = number of frames / frames per second.
f = wave.open(fname,'r')
try:
    frames, rate = f.getnframes(), f.getframerate()
finally:
    # Always release the file handle, even if reading the header fails.
    f.close()
duration = frames / float(rate)
Regarding #edwards' comment, here is some code to produce a 2-channel wave file:
import math
import wave
import struct
# Write a 2-channel, 16-bit WAV file: a sine wave on the first channel and a
# cosine wave of the same frequency on the second.
FILENAME = "/tmp/test.wav"
freq = 440.0
data_size = 40000
frate = 1000.0
amp = 64000.0
nchannels = 2
sampwidth = 2
framerate = int(frate)
nframes = data_size
comptype = "NONE"
compname = "not compressed"
# One (first channel, second channel) pair per frame
data = [(math.sin(2 * math.pi * freq * (x / frate)),
         math.cos(2 * math.pi * freq * (x / frate))) for x in range(data_size)]
wav_file = wave.open(FILENAME, 'w')
try:
    wav_file.setparams(
        (nchannels, sampwidth, framerate, nframes, comptype, compname))
    # Pack all samples first and write once instead of once per sample.
    # '<h' forces little-endian 16-bit samples, the byte order WAV uses;
    # amp / 2 keeps values inside the signed 16-bit range.
    wav_file.writeframes(b''.join(
        struct.pack('<h', int(v * amp / 2))
        for values in data
        for v in values))
finally:
    # Closing flushes the data and finalizes the header.
    wav_file.close()
If you play the resultant file in an audio player, you'll find that it is 40 seconds in duration. If you run the code above, it also computes the duration to be 40 seconds. So I believe the number of frames is not influenced by the number of channels, and the formula above is correct.
the librosa library can do this: librosa
import librosa
A very simple method is to use soundfile (formerly pysoundfile).
Here's some example code of how to do this:
import soundfile as sf
f = sf.SoundFile('447c040d.wav')
print('samples = {}'.format(f.frames))
print('sample rate = {}'.format(f.samplerate))
print('seconds = {}'.format(f.frames / f.samplerate))
The output for that particular file is:
samples = 232569
sample rate = 16000
seconds = 14.5355625
This aligns with soxi:
Input File : '447c040d.wav'
Channels : 1
Sample Rate : 16000
Precision : 16-bit
Duration : 00:00:14.54 = 232569 samples ~ 1090.17 CDDA sectors
File Size : 465k
Bit Rate : 256k
Sample Encoding: 16-bit Signed Integer PCM
we can use ffmpeg to get the duration of any video or audio files.
To install ffmpeg follow this link
import subprocess
import re
process = subprocess.Popen(['ffmpeg', '-i', path_of_wav_file], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
stdout, stderr = process.communicate()
matches = re.search(r"Duration:\s{1}(?P<hours>\d+?):(?P<minutes>\d+?):(?P<seconds>\d+\.\d+?),", stdout.decode(), re.DOTALL).groupdict()
import os
#read the ByteRate field from file (see the Microsoft RIFF WAVE file format)
#ByteRate is located at the first 28th byte
#convert string a into integer/longint value
#a is little endian, so proper conversion is required
for i in range(4):
byteRate=byteRate + ord(a[i])*pow(256,i)
#get the file size in bytes
#the duration of the data, in milliseconds, is given by
print "File duration in miliseconds : " % ms
print "File duration in H,M,S,mS : " % ms/(3600*1000) % "," % ms/(60*1000) % "," % ms/1000 % "," ms%1000
print "Actual sound data (in bytes) : " % fileSize-44
Let,T be the duration between 2 consecutive samples. So, we can write t = nT or t = n/Fs.
from scipy.io import wavfile
Fs, data = wavfile.read('filename.wav')
# Count frames (rows), not array elements: multi-channel audio is returned
# as (n_frames, n_channels), so data.size would overstate the duration.
n = data.shape[0]
# Duration in seconds; float() keeps the division exact under Python 2 too.
t = n / float(Fs)
I was trying to get the length of different format of an audio file other than '.wav' and I tried a few of the above solution but didn't work for me
This is what worked for me :
from pydub.utils import mediainfo
To find length of music file, audioread module can be used,
install audioread: pip install audioread
then use this code:
import audioread
with audioread.audio_open(filepath) as f:
totalsec = f.duration
min,sec = divmod(totalsec,60) # divides total time in minute and second
#and store it in min and sec variable respectively
Another solution with pydub:
import pydub
# Only the package `pydub` is imported above, so AudioSegment must be reached
# through it; the bare name would raise NameError.
audio_seg = pydub.AudioSegment.from_wav('mywav.wav')
# Length of the segment, stored as milliseconds per the variable name
total_in_ms = len(audio_seg)
This is short and needs no modules, works with all operating systems:
import os
# Estimate a sound file's duration from its size on disk.
os.chdir(foo) # Get into the dir with sound
statbuf = os.stat('Sound.wav')
# NOTE(review): st_size / 1024 is a size in kibibytes, despite the name `mbytes`.
mbytes = statbuf.st_size / 1024
# NOTE(review): dividing by 200 presumably assumes about 200 KiB of audio per
# second, which only holds for one specific sample rate, width and channel
# count -- confirm before relying on this.
duration = mbytes / 200