I'm looking for a way to find the duration of an audio file (.wav) in Python. So far I have looked at Python's wave library, mutagen, pymedia, and pymad, but I was not able to get the duration of the wav file. Pymad gave me a duration, but it's not consistent.
The duration is equal to the number of frames divided by the framerate (frames per second):
import wave
import contextlib
# Path of the WAV file whose length we want.
fname = '/tmp/test.wav'

# Read the two header values we need, making sure the file is closed again
# even if reading fails.
f = wave.open(fname, 'r')
try:
    frames = f.getnframes()
    rate = f.getframerate()
finally:
    f.close()

# Duration in seconds = number of frames / frames per second.
duration = frames / float(rate)
print(duration)
Regarding @edwards' comment, here is some code to produce a 2-channel wave file:
import math
import wave
import struct
# Output path and synthesis parameters for the generated two-channel test file.
FILENAME = "/tmp/test.wav"
freq = 440.0
data_size = 40000
frate = 1000.0
amp = 64000.0
nchannels = 2
sampwidth = 2
framerate = int(frate)
nframes = data_size
comptype = "NONE"
compname = "not compressed"

# One (sine, cosine) pair per frame: the first value goes to channel 1, the
# second to channel 2.
data = [(math.sin(2 * math.pi * freq * (x / frate)),
         math.cos(2 * math.pi * freq * (x / frate))) for x in range(data_size)]

# Pack each frame as two little-endian signed 16-bit samples. WAV data is
# little-endian regardless of the host, so the byte order is spelled out
# instead of relying on the native order of a bare 'h'.
packed_frames = b''.join(
    struct.pack('<hh', int(left * amp / 2), int(right * amp / 2))
    for left, right in data)

# Open before entering the try block: if the open itself fails there is
# nothing to close, and the finally clause must not hide that error.
wav_file = wave.open(FILENAME, 'w')
try:
    wav_file.setparams(
        (nchannels, sampwidth, framerate, nframes, comptype, compname))
    # A single write of whole frames avoids re-patching the header once per
    # sample.
    wav_file.writeframes(packed_frames)
finally:
    wav_file.close()
If you play the resultant file in an audio player, you'll find that it is 40 seconds in duration. If you run the code above, it also computes the duration to be 40 seconds. So I believe the number of frames is not influenced by the number of channels, and the formula above is correct.
The librosa library can do this with librosa.get_duration:
import librosa
# Ask librosa for the length of the clip stored in 'my.wav'.
# NOTE(review): presumably the result is in seconds, and newer librosa
# releases may expect `path=` instead of `filename=` — confirm against the
# installed version.
wav_path = 'my.wav'
librosa.get_duration(filename=wav_path)
A very simple method is to use soundfile (formerly pysoundfile).
Here's some example code of how to do this:
import soundfile as sf
# Open the file as a context manager so the underlying handle is released
# as soon as the header values have been printed.
with sf.SoundFile('447c040d.wav') as f:
    print('samples = {}'.format(f.frames))
    print('sample rate = {}'.format(f.samplerate))
    # float() keeps the fractional part of the duration even where `/` on two
    # integers would truncate (Python 2).
    print('seconds = {}'.format(f.frames / float(f.samplerate)))
The output for that particular file is:
samples = 232569
sample rate = 16000
seconds = 14.5355625
This aligns with soxi:
Input File : '447c040d.wav'
Channels : 1
Sample Rate : 16000
Precision : 16-bit
Duration : 00:00:14.54 = 232569 samples ~ 1090.17 CDDA sectors
File Size : 465k
Bit Rate : 256k
Sample Encoding: 16-bit Signed Integer PCM
We can use ffmpeg to get the duration of any video or audio file.
To install ffmpeg follow this link
import subprocess
import re
# Run `ffmpeg -i <file>` and capture everything it prints. stderr is merged
# into stdout so the "Duration:" line is captured whichever stream it is
# written to.
process = subprocess.Popen(['ffmpeg', '-i', path_of_wav_file], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
stdout, stderr = process.communicate()

# Decode leniently: metadata embedded in media files is not guaranteed to be
# valid UTF-8, and a decode error here would hide the duration line.
output_text = stdout.decode('utf-8', 'replace')

match = re.search(r"Duration:\s{1}(?P<hours>\d+?):(?P<minutes>\d+?):(?P<seconds>\d+\.\d+?),", output_text, re.DOTALL)
if match is None:
    # Without this check a missing "Duration:" line (ffmpeg not able to read
    # the file, unexpected output format) surfaces as an opaque
    # AttributeError on None.
    raise ValueError("no 'Duration:' line found in ffmpeg output for %r" % (path_of_wav_file,))

# The three captured fields are strings: hours, minutes and fractional seconds.
matches = match.groupdict()
print(matches['hours'])
print(matches['minutes'])
print(matches['seconds'])
import os
import struct

path = "c:\\windows\\system32\\loopymusic.wav"

# Size of the header that is subtracted from the file size to get the amount
# of sound data.
# NOTE(review): assumes a canonical 44-byte PCM header with no extra chunks;
# files with additional chunks will give a slightly wrong duration.
HEADER_SIZE = 44

# Read the ByteRate field from the file (see the Microsoft RIFF WAVE file format)
# https://ccrma.stanford.edu/courses/422/projects/WaveFormat/
# ByteRate is a 4-byte field located at byte offset 28.
# The file must be opened in binary mode: text mode may translate or decode
# the raw header bytes.
with open(path, "rb") as f:
    f.seek(28)
    a = f.read(4)
if len(a) != 4:
    raise ValueError("%r is too short to contain a WAVE header" % (path,))

# The field is little-endian, so unpack it as an unsigned 32-bit "<I".
byteRate = struct.unpack("<I", a)[0]
if byteRate == 0:
    raise ValueError("%r reports a byte rate of 0" % (path,))

# Get the file size in bytes and derive the size of the sound data.
fileSize = os.path.getsize(path)
dataSize = fileSize - HEADER_SIZE

# The duration of the data, in whole milliseconds.
ms = (dataSize * 1000) // byteRate

# Split the total into hours, minutes, seconds and leftover milliseconds.
hours, remainder = divmod(ms, 3600 * 1000)
minutes, remainder = divmod(remainder, 60 * 1000)
seconds, millis = divmod(remainder, 1000)

print("File duration in miliseconds : %d" % ms)
print("File duration in H,M,S,mS : %d,%d,%d,%d" % (hours, minutes, seconds, millis))
print("Actual sound data (in bytes) : %d" % dataSize)
Let T be the duration between two consecutive samples, so T = 1/Fs, where Fs is the sampling rate. For a signal of n samples, the total duration is t = nT, or equivalently t = n/Fs.
from scipy.io import wavfile
# wavfile.read returns the sampling rate and the sample array.
Fs, data = wavfile.read('filename.wav')

# Count frames along the first axis only. For multi-channel files the array
# is 2-D (samples x channels), so data.size would count every channel and
# overstate the duration.
n = data.shape[0]

# Duration in seconds; float() keeps the fractional part under Python 2.
t = n / float(Fs)
I was trying to get the length of audio files in formats other than '.wav'. I tried a few of the above solutions, but they didn't work for me.
This is what worked for me :
from pydub.utils import mediainfo
# Probe the file once, then pick the 'duration' entry out of the returned
# mapping.
info = mediainfo('audiofile')
info['duration']
To find length of music file, audioread module can be used,
install audioread: pip install audioread
then use this code:
import audioread
# Open the file only long enough to read its reported duration.
with audioread.audio_open(filepath) as f:
    totalsec = f.duration

# Split the total into whole minutes and the remaining seconds. The results
# are named `minutes`/`seconds` rather than `min`/`sec` so the builtin min()
# is not shadowed for the rest of the module.
minutes, seconds = divmod(totalsec, 60)
Another solution with pydub:
import pydub
# Only the `pydub` package itself is imported, so AudioSegment has to be
# reached through it; the bare name would raise NameError.
audio_seg = pydub.AudioSegment.from_wav('mywav.wav')

# The length of the segment, stored as milliseconds per the variable's name.
total_in_ms = len(audio_seg)
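This is short, needs only the standard library, and works on all operating systems, but note that it only estimates the duration from the file size and assumes a fixed data rate: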
import os
os.chdir(foo)  # Get into the dir with sound
statbuf = os.stat('Sound.wav')

# st_size is divided by 1024 here, so despite the variable's name this value
# is in kilobytes. Dividing by 1024.0 keeps the fractional part under
# Python 2 as well.
mbytes = statbuf.st_size / 1024.0

# Assumed data rate of the audio, in kilobytes per second.
# NOTE(review): this constant depends on the file's sample rate, sample width
# and channel count; the estimate is only valid for files matching it.
KBYTES_PER_SECOND = 200.0

# Estimated duration in seconds.
duration = mbytes / KBYTES_PER_SECOND
Related
I am reading bytes from a recorded audio sample. I would like to convert the bytes from the frames variable into a .wav file which I want to be stored in a variable so I can access it without storing it in a file. The code below just stores the recorded data into a variable called frames.
from playsound import playsound
from random import randrange
import pyttsx3
from datetime import datetime
import pyaudio
import speech_recognition as sr
import requests
import wave
import numpy as np
import sounddevice as sd
import math
import time
import os
import struct
def voiceDetection():
    """Record audio chunks until the input has stayed quiet for a timeout.

    Opens a PyAudio input stream and reads fixed-size chunks. Whenever the
    RMS level of a chunk reaches the threshold, the deadline is pushed back
    by ``TimeoutLength`` seconds; recording stops once the deadline passes.

    Returns:
        list: the raw chunks in the order they were read from the stream.
    """
    SoundThreshHold = 50
    TimeoutLength = 5
    chunk = 1024
    FORMAT = pyaudio.paInt16
    CHANNELS = 2
    RATE = 16000

    def rms(data):
        """Return the RMS level of the 16-bit samples in ``data``, times 1000."""
        # Two bytes per sample; floor division keeps the count an integer on
        # Python 3 as well.
        count = len(data) // 2
        if count == 0:
            return 0.0
        # Ignore a trailing odd byte so unpack always sees whole samples.
        shorts = struct.unpack("%dh" % count, data[:count * 2])
        sum_squares = 0.0
        for sample in shorts:
            # Normalise to [-1, 1) before squaring.
            n = sample * (1.0 / 32768)
            sum_squares += n * n
        return math.sqrt(sum_squares / count) * 1000

    p = pyaudio.PyAudio()
    try:
        stream = p.open(format=FORMAT,
                        channels=CHANNELS,
                        rate=RATE,
                        input=True,
                        frames_per_buffer=chunk)
        try:
            currentTime = time.time()
            end = time.time() + TimeoutLength
            frames = []
            while currentTime < end:
                currentTime = time.time()
                data = stream.read(chunk)
                if rms(data) >= SoundThreshHold:
                    # Sound detected: extend the deadline.
                    end = time.time() + TimeoutLength
                # Every chunk is kept, including the quiet ones.
                frames.append(data)
        finally:
            # Release the stream even if reading fails part-way.
            stream.stop_stream()
            stream.close()
    finally:
        p.terminate()
    return frames


print(voiceDetection())
Would appreciate any help. Have a happy new year!
Python has a general mechanism for this: BytesIO.
BytesIO allows you to create an in-memory file stream that you can read and write to as if it were a file on the file system.
If you just want to get your data as an array, this question has a solution
In general, when you are working with sound/numerical data in Python, you'll want to find out how to get your data in to a NumPy array in order to process it. Most libraries/tool-kits will work with NumPy arrays.
I want to write a signal in a .wav file, but when I do this using
scipy.io.wavfile.write it just create me a .wav without sound.
The .wav has the good length but there is no sound.
I looked for a solution for this problem but I couldn't find help.
My code below :
import scipy as sp
import numpy as np
# Load the wavfile submodule explicitly: `import scipy as sp` alone does not
# guarantee that `sp.io.wavfile` is available.
import scipy.io.wavfile

# Read the whole file as raw 32-bit integers.
dt = np.dtype(np.int32)
sig = np.fromfile(filename, dtype=dt, count=-1, sep='')

# The samples are written unchanged as 32-bit integer PCM.
# NOTE(review): values far below the int32 full scale will play back very
# quietly — check sig.min() / sig.max() if the result seems silent.
sp.io.wavfile.write('sound.wav', int(fS), sig)
As a test, I also did a little function :
def write_wav_sin(name, fs, f, duration=10):
    """Write a sine tone to a WAV file as 32-bit floats and plot it.

    Args:
        name: path of the WAV file to write.
        fs: sampling rate in samples per second.
        f: tone frequency in Hz.
        duration: length of the tone in seconds (defaults to the previously
            hard-coded 10).
    """
    n_samples = int(duration * fs)
    # endpoint=False keeps the spacing between samples at exactly 1/fs;
    # including the endpoint stretches the time axis and detunes the tone.
    x = np.linspace(0, duration, n_samples, endpoint=False)
    sig = np.sin(2 * np.pi * f * x).astype(np.float32)
    print(type(sig[0]))
    sp.io.wavfile.write(name, fs, sig)
    plt.plot(x, sig)
With this test it works, but with my other code it doesn't work
Someone knows why I have this problem ?
Check the range of values in sig by printing sig.min() and sig.max(). The values are not scaled by wavfile.write, so it might be that you have a file with values so low that you can't hear them.
Try scaling up the 32 bit integer values, or writing the data as normalized 32 bit floating point. For example, this converts sig to 32 bit floating point values in the range [-1, 1] before saving it:
# Convert to float first: taking abs() of the most negative int32 overflows,
# and dividing an integer array by an integer truncates under Python 2.
sig_float = sig.astype(np.float64)
m = np.max(np.abs(sig_float))

# Scale into [-1, 1]. A completely silent signal has a peak of 0; leave it as
# zeros instead of dividing by zero and writing NaNs.
if m > 0:
    sig_float = sig_float / m
sigf32 = sig_float.astype(np.float32)
sp.io.wavfile.write('sound.wav', int(fS), sigf32)
In the end I divided my whole signal to get a much smaller maximum amplitude (my signal sometimes had an amplitude of 500000; to write it to a WAV file I divided it by 250000).
With that trick I can listen to the sound, but there is something weird, like additional artifacts/noise (I compared it to a .wav obtained with MATLAB from the same file).
the code I used is :
import scipy as sp
import numpy as np
# Load the wavfile submodule explicitly: `import scipy as sp` alone does not
# guarantee that `sp.io.wavfile` is available.
import scipy.io.wavfile

# Read the whole file as raw 32-bit integers.
dt = np.dtype(np.int32)
sig = np.fromfile(filename, dtype=dt, count=-1, sep='')

# Apply the fixed scaling factor in floating point.
scaled = sig / 250000.0

# Floating-point WAV data is expected to lie in [-1, 1]. If the fixed factor
# still leaves peaks above 1, scale down further so they are not clipped on
# playback.
peak = np.max(np.abs(scaled)) if scaled.size else 0.0
if peak > 1.0:
    scaled = scaled / peak

# Write 32-bit floats rather than the 64-bit floats the division produces.
sp.io.wavfile.write('sound.wav', int(fS), scaled.astype(np.float32))
Here's a commented example of how to generate a basic wave file with a set duration, frequency, volume and number of samples, using NumPy and Python's wave library.
import numpy as ny
import struct
import wave
class SoundFile:
    """Write a mono, 16-bit PCM signal to ``test.wav`` at 44.1 kHz."""

    def __init__(self, signal):
        """Open ``test.wav`` for writing and remember the signal.

        Args:
            signal: the samples to write, either an array-like of numeric
                sample values or already-packed ``bytes``/``bytearray``.
        """
        # https://docs.python.org/3.6/library/wave.html#wave.open
        self.file = wave.open('test.wav', 'wb')
        self.signal = signal
        self.sr = 44100

    def write(self):
        """Write the signal as 16-bit little-endian frames and close the file."""
        try:
            if isinstance(self.signal, (bytes, bytearray)):
                # Already packed by the caller; write as-is.
                frames = bytes(self.signal)
            else:
                # The header declares 2-byte samples, so numeric data must be
                # converted to int16 first. Writing a float array's raw
                # buffer would be read back as four garbage frames per
                # sample. Clip so out-of-range values saturate instead of
                # wrapping around.
                samples = ny.clip(ny.asarray(self.signal), -32768, 32767)
                frames = samples.astype('<i2').tobytes()
            # https://docs.python.org/3.6/library/wave.html#wave.Wave_write.setparams
            # One channel, two bytes per sample; the frame count is taken
            # from the data rather than hard-coded.
            self.file.setparams((1, 2, self.sr, len(frames) // 2, 'NONE', 'noncompressed'))
            # https://docs.python.org/3.6/library/wave.html#wave.Wave_write.writeframes
            self.file.writeframes(frames)
        finally:
            # Close even if packing or writing fails so the handle is not leaked.
            self.file.close()
# signal settings
duration = 4  # duration in seconds
samplerate = 44100  # samples per second
samples = duration * samplerate  # total number of samples in the signal
frequency = 440  # Hz
period = samplerate / float(frequency)  # length of one cycle, in samples
omega = ny.pi * 2 / period  # phase advance per sample, in radians
volume = 16384  # peak amplitude (16-bit full scale is 32768)

# create sin wave
# The builtin float is used as the dtype: the `ny.float` alias is no longer
# available in current NumPy releases.
xaxis = ny.arange(samples, dtype=float)
ydata = volume * ny.sin(xaxis * omega)

# ydata already has exactly `samples` elements, so it is used directly.
signal = ydata

# create sound file
f = SoundFile(signal)
f.write()
print('sound file created')
Did my best to comment, update, and modify this source by a random blogger.
I'm just getting started with pyaudio and I wrote a simple function to play a note. However the note sounds different depending on the version of Python I'm using:
from __future__ import division
import math
import pyaudio
# Audio format and note parameters read by play_note().
BITS_PER_BYTE = 8 # for clarity
SAMPLE_BIT_DEPTH = 8 # i.e. each sample is 1 byte
SAMPLES_PER_SECOND = 16000
NOTE_TIME_SECONDS = 1
# Frequency constant used to derive the tone below.
# NOTE(review): 523.3 Hz looks like C5 rather than middle C — confirm.
MIDDLE_C_HZ = 523.3
# Despite its name, this is samples-per-second divided by Hz, i.e. the number
# of samples spanned by one cycle at MIDDLE_C_HZ.
CYCLES_PER_SECOND = SAMPLES_PER_SECOND / MIDDLE_C_HZ
# Total number of samples generated for the note.
NUM_SAMPLES = SAMPLES_PER_SECOND * NOTE_TIME_SECONDS
def play_note():
    """Generate one note as unsigned 8-bit samples and play it through PyAudio.

    The samples are collected in a ``bytearray`` so the data handed to the
    stream is raw bytes on both Python 2 and Python 3. Building a ``str`` with
    ``chr()`` produces text on Python 3, which is not the byte sequence the
    stream expects.
    """
    audio = pyaudio.PyAudio()
    try:
        stream = audio.open(
            # Floor division keeps the sample width an integer even with
            # `from __future__ import division` in effect.
            format=audio.get_format_from_width(SAMPLE_BIT_DEPTH // BITS_PER_BYTE),
            channels=1,
            rate=SAMPLES_PER_SECOND,
            output=True,
        )
        try:
            samples = bytearray()
            for i in range(NUM_SAMPLES):
                # calculate the amplitude for this frame as a float between -1 and 1
                frame_amplitude = math.sin(i / (CYCLES_PER_SECOND / math.pi))
                # scale the amplitude to an integer between 0 and 255 (inclusive)
                scaled_amplitude = int(frame_amplitude * 127 + 128)
                samples.append(scaled_amplitude)
            stream.write(bytes(samples))
        finally:
            # Release the stream even if generating or writing fails.
            stream.close()
    finally:
        audio.terminate()


if __name__ == '__main__':
    play_note()
In Python 2.7.13 I hear the correct, clear tone. In 3.6.2 it sounds rough, like a square wave.
Why is that, and how would I fix this (or at least start to debug)?
I am on OSX v10.11.6 using portaudio v19.6.0.
It's because you're using a str when you should be using bytes.
This works for me:
# Collect the samples as raw bytes (bytearray instead of str): each value is
# the sine amplitude rescaled from [-1, 1] to an integer in 0..255.
byte_array = bytearray(
    int(math.sin(i / (CYCLES_PER_SECOND / math.pi)) * 127 + 128)
    for i in range(NUM_SAMPLES)
)
stream.write(bytes(byte_array))
"""Play a fixed frequency sound."""
from __future__ import division
import math
from pyaudio import PyAudio
def sine_tone(frequency, duration, volume=1, sample_rate=22050):
    """Play a sine tone through PyAudio as unsigned 8-bit mono audio.

    Args:
        frequency: tone frequency in Hz.
        duration: length of the tone in seconds; fractional values are
            played in full rather than being padded with silence.
        volume: amplitude factor; the resulting sample values are clamped to
            the 0..255 range of an unsigned byte.
        sample_rate: samples per second (must be an integer).
    """
    n_samples = int(sample_rate * duration)

    def sample_at(t):
        # Centre the sine on 0x80 and clamp, so a volume above 1 saturates
        # instead of producing a value that does not fit in a byte.
        value = int(volume * math.sin(2 * math.pi * frequency * t / sample_rate) * 0x7f + 0x80)
        return min(255, max(0, value))

    samples = bytearray(sample_at(t) for t in range(n_samples))

    p = PyAudio()
    try:
        stream = p.open(format=p.get_format_from_width(1),  # 8bit
                        channels=1,  # mono
                        rate=sample_rate,
                        output=True)
        try:
            # Write up to one second of samples at a time. Slicing also
            # delivers the final partial second, which fixed-size grouping
            # would drop.
            for start in range(0, n_samples, sample_rate):
                stream.write(bytes(samples[start:start + sample_rate]))
            stream.stop_stream()
        finally:
            stream.close()
    finally:
        p.terminate()
def playScale(scale):
    """Print each frequency in ``scale`` and play it via sine_tone.

    Every tone lasts one second at half volume with a 50000 Hz sample rate.
    """
    for freq in scale:
        print(freq)
        sine_tone(frequency=freq, duration=1, volume=.5, sample_rate=50000)
The playScale function accepts an array of frequencies and plays them using the sine_tone function. How do I save this series of sounds into .WAV file or a .MP3 file?
You should write all the audio data to one stream; then you can save this stream using Python's 'wave' library, which is capable of manipulating wave files.
However, with your current code I'm not sure how it would work, as you are writing separate streams per sound/tone. You might want to pass a stream into that function so you can append to that stream, and save it with a different function later when all the audio is rendered.
https://docs.python.org/2/library/wave.html
So I am trying to calculate the power spectrum of noise I recorded from the sun from a .wav file it recorded to. So far my code is (NEW CODE FROM OLD POST):
import pyaudio
import sys
import struct
import numpy
from pylab import *
import wave
import pyfits
# Script: unpack 16-bit samples into a float array, save them to a FITS table
# and a WAV file, then plot the samples and their power spectrum.
# NOTE(review): sundata is bound to the file-name string itself, not to audio
# bytes read from that file, so everything below operates on the characters
# of the name — confirm the intended source of the data.
sundata = ('sun_noise_ouput.wav')
chunk = 1024
FORMAT = pyaudio.paInt16 # 16-bit integers
CHANNELS = 1
RATE = 25000
RECORD_SECONDS = 120
p = pyaudio.PyAudio()
# Convert to pair of bytes to numerical datatype
# NOTE(review): under Python 3 `/` yields a float here, which numpy.zeros and
# range below do not accept — presumably written for Python 2.
N = len(sundata)/2
data = numpy.zeros(N,dtype=float)
# Each iteration unpacks one 2-byte slice of sundata as a native-order short.
for i in range(N) :
data[i] = struct.unpack('h',sundata[2*i:2*(i+1)])[0]
# Store the unpacked values as a single-column FITS table.
column = pyfits.Column(name='integer data', array=data, format="J")
fitsoutput = pyfits.new_table([column])
fitsoutput.writeto('sun_noise_output.fits', clobber=True)
# Write sundata as the frame data of a mono WAV file.
# NOTE(review): this file name ('sun_noise_output.wav') is spelled differently
# from the one assigned to sundata above ('sun_noise_ouput.wav').
wf = wave.open('sun_noise_output.wav', 'wb')
wf.setnchannels(CHANNELS)
wf.setsampwidth(p.get_sample_size(FORMAT))
wf.setframerate(RATE)
wf.writeframes(sundata)
wf.close()
# Power spectrum: squared magnitude of the FFT of the unpacked samples.
dataft = numpy.fft.fft(data)
powerspectrum = abs(dataft)**2
# First figure: the samples; second figure: the power spectrum, both plotted
# against the sample index.
figure()
plot(range(N),data)
figure()
plot(range(N),powerspectrum)
show()
May also help to note that when I try playing the file it returns no audio and says it has length 0:00 seconds
Also, when I download a sample from NASA's homepage, there is no playback audio, and these are the graphs produced: