So I am trying to calculate the power spectrum of noise I recorded from the sun from a .wav file it recorded to. So far my code is (NEW CODE FROM OLD POST):
import pyaudio
import sys
import struct
import numpy
from pylab import *
import wave
import pyfits
# Compute and plot the power spectrum of a previously recorded WAV file.
#
# The recording must be READ here.  Holding only the file name in `sundata`
# makes the "samples" the characters of the name, and reopening the file with
# mode 'wb' truncates the recording (a player then reports a 0:00 length).
sun_wav_path = 'sun_noise_output.wav'

# Parameters the recording is expected to have been made with.
chunk = 1024
FORMAT = pyaudio.paInt16  # 16-bit integers
CHANNELS = 1
RATE = 25000
RECORD_SECONDS = 120

wf = wave.open(sun_wav_path, 'rb')
try:
    if wf.getsampwidth() != 2:
        raise ValueError('expected 16-bit samples, got %d-byte samples'
                         % wf.getsampwidth())
    # Trust the header over the constant above.
    RATE = wf.getframerate()
    sundata = wf.readframes(wf.getnframes())
finally:
    wf.close()

# Convert each pair of bytes to a numerical datatype.  WAV stores 16-bit
# samples as little-endian signed integers.
# NOTE(review): a multi-channel file would yield interleaved samples here;
# the script assumes a mono recording (CHANNELS = 1).
data = numpy.frombuffer(sundata, dtype='<i2').astype(float)
N = len(data)

# Keep a FITS copy of the decoded samples.
column = pyfits.Column(name='integer data', array=data, format="J")
fitsoutput = pyfits.new_table([column])
fitsoutput.writeto('sun_noise_output.fits', clobber=True)

# Power spectrum = squared magnitude of the discrete Fourier transform.
dataft = numpy.fft.fft(data)
powerspectrum = abs(dataft)**2

figure()
plot(range(N), data)
figure()
plot(range(N), powerspectrum)
show()
May also help to note that when I try playing the file it returns no audio and says it has length 0:00 seconds
Also, when I download a sample from NASA's homepage, there is no playback audio either, and these are the graphs produced:
Related
I am reading bytes from a recorded audio sample. I would like to convert the bytes from the frames variable into a .wav file which I want to be stored in a variable so I can access it without storing it in a file. The code below just stores the recorded data into a variable called frames.
from playsound import playsound
from random import randrange
import pyttsx3
from datetime import datetime
import pyaudio
import speech_recognition as sr
import requests
import wave
import numpy as np
import sounddevice as sd
import math
import time
import os
import struct
def voiceDetection():
    """Record from the default input device until it has stayed quiet.

    Chunks are read from a 16-bit, 2-channel, 16 kHz PyAudio input stream.
    Every chunk whose RMS level reaches ``SoundThreshHold`` pushes the
    deadline ``TimeoutLength`` seconds into the future; recording stops once
    the deadline passes.

    Returns:
        list: the raw byte chunks read from the stream, in arrival order.
    """
    SoundThreshHold = 50
    TimeoutLength = 5
    chunk = 1024
    FORMAT = pyaudio.paInt16
    CHANNELS = 2
    RATE = 16000

    def rms(data):
        """Return the RMS level of 16-bit PCM bytes, scaled by 1000."""
        # Integer division: a float count only works by accident in the
        # struct format string under Python 3.
        count = len(data) // 2
        if count == 0:
            # An empty read would otherwise divide by zero.
            return 0.0
        shorts = struct.unpack("%dh" % count, data[:2 * count])
        sum_squares = 0.0
        for sample in shorts:
            n = sample * (1.0 / 32768)  # normalise to [-1.0, 1.0)
            sum_squares += n * n
        return math.sqrt(sum_squares / count) * 1000

    p = pyaudio.PyAudio()
    frames = []
    try:
        stream = p.open(format=FORMAT,
                        channels=CHANNELS,
                        rate=RATE,
                        input=True,
                        frames_per_buffer=chunk)
        try:
            end = time.time() + TimeoutLength
            while time.time() < end:
                data = stream.read(chunk)
                if rms(data) >= SoundThreshHold:
                    # Sound detected: extend the recording window.
                    end = time.time() + TimeoutLength
                frames.append(data)
        finally:
            # Release the device even if reading fails midway.
            stream.stop_stream()
            stream.close()
    finally:
        p.terminate()
    return frames
print(voiceDetection())
Would appreciate any help. Have a happy new year!
Python has a general mechanism for this BytesIO.
BytesIO allows you to create an in-memory file stream that you can read and write to as if it were a file on the file system.
If you just want to get your data as an array, this question has a solution
In general, when you are working with sound/numerical data in Python, you'll want to find out how to get your data in to a NumPy array in order to process it. Most libraries/tool-kits will work with NumPy arrays.
I want to get the frequency using pyaudio and plot it in a diagram via matplotlib. Therefore I used pyaudio to get the data from my audio input, which works fine but I've no idea how to get the frequency out of a raw signal. I found this piece of code, which should do the job, but I don't know how to apply it to my code.
Here I set up the microphone and prepare for recording:
# constants
CHUNK = 1024 * 2 # samples per frame
FORMAT = pyaudio.paInt16 # audio format (bytes per sample?)
CHANNELS = 1 # single channel for microphone
RATE = 44100 # samples per second
# pyaudio class instance
mic = pyaudio.PyAudio()
# stream object to get data from microphone
stream = mic.open(
format=FORMAT,
channels=CHANNELS,
rate=RATE,
input=True,
frames_per_buffer=CHUNK
)
This is the part of my code where I get the data from my mic:
data = stream.read(CHUNK)
# convert data to integers, make np array, then offset it by 127
data_int = struct.unpack(str(2 * CHUNK) + 'B', data)
# create np array and offset by 128
data_np = np.array(data_int, dtype='b')[::2]
data_np = [i+127 for i in data_np]
I just put this in a while-loop and plotted it in a live plot.
Here's the full code:
import pyaudio #for capturing the audio-signal
import struct #for converting the binary-data from the signal to integer
import matplotlib.pyplot as plt #for displaying the audio-signal
import numpy as np
#functions
def plot_setup():
    """Create the live waveform figure and its placeholder line.

    Reads the module-level ``CHUNK`` to size the x-axis and the initial data.

    Returns:
        tuple: ``(fig, line)`` - the matplotlib figure and the line object
        whose y-data is replaced on every update.
    """
    # create matplotlib figure and axes
    fig = plt.figure()
    ax = fig.add_subplot(111)

    # one x position per sample, spaced two apart (0, 2, ..., 2*CHUNK - 2)
    x = np.arange(0, 2 * CHUNK, 2)

    # Flat placeholder at mid-scale.  Its length follows CHUNK instead of a
    # hard-coded 2048 so that x and y stay the same size if CHUNK changes.
    line, = ax.plot(x, np.full(CHUNK, 128), '-')

    # basic formatting for the axes
    ax.set_title('AUDIO WAVEFORM')
    ax.set_xlabel('samples')
    ax.set_ylabel('volume')
    ax.set_ylim(0, 255)
    ax.set_xlim(0, 2 * CHUNK)
    plt.xticks([0, CHUNK, 2 * CHUNK])
    plt.yticks([0, 128, 255])

    # show the plot without blocking so the caller can keep updating it
    plt.show(block=False)
    return fig, line
def measure():
    """Read one chunk from the microphone and redraw the waveform line.

    Uses the module-level ``stream``, ``line`` and ``fig``.

    Returns:
        0 if redrawing the figure failed (for example because the window was
        closed), otherwise None.
    """
    # binary data: CHUNK signed 16-bit samples (the stream uses paInt16)
    data = stream.read(CHUNK)

    # Decode whole 16-bit samples.  Unpacking single bytes and keeping every
    # second one plots only the low-order byte of each sample, and casting
    # values above 127 to int8 relies on integer wraparound.
    samples = np.frombuffer(data, dtype=np.int16)

    # Map -32768..32767 onto the 0..255 range of the plot (silence at 128).
    data_np = (samples.astype(np.int32) + 32768) >> 8
    line.set_ydata(data_np)

    try:
        fig.canvas.draw()
        fig.canvas.flush_events()
    except Exception:
        # Drawing fails once the window is gone; tell the caller to stop.
        return 0
    return None
# constants
CHUNK = 1024 * 2           # samples per frame
FORMAT = pyaudio.paInt16   # audio format: signed 16-bit samples
CHANNELS = 1               # single channel for microphone
RATE = 44100               # samples per second

# pyaudio class instance
mic = pyaudio.PyAudio()

# stream object to get data from microphone
stream = mic.open(
    format=FORMAT,
    channels=CHANNELS,
    rate=RATE,
    input=True,
    frames_per_buffer=CHUNK
)

if __name__ == "__main__":
    try:
        fig, line = plot_setup()
        # Keep redrawing until measure() reports that drawing failed.
        while True:
            m = measure()
            if m == 0:
                break
    finally:
        # Release the audio device on exit, including on Ctrl-C or an error.
        stream.stop_stream()
        stream.close()
        mic.terminate()
And this is the output I get:
The final diagram should look exactly the same, except that I want the frequency to be on the y-axis.
I am reading bytes from wav audio downloaded from a URL. I would like to "reconstruct" these bytes into a .wav file. I have attempted the code below, but the resulting file is pretty much static. For example, when I download audio of myself speaking, the .wav file produced is static only, but I can hear slight alterations/distortions when I know the audio should be playing my voice. What am I doing wrong?
from pprint import pprint
import scipy.io.wavfile
import numpy
#download a wav audio recording from a url
>>>response = client.get_recording(r"someurl.com")
>>>pprint(response)
(b'RIFFv\xfc\x03\x00WAVEfmt \x10\x00\x00\x00\x01\x00\x01\x00\x80>\x00\x00'
...
b'\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff'
...
b'\xea\xff\xfd\xff\x10\x00\x0c\x00\xf0\xff\x06\x00\x10\x00\x06\x00'
...)
>>>a=bytearray(response)
>>>pprint(a)
bytearray(b'RIFFv\xfc\x03\x00WAVEfmt \x10\x00\x00\x00\x01\x00\x01\x00'
b'\x80>\x00\x00\x00}\x00\x00\x02\x00\x10\x00LISTJ\x00\x00\x00INFOINAM'
b'0\x00\x00\x00Conference d95ac842-08b7-4380-83ec-85ac6428cc41\x00'
b'IART\x06\x00\x00\x00Nexmo\x00data\x00\xfc\x03\x00\xff\xff'
b'\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff'
...
b'\x12\x00\xf6\xff\t\x00\xed\xff\xf6\xff\xfc\xff\xea\xff\xfd\xff'
...)
>>>b = numpy.array(a, dtype=numpy.int16)
>>>pprint(b)
array([ 82, 73, 70, ..., 255, 248, 255], dtype=int16)
>>>scipy.io.wavfile.write(r"C:\Users\somefolder\newwavfile.wav",
16000, b)
You can simply write the data in response to a file:
with open('myfile.wav', mode='bx') as f:
f.write(response)
If you want to access the audio data as a NumPy array without writing it to a file first, you can do this with the soundfile module like this:
import io
import soundfile as sf
data, samplerate = sf.read(io.BytesIO(response))
See also this example: https://pysoundfile.readthedocs.io/en/0.9.0/#virtual-io
AudioSegment.from_raw() will also work when you have a continuous stream of bytes:
import io
from pydub import AudioSegment
current_data is defined as the stream of bytes that you receive
s = io.BytesIO(current_data)
audio = AudioSegment.from_raw(s, sample_width, frame_rate, channels).export(filename, format='wav')
To add wave file header to raw audio bytes (extracted from wave library):
import struct
def write_header(_bytes, _nchannels, _sampwidth, _framerate):
    """Return ``_bytes`` prefixed with a 44-byte PCM RIFF/WAVE header.

    Args:
        _bytes: raw PCM audio data.
        _nchannels: number of interleaved channels.
        _sampwidth: bytes per sample.
        _framerate: frames per second.

    The sizes recorded in the header count whole frames only; the payload
    itself is appended unchanged.
    """
    pcm_format_tag = 0x0001
    frame_size = _nchannels * _sampwidth
    # Round the payload length down to a whole number of frames.
    payload_size = (len(_bytes) // frame_size) * frame_size

    # RIFF chunk, 16-byte 'fmt ' chunk, then the 'data' chunk header,
    # all little-endian.
    header = struct.pack(
        '<4sL4s4sLHHLLHH4sL',
        b'RIFF', 36 + payload_size, b'WAVE',
        b'fmt ', 16,
        pcm_format_tag, _nchannels, _framerate,
        frame_size * _framerate,   # byte rate
        frame_size,                # block align
        _sampwidth * 8,            # bits per sample
        b'data', payload_size,
    )
    return header + _bytes
I faced the same problem while streaming and I used the answers above to write a complete function.
In my case, the byte array was coming from streaming an audio file (the frontend) and the backend needs to process it as a ndarray.
This code simulates how the front end sends the audio file in chunks that are accumulated into a byte array:
audio_file_path = 'offline_input/zoom283.wav'
chunk = 1024  # frames requested per read

# Simulate a front end that delivers the audio chunk by chunk.
wf = wave.open(audio_file_path, 'rb')
try:
    received_chunks = []
    d = wf.readframes(chunk)
    while len(d) > 0:
        # Store the chunk BEFORE reading the next one; reading first would
        # silently drop the first chunk of the file.
        received_chunks.append(d)
        d = wf.readframes(chunk)
finally:
    wf.close()

# Join once at the end instead of growing a bytes object on every pass.
audio_input = b''.join(received_chunks)
Some required imports:
import io
import wave
import numpy as np
import scipy.io.wavfile
import soundfile as sf
from scipy.io.wavfile import write
Finally, the backend will take a byte array and convert it to ndarray:
def convert_bytearray_to_wav_ndarray(input_bytearray: bytes, sampling_rate=16000):
    """Turn raw 16-bit PCM bytes into the array soundfile decodes from them.

    The bytes are interpreted as int16 samples, wrapped in an in-memory WAV
    file at ``sampling_rate``, and that WAV file is then decoded with
    ``sf.read``.

    Returns:
        The sample array returned by ``sf.read``; the sample rate it reports
        is discarded.
    """
    pcm_samples = np.frombuffer(input_bytearray, dtype=np.int16)

    # Encode the samples as a WAV file held entirely in memory.
    wav_buffer = io.BytesIO(bytes())
    write(wav_buffer, sampling_rate, pcm_samples)
    # NOTE(review): reading straight after writing assumes write() leaves the
    # buffer positioned at its start - confirm against the scipy version used.
    wav_bytes = wav_buffer.read()

    decoded, _ = sf.read(io.BytesIO(wav_bytes))
    return decoded
output = convert_bytearray_to_wav_ndarray(input_bytearray=audio_input)
The output represents the audio file to be processed by the backend:
To check that the file has been received correctly, we write it to disk:
scipy.io.wavfile.write("output1.wav", 16000, output)
"""Play a fixed frequency sound."""
from __future__ import division
import math
from pyaudio import PyAudio
def sine_tone(frequency, duration, volume=1, sample_rate=22050):
    """Play a sine wave on the default output device and block until done.

    Args:
        frequency: tone frequency in Hz.
        duration: length of the tone in seconds.
        volume: amplitude factor; 1 is full scale for 8-bit output.
        sample_rate: output sample rate in Hz.
    """
    n_samples = int(sample_rate * duration)

    def sample_byte(t):
        """Return the unsigned 8-bit value of sample number t."""
        level = int(volume * math.sin(2 * math.pi * frequency * t / sample_rate)
                    * 0x7f + 0x80)
        # Clamp so a volume above 1 clips instead of raising in bytearray().
        return max(0, min(255, level))

    p = PyAudio()
    try:
        stream = p.open(format=p.get_format_from_width(1),  # 8bit
                        channels=1,  # mono
                        rate=sample_rate,
                        output=True)
        try:
            # Write up to one second of samples at a time.  The final chunk
            # may be shorter, so a trailing partial second is actually played
            # rather than being replaced with silence.
            for start in range(0, n_samples, sample_rate):
                stop = min(start + sample_rate, n_samples)
                buf = bytearray(sample_byte(t) for t in range(start, stop))
                stream.write(bytes(buf))
        finally:
            stream.stop_stream()
            stream.close()
    finally:
        p.terminate()
def playScale(scale):
    """Print and then play each frequency in ``scale``, one second per tone."""
    # Settings shared by every tone in the scale.
    tone_settings = dict(duration=1, volume=.5, sample_rate=50000)
    for pitch in scale:
        print(pitch)
        sine_tone(frequency=pitch, **tone_settings)
The playScale function accepts an array of frequencies and plays them using the sine_tone function. How do I save this series of sounds into .WAV file or a .MP3 file?
You should write all the audio data to one stream; you can then save this stream using Python's 'wave' library, which is capable of manipulating wave files.
However, with your current code I'm not sure how it would work, as you are writing a separate stream per sound/tone. You might want to pass a stream into that function so you can append to that stream, and save it with a different function later when all the audio is rendered.
https://docs.python.org/2/library/wave.html
I'm looking for a way to find out the duration of a audio file (.wav) in python. So far i had a look at python wave library, mutagen, pymedia, pymad i was not able to get the duration of the wav file. Pymad gave me the duration but its not consistent.
The duration is equal to the number of frames divided by the framerate (frames per second):
import wave
import contextlib
fname = '/tmp/test.wav'
with contextlib.closing(wave.open(fname,'r')) as f:
frames = f.getnframes()
rate = f.getframerate()
duration = frames / float(rate)
print(duration)
Regarding #edwards' comment, here is some code to produce a 2-channel wave file:
import math
import wave
import struct
# Write a two-channel, 16-bit test file: sine on the left, cosine on the right.
FILENAME = "/tmp/test.wav"
freq = 440.0
data_size = 40000   # number of frames to generate
frate = 1000.0      # frames per second
amp = 64000.0       # peak-to-peak amplitude; halved below to fit int16
nchannels = 2
sampwidth = 2
framerate = int(frate)
nframes = data_size
comptype = "NONE"
compname = "not compressed"

# One (left, right) pair per frame.
data = [(math.sin(2 * math.pi * freq * (x / frate)),
         math.cos(2 * math.pi * freq * (x / frate))) for x in range(data_size)]

# Open outside the try block: if opening fails there is nothing to close, and
# a `finally` that references wav_file would hide the real error behind a
# NameError.
wav_file = wave.open(FILENAME, 'w')
try:
    wav_file.setparams(
        (nchannels, sampwidth, framerate, nframes, comptype, compname))
    # WAV samples are little-endian, so pack with an explicit '<'.  All frames
    # are written in a single call rather than one call per sample.
    wav_file.writeframes(b''.join(
        struct.pack('<hh', int(left * amp / 2), int(right * amp / 2))
        for left, right in data))
finally:
    wav_file.close()
If you play the resultant file in an audio player, you'll find that is 40 seconds in duration. If you run the code above it also computes the duration to be 40 seconds. So I believe the number of frames is not influenced by the number of channels and the formula above is correct.
the librosa library can do this: librosa
import librosa
librosa.get_duration(filename='my.wav')
A very simple method is to use soundfile (formerly pysoundfile).
Here's some example code of how to do this:
import soundfile as sf
f = sf.SoundFile('447c040d.wav')
print('samples = {}'.format(f.frames))
print('sample rate = {}'.format(f.samplerate))
print('seconds = {}'.format(f.frames / f.samplerate))
The output for that particular file is:
samples = 232569
sample rate = 16000
seconds = 14.5355625
This aligns with soxi:
Input File : '447c040d.wav'
Channels : 1
Sample Rate : 16000
Precision : 16-bit
Duration : 00:00:14.54 = 232569 samples ~ 1090.17 CDDA sectors
File Size : 465k
Bit Rate : 256k
Sample Encoding: 16-bit Signed Integer PCM
we can use ffmpeg to get the duration of any video or audio files.
To install ffmpeg follow this link
import subprocess
import re
# `ffmpeg -i FILE` prints the file's metadata, including a "Duration:" line.
# stderr is merged into stdout so that output is captured in one stream.
process = subprocess.Popen(['ffmpeg', '-i', path_of_wav_file], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
stdout, stderr = process.communicate()

duration_match = re.search(r"Duration:\s{1}(?P<hours>\d+?):(?P<minutes>\d+?):(?P<seconds>\d+\.\d+?),", stdout.decode(errors='replace'), re.DOTALL)
if duration_match is None:
    # Without this check a missing or unreadable file surfaces as an
    # AttributeError on None instead of a meaningful message.
    raise ValueError("no 'Duration:' line in ffmpeg output for %r" % (path_of_wav_file,))
matches = duration_match.groupdict()

print(matches['hours'])
print(matches['minutes'])
print(matches['seconds'])
import os
import struct

path = "c:\\windows\\system32\\loopymusic.wav"

# Size of the canonical RIFF/WAVE header; everything after it is treated as
# sound data.
# NOTE(review): files with extra chunks (e.g. LIST/INFO) have longer headers,
# so this is an approximation.
HEADER_SIZE = 44

# Read the ByteRate field (see the Microsoft RIFF WAVE file format):
# https://ccrma.stanford.edu/courses/422/projects/WaveFormat/
# ByteRate is a 4-byte little-endian unsigned integer at byte offset 28.
# The file is opened in binary mode so the bytes are not decoded as text.
with open(path, "rb") as f:
    f.seek(28)
    a = f.read(4)
if len(a) != 4:
    raise ValueError("file too short to contain a WAVE header: %r" % (path,))
byteRate = struct.unpack('<I', a)[0]
if byteRate == 0:
    raise ValueError("ByteRate field is zero: %r" % (path,))

# get the file size in bytes
fileSize = os.path.getsize(path)
dataSize = fileSize - HEADER_SIZE

# the duration of the data, in milliseconds
ms = (dataSize * 1000) // byteRate

# Split into hours / minutes / seconds / milliseconds, carrying remainders.
hours, remainder = divmod(ms, 3600 * 1000)
minutes, remainder = divmod(remainder, 60 * 1000)
seconds, millis = divmod(remainder, 1000)

print("File duration in miliseconds : %d" % ms)
print("File duration in H,M,S,mS : %d,%d,%d,%d" % (hours, minutes, seconds, millis))
print("Actual sound data (in bytes) : %d" % dataSize)
Let T be the duration between two consecutive samples, so T = 1/Fs, where Fs is the sampling rate. The time at sample n is then t = nT, i.e. t = n/Fs.
from scipy.io import wavfile
Fs, data = wavfile.read('filename.wav')
# Count frames, not individual values: for a multi-channel file `data` is
# two-dimensional (samples x channels), so data.size would overstate the
# length by a factor of the channel count.
n = data.shape[0]
# float() keeps the division exact under Python 2 as well.
t = n / float(Fs)
I was trying to get the length of different format of an audio file other than '.wav' and I tried a few of the above solution but didn't work for me
This is what worked for me :
from pydub.utils import mediainfo
mediainfo('audiofile')['duration']
To find length of music file, audioread module can be used,
install audioread: pip install audioread
then use this code:
import audioread
with audioread.audio_open(filepath) as f:
totalsec = f.duration
min,sec = divmod(totalsec,60) # divides total time in minute and second
#and store it in min and sec variable respectively
Another solution with pydub:
import pydub
# AudioSegment lives in the pydub package; a bare `AudioSegment` is undefined
# after `import pydub` alone.
audio_seg = pydub.AudioSegment.from_wav('mywav.wav')
# len() of an AudioSegment is its duration in milliseconds.
total_in_ms = len(audio_seg)
This is short and needs no modules, works with all operating systems:
import os
os.chdir(foo) # Get into the dir with sound
statbuf = os.stat('Sound.wav')
mbytes = statbuf.st_size / 1024
duration = mbytes / 200