How to create multichannel .WAV file in Python? - python

The .WAV format looks like it should allow more than two channels (nChannels).
But unfortunately scipy.io.wavfile only writes 1 or 2.
I don't really want to manually write my own Python WAV-writer, but I can't find anything out there.
Is there any code out there that does the job?

It turns out the documentation for scipy.io.wavfile is incorrect.
Looking at the source code, I can clearly see that it accepts an arbitrary number of channels.
The following code works:
import numpy as np
from scipy.io import wavfile
fs = 48000
nsamps = fs * 10
A, Csharp, E, G = 440.0, 554.365, 660.0, 783.991
def sine(freqHz):
    """Return ``nsamps`` samples of a unit-amplitude sine wave at ``freqHz`` Hz.

    Uses the module-level ``fs`` (sample rate in Hz) and ``nsamps`` (number of
    samples to generate).
    """
    τ = 2 * np.pi
    # The phase ramp stops just short of the phase of sample number ``nsamps``
    # (endpoint=False), so consecutive samples are exactly 1/fs seconds apart.
    return np.sin(
        np.linspace(0, τ * freqHz * nsamps / fs, nsamps, endpoint=False)
    )
A7_chord = np.array( [ sine(A), sine(Csharp), sine(E), sine(G) ] ).T
wavfile.write("A7--4channel.wav", fs, A7_chord)

Related

Writing in a specific format to a txt file in Python

I am trying to write theta in a specific format to a .txt file. I present the current and expected output.
import numpy as np
theta = np.pi/3
with open('Contact angle.txt', 'w+') as f:
f.write(f"theta = {str(theta)}\n")
The current output is
theta = 1.0471975511965976
The expected output is
theta = pi/3
NumPy doesn't understand symbolic math, so that's not going to work. What you should probably use instead is SymPy.
>>> import sympy
>>> theta = sympy.pi / 3
>>> theta
pi/3
And if you need to convert it to float, you can do that:
>>> float(theta)
1.0471975511965979
Why not code it like this:
theta = "pi/3"
with open('Contact angle.txt', 'w+') as f:
f.write(f"theta = {theta}\n")
This might be a job that is best suited for SymPy.
If theta will always be pi/<integer>, then you could do something like
import numpy as np

theta = np.pi/3
# Round before converting: the floating-point quotient can land just below the
# intended integer (e.g. 6.999999999999999), which a bare int() would truncate.
divisor = int(round(np.pi/theta))
with open('Contact angle.txt', 'w+') as f:
    # The f-string must open and close with the same quote character.
    f.write(f'theta = pi/{divisor}\n')
The code will have to get a lot fancier if theta can be any rational multiple of pi: theta = <integer1>*pi/<integer2>
You can write theta as a string and use the eval function to get its numeric value, like this:
from numpy import pi
theta = "pi/3"
with open('Contact angle.txt', 'w+') as f:
f.write(f"theta = {theta}\n")
The output of eval(theta) will be 1.0471975511965976.

How can I reverse a scipy.signal.spectrogram to audio with Python?

I have:
import librosa
from scipy import signal
import scipy.io.wavfile as sf
samples, sample_rate = sf.read(args.file)
nperseg = int(sample_rate * 0.001 * 20)
frequencies, times, spectrogram = signal.spectrogram(samples,
sample_rate,
nperseg=nperseg,
window=signal.hann(nperseg))
audio_signal = librosa.griffinlim(spectrogram)
print(audio_signal, audio_signal.shape)
sf.write('test.wav', audio_signal, sample_rate)
However, this produces a (near) empty sound file.
As @DrSpill mentioned, the order of the values returned by scipy.io.wavfile.read and of the arguments passed to scipy.io.wavfile.write was wrong, and the way griffinlim was imported from librosa was also incorrect. This should do it:
import librosa
import numpy as np
import scipy.signal
import scipy.io.wavfile
# read file
file = "temp/processed_file.wav"
# wavfile.read returns (sample_rate, data), in that order
fs, sig = scipy.io.wavfile.read(file)
nperseg = int(fs * 0.001 * 20)  # 20 ms analysis window, expressed in samples
# process
# Window functions live in scipy.signal.windows; the old scipy.signal.hann
# alias was deprecated and later removed from SciPy.
frequencies, times, spectrogram = scipy.signal.spectrogram(
    sig,
    fs,
    nperseg=nperseg,
    window=scipy.signal.windows.hann(nperseg),
)
audio_signal = librosa.core.spectrum.griffinlim(spectrogram)
print(audio_signal, audio_signal.shape)
# write output
# wavfile.write takes (filename, sample_rate, data), in that order
scipy.io.wavfile.write('test.wav', fs, np.array(audio_signal, dtype=np.int16))
Remark:
The resulting file had an accelerated tempo when I listened to it. I think this is due to your processing, but with some tweaking it should work.
A good alternative would be to use only librosa, like this:
import librosa
import numpy as np
# read file
file = "temp/processed_file.wav"
sig, fs = librosa.core.load(file, sr=8000)
# process
abs_spectrogram = np.abs(librosa.core.spectrum.stft(sig))
audio_signal = librosa.core.spectrum.griffinlim(abs_spectrogram)
print(audio_signal, audio_signal.shape)
# write output
librosa.output.write_wav('test2.wav', audio_signal, fs)
The librosa.output module was deprecated and has since been removed, so librosa.output.write_wav is no longer available. Use soundfile.write instead:
import numpy as np
import soundfile as sf
sf.write('stereo_file.wav', np.random.randn(10, 2), 44100, 'PCM_24')
#Per your code you could try:
sf.write('test.wav', audio_signal, sample_rate, 'PCM_24')

Not able to recreate same sound using FFT

I am trying to recreate a musical note using the top 10 frequencies returned by a Fourier transform (FFT). The resulting sound does not match the original sound. I am not sure whether I am finding the frequencies incorrectly or generating the sound from them incorrectly. The goal of this code is to match the original sound.
Here is my code:
import numpy as np
from scipy.io import wavfile
from scipy.fftpack import fft
import matplotlib.pyplot as plt
i_framerate = 44100
fs, data = wavfile.read('./Flute.nonvib.ff.A4.stereo.wav') # load the data
def findFrequencies(arr_data, i_framerate = 44100, i_top_n =5):
a = arr_data.T[0] # this is a two channel soundtrack, I get the first track
# b=[(ele/2**8.)*2-1 for ele in a] # this is 8-bit track, b is now normalized on [-1,1)
y = fft(a) # calculate fourier transform (complex numbers list)
xf = np.linspace(0,int(i_framerate/2.0),int((i_framerate/2.0))+1) /2 # Need to find out this last /2 part
yf = np.abs(y[:int((i_framerate//2.0))+1])
plt.plot(xf,yf)
yf_top_n = np.argsort(yf)[-i_top_n:][::-1]
amp_top_n = yf[yf_top_n] / np.max(yf[yf_top_n])
freq_top_n = xf[yf_top_n]
return freq_top_n, amp_top_n
def createSoundData(a_freq, a_amp, i_framerate=44100, i_time = 1, f_amp = 1000.0):
    """Synthesize a two-channel signal as a sum of sine waves.

    Parameters
    ----------
    a_freq : sequence of float
        Frequencies of the sine components, in Hz.
    a_amp : sequence of float
        Relative amplitude of each component (paired with ``a_freq``).
    i_framerate : int
        Sample rate in samples per second.
    i_time : float
        Duration in seconds; fractional durations are allowed.
    f_amp : float
        Overall amplitude scale applied to every component.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_samples, 2)`` whose two columns are identical.
    """
    # Force an integer sample count so fractional durations do not break
    # np.zeros().
    n_samples = int(round(i_time * i_framerate))
    # Sample instants k / framerate. A linspace that includes the endpoint
    # would stretch the spacing to i_time / (n_samples - 1) and slightly
    # detune every component.
    t = np.arange(n_samples) / i_framerate
    y = np.zeros(n_samples)
    for freq, amp in zip(a_freq, a_amp):
        y += np.sin(2 * np.pi * freq * t) * f_amp * amp
    # Duplicate the mono signal into two identical channels.
    return np.c_[y, y]
top_freq , top_freq_amp = findFrequencies(data, i_framerate = 44100 , i_top_n = 200)
print('Frequencies: ',top_freq)
print('Amplitudes : ',top_freq_amp)
soundData = createSoundData(top_freq, top_freq_amp,i_time = 2, f_amp = 50 / len(top_freq))
wavfile.write('createsound_A4_v6.wav',i_framerate,soundData)
The top 10 spectral frequencies in a musical note are not the same as the center frequencies of the top 10 FFT result bin magnitudes. The actual frequency peaks can be between the FFT bins.
Not only can the frequency peak information be between FFT bins, but the phase information required to reproduce any note transients (attack, decay, etc.) can also be between bins. Spectral information that is between FFT bins is carried by a span (up to the full width) of the complex FFT result.

scipy.io.wavfile.write no sound

I want to write a signal to a .wav file, but when I do this using
scipy.io.wavfile.write it just creates a .wav file without sound.
The .wav file has the correct length, but there is no sound.
I looked for a solution to this problem but couldn't find any help.
My code below :
import scipy as sp
import numpy as np
dt = np.dtype(np.int32)
sig = np.fromfile(filename, dtype=dt, count=-1, sep='')
sp.io.wavfile.write('sound.wav', int(fS), sig)
As a test, I also did a little function :
def write_wav_sin(name,fs,f):
x = np.linspace(0,10,10*fs)
dt = np.dtype(np.float32)
sig = np.sin(2*math.pi*f*x, dtype=dt)
print(type(sig[0]))
sp.io.wavfile.write(name, fs, sig)
plt.plot(x,sig)
With this test it works, but with my other code it doesn't.
Does anyone know why I have this problem?
Check the range of values in sig by printing sig.min() and sig.max(). The values are not scaled by wavfile.write, so it might be that you have a file with values so low that you can't hear them.
Try scaling up the 32 bit integer values, or writing the data as normalized 32 bit floating point. For example, this converts sig to 32 bit floating point values in the range [-1, 1] before saving it:
m = np.max(np.abs(sig))
sigf32 = (sig/m).astype(np.float32)
sp.io.wavfile.write('sound.wav', int(fS), sigf32)
In the end I divided my whole signal so that its maximum amplitude is much smaller (my signal sometimes had an amplitude of 500000; to write it to a WAV file I divided it by 250000).
With that trick I can listen to the sound, but there is something weird, like additional artifacts/noise (I compared it to a .wav file obtained with MATLAB from the same file).
The code I used is:
import scipy as sp
import numpy as np
dt = np.dtype(np.int32)
sig = np.fromfile(filename, dtype=dt, count=-1, sep='')
sp.io.wavfile.write('sound.wav', int(fS), sig/250000)
Here's a commented example of how to generate a basic wave file with a set duration, frequency, volume and number of samples, using NumPy and Python's wave library.
import numpy as ny
import struct
import wave
class SoundFile:
    """Write a mono, 16-bit PCM WAV file from a sequence of samples.

    Parameters
    ----------
    signal : array-like or bytes-like
        Sample values on the 16-bit scale (-32768..32767). Numeric input is
        rounded, clipped to that range and stored as little-endian int16.
        ``bytes``/``bytearray``/``memoryview`` input is written unchanged.
    path : str
        Output file name; defaults to ``'test.wav'``.
    sr : int
        Sample rate in Hz; defaults to 44100.
    """

    def __init__(self, signal, path='test.wav', sr=44100):
        # https://docs.python.org/3/library/wave.html#wave.open
        # The output file is opened (and truncated) here; write() closes it.
        self.file = wave.open(path, 'wb')
        self.signal = signal
        self.sr = sr

    def _frames(self):
        """Return the signal as a bytes-like buffer of 16-bit frames."""
        if isinstance(self.signal, (bytes, bytearray, memoryview)):
            return self.signal
        samples = ny.rint(ny.asarray(self.signal, dtype=float))
        # The header declares 2 bytes per sample, so the data must really be
        # int16. Writing raw float64 bytes would be read back as noise.
        return ny.clip(samples, -32768, 32767).astype('<i2').tobytes()

    def write(self):
        """Write the header and all frames, then close the file."""
        try:
            # https://docs.python.org/3/library/wave.html#wave.Wave_write.setparams
            # (nchannels, sampwidth, framerate, nframes, comptype, compname);
            # nframes is left at 0 because writeframes() fixes up the header
            # with the actual frame count.
            self.file.setparams( ( 1, 2, self.sr, 0, 'NONE', 'noncompressed' ) )
            # https://docs.python.org/3/library/wave.html#wave.Wave_write.writeframes
            self.file.writeframes( self._frames() )
        finally:
            # Close even if conversion or writing fails so the handle is not leaked.
            self.file.close()
# signal settings
duration = 4 # duration in seconds
samplerate = 44100 # samples per second (Hz)
samples = duration * samplerate # total number of samples in the file
frequency = 440 # Hz
period = samplerate / float( frequency ) # length of one cycle, in samples
omega = ny.pi * 2 / period # angular frequency, in radians per sample
volume = 16384 # peak amplitude (16-bit full scale is 32767)
# create sine wave
# ny.float was removed from NumPy; the builtin float selects the same dtype.
xaxis = ny.arange( samples, dtype = float )
ydata = volume * ny.sin( xaxis * omega )
# The file is declared as 16-bit PCM, so convert the float samples to
# little-endian int16 before handing them to the writer.
signal = ny.resize( ydata, ( samples, ) ).astype( '<i2' )
# create sound file
f = SoundFile( signal )
f.write()
print( 'sound file created' )
I did my best to comment, update, and modify this code, which originally came from a blog post.

Writing WAV file using Python, Numpy array and WAVE module

I am trying to implement the Karplus-Strong algorithm.
Everything looks fine when I play the resulting numpy array (representing a guitar chord) in a Jupyter Notebook using Audio(y, rate=Fs).
Unfortunately, writing the numpy array y to a WAV file using the wave module gives an incorrect result (using the following Python code):
noise_output = wave.open('k-s.wav', 'w')
noise_output.setparams((1, 4, Fs, 0, 'NONE', 'not compressed'))
for i in range(0, len(y)):
value = y[i]
packed_value = struct.pack('f', value)
noise_output.writeframes(packed_value)
noise_output.close()
Each element of y is
<type 'numpy.float64'>
How should I amend the writing loop in order to write the WAV file correctly?
Some more information about the issue. Before writing to WAV, the first elements of the y array are:
[ 0.33659756 0.33659756 -0.43915295 -0.87036152 1.40708988 0.32123558
-0.6889402 1.9739982 -1.29587159 -0.12299964 2.18381762 0.82228042
0.24593503 -1.28067426 -0.67568838 -0.01843234 -1.830472 1.2729578
-0.56575346 0.55410736]
After writing the elements to the WAV file, close the WAV file and read it again, I got this for the first 20 elements of the collected array:
[ 1051481732 1051481732 -1092560728 -1084305405 1068768133 1050966269
-1087349149 1073523705 -1079648481 -1107564740 1074512811 1062371576
1048303204 -1079775966 -1087571478 -1130954901 -1075163928 1067642952
-1089415880 1057872379]
Here are code samples to write a (stereo) wave file using the wave standard library.
I included two examples: one using numpy, and one that doesn't require any dependencies.
Using a numpy array
Note that if your data is in a numpy array, no need for the struct library.
import wave
import numpy as np
samplerate = 44100
# A note on the left channel for 1 second.
# endpoint=False keeps the sample spacing at exactly 1/samplerate seconds.
t = np.linspace(0, 1, samplerate, endpoint=False)
left_channel = 0.5 * np.sin(2 * np.pi * 440.0 * t)
# Noise on the right channel.
right_channel = np.random.random(size=samplerate)
# Put the channels together. After the transpose the shape is (44100, 2):
# one row per frame, so the flattened bytes are interleaved left/right.
audio = np.array([left_channel, right_channel]).T
# Convert to (little-endian) 16 bit integers.
audio = (audio * (2 ** 15 - 1)).astype("<h")
with wave.open("sound1.wav", "w") as f:
    # 2 Channels.
    f.setnchannels(2)
    # 2 bytes per sample.
    f.setsampwidth(2)
    f.setframerate(samplerate)
    f.writeframes(audio.tobytes())
Using a list
This is (almost) the same code but without using numpy. No external dependencies are required.
import math
import random
import struct
import wave
samplerate = 44100
# A 440 Hz note on the left channel for 1 second.
left_channel = [
    0.5 * math.sin(2 * math.pi * 440.0 * i / samplerate) for i in range(samplerate)
]
# Noise on the right channel.
right_channel = [random.random() for _ in range(samplerate)]
# Scale factor from the [-1, 1] float range to 16-bit integers.
scale = 2 ** 15 - 1
# Pack each frame (left sample, then right sample) as two little-endian
# 16-bit integers and join them, so the file is written with one
# writeframes() call instead of one call per sample.
frames = b"".join(
    struct.pack("<hh", int(left * scale), int(right * scale))
    for left, right in zip(left_channel, right_channel)
)
with wave.open("sound2.wav", "w") as f:
    f.setnchannels(2)
    f.setsampwidth(2)
    f.setframerate(samplerate)
    f.writeframes(frames)
import scipy.io.wavfile
scipy.io.wavfile.write("karplus.wav", Fs, y)
Tada! AFAIK works with float64 and float32, and probably others. For stereo, shape must be (nb_samples, 2). See scipy.io.wavfile.write.
Read and write wave file to and from a file:
from scipy.io import wavfile
sampling_rate, data = wavfile.read(wpath)
wavfile.write('abc1.wav', sampling_rate, data)

Categories

Resources