I have been looking into a way to play sounds from a list of samples, and I found some modules that can do this.
I am using the AudioLazy module to play the sound with the following script:
from audiolazy import AudioIO

sound = Somelist
with AudioIO(True) as player:
    player.play(sound, rate=44100)
The problem with this code is that it stops the whole application until the sound finishes playing, and I can't play multiple sounds at the same time.
My program is interactive, so what I want is to be able to play multiple sounds at the same time. For instance, I could start a 5-second sound and then, 2 seconds in, start another 5-second sound.
And I don't want the whole program to block until the sound finishes playing.
Here is a simpler solution using pydub.
Using the overlay function of the AudioSegment module, you can very easily superimpose multiple audio segments on top of each other.
Here is working code to combine three audio files. Using the same concept, you can combine any number of them.
More on the overlay function here
pydub supports multiple audio formats as well.
from pydub import AudioSegment
from pydub.playback import play
audio1 = AudioSegment.from_file("chunk1.wav") #your first audio file
audio2 = AudioSegment.from_file("chunk2.wav") #your second audio file
audio3 = AudioSegment.from_file("chunk3.wav") #your third audio file
mixed = audio1.overlay(audio2) #combine , superimpose audio files
mixed1 = mixed.overlay(audio3) #Further combine , superimpose audio files
#If you need to save mixed file
mixed1.export("mixed.wav", format='wav') #export mixed audio file
play(mixed1) #play mixed audio file
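Note that pydub's play() call is blocking, just like the AudioIO example in the question. If you need the fire-and-forget behaviour the question asks for, one option (a small sketch, not a pydub feature per se) is to start the playback in a background thread:
import threading
# Start playback without blocking the rest of the program
threading.Thread(target=play, args=(mixed1,), daemon=True).start()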
Here are updates as per our discussions.
First we create a 44.1 kHz signal and save it to sound.wav.
Next we read the wave file and save the signal to a text file.
Then we create three variations of the input signal to test overlay. The original signal has dtype int16.
Then we create three audio segments and mix/overlay them as above.
The wav signal data is stored in test.txt.
Working Modified Code
import numpy as np
from scipy.io.wavfile import read
from pydub import AudioSegment
from pydub.playback import play
import wave, struct, math

# Create a 44.1 kHz signal and save it to 'sound.wav'
sampleRate = 44100.0  # hertz
duration = 1.0        # seconds
frequency = 440.0     # hertz

wavef = wave.open('sound.wav', 'w')
wavef.setnchannels(1)  # mono
wavef.setsampwidth(2)
wavef.setframerate(sampleRate)
for i in range(int(duration * sampleRate)):
    # cos(2*pi*f*t) with t = i / sampleRate gives a 440 Hz tone
    value = int(32767.0 * math.cos(2.0 * frequency * math.pi * float(i) / float(sampleRate)))
    data = struct.pack('<h', value)
    wavef.writeframesraw(data)
wavef.writeframes(b'')
wavef.close()
# Read the wave file and save the signal to a text file
rate, signal = read("sound.wav")
np.savetxt('test.txt', signal, delimiter=',')  # signal is a 1-D int16 array

# Load wav data from the text file (original dtype, int16)
wavedata1 = np.loadtxt("test.txt", comments="#", delimiter=",", unpack=False, dtype=np.int16)
# Create a variation of the signal by loading it as int32
# (the bytes are later reinterpreted as 16-bit samples, so this sounds different)
wavedata2 = np.loadtxt("test.txt", comments="#", delimiter=",", unpack=False, dtype=np.int32)
# Create another variation by loading it as float16
wavedata3 = np.loadtxt("test.txt", comments="#", delimiter=",", unpack=False, dtype=np.float16)
#create first audio segment
audio_segment1 = AudioSegment(
    wavedata1.tobytes(),
    frame_rate=rate,
    sample_width=2,
    channels=1
)
#create second audio segment
audio_segment2 = AudioSegment(
    wavedata2.tobytes(),
    frame_rate=rate,
    sample_width=2,
    channels=1
)
#create third audio segment
audio_segment3 = AudioSegment(
    wavedata3.tobytes(),
    frame_rate=rate,
    sample_width=2,
    channels=1
)
# Play audio (requires ffplay, or pyaudio):
play(audio_segment1)
play(audio_segment2)
play(audio_segment3)
#Mix three audio segments
mixed1 = audio_segment1.overlay(audio_segment2) #combine , superimpose audio files
mixed2 = mixed1.overlay(audio_segment3) #Further combine , superimpose audio files
#If you need to save mixed file
mixed2.export("mixed.wav", format='wav') #export mixed audio file
play(mixed2) #play mixed audio file
Using multiple threads will solve your problem:
import threading
from audiolazy import AudioIO

sound = Somelist
with AudioIO(True) as player:
    t = threading.Thread(target=player.play, args=(sound,), kwargs={'rate': 44100})
    t.start()
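For example, to overlap two clips (assuming sound1 and sound2 are sample lists like Somelist above), you can start one thread per clip. Whether a single AudioIO player can service two play() calls at once depends on audiolazy's internals, so treat this as a sketch:
import threading
from audiolazy import AudioIO

with AudioIO(True) as player:
    t1 = threading.Thread(target=player.play, args=(sound1,), kwargs={'rate': 44100})
    t2 = threading.Thread(target=player.play, args=(sound2,), kwargs={'rate': 44100})
    t1.start()
    t2.start()
    # keep the AudioIO context open until both clips have finished
    t1.join()
    t2.join()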
I suggest using PyAudio to do this.
import numpy
import pyaudio
import wave

sound1 = wave.open("/path/to/sound1", 'rb')
sound2 = wave.open("/path/to/sound2", 'rb')

def callback(in_data, frame_count, time_info, status):
    data1 = sound1.readframes(frame_count)
    data2 = sound2.readframes(frame_count)
    decodeddata1 = numpy.frombuffer(data1, numpy.int16)
    decodeddata2 = numpy.frombuffer(data2, numpy.int16)
    # Mix the two streams at half volume each to avoid clipping
    newdata = (decodeddata1 * 0.5 + decodeddata2 * 0.5).astype(numpy.int16)
    return (newdata.tobytes(), pyaudio.paContinue)
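To actually hear the mix, the callback has to be attached to an output stream. A minimal sketch of that wiring is below; it assumes both files share the same sample rate, sample width, and channel count (taken here from sound1), which is not guaranteed in general:
import time

p = pyaudio.PyAudio()
stream = p.open(format=p.get_format_from_width(sound1.getsampwidth()),
                channels=sound1.getnchannels(),
                rate=sound1.getframerate(),
                output=True,
                stream_callback=callback)

stream.start_stream()
while stream.is_active():  # runs until one of the files is exhausted
    time.sleep(0.1)
stream.stop_stream()
stream.close()
p.terminate()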
Related
I am trying to record audio from the microphone with Python, and I have the following code:
import pyaudio
import wave
import threading

FORMAT = pyaudio.paInt16
CHANNELS = 2
RATE = 44100
CHUNK = 1024
WAVE_OUTPUT_FILENAME = "file.wav"
stop_ = False

audio = pyaudio.PyAudio()
stream = audio.open(format=FORMAT, channels=CHANNELS,
                    rate=RATE, input=True,
                    frames_per_buffer=CHUNK)

def stop():
    global stop_
    while True:
        if not input('Press Enter >>>'):
            print('exit')
            stop_ = True

t = threading.Thread(target=stop, daemon=True).start()

frames = []
while True:
    data = stream.read(CHUNK)
    frames.append(data)
    if stop_:
        break

stream.stop_stream()
stream.close()
audio.terminate()

waveFile = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
waveFile.setnchannels(CHANNELS)
waveFile.setsampwidth(audio.get_sample_size(FORMAT))
waveFile.setframerate(RATE)
waveFile.writeframes(b''.join(frames))
waveFile.close()
My code runs fine, but when I play back my recording, I don't hear any sound in the output file (file.wav).
Why does this happen, and how do I fix it?
Your code is working fine. The problem you are facing is a matter of access rights: the recording contains constant 0 data, so you can't hear any sound in the generated wav file. I assume your microphone device is installed and working properly. If you are not sure about its status, check it as follows for your operating system:
Mac OS:
System Preferences -> Sound -> Input, where you can watch the input level bars move as you make some sound. Make sure the selected device type is Built-in.
Windows OS:
Open the Sound settings and test the microphone by clicking "Listen to this device"; you may want to uncheck it afterwards because it loops your voice back to the speakers and creates loud feedback.
Most probably you are using Mac OS. I had a similar issue because I was running the Python code from the Atom editor. Try running your code from the Mac OS Terminal (or PowerShell if you are using Windows); if a popup asks for microphone access on Mac OS, press OK. That's it! Your code will record fine. As a test, run the code below to check whether you can visualize the sound, and make sure to run it from the terminal (not from an editor or IDE).
import queue
import sys
from matplotlib.animation import FuncAnimation
import matplotlib.pyplot as plt
import numpy as np
import sounddevice as sd
# Lets define audio variables
# We will use the default PC or Laptop mic to input the sound
device = 0 # id of the audio device by default
window = 1000 # window for the data
downsample = 1 # how many samples to drop
channels = [1] # a list of audio channels
interval = 30 # update interval in milliseconds for the plot
# lets make a queue
q = queue.Queue()
# Please note that this sd.query_devices has an s in the end.
device_info = sd.query_devices(device, 'input')
samplerate = device_info['default_samplerate']
length = int(window*samplerate/(1000*downsample))
# lets print it
print("Sample Rate: ", samplerate)
# Typical sample rate is 44100 so lets see.
# Ok so lets move forward
# Now we require a variable to hold the samples
plotdata = np.zeros((length,len(channels)))
# Lets look at the shape of this plotdata
print("plotdata shape: ", plotdata.shape)
# So its vector of length 44100
# Or we can also say that its a matrix of rows 44100 and cols 1
# next is to make fig and axis of matplotlib plt
fig,ax = plt.subplots(figsize=(8,4))
# lets set the title
ax.set_title("PyShine")
# Make a matplotlib.lines.Line2D plot item of color green
# R,G,B = 0,1,0.29
lines = ax.plot(plotdata,color = (0,1,0.29))
# We will use an audio call back function to put the data in queue
def audio_callback(indata, frames, time, status):
    q.put(indata[::downsample, [0]])
# now we will use an another function
# It will take frame of audio samples from the queue and update
# to the lines
def update_plot(frame):
    global plotdata
    while True:
        try:
            data = q.get_nowait()
        except queue.Empty:
            break
        shift = len(data)
        plotdata = np.roll(plotdata, -shift, axis=0)
        # Elements that roll beyond the last position are re-introduced
        plotdata[-shift:, :] = data
    for column, line in enumerate(lines):
        line.set_ydata(plotdata[:, column])
    return lines
ax.set_facecolor((0,0,0))
# Lets add the grid
ax.set_yticks([0])
ax.yaxis.grid(True)
""" INPUT FROM MIC """
stream = sd.InputStream( device = device, channels = max(channels), samplerate = samplerate, callback = audio_callback)
""" OUTPUT """
ani = FuncAnimation(fig,update_plot, interval=interval,blit=True)
with stream:
plt.show()
Save this file as voice.py in a folder (say, AUDIO). Then cd into the AUDIO folder from the terminal and execute it using:
python3 voice.py
or
python voice.py
depending on the name of your Python environment.
By using print(sd.query_devices()), I see a list of devices as below:
Microsoft Sound Mapper - Input, MME (2 in, 0 out)
Microphone (AudioHubNano2D_V1.5, MME (2 in, 0 out)
Internal Microphone (Conexant S, MME (2 in, 0 out)
...
However, if I use device = 0, I still receive sound from the USB microphone, which is device number 1. Is it that, by default, all audio signals go to the Sound Mapper? That would mean that with device = 0 I get audio from every input, and if I only want audio from one particular device, I need to select its number x with device = x.
I have another question: is it possible to capture the audio signals from devices 1 and 2 in one application, but separately?
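Regarding the last question: in principle, yes. sounddevice lets you open one InputStream per device, each with its own callback, so two microphones can be captured separately in the same program. A minimal sketch (the device indices 1 and 2 are assumptions based on the listing above):
import queue
import sounddevice as sd

q1, q2 = queue.Queue(), queue.Queue()

def make_callback(q):
    def callback(indata, frames, time, status):
        q.put(indata.copy())  # keep each device's blocks in its own queue
    return callback

# Device indices 1 and 2 are assumptions taken from the listing above
stream1 = sd.InputStream(device=1, channels=1, callback=make_callback(q1))
stream2 = sd.InputStream(device=2, channels=1, callback=make_callback(q2))

with stream1, stream2:
    sd.sleep(2000)  # capture for 2 seconds; each mic's data stays in its own queue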
I'm trying to extract the frequency from .wav files, so I'm using Python's wave module and numpy, and I'm almost done! But I'm facing an error. I followed the answer at this URL: Extracting frequencies from a wav file python
When I extract the frequency from a .wav file that I created myself by following that answer, it succeeds. However, when I extract the frequency from a .wav file recorded with a mic, it raises an error:
struct.error: unpack requires a buffer of 288768 bytes
The following is my code:
import wave
import struct
import numpy as np

if __name__ == '__main__':
    wf = wave.open('test6.wav', 'rb')
    frame = wf.getnframes()
    data_size = wf.getnframes()
    frate = wf.getframerate()
    data = wf.readframes(data_size)
    wf.close()

    duration = frame / float(frate)
    data = struct.unpack('{n}h'.format(n=data_size), data)
    data = np.array(data)

    w = np.fft.fft(data)
    freqs = np.fft.fftfreq(len(w))
    print(freqs.min(), freqs.max())
    # (-0.5, 0.499975)

    # Find the peak in the coefficients
    idx = np.argmax(np.abs(w))
    freq = freqs[idx]
    freq_in_hertz = abs(freq * frate)
    print('frequency: ', freq_in_hertz)
    print('duration: ', duration)
The 288768 in the error message is exactly double data_size.
So when I use data_size = wf.getnframes() * 2, it does not raise the error. But then it raises an error with the file that was created by code.
How can I solve this?
Given that the size of the buffer is exactly double data_size, I would guess that the .wav file you recorded with your mic has two channels instead of one. You can verify this by looking at the output of wf.getnchannels(). It should be 2 for your mic recording.
If this is the case, you can load just one channel of your mic recording by following this answer:
Read the data of a single channel from a stereo wave file in Python
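If you only need one channel from the mic recording, a small sketch of that approach is below: read all frames, interpret them with numpy, reshape by the channel count, and keep column 0. The file name test6.wav is simply the one from the question:
import wave
import numpy as np

wf = wave.open('test6.wav', 'rb')
nchannels = wf.getnchannels()  # expected to be 2 for the mic recording
nframes = wf.getnframes()
frate = wf.getframerate()
data = wf.readframes(nframes)
wf.close()

# One int16 per channel per frame, so the buffer holds nframes * nchannels samples
samples = np.frombuffer(data, dtype=np.int16).reshape(-1, nchannels)
left = samples[:, 0]  # a single channel, ready for np.fft.fft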
I'm trying to convert my microphone audio into mp3 files and later save them to disk, so I can record and save audio tracks.
But to play it back with pyaudio I need to convert it into wav format.
It's originally recorded in wav, and I'm trying to do wav -> mp3 -> wav.
I've put together the smallest possible debug version of my code, and it looks like the following:
import pyaudio
from array import array
from struct import pack
from sys import byteorder
from io import BytesIO
from pydub import AudioSegment

p = pyaudio.PyAudio()
stream_mic = p.open(rate=11000,
                    format=pyaudio.paInt16,
                    channels=1,
                    input=True,
                    frames_per_buffer=500)
stream_out = p.open(rate=11000,
                    format=pyaudio.paInt16,
                    channels=1,
                    output=True,
                    frames_per_buffer=500)

def is_odd(a):
    return bool(a - ((a>>1)<<1))

def wav_obj(raw_data):
    wavHandle = AudioSegment(data=raw_data, sample_width=2, frame_rate=11000, channels=1)
    return wavHandle

def wavToMp3(audioFrame):
    mp3 = BytesIO()
    file_handle = audioFrame.export(mp3, format="mp3")
    mp3.seek(0)
    data = mp3.read()
    ## == Data needs to be a multiple of (sample_width * channels)
    ##    Easiest way is to strip off trailing data
    while is_odd(len(data)):
        data = data[:-1]
    return AudioSegment(data=data, sample_width=2, frame_rate=11000, channels=1)

def mp3ToWav(audioFrame):
    #remasteredAudioFrame = audioFrame.set_frame_rate(11000)
    wav = BytesIO()
    file_handle = audioFrame.export(wav, format="wav")
    wav.seek(0)
    return AudioSegment(data=wav.read(), sample_width=2, frame_rate=11000, channels=1)
while 1:
    snd_data = array('h', stream_mic.read(500))
    if byteorder == 'big':
        snd_data.byteswap()
    frame = array('h')
    frame.extend(snd_data)

    wav = wav_obj(frame)

    ## == convert from .wav -> .mp3 -> .wav
    ##    just to see the loss of audio.
    mp3 = wavToMp3(wav)
    wav = mp3ToWav(mp3)

    stream_out.write(wav.raw_data)

stream_out.stop_stream()
stream_mic.stop_stream()
stream_out.close()
stream_mic.close()
p.terminate()
This creates horrible static; somehow it's a 100% loss of audio.
At first I thought it was the clipping with data[:-1], but that doesn't appear to be the problem.
Secondly, I thought the frame rate might be off, so I tried set_frame_rate(11000), but that didn't work either.
Commenting out these two lines:
mp3 = wavToMp3(wav)
wav = mp3ToWav(mp3)
makes the playback sound "beautiful". There's no data loss and the quality is just as I've defined it to be.
I can't for the life of me figure out where I'm mangling the data in the compression step.
Note: I want to do this in-memory if possible, because I'll later work with the data to create effects etc.
Progress
As @Anthon pointed out, I should isolate the conversions one by one and see which of the two fails. I did this by saving the wav -> mp3 result to disk.
from time import time

started = time()
sound = AudioSegment(data=b'', sample_width=2, frame_rate=11000, channels=1)
while 1:
    snd_data = array('h', stream_mic.read(500))
    if byteorder == 'big':
        snd_data.byteswap()
    frame = array('h')
    frame.extend(snd_data)

    wav = wav_obj(frame)

    ## == convert from .wav -> .mp3 -> .wav
    ##    just to see the loss of audio.
    mp3 = wavToMp3(wav)
    sound = sound + mp3

    #wav = mp3ToWav(mp3)
    #stream_out.write(mp3.raw_data)

    if time() - started > 1.5:
        break

print(sound.raw_data)
with open('test.mp3', 'wb') as fh:
    fh.write(sound.raw_data)
I then slammed test.mp3 into Audacity to see what the waveform looked like.
Sure enough, it looks like the .mp3 conversion is what's wonky.
To the naked eye it looks as if the audio frames are getting dragged out and distorted individually. The sound should be roughly 1.5 seconds long in total, but looking at the waveform, there are pauses and delays in each frame added by the compressor:
So I bumped the value frames_per_buffer=500 up to 5000 everywhere this value was used:
stream_mic = p.open(frames_per_buffer=5000, ...)
stream_out = p.open(frames_per_buffer=5000, ...)
stream_mic.read(5000)
Any value above my 1.5 seconds should be good enough.
Sure enough, the waveform looks completely different:
What appears to be working is the following:
wav = wav_obj(frame)
sound = sound + wav

sound.export("test.mp3",
             format="mp3",
             bitrate="11k",
             tags={"album": "test", "artist": "Not Ariana Grande"})
So somehow, wav frame -> mp3 frame -> combine several mp3 frames doesn't work.
But doing wav frame -> combine several wav frames -> export to mp3 works.
This is where I'm stuck again.
Obviously the mp3 conversion is wonky, and it somehow adds distortion across the waveform per segment.
My hope is that someone here on SO has worked with pydub, and especially with mp3 conversion, because I'm quite lost.
Here are the mp3s gathered:
https://hvornum.se/500.mp3
https://hvornum.se/5000.mp3
I've been working on a project to detect the time shift between two streaming audio signals. I'm working with Python 3 and PyAudio, and I'm using a MOTU 828 sound card with a Neumann KU-100 microphone, which takes a stereo input. When I check my input_device_index, I have the correct one, which is the 4th, connected to the MOTU sound card.
However, when I record with:
import time
import pyaudio
import wave

CHUNK = 1024 * 3  # Chunk is the bytes which are currently processed
FORMAT = pyaudio.paInt16
RATE = 44100
RECORD_SECONDS = 2
WAVE_OUTPUT = "temp.wav"

p = pyaudio.PyAudio()
stream = p.open(format=FORMAT, channels=2, rate=RATE, input=True, frames_per_buffer=CHUNK, input_device_index=4)

frames = []  # np array storing all the data
for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
    data = stream1.read(CHUNK)
    frames.append(data1)

stream.stop_stream()
stream.close()
p.terminate()

wavef = wave.open(WAVE_OUTPUT, 'wb')  # opening the file
wavef.setnchannels(1)
wavef.setsampwidth(p.get_sample_size(FORMAT))
wavef.setframerate(RATE)
wavef.writeframes(b''.join(frames1))  # writing the data to be saved
wavef.close()
I get a wave file with no sound and almost no noise (naturally).
I can also record with 3rd-party software using this specific microphone, and that works completely fine.
NOTE:
The sound card normally has 24-bit depth; I also tried paInt24, which records a wave file with pure noise.
I think you used the wrong variable names. Looking at your code, the incorrect lines are:
data = stream1.read(CHUNK)
frames.append(data1)
wavef.writeframes(b''.join(frames1))
and the correct lines are:
data = stream.read(CHUNK)
frames.append(data)
wavef.writeframes(b''.join(frames))
The following code writes a simple sine wave at a frequency of 440 Hz to a mono WAV file. How should this code be changed in order to produce a stereo WAV file? The second channel should be at a different frequency.
import math
import wave
import struct

freq = 440.0
data_size = 40000
fname = "WaveTest.wav"
frate = 11025.0  # framerate as a float
amp = 64000.0    # multiplier for amplitude

sine_list_x = []
for x in range(data_size):
    sine_list_x.append(math.sin(2*math.pi*freq*(x/frate)))

wav_file = wave.open(fname, "w")

nchannels = 1
sampwidth = 2
framerate = int(frate)
nframes = data_size
comptype = "NONE"
compname = "not compressed"

wav_file.setparams((nchannels, sampwidth, framerate, nframes,
                    comptype, compname))

for s in sine_list_x:
    # write the audio frames to file
    wav_file.writeframes(struct.pack('h', int(s*amp/2)))

wav_file.close()
Build a parallel sine_list_y list with the other frequency / channel, set nchannels=2, and in the output loop use for s, t in zip(sine_list_x, sine_list_y): as the header clause, and a body with two writeframes calls -- one for s, one for t. IOW, corresponding frames for the two channels "alternate" in the file.
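A minimal sketch of that modification, applied to the code above (the 880 Hz second-channel tone is just an example choice):
import math
import wave
import struct

freq_left, freq_right = 440.0, 880.0  # right-channel frequency is an arbitrary example
data_size = 40000
frate = 11025.0
amp = 64000.0

sine_list_x = [math.sin(2*math.pi*freq_left*(x/frate)) for x in range(data_size)]
sine_list_y = [math.sin(2*math.pi*freq_right*(x/frate)) for x in range(data_size)]

wav_file = wave.open("WaveTestStereo.wav", "w")
wav_file.setparams((2, 2, int(frate), data_size, "NONE", "not compressed"))

for s, t in zip(sine_list_x, sine_list_y):
    # corresponding frames for the two channels alternate: left, right, left, right, ...
    wav_file.writeframes(struct.pack('h', int(s*amp/2)))
    wav_file.writeframes(struct.pack('h', int(t*amp/2)))

wav_file.close()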
See e.g. this page for a thorough description of all possible WAV file formats, and I quote:
Multi-channel digital audio samples are stored as interlaced wave data, which simply means that the audio samples of a multi-channel (such as stereo and surround) wave file are stored by cycling through the audio samples for each channel before advancing to the next sample time. This is done so that the audio files can be played or streamed before the entire file has been read. This is handy when playing a large file from disk (that may not completely fit into memory) or streaming a file over the Internet. The values in the diagram below would be stored in a Wave file in the order they are listed in the Value column (top to bottom).
and the following table clearly shows the channels' samples going left, right, left, right, ...
For an example producing a stereo .wav file, see the test_wave.py module.
The test produces an all-zero file.
You can modify it by inserting alternating sample values; a sketch of that follows the snippet below.
nchannels = 2
sampwidth = 2
framerate = 8000
nframes = 100

# ...

def test_it(self):
    self.f = wave.open(TESTFN, 'wb')
    self.f.setnchannels(nchannels)
    self.f.setsampwidth(sampwidth)
    self.f.setframerate(framerate)
    self.f.setnframes(nframes)
    output = b'\0' * nframes * nchannels * sampwidth
    self.f.writeframes(output)
    self.f.close()
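To go from the all-zero file to actual stereo audio, the output buffer can be built from interleaved samples rather than zero bytes. A rough sketch under the same parameters (the two tone frequencies are arbitrary examples):
import math
import struct
import wave

nchannels, sampwidth, framerate, nframes = 2, 2, 8000, 100
freq_left, freq_right = 440.0, 660.0  # arbitrary example tones
amp = 16000

frames = []
for i in range(nframes):
    left = int(amp * math.sin(2 * math.pi * freq_left * i / framerate))
    right = int(amp * math.sin(2 * math.pi * freq_right * i / framerate))
    frames.append(struct.pack('<hh', left, right))  # one interleaved stereo frame

f = wave.open('stereo_test.wav', 'wb')
f.setnchannels(nchannels)
f.setsampwidth(sampwidth)
f.setframerate(framerate)
f.writeframes(b''.join(frames))
f.close()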
Another option is to use the SciPy and NumPy libraries. In the example below, we produce a stereo wave file where the left channel has a low-frequency tone while the right channel has a higher-frequency tone. (Note: use VLC player to play the audio.)
To install SciPy, see: https://pypi.org/project/scipy/
import numpy as np
from scipy.io import wavfile
# User input
duration=5.0
toneFrequency_left=500 #Hz (20,000 Hz max value)
toneFrequency_right=1200 #Hz (20,000 Hz max value)
# Constants
samplingFrequency=48000
# Generate Tones
time_x=np.arange(0, duration, 1.0/float(samplingFrequency))
toneLeft_y=np.cos(2.0 * np.pi * toneFrequency_left * time_x)
toneRight_y=np.cos(2.0 * np.pi * toneFrequency_right * time_x)
# A 2D array where the left and right tones are contained in their respective rows
tone_y_stereo=np.vstack((toneLeft_y, toneRight_y))
# Reshape 2D array so that the left and right tones are contained in their respective columns
tone_y_stereo=tone_y_stereo.transpose()
# Produce an audio file that contains stereo sound
wavfile.write('stereoAudio.wav', samplingFrequency, tone_y_stereo)
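The note about VLC is there because wavfile.write stores these float64 arrays as a 64-bit float WAV, which some players do not handle. If that is a problem, scaling the signal to 16-bit integers before writing produces a conventional PCM file (the 0.9 headroom factor is just a precaution against clipping):
# Scale to int16 PCM so the file plays in virtually any player
tone_y_int16 = (tone_y_stereo * 0.9 * 32767).astype(np.int16)
wavfile.write('stereoAudio16bit.wav', samplingFrequency, tone_y_int16)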
Environment Notes
Python 3.7.1
SciPy 1.1.0