I want to read data from the file and plot the fft of it.
I have used the scipy's fft function to plot the fft of the synthetically generated signal data however while I am trying to save the generated signal data and plot the fft I am getting the peaks as I was getting but the value of the amplitude and frequency is off as compared to the Original plotting.I found problem with the second fft function. Can anyone help me with the same.
import numpy as np
import pandas as pd
import scipy
from scipy.fftpack import fft,fftfreq
import matplotlib.pyplot as plt
import csv
#Function to plot the raw data
def raw_plot(signals,time):
'''Plot the raw values in the time domain '''
plt.figure(0)
plt.plot(time,signals)
plt.xlabel("Time(s)")
plt.ylabel("Amplitude(g)")
plt.show()
#Function to plot the fft of the data
def fft_signal(signal, sampling_frequency, timestart, timeend):
T = 1 / sampling_frequency
N = (timeend - timestart) * sampling_frequency
x = np.linspace(0.0, 1.0 / (2.0 * T), int(N / 2))
# print("Values of X are :",x)
# print(len(x))
yr2 = fft(signal)
y2 = 2 / N * np.abs(yr2[0:int(N / 2)])
# amp_spec = abs(fft(signal))/N
# freq = np.linspace(0,2,num=N)
plt.figure(2)
plt.plot(x, y2)
plt.xlabel("Frequency")
plt.ylabel("Amplitude")
plt.show()
def fftsignal2(data, sampling_freq = 100, timestep = 1 ):
N = len(data)
FS = 1000
T= 1/FS
timestep = timestep
yr2 = fft(data)
y2 = 2 / N * np.abs(yr2[0:int(N / 2)])
# f= np.arange(0,N)*FS/N
# f= np.linspace(0,1.0/T,N)
frq = np.fft.fftfreq(N,d=timestep)
# f= np.linspace(0.0,int(N/2)*sampling_freq)
frq= frq[0:len(frq)//2]
print("Frequencies Length :",len(frq))
print("DATA LENGTH :",len(y2))
plt.figure(3)
plt.plot(frq,y2)
plt.xlabel("Frequency")
plt.ylabel("Amplitude")
plt.show()
Mag1_Hz = 70
Mag1_AMP = 10
Mag4_AMP = 60
Mag4_Hz= 50
Mag2_Hz = 20
Mag2_AMP = 8
time_start = 0
time_end = 4
Sampling_frequency = 100
No_of_samples = (time_end-time_start) * Sampling_frequency
time = np.linspace(time_start,time_end,No_of_samples)
#Generate the signals values
data_signal = Mag1_AMP * np.sin(2* np.pi * Mag1_Hz * time ) + Mag2_AMP * np.cos(2* np.pi * Mag2_Hz * time)
raw_plot(signals=data_signal[:800],time=time[:800])
fft_signal(data_signal,sampling_frequency=Sampling_frequency,timestart=time_start,timeend=time_end)
# for (time, datavalues) in zip(time,data_signal):
# with open("data_signals_27.csv","a",newline="") as file:
# writefile = csv.writer(file)
# print("Time%f,Data %f"%(time,datavalues))
# print(time,datavalues)
# writefile.writerow([time,datavalues])
fftsignal2(data= data_signal, sampling_freq= Sampling_frequency,timestep=2)
The peaks are coming right in the fft_signal function no 1 while I am trying to construct the fftsignal2 to plot the fft of the data so the peaks which I am getting is correct but the magnitude and frequency is off.I don't understand why ? Can anyone help me with that.
Related
i'm trying to get the frequency of a signal via fourier transform but it's not able to recognize it (sets the peak to f=0). Maybe something is wrong in my code (FULL reprudible code at the end of the page):
PF = fft.fft(Y[0,:])/Npoints #/Npoints to get the true amplitudes
ZF = fft.fft(Y[1,:])/Npoints
freq = fft.fftfreq(Npoints,deltaT)
PF = fft.fftshift(PF) #change of ordering so that the frequencies are increasing
ZF = fft.fftshift(ZF)
freq = fft.fftshift(freq)
plt.plot(freq, np.abs(PF))
plt.show()
plt.plot(T,Y[0,:])
plt.show()
where Npoints is the number of intervals (points) and deltaT is the time spacing of the intervals. You can see that the peak is at f=0
I show also a plot of Y[0,:] (my signal) over time where it's clear that the signal has a characteristic frequency
FULL REPRUDICIBLE CODE
import numpy as np
import matplotlib.pyplot as plt
#numerical integration
from scipy.integrate import solve_ivp
import scipy.fft as fft
r=0.5
g=0.4
e=0.6
H=0.6
m=0.15
#define a vector of K between 0 and 4 with 50 componets
K=np.arange(0.1,4,0.4)
tsteps=np.arange(7200,10000,5)
Npoints=len(tsteps)
deltaT=2800/Npoints #sample spacing
for k in K :
i=0
def RmAmodel(t,y):
return [r*y[0]*(1-y[0]/k)-g*y[0]/(y[0]+H)*y[1], e*g*y[0]/(y[1]+H)*y[1]-m*y[1]]
sol = solve_ivp(RmAmodel, [0,10000], [3,3], t_eval=tsteps) #t_eval specify the points where the solution is desired
T=sol.t
Y=sol.y
vk=[]
for i in range(Npoints):
vk.append(k)
XYZ=[vk,Y[0,:],Y[1,:]]
#check periodicity over P and Z with fourier transform
#try Fourier analysis just for the last value of K
PF = fft.fft(Y[0,:])/Npoints #/Npoints to get the true amplitudes
ZF = fft.fft(Y[1,:])/Npoints
freq = fft.fftfreq(Npoints,deltaT)
PF = fft.fftshift(PF) #change of ordering so that the frequencies are increasing
ZF = fft.fftshift(ZF)
freq = fft.fftshift(freq)
plt.plot(T,Y[0,:])
plt.show()
plt.plot(freq, np.abs(PF))
plt.show()
I can't pinpoint where the problem is. It looks like there is some problem in the fft code. Anyway, I have little time so I will just put a sample code I made before. You can use it as reference or copy-paste it. It should work.
import numpy as np
import matplotlib.pyplot as plt
from scipy.fft import fft, fftfreq
fs = 1000 #sampling frequency
T = 1/fs #sampling period
N = int((1 / T) + 1) #number of sample points for 1 second
t = np.linspace(0, 1, N) #time array
pi = np.pi
sig1 = 1 * np.sin(2*pi*10*t)
sig2 = 2 * np.sin(2*pi*30*t)
sig3 = 3 * np.sin(2*pi*50*t)
#generate signal
signal = sig1 + sig2 + sig3
#plot signal
plt.plot(t, signal)
plt.show()
signal_fft = fft(signal) #getting fft
f2 = np.abs(signal_fft / N) #full spectrum
f1 = f2[:N//2] #half spectrum
f1[1:] = 2*f1[1:] #actual amplitude
freq = fs * np.linspace(0,N/2,int(N/2)) / N #frequency array
#plot fft result
plt.plot(freq, f1)
plt.xlim(0,100)
plt.show()
I am computing PSD of a signal, and I want the power from frequency range 0Hz to 20Hz. This is what i tried using linspace
df = pd.read_csv(path)
df = pd.DataFrame(df)
x = np.linspace(0, 20, 41)
dt = x[1] - x[0]
fs = 1 / dt
f,P = signal.welch(df, fs=5, nperseg=30, noverlap=5,axis=0)
Here, I get 6 frequency components from 0Hz to 2.5Hz, but I want to compute the power for frequency range till 20Hz
Can anyone help me here to do the same.
The PSD only goes from 0 to fs/2, you should specify the correct sample frequency in the fs parameter, 1024 in your case.
This example illustrates how to get the PSD for a sinusoidal signal:
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import welch
Fs = 1024 # Hz
Ts = 1/Fs
time = np.arange(0, 2, Ts) # 2 seconds
freqs = [20, 50, 100] # frequencies in Hz
x = np.zeros(len(time))
for f in freqs:
x += np.sin(2 * np.pi * f * time)
plt.plot(x)
f, P = welch(x, fs=Fs)
plt.figure()
plt.stem(f, P)
Below is my code
where I am wanting to sweep a 400MHz tone between -50Mhz and 50Mhz. Meaning the
sweep should span between 350MHz and 450MHz. But that is no the case. Don't
understand what the reason for this is. It has been suggested that this is because frequency is the derivative of the phase. I have given that a shot too in the commented lines in the 'sweep_sine' function but that does not seem to help either. Any help would be appreciated.
Image of unexpected program output added. As you can see I am able to shift my 400MHz tone to 300MHz. When I try to sweep in a fashion similar to shift; the output is incorrect, as in it does not sweep from 350MHz to 450MHz with the 400MHz tone in the middle.
I am able to sweep the signal correctly now as can be seen in image 2. The time domain signal also looks fine when the signal is of the e^i2*pift form. But when I use a real signal of the form sin(2*pift) the time domain version looks corrupted (image 3). What could be the reason for that? Thanks.
https://i.stack.imgur.com/9H1Dk.png
https://i.stack.imgur.com/Ey3tQ.png
https://i.stack.imgur.com/FzmDS.png
import numpy as np
import matplotlib.pyplot as plt
def gen_sig(freq=380e6, fs=6080e6, n_samp=6400):
ts = 1/fs
t_arr = np.arange(0, n_samp)*ts
sig = np.exp(2 * 1j * np.pi * freq * t_arr)
#sig = np.sin(2 * np.pi * freq * t_arr)
return sig,ts
def freq_shift_sine(sine, ts, shift_freq = 50e6):
tx_sig_t = np.linspace(0, ts*sine.shape[-1], num=sine.shape[-1])
#tx_sig_sqrd = np.square(tx_sig_t)
#hift the sine
freq_shftd_sig = sine * np.exp(1.0j * 2 * np.pi * (shift_freq * tx_sig_t))
#freq_shftd_sig = sine * np.exp(1.0j * np.pi * (shift_freq * tx_sig_sqrd))
return freq_shftd_sig
def sweep_sine(sine, ts, up_lim = 50e6, low_lim = -50e6):
tx_sig_t = np.arange(0, sine.shape[-1])*ts
tx_sig_sqrd = np.square(tx_sig_t)
phi = low_lim*tx_sig_t + (up_lim-low_lim)*(tx_sig_sqrd/(2*ts*sine.shape[-1]))
dopp_shftd_sig = sine * np.exp(1.0j* 2 *np.pi * phi)
return dopp_shftd_sig
if __name__=='__main__':
#generate a sine wave 16 times over sampled
tx_sig, t_samp = gen_sig(freq=400e6, fs=6400e6, n_samp=6400)
#do an fft
tx_sig_fft = np.fft.fft(tx_sig)
#generate freqency axis for fft
freq_arr = np.fft.fftfreq(tx_sig.shape[-1], t_samp)
#shift sine wave
tx_sig_shifted = freq_shift_sine(tx_sig, t_samp, shift_freq = -100e6)
#fft the shifted sine
tx_sig_shftd_fft = np.fft.fft(tx_sig_shifted)
#sweep sine wave by up_lim+low_lim Hz
tx_sig_swept = sweep_sine(tx_sig, t_samp, up_lim = 50e6, low_lim = -50e6)
#fft the swept sine
tx_sig_swept_fft = np.fft.fft(tx_sig_swept)
plt.figure()
plt.plot(freq_arr, abs(tx_sig_fft))
plt.plot(freq_arr, abs(tx_sig_shftd_fft))
plt.plot(freq_arr, abs(tx_sig_swept_fft))
plt.axis([0,1e9, 0, 2e3])
plt.figure()
plt.plot(tx_sig)
plt.plot(tx_sig_shifted)
plt.plot(tx_sig_swept)
plt.axis([0,100, -1.2, 1.2])
I may be wrong but I think the problem is in your signal. You have only a real part of it, and shifting the phase in the complex plane doesn't help so much in the real part.
The possible treatment for this problem is to make the signal a complex one. The best way is to make a Hilbert transform and use it as a signal.
Your code may look like this
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import hilbert
def gen_sine(freq=380e6, fs=6080e6, n_samp=6400):
ts = 1/fs
t_arr = np.arange(0, n_samp)*ts
#sine_sig = np.exp(2 * 1j * np.pi * freq * t_arr)
sine_sig = np.sin(2 * np.pi * freq * t_arr)
return sine_sig,ts
def freq_shift_sine(sine, ts, shift_freq = 50e6):
tx_sig_t = np.linspace(0, ts*sine.shape[-1], num=sine.shape[-1])
#tx_sig_sqrd = np.square(tx_sig_t)
#hift the sine
freq_shftd_sig = hilbert(tx_sig) * np.exp(1.0j * 2 * np.pi * (shift_freq * tx_sig_t))
#freq_shftd_sig = sine * np.exp(1.0j * np.pi * (shift_freq * tx_sig_sqrd))
return freq_shftd_sig
def sweep_sine(sine, ts, up_lim = 50e6, low_lim = -50e6):
#tx_sig_t = np.arange(0, sine.shape[-1])*ts
tx_sig_t = np.linspace(0, ts*sine.shape[-1], num=sine.shape[-1])
#tx_sig_sqrd = np.square(tx_sig_t)
freq_step_arr = np.linspace(low_lim, up_lim, sine.shape[-1])
dopp_shftd_sig = hilbert(tx_sig) * np.exp(1.0j * 2 * np.pi * (freq_step_arr * tx_sig_t))
#dopp_shftd_sig = sine * np.exp(1.0j * np.pi * (freq_step_arr * tx_sig_sqrd))
return dopp_shftd_sig
if __name__=='__main__':
#generate a sine wave 16 times over sampled
tx_sig, t_samp = gen_sine(freq=400e6, fs=6400e6, n_samp=6400)
#do an fft
tx_sig_fft = np.fft.fft(tx_sig)
#generate freqency axis for fft
freq_arr = np.fft.fftfreq(tx_sig.shape[-1], t_samp)
#shift sine wave
tx_sig_shifted = freq_shift_sine(tx_sig, t_samp, shift_freq = -100e6)
#fft the shifted sine
tx_sig_shftd_fft = np.fft.fft(tx_sig_shifted)
#sweep sine wave by up_lim+low_lim Hz
tx_sig_swept = sweep_sine(tx_sig, t_samp, up_lim = 50e6, low_lim = -50e6)
#fft the swept sine
tx_sig_swept_fft = np.fft.fft(tx_sig_swept)
#plt.figure()
#plt.plot(freq_arr, abs(tx_sig_swept_fft))
#plot sine wave fft
#plt.figure()
plt.figure(1)
plt.plot(freq_arr, abs(tx_sig_fft))
plt.plot(freq_arr, abs(tx_sig_shftd_fft))
plt.plot(freq_arr, abs(tx_sig_swept_fft))
plt.axis([0,1e9, 0, 2e3])
plt.figure(2)
plt.specgram(tx_sig_swept, NFFT=80, Fs=6400e6, noverlap=16)
#plt.axis([0,0.000001, 0, 5e6])
plt.figure(3)
plt.subplot(311)
t_time = np.arange(0, tx_sig.shape[-1])*t_samp
plt.plot(t_time, tx_sig)
plt.plot(t_time, np.imag(hilbert(tx_sig)) )
plt.subplot(312)
plt.plot(t_time, tx_sig_shifted)
plt.subplot(313)
plt.plot(t_time, tx_sig_swept )
plt.show()
It produces more or less ok spectrogram and doesn't corrupt the resulted signal. Hope it helps.
You have made a very common mistake. Incrementing both the frequency and the time (when you multiply them to create a phase parameter input for the exp or sine function) at each time step, increments the frequency too much. Instead figure out how much the phase should change at each new time step for the desired frequency at that time step, and add that phase delta instead of doing a multiplication to get a new phase input to your sine or exp function.
I tried to filter some signal with fft.
The signal I am working on is quite complicated and im not really experienced in this topic.
That's why I created a simple sin wave 3Hz and tried to cut off the 3 Hz.
and so far, so good
import numpy as np
import matplotlib.pyplot as plt
from scipy.fftpack import fftfreq, irfft, rfft
t = np.linspace(0, 2*np.pi, 1000, endpoint=True)
f = 3.0 # Frequency in Hz
A = 100.0 # Amplitude in Unit
s = A * np.sin(2*np.pi*f*t) # Signal
dt = t[1] - t[0] # Sample Time
W = fftfreq(s.size, d=dt)
f_signal = rfft(s)
cut_f_signal = f_signal.copy()
cut_f_signal[(np.abs(W)>3)] = 0 # cut signal above 3Hz
cs = irfft(cut_f_signal)
fig = plt.figure(figsize=(10,5))
plt.plot(s)
plt.plot(cs)
What i expected
What i got
I don't really know where the noise is coming from.
I think it is some basic stuff, but i dont get it.
Can someone explain to to me?
Edit
Just further information
Frequency
yf = fft(s)
N = s.size
xf = np.linspace(0, fa/2, N/2, endpoint=True)
fig, ax = plt.subplots()
ax.plot(xf,(2.0/N * np.abs(yf[:N//2])))
plt.xlabel('Frequency ($Hz$)')
plt.ylabel('Amplitude ($Unit$)')
plt.show()
You could change the way you create your signal and use a sample frequency:
fs = 1000
t = np.linspace(0, 1000 / fs, 1000, endpoint=False) # 1000 samples
f = 3.0 # Frequency in Hz
A = 100.0 # Amplitude in Unit
s = A * np.sin(2*np.pi*f*t) # Signal
dt = 1/fs
And here the whole code:
import numpy as np
import matplotlib.pyplot as plt
from scipy.fftpack import fftfreq, irfft, rfft
fs = 1000
t = np.linspace(0, 1000 / fs, 1000, endpoint=False)
f = 3.0 # Frequency in Hz
A = 100.0 # Amplitude in Unit
s = A * np.sin(2*np.pi*f*t) # Signal
dt = 1/fs
W = fftfreq(s.size, d=dt)
f_signal = rfft(s)
cut_f_signal = f_signal.copy()
cut_f_signal[(np.abs(W)>3)] = 0 # cut signal above 3Hz
cs = irfft(cut_f_signal)
fig = plt.figure(figsize=(10,5))
plt.plot(s)
plt.plot(cs)
And with f = 3.0 Hz and (np.abs(W) >= 3):
And with f = 1.0 Hz:
Just some additional information about why A. As solution works better than yours:
A. A's model doesn't include any non-integer frequencies in its Solution and after filtering out the higher frequencies the result looks like:
1.8691714842589136e-12 * exp(2*pi*n*t*0.0)
1.033507502555532e-12 * exp(2*pi*n*t*1.0)
2.439774536202658e-12 * exp(2*pi*n*t*2.0)
-8.346741339115191e-13 * exp(2*pi*n*t*3.0)
-5.817427588021649e-15 * exp(2*pi*n*t*-3.0)
4.476938066992472e-14 * exp(2*pi*n*t*-2.0)
-3.8680170177940454e-13 * exp(2*pi*n*t*-1.0)
while your solution includes components like:
...
177.05936105690256 * exp(2*pi*n*t*1.5899578814880346)
339.28717376420747 * exp(2*pi*n*t*1.7489536696368382)
219.76658524130005 * exp(2*pi*n*t*1.9079494577856417)
352.1094590251063 * exp(2*pi*n*t*2.0669452459344453)
267.23939871205346 * exp(2*pi*n*t*2.2259410340832484)
368.3230130593005 * exp(2*pi*n*t*2.384936822232052)
321.0888818355804 * exp(2*pi*n*t*2.5439326103808555)
...
Please refer to this question regarding possible side effects of zeroing FFT bins out.
Main Problem: How can the scipy.signal.cwt() function be inversed.
I have seen where Matlab has an inverse continuous wavelet transform function which will return the original form of the data by inputting the wavelet transform, although you can filter out the slices you don't want.
MATALAB inverse cwt funciton
Since scipy doesn't appear to have the same function, I have been trying to figure out how to get the data back in the same form, while removing the noise and background.
How do I do this?
I tried squaring it to remove negative values, but this gives me values way to large and not quite right.
Here is what I have been trying:
# Compute the wavelet transform
widths = range(1,11)
cwtmatr = signal.cwt(xy['y'], signal.ricker, widths)
# Maybe we multiple by the original data? and square?
WT_to_original_data = (xy['y'] * cwtmatr)**2
And here is a fully compilable short script to show you the type of data I am trying to get and what I have etc.:
import numpy as np
from scipy import signal
import matplotlib.pyplot as plt
# Make some random data with peaks and noise
def make_peaks(x):
bkg_peaks = np.array(np.zeros(len(x)))
desired_peaks = np.array(np.zeros(len(x)))
# Make peaks which contain the data desired
# (Mid range/frequency peaks)
for i in range(0,10):
center = x[-1] * np.random.random() - x[0]
amp = 60 * np.random.random() + 10
width = 10 * np.random.random() + 5
desired_peaks += amp * np.e**(-(x-center)**2/(2*width**2))
# Also make background peaks (not desired)
for i in range(0,3):
center = x[-1] * np.random.random() - x[0]
amp = 40 * np.random.random() + 10
width = 100 * np.random.random() + 100
bkg_peaks += amp * np.e**(-(x-center)**2/(2*width**2))
return bkg_peaks, desired_peaks
x = np.array(range(0, 1000))
bkg_peaks, desired_peaks = make_peaks(x)
y_noise = np.random.normal(loc=30, scale=10, size=len(x))
y = bkg_peaks + desired_peaks + y_noise
xy = np.array( zip(x,y), dtype=[('x',float), ('y',float)])
# Compute the wavelet transform
# I can't figure out what the width is or does?
widths = range(1,11)
# Ricker is 2nd derivative of Gaussian
# (*close* to what *most* of the features are in my data)
# (They're actually Lorentzians and Breit-Wigner-Fano lines)
cwtmatr = signal.cwt(xy['y'], signal.ricker, widths)
# Maybe we multiple by the original data? and square?
WT = (xy['y'] * cwtmatr)**2
# plot the data and results
fig = plt.figure()
ax_raw_data = fig.add_subplot(4,3,1)
ax = {}
for i in range(0, 11):
ax[i] = fig.add_subplot(4,3, i+2)
ax_desired_transformed_data = fig.add_subplot(4,3,12)
ax_raw_data.plot(xy['x'], xy['y'], 'g-')
for i in range(0,10):
ax[i].plot(xy['x'], WT[i])
ax_desired_transformed_data.plot(xy['x'], desired_peaks, 'k-')
fig.tight_layout()
plt.show()
This script will output this image:
Where the first plot is the raw data, the middle plots are the wavelet transforms and the last plot is what I want to get out as the processed (background and noise removed) data.
Does anyone have any suggestions? Thank you so much for the help.
I ended up finding a package which provides an inverse wavelet transform function called mlpy. The function is mlpy.wavelet.uwt. This is the compilable script I ended up with which may interest people if they are trying to do noise or background removal:
import numpy as np
from scipy import signal
import matplotlib.pyplot as plt
import mlpy.wavelet as wave
# Make some random data with peaks and noise
############################################################
def gen_data():
def make_peaks(x):
bkg_peaks = np.array(np.zeros(len(x)))
desired_peaks = np.array(np.zeros(len(x)))
# Make peaks which contain the data desired
# (Mid range/frequency peaks)
for i in range(0,10):
center = x[-1] * np.random.random() - x[0]
amp = 100 * np.random.random() + 10
width = 10 * np.random.random() + 5
desired_peaks += amp * np.e**(-(x-center)**2/(2*width**2))
# Also make background peaks (not desired)
for i in range(0,3):
center = x[-1] * np.random.random() - x[0]
amp = 80 * np.random.random() + 10
width = 100 * np.random.random() + 100
bkg_peaks += amp * np.e**(-(x-center)**2/(2*width**2))
return bkg_peaks, desired_peaks
# make x axis
x = np.array(range(0, 1000))
bkg_peaks, desired_peaks = make_peaks(x)
avg_noise_level = 30
std_dev_noise = 10
size = len(x)
scattering_noise_amp = 100
scat_center = 100
scat_width = 15
scat_std_dev_noise = 100
y_scattering_noise = np.random.normal(scattering_noise_amp, scat_std_dev_noise, size) * np.e**(-(x-scat_center)**2/(2*scat_width**2))
y_noise = np.random.normal(avg_noise_level, std_dev_noise, size) + y_scattering_noise
y = bkg_peaks + desired_peaks + y_noise
xy = np.array( zip(x,y), dtype=[('x',float), ('y',float)])
return xy
# Random data Generated
#############################################################
xy = gen_data()
# Make 2**n amount of data
new_y, bool_y = wave.pad(xy['y'])
orig_mask = np.where(bool_y==True)
# wavelet transform parameters
levels = 8
wf = 'h'
k = 2
# Remove Noise first
# Wave transform
wt = wave.uwt(new_y, wf, k, levels)
# Matrix of the difference between each wavelet level and the original data
diff_array = np.array([(wave.iuwt(wt[i:i+1], wf, k)-new_y) for i in range(len(wt))])
# Index of the level which is most similar to original data (to obtain smoothed data)
indx = np.argmin(np.sum(diff_array**2, axis=1))
# Use the wavelet levels around this region
noise_wt = wt[indx:indx+1]
# smoothed data in 2^n length
new_y = wave.iuwt(noise_wt, wf, k)
# Background Removal
error = 10000
errdiff = 100
i = -1
iter_y_dict = {0:np.copy(new_y)}
bkg_approx_dict = {0:np.array([])}
while abs(errdiff)>=1*10**-24:
i += 1
# Wave transform
wt = wave.uwt(iter_y_dict[i], wf, k, levels)
# Assume last slice is lowest frequency (background approximation)
bkg_wt = wt[-3:-1]
bkg_approx_dict[i] = wave.iuwt(bkg_wt, wf, k)
# Get the error
errdiff = error - sum(iter_y_dict[i] - bkg_approx_dict[i])**2
error = sum(iter_y_dict[i] - bkg_approx_dict[i])**2
# Make every peak higher than bkg_wt
diff = (new_y - bkg_approx_dict[i])
peak_idxs_to_remove = np.where(diff>0.)[0]
iter_y_dict[i+1] = np.copy(new_y)
iter_y_dict[i+1][peak_idxs_to_remove] = np.copy(bkg_approx_dict[i])[peak_idxs_to_remove]
# new data without noise and background
new_y = new_y[orig_mask]
bkg_approx = bkg_approx_dict[len(bkg_approx_dict.keys())-1][orig_mask]
new_data = diff[orig_mask]
##############################################################
# plot the data and results
fig = plt.figure()
ax_raw_data = fig.add_subplot(121)
ax_WT = fig.add_subplot(122)
ax_raw_data.plot(xy['x'], xy['y'], 'g')
for bkg in bkg_approx_dict.values():
ax_raw_data.plot(xy['x'], bkg[orig_mask], 'k')
ax_WT.plot(xy['x'], new_data, 'y')
fig.tight_layout()
plt.show()
And here is the output I am getting now:
As you can see, there is still a problem with the background removal (it shifts to the right after each iteration), but it is a different question which I will address here.