I like to prototype algorithms in Matlab, but I need to deploy them on a server that also runs quite a bit of Python code. So I quickly converted the code to Python and compared the two. The Matlab implementation runs ~1000 times faster (based on timing function calls, no profiling). Does anyone know offhand why Python is so much slower here?
Matlab
% init random data
w = 800;
h = 1200;
hmap = zeros(w,h);
npts = 250;
for i=1:npts
hmap(randi(w),randi(h)) = hmap(randi(w),randi(h))+1;
end
% Params
disksize = 251;
nBreaks = 25;
saturation = .9;
floorthresh =.05;
fh = fspecial('gaussian', disksize, disksize/7);
hmap = conv2(hmap, fh, 'same');
% Scaling, partitioning etc
hmap = hmap/(max(max(hmap)));
hmap(hmap<floorthresh) = 0;
hmap = round(nBreaks * hmap)/nBreaks;
hmap = hmap * (1/saturation);
% Show the image
imshow(hmap, [0,1])
colormap('jet')
Python
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import convolve2d as conv2
# Test data parameters
w = 800
h = 1200
npts = 250
# generate data
xvals = np.random.randint(w, size=npts)
yvals = np.random.randint(h, size=npts)
# Heatmap parameters
gaussianSize = 250
nbreaks = 25
# Preliminary function definitions
def populateMat(w, h, xvals, yvals):
    container = np.zeros((w, h))
    for idx in range(xvals.size):
        x = xvals[idx]
        y = yvals[idx]
        container[x, y] += 1
    return container

def makeGaussian(size, fwhm):
    x = np.arange(0, size, 1, float)
    y = x[:, np.newaxis]
    x0 = y0 = size // 2
    return np.exp(-4*np.log(2) * ((x-x0)**2 + (y-y0)**2) / fwhm**2)
# Create the data matrix
dmat = populateMat(w,h,xvals,yvals)
h = makeGaussian(gaussianSize, fwhm=gaussianSize/2)
# Convolve
dmat2 = conv2(dmat, h, mode='same')
# Scaling etc
dmat2 = dmat2 / dmat2.max()
dmat2 = np.round(nbreaks*dmat2)/nbreaks
# Show
plt.imshow(dmat2)
plt.show()
OK, problem solved for me, thanks to the suggestion in @Yves Daust's comments:
The filter scipy.ndimage.filters.gaussian_filter utilises the separability of the kernel and reduces the running time to within a single order of magnitude of the Matlab implementation.
import numpy as np
import matplotlib.pyplot as plt
from scipy.ndimage.filters import gaussian_filter as gaussian
# Test data parameters
w = 800
h = 1200
npts = 250
# generate data
xvals = np.random.randint(w, size=npts)
yvals = np.random.randint(h, size=npts)
# Heatmap parameters
gaussianSize = 250
nbreaks = 25
# Preliminary function definitions
def populateMat(w, h, xvals, yvals):
    container = np.zeros((w, h))
    for idx in range(xvals.size):
        x = xvals[idx]
        y = yvals[idx]
        container[x, y] += 1
    return container
# Create the data matrix
dmat = populateMat(w,h,xvals,yvals)
# Convolve
dmat2 = gaussian(dmat, gaussianSize/7)
# Scaling etc
dmat2 = dmat2 / dmat2.max()
dmat2 = np.round(nbreaks*dmat2)/nbreaks
# Show
plt.imshow(dmat2)
plt.show()
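For intuition about why gaussian_filter is so much faster than the full 2-D convolve2d call, here is a minimal self-contained sketch (not part of the original post): a 2-D Gaussian kernel is the outer product of two 1-D Gaussians, so two cheap 1-D convolutions reproduce the 2-D convolution exactly.

import numpy as np
from scipy.signal import convolve2d
from scipy.ndimage import convolve1d

# Impulse image: the responses below are just the kernels themselves
img = np.zeros((200, 300))
img[100, 150] = 1.0

size, sigma = 31, 5.0
x = np.arange(size) - size // 2
g1d = np.exp(-x**2 / (2 * sigma**2))
g1d /= g1d.sum()
g2d = np.outer(g1d, g1d)   # full 2-D kernel: O(size**2) work per output pixel

full = convolve2d(img, g2d, mode='same')                       # one 2-D pass
sep = convolve1d(convolve1d(img, g1d, axis=0, mode='constant'),
                 g1d, axis=1, mode='constant')                 # two 1-D passes
print(np.abs(full - sep).max())   # agrees to machine precision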
Related
I am trying to fit a surface brightness profile to a galaxy, and I either get an error that states "Optimal parameters not found: Number of calls to function has reached maxfev = 800" or the fit simply doesn't match the data. Has anyone done this before and could offer help?
My Code:
from astropy.io import ascii
import numpy as np
import matplotlib.pyplot as plt
import scipy.optimize
from astropy.io import fits
import math
#plate scale for CofC Observatory
f = 3962 #mm
p = 206265./f # arcsec/mm
p = p * 9.e-6 #arcsec/pix
finalp = p * 20 #pc/pix, sombrero = 97, ngc720 = 105, ngc6946 = 20
finalp = finalp/1000 #kpc
#importing files
data_b = ascii.read('/Users/research/Desktop/Dowd.A/Final/data/NGC6946/ngc6946_b_profile.dat')
data_g = ascii.read('/Users/research/Desktop/Dowd.A/Final/data/NGC6946/ngc6946_g_profile.dat')
data_r = ascii.read('/Users/research/Desktop/Dowd.A/Final/data/NGC6946/ngc6946_r_profile.dat')
#Converting x-axis from pixels to kpc
r_b = data_b['x']*finalp
r_g = data_g['x']*finalp
r_r = data_r['x']*finalp
y_b = data_b['y'] - 412. #final number is background, different for each filter
y_g = data_g['y'] - 1288.
y_r = data_r['y'] - 1364.
r_b = np.array(r_b)
y_b = np.array(y_b)
r_g = np.array(r_g)
y_g = np.array(y_g)
r_r = np.array(r_r)
y_r = np.array(y_r)
#fitting the model for sersic and exponential
def GaussPoly(r, munaught, beta, n0):
    # note: re is read from the enclosing scope, so it must be set before fitting
    return munaught * np.exp(-beta*(r/re)**(1/n0))
munaught = 8 #change for each filter
beta = .001 #change for each filter
r = r_b
n0 = 1. #1 for spiral and 4 for elliptical
re = r_b[0]
nlfit, nlpcov = scipy.optimize.curve_fit(GaussPoly, r_b, y_b, p0=[munaught, beta, n0])
munaught, beta, n0 = nlfit
x600 = np.arange(0,10)
fit = GaussPoly(x600, munaught, beta, n0)
plt.plot(x600, fit, color = 'orange')
#commented out filters to see one graph at a time
plt.plot(r_b,y_b, 'b.')
plt.xlabel('Avg Radius (kpc)')
plt.ylabel('Surface Brightness (counts/pixel**2)')
plt.savefig('ngc6946_radial_profile_b')
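A common first step against the maxfev error (a hedged sketch reusing the names above, not a verified fix for this particular dataset) is to raise the evaluation limit: for the default 'lm' method, curve_fit forwards maxfev to leastsq, whose default cap for three parameters is 200*(3+1) = 800, exactly the number in the error message. It also helps to make sure re is set before the fit, since GaussPoly reads it from the enclosing scope.

# Sketch only: same fit as above, but with a higher evaluation limit.
nlfit, nlpcov = scipy.optimize.curve_fit(GaussPoly, r_b, y_b,
                                         p0=[munaught, beta, n0],
                                         maxfev=10000)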
I have a specific Python problem that desperately needs to be sped up by avoiding a loop, yet I am at a loss as to how to do this. I need to read in a FITS image, convert it to a NumPy array (roughly 2000 x 2000 elements), and then, for each element, compute the statistics of a ring of elements around it.
As my code stands now, the statistics of the ring around each element are computed by a function using masks. The function is fast, but of course I call it 2000 x 2000 times (the slow part).
I am relatively new to Python. I think the mask approach is clever, but I cannot find a way around addressing each element individually. Many thanks for any help you can provide.
import numpy as np
from astropy.io import fits

# First, the function computing the statistics within a ring
# around the central pixel:
# flux = image intensity at pixel (i,j)
# rad1, rad2 = inner and outer radii
# array = image array
def snr(flux, i, j, rad1, rad2, array):
    a, b = i, j
    nx, ny = array.shape
    y, x = np.ogrid[-a:nx-a, -b:ny-b]
    mask = (x*x + y*y >= rad1*rad1) & (x*x + y*y <= rad2*rad2)
    Nmask = np.count_nonzero(mask)
    noise = 0.6052697 * abs(Nmask * flux - np.sum(array[mask]))
    return noise
# Now, the call to snr for each pixel in the array data1:
frame1 = fits.open(in_frame, mode='readonly')   # read in FITS file
data1 = frame1[ext].data                        # image as a numpy array
ny, nx = data1.shape                            # array dimensions
noise1 = np.zeros((ny, nx), float)              # empty output array
r1 = 5                                          # inner radius (pixels)
r2 = 7                                          # outer radius (pixels)
# The function is fast, but calling it 2k x 2k times is not:
for j in range(ny):
    for i in range(nx):
        noise1[j, i] = snr(data1[j, i], j, i, r1, r2, data1)
The operation that you are trying to do can be expressed as an image convolution. Try something like this:
import numpy as np
import scipy.ndimage
from astropy.io import fits
def make_kernel(inner_radius, outer_radius):
    if inner_radius > outer_radius:
        raise ValueError
    x, y = np.ogrid[-outer_radius:outer_radius + 1, -outer_radius:outer_radius + 1]
    r2 = x * x + y * y
    kernel = (r2 >= inner_radius * inner_radius) & (r2 <= outer_radius * outer_radius)
    return kernel
in_frame = '<file path>'
ext = '...'
frame1 = fits.open(in_frame, mode='readonly')
data1 = frame1[ext].data
inner_radius = 5
outer_radius = 7
kernel = make_kernel(inner_radius, outer_radius)
n_kernel = np.count_nonzero(kernel)
conv = scipy.ndimage.convolve(data1, kernel, mode='constant')
noise1 = 0.6052697 * np.abs(n_kernel * data1 - conv)
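As an optional sanity check (not part of the original answer), a small self-contained script can compare the convolution result against a direct per-pixel ring computation; the two agree away from the borders, where the loop version counts only in-bounds ring pixels while mode='constant' pads with zeros.

import numpy as np
import scipy.ndimage

rng = np.random.default_rng(0)
small = rng.random((30, 30))
r1, r2 = 2, 4

# Ring-shaped kernel, same construction as make_kernel above
yk, xk = np.ogrid[-r2:r2 + 1, -r2:r2 + 1]
kernel = (xk*xk + yk*yk >= r1*r1) & (xk*xk + yk*yk <= r2*r2)
n_kernel = np.count_nonzero(kernel)

conv = scipy.ndimage.convolve(small, kernel.astype(float), mode='constant')
fast = 0.6052697 * np.abs(n_kernel * small - conv)

# Direct per-pixel computation for comparison
slow = np.empty_like(small)
ny, nx = small.shape
for r in range(ny):
    for c in range(nx):
        y, x = np.ogrid[-r:ny - r, -c:nx - c]
        mask = (x*x + y*y >= r1*r1) & (x*x + y*y <= r2*r2)
        slow[r, c] = 0.6052697 * abs(mask.sum() * small[r, c] - small[mask].sum())

print(np.allclose(fast[r2:-r2, r2:-r2], slow[r2:-r2, r2:-r2]))   # True in the interior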
BACKGROUND: I am trying to build a real-time drum simulation model, for which I need really fast matrix-vector products. My matrices are of size ~5000-10000 rows/columns, of which only 6 entries per row are non-zero, so I am inclined to use sparse matrices. I am using the scipy.sparse module. The iteration step is as below.
Vjk_plus_sparse = Vjk_minus_sparse.transpose()
Vj = Vjk_plus_sparse.dot(constant)
np.put(Vj, Nr, 0.0)
Uj[t] = Uj[t-1] + np.transpose(Vj)/fs
Vj_mat = adj_mat_sparse.multiply(Vj)
Vjk_minus_sparse = Vj_mat-Vjk_plus_sparse.multiply(end_gain)
Here, Vjk_plus_sparse, Vjk_minus_sparse and Vj_mat are sparse CSR matrices, Vj is a NumPy array, and Uj is a NumPy matrix where each row represents Uj(t). end_gain is a static NumPy array used to dampen the vibrations.
THE ISSUE: A single iteration takes about 3 ms for size = 4250, with the most significant steps being the last two lines, which together take about 2.5 ms. I would ideally need it to run in 0.1 ms, which would be more than a 10x speedup. This is the maximum extent of vectorization possible for the problem, and I cannot parallelize across time steps, since I am marching in time; physically it would not be accurate otherwise.
ATTEMPTS: I tried fiddling with the sparse data structures and found the best performance with all of them as CSR (Compressed Sparse Row), with the timings quoted above. I also tried to replace the multiply() method with a matrix multiplication by repeating Vj, but that made the time worse, as the resulting operation would be a sparse * dense product.
How can I speed this up within Python itself? I am open to trying C++ as well, though migrating now would be a major pain. Also, since SciPy is essentially implemented in C, would that even give much of a speedup?
Added a complete runnable example
import matplotlib.pyplot as plt
import matplotlib as mpl
import matplotlib.patches
import math
from mpl_toolkits import mplot3d
import numpy as np
import scipy.sparse as sp
import scipy.fftpack as spf
import matplotlib.animation as animation
import time
sqrt_3 = 1.73205080757
class Pt:
    def __init__(self, x_0, y_0):
        self.x_0 = x_0
        self.y_0 = y_0
        self.id = -1
        self.neighbours = []
        self.distance = (x_0**2 + y_0**2)**0.5

class Circle:
    def __init__(self, radius, center):
        self.radius = radius
        self.center = center
        self.nodes = []

    def construct_mesh(self, unit):
        queue = [self.center]
        self.center.distance = 0
        curr_id = 0
        delta = [(1., 0.), (1./2, (3**0.5)/2), (-1./2, (3**0.5)/2),
                 (-1., 0.), (-1./2, -(3**0.5)/2), (1./2, -(3**0.5)/2)]
        node_dict = {}
        node_dict[(self.center.x_0, self.center.y_0)] = curr_id
        self.nodes.append(self.center)
        curr_id += 1
        while len(queue) != 0:
            curr_pt = queue[0]
            queue.pop(0)
            # self.nodes.append(curr_pt)
            # curr_id += 1
            for i in delta:
                temp_pt = Pt(curr_pt.x_0 + 2*unit*i[0], curr_pt.y_0 + 2*unit*i[1])
                temp_pt.id = curr_id
                temp_pt.distance = (temp_pt.x_0**2 + temp_pt.y_0**2)**0.5
                # curr_id += 1
                if (round(temp_pt.x_0, 5), round(temp_pt.y_0, 5)) not in node_dict and temp_pt.distance <= self.radius:
                    # print(temp_pt.x_0, temp_pt.y_0)
                    self.nodes.append(temp_pt)
                    node_dict[(round(temp_pt.x_0, 5), round(temp_pt.y_0, 5))] = curr_id
                    curr_id += 1
                    queue.append(temp_pt)
                    curr_pt.neighbours.append(temp_pt.id)
                elif temp_pt.distance <= self.radius:
                    curr_pt.neighbours.append(node_dict[round(temp_pt.x_0, 5), round(temp_pt.y_0, 5)])
        # print(node_dict)

    def plot_neighbours(self, pt):
        x = []
        y = []
        x.append(pt.x_0)
        y.append(pt.y_0)
        for i in pt.neighbours:
            x.append(self.nodes[i].x_0)
            y.append(self.nodes[i].y_0)
        plt.scatter(x, y)
        plt.axis('scaled')

    def boundary_node_ids(self):
        boundary_nodes = []
        for j in range(len(self.nodes)):
            if len(self.nodes[j].neighbours) < 6:
                boundary_nodes.append(j)
        return boundary_nodes

    def add_rim(self, boundary_node_ids, unit):
        c = self.center
        rim_ids = []
        N = len(self.nodes)
        for i in range(len(boundary_node_ids)):
            d = self.nodes[boundary_node_ids[i]].distance
            xp = self.nodes[boundary_node_ids[i]].x_0
            yp = self.nodes[boundary_node_ids[i]].y_0
            xnew = xp + xp*unit/d
            ynew = yp + yp*unit/d
            new_point = Pt(xnew, ynew)
            new_point.id = N + i
            rim_ids.append(N + i)
            self.nodes.append(new_point)
            self.nodes[boundary_node_ids[i]].neighbours.append(new_point.id)
            self.nodes[N + i].neighbours.append(boundary_node_ids[i])
        return rim_ids
def find_nearest_point(mesh, pt):
    distances_from_center = np.zeros(len(mesh.nodes))
    for i in range(len(mesh.nodes)):   # xrange -> range for Python 3
        distances_from_center[i] = mesh.nodes[i].distance
    target_distance = pt.distance
    closest_point_id = np.argmin(np.abs(distances_from_center - target_distance))
    return closest_point_id
def init_impulse(mesh, impulse, Vj, poi, roi):
    # Nr (the rim node ids) is a module-level global defined below
    data = []
    for i in range(len(Vj)):
        r = ((mesh.nodes[i].x_0 - mesh.nodes[poi].x_0)**2 + (mesh.nodes[i].y_0 - mesh.nodes[poi].y_0)**2)**0.5
        Vj[i] = max(0, impulse*(1. - (r/roi)))
        if i in Nr:
            Vj[i] = 0.
        for k in mesh.nodes[i].neighbours:
            data.append(Vj[i].item()/2.)   # np.asscalar was removed in recent NumPy
    return Vj, data
r = 0.1016 #Radius of drum head
# rho = 2500 #Density of drum head
thickness = 0.001 #Thickness of membrane
# tension = 1500 #Tension in membrane in N
param = 0.9
c = (param/thickness)**(0.5) #Speed of wave in string
duration = 0.25
fs = 4000
delta = c/fs
center = Pt(0,0)
point_of_impact = Pt(r/2., 0)
center.id = 0
mesh = Circle(r,center)
mesh.construct_mesh(delta)
N = len(mesh.nodes)
Nb = []
for j in range(N):
    if len(mesh.nodes[j].neighbours) < 6:
        Nb.append(j)
Nr = mesh.add_rim(Nb, delta)
N = len(mesh.nodes)
print(N)
row_ind = []
col_ind = []
for j in range(N):
    for k in mesh.nodes[j].neighbours:
        row_ind.append(j)
        col_ind.append(k)
data = np.ones(len(col_ind))
adj_mat_sparse = sp.csr_matrix((data, (row_ind, col_ind)), shape = (N,N))
Vjk_plus = sp.csr_matrix([N, N])
Vj = np.zeros([N,1])
Uj = np.zeros([int(duration*fs), N])
Vj_mat = sp.csc_matrix([N,N])
closest_point_id = find_nearest_point(mesh, point_of_impact)
Vj, Vjk_data = init_impulse(mesh, -10.0, Vj, closest_point_id, r/10.)
Vjk_minus_sparse = sp.csr_matrix((Vjk_data, (row_ind, col_ind)), shape = (N,N))
constant = (1./3)*np.ones([N,1])
Vjk_plus = Vjk_minus_sparse.transpose()
np.put(Vj, Nr, 0.0)
Uj[1] = Uj[0] + np.transpose(Vj)/fs
Vj_mat = adj_mat_sparse.multiply(Vj)
Vjk_minus_sparse = Vj_mat - Vjk_plus
end_gain = np.ones([N,1])
end_gain[Nr] = 1.0
for t in range(2, int(duration*fs)):
    Vjk_plus = Vjk_minus_sparse.transpose()
    Vj = Vjk_plus.dot(constant)
    np.put(Vj, Nr, 0.0)
    Uj[t] = Uj[t-1] + np.transpose(Vj)/fs
    Vj_mat = adj_mat_sparse.multiply(Vj)
    Vjk_minus_sparse = Vj_mat - Vjk_plus.multiply(end_gain)
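One direction that may help with the multiply() bottleneck, sketched under the assumption that the sparsity pattern of adj_mat_sparse never changes between time steps (which holds here, since the mesh is fixed): adj_mat_sparse.multiply(Vj) scales row i by Vj[i], and the same result can be obtained by scaling the CSR data array directly, reusing a precomputed row index instead of allocating a new sparse matrix every iteration. The names below are illustrative stand-ins, not the original variables.

import numpy as np
import scipy.sparse as sp

N = 6
A = sp.random(N, N, density=0.3, format='csr')   # stand-in for adj_mat_sparse
Vj = np.random.rand(N, 1)                        # stand-in for Vj

# Row index of every stored nonzero; compute once, since the structure is fixed
row_of_nnz = np.repeat(np.arange(N), np.diff(A.indptr))

scaled = A.copy()
scaled.data = A.data * Vj.ravel()[row_of_nnz]    # row-wise scaling in CSR order

assert np.allclose(scaled.toarray(), A.toarray() * Vj)   # same as A.multiply(Vj)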
I have a NumPy array I which stores N images of size P (number of pixels). Every image is of size P = q*q.
N = 1000 # number of images
q = 10 # length and width of image
P = q*q # pixels of image
I = np.ones((N,P)) # array with N images of size P
Now I want to delete patches of size ps around a selected index IDX (set all values to zero).
ps = 2 # patch size (ps x ps)
IDX = np.random.randint(0,P,(N,1))
My approach was to reshape every single image using reshape(q,q) and delete the pixels around IDX. The problem is that I do not know how to compute the position inside the image given IDX. Additionally, I have to check that the index is not outside the image.
How can I tackle this problem, and is there any way to vectorize this procedure?
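A minimal sketch of the flat-index-to-coordinates mapping that the approaches below rely on, using np.unravel_index:

import numpy as np

q = 10
flat_idx = 37                                  # a single flat pixel index into a q x q image
row, col = np.unravel_index(flat_idx, (q, q))
print(row, col)                                # 3 7: position inside the image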
EDIT:
With the help of @Brenlla I did the following to remove patches. The problem with my approach is that it needs three for-loops, and I have to reshape every image twice. Is there any way to improve the performance? This part slows down my code significantly.
import numpy as np
import matplotlib.pyplot as plt
def myplot(I):
    imgs = 10
    for i in range(imgs**2):
        plt.subplot(imgs, imgs, i+1)
        plt.imshow(I[i].reshape(q, q), interpolation="none")
        plt.axis("off")
    plt.show()
N = 10000
q = 28
P = q*q
I = np.random.rand(N,P)
ps = 3
IDX = np.random.randint(0,P,(N,1))
for i in range(N):
    img = I[i].reshape(q, q)
    y0, x0 = np.unravel_index(IDX[i, 0], (q, q))
    for x in range(ps):
        for y in range(ps):
            if (x0+x < q) and (y0+y < q):
                img[x0+x, y0+y] = 2.0
    I[i] = img.reshape(1, q*q)
myplot(I)
Yes, that can be done, but it involves heavy use of NumPy broadcasting.
Generate data plus a hard copy of I:
import time
N = 10000
q = 28
P = q*q
ps = 3
I = np.random.rand(N,P)
IDX = np.random.randint(0,P,(N,1))
I_copy = I.copy()
And now run the loop solution. I switched x0 and y0:
t0 = time.perf_counter()   # time.clock() was removed in Python 3.8
for i in range(N):
    img = I[i].reshape(q, q)
    x0, y0 = np.unravel_index(IDX[i, 0], (q, q))
    for x in range(ps):
        for y in range(ps):
            if (x0+x < q) and (y0+y < q):
                img[x0+x, y0+y] = 2.0
    I[i] = img.reshape(1, q*q)
print('With loop: {:.2f} ms'.format((time.perf_counter() - t0) * 1e3))
Approx. 276 ms on my machine. Now the broadcasting:
t0 = time.perf_counter()
x_shift, y_shift = np.meshgrid(range(ps), range(ps))
x, y = np.unravel_index(IDX, (q, q))
# roi for region of interest
roix = x[:, :, None] + x_shift
roiy = y[:, :, None] + y_shift
roix[roix > q-1] = q-1
roiy[roiy > q-1] = q-1
I_copy.reshape(N, q, q)[np.arange(N)[:, None, None], roix, roiy] = 2.0
print('No loop: {:.2f} ms'.format((time.perf_counter() - t0) * 1e3))
print(np.array_equal(I, I_copy))
Roughly 80x faster
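To make the fancy-indexing step easier to follow, here is a minimal sketch (illustrative sizes, not from the original answer) of how the three index arrays broadcast together:

import numpy as np

N, q, ps = 4, 6, 2
I_small = np.zeros((N, q, q))
IDX = np.random.randint(0, q * q, (N, 1))

x_shift, y_shift = np.meshgrid(range(ps), range(ps))   # each (ps, ps)
x, y = np.unravel_index(IDX, (q, q))                   # each (N, 1)
roix = np.clip(x[:, :, None] + x_shift, 0, q - 1)      # (N, ps, ps)
roiy = np.clip(y[:, :, None] + y_shift, 0, q - 1)      # (N, ps, ps)

# np.arange(N)[:, None, None] has shape (N, 1, 1); the three index arrays
# broadcast to (N, ps, ps), selecting one ps x ps patch per image.
I_small[np.arange(N)[:, None, None], roix, roiy] = 2.0
print(I_small.sum(axis=(1, 2)))   # at most ps*ps*2.0 per image (less when clipped)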
Main problem: how can the scipy.signal.cwt() transform be inverted?
I have seen that Matlab has an inverse continuous wavelet transform function which returns the original form of the data from the wavelet transform, although you can filter out the slices you don't want:
MATLAB inverse cwt function
Since scipy doesn't appear to have an equivalent function, I have been trying to figure out how to get the data back in the same form, while removing the noise and background.
How do I do this?
I tried squaring the transform to remove negative values, but this gives me values that are way too large and not quite right.
Here is what I have been trying:
# Compute the wavelet transform
widths = range(1,11)
cwtmatr = signal.cwt(xy['y'], signal.ricker, widths)
# Maybe we multiply by the original data, and square?
WT_to_original_data = (xy['y'] * cwtmatr)**2
And here is a fully runnable short script to show you the type of data I am trying to get and what I have so far:
import numpy as np
from scipy import signal
import matplotlib.pyplot as plt
# Make some random data with peaks and noise
def make_peaks(x):
    bkg_peaks = np.array(np.zeros(len(x)))
    desired_peaks = np.array(np.zeros(len(x)))
    # Make peaks which contain the data desired
    # (Mid range/frequency peaks)
    for i in range(0, 10):
        center = x[-1] * np.random.random() - x[0]
        amp = 60 * np.random.random() + 10
        width = 10 * np.random.random() + 5
        desired_peaks += amp * np.e**(-(x-center)**2/(2*width**2))
    # Also make background peaks (not desired)
    for i in range(0, 3):
        center = x[-1] * np.random.random() - x[0]
        amp = 40 * np.random.random() + 10
        width = 100 * np.random.random() + 100
        bkg_peaks += amp * np.e**(-(x-center)**2/(2*width**2))
    return bkg_peaks, desired_peaks
return bkg_peaks, desired_peaks
x = np.array(range(0, 1000))
bkg_peaks, desired_peaks = make_peaks(x)
y_noise = np.random.normal(loc=30, scale=10, size=len(x))
y = bkg_peaks + desired_peaks + y_noise
xy = np.array(list(zip(x, y)), dtype=[('x', float), ('y', float)])
# Compute the wavelet transform
# I can't figure out what the width is or does?
widths = range(1,11)
# Ricker is 2nd derivative of Gaussian
# (*close* to what *most* of the features are in my data)
# (They're actually Lorentzians and Breit-Wigner-Fano lines)
cwtmatr = signal.cwt(xy['y'], signal.ricker, widths)
# Maybe we multiply by the original data, and square?
WT = (xy['y'] * cwtmatr)**2
# plot the data and results
fig = plt.figure()
ax_raw_data = fig.add_subplot(4,3,1)
ax = {}
for i in range(0, 11):
    ax[i] = fig.add_subplot(4, 3, i+2)
ax_desired_transformed_data = fig.add_subplot(4, 3, 12)
ax_raw_data.plot(xy['x'], xy['y'], 'g-')
for i in range(0, 10):
    ax[i].plot(xy['x'], WT[i])
ax_desired_transformed_data.plot(xy['x'], desired_peaks, 'k-')
fig.tight_layout()
plt.show()
This script will output this image:
The first plot is the raw data, the middle plots are the wavelet transforms, and the last plot is what I want to get out as the processed (background- and noise-removed) data.
Does anyone have any suggestions? Thank you so much for the help.
I ended up finding a package called mlpy which provides an inverse wavelet transform; the relevant functions are mlpy.wavelet.uwt and mlpy.wavelet.iuwt. This is the runnable script I ended up with, which may interest people trying to do noise or background removal:
import numpy as np
from scipy import signal
import matplotlib.pyplot as plt
import mlpy.wavelet as wave
# Make some random data with peaks and noise
############################################################
def gen_data():
    def make_peaks(x):
        bkg_peaks = np.array(np.zeros(len(x)))
        desired_peaks = np.array(np.zeros(len(x)))
        # Make peaks which contain the data desired
        # (Mid range/frequency peaks)
        for i in range(0, 10):
            center = x[-1] * np.random.random() - x[0]
            amp = 100 * np.random.random() + 10
            width = 10 * np.random.random() + 5
            desired_peaks += amp * np.e**(-(x-center)**2/(2*width**2))
        # Also make background peaks (not desired)
        for i in range(0, 3):
            center = x[-1] * np.random.random() - x[0]
            amp = 80 * np.random.random() + 10
            width = 100 * np.random.random() + 100
            bkg_peaks += amp * np.e**(-(x-center)**2/(2*width**2))
        return bkg_peaks, desired_peaks

    # make x axis
    x = np.array(range(0, 1000))
    bkg_peaks, desired_peaks = make_peaks(x)
    avg_noise_level = 30
    std_dev_noise = 10
    size = len(x)
    scattering_noise_amp = 100
    scat_center = 100
    scat_width = 15
    scat_std_dev_noise = 100
    y_scattering_noise = np.random.normal(scattering_noise_amp, scat_std_dev_noise, size) * np.e**(-(x-scat_center)**2/(2*scat_width**2))
    y_noise = np.random.normal(avg_noise_level, std_dev_noise, size) + y_scattering_noise
    y = bkg_peaks + desired_peaks + y_noise
    xy = np.array(list(zip(x, y)), dtype=[('x', float), ('y', float)])
    return xy
# Random data Generated
#############################################################
xy = gen_data()
# Make 2**n amount of data
new_y, bool_y = wave.pad(xy['y'])
orig_mask = np.where(bool_y==True)
# wavelet transform parameters
levels = 8
wf = 'h'
k = 2
# Remove Noise first
# Wave transform
wt = wave.uwt(new_y, wf, k, levels)
# Matrix of the difference between each wavelet level and the original data
diff_array = np.array([(wave.iuwt(wt[i:i+1], wf, k)-new_y) for i in range(len(wt))])
# Index of the level which is most similar to original data (to obtain smoothed data)
indx = np.argmin(np.sum(diff_array**2, axis=1))
# Use the wavelet levels around this region
noise_wt = wt[indx:indx+1]
# smoothed data in 2^n length
new_y = wave.iuwt(noise_wt, wf, k)
# Background Removal
error = 10000
errdiff = 100
i = -1
iter_y_dict = {0:np.copy(new_y)}
bkg_approx_dict = {0:np.array([])}
while abs(errdiff) >= 1e-24:
    i += 1
    # Wave transform
    wt = wave.uwt(iter_y_dict[i], wf, k, levels)
    # Assume last slice is lowest frequency (background approximation)
    bkg_wt = wt[-3:-1]
    bkg_approx_dict[i] = wave.iuwt(bkg_wt, wf, k)
    # Get the error
    errdiff = error - sum(iter_y_dict[i] - bkg_approx_dict[i])**2
    error = sum(iter_y_dict[i] - bkg_approx_dict[i])**2
    # Make every peak higher than bkg_wt
    diff = (new_y - bkg_approx_dict[i])
    peak_idxs_to_remove = np.where(diff > 0.)[0]
    iter_y_dict[i+1] = np.copy(new_y)
    iter_y_dict[i+1][peak_idxs_to_remove] = np.copy(bkg_approx_dict[i])[peak_idxs_to_remove]
# new data without noise and background
new_y = new_y[orig_mask]
bkg_approx = bkg_approx_dict[len(bkg_approx_dict.keys())-1][orig_mask]
new_data = diff[orig_mask]
##############################################################
# plot the data and results
fig = plt.figure()
ax_raw_data = fig.add_subplot(121)
ax_WT = fig.add_subplot(122)
ax_raw_data.plot(xy['x'], xy['y'], 'g')
for bkg in bkg_approx_dict.values():
    ax_raw_data.plot(xy['x'], bkg[orig_mask], 'k')
ax_WT.plot(xy['x'], new_data, 'y')
fig.tight_layout()
plt.show()
And here is the output I am getting now:
As you can see, there is still a problem with the background removal (it shifts to the right after each iteration), but it is a different question which I will address here.
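As an aside (not part of the original answer), a similar smoothing step can be sketched with PyWavelets (pywt), a currently maintained package whose wavedec/waverec pair plays a role comparable to mlpy's uwt/iuwt; the signal y below is only a stand-in for xy['y']:

import numpy as np
import pywt

# Stand-in noisy signal
x = np.arange(1000)
y = np.sin(x / 40.0) + np.random.normal(0, 0.3, x.size)

# Multilevel DWT, zero the two finest detail bands, reconstruct
coeffs = pywt.wavedec(y, 'db4', level=6)
coeffs[-1] = np.zeros_like(coeffs[-1])
coeffs[-2] = np.zeros_like(coeffs[-2])
y_smooth = pywt.waverec(coeffs, 'db4')[:y.size]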