Main Problem: How can the scipy.signal.cwt() function be inverted?
I have seen that MATLAB has an inverse continuous wavelet transform function which returns the original form of the data from the wavelet transform, while also letting you filter out the slices you don't want.
MATLAB inverse cwt function
Since scipy doesn't appear to have the same function, I have been trying to figure out how to get the data back in the same form, while removing the noise and background.
How do I do this?
I tried squaring it to remove negative values, but this gives me values that are way too large and not quite right.
Here is what I have been trying:
# Compute the wavelet transform
widths = range(1,11)
cwtmatr = signal.cwt(xy['y'], signal.ricker, widths)
# Maybe we multiply by the original data and square?
WT_to_original_data = (xy['y'] * cwtmatr)**2
And here is a fully runnable short script to show the type of data I have and what I am trying to get:
import numpy as np
from scipy import signal
import matplotlib.pyplot as plt
# Make some random data with peaks and noise
def make_peaks(x):
bkg_peaks = np.array(np.zeros(len(x)))
desired_peaks = np.array(np.zeros(len(x)))
# Make peaks which contain the data desired
# (Mid range/frequency peaks)
for i in range(0,10):
center = x[-1] * np.random.random() - x[0]
amp = 60 * np.random.random() + 10
width = 10 * np.random.random() + 5
desired_peaks += amp * np.e**(-(x-center)**2/(2*width**2))
# Also make background peaks (not desired)
for i in range(0,3):
center = x[-1] * np.random.random() - x[0]
amp = 40 * np.random.random() + 10
width = 100 * np.random.random() + 100
bkg_peaks += amp * np.e**(-(x-center)**2/(2*width**2))
return bkg_peaks, desired_peaks
x = np.array(range(0, 1000))
bkg_peaks, desired_peaks = make_peaks(x)
y_noise = np.random.normal(loc=30, scale=10, size=len(x))
y = bkg_peaks + desired_peaks + y_noise
xy = np.array(list(zip(x, y)), dtype=[('x', float), ('y', float)])
# Compute the wavelet transform
# I can't figure out what the widths parameter is or does
widths = range(1,11)
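# (From what I can tell, the widths are the wavelet scales in samples: each
#  row of the cwt output is the signal convolved with a ricker wavelet of
#  that width, so small widths respond to narrow peaks, large widths to broad ones.)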
# Ricker is 2nd derivative of Gaussian
# (*close* to what *most* of the features are in my data)
# (They're actually Lorentzians and Breit-Wigner-Fano lines)
cwtmatr = signal.cwt(xy['y'], signal.ricker, widths)
# Maybe we multiply by the original data and square?
WT = (xy['y'] * cwtmatr)**2
# plot the data and results
fig = plt.figure()
ax_raw_data = fig.add_subplot(4,3,1)
ax = {}
for i in range(0, 10):
ax[i] = fig.add_subplot(4,3, i+2)
ax_desired_transformed_data = fig.add_subplot(4,3,12)
ax_raw_data.plot(xy['x'], xy['y'], 'g-')
for i in range(0,10):
ax[i].plot(xy['x'], WT[i])
ax_desired_transformed_data.plot(xy['x'], desired_peaks, 'k-')
fig.tight_layout()
plt.show()
This script will output a figure in which the first plot is the raw data, the middle plots are the wavelet transforms, and the last plot is what I want to get out as the processed (background and noise removed) data.
Does anyone have any suggestions? Thank you so much for the help.
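For anyone who wants to stay within scipy: there is no built-in inverse for signal.cwt, but a crude approximate reconstruction can often be had by summing the coefficient rows over the scales, weighting each row by 1/sqrt(width). This is only a sketch of the usual single-sum approximation, not an exact inverse; the overall constant is fitted to the data rather than derived, and keeping only a subset of the rows (with the matching widths) acts as a band-pass filter:
import numpy as np
from scipy import signal
# toy signal: a narrow peak, a broad background bump, and noise
x = np.arange(1000)
y = (50 * np.exp(-(x - 400)**2 / (2 * 8**2))
     + 20 * np.exp(-(x - 500)**2 / (2 * 200**2))
     + np.random.normal(0, 3, x.size))
widths = np.arange(1, 11)
cwtmatr = signal.cwt(y, signal.ricker, widths)
# crude reconstruction: sum over scales, each row weighted by 1/sqrt(width)
recon = (cwtmatr / np.sqrt(widths)[:, None]).sum(axis=0)
# the sum is only proportional to the (roughly zero-mean) signal,
# so fit a single scale factor against the data instead of deriving one
recon *= np.dot(y - y.mean(), recon) / np.dot(recon, recon)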
I ended up finding a package called mlpy which provides an undecimated wavelet transform, mlpy.wavelet.uwt, together with its inverse, mlpy.wavelet.iuwt. This is the runnable script I ended up with, which may interest people who are trying to do noise or background removal:
import numpy as np
from scipy import signal
import matplotlib.pyplot as plt
import mlpy.wavelet as wave
# Make some random data with peaks and noise
############################################################
def gen_data():
def make_peaks(x):
bkg_peaks = np.array(np.zeros(len(x)))
desired_peaks = np.array(np.zeros(len(x)))
# Make peaks which contain the data desired
# (Mid range/frequency peaks)
for i in range(0,10):
center = x[-1] * np.random.random() - x[0]
amp = 100 * np.random.random() + 10
width = 10 * np.random.random() + 5
desired_peaks += amp * np.e**(-(x-center)**2/(2*width**2))
# Also make background peaks (not desired)
for i in range(0,3):
center = x[-1] * np.random.random() - x[0]
amp = 80 * np.random.random() + 10
width = 100 * np.random.random() + 100
bkg_peaks += amp * np.e**(-(x-center)**2/(2*width**2))
return bkg_peaks, desired_peaks
# make x axis
x = np.array(range(0, 1000))
bkg_peaks, desired_peaks = make_peaks(x)
avg_noise_level = 30
std_dev_noise = 10
size = len(x)
scattering_noise_amp = 100
scat_center = 100
scat_width = 15
scat_std_dev_noise = 100
y_scattering_noise = np.random.normal(scattering_noise_amp, scat_std_dev_noise, size) * np.e**(-(x-scat_center)**2/(2*scat_width**2))
y_noise = np.random.normal(avg_noise_level, std_dev_noise, size) + y_scattering_noise
y = bkg_peaks + desired_peaks + y_noise
xy = np.array(list(zip(x, y)), dtype=[('x', float), ('y', float)])
return xy
# Random data Generated
#############################################################
xy = gen_data()
# Make 2**n amount of data
new_y, bool_y = wave.pad(xy['y'])
orig_mask = np.where(bool_y==True)
# wavelet transform parameters
levels = 8
wf = 'h'
k = 2
# Remove Noise first
# Wave transform
wt = wave.uwt(new_y, wf, k, levels)
# Matrix of the difference between each wavelet level and the original data
diff_array = np.array([(wave.iuwt(wt[i:i+1], wf, k)-new_y) for i in range(len(wt))])
# Index of the level which is most similar to original data (to obtain smoothed data)
indx = np.argmin(np.sum(diff_array**2, axis=1))
# Use the wavelet levels around this region
noise_wt = wt[indx:indx+1]
# smoothed data in 2^n length
new_y = wave.iuwt(noise_wt, wf, k)
# Background Removal
error = 10000
errdiff = 100
i = -1
iter_y_dict = {0:np.copy(new_y)}
bkg_approx_dict = {0:np.array([])}
while abs(errdiff)>=1*10**-24:
i += 1
# Wave transform
wt = wave.uwt(iter_y_dict[i], wf, k, levels)
# Assume last slice is lowest frequency (background approximation)
bkg_wt = wt[-3:-1]
bkg_approx_dict[i] = wave.iuwt(bkg_wt, wf, k)
# Get the error
errdiff = error - sum(iter_y_dict[i] - bkg_approx_dict[i])**2
error = sum(iter_y_dict[i] - bkg_approx_dict[i])**2
# Make every peak higher than bkg_wt
diff = (new_y - bkg_approx_dict[i])
peak_idxs_to_remove = np.where(diff>0.)[0]
iter_y_dict[i+1] = np.copy(new_y)
iter_y_dict[i+1][peak_idxs_to_remove] = np.copy(bkg_approx_dict[i])[peak_idxs_to_remove]
# new data without noise and background
new_y = new_y[orig_mask]
bkg_approx = bkg_approx_dict[len(bkg_approx_dict.keys())-1][orig_mask]
new_data = diff[orig_mask]
##############################################################
# plot the data and results
fig = plt.figure()
ax_raw_data = fig.add_subplot(121)
ax_WT = fig.add_subplot(122)
ax_raw_data.plot(xy['x'], xy['y'], 'g')
for bkg in bkg_approx_dict.values():
ax_raw_data.plot(xy['x'], bkg[orig_mask], 'k')
ax_WT.plot(xy['x'], new_data, 'y')
fig.tight_layout()
plt.show()
And here is the output I am getting now:
As you can see, there is still a problem with the background removal (it shifts to the right after each iteration), but it is a different question which I will address here.
Overall, I want to calculate a Fourier transform of a given data set and filter out some of the frequencies with the biggest absolute values. So:
1) Given a data array D with accompanying times t, 2) find the k biggest Fourier coefficients and 3) remove those coefficients from the data, in order to filter out certain signals from the original data.
Something goes wrong at the end when plotting the filtered data set over the given times. I'm not exactly sure where the error is. The final 'filtered data' plot doesn't look even slightly smoothed and somehow changes its position compared with the original data. Is my code completely bad?
Part 1):
import numpy as np
n = 1000
limit_low = 0
limit_high = 0.48
D = np.random.normal(0, 0.5, n) + np.abs(np.random.normal(0, 2, n) * np.sin(np.linspace(0, 3*np.pi, n))) + np.sin(np.linspace(0, 5*np.pi, n))**2 + np.sin(np.linspace(1, 6*np.pi, n))**2
scaling = (limit_high - limit_low) / (max(D) - min(D))
D = D * scaling
D = D + (limit_low - min(D)) # given data
t = np.linspace(0, D.size - 1, D.size) # times
Part 2):
from numpy import linspace
import numpy as np
from scipy import fft, ifft
D_f = fft.fft(D) # fft of D-dataset
#---extract the k biggest coefficients out of D_f ---
k = 15
I, bigvals = [], []
for i in np.argsort(-D_f):
if D_f[i] not in bigvals:
bigvals.append(D_f[i])
I.append(i)
if len(I) == k:
break
bigcofs = np.zeros(len(D_f))
bigcofs[I] = D_f[I] # array with only zeros in it except for the k maximal coefficients
Part 3):
D_filter = fft.ifft(bigcofs)
D_new = D - D_filter
p1=plt.plot(t,D,'r')
p2=plt.plot(t,D_new,'b');
plt.legend((p1[0], p2[0]), ('original data', 'filtered data'))
I appreciate your help, thanks in advance.
There were two issues I noticed:
You likely want the components with the largest absolute value, so np.argsort(-np.abs(D_f)) instead of np.argsort(-D_f).
More subtly: bigcofs = np.zeros(len(D_f)) is of type float64, so the line bigcofs[I] = D_f[I] was discarding the imaginary part. You can fix this with bigcofs = np.zeros(len(D_f), dtype=complex).
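A tiny standalone illustration of that second point (depending on the NumPy version this is a ComplexWarning with the imaginary part silently dropped, or a hard error):
import numpy as np
c = np.array([1 + 2j, 3 - 4j])
f_real = np.zeros(2)                # float64, like the original bigcofs
f_real[:] = c                       # ComplexWarning: imaginary part is discarded
print(f_real)                       # [1. 3.]
f_cplx = np.zeros(2, dtype=complex)
f_cplx[:] = c
print(f_cplx)                       # [1.+2.j 3.-4.j]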
I've improved your code slightly below to get the desired results:
import numpy as np
from scipy import fft, ifft
import matplotlib.pyplot as plt
n = 1000
limit_low = 0
limit_high = 0.48
N_THRESH = 10
D = 0.5*np.random.normal(0, 0.5, n) + 0.5*np.abs(np.random.normal(0, 2, n) * np.sin(np.linspace(0, 3*np.pi, n))) + np.sin(np.linspace(0, 5*np.pi, n))**2 + np.sin(np.linspace(1, 6*np.pi, n))**2
scaling = (limit_high - limit_low) / (max(D) - min(D))
D = D * scaling
D = D + (limit_low - min(D)) # given data
t = np.linspace(0,D.size-1,D.size) # times
# transformed data
D_fft = fft.fft(D)
# Create boolean mask for N largest indices
idx_sorted = np.argsort(-np.abs(D_fft))
idx = idx_sorted[0:N_THRESH]
mask = np.zeros(D_fft.shape).astype(bool)
mask[idx] = True
# Split fft above, below N_THRESH points:
D_below = D_fft.copy()
D_below[mask] = 0
D_above = D_fft.copy()
D_above[~mask] = 0
#inverse separated functions
# keep only the real part: the masked spectrum is not exactly conjugate-symmetric,
# so ifft can return imaginary residuals that would otherwise upset plotting
D_above = fft.ifft(D_above).real
D_below = fft.ifft(D_below).real
# plot
plt.ion()
f, (ax1, ax2, ax3) = plt.subplots(3,1)
l1, = ax1.plot(t, D, c="r", label="original")
l2, = ax2.plot(t, D_above, c="g", label="top {} coeff. signal".format(N_THRESH))
l3, = ax3.plot(t, D_below, c="b", label="remaining signal")
f.legend(handles=[l1,l2,l3])
plt.show()
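In this split, D_below is the signal with the N_THRESH strongest components removed, i.e. the 'filtered data' the question asks for, while D_above is the part that was taken out.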
As you can see, matplotlib links a few points in the wrong order when using plt.plot.
data file
How can I improve my curve?
Can I give it a minimum interval between two points?
Will sorting my points help me get a good curve? (A sketch of the sorting idea follows the code below.)
I can sort my x-axis points, but the y-axis seems more complicated.
Thank you.
import numpy as np
from matplotlib import pyplot as plt
File = "crown" + str(0) + str(0.01)
data = np.loadtxt(File)
data = data.reshape((len(data),3))
print(data)
R0 = 10.
theta = np.arctan2(data[:,1],data[:,0]) # calculation of theta
print(theta)
Radius = np.sqrt(data[:,1]**2 + data[:,0]**2)
Mradius = Radius - np.mean(Radius)
Mradius = Mradius/R0
n = len(theta) # length
Radiushat = np.fft.fft(Mradius,n) #fft of the Radius
PSD = Radiushat * np.conj(Radiushat)/n #Power spectral density
delta = 0.0001
k = (1/(delta*n)) * np.arange(n) #wavenumber
indice = np.where(PSD==np.max(PSD)) # find the index of the maximum of the PSD array
#print(k[indice][0]) #k_{max}
L = np.arange(1,np.floor(n/2), dtype='int')
# filtered the signal
"""
indices = PSD > np.max(PSD)/3.
PDSclean = PSD * indices
Radiushatclean = indices * Radiushat
ffilt = np.fft.ifft(Radiushatclean)"""
fig,axs = plt.subplots(2,1)
plt.sca(axs[0])
plt.plot(theta,Radius,label="Crown")
ticks = [-np.pi, -np.pi/2., 0, np.pi/2., np.pi]
ticks_labels = ['-π', '-π/2', '0', 'π/2', 'π']
plt.xticks(ticks, ticks_labels)
"""plt.plot(theta,ffilt, label="Filtered")"""
plt.xlabel("theta")
plt.ylabel("R/R(t=0)")
plt.legend()
plt.title('k_{max}=%.2f' % (k[indice][0]))
plt.sca(axs[1])
plt.plot(k[L],PSD[L]/np.max(PSD))
plt.xlim(0,k[L[-1]])
plt.xlabel("k")
plt.ylabel("PSD")
fig.tight_layout()
plt.show()
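On the sorting question above: the jagged segments come from plotting the points in file order rather than in order of increasing theta, so one argsort applied to both arrays is usually enough. A minimal sketch, reusing theta, Radius and plt from the script above:
order = np.argsort(theta)          # indices that put theta in increasing order
theta_sorted = theta[order]
Radius_sorted = Radius[order]      # reorder the radii the same way
plt.plot(theta_sorted, Radius_sorted, label="Crown")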
I want to read data from a file and plot its FFT.
I have used scipy's fft function to plot the FFT of the synthetically generated signal data. However, when I save the generated signal data and then plot its FFT, I get the same peaks as before, but the amplitude and frequency values are off compared to the original plot. The problem seems to be with the second FFT function (fftsignal2 below). Can anyone help me with this?
import numpy as np
import pandas as pd
import scipy
from scipy.fftpack import fft,fftfreq
import matplotlib.pyplot as plt
import csv
#Function to plot the raw data
def raw_plot(signals,time):
'''Plot the raw values in the time domain '''
plt.figure(0)
plt.plot(time,signals)
plt.xlabel("Time(s)")
plt.ylabel("Amplitude(g)")
plt.show()
#Function to plot the fft of the data
def fft_signal(signal, sampling_frequency, timestart, timeend):
T = 1 / sampling_frequency
N = (timeend - timestart) * sampling_frequency
x = np.linspace(0.0, 1.0 / (2.0 * T), int(N / 2))
# print("Values of X are :",x)
# print(len(x))
yr2 = fft(signal)
y2 = 2 / N * np.abs(yr2[0:int(N / 2)])
# amp_spec = abs(fft(signal))/N
# freq = np.linspace(0,2,num=N)
plt.figure(2)
plt.plot(x, y2)
plt.xlabel("Frequency")
plt.ylabel("Amplitude")
plt.show()
def fftsignal2(data, sampling_freq = 100, timestep = 1 ):
N = len(data)
FS = 1000
T= 1/FS
timestep = timestep
yr2 = fft(data)
y2 = 2 / N * np.abs(yr2[0:int(N / 2)])
# f= np.arange(0,N)*FS/N
# f= np.linspace(0,1.0/T,N)
frq = np.fft.fftfreq(N,d=timestep)
# f= np.linspace(0.0,int(N/2)*sampling_freq)
frq= frq[0:len(frq)//2]
print("Frequencies Length :",len(frq))
print("DATA LENGTH :",len(y2))
plt.figure(3)
plt.plot(frq,y2)
plt.xlabel("Frequency")
plt.ylabel("Amplitude")
plt.show()
Mag1_Hz = 70
Mag1_AMP = 10
Mag4_AMP = 60
Mag4_Hz= 50
Mag2_Hz = 20
Mag2_AMP = 8
time_start = 0
time_end = 4
Sampling_frequency = 100
No_of_samples = (time_end-time_start) * Sampling_frequency
time = np.linspace(time_start,time_end,No_of_samples)
#Generate the signals values
data_signal = Mag1_AMP * np.sin(2* np.pi * Mag1_Hz * time ) + Mag2_AMP * np.cos(2* np.pi * Mag2_Hz * time)
raw_plot(signals=data_signal[:800],time=time[:800])
fft_signal(data_signal,sampling_frequency=Sampling_frequency,timestart=time_start,timeend=time_end)
# for (time, datavalues) in zip(time,data_signal):
# with open("data_signals_27.csv","a",newline="") as file:
# writefile = csv.writer(file)
# print("Time%f,Data %f"%(time,datavalues))
# print(time,datavalues)
# writefile.writerow([time,datavalues])
fftsignal2(data= data_signal, sampling_freq= Sampling_frequency,timestep=2)
The peaks come out right in the first function, fft_signal. In fftsignal2, which I am constructing to plot the FFT of the saved data, I get the same peaks, but their magnitude and frequency values are off, and I don't understand why. Can anyone help me with that?
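One likely cause, sketched below: fftsignal2 passes timestep=2 (and defines an unused FS = 1000) to np.fft.fftfreq, but d has to be the actual sample spacing, 1/sampling_freq = 0.01 s here, otherwise the frequency axis is scaled by an arbitrary factor. A minimal corrected version (an assumption about the intent, not the original code):
import numpy as np
from scipy.fftpack import fft
import matplotlib.pyplot as plt
def fftsignal2_fixed(data, sampling_freq=100):
    N = len(data)
    y2 = 2 / N * np.abs(fft(data)[:N // 2])
    # d must be the real spacing between samples in seconds
    frq = np.fft.fftfreq(N, d=1.0 / sampling_freq)[:N // 2]
    plt.plot(frq, y2)
    plt.xlabel("Frequency (Hz)")
    plt.ylabel("Amplitude")
    plt.show()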
I am plotting 2D images of energy and density distributions. There is always a slight misalignment in the mapping, where the very first "columns" seem to end up in the last columns of the plot.
I have attached a link to a test data file.
Data files
Here is the plot:
Is there anything to prevent this?
The partial plotting code is as follows:
import numpy as np
import matplotlib.pyplot as plt
import pylab as pyl
import scipy.stats as ss
import matplotlib.ticker as ticker
import matplotlib.transforms as tr
#%matplotlib inline
pi = 3.1415
n = 5e24 # density plasma
m = 9.109e-31
eps = 8.85e-12
e = 1.6021725e-19
c = 3e8
wp=np.sqrt(n*e*e/(m*eps))
kp = np.sqrt(n*e*e/(m*eps))/c #plasma wavenumber
case=400
## decide on the target range of analysis for multiples
start= 20500
end = 21500
gap = 1000
## Multiples plots
def target_range (start, end, gap):
while start<= end:
yield start
start += gap
for step in target_range(start, end, gap):
fdata =np.genfromtxt('./beam_{}'.format(step)).reshape(-1,6)
## dimension, dt, and superpaticle
xBoxsize = 50e-6 #window size
yBoxsize = 80e-6 #window size
xbind = 10
ybind = 1
dx = 4e-8 #cell size
dy = 4e-7 #cell size
dz = 1e-6 #assume to be same as dy
dt = 1.3209965456e-16
sptcl = 1.6e10
xsub = 0e-6
xmax = dt*step*c
xmin = xmax - xBoxsize
ysub = 1e-7
ymin = ysub #to make our view window
ymax = yBoxsize - ysub
xbins = int((xmax - xmin)/(dx*xbind))
ybins = int((ymax - ymin)/(dy*ybind))
#zbins = int((zmax - zmin)/dz) #option for 3D
# To make or define "data_arr" as a matrix with 2D array size 'xbins x ybins'
data_arr = np.zeros((2, xbins, ybins), dtype=float)
for line in fdata:
x = int((line[0]-xmin)/(dx*xbind))
y = int((line[1]-ymin)/(dy*ybind))
#z = int((line[2]-zmin)/dz)
if x >= xbins: x = xbins - 1
if y >= ybins: y = ybins - 1
#if z >= zbins: z = zbins - 1
data_arr[0, x, y] = data_arr[0,x, y] + 1 #cummulative adding up the number of particles
energy_total = np.sqrt(1+ line[2]*line[2]/(c*c)+line[3]*line[3]/(c*c))/0.511
data_arr[1, x, y] += energy_total
#array 1 tells us the energy while array 0 tells us the particles
## make average energy , total energy/particle number
np.errstate(divide='ignore',invalid='ignore')
en_arr = np.true_divide(data_arr[1],data_arr[0]) # total energy/number of particles
en_arr[en_arr == np.inf] = 0
en_arr = np.nan_to_num(en_arr)
en_arr = en_arr.T
## This part is real density of the distribution
data_arr[0]= data_arr[0] * sptcl/dx/dy #in m-3
d = data_arr[0].T
## Plot and save density and energy distribution figures
den_dist=plt.figure(1)
plt.imshow(d,origin='lower', aspect = 'auto',cmap =plt.get_cmap('gnuplot'),extent =(xmin/1e-3,xmax/1e-3,ymin/1e-6,ymax/1e-6))
plt.title('Density_dist [m-3]_{}'.format(step))
plt.xlabel('distance[mm]')
plt.ylabel('y [um]')
plt.colorbar()
plt.show()
den_dist.savefig("./Qen_distribution_{}.png".format(step),format ='png')
#note:cmap: rainbow, hot,jet,gnuplot,plasma
energy_dist=plt.figure(2)
plt.imshow(en_arr, origin ='lower',aspect = 'auto', cmap =plt.get_cmap('jet'),extent =(xmin/1e-3,xmax/1e-3,ymin/1e-6,ymax/1e-6))
plt.title ('Energy_dist [MeV]_{} '.format(step))
plt.xlabel('distance[mm]')
plt.ylabel('y [um]')
plt.colorbar()
plt.show()
energy_dist.savefig("./Qenergy_distribution_{}.png".format(step),format ='png')
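A guess at the wrap-around: int((line[0] - xmin)/(dx*xbind)) can come out negative for particles just below xmin (and likewise for y), and a negative index wraps around to the end of a NumPy array, which would put the first "columns" onto the last ones. The loop only clamps the upper end, so clamping both ends should prevent it (a sketch of the binning lines only):
x = int((line[0] - xmin) / (dx * xbind))
y = int((line[1] - ymin) / (dy * ybind))
# clamp both ends so out-of-window particles cannot wrap to the far edge
x = min(max(x, 0), xbins - 1)
y = min(max(y, 0), ybins - 1)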
I like to prototype algorithms in Matlab, but I am required to put them on a server that also runs quite a bit of Python code. Hence I quickly converted the code to Python and compared the two. The Matlab implementation runs ~1000 times faster (from timing function calls, not profiling). Does anyone know offhand why Python is so slow here?
Matlab
% init random data
w = 800;
h = 1200;
hmap = zeros(w,h);
npts = 250;
for i=1:npts
hmap(randi(w),randi(h)) = hmap(randi(w),randi(h))+1;
end
% Params
disksize = 251;
nBreaks = 25;
saturation = .9;
floorthresh =.05;
fh = fspecial('gaussian', disksize, disksize/7);
hmap = conv2(hmap, fh, 'same');
% Scaling, paritioning etc
hmap = hmap/(max(max(hmap)));
hmap(hmap<floorthresh) = 0;
hmap = round(nBreaks * hmap)/nBreaks;
hmap = hmap * (1/saturation);
% Show the image
imshow(hmap, [0,1])
colormap('jet')
Python
import numpy as np
from scipy.signal import convolve2d as conv2
import matplotlib.pyplot as plt
# Test data parameters
w = 800
h = 1200
npts = 250
# generate data
xvals = np.random.randint(w, size=npts)
yvals = np.random.randint(h, size=npts)
# Heatmap parameters
gaussianSize = 250
nbreaks = 25
# Preliminary function definitions
def populateMat(w, h, xvals, yvals):
container = np.zeros((w,h))
for idx in range(0,xvals.size):
x = xvals[idx]
y = yvals[idx]
container[x,y] += 1
return container
def makeGaussian(size, fwhm):
x = np.arange(0, size, 1, float)
y = x[:,np.newaxis]
x0 = y0 = size // 2
return np.exp(-4*np.log(2) * ((x-x0)**2 + (y-y0)**2) / fwhm**2)
# Create the data matrix
dmat = populateMat(w,h,xvals,yvals)
h = makeGaussian(gaussianSize, fwhm=gaussianSize/2)
# Convolve
dmat2 = conv2(dmat, h, mode='same')
# Scaling etc
dmat2 = dmat2 / dmat2.max()
dmat2 = np.round(nbreaks*dmat2)/nbreaks
# Show
plt.imshow(dmat2)
plt.show()
OK, problem solved for me thanks to the suggestion in @Yves Daust's comments:
The filter scipy.ndimage.filters.gaussian_filter utilises the separability of the kernel and reduces the running time to within a single order of magnitude of the Matlab implementation.
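A quick sketch of why separability matters: a 2-D Gaussian blur can be applied as two 1-D passes, so the per-pixel cost drops from O(k^2) to O(k):
import numpy as np
from scipy.ndimage import gaussian_filter, gaussian_filter1d
img = np.random.rand(800, 1200)
sigma = 251 / 7                      # same sigma as the fspecial call above
# two 1-D passes give (up to floating point) the same result as the 2-D filter
two_pass = gaussian_filter1d(gaussian_filter1d(img, sigma, axis=0), sigma, axis=1)
print(np.allclose(two_pass, gaussian_filter(img, sigma)))   # True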
import numpy as np
from scipy.ndimage.filters import gaussian_filter as gaussian
import matplotlib.pyplot as plt
# Test data parameters
w = 800
h = 1200
npts = 250
# generate data
xvals = np.random.randint(w, size=npts)
yvals = np.random.randint(h, size=npts)
# Heatmap parameters
gaussianSize = 250
nbreaks = 25
# Preliminary function definitions
def populateMat(w, h, xvals, yvals):
container = np.zeros((w,h))
for idx in range(0,xvals.size):
x = xvals[idx]
y = yvals[idx]
container[x,y] += 1
return container
# Create the data matrix
dmat = populateMat(w,h,xvals,yvals)
# Convolve
dmat2 = gaussian(dmat, gaussianSize/7)
# Scaling etc
dmat2 = dmat2 / dmat2.max()
dmat2 = np.round(nbreaks*dmat2)/nbreaks
# Show
plt.imshow(dmat2)
plt.show()
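As a side note, scipy.signal.fftconvolve would also have avoided the slow direct convolution while keeping the original 250x250 kernel (a drop-in sketch, reusing dmat and h from the first script):
from scipy.signal import fftconvolve
# FFT-based convolution: same result as conv2(..., mode='same') up to
# floating-point error, but far cheaper for a kernel this large
dmat2 = fftconvolve(dmat, h, mode='same')
Note also that the gaussian_filter version skips the floor-threshold and saturation steps from the Matlab snippet; if needed, they can be appended after the rounding step (e.g. dmat2[dmat2 < 0.05] = 0 and dmat2 = dmat2 / 0.9).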