Python fast Fourier transform for very noisy data - python

I have a file with velocity magnitude data and vorticity magnitude data from a fluid simulation.
I want to find out what is the frequency for these two data sets.
my code:
# -*- coding: utf-8 -*-
"""
Spyder Editor
This is a temporary script file.
"""
import re
import math
import matplotlib.pyplot as plt
import numpy as np
probeU1 = []
probeV1 = []
# this creates an array containig all the timesteps, cutting of the first 180, because the system has to stabelize.
number = [ round(x * 0.1, 1) for x in range(180, 301)]
# this function loops over the different time directories, and reads the velocity file.
for i in range(len(number)):
filenamepath = "/Refinement/Vorticity4/probes/" +str(number[i]) + "/U"
data= open(filenamepath,"r")
temparray = []
#removes all the formatting around the data
for line in data:
if line.startswith('#'):
continue
else:
line = re.sub('[()]', "", line)
values = line.split()
#print values[1], values[2]
xco = values[1::3]
yco = values[2::3]
#here it extracts all the velocity data from all the different probes
for i in range(len(xco)):
floatx = float(xco[i])
floaty = float(yco[i])
temp1 = math.pow(floatx,2)
temp2 = math.pow(floaty,2)
#print temp2, temp1
temp3 = temp1+temp2
temp4 = math.sqrt(temp3)
#takes the magnitude of the velocity
#print temp4
temparray.append(temp4)
probeU1.append(temparray)
#
#print probeU1[0]
#print len(probeU1[0])
#
# this function loops over the different time directories, and reads the vorticity file.
for i in range(len(number)):
filenamepath = "/Refinement/Vorticity4/probes/" +str(number[i]) + "/vorticity"
data= open(filenamepath,"r")
# print data.read()
temparray1 = []
for line in data:
if line.startswith('#'):
continue
else:
line = re.sub('[()]', "", line)
values = line.split()
zco = values[3::3]
#because the 2 dimensionallity the z-component of the vorticity is already the magnitude
for i in range(len(zco)):
abso = float(zco[i])
add = np.abs(abso)
temparray1.append(add)
probeV1.append(temparray1)
#Old code block to display the data and check that it made a wave pattern(which it did)
##Printing all probe data from 180-300 in one graph(unintelligible)
#for i in range(len(probeU1[1])):
# B=[]
# for l in probeU1:
# B.append(l[i])
## print 'B=', B
## print i
# plt.plot(number,B)
#
#
#plt.ylabel('magnitude of velocity')
#plt.show()
#
##Printing all probe data from 180-300 in one graph(unintelligible)
#for i in range(len(probeV1[1])):
# R=[]
# for l in probeV1:
# R.append(l[i])
## print 'R=', R
## print i
# plt.plot(number,R)
#
#
#plt.ylabel('magnitude of vorticity')
#plt.show()
#Here is where the magic happens, (i hope)
ans=[]
for i in range(len(probeU1[1])):
b=[]
#probeU1 is a nested list, because there are 117 different probes, wich all have the data from timestep 180-301
for l in probeU1:
b.append(l[i])
#the freqeuncy was not oscillating around 0, so moved it there by substracting the mean
B=b-np.mean(b)
#here the fft happens
u = np.fft.fft(B)
#This should calculate the frequencies?
freq = np.fft.fftfreq(len(B), d= (number[1] - number[0]))
# If im not mistakes this finds the peak frequency for 1 probe and passes it another list
val = np.argmax(np.abs(u))
ans.append(np.abs(freq[val]))
plt.plot(freq, np.abs(u))
#print np.mean(ans)
plt.xlabel('frequency?')
plt.savefig('velocitiy frequency')
plt.show()
# just duplicate to the one above it
ans1=[]
for i in range(len(probeV1[1])):
c=[]
for l in probeU1:
c.append(l[i])
C=c-np.mean(c)
y = np.fft.fft(C)
freq1 = np.fft.fftfreq(len(C), d= (number[1] - number[0]))
val = np.argmax(np.abs(y))
ans1.append(np.abs(freq1[val]))
plt.plot(freq1, np.abs(y))
#print np.mean(ans1)
plt.ylabel('frequency?')
plt.savefig('vorticity frequency')
plt.show()
data.close()
My data contains 117 probes each having their own 121 point of velocity magnitude data.
My aim is to find the dominate frequency for each probe and then collect all those and plot them in a histogram.
My question is about the part where it says this is where the magic happens. I believe the fft is already working correctly
y = np.fft.fft(C)
freq1 = np.fft.fftfreq(len(C), d= (number[1] - number[0]))
And if I'm not mistaken the freq1 list should contain all the frequencies for a given probe. I've checked this list visually and the amount of different frequencies is very high(20+) so the signal is probably very noisy.
# If im not mistakes this finds the peak frequency for 1 probe and passes it another list
val = np.argmax(np.abs(u))
ans.append(np.abs(freq1[val]))
That this part should in theory take the biggest signal from one probe and than put in the "ans" list. But I'm a bit confused as to how i can no correctly identify the right frequency. As there should i theory be one main frequency. How can I correctly estimate the "main" frequency from all this data from all the noise
For reference I'm modeling an Von Karmann vortex street and I'm looking for the frequency of vortex shedding. https://en.wikipedia.org/wiki/K%C3%A1rm%C3%A1n_vortex_street
Can anyone help me on how to solve this?

The line
freq1 = np.fft.fftfreq(len(C), d= (number[1] - number[0]))
Only generates an index going from
freq1 = [0, 1, ..., len(C)/2-1, -len(C)/2, ..., -1] / (d*len(C))
Which is useful to compute your frequencies array as
freq[i] = freq1[i]*alpha
Where alpha is your basic wavenumber computed as
alpha = 1/Ts
Being Ts your sampling period. I think that because freq1 is not scaled you array of frequencies is so high.
Note that if you are sampling your data using different time steps you will need to interpolate it at in a evenly space domain using numpy.interp (for example).
To estimate the main frequency just find the index where the fft-transformed variable is higher and relate that index to freq[i].

Related

How do I find the saturation point of a curve in python?

I have a graph of the number of FRB detections against the Signal to Noise Ratio.
At a certain point, the Signal to Noise ratio flattens out.
The input variable (the number of FRB detections) is defined by
N_vals = numpy.logspace(0, np.log10((10)**(11)), num = 1000)
and I have a series of arrays that correspond to outputs of the Signal to Noise Ratio (they have the same length).
So far, I have used numpy.gradient() on all the Signal-to-Noise (SNR) ratios to obtain the corresponding slope at every point.
I want to obtain the index at which the Signal-to-Noise Ratio dips below a certain threshold.
Using numpy functions designed to find the inflexion point won't work in my case as the gradient continues to increase - just very gradually.
Here is some code to illustrate my initial attempt:
import numpy as np
grad100 = np.gradient(NDM100)
grad300 = np.gradient(NDM300)
grad1000 = np.gradient(NDM1000)
#print(grad100)
grad2 = np.gradient(N2)
grad5 = np.gradient(N5)
grad10 = np.gradient(N10)
glist = [np.array(grad2), np.array(grad5), np.array(grad10), np.array(grad100), np.array(grad300), np.array(grad1000)]
indexlist = []
for g in glist:
for i in g:
satdex = np.where(i == 10**(-4))[0]
indexlist.append(satdex)
Doing this just gives me a list of empty arrays - for instance:
[array([], dtype=int64),..., array([], dtype=int64)]
Does anyone know a better way of doing this? I just want the indices corresponding to the points at which the gradient is 10**(-4) for each array. This is my 'saturation point'.
Please let me know if I need to provide more information and if so, what exactly. I'm not expecting anyone to run my code as there is a lot of it; rather, I'm after some general tips or some commentary on the structure of my code. I've attached the graph that corresponds to my data (the arrows show what I mean by the point at which the SNR flattens out).
I feel that this is a fairly simple programming problem and therefore doesn't warrant the detail that would be found in questions on error messages for example.
SNR curves with arrows indicating what I mean by 'saturation points'
Alright so I think I've got it. I'm attaching my code below. Obviously it's taken out of context here and won't run by itself so this is just so anyone that finds this question can see what kind of structure works. The general idea is that for a given set of curves, I find the x and y-values at which they begin to flatten out.
x = 499
N_vals2 = N_vals[500:]
grad100 = np.gradient(NDM100)
grad300 = np.gradient(NDM300)
grad1000 = np.gradient(NDM1000)
grad2 = np.gradient(N2)
grad5 = np.gradient(N5)
grad10 = np.gradient(N10)
preg_list = [grad100, grad300, grad1000, grad2, grad5, grad10]
g_list = []
for gl in preg_list:
g_list.append(gl[500:])
sneg_list = [NDM100, NDM300, NDM1000, N2, N5, N10]
sn_list = []
for sl in sneg_list:
sn_list.append(sl[500:])
t_list = []
gt_list = []
ic_list = []
for g in g_list:
threshold = 0.1*np.max(g)
thresh_array = np.full(len(g), fill_value = threshold)
t_list.append(threshold)
gt_list.append(thresh_array)
ic = np.isclose(g, thresh_array, rtol = 0.5)
ic_list.append(ic)
index_list = []
grad_list = []
for i in ic_list:
index = np.where(i == True)
index_list.append(index)
for j in g_list:
gval = j[index]
grad_list.append(gval)
saturation_indices = []
for gl in index_list:
first_index = gl[0][0]
saturation_indices.append(first_index)
#print(saturation_indices)
saturation_points = []
sn_list_firsts = [snf[0] for snf in sn_list]
for s in saturation_indices:
n = round(N_vals2[s], 0)
sn_tuple = (n, s)
saturation_points.append(sn_tuple)

Matplotlib - color-coding data plot lines?

I have a python program that reads tsv data and plots it using the matplotlib library.
I feel like my code works pretty well:
def main(compsPath: str, gibbsPath: str):
"""
Given the file paths for comps.tsv and
gibbs.tsv, this main function will
produce two separate plots - one for each file.
"""
# Read tsv data into np record arrays
# Slice off header text
with open(compsPath, 'r') as fcomps:
reader = csv.reader(fcomps, delimiter='\t')
compsHeader = next(reader)
compsData = np.array(list(reader)).astype(np.double)
with open(gibbsPath, 'r') as fgibbs:
reader = csv.reader(fgibbs, delimiter='\t')
gibbsHeader = next(reader)
gibbsData = np.array(list(reader)).astype(np.double)
# Get data dimensions:
# - - - M := Number of metabolites
# - - - N := Number of reactions
M = compsData.shape[1] - 1
N = gibbsData.shape[1] - 1
plotComps(M, compsData, compsHeader)
plotGibbs(N, gibbsData, gibbsHeader)
plt.show()
The plotGibbs function produces the following graphic for the tsv file I'm working with. For this graphic, N=3 (3 reactions).
I would like to indicate at what point in time each reaction becomes unfavorable (in the context of my project, this just means that the reaction stops). This occurs when the gibbs free energy value (∆G) of the reaction is greater than or equal to 0.
I feel like I could best emphasize this by color-coding the line plots my program generates. For negative ∆G values, I would like the line to be green, and for positive or zero ∆G values, I would like the line to be red.
Here is my current code for generating the gibbs free energy plots (does not color-code):
def plotGibbs(N: int, gibbsData: np.ndarray, gibbsHeader):
gibbsFig = plt.figure()
gibbsFig.suptitle("∆G˚ Yield Plotted over Time (days)")
numCols = ceil(N / 2)
numRows = (N // numCols) + 1
for n in range (1, N+1):
ax = gibbsFig.add_subplot(numRows, numCols, n)
ax.set_ylabel(gibbsHeader[n])
ax.set_xlabel(gibbsHeader[0])
ax.plot(gibbsData[:, 0], gibbsData[:, n])
gibbsFig.tight_layout()
How could I make it so that negative values are plotted green, and non-negative values are plotted red?
You could try to find where a change of sign occurs in your data using np.where with a simple condition like gibbsData[:, n]>0 then plot negative/positive data accordingly:
def plotGibbs(N: int, gibbsData: np.ndarray, gibbsHeader):
gibbsFig = plt.figure()
gibbsFig.suptitle("∆G˚ Yield Plotted over Time (days)")
numCols = ceil(N / 2)
numRows = (N // numCols) + 1
for n in range (1, N+1):
ax = gibbsFig.add_subplot(numRows, numCols, n)
ax.set_ylabel(gibbsHeader[n])
ax.set_xlabel(gibbsHeader[0])
# idx where sign change occurs for data n
idx_zero = np.where(gibbsData[:, n]>0)[0][0]
# negatives y values
ax.plot(gibbsData[:idx_zero, 0], gibbsData[:idx_zero,n],'g')
# positive y values
ax.plot(gibbsData[idx_zero:, 0], gibbsData[idx_zero:,n],'r')
gibbsFig.tight_layout()

Having trouble getting timeit to run with numpy

I simply want to see how long it takes this code to execute. There is a similar question here:
timeit module in python does not recognize numpy module
and I understand what they are saying, but I don't get where these lines of code should be placed. Here is what I have. I know its a little long to scroll through, but you can see where I have placed the timeit commands at the beginning and end. This is not working and I am guessing it is because I have placed these lines of code for timeit incorrectly. The code works if I delete the timeit stuff.
Thanks
import timeit
u = timeit.Timer("np.arange(1000)", setup = 'import numpy as np')
#set up variables
m = 4.54
g = 9.81
GR = 8
r_pulley = .1
th1=np.pi/4 #based on motor 1 encoder counts. Number of degrees rotated from + x-axis of base frame 0
th2=np.pi/4 #based on motor 2 encoder counts. Number of degrees rotated from + x-axis of m1 frame 1
th3_motor = np.pi/4*12
th3_pulley = th3_motor/GR
#required forces in x,y,z at end effector
fx = 1
fy = 1
fz = m*g #need to figure this out
l1=6
l2=5
l3=th3_pulley*r_pulley
#Build Homogeneous Tranforms Matrices
H1_0 = np.array(([np.cos(th1),-np.sin(th1),0,0],[np.sin(th1),np.cos(th1),0,0],[0,0,1,l3],[0,0,0,1]))
H2_1 = np.array(([np.cos(th2),-np.sin(th2),0,l1],[np.sin(th2),np.cos(th2),0,0],[0,0,1,0],[0,0,0,1]))
H3_2 = np.array(([1,0,0,l2],[0,1,0,0],[0,0,1,0],[0,0,0,1]))
H2_0 = np.dot(H1_0,H2_1)
H3_0 = np.dot(H2_0,H3_2)
print(np.matrix(H3_0))
#These HTMs are using the way I derived them, not the "correct" way.
#The answers are the same, but I think the processing time will be the same.
#This is because either way the two matrices with all the sines and cosines...
#will be the same. Only difference is in one method the ones and zeroes...
#matrix is the first HTM, in the other method it is the last HTM. So its the...
#same number of matrices with the same information, just being dot-producted...
#in a different order.
#Build Jacobian
#np.cross(x, y)
d10 = H1_0[0:3, 3]
d20 = H2_0[0:3, 3]
d30 = H3_0[0:3, 3]
print(d30)
subt1 = d30-d10
subt2 = d30-d20
#tsubt1 = subt1.transpose()
#tsubt2 = subt2.transpose()
#print(tsubt1)
zeroes = np.array(([0,0,1]))
print(subt1)
print(subt2)
cross1 = np.cross(zeroes, subt1)
cross2 = np.cross(zeroes, subt2)
cross1
cross2
#These cross products are correct but need to be tranposed into columns, right now they are a single row.
#tcross1=cross1.reshape(-1,1)
#tcross2=cross2.reshape(-1,1)
#dont actually need these transposes but I didnt want to forget the command.
# build jacobian (J)
#J = np.zeros((6,2))
#J[0:3,0] = cross1
#J[0:3,1] = cross2
#J[3:6,0] = zeroes
#J[3:6,1] = zeroes
#J
#find torques
J_force = np.zeros((2,3))
J_force[0,:]=cross1
J_force[1,:]=cross2
J_force
#build force matrix
forces = np.array(([fx],[fy],[fz]))
forces
torques = np.dot(J_force,forces)
torques #top number is theta 1 (M1) and bottom number is theta 2 (M2)
#need to add z axis?
print(u.timeit())
# u is a timer eval np.arange(1000)
u = timeit.Timer("np.arange(1000)", setup = 'import numpy as np')
# print how many seconds needed to run np.arange(1000) 1000000 times
# 1000000 is the default value, you can set by passing a int here.
print(u.timeit())
So the following is what you want.
import timeit
def main():
#set up variables
m = 4.54
g = 9.81
GR = 8
r_pulley = .1
th1=np.pi/4 #based on motor 1 encoder counts. Number of degrees rotated from + x-axis of base frame 0
th2=np.pi/4 #based on motor 2 encoder counts. Number of degrees rotated from + x-axis of m1 frame 1
th3_motor = np.pi/4*12
th3_pulley = th3_motor/GR
#required forces in x,y,z at end effector
fx = 1
fy = 1
fz = m*g #need to figure this out
l1=6
l2=5
l3=th3_pulley*r_pulley
#Build Homogeneous Tranforms Matrices
H1_0 = np.array(([np.cos(th1),-np.sin(th1),0,0],[np.sin(th1),np.cos(th1),0,0],[0,0,1,l3],[0,0,0,1]))
H2_1 = np.array(([np.cos(th2),-np.sin(th2),0,l1],[np.sin(th2),np.cos(th2),0,0],[0,0,1,0],[0,0,0,1]))
H3_2 = np.array(([1,0,0,l2],[0,1,0,0],[0,0,1,0],[0,0,0,1]))
H2_0 = np.dot(H1_0,H2_1)
H3_0 = np.dot(H2_0,H3_2)
print(np.matrix(H3_0))
#These HTMs are using the way I derived them, not the "correct" way.
#The answers are the same, but I think the processing time will be the same.
#This is because either way the two matrices with all the sines and cosines...
#will be the same. Only difference is in one method the ones and zeroes...
#matrix is the first HTM, in the other method it is the last HTM. So its the...
#same number of matrices with the same information, just being dot-producted...
#in a different order.
#Build Jacobian
#np.cross(x, y)
d10 = H1_0[0:3, 3]
d20 = H2_0[0:3, 3]
d30 = H3_0[0:3, 3]
print(d30)
subt1 = d30-d10
subt2 = d30-d20
#tsubt1 = subt1.transpose()
#tsubt2 = subt2.transpose()
#print(tsubt1)
zeroes = np.array(([0,0,1]))
print(subt1)
print(subt2)
cross1 = np.cross(zeroes, subt1)
cross2 = np.cross(zeroes, subt2)
cross1
cross2
#These cross products are correct but need to be tranposed into columns, right now they are a single row.
#tcross1=cross1.reshape(-1,1)
#tcross2=cross2.reshape(-1,1)
#dont actually need these transposes but I didnt want to forget the command.
# build jacobian (J)
#J = np.zeros((6,2))
#J[0:3,0] = cross1
#J[0:3,1] = cross2
#J[3:6,0] = zeroes
#J[3:6,1] = zeroes
#J
#find torques
J_force = np.zeros((2,3))
J_force[0,:]=cross1
J_force[1,:]=cross2
J_force
#build force matrix
forces = np.array(([fx],[fy],[fz]))
forces
torques = np.dot(J_force,forces)
torques #top number is theta 1 (M1) and bottom number is theta 2 (M2)
#need to add z axis?
u = timeit.Timer(main)
print(u.timeit(5))

How to extract time domain information when recreating the signal by taking the inverse FFT

I'm trying to recreate the the original signal from the FFT of a signal sample. When taking Inverse FFT, I'm only getting an amplitude information (only one column). How can I get the corresponding time coordinates?
This is a screen shot of my original signal, recorded from 0 to 10s with step 0.001s. When I take the IFFT, I'm getting the same number of data points as my signal, but can't find the corresponding time information.
How can I get the correct time information?
I'm including the Python code code I used and a plot of the 2 signals.
#generating signal here
import numpy as np
k = float ( 3.1416*2)
f1 = 100
f2 = 150
f3 = 250
ds = max(f1,f2,f3)
ds = float(4*ds)
dt = 1.000/ds
lf = min (f1,f2,f3)
lT = 1.00/lf
N = 10 # cycles
totaltime = N*lT
data = []
tt = []
mf = 1/dt
print "TotalTime =", totaltime
for t in np.arange(0.0, totaltime,dt/100 ) :
#t = tk/mf
print t
wave1 = np.sin(k*f1*t)
wave2 = np.sin(k*f2*t)
wave3 = np.sin(k*f3*t)
summ = wave1 + wave2 + wave3
print t," ", summ
tt.append(t)
data.append(summ)
print tt
print data
np.savetxt("data.txt",np.c_[tt,data])
#######################
#taking the FFT here
fourier = []
tt =[]
yy=[]
logname = str("data.txt")
with open (logname,"rb") as wdata:
for line in wdata :
if not line.startswith("#") :
sl = line.split()
c11 = float(sl[0])
#c11 = c1*10**(-12)
c2 = float(sl[1])
tt.append(c11)
yy.append(c2)
n = len(yy)
n1 = len(tt)
print "n=",n,"(",n1,")"
#to calculate the time step , find the difference between 2 time-values
t0 = float(tt[0])
print "t0=",t0
t1 = float(tt[1])
print "t1=",t1
ts = t1 - t0
print "ts=", ts
yf = numpy.fft.fft(yy)
yf_abso = numpy.abs(yf)
freq = numpy.fft.fftfreq(n,d=ts)
numpy.savetxt('fft-data.txt',numpy.c_[freq,yf_abso])
######################
# taking the inverese FFT
filename = str("fft-data.txt")
FFTdata =[]
FREQdata = []
with open (filename,'r') as fftfile :
for line in fftfile :
if not line.startswith("#") :
split_line = line.split()
fpoint = float(split_line[1])
freqz = float(split_line[0])
FFTdata.append(fpoint)
FREQdata.append(freqz)
ireverse = np.fft.ifft(FFTdata)
reverse = np.abs(ireverse)
print type(reverse)
np.savetxt ("ireverse.txt", ireverse)
np.savetxt("reverse.txt", reverse)
The sample locations for the output of the IFFT are the same as those for the input to the FFT. You are doing that part right.
The output of the IFFT looks shifted, but it is not. What happens is that you threw away the phase information of the frequency spectrum when you saved it. You do
yf_abso = numpy.abs(yf)
and then save yf_abso. By taking the absolute value, you have thrown away important information. There is a reason that the FFT produces complex values. Throwing away half that information means you cannot reconstruct the original signal any more.
If you save the complex values, and use those in the last part of your code to compute the IFFT, then the real component of the output of the IFFT will match your input signal. The imaginary component there should be close to zero, different just due to numerical precision issues in floating-point computations.

Plotting Repeating Data Set from File using matplotlib and lists

this is my first post here, so I hope it goes well.
I have a file of data(about 2mb) in the format
angle (space) energy (space) counts
angle (space) energy (space) counts
angle (space) energy (space) counts, etc.
(this is data recorded from a particle accelerator running for ~170 hours, so the file is large)
Angle starts out at 0, and is 0 while energy goes up to about 4500, and then
angle increases by one and energy starts again at 0 and goes up to 4500. This repeats
until theta = 255.
I am trying to create a program that plots the number of counts versus the energy level, energy level being my x axis, and counts being my y axis. I have tried many solutions, but to no avail.
Any help given to me on this would be much appreciated.
My code is posted below.
import matplotlib.pyplot as plt
import numpy as np
import pylab
from numpy import *
from matplotlib.pyplot import *
import math
import sys
import scipy.optimize
"""
Usage
---------------
Takes a file in the format of
Theta |Rel_MeV |Counts
97 4024 0
97 4025 0
97 4026 6
97 4027 2
and graphs it
fileURL is the input for the file to put into the program
txt_Title is the graph label
"""
DEBUG = 1
fileURL = './ne19_peaks_all.dat'
txt_Title = 'Oxygen and Alpha Particle Relative Energy'
MeV_divide_factor = 100
ptSize = 5
MarkerType = '+'
MeV_max = 5000
def main():
# Read the file.
f2 = open(fileURL, 'r')
# read the whole file into a single variable, which is a list of every row of the file.
lines = f2.readlines()
f2.close()
# initialize some variable to be lists:
list_MeV = []
list_counts = []
for i in range(MeV_max):
list_MeV.append(i)
list_counts.append(0)
# scan the rows of the file stored in lines, and put the values into some variables:
for line in lines:
p = line.split()
MeV = float(p[1])/MeV_divide_factor
count = float(p[2])
list_counts[int(MeV)] += count
x_arr = np.array(list_MeV)
y_arr = np.array(list_counts)
plt.plot(x_arr, y_arr, MarkerType)
plt.title(txt_Title)
plt.show()
return 0
def func(x, a, b):
return a*x + b
if __name__ == '__main__':
status = main()
sys.exit(status)
Used a dictionary where each energy level was a key, and with the counts being the values

Categories

Resources