Intersection of 2 plots (sage) - python

could you please help me with finding points of intersection in graphic
import numpy as np
import matplotlib.pyplot as plt
#interact
def foo(count=(1,10,1), t=slider(100,1000,100,default=100)):
plt.cla()
dt = 1/t
x = np.arange(1, step=dt)
xl = np.arange(1, step=.000001)
W = np.zeros(t, np.dtype(float))
#Iteration Logarithm
l = np.sqrt(2*xl*ln(ln(1/xl)))
plt.plot(xl, l,'r--')
plt.plot(xl, -l,'r--')
mu, sig = 0, 1
for ITER in range(1, count+1): #count of W[i] in plot
for i in range(1, len(W)):
W[i] = W[i-1] + np.random.normal(mu, sig) * np.sqrt(dt) #Wiener process
plt.plot(x,W)
plt.xlabel('t',fontsize=26)
plt.ylabel('W(t)',fontsize=26)
plt.grid(True)
#interact
def _dee(deep=slider(0.0001, .4, 0.001, default=.2)): #scale
plt.xlim(0, deep)
plt.ylim(-.5, .5)
plt.show()
I use https://cloud.sagemath.com
Can't realize, how to find points of graphic cross each other.
Thanks.

Although I'm not familiar with sage, getting intersection points given two sampled function should be easy - just calculate the difference between them, and there will be an intersection every time the sign changes.
step=0.1
x= np.arange(-5,5, step=step)
f1 = x**2 - 4
f2 = -x**2 + 4
dif= f2-f1
dif_sign= dif/np.abs(dif) #just the sign of the function
dif_sign_dif= np.diff(dif_sign) #places where the sign changes
change_idxs= np.where(dif_sign_dif!=0)[0] #indexes where the sign changes
print -5+change_idxs*step
output:
[-2. 2.]
Since you are adding brownian motion, you'll get "false positives" once in while, though.

Related

How to produce an array with values in a specific shape?

I would like to create an array with values that range from 0.0 to 1.0 as shown here:
weighting matrix
Basically, the left and top edges should remain close to 1.0 but slowly decay to 0.5 in the corners.
The bottom and right edges should remain close to 0.0
The middle region should be mostly 0.5, and the values should be decaying diagonally from 1.0 to 0.0.
This is what I've tried but it doesn't give me exactly what I would like.
import numpy as np
import matplotlib.pyplot as plt
def sigmoid(x):
y = np.zeros(len(x))
for i in range(len(x)):
y[i] = 1 / (1 + math.exp(-x[i]))
return y
sigmoid_ = sigmoid(np.linspace(20, 2.5, 30))
temp1 = np.repeat(sigmoid_.reshape((1,len(sigmoid_))), repeats=10, axis=0)
sigmoid_ = sigmoid(np.linspace(6, 3, 10))
temp2 = np.repeat(sigmoid_.reshape((len(sigmoid_),1)), repeats=30, axis=1)
alpha1 = temp1 + temp2
sigmoid_ = sigmoid(np.linspace(-2.5, -20, 30))
temp1 = np.repeat(sigmoid_.reshape((1,len(sigmoid_))), repeats=10, axis=0)
sigmoid_ = sigmoid(np.linspace(-3, -6, 10))
temp2 = np.repeat(sigmoid_.reshape((len(sigmoid_),1)), repeats=30, axis=1)
alpha2 = temp1 + temp2
alpha = alpha1 + alpha2
alpha = alpha - np.min(alpha)
alpha = alpha / np.max(alpha)
plt.matshow(alpha)
Which gives me this: results
Can someone help me?
This is the simplest function I can think of:
tune_me = 101
x = np.linspace(0, 1, tune_me)
y = np.linspace(0, 1, tune_me)
xv, yv = np.meshgrid(x, y)
sig = 1/(1 + np.exp(tune_me - xv - yv))
plt.matshow(sig)
But if you want something specific, you should probably figure out your math (maybe on the math stack exchange) before you try to implement it.
I'd use the same function for all parts of the weighting matrix area, if not otherwise required. A sigmoid function (which changes rapidly near the center and slowly away from it) is indeed suitable after appropriate translation and scaling. For the sigmoid function's argument I'd choose the taxicab distance from a corner of the area.
import math
import numpy as np
import matplotlib.pyplot as plt
alpha = np.ndarray((10, 30))
ymax, xmax = alpha.shape[0]-1, alpha.shape[1]-1
for y in range(alpha.shape[0]):
for x in range(alpha.shape[1]):
M = x/xmax+y/ymax # Manhattan distance, range [0;2]
M -= 1 # make range [-1;1], so that inflection point is in the middle
M *= 3.0 # the higher this factor, the steeper the sigmoid curve at the flex
s = 1 / (1 + math.exp(-M)) # sigmoid function, range ]0;1[
alpha[y, x] = 1-s # decay from 1 to 0
plt.matshow(alpha)
plt.show()
# show the values close to 0.5
h = [(y, x) for y in range(alpha.shape[0])
for x in range(alpha.shape[1]) if .4 < alpha[y, x] and alpha[y, x] < .6]
hy, hx = zip(*h)
plt.plot(hx, hy, 'o')
plt.gca().invert_yaxis()
plt.show()
Illustration of values in center region close to 0.5, e. g. in ]0.4;0.6[:

odeint is giving incredibly wrong results when numerically integrating hamilton's equations of motion

I'm at my wits end with this. I am trying to numerically integrate these four equations produced from
from scipy.integrate import odeint
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import animation
import math
# Parameters describing the system
L = 15
l1 = 5
l2 = 10
g = 9.8
m = 0.5
k1 = 3
k2 = 5
# Assign initial conditions to the motion.
x0 = 0.
xDot0 = 0.
theta0 = 0.
thetaDot0 = 0.
u0 = [x0, xDot0, theta0, thetaDot0] # initial conditions for all 4 variables
# This function gives the time derivative of each of the 4 variables. These correspond to the
# four Hamilton equations of motion (1st-order differential equations)
def dudt(u, t):
r = [0., 0., 0., 0.]
s = np.sqrt(-2.0*L*(l1 + u[0])*np.cos(u[2]) + L**2 +(l1 + u[0])**2)
a = l1 + u[0]
p_theta = u[3]*m*pow(a, 2)
r[0] = u[1] # enter the time derivative of y
r[1] = (2.0/m)*pow(a, -3)*pow(p_theta, 2) - k1*u[0] - k2*((L*math.cos(u[2]) - l1 - u[0])*(1 - l2/s)) - m*g*math.sin(u[2]) # enter the time derivative of py
r[2] = u[3] # enter the time derivative of theta
r[3] = -(k2*(L*(a)*math.sin(u[0]))*(1-l2/s)) -m*g*math.cos(u[2]) # enter the time derivative of ptheta
return r
# Use the scipy odeint routine to carry out the numerical integration of the system
# of equations.
t0 = 0 # start time
t1 = 20 # end time
# Choose a set of time values at which to evaluate the solution y(t)
t = np.linspace(t0,t1,1000)
# Call the routing from scipy that does the integration
u = odeint(dudt, u0, t)
font = {'size' : 18}
plt.rc('font', **font)
# Use pyplot from matplotlib to plot the motion y(t)
fig, ax = plt.subplots(figsize=(10,10))
plt.xlim(t0, t1)
ax.set_xlabel('t (s)')
ax.set_ylabel('x (m)')
ax.plot(t,u[...,0])
plt.show()
# Plot the motion theta(t)
fig, ax = plt.subplots(figsize=(10,10))
plt.xlim(t0, t1)
ax.set_xlabel('t (s)')
ax.set_ylabel('theta (degrees)')
ax.plot(t,u[...,2]*180./math.pi)
plt.show()
And I get the following junk plots:
Plot of x vs. time
Plot of angle vs. time
Along with the following error:
/usr/local/lib/python3.6/dist-packages/scipy/integrate/odepack.py:247: ODEintWarning: Excess work done on this call (perhaps wrong Dfun type). Run with full_output = 1 to get quantitative information.
warnings.warn(warning_msg, ODEintWarning)
I'm not sure how to proceed from here, the warning frankly isn't very helpful for me.

How does one implement a subsampled RBF (Radial Basis Function) in Numpy?

I was trying to implement a Radial Basis Function in Python and Numpy as describe by CalTech lecture here. The mathematics seems clear to me so I find it strange that its not working (or it seems to not work). The idea is simple, one chooses a subsampled number of centers for each Gaussian form a kernal matrix and tries to find the best coefficients. i.e. solve Kc = y where K is the guassian kernel (gramm) matrix with least squares. For that I did:
beta = 0.5*np.power(1.0/stddev,2)
Kern = np.exp(-beta*euclidean_distances(X=X,Y=subsampled_data_points,squared=True))
#(C,_,_,_) = np.linalg.lstsq(K,Y_train)
C = np.dot( np.linalg.pinv(Kern), Y )
but when I try to plot my interpolation with the original data they don't look at all alike:
with 100 random centers (from the data set). I also tried 10 centers which produces essentially the same graph as so does using every data point in the training set. I assumed that using every data point in the data set should more or less perfectly copy the curve but it didn't (overfit). It produces:
which doesn't seem correct. I will provide the full code (that runs without error):
import numpy as np
from sklearn.metrics.pairwise import euclidean_distances
from scipy.interpolate import Rbf
import matplotlib.pyplot as plt
## Data sets
def get_labels_improved(X,f):
N_train = X.shape[0]
Y = np.zeros( (N_train,1) )
for i in range(N_train):
Y[i] = f(X[i])
return Y
def get_kernel_matrix(x,W,S):
beta = get_beta_np(S)
#beta = 0.5*tf.pow(tf.div( tf.constant(1.0,dtype=tf.float64),S), 2)
Z = -beta*euclidean_distances(X=x,Y=W,squared=True)
K = np.exp(Z)
return K
N = 5000
low_x =-2*np.pi
high_x=2*np.pi
X = low_x + (high_x - low_x) * np.random.rand(N,1)
# f(x) = 2*(2(cos(x)^2 - 1)^2 -1
f = lambda x: 2*np.power( 2*np.power( np.cos(x) ,2) - 1, 2) - 1
Y = get_labels_improved(X , f)
K = 2 # number of centers for RBF
indices=np.random.choice(a=N,size=K) # choose numbers from 0 to D^(1)
subsampled_data_points=X[indices,:] # M_sub x D
stddev = 100
beta = 0.5*np.power(1.0/stddev,2)
Kern = np.exp(-beta*euclidean_distances(X=X,Y=subsampled_data_points,squared=True))
#(C,_,_,_) = np.linalg.lstsq(K,Y_train)
C = np.dot( np.linalg.pinv(Kern), Y )
Y_pred = np.dot( Kern , C )
plt.plot(X, Y, 'o', label='Original data', markersize=1)
plt.plot(X, Y_pred, 'r', label='Fitted line', markersize=1)
plt.legend()
plt.show()
Since the plots look strange I decided to read the docs for the ploting functions but I couldn't find anything obvious that was wrong.
Scaling of interpolating functions
The main problem is unfortunate choice of standard deviation of the functions used for interpolation:
stddev = 100
The features of your functions (its humps) are of size about 1. So, use
stddev = 1
Order of X values
The mess of red lines is there because plt from matplotlib connects consecutive data points, in the order given. Since your X values are in random order, this results in chaotic left-right movements. Use sorted X:
X = np.sort(low_x + (high_x - low_x) * np.random.rand(N,1), axis=0)
Efficiency issues
Your get_labels_improved method is inefficient, looping over the elements of X. Use Y = f(X), leaving the looping to low-level NumPy internals.
Also, the computation of least-squared solution of an overdetermined system should be done with lstsq instead of computing the pseudoinverse (computationally expensive) and multiplying by it.
Here is the cleaned-up code; using 30 centers gives a good fit.
import numpy as np
from sklearn.metrics.pairwise import euclidean_distances
import matplotlib.pyplot as plt
N = 5000
low_x =-2*np.pi
high_x=2*np.pi
X = np.sort(low_x + (high_x - low_x) * np.random.rand(N,1), axis=0)
f = lambda x: 2*np.power( 2*np.power( np.cos(x) ,2) - 1, 2) - 1
Y = f(X)
K = 30 # number of centers for RBF
indices=np.random.choice(a=N,size=K) # choose numbers from 0 to D^(1)
subsampled_data_points=X[indices,:] # M_sub x D
stddev = 1
beta = 0.5*np.power(1.0/stddev,2)
Kern = np.exp(-beta*euclidean_distances(X=X, Y=subsampled_data_points,squared=True))
C = np.linalg.lstsq(Kern, Y)[0]
Y_pred = np.dot(Kern, C)
plt.plot(X, Y, 'o', label='Original data', markersize=1)
plt.plot(X, Y_pred, 'r', label='Fitted line', markersize=1)
plt.legend()
plt.show()

Measuring the similarity between two irregular plots

I have two irregular lines as a list of [x,y] coordinates, which has peaks and troughs. The length of the list might vary slightly(unequal). I want to measure their similarity such that to check occurence of the peaks and troughs (of similar depth or height) are coming at proper interval and give a similarity measure. I want to do this in Python. Is there any inbuilt function to do this?
I don't know of any builtin functions in Python to do this.
I can give you a list of possible functions in the Python ecosystem you can use. This is in no way a complete list of functions, and there are probably quite a few methods out there that I am not aware of.
If the data is ordered, but you don't know which data point is the first and which data point is last:
Use the directed Hausdorff distance
If the data is ordered, and you know the first and last points are correct:
Discrete Fréchet distance *
Dynamic Time Warping (DTW) *
Partial Curve Mapping (PCM) **
A Curve-Length distance metric (uses arc length distance from beginning to end) **
Area between two curves **
* Generally mathematical method used in a variety of machine learning tasks
** Methods I've used to identify unique material hysteresis responses
First let's assume we have two of the exact same random X Y data. Note that all of these methods will return a zero. You can install the similaritymeasures from pip if you do not have it.
import numpy as np
from scipy.spatial.distance import directed_hausdorff
import similaritymeasures
import matplotlib.pyplot as plt
# Generate random experimental data
np.random.seed(121)
x = np.random.random(100)
y = np.random.random(100)
P = np.array([x, y]).T
# Generate an exact copy of P, Q, which we will use to compare
Q = P.copy()
dh, ind1, ind2 = directed_hausdorff(P, Q)
df = similaritymeasures.frechet_dist(P, Q)
dtw, d = similaritymeasures.dtw(P, Q)
pcm = similaritymeasures.pcm(P, Q)
area = similaritymeasures.area_between_two_curves(P, Q)
cl = similaritymeasures.curve_length_measure(P, Q)
# all methods will return 0.0 when P and Q are the same
print(dh, df, dtw, pcm, cl, area)
The printed output is
0.0, 0.0, 0.0, 0.0, 0.0, 0.0
This is because the curves P and Q are exactly the same!
Now let's assume P and Q are different.
# Generate random experimental data
np.random.seed(121)
x = np.random.random(100)
y = np.random.random(100)
P = np.array([x, y]).T
# Generate random Q
x = np.random.random(100)
y = np.random.random(100)
Q = np.array([x, y]).T
dh, ind1, ind2 = directed_hausdorff(P, Q)
df = similaritymeasures.frechet_dist(P, Q)
dtw, d = similaritymeasures.dtw(P, Q)
pcm = similaritymeasures.pcm(P, Q)
area = similaritymeasures.area_between_two_curves(P, Q)
cl = similaritymeasures.curve_length_measure(P, Q)
# all methods will return 0.0 when P and Q are the same
print(dh, df, dtw, pcm, cl, area)
The printed output is
0.107, 0.743, 37.69, 21.5, 6.86, 11.8
which quantify how different P is from Q according to each method.
You now have many methods to compare the two curves. I would start with DTW, since this has been used in many time series applications which look like the data you have uploaded.
We can visualize what P and Q look like with the following code.
plt.figure()
plt.plot(P[:, 0], P[:, 1])
plt.plot(Q[:, 0], Q[:, 1])
plt.show()
Since your arrays are not the same size ( and I am assuming you are taking the same real time) , you need to interpolate them to compare across related set of points.
The following code does that, and calculates correlation measures:
#!/usr/bin/python
import numpy as np
from scipy.interpolate import interp1d
import matplotlib.pyplot as plt
import scipy.spatial.distance as ssd
import scipy.stats as ss
x = np.linspace(0, 10, num=11)
x2 = np.linspace(1, 11, num=13)
y = 2*np.cos( x) + 4 + np.random.random(len(x))
y2 = 2* np.cos(x2) + 5 + np.random.random(len(x2))
# Interpolating now, using linear, but you can do better based on your data
f = interp1d(x, y)
f2 = interp1d(x2,y2)
points = 15
xnew = np.linspace ( min(x), max(x), num = points)
xnew2 = np.linspace ( min(x2), max(x2), num = points)
ynew = f(xnew)
ynew2 = f2(xnew2)
plt.plot(x,y, 'r', x2, y2, 'g', xnew, ynew, 'r--', xnew2, ynew2, 'g--')
plt.show()
# Now compute correlations
print ssd.correlation(ynew, ynew2) # Computes a distance measure based on correlation between the two vectors
print np.correlate(ynew, ynew2, mode='valid') # Does a cross-correlation of same sized arrays and gives back correlation
print np.corrcoef(ynew, ynew2) # Gives back the correlation matrix for the two arrays
print ss.spearmanr(ynew, ynew2) # Gives the spearman correlation for the two arrays
Output:
0.499028272458
[ 363.48984942]
[[ 1. 0.50097173]
[ 0.50097173 1. ]]
SpearmanrResult(correlation=0.45357142857142857, pvalue=0.089485900143027278)
Remember that the correlations here are parametric and pearson type and assume monotonicity for calculating correlations. If this is not the case, and you think that your arrays are just changing sign together, you can use Spearman's correlation as in the last example.
I'm not aware of an inbuild function, but sounds like you can modify Levenshtein's distance. The following code is adopted from the code at wikibooks.
def point_distance(p1, p2):
# Define distance, if they are the same, then the distance should be 0
def levenshtein_point(l1, l2):
if len(l1) < len(l2):
return levenshtein(l2, l1)
# len(l1) >= len(l2)
if len(l2) == 0:
return len(l1)
previous_row = range(len(l2) + 1)
for i, p1 in enumerate(l1):
current_row = [i + 1]
for j, p2 in enumerate(l2):
print('{},{}'.format(p1, p2))
insertions = previous_row[j + 1] + 1 # j+1 instead of j since previous_row and current_row are one character longer
deletions = current_row[j] + 1 # than l2
substitutions = previous_row[j] + point_distance(p1, p2)
current_row.append(min(insertions, deletions, substitutions))
previous_row = current_row
return previous_row[-1]

Failure of non linear fit to sine curve

I've been trying to fit the amplitude, frequency and phase of a sine curve given some generated two dimensional toy data. (Code at the end)
To get estimates for the three parameters, I first perform an FFT. I use the values from the FFT as initial guesses for the actual frequency and phase and then fit for them (row by row). I wrote my code such that I input which bin of the FFT I want the frequency to be in, so I can check if the fitting is working well. But there's some pretty strange behaviour. If my input bin is say 3.1 (a non integral bin, so the FFT won't give me the right frequency) then the fit works wonderfully. But if the input bin is 3 (so the FFT outputs the exact frequency) then my fit fails, and I'm trying to understand why.
Here's the output when I give the input bins (in the X and Y direction) as 3.0 and 2.1 respectively:
(The plot on the right is data - fit)
Here's the output when I give the input bins as 3.0 and 2.0:
Question: Why does the non linear fit fail when I input the exact frequency of the curve?
Code:
#! /usr/bin/python
# For the purposes of this code, it's easier to think of the X-Y axes as transposed,
# so the X axis is vertical and the Y axis is horizontal
import numpy as np
import matplotlib.pyplot as plt
import scipy.optimize as optimize
import itertools
import sys
PI = np.pi
# Function which accepts paramters to define a sin curve
# Used for the non linear fit
def sineFit(t, a, f, p):
return a * np.sin(2.0 * PI * f*t + p)
xSize = 18
ySize = 60
npt = xSize * ySize
# Get frequency bin from user input
xFreq = float(sys.argv[1])
yFreq = float(sys.argv[2])
xPeriod = xSize/xFreq
yPeriod = ySize/yFreq
# arrays should be defined here
# Generate the 2D sine curve
for jj in range (0, xSize):
for ii in range(0, ySize):
sineGen[jj, ii] = np.cos(2.0*PI*(ii/xPeriod + jj/yPeriod))
# Compute 2dim FFT as well as freq bins along each axis
fftData = np.fft.fft2(sineGen)
fftMean = np.mean(fftData)
fftRMS = np.std(fftData)
xFreqArr = np.fft.fftfreq(fftData.shape[1]) # Frequency bins along x
yFreqArr = np.fft.fftfreq(fftData.shape[0]) # Frequency bins along y
# Find peak of FFT, and position of peak
maxVal = np.amax(np.abs(fftData))
maxPos = np.where(np.abs(fftData) == maxVal)
# Iterate through peaks in the FFT
# For this example, number of loops will always be only one
prevPhase = -1000
for col, row in itertools.izip(maxPos[0], maxPos[1]):
# Initial guesses for fit parameters from FFT
init_phase = np.angle(fftData[col,row])
init_amp = 2.0 * maxVal/npt
init_freqY = yFreqArr[col]
init_freqX = xFreqArr[row]
cntr = 0
if prevPhase == -1000:
prevPhase = init_phase
guess = [init_amp, init_freqX, prevPhase]
# Fit each row of the 2D sine curve independently
for rr in sineGen:
(amp, freq, phs), pcov = optimize.curve_fit(sineFit, xDat, rr, guess)
# xDat is an linspace array, containing a list of numbers from 0 to xSize-1
# Subtract fit from original data and plot
fitData = sineFit(xDat, amp, freq, phs)
sub1 = rr - fitData
# Plot
fig1 = plt.figure()
ax1 = fig1.add_subplot(121)
p1, = ax1.plot(rr, 'g')
p2, = ax1.plot(fitData, 'b')
plt.legend([p1,p2], ["data", "fit"])
ax2 = fig1.add_subplot(122)
p3, = ax2.plot(sub1)
plt.legend([p3], ['residual1'])
fig1.tight_layout()
plt.show()
cntr += 1
prevPhase = phs # Update guess for phase of sine curve
I've tried to distill the important parts of your question into this answer.
First of all, try fitting a single block of data, not an array. Once you are confident that your model is sufficient you can move on.
Your fit is only going to be as good as your model, if you move on to something not "sine"-like you'll need to adjust accordingly.
Fitting is an "art", in that the initial conditions can greatly change the convergence of the error function. In addition there may be more than one minima in your fits, so you often have to worry about the uniqueness of your proposed solution.
While you were on the right track with your FFT idea, I think your implementation wasn't quite correct. The code below should be a great toy system. It generates random data of the type f(x) = a0*sin(a1*x+a2). Sometimes a random initial guess will work, sometimes it will fail spectacularly. However, using the FFT guess for the frequency the convergence should always work for this system. An example output:
import numpy as np
import pylab as plt
import scipy.optimize as optimize
# This is your target function
def sineFit(t, (a, f, p)):
return a * np.sin(2.0*np.pi*f*t + p)
# This is our "error" function
def err_func(p0, X, Y, target_function):
err = ((Y - target_function(X, p0))**2).sum()
return err
# Try out different parameters, sometimes the random guess works
# sometimes it fails. The FFT solution should always work for this problem
inital_args = np.random.random(3)
X = np.linspace(0, 10, 1000)
Y = sineFit(X, inital_args)
# Use a random inital guess
inital_guess = np.random.random(3)
# Fit
sol = optimize.fmin(err_func, inital_guess, args=(X,Y,sineFit))
# Plot the fit
Y2 = sineFit(X, sol)
plt.figure(figsize=(15,10))
plt.subplot(211)
plt.title("Random Inital Guess: Final Parameters: %s"%sol)
plt.plot(X,Y)
plt.plot(X,Y2,'r',alpha=.5,lw=10)
# Use an improved "fft" guess for the frequency
# this will be the max in k-space
timestep = X[1]-X[0]
guess_k = np.argmax( np.fft.rfft(Y) )
guess_f = np.fft.fftfreq(X.size, timestep)[guess_k]
inital_guess[1] = guess_f
# Guess the amplitiude by taking the max of the absolute values
inital_guess[0] = np.abs(Y).max()
sol = optimize.fmin(err_func, inital_guess, args=(X,Y,sineFit))
Y2 = sineFit(X, sol)
plt.subplot(212)
plt.title("FFT Guess : Final Parameters: %s"%sol)
plt.plot(X,Y)
plt.plot(X,Y2,'r',alpha=.5,lw=10)
plt.show()
The problem is due to a bad initial guess of the phase, not the frequency. While cycling through the rows of genSine (inner loop) you use the fit result of the previous line as initial guess for the next row which does not work always. If you determine the phase from an fft of the current row and use that as initial guess the fit will succeed.
You could change the inner loop as follows:
for n,rr in enumerate(sineGen):
fftx = np.fft.fft(rr)
fftx = fftx[:len(fftx)/2]
idx = np.argmax(np.abs(fftx))
init_phase = np.angle(fftx[idx])
print fftx[idx], init_phase
...
Also you need to change
def sineFit(t, a, f, p):
return a * np.sin(2.0 * np.pi * f*t + p)
to
def sineFit(t, a, f, p):
return a * np.cos(2.0 * np.pi * f*t + p)
since phase=0 means that the imaginary part of the fft is zero and thus the function is cosine like.
Btw. your sample above is still lacking definitions of sineGen and xDat.
Without understanding much of your code, according to http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.curve_fit.html:
(amp2, freq2, phs2), pcov = optimize.curve_fit(sineFit, tDat,
sub1, guess2)
should become:
(amp2, freq2, phs2), pcov = optimize.curve_fit(sineFit, tDat,
sub1, p0=guess2)
Assuming that tDat and sub1 are x and y, that should do the trick. But, once again, it is quite difficult to understand such a complex code with so many interlinked variables and no comments at all. A code should always be build from bottom up, meaning that you don't do a loop of fits when a single one is not working, you don't add noise until the code works to fit the non-noisy examples... Good luck!
By "nothing fancy" I meant something like removing EVERYTHING that is not related with the fit, and doing a simplified mock example such as:
import numpy as np
import scipy.optimize as optimize
def sineFit(t, a, f, p):
return a * np.sin(2.0 * np.pi * f*t + p)
# Create array of x and y with given parameters
x = np.asarray(range(100))
y = sineFit(x, 1, 0.05, 0)
# Give a guess and fit, printing result of the fitted values
guess = [1., 0.05, 0.]
print optimize.curve_fit(sineFit, x, y, guess)[0]
The result of this is exactly the answer:
[1. 0.05 0.]
But if you change guess not too much, just enough:
# Give a guess and fit, printing result of the fitted values
guess = [1., 0.06, 0.]
print optimize.curve_fit(sineFit, x, y, guess)[0]
the result gives absurdly wrong numbers:
[ 0.00823701 0.06391323 -1.20382787]
Can you explain this behavior?
You can use curve_fit with a series of trigonometric functions, usually very robust and ajustable to the precision that you need just by increasing the number of terms... here is an example:
from scipy import sin, cos, linspace
def f(x, a0,s1,s2,s3,s4,s5,s6,s7,s8,s9,s10,s11,s12,
c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12):
return a0 + s1*sin(1*x) + c1*cos(1*x) \
+ s2*sin(2*x) + c2*cos(2*x) \
+ s3*sin(3*x) + c3*cos(3*x) \
+ s4*sin(4*x) + c4*cos(4*x) \
+ s5*sin(5*x) + c5*cos(5*x) \
+ s6*sin(6*x) + c6*cos(6*x) \
+ s7*sin(7*x) + c7*cos(7*x) \
+ s8*sin(8*x) + c8*cos(8*x) \
+ s9*sin(9*x) + c9*cos(9*x) \
+ s10*sin(9*x) + c10*cos(9*x) \
+ s11*sin(9*x) + c11*cos(9*x) \
+ s12*sin(9*x) + c12*cos(9*x)
from scipy.optimize import curve_fit
pi/2. / (x.max() - x.min())
x_norm *= norm_factor
popt, pcov = curve_fit(f, x_norm, y)
x_fit = linspace(x_norm.min(), x_norm.max(), 1000)
y_fit = f(x_fit, *popt)
plt.plot( x_fit/x_norm, y_fit )

Categories

Resources