Scipy Curve_fit gives a rather weird fit

Scipy Curve_fit gives a rather weird fit - python

Dear Python programmers,
I am currently working with curve_fit from scipy inorder to find out what correlation the x and y data have with echouter. However, the curve fit becomes really weird even when I fit a simple lineair formule towards it. I've tried changing the array to a numpy array at the: def func(x, a, b, c): "Fit functie" return a * np.asarray(x) + b part but it still gives me a graph that looks like a 3 year old who scratched with some red pencil.
One thing I do remember is sorting the values of massflows and rms_smote from low to high. Which you can view above the def func(x, a, b, c) bit. Since the curve_fit was giving me a fit. Yet also kinda scratched out as if you're sketching when the values ware unsorted. I don't know if curve_fit considers data differently if it's sorted or not.
If you need any more information, let me know :) Any suggestion is welcome!
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from scipy.stats import linregress
from scipy.optimize import curve_fit
data_15 = pd.read_csv(r"C:\Users\Thomas\Documents\Pythondata\2022-01-15_SMOTERapport.csv", header= 0, sep=';', decimal=',')
data_06 = pd.read_csv(r"C:\Users\Thomas\Documents\Pythondata\2022-02-06_SMOTERapport.csv", header= 0, sep=';', decimal=',')
data_10 = pd.read_csv(r"C:\Users\Thomas\Documents\Pythondata\2022-02-10_SMOTERapport.csv", header= 0, sep=';', decimal=',')
speed_15 = data_15['SPEED_ACT']
speed_06 = data_06['SPEED_ACT']
speed_10 = data_10['SPEED_ACT']
"Data filter 01_15"
filter = [i for i, e in enumerate(speed_15) if e >= 80]
s_15 = pd.DataFrame(data_15)
speed15 = s_15.filter(items = filter, axis=0)
speed15.reset_index(drop=True, inplace=True)
temp15 = speed15['TP_SMOTE']
foutmetingen2 = [i for i, e in enumerate(temp15) if e < 180]
speed15 = speed15.drop(foutmetingen2)
tp_strip15 = speed15['TP_AMBIENT']
tp_target15 = speed15['TP_TARGET']
tp_smote15 = speed15['TP_SMOTE']
v_15 = speed15['SPEED_ACT']
width15 = speed15['STRIP_WIDTH']
thickness15 = speed15['STRIP_THICKNESS']
power15 = speed15['POWER_INVERTER_PRE']
voltage15 = speed15['VOLTAGE_INVERTER_PRE']
"Data filter 02_06"
filter = [i for i, e in enumerate(speed_06) if e >= 80]
s_06 = pd.DataFrame(data_06)
speed06 = s_06.filter(items = filter, axis=0)
speed06.reset_index(drop=True, inplace=True)
temp06 = speed06['TP_SMOTE']
foutmetingen2 = [i for i, e in enumerate(temp06) if e < 180]
speed06 = speed06.drop(foutmetingen2)
tp_strip06 = speed06['TP_AMBIENT']
tp_target06 = speed06['TP_TARGET']
tp_smote06 = speed06['TP_SMOTE']
v_06 = speed06['SPEED_ACT']
width06 = speed06['STRIP_WIDTH']
thickness06 = speed06['STRIP_THICKNESS']
power06 = speed06['POWER_INVERTER_PRE']
voltage06 = speed06['VOLTAGE_INVERTER_PRE']
"Data filter 02_10"
filter = [i for i, e in enumerate(speed_10) if e >= 80]
s_10 = pd.DataFrame(data_10)
speed10 = s_10.filter(items = filter, axis=0)
speed10.reset_index(drop=True, inplace=True)
temp_01 = speed10['TP_SMOTE']
foutmetingen2 = [i for i, e in enumerate(temp_01) if e < 180]
speed10 = speed10.drop(foutmetingen2)
tp_strip10 = speed10['TP_AMBIENT']
tp_target10 = speed10['TP_TARGET']
tp_smote10 = speed10['TP_SMOTE']
v_10 = speed10['SPEED_ACT']
width10 = speed10['STRIP_WIDTH']
thickness10 = speed10['STRIP_THICKNESS']
power10 = speed10['POWER_INVERTER_PRE']
voltage10 = speed10['VOLTAGE_INVERTER_PRE']
"Constanten"
widthmax = 1253
Kra = 0.002033636
Kosc = 0.073086272
Pnominal = 2200
meting_15 = np.arange(0, len(speed15), 1)
meting_06 = np.arange(0, len(speed06), 1)
meting_10 = np.arange(0, len(speed10), 1)
cp = 480
rho = 7850
"---------------------------------------------------------------------"
def temp(power, speed, width, thickness, tp_strip, tp_target, tp_smote,
voltage):
"Berekende temperatuur vergelijken met target temperatuur"
massflow = (speed/60)*width*10**-3*thickness*10**-3*rho
LossesRA = Kra*Pnominal*(width/widthmax)
LossesOSC = Kosc*Pnominal*(voltage/100)**2
Plosses = (LossesRA + LossesOSC)
power_nl = (power/100)*Pnominal - Plosses
temp_c = ((power_nl*1000)/(massflow*cp)) + tp_strip
verschil_t = (temp_c/tp_target)*100-100
verschil_smote = (temp_c/tp_smote)*100-100
return temp_c, verschil_t, verschil_smote, massflow
temp_15 = temp(power15, v_15, width15, thickness15, tp_strip15, tp_target15,
tp_smote15, voltage15)
temp_06 = temp(power06, v_06, width06, thickness06, tp_strip06, tp_target06,
tp_smote06, voltage06)
temp_10 = temp(power10, v_10, width10, thickness10, tp_strip10, tp_target10,
tp_smote10, voltage10)
"---------------------------------------------------------------------"
def rms(Temperatuurberekend, TemperatuurGemeten):
"De Root Mean Square berekenen tussen berekend en gemeten data"
rootmeansquare = (TemperatuurGemeten - Temperatuurberekend)
rootmeansquare_totaal = np.sum(rootmeansquare)
rootmeansquare_gem = rootmeansquare_totaal/len(rootmeansquare)
return rootmeansquare, rootmeansquare_totaal, rootmeansquare_gem
rms_tp_smote15 = (rms(temp_15[0], tp_smote15))
rms_tp_smote06 = (rms(temp_06[0], tp_smote06))
rms_tp_smote10 = (rms(temp_10[0], tp_smote10))
"----------------------------------------------------------------------"
massflows = [np.sum(temp_06[3])/len(temp_06[3]), np.sum(temp_15[3])/
len(temp_15[3]), np.sum(temp_10[3])/len(temp_10[3])]
rms_smote = [rms_tp_smote06[2], rms_tp_smote10[2], rms_tp_smote15[2]]
rms_tp_smote_pre = np.append(rms_tp_smote15[0].tolist(),
rms_tp_smote06[0].tolist())
rms_tp_smote = np.append(rms_tp_smote_pre, rms_tp_smote10[0].tolist())
massflow_pre = np.append(temp_15[3].tolist(), temp_06[3].tolist())
massflow = np.append(massflow_pre, temp_10[3].tolist())
massflow_sort = np.sort(massflow)
rms_tp_smote_sort = [x for _, x in sorted(zip(massflow, rms_tp_smote))]
a,b,r,p, s_a= linregress (massflows,rms_smote)
print('RC: ' ,a ,'\n','std: ', s_a , '\n', 'Offset: ', b)
def func(x, a, b, c):
"Fit functie"
return a * np.asarray(x) + b
popt, pcov = curve_fit(func, massflow_sort, rms_tp_smote_sort)
popt
functie = func(massflow_sort, *popt)
sns.set_theme(style='whitegrid')
fig, axs = plt.subplots(2, figsize=(10, 10))
axs[0].plot(massflows, rms_smote, label='Temp afwijking als f(massflow)')
axs[0].plot ([massflows[0] ,massflows[len (massflows) -1]] ,
[a*massflows [0]+b,a*massflows[len (massflows) -1]+b] ,
label ='trendlijn')
axs[0].set(xlabel='Mass flow ($kg/s$)',
ylabel='Temperatuur afwijking gem ($\u00b0C$)', title='Met Verliezen')
axs[0].legend(loc='upper right')
axs[1].plot(massflow_sort, rms_tp_smote_sort, 'o', label='Temp/Massflow 01-15')
#axs[1].plot(temp_06[3], rms_tp_smote06[0], 'o', label='Temp/Massflow 02-06')
#axs[1].plot(temp_10[3], rms_tp_smote10[0], 'o', label='Temp/Massflow 02-10')
axs[1].plot(massflow, func(massflow_sort, *popt), 'r-',
label='fit: a=%5.3f, b=%5.3f, c=%5.3f' % tuple(popt))
axs[1].set(xlabel='Mass flow ($kg/s$)',
ylabel='Temperatuur afwijking gem ($\u00b0C$)')
axs[1].legend(loc='upper right')
print("Gemiddelde verschil temperatuur smote: ", rms_tp_smote15[1])
print("Gemiddelde uitwijking temperatuur smote: ", rms_tp_smote15[2])

Related

Logistic regression - strange behaviour of the decision boundary when additional parameters are added

I am trying to build a logistic regression model for a dataset consisting of two parameters
x1 and x2, but instead of analyzing just the two of them, I have added their squares as well - x12, x22 and x1· x2.
At the first glance everything looks fine and the error function is decreasing, but whilist drawing the plot of the decision boundary I have noticed, that after circa 500 iterations something strange happens to it.
Here is an animation of the error function as a function of iterations and a respective plot of the decision boundary:
Now,I interpret the decision boundary as a quadratic function
x2=f(x1), where
the relation between both parameters is given like this:
0.5 = θ0 + θ1x1 + θ2x2 + θ3x12 + θ4x1x2
+ θ5x22
Here is the python code I use to do everything:
#!/usr/bin/python3
import numpy as np
import matplotlib.pyplot as plt
from math import log
from matplotlib.animation import FuncAnimation
def sigmoid(x):
return 1.0 / (1.0 + np.exp(-x))
def loadData(filepath):
source=""
try:
f = open(filepath, "r")
source = f.read()
f.close()
except IOError:
print("Error while reading file (" + filepath + ")")
return ""
raw_data = source.split("\n")
raw_data = [x.split(",") for x in raw_data if x !=""]
raw_data = np.matrix(raw_data).astype(float)
return (raw_data[:,:np.size(raw_data,1)-1], raw_data[:,np.size(raw_data, 1)-1:])
def standardize(dataset, skipfirst=True):
means = np.amin(dataset, 0)
deviation = np.std(dataset, 0)
if skipfirst:
dataset[:,1:] -= means[:,1:]
dataset[:,1:] /= deviation[:,1:]
return dataset
else:
dataset -= means
dataset /= deviation
return dataset
def error(X, Y, Theta):
"Calculates error values"
v_sigm = np.vectorize(sigmoid)
h_x = X # Theta
sigmo = v_sigm(h_x)
partial_vect = (Y-1).T # np.log(1-sigmo) - Y.T # np.log(sigmo)
return 1/(2*np.size(Y, axis=0))*np.sum(partial_vect)
def gradientStep(X, Y, Theta, LR):
"Returns new theta Values"
v_sigm = np.vectorize(sigmoid)
h_x = X # Theta
modif = -1*LR/np.size(Y, 0)*(h_x-Y)
sums = np.sum(modif.T # X, axis = 0)
return Theta + sums.T
X, Y = loadData("ex2data1.txt")
#add bias to X
X = np.append(np.ones((np.size(X, 0), 1)), X, axis=1)
added_params = [[x[1]**2, x[1]*x[2], x[2]**2] for x in np.array(X)]
X = np.append(X, np.matrix(added_params), axis=1)
#standardize X
X = standardize(X)
#create vector of parameters
Theta=np.zeros((np.size(X, 1), 1))
iterations = 3000
Theta_vals = []
Error_vals = []
for i in range(0, iterations):
Theta_vals.append(np.asarray(Theta).flatten())
Error_vals.append(error(X, Y, Theta))
Theta = gradientStep(X, Y, Theta, 0.07)
#CALCULATING FINISHES HERE
#plot data:
fig = plt.figure()
def_ax = fig.add_subplot(211)
def_ax.set_xlim(np.amin(X[:,1:2]), np.amax(X[:,1:2]))
def_ax.set_ylim(np.amin(X[:,2:3]), np.amax(X[:,2:3]))
err_ax = fig.add_subplot(212)
err_ax.set_ylim(0, error(X, Y, Theta))
err_ax.set_xlim(0, iterations)
positive_X1 = []
positive_X2 = []
negative_X1 = []
negative_X2 = []
for i in range(0, np.size(Y, 0)):
if(Y[i, 0] == 1):
positive_X1.append(X[i, 1])
positive_X2.append(X[i, 2])
else:
negative_X1.append(X[i, 1])
negative_X2.append(X[i, 2])
err_ax.set_ylim(np.amin(Error_vals), np.amax(Error_vals))
def animation(frame):
global Theta_vals, Error_vals, def_ax, err_ax, positive_X1, positive_X2, negative_X1, negative_X2
def_limX = def_ax.get_xlim()
def_limY = def_ax.get_ylim()
err_limX = err_ax.get_xlim()
err_limY = err_ax.get_ylim()
def_ax.clear()
err_ax.clear()
def_ax.set_xlim(def_limX)
def_ax.set_ylim(def_limY)
err_ax.set_xlim(err_limX)
err_ax.set_ylim(err_limY)
def_ax.scatter(positive_X1, positive_X2, marker="^")
def_ax.scatter(negative_X1, negative_X2, marker="o")
Theta = Theta_vals[frame]
res_x = np.linspace(*def_ax.get_xlim(), num=5)
delta_x = [(Theta[4]*x+Theta[2])**2-4*Theta[5]*(Theta[3]*x**2+Theta[1]*x+Theta[0]-0.5) for x in res_x]
delta_x = [np.sqrt(x) if x >= 0 else 0 for x in delta_x]
minb = [-(Theta[4]*x+Theta[2]) for x in res_x]
res_1 = []
res_2 = []
for i in range(0, len(res_x)):
if Theta[5] == 0:
res_1.append(0)
res_2.append(0)
else:
res_1.append((minb[i]+delta_x[i])/(2*Theta[5]))
res_2.append((minb[i]-+delta_x[i])/(2*Theta[5]))
def_ax.plot(res_x, res_1)
def_ax.plot(res_x, res_2)
err_x = np.linspace(0, frame, frame)
err_y = Error_vals[0:frame]
err_ax.plot(err_x, err_y)
anim = FuncAnimation(fig, animation, frames=iterations, interval=3, repeat_delay=2000)
print(error(X, Y, Theta))
anim.save("anim.mp4")
What could be the reason of such a strange behaviour?

Splittig data in python dataframe and getting the array values automatically

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
data = pd.read_csv('D:\ history/segment.csv')
data = pd.DataFrame(data)
data = data.sort_values(['Prob_score'], ascending=[False])
one = len(data)
actualpaid_overall = len(data.loc[data['paidstatus'] == 1])
data_split = np.array_split(data, 10)
data1 = data_split[0]
actualpaid_ten = len(data1.loc[data1['paidstatus'] == 1])
percent_ten = actualpaid_ten/actualpaid_overall
data2 = data_split[1]
actualpaid_twenty = len(data2.loc[data2['paidstatus'] == 1])
percent_twenty = (actualpaid_twenty/actualpaid_overall) + percent_ten
data3 = data_split[2]
actualpaid_thirty = len(data3.loc[data3['paidstatus'] == 1])
percent_thirty = (actualpaid_thirty/actualpaid_overall) + percent_twenty
data4 = data_split[3]
actualpaid_forty = len(data4.loc[data4['paidstatus'] == 1])
percent_forty = (actualpaid_forty/actualpaid_overall) + percent_thirty
data5 = data_split[4]
actualpaid_fifty = len(data5.loc[data5['paidstatus'] == 1])
percent_fifty = (actualpaid_fifty/actualpaid_overall) + percent_forty
data6 = data_split[5]
actualpaid_sixty = len(data6.loc[data6['paidstatus'] == 1])
percent_sixty = (actualpaid_sixty/actualpaid_overall) + percent_fifty
data7 = data_split[6]
actualpaid_seventy = len(data7.loc[data7['paidstatus'] == 1])
percent_seventy = (actualpaid_seventy/actualpaid_overall) + percent_sixty
data8 = data_split[7]
actualpaid_eighty = len(data8.loc[data8['paidstatus'] == 1])
percent_eighty = (actualpaid_eighty/actualpaid_overall) + percent_seventy
data9 = data_split[8]
actualpaid_ninenty = len(data9.loc[data9['paidstatus'] == 1])
percent_ninenty = (actualpaid_ninenty/actualpaid_overall) + percent_eighty
data10 = data_split[9]
actualpaid_hundred = len(data10.loc[data10['paidstatus'] == 1])
percent_hundred = (actualpaid_hundred/actualpaid_overall) + percent_ninenty
array_x = [10,20,30,40,50,60,70,80,90,100]
array_y = [ percent_ten, percent_twenty, percent_thirty, percent_forty,percent_fifty, percent_sixty, percent_seventy, percent_eighty, percent_ninenty, percent_hundred]
plt.xlabel(' Base')
plt.ylabel(' percent')
ax = plt.plot(array_x,array_y)
plt.minorticks_on()
plt.grid(which='major', linestyle='-', linewidth=0.5, color='0.1')
plt.grid( which='both', axis = 'both', linewidth=0.5,color='0.75')
The above is my python code i have splitted my dataframe into 10 equal sections and plotted the graph but I'm not satisfied with this i have two concerns:
array_x = [10,20,30,40,50,60,70,80,90,100] in this line of code i have manually taken the x values, is there any possible way to process automatically as i have taken split(data,10) it should show 10 array values
As we can see the whole data1,2,3,4...10 is being repeated again and again is there a solution to write this in a function or loop.
Any help with codes will be appreciated. Thanks

I believe you need list comprehension and for count is possible use simplier way - sum of boolean mask, True values are processes like 1, then convert list to numpy array and use numpy.cumsum:
data = pd.read_csv('D:\ history/segment.csv')
data = data.sort_values('Prob_score', ascending=False)
one = len(data)
actualpaid_overall = (data['paidstatus'] == 1).sum()
data_split = np.array_split(data, 10)
x = [len(x) for x in data_split]
y = [(x['paidstatus'] == 1).sum()/actualpaid_overall for x in data_split]
array_x = np.cumsum(np.array(x))
array_y = np.cumsum(np.array(y))
plt.xlabel(' Base')
plt.ylabel(' percent')
ax = plt.plot(array_x,array_y)
plt.minorticks_on()
plt.grid(which='major', linestyle='-', linewidth=0.5, color='0.1')
plt.grid( which='both', axis = 'both', linewidth=0.5,color='0.75')
Sample:
np.random.seed(2019)
N = 1000
data = pd.DataFrame({'paidstatus':np.random.randint(3, size=N),
'Prob_score':np.random.randint(100, size=N)})
#print (data)
data = data.sort_values(['Prob_score'], ascending=[False])
actualpaid_overall = (data['paidstatus'] == 1).sum()
data_split = np.array_split(data, 10)
x = [len(x) for x in data_split]
y = [(x['paidstatus'] == 1).sum()/actualpaid_overall for x in data_split]
array_x = np.cumsum(np.array(x))
array_y = np.cumsum(np.array(y))
print (array_x)
[ 100 200 300 400 500 600 700 800 900 1000]
print (array_y)
[0.09118541 0.18844985 0.27963526 0.38601824 0.49848024 0.61702128
0.72036474 0.81155015 0.9331307 1. ]

ValueError: x and y must have the same first dimension, but have different shapes

import urllib.request
from math import sqrt, fabs, exp
import matplotlib.pyplot as plot
from sklearn.linear_model import enet_path
from sklearn.metrics import roc_auc_score, roc_curve
import numpy
target_url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/undocumented/connectionist-bench/sonar/sonar.all-data'
data = urllib.request.urlopen(target_url)
xList = []
for line in data:
#split on comma
row = line.strip().split(",".encode(encoding='utf-8'))
xList.append(row)
xNum = []
labels = []
for row in xList:
lastCol = row.pop()
if lastCol == b'M':
labels.append(1.0)
else:
labels.append(0.0)
attrRow = [float(elt) for elt in row]
xNum.append(attrRow)
nrow = len(xNum)
ncol = len(xNum[1])
alpha = 1.0
xMeans = []
xSD = []
for i in range(ncol):
col = [xNum[j][i] for j in range(nrow)]
mean = sum(col)/nrow
xMeans.append(mean)
colDiff = [(xNum[j][i] - mean) for j in range(nrow)]
sumSq = sum([colDiff[i] * colDiff[i] for i in range(nrow)])
stdDev = sqrt(sumSq/nrow)
xSD.append(stdDev)
xNormalized = []
for i in range(nrow):
rowNormalized = [(xNum[i][j] - xMeans[j])/xSD[j] for j in range(ncol)]
xNormalized.append(rowNormalized)
meanLabel = sum(labels)/nrow
sdLabel = sqrt(sum([(labels[i] - meanLabel) * (labels[i] - meanLabel) for i in range (nrow)])/nrow)
labelNormalized = [(labels[i] - meanLabel)/sdLabel for i in range(nrow)]
nxval = 10
for ixval in range(nxval):
idxTest = [a for a in range (nrow) if a%nxval == ixval]
idxTrain = [a for a in range(nrow) if a%nxval != ixval]
xTrain = numpy.array([xNormalized[r] for r in idxTrain])
xTest = numpy.array([xNormalized[r] for r in idxTest])
labelTrain = numpy.array([labelNormalized[r] for r in idxTrain])
labelTest = numpy.array([labelNormalized[r] for r in idxTest])
alphas, coefs, _ = enet_path(xTrain, labelTrain, l1_ratio = 0.8, fit_intercept=False, return_models=False)
if ixval == 0:
pred = numpy.dot(xTest, coefs)
yOut = labelTest
else:
yTemp = numpy.array(yOut)
yOut = numpy.concatenate((yTemp, labelTest), axis = 0)
predTemp = numpy.array(pred)
pred = numpy.concatenate((predTemp, numpy.dot(xTest, coefs)), axis = 0)
misClassRate = []
_,nPred = pred.shape
for iPred in range(1, nPred):
predList = list(pred[:, iPred])
errCnt = 0.0
for irow in range(nrow):
if (predList[irow] < 0.0) and (yOut[irow] >= 0.0):
errCnt += 1.0
elif (predList[irow] >= 0.0) and (yOut[irow] < 0.0):
errCnt += 1.0
misClassRate.append(errCnt/nrow)
minError = min(misClassRate)
idxMin = misClassRate.index(minError)
plotAlphas = numpy.array(alphas[1:len(alphas)])
misClassRate_np = numpy.array(misClassRate)
plot.figure()
plot.plot(plotAlphas, misClassRate_np, label='Misclassification Error Across Folds', linewidth=2)
plot.axvline(plotAlphas[idxMin], linestyle='--', label='CV Estimate of Best alpha')
plot.legend()
plot.semilogx()
ax = plot.gca()
ax.invert_xaxis()
plot.xlabel('alpha')
plot.ylabel('Misclassification Error')
plot.axis('tight')
plot.show()
When I executed the code above, it returns: ValueError: x and y must have same first dimension, but have shapes (99,) and (1,).
It seems the problem is due to unequal length in x and y.
Then I checked both plotAlphas and misClassRate_np, they show the same length. Also, both of them has been changed to array but still fail to fix the problem. Can't figure out what's happening.

How to improve nonlinear regression fit better by using scipy.ODR?

I have a problem using scipy.odr to fit data
Here is my data
https://www.dropbox.com/s/zuo6rqokcxsbnh5/data.txt?dl=0
1st column is data x, 2nd is data y
My Python fit code:
import numpy as np
import scipy.odr
import matplotlib.pyplot as plt
a = open('C:/Users/san/Desktop/data.txt','r')
data = np.loadtxt(a, delimiter=' ')
a.close()
#data x
f = data[:,0]
#data y
Soddre = data[:,1]
#####fit function#####
def S11fit(beta,f):
w = 2*np.pi*f
L1 = 25*1e-3
L2 = 25*1e-3
dL = 0.01*1e-3
z0 = 376.73415
Z0 = 50
ZL1 = 50
c = 2.997925e8
miu0 = z0/c
miur = 1
y0 = 1j*w/c;
episr = beta[0]+1j*beta[1]+(beta[2]+1j*beta[3])/(1+1j*beta[4]*w)#My target
ys = 1j*w/c*np.sqrt(episr*miur)
the = np.sqrt(episr/miur)
Zin3_1 = np.tanh(ys*L2)
Zin3_2 = the*np.tanh(y0*dL)
Zin3_3 = the*( 1+the*np.tanh(y0*dL)*np.tanh(ys*L2) )
Zin3 = ZL1*( Zin3_1 + Zin3_2 + Zin3_3*np.tanh(y0*L1) )\
/( Zin3_3 +( Zin3_1 + Zin3_2 )*np.tanh(y0*L1) )
S11t = (Zin3-Z0)/(Zin3+Z0)
return S11t.real
#Setting initial beta
A0r = 1
A0i = 0.02
A0 = A0r+1j*A0i
er1 = 1+0.02
er2 = 1+0.03
B1 = ( er1-er2 )/( 1j*( -w[200]*(er1-A0)+w[300]*(er2-A0) ) )
A1 = (er2-A0)*(1+1j*w[300]*B1)
#Using scipy.odr to fit data
Model = scipy.odr.Model(S11fit)
Data = scipy.odr.Data(f,Soddre)
odr = scipy.odr.ODR(Data, Model, [A0r, A0i,A1.real, A1.imag, B1.real])
output = odr.run()
resulr = output.pprint()
beta = output.beta
print "ODR", beta
#Fitting result
plt.plot(f, Soddre, "b")
plt.plot(f, S11fit(beta, w), "g--", lw = 1)
plt.tight_layout()
plt.show()
#By fitting data get episr
episr = beta[0]+1j*beta[1]+(beta[2]+1j*beta[3])/(1+1j*beta[4]*w)
plt.plot(f, episr.real, "b")
Results
Beta: [ -5.13825455e+00 -1.08908338e+00 5.46365124e+00 -4.76830313e+01
-5.13452718e-10]
The grapg of fitting result and reference:
https://www.dropbox.com/s/9yj4ulo57mqgcc2/Fitting%20result.pdf?dl=0
My fitting curve doesn't fit well
Does anyone have a good suggestion to fit data well?

I can fit my data well.Data need to add deviation in Soddre.
Just add:
ca = np.full((1, 801), 0.01)
and modify Data:
Data = scipy.odr.RealData(f,Soddre,sy=ca)

Looping same program for different data files

For the following program, I am trying to save time copying and pasting tons of code. I would like this program to plot using the data file 19_6.txt and aux.19_6, and then continue by plotting the files with 11,12,20,28,27, and 18 in 19's place with the same code and onto the same plot. Any help would be appreciated. Thanks!
from numpy import *
import matplotlib.pyplot as plt
datasim19 = loadtxt("/home/19_6.txt")
data19 = loadtxt("/home/aux.19_6")
no1=1
no2=2
no3=3
no4=4
no5=5
no7=7
no8=8
no9=9
no10=10
simrecno1inds19 = nonzero(datasim19[:,1]==no1)[0]
simrecno2inds19 = nonzero(datasim19[:,1]==no2)[0]
simrecno3inds19 = nonzero(datasim19[:,1]==no3)[0]
simrecno4inds19 = nonzero(datasim19[:,1]==no4)[0]
simrecno5inds19 = nonzero(datasim19[:,1]==no5)[0]
simrecno7inds19 = nonzero(datasim19[:,1]==no7)[0]
simrecno8inds19 = nonzero(datasim19[:,1]==no8)[0]
simrecno9inds19 = nonzero(datasim19[:,1]==no9)[0]
simrecno10inds19 = nonzero(datasim19[:,1]==no10)[0]
recno1inds19 = nonzero(data19[:,1]==no1)[0]
recno2inds19 = nonzero(data19[:,1]==no2)[0]
recno3inds19 = nonzero(data19[:,1]==no3)[0]
recno4inds19 = nonzero(data19[:,1]==no4)[0]
recno5inds19 = nonzero(data19[:,1]==no5)[0]
recno7inds19 = nonzero(data19[:,1]==no7)[0]
recno8inds19 = nonzero(data19[:,1]==no8)[0]
recno9inds19 = nonzero(data19[:,1]==no9)[0]
recno10inds19 = nonzero(data19[:,1]==no10)[0]
q1sim19 = qsim19[simrecno1inds19]
q2sim19 = qsim19[simrecno2inds19]
q3sim19 = qsim19[simrecno3inds19]
q4sim19 = qsim19[simrecno4inds19]
q5sim19 = qsim19[simrecno5inds19]
q7sim19 = qsim19[simrecno7inds19]
q8sim19 = qsim19[simrecno8inds19]
q9sim19 = qsim19[simrecno9inds19]
q10sim19 = qsim19[simrecno10inds19]
q1_19 = q19[recno1inds19]
q2_19 = q19[recno2inds19]
q3_19 = q19[recno3inds19]
q4_19 = q19[recno4inds19]
q5_19 = q19[recno5inds19]
q7_19 = q19[recno7inds19]
q8_19 = q19[recno8inds19]
q9_19 = q19[recno9inds19]
q10_19 = q19[recno10inds19]
sumq1sim19 = sum(q1sim19)
sumq2sim19 = sum(q2sim19)
sumq3sim19 = sum(q3sim19)
sumq4sim19 = sum(q4sim19)
sumq5sim19 = sum(q5sim19)
sumq7sim19 = sum(q7sim19)
sumq8sim19 = sum(q8sim19)
sumq9sim19 = sum(q9sim19)
sumq10sim19 = sum(q10sim19)
sumq1_19 = sum(q1_19)
sumq2_19 = sum(q2_19)
sumq3_19 = sum(q3_19)
sumq4_19 = sum(q4_19)
sumq5_19 = sum(q5_19)
sumq7_19 = sum(q7_19)
sumq8_19 = sum(q8_19)
sumq9_19 = sum(q9_19)
sumq10_19 = sum(q10_19)
xsim = [no1, no2, no3, no4, no5, no7, no8, no9, no10]
ysim = [sumq1sim_19, sumq2sim_19, sumq3sim_19, sumq4sim_19, sumq5sim_19, sumq7sim_19, sumq8sim_19, sumq9sim_19, sumq10sim_19]
x = [no1, no2, no3, no4, no5,no7, no8, no9, no10]
y = [sumq1_19, sumq2_19, sumq3_19, sumq4_19, sumq5_19, sumq7_19, sumq8_19, sumq9_19, sumq10_19]
plt.plot(x,log(y),'b',label='Data')
plt.plot(xsim,log(ysim),'r',label='Simulation')
plt.legend()
plt.title('Data vs. Simulation')
plt.show()

Tip: when you find yourself using lots of variables called n1, n2, n3 etc. you should probably use lists, dictionaries or other such containers, and loops instead.
For example, try replacing the following code:
simrecno1inds19 = nonzero(datasim19[:,1]==no1)[0]
simrecno2inds19 = nonzero(datasim19[:,1]==no2)[0]
simrecno3inds19 = nonzero(datasim19[:,1]==no3)[0]
simrecno4inds19 = nonzero(datasim19[:,1]==no4)[0]
simrecno5inds19 = nonzero(datasim19[:,1]==no5)[0]
simrecno7inds19 = nonzero(datasim19[:,1]==no7)[0]
simrecno8inds19 = nonzero(datasim19[:,1]==no8)[0]
simrecno9inds19 = nonzero(datasim19[:,1]==no9)[0]
simrecno10inds19 = nonzero(datasim19[:,1]==no10)[0]
With this:
simrecinds19 = [nonzero(datasim19[:,1] == i)[0] for i in range(1, 11)]
Then you can use simrecinds19[0] instead of simrecno1inds19.

You can do something like this:
nList = [19,11,12,20,28,27,18]
for n in nList:
file1 = "/home/" + str(n) + "_6.txt"
file2 = "/home/aux." + str(n) + "_6"
datasim19 = loadtxt(file1)
data19 = loadtxt(file2)
# do the rest of the plotting

You can greatly reduce the size of this script. I'm not quite sure where qsim19 and qsim come from, but take a look:
import numpy as np
import matplotlib.pyplot as plt
for index in [19, 11, 12, 20, 28, 27, 18]:
datasim = loadtxt("/home/%i_6.txt"%index)
data = loadtxt("/home/aux.%i_6"%index)
nos = range(1, 6) + range(7, 11)
simrecno = [np.nonzero(datasim[:,1] == n)[0] for n in nos]
recno = [np.nonzero(data[:,1] == n)[0] for n in nos]
qsim = [qsim[simrecno_i] for simrecno_i in simrecno]
q = [q[recno_i] for recno_i in recno]
sumqsim = [sum(qsim_i) for qsim_i in qsim]
sumq = [sum(q_i) for q_i in q]
xsim = nos
ysim = sumqsim
x = nos
y = sumq
plt.plot(x, log(y), 'b', label='Data')
plt.plot(xsim, log(ysim), 'r', label='Simulation')
plt.legend()
plt.title('Data vs. Simulation')
plt.show()

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Scipy Curve_fit gives a rather weird fit - python

Related

Logistic regression - strange behaviour of the decision boundary when additional parameters are added

Splittig data in python dataframe and getting the array values automatically

ValueError: x and y must have the same first dimension, but have different shapes

How to improve nonlinear regression fit better by using scipy.ODR?

Looping same program for different data files

Categories

Resources