Smoothing a 2D plot / adding a trend line in Python

The first graph is plotted from the original data, and the second is drawn after applying a moving average over 15 days.
I can keep increasing the moving-average window, but at some point it changes the overall picture.
Is there another way of smoothing the line?
import pandas as pd
import matplotlib.pyplot as plt

plt.plot(x, y)

def moving_avg(l, size):
    # Pad the list at both ends so the centered rolling mean is defined at the edges
    additional = int((size - 1) / 2)
    l2 = l[-additional:] + l + l[:additional]
    df = pd.DataFrame(l2, columns=["d"])
    result = (
        df.rolling(size, min_periods=size, center=True).mean()["d"].tolist()
    )
    # Drop the padded values again
    result = result[additional:-additional]
    return result

x = range(1, 365)
y = [0.25769641467531185,
0.25769641467531185,
0.25769641467531185,
0.25769641467531185,
0.15655577299412943,
0.15655577299412943,
0.19569471624266177,
0.15655577299412943,
0.19569471624266177,
0.19569471624266177,
0.15655577299412943,
0.19569471624266177,
0.19569471624266177,
0.15655577299412943,
0.19569471624266177,
0.19569471624266177,
0.19569471624266177,
0.2968353579238442,
0.2968353579238442,
0.2981526713477847,
0.31838079968402117,
0.2792418564354889,
0.2792418564354889,
0.2792418564354889,
0.21724015800283883,
0.17810121475430643,
0.1376449580818335,
0.21855747142677937,
0.21855747142677937,
0.21855747142677937,
0.21855747142677937,
0.21855747142677937,
0.25769641467531174,
0.25769641467531174,
0.15655577299412934,
0.1956947162426617,
0.25769641467531174,
0.25769641467531174,
0.25769641467531174,
0.21855747142677937,
0.25769641467531174,
0.35883705635649416,
0.35883705635649416,
0.25769641467531174,
0.25769641467531174,
0.25769641467531174,
0.2968353579238441,
0.2968353579238441,
0.234833659491194,
0.234833659491194,
0.33597430117237637,
0.33597430117237637,
0.33597430117237637,
0.33597430117237637,
0.33597430117237637,
0.33597430117237637,
0.3979759996050264,
0.33597430117237637,
0.33597430117237637,
0.33597430117237637,
0.33597430117237637,
0.3979759996050264,
0.3979759996050264,
0.33597430117237637,
0.3979759996050264,
0.45997769803767646,
0.4208387547891441,
0.3816998115406118,
0.5839810949029767,
0.5448421516544443,
0.6405573973141552,
0.5899870764735639,
0.48884643479238155,
0.48884643479238155,
0.5279853780409139,
0.32570409467854905,
0.32570409467854905,
0.20770667938383622,
0.19084990577030586,
0.22998884901883823,
0.22998884901883823,
0.2414202266108971,
0.3425608682920795,
0.44370150997326185,
0.5279853780409139,
0.6291260197220963,
0.6291260197220963,
0.6682649629706285,
0.6176946421300374,
0.4545523020162049,
0.3534116603350225,
0.25227101865384016,
0.6231200381515088,
0.5839810949029767,
0.6459827933356266,
0.6459827933356266,
0.6068438500870943,
0.6169579142552125,
0.6675282350958037,
0.19553857391695253,
0.2966792155981349,
0.23467751716548488,
0.28524783800607606,
0.3243867812546084,
0.3142727170864902,
0.3142727170864902,
0.3816998115406117,
0.2805591698594293,
0.2414202266108969,
0.39313118913267053,
0.39313118913267053,
0.4942718308138529,
0.4639296383094982,
0.36278899662831576,
0.3965025438553766,
0.49764318553655895,
0.49764318553655895,
0.559644883969209,
0.4585042422880266,
0.47741505720032246,
0.47741505720032246,
0.4509258415219175,
0.38349874706779596,
0.22035640695396347,
0.15835470852131345,
0.1974936517698458,
0.1974936517698458,
0.1974936517698458,
0.19026932022118995,
0.15655577299412912,
0.15655577299412912,
0.11741682974559677,
0.11741682974559677,
0.0985060148333009,
0.0985060148333009,
0.0985060148333009,
0.13764495808183325,
0.0985060148333009,
0.0985060148333009,
0.05936707158476854,
0.03913894324853206,
0.0492530074166503,
0.08839195066518266,
0.0492530074166503,
0.08839195066518266,
0.12753089391371503,
0.12753089391371503,
0.1781012147543062,
0.16798715058618796,
0.12884820733765562,
0.12884820733765562,
0.12884820733765562,
0.08970926408912326,
0.12884820733765562,
0.07827788649706442,
0.07827788649706442,
0.07827788649706442,
0.1794185281782468,
0.24142022661089685,
0.24142022661089685,
0.24142022661089685,
0.24142022661089685,
0.26670538703119245,
0.26670538703119245,
0.26670538703119245,
0.20470368859854238,
0.20470368859854238,
0.20470368859854238,
0.20470368859854238,
0.22998884901883798,
0.33112949070002035,
0.2691277922673703,
0.2691277922673703,
0.2691277922673703,
0.22998884901883798,
0.25888617521346147,
0.20831585437287034,
0.14631415594022026,
0.14631415594022026,
0.10717521269168791,
0.1577455335322791,
0.25888617521346147,
0.2906732340275474,
0.358100328481669,
0.358100328481669,
0.5212426685955015,
0.5603816118440337,
0.5098112910034426,
0.7120925743658073,
0.651408189357098,
0.6176946421300371,
0.5785556988815047,
0.4154133587676723,
0.47741505720032235,
0.5785556988815047,
0.3004189342582532,
0.3257040946785488,
0.39313118913267037,
0.39313118913267037,
0.39313118913267037,
0.33112949070002035,
0.2691277922673703,
0.3449832735282571,
0.3196981131079615,
0.2590137280992521,
0.30958404893984326,
0.3715857473724933,
0.33244680412396094,
0.3438781817160198,
0.31016463448895903,
0.33039276282519553,
0.28993650615272254,
0.23936618531213139,
0.1773644868794814,
0.1773644868794814,
0.1659331092874225,
0.09850601483330092,
0.14570498095118606,
0.3479862643135508,
0.38712520756208313,
0.4996972268353244,
0.5388361700838568,
0.4996972268353244,
0.4996972268353244,
0.43227013238120277,
0.2502169773550745,
0.21107803410654213,
0.0985060148333009,
0.0985060148333009,
0.13764495808183325,
0.13764495808183325,
0.1767839013303656,
0.15655577299412912,
0.15655577299412912,
0.15655577299412912,
0.15655577299412912,
0.11741682974559677,
0.21855747142677917,
0.3816998115406116,
0.42083875478914395,
0.4599776980376763,
0.4599776980376763,
0.4599776980376763,
0.49911664128620864,
0.3979759996050262,
0.2534893686319086,
0.3154910670645586,
0.3154910670645586,
0.3154910670645586,
0.27635212381602625,
0.23721318056749394,
0.23721318056749394,
0.17941852817824683,
0.07827788649706446,
0.03913894324853211,
0.20228128336236453,
0.20228128336236453,
0.20228128336236453,
0.20228128336236453,
0.24142022661089685,
0.3931311891326704,
0.43227013238120277,
0.2691277922673704,
0.3082667355159027,
0.34740567876443507,
0.3865446220129674,
0.6508276038079822,
0.49911664128620864,
0.4599776980376763,
0.42083875478914395,
0.4154133587676724,
0.40998796274620086,
0.3708490194976685,
0.08187575755143311,
0.09030414435819832,
0.12944308760673068,
0.12944308760673068,
0.13486848362820222,
0.10115493640114144,
0.11560359949845322,
0.12644009682143703,
0.1571506532632042,
0.11801171001467185,
0.11801171001467185,
0.11801171001467185,
0.1571506532632042,
0.19327231100648368,
0.26912779226737044,
0.2637023962458989,
0.30284133949443126,
0.30284133949443126,
0.30284133949443126,
0.2974159434729597,
0.2859845658809008,
0.18484392419971843,
0.17135850530889415,
0.1322195620603618,
0.17135850530889415,
0.1322195620603618,
0.13764495808183336,
0.1996466565144834,
0.1996466565144834,
0.2299888490188381,
0.2691277922673704,
0.2299888490188381,
0.2691277922673704,
0.2691277922673704,
0.20844340725866098,
0.24758235050719332,
0.19701202966660214,
0.1970120296666021,
0.2361509729151345,
0.1970120296666021,
0.1970120296666021,
0.19569471624266155,
0.19026932022118997,
0.2155544806414856,
0.17641553739295326,
0.1372765941444209,
0.17641553739295326,
0.17641553739295326,
0.1372765941444209,
0.14270199016589247,
0.1565557729941292,
0.19569471624266158,
0.2348336594911939,
0.19569471624266158,
0.1565557729941292,
0.19569471624266158,
0.19026932022118997,
0.15113037697265763,
0.11199143372412527,
0.11199143372412527,
0.15113037697265763,
0.15113037697265763,
0.15113037697265763,
0.16798715058618804,
0.20712609383472042,
0.24626503708325276,
0.24626503708325276,
0.24626503708325276,
0.24626503708325276,
0.24626503708325276,
0.2348336594911939,
0.2348336594911939,
0.2348336594911939,
0.2348336594911939,
0.2348336594911939,
0.27397260273972623,
0.27397260273972623,
0.2348336594911939,
0.2348336594911939,
0.2348336594911939,
0.19569471624266158,
0.19569471624266158,
0.19569471624266158,
0.1565557729941292,
0.1565557729941292,
0.11741682974559685,
0.11741682974559685,
0.1565557729941292,
0.21855747142677923,
0.21855747142677923,
0.21855747142677923]
The following shows
sns.distplot(y, bins=100, color='k')
(OK, it's a different plot.)
But it's also very stepwise, and somehow seaborn manages to draw a smooth line over it.
How does it do that?
Practically, the following is the best I have now:
y_new = moving_avg(y, 31)
import numpy as np
import matplotlib.pyplot as plt
from csaps import csaps
np.random.seed(1234)
xs = np.linspace(x[0], x[-1], 52)
ys = csaps(x, y_new, xs, smooth=0.85)
plt.plot(x, y, 'o', xs, ys, '-')
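
Side note on the seaborn question above: distplot overlays a Gaussian kernel density estimate (KDE) on the histogram, which is why its curve looks smooth. A minimal sketch of the same idea with scipy.stats.gaussian_kde (my own illustration, not code from the original post):

import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde

y_arr = np.asarray(y)                      # y is the list defined above
kde = gaussian_kde(y_arr)                  # bandwidth chosen automatically (Scott's rule)
grid = np.linspace(y_arr.min(), y_arr.max(), 200)

plt.hist(y_arr, bins=100, density=True, color='k', alpha=0.3)
plt.plot(grid, kde(grid), 'r-', lw=2)      # the smooth density curve
plt.show()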

I was taught the following technique at uni for finding the trend line in biosignals (PPG, ECG, etc.): first apply a moving mean to the signal, then a Savitzky-Golay smoothing filter.
The code is below. I have used another moving-average technique, which I get along with better, from here:
How to calculate rolling / moving average using NumPy / SciPy?
and the Savitzky-Golay filter from Scipy:
https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.savgol_filter.html
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import savgol_filter

y = np.array(y)
x = range(1, 365)

## from: https://stackoverflow.com/questions/14313510/how-to-calculate-rolling-moving-average-using-numpy-scipy
def moving_average(x, w):
    return np.convolve(x, np.ones(w), 'valid') / w

y_ave = moving_average(y, 10)                          ## moving average
x_ave = np.arange(x[0], x[-1], x[-1]/y_ave.shape[0])   ## compensate for shorter signal
y_savgol = savgol_filter(y_ave, 99, 3)                 ## Savitzky-Golay filtering
fig, axs = plt.subplots(1, figsize=(30, 15))
axs.plot(x, y)
axs.plot(x_ave, y_savgol)
print(y_savgol.shape)
You can use the documentation above to adjust the parameters and achieve the results you are looking for. The code produces the following figure, which may be too much smoothing depending on what you want to achieve:
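
As a variant (my own sketch, not part of the original answer), savgol_filter can also be applied directly to y without the moving-average step, which keeps x and the smoothed trend the same length:

import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import savgol_filter

y_arr = np.array(y)
y_trend = savgol_filter(y_arr, window_length=61, polyorder=3)  # window_length must be odd and > polyorder
plt.plot(x, y_arr, alpha=0.4, label='raw')
plt.plot(x, y_trend, 'r-', lw=2, label='Savitzky-Golay trend')
plt.legend()
plt.show()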

Related

How to visualize high-dimension vectors as points in 2D plane?

For example, there are three vectors as below.
[ 0.0377, 0.1808, 0.0807, -0.0703, 0.2427, -0.1957, -0.0712, -0.2137,
-0.0754, -0.1200, 0.1919, 0.0373, 0.0536, 0.0887, -0.1916, -0.1268,
-0.1910, -0.1411, -0.1282, 0.0274, -0.0781, 0.0138, -0.0654, 0.0491,
0.0398, 0.1696, 0.0365, 0.2266, 0.1241, 0.0176, 0.0881, 0.2993,
-0.1425, -0.2535, 0.1801, -0.1188, 0.1251, 0.1840, 0.1112, 0.3172,
0.0844, -0.1142, 0.0662, 0.0910, 0.0416, 0.2104, 0.0781, -0.0348,
-0.1488, 0.0129],
[-0.1302, 0.1581, -0.0897, 0.1024, -0.1133, 0.1076, 0.1595, -0.1047,
0.0760, 0.1092, 0.0062, -0.1567, -0.1448, -0.0548, -0.1275, -0.0689,
-0.1293, 0.1024, 0.1615, 0.0869, 0.2906, -0.2056, 0.0442, -0.0595,
-0.1448, 0.0167, -0.1259, -0.0989, 0.0651, -0.0424, 0.0795, -0.1546,
0.1330, -0.2284, 0.1672, 0.1847, 0.0841, 0.1771, -0.0101, -0.0681,
0.1497, 0.1226, 0.1146, -0.2090, 0.3275, 0.0981, -0.3295, 0.0590,
0.1130, -0.0650],
[-0.1745, -0.1940, -0.1529, -0.0964, 0.2657, -0.0979, 0.1510, -0.1248,
-0.1541, 0.1782, -0.1769, -0.2335, 0.2011, 0.1906, -0.1918, 0.1896,
-0.2183, -0.1543, 0.1816, 0.1684, -0.1318, 0.2285, 0.1784, 0.2260,
-0.2331, 0.0523, 0.1882, 0.1764, -0.1686, 0.2292]
How can I plot them as three points in the same 2D plane, like the picture below? Thanks!
I use PCA from sklearn; maybe this code helps you:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.decomposition import PCA
usa = [ 0.0377, 0.1808, 0.0807, -0.0703, 0.2427, -0.1957, -0.0712, -0.2137,
-0.0754, -0.1200, 0.1919, 0.0373, 0.0536, 0.0887, -0.1916, -0.1268,
-0.1910, -0.1411, -0.1282, 0.0274, -0.0781, 0.0138, -0.0654, 0.0491,
0.0398, 0.1696, 0.0365, 0.2266, 0.1241, 0.0176, 0.0881, 0.2993,
-0.1425, -0.2535, 0.1801, -0.1188, 0.1251, 0.1840, 0.1112, 0.3172,
0.0844, -0.1142, 0.0662, 0.0910, 0.0416, 0.2104, 0.0781, -0.0348,
-0.1488, 0.0129]
obama = [-0.1302, 0.1581, -0.0897, 0.1024, -0.1133, 0.1076, 0.1595, -0.1047,
0.0760, 0.1092, 0.0062, -0.1567, -0.1448, -0.0548, -0.1275, -0.0689,
-0.1293, 0.1024, 0.1615, 0.0869, 0.2906, -0.2056, 0.0442, -0.0595,
-0.1448, 0.0167, -0.1259, -0.0989, 0.0651, -0.0424, 0.0795, -0.1546,
0.1330, -0.2284, 0.1672, 0.1847, 0.0841, 0.1771, -0.0101, -0.0681,
0.1497, 0.1226, 0.1146, -0.2090, 0.3275, 0.0981, -0.3295, 0.0590,
0.1130, -0.0650]
nationality = [-0.1745, -0.1940, -0.1529, -0.0964, 0.2657, -0.0979, 0.1510, -0.1248,
-0.1541, 0.1782, -0.1769, -0.2335, 0.2011, 0.1906, -0.1918, 0.1896,
-0.2183, -0.1543, 0.1816, 0.1684, -0.1318, 0.2285, 0.1784, 0.2260,
-0.2331, 0.0523, 0.1882, 0.1764, -0.1686, 0.2292]
pca = PCA(n_components=1)
X = np.array(usa).reshape(2,len(usa)//2)
X = pca.fit_transform(X)
Y = np.array(obama).reshape(2,len(obama)//2)
Y = pca.fit_transform(Y)
Z = np.array(nationality).reshape(2,len(nationality)//2)
Z = pca.fit_transform(Z)
x_coordinates = [X[0][0], Y[0][0], Z[0][0]]
y_coordinates = [X[1][0], Y[1][0], Z[1][0]]
colors = ['r','g','b']
annotations=["U.S.A","Obama","Nationality"]
plt.figure(figsize=(8,6))
plt.scatter(x_coordinates, y_coordinates, marker=",", color=colors,s=300)
for i, label in enumerate(annotations):
    plt.annotate(label, (x_coordinates[i], y_coordinates[i]))
plt.show()
output:
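
An alternative sketch (my own, not part of the answer above, and it assumes truncating the vectors to a common length): stack all three vectors into one matrix and project them jointly with a single 2-component PCA, so both coordinates come from the same projection:

import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA

vectors = [usa, obama, nationality]            # the lists defined above
n = min(len(v) for v in vectors)               # shortest common length (30 here)
X = np.array([v[:n] for v in vectors])         # shape (3, 30)

points = PCA(n_components=2).fit_transform(X)  # one 2D point per vector

labels = ["U.S.A", "Obama", "Nationality"]
plt.scatter(points[:, 0], points[:, 1], c=['r', 'g', 'b'], s=300, marker=",")
for (px, py), label in zip(points, labels):
    plt.annotate(label, (px, py))
plt.show()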

How to solve warning message in Gekko due to m.connection?

I am using m.Connection to estimate variables' initial conditions, but I am getting 12 warning messages like:
Moreover, the APM file shows:
I am not sure how to resolve these warnings.
I am following this explanation "If pos1 or pos2 is not None, the associated var must be a GEKKO variable and the position is the (0-indexed) time-discretized index of the variable" to write m.Connection(var1,var2,pos1=None,pos2=None,node1='end',node2='end').
https://gekko.readthedocs.io/en/latest/quick_start.html#connections
Thanks in advance.
from gekko import GEKKO
import numpy as np
import matplotlib.pyplot as plt
import math as math
import pandas as pd
tm1 = [0, 0.0667,0.5,1,4, 22.61667]
mca1 = [5.68, 3.48, 3.24, 3.36, 2.96, 1.96]
tm2 = [0, 0.08333,0.5,1,4.25 , 22.8167]
mca2 = [5.68, 4.20, 4.04, 4.00, 3.76, 2.88]
tm3 = [0,0.08333,0.5,1,4.33 , 22.9500]
mca3 = [5.68, 4.64, 4.52, 4.56, 4.24, 3.72]
tm4 = [0,0.08333,0.5,1,4.0833 , 23.0833]
mca4 =[18.90,15.4,14.3,15.10,13.50, 10.90]
tm5 = [0,0.08333,0.5,1,4.5, 23.2167]
mca5 =[18.90, 15.5, 16.30, 16, 14.70, 13.00]
tm6 = [0,0.08333,0.5,1,4.6667, 23.3333 ]
mca6 = [18.90, 15.8, 11.70,15.5,12, 9.5 ]
df1=pd.DataFrame({'time':tm1,'x1':mca1})
df2=pd.DataFrame({'time':tm2,'x2':mca2})
df3=pd.DataFrame({'time':tm3,'x3':mca3})
df4=pd.DataFrame({'time':tm4,'x4':mca4})
df5=pd.DataFrame({'time':tm5,'x5':mca5})
df6=pd.DataFrame({'time':tm6,'x6':mca6})
df1.set_index('time',inplace=True)
df2.set_index('time',inplace=True)
df3.set_index('time',inplace=True)
df4.set_index('time',inplace=True)
df5.set_index('time',inplace=True)
df6.set_index('time',inplace=True)
#simulation time points
dfx = pd.DataFrame({'time':np.linspace(0,25,101)})
dfx.set_index('time',inplace=True)
#merge dataframes
dfxx=dfx.join(df1,how='outer')
dfxxx=dfxx.join(df2,how='outer')
dfxxxx=dfxxx.join(df3,how='outer')
dfxxxxx=dfxxxx.join(df4,how='outer')
dfxxxxxx=dfxxxxx.join(df5,how='outer')
df=dfxxxxxx.join(df6,how='outer')
# get True (1) or False (0) for measurement
df['meas1']=(df['x1'].values==df['x1'].values).astype(int)
df['meas2']=(df['x2'].values==df['x2'].values).astype(int)
df['meas3']=(df['x3'].values==df['x3'].values).astype(int)
df['meas4']=(df['x4'].values==df['x4'].values).astype(int)
df['meas5']=(df['x5'].values==df['x5'].values).astype(int)
df['meas6']=(df['x6'].values==df['x6'].values).astype(int)
#replace NaN with zeros
df0=df.fillna(value=0)
m = GEKKO()
m.time = df0.index.values
meas1 = m.Param(df0['meas1'].values)
meas2 = m.Param(df0['meas2'].values)
meas3 = m.Param(df0['meas3'].values)
meas4 = m.Param(df0['meas4'].values)
meas5 = m.Param(df0['meas5'].values)
meas6 = m.Param(df0['meas6'].values)
#adjustable Parameters
kf=m.FV(1.3,lb=0.01,ub=10)
ks=m.FV(1.3,lb=0.01,ub=10)
cnf01=m.FV(1.3,lb=0.01,ub=10)
cns01=m.FV(1.3,lb=0.01,ub=10)
#constrains
cnf02=m.FV(value=cnf01*0.5,lb=cnf01*0.5, ub=cnf01*0.5)
cns02=m.FV(value=cns01*0.5,lb=cns01*0.5, ub=cns01*0.5)
cnf03=m.FV(value=cnf01*0.25,lb=cnf01*0.25, ub=cnf01*0.25)
cns03=m.FV(value=cns01*0.25,lb=cns01*0.25, ub=cns01*0.25)
cnf04=m.FV(value=cnf01,lb=cnf01, ub=cnf01)
cns04=m.FV(value=cns01,lb=cns01, ub=cns01)
cnf05=m.FV(value=cnf01*0.5,lb=cnf01*0.5, ub=cnf01*0.5)
cns05=m.FV(value=cns01*0.5,lb=cns01*0.5, ub=cns01*0.5)
cnf06=m.FV(value=cnf01*0.25,lb=cnf01*0.25, ub=cnf01*0.25)
cns06=m.FV(value=cns01*0.25,lb=cns01*0.25, ub=cns01*0.25)
#Variables
c1 = m.Var(value=mca1[0])
c2 = m.Var(value=mca2[0])
c3 = m.Var(value=mca3[0])
c4 = m.Var(value=mca4[0])
c5 = m.Var(value=mca5[0])
c6 = m.Var(value=mca6[0])
cm1 = m.Param(df0['x1'].values)
cm2 = m.Param(df0['x2'].values)
cm3 = m.Param(df0['x3'].values)
cm4 = m.Param(df0['x4'].values)
cm5 = m.Param(df0['x5'].values)
cm6 = m.Param(df0['x6'].values)
m.Minimize((meas1*(c1-cm1)**2)+(meas2*(c2-cm2)**2)\
+(meas3*(c3-cm3)**2)+(meas4*(c4-cm4)**2)\
+(meas5*(c5-cm5)**2)+(meas6*(c6-cm6)**2))
cnf1=m.Var(value=cnf01,fixed_initial=False)
cns1=m.Var(value=cns01,fixed_initial=False)
cnf2=m.Var(value=cnf02,fixed_initial=False)
cns2=m.Var(value=cns02,fixed_initial=False)
cnf3=m.Var(value=cnf03,fixed_initial=False)
cns3=m.Var(value=cns03,fixed_initial=False)
cnf4=m.Var(value=cnf04,fixed_initial=False)
cns4=m.Var(value=cns04,fixed_initial=False)
cnf5=m.Var(value=cnf05,fixed_initial=False)
cns5=m.Var(value=cns05,fixed_initial=False)
cnf6=m.Var(value=cnf06,fixed_initial=False)
cns6=m.Var(value=cns06,fixed_initial=False)
#Equations
t = m.Param(value=m.time)
m.Connection(cnf1,cnf01,pos1=0,pos2=0,node1=1,node2=1)
m.Connection(cnf2,cnf02,pos1=0,pos2=0,node1=1,node2=1)
m.Connection(cnf3,cnf03,pos1=0,pos2=0,node1=1,node2=1)
m.Connection(cnf4,cnf04,pos1=0,pos2=0,node1=1,node2=1)
m.Connection(cnf5,cnf05,pos1=0,pos2=0,node1=1,node2=1)
m.Connection(cnf6,cnf06,pos1=0,pos2=0,node1=1,node2=1)
m.Connection(cns1,cns01,pos1=0,pos2=0,node1=1,node2=1)
m.Connection(cns2,cns02,pos1=0,pos2=0,node1=1,node2=1)
m.Connection(cns3,cns03,pos1=0,pos2=0,node1=1,node2=1)
m.Connection(cns4,cns04,pos1=0,pos2=0,node1=1,node2=1)
m.Connection(cns5,cns05,pos1=0,pos2=0,node1=1,node2=1)
m.Connection(cns6,cns06,pos1=0,pos2=0,node1=1,node2=1)
m.Equation(cnf1.dt()==-kf*c1*cnf1)
m.Equation(cns1.dt()==-ks*c1*cns1)
m.Equation(c1.dt()==cnf1.dt()+cns1.dt())
m.Equation(cnf2.dt()==-kf*c2*cnf2)
m.Equation(cns2.dt()==-ks*c2*cns2)
m.Equation(c2.dt()==cnf2.dt()+cns2.dt())
m.Equation(cnf3.dt()==-kf*c3*cnf3)
m.Equation(cns3.dt()==-ks*c3*cns3)
m.Equation(c3.dt()==cnf3.dt()+cns3.dt())
m.Equation(cnf4.dt()==-kf*c4*cnf4)
m.Equation(cns4.dt()==-ks*c4*cns4)
m.Equation(c4.dt()==cnf4.dt()+cns4.dt())
m.Equation(cnf5.dt()==-kf*c5*cnf5)
m.Equation(cns5.dt()==-ks*c5*cns5)
m.Equation(c5.dt()==cnf5.dt()+cns5.dt())
m.Equation(cnf6.dt()==-kf*c6*cnf6)
m.Equation(cns6.dt()==-ks*c6*cns6)
m.Equation(c6.dt()==cnf6.dt()+cns6.dt())
if True:
    kf.STATUS=1
    ks.STATUS=1
    cnf01.STATUS=1
    cns01.STATUS=1
    cnf02.STATUS=1
    cns02.STATUS=1
    cnf03.STATUS=1
    cns03.STATUS=1
    cnf04.STATUS=1
    cns04.STATUS=1
    cnf05.STATUS=1
    cns05.STATUS=1
    cnf06.STATUS=1
    cns06.STATUS=1
#Options
m.options.SOLVER = 1  # APOPT solver
m.options.IMODE = 5   # dynamic simultaneous estimation (MHE)
m.options.EV_TYPE = 2 # squared error
m.options.NODES = 3   # collocation nodes (2-5)
m.solve(disp=True)
m.open_folder()
print('Final SSE Objective: ' + str(m.options.objfcnval))
print('Solution')
print('cnf01 = ' + str(cnf01.value[0]))
print('cns01 = ' + str(cns01.value[0]))
print('kf = ' + str(kf.value[0]))
print('ks = ' + str(ks.value[0]))
print('cns02 = '+ str(cns02.value[0]))
print('cnf02 = '+ str(cnf02.value[0]))
print('cns03 = '+ str(cns03.value[0]))
print('cnf03 = '+ str(cnf03.value[0]))
print('cns04 = '+ str(cns04.value[0]))
print('cnf04 = '+ str(cnf04.value[0]))
print('cns05 = '+ str(cns05.value[0]))
print('cnf05 = '+ str(cnf05.value[0]))
print('cns06 = '+ str(cns06.value[0]))
print('cnf06 = '+ str(cnf06.value[0]))
plt.figure(1,figsize=(8,5))
plt.plot(m.time,c1.value,'r',label='Predicted c1')
plt.plot(m.time,c2.value,'y',label='Predicted c2')
plt.plot(m.time,c3.value,'c',label='Predicted c3')
plt.plot(m.time,c4.value,'g',label='Predicted c4')
plt.plot(m.time,c5.value,'b',label='Predicted c5')
plt.plot(m.time,c6.value,'m',label='Predicted c6')
plt.plot(tm1,mca1,'rx',label='Meas c1')
plt.plot(tm2,mca2,'yx',label='Meas c2')
plt.plot(tm3,mca3,'cx',label='Meas c3')
plt.plot(tm4,mca4,'go',label='Meas c4')
plt.plot(tm5,mca5,'bo',label='Meas c5')
plt.plot(tm6,mca6,'mo',label='Meas c6')
plt.xlabel('time (h)')
plt.ylabel('Concentration (mgCl2/L)')
plt.legend(loc='upper center', bbox_to_anchor=(0.5, -0.15), ncol=2)
The underlying node structure has a 1-index instead of the 0-index that is common in Python. Using pos1=1 and pos2=1 resolves the warnings.
m.Connection(cnf1,cnf01,pos1=1,pos2=1,node1=1,node2=1)
m.Connection(cnf2,cnf02,pos1=1,pos2=1,node1=1,node2=1)
m.Connection(cnf3,cnf03,pos1=1,pos2=1,node1=1,node2=1)
m.Connection(cnf4,cnf04,pos1=1,pos2=1,node1=1,node2=1)
m.Connection(cnf5,cnf05,pos1=1,pos2=1,node1=1,node2=1)
m.Connection(cnf6,cnf06,pos1=1,pos2=1,node1=1,node2=1)
Another issue is that Gekko variables shouldn't generally be used to initialize other values. I recommend setting x0=1.3 and using that float to initialize the variables. Change m.Var() to m.SV() to avoid reclassification of m.Var() as an m.FV() during the connection. The m.SV() is a promoted type of variable that is at the same level of precedence as the m.FV(). Here is a complete script although the results don't look optimal.
from gekko import GEKKO
import numpy as np
import matplotlib.pyplot as plt
import math as math
import pandas as pd
tm1 = [0, 0.0667,0.5,1,4, 22.61667]
mca1 = [5.68, 3.48, 3.24, 3.36, 2.96, 1.96]
tm2 = [0, 0.08333,0.5,1,4.25 , 22.8167]
mca2 = [5.68, 4.20, 4.04, 4.00, 3.76, 2.88]
tm3 = [0,0.08333,0.5,1,4.33 , 22.9500]
mca3 = [5.68, 4.64, 4.52, 4.56, 4.24, 3.72]
tm4 = [0,0.08333,0.5,1,4.0833 , 23.0833]
mca4 =[18.90,15.4,14.3,15.10,13.50, 10.90]
tm5 = [0,0.08333,0.5,1,4.5, 23.2167]
mca5 =[18.90, 15.5, 16.30, 16, 14.70, 13.00]
tm6 = [0,0.08333,0.5,1,4.6667, 23.3333 ]
mca6 = [18.90, 15.8, 11.70,15.5,12, 9.5 ]
df1=pd.DataFrame({'time':tm1,'x1':mca1})
df2=pd.DataFrame({'time':tm2,'x2':mca2})
df3=pd.DataFrame({'time':tm3,'x3':mca3})
df4=pd.DataFrame({'time':tm4,'x4':mca4})
df5=pd.DataFrame({'time':tm5,'x5':mca5})
df6=pd.DataFrame({'time':tm6,'x6':mca6})
df1.set_index('time',inplace=True)
df2.set_index('time',inplace=True)
df3.set_index('time',inplace=True)
df4.set_index('time',inplace=True)
df5.set_index('time',inplace=True)
df6.set_index('time',inplace=True)
#simulation time points
dfx = pd.DataFrame({'time':np.linspace(0,25,101)})
dfx.set_index('time',inplace=True)
#merge dataframes
dfxx=dfx.join(df1,how='outer')
dfxxx=dfxx.join(df2,how='outer')
dfxxxx=dfxxx.join(df3,how='outer')
dfxxxxx=dfxxxx.join(df4,how='outer')
dfxxxxxx=dfxxxxx.join(df5,how='outer')
df=dfxxxxxx.join(df6,how='outer')
# get True (1) or False (0) for measurement
df['meas1']=(df['x1'].values==df['x1'].values).astype(int)
df['meas2']=(df['x2'].values==df['x2'].values).astype(int)
df['meas3']=(df['x3'].values==df['x3'].values).astype(int)
df['meas4']=(df['x4'].values==df['x4'].values).astype(int)
df['meas5']=(df['x5'].values==df['x5'].values).astype(int)
df['meas6']=(df['x6'].values==df['x6'].values).astype(int)
#replace NaN with zeros
df0=df.fillna(value=0)
m = GEKKO()
m.time = df0.index.values
meas1 = m.Param(df0['meas1'].values)
meas2 = m.Param(df0['meas2'].values)
meas3 = m.Param(df0['meas3'].values)
meas4 = m.Param(df0['meas4'].values)
meas5 = m.Param(df0['meas5'].values)
meas6 = m.Param(df0['meas6'].values)
#adjustable Parameters
kf=m.FV(1.3,lb=0.01,ub=10)
ks=m.FV(1.3,lb=0.01,ub=10)
x0 = 1.3
cnf01=m.FV(x0,lb=0.01,ub=10)
cns01=m.FV(x0,lb=0.01,ub=10)
#constrains
cnf02=m.FV(value=x0*0.5,lb=x0*0.5, ub=x0*0.5)
cns02=m.FV(value=x0*0.5,lb=x0*0.5, ub=x0*0.5)
cnf03=m.FV(value=x0*0.25,lb=x0*0.25, ub=x0*0.25)
cns03=m.FV(value=x0*0.25,lb=x0*0.25, ub=x0*0.25)
cnf04=m.FV(value=x0,lb=x0, ub=x0)
cns04=m.FV(value=x0,lb=x0, ub=x0)
cnf05=m.FV(value=x0*0.5,lb=x0*0.5, ub=x0*0.5)
cns05=m.FV(value=x0*0.5,lb=x0*0.5, ub=x0*0.5)
cnf06=m.FV(value=x0*0.25,lb=x0*0.25, ub=x0*0.25)
cns06=m.FV(value=x0*0.25,lb=x0*0.25, ub=x0*0.25)
#Variables
c1 = m.SV(value=mca1[0])
c2 = m.SV(value=mca2[0])
c3 = m.SV(value=mca3[0])
c4 = m.SV(value=mca4[0])
c5 = m.SV(value=mca5[0])
c6 = m.SV(value=mca6[0])
cm1 = m.Param(df0['x1'].values)
cm2 = m.Param(df0['x2'].values)
cm3 = m.Param(df0['x3'].values)
cm4 = m.Param(df0['x4'].values)
cm5 = m.Param(df0['x5'].values)
cm6 = m.Param(df0['x6'].values)
m.Minimize((meas1*(c1-cm1)**2)+(meas2*(c2-cm2)**2)\
+(meas3*(c3-cm3)**2)+(meas4*(c4-cm4)**2)\
+(meas5*(c5-cm5)**2)+(meas6*(c6-cm6)**2))
cnf1=m.SV(value=x0,fixed_initial=False)
cns1=m.SV(value=x0,fixed_initial=False)
cnf2=m.SV(value=x0,fixed_initial=False)
cns2=m.SV(value=x0,fixed_initial=False)
cnf3=m.SV(value=x0,fixed_initial=False)
cns3=m.SV(value=x0,fixed_initial=False)
cnf4=m.SV(value=x0,fixed_initial=False)
cns4=m.SV(value=x0,fixed_initial=False)
cnf5=m.SV(value=x0,fixed_initial=False)
cns5=m.SV(value=x0,fixed_initial=False)
cnf6=m.SV(value=x0,fixed_initial=False)
cns6=m.SV(value=x0,fixed_initial=False)
#Equations
t = m.Param(value=m.time)
m.Connection(cnf1,cnf01,pos1=1,pos2=1,node1=1,node2=1)
m.Connection(cnf2,cnf02,pos1=1,pos2=1,node1=1,node2=1)
m.Connection(cnf3,cnf03,pos1=1,pos2=1,node1=1,node2=1)
m.Connection(cnf4,cnf04,pos1=1,pos2=1,node1=1,node2=1)
m.Connection(cnf5,cnf05,pos1=1,pos2=1,node1=1,node2=1)
m.Connection(cnf6,cnf06,pos1=1,pos2=1,node1=1,node2=1)
m.Connection(cns1,cns01,pos1=1,pos2=1,node1=1,node2=1)
m.Connection(cns2,cns02,pos1=1,pos2=1,node1=1,node2=1)
m.Connection(cns3,cns03,pos1=1,pos2=1,node1=1,node2=1)
m.Connection(cns4,cns04,pos1=1,pos2=1,node1=1,node2=1)
m.Connection(cns5,cns05,pos1=1,pos2=1,node1=1,node2=1)
m.Connection(cns6,cns06,pos1=1,pos2=1,node1=1,node2=1)
m.Equation(cnf1.dt()==-kf*c1*cnf1)
m.Equation(cns1.dt()==-ks*c1*cns1)
m.Equation(c1.dt()==cnf1.dt()+cns1.dt())
m.Equation(cnf2.dt()==-kf*c2*cnf2)
m.Equation(cns2.dt()==-ks*c2*cns2)
m.Equation(c2.dt()==cnf2.dt()+cns2.dt())
m.Equation(cnf3.dt()==-kf*c3*cnf3)
m.Equation(cns3.dt()==-ks*c3*cns3)
m.Equation(c3.dt()==cnf3.dt()+cns3.dt())
m.Equation(cnf4.dt()==-kf*c4*cnf4)
m.Equation(cns4.dt()==-ks*c4*cns4)
m.Equation(c4.dt()==cnf4.dt()+cns4.dt())
m.Equation(cnf5.dt()==-kf*c5*cnf5)
m.Equation(cns5.dt()==-ks*c5*cns5)
m.Equation(c5.dt()==cnf5.dt()+cns5.dt())
m.Equation(cnf6.dt()==-kf*c6*cnf6)
m.Equation(cns6.dt()==-ks*c6*cns6)
m.Equation(c6.dt()==cnf6.dt()+cns6.dt())
#Options
m.options.SOLVER = 1 # APOPT solver
m.options.IMODE = 5 # Dynamic Simultaneous - estimation = MHE
m.options.EV_TYPE = 2 # Squared error
m.options.NODES = 3 # Collocation nodes (2,5)
if True:
    kf.STATUS=1
    ks.STATUS=1
    cnf01.STATUS=1
    cns01.STATUS=1
    cnf02.STATUS=1
    cns02.STATUS=1
    cnf03.STATUS=1
    cns03.STATUS=1
    cnf04.STATUS=1
    cns04.STATUS=1
    cnf05.STATUS=1
    cns05.STATUS=1
    cnf06.STATUS=1
    cns06.STATUS=1
m.options.TIME_SHIFT = 0
try:
    m.solve(disp=True)
except:
    print("don't stop when not finding cnf01...cnf06")
#m.open_folder()
print('Final SSE Objective: ' + str(m.options.objfcnval))
print('Solution')
print('cnf01 = ' + str(cnf1.value[0]))
print('cns01 = ' + str(cns1.value[0]))
print('kf = ' + str(kf.value[0]))
print('ks = ' + str(ks.value[0]))
print('cns02 = '+ str(cns2.value[0]))
print('cnf02 = '+ str(cnf2.value[0]))
print('cns03 = '+ str(cns3.value[0]))
print('cnf03 = '+ str(cnf3.value[0]))
print('cns04 = '+ str(cns4.value[0]))
print('cnf04 = '+ str(cnf4.value[0]))
print('cns05 = '+ str(cns5.value[0]))
print('cnf05 = '+ str(cnf5.value[0]))
print('cns06 = '+ str(cns6.value[0]))
print('cnf06 = '+ str(cnf6.value[0]))
plt.figure(1,figsize=(8,5))
plt.plot(m.time,c1.value,'r',label='Predicted c1')
plt.plot(m.time,c2.value,'y',label='Predicted c2')
plt.plot(m.time,c3.value,'c',label='Predicted c3')
plt.plot(m.time,c4.value,'g',label='Predicted c4')
plt.plot(m.time,c5.value,'b',label='Predicted c5')
plt.plot(m.time,c6.value,'m',label='Predicted c6')
plt.plot(tm1,mca1,'rx',label='Meas c1')
plt.plot(tm2,mca2,'yx',label='Meas c2')
plt.plot(tm3,mca3,'cx',label='Meas c3')
plt.plot(tm4,mca4,'go',label='Meas c4')
plt.plot(tm5,mca5,'bo',label='Meas c5')
plt.plot(tm6,mca6,'mo',label='Meas c6')
plt.xlabel('time (h)')
plt.ylabel('Concentration (mgCl2/L)')
plt.legend(loc='upper center', bbox_to_anchor=(0.5, -0.15), ncol=2)
plt.show()

Python - Fit gaussian to noisy data with lmfit

I'm trying to fit a gaussian to this data
x = [4170.177259096838, 4170.377258006199, 4170.577256915561, 4170.777255824922, 4170.977254734283, 4171.177253643645, 4171.377252553006, 4171.577251462368, 4171.777250371729, 4171.977249281091, 4172.177248190453, 4172.377247099814, 4172.577246009175, 4172.777244918537, 4172.977243827898, 4173.17724273726, 4173.377241646621, 4173.577240555983, 4173.777239465344, 4173.977238374706, 4174.177237284067, 4174.377236193429, 4174.57723510279, 4174.777234012152, 4174.977232921513, 4175.177231830875, 4175.377230740236, 4175.577229649598, 4175.777228558959, 4175.977227468321, 4176.177226377682, 4176.377225287044, 4176.577224196405, 4176.777223105767, 4176.977222015128, 4177.17722092449, 4177.377219833851, 4177.577218743213, 4177.777217652574, 4177.977216561936, 4178.177215471297, 4178.377214380659, 4178.57721329002, 4178.777212199382, 4178.977211108743, 4179.177210018105, 4179.377208927466, 4179.577207836828, 4179.777206746189, 4179.977205655551, 4180.177204564912, 4180.377203474274, 4180.577202383635, 4180.777201292997, 4180.977200202357, 4181.17719911172, 4181.377198021081, 4181.577196930443, 4181.777195839804, 4181.977194749166, 4182.177193658527, 4182.377192567888, 4182.5771914772495, 4182.777190386612, 4182.9771892959725, 4183.177188205335, 4183.377187114696, 4183.577186024058, 4183.777184933419, 4183.9771838427805, 4184.177182752143, 4184.3771816615035, 4184.5771805708655, 4184.777179480228, 4184.977178389589, 4185.1771772989505, 4185.3771762083115, 4185.5771751176735, 4185.777174027035, 4185.977172936397, 4186.1771718457585, 4186.3771707551205, 4186.5771696644815, 4186.777168573843, 4186.977167483204, 4187.177166392566, 4187.377165301927, 4187.577164211289, 4187.77716312065, 4187.977162030013, 4188.177160939374, 4188.377159848735, 4188.577158758096, 4188.777157667458, 4188.977156576819, 4189.177155486181, 4189.377154395542, 4189.577153304904, 4189.777152214265, 4189.977151123627, 4190.177150032989, 4190.37714894235, 4190.577147851711, 4190.777146761073, 4190.977145670434, 4191.177144579796, 4191.377143489157, 4191.577142398519, 4191.77714130788, 4191.977140217242, 4192.177139126603, 4192.377138035965, 4192.577136945326, 4192.777135854688, 4192.977134764049, 4193.177133673411, 4193.377132582772, 4193.577131492134, 4193.777130401495, 4193.977129310857, 4194.177128220218, 4194.377127129579, 4194.577126038941, 4194.777124948303, 4194.977123857664, 4195.177122767026, 4195.377121676387, 4195.577120585749, 4195.77711949511, 4195.977118404472, 4196.177117313833, 4196.377116223195, 4196.577115132556, 4196.777114041918, 4196.977112951279, 4197.177111860641, 4197.377110770002, 4197.577109679364, 4197.777108588725, 4197.977107498087, 4198.177106407448, 4198.37710531681, 4198.577104226171, 4198.777103135533, 4198.977102044893, 4199.177100954256, 4199.377099863617, 4199.577098772979, 4199.77709768234, 4199.977096591702, 4200.177095501063, 4200.377094410424, 4200.5770933197855, 4200.777092229148, 4200.9770911385085, 4201.177090047871, 4201.377088957232, 4201.577087866594, 4201.7770867759555, 4201.9770856853165, 4202.177084594679, 4202.377083504041, 4202.5770824134015, 4202.777081322764, 4202.977080232125, 4203.1770791414865, 4203.377078050848, 4203.5770769602095, 4203.777075869571, 4203.9770747789335, 4204.1770736882945, 4204.3770725976565, 4204.5770715070175, 4204.777070416379, 4204.97706932574, 4205.177068235102, 4205.377067144463, 4205.577066053825, 4205.777064963186, 4205.977063872549, 4206.17706278191, 4206.377061691271, 4206.577060600632, 4206.777059509994, 4206.977058419355, 4207.177057328717, 
4207.377056238078, 4207.57705514744, 4207.777054056801, 4207.977052966163, 4208.177051875525, 4208.377050784886, 4208.577049694247, 4208.777048603609, 4208.977047512971, 4209.177046422332, 4209.377045331693, 4209.577044241055, 4209.777043150416, 4209.977042059778, 4210.177040969139, 4210.377039878501, 4210.577038787862, 4210.777037697224, 4210.977036606585, 4211.177035515947, 4211.377034425308, 4211.57703333467, 4211.777032244031, 4211.977031153393, 4212.177030062754, 4212.377028972116, 4212.577027881477, 4212.777026790839, 4212.9770257002, 4213.177024609562, 4213.377023518923, 4213.577022428285, 4213.777021337646, 4213.977020247008, 4214.177019156369, 4214.377018065731, 4214.577016975092, 4214.777015884454, 4214.977014793814, 4215.177013703177, 4215.377012612538, 4215.5770115219, 4215.777010431261, 4215.977009340623, 4216.177008249984, 4216.377007159345, 4216.577006068707, 4216.777004978069, 4216.977003887429, 4217.177002796792, 4217.377001706153, 4217.577000615515, 4217.776999524876, 4217.976998434238, 4218.176997343599, 4218.37699625296, 4218.5769951623215, 4218.776994071684, 4218.9769929810445, 4219.176991890407, 4219.376990799769, 4219.5769897091295, 4219.7769886184915, 4219.9769875278525, 4220.176986437215, 4220.376985346577, 4220.5769842559375, 4220.7769831653, 4220.9769820746615, 4221.1769809840225, 4221.376979893384, 4221.5769788027455, 4221.776977712107, 4221.9769766214695, 4222.17697553083, 4222.3769744401925, 4222.576973349554, 4222.776972258915, 4222.976971168276, 4223.176970077638, 4223.376968986999, 4223.576967896361, 4223.776966805722, 4223.976965715085, 4224.176964624445, 4224.376963533807, 4224.576962443168, 4224.77696135253, 4224.976960261891, 4225.176959171253, 4225.376958080614, 4225.576956989976, 4225.776955899337, 4225.976954808699, 4226.17695371806, 4226.376952627422, 4226.576951536783, 4226.776950446145, 4226.976949355506, 4227.176948264868, 4227.376947174229, 4227.576946083591, 4227.776944992952, 4227.976943902314, 4228.176942811675, 4228.376941721037, 4228.576940630398, 4228.776939539759, 4228.976938449121, 4229.176937358483, 4229.376936267844, 4229.576935177205, 4229.776934086567, 4229.976932995929]
y = [1.0063203573226929, 0.9789621233940125, 0.9998905658721924, 0.9947001934051514, 1.023498773574829, 1.0001505613327026, 0.9659610986709596, 1.0141736268997192, 0.9910064339637756, 0.961456060409546, 0.9808377623558044, 0.9717124700546264, 1.0020164251327517, 0.9276596307754515, 1.0044682025909424, 0.9898168444633484, 1.0139398574829102, 1.016809344291687, 0.9985541105270386, 1.0404949188232422, 1.0104306936264038, 1.0101377964019775, 1.0228283405303955, 1.014385461807251, 0.9949180483818054, 0.9398794174194336, 1.0047662258148191, 1.0185784101486206, 0.9942153096199036, 1.0496678352355957, 0.929694890975952, 1.0259612798690796, 1.0174839496612549, 0.9557819366455078, 1.009858012199402, 1.0258405208587646, 1.0318727493286133, 0.9781686067581176, 0.9566296339035034, 0.9626089930534364, 1.040783166885376, 0.9469046592712402, 0.9732370972633362, 1.0082777738571167, 1.0438332557678225, 1.067220687866211, 1.0809389352798462, 1.0122139453887942, 0.995375156402588, 1.025692343711853, 1.0900095701217651, 1.0033329725265503, 0.9947514533996582, 0.9366152882575988, 1.0340673923492432, 1.0574461221694946, 0.9984419345855712, 0.9406535029411316, 1.0367794036865234, 1.0252420902252195, 0.9390246868133544, 1.057265043258667, 1.0652446746826172, 1.0001699924468994, 1.0561981201171875, 0.9452269077301024, 1.0119216442108154, 1.000349760055542, 0.9879921674728394, 0.9834288954734802, 0.976799249649048, 0.9408118724822998, 1.0574927330017092, 1.0466219186782837, 0.97526878118515, 0.9811903238296508, 0.9985196590423584, 0.9862677454948424, 0.964194357395172, 1.0116554498672483, 0.9122620820999146, 0.9972245693206788, 0.9447768926620485, 1.0320085287094116, 1.0034307241439822, 0.965615689754486, 1.0228805541992188, 0.9555847048759459, 1.00389301776886, 0.9856386780738832, 0.9894683361053468, 1.0711736679077148, 0.990192711353302, 1.016653060913086, 1.0263935327529907, 0.9454292058944702, 0.9236765503883362, 0.9511216878890992, 0.9773555994033812, 0.9222095608711244, 0.9599731564521791, 1.0067923069000244, 1.0022263526916504, 0.9766445159912108, 1.0026237964630127, 1.010635256767273, 0.9901092052459716, 0.9869268536567688, 1.0354781150817869, 0.9797658920288086, 0.9543874263763428, 0.9747632145881652, 0.9942164421081544, 1.008299469947815, 0.9546594023704528, 1.0318409204483032, 1.0383642911911009, 1.0332415103912354, 1.0234425067901611, 1.0186198949813845, 1.0179851055145264, 1.0760197639465332, 0.9456835985183716, 1.0079874992370603, 0.9838529229164124, 0.8951097726821899, 0.9530791640281676, 0.9732348322868348, 0.9659185409545898, 1.0089071989059448, 0.963958203792572, 1.0035384893417358, 0.9776629805564879, 0.964256465435028, 0.9468261599540709, 1.0145124197006226, 1.0375784635543823, 0.992344319820404, 0.9584225416183472, 1.0427420139312744, 0.9997742176055908, 0.9584409594535828, 1.0051720142364502, 0.9606672525405884, 0.9797580242156982, 0.9900978207588196, 0.943138301372528, 0.9368865489959716, 0.9272330403327942, 0.9655094146728516, 0.9074565172195436, 0.97406405210495, 0.8742623329162598, 0.9219859838485718, 0.9126378297805786, 0.8354664444923401, 0.9138413667678832, 0.9268960952758788, 0.8841327428817749, 0.9733222126960754, 0.8825243711471558, 0.9243521094322203, 0.9403685927391052, 0.8782523870468141, 0.9003781080245972, 0.8850597143173218, 0.9231640696525574, 0.931676983833313, 0.8601804971694946, 0.8312444686889648, 0.9361259937286376, 0.9289224147796632, 0.8919285535812378, 0.8838070034980774, 0.9187015891075134, 0.9484543204307556, 0.8572731018066406, 0.8458079099655151, 0.92625629901886, 
0.9748064875602722, 0.9674397706985474, 0.9326313138008118, 0.9933922290802002, 1.0025516748428345, 0.9956294894218444, 0.8995802998542786, 0.9598655700683594, 1.0185420513153076, 0.9935647249221802, 0.9689980745315552, 0.9919951558113098, 1.0028616189956665, 1.0252325534820557, 1.0221387147903442, 1.009528875350952, 1.0272767543792725, 0.9865442514419556, 0.9821861386299132, 0.95982563495636, 0.9557262063026428, 0.9864148497581482, 1.0166704654693604, 1.0599093437194824, 1.0000406503677368, 0.9622656106948853, 1.0044697523117063, 1.0404677391052246, 1.0023702383041382, 0.9803014993667604, 1.0197279453277588, 0.9902933835983276, 0.998839259147644, 0.966608464717865, 1.0340296030044556, 0.9632315635681152, 0.9758646488189696, 0.9757773876190186, 0.9818265438079834, 1.0110433101654053, 1.0131133794784546, 1.0256367921829224, 1.0690158605575562, 0.9764784574508668, 0.9947471022605896, 0.9979920387268066, 0.9850373864173888, 0.9165602922439576, 0.9634824395179749, 1.052489995956421, 0.9370544552803041, 1.0348092317581177, 1.0473220348358154, 0.9566289782524108, 0.9579214453697203, 0.972671627998352, 0.9536439180374146, 0.9755330085754396, 0.9753606915473938, 0.9924075603485109, 0.9893715381622314, 0.9780346751213074, 1.0207450389862058, 0.9914312362670898, 0.9940584301948548, 1.0417673587799072, 0.977041721343994, 1.0113568305969238, 1.030456304550171, 1.0540854930877686, 0.9963837265968324, 1.002269268035889, 0.9528346061706544, 0.9132148027420044, 1.0386162996292114, 0.9384365677833556, 1.0175614356994631, 1.0362330675125122, 0.9502999186515808, 1.0015273094177246, 0.987025022506714, 0.9869014024734496, 0.9577396512031556, 0.9633736610412598, 1.0747206211090088, 1.1858476400375366, 0.9917531609535216, 1.0963184833526611, 0.9528627991676332, 0.9999563694000244, 1.0115929841995241, 1.0094747543334959, 0.9977090358734132, 0.9800350666046144, 1.0336441993713381, 1.0021690130233765, 0.9629588127136229, 0.9191407561302184, 0.9930744767189026, 1.0318671464920044, 0.975939691066742, 0.9548277258872986, 1.0113637447357178, 0.9920935630798341, 0.9777255654335022, 0.9780721664428712, 0.9507009387016296, 0.9387223720550536, 1.0220414400100708, 1.019809007644653, 0.9822806715965272, 1.0380866527557373, 1.0477066040039062, 1.0222935676574707, 1.0258997678756714, 1.027082443237305, 1.0487046241760254, 0.9292799830436708, 0.999277114868164, 1.044923186302185, 1.0261610746383667]
e = [3.865531107294373e-05, 3.866014958475717e-05, 3.866496626869776e-05, 3.8669764762744314e-05, 3.867453415296041e-05, 3.8679270801367245e-05, 3.8683978345943615e-05, 3.868864223477431e-05, 3.8693269743816934e-05, 3.8697849959135056e-05, 3.870237924274989e-05, 3.8706857594661415e-05, 3.871127773891203e-05, 3.871564331348054e-05, 3.871994340443053e-05, 3.872417437378317e-05, 3.8728336221538484e-05, 3.8732425309717655e-05, 3.8736438000341884e-05, 3.874037065543234e-05, 3.8744219637010247e-05, 3.874798130709678e-05, 3.8751652027713135e-05, 3.875523543683812e-05, 3.8758716982556514e-05, 3.876210394082591e-05, 3.8765389035688706e-05, 3.8768568629166105e-05, 3.87716390832793e-05, 3.877460039802827e-05, 3.877745257341303e-05, 3.878018469549716e-05, 3.8782800402259454e-05, 3.878529605572112e-05, 3.8787664379924536e-05, 3.878991265082732e-05, 3.8792029954493046e-05, 3.8794016290921725e-05, 3.879586802213453e-05, 3.8797588786110275e-05, 3.879916766891256e-05, 3.8800608308520175e-05, 3.88019070669543e-05, 3.880306030623615e-05, 3.880407166434452e-05, 3.8804930227342986e-05, 3.8805643271189176e-05, 3.880619988194667e-05, 3.880660733557306e-05, 3.8806854718131945e-05, 3.8806945667602115e-05, 3.88068801839836e-05, 3.880665099131875e-05, 3.8806265365565196e-05, 3.880571239278652e-05, 3.880499571096152e-05, 3.880410804413259e-05, 3.880305666825734e-05, 3.8801834307378165e-05, 3.8800444599473856e-05, 3.87988802685868e-05, 3.879714495269582e-05, 3.8795235013822094e-05, 3.879315045196563e-05, 3.879089126712642e-05, 3.8788453821325675e-05, 3.8785838114563376e-05, 3.878304414683953e-05, 3.8780071918154135e-05, 3.877691779052839e-05, 3.877357812598348e-05, 3.877006747643463e-05, 3.8766367651987814e-05, 3.876248956657946e-05, 3.875842594425194e-05, 3.8754180422984064e-05, 3.8749749364797026e-05, 3.874513640766963e-05, 3.8740334275644266e-05, 3.873535024467856e-05, 3.8730184314772493e-05, 3.872482920996845e-05, 3.871929220622405e-05, 3.871356602758169e-05, 3.8707657949998975e-05, 3.8701564335497096e-05, 3.8695285184076056e-05, 3.868882413371466e-05, 3.86821739084553e-05, 3.867534178425558e-05, 3.86683241231367e-05, 3.8661124563077465e-05, 3.8653739466099075e-05, 3.8646172470180325e-05, 3.863842357532121e-05, 3.863049278152175e-05, 3.862238008878194e-05, 3.861408913508057e-05, 3.860561628243886e-05, 3.85969651688356e-05, 3.8588135794270784e-05, 3.8579128158744425e-05, 3.856993862427771e-05, 3.856058174278587e-05, 3.855104296235368e-05, 3.854133319691755e-05, 3.853144880849868e-05, 3.852139343507588e-05, 3.851116707664913e-05, 3.8500766095239676e-05, 3.8490205042762675e-05, 3.847947300528176e-05, 3.846857362077572e-05, 3.8457506889244535e-05, 3.844628372462465e-05, 3.843489321297966e-05, 3.8423342630267136e-05, 3.841163197648712e-05, 3.8399768527597196e-05, 3.8387741369660944e-05, 3.8375561416614794e-05, 3.836322866845876e-05, 3.835074676317163e-05, 3.8338112062774605e-05, 3.832533184322529e-05, 3.831240246654488e-05, 3.829932757071219e-05, 3.828611079370603e-05, 3.827275213552639e-05, 3.825925523415208e-05, 3.8245623727561906e-05, 3.8231850339798264e-05, 3.821794962277636e-05, 3.820391066255979e-05, 3.818974801106378e-05, 3.817545439233072e-05, 3.816103708231821e-05, 3.814649244304746e-05, 3.8131831388454884e-05, 3.811704664258287e-05, 3.810214911936782e-05, 3.8087135180830956e-05, 3.807200482697226e-05, 3.805676897172816e-05, 3.804142033914104e-05, 3.802596984314732e-05, 3.801041020778939e-05, 3.799475598498248e-05, 3.7978999898768955e-05, 3.7963145587127656e-05, 3.794720396399498e-05, 3.793116411543451e-05, 
3.791503331740387e-05, 3.789882612181827e-05, 3.78825279767625e-05, 3.786614615819417e-05, 3.784968066611327e-05, 3.783314969041385e-05, 3.781653504120186e-05, 3.7799851270392544e-05, 3.7783102015964694e-05, 3.776627636398189e-05, 3.774939614231698e-05, 3.773245043703355e-05, 3.77154428861104e-05, 3.769838440348394e-05, 3.7681271351175376e-05, 3.7664103729184724e-05, 3.764688881346956e-05, 3.762963024200872e-05, 3.7612328014802194e-05, 3.759498213184997e-05, 3.7577603507088504e-05, 3.756018850253895e-05, 3.754274075618014e-05, 3.752526026801206e-05, 3.7507754313992336e-05, 3.749022653209977e-05, 3.747267328435555e-05, 3.7455109122674905e-05, 3.7437519495142624e-05, 3.741991895367392e-05, 3.740230749826878e-05, 3.7384688766906045e-05, 3.736707003554329e-05, 3.7349444028222933e-05, 3.733181438292377e-05, 3.731419565156102e-05, 3.729656964424066e-05, 3.727895818883553e-05, 3.726136128534563e-05, 3.724377893377096e-05, 3.72262074961327e-05, 3.7208654248388484e-05, 3.719112282851711e-05, 3.717361323651858e-05, 3.71561327483505e-05, 3.713868500199169e-05, 3.712127363542095e-05, 3.710389137268066e-05, 3.708654185174965e-05, 3.7069235986564315e-05, 3.7051977415103465e-05, 3.70347588614095e-05, 3.7017580325482406e-05, 3.7000467273173854e-05, 3.698339060065337e-05, 3.6966375773772604e-05, 3.694941915455274e-05, 3.6932531656930216e-05, 3.69156914530322e-05, 3.68989203707315e-05, 3.688222204800695e-05, 3.686558557092212e-05, 3.684902549139224e-05, 3.68325381714385e-05, 3.681613088701852e-05, 3.679980000015348e-05, 3.67835491488222e-05, 3.6767385608982295e-05, 3.675130210467614e-05, 3.6735313187818974e-05, 3.6719411582453176e-05, 3.670359728857875e-05, 3.668788122013211e-05, 3.6672267015092075e-05, 3.6656743759522215e-05, 3.6641329643316574e-05, 3.6626006476581103e-05, 3.661079972516745e-05, 3.65956875612028e-05, 3.658069908851757e-05, 3.656581247923896e-05, 3.655103500932455e-05, 3.653637395473197e-05, 3.652183659141883e-05, 3.650740836746991e-05, 3.649310383480042e-05, 3.647892663138919e-05, 3.6464858567342155e-05, 3.645092147053219e-05, 3.6437118978938095e-05, 3.642343290266581e-05, 3.64098850695882e-05, 3.6396453651832423e-05, 3.638317502918653e-05, 3.637001282186248e-05, 3.635699613369071e-05, 3.6344117688713595e-05, 3.633138112490997e-05, 3.631877552834339e-05, 3.6306315450929105e-05, 3.62940008926671e-05, 3.628181730164215e-05, 3.626977922976948e-05, 3.6257904866943136e-05, 3.6246172385290265e-05, 3.623457087087445e-05, 3.622312215156853e-05, 3.6211833503330126e-05, 3.620068309828639e-05, 3.6189692764310166e-05, 3.617885158746503e-05, 3.616817775764503e-05, 3.615764217101969e-05, 3.6147263017483056e-05, 3.613704757299274e-05, 3.6126984923612326e-05, 3.611708234529942e-05, 3.6107321648159996e-05, 3.609772466006689e-05, 3.60882913810201e-05, 3.6079025448998436e-05, 3.606990867410786e-05, 3.606095197028481e-05, 3.605215169955045e-05, 3.6043515137862414e-05, 3.603503864724189e-05, 3.6026725865667686e-05, 3.6018584069097415e-05, 3.601058415370062e-05, 3.600275158532895e-05, 3.599507545004599e-05, 3.598758848966099e-05, 3.598022158257663e-05, 3.597304748836905e-05, 3.5966018913313746e-05, 3.595916132326238e-05, 3.59524528903421e-05, 3.594591180444695e-05, 3.593953078961931e-05, 3.5933309845859185e-05, 3.592724533518776e-05, 3.5921337257605046e-05, 3.591560744098388e-05, 3.591001950553619e-05, 3.590458072721958e-05, 3.5899327485822134e-05, 3.589421248761937e-05, 3.588925756048411e-05]
I have tried the examples given in
Python gaussian fit on simulated gaussian noisy data and Fitting (a gaussian) with Scipy vs. ROOT et al, without luck.
I'm looking to do this with lmfit because it has several advantages. This attempt follows the lmfit documentation; here is the code and the plot:
from numpy import sqrt, pi, exp
from lmfit import Model
import matplotlib.pyplot as plt

def gaussian(x, amp, cen, wid):
    "1-d gaussian: gaussian(x, amp, cen, wid)"
    return (amp/(sqrt(2*pi)*wid)) * exp(-(x-cen)**2 / (2*wid**2))
gmodel = Model(gaussian)
result = gmodel.fit(y, x=x, amp=-0.5, cen=4200, wid=2)
plt.plot(x, y,'ro', ms=6)
plt.plot(x, result.init_fit, 'g--', lw=2)
plt.plot(x, result.best_fit, 'b-', lw=2)
So the green curve is the fit with the initial parameters, and the blue one should be the best fit; as you can see, I get a gaussian shifted away from my points and a straight line.
Also, the third array of my data (e) holds the errors in the y axis. How can I take these errors into account when fitting the data with lmfit?
The easiest way to do this is probably to make use of the built-in models and combine the GaussianModel and ConstantModel. You can use the errors in the fitting using the keyword 'weights' as described here.
You'll probably want to do something like this:
import numpy as np
from lmfit import Model
from lmfit.models import GaussianModel, ConstantModel
import matplotlib.pyplot as plt
xval = np.array(x)
yval = np.array(y)
err = np.array(e)
peak = GaussianModel()
offset = ConstantModel()
model = peak + offset
pars = offset.make_params(c=np.median(y))
pars += peak.guess(yval, x=xval, amplitude=-0.5)
result = model.fit(yval, pars, x=xval, weights=1/err)
print(result.fit_report())
plt.plot(xval, yval, 'ro', ms=6)
plt.plot(xval, result.best_fit, 'b--')
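
A possible follow-up (my addition, assuming the fit converges so that standard errors are available): the fitted peak parameters can be read off the ModelResult, e.g.

# The GaussianModel contributes 'amplitude', 'center' and 'sigma';
# the ConstantModel contributes 'c'. stderr may be None if the fit did not converge.
center = result.params['center']
sigma = result.params['sigma']
print('center = {:.3f} +/- {}'.format(center.value, center.stderr))
print('sigma  = {:.3f} +/- {}'.format(sigma.value, sigma.stderr))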

Duplicate Labels & No Line Appearing in Plot in Python

I'm plotting two lines: a set of experimental data points and a mathematical model. The experimental data plots as expected, but the mathematical model will not plot as a line (only the symbols appear). Additionally, I'm getting duplicate legend entries, and despite trying some of the suggestions I've seen on the site I can't get it to work (probably I'm not executing them correctly).
import numpy as np
from sympy import *
from sympy import Matrix
import matplotlib.pyplot as plt
Stretch = [0.998122066, 1.0157277, 1.034507042, 1.052112676, 1.06971831, 1.088497653, 1.106103286, 1.12370892, 1.143661972, 1.160093897, 1.178873239, 1.196478873, 1.214084507, 1.23286385, 1.249295775, 1.266901408, 1.28685446, 1.303286385, 1.322065728, 1.339671362, 1.357276995, 1.374882629, 1.393661972, 1.411267606, 1.430046948, 1.447652582, 1.464084507, 1.48286385, 1.500469484, 1.518075117, 1.535680751, 1.554460094, 1.572065728, 1.59084507, 1.608450704, 1.626056338, 1.643661972, 1.661267606, 1.680046948, 1.697652582, 1.715258216, 1.734037559, 1.751643192, 1.770422535, 1.78685446, 1.805633803, 1.824413146, 1.844366197, 1.860798122, 1.878403756, 1.894835681, 1.912441315, 1.930046948, 1.948826291, 1.967605634, 1.985211268, 2.00399061, 2.021596244, 2.038028169, 2.057981221, 2.075586854, 2.092018779, 2.110798122, 2.128403756, 2.147183099, 2.165962441, 2.183568075, 2.201173709, 2.218779343, 2.237558685, 2.255164319, 2.272769953, 2.291549296, 2.307981221, 2.326760563, 2.344366197, 2.361971831, 2.380751174, 2.398356808, 2.415962441, 2.434741784, 2.452347418, 2.469953052, 2.488732394, 2.505164319]
Stress = [0.010526316, 0.010549481, 0.01188998, 0.011913146, 0.012594206, 0.012618915, 0.013299975, 0.013323141, 0.014665184, 0.0153447, 0.016027304, 0.016708364, 0.017389424, 0.018729923, 0.018751544, 0.019432604, 0.019458858, 0.019480479, 0.020163084, 0.020844144, 0.020867309, 0.021548369, 0.022230974, 0.022254139, 0.022278849, 0.023617803, 0.024297319, 0.024979923, 0.025660983, 0.026999938, 0.027023104, 0.027705708, 0.029044663, 0.029069372, 0.030408327, 0.031747282, 0.033086237, 0.034425191, 0.035107796, 0.036446751, 0.037785705, 0.039784099, 0.041123054, 0.042463553, 0.044458858, 0.046457252, 0.048455646, 0.051113479, 0.053108784, 0.055763529, 0.059074623, 0.061729367, 0.065042006, 0.069014085, 0.072986163, 0.077614591, 0.081586669, 0.086872992, 0.092815666, 0.099420867, 0.106680875, 0.114597233, 0.123174574, 0.132408265, 0.142301396, 0.152852422, 0.164059797, 0.177240857, 0.191079812, 0.206236101, 0.22073295, 0.238519274, 0.256307141, 0.273434025, 0.293195577, 0.314929269, 0.335347171, 0.357740301, 0.382105572, 0.406470843, 0.434785026, 0.461123981, 0.488778725, 0.516435014, 0.544088213]
c= [6.11739377e+00, 4.78409591e-04]
plt.show()
for o, d in zip(Stress, Stretch):
    d1 = d2 = d
    d3 = 1/(d1*d2)
    d3 = 1/(d1*d2)
    C11 = d1**2
    C22 = d2**2
    C33 = d3**2
    p = 4*(c[1]/c[0])*(d3**(c[0]-2))
    S11 = -p/C11 + 4*C11*(c[1]/c[0])*(d1**(c[0]-2))
    S22 = -p/C22 + 4*C22*(c[1]/c[0])*(d2**(c[0]-2))
    T112 = (d1*S11)/(d2*d3)
    T222 = (d2*S22)/(d1*d3)
    plt.plot(d, T112, 'g--^', label = 'Model')
plt.plot(Stretch, Stress, 'b-o', label = 'Experimental')
plt.subplots_adjust(left=0.15)
plt.grid(True)
plt.ylabel('Stress')
plt.xlabel('Applied Stretch')
plt.title('Stress as a Function of Applied Stretch')
plt.legend()
plt.show()
plt.plot should not be called inside a loop over the individual data points, because it works with two lists or two arrays. As a first step, I've created two lists for your model data, and then I was able to plot it:
import matplotlib.pyplot as plt
Stretch = [0.998122066, 1.0157277, 1.034507042, 1.052112676, 1.06971831, 1.088497653, 1.106103286, 1.12370892, 1.143661972, 1.160093897, 1.178873239, 1.196478873, 1.214084507, 1.23286385, 1.249295775, 1.266901408, 1.28685446, 1.303286385, 1.322065728, 1.339671362, 1.357276995, 1.374882629, 1.393661972, 1.411267606, 1.430046948, 1.447652582, 1.464084507, 1.48286385, 1.500469484, 1.518075117, 1.535680751, 1.554460094, 1.572065728, 1.59084507, 1.608450704, 1.626056338, 1.643661972, 1.661267606, 1.680046948, 1.697652582, 1.715258216, 1.734037559, 1.751643192, 1.770422535, 1.78685446, 1.805633803, 1.824413146, 1.844366197, 1.860798122, 1.878403756, 1.894835681, 1.912441315, 1.930046948, 1.948826291, 1.967605634, 1.985211268, 2.00399061, 2.021596244, 2.038028169, 2.057981221, 2.075586854, 2.092018779, 2.110798122, 2.128403756, 2.147183099, 2.165962441, 2.183568075, 2.201173709, 2.218779343, 2.237558685, 2.255164319, 2.272769953, 2.291549296, 2.307981221, 2.326760563, 2.344366197, 2.361971831, 2.380751174, 2.398356808, 2.415962441, 2.434741784, 2.452347418, 2.469953052, 2.488732394, 2.505164319]
Stress = [0.010526316, 0.010549481, 0.01188998, 0.011913146, 0.012594206, 0.012618915, 0.013299975, 0.013323141, 0.014665184, 0.0153447, 0.016027304, 0.016708364, 0.017389424, 0.018729923, 0.018751544, 0.019432604, 0.019458858, 0.019480479, 0.020163084, 0.020844144, 0.020867309, 0.021548369, 0.022230974, 0.022254139, 0.022278849, 0.023617803, 0.024297319, 0.024979923, 0.025660983, 0.026999938, 0.027023104, 0.027705708, 0.029044663, 0.029069372, 0.030408327, 0.031747282, 0.033086237, 0.034425191, 0.035107796, 0.036446751, 0.037785705, 0.039784099, 0.041123054, 0.042463553, 0.044458858, 0.046457252, 0.048455646, 0.051113479, 0.053108784, 0.055763529, 0.059074623, 0.061729367, 0.065042006, 0.069014085, 0.072986163, 0.077614591, 0.081586669, 0.086872992, 0.092815666, 0.099420867, 0.106680875, 0.114597233, 0.123174574, 0.132408265, 0.142301396, 0.152852422, 0.164059797, 0.177240857, 0.191079812, 0.206236101, 0.22073295, 0.238519274, 0.256307141, 0.273434025, 0.293195577, 0.314929269, 0.335347171, 0.357740301, 0.382105572, 0.406470843, 0.434785026, 0.461123981, 0.488778725, 0.516435014, 0.544088213]
c= [6.11739377e+00, 4.78409591e-04]
Stretch_mod=[]
Stress_mod=[]
for o, d in zip(Stress, Stretch):
    d1 = d2 = d
    d3 = 1/(d1*d2)
    d3 = 1/(d1*d2)
    C11 = d1**2
    C22 = d2**2
    C33 = d3**2
    p = 4*(c[1]/c[0])*(d3**(c[0]-2))
    S11 = -p/C11 + 4*C11*(c[1]/c[0])*(d1**(c[0]-2))
    S22 = -p/C22 + 4*C22*(c[1]/c[0])*(d2**(c[0]-2))
    T112 = (d1*S11)/(d2*d3)
    T222 = (d2*S22)/(d1*d3)
    Stretch_mod.append(d)
    Stress_mod.append(T112)
plt.plot(Stretch_mod, Stress_mod, 'g--^', label = 'Model')
plt.plot(Stretch, Stress, 'b-o', label = 'Experimental')
plt.subplots_adjust(left=0.15)
plt.grid(True)
plt.ylabel('Stress')
plt.xlabel('Applied Stretch')
plt.title('Stress as a Function of Applied Stretch')
plt.legend()
plt.show()
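
An equivalent sketch (my own, not the answer's code): the same model curve can be computed with NumPy arrays instead of a Python loop, so there is nothing to accumulate at all:

import numpy as np
import matplotlib.pyplot as plt

d = np.asarray(Stretch)                          # the list defined above
d3 = 1.0 / d**2                                  # d1 = d2 = d, so d3 = 1/(d1*d2)
C11 = d**2
p = 4*(c[1]/c[0]) * d3**(c[0]-2)
S11 = -p/C11 + 4*C11*(c[1]/c[0]) * d**(c[0]-2)
T112 = (d*S11) / (d*d3)                          # same expression as inside the loop

plt.plot(d, T112, 'g--^', label='Model')
plt.plot(Stretch, Stress, 'b-o', label='Experimental')
plt.legend()
plt.show()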

Incorrect scikit-learn linear model prediction with date offset

I'm trying to predict time-series data, but by offsetting the result by date_offset-timepoints before training and prediction. The reason for doing this is to try and predict date_offset-timepoints into the future with the present data. See http://glowingpython.blogspot.co.za/2015/01/forecasting-beer-consumption-with.html for an example.
So in summary:
data = [1,2,3,4,5] should predict result = [2,3,4,5,6] if date_offset = 1
The results in the plot below show the red line being shifted by date_offset, not predicting date_offset into the future. No matter how big I make date_offset, it keeps shifting rather than predicting the last result I have, i.e. result = 5 (which is already known). In fact, the red line should not shift at all, just lose accuracy as date_offset grows. What am I doing wrong?
See example code and resulting image below:
from sklearn import linear_model
import matplotlib.pyplot as plt
import numpy as np
date_offset = 1
data = np.array([9330.0, 9470.0, 9550.0, 9620.0, 9600.0, 9585.0, 9600.0, 9600.0, 9430.0, 9460.0, 9450.0, 9650.0, 9620.0, 9650.0, 9500.0, 9400.0, 9165.0, 9100.0, 8755.0, 8850.0, 8990.0, 9150.0, 9195.0, 9175.0, 9250.0, 9200.0, 9350.0, 9280.0, 9370.0, 9470.0, 9445.0, 9440.0, 9280.0, 9325.0, 9170.0, 9270.0, 9200.0, 9450.0, 9510.0, 9371.0, 9499.0, 9499.0, 9400.0, 9500.0, 9550.0, 9670.0, 9700.0, 9760.0, 9767.4599999999991, 9652.0, 9520.0, 9600.0, 9610.0, 9700.0, 9825.0, 9900.0, 9950.0, 9801.0, 9770.0, 9545.0, 9630.0, 9710.0, 9700.0, 9700.0, 9600.0, 9615.0, 9575.0, 9500.0, 9600.0, 9480.0, 9565.0, 9510.0, 9475.0, 9600.0, 9400.0, 9400.0, 9400.0, 9300.0, 9430.0, 9410.0, 9380.0, 9320.0, 9000.0, 9100.0, 9000.0, 9200.0, 9210.0, 9251.0, 9460.0, 9400.0, 9600.0, 9621.0, 9440.0, 9490.0, 9675.0, 9850.0, 9680.0, 10100.0, 9900.0, 10100.0, 9949.0, 10040.0, 10050.0, 10200.0, 10400.0, 10350.0, 10200.0, 10175.0, 10001.0, 10110.0, 10400.0, 10401.0, 10300.0, 10548.0, 10515.0, 10475.0, 10200.0, 10481.0, 10500.0, 10540.0, 10559.0, 10300.0, 10400.0, 10202.0, 10330.0, 10450.0, 10540.0, 10540.0, 10650.0, 10450.0, 10550.0, 10501.0, 10206.0, 10250.0, 10345.0, 10225.0, 10330.0, 10506.0, 11401.0, 11245.0, 11360.0, 11549.0, 11415.0, 11450.0, 11460.0, 11600.0, 11530.0, 11450.0, 11402.0, 11299.0])
data = data[np.newaxis].T
results = np.array([9470.0, 9545.0, 9635.0, 9640.0, 9600.0, 9622.0, 9555.0, 9429.0, 9495.0, 9489.0, 9630.0, 9612.0, 9630.0, 9501.0, 9372.0, 9165.0, 9024.0, 8780.0, 8800.0, 8937.0, 9051.0, 9100.0, 9166.0, 9220.0, 9214.0, 9240.0, 9254.0, 9400.0, 9450.0, 9470.0, 9445.0, 9301.0, 9316.0, 9170.0, 9270.0, 9251.0, 9422.0, 9466.0, 9373.0, 9440.0, 9415.0, 9410.0, 9500.0, 9520.0, 9620.0, 9705.0, 9760.0, 9765.0, 9651.0, 9520.0, 9600.0, 9610.0, 9700.0, 9805.0, 9900.0, 9950.0, 9800.0, 9765.0, 9602.0, 9630.0, 9790.0, 9710.0, 9800.0, 9649.0, 9580.0, 9780.0, 9560.0, 9501.0, 9511.0, 9530.0, 9498.0, 9475.0, 9595.0, 9500.0, 9460.0, 9400.0, 9310.0, 9382.0, 9375.0, 9385.0, 9320.0, 9100.0, 8990.0, 9045.0, 9129.0, 9201.0, 9251.0, 9424.0, 9440.0, 9500.0, 9621.0, 9490.0, 9512.0, 9599.0, 9819.0, 9684.0, 10025.0, 9984.0, 10110.0, 9950.0, 10048.0, 10095.0, 10200.0, 10338.0, 10315.0, 10200.0, 10166.0, 10095.0, 10110.0, 10400.0, 10445.0, 10360.0, 10548.0, 10510.0, 10480.0, 10180.0, 10488.0, 10520.0, 10510.0, 10565.0, 10450.0, 10400.0, 10240.0, 10338.0, 10410.0, 10540.0, 10481.0, 10521.0, 10530.0, 10325.0, 10510.0, 10446.0, 10249.0, 10236.0, 10211.0, 10340.0, 10394.0, 11370.0, 11250.0, 11306.0, 11368.0, 11415.0, 11400.0, 11452.0, 11509.0, 11500.0, 11455.0, 11400.0, 11300.0, 11369.0])
# Date offset to predict next i-days results
data = data[:-date_offset]
results = results[date_offset:]
train_data = data[:-50]
train_results = results[:-50]
test_data = data[-50:]
test_results = results[-50:]
regressor = linear_model.BayesianRidge(normalize=True)
regressor.fit(train_data, train_results)
plt.figure(figsize=(8,6))
plt.plot(regressor.predict(test_data), '--', color='#EB3737', linewidth=2, label='Prediction')
plt.plot(test_results, label='True', color='green', linewidth=2)
plt.legend(loc='best')
plt.show()
First of all, the model is not really bad. For instance, when the real value is 10450 it predicts 10350, which is really close. And, obviously, the farther in time the predicted point is, the less accurate the prediction, as the variance grows and sometimes the bias grows too. You cannot expect the opposite.
Secondly, it is a linear model, so it cannot be exact when the predicted variable is not linear by nature.
Thirdly, one has to choose the predicted variable with care. For instance, in this case you might try to predict not the value at time T, but the change in value at time T (i.e. C[T] = V[T] - V[T-1]), or the moving average of the last K values. Here you might (or, on the contrary, might not) find out that you are trying to model a so-called "random walk", which is hard to predict exactly by its random nature.
And lastly, you might consider other models, like ARIMA, which are better suited for predicting time series.
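
A minimal sketch of the differencing suggestion (my illustration, not part of the original answer): model the one-step change V[T] - V[T-1] instead of the level, then rebuild the predicted levels from the last known value:

import numpy as np
from sklearn import linear_model

levels = np.asarray(data).ravel()            # the raw series from the question
diffs = np.diff(levels)                      # C[T] = V[T] - V[T-1]
X = diffs[:-1].reshape(-1, 1)                # today's change ...
y_chg = diffs[1:]                            # ... predicts tomorrow's change

reg = linear_model.BayesianRidge()
reg.fit(X[:-50], y_chg[:-50])
pred_change = reg.predict(X[-50:])
pred_levels = levels[-51:-1] + pred_change   # last known value + predicted change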
Adding back the organize_data step:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import linear_model
def organize_data(to_forecast, window, horizon):
    """
    Input:
      to_forecast, univariate time series organized as numpy array
      window, number of items to use in the forecast window
      horizon, horizon of the forecast
    Output:
      X, a matrix where each row contains a forecast window
      y, the target values for each row of X
    """
    shape = to_forecast.shape[:-1] + \
            (to_forecast.shape[-1] - window + 1, window)
    strides = to_forecast.strides + (to_forecast.strides[-1],)
    X = np.lib.stride_tricks.as_strided(to_forecast,
                                        shape=shape,
                                        strides=strides)
    y = np.array([X[i+horizon][-1] for i in range(len(X)-horizon)])
    return X[:-horizon], y
data = np.array([9330.0, 9470.0, 9550.0, 9620.0, 9600.0, 9585.0, 9600.0, 9600.0, 9430.0, 9460.0, 9450.0, 9650.0, 9620.0, 9650.0, 9500.0, 9400.0, 9165.0, 9100.0, 8755.0, 8850.0, 8990.0, 9150.0, 9195.0, 9175.0, 9250.0, 9200.0, 9350.0, 9280.0, 9370.0, 9470.0, 9445.0, 9440.0, 9280.0, 9325.0, 9170.0, 9270.0, 9200.0, 9450.0, 9510.0, 9371.0, 9499.0, 9499.0, 9400.0, 9500.0, 9550.0, 9670.0, 9700.0, 9760.0, 9767.4599999999991, 9652.0, 9520.0, 9600.0, 9610.0, 9700.0, 9825.0, 9900.0, 9950.0, 9801.0, 9770.0, 9545.0, 9630.0, 9710.0, 9700.0, 9700.0, 9600.0, 9615.0, 9575.0, 9500.0, 9600.0, 9480.0, 9565.0, 9510.0, 9475.0, 9600.0, 9400.0, 9400.0, 9400.0, 9300.0, 9430.0, 9410.0, 9380.0, 9320.0, 9000.0, 9100.0, 9000.0, 9200.0, 9210.0, 9251.0, 9460.0, 9400.0, 9600.0, 9621.0, 9440.0, 9490.0, 9675.0, 9850.0, 9680.0, 10100.0, 9900.0, 10100.0, 9949.0, 10040.0, 10050.0, 10200.0, 10400.0, 10350.0, 10200.0, 10175.0, 10001.0, 10110.0, 10400.0, 10401.0, 10300.0, 10548.0, 10515.0, 10475.0, 10200.0, 10481.0, 10500.0, 10540.0, 10559.0, 10300.0, 10400.0, 10202.0, 10330.0, 10450.0, 10540.0, 10540.0, 10650.0, 10450.0, 10550.0, 10501.0, 10206.0, 10250.0, 10345.0, 10225.0, 10330.0, 10506.0, 11401.0, 11245.0, 11360.0, 11549.0, 11415.0, 11450.0, 11460.0, 11600.0, 11530.0, 11450.0, 11402.0, 11299.0])
train_window = 50
k = 5 # number of previous observations to use
h = 2 # forecast horizon
X,y = organize_data(data, k, h)
train_data = X[:train_window]
train_results = y[:train_window]
test_data = X[train_window:]
test_results = y[train_window:]
regressor = linear_model.BayesianRidge(normalize=True)
regressor.fit(train_data, train_results)
plt.figure(figsize=(8,6))
plt.plot(regressor.predict(X), '--', color='#EB3737', linewidth=2, label='Prediction')
plt.plot(y, label='True', color='green', linewidth=2)
plt.legend(loc='best')
plt.show()
