PyBrain neural network for stock prediction won't learn

PyBrain neural network for stock prediction won't learn - python

I am trying to code a neural network that can forecast some data. Therefore I use PyBrain for python. I figured out that a SupervisedDataset would be a good fit for this task. I took some stock data and put 5 values from it as input and the sixths as the target. Then I build a feed forward network with the buildNetwork function and trained it with the BackpropTrainer.
Anyway, the error won't get less. It is stuck at ~0.6 and it seems to oscillate around there. I tried to tweak the momentum and the learning rate but it didn't help. What am I doing wrong?
from pybrain.datasets import SupervisedDataSet
DS = SupervisedDataSet(5, 1)
DS.addSample((44.055, 44.54, 44.04, 43.975, 43.49), (42.04,))
DS.addSample((44.54, 44.04, 43.975, 43.49, 42.04), (42.6,))
DS.addSample((44.04, 43.975, 43.49, 42.04, 42.6), (42.46,))
DS.addSample((43.975, 43.49, 42.04, 42.6, 42.46), (41.405,))
DS.addSample((43.49, 42.04, 42.6, 42.46, 41.405), (42.385,))
DS.addSample((42.04, 42.6, 42.46, 41.405, 42.385), (42.655,))
DS.addSample((42.6, 42.46, 41.405, 42.385, 42.655), (41.53,))
DS.addSample((42.46, 41.405, 42.385, 42.655, 41.53), (40.09,))
DS.addSample((41.405, 42.385, 42.655, 41.53, 40.09), (39.8,))
DS.addSample((42.385, 42.655, 41.53, 40.09, 39.8), (40.2,))
DS.addSample((42.655, 41.53, 40.09, 39.8, 40.2), (39.915,))
DS.addSample((41.53, 40.09, 39.8, 40.2, 39.915), (40.21,))
DS.addSample((40.09, 39.8, 40.2, 39.915, 40.21), (40.34,))
DS.addSample((39.8, 40.2, 39.915, 40.21, 40.34), (41.195,))
DS.addSample((40.2, 39.915, 40.21, 40.34, 41.195), (41.595,))
DS.addSample((39.915, 40.21, 40.34, 41.195, 41.595), (41.975,))
DS.addSample((40.21, 40.34, 41.195, 41.595, 41.975), (42.045,))
DS.addSample((40.34, 41.195, 41.595, 41.975, 42.045), (40.13,))
DS.addSample((41.195, 41.595, 41.975, 42.045, 40.13), (38.99,))
DS.addSample((41.595, 41.975, 42.045, 40.13, 38.99), (39.81,))
DS.addSample((41.975, 42.045, 40.13, 38.99, 39.81), (40.23,))
DS.addSample((42.045, 40.13, 38.99, 39.81, 40.23), (40.47,))
DS.addSample((40.13, 38.99, 39.81, 40.23, 40.47), (40.45,))
DS.addSample((38.99, 39.81, 40.23, 40.47, 40.45), (40.01,))
DS.addSample((39.81, 40.23, 40.47, 40.45, 40.01), (40.23,))
DS.addSample((40.23, 40.47, 40.45, 40.01, 40.23), (40.2,))
DS.addSample((40.47, 40.45, 40.01, 40.23, 40.2), (41.605,))
DS.addSample((40.45, 40.01, 40.23, 40.2, 41.605), (42.1,))
DS.addSample((40.01, 40.23, 40.2, 41.605, 42.1), (42.135,))
DS.addSample((40.23, 40.2, 41.605, 42.1, 42.135), (41.95,))
DS.addSample((40.2, 41.605, 42.1, 42.135, 41.95), (41.145,))
DS.addSample((41.605, 42.1, 42.135, 41.95, 41.145), (40.635,))
DS.addSample((42.1, 42.135, 41.95, 41.145, 40.635), (41.25,))
DS.addSample((42.135, 41.95, 41.145, 40.635, 41.25), (41.19,))
DS.addSample((41.95, 41.145, 40.635, 41.25, 41.19), (42.065,))
DS.addSample((41.145, 40.635, 41.25, 41.19, 42.065), (42.025,))
DS.addSample((40.635, 41.25, 41.19, 42.065, 42.025), (42.09,))
DS.addSample((41.25, 41.19, 42.065, 42.025, 42.09), (41.79,))
DS.addSample((41.19, 42.065, 42.025, 42.09, 41.79), (43.11,))
from pybrain.tools.shortcuts import buildNetwork
FNN = buildNetwork(DS.indim, 15, DS.outdim, bias=True)
from pybrain.supervised.trainers import BackpropTrainer
TRAINER = BackpropTrainer(FNN, dataset=DS, learningrate = 0.005, \
momentum=0.1, verbose=True)
for i in range(1000):
TRAINER.train()
Edit: Some of the comments doubted that those data would fit for a neural network in general. Therefore I did the same net in MATLAB and it worked just fine. After 11 training epochs the error was less then 0.002.
Furthermore I tried to use the SupervisedDataset from PyBrain but this wouldn't work as well. I am out of ideas now.

I found a solution. Turned out the stock data had to be normalized first. So i wrote this function:
def normalization(data, new_max, new_min):
old_max = 0
old_min = 0
# Finde altes Max- und Minimum
for i in range(len(data)):
if old_max < data[i]:
old_max = data[i]
elif old_min > data[i]:
old_min = data[i]
old_range = (old_max - old_min)
for i in range(len(data)):
if old_range == 0:
data[i] = new_min
else:
new_range = (new_max - new_min)
data[i] = (((data[i] - old_min) * new_range) / old_range) + new_min
I scaled the data between 0 and 1 and voilà - the network would finally learn.

Related

how can i smooth the graph values or extract main signals only

when i try to run the code below i get this graph
my code:
from numpy import nan
import json
import os
import numpy as np
import subprocess
import math
import matplotlib.pyplot as plt
from statistics import mean, stdev
def smooth(t):
new_t = []
for i, x in enumerate(t):
neighbourhood = t[max(i-2,0): i+3]
m = mean(neighbourhood)
s = stdev(neighbourhood, xbar=m)
if abs(x - m) > s:
x = ( t[i - 1 + (i==0)*2] + t[i + 1 - (i+1==len(t))*2] ) / 2
new_t.append(x)
return new_t
def outLiersFN(*U):
outliers=[] # after preprocessing list
#preprocessing Fc =| 2*LF1 prev by 1 - LF2 prev by 2 |
c0 = -2 #(previous) by 2 #from original
c1 =-1 #(previous) #from original
c2 =0 #(current) #from original
c3 = 1 #(next) #from original
preP = U[0] # original list
if c2 == 0:
outliers.append(preP[0])
c1+=1
c2+=1
c0+=1
c3+=1
oldlen = len(preP)
M_RangeOfMotion = 90
while oldlen > c2 :
if c3 == oldlen:
outliers.insert(c2, preP[c2]) #preP[c2] >> last element in old list
break
if (preP[c2] > M_RangeOfMotion and preP[c2] < (preP[c1] + preP[c3])/2) or (preP[c2] < M_RangeOfMotion and preP[c2] > (preP[c1] + preP[c3])/2): #Check Paper 3.3.1
Equ = (preP[c1] + preP[c3])/2 #fn of preprocessing # From third index # ==== inserting current frame
formatted_float = "{:.2f}".format(Equ) #with .2 number only
equu = float(formatted_float) #from string float to float
outliers.insert(c2,equu) # insert the preprocessed value to the List
c1+=1
c2+=1
c0+=1
c3+=1
else :
Equ = preP[c2] # fn of preprocessing #put same element (do nothing)
formatted_float = "{:.2f}".format(Equ) # with .2 number only
equu = float(formatted_float) # from string float to float
outliers.insert(c2, equu) # insert the preprocessed value to the List
c1 += 1
c2 += 1
c0 += 1
c3 += 1
return outliers
def remove_nan(list):
newlist = [x for x in list if math.isnan(x) == False]
return newlist
the_angel = [176.04, 173.82, 170.09, 165.3, 171.8, 178.3, 178.77, 179.24, 179.93, 180.0, 173.39, 166.78, 166.03, 165.28, 165.72, 166.17, 166.71, 167.26, 168.04, 167.22, 166.68, 166.13, 161.53, 165.81, 170.1, 170.05, 170.5, 173.01, 176.02, 174.53, 160.09, 146.33, 146.38, 146.71, 150.33, 153.95, 154.32, 154.69, 134.52, 114.34, 115.6, 116.86, 134.99, 153.12, 152.28, 151.43, 151.36, 152.32, 158.9, 166.52, 177.74, 178.61, 179.47, 167.44, 155.4, 161.54, 167.68, 163.96, 160.24, 137.45, 114.66, 117.78, 120.89, 139.95, 139.62, 125.51, 111.79, 112.07, 112.74, 110.22, 107.7, 107.3, 106.52, 105.73, 103.07, 101.35, 102.5, 104.59, 104.6, 104.49, 104.38, 102.81, 101.25, 100.62, 100.25, 100.15, 100.32, 99.84, 99.36, 100.04, 100.31, 99.14, 98.3, 97.92, 97.41, 96.9, 96.39, 95.88, 95.9, 95.9, 96.02, 96.14, 96.39, 95.2, 94.56, 94.02, 93.88, 93.8, 93.77, 93.88, 94.04, 93.77, 93.65, 93.53, 94.2, 94.88, 92.59, 90.29, 27.01, 32.9, 38.78, 50.19, 61.59, 61.95, 62.31, 97.46, 97.38, 97.04, 96.46, 96.02, 96.1, 96.33, 95.61, 89.47, 89.34, 89.22, 89.48, 89.75, 90.02, 90.28, 88.16, 88.22, 88.29, 88.17, 88.17, 94.98, 94.84, 94.69, 94.94, 94.74, 94.54, 94.69, 94.71, 94.64, 94.58, 94.19, 94.52, 94.85, 87.7, 87.54, 87.38, 95.71, 96.57, 97.11, 97.05, 96.56, 96.07, 95.76, 95.56, 95.35, 95.28, 95.74, 96.2, 96.32, 96.33, 96.2, 96.14, 96.07, 96.07, 96.12, 96.17, 96.28, 96.31, 96.33, 96.16, 96.05, 95.94, 95.33, 88.96, 95.0, 95.78, 88.19, 88.19, 88.19, 87.92, 87.93, 88.03, 87.94, 87.86, 87.85, 87.89, 88.08, 88.01, 87.88, 88.02, 88.15, 88.15, 88.66, 88.73, 88.81, 88.41, 88.55, 88.68, 88.69, 88.02, 87.35, 95.19, 95.39, 95.38, 95.37, 95.27, 95.17, 95.33, 95.32, 95.31, 95.37, 95.42, 95.34, 95.44, 95.53, 95.47, 95.41, 95.13, 94.15, 94.78, 97.64, 97.1, 96.87, 97.03, 96.76, 35.44, 23.63, 23.27, 24.71, 26.16, 96.36, 113.13, 129.9, 96.82, 63.74, 34.25, 33.42, 32.6, 30.69, 31.06, 31.43, 97.14, 97.51, 97.23, 98.54, 100.13, 100.95, 28.82, 33.81, 66.81, 99.82, 102.63, 101.9, 101.44, 102.19, 103.22, 103.67, 104.13, 104.07, 104.73, 105.46, 103.74, 102.02, 103.32, 102.59, 29.54, 28.08, 28.76, 29.79, 30.82, 113.51, 129.34, 145.16, 143.18, 148.29, 153.67, 166.14, 161.16, 151.64, 149.27, 146.9, 151.67, 153.02, 149.28, 145.53, 149.1, 152.67, 158.78, 164.89, 164.84, 164.8, 162.11, 159.42, 156.73, 156.28, 155.83, 156.4, 161.0, 165.59, 164.44, 159.73, 155.76, 156.97, 158.92, 159.15, 159.39, 159.99, 160.44, 160.88, 163.89, 166.9, 167.71, 167.11, 167.0, 167.44, 168.38, 153.16, 137.94, 137.65, 152.09, 169.49, 171.36, 173.22, 174.01, 174.0, 174.2, 174.41, 157.74, 141.09, 149.32, 157.57, 156.4, 148.4, 140.78, 141.06, 141.73, 143.05, 143.91, 156.59, 169.29, 172.17, 175.05, 175.29, 175.27, 175.15, 175.02, 174.81, 174.59, 174.76, 174.94, 175.18, 175.41, 175.23, 174.51, 174.64, 174.77, 174.56, 173.25, 172.38, 174.17, 176.4, 177.27, 177.29, 177.33, 178.64, 179.98, 179.99, 176.0, 172.88, 173.77, 173.8, 173.97, 174.72, 175.24, 176.89, 179.07, 179.27, 178.78, 178.29, 175.61, 174.21, 172.8, 173.05, 173.41, 173.77, 174.65, 175.52, 175.58, 176.15, 176.71, 159.12, 141.54, 141.12, 155.62, 170.53, 165.54, 160.71, 158.22, 156.35, 156.82, 158.55, 160.27, 161.33, 162.39, 162.37, 159.48, 156.59, 156.77, 158.05, 159.32, 158.49, 157.66, 157.7, 157.74, 158.44, 159.14, 150.13, 143.06, 136.0, 125.7, 115.41, 111.19, 106.97, 107.1, 107.24, 107.45, 107.67, 113.34, 119.01, 144.87, 170.73, 174.31, 177.89, 174.78, 171.67, 163.26, 134.58, 105.9, 102.98, 100.77, 101.05, 101.39, 101.73, 99.79, 98.71, 97.64, 97.8, 97.89, 96.67, 95.45, 94.33, 93.38, 92.44, 48.53, 91.4, 91.35, 91.34, 91.33, 90.92, 90.51, 88.63, 87.0, 86.74, 86.48, 96.79, 96.09, 95.46, 95.39, 94.32, 93.25, 93.31, 93.37, 93.11, 92.57, 93.41, 94.25, 96.48, 92.71, 88.94, 90.07, 90.43, 78.06, 77.69, 77.32, 90.1, 89.15, 89.14, 88.85, 88.38, 87.63, 121.2, 120.66, 86.89, 86.42, 85.69, 84.86, 84.86, 85.34, 85.82, 86.07, 86.32, 85.82, 85.32, 86.23, 86.69, 87.15, 87.04, 86.87, 86.58, 86.0, 85.41, 85.41, 85.53, 85.66, 85.7, 85.72, 85.75, 85.92, 86.09, 85.77, 85.45, 84.94, 85.55, 86.16, 86.21, 86.1, 85.77, 85.27, 84.56, 84.99, 85.38, 85.42, 85.98, 86.54, 86.5, 86.45, 86.56, 86.63, 86.35, 86.08, 85.82, 85.51, 85.21, 84.6, 84.84, 84.97, 85.1, 86.12, 86.88, 86.8, 86.46, 86.47, 87.23, 87.8, 88.0, 88.08, 88.16, 87.72, 87.63, 87.37, 86.42, 86.48, 87.24, 87.97, 88.09, 88.19, 88.32, 88.44, 87.82, 87.2, 86.03, 85.78, 91.5, 93.0, 88.2, 88.52, 88.42, 87.28, 85.73, 85.62, 85.5, 85.5, 87.06, 87.6, 88.1, 88.31, 88.53, 88.77, 89.14, 89.52, 89.46, 89.4, 90.28, 89.74, 91.28, 92.17, 92.16, 92.15, 93.08, 94.0, 94.66, 95.32, 94.13, 93.7, 93.32, 93.69, 94.58, 95.47, 97.25, 99.03, 99.63, 99.67, 99.71, 100.33, 101.58, 103.36, 103.49, 103.41, 106.31, 109.34, 109.28, 109.21, 107.76, 106.31, 105.43, 104.94, 104.44, 111.19, 117.93, 115.59, 113.24, 116.15, 119.06, 125.43, 140.72, 156.0, 161.7, 143.52, 135.33, 127.13, 127.68, 148.68, 169.68, 172.2, 174.72, 174.75, 174.66, 158.57, 142.63, 145.13, 153.29, 161.45, 163.34, 165.24, 162.25, 159.89, 159.07, 156.39, 155.21, 156.04, 159.29, 160.07, 160.85, 163.45, 162.93, 161.71, 160.06, 158.4, 144.74, 132.64, 134.57, 150.22, 165.86, 172.95, 174.12, 175.3, 175.5, 176.31, 177.71, 179.72, 168.13, 156.55, 146.24, 155.75, 176.0, 175.99, 175.98, 176.0, 176.02, 176.25, 175.13, 174.26, 173.38, 173.37, 173.46, 176.34, 174.55, 172.77, 168.45, 166.35, 166.47, 168.81, 167.43, 166.79, 167.35, 168.65, 168.51, 168.37, 168.88, 169.74, 171.19, 171.33, 169.91, 168.49, 167.11, 166.83, 167.01, 168.68, 170.34, 170.43, 172.15, 173.86, 177.62, 177.61, 175.34, 173.06, 176.47, 179.87, 179.9, 177.67, 175.67, 175.39, 175.36, 177.03, 176.0, 174.98, 174.96, 174.94, 175.76, 176.57, 169.05, 162.99, 164.97, 168.74, 172.51, 167.38, 165.08, 163.03, 163.81, 164.83, 164.81, 164.8, 165.88, 165.36, 159.61, 153.86, 153.57, 153.61, 153.65, 154.62, 155.58, 157.97, 156.35, 155.66, 154.98, 156.11, 157.24, 159.25, 159.6, 160.43, 161.26, 164.71, 168.17, 147.46, 126.92, 106.38, 105.23, 104.4, 105.37, 106.65, 109.21, 107.44, 104.65, 101.86, 102.35, 102.84, 102.79, 102.19, 101.59, 100.98, 100.38, 98.72, 97.73, 97.32, 96.9, 95.11, 93.97, 94.12, 94.12, 93.1, 92.08, 89.29, 90.35, 90.35, 90.35, 90.35, 86.95, 86.37, 86.06, 85.74, 94.56, 93.16, 92.46, 91.76, 88.55, 85.33, 87.52, 92.18, 93.68, 95.18, 94.4, 92.17, 89.94, 89.4, 89.37, 99.44, 100.98, 102.52, 103.18, 88.96, 88.23, 87.5, 85.2, 85.19, 86.87, 121.42, 155.96, 155.97, 155.97, 86.2, 86.5, 86.8, 87.22, 87.36, 87.34, 87.03, 87.04, 87.05, 86.36, 85.68, 85.71, 85.84, 85.93, 86.01, 86.04, 86.08, 85.92, 86.05, 86.18, 86.17, 86.19, 86.23, 86.22, 86.09, 85.92, 85.66, 85.69, 85.69, 85.31, 84.91, 84.93, 84.95, 84.93, 84.91, 84.9, 84.9, 84.9, 84.9, 85.38, 85.52, 85.66, 85.66, 85.4, 85.14, 85.47, 85.8, 85.72, 85.64, 86.09, 85.84, 85.27, 85.47, 85.66, 85.59, 85.52, 85.38, 85.39, 85.28, 85.17, 85.39, 85.7, 85.98, 86.26, 86.61, 92.97, 93.15, 86.58, 86.58, 86.53, 86.47, 98.55, 99.41, 100.16, 100.9, 89.19, 90.28, 91.38, 91.39, 91.4, 91.44, 92.05, 131.05, 170.63, 170.13, 162.43, 125.64, 88.85, 88.85, 99.08, 100.38, 101.69, 100.74, 99.79, 96.33, 93.31, 93.73, 94.87, 96.01, 96.93, 97.85, 98.97, 97.85, 98.14, 99.37, 102.01, 103.8, 105.58, 108.52, 108.12, 107.72, 106.75, 106.82, 109.08, 112.37, 112.52, 112.66, 112.97, 114.12, 115.64, 117.1, 118.57, 126.13, 133.69, 149.27, 163.96, 166.62, 169.27, 164.94, 160.61, 149.35, 141.18, 143.41, 143.57, 149.26, 157.49, 159.94, 151.93, 147.47, 145.97, 145.56, 145.15, 143.85, 142.54, 142.18, 142.43, 143.12, 144.41, 144.38, 151.99, 159.59, 174.81, 174.94, 175.84, 176.87, 162.41, 152.94, 151.59, 155.24, 155.22, 155.19, 155.04]
p0 = outLiersFN(smooth(remove_nan(the_angel)))
the_angel = p0
plt.plot(the_angel) #list(filter(fun, L1))
plt.show()
print((the_angel))
how can i smooth the values in (the_angel) to get graph like this (red line)
i mean ignoring all unnecessary and noisy values and get only main line instead
you can edit my code or suggest me new filter or algorithm

pandas has a rolling() method for dataframes that you can use to calculate the mean over a window of values, e.g. the 70 closest ones:
import pandas as pd
import matplotlib.pyplot as plt
WINDOW_SIZE = 70
the_angel = [176.04, 173.82, 170.09, 165.3, 171.8, # ...
]
df = pd.DataFrame({'the angel': the_angel})
df[f'mean of {WINDOW_SIZE}'] = df['the angel'].rolling(
window=WINDOW_SIZE, center=True).mean()
df.plot(color=['blue', 'red']);

How to visualize high-dimension vectors as points in 2D plane?

For example, there are three vectors as below.
[ 0.0377, 0.1808, 0.0807, -0.0703, 0.2427, -0.1957, -0.0712, -0.2137,
-0.0754, -0.1200, 0.1919, 0.0373, 0.0536, 0.0887, -0.1916, -0.1268,
-0.1910, -0.1411, -0.1282, 0.0274, -0.0781, 0.0138, -0.0654, 0.0491,
0.0398, 0.1696, 0.0365, 0.2266, 0.1241, 0.0176, 0.0881, 0.2993,
-0.1425, -0.2535, 0.1801, -0.1188, 0.1251, 0.1840, 0.1112, 0.3172,
0.0844, -0.1142, 0.0662, 0.0910, 0.0416, 0.2104, 0.0781, -0.0348,
-0.1488, 0.0129],
[-0.1302, 0.1581, -0.0897, 0.1024, -0.1133, 0.1076, 0.1595, -0.1047,
0.0760, 0.1092, 0.0062, -0.1567, -0.1448, -0.0548, -0.1275, -0.0689,
-0.1293, 0.1024, 0.1615, 0.0869, 0.2906, -0.2056, 0.0442, -0.0595,
-0.1448, 0.0167, -0.1259, -0.0989, 0.0651, -0.0424, 0.0795, -0.1546,
0.1330, -0.2284, 0.1672, 0.1847, 0.0841, 0.1771, -0.0101, -0.0681,
0.1497, 0.1226, 0.1146, -0.2090, 0.3275, 0.0981, -0.3295, 0.0590,
0.1130, -0.0650],
[-0.1745, -0.1940, -0.1529, -0.0964, 0.2657, -0.0979, 0.1510, -0.1248,
-0.1541, 0.1782, -0.1769, -0.2335, 0.2011, 0.1906, -0.1918, 0.1896,
-0.2183, -0.1543, 0.1816, 0.1684, -0.1318, 0.2285, 0.1784, 0.2260,
-0.2331, 0.0523, 0.1882, 0.1764, -0.1686, 0.2292]
How to plot them as three points in the same 2D plane like this picture below? Thanks!

I use PCA from sklearn, maybe this code help you:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.decomposition import PCA
usa = [ 0.0377, 0.1808, 0.0807, -0.0703, 0.2427, -0.1957, -0.0712, -0.2137,
-0.0754, -0.1200, 0.1919, 0.0373, 0.0536, 0.0887, -0.1916, -0.1268,
-0.1910, -0.1411, -0.1282, 0.0274, -0.0781, 0.0138, -0.0654, 0.0491,
0.0398, 0.1696, 0.0365, 0.2266, 0.1241, 0.0176, 0.0881, 0.2993,
-0.1425, -0.2535, 0.1801, -0.1188, 0.1251, 0.1840, 0.1112, 0.3172,
0.0844, -0.1142, 0.0662, 0.0910, 0.0416, 0.2104, 0.0781, -0.0348,
-0.1488, 0.0129]
obama = [-0.1302, 0.1581, -0.0897, 0.1024, -0.1133, 0.1076, 0.1595, -0.1047,
0.0760, 0.1092, 0.0062, -0.1567, -0.1448, -0.0548, -0.1275, -0.0689,
-0.1293, 0.1024, 0.1615, 0.0869, 0.2906, -0.2056, 0.0442, -0.0595,
-0.1448, 0.0167, -0.1259, -0.0989, 0.0651, -0.0424, 0.0795, -0.1546,
0.1330, -0.2284, 0.1672, 0.1847, 0.0841, 0.1771, -0.0101, -0.0681,
0.1497, 0.1226, 0.1146, -0.2090, 0.3275, 0.0981, -0.3295, 0.0590,
0.1130, -0.0650]
nationality = [-0.1745, -0.1940, -0.1529, -0.0964, 0.2657, -0.0979, 0.1510, -0.1248,
-0.1541, 0.1782, -0.1769, -0.2335, 0.2011, 0.1906, -0.1918, 0.1896,
-0.2183, -0.1543, 0.1816, 0.1684, -0.1318, 0.2285, 0.1784, 0.2260,
-0.2331, 0.0523, 0.1882, 0.1764, -0.1686, 0.2292]
pca = PCA(n_components=1)
X = np.array(usa).reshape(2,len(usa)//2)
X = pca.fit_transform(X)
Y = np.array(obama).reshape(2,len(obama)//2)
Y = pca.fit_transform(Y)
Z = np.array(nationality).reshape(2,len(nationality)//2)
Z = pca.fit_transform(Z)
x_coordinates = [X[0][0], Y[0][0], Z[0][0]]
y_coordinates = [X[1][0], Y[1][0], Z[1][0]]
colors = ['r','g','b']
annotations=["U.S.A","Obama","Nationality"]
plt.figure(figsize=(8,6))
plt.scatter(x_coordinates, y_coordinates, marker=",", color=colors,s=300)
for i, label in enumerate(annotations):
plt.annotate(label, (x_coordinates[i], y_coordinates[i]))
plt.show()
output:

How to solve warning message in Gekko due to m.connection?

I am using m.connection to estimate variables initial conditions but I am getting 12 warning messages like:
Moreover, the APM file shows:
I am not sure how to solve these messages.
I am following this explanation "If pos1 or pos2 is not None, the associated var must be a GEKKO variable and the position is the (0-indexed) time-discretized index of the variable" to write m.Connection(var1,var2,pos1=None,pos2=None,node1='end',node2='end').
https://gekko.readthedocs.io/en/latest/quick_start.html#connections
Thanks in advance.
from gekko import GEKKO
import numpy as np
import matplotlib.pyplot as plt
import math as math
import pandas as pd
tm1 = [0, 0.0667,0.5,1,4, 22.61667]
mca1 = [5.68, 3.48, 3.24, 3.36, 2.96, 1.96]
tm2 = [0, 0.08333,0.5,1,4.25 , 22.8167]
mca2 = [5.68, 4.20, 4.04, 4.00, 3.76, 2.88]
tm3 = [0,0.08333,0.5,1,4.33 , 22.9500]
mca3 = [5.68, 4.64, 4.52, 4.56, 4.24, 3.72]
tm4 = [0,0.08333,0.5,1,4.0833 , 23.0833]
mca4 =[18.90,15.4,14.3,15.10,13.50, 10.90]
tm5 = [0,0.08333,0.5,1,4.5, 23.2167]
mca5 =[18.90, 15.5, 16.30, 16, 14.70, 13.00]
tm6 = [0,0.08333,0.5,1,4.6667, 23.3333 ]
mca6 = [18.90, 15.8, 11.70,15.5,12, 9.5 ]
df1=pd.DataFrame({'time':tm1,'x1':mca1})
df2=pd.DataFrame({'time':tm2,'x2':mca2})
df3=pd.DataFrame({'time':tm3,'x3':mca3})
df4=pd.DataFrame({'time':tm4,'x4':mca4})
df5=pd.DataFrame({'time':tm5,'x5':mca5})
df6=pd.DataFrame({'time':tm6,'x6':mca6})
df1.set_index('time',inplace=True)
df2.set_index('time',inplace=True)
df3.set_index('time',inplace=True)
df4.set_index('time',inplace=True)
df5.set_index('time',inplace=True)
df6.set_index('time',inplace=True)
#simulation time points
dfx = pd.DataFrame({'time':np.linspace(0,25,101)})
dfx.set_index('time',inplace=True)
#merge dataframes
dfxx=dfx.join(df1,how='outer')
dfxxx=dfxx.join(df2,how='outer')
dfxxxx=dfxxx.join(df3,how='outer')
dfxxxxx=dfxxxx.join(df4,how='outer')
dfxxxxxx=dfxxxxx.join(df5,how='outer')
df=dfxxxxxx.join(df6,how='outer')
# get True (1) or False (0) for measurement
df['meas1']=(df['x1'].values==df['x1'].values).astype(int)
df['meas2']=(df['x2'].values==df['x2'].values).astype(int)
df['meas3']=(df['x3'].values==df['x3'].values).astype(int)
df['meas4']=(df['x4'].values==df['x4'].values).astype(int)
df['meas5']=(df['x5'].values==df['x5'].values).astype(int)
df['meas6']=(df['x6'].values==df['x6'].values).astype(int)
#replace NaN with zeros
df0=df.fillna(value=0)
m = GEKKO()
m.time = df0.index.values
meas1 = m.Param(df0['meas1'].values)
meas2 = m.Param(df0['meas2'].values)
meas3 = m.Param(df0['meas3'].values)
meas4 = m.Param(df0['meas4'].values)
meas5 = m.Param(df0['meas5'].values)
meas6 = m.Param(df0['meas6'].values)
#adjustable Parameters
kf=m.FV(1.3,lb=0.01,ub=10)
ks=m.FV(1.3,lb=0.01,ub=10)
cnf01=m.FV(1.3,lb=0.01,ub=10)
cns01=m.FV(1.3,lb=0.01,ub=10)
#constrains
cnf02=m.FV(value=cnf01*0.5,lb=cnf01*0.5, ub=cnf01*0.5)
cns02=m.FV(value=cns01*0.5,lb=cns01*0.5, ub=cns01*0.5)
cnf03=m.FV(value=cnf01*0.25,lb=cnf01*0.25, ub=cnf01*0.25)
cns03=m.FV(value=cns01*0.25,lb=cns01*0.25, ub=cns01*0.25)
cnf04=m.FV(value=cnf01,lb=cnf01, ub=cnf01)
cns04=m.FV(value=cns01,lb=cns01, ub=cns01)
cnf05=m.FV(value=cnf01*0.5,lb=cnf01*0.5, ub=cnf01*0.5)
cns05=m.FV(value=cns01*0.5,lb=cns01*0.5, ub=cns01*0.5)
cnf06=m.FV(value=cnf01*0.25,lb=cnf01*0.25, ub=cnf01*0.25)
cns06=m.FV(value=cns01*0.25,lb=cns01*0.25, ub=cns01*0.25)
#Variables
c1 = m.Var(value=mca1[0])
c2 = m.Var(value=mca2[0])
c3 = m.Var(value=mca3[0])
c4 = m.Var(value=mca4[0])
c5 = m.Var(value=mca5[0])
c6 = m.Var(value=mca6[0])
cm1 = m.Param(df0['x1'].values)
cm2 = m.Param(df0['x2'].values)
cm3 = m.Param(df0['x3'].values)
cm4 = m.Param(df0['x4'].values)
cm5 = m.Param(df0['x5'].values)
cm6 = m.Param(df0['x6'].values)
m.Minimize((meas1*(c1-cm1)**2)+(meas2*(c2-cm2)**2)\
+(meas3*(c3-cm3)**2)+(meas4*(c4-cm4)**2)\
+(meas5*(c5-cm5)**2)+(meas6*(c6-cm6)**2))
cnf1=m.Var(value=cnf01,fixed_initial=False)
cns1=m.Var(value=cns01,fixed_initial=False)
cnf2=m.Var(value=cnf02,fixed_initial=False)
cns2=m.Var(value=cns02,fixed_initial=False)
cnf3=m.Var(value=cnf03,fixed_initial=False)
cns3=m.Var(value=cns03,fixed_initial=False)
cnf4=m.Var(value=cnf04,fixed_initial=False)
cns4=m.Var(value=cns04,fixed_initial=False)
cnf5=m.Var(value=cnf05,fixed_initial=False)
cns5=m.Var(value=cns05,fixed_initial=False)
cnf6=m.Var(value=cnf06,fixed_initial=False)
cns6=m.Var(value=cns06,fixed_initial=False)
#Equations
t = m.Param(value=m.time)
m.Connection(cnf1,cnf01,pos1=0,pos2=0,node1=1,node2=1)
m.Connection(cnf2,cnf02,pos1=0,pos2=0,node1=1,node2=1)
m.Connection(cnf3,cnf03,pos1=0,pos2=0,node1=1,node2=1)
m.Connection(cnf4,cnf04,pos1=0,pos2=0,node1=1,node2=1)
m.Connection(cnf5,cnf05,pos1=0,pos2=0,node1=1,node2=1)
m.Connection(cnf6,cnf06,pos1=0,pos2=0,node1=1,node2=1)
m.Connection(cns1,cns01,pos1=0,pos2=0,node1=1,node2=1)
m.Connection(cns2,cns02,pos1=0,pos2=0,node1=1,node2=1)
m.Connection(cns3,cns03,pos1=0,pos2=0,node1=1,node2=1)
m.Connection(cns4,cns04,pos1=0,pos2=0,node1=1,node2=1)
m.Connection(cns5,cns05,pos1=0,pos2=0,node1=1,node2=1)
m.Connection(cns6,cns06,pos1=0,pos2=0,node1=1,node2=1)
m.Equation(cnf1.dt()==-kf*c1*cnf1)
m.Equation(cns1.dt()==-ks*c1*cns1)
m.Equation(c1.dt()==cnf1.dt()+cns1.dt())
m.Equation(cnf2.dt()==-kf*c2*cnf2)
m.Equation(cns2.dt()==-ks*c2*cns2)
m.Equation(c2.dt()==cnf2.dt()+cns2.dt())
m.Equation(cnf3.dt()==-kf*c3*cnf3)
m.Equation(cns3.dt()==-ks*c3*cns3)
m.Equation(c3.dt()==cnf3.dt()+cns3.dt())
m.Equation(cnf4.dt()==-kf*c4*cnf4)
m.Equation(cns4.dt()==-ks*c4*cns4)
m.Equation(c4.dt()==cnf4.dt()+cns4.dt())
m.Equation(cnf5.dt()==-kf*c5*cnf5)
m.Equation(cns5.dt()==-ks*c5*cns5)
m.Equation(c5.dt()==cnf5.dt()+cns5.dt())
m.Equation(cnf6.dt()==-kf*c6*cnf6)
m.Equation(cns6.dt()==-ks*c6*cns6)
m.Equation(c6.dt()==cnf6.dt()+cns6.dt())
if True:
kf.STATUS=1
ks.STATUS=1
cnf01.STATUS=1
cns01.STATUS=1
cnf02.STATUS=1
cns02.STATUS=1
cnf03.STATUS=1
cns03.STATUS=1
cnf04.STATUS=1
cns04.STATUS=1
cnf05.STATUS=1
cns05.STATUS=1
cnf06.STATUS=1
cns06.STATUS=1
#Options
m.options.SOLVER = 1 #IPOPT solver
m.options.IMODE = 5 #Dynamic Simultaneous - estimation = MHE
m.options.EV_TYPE = 2 #absolute error
m.options.NODES = 3 #collocation nodes (2,5)
m.solve(disp=True)
m.open_folder()
print('Final SSE Objective: ' + str(m.options.objfcnval))
print('Solution')
print('cnf01 = ' + str(cnf01.value[0]))
print('cns01 = ' + str(cns01.value[0]))
print('kf = ' + str(kf.value[0]))
print('ks = ' + str(ks.value[0]))
print('cns02 = '+ str(cns02.value[0]))
print('cnf02 = '+ str(cnf02.value[0]))
print('cns03 = '+ str(cns03.value[0]))
print('cnf03 = '+ str(cnf03.value[0]))
print('cns04 = '+ str(cns04.value[0]))
print('cnf04 = '+ str(cnf04.value[0]))
print('cns05 = '+ str(cns05.value[0]))
print('cnf05 = '+ str(cnf05.value[0]))
print('cns06 = '+ str(cns06.value[0]))
print('cnf06 = '+ str(cnf06.value[0]))
plt.figure(1,figsize=(8,5))
plt.plot(m.time,c1.value,'r',label='Predicted c1')
plt.plot(m.time,c2.value,'y',label='Predicted c2')
plt.plot(m.time,c3.value,'c',label='Predicted c3')
plt.plot(m.time,c4.value,'g',label='Predicted c4')
plt.plot(m.time,c5.value,'b',label='Predicted c5')
plt.plot(m.time,c6.value,'m',label='Predicted c6')
plt.plot(tm1,mca1,'rx',label='Meas c1')
plt.plot(tm2,mca2,'yx',label='Meas c2')
plt.plot(tm3,mca3,'cx',label='Meas c3')
plt.plot(tm4,mca4,'go',label='Meas c4')
plt.plot(tm5,mca5,'bo',label='Meas c5')
plt.plot(tm6,mca6,'mo',label='Meas c6')
plt.xlabel('time (h)')
plt.ylabel('Concentration (mgCl2/L)')
plt.legend(loc='upper center', bbox_to_anchor=(0.5, -0.15), ncol=2)

The underlying node structure has a 1-index instead of a 0-index that is common in Python. Using pos1=1 and pos2=1 resolves the warnings.
m.Connection(cnf1,cnf01,pos1=1,pos2=1,node1=1,node2=1)
m.Connection(cnf2,cnf02,pos1=1,pos2=1,node1=1,node2=1)
m.Connection(cnf3,cnf03,pos1=1,pos2=1,node1=1,node2=1)
m.Connection(cnf4,cnf04,pos1=1,pos2=1,node1=1,node2=1)
m.Connection(cnf5,cnf05,pos1=1,pos2=1,node1=1,node2=1)
m.Connection(cnf6,cnf06,pos1=1,pos2=1,node1=1,node2=1)
Another issue is that Gekko variables shouldn't generally be used to initialize other values. I recommend setting x0=1.3 and using that float to initialize the variables. Change m.Var() to m.SV() to avoid reclassification of m.Var() as an m.FV() during the connection. The m.SV() is a promoted type of variable that is at the same level of precedence as the m.FV(). Here is a complete script although the results don't look optimal.
from gekko import GEKKO
import numpy as np
import matplotlib.pyplot as plt
import math as math
import pandas as pd
tm1 = [0, 0.0667,0.5,1,4, 22.61667]
mca1 = [5.68, 3.48, 3.24, 3.36, 2.96, 1.96]
tm2 = [0, 0.08333,0.5,1,4.25 , 22.8167]
mca2 = [5.68, 4.20, 4.04, 4.00, 3.76, 2.88]
tm3 = [0,0.08333,0.5,1,4.33 , 22.9500]
mca3 = [5.68, 4.64, 4.52, 4.56, 4.24, 3.72]
tm4 = [0,0.08333,0.5,1,4.0833 , 23.0833]
mca4 =[18.90,15.4,14.3,15.10,13.50, 10.90]
tm5 = [0,0.08333,0.5,1,4.5, 23.2167]
mca5 =[18.90, 15.5, 16.30, 16, 14.70, 13.00]
tm6 = [0,0.08333,0.5,1,4.6667, 23.3333 ]
mca6 = [18.90, 15.8, 11.70,15.5,12, 9.5 ]
df1=pd.DataFrame({'time':tm1,'x1':mca1})
df2=pd.DataFrame({'time':tm2,'x2':mca2})
df3=pd.DataFrame({'time':tm3,'x3':mca3})
df4=pd.DataFrame({'time':tm4,'x4':mca4})
df5=pd.DataFrame({'time':tm5,'x5':mca5})
df6=pd.DataFrame({'time':tm6,'x6':mca6})
df1.set_index('time',inplace=True)
df2.set_index('time',inplace=True)
df3.set_index('time',inplace=True)
df4.set_index('time',inplace=True)
df5.set_index('time',inplace=True)
df6.set_index('time',inplace=True)
#simulation time points
dfx = pd.DataFrame({'time':np.linspace(0,25,101)})
dfx.set_index('time',inplace=True)
#merge dataframes
dfxx=dfx.join(df1,how='outer')
dfxxx=dfxx.join(df2,how='outer')
dfxxxx=dfxxx.join(df3,how='outer')
dfxxxxx=dfxxxx.join(df4,how='outer')
dfxxxxxx=dfxxxxx.join(df5,how='outer')
df=dfxxxxxx.join(df6,how='outer')
# get True (1) or False (0) for measurement
df['meas1']=(df['x1'].values==df['x1'].values).astype(int)
df['meas2']=(df['x2'].values==df['x2'].values).astype(int)
df['meas3']=(df['x3'].values==df['x3'].values).astype(int)
df['meas4']=(df['x4'].values==df['x4'].values).astype(int)
df['meas5']=(df['x5'].values==df['x5'].values).astype(int)
df['meas6']=(df['x6'].values==df['x6'].values).astype(int)
#replace NaN with zeros
df0=df.fillna(value=0)
m = GEKKO()
m.time = df0.index.values
meas1 = m.Param(df0['meas1'].values)
meas2 = m.Param(df0['meas2'].values)
meas3 = m.Param(df0['meas3'].values)
meas4 = m.Param(df0['meas4'].values)
meas5 = m.Param(df0['meas5'].values)
meas6 = m.Param(df0['meas6'].values)
#adjustable Parameters
kf=m.FV(1.3,lb=0.01,ub=10)
ks=m.FV(1.3,lb=0.01,ub=10)
x0 = 1.3
cnf01=m.FV(x0,lb=0.01,ub=10)
cns01=m.FV(x0,lb=0.01,ub=10)
#constrains
cnf02=m.FV(value=x0*0.5,lb=x0*0.5, ub=x0*0.5)
cns02=m.FV(value=x0*0.5,lb=x0*0.5, ub=x0*0.5)
cnf03=m.FV(value=x0*0.25,lb=x0*0.25, ub=x0*0.25)
cns03=m.FV(value=x0*0.25,lb=x0*0.25, ub=x0*0.25)
cnf04=m.FV(value=x0,lb=x0, ub=x0)
cns04=m.FV(value=x0,lb=x0, ub=x0)
cnf05=m.FV(value=x0*0.5,lb=x0*0.5, ub=x0*0.5)
cns05=m.FV(value=x0*0.5,lb=x0*0.5, ub=x0*0.5)
cnf06=m.FV(value=x0*0.25,lb=x0*0.25, ub=x0*0.25)
cns06=m.FV(value=x0*0.25,lb=x0*0.25, ub=x0*0.25)
#Variables
c1 = m.SV(value=mca1[0])
c2 = m.SV(value=mca2[0])
c3 = m.SV(value=mca3[0])
c4 = m.SV(value=mca4[0])
c5 = m.SV(value=mca5[0])
c6 = m.SV(value=mca6[0])
cm1 = m.Param(df0['x1'].values)
cm2 = m.Param(df0['x2'].values)
cm3 = m.Param(df0['x3'].values)
cm4 = m.Param(df0['x4'].values)
cm5 = m.Param(df0['x5'].values)
cm6 = m.Param(df0['x6'].values)
m.Minimize((meas1*(c1-cm1)**2)+(meas2*(c2-cm2)**2)\
+(meas3*(c3-cm3)**2)+(meas4*(c4-cm4)**2)\
+(meas5*(c5-cm5)**2)+(meas6*(c6-cm6)**2))
cnf1=m.SV(value=x0,fixed_initial=False)
cns1=m.SV(value=x0,fixed_initial=False)
cnf2=m.SV(value=x0,fixed_initial=False)
cns2=m.SV(value=x0,fixed_initial=False)
cnf3=m.SV(value=x0,fixed_initial=False)
cns3=m.SV(value=x0,fixed_initial=False)
cnf4=m.SV(value=x0,fixed_initial=False)
cns4=m.SV(value=x0,fixed_initial=False)
cnf5=m.SV(value=x0,fixed_initial=False)
cns5=m.SV(value=x0,fixed_initial=False)
cnf6=m.SV(value=x0,fixed_initial=False)
cns6=m.SV(value=x0,fixed_initial=False)
#Equations
t = m.Param(value=m.time)
m.Connection(cnf1,cnf01,pos1=1,pos2=1,node1=1,node2=1)
m.Connection(cnf2,cnf02,pos1=1,pos2=1,node1=1,node2=1)
m.Connection(cnf3,cnf03,pos1=1,pos2=1,node1=1,node2=1)
m.Connection(cnf4,cnf04,pos1=1,pos2=1,node1=1,node2=1)
m.Connection(cnf5,cnf05,pos1=1,pos2=1,node1=1,node2=1)
m.Connection(cnf6,cnf06,pos1=1,pos2=1,node1=1,node2=1)
m.Connection(cns1,cns01,pos1=1,pos2=1,node1=1,node2=1)
m.Connection(cns2,cns02,pos1=1,pos2=1,node1=1,node2=1)
m.Connection(cns3,cns03,pos1=1,pos2=1,node1=1,node2=1)
m.Connection(cns4,cns04,pos1=1,pos2=1,node1=1,node2=1)
m.Connection(cns5,cns05,pos1=1,pos2=1,node1=1,node2=1)
m.Connection(cns6,cns06,pos1=1,pos2=1,node1=1,node2=1)
m.Equation(cnf1.dt()==-kf*c1*cnf1)
m.Equation(cns1.dt()==-ks*c1*cns1)
m.Equation(c1.dt()==cnf1.dt()+cns1.dt())
m.Equation(cnf2.dt()==-kf*c2*cnf2)
m.Equation(cns2.dt()==-ks*c2*cns2)
m.Equation(c2.dt()==cnf2.dt()+cns2.dt())
m.Equation(cnf3.dt()==-kf*c3*cnf3)
m.Equation(cns3.dt()==-ks*c3*cns3)
m.Equation(c3.dt()==cnf3.dt()+cns3.dt())
m.Equation(cnf4.dt()==-kf*c4*cnf4)
m.Equation(cns4.dt()==-ks*c4*cns4)
m.Equation(c4.dt()==cnf4.dt()+cns4.dt())
m.Equation(cnf5.dt()==-kf*c5*cnf5)
m.Equation(cns5.dt()==-ks*c5*cns5)
m.Equation(c5.dt()==cnf5.dt()+cns5.dt())
m.Equation(cnf6.dt()==-kf*c6*cnf6)
m.Equation(cns6.dt()==-ks*c6*cns6)
m.Equation(c6.dt()==cnf6.dt()+cns6.dt())
#Options
m.options.SOLVER = 1 # APOPT solver
m.options.IMODE = 5 # Dynamic Simultaneous - estimation = MHE
m.options.EV_TYPE = 2 # Squared error
m.options.NODES = 3 # Collocation nodes (2,5)
if True:
kf.STATUS=1
ks.STATUS=1
cnf01.STATUS=1
cns01.STATUS=1
cnf02.STATUS=1
cns02.STATUS=1
cnf03.STATUS=1
cns03.STATUS=1
cnf04.STATUS=1
cns04.STATUS=1
cnf05.STATUS=1
cns05.STATUS=1
cnf06.STATUS=1
cns06.STATUS=1
m.options.TIME_SHIFT = 0
try:
m.solve(disp=True)
except:
print("don't stop when not finding cnf01...cnf06")
#m.open_folder()
print('Final SSE Objective: ' + str(m.options.objfcnval))
print('Solution')
print('cnf01 = ' + str(cnf1.value[0]))
print('cns01 = ' + str(cns1.value[0]))
print('kf = ' + str(kf.value[0]))
print('ks = ' + str(ks.value[0]))
print('cns02 = '+ str(cns2.value[0]))
print('cnf02 = '+ str(cnf2.value[0]))
print('cns03 = '+ str(cns3.value[0]))
print('cnf03 = '+ str(cnf3.value[0]))
print('cns04 = '+ str(cns4.value[0]))
print('cnf04 = '+ str(cnf4.value[0]))
print('cns05 = '+ str(cns5.value[0]))
print('cnf05 = '+ str(cnf5.value[0]))
print('cns06 = '+ str(cns6.value[0]))
print('cnf06 = '+ str(cnf6.value[0]))
plt.figure(1,figsize=(8,5))
plt.plot(m.time,c1.value,'r',label='Predicted c1')
plt.plot(m.time,c2.value,'y',label='Predicted c2')
plt.plot(m.time,c3.value,'c',label='Predicted c3')
plt.plot(m.time,c4.value,'g',label='Predicted c4')
plt.plot(m.time,c5.value,'b',label='Predicted c5')
plt.plot(m.time,c6.value,'m',label='Predicted c6')
plt.plot(tm1,mca1,'rx',label='Meas c1')
plt.plot(tm2,mca2,'yx',label='Meas c2')
plt.plot(tm3,mca3,'cx',label='Meas c3')
plt.plot(tm4,mca4,'go',label='Meas c4')
plt.plot(tm5,mca5,'bo',label='Meas c5')
plt.plot(tm6,mca6,'mo',label='Meas c6')
plt.xlabel('time (h)')
plt.ylabel('Concentration (mgCl2/L)')
plt.legend(loc='upper center', bbox_to_anchor=(0.5, -0.15), ncol=2)
plt.show()

ContainerAlreadyContains error when doing an optimization with cobra

I am trying to run an optimization with cobra to create a coop-medium for two organisms. For this, I set up constraints and objectives like it was explained in the docu. Sadly, my code produces a ContainerAlreadyContains error, and I have no idea where it comes from; an internet search did not help. I was understanding that my newly defined constraints would be added to those already in the model or maybe they would overwrite the old constraints if affecting the same component, or is this incorrect?
I am using two other models, but managed to produce the same error with the cobra testmodel with the code posted below. Using python 3.7.0 and cobra 0.19.0. Hopefully someone can help me; thanks in any case!
import cobra
import copy
import cobra.test
def optimize_model(inmodel, medium, biomass, verbose=False):
model = copy.deepcopy(inmodel)
metabolites = list(medium.keys())
reactions = [r.id for r in model.reactions]
#variables
thetas = {}
for m in metabolites:
var = model.problem.Variable(name="theta_{}".format(m), lb=0, type="binary")
thetas["theta_{}".format(m)] = var
#constraints
constraints = []
for m in metabolites:
try:
const = model.problem.Constraint(model.reactions.get_by_id(m).flux_expression +
model.reactions.get_by_id(m).lower_bound*thetas["theta_"+m],
lb=model.reactions.get_by_id(m).lower_bound,
name="V_COOPM_{}".format(m))
constraints.add_cons_vars(const)
except:
pass
VBM_COMPM = model.optimize().objective_value / 10
cost = model.problem.Constraint(biomass.flux_expression, lb=VBM_COMPM)
constraints.append(cost)
#objective
obj = model.problem.Objective(sum(thetas[t] for t in thetas.keys()),
direction="max")
model.add_cons_vars(constraints)
model.objective = obj
model.solver.update()
status = model.optimize()
medium = {
'EX_ala__L_e': 'Alanine', 'EX_arg__L_e': 'Arginine',
'EX_cys__L_e': 'Cysteine', 'EX_glu__L_e': 'Glutamic acid',
'EX_gly_e': 'Glycine', 'EX_his__L_e': 'Histidine',
'EX_leu__L_e': 'Leucine', 'EX_lys__L_e': 'Lysine', 'EX_orn_e': 'Ornithine',
'EX_phe__L_e': 'Phenylalanine', 'EX_pro__L_e': 'Proline',
'EX_ser__L_e': 'Serine', 'EX_thr__L_e': 'Threonine',
'EX_trp__L_e': 'Tryptophane', 'EX_val__L_e': 'Valine',
'EX_cit_e': 'citric acid', 'EX_fum_e': 'Fumaric acid',
'EX_male_e': 'maleic acid', 'EX_pyr_e': 'pyruvic acid',
'EX_succ_e': 'succinic acid', 'EX_glc__D_e': 'glucose',
'EX_urea_e': 'Urea', 'EX_na1_e': 'Sodium', 'EX_cl_e': 'Chloride',
'EX_k_e': 'Potassium', 'EX_pi_e': 'Phosphate', 'EX_mg2_e': 'Magnesium',
'EX_so4_e': 'Sulphate', 'EX_ca2_e': 'Calcium', 'EX_zn2_e': 'ZnCl2',
'EX_mn2_e': 'MnCl2', 'EX_cobalt2_e': 'CoCl2', 'EX_cu2_e': 'CuCl2',
'EX_ni2_e': 'NiCl2', 'EX_mobd_e': 'MoNa2O4', 'EX_adocbl_e': 'Cyanocobalamine',
'EX_4abz_e': 'p-Aminobenzoic acid', 'EX_btn_e': 'Biotin', 'EX_nac_e': 'Nicotinic acid',
'EX_pnto__R_e': 'Ca-D-Pantothenic acid', 'EX_pydam_e': 'Pyridoxamine-2HCl',
'EX_thm_e': 'Thiamine-dichloride', 'EX_ribflv_e': 'Riboflavin', 'EX_o2_e': 'Oxygen',
'EX_fe2_e': 'Fe3+', 'EX_h2o_e': 'Water', 'EX_co2_e': 'Co2'
}
model = cobra.test.create_test_model("textbook")
for r in model.reactions:
if r.id == "Biomass_Ecoli_core":
biomass = r
break
optimize_model(model, medium, biomass, True)

See here for the change that worked:
https://github.com/opencobra/cobrapy/issues/1026

Incorrect scikit-learn linear model prediction with date offset

I'm trying to predict time-series data, but by offsetting the result by date_offset-timepoints before training and prediction. The reason for doing this is to try and predict date_offset-timepoints into the future with the present data. See http://glowingpython.blogspot.co.za/2015/01/forecasting-beer-consumption-with.html for an example.
So in summary:
data = [1,2,3,4,5] should predict result = [2,3,4,5,6] if date_offset = 1
The results on the plot below show the red line being shifted by date_offset, and not predicting date_offset into the future. No matter how big I make date_offset, it keeps shifting and not predicting the last result I have, i.e. result = 5 (which is already know). In fact, the red line should not shift at all, just loose accuracy the bigger date_offset becomes. What am I doing wrong?
See example code and resulting image below:
from sklearn import linear_model
import matplotlib.pyplot as plt
import numpy as np
date_offset = 1
data = np.array([9330.0, 9470.0, 9550.0, 9620.0, 9600.0, 9585.0, 9600.0, 9600.0, 9430.0, 9460.0, 9450.0, 9650.0, 9620.0, 9650.0, 9500.0, 9400.0, 9165.0, 9100.0, 8755.0, 8850.0, 8990.0, 9150.0, 9195.0, 9175.0, 9250.0, 9200.0, 9350.0, 9280.0, 9370.0, 9470.0, 9445.0, 9440.0, 9280.0, 9325.0, 9170.0, 9270.0, 9200.0, 9450.0, 9510.0, 9371.0, 9499.0, 9499.0, 9400.0, 9500.0, 9550.0, 9670.0, 9700.0, 9760.0, 9767.4599999999991, 9652.0, 9520.0, 9600.0, 9610.0, 9700.0, 9825.0, 9900.0, 9950.0, 9801.0, 9770.0, 9545.0, 9630.0, 9710.0, 9700.0, 9700.0, 9600.0, 9615.0, 9575.0, 9500.0, 9600.0, 9480.0, 9565.0, 9510.0, 9475.0, 9600.0, 9400.0, 9400.0, 9400.0, 9300.0, 9430.0, 9410.0, 9380.0, 9320.0, 9000.0, 9100.0, 9000.0, 9200.0, 9210.0, 9251.0, 9460.0, 9400.0, 9600.0, 9621.0, 9440.0, 9490.0, 9675.0, 9850.0, 9680.0, 10100.0, 9900.0, 10100.0, 9949.0, 10040.0, 10050.0, 10200.0, 10400.0, 10350.0, 10200.0, 10175.0, 10001.0, 10110.0, 10400.0, 10401.0, 10300.0, 10548.0, 10515.0, 10475.0, 10200.0, 10481.0, 10500.0, 10540.0, 10559.0, 10300.0, 10400.0, 10202.0, 10330.0, 10450.0, 10540.0, 10540.0, 10650.0, 10450.0, 10550.0, 10501.0, 10206.0, 10250.0, 10345.0, 10225.0, 10330.0, 10506.0, 11401.0, 11245.0, 11360.0, 11549.0, 11415.0, 11450.0, 11460.0, 11600.0, 11530.0, 11450.0, 11402.0, 11299.0])
data = data[np.newaxis].T
results = np.array([9470.0, 9545.0, 9635.0, 9640.0, 9600.0, 9622.0, 9555.0, 9429.0, 9495.0, 9489.0, 9630.0, 9612.0, 9630.0, 9501.0, 9372.0, 9165.0, 9024.0, 8780.0, 8800.0, 8937.0, 9051.0, 9100.0, 9166.0, 9220.0, 9214.0, 9240.0, 9254.0, 9400.0, 9450.0, 9470.0, 9445.0, 9301.0, 9316.0, 9170.0, 9270.0, 9251.0, 9422.0, 9466.0, 9373.0, 9440.0, 9415.0, 9410.0, 9500.0, 9520.0, 9620.0, 9705.0, 9760.0, 9765.0, 9651.0, 9520.0, 9600.0, 9610.0, 9700.0, 9805.0, 9900.0, 9950.0, 9800.0, 9765.0, 9602.0, 9630.0, 9790.0, 9710.0, 9800.0, 9649.0, 9580.0, 9780.0, 9560.0, 9501.0, 9511.0, 9530.0, 9498.0, 9475.0, 9595.0, 9500.0, 9460.0, 9400.0, 9310.0, 9382.0, 9375.0, 9385.0, 9320.0, 9100.0, 8990.0, 9045.0, 9129.0, 9201.0, 9251.0, 9424.0, 9440.0, 9500.0, 9621.0, 9490.0, 9512.0, 9599.0, 9819.0, 9684.0, 10025.0, 9984.0, 10110.0, 9950.0, 10048.0, 10095.0, 10200.0, 10338.0, 10315.0, 10200.0, 10166.0, 10095.0, 10110.0, 10400.0, 10445.0, 10360.0, 10548.0, 10510.0, 10480.0, 10180.0, 10488.0, 10520.0, 10510.0, 10565.0, 10450.0, 10400.0, 10240.0, 10338.0, 10410.0, 10540.0, 10481.0, 10521.0, 10530.0, 10325.0, 10510.0, 10446.0, 10249.0, 10236.0, 10211.0, 10340.0, 10394.0, 11370.0, 11250.0, 11306.0, 11368.0, 11415.0, 11400.0, 11452.0, 11509.0, 11500.0, 11455.0, 11400.0, 11300.0, 11369.0])
# Date offset to predict next i-days results
data = data[:-date_offset]
results = results[date_offset:]
train_data = data[:-50]
train_results = results[:-50]
test_data = data[-50:]
test_results = results[-50:]
regressor = linear_model.BayesianRidge(normalize=True)
regressor.fit(train_data, train_results)
plt.figure(figsize=(8,6))
plt.plot(regressor.predict(test_data), '--', color='#EB3737', linewidth=2, label='Prediction')
plt.plot(test_results, label='True', color='green', linewidth=2)
plt.legend(loc='best')
plt.show()

First of all, the model is not really bad. For instance, when the real value is 10450, it predict 10350, which is really close. And, obviously, the farther in time the predicted point is, the less accurate its predictions, as the variance is growing and sometimes even bias is also growing. You cannot expect the opposite.
Secondly, it is a linear model, so it cannot be absolutely exact when the predicted variable is not linear by nature.
Thirdly, one have to choose a predicted variable with care. For instance, in this case you might try to predict not the value at time T, but the change in value at time T (i.e. C[T]=V[T]-V[T-1]) or the moving average of the last K values. Here you might (or, on the contrary, might not) find out that you are trying to model the so called "random walk" which is hard to predict exactly by its random nature.
And lastly, you might consider other models, like ARIMA, which are better suited for predicting time series.

Adding back the organize_data step:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import linear_model
def organize_data(to_forecast, window, horizon):
"""
Input:
to_forecast, univariate time series organized as numpy array
window, number of items to use in the forecast window
horizon, horizon of the forecast
Output:
X, a matrix where each row contains a forecast window
y, the target values for each row of X
"""
shape = to_forecast.shape[:-1] + \
(to_forecast.shape[-1] - window + 1, window)
strides = to_forecast.strides + (to_forecast.strides[-1],)
X = np.lib.stride_tricks.as_strided(to_forecast,
shape=shape,
strides=strides)
y = np.array([X[i+horizon][-1] for i in range(len(X)-horizon)])
return X[:-horizon], y
data = np.array([9330.0, 9470.0, 9550.0, 9620.0, 9600.0, 9585.0, 9600.0, 9600.0, 9430.0, 9460.0, 9450.0, 9650.0, 9620.0, 9650.0, 9500.0, 9400.0, 9165.0, 9100.0, 8755.0, 8850.0, 8990.0, 9150.0, 9195.0, 9175.0, 9250.0, 9200.0, 9350.0, 9280.0, 9370.0, 9470.0, 9445.0, 9440.0, 9280.0, 9325.0, 9170.0, 9270.0, 9200.0, 9450.0, 9510.0, 9371.0, 9499.0, 9499.0, 9400.0, 9500.0, 9550.0, 9670.0, 9700.0, 9760.0, 9767.4599999999991, 9652.0, 9520.0, 9600.0, 9610.0, 9700.0, 9825.0, 9900.0, 9950.0, 9801.0, 9770.0, 9545.0, 9630.0, 9710.0, 9700.0, 9700.0, 9600.0, 9615.0, 9575.0, 9500.0, 9600.0, 9480.0, 9565.0, 9510.0, 9475.0, 9600.0, 9400.0, 9400.0, 9400.0, 9300.0, 9430.0, 9410.0, 9380.0, 9320.0, 9000.0, 9100.0, 9000.0, 9200.0, 9210.0, 9251.0, 9460.0, 9400.0, 9600.0, 9621.0, 9440.0, 9490.0, 9675.0, 9850.0, 9680.0, 10100.0, 9900.0, 10100.0, 9949.0, 10040.0, 10050.0, 10200.0, 10400.0, 10350.0, 10200.0, 10175.0, 10001.0, 10110.0, 10400.0, 10401.0, 10300.0, 10548.0, 10515.0, 10475.0, 10200.0, 10481.0, 10500.0, 10540.0, 10559.0, 10300.0, 10400.0, 10202.0, 10330.0, 10450.0, 10540.0, 10540.0, 10650.0, 10450.0, 10550.0, 10501.0, 10206.0, 10250.0, 10345.0, 10225.0, 10330.0, 10506.0, 11401.0, 11245.0, 11360.0, 11549.0, 11415.0, 11450.0, 11460.0, 11600.0, 11530.0, 11450.0, 11402.0, 11299.0])
train_window = 50
k = 5 # number of previous observations to use
h = 2 # forecast horizon
X,y = organize_data(data, k, h)
train_data = X[:train_window]
train_results = y[:train_window]
test_data = X[train_window:]
test_results = y[train_window:]
regressor = linear_model.BayesianRidge(normalize=True)
regressor.fit(train_data, train_results)
plt.figure(figsize=(8,6))
plt.plot(regressor.predict(X), '--', color='#EB3737', linewidth=2, label='Prediction')
plt.plot(y, label='True', color='green', linewidth=2)
plt.legend(loc='best')
plt.show()

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

PyBrain neural network for stock prediction won't learn - python

Related

how can i smooth the graph values or extract main signals only

How to visualize high-dimension vectors as points in 2D plane?

How to solve warning message in Gekko due to m.connection?

ContainerAlreadyContains error when doing an optimization with cobra

Incorrect scikit-learn linear model prediction with date offset

Categories

Resources