I made an AI that uses the Adaline algorythm. It seemed to work but it results in an overflow if it's used with my own dataset.
Here's the code:
import sys
import numpy as np
from random import choice
import matplotlib.pyplot as plt
from sklearn.base import BaseEstimator,ClassifierMixin
from sklearn.utils.validation import check_X_y,check_array,check_is_fitted,check_random_state
from sklearn.utils.multiclass import unique_labels
class AdalineEstimator(BaseEstimator,ClassifierMixin):
def __init__(self,eta=.001,n_iterations=500,random_state=None,):
'''
:param eta: Learning rate
:param n_iterations: number of iterations to go through the dataset
:param random_state: you should not change that
'''
self.eta=eta
self.n_iterations=n_iterations
self.random_state=random_state
# creating arrays
self.errors=[]
self.w=[] # creating the weight array
self.wAll=[] # Weights for plotting.
def net_i(self,x):
return np.dot(x,self.w) # input array * weight array
def activation(self,x):
# Linear activation function
return self.net_i(x)
def output(self,x):
# Heaviside function
if self.activation(x) >= 0.0:
return 1
else:
return -1
def fit(self,X=None,y=None):
'''
The learning function. Adjusts the weights.
:param x: Input array
:param y: Answers
'''
random_state=check_random_state(self.random_state) # checking the random_state
self.w=random_state.random_sample(np.size(X,1))
X, y = check_X_y(X, y) # checking X and y
self.X_=X
self.y_=y
for i in range(self.n_iterations):
# Choosing a random user out of the array
rand_index=random_state.randint(0,np.size(X,0))
x_=X[rand_index]
y_=y[rand_index]
s=np.dot(x_,self.w)
print("S is: ",s)
if s != s:
print("nan encountered in S")
sys.exit(1)
error=(y_ -s)**2
self.errors.append(error)
self.w+=self.eta * x_ * (y_-s)
self.wAll.append(self.w.copy())
def predict(self,x):
check_is_fitted(self,['X_','y_']) # Was the model trained before?
y_hat=self.output(x)
return y_hat
def plot(self):
""" Ausgabe des Fehlers und der Lernkurve
Die im Fehlerarray gespeicherten Fehler als Grafik ausgeben
Die Trenngeraden aus den gespeicherten Gewichten ausgeben
"""
x1 = []
x2 = []
colors = []
for i in range(self.X_.shape[0]):
x1.append(self.X_[i][1])
x2.append(self.X_[i][2])
y = self.y_[i]
if y == 1:
colors.append('r') # rot
else:
colors.append('b') # blau
# Raster
plt.style.use('seaborn-whitegrid')
# Errors
plt.plot(self.errors)
# Learning Curve
plt.figure(1)
plt.show()
# Scatter
plt.figure(2)
plt.scatter(x1, x2, c=colors)
# Result Line
x1Line = np.linspace(0.0, 1.0, 2)
x2Line = lambda x1, w0, w1, w2: (-x1 * w1 - w0) / w2;
alpha = 0.0
for idx, weight in enumerate(self.wAll):
# alpha = Transparenz, je näher zum Ziel desto dunkler
if (idx % 100 == 0):
alpha = 1.0 # ( idx / len(self.wAll) )
plt.plot(x1Line, x2Line(x1Line, weight[0], weight[1], weight[2]), alpha=alpha, linestyle='solid',
label=str(idx), linewidth=1.5)
# Ergebnisgerade
plt.plot(x1Line, x2Line(x1Line, weight[0], weight[1], weight[2]), alpha=alpha, linestyle='solid',
label=str(idx), linewidth=2.0)
plt.legend(loc='best', shadow=True)
data = []
with open('data.txt') as file:
for line in file:
dataline=float(line.rstrip())
dataline=round(dataline,-3)
data.append(dataline)
data=np.array(data)
data=data.reshape(-1,6)
X=data[:,0:5]
y=data[:,5]
#X=X[~np.isnan(X)]
Adaline = AdalineEstimator(eta=0.01, n_iterations=200, random_state=10)
Adaline.fit(X, y)
Adaline.plot()
And the dataset is here: https://pastebin.com/Vziav3Q9
The error message is somehow on the beginning (I am using pycharm at the moment, not sure if related) of the output.
RuntimeWarning:
overflow encountered in double_scalars
error=(y_ -s)**2
And then:
RuntimeWarning: invalid value encountered in multiply
self.w+=self.eta * x_ * (y_-s)
How can I fix this?
This is an example:
S is: 1.9288464662803013e+290
[0. 0. 0. 0. 0.]
S is: 0.0
[ 433000. 18000. 6369000. 0. 0.]
S is: -8.776351721574362e+301
[ 5000. 0. 26000. 0. 0.]
S is: inf
[0. 0. 0. 0. 0.]
S is: nan
S is the weighted x:
s=np.dot(x_,self.w)
Eta should be lowered as the weights are too large (I first thought that the inputs are too large but they aren't larger than one million.
eta=0.00000001 does it. 100% correct (400 iterations needed).
Related
Somehow the following code raises the error "ValueError: x0 must have at most 1 dimension." as soon as I add bounds to my Fit. I have absolutely no idea what I'm doing wrong here.
The Goal is to restrain the fit of the 8 Lorentzian Curves to the given bounds.
However, the presented code propably won't lead to a fit, but this is a problem I should be able to solve.
import matplotlib.pyplot as plt
import numpy as np
import scipy as scipy
from scipy.signal import find_peaks, peak_widths
import time
# Functions needed for Fitting model
def lorentzian(x, amp, cen, wid):
return amp*wid**2/((x-cen)**2+wid**2)
def multi_lorentzian(x, params, *args):
if args:
params = [params] + [x for x in args]
try:
params = np.array(params).reshape(len(params)//3, 3)
except:
raise ValueError("Parameter dimensions don't fit the model!")
total_curve = 0
for amp, cen, wid in params:
total_curve += lorentzian(x, amp, cen, wid)
return total_curve
##############################################################################
# create data
samples = 200
start = 2.75
stop = 3
x_incr = (stop-start)/samples
x_array = np.linspace(start, stop, samples) # frequency in GHz
amp_array = np.random.uniform(0.03, 0.1, 8) # 3 bis 10% Kontrast
cen_array = [2.81, 2.829, 2.831, 2.848, 2.897, 2.914, 2.9165, 2.932]
# cen_array = np.random.uniform(start, stop, 8)
wid_array = [0.003, 0.003, 0.003,0.003, 0.003, 0.003, 0.003, 0.003]
y_array = 1-multi_lorentzian(x_array,
np.array([amp_array, cen_array, wid_array]).T)
y_noise = y_array + np.random.normal(0, 1, samples)*1e-3
# mirroring to get maxima instead of minima
y_noise_inv = -y_noise+1
##############################################################################
# prepare guessing of start values
heights= np.random.uniform(0.03, 0.1, 8)
widths = np.random.uniform(0.002, 0.004, 8)
center_guess = cen_array+np.random.normal(0, 1, 8)*1e-3
p0_array =np.array([heights,center_guess, widths]).T
bounds_array = ([0., 2.75, 0.], [1., 3., 0.5])
popt_y, pcov_y = scipy.optimize.curve_fit(multi_lorentzian, x_array, y_noise_inv,
p0=p0_array, bounds= bounds_array)
popt_y = popt_y.reshape(len(popt_y)//3, 3)
single_peaks = [lorentzian(x_array, i, j, k) for i,j,k in popt_y]
perr_y = np.sqrt(np.diag(pcov_y))
residual_y = y_noise_inv - multi_lorentzian(x_array, popt_y)
ss_res = np.sum(residual_y**2)
ss_tot = np.sum((y_noise_inv-np.mean(y_noise_inv))**2)
r_squared = 1 - (ss_res / ss_tot)
Ok, after some digging, the issue was quite simple. p0 is supposed to be flat, not a 2D array that you supplied. I only had to change two lines to make things work.
1st, the bounds array. You're supposed to have as many minimum and maximum values as you have parameters, and since you have 3*8 params, then I just multiplied them as shown here.
bounds_array = ([0., 2.75, 0.]*8, [1., 3., 0.5]*8)
2nd, I flattened p0 when calling curve_fit.
popt_y, pcov_y = scipy.optimize.curve_fit(multi_lorentzian, x_array, y_noise_inv, p0=p0_array.flatten(), bounds= bounds_array)
And this is the fit:
I am trying plot the concentration profile i.e. X (number of particles per unit volume - scalar quantity) with magnitude along the y axis, for a 2-D video of a soliton image forming. It is for a 1D plot along the y axis (center of soliton or near it). I have printed out some X values (self.X) below. Can someone give advise on how to do this?
Soliton video image
The program is run by e.g.: python3 render_video.py ~/tf2-model-g/nucleation_and_motion_in_fluid_2D.mp4 --params params/nucleation_and_motion_in_fluid_2D.yaml
shown at https://github.com/bjdarrer/tf2-model-g/blob/master/README.md
In fluid_model_g.py --> https://github.com/frostburn/tf2-model-g/blob/master/fluid_model_g.py#L260
lines 260 to 271:
def step(self):
self.G, self.X, self.Y = self.reaction_integrator(self.G, self.X, self.Y)
density_of_reactants = (
self.params['density_G'] * self.G +
self.params['density_X'] * self.X +
self.params['density_Y'] * self.Y
)
rho = tf.math.log(self.params['base-density'] + density_of_reactants)
if self.dims == 2:
u, v = self.u, self.v # Store unintegrated flow so that we're on the same timestep
self.u, self.v, divergence = self.flow_integrator(rho, self.u, self.v)
self.G, self.X, self.Y = self.diffusion_advection_integrator(self.G, self.X, self.Y, u, v, divergence)
print("Value of X: ", self.X) # ***** BJD inserted this line 13.11.2020 *****
Value of X: tf.Tensor(
[[0. 0. 0. ... 0. 0. 0.]
[0. 0. 0. ... 0. 0. 0.]
[0. 0. 0. ... 0. 0. 0.]
...
[0. 0. 0. ... 0. 0. 0.]
[0. 0. 0. ... 0. 0. 0.]
[0. 0. 0. ... 0. 0. 0.]], shape=(426, 240), dtype=float64)
0% (1 of 960) | | Elapsed Time: 0:00:02 ETA: 0:47:21[swscaler # 0x50eea00] Warning: data is not aligned! This can lead to a speed loss
Value of X: tf.Tensor(
[[-4.63608184e-24 -4.36049800e-24 -3.96075313e-24 ... -5.27033753e-24
-5.03750622e-24 -4.84189560e-24]
[ 2.76706114e-24 2.59755449e-24 2.45328517e-24 ... 2.86976915e-24
2.95354289e-24 2.90518059e-24]
[-6.71999498e-24 -6.43068214e-24 -6.04211608e-24 ... -7.49891717e-24
-7.20906517e-24 -6.96166453e-24]
...
[-7.49120287e-24 -7.43495675e-24 -7.33722895e-24 ... -7.87227733e-24
-7.66850690e-24 -7.55316064e-24]
[-6.62339916e-24 -7.00430269e-24 -7.22697968e-24 ... -5.13241412e-24
-5.62753357e-24 -6.14297167e-24]
[-2.95495662e-24 -2.83991588e-24 -2.64129974e-24 ... -3.35524379e-24
-3.15960041e-24 -3.04178300e-24]], shape=(426, 240), dtype=float64)
0% (2 of 960) | | Elapsed Time: 0:00:03 ETA: 0:26:36Value of X: tf.Tensor(
[[-9.47390594e-25 8.49165771e-25 1.85097134e-24 ... -9.88883148e-24
-6.58483185e-24 -3.49179228e-24]
[-3.94139524e-24 -1.45189300e-24 4.41879845e-25 ... -1.34799377e-23
-1.02628610e-23 -6.95616006e-24]
[ 8.18336833e-24 9.84605465e-24 1.07562913e-23 ... -2.49403108e-25
2.86674937e-24 5.79328209e-24]
At lines 33 to 45 in https://github.com/frostburn/tf2-model-g/blob/master/render_video.py#L33
def make_video_frame(rgb, indexing='ij'):
if indexing == 'ij':
rgb = [tf.transpose(channel) for channel in rgb]
frame = tf.stack(rgb, axis=-1)
frame = tf.clip_by_value(frame, 0.0, 1.0)
return tf.cast(frame * 255, 'uint8').numpy()
def nucleation_and_motion_in_G_gradient_fluid_2D(writer, args, R=16):
dx = 2*R / args.height
x = (np.arange(args.width) - args.width // 2) * dx
y = (np.arange(args.height) - args.height // 2) * dx
x, y = np.meshgrid(x, y, indexing='ij')
How do I access the X values for all y values at the center of the image i.e. the soliton in 1-D?
This was solved by including numpy code: np.savetxt("test.txt", self.X) to save X values to an array text file, for 2nd unit of time, stepping through the program with break points.
See code here:
if self.dims == 2:
u, v = self.u, self.v # Store unintegrated flow so that we're on the same timestep
self.u, self.v, divergence = self.flow_integrator(rho, self.u, self.v)
self.G, self.X, self.Y = self.diffusion_advection_integrator(self.G, self.X, self.Y, u, v, divergence)
print("Value of X: ", self.X) # ***** BJD inserted this line 13.11.2020 *****
np.savetxt("test.txt", self.X) # BJD added 18.11.2020
https://github.com/bjdarrer/tf2-model-g/blob/master/fluid_model_g.py#L272
......................
A 1D plot was made of X values at the 2nd time unit. See attached plot and 1D values below.
1d plot of X values along y axis at row = 120
[ 7.94994701e-24 9.85394009e-24 1.16724732e-23 1.32964743e-23
1.46426920e-23 1.56451249e-23 1.62247381e-23 1.62714663e-23
1.56643109e-23 1.43263700e-23 1.22840384e-23 9.69274339e-24
6.80830998e-24 3.91114440e-24 1.21418229e-24 -1.20497031e-24
-3.41488779e-24 -5.57379433e-24 -7.82536116e-24 -1.01917593e-23
-1.25153317e-23 -1.44811319e-23 -1.57152112e-23 -1.59172105e-23
-1.49702335e-23 -1.29833925e-23 -1.02537849e-23 -7.16747715e-24
-4.07926538e-24 -1.21401588e-24 1.37888695e-24 3.80664014e-24
6.26134589e-24 8.92580731e-24 1.18936633e-23 1.51317529e-23
1.84919882e-23 2.17584411e-23 2.47040950e-23 2.71353081e-23
2.89152259e-23 2.99694201e-23 3.02806073e-23 2.98763983e-23
2.88124186e-23 2.71563628e-23 2.49810332e-23 2.23688269e-23
1.94188927e-23 1.62435883e-23 1.29512082e-23 9.62920834e-24
6.34788108e-24 3.18911971e-24 2.79939637e-25 -2.20131819e-24
-4.06380382e-24 -5.17455116e-24 -5.52257949e-24 -5.23938686e-24
-4.56365781e-24 -3.77035437e-24 -3.10239379e-24 -2.73563768e-24
-2.78161914e-24 -3.30750470e-24 -4.34786101e-24 -5.89968378e-24
-7.91404629e-24 -1.03041562e-23 -1.29750195e-23 -1.58584013e-23
-1.89286104e-23 -2.21875709e-23 -2.56302874e-23 -2.92145279e-23
-3.28513734e-23 -3.64143835e-23 -3.97525079e-23 -4.26964680e-23
-4.50643261e-23 -4.66827795e-23 -4.74343885e-23 -4.73207155e-23
-4.65131763e-23 -4.53633391e-23 -4.43660753e-23 -4.41000796e-23
-4.51905031e-23 -4.83331736e-23 -5.43906759e-23 -6.45346948e-23
-8.03882901e-23 -1.04128146e-22 -1.38534658e-22 -1.87007603e-22
-2.53577094e-22 -3.42926128e-22 -4.60412050e-22 -6.12048255e-22
-8.04399336e-22 -1.04435508e-21 -1.33877363e-21 -1.69401102e-21
-2.11537416e-21 -2.60654177e-21 -3.16899233e-21 -3.80146531e-21
-4.49947504e-21 -5.25490422e-21 -6.05572752e-21 -6.88594224e-21
-7.72579326e-21 -8.55235963e-21 -9.34051728e-21 -1.00642222e-20
-1.06979952e-20 -1.12184568e-20 -1.16057624e-20 -1.18448233e-20
-1.19262340e-20 -1.18468578e-20 -1.16100375e-20 -1.12254034e-20
-1.07082695e-20 -1.00786437e-20 -9.35992740e-21 -8.57742847e-21
-7.75683514e-21 -6.92280202e-21 -6.09777366e-21 -5.30112722e-21
-4.54865757e-21 -3.85237113e-21 -3.22051949e-21 -2.65780148e-21
-2.16568595e-21 -1.74283487e-21 -1.38561728e-21 -1.08869469e-21
-8.45638503e-22 -6.49524336e-22 -4.93445336e-22 -3.70897308e-22
-2.76011146e-22 -2.03638508e-22 -1.49328474e-22 -1.09254138e-22
-8.01470426e-23 -5.92710118e-23 -4.44273344e-23 -3.39526040e-23
-2.66679125e-23 -2.17655977e-23 -1.86595791e-23 -1.68510352e-23
-1.58556626e-23 -1.52061025e-23 -1.45052962e-23 -1.34870661e-23
-1.20474092e-23 -1.02332298e-23 -8.19661117e-24 -6.13229680e-24
-4.21647245e-24 -2.56316371e-24 -1.21193812e-24 -1.52531919e-25
6.23620809e-25 1.07140526e-24 1.07088324e-24 4.54160347e-25
-9.29919036e-25 -3.14710611e-24 -6.14247656e-24 -9.75582634e-24
-1.37745379e-23 -1.79907813e-23 -2.22338974e-23 -2.63725554e-23
-3.03058110e-23 -3.39677826e-23 -3.73524561e-23 -4.05382634e-23
-4.36805719e-23 -4.69567026e-23 -5.04815682e-23 -5.42360534e-23
-5.80453665e-23 -6.16147035e-23 -6.45977063e-23 -6.66618754e-23
-6.75291547e-23 -6.69951561e-23 -6.49463207e-23 -6.13893615e-23
-5.64868983e-23 -5.05742818e-23 -4.41308860e-23 -3.76978298e-23
-3.17621894e-23 -2.66481131e-23 -2.24555364e-23 -1.90673084e-23
-1.62172814e-23 -1.35911829e-23 -1.09278863e-23 -8.09715091e-24
-5.13973303e-24 -2.26083096e-24 2.26451201e-25 1.99600234e-24
2.80923930e-24 2.57837594e-24 1.36540430e-24 -6.73844499e-25
-3.35995393e-24 -6.53161970e-24 -1.00428155e-23 -1.37325142e-23
-1.73985771e-23 -2.07974339e-23 -2.36703294e-23 -2.57826699e-23
-2.69616894e-23 -2.71233461e-23 -2.62838895e-23 -2.45532783e-23
-2.21107261e-23 -1.91693737e-23 -1.59430731e-23 -1.26264947e-23
-9.38909951e-24 -6.37205823e-24 -3.67582761e-24 -1.33786645e-24
6.84229467e-25 2.50366362e-24 4.26174238e-24 6.06727554e-24]
import matplotlib.pyplot as pp # BJD added 18.11.2020
for n in progressbar.progressbar(range(args.num_frames)):
fluid_model_g.step()
if n % args.oversampling == 0:
rgb = [
6*(-fluid_model_g.G + max_G) / (max_G - min_G),
5*(fluid_model_g.Y - min_Y) / (max_Y - min_Y),
0.7*(fluid_model_g.X - min_X) / (max_X - min_X),
]
zero_line = 1 - tf.exp(-600 * fluid_model_g.Y**2)
frame = make_video_frame([c * zero_line for c in rgb])
writer.append_data(frame)
#=========================inserted code==================================================
y1 = np.loadtxt("test.txt") # shape of 2D array of X values = (426, 240)
row1 = y1[120] # row 120 of 2D array (426, 240)
print(row1)
pp.plot(row1)
pp.show()
#===========================================================================
See:
https://github.com/bjdarrer/tf2-model-g/blob/master/backups/render_video___X_seed__1d__backup_11-12-2020_1a.py#L158
I am trying to calculate AIC manually, but my function gives different scores compared to the LassoLarsIC score. Can someone tell me what is wrong with my calculation.
Here my function:
def aic(y_pred, y, k):
ll = (-1/(2*np.var(y)))*np.sum((y_pred-y)**2) - (len(y)/2)*np.log(np.var(y)) - (len(y)/2)*np.log(2*np.pi)
return -2*ll + 2*k
Thanks a lot
Edit:
My example is simple, here is the complete code:
X = np.array([0, 0.1111, 0.2222, 0.3333, 0.4444, 0.5556, 0.6667, 0.7778, 0.8889, 1]).reshape(-1, 1)
y = np.array([0.0528, 0.798 , 0.8486, 0.8719, 0.1732, -0.3629, -0.7528, -0.9985, -0.6727, -0.1197]).reshape(-1, 1)
poly = plf(9)
F = poly.fit_transform(X)[:, 1:]
scl = StandardScaler()
F = scl.fit_transform(F)
aic_lasso = LassoLarsIC(normalize=False)
aic_lasso.fit(F, y)
aic_lasso.criterion_
Output:
array([10. , 7.29642036, 8.9544056 , 7.06390981, 6.14233987,
7.96489293, 7.76894903, 7.61736515, 7.39575925, 7.25866825,
7.01418447, 6.90314784, 6.6465343 , 6.60361937, 8.12547536,
8.09620652, 8.09610375, 10.09599191, 12.0959849 , 12.09597075,
12.09596367, 12.09579736, 10.09579645, 10.09579616, 12.09579393,
12.09579199, 12.09579079, 14.09541338, 16.01988119])
y_pred = aic_lasso.predict(F)
aic(y_pred, y, 2)
Output:
146.42615433502792
K is 2 becuase, lasso sets the other coeff. to 0.
I guess this answer arrives way too late, but the mistake is that you use var(y) instead of std(residuals)
This will work
def aic(resid, nparams):
n = len (resid)
sig = np.std(resid)
ll = -n*np.log(sig*np.sqrt(np.pi*2))- np.sum((resid / sig)**2)/2
return float(2*nparams - 2*ll)
I try to implement Polynomial Regression with Gradient Descent. I want to fit the following function:
The code I use is:
import numpy as np
import matplotlib.pyplot as plt
import scipy.linalg
from sklearn.preprocessing import PolynomialFeatures
np.random.seed(seed=42)
def create_data():
x = PolynomialFeatures(degree=5).fit_transform(np.linspace(-10,10,100).reshape(100,-1))
l = lambda x_i: (1/3)*x_i**3-2*x_i**2+2*x_i+2
data = l(x[:,1])
noise = np.random.normal(0,0.1,size=np.shape(data))
y = data+noise
y= y.reshape(100,1)
return {'x':x,'y':y}
def plot_function(x,y):
fig = plt.figure(figsize=(10,10))
plt.plot(x[:,1],[(1/3)*x_i**3-2*x_i**2+2*x_i+2 for x_i in x[:,1]],c='lightgreen',linewidth=3,zorder=0)
plt.scatter(x[:,1],y)
plt.show()
def w_update(y,x,batch,w_old,eta):
derivative = np.sum([(y[i]-np.dot(w_old.T,x[i,:]))*x[i,:] for i in range(np.shape(x)[0])])
print(derivative)
return w_old+eta*(1/batch)*derivative
# initialize variables
w = np.random.normal(size=(6,1))
data = create_data()
x = data['x']
y = data['y']
plot_function(x,y)
# Update w
w_s = []
Error = []
for i in range(500):
error = (1/2)*np.sum([(y[i]-np.dot(w.T,x[i,:]))**2 for i in range(len(x))])
Error.append(error)
w_prime = w_update(y,x,np.shape(x)[0],w,0.001)
w = w_prime
w_s.append(w)
# Plot the predicted function
plt.plot(x[:,1],np.dot(x,w))
plt.show()
# Plot the error
fig3 = plt.figure()
plt.scatter(range(len(Error[10:])),Error[10:])
plt.show()
But as result I receive smth. strange which is completely out of bounds...I have also tried to alter the number of iterations as well as the parameter theta but it did not help. I assume I have made an mistake in the update of w.
I have found the solution. The Problem is indeed in the part where I calculate the weights. Specifically in:
np.sum([(y[d]-np.dot(w_old.T,x[d,:]))*x[d,:] for d in range(np.shape(x)[0])])
which should be like:
np.sum([-(y[d]-np.dot(w.T.copy(),x[d,:]))*x[d,:].reshape(np.shape(w)) for d in range(len(x))],axis=0)
We have to add np.sum(axis=0) to get the dimensionality we want --> Dimensionality must be equal to w. The numpy sum documentation sais
The default, axis=None, will sum all of the elements of the input
array.
This is not what we want to achieve. Adding axis = 0 sums over the first axis of our array which is of dimensionality (100,7,1) hence the 100 elements of dimensionality (7,1) are summed up and the resulting array is of dimensionality (7,1) which is exactly what we want. Implementing this and cleaning up the code yields:
import numpy as np
import matplotlib.pyplot as plt
import scipy.linalg
from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import MinMaxScaler
np.random.seed(seed=42)
def create_data():
x = PolynomialFeatures(degree=6).fit_transform(np.linspace(-2,2,100).reshape(100,-1))
x[:,1:] = MinMaxScaler(feature_range=(-2,2),copy=False).fit_transform(x[:,1:])
l = lambda x_i: np.cos(0.8*np.pi*x_i)
data = l(x[:,1])
noise = np.random.normal(0,0.1,size=np.shape(data))
y = data+noise
y= y.reshape(100,1)
# Normalize Data
return {'x':x,'y':y}
def plot_function(x,y,w,Error,w_s):
fig,ax = plt.subplots(nrows=1,ncols=2,figsize=(40,10))
ax[0].plot(x[:,1],[np.cos(0.8*np.pi*x_i) for x_i in x[:,1]],c='lightgreen',linewidth=3,zorder=0)
ax[0].scatter(x[:,1],y)
ax[0].plot(x[:,1],np.dot(x,w))
ax[0].set_title('Function')
ax[1].scatter(range(iterations),Error)
ax[1].set_title('Error')
plt.show()
# initialize variables
data = create_data()
x = data['x']
y = data['y']
w = np.random.normal(size=(np.shape(x)[1],1))
eta = 0.1
iterations = 10000
batch = 10
def stochastic_gradient_descent(x,y,w,eta):
derivative = -(y-np.dot(w.T,x))*x.reshape(np.shape(w))
return eta*derivative
def batch_gradient_descent(x,y,w,eta):
derivative = np.sum([-(y[d]-np.dot(w.T.copy(),x[d,:]))*x[d,:].reshape(np.shape(w)) for d in range(len(x))],axis=0)
return eta*(1/len(x))*derivative
def mini_batch_gradient_descent(x,y,w,eta,batch):
gradient_sum = np.zeros(shape=np.shape(w))
for b in range(batch):
choice = np.random.choice(list(range(len(x))))
gradient_sum += -(y[choice]-np.dot(w.T,x[choice,:]))*x[choice,:].reshape(np.shape(w))
return eta*(1/batch)*gradient_sum
# Update w
w_s = []
Error = []
for i in range(iterations):
# Calculate error
error = (1/2)*np.sum([(y[i]-np.dot(w.T,x[i,:]))**2 for i in range(len(x))])
Error.append(error)
# Stochastic Gradient Descent
"""
for d in range(len(x)):
w-= stochastic_gradient_descent(x[d,:],y[d],w,eta)
w_s.append(w.copy())
"""
# Minibatch Gradient Descent
"""
w-= mini_batch_gradient_descent(x,y,w,eta,batch)
"""
# Batch Gradient Descent
w -= batch_gradient_descent(x,y,w,eta)
# Show predicted weights
print(w_s)
# Plot the predicted function and the Error
plot_function(x,y,w,Error,w_s)
As result we receive:
Which surely can be improved by altering eta and the number of iterations as well as switching to Stochastic or Mini Batch Gradient Descent or more sophisticated optimization algorithms.
I want to use caffe with a vector label, not integer. I have checked some answers, and it seems HDF5 is a better way. But then I'm stucked with error like:
accuracy_layer.cpp:34] Check failed: outer_num_ * inner_num_ == bottom[1]->count() (50 vs. 200) Number of labels must match number of predictions; e.g., if label axis == 1 and prediction shape is (N, C, H, W), label count (number of labels) must be N*H*W, with integer values in {0, 1, ..., C-1}.
with HDF5 created as:
f = h5py.File('train.h5', 'w')
f.create_dataset('data', (1200, 128), dtype='f8')
f.create_dataset('label', (1200, 4), dtype='f4')
My network is generated by:
def net(hdf5, batch_size):
n = caffe.NetSpec()
n.data, n.label = L.HDF5Data(batch_size=batch_size, source=hdf5, ntop=2)
n.ip1 = L.InnerProduct(n.data, num_output=50, weight_filler=dict(type='xavier'))
n.relu1 = L.ReLU(n.ip1, in_place=True)
n.ip2 = L.InnerProduct(n.relu1, num_output=50, weight_filler=dict(type='xavier'))
n.relu2 = L.ReLU(n.ip2, in_place=True)
n.ip3 = L.InnerProduct(n.relu1, num_output=4, weight_filler=dict(type='xavier'))
n.accuracy = L.Accuracy(n.ip3, n.label)
n.loss = L.SoftmaxWithLoss(n.ip3, n.label)
return n.to_proto()
with open(PROJECT_HOME + 'auto_train.prototxt', 'w') as f:
f.write(str(net('/home/romulus/code/project/train.h5list', 50)))
with open(PROJECT_HOME + 'auto_test.prototxt', 'w') as f:
f.write(str(net('/home/romulus/code/project/test.h5list', 20)))
It seems I should increase label number and put things in integer rather than array, but if I do this, caffe complains number of data and label is not equal, then exists.
So, what is the correct format to feed multi label data?
Also, I'm so wondering why no one just simply write the data format how HDF5 maps to caffe blobs?
Answer to this question's title:
The HDF5 file should have two dataset in root, named "data" and "label", respectively. The shape is (data amount, dimension). I'm using only one-dimension data, so I'm not sure what's the order of channel, width, and height. Maybe it does not matter. dtype should be float or double.
A sample code creating train set with h5py is:
import h5py, os
import numpy as np
f = h5py.File('train.h5', 'w')
# 1200 data, each is a 128-dim vector
f.create_dataset('data', (1200, 128), dtype='f8')
# Data's labels, each is a 4-dim vector
f.create_dataset('label', (1200, 4), dtype='f4')
# Fill in something with fixed pattern
# Regularize values to between 0 and 1, or SigmoidCrossEntropyLoss will not work
for i in range(1200):
a = np.empty(128)
if i % 4 == 0:
for j in range(128):
a[j] = j / 128.0;
l = [1,0,0,0]
elif i % 4 == 1:
for j in range(128):
a[j] = (128 - j) / 128.0;
l = [1,0,1,0]
elif i % 4 == 2:
for j in range(128):
a[j] = (j % 6) / 128.0;
l = [0,1,1,0]
elif i % 4 == 3:
for j in range(128):
a[j] = (j % 4) * 4 / 128.0;
l = [1,0,1,1]
f['data'][i] = a
f['label'][i] = l
f.close()
Also, the accuracy layer is not needed, simply removing it is fine. Next problem is the loss layer. Since SoftmaxWithLoss has only one output (index of the dimension with max value), it can't be used for multi-label problem. Thank to Adian and Shai, I find SigmoidCrossEntropyLoss is good in this case.
Below is the full code, from data creation, training network, and getting test result:
main.py (modified from caffe lanet example)
import os, sys
PROJECT_HOME = '.../project/'
CAFFE_HOME = '.../caffe/'
os.chdir(PROJECT_HOME)
sys.path.insert(0, CAFFE_HOME + 'caffe/python')
import caffe, h5py
from pylab import *
from caffe import layers as L
def net(hdf5, batch_size):
n = caffe.NetSpec()
n.data, n.label = L.HDF5Data(batch_size=batch_size, source=hdf5, ntop=2)
n.ip1 = L.InnerProduct(n.data, num_output=50, weight_filler=dict(type='xavier'))
n.relu1 = L.ReLU(n.ip1, in_place=True)
n.ip2 = L.InnerProduct(n.relu1, num_output=50, weight_filler=dict(type='xavier'))
n.relu2 = L.ReLU(n.ip2, in_place=True)
n.ip3 = L.InnerProduct(n.relu2, num_output=4, weight_filler=dict(type='xavier'))
n.loss = L.SigmoidCrossEntropyLoss(n.ip3, n.label)
return n.to_proto()
with open(PROJECT_HOME + 'auto_train.prototxt', 'w') as f:
f.write(str(net(PROJECT_HOME + 'train.h5list', 50)))
with open(PROJECT_HOME + 'auto_test.prototxt', 'w') as f:
f.write(str(net(PROJECT_HOME + 'test.h5list', 20)))
caffe.set_device(0)
caffe.set_mode_gpu()
solver = caffe.SGDSolver(PROJECT_HOME + 'auto_solver.prototxt')
solver.net.forward()
solver.test_nets[0].forward()
solver.step(1)
niter = 200
test_interval = 10
train_loss = zeros(niter)
test_acc = zeros(int(np.ceil(niter * 1.0 / test_interval)))
print len(test_acc)
output = zeros((niter, 8, 4))
# The main solver loop
for it in range(niter):
solver.step(1) # SGD by Caffe
train_loss[it] = solver.net.blobs['loss'].data
solver.test_nets[0].forward(start='data')
output[it] = solver.test_nets[0].blobs['ip3'].data[:8]
if it % test_interval == 0:
print 'Iteration', it, 'testing...'
correct = 0
data = solver.test_nets[0].blobs['ip3'].data
label = solver.test_nets[0].blobs['label'].data
for test_it in range(100):
solver.test_nets[0].forward()
# Positive values map to label 1, while negative values map to label 0
for i in range(len(data)):
for j in range(len(data[i])):
if data[i][j] > 0 and label[i][j] == 1:
correct += 1
elif data[i][j] %lt;= 0 and label[i][j] == 0:
correct += 1
test_acc[int(it / test_interval)] = correct * 1.0 / (len(data) * len(data[0]) * 100)
# Train and test done, outputing convege graph
_, ax1 = subplots()
ax2 = ax1.twinx()
ax1.plot(arange(niter), train_loss)
ax2.plot(test_interval * arange(len(test_acc)), test_acc, 'r')
ax1.set_xlabel('iteration')
ax1.set_ylabel('train loss')
ax2.set_ylabel('test accuracy')
_.savefig('converge.png')
# Check the result of last batch
print solver.test_nets[0].blobs['ip3'].data
print solver.test_nets[0].blobs['label'].data
h5list files simply contain paths of h5 files in each line:
train.h5list
/home/foo/bar/project/train.h5
test.h5list
/home/foo/bar/project/test.h5
and the solver:
auto_solver.prototxt
train_net: "auto_train.prototxt"
test_net: "auto_test.prototxt"
test_iter: 10
test_interval: 20
base_lr: 0.01
momentum: 0.9
weight_decay: 0.0005
lr_policy: "inv"
gamma: 0.0001
power: 0.75
display: 100
max_iter: 10000
snapshot: 5000
snapshot_prefix: "sed"
solver_mode: GPU
Converge graph:
Last batch result:
[[ 35.91593933 -37.46276474 -6.2579031 -6.30313492]
[ 42.69248581 -43.00864792 13.19664764 -3.35134125]
[ -1.36403108 1.38531208 2.77786589 -0.34310576]
[ 2.91686511 -2.88944006 4.34043217 0.32656598]
...
[ 35.91593933 -37.46276474 -6.2579031 -6.30313492]
[ 42.69248581 -43.00864792 13.19664764 -3.35134125]
[ -1.36403108 1.38531208 2.77786589 -0.34310576]
[ 2.91686511 -2.88944006 4.34043217 0.32656598]]
[[ 1. 0. 0. 0.]
[ 1. 0. 1. 0.]
[ 0. 1. 1. 0.]
[ 1. 0. 1. 1.]
...
[ 1. 0. 0. 0.]
[ 1. 0. 1. 0.]
[ 0. 1. 1. 0.]
[ 1. 0. 1. 1.]]
I think this code still has many things to improve. Any suggestion is appreciated.
Your accuracy layer makes no sense.
The way accuracy layer works: in caffe accuracy layer expects two inputs
(i) a predicted probability vector and
(ii) ground-truth corresponding scalar integer label.
The accuracy layer than checks if the probability of the predicted label is indeed the maximal (or within top_k).
Therefore if you have to classify C different classes, your inputs are going to be N-by-C (where N is batch size) input predicted probabilities for N samples belonging to each of the C classes, and N labels.
The way it is defined in your net: You input accuracy layer N-by-4 predictions and N-by-4 labels -- this makes no sense for caffe.