I want to use Caffe with a vector label rather than an integer one. I have checked some answers, and it seems HDF5 is the better way. But then I got stuck with an error like:
accuracy_layer.cpp:34] Check failed: outer_num_ * inner_num_ == bottom[1]->count() (50 vs. 200) Number of labels must match number of predictions; e.g., if label axis == 1 and prediction shape is (N, C, H, W), label count (number of labels) must be N*H*W, with integer values in {0, 1, ..., C-1}.
with HDF5 created as:
# Create the training file. The context manager guarantees the file is
# flushed and closed once the datasets have been created.
with h5py.File('train.h5', 'w') as f:
    f.create_dataset('data', (1200, 128), dtype='f8')
    f.create_dataset('label', (1200, 4), dtype='f4')
My network is generated by:
def net(hdf5, batch_size):
    """Build the network spec (two 50-unit hidden layers, 4 outputs).

    :param hdf5: path of the list file naming the HDF5 files to read
    :param batch_size: number of samples per batch
    :return: the network definition produced by ``NetSpec.to_proto()``
    """
    n = caffe.NetSpec()
    n.data, n.label = L.HDF5Data(batch_size=batch_size, source=hdf5, ntop=2)
    n.ip1 = L.InnerProduct(n.data, num_output=50, weight_filler=dict(type='xavier'))
    n.relu1 = L.ReLU(n.ip1, in_place=True)
    n.ip2 = L.InnerProduct(n.relu1, num_output=50, weight_filler=dict(type='xavier'))
    n.relu2 = L.ReLU(n.ip2, in_place=True)
    # The output layer must consume the second hidden layer; wiring it to
    # relu1 would leave ip2/relu2 as dead branches of the graph.
    n.ip3 = L.InnerProduct(n.relu2, num_output=4, weight_filler=dict(type='xavier'))
    # NOTE: Accuracy and SoftmaxWithLoss expect one integer label per sample,
    # so feeding them the 4-dim label vector triggers the check failure
    # quoted in this question.
    n.accuracy = L.Accuracy(n.ip3, n.label)
    n.loss = L.SoftmaxWithLoss(n.ip3, n.label)
    return n.to_proto()
# Emit one prototxt per phase: (output file, HDF5 list file, batch size).
for proto_name, h5list, batch in (
        ('auto_train.prototxt', '/home/romulus/code/project/train.h5list', 50),
        ('auto_test.prototxt', '/home/romulus/code/project/test.h5list', 20)):
    with open(PROJECT_HOME + proto_name, 'w') as f:
        f.write(str(net(h5list, batch)))
It seems I should increase the number of labels and store them as integers rather than arrays, but if I do this, Caffe complains that the number of data items and labels is not equal, and then exits.
So, what is the correct format to feed multi label data?
Also, I wonder why no one has simply written down how the HDF5 data format maps to Caffe blobs.
Answer to this question's title:
The HDF5 file should have two datasets in its root, named "data" and "label", respectively. The shape is (number of samples, dimension). I'm using only one-dimensional data, so I'm not sure what the order of channel, width, and height is. Maybe it does not matter. The dtype should be float or double.
A sample code creating train set with h5py is:
import h5py, os
import numpy as np
# 128 feature positions shared by every sample.
j = np.arange(128)
# Four fixed (feature vector, label vector) patterns, selected by i % 4.
# Values are normalized to between 0 and 1, or SigmoidCrossEntropyLoss
# will not work.
patterns = [
    (j / 128.0, [1, 0, 0, 0]),
    ((128 - j) / 128.0, [1, 0, 1, 0]),
    ((j % 6) / 128.0, [0, 1, 1, 0]),
    ((j % 4) * 4 / 128.0, [1, 0, 1, 1]),
]

# The context manager flushes and closes the file even if filling fails.
with h5py.File('train.h5', 'w') as f:
    # 1200 data, each is a 128-dim vector
    f.create_dataset('data', (1200, 128), dtype='f8')
    # Data's labels, each is a 4-dim vector
    f.create_dataset('label', (1200, 4), dtype='f4')
    # Fill in something with fixed pattern
    for i in range(1200):
        a, l = patterns[i % 4]
        f['data'][i] = a
        f['label'][i] = l
Also, the accuracy layer is not needed; simply removing it is fine. The next problem is the loss layer. Since SoftmaxWithLoss handles only one label per sample (the index of the dimension with the maximum value), it can't be used for a multi-label problem. Thanks to Adian and Shai, I found that SigmoidCrossEntropyLoss works well in this case.
Below is the full code, from data creation, training network, and getting test result:
main.py (modified from the Caffe LeNet example)
import os, sys
PROJECT_HOME = '.../project/'
CAFFE_HOME = '.../caffe/'
sys.path.insert(0, CAFFE_HOME + 'caffe/python')
import caffe, h5py
from pylab import *
from caffe import layers as L
def net(hdf5, batch_size):
    """Describe the multi-label network and return it as a proto message.

    The network reads ``data``/``label`` from the HDF5 list file ``hdf5`` in
    batches of ``batch_size``, applies two 50-unit inner-product layers with
    in-place ReLUs, a 4-unit output layer, and a sigmoid cross-entropy loss.
    """
    def dense(bottom, width):
        # Inner-product layer with Xavier-initialized weights.
        return L.InnerProduct(bottom, num_output=width,
                              weight_filler=dict(type='xavier'))

    spec = caffe.NetSpec()
    spec.data, spec.label = L.HDF5Data(batch_size=batch_size, source=hdf5, ntop=2)
    spec.ip1 = dense(spec.data, 50)
    spec.relu1 = L.ReLU(spec.ip1, in_place=True)
    spec.ip2 = dense(spec.relu1, 50)
    spec.relu2 = L.ReLU(spec.ip2, in_place=True)
    spec.ip3 = dense(spec.relu2, 4)
    spec.loss = L.SigmoidCrossEntropyLoss(spec.ip3, spec.label)
    return spec.to_proto()
with open(PROJECT_HOME + 'auto_train.prototxt', 'w') as f:
f.write(str(net(PROJECT_HOME + 'train.h5list', 50)))
with open(PROJECT_HOME + 'auto_test.prototxt', 'w') as f:
f.write(str(net(PROJECT_HOME + 'test.h5list', 20)))
solver = caffe.SGDSolver(PROJECT_HOME + 'auto_solver.prototxt')
niter = 200
test_interval = 10
train_loss = zeros(niter)
test_acc = zeros(int(np.ceil(niter * 1.0 / test_interval)))
print len(test_acc)
output = zeros((niter, 8, 4))
# The main solver loop
for it in range(niter):
solver.step(1) # SGD by Caffe
train_loss[it] = solver.net.blobs['loss'].data
output[it] = solver.test_nets[0].blobs['ip3'].data[:8]
if it % test_interval == 0:
print 'Iteration', it, 'testing...'
correct = 0
data = solver.test_nets[0].blobs['ip3'].data
label = solver.test_nets[0].blobs['label'].data
for test_it in range(100):
# Positive values map to label 1, while negative values map to label 0
for i in range(len(data)):
for j in range(len(data[i])):
if data[i][j] > 0 and label[i][j] == 1:
correct += 1
elif data[i][j] %lt;= 0 and label[i][j] == 0:
correct += 1
test_acc[int(it / test_interval)] = correct * 1.0 / (len(data) * len(data[0]) * 100)
# Train and test done, outputing convege graph
_, ax1 = subplots()
ax2 = ax1.twinx()
ax1.plot(arange(niter), train_loss)
ax2.plot(test_interval * arange(len(test_acc)), test_acc, 'r')
ax1.set_ylabel('train loss')
ax2.set_ylabel('test accuracy')
# Check the result of last batch
print solver.test_nets[0].blobs['ip3'].data
print solver.test_nets[0].blobs['label'].data
h5list files simply contain paths of h5 files in each line:
and the solver:
train_net: "auto_train.prototxt"
test_net: "auto_test.prototxt"
test_iter: 10
test_interval: 20
base_lr: 0.01
momentum: 0.9
weight_decay: 0.0005
lr_policy: "inv"
gamma: 0.0001
power: 0.75
display: 100
max_iter: 10000
snapshot: 5000
snapshot_prefix: "sed"
solver_mode: GPU
Converge graph:
Last batch result:
[[ 35.91593933 -37.46276474 -6.2579031 -6.30313492]
[ 42.69248581 -43.00864792 13.19664764 -3.35134125]
[ -1.36403108 1.38531208 2.77786589 -0.34310576]
[ 2.91686511 -2.88944006 4.34043217 0.32656598]
[ 35.91593933 -37.46276474 -6.2579031 -6.30313492]
[ 42.69248581 -43.00864792 13.19664764 -3.35134125]
[ -1.36403108 1.38531208 2.77786589 -0.34310576]
[ 2.91686511 -2.88944006 4.34043217 0.32656598]]
[[ 1. 0. 0. 0.]
[ 1. 0. 1. 0.]
[ 0. 1. 1. 0.]
[ 1. 0. 1. 1.]
[ 1. 0. 0. 0.]
[ 1. 0. 1. 0.]
[ 0. 1. 1. 0.]
[ 1. 0. 1. 1.]]
I think this code still has many things to improve. Any suggestion is appreciated.
Your accuracy layer makes no sense.
The way accuracy layer works: in caffe accuracy layer expects two inputs
(i) a predicted probability vector and
(ii) ground-truth corresponding scalar integer label.
The accuracy layer then checks if the probability of the predicted label is indeed the maximal (or within top_k).
Therefore if you have to classify C different classes, your inputs are going to be N-by-C (where N is batch size) input predicted probabilities for N samples belonging to each of the C classes, and N labels.
The way it is defined in your net: You input accuracy layer N-by-4 predictions and N-by-4 labels -- this makes no sense for caffe.
I made an AI that uses the Adaline algorithm. It seemed to work, but it results in an overflow when it is used with my own dataset.
Here's the code:
import sys
import numpy as np
from random import choice
import matplotlib.pyplot as plt
from sklearn.base import BaseEstimator,ClassifierMixin
from sklearn.utils.validation import check_X_y,check_array,check_is_fitted,check_random_state
from sklearn.utils.multiclass import unique_labels
class AdalineEstimator(BaseEstimator, ClassifierMixin):
    """Adaline (adaptive linear neuron) classifier trained one sample at a time.

    NOTE(review): several lines of this class were missing from the listing
    (docstring quotes, attribute assignments, the sample draw in ``fit``, the
    body of ``predict``, parts of ``plot``). They are reconstructed here from
    how the remaining code uses them; confirm against the original source.
    """

    def __init__(self, eta=.001, n_iterations=500, random_state=None,):
        """
        :param eta: Learning rate
        :param n_iterations: number of iterations to go through the dataset
        :param random_state: you should not change that
        """
        # fit() reads these three back from the instance.
        self.eta = eta
        self.n_iterations = n_iterations
        self.random_state = random_state
        # creating arrays
        self.errors = []  # squared error of every update step
        self.w = []  # creating the weight array
        self.wAll = []  # Weights for plotting.

    def net_i(self, x):
        """Return the weighted input (input array * weight array)."""
        return np.dot(x, self.w)

    def activation(self, x):
        """Linear activation function: the net input itself."""
        return self.net_i(x)

    def output(self, x):
        """Heaviside step on the activation: 1 if >= 0, else -1."""
        if self.activation(x) >= 0.0:
            return 1
        return -1

    def fit(self, X=None, y=None):
        """The learning function. Adjusts the weights.

        :param X: Input array
        :param y: Answers
        :return: the fitted estimator
        """
        random_state = check_random_state(self.random_state)  # checking the random_state
        X, y = check_X_y(X, y)  # checking X and y
        # predict() and plot() look for these fitted attributes.
        self.X_ = X
        self.y_ = y
        # NOTE(review): the original weight initialization is not visible;
        # zeros are assumed here.
        self.w = np.zeros(X.shape[1])
        self.errors = []
        self.wAll = []
        for i in range(self.n_iterations):
            # Choosing a random user out of the array
            idx = random_state.randint(0, X.shape[0])
            x_ = X[idx]
            y_ = y[idx]
            s = self.net_i(x_)
            print("S is: ", s)
            if s != s:
                # nan compares unequal to itself: the weights have diverged,
                # so further updates would only propagate nan.
                print("nan encountered in S")
                break
            error = (y_ - s)**2
            self.errors.append(error)
            self.w += self.eta * x_ * (y_ - s)
            self.wAll.append(self.w.copy())
        return self

    def predict(self, x):
        """Return the class (1 or -1) for one sample, or an array for many."""
        check_is_fitted(self, ['X_', 'y_'])  # Was the model trained before?
        x = np.asarray(x)
        if x.ndim == 1:
            y_hat = self.output(x)
        else:
            y_hat = np.array([self.output(row) for row in x])
        return y_hat

    def plot(self):
        """Plot the errors and the learning curve.

        Draws the errors stored in the error array as a graph, then the
        training samples together with the separating lines derived from the
        stored weights.
        """
        # NOTE(review): assumes column 0 of X_ is a constant bias input and
        # columns 1 and 2 are the two plotted features, matching the use of
        # weight[0], weight[1], weight[2] below — confirm.
        x1 = []
        x2 = []
        colors = []
        for i in range(self.X_.shape[0]):
            x1.append(self.X_[i][1])
            x2.append(self.X_[i][2])
            y = self.y_[i]
            if y == 1:
                colors.append('r')  # red
            else:
                colors.append('b')  # blue
        # Errors / learning curve (with grid)
        plt.figure()
        plt.grid(True)
        plt.plot(self.errors)
        # Scatter
        plt.figure()
        plt.grid(True)
        plt.scatter(x1, x2, c=colors)
        # Result line
        x1Line = np.linspace(0.0, 1.0, 2)

        def x2Line(x1, w0, w1, w2):
            # Solve w0 + w1*x1 + w2*x2 = 0 for x2.
            return (-x1 * w1 - w0) / w2

        alpha = 1.0
        for idx, weight in enumerate(self.wAll):
            # Draw every 100th intermediate separating line.
            if (idx % 100 == 0):
                plt.plot(x1Line, x2Line(x1Line, weight[0], weight[1], weight[2]), alpha=alpha, linestyle='solid',
                         label=str(idx), linewidth=1.5)
        # Final separating line; skipped when no weights were recorded.
        if self.wAll:
            idx = len(self.wAll) - 1
            weight = self.wAll[idx]
            plt.plot(x1Line, x2Line(x1Line, weight[0], weight[1], weight[2]), alpha=alpha, linestyle='solid',
                     label=str(idx), linewidth=2.0)
        plt.legend(loc='best', shadow=True)
data = []
with open('data.txt') as file:
for line in file:
Adaline = AdalineEstimator(eta=0.01, n_iterations=200, random_state=10)
Adaline.fit(X, y)
And the dataset is here: https://pastebin.com/Vziav3Q9
The error message appears somewhere near the beginning of the output (I am using PyCharm at the moment; not sure if that is related).
overflow encountered in double_scalars
error=(y_ -s)**2
And then:
RuntimeWarning: invalid value encountered in multiply
self.w+=self.eta * x_ * (y_-s)
How can I fix this?
This is an example:
S is: 1.9288464662803013e+290
[0. 0. 0. 0. 0.]
S is: 0.0
[ 433000. 18000. 6369000. 0. 0.]
S is: -8.776351721574362e+301
[ 5000. 0. 26000. 0. 0.]
S is: inf
[0. 0. 0. 0. 0.]
S is: nan
S is the weighted x:
Eta should be lowered because the weights become too large (I first thought that the inputs were too large, but they aren't larger than one million).
eta=0.00000001 does it. 100% correct (400 iterations needed).
Dear jax experts I need your kind help.
Here is a working example (I have followed the advice to simplify my code, although I am an expert neither in JAX nor in Python, so I cannot guess what lies at the heart of the mechanism involved in vmap).
def jax_kernel(rng_key, logpdf, position, log_prob):
    """Propose a Gaussian random-walk move from ``position`` and score it.

    The proposal is ``position`` plus standard-normal noise scaled by 0.1,
    drawn with the first key obtained from splitting ``rng_key``. The
    ``log_prob`` argument is accepted but not used: no accept/reject step is
    performed in this simplified kernel.

    :return: ``(proposal, logpdf(proposal))``
    """
    noise_key, _ = jax.random.split(rng_key)
    step = jax.random.normal(noise_key, shape=position.shape) * 0.1
    candidate = position + step
    return candidate, logpdf(candidate)
def jax_sampler(rng_key, n_samples, logpdf, initial_position):
    """Run one chain of ``n_samples`` proposals and return every position.

    :param rng_key: PRNG key for this chain
    :param n_samples: number of positions stored for the chain
    :param logpdf: callable mapping a position to its log-density
    :param initial_position: array whose shape fixes the per-sample shape
    :return: array of shape ``(n_samples,) + initial_position.shape``
    """
    def mh_update(i, state):
        key, positions, log_prob = state
        _, key = jax.random.split(key)
        print(f"mh_update: positions[{i-1}]:",jnp.asarray(positions[i-1]))
        new_position, new_log_prob = jax_kernel(key,logpdf,positions[i-1],log_prob)
        # Store the new sample. JAX arrays are immutable, so the functional
        # update returns a new array that must be threaded through the state;
        # without this the chain would never leave its starting row.
        positions = positions.at[i].set(new_position)
        return (key, positions, new_log_prob)

    # all positions structure should be set before lax.fori_loop
    # NOTE: only the shape of initial_position is used here; row 0 of
    # all_positions stays at the zeros it is created with.
    print("initial_position shape:",initial_position.shape)
    all_positions = jnp.zeros((n_samples,)+initial_position.shape)
    print("all_positions init:",all_positions.shape)
    logp = logpdf(all_positions[0])

    # use of a for-loop to be able to debug mh_update instead of a jax.fori_loop
    initial_state = (rng_key,all_positions, logp)
    val = initial_state
    for i in range(1, n_samples):
        val = mh_update(i, val)
    rng_key, all_positions, log_prob = val

    # return all the positions of the parameters (n_chains, n_samples, n_dim)
    return all_positions
def func(par):
    """Return the summed normal log-density of ``yi`` around a linear model.

    The model is ``par[0] + par[1] * xi`` with scale ``par[2]``, where ``xi``
    is a fresh draw of 10 uniform values on every call.
    """
    # NOTE(review): `yi` and `sci_stats` are not defined in this snippet;
    # presumably `yi` is a module-level array of 10 observations and
    # `sci_stats` is `scipy.stats` — confirm.
    xi = jnp.asarray(sci_stats.uniform.rvs(size=10))
    intercept, slope, sigma = par[0], par[1], par[2]
    val = xi*slope+intercept
    log_terms = jax.scipy.stats.norm.logpdf(x=val,loc=yi,scale=sigma)
    return jnp.sum(log_terms)
# Problem sizes
n_dim = 3      # number of parameters ie. (a,b,s)
n_samples = 5  # number of samples per chain
n_chains = 4   # number of MCMC chains

# One PRNG key per chain, all derived from a single seed.
rng_key = jax.random.PRNGKey(42)
rng_keys = jax.random.split(rng_key, n_chains)

# Each column of initial_position belongs to one chain.
initial_position = jnp.ones((n_dim, n_chains))
print("main initial_position shape",initial_position.shape)

# Map the sampler over the keys (axis 0) and the starting positions (axis 1);
# n_samples and the log-density are shared by every chain.
run = jax.vmap(
    jax_sampler,
    in_axes=(0, None, None, 1),
    out_axes=0,
)
all_positions = run(rng_keys,n_samples,lambda p: func(p),initial_position)
My question concerns how the dimensions evolve in print(f"mh_update: positions[{i-1}]:",jnp.asarray(positions[i-1])). I do not understand why positions[i-1] starts with dimension n_dim and then switches to n_chains x n_dim.
Thanks in advance for your comments.
Here is the complete output:
main initial_position shape (3, 4)
initial_position shape: (3,)
all_positions init: (5, 3)
mh_update: positions[0]: [1. 2. 2.]
mh_update: positions[1]: Traced<ShapedArray(float32[3])>with<BatchTrace(level=1/0)>
with val = DeviceArray([[0.9354116 , 1.7876872 , 1.8443539 ],
[0.9844745 , 2.073029 , 1.9511036 ],
[0.98202926, 2.0109322 , 2.094176 ],
[0.9536771 , 1.9731759 , 2.093319 ]], dtype=float32)
batch_dim = 0
mh_update: positions[2]: Traced<ShapedArray(float32[3])>with<BatchTrace(level=1/0)>
with val = DeviceArray([[1.0606856, 1.6707807, 1.8377957],
[1.0465866, 1.9754674, 1.7009288],
[1.1107644, 2.0142047, 2.190575 ],
[1.0089972, 1.9953227, 1.996874 ]], dtype=float32)
batch_dim = 0
mh_update: positions[3]: Traced<ShapedArray(float32[3])>with<BatchTrace(level=1/0)>
with val = DeviceArray([[1.0731456, 1.644405 , 2.1343162],
[1.0599504, 2.0121546, 1.6867112],
[1.0585173, 1.9661485, 2.1573594],
[1.1213307, 1.9335203, 1.9683584]], dtype=float32)
batch_dim = 0
all_positions: [[[1. 2. 2. ]
[0.9354116 1.7876872 1.8443539 ]
[1.0606856 1.6707807 1.8377957 ]
[1.0731456 1.644405 2.1343162 ]
[1.0921828 1.5742197 2.058759 ]]
[[1. 2. 2. ]
[0.9844745 2.073029 1.9511036 ]
[1.0465866 1.9754674 1.7009288 ]
[1.0599504 2.0121546 1.6867112 ]
[1.0835105 2.0051234 1.4766487 ]]
[[1. 2. 2. ]
[0.98202926 2.0109322 2.094176 ]
[1.1107644 2.0142047 2.190575 ]
[1.0585173 1.9661485 2.1573594 ]
[1.1728328 1.981367 2.180744 ]]
[[1. 2. 2. ]
[0.9536771 1.9731759 2.093319 ]
[1.0089972 1.9953227 1.996874 ]
[1.1213307 1.9335203 1.9683584 ]
[1.1148386 1.9598911 2.1721165 ]]]
In the first iteration, you print a concrete array that you have constructed within a vmapped function. It is a float32 array of shape (3,).
After the first iteration, you've constructed a new array via operations on a vmapped input. When you vmap an input like this, JAX replaces your input array with a tracer that is an abstract representation of your input; the printed value looks like this:
Traced<ShapedArray(float32[3])>with<BatchTrace(level=1/0)>
with val = DeviceArray([[1.0731456, 1.644405 , 2.1343162],
[1.0599504, 2.0121546, 1.6867112],
[1.0585173, 1.9661485, 2.1573594],
[1.1213307, 1.9335203, 1.9683584]], dtype=float32)
batch_dim = 0
The float32[3] indicates that this tracer represents an array of float32 values of shape (3,): that is, it still has the same type and shape as in the first iteration. But in this case it is not a concrete array with three elements, it is a batched tracer representing each iteration of the vmapped input. The power of the vmap transform is that JAX effectively tracks all implied iterations of the vmapped computation in one pass: in the tracer representation, the rows of val effectively show you the intermediate values for all the vmapped iterations.
For more understanding of how JAX tracing works, a good read is How To Think In JAX in the JAX documentation.
With the following code, I am trying to make cross-validation, splitting into k = 5 folds, and then picking one of those for validation, and the remaining for training.
# size of the different sets:
# features (800,2)
# labels (800,)
lambdaValues = [0.001, 0.01, 0.1, 1, 10, 100]
nFolds = 5
features = np.asarray(features)
labels = np.asarray(labels)
foldSize = len(features) // nFolds

for lam in lambdaValues:
    for fold in range(nFolds):
        # perform k-fold split (here a five-fold split)
        # Rows [start, stop) are held out for validation; the last fold also
        # takes any remainder when the data does not divide evenly.
        start = fold * foldSize
        stop = start + foldSize if fold < nFolds - 1 else len(features)
        held_out = np.arange(start, stop)

        # Select by index from the untouched arrays instead of deleting rows
        # one at a time, so indices never run past a shrinking array.
        pick_validation = features[held_out]
        validation_labels = labels[held_out]
        pick_training = np.delete(features, held_out, axis=0)
        training_labels = np.delete(labels, held_out, axis=0)

        # do something (cross-validate) with `lam` on this split

        print(pick_training.shape)    # expected size (640, 2)
        print(pick_validation.shape)  # expected size (160, 2)
However, right now when I run it (still missing some actual computations) I get the following error:
I can't quite understand why this happens, as the values of n, k are correct, and the size of pick_validation is (160,2) and pick_training is (640,2).
Can anyone hint to me why it would say that index 720 is out of bounds? (given the sizes of the arrays above where neither is of size 720).
I need to create a generator for my data to pass into my RNN training function. I have a list of patient samples, where each sample is a time series of length ni (which varies, annoyingly) in three dimensions, and I want to create batches of data where each sample in a batch only belongs to a single patient but each batch may contain multiple patient samples. Doing it this way should maximise the number of samples I can train using with no consequences as my RNN is not stateful. At first I had the following function
def dataIterator(rawDataList, config):
    """Yield ``(x, y)`` training windows from each array in ``rawDataList``.

    Each 2-D array is cut into ``config.batchSize`` consecutive row-blocks of
    equal length (trailing rows that do not fit are dropped). Windows of
    ``config.nSteps`` time points are then yielded across all blocks, with
    ``y`` being ``x`` shifted forward by one time point.

    :param rawDataList: iterable of arrays of shape (time points, width)
    :param config: object exposing ``batchSize`` and ``nSteps``
    :raises ValueError: when an array is too short to give a single window
    """
    batchSize, nSteps = config.batchSize, config.nSteps
    for rawData in rawDataList:
        dataLen, dataWidth = rawData.shape
        batchLen = dataLen // batchSize
        # Copy the usable prefix as float32 and fold it into
        # (block, time, width); row-major order keeps each block contiguous.
        data = np.array(rawData[:batchSize * batchLen], dtype=np.float32)
        data = data.reshape(batchSize, batchLen, dataWidth)
        epochSize = (batchLen - 1) // nSteps
        # `< 1` also catches batchLen == 0, where the floor division goes
        # negative and the data would otherwise be skipped silently.
        if epochSize < 1:
            raise ValueError('epoch_size == 0')
        for i in range(epochSize):
            lo, hi = i * nSteps, (i + 1) * nSteps
            x = data[:, lo:hi, :]
            y = data[:, lo + 1:hi + 1, :]
            yield (x, y)
However, this trims each of the patient samples in order to fit the batch size. So I want something that creates all possible batches, including the undersized one at the end. However, my unfamiliarity with generators has left me pretty confused. So far I've worked out that it's going to have to use modulo arithmetic, but exactly how I'm not sure, so I've only got to this point:
def dataIterator(data, batchSize=batchSize, nSteps=nSteps, nDimensions=3):
nTimePoints = sum([len(x) for x in data])
totalBatchLen = 1+(nTimePoints-1)//batchSize
newData = np.zeros([batchSize, totalBatchLen, nDimensions])
for i in xrange(batchSize):
Here's a short example to show how I would solve the problem without using generators
import numpy as np
nPatients = 3
tsLength = 5
nDimensions = 3
rnnTSLength = 3
batchSize = 3

inputData = np.random.random((nPatients, tsLength, nDimensions))
# Scale patients 1 and 2 so each patient is recognisable in the output.
inputData[1, :, :] *= 10
inputData[2, :, :] *= 100

# All sliding windows of length rnnTSLength, patient by patient, so a window
# never mixes time points from two patients.
outputData = [
    inputData[patient, i:i + rnnTSLength, :]
    for patient in range(nPatients)
    for i in range(tsLength - rnnTSLength)
]

# Group consecutive windows into batches of batchSize.
npOutput = np.array([
    outputData[start:start + batchSize]
    for start in range(0, len(outputData), batchSize)
])
print(npOutput)
Which produces:
[[[[ 3.74540119e-01 9.50714306e-01 7.31993942e-01]
[ 5.98658484e-01 1.56018640e-01 1.55994520e-01]
[ 5.80836122e-02 8.66176146e-01 6.01115012e-01]]
[[ 5.98658484e-01 1.56018640e-01 1.55994520e-01]
[ 5.80836122e-02 8.66176146e-01 6.01115012e-01]
[ 7.08072578e-01 2.05844943e-02 9.69909852e-01]]
[[ 1.83404510e+00 3.04242243e+00 5.24756432e+00]
[ 4.31945019e+00 2.91229140e+00 6.11852895e+00]
[ 1.39493861e+00 2.92144649e+00 3.66361843e+00]]]
[[[ 4.31945019e+00 2.91229140e+00 6.11852895e+00]
[ 1.39493861e+00 2.92144649e+00 3.66361843e+00]
[ 4.56069984e+00 7.85175961e+00 1.99673782e+00]]
[[ 6.07544852e+01 1.70524124e+01 6.50515930e+00]
[ 9.48885537e+01 9.65632033e+01 8.08397348e+01]
[ 3.04613769e+01 9.76721140e+00 6.84233027e+01]]
[[ 9.48885537e+01 9.65632033e+01 8.08397348e+01]
[ 3.04613769e+01 9.76721140e+00 6.84233027e+01]
[ 4.40152494e+01 1.22038235e+01 4.95176910e+01]]]]
Which as you can see has two batches of size three, both of which contain two different 'patients' in them, but the time series for each 'patient' do not overlap.
It's not exactly clear what you are looking for. A small sample of input and desired output would help. Nevertheless, I'll take a stab at what I think you are asking:
def dataIterator(data, batchSize=batchSize):
    """Yield each patient's series in consecutive slices of ``batchSize``.

    Slices never span two patients; the last slice of a patient may be
    shorter than ``batchSize``.
    """
    for series in data:
        offsets = range(0, len(series), batchSize)
        for lo in offsets:
            hi = lo + batchSize
            yield series[lo:hi]
I have a neural network (NN) which works perfectly when applied to a single data set. However if I want to run the NN on, for example, one set of data and then create a new instance of the NN to run on different set of data (or even the same set again) then the new instance will produce completely incorrect predictions.
For example, training on an XOR pattern:
data = [[[0,0], [0]],[[0,1], [0]],[[1,0], [0]],[[1,1], [1]]]
# NOTE(review): `test` is not defined in the listing; it is assumed to be the
# input half of every training pattern — confirm.
test = [pattern[0] for pattern in data]
n = NN(2, 3, 1) # Create a neural network with 2 input, 3 hidden and 1 output nodes
n.train(data,500,0.5,0) # Train it for 500 iterations with learning rate 0.5 and momentum 0
prediction = np.array([n.runNetwork(row)[0] for row in test])
print(prediction)

# Now do the same thing again but with a new instance and new version of the data.
data2 = [[[0,0], [0]],[[0,1], [0]],[[1,0], [0]],[[1,1], [1]]]
test2 = [pattern[0] for pattern in data2]
p = NN(2, 3, 1)
# A fresh instance starts from new random weights and must be trained too.
p.train(data2,500,0.5,0)
prediction2 = np.array([p.runNetwork(row)[0] for row in test2])
print(prediction2)
Will output:
[-0.01 -0. -0.06 0.97]
[ 0. 0. 1. 1.]
Notice that the first prediction is quite good whereas the second is completely wrong, and I can't see anything wrong with the class:
import math
import random
import itertools
import numpy as np
def rand(a, b):
    """Return a random float drawn uniformly between ``a`` and ``b``."""
    return random.uniform(a, b)
def sigmoid(x):
    """Squashing function used by the network: the hyperbolic tangent."""
    squashed = math.tanh(x)
    return squashed
def dsigmoid(y):
    """Derivative of tanh expressed through its output ``y``: ``1 - y**2``."""
    squared = y**2
    return 1.0 - squared
class NN:
    """Feed-forward network with one hidden layer, trained by online backprop.

    Both layers use ``sigmoid``/``dsigmoid`` as activation and derivative.
    The input and hidden layers each carry one extra bias node whose
    activation stays at 1.0.
    """

    def __init__(self, ni, nh, no):
        """Create a network with ``ni`` input, ``nh`` hidden, ``no`` output nodes."""
        # number of input, hidden, and output nodes
        self.ni = ni + 1  # +1 for bias node
        self.nh = nh + 1
        self.no = no

        # activations for nodes
        self.ai = [1.0]*self.ni
        self.ah = [1.0]*self.nh
        self.ao = [1.0]*self.no

        # create weights (rows=number of features, columns=number of processing nodes)
        # Values are drawn uniformly from [-5, 5] with the `random` module, in
        # row-major order, so random.seed() makes an instance reproducible.
        self.wi = np.array(
            [[random.uniform(-5, 5) for _ in range(self.nh)] for _ in range(self.ni)],
            dtype=float).reshape(self.ni, self.nh)
        self.wo = np.array(
            [[random.uniform(-5, 5) for _ in range(self.no)] for _ in range(self.nh)],
            dtype=float).reshape(self.nh, self.no)

        # last change in weights for momentum
        self.ci = np.zeros((self.ni, self.nh))
        self.co = np.zeros((self.nh, self.no))

    def runNetwork(self, inputs):
        """Run a forward pass and return the outputs rounded to 2 decimals.

        The unrounded activations are kept in ``self.ai``, ``self.ah`` and
        ``self.ao`` for use by ``backPropagate``.

        :raises ValueError: if ``inputs`` does not have one value per input node
        """
        if len(inputs) != self.ni-1:
            raise ValueError('wrong number of inputs')

        # input activations (the trailing bias node keeps its 1.0)
        for i in range(self.ni-1):
            self.ai[i] = inputs[i]

        # hidden activations (the trailing bias node keeps its 1.0)
        for j in range(self.nh-1):
            total = 0.0
            for i in range(self.ni):
                total = total + self.ai[i] * self.wi[i][j]
            self.ah[j] = sigmoid(total)

        # output activations
        for k in range(self.no):
            total = 0.0
            for j in range(self.nh):
                total = total + self.ah[j] * self.wo[j][k]
            self.ao[k] = sigmoid(total)

        ao_simplified = [round(a, 2) for a in self.ao[:]]
        return ao_simplified

    def backPropagate(self, targets, N, M):
        """Update the weights from the last forward pass.

        :param targets: desired output values, one per output node
        :param N: learning rate
        :param M: momentum factor
        :return: half the summed squared error of the last forward pass
        :raises ValueError: if ``targets`` does not have one value per output node
        """
        if len(targets) != self.no:
            raise ValueError('wrong number of target values')

        # calculate error terms for output
        output_deltas = [0.0] * self.no
        for k in range(self.no):
            error = targets[k]-self.ao[k]
            output_deltas[k] = dsigmoid(self.ao[k]) * error

        # calculate error terms for hidden
        hidden_deltas = [0.0] * self.nh
        for j in range(self.nh):
            error = 0.0
            for k in range(self.no):
                error = error + output_deltas[k]*self.wo[j][k]
            hidden_deltas[j] = dsigmoid(self.ah[j]) * error

        # update output weights
        for j in range(self.nh):
            for k in range(self.no):
                change = output_deltas[k]*self.ah[j]
                self.wo[j][k] = self.wo[j][k] + N*change + M*self.co[j][k]
                self.co[j][k] = change

        # update input weights
        for i in range(self.ni):
            for j in range(self.nh):
                change = hidden_deltas[j]*self.ai[i]
                self.wi[i][j] = self.wi[i][j] + N*change + M*self.ci[i][j]
                self.ci[i][j] = change

        # calculate error
        error = 0.0
        for k in range(len(targets)):
            error = error + 0.5*(targets[k]-self.ao[k])**2
        return error

    def train(self, patterns, iterations=1000, N=0.5, M=0.1):
        """Train on ``patterns``, a sequence of ``[inputs, targets]`` pairs.

        :param patterns: training pairs
        :param iterations: number of passes over ``patterns``
        :param N: learning rate
        :param M: momentum factor
        """
        for i in range(iterations):
            error = 0.0
            for p in patterns:
                inputs = p[0]
                targets = p[1]
                # The forward pass must run first: backPropagate works on the
                # activations stored by runNetwork for this very pattern.
                self.runNetwork(inputs)
                error = error + self.backPropagate(targets, N, M)
            if i % 100 == 0:  # Prints error every 100 iterations
                print('error %-.5f' % error)
Any help would be greatly appreciated!
Your error -- if there is one -- doesn't have anything to do with the class. As @Daniel Roseman suggested, the natural guess would be that it was a class/instance variable issue, or maybe a mutable default argument, or multiplication of a list, or something similar: the most common causes of mysterious behaviour.
Here, though, you're getting different results only because you're using different random numbers each time. If you random.seed(0) before you call NN(2,3,1), you get exactly the same results:
error 2.68110
error 0.44049
error 0.39256
error 0.26315
error 0.00584
[ 0.01 0.01 0.07 0.97]
error 2.68110
error 0.44049
error 0.39256
error 0.26315
error 0.00584
[ 0.01 0.01 0.07 0.97]
I can't judge whether your algorithm is right. Incidentally, I think your rand function is reinventing random.uniform.