This post is directed at users who have used the DeepXDE library. Could you kindly help me find a solution to my problem?
I followed the DeepONet operator-learning example trained on a dataset generated from a GRF (from the tutorial website link). Now I want to apply the trained model to a known pair of functions such as cos(x) and sin(x). I will generate my input field data as x = np.arange(0, 4*np.pi, 0.1) and u = np.cos(x); then I want to plot the predicted antiderivative of u and compare it with the exact antiderivative, np.sin(x).
What additional code shall I implement in order to do so?
The following code runs well on my Colab after installing the DeepXDE library with pip. Now I just want to extend it and check whether the trained model works on my custom functions or not, for example cos(x), sin(x), etc. Thanks for the help!
import deepxde as dde
import matplotlib.pyplot as plt
import numpy as np
# Load dataset
d = np.load("antiderivative_aligned_train.npz", allow_pickle=True)
X_train = (d["X"][0].astype(np.float32), d["X"][1].astype(np.float32))
y_train = d["y"].astype(np.float32)
d = np.load("antiderivative_aligned_test.npz", allow_pickle=True)
X_test = (d["X"][0].astype(np.float32), d["X"][1].astype(np.float32))
y_test = d["y"].astype(np.float32)
data = dde.data.TripleCartesianProd(
X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test
)
# Choose a network
m = 100
dim_x = 1
net = dde.nn.DeepONetCartesianProd(
[m, 40, 40],
[dim_x, 40, 40],
"relu",
"Glorot normal",
)
# Define a Model
model = dde.Model(data, net)
# Compile and Train
model.compile("adam", lr=0.001, metrics=["mean l2 relative error"])
losshistory, train_state = model.train(iterations=10000)
# Plot the loss trajectory
dde.utils.plot_loss_history(losshistory)
plt.show()
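For concreteness, here is a rough sketch of the evaluation step I have in mind. I'm assuming the tutorial's aligned dataset uses m = 100 equispaced sensor points on [0, 1] (so I sample cos on that interval rather than on np.arange(0, 4*np.pi, 0.1), which lies outside the training domain), and that model.predict accepts the same (branch, trunk) tuple format as X_train. Is this the right approach?
# sketch: evaluate the trained DeepONet on u(x) = cos(x)
x_sensors = np.linspace(0, 1, m, dtype=np.float32)                  # m = 100 branch sensor locations
u_branch = np.cos(x_sensors).reshape(1, m)                          # one input function, shape (1, m)
x_trunk = np.linspace(0, 1, 200, dtype=np.float32).reshape(-1, 1)   # query points, shape (n, 1)
y_pred = model.predict((u_branch, x_trunk))                         # predicted antiderivative, shape (1, n)
plt.plot(x_trunk, y_pred[0], label="DeepONet prediction")
plt.plot(x_trunk, np.sin(x_trunk), "--", label="sin(x) (exact antiderivative)")
plt.legend()
plt.show()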
I want to convert code written in Python into MATLAB code. May I know if it is possible to do that? I also wonder how we can use Python libraries in MATLAB. Please share the procedure for doing the conversion.
Here is the Data I used:
https://drive.google.com/open?id=1GLm87-5E_6YhUIPZ_CtQLV9F9wcGaTj2
Here is my code in Python:
# import libraries
import numpy as np
import pandas as pd
import os
import tensorflow as tf
import matplotlib.pyplot as plt
import random
from scipy import signal
from tensorflow.keras.layers import Dense, Activation
from tensorflow.keras.models import Sequential
from tensorflow import set_random_seed
from tensorflow.keras.initializers import glorot_uniform
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from sklearn.metrics import accuracy_score
from importlib import reload
# useful pandas display settings
pd.options.display.float_format = '{:.3f}'.format
# useful functions
def plot_history(history, metrics_to_plot):
"""
Function plots history of selected metrics for fitted neural net.
"""
# plot
for metric in metrics_to_plot:
plt.plot(history.history[metric])
# name X axis informatively
plt.xlabel('epoch')
# name Y axis informatively
plt.ylabel('metric')
# add informative legend
plt.legend(metrics_to_plot)
# plot
plt.show()
def plot_fit(y_true, y_pred, title='title'):
"""
Function plots true values and predicted values, sorted in increasing order by true values.
"""
# create one dataframe with true values and predicted values
results = y_true.reset_index(drop=True).merge(pd.DataFrame(y_pred), left_index=True, right_index=True)
# rename columns informatively
results.columns = ['true', 'prediction']
# sort for clarity of visualization
results = results.sort_values(by=['true']).reset_index(drop=True)
# plot true values vs predicted values
results.plot()
# adding scatter on line plots
plt.scatter(results.index, results.true, s=5)
plt.scatter(results.index, results.prediction, s=5)
# name X axis informatively
plt.xlabel('obs sorted in ascending order with respect to true values')
# add customizable title
plt.title(title)
# plot
plt.show();
def reset_all_randomness():
"""
Function assures reproducibility of NN estimation results.
"""
# reloads
reload(tf)
reload(np)
reload(random)
# seeds - for reproducibility
os.environ['PYTHONHASHSEED']=str(984797)
random.seed(984797)
set_random_seed(984797)
np.random.seed(984797)
my_init = glorot_uniform(seed=984797)
return my_init
def give_me_mse(true, prediction):
"""
This function returns mse for 2 vectors: true and predicted values.
"""
return np.mean((true-prediction)**2)
# Importing the dataset
X = pd.read_excel(r"C:\filelocation\Data.xlsx","Sheet1").values
y = pd.read_excel(r"C:\filelocation\Data.xlsx","Sheet2").values
# Importing the experiment data
Data = pd.read_excel(r"C:\filelocation\Data.xlsx","Sheet1")
v = pd.DataFrame(Data, columns= ['v']).values
c = pd.DataFrame(Data, columns= ['c']).values
ird = pd.DataFrame(Data, columns= ['ird']).values
tmp = pd.DataFrame(Data, columns= ['tmp']).values
# Data Preparation
ird = ird.ravel()
tmp = tmp.ravel()
ir = np.nanmax(ird)
tp = np.nanmax(tmp)
p = v*c
p = p.ravel()
peaks, _ = signal.find_peaks(p)
nop = len(peaks)
pv = p.max()
#Experimental Data for testing
E_data = np.array([[ir,tp,pv,nop]])
#importing some more libraries
from sklearn.preprocessing import LabelEncoder
from keras.utils import np_utils
# encode class values as integers
encoder = LabelEncoder()
encoder.fit(np.ravel(y))
y_encoded = encoder.transform(np.ravel(y))
# convert integers to dummy variables (i.e. one hot encoded)
y_dummy = np_utils.to_categorical(y_encoded)
# reset_all_randomness - for reproducibility
my_init = reset_all_randomness()
# Splitting the dataset into the Training set and Test set
X_train, X_test, y_train, y_test, y_train_dummy, y_test_dummy = train_test_split(X, y, y_dummy, test_size = 0.3, random_state = 20)
# Feature Scaling
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
E_data = sc.transform(E_data)
# Initialising the ANN
model0 = Sequential()
# Adding the input layer and the first hidden layer
model0.add(Dense(units = 160, activation = 'tanh', input_dim = 4, kernel_initializer=my_init))
# Adding the second hidden layer
model0.add(Dense(units = 49, activation = 'tanh', kernel_initializer=my_init))
# Adding the third hidden layer
model0.add(Dense(units = 24, activation = 'tanh', kernel_initializer=my_init))
# Adding the fourth hidden layer
model0.add(Dense(units = 15, activation = 'tanh', kernel_initializer=my_init))
# Adding output layer
model0.add(Dense(units = 6, activation = 'softmax', kernel_initializer=my_init))
# Set up Optimizer
Optimizer = tf.train.AdamOptimizer(learning_rate=0.001, beta1=0.9, beta2=0.99)
# Compiling the ANN
model0.compile(optimizer = Optimizer, loss = 'categorical_crossentropy', metrics=['accuracy','categorical_crossentropy','mse'])
# Fitting the ANN to the Train set, at the same time observing quality on Valid set
history = model0.fit(X_train, y_train_dummy, validation_data=(X_test, y_test_dummy), batch_size = 100, epochs = 1500)
# Generate prediction for all Train, Valid set and Experimental set
y_train_pred_model0 = model0.predict(X_train)
y_test_pred_model0 = model0.predict(X_test)
y_exp_pred_model0 = model0.predict(E_data)
# find final prediction by taking class with highest probability
y_train_pred_model0 = np.array([[list(x).index(max(list(x))) + 1] for x in y_train_pred_model0])
y_test_pred_model0 = np.array([[list(x).index(max(list(x))) + 1] for x in y_test_pred_model0])
y_exp_pred_model0 = np.array([[list(x).index(max(list(x))) + 1] for x in y_exp_pred_model0])
# check what metrics are in fact available in history
history.history.keys()
# Inverse scaling
X_train_inverse = sc.inverse_transform(X_train)
X_test_inverse = sc.inverse_transform(X_test)
E_data_inverse = sc.inverse_transform(E_data)
#Plots
print('#######################################################################')
# look at model fitting history
plot_history(history, ['mean_squared_error', 'val_mean_squared_error'])
plot_history(history, ['categorical_crossentropy', 'val_categorical_crossentropy'])
plot_history(history, ['acc', 'val_acc'])
# look at model fit quality
plot_fit(pd.DataFrame(y_train), y_train_pred_model0, 'Fit on train data')
plot_fit(pd.DataFrame(y_test), y_test_pred_model0, 'Fit on test data')
#Results
print('#######################################################################')
print('=============Mean Squared Error============')
print('MSE on train data is: {}'.format(give_me_mse(y_train, y_train_pred_model0)))
print('MSE on test data is: {}'.format(give_me_mse(y_test, y_test_pred_model0)))
print('#######################################################################')
print('================Accuracy===================')
print('Accuracy of ANN is: {} Percentage'.format((accuracy_score(y_test, y_test_pred_model0))*100))
print('#######################################################################')
print('========Result of Test Data set is=========')
for i in range(len(y_test)):
print('%s => %d (expected %s)' % (X_test_inverse[i].tolist(), y_test_pred_model0[i], y_test[i].tolist()))
print('#######################################################################')
print('====Result of Experimental Data set is=====')
print('%s => %d' % (E_data_inverse, y_exp_pred_model0))
There is no "direct" way to convert Python code to MATLAB code.
What you can do is translate the approach (the algorithm) and write the MATLAB code from scratch,
or, what I think would be preferable for you, call the Python script directly from MATLAB using their API.
Here is the link for further reading: https://in.mathworks.com/help/matlab/call-python-libraries.html
For example:
>> py.math.sqrt(4)
ans =
     2
To run your own function, you can create a file in your current MATLAB working directory. Here is the file 'hello.py', which contains these two lines:
def world():
return 'hello world'
Then in MATLAB:
>> py.hello.world()
ans =
  Python str with no properties.
    hello world
If you run into errors, make sure you're using a supported version of Python, and add
pyversion <path_to_executable>
at the start of your MATLAB script.
Although I'm not sure how well it will work considering all the Python libraries you're importing (SciPy, TensorFlow, etc.).
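If you go that route with this script, the easiest thing is usually to wrap the whole pipeline in one Python function inside a module and call just that entry point from MATLAB. A rough sketch (the module name train_model.py and the function run are illustrative, not something your code already defines):
# train_model.py - illustrative wrapper so MATLAB only has to call one function:
#   >> result = py.train_model.run('C:\filelocation\Data.xlsx');
import pandas as pd

def run(xlsx_path):
    """Load the Excel sheets, run the training code from the question,
    and return plain Python types that MATLAB can convert."""
    X = pd.read_excel(xlsx_path, "Sheet1").values
    y = pd.read_excel(xlsx_path, "Sheet2").values
    # ... the rest of the training/prediction code from the question goes here ...
    return {"n_samples": int(X.shape[0]), "n_targets": int(y.shape[0])}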
I have been googling all day trying to find an example of the functional input for two parallel datasets in Keras but I can't find one.
My problem is that I have dataset 1, a set of images of people performing different actions. It is formatted as a csv as follows:
image_url,class
example1.png,BRUSH_TEETH
example2.png,BRUSH_TEETH
...
example10000.png,DANCING
I will preprocess these and make them all 64x64 in size. My second dataset will be Leap Motion data, where every row is information captured at the same time as the corresponding row in dataset 1
(ignore the column names and values; I'm not sure what they will look like yet as I haven't gathered the data, but there will be one row each, parallel to dataset 1 above)
x,y,z,a,b,c,d,class
1,2,3,4,5,6,7,BRUSH_TEETH
8,9,10,3,1,3,4,BRUSH_TEETH
...
1,2,3,4,5,6,7,DANCING
I have been reading about the functional API, and it seems as if I can run the data object from dataset 1 through a CNN while running the same-index data object from dataset 2 through, for example, a deep MLP. Then, using merge or concatenate, I can bring the two outputs from their final layers into another deep MLP, and finally link this merged model to an output.
Forgetting about the CNN for a minute, a simple example of merging is given by the API as follows:
import keras
input1 = keras.layers.Input(shape=(16,))
x1 = keras.layers.Dense(8, activation='relu')(input1)
input2 = keras.layers.Input(shape=(32,))
x2 = keras.layers.Dense(8, activation='relu')(input2)
# equivalent to added = keras.layers.add([x1, x2])
added = keras.layers.Add()([x1, x2])
out = keras.layers.Dense(4)(added)
model = keras.models.Model(inputs=[input1, input2], outputs=out)
My problem is that I need to feed input1 (when in the form of a CNN) the image referenced in the csv, while at the same time feeding input2 the correlating row from the second dataset containing the Leap Motion data. PS: how, in the above, would I continue the model after merging, with two dense layers before the output? Would it simply be this:
x3 = keras.layers.Dense(100)(added)
x3 = keras.layers.Dense(50)(x3)
out = keras.layers.Dense(4)(x3)
Is this possible to perform? If so, I would MASSIVELY appreciate a helping hand, I'm losing my mind trying to get my head around how the two datasets would be kept in sync with one another!
A sample script that I can try out and play with would be excellent, as I'm relatively new to the Keras framework
Thank you very much!
Please check if this is useful. Tested with Keras 2.2.4.
from keras.layers import Conv2D, MaxPooling2D, Input, Dense, Flatten, concatenate
from keras.models import Model
import numpy as np
img_input = Input(shape=(64, 64, 1)) ## branch 1 with image input
x = Conv2D(64, (3, 3))(img_input)
x = Conv2D(64, (3, 3))(x)
x = MaxPooling2D((2, 2))(x)
x = Flatten()(x)
out_a = Dense(64)(x)
num_input = Input(shape=(7,)) ## branch 2 with numerical input
x1 = Dense(8, activation='relu')(num_input)
out_b = Dense(16, activation='relu')(x1)
concatenated = concatenate([out_a, out_b]) ## concatenate the two branches
out = Dense(4, activation='softmax')(concatenated)
model = Model([img_input, num_input], out)
print(model.summary())
model.compile('sgd', 'categorical_crossentropy', ['accuracy'])
### Just for sanity check
X = [np.zeros((1,64,64,1)), np.zeros((1,7))]
y = np.ones((1,4))
model.fit(X, y)
print(model.predict(X))
You can read the input data using Pandas
from PIL import Image
import pandas as pd
def get_num_input():
df = pd.read_csv('num.csv')
columns = list(df.columns)
features = columns[:-1]
cls_name = columns[-1]
X = np.zeros((len(df), len(features)))
Y = list()
for i, row in df.iterrows():
X[i] = row[features]
Y.append(row[cls_name])
return (X, Y)
def get_img_input():
df = pd.read_csv('img.csv')
X_img = np.zeros((len(df), 28, 28)) # change as per image size
Y = list()
for i, row in df.iterrows():
X_img[i] = np.array(Image.open(row['image_url']))
Y.append(row['class'])
return (X_img, Y)
X_num, Y = get_num_input()
X_img, _ = get_img_input() # use one of the Ys
# X feature normalization, convert Y to one-hot representation
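That last comment could be filled in along these lines (just a sketch; StandardScaler and LabelEncoder are one common choice here, not something the code above requires):
from sklearn.preprocessing import StandardScaler, LabelEncoder
from keras.utils import to_categorical

scaler = StandardScaler()
X_num = scaler.fit_transform(X_num)         # normalize the numeric features
X_img = X_img.astype('float32') / 255.0     # scale pixel values to [0, 1]
X_img = X_img[..., np.newaxis]              # add the channel axis expected by Conv2D

encoder = LabelEncoder()
Y_onehot = to_categorical(encoder.fit_transform(Y))  # string labels -> integers -> one-hot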
model.fit() has a 'validation_split' parameter which can be set to 0.3 to create a 70:30 split. model.fit() returns a History object which can be used to plot the accuracy curves, or you can use the TensorBoard callback for live tracking (a small sketch follows the links below).
https://chrisalbon.com/deep_learning/keras/visualize_loss_history/
https://keras.io/callbacks/#tensorboard
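Putting it together, a minimal fit-and-plot sketch (assuming the Y_onehot from the snippet above and the two-input model defined earlier):
import matplotlib.pyplot as plt

history = model.fit([X_img, X_num], Y_onehot,
                    epochs=20, batch_size=32,
                    validation_split=0.3)       # 70:30 train/validation split

plt.plot(history.history['acc'], label='train accuracy')
plt.plot(history.history['val_acc'], label='validation accuracy')
plt.xlabel('epoch')
plt.legend()
plt.show()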
While trying to learn a bit about TensorFlow, I have been building a Variational Autoencoder, which is working; however, I noticed that after training I was getting different results from the two decoders that share the same variables.
I created two decoders: the first I train against my dataset; into the second I eventually want to feed a new Z encoding in order to produce new values.
My check is that I should be able to send the Z values generated from the encoding process to both decoders and get equal results.
I have 2 Decoders (D, D_new). D_new shares the variable scope from D.
Before training, I can send values into the Encoder (E) to generate output values as well as the Z values it generated (Z_gen).
If I use Z_gen as input to D_new before training, then its output is identical to the output of D, which is expected.
After a few iterations of training, however, the output of D compared with D_new begins to diverge (although they are quite similar).
I have pared this down to a simpler version of my code which still reproduces the error. I'm wondering if others have found this to be the case and where I might be able to correct for it.
The below code can be run in a jupyter notebook. I'm using Tensorflow r0.11 and Python 3.5.0
import numpy as np
import tensorflow as tf
import matplotlib
import matplotlib.pyplot as plt
import os
import pylab as pl
mgc = get_ipython().magic
mgc(u'matplotlib inline')
pl.rcParams['figure.figsize'] = (8.0, 5.0)
##-- Helper function Just for visualizing the data
def plot_values(values, file=None):
t = np.linspace(1.0,len(values[0]),len(values[0]))
for i in range(len(values)):
plt.plot(t,values[i])
if file is None:
plt.show()
else:
plt.savefig(file)
plt.close()
def encoder(input, n_hidden, n_z):
with tf.variable_scope("ENCODER"):
with tf.name_scope("Hidden"):
n_layer_inputs = input.get_shape()[1].value
n_layer_outputs = n_hidden
with tf.name_scope("Weights"):
w = tf.get_variable(name="E_Hidden", shape=[n_layer_inputs, n_layer_outputs], dtype=tf.float32)
with tf.name_scope("Activation"):
a = tf.tanh(tf.matmul(input,w))
prevLayer = a
with tf.name_scope("Z"):
n_layer_inputs = prevLayer.get_shape()[1].value
n_layer_outputs = n_z
with tf.name_scope("Weights"):
w = tf.get_variable(name="E_Z", shape=[n_layer_inputs, n_layer_outputs], dtype=tf.float32)
with tf.name_scope("Activation"):
Z_gen = tf.matmul(prevLayer,w)
return Z_gen
def decoder(input, n_hidden, n_outputs, reuse=False):
with tf.variable_scope("DECODER", reuse=reuse):
with tf.name_scope("Hidden"):
n_layer_inputs = input.get_shape()[1].value
n_layer_outputs = n_hidden
with tf.name_scope("Weights"):
w = tf.get_variable(name="D_Hidden", shape=[n_layer_inputs, n_layer_outputs], dtype=tf.float32)
with tf.name_scope("Activation"):
a = tf.tanh(tf.matmul(input,w))
prevLayer = a
with tf.name_scope("OUTPUT"):
n_layer_inputs = prevLayer.get_shape()[1].value
n_layer_outputs = n_outputs
with tf.name_scope("Weights"):
w = tf.get_variable(name="D_Output", shape=[n_layer_inputs, n_layer_outputs], dtype=tf.float32)
with tf.name_scope("Activation"):
out = tf.sigmoid(tf.matmul(prevLayer,w))
return out
Here is where the Tensorflow graph is setup:
batch_size = 3
n_inputs = 100
n_hidden_nodes = 12
n_z = 2
with tf.variable_scope("INPUT_VARS"):
with tf.name_scope("X"):
X = tf.placeholder(tf.float32, shape=(None, n_inputs))
with tf.name_scope("Z"):
Z = tf.placeholder(tf.float32, shape=(None, n_z))
Z_gen = encoder(X,n_hidden_nodes,n_z)
D = decoder(Z_gen, n_hidden_nodes, n_inputs)
D_new = decoder(Z, n_hidden_nodes, n_inputs, reuse=True)
with tf.name_scope("COST"):
loss = -tf.reduce_mean(X * tf.log(1e-6 + D) + (1-X) * tf.log(1e-6 + 1 - D))
train_step = tf.train.AdamOptimizer(0.001, beta1=0.5).minimize(loss)
I'm generating a training set of 3 samples of normally distributed noise with 100 data points each, and then sorting it to make it easier to visualize:
train_data = (np.random.normal(0,1,(batch_size,n_inputs)) + 3) / 6.0
train_data.sort()
plot_values(train_data)
Start up the session:
sess = tf.InteractiveSession()
sess.run(tf.group(tf.initialize_all_variables(), tf.initialize_local_variables()))
Let's just look at what the network initially generates before training...
resultA, Z_vals = sess.run([D, Z_gen], feed_dict={X:train_data})
plot_values(resultA)
Pulling the Z generated values and feeding them to D_new which is reusing the variables from D:
resultB = sess.run(D_new, feed_dict={Z:Z_vals})
plot_values(resultB)
Just for sanity I'll plot the difference between the two to be sure they're the same...
Now run 1000 training epochs and plot the result...
for i in range(1000):
_, resultA, Z_vals = sess.run([train_step, D, Z_gen], feed_dict={X:train_data})
plot_values(resultA)
Now let's feed those same Z values to D_new and plot those results...
resultB = sess.run(D_new, feed_dict={Z:Z_vals})
plot_values(resultB)
They look pretty similar. But (I think) they should be exactly the same. Let's look at the difference...
plot_values(resultA - resultB)
You can see there is some variation now. This becomes much more dramatic with a larger network on more complex data, but still shows up in this simple example.
Any clues as to what's going on?
There are some methods (I don't know which ones specifically) which can be supplied with a seed value. Besides those, I'm not even sure the training process is completely deterministic, especially when a GPU is involved, simply by the nature of parallelization.
See this question.
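For what it's worth, the graph-level seeding in this TensorFlow version looks roughly like this (it makes initialization reproducible, but it doesn't by itself guarantee bit-identical training on a GPU):
import numpy as np
import tensorflow as tf

np.random.seed(0)       # seeds the NumPy-generated training data
tf.set_random_seed(0)   # graph-level seed; set this before any variables are created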
While I don't have a full explanation for the reason why, I was able to resolve my issue by changing:
for i in range(1000):
_, resultA, Z_vals = sess.run([train_step, D, Z_gen], feed_dict={X:train_data})
plot_values(resultA)
resultB = sess.run(D_new, feed_dict={Z:Z_vals})
plot_values(resultB)
plot_values(resultA - resultB)
to...
for i in range(1000):
_, resultA, Z_vals = sess.run([train_step, D, Z_gen], feed_dict={X:train_data})
resultA, Z_vals = sess.run([D, Z_gen], feed_dict={X:train_data})
plot_values(resultA)
resultB = sess.run(D_new, feed_dict={Z:Z_vals})
plot_values(resultB)
plot_values(resultA - resultB)
Note that I simply ran and extracted the result and Z_vals one last time, without the train_step.
The reason I was still seeing problems in my more complex setup was that I had bias variables (even though they were set to 0.0) that were being generated with...
b = tf.Variable(tf.constant(self.bias_k, shape=[n_layer_outputs], dtype=tf.float32))
Variables created with tf.Variable are not managed by tf.variable_scope reuse; only variables created with tf.get_variable participate. So those bias variables were technically not being reused: each decoder presumably got its own copy, and only the copy inside D was updated by training, which would explain why they caused a problem even though they started at 0.0.
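For reference, creating the bias with tf.get_variable instead makes it subject to the same variable_scope reuse as the weights; a sketch (the variable name is illustrative):
# inside decoder(), next to the D_Hidden weight
b = tf.get_variable(name="D_Hidden_b",
                    shape=[n_layer_outputs],
                    dtype=tf.float32,
                    initializer=tf.constant_initializer(0.0))
a = tf.tanh(tf.matmul(input, w) + b)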
I am making use of pybrain to build a network that has 6 input dimensions and one real valued output dimension. The code I use is shown below:
network = buildNetwork(train.indim, 4, train.outdim)
trainer = BackpropTrainer( network, train)
trainer.trainOnDataset(train, 8000)
print 'MSE train', trainer.testOnData(train, verbose = True)
Here, train is of type Dataset.
I want to get the predictions made in trainer.testOnData() as a numpy array. I am able to view the predicted result along with the error but I want it as an array. Is there anyway that this can be done?
Use the activate function of your network:
numpy.array([network.activate(x) for x, _ in train])
Complete example:
from datasets import XORDataSet
from pybrain.tools.shortcuts import buildNetwork
from pybrain.supervised import BackpropTrainer
import numpy
d = XORDataSet()
n = buildNetwork(d.indim, 4, d.outdim, bias=True)
t = BackpropTrainer(n, learningrate=0.01, momentum=0.99, verbose=True)
t.trainOnDataset(d, 1000)
t.testOnData(verbose=True)
print numpy.array([n.activate(x) for x, _ in d])
(Only works in the directory pybrain/examples/supervised/backprop of pybrain because the XORDataSet is required.)
Hey guys, I need a bit of help with my PyBrain code. Everything loads fine, but after it trains the first time, the training error doesn't go down. In fact, it just stays stuck there at exactly 13.3484055174. I've checked my code many times and compared it with other examples, but I consistently get the same problem. I've also already tried changing the number of hidden units, learningrate, momentum, and weightdecay, to no avail. I've checked the parameters: they start off in [-1, 1] and then blow up to ~240-250. I was wondering if anyone can see why it's not working. I'm sure it's a really simple 1-liner that I'm missing.
I'm working on the Kaggle 0-9 digit classification dataset. I've already gotten the random forest to work, but I really want to make this neural network work too. Any help would be greatly appreciated.
#learn digit classification with a neural network
import pybrain
from pybrain.datasets import *
from pybrain.tools.shortcuts import buildNetwork
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.structure.modules import SoftmaxLayer
from pybrain.utilities import percentError
import numpy
print "Importing training and test data"
data = numpy.genfromtxt('trainR.csv', delimiter = ',')
data = data[1:]
traindata = data[:(len(data)/2)]
testdata = data[(len(data)/2)+1:]
print "Importing actual data"
actualdata = numpy.genfromtxt('trainR.csv', delimiter = ',')
print "Adding samples to dataset and setting up neural network"
ds = ClassificationDataSet(784, 10, nb_classes = 10)
for x in traindata:
ds.addSample(tuple(x[1:]),tuple(x[0:1]))
ds._convertToOneOfMany( bounds=[0,1] )
net = buildNetwork(784, 100, 10, bias=True, outclass=SoftmaxLayer)
print "Training the neural network"
trainer = BackpropTrainer(net, dataset=ds, momentum = 0.1,
verbose = True, weightdecay = 0.01)
for i in range(3):
# train the network for 1 epoch
trainer.trainEpochs( 1 )
# evaluate the result on the training and test data
trnresult = percentError( trainer.testOnClassData(), [x[0] for x in traindata] )
# print the result
print "epoch: " + str(trainer.totalepochs) + " train error: " + str(trnresult)
print ""
print "Predicting with the neural network"
answerlist = []
for row in testdata:
answer = numpy.argmax(net.activate(row[1:]))
answerlist.append(answer)
tstresult = percentError(answerlist, [x[0] for x in testdata])
print "Test error: " + str(tstresult)
try changing
ds = ClassificationDataSet(784, 10, nb_classes = 10)
to
ds = ClassificationDataSet(784, 1, nb_classes = 10)
I think the second argument of ClassificationDataSet is the dimension of the targets, rather than the number of classes, which is given by nb_classes. It depends on how your data is organized. The best thing is to enter each target as a single integer for each class and then use _convertToOneOfMany(), as in the sketch below.
It would be useful if you provided your first sample.
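A minimal sketch of that setup, assuming each row of traindata is [label, 784 pixel values]:
ds = ClassificationDataSet(784, 1, nb_classes = 10)
for x in traindata:
    ds.addSample(tuple(x[1:]), (int(x[0]),))   # target is one integer class label per sample
ds._convertToOneOfMany( bounds=[0,1] )         # expand targets to one-hot for the Softmax output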