I am trying to do polynomial regression with PyTorch. First I tried plain linear regression (b + wx):
model_1 = RegressionModel()
W = torch.zeros(1, requires_grad=True)
b = torch.zeros(1, requires_grad=True)
optimizer_1 = torch.optim.SGD([W, b], lr=0.001)

x_train = torch.FloatTensor(dataset.x_data['LSTAT'])
y_train = torch.FloatTensor(dataset.data['target'])

nb_epochs = 10000
for epoch in range(nb_epochs + 1):
    hypothesis = x_train * W + b
    cost = torch.nn.functional.mse_loss(hypothesis, y_train.float())

    optimizer_1.zero_grad()
    cost.backward()
    optimizer_1.step()

    print('Epoch {:4d}/{} W: {:.3f}, b: {:.3f}, Cost: {:.6f}'.format(
        epoch, nb_epochs, W.item(), b.item(), cost.item()))
Then I changed and added some variables to do polynomial regression (b + w1x + w2x^2)
model_2 = RegressionModel()
W1 = torch.zeros(1, requires_grad=True)
W2 = torch.zeros(1, requires_grad=True)
b = torch.zeros(1, requires_grad=True)
optimizer_2 = torch.optim.SGD([W2, W1, b], lr=0.0000099)

x_train = torch.FloatTensor(dataset.x_data['LSTAT'])
y_train = torch.FloatTensor(dataset.data['target'])

nb_epochs = 10000
for epoch in range(nb_epochs + 1):
    hypothesis = b + x_train * W1 + x_train * x_train * W2
    cost = torch.nn.functional.mse_loss(hypothesis, y_train.float())

    optimizer_2.zero_grad()
    cost.backward()
    optimizer_2.step()

    print('Epoch {:4d}/{} W1: {:.3f}, W2: {:.3f}, b: {:.3f}, Cost: {:.6f}'.format(
        epoch, nb_epochs, W1.item(), W2.item(), b.item(), cost.item()))
Can I do polynomial regression like this? If not, I would really appreciate it if you let me know. I'm really new to PyTorch...
Your code should work. When working with larger data, it will be more efficient if you do the regression in a single matrix operation. For that, you need to first pre-compute polynomials of your input features:
x_train_polynomial = torch.stack([x_train, x_train ** 2], dim=1)
To save some lines, you can rewrite the projection as a linear layer:
import torch.nn as nn
projection = nn.Linear(2, 1, bias=True)
In the training loop, you can call:
hypothesis = projection(x_train_polynomial)
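Putting it together, the loop could look like this (a sketch; the epoch count is carried over from your code, and the learning rate is an assumption you would need to tune):

import torch
import torch.nn as nn

projection = nn.Linear(2, 1, bias=True)
optimizer = torch.optim.SGD(projection.parameters(), lr=1e-5)

x_train_polynomial = torch.stack([x_train, x_train ** 2], dim=1)  # (N, 2)
y_train_column = y_train.unsqueeze(1)                             # (N, 1), matches the layer output

nb_epochs = 10000
for epoch in range(nb_epochs + 1):
    hypothesis = projection(x_train_polynomial)                   # single matrix operation
    cost = torch.nn.functional.mse_loss(hypothesis, y_train_column)

    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

The layer's weight and bias play the role of [W1, W2] and b from your version.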
I have built an ANFIS model with TensorFlow for a classification problem. For every epoch I am getting precision and recall of zero. I am using a Gaussian membership function, but when I print sigma it gives 0. I used the code below for training:
## settings
n = X_train.shape[1] # no of input features
m = 2*n # number of fuzzy rules
learning_rate = 0.01
epochs = 1000
################################ train
X_train_t = tf.placeholder(tf.float32, shape=[None, n]) # Train input
y_train_t = tf.placeholder(tf.float32, shape=None) # Train output
mu = tf.get_variable(name="mu", shape=[m * n], initializer=tf.random_normal_initializer(0, 1)) # mean of Gaussian MFS
sigma = tf.get_variable(name="sigma", shape = [m * n], initializer=tf.random_normal_initializer(0, 1)) # std_dev of Gaussian MFS
w = tf.get_variable(name="w", shape= [1, m], initializer=tf.random_normal_initializer(0, 1))
rula = tf.reduce_prod(tf.reshape(tf.exp( -0.5* ((tf.tile(X_train_t, (1, m))- mu)**2) / (sigma**2)),
(-1, m, n)), axis=2) #activations
Y_train_t = tf.reduce_sum(rula*w,axis=1) / tf.clip_by_value(tf.reduce_sum(rula,axis=1), 1e-8, 1e8)
#loss = tf.losses.log_loss(y_train, Y_train) # loss function
loss = tf.losses.sigmoid_cross_entropy(y_train_t, Y_train_t) # loss function
#loss = tf.sqrt(tf.losses.mean_squared_error(y_train, Y_train))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss) # optimizer
################################ test
X_test_t = tf.placeholder(tf.float32, shape=[None, n]) # Test input
y_test_t = tf.placeholder(tf.float32, shape=None) # Test output
rula_test = tf.reduce_prod(tf.reshape(tf.exp( -0.5* ((tf.tile(X_test_t, (1, m))- mu)**2) / (sigma**2)),
(-1, m, n)), axis=2) # rule activation
Y_test_t = tf.reduce_sum(rula_test*w,axis=1) / tf.clip_by_value(tf.reduce_sum(rula_test,axis=1), 1e-8, 1e8)
loss_test = tf.losses.sigmoid_cross_entropy(y_test_t, Y_test_t) # loss function
################################ start session
x_axis = []
tr_loss, te_loss = [],[]
tr_prec, te_prec = [], []
tr_rec, te_rec = [], []
init=tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    for e in range(epochs):
        Y_train, loss_tr, _ = sess.run([Y_train_t, loss, optimizer], feed_dict={X_train_t: X_train, y_train_t: y_train})
        Y_test, loss_te = sess.run([Y_test_t, loss_test], feed_dict={X_test_t: X_test, y_test_t: y_test})
        if (e+1) % 10 == 0:
            x_axis.append(e+1)
            tr_loss.append(loss_tr)
            te_loss.append(loss_te)
            Y_train = np.where(Y_train > 0, 1, 0)
            Y_test = np.where(Y_test > 0, 1, 0)
            prec_tr = precision_score(y_train, Y_train)
            prec_te = precision_score(y_test, Y_test)
            rec_tr = recall_score(y_train, Y_train)
            rec_te = recall_score(y_test, Y_test)
            tr_prec.append(prec_tr)
            te_prec.append(prec_te)
            tr_rec.append(rec_tr)
            te_rec.append(rec_te)
The code is referenced from https://github.com/subhalingamd/ANFIS-diabetes-prediction/blob/main/main.py. I am new to this algorithm. Please help me figure out where I went wrong.
I'm trying to implement linear regression with an RMSProp optimizer from scratch.
Code:
EPOCHS = 100
w3 = tf.Variable(w_vector, dtype=tf.float32)
w4 = tf.Variable(0, dtype=tf.float32)
lr = 1e-5
beta = 0.9
epsilon = 1e-7
momentum = 0.0

for epoch in range(1, EPOCHS + 1):
    mom_w = 0
    mom_b = 0
    mean_square_w = 0
    mean_gradient_w = 0
    mean_square_b = 0
    mean_gradient_b = 0

    y_pred1 = tf.squeeze(tf.matmul(w3, x, transpose_a=True, transpose_b=True) + w4)
    dw3, dw4 = gradients_mse(x, y, y_pred1)

    # My equations for RMSProp
    mean_square_w = beta * mean_square_w + (1 - beta) * dw3 ** 2
    mean_gradient_w = beta * mean_gradient_w + (1 - beta) * dw3
    mom_w = momentum * mom_w + lr * (dw3 / (tf.sqrt(mean_square_w + epsilon - mean_gradient_w ** 2)))

    mean_square_b = beta * mean_square_b + (1 - beta) * dw4 ** 2
    mean_gradient_b = beta * mean_gradient_b + (1 - beta) * dw4
    mom_b = momentum * mom_b + lr * (dw4 / (tf.sqrt(mean_square_b + epsilon - mean_gradient_b ** 2)))

    w3.assign_sub(mom_w)
    w4.assign_sub(mom_b)

print('w3 : {}'.format(w3.numpy()))
print('w4 : {}'.format(w4.numpy()))
Output:
w3 : [[-1.2507935]]
w4 : 0.0033333366736769676
Now I create a single-layer, single-neuron neural network with no activation function, assign the same weights to its neuron, and use RMSProp as the optimizer, but I get different final weights. This was not the case with the SGD optimizer.
Code:
# using keras to get same results
def create_model():
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Dense(units=1, name='d1', input_shape=(x.shape[1],)))
    model.compile(optimizer=tf.keras.optimizers.RMSprop(
        learning_rate=1e-5, rho=0.9, momentum=0.0, epsilon=1e-07, centered=False),
        loss="mse")
    return model

model = create_model()
d1 = model.get_layer('d1')
d1_weights = [tf.constant(w_vector, dtype=tf.float32), tf.constant(np.array([0]), dtype=tf.float32)]
d1.set_weights(d1_weights)
model.fit(x, y, epochs=100)

d1 = model.get_layer('d1')
print('w3 = {}'.format(d1.weights[0].numpy()))
print('w4 = {}'.format(d1.weights[1].numpy()[0]))
Output:
w3 = [[-1.2530397]]
w4 = 0.0010913893347606063
My gradients are calculated correctly for the MSE loss function; I have cross-checked them against TensorFlow's built-in gradient computation, tf.GradientTape.
Code:
# Computing gradients
def gradients_mse(X, Y, Y_PREDS):
    DW1 = tf.matmul(X, tf.reshape(Y - Y_PREDS, (X.shape[0], 1)), transpose_a=True) * (-2 / X.shape[0])
    DW0 = (-2 / X.shape[0]) * tf.reduce_sum(Y - Y_PREDS)
    return DW1, DW0
The only thing I think can go wrong in this implementation is the calculation of mom_w and mom_b using incorrect equations. x.shape = [10, 1], and the default batch size is 32, so batching has no effect on the weight updates. The same code gives perfectly matching output when I use simple gradient descent instead of RMSProp.
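For reference, the non-centered update that RMSprop(centered=False) corresponds to keeps only the second-moment term, with epsilon inside the square root (this follows TensorFlow's documented RMSProp kernel; treat it as a sketch to compare against, not a drop-in fix). The running averages also have to persist across iterations instead of being reset inside the loop:

# state created once, before the loop
mean_square_w = 0.0
mom_w = 0.0

for epoch in range(1, EPOCHS + 1):
    y_pred1 = tf.squeeze(tf.matmul(w3, x, transpose_a=True, transpose_b=True) + w4)
    dw3, _ = gradients_mse(x, y, y_pred1)

    # running average of squared gradients, carried over between iterations
    mean_square_w = beta * mean_square_w + (1 - beta) * dw3 ** 2
    # no centered term (mean_gradient ** 2), because centered=False
    mom_w = momentum * mom_w + lr * dw3 / tf.sqrt(mean_square_w + epsilon)
    w3.assign_sub(mom_w)

The bias update would change in the same way.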
I am currently taking the Deep Learning specialization by Deeplearning.ai on Coursera and am on the first assignment, which requires implementing a neural network with a logistic regression mindset. The assignment implements the network as a logistic regression function for UNSTRUCTURED DATA (IMAGES). I have successfully completed the assignment, getting all the expected outputs. However, when I try to use the same network for STRUCTURED DATA, I come across a broadcast error. Part of the code is below.
The dataset code
path_train = r'C:\Users\Ahmed Ismail Khalid\Desktop\Research Paper\Research Paper Feature Sets\Balanced Feature Sets\Balanced Train combined scores.csv'
path_test = r'C:\Users\Ahmed Ismail Khalid\Desktop\Research Paper\Research Paper Feature Sets\Balanced Feature Sets\Balanced Test combined scores.csv'
df_train = pd.read_csv(path_train)
#df_train = df_train.to_numpy()
df_test = pd.read_csv(path_test)
#df_test = df_test.to_numpy()
x_train = df_train.iloc[:,1:19]
x_train = x_train.to_numpy()
x_train = x_train.T
y_train = df_train.iloc[:,19]
y_train = y_train.to_numpy()
y_train = y_train.reshape(y_train.shape[0],1)
y_train = y_train.T
x_test = df_test.iloc[:,1:19]
x_test = x_test.to_numpy()
x_test = x_test.T
y_test = df_test.iloc[:,19]
y_test = y_test.to_numpy()
y_test = y_test.reshape(y_test.shape[0],1)
y_test = y_test.T
print ("Number of training examples: m_train = " + str(m_train))
print ("Number of testing examples: m_test = " + str(m_test))
print ("train_set_x shape: " + str(x_train.shape))
print ("train_set_y shape: " + str(y_train.shape))
print ("test_set_x shape: " + str(x_test.shape))
print ("test_set_y shape: " + str(y_test.shape))
Output of Dataset Code
Number of training examples: df_train = 713
Number of testing examples: df_test = 237
x_train shape: (18, 713)
y_train shape: (1, 713)
x_test shape: (18, 237)
y_test shape: (1, 237)
The propagate function code
def propagate(w, b, X, Y):
    m = X.shape[1]
    A = sigmoid((w.T * X) + b)
    cost = (-1 / m) * np.sum(np.dot(Y, np.log(A)) + np.dot((1 - Y), np.log(1 - A)))
    dw = (1 / m) * np.dot((X, (A - Y)).T)
    db = (1 / m) * np.sum(A - Y)

    assert (dw.shape == w.shape)
    assert (db.dtype == float)
    cost = np.squeeze(cost)
    assert (cost.shape == ())

    grads = {"dw": dw,
             "db": db}

    return grads, cost
The optimize and model functions
def optimize(w, b, X, Y, num_iterations, learning_rate, print_cost):
    costs = []
    for i in range(num_iterations):
        # Cost and gradient calculation
        grads, cost = propagate(w, b, X, Y)

        # Retrieve derivatives from gradients
        dw = grads['dw']
        db = grads['db']

        # Update w and b
        w = w - learning_rate * dw
        b = b - learning_rate * db

        if i % 100 == 0:
            costs.append(cost)

        # Print the cost every 100 training iterations
        if print_cost and i % 100 == 0:
            print("Cost after iteration %i: %f" % (i, cost))

    params = {"w": w,
              "b": b}
    grads = {"dw": dw,
             "db": db}

    return params, grads, costs
def model(X_train, Y_train, X_test, Y_test, num_iterations=2000, learning_rate=0.5, print_cost=False):
    # initialize parameters with zeros
    w, b = initialize_with_zeros(X_train.shape[0])

    # Gradient descent (≈ 1 line of code)
    parameters, grads, costs = optimize(w, b, X_train, Y_train, num_iterations, learning_rate, print_cost)

    # Retrieve parameters w and b from dictionary "parameters"
    w = parameters["w"]
    b = parameters["b"]

    # Predict train/test set examples (≈ 2 lines of code)
    Y_prediction_train = predict(w, b, X_train)
    Y_prediction_test = predict(w, b, X_test)

    # Print train/test errors
    print("train accuracy: {} %".format(100 - np.mean(abs(Y_prediction_train - Y_train)) * 100))
    print("test accuracy: {} %".format(100 - np.mean(abs(Y_prediction_test - Y_test)) * 100))

    d = {"costs": costs,
         "Y_prediction_test": Y_prediction_test,
         "Y_prediction_train": Y_prediction_train,
         "w": w,
         "b": b,
         "learning_rate": learning_rate,
         "num_iterations": num_iterations}

    return d
Model Function output
Cost after iteration 0: 0.693147
train accuracy: -0.1402524544179613 %
test accuracy: 0.4219409282700326 %
When I run the code, I get ValueError: operands could not be broadcast together with shapes (1,713) (713,18) at A = sigmoid((w.T * X) + b). I am pretty new to neural networks and the use of numpy, so I can't figure out the problem. Any and all help would be really appreciated. The .ipynb file containing the entire code can be downloaded from here
Thanks
The * operator is elementwise multiplication, and your arrays have incompatible shapes. You want matrix multiplication, which you can do with np.matmul() or with the @ operator:
A = sigmoid(w.T @ X + b)
A lot of ML, especially neural nets, is about keeping the shapes of things straight. Check the shapes of your w, X, and Y — they should be: (features, 1), (features, m), (1, m) respectively, where features is 18 for you, and m is 713.
You should also then be able to make sure that the shape of A matches Y.
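A quick sanity check along those lines (a sketch using the shapes from your printout):

assert w.shape == (18, 1)    # (features, 1)
assert X.shape == (18, 713)  # (features, m)
assert Y.shape == (1, 713)   # (1, m)
A = sigmoid(w.T @ X + b)     # (1, 18) @ (18, 713) -> (1, 713)
assert A.shape == Y.shape

One more thing to watch for once this runs: dw = (1 / m) * np.dot((X, (A - Y)).T) passes a tuple to np.dot and will fail; you presumably meant np.dot(X, (A - Y).T), which gives dw the same (18, 1) shape as w.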
I'm using the word-level RNN language model here: https://github.com/hunkim/word-rnn-tensorflow
How can I calculate the accuracy of the RNN model in each epoch?
Below is the training code that reports the training loss and other metrics in each epoch:
for e in range(model.epoch_pointer.eval(), args.num_epochs):
    sess.run(tf.assign(model.lr, args.learning_rate * (args.decay_rate ** e)))
    data_loader.reset_batch_pointer()
    state = sess.run(model.initial_state)
    speed = 0
    if args.init_from is None:
        assign_op = model.epoch_pointer.assign(e)
        sess.run(assign_op)
    if args.init_from is not None:
        data_loader.pointer = model.batch_pointer.eval()
        args.init_from = None
    for b in range(data_loader.pointer, data_loader.num_batches):
        start = time.time()
        x, y = data_loader.next_batch()
        feed = {model.input_data: x, model.targets: y, model.initial_state: state,
                model.batch_time: speed}
        summary, train_loss, state, _, _ = sess.run([merged, model.cost, model.final_state,
                                                     model.train_op, model.inc_batch_pointer_op], feed)
        train_writer.add_summary(summary, e * data_loader.num_batches + b)
        speed = time.time() - start
        if (e * data_loader.num_batches + b) % args.batch_size == 0:
            print("{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}"
                  .format(e * data_loader.num_batches + b,
                          args.num_epochs * data_loader.num_batches,
                          e, train_loss, speed))
        if (e * data_loader.num_batches + b) % args.save_every == 0 \
                or (e == args.num_epochs - 1 and b == data_loader.num_batches - 1):  # save for the last result
            checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
            saver.save(sess, checkpoint_path, global_step=e * data_loader.num_batches + b)
            print("model saved to {}".format(checkpoint_path))
train_writer.close()
Since the model has both targets and prediction probabilities for each class, you can reduce the probabilities tensor to keep the class index of the highest probability:
predictions = tf.cast(tf.argmax(model.probs, axis=2), tf.int32)
Then you can compare with the targets to see whether each prediction was correct:
correct_preds = tf.equal(predictions, model.targets)
Finally, the accuracy is the ratio of correct predictions to the input size, i.e. the mean of this boolean tensor:
accuracy = tf.reduce_mean(tf.cast(correct_preds, tf.float32))
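For example, you can fetch it together with the loss in the batch loop above (a sketch; it assumes these ops were added to the same graph as the model):

summary, train_loss, acc, state, _, _ = sess.run([merged, model.cost, accuracy, model.final_state,
                                                  model.train_op, model.inc_batch_pointer_op], feed)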
You can also use TensorFlow's tf.metrics.accuracy function:
accuracy, accuracy_update_op = tf.metrics.accuracy(labels = tf.argmax(y, axis = 2), predictions = tf.argmax(predictions, axis = 2), name = 'accuracy')
running_vars_accuracy = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES, scope="LSTM/Accuracy")
The accuracy_update_op operation will update the two local variables in each batch:
[<tf.Variable 'accuracy/total:0' shape=() dtype=float32_ref>,
<tf.Variable 'accuracy/count:0' shape=() dtype=float32_ref>]
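You also need an op to reset those local variables at the start of each epoch; the running_vars_initializer_accuracy used below can be created like this (a sketch following the usual tf.metrics pattern):

running_vars_initializer_accuracy = tf.variables_initializer(var_list=running_vars_accuracy)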
Then, just calling the accuracy op gives the overall accuracy in each epoch:
for epoch in range(num_epochs):
    avg_cost_train = 0.
    total_train_batch = int((len(X_train) / batch_size) + 1)
    running_vars_initializer_accuracy.run()
    for _ in range(total_train_batch):
        _, miniBatchCost_train, miniBatchAccuracy_train = sess.run([trainer, loss, accuracy_update_op],
                                                                   feed_dict={X: Xtrain, y: ytrain})
        avg_cost_train += miniBatchCost_train / total_train_batch
    accuracy_train = sess.run(accuracy)
One note: do not call the metric value op and the metric update op in the same session.run() call.
I am able to perform classification with the code below. It outputs the probability for each output label, but I need to convert this so that it predicts values; that is, I want to add a regression layer at the end instead of softmax. For example, say I trained the model on labels 1, 2, 3, 4, 5. I want the model to predict values beyond those 5 labels, such as 1.3 or 2.5. I want a continuous output rather than a discrete output.
Update
I am trying to achieve a solution suggested in this question here.
Let's say I have training data. I train the model on whole-number temperatures like 1, 2, 3, 4 and 5 degrees; those output temperatures are the labels. How can I predict values that lie between two temperatures, like 2.5 degrees? It is not possible to train on every possible value of temperature. How can I achieve this?
My model gives probability of each class predicted
Temp Probability
1 .01
2 .05
3 .56
4 .24
5 .14
I want my model to predict the temperature values like 1.2, 2.7, etc. instead of predicting the probability of each class.
input_height = 1   # 1-dimensional convolution
input_width = 90   # window
num_labels = 5     # output labels
num_channels = 8   # input columns

batch_size = 10
kernel_size = 60
depth = 60
num_hidden = 1000

learning_rate = 0.0001
training_epochs = 8
total_batches = train_x.shape[0] // batch_size
X = tf.placeholder(tf.float32, shape=[None,input_height,input_width,num_channels],name="input")
# X = tf.placeholder(tf.float32, shape=[None,input_width * num_channels], name="input")
# X_reshaped = tf.reshape(X,[-1,1,90,3])
Y = tf.placeholder(tf.float32, shape=[None,num_labels])
c = apply_depthwise_conv(X,kernel_size,num_channels,depth)
p = apply_max_pool(c,20,2)
c = apply_depthwise_conv(p,6,depth*num_channels,depth//10)
shape = c.get_shape().as_list()
c_flat = tf.reshape(c, [-1, shape[1] * shape[2] * shape[3]])
f_weights_l1 = weight_variable([shape[1] * shape[2] * depth * num_channels * (depth//10), num_hidden])
f_biases_l1 = bias_variable([num_hidden])
f = tf.nn.tanh(tf.add(tf.matmul(c_flat, f_weights_l1),f_biases_l1))
out_weights = weight_variable([num_hidden, num_labels])
out_biases = bias_variable([num_labels])
y_ = tf.nn.softmax(tf.matmul(f, out_weights) + out_biases,name="y_")
loss = -tf.reduce_sum(Y * tf.log(y_))
optimizer = tf.train.GradientDescentOptimizer(learning_rate = learning_rate).minimize(loss)
correct_prediction = tf.equal(tf.argmax(y_,1), tf.argmax(Y,1)) #difference between correct output and expected output
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
cost_history = np.empty(shape=[1], dtype=float)
with tf.Session() as session:
    tf.global_variables_initializer().run()
    for epoch in range(training_epochs):
        for b in range(total_batches):
            offset = (b * batch_size) % (train_y.shape[0] - batch_size)
            batch_x = train_x[offset:(offset + batch_size), :, :, :]
            batch_y = train_y[offset:(offset + batch_size), :]
            _, c = session.run([optimizer, loss], feed_dict={X: batch_x, Y: batch_y})
            cost_history = np.append(cost_history, c)
        print("Epoch:", epoch, " Training Loss:", c,
              " Training Accuracy:", session.run(accuracy, feed_dict={X: train_x, Y: train_y}))
    print("Testing Accuracy:", session.run(accuracy, feed_dict={X: test_x, Y: test_y}))
If you want to predict which class is detected, just take an argmax on the output; the index with the highest probability is the detected class.
predict = tf.argmax(y_, 1)
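If you want a genuinely continuous output instead, one option is to replace the softmax head with a single linear unit trained with an MSE loss against the raw temperature values (a sketch reusing the helpers from your code; it assumes your labels are fed as plain numbers rather than one-hot vectors):

num_outputs = 1  # one continuous value instead of num_labels classes
out_weights = weight_variable([num_hidden, num_outputs])
out_biases = bias_variable([num_outputs])
y_ = tf.matmul(f, out_weights) + out_biases                # linear output, no softmax
Y = tf.placeholder(tf.float32, shape=[None, num_outputs])  # e.g. [[1.0], [2.5], ...]
loss = tf.reduce_mean(tf.square(y_ - Y))                   # MSE instead of cross-entropy
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)

With this head, the network can output values like 1.3 or 2.7 at inference time.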