I recently started using PyTorch after working with TensorFlow. I have a piece of code implemented in TensorFlow that I now want to convert to PyTorch.
I'm new to PyTorch and not yet familiar with its functions, so the conversion hasn't been smooth and I'd like some advice.
Here's the code I want to convert:
def kl_loss_compute(logits1, logits2):
    """ KL loss
    """
    pred1 = tf.nn.softmax(logits1)
    pred2 = tf.nn.softmax(logits2)
    loss = tf.reduce_mean(tf.reduce_sum(pred2 * tf.log(1e-8 + pred2 / (pred1 + 1e-8)), 1))
    return loss
python: 3.6, ubuntu: 16.04
logits1 and logits2 are the outputs of FC layers; their shape is [batch, n].
Here is my implementation (using example logits of shape [3, 5]):
Tensorflow Version:
import tensorflow as tf
def kl_loss_compute(logits1, logits2):
    """ KL loss
    """
    pred1 = tf.nn.softmax(logits1)
    print(pred1.eval())
    pred2 = tf.nn.softmax(logits2)
    print(pred2.eval())
    loss = tf.reduce_mean(tf.reduce_sum(pred2 * tf.log(1e-8 + pred2 / (pred1 + 1e-8)), 1))
    return loss

x1 = tf.random.normal([3, 5], dtype=tf.float32)
x2 = tf.random.normal([3, 5], dtype=tf.float32)

with tf.Session() as sess:
    x1 = sess.run(x1)
    print(x1)
    x2 = sess.run(x2)
    print(x2)
    print(30 * '=')
    print(sess.run(kl_loss_compute(x1, x2)))
Output:
[[ 0.9801388 -0.2514422 -0.28299806 0.85130763 0.4565948 ]
[-1.0744809 0.20301117 0.21026622 1.0385195 0.41147012]
[ 1.2385081 1.1003486 -2.0818367 -1.0446491 1.8817908 ]]
[[ 0.04036871 0.82306993 0.82962424 0.5209219 -0.10473887]
[ 1.7777447 -0.6257034 -0.68985045 -1.1191329 -0.2600192 ]
[ 0.03387258 0.44405013 0.08010675 0.9131149 0.6422863 ]]
==============================
[[0.32828477 0.09580362 0.09282765 0.2886025 0.19448158]
[0.04786159 0.17170973 0.17296004 0.39596024 0.21150835]
[0.2556382 0.22265059 0.00923886 0.02606533 0.48640704]]
[[0.12704821 0.27790183 0.27972925 0.20543297 0.10988771]
[0.7349108 0.06644011 0.062312 0.04056362 0.09577343]
[0.12818882 0.19319147 0.13425465 0.30881628 0.23554876]]
0.96658206
PyTorch Version:
import torch

def kl_loss_compute(logits1, logits2):
    """ KL loss
    """
    pred1 = torch.softmax(logits1, dim=-1, dtype=torch.float32)
    print(pred1)
    pred2 = torch.softmax(logits2, dim=-1, dtype=torch.float32)
    print(pred2)
    loss = torch.mean(torch.sum(pred2 * torch.log(1e-8 + pred2 / (pred1 + 1e-8)), -1))
    return loss

# same inputs are used here as above (see the inputs printed by the TensorFlow code)
x = torch.Tensor([[ 0.9801388, -0.2514422, -0.28299806, 0.85130763, 0.4565948 ],
                  [-1.0744809,  0.20301117, 0.21026622, 1.0385195,  0.41147012],
                  [ 1.2385081,  1.1003486, -2.0818367, -1.0446491,  1.8817908 ]])
y = torch.Tensor([[ 0.04036871, 0.82306993, 0.82962424, 0.5209219, -0.10473887],
                  [ 1.7777447, -0.6257034, -0.68985045, -1.1191329, -0.2600192 ],
                  [ 0.03387258, 0.44405013, 0.08010675, 0.9131149,  0.6422863 ]])
print(kl_loss_compute(x, y))
Output:
tensor([[0.3283, 0.0958, 0.0928, 0.2886, 0.1945],
[0.0479, 0.1717, 0.1730, 0.3960, 0.2115],
[0.2556, 0.2227, 0.0092, 0.0261, 0.4864]])
tensor([[0.1270, 0.2779, 0.2797, 0.2054, 0.1099],
[0.7349, 0.0664, 0.0623, 0.0406, 0.0958],
[0.1282, 0.1932, 0.1343, 0.3088, 0.2355]])
tensor(0.9666)
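As a side note, here is a numerically more stable sketch of the same loss using torch.nn.functional; F.kl_div with reduction='batchmean' reproduces the sum-over-classes, mean-over-batch reduction used above (modulo the 1e-8 stabilizers):
import torch
import torch.nn.functional as F

def kl_loss_compute(logits1, logits2):
    # KL(pred2 || pred1): sum over classes, mean over the batch,
    # matching tf.reduce_mean(tf.reduce_sum(..., 1)) in the original.
    log_pred1 = F.log_softmax(logits1, dim=-1)
    pred2 = F.softmax(logits2, dim=-1)
    return F.kl_div(log_pred1, pred2, reduction='batchmean')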
Related
I'm working on a small TensorFlow model using TensorFlow 2.3.0. In the model I use several tf.while_loops and TensorArrays. The model is not working as expected, and I tried to troubleshoot the issue, but unfortunately not all TensorFlow behavior is documented and I'm not sure whether there is a bug in my model or some TensorFlow behavior I'm unaware of. For example, in my model I multiply my data by some weights inside a tf.while_loop and store the result in a TensorArray. The TensorArray content is then used again in the same fashion until I minimize the loss to train the model.
My problem is that the model does not train as expected. I suspect that TensorFlow is freezing the weights and not updating them as I would expect.
How can I make sure that the content of the last TensorArray remains trainable, since it is produced from data multiplied by trainable weight variables? I'm trying to avoid the issue mentioned here, but I'm not sure if I have.
Below is a simple example (a dummy model) just to clarify what I'm doing:
import tensorflow.compat.v1 as tf
tf.compat.v1.disable_eager_execution()
import numpy as np
size=20
randvalues = np.random.choice(a=[0.0, 1.0], size=(size,10, 10), p=[.5, 1-.5])
x = tf.constant(randvalues , tf.float32)
y = tf.constant(randvalues , tf.float32)
init_range = np.sqrt(6.0 /20)
initial = tf.random_uniform([ 10, 10 ], minval=-init_range,
maxval=init_range, dtype=tf.float32)
weights = tf.Variable(initial, name="testWeight1")
w1_summ = tf.summary.histogram("testWeight1" ,weights )
init_range = np.sqrt(6.0 /15)
initial2 = tf.random_uniform([ 10, 5 ], minval=-init_range,
maxval=init_range, dtype=tf.float32)
weights_tied = tf.Variable(initial2, name="tiedWeight")
w2_summ = tf.summary.histogram("tiedWeight" ,weights_tied )
ta = tf.TensorArray(dtype = tf.float32 , size=0 , dynamic_size=True , clear_after_read=False , infer_shape=False )
ta2 = tf.TensorArray(dtype = tf.float32 , size=0 , dynamic_size=True , clear_after_read=False , infer_shape=False )
def condition(counter, ta1):
    return counter < size

def body(counter, ta1):
    with tf.name_scope("firstloop"):
        operation1 = tf.matmul(x[counter], weights)
        operation2 = tf.nn.relu(operation1)
        operation3 = tf.matmul(operation2, weights_tied)
        operation4 = tf.matmul(operation3, tf.transpose(weights_tied))
        ta1 = ta1.write(counter, tf.reshape(operation4, [-1]))
    return counter + 1, ta1
runloop , array1 = tf.while_loop(condition,body,[0 , ta ] , back_prop=True )
def condition2(counter, ta1, array1):
    return counter < 1

def body2(counter, ta2, array1):
    with tf.name_scope("secondloop"):
        operation = array1.stack()
        operation4 = tf.nn.relu(operation)
        ta2 = ta2.write(counter, tf.reshape(operation4, [-1]))
    return counter + 1, ta2, array1
runloop2 , array2 , _ = tf.while_loop(condition2,body2,[0 , ta2 ,array1] ,back_prop=True)
predictions= array2.stack()
loss=tf.nn.weighted_cross_entropy_with_logits(logits=tf.reshape(predictions,[-1]), targets=tf.reshape(y,[-1]), pos_weight=1)
cost = tf.reduce_mean(loss)
optimizer = tf.train.AdamOptimizer(learning_rate=.001)
gvs = optimizer.compute_gradients(cost)
additional_summeries = [ tf.summary.histogram( "GRAD"+str(g[1]) , g[0]) for g in gvs]
opt_op= optimizer.apply_gradients(gvs)
merge= tf.summary.merge([w2_summ , w1_summ] + additional_summeries )
sess = tf.Session()
summ_writer = tf.summary.FileWriter('C:\\Users\\USER\\Documents\\Projects\\MastersEnv\\GraphAutoEncoder\\gae\\summaries', sess.graph)
sess.run(tf.global_variables_initializer())
for cc in range(1000):
    a, b = sess.run([runloop2, opt_op])
    c = sess.run(merge)
    summ_writer.add_summary(c, cc)
    print(cc)
print('done')
In the above example the content of array2 should be the predictions. How can I make sure that the loops and the TensorArrays did not affect the trainability of my variables? And if what I did is incorrect, what is the best approach to achieve the same thing while keeping the result trainable?
Update:
OK, so I ran my model several times and monitored the loss, accuracy and other scalar metrics. I found a general trend of the loss decreasing and the accuracy and other accuracy-related metrics increasing, with accuracy and loss mirroring each other; to my understanding, this indicates that the updates are not random and the model is learning something.
Additionally, I monitored the weight and gradient distributions and they are changing, which confirms that the variables are being trained.
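For reference, a minimal sketch of another way to double-check this, using the gvs pairs that optimizer.compute_gradients(cost) already returns in the code above; a None gradient would mean the corresponding variable is disconnected from the loss:
# Sketch: compute_gradients returns (gradient, variable) pairs. A None gradient
# means the variable does not influence the loss and will never be updated.
for grad, var in gvs:
    status = "OK" if grad is not None else "NO gradient (disconnected from the loss)"
    print(var.name, status)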
Can someone please confirm my conclusion and understanding?
Thanks in advance for your help.
I am new to TensorFlow, so I am trying to get my hands dirty by working on a binary classification problem on Kaggle. I have trained the model using a sigmoid function and got very good accuracy when testing, but when I try to export the predictions to a DataFrame for submission, I get the error below. I have attached the code, the predictions, and the output; please suggest what I am doing wrong. I suspect it has to do with my sigmoid function. Thanks.
This is the output of the predictions; the expected values are 1s and 0s:
INFO:tensorflow:Restoring parameters from ./movie_review_variables
Prections are [[3.8743019e-07]
[9.9999821e-01]
[1.7650980e-01]
...
[9.9997473e-01]
[1.4901161e-07]
[7.0333481e-06]]
#Importing tensorflow
import tensorflow as tf
#defining hyperparameters
learning_rate = 0.01
training_epochs = 1000
batch_size = 100
num_labels = 2
num_features = 5000
train_size = 20000
#defining the placeholders and encoding the y placeholder
X = tf.placeholder(tf.float32, shape=[None, num_features])
Y = tf.placeholder(tf.int32, shape=[None])
y_oneHot = tf.one_hot(Y, 1)
#defining the model parameters -- weight and bias
W = tf.Variable(tf.zeros([num_features, 1]))
b = tf.Variable(tf.zeros([1]))
#defining the sigmoid model and setting up the learning algorithm
y_model = tf.nn.sigmoid(tf.add(tf.matmul(X, W), b))
cost = tf.nn.sigmoid_cross_entropy_with_logits(logits=y_model, labels=y_oneHot)
train_optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
#defining operation to measure success rate
correct_prediction = tf.equal(tf.argmax(y_model, 1), tf.argmax(y_oneHot, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
#saving variables
saver = tf.train.Saver()
#executing the graph and saving the model variables
with tf.Session() as sess:  # new session
    tf.global_variables_initializer().run()
    # Iteratively updating parameters batch by batch
    for step in range(training_epochs * train_size // batch_size):
        offset = (step * batch_size) % train_size
        batch_xs = x_train[offset:(offset + batch_size), :]
        batch_labels = y_train[offset:(offset + batch_size)]
        # run optimizer on batch
        err, _ = sess.run([cost, train_optimizer], feed_dict={X: batch_xs, Y: batch_labels})
        if step % 1000 == 0:
            print(step, err)  # print ongoing result
    # Print final learned parameters
    w_val = sess.run(W)
    print('w', w_val)
    b_val = sess.run(b)
    print('b', b_val)
    print('Accuracy', accuracy.eval(feed_dict={X: x_test, Y: y_test}))
    save_path = saver.save(sess, './movie_review_variables')
    print('Model saved in path {}'.format(save_path))
#creating csv file for kaggle submission
with tf.Session() as sess:
    saver.restore(sess, './movie_review_variables')
    predictions = sess.run(y_model, feed_dict={X: test_data_features})
    subm2 = pd.DataFrame(data={'id': test['id'], 'sentiment': predictions})
    subm2.to_csv('subm2nlp.csv', index=False, quoting=3)
    print("I am done predicting")
INFO:tensorflow:Restoring parameters from ./movie_review_variables
---------------------------------------------------------------------------
Exception Traceback (most recent call last)
<ipython-input-85-fd74ed82109c> in <module>()
5 # print('Prections are {}'.format(predictions))
6
----> 7 subm2 = pd.DataFrame(data={'id':test['id'], 'sentiment':predictions})
8 subm2.to_csv('subm2nlp.csv', index=False, quoting=3)
9 print("I am done predicting")
Exception: Data must be 1-dimensional
You'll need to set some threshold for the sigmoidal output. E.g. split the outputs into bins with space of 0.5 between them:
>>> import numpy as np
>>> x = np.linspace(0, 10, 20)
>>> x
array([ 0. , 0.52631579, 1.05263158, 1.57894737, 2.10526316,
2.63157895, 3.15789474, 3.68421053, 4.21052632, 4.73684211,
5.26315789, 5.78947368, 6.31578947, 6.84210526, 7.36842105,
7.89473684, 8.42105263, 8.94736842, 9.47368421, 10. ])
>>> q = 0.5 # The continuous value between two discrete points
>>> y = q * np.round(x/q)
>>> y
array([ 0. , 0.5, 1. , 1.5, 2. , 2.5, 3. , 3.5, 4. , 4.5, 5.5,
6. , 6.5, 7. , 7.5, 8. , 8.5, 9. , 9.5, 10. ])
Look at the definition of the sigmoid function, σ(x) = 1 / (1 + e^-x): it always produces a continuous output in (0, 1). If you want to discretize the output, you need to determine a threshold above which you set your prediction to 1 and below which you set it to 0:
pred = tf.math.greater(y_model, tf.constant(0.5))
However, you must be careful when choosing the threshold, as it is not guaranteed that your model is well calibrated in terms of probability. You can choose a suitable threshold based on whichever value gives the best discrimination on a held-out validation set.
It is important that this step is used for evaluation only, as you will not be able to backpropagate your loss signal through this op.
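As for the "Data must be 1-dimensional" exception itself: the predictions have shape (N, 1), while pandas expects a flat column. A minimal sketch of the submission step under that assumption (0.5 is just an illustrative cut-off):
import pandas as pd

# predictions has shape (N, 1); flatten it and apply the threshold
# before building the submission DataFrame.
binary_preds = (predictions.ravel() > 0.5).astype(int)
subm2 = pd.DataFrame(data={'id': test['id'], 'sentiment': binary_preds})
subm2.to_csv('subm2nlp.csv', index=False, quoting=3)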
I'm using the TensorFlow LinearRegressor API for a regression problem (https://www.tensorflow.org/api_docs/python/tf/estimator/LinearRegressor). I know the bias in my model is exactly 0.
How can I force LinearRegressor to learn a bias of 0?
Here is a minimal example:
import tensorflow as tf
import numpy as np
from sklearn.linear_model import SGDRegressor
# Simulate some data with 2 features (+ bias of 0):
# y = 0 + 2*x1 + 3*x2 + noise
np.random.seed(5332)
n = 1000
weights = np.array([
[2],
[3],
])
bias = 0
x = np.random.randn(n, np.shape(weights)[0])
y = (bias + np.matmul(x, weights) + np.random.randn(n, 1)).ravel()
In sklearn I would use fit_intercept=False to force the bias to 0:
ols = SGDRegressor(tol=0.000001, fit_intercept=False)
ols.fit(x, y)
print("True weights: {}".format(weights.ravel()))
print("Learned weights: {}".format(np.round(ols.coef_), 3))
print("True bias: {}".format([bias]))
print("Learned bias: {}".format(np.round(ols.intercept_), 3))
Output:
True weights: [2 3]
Learned weights: [2. 3.]
True bias: [0]
Learned bias: [0.]
In tensorflow I did the following:
column = tf.feature_column.numeric_column('x', shape=np.shape(x)[1])
ols = tf.estimator.LinearRegressor(
    feature_columns=[column],
    optimizer=tf.train.GradientDescentOptimizer(0.0001)
)
train_input = tf.estimator.inputs.numpy_input_fn(
    x={"x": x},
    y=y,
    shuffle=False,
    num_epochs=100,
    batch_size=int(len(y) / 20)
)
ols.train(train_input)
print("True weights: {}".format(weights.ravel()))
print("Learned weights: {}".format(np.round(ols.get_variable_value('linear/linear_model/x/weights').flatten(), 3)))
print("True bias: {}".format([bias]))
print("Learned bias: {}".format(np.round(ols.get_variable_value('linear/linear_model/bias_weights').flatten(), 3)))
Output:
True weights: [2 3]
Learned weights: [1.993 2.998]
True bias: [0]
Learned bias: [-0.067]
But the learned bias should be: [0], how can I enforce this?
I guess tf.keras.constraints is what you are searching for.
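For illustration, here is a minimal sketch of that idea, assuming you are willing to express the linear model as a Keras layer instead of the estimator API; the bias can be pinned to zero with a constraint, or removed outright:
import tensorflow as tf

# Sketch: a linear model whose bias is constrained to stay at zero.
# MaxNorm(0.0) clips the bias norm to 0 after every update.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(1, bias_constraint=tf.keras.constraints.MaxNorm(0.0))
])
model.compile(optimizer=tf.keras.optimizers.SGD(0.0001), loss="mse")

# Simpler still: tf.keras.layers.Dense(1, use_bias=False) drops the bias entirely.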
I want to classify:
if the input is under 200 the output should be (0, 1),
and if the input is over 200 the output should be (1, 0).
The input values are sequential integers and the network has 5 layers.
The hidden layers use sigmoid and the last layer uses softmax.
The loss function is a reduce_mean of the cross-entropy, trained with gradient descent.
import numpy as np
import tensorflow as tf
def set_x_data():
    x_data = np.array([[50], [60], [70], [80], [90],
                       [110], [120], [130], [140], [150],
                       [160], [170], [180], [190], [200],
                       [210], [220], [230], [240], [250],
                       [260], [270], [280], [290], [300],
                       [310], [320], [330], [340], [350],
                       [360], [370], [380], [390]])
    return x_data
def set_y_data(x):
    # 16 samples labelled [0, 1] followed by 18 samples labelled [1, 0]
    y_data = np.array([[0, 1]] * 16 + [[1, 0]] * 18)
    return y_data
def set_bias(efficiency):
    arr = np.array([efficiency])
    return arr
W1 = tf.Variable(tf.random_normal([1, 5]), name='weight1')
W2 = tf.Variable(tf.random_normal([5, 5]), name='weight2')
W3 = tf.Variable(tf.random_normal([5, 5]), name='weight3')
W4 = tf.Variable(tf.random_normal([5, 5]), name='weight4')
W5 = tf.Variable(tf.random_normal([5, 2]), name='weight5')
def inference(input, b):
    hidden_layer1 = tf.sigmoid(tf.matmul(input, W1) + b)
    hidden_layer2 = tf.sigmoid(tf.matmul(hidden_layer1, W2) + b)
    hidden_layer3 = tf.sigmoid(tf.matmul(hidden_layer2, W3) + b)
    hidden_layer4 = tf.sigmoid(tf.matmul(hidden_layer3, W4) + b)
    out_layer = tf.nn.softmax(tf.matmul(hidden_layer4, W5) + b)
    return out_layer
def loss(hypothesis, y):
    cross_entropy = tf.reduce_mean(-tf.reduce_sum(y * tf.log(hypothesis), reduction_indices=[1]))
    return cross_entropy
def train(loss):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1)
    train = optimizer.minimize(loss)
    return train
x_data = set_x_data()
y_data = set_y_data(0)
b_data = set_bias(0.8)

x = tf.placeholder(tf.float32, shape=[None, 1])
y = tf.placeholder(tf.float32, shape=[None, 2])
b = tf.placeholder(tf.float32, shape=[None])
hypothesis = inference(x, b)
loss = loss(hypothesis, y)
train = train(loss)
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)
print(sess.run(W1))
for step in range(2000):
    sess.run(train, feed_dict={x: x_data, y: y_data, b: b_data})
print(sess.run(W1))
print(sess.run(hypothesis, feed_dict={x:np.array([[1000]]), b:b_data}))
When I print W1 before and after training, the values barely change, and when I test with input = 1000 the result is not what I expect: I think it should be close to (1, 0), but the result is almost (0.5, 0.5).
I guess the mistake comes from the loss function, because it was copied from here and there, but I can't be sure about it.
The code above is just a simplified version of my code. I think I have to show my real code, but it is too long, so I created a new post:
classifying data by tensorflow but accuracy value didn't change
There are a few issues in the training of the above network, but with a few changes you can achieve a network that gets this decision function
(The plot in the link shows the score of class 2, i.e. if x > 200)
The list of issues subject to improvement in this network:
1. The training data is very scarce (only 34 points!). This is typically too small, especially for a 5-layer network as in your case. You typically want many more input samples than parameters in the network. Try adding more input values and reducing the number of layers (as in the code below; I've used floats instead of integers to get more points, but I think it is still compatible).
2. The input range typically requires scaling (below I've used a super-simple scaling by dividing by a constant). This is because you typically want to avoid high ranges of variable values; especially if you pass through many layers with a softmax non-linearity, this would destroy the information contained in the very high or very low values. In more advanced cases you might want to do min-max scaling or z-scores (a small sketch follows this list).
3. Try more epochs (and try plotting the evolution of the loss function value). With the given number of epochs, the optimization of the loss function had not converged; below I use 10x more epochs, and with that the loss almost converges (2000 epochs were not enough).
4. Something that helped was shuffling the (x, y) data. Though this is not crucial in this case, it converges faster (see the paper "Efficient Backprop" by Le Cun), and in more serious examples it is typically needed.
5. Importantly, I think you want b to be a parameter, not a constant, don't you? The bias of a network is typically also optimized together with the multiplicative weights. (Also, it is not common to use a single, shared bias for all the hidden layers.)
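As a small aside on point 2 above, a sketch of min-max scaling with NumPy (a hypothetical alternative to the divide-by-a-constant used in the main code below):
import numpy as np

# Hypothetical min-max scaling of the inputs into [0, 1], as an alternative
# to dividing by the fixed SCALE constant used below.
x_data = np.arange(50, 390, 0.1)[:, None]
x_scaled = (x_data - x_data.min()) / (x_data.max() - x_data.min())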
Below is the code. Note there might be further improvements but these few tricks end up with the desired decision function.
I've added some inline comments to indicate changes with respect to the original. I hope you find these pieces of advice insightful!
The code:
import numpy as np
import tensorflow as tf
# I've modified the functions set_x_data and set_y_data
# so as to generate a larger set of numbers.
# Generate a range of numbers from 50 to 390
def set_x_data():
    x_data = np.arange(50, 390, 0.1)
    return x_data[:, None]
# Assign labels depending on x_data
def set_y_data(x_data):
    ydata1 = x_data >= 200
    ydata2 = x_data < 200
    return np.hstack((ydata1, ydata2))
def set_bias(efficiency):
    arr = np.array([efficiency])
    return arr
# Let's keep W1 and W5 (one hidden layer only)
# BTW, in this problem you could do with 0 hidden layers. But keeping
# 1 to show it works
W1 = tf.Variable(tf.random_normal([1, 5]), name='weight1')
W5 = tf.Variable(tf.random_normal([5, 2]), name='weight5')
# BTW, b should be a parameter, too.
b = tf.Variable(tf.constant(0.0))
# Just keeping 1 hidden layer
def inference(input):
    hidden_layer1 = tf.sigmoid(tf.matmul(input, W1) + b)
    out_layer = tf.nn.softmax(tf.matmul(hidden_layer1, W5) + b)
    return out_layer
# This is unchanged
def loss(hypothesis, y):
    cross_entropy = tf.reduce_mean(-tf.reduce_sum(y * tf.log(hypothesis), reduction_indices=[1]))
    return cross_entropy
# This is unchanged
def train(loss):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1)
    train = optimizer.minimize(loss)
    return train
# Using SCALE to normalize the input variables (range of inputs too big)
# This is a simple normalization in this case. Other examples are
# Min-Max normalization or z-scores.
SCALE = 1000
x_data = set_x_data()
y_data = set_y_data(x_data)
x_data /= SCALE
# Now only placeholders are x and y (b is a parameter)
x= tf.placeholder(tf.float32, shape=[None, 1])
y= tf.placeholder(tf.float32, shape=[None, 2])
hypothesis = inference(x)
loss = loss(hypothesis, y)
train = train(loss)
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)
print(sess.run(W1))
# Epochs x 10, it did not converge with fewer epochs
epochs = 20000
losses = np.zeros(epochs)
for step in range(epochs):
    # Shuffle data
    r = np.random.permutation(x_data.shape[0])
    x_data = x_data[r]
    y_data = y_data[r, :]
    # Small modification here to capture the loss.
    _, l = sess.run([train, loss], feed_dict={x: x_data, y: y_data})
    losses[step] = l
print(sess.run(W1))
print(sess.run(b))
The code to display the decision function above:
%matplotlib inline
import matplotlib.pyplot as plt
ystar = np.arange(50, 400, 10)[:,None]
plt.plot(ystar, sess.run(hypothesis, feed_dict={x:ystar/SCALE})[:,0])
I wrote a TensorFlow program for linear regression. I am using the gradient descent algorithm to optimize (minimize) the loss function, but the value of the loss function increases while the program runs. My program and its output follow.
import tensorflow as tf
W = tf.Variable([.3],dtype=tf.float32)
b = tf.Variable([-.3],dtype=tf.float32)
X = tf.placeholder(tf.float32)
Y = tf.placeholder(tf.float32)
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)
lm = W*X + b
delta = tf.square(lm-Y)
loss = tf.reduce_sum(delta)
optimizer = tf.train.GradientDescentOptimizer(0.01)
train = optimizer.minimize(loss)
for i in range(8):
    print(sess.run([W, b]))
    print("loss= %f" % sess.run(loss, {X: [10, 20, 30, 40], Y: [1, 2, 3, 4]}))
    sess.run(train, {X: [10, 20, 30, 40], Y: [1, 2, 3, 4]})
sess.close()
Output for my program is
2017-12-07 14:50:10.517685: W tensorflow/core/platform/cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use SSE4.1 instructions, but these are available on your machine and could speed up CPU computations.
[array([ 0.30000001], dtype=float32), array([-0.30000001],dtype=float32)]
loss= 108.359993
[array([-11.09999943], dtype=float32), array([-0.676], dtype=float32)]
loss= 377836.000000
[array([ 662.25195312], dtype=float32), array([ 21.77807617], dtype=float32)]
loss= 1318221568.000000
[array([-39110.421875], dtype=float32), array([-1304.26794434], dtype=float32)]
loss= 4599107289088.000000
[array([ 2310129.25], dtype=float32), array([ 77021.109375], dtype=float32)]
loss= 16045701465112576.000000
[array([ -1.36451664e+08], dtype=float32), array([-4549399.], dtype=float32)]
loss= 55981405829796462592.000000
[array([ 8.05974733e+09], dtype=float32), array([ 2.68717856e+08], dtype=float32)]
loss= 195312036582209632600064.000000
Please give me an answer as to why the value of the loss is increasing instead of decreasing.
Did you try changing the learning rate? Using a lower learning rate (~1e-4) and more iterations should work.
Here is more justification as to why a lower learning rate might be required. Note that your loss function is
L = \sum (Wx + b - Y)^2
so dL/dW = \sum 2(Wx + b - Y) * x
and the Hessian is d^2L/dW^2 = \sum 2 * x * x.
Now, your loss is diverging because the learning rate is larger than the inverse of the Hessian, which here is roughly 1/(2*2900). So you should try to decrease the learning rate.
Note: I wasn't sure how to add math to a StackOverflow answer, so I had to add it this way.
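For illustration, a minimal sketch of that suggestion applied to the code above (the 1e-4 learning rate and 1000 iterations are illustrative values, in line with the advice):
# Same graph as above, but with a much smaller learning rate and more steps.
optimizer = tf.train.GradientDescentOptimizer(1e-4)
train = optimizer.minimize(loss)
for i in range(1000):
    sess.run(train, {X: [10, 20, 30, 40], Y: [1, 2, 3, 4]})
    if i % 100 == 0:
        print("loss= %f" % sess.run(loss, {X: [10, 20, 30, 40], Y: [1, 2, 3, 4]}))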
To do a linear regression, this is the code I've been using with NumPy:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import pandas as pd
print(tf.__version__)
%matplotlib inline
plt.rcParams['figure.figsize'] = (10, 6)
x = np.arange(start=0.0, stop=5.0, step=0.1)
##You can adjust the slope and intercept to verify the changes in the graph
W=1
b=0
# We define the linear equation
y= W*x + b
# And plot it thanks to matplotlib
plt.plot(x,y)
plt.ylabel('Dependent Variable')
plt.xlabel('Independent Variable')
plt.show()
With TensorFlow you can use something similar to the code below to do a linear regression:
def graph_formula_vs_data(formula, x_vector, y_vector):
    """
    This function graphs a formula in the form of a line, vs. data points
    """
    x = np.array(range(0, int(max(x_vector))))
    y = eval(formula)
    plt.plot(x, y)
    plt.plot(x_vector, y_vector, "ro")
    plt.show()
df=pd.read_csv('./linear_reg_exam_dataset.csv',usecols = [0,1],skiprows = [0],header=None)
d = df.values
data = np.float32(d)
dataset = pd.DataFrame({'x': data[:, 0], 'y': data[:, 1]})
# Number of epochs (times we make the model go through all the data)
n_epochs = 100
# Model parameters
W = tf.Variable([0.], tf.float32)
b = tf.Variable([0.], tf.float32)
y = dataset['y'] # define the target variable (dependent variable) as y
x = dataset['x']
msk = np.random.rand(len(df)) < 0.8
# Model input and output
x_train = x[msk].values.tolist()
y_train = y[msk].values.tolist()
# Validation data (with this we validate that the model has learned to generalize the problem)
x_val = x[~msk].values.tolist()
y_val = y[~msk].values.tolist()
# Model definition
#tf.function
def linear_model(x, W, b):
    return W * x + b
# Cost function
loss = lambda: tf.reduce_sum(tf.math.squared_difference(y_train,linear_model(x_train, W, b)))
# optimizer to do the gradient descent
optimizer = tf.optimizers.SGD(0.0000000000001)
# We perform n_epochs training iterations
for i in range(n_epochs):
    optimizer.minimize(loss, var_list=[W, b])

    # Every 10 epochs we print how W and b evolve and the amount of error there is
    if i % 10 == 0 or i == n_epochs - 1:
        print("Epoch {}".format(i))
        print("W: {}".format(W.numpy()))
        print("b: {}".format(b.numpy()))
        print("loss: {}".format(loss()))
        # This formula represents w * x + b in string form to be able to graph it
        stringfied_formula = str(W.numpy()) + "*x +" + str(b.numpy())
        graph_formula_vs_data(formula=stringfied_formula, x_vector=x_train, y_vector=y_train)
        print("\n")
Epoch 99
W: [0.39189553]
b: [0.00059491]
loss: 1458421628928.0
# Evaluation of the model with validation data
stringfied_formula=str(W.numpy()) + "*x +" + str(b.numpy())
graph_formula_vs_data(formula=stringfied_formula, x_vector=x_val, y_vector=y_val)
loss = lambda: tf.reduce_sum(tf.math.squared_difference(y_val,linear_model(x_val, W, b)))
print("\nValidation: ")
print("W: {}".format(W.numpy()))
print("b: {}".format(b.numpy()))
print("loss: {}".format(loss()))
graph_formula_vs_data(formula=stringfied_formula, x_vector=x_val, y_vector=y_val)
Validation:
W: [75.017586]
b: [0.11139687]
loss: 8863.4775390625