Limited shape as output in tensorflow - python

I am trying to generate random timeseries data with Keras as follows:
import tensorflow as tf
import pandas as pd
import random

input_data = [random.uniform(10, 100) for _ in range(350000)]
targets = [random.uniform(10, 100) for _ in range(350000)]

dataset = tf.keras.utils.timeseries_dataset_from_array(
    input_data, targets, sequence_length=10000)

for batch in dataset:
    inputs, targets = batch
    break
But the resulting batch shape is smaller than expected:
<tf.Tensor: shape=(128, 10000), dtype=float32, numpy=
array([[22.922523, 44.253967, 41.80049 , ..., 60.444836, 14.977458,
17.970036],
[44.253967, 41.80049 , 34.09485 , ..., 14.977458, 17.970036,
68.27751 ],
[41.80049 , 34.09485 , 37.27845 , ..., 17.970036, 68.27751 ,
98.05703 ],
...,
[13.941159, 51.48634 , 61.248505, ..., 98.093346, 67.3885 ,
34.01148 ],
[51.48634 , 61.248505, 77.34204 , ..., 67.3885 , 34.01148 ,
27.165142],
[61.248505, 77.34204 , 54.856853, ..., 34.01148 , 27.165142,
97.55085 ]], dtype=float32)>
How can I increase the size of the output array, or is there a limitation?

Replace VARIABLE with the number of samples you want per batch (batch_size). The default batch_size is 128, which is the first dimension you are seeing. If you pass batch_size=None the dataset is not batched at all and yields individual sequences one at a time.
dataset = tf.keras.utils.timeseries_dataset_from_array(
    input_data, targets, batch_size=VARIABLE, sequence_length=10000)
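As a quick sanity check, here is a minimal sketch (with a shorter series so it runs fast) showing that the first dimension tracks whatever batch_size you pass:

import random
import tensorflow as tf

input_data = [random.uniform(10, 100) for _ in range(1000)]
targets = [random.uniform(10, 100) for _ in range(1000)]

dataset = tf.keras.utils.timeseries_dataset_from_array(
    input_data, targets, sequence_length=100, batch_size=256)

inputs, targs = next(iter(dataset))
print(inputs.shape)  # (256, 100): batch_size windows of sequence_length points each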

Related

tf2.0: Gradient Tape returns None gradient in RNN model

In a model with an embedding layer and SimpleRNN layer, I would like to compute the partial derivative dh_t/dh_0 for each step t.
Below is the structure of my model, including imports and data preprocessing.
Toxic comment train data available: https://www.kaggle.com/c/jigsaw-multilingual-toxic-comment-classification/data?select=jigsaw-toxic-comment-train.csv
GloVe 6B 100d embeddings available: https://nlp.stanford.edu/projects/glove/
### 1. Imports
from __future__ import print_function
import numpy as np
from numpy import array, asarray, zeros
import pandas as pd
from tqdm import tqdm
from sklearn.model_selection import train_test_split
import tensorflow as tf
from keras import Input, Model
from keras.models import Sequential
from keras.layers.recurrent import LSTM, GRU, SimpleRNN
from keras.layers.core import Dense, Activation, Dropout, Flatten
from keras.layers.embeddings import Embedding
from tensorflow.keras.layers import BatchNormalization, PReLU
from sklearn import preprocessing, decomposition, model_selection, metrics, pipeline
from keras.preprocessing import sequence, text
from keras import backend as k
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
### 2. Text data tokenisation and GloVe-100d embeddings:
def data_pp():
    train = pd.read_csv('/Users/Toxic comment data/jigsaw-toxic-comment-train.csv')
    train.drop(['severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate'], axis=1, inplace=True)
    train = train.iloc[:12000, :]
    xtr, xte, ytr, yte = train_test_split(train['comment_text'].values,
                                          train['toxic'].values,
                                          stratify=train['toxic'].values,
                                          random_state=42, test_size=0.2, shuffle=True)
    # Tokenise data
    tok = text.Tokenizer(num_words=None)
    tok.fit_on_texts(list(xtr) + list(xte))
    input_dim = len(tok.word_index) + 1
    input_length = train['comment_text'].apply(lambda x: len(str(x).split())).max()
    xtr_seq = tok.texts_to_sequences(xtr); xte_seq = tok.texts_to_sequences(xte)
    xtr_pad = sequence.pad_sequences(xtr_seq, maxlen=input_length)
    xte_pad = sequence.pad_sequences(xte_seq, maxlen=input_length)
    print('Shape of tokenised training input:', xtr_pad.shape)
    return xtr_pad, ytr, xte_pad, yte, input_dim, input_length, tok

xtr_pad, ytr, xte_pad, yte, input_dim, input_length, tok = data_pp()
# Word embeddings
def embed_mat(input_dim, output_dim, tok):
    '''By default output_dim = 100 for GloVe 100d embeddings'''
    embedding_dict = dict()
    f = open('/Users/GloVe/glove.6B.100d.txt')
    for line in f:
        values = line.split()
        word = values[0]; coefs = asarray(values[1:], dtype='float32')
        embedding_dict[word] = coefs
    f.close()
    Emat = zeros((input_dim, output_dim))
    for word, i in tok.word_index.items():
        embedding_vector = embedding_dict.get(word)
        if embedding_vector is not None:
            Emat[i] = embedding_vector
    print('Embedding weight matrix has shape:', Emat.shape)
    return Emat

output_dim = 100
Emat = embed_mat(input_dim, output_dim, tok)
### 3. Define model and compute gradients:
# You can let it run for a few steps and stop the process. Then inspect the first step h_t, h_0 and the computed dh_t/dh_0.
# For the case in my comment, you can remove the for-loop over the steps t, comment out ht, and compute tape.gradient(states, h0) instead.
batch_size = 100
inp = Input(batch_shape=(batch_size, input_length), name='input')
emb_out = Embedding(input_dim, output_dim, input_length=input_length,
                    weights=[Emat], trainable=False, name='embedding')(inp)
rnn = SimpleRNN(200, return_sequences=True, return_state=False, stateful=True, name='simpleRNN')
h0 = tf.convert_to_tensor(np.random.uniform(size=(batch_size, 200)).astype(np.float32))
rnn_allstates = rnn(emb_out, initial_state=h0)
model_rnn = Model(inputs=inp, outputs=rnn_allstates, name='model_rnn')
model_rnn.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['acc'])

ds = tf.data.Dataset.from_tensor_slices((xtr_pad[:100], ytr[:100])).batch(100)
embedding_layer = model_rnn.layers[1]
rnn_layer = model_rnn.layers[2]

grads_allsteps = []
for b, (x_batch_train, y_batch_train) in enumerate(ds):
    for t in range(input_length):
        with tf.GradientTape() as tape:
            tape.watch(h0)
            et = embedding_layer(x_batch_train)
            states = rnn_layer(et, initial_state=h0)  # (100, 1403, 200)
            ht = states[:, t, :]
        grad_t = tape.gradient(ht, h0)  # (100, 200)
        print('Computed gradient dht/dh0 at step', t+1, 'in batch', b+1)
        grads_allsteps.append(grad_t)
At each step t, h_t has shape (100, 200) and h_0 has shape (100, 200). However, tape.gradient(ht, h0) returns None for every t. Below is the result of the first step:
for t in range(1):
    with tf.GradientTape() as tape:
        tape.watch(h0)
        et = embedding_layer(x_batch_train)
        # tape.watch(et)
        states = rnn_layer(et, initial_state=h0)  # (100, 1403, 200)
        ht = states[:, t, :]
    print(ht)
    print(h0)
    grad_t = tape.gradient(ht, h0)
    tf.print(grad_t)
>>
# h_t:
tf.Tensor(
[[ 0.25634336 0.5259362 0.60045886 ... -0.4978792 0.62755316
0.09803997]
[ 0.58387524 0.26037565 0.5646103 ... 0.31233114 0.4853201
0.10877549]
[ 0.17190906 0.68681747 -0.32054633 ... -0.6139967 0.48944488
0.06301598]
...
[ 0.1985917 -0.11821499 -0.47709295 ... -0.05718012 0.16089934
0.20585683]
[ 0.73872745 0.503326 0.25224414 ... -0.5771631 0.03748894
0.09212588]
[-0.6597108 -0.43926442 -0.23546427 ... 0.26760277 0.28221437
-0.4039318 ]], shape=(100, 200), dtype=float32)
# h_0:
tf.Tensor(
[[0.51580787 0.51664346 0.70773274 ... 0.45973232 0.7760376 0.48297063]
[0.61048764 0.26038417 0.60392565 ... 0.7426153 0.15507504 0.57494944]
[0.11859739 0.33591187 0.68375146 ... 0.59409297 0.5302879 0.28876984]
...
[0.12401487 0.39376178 0.9850304 ... 0.21582918 0.9592233 0.5257605 ]
[0.9401199 0.2157638 0.6445949 ... 0.36316434 0.5799403 0.3749675 ]
[0.37230062 0.18162128 0.0739954 ... 0.21624395 0.66291 0.7807376 ]], shape=(100, 200), dtype=float32)
# dh_t/dh_0:
None
GradientTape seems to have some difficulty watching this h_0 and computing the gradient. I have successfully used GradientTape to watch the inputs e_t to the RNN layer and computed the gradients dh_t/de_t, but that does not say much about the quality of the model fit.
How can I make it watch the fixed quantity h_0, and thus compute the gradient dh_t/dh_0? Thanks in advance for any help.
Reproducible test case:
### 1. Imports (identical to the import block above)
### 2. Simulated data and gradient computation:
batch_size = 100; input_length = 5
xtr_pad = tf.random.uniform((batch_size, input_length), maxval=500, dtype=tf.int32)
ytr = tf.random.normal((batch_size, input_length, 200))

inp = Input(batch_shape=(batch_size, input_length), name='input')
emb_out = Embedding(500, 100, input_length=input_length, trainable=False, name='embedding')(inp)
rnn = SimpleRNN(200, return_sequences=True, return_state=False, stateful=True, name='simpleRNN')
h0 = tf.convert_to_tensor(np.random.uniform(size=(batch_size, 200)).astype(np.float32))
rnn_allstates = rnn(emb_out, initial_state=h0)
model_rnn = Model(inputs=inp, outputs=rnn_allstates, name='model_rnn')
model_rnn.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['acc'])

ds = tf.data.Dataset.from_tensor_slices((xtr_pad, ytr)).batch(100)
embedding_layer = model_rnn.layers[1]
rnn_layer = model_rnn.layers[2]

grads_allsteps = []
for b, (x_batch_train, y_batch_train) in enumerate(ds):
    for t in range(input_length):
        with tf.GradientTape() as tape:
            tape.watch(h0)
            states = model_rnn(x_batch_train)
            ht = states[:, t, :]
        grad_t = tape.gradient(ht, h0)
        print('Computed gradient dht/dh0 at step', t+1, 'in batch', b+1)
        grads_allsteps.append(grad_t)
Something interesting: the first-step gradient is computed and looks fine, but the rest are None.
grads_allsteps
>>
[<tf.Tensor: shape=(100, 200), dtype=float32, numpy=
array([[ 1.2307187 , -1.0343404 , 0.52859926, ..., -0.09879799,
-1.1407609 , -0.7241671 ],
[ 1.142821 , -1.312029 , 0.37148148, ..., 0.2300478 ,
-1.1440411 , -0.36673146],
[ 1.2778691 , -1.2225235 , 0.69951147, ..., 0.17701946,
-1.2816343 , -0.52648413],
...,
[ 1.1717036 , -1.2444504 , 0.5874837 , ..., -0.13161334,
-1.3752006 , -0.376719 ],
[ 1.1333262 , -1.0013355 , 0.3363382 , ..., -0.22350994,
-1.299541 , -0.5073889 ],
[ 1.18489 , -0.90809333, 0.55045474, ..., -0.10550319,
-1.0866506 , -0.58325446]], dtype=float32)>, None, None, None, None]
You could try using tf.gradients instead. Also, rather use a tf.Variable for h0:
# Your imports
#-------

### 2. Simulated data and gradient computation:
batch_size = 100; input_length = 5
xtr_pad = tf.random.uniform((batch_size, input_length), maxval=500, dtype=tf.int32)
ytr = tf.random.normal((batch_size, input_length, 200))

inp = Input(batch_shape=(batch_size, input_length), name='input')
emb_out = Embedding(500, 100, input_length=input_length, trainable=False, name='embedding')(inp)
rnn = SimpleRNN(200, return_sequences=True, return_state=False, stateful=True, name='simpleRNN')
h0 = tf.Variable(tf.random.uniform((batch_size, 200)))
rnn_allstates = rnn(emb_out, initial_state=h0)
model_rnn = Model(inputs=inp, outputs=rnn_allstates, name='model_rnn')
model_rnn.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['acc'])

ds = tf.data.Dataset.from_tensor_slices((xtr_pad, ytr)).batch(100)
embedding_layer = model_rnn.layers[1]
rnn_layer = model_rnn.layers[2]

@tf.function
def calculate_t_gradients(t, x, h0):
    return tf.gradients(model_rnn(x)[:, t, :], h0)

grads_allsteps = []
for b, (x_batch_train, y_batch_train) in enumerate(ds):
    for t in range(input_length):
        grads_allsteps.append(calculate_t_gradients(t, x_batch_train, h0))

print(grads_allsteps)
[[<tf.Tensor: shape=(100, 200), dtype=float32, numpy=
array([[ 1.2034059 , -0.46448404, 0.6272926 , ..., -0.40906236,
0.07618493, 0.6338958 ],
[ 1.2781916 , -0.20411322, 0.6174417 , ..., -0.31636393,
-0.23417974, 0.67499626],
[ 1.113218 , -0.65086263, 0.63425934, ..., -0.66614366,
-0.07726163, 0.53647137],
...,
[ 1.3399608 , -0.54088974, 0.6213518 , ..., 0.00831087,
-0.14397278, 0.2614633 ],
[ 1.213171 , -0.42787278, 0.60535026, ..., -0.56198204,
-0.09142771, 0.6212783 ],
[ 1.1901733 , -0.5743524 , 0.36872283, ..., -0.42522985,
-0.0861398 , 0.495057 ]], dtype=float32)>], [<tf.Tensor: shape=(100, 200), dtype=float32, numpy=
array([[ 0.3487598 , 1.2738569 , -0.48500937, ..., 0.6011117 ,
-0.20381093, 0.45596513],
[ 0.37931004, 1.2778724 , -0.8682532 , ..., 0.8170228 ,
0.1456329 , 0.23715591],
[ 0.5984771 , 0.92434835, -0.8879645 , ..., 0.38756457,
-0.17436962, 0.47174054],
...,
[ 0.61081064, 0.99631476, -0.5104377 , ..., 0.5042721 ,
0.02844866, 0.34626445],
[ 0.7126102 , 1.0205276 , -0.60710275, ..., 0.49418694,
-0.16092762, 0.41363668],
[ 0.8581749 , 1.1259711 , -0.5824491 , ..., 0.45388597,
-0.16205123, 0.72434616]], dtype=float32)>], [<tf.Tensor: shape=(100, 200), dtype=float32, numpy=
array([[ 3.8507193e-01, 1.2925258e+00, 1.2027258e+00, ...,
3.2430276e-01, 2.2319333e-01, -2.5218868e-01],
[ 5.9262186e-01, 1.4497797e+00, 1.2479483e+00, ...,
4.6175608e-01, 2.5466472e-01, -2.4279505e-01],
[ 2.5734475e-01, 1.4562432e+00, 1.1020679e+00, ...,
6.6081107e-01, 1.9841105e-01, -2.5595558e-01],
...,
[ 5.1541841e-01, 1.6206543e+00, 9.6205616e-01, ...,
7.2725344e-01, 2.5501373e-01, -7.7709556e-04],
[ 4.4518453e-01, 1.6381552e+00, 1.0112666e+00, ...,
5.5238277e-01, 2.4137528e-01, -2.6242572e-01],
[ 6.6721851e-01, 1.5826726e+00, 1.1282607e+00, ...,
3.2301426e-01, 2.2295776e-01, 1.1724380e-01]], dtype=float32)>], [<tf.Tensor: shape=(100, 200), dtype=float32, numpy=
array([[ 0.14262576, 0.578709 , 0.1149607 , ..., 0.1229499 ,
-0.42344815, 0.8837458 ],
[-0.09711604, 0.04376438, -0.11737494, ..., 0.00389774,
0.01737173, 0.17246482],
[ 0.24414796, 0.30101255, -0.12234146, ..., -0.04850931,
-0.31790918, 0.21326394],
...,
[-0.20562285, 0.21999156, 0.02703794, ..., -0.03547464,
-0.59052145, 0.04695258],
[ 0.2087476 , 0.46558812, -0.18172565, ..., -0.01167884,
-0.20868361, 0.09055485],
[-0.22442941, 0.16119067, 0.10854454, ..., 0.14752978,
-0.32307786, 0.343314 ]], dtype=float32)>], [<tf.Tensor: shape=(100, 200), dtype=float32, numpy=
array([[-1.1414615 , 0.37376842, -1.0230722 , ..., 0.60619426,
0.22550163, -0.6948315 ],
[-1.0124328 , 0.27892357, -0.96915233, ..., 0.7048603 ,
-0.15284726, -0.6734605 ],
[-0.8542529 , 0.25970122, -0.90076745, ..., 0.8825682 ,
-0.02474228, -0.55014515],
...,
[-0.89430666, 0.68327624, -1.0109956 , ..., 0.31722566,
-0.23703958, -0.6766514 ],
[-0.8633691 , 0.28742114, -0.9896866 , ..., 0.98315084,
0.0115847 , -0.55474746],
[-0.7229766 , 0.62417865, -1.2342371 , ..., 0.85149145,
-0.04468453, -0.60606724]], dtype=float32)>]]
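Note that tf.gradients is only supported in graph mode, so the @tf.function decorator on calculate_t_gradients is what makes this work; called eagerly, tf.gradients raises a RuntimeError suggesting GradientTape instead.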
You need to make sure the stateful parameter of the SimpleRNN is False, because according to the docs:
If True, the last state for each sample at index i in a batch will be
used as initial state for the sample of index i in the following
batch.
So your code will compute a gradient for every timestep once you set stateful to False.
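For example, here is a minimal sketch with small made-up shapes; with stateful=False, every step's gradient comes back non-None:

import numpy as np
import tensorflow as tf

batch_size, input_length, units = 8, 5, 16
emb = tf.keras.layers.Embedding(500, 12)
rnn = tf.keras.layers.SimpleRNN(units, return_sequences=True, stateful=False)

x = tf.random.uniform((batch_size, input_length), maxval=500, dtype=tf.int32)
h0 = tf.convert_to_tensor(np.random.uniform(size=(batch_size, units)).astype(np.float32))

for t in range(input_length):
    with tf.GradientTape() as tape:
        tape.watch(h0)
        states = rnn(emb(x), initial_state=h0)  # (batch, time, units)
        ht = states[:, t, :]
    grad_t = tape.gradient(ht, h0)              # (batch, units), no longer None
    print('step', t + 1, 'gradient shape:', grad_t.shape)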
I found a way to run GradientTape repeatedly: del tape after saving the statistic into a list reduces the burden on the GPU.
emb_layer = model_rnn.layers[1]; rnn_layer = model_rnn.layers[2]
n_steps = 40

dhtdh0_rnn = []
for t in range(n_steps):
    with tf.GradientTape() as tape:
        tape.watch(h0)
        et = emb_layer(xtr_pad[:100])
        ht_all = rnn_layer(et, initial_state=[h0])
        ht = ht_all[:, t, :]
    dhtdh0_t = tape.gradient(ht, h0)
    grad_agg = tf.reduce_mean(abs(dhtdh0_t), [0, 1])
    print('step', t+1, 'done')
    dhtdh0_rnn.append(np.log(grad_agg))
    del tape
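For what it's worth, a non-persistent GradientTape already releases the resources it holds as soon as gradient() is called, so the explicit del mainly drops the Python reference between iterations.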

Why are there so many arrays of weights?

I am working in Python with Keras. In my theory studies I learned that in a neural network the weights sit only between the input layer and a hidden layer, or between hidden layers.
I wrote this code, where I added two layers:
NN.add(Dense(4, input_shape=array_input.shape, activation='relu', name="Layer", kernel_constraint=changeWeight()))
NN.add(Dense(4, activation='relu', name="Output"))
NN.compile(loss='mean_squared_error', optimizer=Adam(learning_rate=0.3), metrics=['accuracy'])
print(NN.summary())
a = NN.fit(array_input, array_input, epochs=100)
for lay in NN.layers:
    print(lay.name)
    print(lay.get_weights())
I think that one is the hidden layer (the one named "Layer") and the other is the output layer. The problem is that when I print lay.get_weights() I get two arrays for each layer, like this:
Layer
[array([[-1.5516974 , -1.600516 , -0. , 0. ],
[-0. , -2.1766946 , 0.32734624, -0. ],
[-0. , -0. , 0.32156652, -0.812184 ],
[-0. , -0. , -0. , -0.7288372 ]],
dtype=float32), array([-1.8015273, -1.801546 , -0.1462403, 0. ], dtype=float32)]
Output
[array([[-1.5045888 , -0.14155084, -0.29977936, -0.0492779 ],
[-1.2379107 , -0.44411597, -0.41499865, -0.2560569 ],
[ 1.2397875 , -0.3541401 , 1.2223543 , 1.5617256 ],
[ 0.18388063, 0.44298917, -0.2201969 , -0.1165269 ]],
dtype=float32), array([-0.82720596, 0. , 1.1942271 , 1.7084894 ], dtype=float32)]
Can someone explain to me where the problem is? Do I misunderstand the Keras API?
https://www.tensorflow.org/api_docs/python/tf/keras/layers/Layer#get_weights
get_weights() returns both the weights and the bias values as a list of arrays.
Each of your inputs is connected to every neuron of the first layer, so the weight matrix has shape (input dimension, number of neurons in the current layer) and the bias vector has shape (number of neurons in the current layer,).
Therefore, without knowing what your input array contains, I know that the bias array has shape (4,).
For the second layer, the same process is repeated:
weights: (number of neurons in the previous layer, number of neurons in the current layer)
bias: (number of neurons in the current layer,)
Try this example:
from keras.models import Sequential
from keras.layers import Dense

NN = Sequential()
NN.add(Dense(2, input_shape=(3,), activation='relu', name="Layer"))
NN.add(Dense(4, activation='relu', name="Output"))

for lay in NN.layers:
    print(lay.name)
    print(lay.get_weights())
Output:
Layer
[array([[-0.674668 , -0.34347552],
[ 0.63090587, 0.8558588 ],
[-0.5063792 , -0.23311883]], dtype=float32), array([0., 0.], dtype=float32)]
Output
[array([[-0.07787323, 0.22444701, 0.52729607, 0.07616615],
[-0.5380094 , -0.3146367 , -0.73177123, -0.9248886 ]],
dtype=float32), array([0., 0., 0., 0.], dtype=float32)]
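As a sanity check, you can count parameters from these shapes: "Layer" has a (3, 2) kernel plus a (2,) bias, i.e. 3*2 + 2 = 8 parameters, and "Output" has (2, 4) plus (4,), i.e. 12; NN.summary() reports the same totals.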
Graphical representation: (figure omitted)

Keras CNN: Error when checking input: expected conv1d_46_input to have 3 dimensions, but got array with shape (3780, 6)

I created a Conv1D model with Keras and got a ValueError during training. I don't quite understand where the model is having problems.
I tried to modify the data shape to (3780, 6, 1), but it still complains that conv1d_46_input was expected to have 3 dimensions but got an array with shape (3780, 6).
def baseline_model():
    model = models.Sequential()
    model.add(layers.Conv1D(1, 5, input_shape=(6, 1), activation="tanh"))
    model.add(layers.MaxPool1D(pool_size=2))
    model.add(layers.core.Flatten())
    model.add(layers.Dense(2))
    model.compile(loss='mean_squared_error', optimizer='adam', metrics=['accuracy'])
    return model
# df is pandas DataFrame
X = np.array(df[['rp', 'x', 'y', 'class', 'at', 'dt']], dtype=np.float64)
y = np.array(df[['ap', 'dp']], dtype=np.float64)
# X = np.expand_dims(X, -1)
# y = np.expand_dims(y, -1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=100)
model = baseline_model()
history = model.fit(X_train, y_train, epochs=200, batch_size=32, validation_data=(X_test, y_test))
X=np.array([[-69.3078, 0. , 1. , 1. , 90. , 90. ],
[-69.4585, 0. , 2. , 1. , 90. , 90. ],
[-69.4776, 0. , 3. , 1. , 90. , 90. ],
...,
[-65.8291, 35. , 33. , 1. , 90. , 90. ],
[-71.0137, 35. , 34. , 1. , 90. , 90. ],
[-67.2308, 35. , 35. , 1. , 90. , 90. ]])
y=np.array([[ 15.4463, -17.5046],
[ 15.4777, -17.536 ],
[ 15.5092, -17.5675],
...,
[ 15.8361, -17.8944],
[ 15.8809, -17.9392],
[ 15.9259, -17.9842]])
# X,y type is numpy array
# X shape is (4725, 6) ,y shape is (4725, 2)
# X[0] shape is (6,) , y[0] shape is (2,)
ValueError: Error when checking input: expected conv1d_46_input to have 3 dimensions, but got array with shape (3780, 6)
Your first Conv1D layer expects input of shape (batch_size, steps, channels).
Here your X has shape (3780, 6), i.e. 3780 samples of dimension 6, but the model needs 3780 samples of shape (6, 1).
To get that, you can simply expand the last dimension of X:
X = np.expand_dims(X, -1)
This will add the extra dimension that you need.
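A quick shape check with dummy data of the same shape (a minimal sketch):

import numpy as np

X = np.zeros((3780, 6))
X = np.expand_dims(X, -1)  # equivalently X[..., np.newaxis]
print(X.shape)             # (3780, 6, 1) -- matches Conv1D's expected (steps, channels)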

Keras model predict output is an array with values between 0 and 1

I'm building an autoencoder network for finding outliers in text.
I first built a numpy array with my inputs represented as ASCII codes, but I can't get the text back.
My input looks like this:
fab_shadow_black.9.png
fab_shadow_dark.9.png
fab_shadow_light.9.png
fastscroller_handle_normal.xml
fastscroller_handle_pressed.xml
folder_fab.png
ic_account_circle_grey_24dp.xml
ic_action_cancel_light.png
My whole code is as follows:
import sys
from keras import Input, Model
import matplotlib.pyplot as plt
from keras.layers import Dense
import numpy as np
from pprint import pprint
from google.colab import drive

drive.mount('/content/drive')

with open('/content/drive/My Drive/Colab Notebooks/drawables.txt', 'r') as arquivo:
    dados = arquivo.read().splitlines()

# Length of the longest string in the list
def tamanho_maior_elemento(lista):
    maior = 0
    for elemento in lista:
        tamanho_elemento = len(elemento)
        if tamanho_elemento > maior:
            maior = tamanho_elemento
    return maior

# Convert each string to a fixed-length list of ASCII codes, right-padded with "0"
def texto_para_ascii(lista, tamanho_maior_elemento):
    lista_ascii = list()
    for elemento in lista:
        elemento_ascii_lista = list()
        elemento_com_zeros = elemento.ljust(tamanho_maior_elemento, "0")
        for caractere in elemento_com_zeros:
            elemento_ascii_lista.append(ord(caractere))
        lista_ascii.append(elemento_ascii_lista)
    return lista_ascii

# Convert lists of ASCII codes back to strings
def ascii_para_texto(lista):
    lista_ascii = list()
    for elemento in lista:
        elemento_ascii_lista = list()
        for caractere in elemento:
            elemento_ascii_lista.append(chr(caractere))
        elemento_ascii_string = "".join(elemento_ascii_lista)
        lista_ascii.append(elemento_ascii_string)
    return lista_ascii

tamanho_maior_elemento = tamanho_maior_elemento(dados)
tamanho_lista = len(dados)
dados_ascii = texto_para_ascii(dados, tamanho_maior_elemento)
np_dados_ascii = np.array(dados_ascii)

tamanho_comprimido = int(tamanho_maior_elemento / 5)

dados_input = Input(shape=(tamanho_maior_elemento,))
hidden = Dense(tamanho_comprimido, activation='relu')(dados_input)
output = Dense(tamanho_maior_elemento, activation='relu')(hidden)
resultado = Dense(tamanho_maior_elemento, activation='sigmoid')(output)

autoencoder = Model(inputs=dados_input, outputs=resultado)
autoencoder.compile(optimizer='adam', loss='mse')
history = autoencoder.fit(np_dados_ascii, np_dados_ascii, epochs=10)

plt.plot(history.history["loss"])
plt.ylabel("Loss")
plt.xlabel("Epoch")
plt.show()

saida_predict = autoencoder.predict(np_dados_ascii)
saida_lista = saida_predict.tolist()
pprint(saida_predict)
pprint(saida_lista)
My input is a numpy array with each string represented as ASCII codes, right-padded with zeros.
The problem is that the output of predict is a lot of values between zero and one that I can't convert back to text.
array([[1. , 0.9999999 , 1. , ..., 1. , 1. ,
1. ],
[0.99992466, 1. , 1. , ..., 1. , 1. ,
1. ],
[1. , 0.99999994, 1. , ..., 1. , 1. ,
1. ],
...,
[0.9999998 , 0.9999999 , 1. , ..., 1. , 1. ,
0.9999999 ],
[1. , 0.9999998 , 1. , ..., 1. , 1. ,
1. ],
[0.9999999 , 0.99999994, 1. , ..., 1. , 1. ,
1. ]], dtype=float32)
I expected to get back an array containing ASCII codes, just like the input. What am I getting wrong?
In your code,
resultado = Dense(tamanho_maior_elemento, activation='sigmoid')(output)
You have used a sigmoid activation; that's why your predictions are in the range 0 to 1. Try changing it to a linear activation:
resultado = Dense(tamanho_maior_elemento)(output)
With a linear activation you don't need to pass anything for activation, because the Dense layer's default activation is already linear.
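With the output back on the ASCII scale, the predictions can be rounded and clipped to valid codes before converting to text. A sketch reusing the question's names (how readable the result is depends on how well the autoencoder fits):

import numpy as np

saida_predict = autoencoder.predict(np_dados_ascii)
codigos = np.clip(np.rint(saida_predict), 0, 255).astype(int)  # round to valid ASCII codes
textos = ["".join(chr(c) for c in linha) for linha in codigos]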

Difference between layer.weights and layer.get_weights in keras

I have a Keras model. If I take one layer of my model and call sess.run(layer.weights[0]) and layer.get_weights()[0], I obtain different results.
>>> layer.get_weights()[0]
array([[ 0.05829928, -0.01113867, 0.07874133, ..., -0.0832592 ,
-0.02382897, 0.02150916],
[-0.06571103, 0.06946308, 0.01161512, ..., -0.01296931,
-0.01047098, 0.08497558],
[-0.06404617, 0.01107556, 0.07584237, ..., -0.04085402,
-0.00671811, -0.04153195],
...,
[-0.0100356 , -0.05815255, 0.05809011, ..., 0.0594966 ,
-0.0635704 , -0.04289378],
[-0.01073305, -0.0400929 , -0.01252703, ..., -0.00287437,
0.08347356, 0.04667132],
[-0.03608105, 0.05812681, -0.0146297 , ..., -0.0673831 ,
-0.00531388, -0.02482456]], dtype=float32)
>>> sess.run(layer.weights[0])
array([[-0.03271605, 0.02013551, 0.05350242, ..., 0.06657993,
0.08541366, -0.01483627],
[-0.02411069, -0.03852968, 0.02710939, ..., -0.00030499,
0.07864482, 0.04452118],
[-0.00293329, -0.01251988, -0.01190369, ..., 0.06554652,
-0.01539454, 0.08236458],
...,
[ 0.04456077, -0.00256501, 0.01785846, ..., -0.03573522,
0.00770979, -0.05544731],
[-0.00415177, -0.01014608, -0.0684113 , ..., -0.05186068,
0.04402267, 0.03113024],
[-0.05103095, -0.06083905, -0.0098877 , ..., -0.00747809,
-0.035869 , -0.03331041]], dtype=float32)
Why? I found this other question on Stack Overflow, but I don't really understand the given answer.
layer.weights[0] is a TensorFlow variable, so it has to be evaluated in a session.
If you use the session from keras.backend then you should get the same values.
from keras import backend as K
K.get_session().run(layer.weights[0])
which is essentially what Keras does inside the get_weights() method:
https://github.com/keras-team/keras/blob/ad578c4c19444af9d1f0e0d51a8283eb0db1a264/keras/engine/base_layer.py#L1061
https://github.com/keras-team/keras/blob/ad578c4c19444af9d1f0e0d51a8283eb0db1a264/keras/backend/tensorflow_backend.py#L2652
In the other linked question, the user got different results because a new session was created and all the variables were re-initialized with init_op.
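A minimal sketch of the comparison, assuming the question's layer and a TF1-style Keras session:

from keras import backend as K
import numpy as np

w_get = layer.get_weights()[0]                 # fetched through Keras's own session
w_run = K.get_session().run(layer.weights[0])  # same session, so the same values
print(np.allclose(w_get, w_run))               # True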
