I am running a keras script (no direct call to theano in my script) and I get the following error:
TypeError: ('An update must have the same type as the original shared
variable (shared_var=<TensorType(float32, matrix)>,
shared_var.type=TensorType(float32, matrix),
update_val=Elemwise{add,no_inplace}.0,
update_val.type=TensorType(float64, matrix)).',
'If the difference is related to the broadcast pattern,
you can call the tensor.unbroadcast(var, axis_to_unbroadcast[, ...])
function to remove broadcastable dimensions.')
I have seen the error from folks running theano directly, but not through keras. Not sure what I should do, since I am not dealing with tensors directly.
The problem was caused by a change between Keras versions (I am currently using Keras 0.3.2 with Theano 0.8.0): code that used to be fine no longer works with the new Keras version.
The following was the original code, and see the fix below.
from keras.models import Sequential
import keras.optimizers
from keras.layers.core import Dense, Dropout
from keras.layers.normalization import BatchNormalization
from keras.layers.advanced_activations import PReLU
from keras.layers.core import Activation
from keras.optimizers import SGD, Adam
from sklearn.preprocessing import StandardScaler
from sklearn.base import BaseEstimator, RegressorMixin
class NnRegression(BaseEstimator, RegressorMixin):
    """Scikit-learn style regressor backed by a small Keras MLP.

    Architecture: Dropout -> Dense -> PReLU -> BatchNorm -> Dropout ->
    Dense -> PReLU -> BatchNorm -> Dropout -> Dense(1), trained with Adam
    on a mean-squared-error loss.  The layer keyword names used here
    (``output_dim``, ``init``, ``p``, ``nb_epoch``) are those of the old
    Keras API this snippet was written for.

    Parameters
    ----------
    apply_standart_scaling : bool
        When true, ``fit`` fits a ``StandardScaler`` on X and ``predict``
        re-applies it.
    dropx : sequence of three floats
        Dropout rates: input, after first hidden block, after second.
    nb_neuronx : sequence of two ints
        Widths of the two hidden ``Dense`` layers.
    nb_epoch : int
        Number of training epochs.
    validation_split : float
        Fraction of the training data Keras holds out for validation.
    verbose : int
        Any value > 0 enables Keras progress output during training.
    """

    # Defaults are tuples rather than lists so that no mutable object is
    # shared between instances; they are only ever indexed.
    def __init__(self, apply_standart_scaling=True,
                 dropx=(0.2, 0.5, 0.5), nb_neuronx=(50, 30), nb_epoch=105,
                 validation_split=0., verbose=1):
        self.apply_standart_scaling = apply_standart_scaling
        self.dropx = dropx
        self.nb_neuronx = nb_neuronx
        self.nb_epoch = nb_epoch
        self.validation_split = validation_split
        self.verbose = verbose

    def fit(self, X, y):
        """Build and train the network on (X, y); returns ``self``."""
        nb_features = X.shape[1]
        self.standart_scaling = StandardScaler() if self.apply_standart_scaling else None
        if self.standart_scaling is not None:
            X = self.standart_scaling.fit_transform(X)

        model = Sequential()
        model.add(Dropout(input_shape=(nb_features,), p=self.dropx[0]))
        model.add(Dense(output_dim=self.nb_neuronx[0], init='glorot_uniform'))
        model.add(PReLU())
        # NOTE: passing the neuron count to BatchNormalization is the
        # original mistake this snippet illustrates; it is what triggers the
        # float32/float64 TypeError with newer Keras.  The working form is
        # BatchNormalization() with no positional argument.
        model.add(BatchNormalization(self.nb_neuronx[0],))
        model.add(Dropout(self.dropx[1]))
        model.add(Dense(self.nb_neuronx[1], init='glorot_uniform'))
        model.add(PReLU())
        model.add(BatchNormalization(self.nb_neuronx[0],))  # same mistake as above
        model.add(Dropout(self.dropx[2]))
        model.add(Dense(1, init='glorot_uniform'))

        nn_verbose = 1 if self.verbose > 0 else 0
        # The explicitly configured optimizer is now the one handed to
        # compile(); before, it was built and then ignored in favour of a
        # fresh Adam().
        optz = keras.optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
        model.compile(optimizer=optz, loss='mse')
        model.fit(X, y, batch_size=16,
                  nb_epoch=self.nb_epoch,
                  validation_split=self.validation_split,
                  verbose=nn_verbose)
        self.model = model
        # scikit-learn convention: fit() returns the estimator.
        return self

    def predict(self, X):
        """Return the network output for X, applying the fitted scaler if any."""
        if self.standart_scaling is not None:
            X = self.standart_scaling.transform(X)
        # This is a regressor, so use predict() rather than predict_proba().
        return self.model.predict(X, verbose=0)
Well, it turns out that the problem is this single line of code (it appears twice in the model definition):
model.add(BatchNormalization(self.nb_neuronx[0],)))
It should actually be:
model.add(BatchNormalization())
because the number of neurons has no business being passed to the normalization layer (although this caused no problem in a previous Keras version).
Apparently, the stray argument causes Theano to generate new weights that are float64 instead of float32, and that triggers the error message above.
Related
I want to learn how to prepare data for training samples in python. I found a simple example of a neural network that predicts the stock price. At the moment I am not interested in the accuracy of training the network, but I am interested in how to take any data and prepare it for submission to the neural network.
As an example, I took these stocks over the past 5 years. As planned, the neural network accepts data for the last 50 days as input and predicts the course for the next 5 days. To do this, I read the .csv file, processed the data in such a way that after the transformation I got two dataframes, the first one is responsible for the input data, and the second for the output.
The problem is, no matter what I do, I keep getting errors and so I cannot complete the training. What am I doing wrong? The code is shown below:
import matplotlib.pylab as plt
import torch
import random
import numpy as np
import pandas as pd
import sklearn
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import normalize
import pandas_profiling as pprf
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Dense, BatchNormalization, LeakyReLU
from tensorflow.keras.layers import Activation, Input, MaxPooling1D, Dropout
from tensorflow.keras.layers import AveragePooling1D, Conv1D, Flatten
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.optimizers import Adam, RMSprop, SGD
from tensorflow.keras.utils import plot_model
from IPython.display import display, Image
# Fix the seeds of Python's, NumPy's and PyTorch's random generators.
random.seed(0)
np.random.seed(0)
torch.manual_seed(0)
torch.backends.cudnn.deterministic = True
# Load the price history; the trailing [::] is a full slice of the frame.
data = pd.read_csv('F:\\YNDX_ME.csv')[::]
# Drop the columns that are not fed to the network.
data = data.drop('Date',axis=1)
data = data.drop('Adj Close',axis=1)
# Remove rows whose Volume is 0, then renumber the rows 0..n-1 so that later
# label-based lookups by consecutive integer still work.
data = data.drop(np.where(data['Volume'] == 0)[0])
data = data.reset_index(drop=True)
#profiler = pprf.ProfileReport(data)
#profiler.to_file(r'F:\profiling.html')
days_edu = 50   # number of consecutive rows (days) in one input window
days_pred = 5   # number of following rows (days) in one target window

# Each sample i pairs rows [i, i + days_edu) as input with the next
# days_pred rows as target; every window is flattened row by row into one
# flat list.  The frame has a fresh 0..n-1 index at this point, so
# positional slicing of the value array selects the same rows as the
# original data.loc[i + j, :] lookups.
rows = data.values
n_windows = len(rows) - days_edu - days_pred + 1

df_edu_list = [
    rows[i:i + days_edu].ravel().tolist()
    for i in range(n_windows)
]

# The target width follows days_pred (it used to be a hard-coded 5, which
# would silently disagree with the loop bound if days_pred were changed).
df_edu_out_list = [
    rows[i + days_edu:i + days_edu + days_pred].ravel().tolist()
    for i in range(n_windows)
]
# Chronological 80/20 split of the input windows and of the target windows.
df_edu_train = pd.DataFrame(df_edu_list[:int(len(df_edu_list)*0.8)])
df_edu_val = pd.DataFrame(df_edu_list[int(len(df_edu_list)*0.8):])
df_edu_train_out = pd.DataFrame(df_edu_out_list[:int(len(df_edu_out_list)*0.8)])
df_edu_val_out = pd.DataFrame(df_edu_out_list[int(len(df_edu_out_list)*0.8):])
# normalize() is called separately on each of the four arrays.
df_edu_train = normalize(df_edu_train.values)
df_edu_val = normalize(df_edu_val.values)
df_edu_train_out = normalize(df_edu_train_out.values)
df_edu_val_out = normalize(df_edu_val_out.values)
# NOTE(review): expand_dims(axis=0) prepends an axis of length 1, so the
# whole training table becomes ONE sample of shape (n_windows, n_columns).
# This is why training appears to run on a single example; to train per
# window, each row has to stay a separate sample instead.
df_edu_train = np.expand_dims(df_edu_train,axis=0)
df_edu_train_out = np.expand_dims(df_edu_train_out,axis=0)
model = Sequential()
# NOTE(review): input_shape=(959,250) with channels_first is the setting
# behind the "only supports NHWC" error quoted below; 959 and 250 are
# presumably the window count and the flattened window width - confirm.
model.add(Conv1D(filters=32, kernel_size=5, padding="same", strides=1, input_shape= (959,250),data_format='channels_first'))
model.add(Conv1D(32, 5))
model.add(Dropout(0.3))
model.add(Conv1D(16, 5))
model.add(Dropout(0.3))
model.add(Flatten())
model.add(Dense(250, activation='relu'))
# 25 linear outputs - presumably days_pred * number of columns; confirm.
model.add(Dense(25, activation=None))
optimizer = Adam(learning_rate=0.0001, beta_1=0.9, beta_2=0.999, amsgrad=False)
# NOTE(review): 'accuracy' is tracked alongside an MAE regression loss.
model.compile(optimizer=optimizer, loss='mae', metrics=['accuracy'])
EPOCHS = 1000
model.fit(df_edu_train, df_edu_train_out, epochs=EPOCHS)
Error:
InvalidArgumentError: Conv2DCustomBackpropFilterOp only supports NHWC.
[[node gradient_tape/sequential/conv1d/Conv1D/Conv2DBackpropFilter
(defined at C:\Users\nick0\anaconda3\lib\site-packages\keras\optimizer_v2\optimizer_v2.py:464)
]] [Op:__inference_train_function_1046]
Errors may have originated from an input operation.
Input Source operations connected to node gradient_tape/sequential/conv1d/Conv1D/Conv2DBackpropFilter:
In[0] sequential/conv1d/Conv1D/ExpandDims (defined at C:\Users\nick0\anaconda3\lib\site-packages\keras\layers\convolutional.py:231)
In[1] gradient_tape/sequential/conv1d/Conv1D/ShapeN:
In[2] gradient_tape/sequential/conv1d/Conv1D/Reshape:
Update:
I changed data_format = 'channels_first' to data_format = 'channels_last'. Training now starts, but as far as I can tell it runs on the entire training set at once, i.e. the neural network treats the whole set as a single example and is trained on that one example only. How can I make the neural network take each row in turn? Is each row essentially a separate example?
I have a Keras model defined as follows:
class ConvLayer(Layer):
    """Bias-free, linear ``Conv2D`` followed by a ``GeneralReLU`` activation.

    Args:
        nf: number of convolution filters (output channels).
        ks: kernel size passed to ``Conv2D``.
        s: stride passed to ``Conv2D``.
        **kwargs: forwarded to the base ``Layer`` constructor
            (e.g. ``input_shape``).
    """

    def __init__(self, nf, ks=3, s=2, **kwargs):
        # Initialise the base Layer first, then attach the sub-layers.
        super(ConvLayer, self).__init__(**kwargs)
        self.nf = nf
        self.grelu = GeneralReLU(leak=0.01)
        self.conv = Conv2D(filters=nf,
                           kernel_size=ks,
                           strides=s,
                           padding="same",
                           use_bias=False,
                           activation="linear")

    def rsub(self):
        """Return the negated ``sub`` value of the activation."""
        return -self.grelu.sub

    def set_sub(self, v):
        """Set the activation's ``sub`` value to ``-v``."""
        self.grelu.sub = -v

    def conv_weights(self):
        """Return the first weight variable of the wrapped convolution."""
        # Keras layers expose their variables as `.weights` (a list);
        # `.weight` does not exist on a Keras layer.
        return self.conv.weights[0]

    def build(self, input_shape):
        # This wrapper creates no weights of its own.
        super(ConvLayer, self).build(input_shape)  # Be sure to call this at the end

    def compute_output_shape(self, input_shape):
        """Return (batch, ceil(H / stride), ceil(W / stride), nf).

        Indexes ``input_shape`` as (batch, height, width, channels), as the
        original did.  With ``padding="same"`` each spatial size is the
        input size divided by the stride, rounded up; unknown (``None``)
        dimensions stay ``None``.
        """
        stride_h, stride_w = self.conv.strides

        def _scaled(dim, stride):
            # Integer ceil-division keeps the result an int (never a float).
            return None if dim is None else -(-dim // stride)

        return (input_shape[0],
                _scaled(input_shape[1], stride_h),
                _scaled(input_shape[2], stride_w),
                self.nf)

    def call(self, x):
        return self.grelu(self.conv(x))

    def __repr__(self):
        return f'ConvLayer(nf={self.nf}, activation={self.grelu})'
class ConvModel(tf.keras.Model):
    """Subclassed Keras model: a stack of ``ConvLayer`` blocks plus a head.

    Args:
        nfs: filter counts, one per ``ConvLayer``; the first layer uses
            stride 1 and receives ``input_shape``.
        input_shape: shape forwarded to the first ``ConvLayer``.
        output_shape: number of units of the final softmax ``Dense`` layer.
        use_bn: stored on the instance; not used when building the layers.
        use_dp: stored on the instance; not used when building the layers.
    """

    def __init__(self, nfs, input_shape, output_shape, use_bn=False, use_dp=False):
        super(ConvModel, self).__init__(name='mlp')
        self.use_bn = use_bn
        self.use_dp = use_dp
        # Take the class count from the constructor argument; the original
        # read an undefined local name `num_classes` here.
        self.num_classes = output_shape
        # backbone layers
        self.convs = [ConvLayer(nfs[0], s=1, input_shape=input_shape)]
        self.convs += [ConvLayer(nf) for nf in nfs[1:]]
        # classification layers
        self.convs.append(AveragePooling2D())
        self.convs.append(Dense(output_shape, activation='softmax'))

    def call(self, inputs):
        """Feed ``inputs`` through every layer in ``self.convs`` in order."""
        for layer in self.convs:
            inputs = layer(inputs)
        return inputs
I'm able to compile this model without any issues
>>> model.compile(optimizer=tf.keras.optimizers.Adam(lr=lr),
loss='categorical_crossentropy',
metrics=['accuracy'])
But when I query the summary for this model, I see this error
>>> model = ConvModel(nfs, input_shape=(32, 32, 3), output_shape=num_classes)
>>> model.summary()
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-220-5f15418b3570> in <module>()
----> 1 model.summary()
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/network.py in summary(self, line_length, positions, print_fn)
1575 """
1576 if not self.built:
-> 1577 raise ValueError('This model has not yet been built. '
1578 'Build the model first by calling `build()` or calling '
1579 '`fit()` with some data, or specify '
ValueError: This model has not yet been built. Build the model first by calling `build()` or calling `fit()` with some data, or specify an `input_shape` argument in the first layer(s) for automatic build.
I'm providing input_shape for the first layer of my model, so why is it throwing this error?
The error says what to do:
This model has not yet been built. Build the model first by calling build()
model.build(input_shape) # `input_shape` is the shape of the input data
# e.g. input_shape = (None, 32, 32, 3)
model.summary()
There is a very big difference between keras subclassed model and other keras models (Sequential and Functional).
Sequential models and Functional models are datastructures that represent a DAG of layers. In simple words, Functional or Sequential model are static graphs of layers built by stacking one on top of each other like LEGO. So when you provide input_shape to first layer, these (Functional and Sequential) models can infer shape of all other layers and build a model. Then you can print input/output shapes using model.summary().
On the other hand, a subclassed model is defined via the body (a call method) of Python code. For a subclassed model, there is no graph of layers. We cannot know how the layers are connected to each other (because that is defined in the body of call, not as an explicit data structure), so we cannot infer input/output shapes. So for a subclassed model, the input/output shape is unknown until it is first run on real data. In the compile() method, Keras does a deferred compile and waits for real data. In order for it to infer the shapes of the intermediate layers, we need to run the model on real data and then call model.summary(). Without running the model on data first, it will throw the error you noticed. Please check the GitHub gist for the complete code.
The following is an example from Tensorflow website.
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
class ThreeLayerMLP(keras.Model):
    """Subclassed model: two 64-unit ReLU layers and a 10-unit output layer."""

    def __init__(self, name=None):
        super().__init__(name=name)
        # Hidden layers.
        self.dense_1 = layers.Dense(64, activation='relu', name='dense_1')
        self.dense_2 = layers.Dense(64, activation='relu', name='dense_2')
        # Output layer (no activation).
        self.pred_layer = layers.Dense(10, name='predictions')

    def call(self, inputs):
        """Apply dense_1, dense_2 and the prediction layer in sequence."""
        hidden = self.dense_2(self.dense_1(inputs))
        return self.pred_layer(hidden)
def get_model():
    """Create a new ``ThreeLayerMLP`` named '3_layer_mlp'."""
    mlp = ThreeLayerMLP(name='3_layer_mlp')
    return mlp
model = get_model()
# Load MNIST and flatten every image to a 784-long float32 vector in [0, 1].
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
x_train = x_train.reshape(60000, 784).astype('float32') / 255
x_test = x_test.reshape(10000, 784).astype('float32') / 255
model.compile(loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
optimizer=keras.optimizers.RMSprop())
# Deliberate failure: the model has not seen any data yet, so it is unbuilt.
model.summary() # This will throw an error as follows
# ValueError: This model has not yet been built. Build the model first by calling `build()` or calling `fit()` with some data, or specify an `input_shape` argument in the first layer(s) for automatic build.
# Need to run with real data to infer shape of different layers
history = model.fit(x_train, y_train,
batch_size=64,
epochs=1)
# After fit() has run on real data, summary() is expected to work.
model.summary()
Thanks!
Another method is to pass the input_shape argument to the first layer, like this:
model = Sequential()
# input_shape is given on the first (Bidirectional) layer: (n_steps, dim_input).
model.add(Bidirectional(LSTM(n_hidden,return_sequences=False, dropout=0.25,
recurrent_dropout=0.1),input_shape=(n_steps,dim_input)))
# X is a train dataset with features excluding a target variable
# NOTE(review): X.shape includes the sample count as its first entry.
input_shape = X.shape
model.build(input_shape)
model.summary()
Make sure you create your model properly. A small typo like the one in the following code may also cause the problem:
model = Model(some-input, some-output, "model-name")
while the correct code should be:
model = Model(some-input, some-output, name="model-name")
If your Tensorflow, Keras version is 2.5.0 then just add Tensorflow when you import Keras package
Not this:
from tensorflow import keras
from keras.models import Sequential
import tensorflow as tf
Like this:
from tensorflow import keras
from tensorflow.keras.models import Sequential
import tensorflow as tf
Version issues of your Tensorflow, Keras, can be the reason for this.
Same problem I encountered during training for the LSTM model for regression.
Error:
ValueError: This model has not yet been built. Build the model first
by calling build() or by calling the model on a batch of data.
Earlier:
from tensorflow.keras.models import Sequential
from tensorflow.python.keras.models import Sequential
Corrected:
from keras.models import Sequential
I was facing the same error, so I removed the model.summary() call, and the issue was resolved. The error arises when summary() is called before the model is built.
Here is the LINK for description which states that
Raises:
ValueError: if `summary()` is called before the model is built.**
My tensorflow is version 2.4.1
i imported modules like this
### import modules
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, Dense, Conv2D, MaxPool2D, BatchNormalization, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import pandas as pd
import scipy
%matplotlib inline
Then i try to create simple compile model like this
def compile_model(model):
    """Compile ``model`` in place for sparse-label classification.

    Uses the 'sparse_categorical_crossentropy' loss, the 'adam' optimizer
    and the 'accuracy' metric.  Returns nothing.
    """
    # YOUR CODE HERE
    compile_settings = {
        'loss': 'sparse_categorical_crossentropy',
        'optimizer': 'adam',
        'metrics': ['accuracy'],
    }
    model.compile(**compile_settings)
So my testing function is like this
# Build a throw-away two-layer model and compile it with compile_model().
test_model = Sequential([Dense(100),
Dense(2, activation='softmax')])
compile_model(test_model)
# Check the optimizer type and the loss recorded on the compiled model.
assert isinstance(test_model.optimizer, tf.keras.optimizers.Adam)
assert hasattr(test_model, 'loss')
assert test_model.loss == 'sparse_categorical_crossentropy'
# Reads a private attribute; this is the line that raises the
# AttributeError reported below.
assert ['accuracy'] == test_model._compile_metrics
del test_model
After i ran above code blocks i got this error
AttributeError: 'Sequential' object has no attribute '_compile_metrics'
But i can't seems find any actual document about _compile_metrics
Am i missing something or is it about tensorflow version?
Please help.
Thanks!
Basically, it is about the version: the sample I was given is supposed to run on TensorFlow 2.0.0, but I ran it on 2.4.0. If I run the code on 2.0.0, it works fine.
Update
The OP's answer only works in TF 2.0 and 2.1. From TF 2.2 to 2.5, it won't work.
To get the metric name, like accuracy you have to run the model at least one epoch or on a single batch.
def compile_model(model):
    """Compile ``model`` in place: sparse categorical cross-entropy loss,
    'adam' optimizer, 'accuracy' metric.  Returns nothing."""
    # YOUR CODE HERE
    loss_name = 'sparse_categorical_crossentropy'
    tracked_metrics = ['accuracy']
    model.compile(loss=loss_name, optimizer='adam', metrics=tracked_metrics)
# Same checks as in the question, minus the private _compile_metrics read.
test_model = Sequential([Dense(256, ),
Dense(2, activation='softmax')])
compile_model(test_model)
assert isinstance(test_model.optimizer, tf.keras.optimizers.Adam)
assert hasattr(test_model, 'loss')
assert test_model.loss == 'sparse_categorical_crossentropy'
Run-on single epoch with dummy set
# One pass over random dummy data: 37432 samples of 512 features with
# integer labels drawn from {0, 1}.
test_model.fit(x = np.random.uniform(0,1, (37432,512)),
y = np.random.randint(0,2, (37432,1)))
test_model.loss # sparse_categorical_crossentropy
test_model.metrics_names # ['loss', 'accuracy']
# The metric names are read through the public metrics_names attribute.
assert 'loss' == test_model.metrics_names[0]
assert 'accuracy' == test_model.metrics_names[1]
I am currently using the skopt (scikit-optimize) package for hyperparameter tuning of a neural network (I am trying to minimize -1 * accuracy). It seems to run fine (and successfully prints to the console) for several iterations before it raises ValueError: array must not contain infs or NaNs.
What are some possible causes of this? My data does not contain infs or NaNs and neither do my search parameter ranges. The neural network code is quite long, so for brevity, I will paste the relevant sections:
Imports:
import pandas as pd
import numpy as np
from skopt import gp_minimize
from skopt.utils import use_named_args
from skopt.space import Real, Categorical, Integer
from tensorflow.python.framework import ops
from sklearn.model_selection import train_test_split
import tensorflow
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv1D, Dropout, MaxPooling1D, Flatten
from keras import backend as K
Creation of search parameters:
# Search space: one skopt dimension per tunable hyper-parameter.  Each
# `name` matches a keyword argument of create_model() / fitness().
dim_num_filters_L1 = Integer(low=1, high=50, name='num_filters_L1')
#dim_kernel_size_L1 = Integer(low=1, high=70, name='kernel_size_L1')
dim_activation_L1 = Categorical(categories=['relu', 'linear', 'softmax'], name='activation_L1')
dim_num_filters_L2 = Integer(low=1, high=50, name='num_filters_L2')
#dim_kernel_size_L2 = Integer(low=1, high=70, name='kernel_size_L2')
dim_activation_L2 = Categorical(categories=['relu', 'linear', 'softmax'], name='activation_L2')
dim_num_dense_nodes = Integer(low=1, high=28, name='num_dense_nodes')
dim_activation_L3 = Categorical(categories=['relu', 'linear', 'softmax'], name='activation_L3')
dim_dropout_rate = Real(low = 0, high = 0.5, name = 'dropout_rate')
dim_learning_rate = Real(low=1e-4, high=1e-2, name='learning_rate')
# The order of this list is the order of the values inside each candidate
# point; the kernel-size dimensions are currently disabled.
dimensions = [dim_num_filters_L1,
#dim_kernel_size_L1,
dim_activation_L1,
dim_num_filters_L2,
#dim_kernel_size_L2,
dim_activation_L2,
dim_num_dense_nodes,
dim_activation_L3,
dim_dropout_rate,
dim_learning_rate,
]
Function that creates all models that will be tested:
def create_model(num_filters_L1,  # kernel_size_L1,
                 activation_L1,
                 num_filters_L2,  # kernel_size_L2,
                 activation_L2,
                 num_dense_nodes, activation_L3,
                 dropout_rate,
                 learning_rate):
    """Build and compile one candidate 1-D CNN for the hyper-parameter search.

    Two Conv1D + MaxPooling1D stages (kernel sizes fixed at 40 and 20) are
    followed by Flatten, a Dense layer, Dropout and a linear output layer
    with ``y_train.shape[1]`` units.  The input shape is taken from the
    module-level ``X_train``.  The model is compiled with Adam at
    ``learning_rate``, a mean-squared-error loss and the 'accuracy' metric.
    """
    model = Sequential()
    layer_stack = [
        Conv1D(num_filters_L1, kernel_size=40, activation=activation_L1,
               input_shape=(X_train.shape[1], 1)),
        MaxPooling1D(pool_size=2),
        Conv1D(num_filters_L2, kernel_size=20, activation=activation_L2),
        MaxPooling1D(pool_size=2),
        Flatten(),
        Dense(num_dense_nodes, activation=activation_L3),
        Dropout(dropout_rate),
        Dense(y_train.shape[1], activation='linear'),
    ]
    for layer in layer_stack:
        model.add(layer)

    model.compile(
        optimizer=tensorflow.keras.optimizers.Adam(learning_rate=learning_rate),
        loss='mean_squared_error',
        metrics=['accuracy'],
    )
    return model
Define fitness function:
# The decorator unpacks the list of values that gp_minimize passes in into
# the keyword arguments below, matched by dimension name.  It must be a real
# decorator ("@"), not a comment, or fitness() is called with one argument.
@use_named_args(dimensions=dimensions)
def fitness(num_filters_L1,  # kernel_size_L1,
            activation_L1,
            num_filters_L2,  # kernel_size_L2,
            activation_L2,
            num_dense_nodes, activation_L3,
            dropout_rate,
            learning_rate):
    """Objective for ``gp_minimize``: train one candidate and score it.

    Builds a model with ``create_model``, trains it for 10 epochs on
    ``X_train``/``y_train`` (validating on ``X_val``/``y_val``), evaluates
    it on ``X_test``/``y_test`` and returns the negated accuracy, because
    the optimizer minimises its objective.
    """
    model = create_model(num_filters_L1,  # kernel_size_L1,
                         activation_L1,
                         num_filters_L2,  # kernel_size_L2,
                         activation_L2,
                         num_dense_nodes, activation_L3,
                         dropout_rate,
                         learning_rate)
    model.fit(x=X_train,
              y=y_train,
              validation_data=(X_val, y_val),
              shuffle=True,
              verbose=2,
              epochs=10)
    # evaluate() returns the loss followed by the compiled metrics; index 1
    # is the accuracy, measured here on the held-out test set.
    accuracy_opt = model.evaluate(X_test, y_test)[1]
    # Print the classification accuracy:
    print("Experimental Model Accuracy: {0:.2%}".format(accuracy_opt))
    # Delete the Keras model with these hyper-parameters from memory:
    del model
    # Clear the Keras session, otherwise it will keep adding new models to
    # the same TensorFlow graph each time we create a model with a different
    # set of hyper-parameters.
    K.clear_session()
    ops.reset_default_graph()
    # The optimizer aims for the lowest score, so return negative accuracy
    # (the original returned the undefined name `accuracy`).
    return -accuracy_opt  # or sum(RMSE)?
Run hyperparameter search:
# Run the search over `dimensions`, minimising `fitness`.
gp_result = gp_minimize(func=fitness,
dimensions=dimensions)
# The objective is negative accuracy, so flip the sign and scale to percent.
print("best accuracy was " + str(round(gp_result.fun *-100,2))+"%.")
One of the activation functions in your search space fails to converge for some of the randomly sampled points that the acquisition function tries. I encountered this problem and fixed it by removing the 'relu' activation from the search space.
When the model is taking sufficiently long to infer (i.e. enough parameters and data big enough), and when profile_batch is on, the TensorBoard callback fails to write the training metrics to the log events (at least they are not visible in Tensorboard).
Here is the code used to get that failure:
import os.path as op
import time
import numpy as np
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.layers import Conv2D, Input
from tensorflow.keras.models import Model
# Minimal reproduction: a two-layer conv model fitted on a single random
# 512x512 image, with the TensorBoard callback's profiler enabled.
size = 512
im = Input((size, size, 1))
im_conv = Conv2D(512, 3, padding='same', activation='relu')(im)
im_conv = Conv2D(1, 3, padding='same', activation='linear')(im_conv)
model = Model(im, im_conv)
model.compile(loss='mse', optimizer='adam', metrics=['mae'])
data = np.random.rand(1, size, size, 1)
# One log directory per run, named after the current Unix timestamp.
run_id = f'{int(time.time())}'
log_dir = op.join('logs', run_id)
tboard_cback = TensorBoard(
log_dir=log_dir,
histogram_freq=0,
write_graph=False,
write_images=False,
# profile_batch is the setting the surrounding text identifies as the
# trigger; profile_batch=0 disables profiling.
profile_batch=2,
)
# The same array is used as input, target and validation data.
# NOTE(review): validation_data is given as a list here; confirm a tuple is
# not required by the TensorFlow version in use.
model.fit(
x=data,
y=data,
validation_data=[data, data],
callbacks=[tboard_cback,],
epochs=100,
verbose=0,
);
Here is the Tensorboard viz I have:
Is there something wrong with the way I am using this callback?
I use Python 3.6.8, tensorflow 2.0.0 on GPU (but the behaviour is the same on CPU).
So apparently, this is due to the profiling done in the callback. We can disable it via profile_batch=0. The issue is ongoing and to be followed here: https://github.com/tensorflow/tensorboard/issues/2084