I have downloaded a pretrained Pytorch cifar model from this link that it has a custom architecture. I want to convert this model to a Keras model. I know that there are some tools to do that. For example I can convert the Pytorch model to a standard model like onnx or IR.
Now I have used the IR interface model and the following code to load and save the entire model:
import torch
import torch.nn as nn
import numpy as np
from torch.autograd import Variable
import torch as th
from collections import OrderedDict
class CIFAR(nn.Module):
def __init__(self, features, n_channel, num_classes):
super(CIFAR, self).__init__()
assert isinstance(features, nn.Sequential), type(features)
self.features = features
self.classifier = nn.Sequential(
nn.Linear(n_channel, num_classes)
)
def forward(self, x):
x = self.features(x)
x = x.view(x.size(0), -1)
x = self.classifier(x)
return x
def make_layers(cfg, batch_norm=False):
layers = []
in_channels = 3
for i, v in enumerate(cfg):
if v == 'M':
layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
else:
padding = v[1] if isinstance(v, tuple) else 1
out_channels = v[0] if isinstance(v, tuple) else v
conv2d = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=padding)
if batch_norm:
layers += [conv2d, nn.BatchNorm2d(out_channels, affine=False), nn.ReLU()]
else:
layers += [conv2d, nn.ReLU()]
in_channels = out_channels
return nn.Sequential(*layers)
n_channel=128
cfg = [n_channel, n_channel, 'M', 2*n_channel, 2*n_channel, 'M', 4*n_channel, 4*n_channel, 'M', (8*n_channel, 0), 'M']
layers = make_layers(cfg, batch_norm=True)
model = CIFAR(layers, n_channel=8*n_channel, num_classes=10)
pretrained=True
if pretrained:
m = th.load('MY_PATH/cifar10-d875770b.pth')
state_dict = m.state_dict() if isinstance(m, nn.Module) else m
assert isinstance(state_dict, (dict, OrderedDict)), type(state_dict)
model.load_state_dict(state_dict)
torch.save(model, 'MY_PATH/pytorch.pth')
Now it's time to convert the above Pytorch model pytorch.pth to IR model.
In the cmd I enter this command from the example of here:
mmtoir -f pytorch -d IRModel --inputShape 3,32,32 -n pytorch.pth
But this error appears:
Traceback (most recent call last):
File "c:\users\***\anaconda3\lib\site-packages\mmdnn\conversion\pytorch\pytorch_parser.py", line 76, in __init__
model = torch.load(model_file_name)
File "c:\users\***\anaconda3\lib\site-packages\torch\serialization.py", line 387, in load
return _load(f, map_location, pickle_module, **pickle_load_args)
File "c:\users\***\anaconda3\lib\site-packages\torch\serialization.py", line 574, in _load
result = unpickler.load()
AttributeError: Can't get attribute 'CIFAR' on <module '__main__' from 'C:\\Users\\***\\Anaconda3\\Scripts\\mmtoir.exe\\__main__.py'>
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "c:\users\***\anaconda3\lib\runpy.py", line 193, in _run_module_as_main
"__main__", mod_spec)
File "c:\users\***\anaconda3\lib\runpy.py", line 85, in _run_code
exec(code, run_globals)
File "C:\Users\***\Anaconda3\Scripts\mmtoir.exe\__main__.py", line 9, in <module>
File "c:\users\***\anaconda3\lib\site-packages\mmdnn\conversion\_script\convertToIR.py", line 192, in _main
ret = _convert(args)
File "c:\users\***\anaconda3\lib\site-packages\mmdnn\conversion\_script\convertToIR.py", line 92, in _convert
parser = PytorchParser(model, inputshape[0])
File "c:\users\***\anaconda3\lib\site-packages\mmdnn\conversion\pytorch\pytorch_parser.py", line 78, in __init__
model = torch.load(model_file_name, map_location='cpu')
File "c:\users\***\anaconda3\lib\site-packages\torch\serialization.py", line 387, in load
return _load(f, map_location, pickle_module, **pickle_load_args)
File "c:\users\***\anaconda3\lib\site-packages\torch\serialization.py", line 574, in _load
result = unpickler.load()
AttributeError: Can't get attribute 'CIFAR' on <module '__main__' from 'C:\\Users\\***\\Anaconda3\\Scripts\\mmtoir.exe\\__main__.py'>
How can I solve that?
Even though it's been long since this question was asked, I'll share the answer.
The solution in my case was fairly simple, I had installed mmdnn in an anaconda env. And the file mmtoir was stored in the following location:
'user_name/anaconda3/envs/env_name/bin/mmtoir'
And there needed to be an explicit reference of my Neural Network class in this file. Meaning in your case, you need to:
1) Locate this file
2) Copy the definition of CIFAR class
class CIFAR(nn.Module):
def __init__(self, features, n_channel, num_classes):
super(CIFAR, self).__init__()
assert isinstance(features, nn.Sequential), type(features)
self.features = features
self.classifier = nn.Sequential(
nn.Linear(n_channel, num_classes)
)
def forward(self, x):
x = self.features(x)
x = x.view(x.size(0), -1)
x = self.classifier(x)
return x
into mmtoir file. Save it and run the command again.
And above is one of the two methods to solve this issue and is relatively simple. You can read this discussion for further details.
Note: MMdnn currently supports PyTorch=0.4.0 only. So make sure your model was trained on the same version as well. With PyTorch>0.4.0 all steps will run fine but will throw an error at the end.
Related
I am trying to use group normalization as the batch size I can use is small due to memory constraints. To do that I am changing the code in common.py.
class Conv(nn.Module):
# Standard convolution
def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups
super().__init__()
self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
self.bn = nn.BatchNorm2d(c2)
self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity())
def forward(self, x):
return self.act(self.bn(self.conv(x)))
def forward_fuse(self, x):
return self.act(self.conv(x))
In this code, the only change done is self.bn = nn.BatchNorm2d(c2) to self.bn = nn.GroupNorm(8, c2)
Now, when trying to run with the command: python3 train.py --data data/coco128.yaml --weights '' --cfg models/yolov5s.yaml --hyp data/hyps/hyp.scratch-low.yaml --epochs 300 --batch 16 --img 640, I get this error.
Traceback (most recent call last):
File "train.py", line 643, in <module>
main(opt)
File "train.py", line 539, in main
train(opt.hyp, opt, device, callbacks)
File "train.py", line 170, in train
optimizer = SGD(g0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True)
File "/home/dori/env3.8/lib/python3.8/site-packages/torch/optim/sgd.py", line 101, in __init__
super(SGD, self).__init__(params, defaults)
File "/home/dori/env3.8/lib/python3.8/site-packages/torch/optim/optimizer.py", line 49, in __init__
raise ValueError("optimizer got an empty parameter list")
ValueError: optimizer got an empty parameter list
There are questions with the same issue posted, suggesting to use nn.ModuleList in place of a list, however, they don't apply here otherwise the BatchNorm2d should also have failed.
The environment is Python 3.7.6, below is my import:
import os, sys
import tensorflow as tf # v2.2.0
tf.compat.v1.enable_eager_execution()
import numpy as np
import matplotlib.pyplot as plt
from sys import platform
import time
import random
import pickle
from tensorflow.keras.layers import ReLU
I tried to clone a tf.keras.Model without success, because ReLU is an unknown activation. Nevertheless, the initiation was successful, so the system should know what ReLU is. I wonder how to fix this.
def init_model(D=8, W=256):
# This is a simple MLP neural network
# D: The number of layers
# H: The neurons in each layer
relu = ReLU()
dense = lambda W=W, act=relu: tf.keras.layers.Dense(W, activation=act, dtype='float32')
inputs = tf.keras.Input(shape=(3 + 3 * 2 * L_embed))
outputs = inputs
for i in range(D):
outputs = dense()(outputs)
if i % 4 == 0 and i > 0:
outputs = tf.concat([outputs, inputs], -1)
outputs = dense(4, act=None)(outputs)
model = tf.keras.Model(inputs=inputs, outputs=outputs)
return model
Then I called:
model_inner_copy = tf.keras.models.clone_model(model)
The error message is:
File "D:/Code Repository/Meta-NeRF/Code/NeRF/Tiny_MAML_NeRF.py", line 211, in train_maml_nerf
model_inner_copy = tf.keras.models.clone_model(model)
File "C:\Users\Jack\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\keras\models.py", line 427, in clone_model
model, input_tensors=input_tensors, layer_fn=clone_function)
File "C:\Users\Jack\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\keras\models.py", line 196, in _clone_functional_model
model, new_input_layers, layer_fn)
File "C:\Users\Jack\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\keras\models.py", line 246, in _clone_layers_and_model_config
config = network.get_network_config(model, serialize_layer_fn=_copy_layer)
File "C:\Users\Jack\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\keras\engine\network.py", line 2119, in get_network_config
layer_config = serialize_layer_fn(layer)
File "C:\Users\Jack\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\keras\models.py", line 243, in _copy_layer
created_layers[layer.name] = layer_fn(layer)
File "C:\Users\Jack\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\keras\models.py", line 61, in _clone_layer
return layer.__class__.from_config(layer.get_config())
File "C:\Users\Jack\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\keras\engine\base_layer.py", line 655, in from_config
return cls(**config)
File "C:\Users\Jack\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\keras\layers\core.py", line 1135, in __init__
self.activation = activations.get(activation)
File "C:\Users\Jack\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\keras\activations.py", line 465, in get
identifier, printable_module_name='activation')
File "C:\Users\Jack\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\keras\utils\generic_utils.py", line 362, in deserialize_keras_object
config, module_objects, custom_objects, printable_module_name)
File "C:\Users\Jack\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\keras\utils\generic_utils.py", line 321, in class_and_config_for_serialized_keras_object
raise ValueError('Unknown ' + printable_module_name + ': ' + class_name)
ValueError: Unknown activation: ReLU
So the problem is that tf.keras.layers.ReLU is a layer that implements the ReLU activation, but it is not an activation function by itself. It is meant to be used as a layer inside your model, not as a parameter of your Dense layer.
To have a function that works as an activation to give as a parameter to Dense, you should use tf.keras.activations.relu.
I have implemented a minimal example of Wavenet, closely following the steps from here - https://github.com/basveeling/wavenet.
The issue is, that the model uses a custom layer, which works fine during training but once the model is reloaded, Keras cannot find the Causal Layer, even though I am using custom objects.
I am using tensorflow 1.13 and keras 2.2.4
Here is an example of the first three key/value pairs for objects.
objects = {'initial_causal_conv': <class 'wavenet_utils.CausalConv1D'>,
'dilated_conv_1_tanh_s0': <class 'wavenet_utils.CausalConv1D'>,
'dilated_conv_1_sigm_s0': <class 'wavenet_utils.CausalConv1D'>,
'...': <class 'wavenet_utils.CausalConv1D'>,
'...': <class 'wavenet_utils.CausalConv1D'>}
model.fit(x=[x_tr1, x_tr2],
y=y_tr1,
epochs=epochs,
batch_size=batch_size,
validation_data=([x_vl1, x_vl2], y_vl1),
callbacks=[checkpoint, early_stopping],
verbose=verbose,
shuffle=True,
class_weight=class_weight)
model = load_model('model.h5', custom_objects=objects)
Which then returns this error:
Traceback (most recent call last):
File "/home/xxx/PycharmProjects/WAVE/DATA_NN.py", line 48, in <module>
objects=objects)
File "/home/xxx/PycharmProjects/WAVE/functions.py", line 572, in run_neural_net
model = load_model('model_conv.h5', custom_objects=objects)
File "/home/xxx/PycharmProjects/WAVE/venv/lib/python3.6/site-packages/keras/engine/saving.py", line 419, in load_model
model = _deserialize_model(f, custom_objects, compile)
File "/home/xxx/PycharmProjects/WAVE/venv/lib/python3.6/site-packages/keras/engine/saving.py", line 225, in _deserialize_model
model = model_from_config(model_config, custom_objects=custom_objects)
File "/home/xxx/PycharmProjects/WAVE/venv/lib/python3.6/site-packages/keras/engine/saving.py", line 458, in model_from_config
return deserialize(config, custom_objects=custom_objects)
File "/home/xxx/PycharmProjects/WAVE/venv/lib/python3.6/site-packages/keras/layers/__init__.py", line 55, in deserialize
printable_module_name='layer')
File "/home/xxx/PycharmProjects/WAVE/venv/lib/python3.6/site-packages/keras/utils/generic_utils.py", line 145, in deserialize_keras_object
list(custom_objects.items())))
File "/home/xxx/PycharmProjects/WAVE/venv/lib/python3.6/site-packages/keras/engine/network.py", line 1022, in from_config
process_layer(layer_data)
File "/home/xxx/PycharmProjects/WAVE/venv/lib/python3.6/site-packages/keras/engine/network.py", line 1008, in process_layer
custom_objects=custom_objects)
File "/home/xxx/PycharmProjects/WAVE/venv/lib/python3.6/site-packages/keras/layers/__init__.py", line 55, in deserialize
printable_module_name='layer')
File "/home/xxx/PycharmProjects/WAVE/venv/lib/python3.6/site-packages/keras/utils/generic_utils.py", line 138, in deserialize_keras_object
': ' + class_name)
ValueError: Unknown layer: CausalConv1D
When building the model, CausalConv1D must be imported from wavenet_utils.py
Below is the full build_model function
And here is wavenet_utils, containing the class CausalConv1D:
from keras.layers import Conv1D
from keras.utils.conv_utils import conv_output_length
import tensorflow as tf
class CausalConv1D(Conv1D):
def __init__(self, filters, kernel_size, init='glorot_uniform', activation=None,
padding='valid', strides=1, dilation_rate=1, bias_regularizer=None,
activity_regularizer=None, kernel_constraint=None, bias_constraint=None, use_bias=True, causal=False,
output_dim=1,
**kwargs):
self.output_dim = output_dim
super(CausalConv1D, self).__init__(filters,
kernel_size=kernel_size,
strides=strides,
padding=padding,
dilation_rate=dilation_rate,
activation=activation,
use_bias=use_bias,
kernel_initializer=init,
activity_regularizer=activity_regularizer,
bias_regularizer=bias_regularizer,
kernel_constraint=kernel_constraint,
bias_constraint=bias_constraint,
**kwargs)
self.causal = causal
if self.causal and padding != 'valid':
raise ValueError("Causal mode dictates border_mode=valid.")
def build(self, input_shape):
super(CausalConv1D, self).build(input_shape)
def call(self, x):
if self.causal:
def asymmetric_temporal_padding(x, left_pad=1, right_pad=1):
pattern = [[0, 0], [left_pad, right_pad], [0, 0]]
return tf.pad(x, pattern)
x = asymmetric_temporal_padding(x, self.dilation_rate[0] * (self.kernel_size[0] - 1), 0)
return super(CausalConv1D, self).call(x)
def compute_output_shape(self, input_shape):
input_length = input_shape[1]
if self.causal:
input_length += self.dilation_rate[0] * (self.kernel_size[0] - 1)
length = conv_output_length(input_length,
self.kernel_size[0],
self.padding,
self.strides[0],
dilation=self.dilation_rate[0])
shape = tf.TensorShape(input_shape).as_list()
shape[-1] = self.output_dim
return (input_shape[0], length, self.filters)
def get_config(self):
base_config = super(CausalConv1D, self).get_config()
base_config['output_dim'] = self.output_dim
return base_config
EDIT:
I have tried this approach before as well.
objects = {'CausalConv1D': <class 'wavenet_utils.CausalConv1D'>}
model.fit(x=[x_tr1, x_tr2],
y=y_tr1,
epochs=epochs,
batch_size=batch_size,
validation_data=([x_vl1, x_vl2], y_vl1),
callbacks=[checkpoint, early_stopping],
verbose=verbose,
shuffle=True,
class_weight=class_weight)
model = load_model('model.h5', custom_objects=objects)
Which then returns this error:
Traceback (most recent call last):
File "/home/xxx/PycharmProjects/WAVE/DATA_NN.py", line 47, in <module>
objects=objects)
File "/home/xxx/PycharmProjects/WAVE/functions.py", line 574, in run_neural_net
model = load_model('model.h5', custom_objects=objects)
File "/home/xxx/PycharmProjects/WAVE/venv/lib/python3.6/site-packages/keras/engine/saving.py", line 419, in load_model
model = _deserialize_model(f, custom_objects, compile)
File "/home/xxx/PycharmProjects/WAVE/venv/lib/python3.6/site-packages/keras/engine/saving.py", line 225, in _deserialize_model
model = model_from_config(model_config, custom_objects=custom_objects)
File "/home/xxx/PycharmProjects/WAVE/venv/lib/python3.6/site-packages/keras/engine/saving.py", line 458, in model_from_config
return deserialize(config, custom_objects=custom_objects)
File "/home/xxx/PycharmProjects/WAVE/venv/lib/python3.6/site-packages/keras/layers/__init__.py", line 55, in deserialize
printable_module_name='layer')
File "/home/xxx/PycharmProjects/WAVE/venv/lib/python3.6/site-packages/keras/utils/generic_utils.py", line 145, in deserialize_keras_object
list(custom_objects.items())))
File "/home/xxx/PycharmProjects/WAVE/venv/lib/python3.6/site-packages/keras/engine/network.py", line 1022, in from_config
process_layer(layer_data)
File "/home/xxx/PycharmProjects/WAVE/venv/lib/python3.6/site-packages/keras/engine/network.py", line 1008, in process_layer
custom_objects=custom_objects)
File "/home/xxx/PycharmProjects/WAVE/venv/lib/python3.6/site-packages/keras/layers/__init__.py", line 55, in deserialize
printable_module_name='layer')
File "/home/xxx/PycharmProjects/WAVE/venv/lib/python3.6/site-packages/keras/utils/generic_utils.py", line 147, in deserialize_keras_object
return cls.from_config(config['config'])
File "/home/xxx/PycharmProjects/WAVE/venv/lib/python3.6/site-packages/keras/engine/base_layer.py", line 1109, in from_config
return cls(**config)
File "/home/xxx/PycharmProjects/WAVE/wavenet_utils.py", line 26, in __init__
**kwargs)
TypeError: __init__() got multiple values for keyword argument 'kernel_initializer'
Could this be the issue mentioned here https://github.com/keras-team/keras/issues/12316?
And if so, is there any way around it?
There is only one custom object, which is CausalConv1D.
objects = {'CausalConv1D': wavenet_utils.CausalConv1D}
Now you must be sure that your get_config method is correct and has everything needed in the __init__ method of your layer.
It misses the causal property and has a kernel_initializer coming from the base class that is not supported by your __init__ method.
Let's list every property you need, and then check which ones are in the base config:
filters: in base
kernel_size: in base
init: not in base, but there is kernel_initializer in base!!!!!
kernel_initializer is a config item that your __init__ method doesn't support
rename this init parameter to kernel_initializer
activation: in base
padding: in base
strides: in base
dilation_rate: in base
bias_regularizer: in base
activity_regularizer: in base
kernel_constraint: in base
bias_constraint: in base
use_bias: in base
causal: not in base!
must add this in your config! (or the model will always use the default value)
output_dim: not in base!
**kwargs: in base
Layer's __init__:
def __init__(self, filters, kernel_size,
############## here:
kernel_initializer='glorot_uniform',
#############
activation=None,
padding='valid', strides=1, dilation_rate=1, bias_regularizer=None,
activity_regularizer=None, kernel_constraint=None, bias_constraint=None, use_bias=True, causal=False,
output_dim=1,
**kwargs):
Layer's get_config
It must contain all __init__ params that are not in the base class:
def get_config(self):
base_config = super(CausalConv1D, self).get_config()
base_config['causal'] = self.causal
base_config['output_dim'] = self.output_dim
return base_config
Somehow, no approach I've tried so far has been able to correctly load the model when using load_model. Below is a simple work around which only saves the weights, then deletes the existing model, builds a new one and compiles it again, and loads saved the weights which do save correctly, even with custom layers present.
model = build_model()
checkpoint = ModelCheckpoint('model.h5', monitor='val_acc',
verbose=1, save_best_only=True, save_weights_only=True, mode='max')
model.fit(x, y)
del model
model = build_model()
model.load_weights('model.h5')
model.predict(x_test)
I'm attempting to train a regression model to predict attributes of music such as BPM. The model takes in spectrograms of audio snippets that are 256x128px png files and outputs a couple continuous values. I have the following code so far that I have developed based upon this guide on the tensorflow website:
import tensorflow as tf
import os
import random
import pathlib
AUTOTUNE = tf.data.experimental.AUTOTUNE
TRAINING_DATA_DIR = r'specgrams'
def gen_model():
model = tf.keras.models.Sequential([
tf.keras.layers.Flatten(input_shape=(256, 128, 3)),
tf.keras.layers.Dense(256, activation='relu'),
tf.keras.layers.Dense(2)
])
model.compile(optimizer=tf.keras.optimizers.RMSprop(0.001),
loss='mse',
metrics=['mse', 'mae'])
return model
def fetch_batch(batch_size=1000):
all_image_paths = []
all_image_labels = []
data_root = pathlib.Path(TRAINING_DATA_DIR)
files = data_root.iterdir()
for file in files:
file = str(file)
all_image_paths.append(os.path.abspath(file))
label = file[:-4].split('-')[2:]
label = float(label[0]) / 200, int(label[1]) / 1000.0
all_image_labels.append(label)
def preprocess_image(path):
img_raw = tf.io.read_file(path)
image = tf.image.decode_png(img_raw, channels=3)
image = tf.image.resize(image, [256, 128])
image /= 255.0
return image
def preprocess(path, label):
return preprocess_image(path), label
path_ds = tf.data.Dataset.from_tensor_slices(all_image_paths)
image_ds = path_ds.map(preprocess_image, num_parallel_calls=AUTOTUNE)
label_ds = tf.data.Dataset.from_tensor_slices(all_image_labels)
ds = tf.data.Dataset.zip((image_ds, label_ds))
ds = ds.shuffle(buffer_size=len(os.listdir(TRAINING_DATA_DIR)))
ds = ds.repeat()
ds = ds.batch(batch_size)
ds = ds.prefetch(buffer_size=AUTOTUNE)
return ds
ds = fetch_batch()
model = gen_model()
model.fit(ds, epochs=1, steps_per_epoch=10)
However I believe I have made a mistake with the structure of my model or how I am preprocessing the training data because I get an error about incorrect dimensions but I'm struggling to narrow down exactly where the issue is. I understand that the guide I followed was for classification problem as opposed to regression and my "labels" are an array of 2 value which is what is causing the problem but I'm not sure how to resolve this.
For context the filenames are in the format xxx-xxx-A-B.png where A and B are the two desired output values of the model. A is a floating-point value somewhere between 70 and 180 and B is an integer value between 0-1000. As such the label variable for each image looks something like this: (0.64, 0.319).
This is the error I am seeing when I attempt to execute the above script:
Traceback (most recent call last):
File "C:\Users\cainy\Desktop\BeatNet\training.py", line 60, in <module>
model.fit(ds, epochs=1, steps_per_epoch=3)
File "C:\Users\cainy\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\keras\engine\training.py", line 791, in fit
initial_epoch=initial_epoch)
File "C:\Users\cainy\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\keras\engine\training.py", line 1515, in fit_generator
steps_name='steps_per_epoch')
File "C:\Users\cainy\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\keras\engine\training_generator.py", line 257, in model_iteration
batch_outs = batch_function(*batch_data)
File "C:\Users\cainy\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\keras\engine\training.py", line 1259, in train_on_batch
outputs = self._fit_function(ins) # pylint: disable=not-callable
File "C:\Users\cainy\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\keras\backend.py", line 3217, in __call__
outputs = self._graph_fn(*converted_inputs)
File "C:\Users\cainy\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\eager\function.py", line 558, in __call__
return self._call_flat(args)
File "C:\Users\cainy\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\eager\function.py", line 627, in _call_flat
outputs = self._inference_function.call(ctx, args)
File "C:\Users\cainy\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\eager\function.py", line 415, in call
ctx=ctx)
File "C:\Users\cainy\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\eager\execute.py", line 66, in quick_execute
six.raise_from(core._status_to_exception(e.code, message), None)
File "<string>", line 3, in raise_from
tensorflow.python.framework.errors_impl.InvalidArgumentError: Can not squeeze dim[1], expected a dimension of 1, got 2
[[{{node metrics/accuracy/Squeeze}}]] [Op:__inference_keras_scratch_graph_734]
Edit: I have uploaded the source code to GitHub here.
You currently only have 1 output - a tensor with length 2 (per batch element). If you want to use/monitor separate losses you'll need to unstack it in both the model output and the labels.
I'm not sure if models.Sequential will be suitable, but you can definitely use the functional API:
def gen_model():
inputs = tf.keras.layers.Input(shape=(256, 128, 3), dtype=tf.float32)
x = inputs
x = tf.keras.layers.Dense(256, activation='relu')
x = tf.keras.layers.Dense(2)
a, b = tf.keras.layers.Lambda(tf.unstack, arguments=dict(axis=-1))(x)
model = tf.keras.models.Model(inputs=inputs, outputs=[a, b])
model.compile(optimizer=tf.keras.optimizers.RMSprop(0.001),
loss=['mse', 'mae'],
metrics=[['mse'], ['mae']])
return model
And in your preprocessing:
def preprocess(path, label):
return preprocess_image(path), tf.unstack(label, axis=-1)
I'm trying to create a recurrent neural network with the Keras functional API in TensorFlow. The RNN takes in tweets and classifies them as positive or negative.
attention_input = keras.Input(shape=(512,), name='attention')
a = keras.layers.Dense(1, activation='sigmoid')(attention_input)
attention_output = keras.layers.Multiply()([attention_input, a])
attention = keras.Model(inputs=attention_input, outputs=attention_output, name='attention_model')
inputs1 = keras.Input(shape=(100,), name='lstm')
x = keras.layers.Embedding(len(tokenizer.word_counts)+1,
100,
weights=[embedding_matrix],
input_length=100,
trainable=True)(inputs1)
x = keras.layers.Bidirectional(tf.keras.layers.LSTM(256, return_sequences=True))(x)
x = keras.layers.TimeDistributed(attention)(x)
x = tf.unstack(x, num=256)
t_sum = x[0]
for i in range(256 - 1):
t_sum = keras.layers.Add()([t_sum, x[i+1]])
lstm = keras.Model(inputs=inputs1, outputs=t_sum, name='lstm_model')
inputs2 = keras.Input(shape=(100,), name='dense')
x = keras.layers.Dense(256, activation='relu')(inputs2)
x = keras.layers.Dropout(0.2)(x)
x = keras.layers.Dense(128, activation='relu')(x)
x = keras.layers.Dropout(0.2)(x)
outputs2 = keras.layers.Dense(1, activation='sigmoid')(x)
dense = keras.Model(inputs=inputs2, outputs=outputs2, name='txt_model')
inputs = keras.Input(shape=(100,), name='text')
x = lstm(inputs)
outputs = dense(x)
model = keras.Model(inputs=inputs, outputs=outputs, name='text_model')
model.compile(
loss = 'binary_crossentropy',
optimizer = 'adam',
metrics = ['acc',
tf.keras.metrics.Precision(),
tf.keras.metrics.Recall()])
I get the following runtime error
2019-04-13 10:29:34.855192: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2
Traceback (most recent call last):
File ".\main.py", line 25, in <module>
' -> '.join(permutation).lower() : { ** results.get(' -> '.join(permutation).lower(), {}), ** framework.runtime.evaluate(path, permutation, classifiers, cached) }
File "C:\Users\steff\Desktop\Skole\MsT\framework\framework\runtime.py", line 30, in evaluate
classifier.lower() : framework.classifiers.list[classifier.lower()](data)
File "C:\Users\steff\Desktop\Skole\MsT\framework\framework\classifiers\rnn.py", line 93, in evaluate
x = lstm(inputs)
File "C:\Users\steff\AppData\Roaming\Python\Python36\site-packages\tensorflow\python\keras\engine\base_layer.py", line 612, in __call__
outputs = self.call(inputs, *args, **kwargs)
File "C:\Users\steff\AppData\Roaming\Python\Python36\site-packages\tensorflow\python\keras\engine\network.py", line 870, in call
return self._run_internal_graph(inputs, training=training, mask=mask)
File "C:\Users\steff\AppData\Roaming\Python\Python36\site-packages\tensorflow\python\keras\engine\network.py", line 1011, in _run_internal_graph
output_tensors = layer(computed_tensors, **kwargs)
File "C:\Users\steff\AppData\Roaming\Python\Python36\site-packages\tensorflow\python\keras\engine\base_layer.py", line 669, in __call__
self.set_weights(self._initial_weights)
File "C:\Users\steff\AppData\Roaming\Python\Python36\site-packages\tensorflow\python\keras\engine\base_layer.py", line 938, in set_weights
param_values = backend.batch_get_value(params)
File "C:\Users\steff\AppData\Roaming\Python\Python36\site-packages\tensorflow\python\keras\backend.py", line 2837, in batch_get_value
raise RuntimeError('Cannot get value inside Tensorflow graph function.')
RuntimeError: Cannot get value inside Tensorflow graph function.
I can see from the errors that it has something to do with my LSTM model, but I can't see what is the cause of the problem.
I think that you are using Tensorflow 2.0. If this is the case then using the parameter embeddings_initializer= instead of weights= worked.
x = tf.keras.layers.Embedding(vocabulary_size, embedding_dim, embeddings_initializer=tf.keras.initializers.Constant(embedding_matrix), trainable=False)