Related
I am writing a code for deeplearning and I am currently trying to implement Focal Loss in my code, as my model isnt able to train itself in its current state. I am using a GPU. Whenever I run the code, I get the TypeError stated in the title. I have installed the focal loss package and am trying to implement it in my code with:
from focal_loss import BinaryFocalLoss
When I try to run my U-net and have my model train itself while using FocalLoss, I keep getting the error stated earlier. My U-net code is as follows:
class UNet(torch.nn.Module):
def __init__(self, n_classes=1, in_ch=1):
super().__init__()
# list of layers in encoder-decoder with number of filters
c = [16, 32, 64, 128]
# first conv layer that receives the image
self.conv1 = torch.nn.Sequential(
conv3x3_bn(in_ch, c[0]),
conv3x3_bn(c[0], c[0]),
)
# encoder layers
self.conv2 = encoder_conv(c[0], c[1])
self.conv3 = encoder_conv(c[1], c[2])
self.conv4 = encoder_conv(c[2], c[3])
# decoder layers
self.deconv1 = deconv(c[3],c[2])
self.deconv2 = deconv(c[2],c[1])
self.deconv3 = deconv(c[1],c[0])
# last layer conv that gives us the mask
self.out = torch.nn.Conv2d(c[0], n_classes, 3, padding=1)
def forward(self, x):
# encoder
x1 = self.conv1(x)
x2 = self.conv2(x1)
x3 = self.conv3(x2)
x = self.conv4(x3)
# decoder
x = self.deconv1(x, x3)
x = self.deconv2(x, x2)
x = self.deconv3(x, x1)
x = self.out(x)
return x
def conv3x3_bn(ci, co):
return torch.nn.Sequential(
torch.nn.Conv2d(ci, co, 3, padding=1),
torch.nn.BatchNorm2d(co),
torch.nn.ReLU(inplace=True)
)
def encoder_conv(ci, co):
return torch.nn.Sequential(
torch.nn.MaxPool2d(2),
conv3x3_bn(ci, co),
conv3x3_bn(co, co),
)
class deconv(torch.nn.Module):
def __init__(self, ci, co):
super(deconv, self).__init__()
self.upsample = torch.nn.ConvTranspose2d(ci, co, 2, stride=2)
self.conv1 = conv3x3_bn(ci, co)
self.conv2 = conv3x3_bn(co, co)
# receives the output of the previous layer and the output of the stage
# corresponding encoder
def forward(self, x1, x2):
x1 = self.upsample(x1)
diffX = x2.size()[2] - x1.size()[2]
diffY = x2.size()[3] - x1.size()[3]
x1 = F.pad(x1, (diffX, 0, diffY, 0))
# we concatenate the tensors
x = torch.cat([x2, x1], dim=1)
x = self.conv1(x)
x = self.conv2(x)
return x
My DataSet class is as follows:
class Dataset(torch.utils.data.Dataset):
def __init__(self, X, y, n_classes=1):
self.X = X
self.y = y
self.n_classes = n_classes
def __len__(self):
return len(self.X)
def __getitem__(self, ix):
img = nib.load(self.X[ix]).get_data()
mask = nib.load(self.y[ix]).get_data()
mask[mask==2] = 0
img = torch.tensor(img).unsqueeze(0)
#mask = (np.arange(self.n_classes) == mask[...,None]).astype(np.float32)
#return img, torch.from_numpy(mask).permute(2,0,1)
mask = torch.tensor(np.uint8(mask)).unsqueeze(0)
return img, mask
The training model is as follows:
from tqdm import tqdm #allows us to output a smart progress bar
def fit(model, dataloader, epochs=50, lr=3e-4):
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
criterion = BinaryFocalLoss(gamma=2) #loss
#criterion = WeightedFocalLoss()
model.to(device) #move model to device, which is the GPU
hist = {'loss': [], 'iou': [], 'evaluation_loss': [], 'evaluation_iou': []}
for epoch in range(1, epochs+1):
bar = tqdm(dataloader['train']) #creates a smart progress bar for train data
train_loss, train_iou = [], [] #create empty lists that are to be filled
model.train()
for imgs, masks in bar: #training the model
imgs, masks = imgs.float().to(device), masks.float().to(device)
optimizer.zero_grad()
y_hat = model(imgs)
y_hat = (y_hat - torch.min(y_hat))/(torch.max(y_hat) - torch.min(y_hat))
#print(y_hat.shape, masks.shape, y_hat.type(), masks.type(), np.unique(y_hat.detach().cpu().numpy()), np.unique(masks.detach().cpu().numpy()))
#print(y_hat.device, masks.device)
#plt.imshow(np.unique(masks.cpu().numpy(), return_counts=True))
loss = criterion(y_hat, masks)
print('a lot of attributes')
loss.backward() #GRADIENT DECENT, adam optimizer
optimizer.step() #updating model with the new gradients
ious = iou(y_hat, masks)
train_loss.append(loss.item())
train_iou.append(ious)
bar.set_description(f"loss {np.mean(train_loss):.5f} iou {np.mean(train_iou):.5f}")
hist['loss'].append(np.mean(train_loss))
hist['iou'].append(np.mean(train_iou))
bar = tqdm(dataloader['evaluation']) #creates a smart progress bar for evaluation data
evaluation_loss, evaluation_iou = [], [] #create empty lists for evaluation loss and iou
model.eval()
with torch.no_grad(): #evaluate the model
for imgs, masks in bar:
imgs, masks = imgs.float().to(device), masks.float().to(device)
y_hat = model(imgs)
y_hat = (y_hat - torch.min(y_hat))/(torch.max(y_hat) - torch.min(y_hat))
loss = criterion(y_hat, masks)
ious = iou(y_hat, masks)
evaluation_loss.append(loss.item())
evaluation_iou.append(ious)
bar.set_description(f"evaluation_loss {np.mean(evaluation_loss):.5f} evaluation_iou {np.mean(evaluation_iou):.5f}")
hist['evaluation_loss'].append(np.mean(evaluation_loss))
hist['evaluation_iou'].append(np.mean(evaluation_iou))
print(f"\nEpoch {epoch}/{epochs} loss {np.mean(train_loss):.5f} iou {np.mean(train_iou):.5f} evaluation_loss {np.mean(evaluation_loss):.5f} evaluation_iou {np.mean(evaluation_iou):.5f}")
return hist
When I run:
model = UNet()
hist = fit(model, dataloader, epochs=50)
I keep getting the following output & error:
TypeError Traceback (most recent call last)
~/tmp/ipykernel_84346/4198189162.py in <module>
1 model = UNet()
----> 2 hist = fit(model, dataloader, epochs=50)
~/tmp/ipykernel_84346/2960851020.py in fit(model, dataloader, epochs, lr)
19 #print(y_hat.device, masks.device)
20 #plt.imshow(np.unique(masks.cpu().numpy(), return_counts=True))
---> 21 loss = criterion(y_hat, masks)
22 print('a lot of attributes')
23 loss.backward() #GRADIENT DECENT, adam optimizer
~/.local/lib/python3.7/site-packages/keras/losses.py in __call__(self, y_true, y_pred, sample_weight)
139 else:
140 call_fn = tf.__internal__.autograph.tf_convert(self.call, tf.__internal__.autograph.control_status_ctx())
--> 141 losses = call_fn(y_true, y_pred)
142 return losses_utils.compute_weighted_loss(
143 losses, sample_weight, reduction=self._get_reduction())
~/.local/lib/python3.7/site-packages/focal_loss/_binary_focal_loss.py in call(self, y_true, y_pred)
394 pos_weight=self.pos_weight,
395 from_logits=self.from_logits,
--> 396 label_smoothing=self.label_smoothing)
397
398
~/.local/lib/python3.7/site-packages/focal_loss/_binary_focal_loss.py in binary_focal_loss(y_true, y_pred, gamma, pos_weight, from_logits, label_smoothing)
250 # Ensure predictions are a floating point tensor; converting labels to a
251 # tensor will be done in the helper functions
--> 252 y_pred = tf.convert_to_tensor(y_pred)
253 if not y_pred.dtype.is_floating:
254 y_pred = tf.dtypes.cast(y_pred, dtype=tf.float32)
~/.local/lib/python3.7/site-packages/tensorflow/python/util/dispatch.py in wrapper(*args, **kwargs)
204 """Call target, and fall back on dispatchers if there is a TypeError."""
205 try:
--> 206 return target(*args, **kwargs)
207 except (TypeError, ValueError):
208 # Note: convert_to_eager_tensor currently raises a ValueError, not a
~/.local/lib/python3.7/site-packages/tensorflow/python/framework/ops.py in convert_to_tensor_v2_with_dispatch(value, dtype, dtype_hint, name)
1429 """
1430 return convert_to_tensor_v2(
-> 1431 value, dtype=dtype, dtype_hint=dtype_hint, name=name)
1432
1433
~/.local/lib/python3.7/site-packages/tensorflow/python/framework/ops.py in convert_to_tensor_v2(value, dtype, dtype_hint, name)
1439 name=name,
1440 preferred_dtype=dtype_hint,
-> 1441 as_ref=False)
1442
1443
~/.local/lib/python3.7/site-packages/tensorflow/python/profiler/trace.py in wrapped(*args, **kwargs)
161 with Trace(trace_name, **trace_kwargs):
162 return func(*args, **kwargs)
--> 163 return func(*args, **kwargs)
164
165 return wrapped
~/.local/lib/python3.7/site-packages/tensorflow/python/framework/ops.py in convert_to_tensor(value, dtype, name, as_ref, preferred_dtype, dtype_hint, ctx, accepted_result_types)
1564
1565 if ret is None:
-> 1566 ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
1567
1568 if ret is NotImplemented:
~/.local/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py in _constant_tensor_conversion_function(v, dtype, name, as_ref)
344 as_ref=False):
345 _ = as_ref
--> 346 return constant(v, dtype=dtype, name=name)
347
348
~/.local/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py in constant(value, dtype, shape, name)
270 """
271 return _constant_impl(value, dtype, shape, name, verify_shape=False,
--> 272 allow_broadcast=True)
273
274
~/.local/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py in _constant_impl(value, dtype, shape, name, verify_shape, allow_broadcast)
281 with trace.Trace("tf.constant"):
282 return _constant_eager_impl(ctx, value, dtype, shape, verify_shape)
--> 283 return _constant_eager_impl(ctx, value, dtype, shape, verify_shape)
284
285 g = ops.get_default_graph()
~/.local/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py in _constant_eager_impl(ctx, value, dtype, shape, verify_shape)
306 def _constant_eager_impl(ctx, value, dtype, shape, verify_shape):
307 """Creates a constant on the current device."""
--> 308 t = convert_to_eager_tensor(value, ctx, dtype)
309 if shape is None:
310 return t
~/.local/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py in convert_to_eager_tensor(value, ctx, dtype)
104 dtype = dtypes.as_dtype(dtype).as_datatype_enum
105 ctx.ensure_initialized()
--> 106 return ops.EagerTensor(value, ctx.device_name, dtype)
107
108
~/.local/lib/python3.7/site-packages/torch/_tensor.py in __array__(self, dtype)
641 return handle_torch_function(Tensor.__array__, (self,), self, dtype=dtype)
642 if dtype is None:
--> 643 return self.numpy()
644 else:
645 return self.numpy().astype(dtype, copy=False)
TypeError: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.
I have been stuck with this problem for 3 days and I cant seem to find a solution. Can someone please help me out?
The output, after changing device to cpu, has become:
RuntimeError Traceback (most recent call last)
~/tmp/ipykernel_56578/1031642734.py in <module>
1 model = UNet()
----> 2 hist = fit(model, dataloader, epochs=5)
~/tmp/ipykernel_56578/2559002388.py in fit(model, dataloader, epochs, lr)
19 #print(y_hat.device, masks.device)
20 #plt.imshow(np.unique(masks.cpu().numpy(), return_counts=True))
---> 21 loss = criterion(y_hat, masks)
22 print('a lot of attributes')
23 loss.backward() #GRADIENT DECENT, adam optimizer
~/.local/lib/python3.7/site-packages/keras/losses.py in __call__(self, y_true, y_pred, sample_weight)
139 else:
140 call_fn = tf.__internal__.autograph.tf_convert(self.call, tf.__internal__.autograph.control_status_ctx())
--> 141 losses = call_fn(y_true, y_pred)
142 return losses_utils.compute_weighted_loss(
143 losses, sample_weight, reduction=self._get_reduction())
~/.local/lib/python3.7/site-packages/focal_loss/_binary_focal_loss.py in call(self, y_true, y_pred)
394 pos_weight=self.pos_weight,
395 from_logits=self.from_logits,
--> 396 label_smoothing=self.label_smoothing)
397
398
~/.local/lib/python3.7/site-packages/focal_loss/_binary_focal_loss.py in binary_focal_loss(y_true, y_pred, gamma, pos_weight, from_logits, label_smoothing)
264 return _binary_focal_loss_from_probs(labels=y_true, p=y_pred,
265 gamma=gamma, pos_weight=pos_weight,
--> 266 label_smoothing=label_smoothing)
267
268
~/.local/lib/python3.7/site-packages/focal_loss/_binary_focal_loss.py in _binary_focal_loss_from_probs(labels, p, gamma, pos_weight, label_smoothing)
556 # Combine loss terms
557 if label_smoothing is None:
--> 558 labels = tf.dtypes.cast(labels, dtype=tf.bool)
559 loss = tf.where(labels, pos_loss, neg_loss)
560 else:
~/.local/lib/python3.7/site-packages/tensorflow/python/util/dispatch.py in wrapper(*args, **kwargs)
204 """Call target, and fall back on dispatchers if there is a TypeError."""
205 try:
--> 206 return target(*args, **kwargs)
207 except (TypeError, ValueError):
208 # Note: convert_to_eager_tensor currently raises a ValueError, not a
~/.local/lib/python3.7/site-packages/tensorflow/python/ops/math_ops.py in cast(x, dtype, name)
986 # allows some conversions that cast() can't do, e.g. casting numbers to
987 # strings.
--> 988 x = ops.convert_to_tensor(x, name="x")
989 if x.dtype.base_dtype != base_type:
990 x = gen_math_ops.cast(x, base_type, name=name)
~/.local/lib/python3.7/site-packages/tensorflow/python/profiler/trace.py in wrapped(*args, **kwargs)
161 with Trace(trace_name, **trace_kwargs):
162 return func(*args, **kwargs)
--> 163 return func(*args, **kwargs)
164
165 return wrapped
~/.local/lib/python3.7/site-packages/tensorflow/python/framework/ops.py in convert_to_tensor(value, dtype, name, as_ref, preferred_dtype, dtype_hint, ctx, accepted_result_types)
1564
1565 if ret is None:
-> 1566 ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
1567
1568 if ret is NotImplemented:
~/.local/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py in _constant_tensor_conversion_function(v, dtype, name, as_ref)
344 as_ref=False):
345 _ = as_ref
--> 346 return constant(v, dtype=dtype, name=name)
347
348
~/.local/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py in constant(value, dtype, shape, name)
270 """
271 return _constant_impl(value, dtype, shape, name, verify_shape=False,
--> 272 allow_broadcast=True)
273
274
~/.local/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py in _constant_impl(value, dtype, shape, name, verify_shape, allow_broadcast)
281 with trace.Trace("tf.constant"):
282 return _constant_eager_impl(ctx, value, dtype, shape, verify_shape)
--> 283 return _constant_eager_impl(ctx, value, dtype, shape, verify_shape)
284
285 g = ops.get_default_graph()
~/.local/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py in _constant_eager_impl(ctx, value, dtype, shape, verify_shape)
306 def _constant_eager_impl(ctx, value, dtype, shape, verify_shape):
307 """Creates a constant on the current device."""
--> 308 t = convert_to_eager_tensor(value, ctx, dtype)
309 if shape is None:
310 return t
~/.local/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py in convert_to_eager_tensor(value, ctx, dtype)
104 dtype = dtypes.as_dtype(dtype).as_datatype_enum
105 ctx.ensure_initialized()
--> 106 return ops.EagerTensor(value, ctx.device_name, dtype)
107
108
~/.local/lib/python3.7/site-packages/torch/_tensor.py in __array__(self, dtype)
641 return handle_torch_function(Tensor.__array__, (self,), self, dtype=dtype)
642 if dtype is None:
--> 643 return self.numpy()
644 else:
645 return self.numpy().astype(dtype, copy=False)
RuntimeError: Can't call numpy() on Tensor that requires grad. Use tensor.detach().numpy() instead.
The rest of the code is the same.
I learned to write custom layers from keras.io. Here it is:
class modrelu(Layer):
def __init__(self, **kwargs):
super(modrelu, self).__init__(**kwargs)
def build(self, input_shape):
print(input_shape)
self.b = self.add_weight(name='brad', shape=(input_shape[0][1],), initializer='uniform',
trainable=True)
super(modrelu, self).build(input_shape) # Be sure to call this at the end
def call(self, x):
assert isinstance(x, list)
ip_r, ip_i = x
comp= tf.complex(ip_r, ip_i)
ABS= tf.math.abs(comp)
ANG= tf.math.angle(comp)
ABS= tf.nn.relu( self.b + ABS)
op_i= ABS * tf.sin(ANG) #K.dot ??
op_r= ABS * tf.cos(ANG)
return [op_r, op_i]
def compute_output_shape(self, input_shape):
assert isinstance(input_shape, list)
shape_a, shape_b = input_shape
return [shape_a, shape_b]
act= modrelu()
a=tf.constant(np.array([[1,2], [4,4]]), dtype='float32')
b=tf.constant(np.array([[3,4], [5, -1]]), dtype='float32')
act([a,b])
When run in Eager execution, I get nice outputs. Without that mode, I get very weird error that I cannot even trace to where it started, it is in a different world. Here it is:
--------------------------------------------------------------------------- TypeError Traceback (most recent call
last)
~\AppData\Local\conda\conda\envs\py36\lib\site-packages\tensorflow\python\framework\tensor_util.py
in make_tensor_proto(values, dtype, shape, verify_shape)
526 try:
--> 527 str_values = [compat.as_bytes(x) for x in proto_values]
528 except TypeError:
~\AppData\Local\conda\conda\envs\py36\lib\site-packages\tensorflow\python\framework\tensor_util.py
in (.0)
526 try:
--> 527 str_values = [compat.as_bytes(x) for x in proto_values]
528 except TypeError:
~\AppData\Local\conda\conda\envs\py36\lib\site-packages\tensorflow\python\util\compat.py
in as_bytes(bytes_or_text, encoding)
60 raise TypeError('Expected binary or unicode string, got %r' %
---> 61 (bytes_or_text,))
62
TypeError: Expected binary or unicode string, got Dimension(2)
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call
last) in
2 a=tf.constant(np.array([[1,2], [4,4]]), dtype='float32')
3 b=tf.constant(np.array([[3,4], [5, -1]]), dtype='float32')
----> 4 act([a,b])
~\AppData\Local\conda\conda\envs\py36\lib\site-packages\tensorflow\python\keras\engine\base_layer.py
in call(self, inputs, *args, **kwargs)
744 # the user has manually overwritten the build method do we need to
745 # build it.
--> 746 self.build(input_shapes)
747 # We must set self.built since user defined build functions are not
748 # constrained to set self.built.
in build(self, input_shape)
7 print(input_shape)
8 self.b = self.add_weight(name='brad', shape=(input_shape[0][1],), initializer='uniform',
----> 9 trainable=True)
10 # self.b= K.variable(value=np.random.rand(input_shape[0][1])-0.5,
dtype='float32') #
11 super(modrelu, self).build(input_shape) # Be sure to call this at the end
~\AppData\Local\conda\conda\envs\py36\lib\site-packages\tensorflow\python\keras\engine\base_layer.py
in add_weight(self, name, shape, dtype, initializer, regularizer,
trainable, constraint, partitioner, use_resource, synchronization,
aggregation, **kwargs)
607 collections=collections,
608 synchronization=synchronization,
--> 609 aggregation=aggregation)
610 backend.track_variable(variable)
611
~\AppData\Local\conda\conda\envs\py36\lib\site-packages\tensorflow\python\training\checkpointable\base.py
in _add_variable_with_custom_getter(self, name, shape, dtype,
initializer, getter, overwrite, **kwargs_for_getter)
637 new_variable = getter(
638 name=name, shape=shape, dtype=dtype, initializer=initializer,
--> 639 **kwargs_for_getter)
640
641 # If we set an initializer and the variable processed it, tracking will not
~\AppData\Local\conda\conda\envs\py36\lib\site-packages\tensorflow\python\keras\engine\base_layer.py
in make_variable(name, shape, dtype, initializer, partition_info,
trainable, caching_device, validate_shape, constraint, use_resource,
collections, synchronization, aggregation, partitioner) 1975
collections=collections, 1976
synchronization=synchronization,
-> 1977 aggregation=aggregation) 1978 return v 1979
~\AppData\Local\conda\conda\envs\py36\lib\site-packages\tensorflow\python\ops\variables.py
in call(cls, *args, **kwargs)
181 def call(cls, *args, **kwargs):
182 if cls is VariableV1:
--> 183 return cls._variable_v1_call(*args, **kwargs)
184 elif cls is Variable:
185 return cls._variable_v2_call(*args, **kwargs)
~\AppData\Local\conda\conda\envs\py36\lib\site-packages\tensorflow\python\ops\variables.py
in _variable_v1_call(cls, initial_value, trainable, collections,
validate_shape, caching_device, name, variable_def, dtype,
expected_shape, import_scope, constraint, use_resource,
synchronization, aggregation)
144 use_resource=use_resource,
145 synchronization=synchronization,
--> 146 aggregation=aggregation)
147
148 def _variable_v2_call(cls,
~\AppData\Local\conda\conda\envs\py36\lib\site-packages\tensorflow\python\ops\variables.py
in (**kwargs)
123 aggregation=VariableAggregation.NONE):
124 """Call on Variable class. Useful to force the signature."""
--> 125 previous_getter = lambda **kwargs: default_variable_creator(None, **kwargs)
126 for getter in ops.get_default_graph()._variable_creator_stack: # pylint:
disable=protected-access
127 previous_getter = _make_getter(getter, previous_getter)
~\AppData\Local\conda\conda\envs\py36\lib\site-packages\tensorflow\python\ops\variable_scope.py
in default_variable_creator(next_creator, **kwargs) 2435
caching_device=caching_device, name=name, dtype=dtype, 2436
constraint=constraint, variable_def=variable_def,
-> 2437 import_scope=import_scope) 2438 else: 2439 return variables.RefVariable(
~\AppData\Local\conda\conda\envs\py36\lib\site-packages\tensorflow\python\ops\variables.py
in call(cls, *args, **kwargs)
185 return cls._variable_v2_call(*args, **kwargs)
186 else:
--> 187 return super(VariableMetaclass, cls).call(*args, **kwargs)
188
189
~\AppData\Local\conda\conda\envs\py36\lib\site-packages\tensorflow\python\ops\resource_variable_ops.py
in init(self, initial_value, trainable, collections,
validate_shape, caching_device, name, dtype, variable_def,
import_scope, constraint)
295 name=name,
296 dtype=dtype,
--> 297 constraint=constraint)
298
299 # pylint: disable=unused-argument
~\AppData\Local\conda\conda\envs\py36\lib\site-packages\tensorflow\python\ops\resource_variable_ops.py
in _init_from_args(self, initial_value, trainable, collections,
validate_shape, caching_device, name, dtype, constraint)
407 with ops.name_scope("Initializer"), ops.device(None):
408 initial_value = ops.convert_to_tensor(
--> 409 initial_value() if init_from_fn else initial_value,
410 name="initial_value", dtype=dtype)
411 self._handle = eager_safe_variable_handle(
~\AppData\Local\conda\conda\envs\py36\lib\site-packages\tensorflow\python\keras\engine\base_layer.py
in () 1957 initializer = initializer(dtype=dtype)
1958 init_val = lambda: initializer( # pylint:
disable=g-long-lambda
-> 1959 shape, dtype=dtype, partition_info=partition_info) 1960 variable_dtype = dtype.base_dtype 1961 if use_resource
is None:
~\AppData\Local\conda\conda\envs\py36\lib\site-packages\tensorflow\python\ops\init_ops.py
in call(self, shape, dtype, partition_info)
253 dtype = self.dtype
254 return random_ops.random_uniform(
--> 255 shape, self.minval, self.maxval, dtype, seed=self.seed)
256
257 def get_config(self):
~\AppData\Local\conda\conda\envs\py36\lib\site-packages\tensorflow\python\ops\random_ops.py
in random_uniform(shape, minval, maxval, dtype, seed, name)
233 maxval = 1
234 with ops.name_scope(name, "random_uniform", [shape, minval, maxval]) as name:
--> 235 shape = _ShapeTensor(shape)
236 minval = ops.convert_to_tensor(minval, dtype=dtype, name="min")
237 maxval = ops.convert_to_tensor(maxval, dtype=dtype, name="max")
~\AppData\Local\conda\conda\envs\py36\lib\site-packages\tensorflow\python\ops\random_ops.py
in _ShapeTensor(shape)
42 else:
43 dtype = None
---> 44 return ops.convert_to_tensor(shape, dtype=dtype, name="shape")
45
46
~\AppData\Local\conda\conda\envs\py36\lib\site-packages\tensorflow\python\framework\ops.py
in convert_to_tensor(value, dtype, name, preferred_dtype) 1048
name=name, 1049 preferred_dtype=preferred_dtype,
-> 1050 as_ref=False) 1051 1052
~\AppData\Local\conda\conda\envs\py36\lib\site-packages\tensorflow\python\framework\ops.py
in internal_convert_to_tensor(value, dtype, name, as_ref,
preferred_dtype, ctx) 1144 1145 if ret is None:
-> 1146 ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref) 1147 1148 if ret is NotImplemented:
~\AppData\Local\conda\conda\envs\py36\lib\site-packages\tensorflow\python\framework\constant_op.py
in _constant_tensor_conversion_function(v, dtype, name, as_ref)
227 as_ref=False):
228 _ = as_ref
--> 229 return constant(v, dtype=dtype, name=name)
230
231
~\AppData\Local\conda\conda\envs\py36\lib\site-packages\tensorflow\python\framework\constant_op.py
in constant(value, dtype, shape, name, verify_shape)
206 tensor_value.tensor.CopyFrom(
207 tensor_util.make_tensor_proto(
--> 208 value, dtype=dtype, shape=shape, verify_shape=verify_shape))
209 dtype_value = attr_value_pb2.AttrValue(type=tensor_value.tensor.dtype)
210 const_tensor = g.create_op(
~\AppData\Local\conda\conda\envs\py36\lib\site-packages\tensorflow\python\framework\tensor_util.py
in make_tensor_proto(values, dtype, shape, verify_shape)
529 raise TypeError("Failed to convert object of type %s to Tensor. "
530 "Contents: %s. Consider casting elements to a "
--> 531 "supported type." % (type(values), values))
532 tensor_proto.string_val.extend(str_values)
533 return tensor_proto
TypeError: Failed to convert object of type to Tensor.
Contents: (Dimension(2),). Consider casting elements to a supported
type.
You are getting shape as TensorShape instance. You could use as_list() attribute to convert it to a Python list if the execution is not eager:
if tf.executing_eagerly():
shape = (input_shape[0][1], )
else:
shape = (input_shape[0].as_list()[1], )
In your code it will be like this:
import tensorflow as tf
from tensorflow.keras.layers import Layer
import numpy as np
class modrelu(Layer):
def __init__(self, **kwargs):
super(modrelu, self).__init__(**kwargs)
def build(self, input_shape):
if tf.executing_eagerly():
shape = (input_shape[0][1])
else:
shape = (input_shape[0].as_list()[1], )
self.b = self.add_weight(name='brad',
shape=shape,
initializer='uniform',
trainable=True)
super(modrelu, self).build(input_shape) # Be sure to call this at the end
def call(self, x):
assert isinstance(x, list)
ip_r, ip_i = x
comp = tf.complex(ip_r, ip_i)
ABS = tf.math.abs(comp)
ANG = tf.math.angle(comp)
ABS = tf.nn.relu( self.b + ABS)
op_i = ABS * tf.sin(ANG) #K.dot ??
op_r = ABS * tf.cos(ANG)
return [op_r, op_i]
def compute_output_shape(self, input_shape):
assert isinstance(input_shape, list)
shape_a, shape_b = input_shape
return [shape_a, shape_b]
act = modrelu()
a = tf.constant(np.array([[1,2], [4,4]]), dtype='float32')
b = tf.constant(np.array([[3,4], [5, -1]]), dtype='float32')
act([a,b])
# [<tf.Tensor 'modrelu_6/mul_1:0' shape=(2, 2) dtype=float32>,
# <tf.Tensor 'modrelu_6/mul:0' shape=(2, 2) dtype=float32>]
This is a duplicate Question that i posted earlier today, in the other question i was using an old version of Keras. I've upgraded to Keras 2.0.0 and still was getting a lot of errors that i can't figure out on my own so i'm reposting the question mostly verbatim.
I am trying to understand how to use keras for supply chain forecasting and i keep getting errors that i can't find help for elsewhere. I've tried to do similar tutorials; sunspot forecasting tutorial, pollution multivariate tutorial etc but i'm still not understanding how the input_shape argument works or how to organize my data to get it to be accepted by keras.
My dataset is a single time series describing the number of products we sold every month. I took that single time series, 107 months, and turned it into a 30 row, 77 column data set. I created a training set and test set from that.
from command prompt:
Successfully uninstalled Keras-1.2.0
Successfully installed keras-2.0.0
Python Version: 3.5.4
Here's the code and respective errors i'm getting.
model = Sequential()
model.add(LSTM(input_shape=(77, 1), output_dim = 10))
Traceback
C:\Python35\lib\site-packages\keras\backend\tensorflow_backend.py in concatenate(tensors, axis)
1219 A tensor.
-> 1220 """
1221 zero = _to_tensor(0., x.dtype.base_dtype)
AttributeError: module 'tensorflow' has no attribute 'concat_v2'
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
<ipython-input-42-ee393fff874d> in <module>()
1 model = Sequential()
----> 2 model.add(LSTM(input_shape=(77, 1), output_dim = 10))
3 #model.add(Dense(10, activation = 'relu'))
4 #model.add(Dense(1, activation = 'softmax'))
C:\Python35\lib\site-packages\keras\models.py in add(self, layer)
292 '`Sequential.from_config(config)`?')
293 return layer_module.deserialize(config, custom_objects=custom_objects)
--> 294
295
296 def model_from_yaml(yaml_string, custom_objects=None):
C:\Python35\lib\site-packages\keras\engine\topology.py in create_input_layer(self, batch_input_shape, input_dtype, name)
396
397 # Check ndim.
--> 398 if spec.ndim is not None:
399 if K.ndim(x) != spec.ndim:
400 raise ValueError('Input ' + str(input_index) +
C:\Python35\lib\site-packages\keras\engine\topology.py in __call__(self, x, mask)
541 # Handle automatic shape inference (only useful for Theano).
542 input_shape = _collect_input_shape(inputs)
--> 543
544 # Actually call the layer, collecting output(s), mask(s), and shape(s).
545 output = self.call(inputs, **kwargs)
C:\Python35\lib\site-packages\keras\layers\recurrent.py in build(self, input_shape)
761 constants.append(dp_mask)
762 else:
--> 763 constants.append([K.cast_to_floatx(1.) for _ in range(3)])
764
765 if 0 < self.recurrent_dropout < 1:
C:\Python35\lib\site-packages\keras\backend\tensorflow_backend.py in concatenate(tensors, axis)
1220 """
1221 zero = _to_tensor(0., x.dtype.base_dtype)
-> 1222 inf = _to_tensor(np.inf, x.dtype.base_dtype)
1223 x = tf.clip_by_value(x, zero, inf)
1224 return tf.sqrt(x)
C:\Python35\lib\site-packages\tensorflow\python\ops\array_ops.py in concat(values, axis, name)
1041 ops.convert_to_tensor(axis,
1042 name="concat_dim",
-> 1043 dtype=dtypes.int32).get_shape(
1044 ).assert_is_compatible_with(tensor_shape.scalar())
1045 return identity(values[0], name=scope)
C:\Python35\lib\site-packages\tensorflow\python\framework\ops.py in convert_to_tensor(value, dtype, name, preferred_dtype)
674 name=name,
675 preferred_dtype=preferred_dtype,
--> 676 as_ref=False)
677
678
C:\Python35\lib\site-packages\tensorflow\python\framework\ops.py in internal_convert_to_tensor(value, dtype, name, as_ref, preferred_dtype)
739
740 if ret is None:
--> 741 ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
742
743 if ret is NotImplemented:
C:\Python35\lib\site-packages\tensorflow\python\framework\constant_op.py in _constant_tensor_conversion_function(v, dtype, name, as_ref)
111 as_ref=False):
112 _ = as_ref
--> 113 return constant(v, dtype=dtype, name=name)
114
115
C:\Python35\lib\site-packages\tensorflow\python\framework\constant_op.py in constant(value, dtype, shape, name, verify_shape)
100 tensor_value = attr_value_pb2.AttrValue()
101 tensor_value.tensor.CopyFrom(
--> 102 tensor_util.make_tensor_proto(value, dtype=dtype, shape=shape, verify_shape=verify_shape))
103 dtype_value = attr_value_pb2.AttrValue(type=tensor_value.tensor.dtype)
104 const_tensor = g.create_op(
C:\Python35\lib\site-packages\tensorflow\python\framework\tensor_util.py in make_tensor_proto(values, dtype, shape, verify_shape)
372 nparray = np.empty(shape, dtype=np_dt)
373 else:
--> 374 _AssertCompatible(values, dtype)
375 nparray = np.array(values, dtype=np_dt)
376 # check to them.
C:\Python35\lib\site-packages\tensorflow\python\framework\tensor_util.py in _AssertCompatible(values, dtype)
300 else:
301 raise TypeError("Expected %s, got %s of type '%s' instead." %
--> 302 (dtype.name, repr(mismatch), type(mismatch).__name__))
303
304
TypeError: Expected int32, got <tf.Variable 'lstm_7_W_i:0' shape=(1, 10) dtype=float32_ref> of type 'Variable' instead.
I think that the problem goes around TF version. Version compatibility between Keras and TF is a problem that probably anyone has faced, as TF API changes a lot in a small period of time.
I think that for Keras 2.2.X you need a TF version > 1.10.X
Try updating it and see if the problem is fixed!
I was trying to train a language model with Keras on a corpus of 35 tweets.
I get the error mentioned in the title, with the following trace back:
ValueError Traceback (most recent call last)
<ipython-input-101-5ed366712809> in <module>()
----> 1 create_model(X, Y, max_len, total_words)
<ipython-input-100-798dd17a8b2b> in create_model(predictors, label, max_sequence_len, total_words)
3
4 model = Sequential()
----> 5 model.add(Embedding(total_words, 10))
6 model.add(LSTM(150))
7 model.add(Dropout(0.1))
/usr/local/lib/python3.6/dist-packages/keras/models.py in add(self, layer)
495 # and create the node connecting the current layer
496 # to the input layer we just created.
--> 497 layer(x)
498
499 if len(layer._inbound_nodes[-1].output_tensors) != 1:
/usr/local/lib/python3.6/dist-packages/keras/engine/topology.py in __call__(self, inputs, **kwargs)
590 '`layer.build(batch_input_shape)`')
591 if len(input_shapes) == 1:
--> 592 self.build(input_shapes[0])
593 else:
594 self.build(input_shapes)
/usr/local/lib/python3.6/dist-packages/keras/layers/embeddings.py in build(self, input_shape)
103 regularizer=self.embeddings_regularizer,
104 constraint=self.embeddings_constraint,
--> 105 dtype=self.dtype)
106 self.built = True
107
/usr/local/lib/python3.6/dist-packages/keras/legacy/interfaces.py in wrapper(*args, **kwargs)
89 warnings.warn('Update your `' + object_name +
90 '` call to the Keras 2 API: ' + signature, stacklevel=2)
---> 91 return func(*args, **kwargs)
92 wrapper._original_function = func
93 return wrapper
/usr/local/lib/python3.6/dist-packages/keras/engine/topology.py in add_weight(self, name, shape, dtype, initializer, regularizer, trainable, constraint)
411 if dtype is None:
412 dtype = K.floatx()
--> 413 weight = K.variable(initializer(shape),
414 dtype=dtype,
415 name=name,
/usr/local/lib/python3.6/dist-packages/keras/initializers.py in __call__(self, shape, dtype)
110 def __call__(self, shape, dtype=None):
111 return K.random_uniform(shape, self.minval, self.maxval,
ā-> 112 dtype=dtype, seed=self.seed)
113
114 def get_config(self):
/usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py in random_uniform(shape, minval, maxval, dtype, seed)
3836 seed = np.random.randint(10e6)
3837 return tf.random_uniform(shape, minval=minval, maxval=maxval,
-> 3838 dtype=dtype, seed=seed)
3839
3840
/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/random_ops.py in random_uniform(shape, minval, maxval, dtype, seed, name)
232 maxval = 1
233 with ops.name_scope(name, "random_uniform", [shape, minval, maxval]) as name:
--> 234 shape = _ShapeTensor(shape)
235 minval = ops.convert_to_tensor(minval, dtype=dtype, name="min")
236 maxval = ops.convert_to_tensor(maxval, dtype=dtype, name="max")
/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/random_ops.py in _ShapeTensor(shape)
41 else:
42 dtype = None
---> 43 return ops.convert_to_tensor(shape, dtype=dtype, name="shape")
44
45
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ops.py in convert_to_tensor(value, dtype, name, preferred_dtype)
1009 name=name,
1010 preferred_dtype= preferred_dtype,
-> 1011 as_ref=False)
1012
1013
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ops.py in internal_convert_to_tensor(value, dtype, name, as_ref, preferred_dtype, ctx)
1105
1106 if ret is None:
-> 1107 ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
1108
1109 if ret is NotImplemented:
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/constant_op.py in _constant_tensor_conversion_function(v, dtype, name, as_ref)
215 as_ref=False):
216 _ = as_ref
--> 217 return constant(v, dtype=dtype, name=name)
218
219
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/constant_op.py in constant(value, dtype, shape, name, verify_shape)
194 tensor_value.tensor.CopyFrom(
195 tensor_util.make_tensor_proto(
ā> 196 value, dtype=dtype, shape=shape, verify_shape=verify_shape))
197 dtype_value = attr_value_pb2.AttrValue(type=tensor_value.tensor.dtype)
198 const_tensor = g.create_op(
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/tensor_util.py in make_tensor_proto(values, dtype, shape, verify_shape)
443 """ - got shape %s, but wanted %s.""" %
444 (values, list(nparray.shape),
--> 445 _GetDenseDimensions(values)))
446
447 # python/numpy default float type is float64. We prefer float32 instead.
Here is all my code:
from keras.preprocessing.sequence import pad_sequences
from keras.layers import Embedding, LSTM, Dense, Dropout
from keras.preprocessing.text import Tokenizer
from keras.callbacks import EarlyStopping
from keras.models import Sequential
import keras.utils as ku
import numpy as np
def dataset_prep(data):
corpus = data
tokenizer.fit_on_texts(corpus)
total_words = len(tokenizer.word_index) + 1
input_sequences = []
for line in corpus:
token_list = tokenizer.texts_to_sequences([line])[0]
for i in range(1, len(token_list)):
n_gram_sequence = token_list[:i+1]
input_sequences.append(n_gram_sequence)
max_sequence_len = max([len(x) for x in input_sequences])
input_sequences = np.array(pad_sequences(input_sequences, maxlen=max_sequence_len,
padding='pre'))
predictors, next_word = input_sequences[:,:-1], input_sequences[:,-1]
label = ku.to_categorical(next_word, num_classes=total_words)
return input_sequences[:25], input_sequences[25:], max_sequence_len, list(input_sequences)
def create_model(predictors, label, max_sequence_len, total_words):
input_len = max_sequence_len
model = Sequential()
model.add(Embedding(total_words, 10))
model.add(LSTM(150))
model.add(Dropout(0.1))
model.add(Dense(total_words, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam')
history = model.fit(predictors, label, epochs=100, verbose=1)
print(history.history['loss'], history.history['val_loss'])
X, Y, max_len, total_words = dataset_prep(tweet_text)
create_model(X, Y, max_len, total_words)
Iām using Google Colaboratory, with Keras v2.1.6 and TensorFlow 1.9.0
The code is all Python 3 with a Tesla K80 GPU for runtime compilation.
I am trying to implement seq2seq model for text summarization using Tensorflow 1.3.0.
I am trying to use MultiRNNCell and bidirectional_dynamic_rnn in encoding layer. I am missing something, but unable to find it. The error stack trace is not straight forward which makes it more difficult to understand.
I am getting below error while building the Graph.
---------------------------------------------------------------------------
InvalidArgumentError Traceback (most recent call last)
~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/common_shapes.py in _call_cpp_shape_fn_impl(op, input_tensors_needed, input_tensors_as_shapes_needed, require_shape_fn)
653 graph_def_version, node_def_str, input_shapes, input_tensors,
--> 654 input_tensors_as_shapes, status)
655 except errors.InvalidArgumentError as err:
~/anaconda2/envs/tensorflow/lib/python3.5/contextlib.py in __exit__(self, type, value, traceback)
65 try:
---> 66 next(self.gen)
67 except StopIteration:
~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/errors_impl.py in raise_exception_on_not_ok_status()
465 compat.as_text(pywrap_tensorflow.TF_Message(status)),
--> 466 pywrap_tensorflow.TF_GetCode(status))
467 finally:
InvalidArgumentError: Dimensions must be equal, but are 512 and 256 for 'decoding/decoder/while/BasicDecoderStep/decoder/multi_rnn_cell/cell_0/cell_0/basic_lstm_cell/mul' (op: 'Mul') with input shapes: [?,512], [?,256].
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
<ipython-input-119-85ee67bc88e5> in <module>()
9 # Create the training and inference logits
10 training_logits, inference_logits = seq2seq_model(input_,target,embeding_matrix,vocab_to_int,source_seq_length,target_seq_length,
---> 11 max_target_seq_length,rnn_size,keep_probability,num_layers,batch_size)
12
13 # Create tensors for the training logits and inference logits
<ipython-input-114-5ad1bf459bd7> in seq2seq_model(source_input, target_input, embeding_matrix, vocab_to_int, source_sequence_length, target_sequence_length, max_target_length, rnn_size, keep_prob, num_layers, batch_size)
15 training_logits, inference_logits = decoding_layer(target_input,encoder_states,embedings,
16 vocab_to_int,rnn_size,target_sequence_length,
---> 17 max_target_length,batch_size,num_layers)
18
19 return training_logits, inference_logits
<ipython-input-113-c2b4542605d2> in decoding_layer(target_inputs, encoder_state, embedding, vocab_to_int, rnn_size, target_sequence_length, max_target_length, batch_size, num_layers)
12
13 training_logits = training_decoder(embed,decoder_cell,encoder_state,output_layer,
---> 14 target_sequence_length,max_target_length)
15
16
<ipython-input-117-012bbcdcf997> in training_decoder(dec_embed_input, decoder_cell, encoder_state, output_layer, target_sequence_length, max_target_length)
17
18 final_outputs, final_state = tf.contrib.seq2seq.dynamic_decode(decoder=decoder,impute_finished=True,
---> 19 maximum_iterations=max_target_length)
20
21 return final_outputs
~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/contrib/seq2seq/python/ops/decoder.py in dynamic_decode(decoder, output_time_major, impute_finished, maximum_iterations, parallel_iterations, swap_memory, scope)
284 ],
285 parallel_iterations=parallel_iterations,
--> 286 swap_memory=swap_memory)
287
288 final_outputs_ta = res[1]
~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py in while_loop(cond, body, loop_vars, shape_invariants, parallel_iterations, back_prop, swap_memory, name)
2773 context = WhileContext(parallel_iterations, back_prop, swap_memory, name)
2774 ops.add_to_collection(ops.GraphKeys.WHILE_CONTEXT, context)
-> 2775 result = context.BuildLoop(cond, body, loop_vars, shape_invariants)
2776 return result
2777
~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py in BuildLoop(self, pred, body, loop_vars, shape_invariants)
2602 self.Enter()
2603 original_body_result, exit_vars = self._BuildLoop(
-> 2604 pred, body, original_loop_vars, loop_vars, shape_invariants)
2605 finally:
2606 self.Exit()
~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py in _BuildLoop(self, pred, body, original_loop_vars, loop_vars, shape_invariants)
2552 structure=original_loop_vars,
2553 flat_sequence=vars_for_body_with_tensor_arrays)
-> 2554 body_result = body(*packed_vars_for_body)
2555 if not nest.is_sequence(body_result):
2556 body_result = [body_result]
~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/contrib/seq2seq/python/ops/decoder.py in body(time, outputs_ta, state, inputs, finished, sequence_lengths)
232 """
233 (next_outputs, decoder_state, next_inputs,
--> 234 decoder_finished) = decoder.step(time, inputs, state)
235 next_finished = math_ops.logical_or(decoder_finished, finished)
236 if maximum_iterations is not None:
~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/contrib/seq2seq/python/ops/basic_decoder.py in step(self, time, inputs, state, name)
137 """
138 with ops.name_scope(name, "BasicDecoderStep", (time, inputs, state)):
--> 139 cell_outputs, cell_state = self._cell(inputs, state)
140 if self._output_layer is not None:
141 cell_outputs = self._output_layer(cell_outputs)
~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py in __call__(self, inputs, state, scope)
178 with vs.variable_scope(vs.get_variable_scope(),
179 custom_getter=self._rnn_get_variable):
--> 180 return super(RNNCell, self).__call__(inputs, state)
181
182 def _rnn_get_variable(self, getter, *args, **kwargs):
~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/layers/base.py in __call__(self, inputs, *args, **kwargs)
448 # Check input assumptions set after layer building, e.g. input shape.
449 self._assert_input_compatibility(inputs)
--> 450 outputs = self.call(inputs, *args, **kwargs)
451
452 # Apply activity regularization.
~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py in call(self, inputs, state)
936 [-1, cell.state_size])
937 cur_state_pos += cell.state_size
--> 938 cur_inp, new_state = cell(cur_inp, cur_state)
939 new_states.append(new_state)
940
~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py in __call__(self, inputs, state, scope)
772 self._recurrent_input_noise,
773 self._input_keep_prob)
--> 774 output, new_state = self._cell(inputs, state, scope)
775 if _should_dropout(self._state_keep_prob):
776 new_state = self._dropout(new_state, "state",
~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py in __call__(self, inputs, state, scope)
178 with vs.variable_scope(vs.get_variable_scope(),
179 custom_getter=self._rnn_get_variable):
--> 180 return super(RNNCell, self).__call__(inputs, state)
181
182 def _rnn_get_variable(self, getter, *args, **kwargs):
~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/layers/base.py in __call__(self, inputs, *args, **kwargs)
448 # Check input assumptions set after layer building, e.g. input shape.
449 self._assert_input_compatibility(inputs)
--> 450 outputs = self.call(inputs, *args, **kwargs)
451
452 # Apply activity regularization.
~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py in call(self, inputs, state)
405
406 new_c = (
--> 407 c * sigmoid(f + self._forget_bias) + sigmoid(i) * self._activation(j))
408 new_h = self._activation(new_c) * sigmoid(o)
409
~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/ops/math_ops.py in binary_op_wrapper(x, y)
863 else:
864 raise
--> 865 return func(x, y, name=name)
866
867 def binary_op_wrapper_sparse(sp_x, y):
~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/ops/math_ops.py in _mul_dispatch(x, y, name)
1086 is_tensor_y = isinstance(y, ops.Tensor)
1087 if is_tensor_y:
-> 1088 return gen_math_ops._mul(x, y, name=name)
1089 else:
1090 assert isinstance(y, sparse_tensor.SparseTensor) # Case: Dense * Sparse.
~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/ops/gen_math_ops.py in _mul(x, y, name)
1447 A `Tensor`. Has the same type as `x`.
1448 """
-> 1449 result = _op_def_lib.apply_op("Mul", x=x, y=y, name=name)
1450 return result
1451
~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/op_def_library.py in apply_op(self, op_type_name, name, **keywords)
765 op = g.create_op(op_type_name, inputs, output_types, name=scope,
766 input_types=input_types, attrs=attr_protos,
--> 767 op_def=op_def)
768 if output_structure:
769 outputs = op.outputs
~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/ops.py in create_op(self, op_type, inputs, dtypes, input_types, name, attrs, op_def, compute_shapes, compute_device)
2630 original_op=self._default_original_op, op_def=op_def)
2631 if compute_shapes:
-> 2632 set_shapes_for_outputs(ret)
2633 self._add_op(ret)
2634 self._record_op_seen_by_control_dependencies(ret)
~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/ops.py in set_shapes_for_outputs(op)
1909 shape_func = _call_cpp_shape_fn_and_require_op
1910
-> 1911 shapes = shape_func(op)
1912 if shapes is None:
1913 raise RuntimeError(
~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/ops.py in call_with_requiring(op)
1859
1860 def call_with_requiring(op):
-> 1861 return call_cpp_shape_fn(op, require_shape_fn=True)
1862
1863 _call_cpp_shape_fn_and_require_op = call_with_requiring
~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/common_shapes.py in call_cpp_shape_fn(op, require_shape_fn)
593 res = _call_cpp_shape_fn_impl(op, input_tensors_needed,
594 input_tensors_as_shapes_needed,
--> 595 require_shape_fn)
596 if not isinstance(res, dict):
597 # Handles the case where _call_cpp_shape_fn_impl calls unknown_shape(op).
~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/common_shapes.py in _call_cpp_shape_fn_impl(op, input_tensors_needed, input_tensors_as_shapes_needed, require_shape_fn)
657 missing_shape_fn = True
658 else:
--> 659 raise ValueError(err.message)
660
661 if missing_shape_fn:
ValueError: Dimensions must be equal, but are 512 and 256 for 'decoding/decoder/while/BasicDecoderStep/decoder/multi_rnn_cell/cell_0/cell_0/basic_lstm_cell/mul' (op: 'Mul') with input shapes: [?,512], [?,256].
I am not able to understand the error. Which matrix is it trying to refer? Please help me, I am fairly new to Tensorflow.
The error says that inside the LSTM of the decoder (decoding/decoder/while/BasicDecoderStep/decoder/multi_rnn_cell/cell_0/cell_0/basic_lstm_cell/mul) there is a dimension mismatch during a multiplication (Mul).
My guess is that, for your implementation, you need twice as many cells for the decoder LSTM as for the encoder LSTM, due to the fact that you are using a bidirectional encoder. If you have a bidirectional encoder with a LSTM with 256 cells, then the result will have 512 units (as you concatenate the outputs of the forward and backward LSTM). Currently the decoder seems to expect an input of 256 cells.