I am working on clinical EHR. I am currently referring to this blog and github link here. I have generated the dataset and processed it as per the instructions in the notebooks present in the repository. I am facing an issue trying to train the model.
build_EHRNN class.
torch.manual_seed(1)
class build_EHRNN(nn.Module):
def __init__(self, inputDimSize=4894, hiddenDimSize=[200,200], batchSize=100, embSize=200, numClass=4894, dropout=0.5, logEps=1e-8):
super(build_EHRNN, self).__init__()
self.inputDimSize = inputDimSize
self.hiddenDimSize = hiddenDimSize
self.numClass = numClass
self.embSize = embSize
self.batchSize = batchSize
self.dropout = nn.Dropout(p=0.5)
self.logEps = logEps
# Embedding inputs
self.W_emb = nn.Parameter(torch.randn(self.inputDimSize, self.embSize).cuda())
self.b_emb = nn.Parameter(torch.zeros(self.embSize).cuda())
self.W_out = nn.Parameter(torch.randn(self.hiddenDimSize, self.numClass).cuda())
self.b_out = nn.Parameter(torch.zeros(self.numClass).cuda())
self.params = [self.W_emb, self.W_out,
self.b_emb, self.b_out]
def forward(self,x, y, h, lengths, mask):
self.emb = torch.tanh(torch.matmul(x, self.W_emb) + self.b_emb)
input_values = self.emb
self.outputs = [input_values]
for i, hiddenSize in enumerate([self.hiddenDimSize, self.hiddenDimSize]): # iterate over layers
rnn = EHRNN(self.inputDimSize,hiddenSize,self.embSize,self.batchSize,self.numClass) # calculate hidden states
hidden_state = []
h = self.init_hidden().cuda()
for i,seq in enumerate(input_values): # loop over sequences in each batch
h = rnn(seq, h)
hidden_state.append(h)
hidden_state = self.dropout(torch.stack(hidden_state)) # apply dropout between layers
input_values = hidden_state
y_linear = torch.matmul(hidden_state, self.W_out) + self.b_out # fully connected layer
yhat = F.softmax(y_linear, dim=1) # yhat
yhat = yhat*mask[:,:,None] # apply mask
# Loss calculation
cross_entropy = -(y * torch.log(yhat + self.logEps) + (1. - y) * torch.log(1. - yhat + self.logEps))
last_step = -torch.mean(y[-1] * torch.log(yhat[-1] + self.logEps) + (1. - y[-1]) * torch.log(1. - yhat[-1] + self.logEps))
prediction_loss = torch.sum(torch.sum(cross_entropy, dim=0),dim=1)/ torch.cuda.FloatTensor(lengths)
cost = torch.mean(prediction_loss) + 0.000001 * (self.W_out ** 2).sum() # regularize
return (yhat, hidden_state, cost)
def init_hidden(self):
return torch.zeros(self.batchSize, self.hiddenDimSize) # initial state
Creating instance and training model
model = build_EHRNN(inputDimSize=4894, hiddenDimSize=200, batchSize=100, embSize=200, numClass=4894, dropout=0.5, logEps=1e-8)
model = model.to(device)
optimizer = torch.optim.Adadelta(model.parameters(), lr = 0.01, rho=0.90)
max_epochs = 10
loss_all = []
iteration = 0
for e in range(max_epochs):
for index in random.sample(range(n_batches), n_batches):
batchX = train[0][:n_batches*batchSize][index*batchSize:(index+1)*batchSize]
batchY = train[1][:n_batches*batchSize][index*batchSize:(index+1)*batchSize]
optimizer.zero_grad()
x, y, lengths, mask = padding(batchX, batchY, 4894, 4894)
if torch.cuda.is_available():
x, y, lenghts, mask = x.cuda(), y.cuda(), lengths, mask.cuda()
outputs, hidden, cost = model(x,y, h, lengths, mask)
if torch.cuda.is_available():
cost.cuda()
cost.backward()
nn.utils.clip_grad_norm_(model.parameters(), 5)
optimizer.step()
Error:
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-14-cff1f002dced> in <module>()
17 x, y, lenghts, mask = x.cuda(), y.cuda(), lengths, mask.cuda()
18
---> 19 outputs, hidden, cost = model(x,y, h, lengths, mask)
20
21 if torch.cuda.is_available():
NameError: name 'h' is not defined
Update:
Removing 'h' param produces the following error
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-14-6495250d91c9> in <module>()
18
19 # outputs, hidden, cost = model(x,y, h, lengths, mask)
---> 20 outputs, hidden, cost = model(x, y, lengths, mask)
21
22 if torch.cuda.is_available():
1 frames
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
720 result = self._slow_forward(*input, **kwargs)
721 else:
--> 722 result = self.forward(*input, **kwargs)
723 for hook in itertools.chain(
724 _global_forward_hooks.values(),
<ipython-input-7-3c831fe3ca8d> in forward(self, x, y, lengths, mask)
36 h = rnn(seq, h)
37 hidden_state.append(h)
---> 38 hidden_state = self.dropout(torch.stack(hidden_state)) # apply dropout between layers
39 input_values = hidden_state
40
RuntimeError: stack expects a non-empty TensorList
I think I fixed your error:
replace your forward method to:
def forward(self,x, y, lengths, mask):
self.emb = torch.tanh(torch.matmul(x, self.W_emb) + self.b_emb)
input_values = self.emb
self.outputs = [input_values]
for i, hiddenSize in enumerate([self.hiddenDimSize, self.hiddenDimSize]): # iterate over layers
rnn = EHRNN(self.inputDimSize,hiddenSize,self.embSize,self.batchSize,self.numClass) # calculate hidden states
hidden_state = []
h = self.init_hidden().cuda()
for i,seq in enumerate(input_values): # loop over sequences in each batch
h = rnn(seq, h)
hidden_state.append(h)
hidden_state = self.dropout(torch.stack(hidden_state)) # apply dropout between layers
input_values = hidden_state
y_linear = torch.matmul(hidden_state, self.W_out) + self.b_out # fully connected layer
yhat = F.softmax(y_linear, dim=1) # yhat
yhat = yhat*mask[:,:,None] # apply mask
and replace the line where the error happens to:
outputs, hidden, cost = model(x, y, lengths, mask)
Related
I had a error when I tried predict my data with saved_weight model. My structure program was like this repository, but I had little enhancement for my model.
Here my edited code in models/faster_rcnn.py
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Layer, Lambda, Input, Conv2D, TimeDistributed, Dense, Flatten, BatchNormalization, Dropout
from ..utils import bbox_utils, train_utils
class Decoder(Layer):
"""Generating bounding boxes and labels from faster rcnn predictions.
First calculating the boxes from predicted deltas and label probs.
Then applied non max suppression and selecting top_n boxes by scores.
inputs:
roi_bboxes = (batch_size, roi_bbox_size, [y1, x1, y2, x2])
pred_deltas = (batch_size, roi_bbox_size, total_labels * [delta_y, delta_x, delta_h, delta_w])
pred_label_probs = (batch_size, roi_bbox_size, total_labels)
outputs:
pred_bboxes = (batch_size, top_n, [y1, x1, y2, x2])
pred_labels = (batch_size, top_n)
1 to total label number
pred_scores = (batch_size, top_n)
"""
def __init__(self, variances, total_labels, max_total_size=200, score_threshold=0.67, **kwargs):
super(Decoder, self).__init__(**kwargs)
self.variances = variances
self.total_labels = total_labels
self.max_total_size = max_total_size
self.score_threshold = score_threshold
def get_config(self):
config = super(Decoder, self).get_config()
config.update({
"variances": self.variances,
"total_labels": self.total_labels,
"max_total_size": self.max_total_size,
"score_threshold": self.score_threshold
})
return config
def call(self, inputs):
roi_bboxes = inputs[0]
pred_deltas = inputs[1]
pred_label_probs = inputs[2]
batch_size = tf.shape(pred_deltas)[0]
#
pred_deltas = tf.reshape(pred_deltas, (batch_size, -1, self.total_labels, 4))
pred_deltas *= self.variances
#
expanded_roi_bboxes = tf.tile(tf.expand_dims(roi_bboxes, -2), (1, 1, self.total_labels, 1))
pred_bboxes = bbox_utils.get_bboxes_from_deltas(expanded_roi_bboxes, pred_deltas)
#
pred_labels_map = tf.expand_dims(tf.argmax(pred_label_probs, -1), -1)
pred_labels = tf.where(tf.not_equal(pred_labels_map, 0), pred_label_probs, tf.zeros_like(pred_label_probs))
#
final_bboxes, final_scores, final_labels, _ = bbox_utils.non_max_suppression(
pred_bboxes, pred_labels,
max_output_size_per_class=self.max_total_size,
max_total_size=self.max_total_size,
score_threshold=self.score_threshold)
#
return final_bboxes, final_labels, final_scores
class RoIBBox(Layer):
"""Generating bounding boxes from rpn predictions.
First calculating the boxes from predicted deltas and label probs.
Then applied non max suppression and selecting "train or test nms_topn" boxes.
inputs:
rpn_bbox_deltas = (batch_size, img_output_height, img_output_width, anchor_count * [delta_y, delta_x, delta_h, delta_w])
img_output_height and img_output_width are calculated to the base model feature map
rpn_labels = (batch_size, img_output_height, img_output_width, anchor_count)
outputs:
roi_bboxes = (batch_size, train/test_nms_topn, [y1, x1, y2, x2])
"""
def __init__(self, anchors, mode, hyper_params, **kwargs):
super(RoIBBox, self).__init__(**kwargs)
self.hyper_params = hyper_params
self.mode = mode
self.anchors = tf.constant(anchors, dtype=tf.float32)
def get_config(self):
config = super(RoIBBox, self).get_config()
config.update({"hyper_params": self.hyper_params, "anchors": self.anchors.numpy(), "mode": self.mode})
return config
def call(self, inputs):
rpn_bbox_deltas = inputs[0]
rpn_labels = inputs[1]
anchors = self.anchors
#
pre_nms_topn = self.hyper_params["pre_nms_topn"]
post_nms_topn = self.hyper_params["train_nms_topn"] if self.mode == "training" else self.hyper_params["test_nms_topn"]
nms_iou_threshold = self.hyper_params["nms_iou_threshold"]
variances = self.hyper_params["variances"]
total_anchors = anchors.shape[0]
batch_size = tf.shape(rpn_bbox_deltas)[0]
rpn_bbox_deltas = tf.reshape(rpn_bbox_deltas, (batch_size, total_anchors, 4))
rpn_labels = tf.reshape(rpn_labels, (batch_size, total_anchors))
#
rpn_bbox_deltas *= variances
rpn_bboxes = bbox_utils.get_bboxes_from_deltas(anchors, rpn_bbox_deltas)
#
_, pre_indices = tf.nn.top_k(rpn_labels, pre_nms_topn)
#
pre_roi_bboxes = tf.gather(rpn_bboxes, pre_indices, batch_dims=1)
pre_roi_labels = tf.gather(rpn_labels, pre_indices, batch_dims=1)
#
pre_roi_bboxes = tf.reshape(pre_roi_bboxes, (batch_size, pre_nms_topn, 1, 4))
pre_roi_labels = tf.reshape(pre_roi_labels, (batch_size, pre_nms_topn, 1))
#
roi_bboxes, _, _, _ = bbox_utils.non_max_suppression(pre_roi_bboxes, pre_roi_labels,
max_output_size_per_class=post_nms_topn,
max_total_size=post_nms_topn,
iou_threshold=nms_iou_threshold)
#
return tf.stop_gradient(roi_bboxes)
class RoIDelta(Layer):
"""Calculating faster rcnn actual bounding box deltas and labels.
This layer only running on the training phase.
inputs:
roi_bboxes = (batch_size, nms_topn, [y1, x1, y2, x2])
gt_boxes = (batch_size, padded_gt_boxes_size, [y1, x1, y2, x2])
gt_labels = (batch_size, padded_gt_boxes_size)
outputs:
roi_bbox_deltas = (batch_size, train_nms_topn * total_labels, [delta_y, delta_x, delta_h, delta_w])
roi_bbox_labels = (batch_size, train_nms_topn, total_labels)
"""
def __init__(self, hyper_params, **kwargs):
super(RoIDelta, self).__init__(**kwargs)
self.hyper_params = hyper_params
def get_config(self):
config = super(RoIDelta, self).get_config()
config.update({"hyper_params": self.hyper_params})
return config
def call(self, inputs):
roi_bboxes = inputs[0]
gt_boxes = inputs[1]
gt_labels = inputs[2]
total_labels = self.hyper_params["total_labels"]
total_pos_bboxes = self.hyper_params["total_pos_bboxes"]
total_neg_bboxes = self.hyper_params["total_neg_bboxes"]
variances = self.hyper_params["variances"]
batch_size, total_bboxes = tf.shape(roi_bboxes)[0], tf.shape(roi_bboxes)[1]
# Calculate iou values between each bboxes and ground truth boxes
iou_map = bbox_utils.generate_iou_map(roi_bboxes, gt_boxes)
# Get max index value for each row
max_indices_each_gt_box = tf.argmax(iou_map, axis=2, output_type=tf.int32)
# IoU map has iou values for every gt boxes and we merge these values column wise
merged_iou_map = tf.reduce_max(iou_map, axis=2)
#
pos_mask = tf.greater(merged_iou_map, 0.67)
pos_mask = train_utils.randomly_select_xyz_mask(pos_mask, tf.constant([total_pos_bboxes], dtype=tf.int32))
#
neg_mask = tf.logical_and(tf.less(merged_iou_map, 0.47), tf.greater(merged_iou_map, 0.1))
neg_mask = train_utils.randomly_select_xyz_mask(neg_mask, tf.constant([total_neg_bboxes], dtype=tf.int32))
#
gt_boxes_map = tf.gather(gt_boxes, max_indices_each_gt_box, batch_dims=1)
expanded_gt_boxes = tf.where(tf.expand_dims(pos_mask, axis=-1), gt_boxes_map, tf.zeros_like(gt_boxes_map))
#
gt_labels_map = tf.gather(gt_labels, max_indices_each_gt_box, batch_dims=1)
pos_gt_labels = tf.where(pos_mask, gt_labels_map, tf.constant(-1, dtype=tf.int32))
neg_gt_labels = tf.cast(neg_mask, dtype=tf.int32)
expanded_gt_labels = pos_gt_labels + neg_gt_labels
#
roi_bbox_deltas = bbox_utils.get_deltas_from_bboxes(roi_bboxes, expanded_gt_boxes) / variances
#
roi_bbox_labels = tf.one_hot(expanded_gt_labels, total_labels)
scatter_indices = tf.tile(tf.expand_dims(roi_bbox_labels, -1), (1, 1, 1, 4))
roi_bbox_deltas = scatter_indices * tf.expand_dims(roi_bbox_deltas, -2)
roi_bbox_deltas = tf.reshape(roi_bbox_deltas, (batch_size, total_bboxes * total_labels, 4))
#
return tf.stop_gradient(roi_bbox_deltas), tf.stop_gradient(roi_bbox_labels)
class RoIPooling(Layer):
"""Reducing all feature maps to same size.
Firstly cropping bounding boxes from the feature maps and then resizing it to the pooling size.
inputs:
feature_map = (batch_size, img_output_height, img_output_width, channels)
roi_bboxes = (batch_size, train/test_nms_topn, [y1, x1, y2, x2])
outputs:
final_pooling_feature_map = (batch_size, train/test_nms_topn, pooling_size[0], pooling_size[1], channels)
pooling_size usually (7, 7)
"""
def __init__(self, hyper_params, **kwargs):
super(RoIPooling, self).__init__(**kwargs)
self.hyper_params = hyper_params
def get_config(self):
config = super(RoIPooling, self).get_config()
config.update({"hyper_params": self.hyper_params})
return config
def call(self, inputs):
feature_map = inputs[0]
roi_bboxes = inputs[1]
pooling_size = self.hyper_params["pooling_size"]
batch_size, total_bboxes = tf.shape(roi_bboxes)[0], tf.shape(roi_bboxes)[1]
#
row_size = batch_size * total_bboxes
# We need to arange bbox indices for each batch
pooling_bbox_indices = tf.tile(tf.expand_dims(tf.range(batch_size), axis=1), (1, total_bboxes))
pooling_bbox_indices = tf.reshape(pooling_bbox_indices, (-1, ))
pooling_bboxes = tf.reshape(roi_bboxes, (row_size, 4))
# Crop to bounding box size then resize to pooling size
pooling_feature_map = tf.image.crop_and_resize(
feature_map,
pooling_bboxes,
pooling_bbox_indices,
pooling_size
)
final_pooling_feature_map = tf.reshape(pooling_feature_map, (batch_size, total_bboxes, pooling_feature_map.shape[1], pooling_feature_map.shape[2], pooling_feature_map.shape[3]))
return final_pooling_feature_map
def get_model_frcnn(feature_extractor, rpn_model, anchors, hyper_params, mode="training"):
"""Generating rpn model for given backbone base model and hyper params.
inputs:
feature_extractor = feature extractor layer from the base model
rpn_model = tf.keras.model generated rpn model
anchors = (total_anchors, [y1, x1, y2, x2])
these values in normalized format between [0, 1]
hyper_params = dictionary
mode = "training" or "inference"
outputs:
frcnn_model = tf.keras.model
"""
input_img = rpn_model.input
rpn_reg_predictions, rpn_cls_predictions = rpn_model.output
#
roi_bboxes = RoIBBox(anchors, mode, hyper_params, name="roi_bboxes")([rpn_reg_predictions, rpn_cls_predictions])
#
roi_pooled = RoIPooling(hyper_params, name="roi_pooling")([feature_extractor.output, roi_bboxes])
#
output = TimeDistributed(Flatten(), name="frcnn_flatten")(roi_pooled)
output = TimeDistributed(Dense(4096, activation="relu"), name="frcnn_fc1")(output)
# output = TimeDistributed(Dropout(0.5), name="frcnn_dropout1")(output)
output = TimeDistributed(Dense(4096, activation="relu"), name="frcnn_fc2")(output)
output = TimeDistributed(Dropout(0.5), name="frcnn_dropout2")(output)
frcnn_cls_predictions = TimeDistributed(Dense(hyper_params["total_labels"], activation="softmax"), name="frcnn_cls")(output)
frcnn_reg_predictions = TimeDistributed(Dense(hyper_params["total_labels"] * 4, activation="linear"), name="frcnn_reg")(output)
#
if mode == "training":
input_gt_boxes = Input(shape=(None, 4), name="input_gt_boxes", dtype=tf.float32)
input_gt_labels = Input(shape=(None, ), name="input_gt_labels", dtype=tf.int32)
rpn_cls_actuals = Input(shape=(None, None, hyper_params["anchor_count"]), name="input_rpn_cls_actuals", dtype=tf.float32)
rpn_reg_actuals = Input(shape=(None, 4), name="input_rpn_reg_actuals", dtype=tf.float32)
frcnn_reg_actuals, frcnn_cls_actuals = RoIDelta(hyper_params, name="roi_deltas")(
[roi_bboxes, input_gt_boxes, input_gt_labels])
#
loss_names = ["rpn_reg_loss", "rpn_cls_loss", "frcnn_reg_loss", "frcnn_cls_loss"]
rpn_reg_loss_layer = Lambda(train_utils.reg_loss, name=loss_names[0])([rpn_reg_actuals, rpn_reg_predictions])
rpn_cls_loss_layer = Lambda(train_utils.rpn_cls_loss, name=loss_names[1])([rpn_cls_actuals, rpn_cls_predictions])
frcnn_reg_loss_layer = Lambda(train_utils.reg_loss, name=loss_names[2])([frcnn_reg_actuals, frcnn_reg_predictions])
frcnn_cls_loss_layer = Lambda(train_utils.frcnn_cls_loss, name=loss_names[3])([frcnn_cls_actuals, frcnn_cls_predictions])
#
frcnn_model = Model(inputs=[input_img, input_gt_boxes, input_gt_labels,
rpn_reg_actuals, rpn_cls_actuals],
outputs=[roi_bboxes, rpn_reg_predictions, rpn_cls_predictions,
frcnn_reg_predictions, frcnn_cls_predictions,
rpn_reg_loss_layer, rpn_cls_loss_layer,
frcnn_reg_loss_layer, frcnn_cls_loss_layer])
#
for layer_name in loss_names:
layer = frcnn_model.get_layer(layer_name)
frcnn_model.add_loss(layer.output)
frcnn_model.add_metric(layer.output, name=layer_name, aggregation="mean")
#
else:
bboxes, labels, scores = Decoder(hyper_params["variances"], hyper_params["total_labels"], name="faster_rcnn_decoder")(
[roi_bboxes, frcnn_reg_predictions, frcnn_cls_predictions])
frcnn_model = Model(inputs=input_img, outputs=[bboxes, labels, scores])
#
return frcnn_model
def init_model_frcnn(model, hyper_params):
"""Generating dummy data for initialize model.
In this way, the training process can continue from where it left off.
inputs:
model = tf.keras.model
hyper_params = dictionary
"""
final_height, final_width = hyper_params["img_size"], hyper_params["img_size"]
img = tf.random.uniform((1, final_height, final_width, 3))
feature_map_shape = hyper_params["feature_map_shape"]
total_anchors = feature_map_shape * feature_map_shape * hyper_params["anchor_count"]
gt_boxes = tf.random.uniform((1, 1, 4))
gt_labels = tf.random.uniform((1, 1), maxval=hyper_params["total_labels"], dtype=tf.int32)
bbox_deltas = tf.random.uniform((1, total_anchors, 4))
bbox_labels = tf.random.uniform((1, feature_map_shape, feature_map_shape, hyper_params["anchor_count"]), maxval=1, dtype=tf.float32)
model([img, gt_boxes, gt_labels, bbox_deltas, bbox_labels])
and this is code to test a model.
batch_size = 4
epochs = 10
load_weights = False
backbone = "vgg16"
hyper_params = train_utils.get_hyper_params(backbone)
labels = list(label_map_dict.keys()) # my custom label (pothole and crack)
labels = ["bg"] + labels
test_total_item = len(list(test_data))
test_data = test_data.map(lambda data : data_utils.preprocessing_before_frcnn(
data, IMAGE_SIZE, IMAGE_SIZE))
test_data = test_data.padded_batch(
batch_size, padded_shapes=data_shapes, padding_values=padding_values)
load_path = io_utils.get_model_path("faster_rcnn", backbone)
rpn_model, feature_extractor = rpn_vgg16.get_model_vgg16(hyper_params)
frcnn_test_model = faster_rcnn.get_model_frcnn(feature_extractor, rpn_model, anchors, hyper_params, mode="test")
frcnn_test_model.load_weights(load_path)
step_size = train_utils.get_step_size(test_total_item, batch_size)
pred_bboxes, pred_labels, pred_scores = frcnn_test_model.predict(test_data, steps=step_size, verbose=1)
After I run my test code, the error happens like this:
TypeError Traceback (most recent call last)
<ipython-input-26-de9c8627623e> in <module>()
1 step_size = train_utils.get_step_size(test_total_item, batch_size)
----> 2 pred_bboxes, pred_labels, pred_scores = frcnn_test_model.predict(test_data, steps=step_size, verbose=1)
16 frames
/usr/local/lib/python3.7/dist-packages/tensorflow_core/python/autograph/impl/api.py in wrapper(*args, **kwargs)
235 except Exception as e: # pylint:disable=broad-except
236 if hasattr(e, 'ag_error_metadata'):
--> 237 raise e.ag_error_metadata.to_exception(e)
238 else:
239 raise
TypeError: in converted code:
/usr/local/lib/python3.7/dist-packages/tensorflow_core/python/keras/engine/training_v2.py:677 map_fn
batch_size=None)
/usr/local/lib/python3.7/dist-packages/tensorflow_core/python/keras/engine/training.py:2474 _standardize_tensors
sample_weight, feed_output_names)
/usr/local/lib/python3.7/dist-packages/tensorflow_core/python/keras/engine/training_utils.py:639 standardize_sample_weights
'sample_weight')
/usr/local/lib/python3.7/dist-packages/tensorflow_core/python/keras/engine/training_utils.py:629 standardize_sample_or_class_weights
str(x_weight))
TypeError: The model has multiple outputs, so `sample_weight` should be either a list or a dict. Provided `sample_weight` type not understood: Tensor("args_2:0", shape=(None, None), dtype=int32)
The current tensorflow version I used is Tensorflow 2.1.0
I am trying to create a GNN that models a protein. However, I am running into an error with GraphConv (I get the same error with GCNConv). I do not understand why I am getting this error when the shapes should be able to be multiplied. I think the error must have something to do with the custom dataset I created, but I am not 100% sure. Please let me know if you have had a similar issue or know how to fix this. Thank you.
EDIT: Even if I change embedding_size to 1479, I still get: RuntimeError: mat1 and mat2 shapes cannot be multiplied (1479x1 and 1479x1479).
Custom dataset:
class ProteinDataset(geom_data.Dataset):
def __init__(self, root, transform=None, pre_transform=None):
# root = where data set is stored
super(ProteinDataset, self).__init__(root, transform, pre_transform)
self.root = root
#property
def raw_file_names(self):
return os.listdir(f'{self.root}/raw')
#property
def processed_file_names(self):
inxs = []
for pdb in self.raw_paths:
inxs.append(pdb.split('/')[-1].split('.p')[0])
return [f'{i}.pt' for i in inxs]
def download(self):
pass
def process(self):
for pdb in self.raw_paths:
try:
mol_obj = Chem.rdmolfiles.MolFromPDBFile(pdb)
except AttributeError:
os.remove(pdb)
continue
# Get node features
node_feats = self._get_node_features(mol_obj).reshape([-1,1])
# Get edge features
edge_feats = self._get_edge_features(mol_obj).reshape([-1,1])
# Get adjacency info
edge_index = self._get_adjacency_info(mol_obj)
label = self._get_labels(pdb)
# Create Data object
data = geom_data.Data(x=node_feats,
edge_index=edge_index,
edge_attr=edge_feats,
y=label)
i = pdb.split('/')[-1].split('.p')[0]
torch.save(data, os.path.join(self.processed_dir,f'{i}.pt'))
def _get_node_features(self, mol):
all_node_feats = []
for atom in mol.GetAtoms():
all_node_feats.append(atom.GetMass())
all_node_feats = np.asarray(all_node_feats)
return torch.tensor(all_node_feats, dtype=torch.float)
def _get_edge_features(self, mol):
all_edge_feats = []
dists = Chem.rdmolops.Get3DDistanceMatrix(mol)
# CA-CA Distances
for bond in mol.GetBonds():
begin = bond.GetBeginAtomIdx()
end = bond.GetEndAtomIdx()
all_edge_feats.append(dists[begin,end])
all_edge_feats = np.asarray(all_edge_feats)
return torch.tensor(all_edge_feats, dtype=torch.float)
def _get_adjacency_info(self, mol):
adj_matrix = Chem.rdmolops.GetAdjacencyMatrix(mol)
row, col = np.where(adj_matrix)
coo = np.array(list(zip(row, col)))
coo = np.reshape(coo, (2, -1))
return torch.tensor(coo, dtype=torch.long)
def _get_labels(self, fn):
with open(fn, 'r') as f:
label = float(f.readline())
f.close()
label = np.asarray([label])
return torch.tensor(label, dtype=torch.float)
def len(self):
return len(self.raw_paths)
def get(self, inx):
data = torch.load(self.processed_paths[inx])
return data
Model:
class GNN(torch.nn.Module):
def __init__(self, feature_size):
super(GNN, self).__init__()
embedding_size = 1024
# GNN Layers
self.conv1 = GraphConv(feature_size, embedding_size)
self.head1 = Linear(embedding_size*3, embedding_size)
self.pool1 = TopKPooling(embedding_size, ratio=0.8)
self.conv2 = GraphConv(embedding_size, embedding_size)
self.head2 = Linear(embedding_size*3, embedding_size)
self.pool2 = TopKPooling(embedding_size, ratio=0.5)
self.conv3 = GraphConv(embedding_size, embedding_size)
self.head3 = Linear(embedding_size*3, embedding_size)
self.pool3 = TopKPooling(embedding_size, ratio=0.2)
# Linear Layers
self.fc1 = Linear(embedding_size*2, 1024)
self.fc2 = Linear(1024, 128)
self.fc3 = Linear(128, 1)
def forward(self, x, edge_attr, edge_index, batch_index):
# First block
x = self.conv1(x, edge_index).relu()
x = self.head1(x)
x, edge_index, edge_attr, batch_index, _, _ = self.pool1(x,
edge_index,
None,
batch_index)
x1 = torch.cat([gmp(x, batch_index), gap(x, batch_index)], dim=1)
# Second block
x = self.conv2(x, edge_index).relu()
x = self.head2(x)
x, edge_index, edge_attr, batch_index, _, _ = self.pool2(x,
edge_index,
None,
batch_index)
x2 = torch.cat([gmp(x, batch_index), gap(x, batch_index)], dim=1)
# Third block
x = self.conv3(x, edge_index).relu()
x = self.head3(x)
x, edge_index, edge_attr, batch_index, _, _ = self.pool3(x,
edge_index,
None,
batch_index)
x3 = torch.cat([gmp(x, batch_index), gap(x, batch_index)], dim=1)
# Concat pooled vectors
x = x1 + x2 + x3
# Apply Linear Layers
x = self.fc1(x).relu()
x = self.fc2(x).relu()
x = self.fc3(x)
return x
Training:
device = torch.device('cuda')
def count_parameters(model):
return sum(p.numel() for p in model.parameters() if p.requires_grad)
# Loading the dataset
train_set = ProteinDataset(root='data/lys50_2/train')
test_set = ProteinDataset(root='data/lys50_2/test')
print('Shape of input:', train_set[0].x.shape[0])
# Loading the model
model = GNN(feature_size=train_set[0].x.shape[0])
model = model.to(device)
print(f'Number of parameters: {count_parameters(model)}')
print(model)
# Loss and Optimizer
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.00005)
print(optimizer)
# Prepare for training
train_loader = DataLoader(train_set, batch_size=1, shuffle=True)
test_loader = DataLoader(test_set, batch_size=1, shuffle=False)
def train(m,opt):
loss_sum = 0.0
for _, batch in enumerate(train_loader):
# Use GPU
batch.to(device)
# Reset grad
opt.zero_grad()
# Pass node features and connections
pred = m(batch.x.float(),
batch.edge_attr.float(),
batch.edge_index,
batch.batch)
# Calculate loss and gradients
loss = loss_fn(pred, batch.y)
loss.backward()
loss_sum += loss.item()
# Update using the gradients
opt.step()
return loss_sum / len(train_loader)
def validate(m):
loss_sum = 0.0
for _, batch in enumerate(test_loader):
for _, batch in enumerate(test_loader):
# Use GPU
batch.to(device)
# No grad
with torch.no_grad():
pred = m(batch.x.float(),
batch.edge_attr.float(),
batch.edge_index,
batch.batch)
# Calculate loss and gradients
loss = loss_fn(pred, batch.y)
loss_sum += loss.item()
return loss_sum / len(test_loader)
model.zero_grad()
optimizer.zero_grad()
# Loop for training
for i in range(101):
loss = train(model,optimizer)
if (i%10==0):
loss_v = validate(model)
print(i, loss, loss_v)
else:
print(i, loss)
Error when running training:
Traceback (most recent call last):
File "/home/spencer/sh3/gnn/./train.py", line 79, in <module>
loss = train(model,optimizer)
File "/home/spencer/sh3/gnn/./train.py", line 44, in train
pred = m(batch.x.float(),
File "/home/spencer/miniconda3/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "/feig/s1/spencer/sh3/gnn/model2.py", line 32, in forward
x = self.conv1(x, edge_index).relu()
File "/home/spencer/miniconda3/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "/home/spencer/miniconda3/lib/python3.9/site-packages/torch_geometric/nn/conv/graph_conv.py", line 71, in forward
out = self.lin_rel(out)
File "/home/spencer/miniconda3/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "/home/spencer/miniconda3/lib/python3.9/site-packages/torch_geometric/nn/dense/linear.py", line 109, in forward
return F.linear(x, self.weight, self.bias)
File "/home/spencer/miniconda3/lib/python3.9/site-packages/torch/nn/functional.py", line 1848, in linear
return torch._C._nn.linear(input, weight, bias)
RuntimeError: mat1 and mat2 shapes cannot be multiplied (1479x1 and 1479x1024)
The error tells you that input shapes don't match.
You can reshape the input in the forward method like this: x = x.view(1, 1479) but make sure that this is what you need - this error usually indicates wrongly shaped dataset or passing the wrong input.
I want to implement a model with a custom loss function using keras.
I will simulate by sampling a dataset according to the following function:
def sampler(N1, N2, N3):
np.random.seed(42)
# Sampler #1: PDE domain
t1 = np.random.uniform(low=T0,
high=T,
size=[N1,1])
s1 = np.random.uniform(low=S1,
high=S2,
size=[N1,1])
# Sampler #2: boundary condition
t2 = np.random.uniform(low=T0,
high=T,
size=[N2,1])
s2 = np.zeros(shape=(N2, 1))
# Sampler #3: initial/terminal condition
t3 = T * np.ones((N3,1)) #Terminal condition
s3 = np.random.uniform(low=S1,
high=S2,
size=[N3,1])
return (t1, s1, t2, s2, t3, s3)
Each subset (t1, s1), (t2, s2) and (t3, s3) should be evaluated by a unique term in the loss function:
def loss(model, t1, x1, t2, x2, t3, x3):
# Loss term #1: PDE
V = model(t1, x1)
V_t = tf.gradients(V, t1)[0]
V_x = tf.gradients(V, x1)[0]
V_xx = tf.gradients(V_x, x1)[0]
f = V_t + r*x1*V_x + 0.5*sigma**2*x1**2*V_xx - r*V
L1 = tf.reduce_mean(tf.square(f))
#Loss term #2: boundary condition
L2 = tf.reduce_mean(tf.square(model(t2, x2) - 0))
# Loss term #3: initial/terminal condition
L3 = tf.reduce_mean(tf.square(model(t3, x3) - tf.math.maximum(x3-K,0)))
I have established the following parameters:
# Set random seeds
np.random.seed(123)
tf.random.set_seed(123)
# Strike price
K = 0.5
# PDE parameters
r = 0.02 # Interest rate
sigma = 0.18 # Volatility
# Time limits
T0 = 0.0 + 1e-10 # Initial time
T = 1.0 # Terminal time
# Space limits
S1 = 0.0 + 1e-10 # Low boundary
S2 = 1.0 # High boundary
# Number of samples
NS_1 = 1000
NS_2 = 100
NS_3 = 100
The Model
class DGM(tf.keras.Model):
def __init__(self, n_layers, n_nodes, dimensions=1):
"""
Parameters:
- n_layers: number of layers
- n_nodes: number of nodes in (inner) layers
- dimensions: number of spacial dimensions
"""
super().__init__()
self.n_layers = n_layers
self.initial_layer = DenseLayer(dimensions + 1, n_nodes, activation="relu")
self.lstmlikelist = []
for _ in range(self.n_layers):
self.lstmlikelist.append(LSTMLikeLayer(dimensions + 1, n_nodes, activation="relu"))
self.final_layer = DenseLayer(n_nodes, 1, activation=None)
def call(self, t, x):
X = tf.concat([t,x], 1)
S = self.initial_layer.call(X)
for i in range(self.n_layers):
S = self.lstmlikelist[i].call({'S': S, 'X': X})
result = self.final_layer.call(S)
return result
# Neural network layers
class DenseLayer(tf.keras.layers.Layer):
def __init__(self, n_inputs, n_outputs, activation):
"""
Parameters:
- n_inputs: number of inputs
- n_outputs: number of outputs
- activation: activation function
"""
super(DenseLayer, self).__init__()
self.n_inputs = n_inputs
self.n_outputs = n_outputs
self.W = self.add_weight(shape=(self.n_inputs, self.n_outputs),
initializer='random_normal',
trainable=True)
self.b = self.add_weight(shape=(1, self.n_outputs),
initializer='random_normal',
trainable=True)
self.activation = _get_function(activation)
def call(self, inputs):
S = tf.add(tf.matmul(inputs, self.W), self.b)
S = self.activation(S)
return S
class LSTMLikeLayer(tf.keras.layers.Layer):
def __init__(self, n_inputs, n_outputs, activation):
"""
Parameters:
- n_inputs: number of inputs
- n_outputs: number of outputs
- activation: activation function
"""
super(LSTMLikeLayer, self).__init__()
self.n_outputs = n_outputs
self.n_inputs = n_inputs
self.Uz = self.add_weight("Uz", shape=[self.n_inputs, self.n_outputs])
self.Ug = self.add_weight("Ug", shape=[self.n_inputs, self.n_outputs])
self.Ur = self.add_weight("Ur", shape=[self.n_inputs, self.n_outputs])
self.Uh = self.add_weight("Uh", shape=[self.n_inputs, self.n_outputs])
self.Wz = self.add_weight("Wz", shape=[self.n_outputs, self.n_outputs])
self.Wg = self.add_weight("Wg", shape=[self.n_outputs, self.n_outputs])
self.Wr = self.add_weight("Wr", shape=[self.n_outputs, self.n_outputs])
self.Wh = self.add_weight("Wh", shape=[self.n_outputs, self.n_outputs])
self.bz = self.add_weight("bz", shape=[1, self.n_outputs])
self.bg = self.add_weight("bg", shape=[1, self.n_outputs])
self.br = self.add_weight("br", shape=[1, self.n_outputs])
self.bh = self.add_weight("bh", shape=[1, self.n_outputs])
self.activation = _get_function(activation)
def call(self, inputs):
S = inputs['S']
X = inputs['X']
Z = self.activation(tf.add(tf.add(tf.matmul(X, self.Uz), tf.matmul(S, self.Wz)), self.bz))
G = self.activation(tf.add(tf.add(tf.matmul(X, self.Ug), tf.matmul(S, self.Wg)), self.bg))
R = self.activation(tf.add(tf.add(tf.matmul(X, self.Ur), tf.matmul(S, self.Wr)), self.br))
H = self.activation(tf.add(tf.add(tf.matmul(X, self.Uh), tf.matmul(tf.multiply(S, R), self.Wh)), self.bh))
Snew = tf.add(tf.multiply(tf.subtract(tf.ones_like(G), G), H), tf.multiply(Z, S))
return Snew
def _get_function(name):
f = None
if name == "tanh":
f = tf.nn.tanh
elif name == "sigmoid":
f = tf.nn.sigmoid
elif name == "relu":
f = tf.nn.relu
elif not name:
f = tf.identity
assert f is not None
return f
This would be my approach for an exemplary model:
model = DGM(n_layers=2, n_nodes = 3)
model.compile(
optimizer = "Adam", loss = loss
)
model.fit()
Error trace back:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-12-f781fe65eb00> in <module>()
----> 1 model.fit()
2 frames
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py in _method_wrapper(self, *args, **kwargs)
106 def _method_wrapper(self, *args, **kwargs):
107 if not self._in_multi_worker_mode(): # pylint: disable=protected-access
--> 108 return method(self, *args, **kwargs)
109
110 # Running inside `run_distribute_coordinator` already.
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)
1030 version_utils.disallow_legacy_graph('Model', 'fit')
1031 self._assert_compile_was_called()
-> 1032 self._check_call_args('fit')
1033 _disallow_inside_tf_function('fit')
1034
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py in _check_call_args(self, method_name)
2468 'Models passed to `' + method_name + '` can only have `training` '
2469 'and the first argument in `call` as positional arguments, '
-> 2470 'found: ' + str(extra_args) + '.')
2471
2472 def _validate_compile(self, optimizer, metrics, **kwargs):
ValueError: Models passed to `fit` can only have `training` and the first argument in `call` as positional arguments, found: ['x'].
Now how can I fit the model in accordance with the customary loss function I have set up?
I'm having problems using functional api for estimating by maximizing
First I minimize the error vector by maximizing the probability layer loss, and then I want to use the mean vector layer to rank xc_hat similar embeddings.
The code is as follows:
import random as rdn
import tensorflow.compat.v2 as tf
tf.enable_v2_behavior()
import tensorflow_probability as tfp
import numpy as np
from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split
tfd = tfp.distributions
n_observations = 2000
n_features = 5
d_dim = 3
lr = 0.005
# Generate toy data
def make_relations(x_tr, y_tr, c_tr):
# Generate input data centers being labels (xc) for two random
# in-cluster (xa, xb)
xa = []
xc = []
xb = []
for l in y_tr:
kone = [k for k, lab in zip(x_tr, y_tr) if lab==l]
if len(kone) < 3:
continue
for i, x in enumerate(kone):
if np.isclose(x, c_tr[l]).all():
continue
kone_minus_x = kone.copy()
kone_minus_x.pop(i)
print
xa.append(x)
xc.append(c_tr[l])
xb.append(rdn.choice(kone_minus_x))
return np.vstack(xa), np.vstack(xb), np.vstack(xc)
X, Y, C = make_blobs(n_samples=n_observations,
n_features=n_features,
centers=int(n_observations*0.2),
return_centers=True)
x_a, x_b, x_c = make_relations(X, Y, C)
Xa_train, Xa_test = train_test_split(x_a, test_size=.4)
Xb_train, Xb_test = train_test_split(x_b, test_size=.4)
Xc_train, Xc_test = train_test_split(x_c, test_size=.4)
Xa_train = Xa_train[np.newaxis]
Xb_train = Xb_train[np.newaxis]
Xc_train = Xc_train[np.newaxis]
Xa_test = Xa_test[np.newaxis]
Xb_test = Xb_test[np.newaxis]
Xc_test = Xc_test[np.newaxis]
neg_log_likelihood = lambda y, rv_y: -rv_y.log_prob(y)
ones_train = tf.keras.backend.ones((1, Xc_train.shape[1], d_dim)).numpy()
ones_test = tf.keras.backend.ones((1, Xc_test.shape[1], d_dim)).numpy()
# Build model.
xa_xb = tf.keras.layers.Input(shape=(None, n_features), name='Xa-Xb')
L_xa_xb = tf.keras.layers.Dense(d_dim, activation='sigmoid', name='L_Xa-Xb')(xa_xb)
xb = tf.keras.layers.Input(shape=(None, n_features), name='Xb')
L_xb = tf.keras.layers.Dense(d_dim, activation='sigmoid', name='L_Xb')(xb)
mu = tf.keras.layers.Add(name='mean_vector')([L_xa_xb, L_xb])
xc = tf.keras.layers.Input(shape=(None, n_features), name='Xc')
L_xc = tf.keras.layers.Dense(d_dim, name='L_Xc')(xc)
error_vector = tf.keras.layers.Subtract(name='error_vector')([L_xc, mu])
p_xc_given_xa_xb = tfp.layers.DistributionLambda(
lambda t: tfd.Normal(loc=t, scale=tf.exp(t)), name='Gaussian')(error_vector)
model = tf.keras.Model(inputs=[xa_xb, xb, xc],
outputs=p_xc_given_xa_xb, name="inner_model")
model.compile(
optimizer=tf.optimizers.Adam(learning_rate=lr),
loss=neg_log_likelihood)
model.fit([Xa_train - Xb_train, Xb_train, Xc_train], ones_train,
validation_data=([Xa_test - Xb_test, Xb_test, Xc_test], ones_test),
epochs=1000,
verbose=True)
# After trained rebuild the part of the model I will use for prediction
xa_xb = model.get_layer('Xa-Xb')
L_xa_xb = model.get_layer('L_Xa-Xb')(xa_xb)
xb = model.get_layer('Xb')
L_xb = model.get_layer('L_Xb')(xb)
xc = model.get_layer('mean_vector')([L_xa_xb, L_xb])
model = tf.keras.Model(inputs=[xa_xb, xb],
outputs=xc, name="inner_model")
xc_hat = model([Xa_test - Xb_test, Xb_test])
The idea is to estimate xc However, I have the following error:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-57-d94b1e8a583c> in <module>()
2
3 xa_xb = model.get_layer('Xa-Xb')
----> 4 L_xa_xb = model.get_layer('L_Xa-Xb')(xa_xb)
5
6 xb = model.get_layer('Xb')
1 frames
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py in __call__(self, *args, **kwargs)
966 with base_layer_utils.autocast_context_manager(
967 self._compute_dtype):
--> 968 outputs = self.call(cast_inputs, *args, **kwargs)
969 self._handle_activity_regularization(inputs, outputs)
970 self._set_mask_metadata(inputs, outputs, input_masks)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/layers/core.py in call(self, inputs)
1178
1179 def call(self, inputs):
-> 1180 rank = inputs.shape.rank
1181 if rank is not None and rank > 2:
1182 # Broadcasting is required for the inputs.
AttributeError: 'InputLayer' object has no attribute 'shape'
Im usign Google Colaboratory
summary
I'm adding alphabets to captcha recognition, but pytorch's CTC seems to not working properly when alphabets are added.
What I've tried
At first, I modified BLANK_LABEL to 62 since there are 62 labels(0-9, a-z, A-Z), but it gives me runtime error blank must be in label range. I also tried BLANK_LABEL=0 and then assigning 1~63 as nonblank labels but it outputs NaN as loss.
The code
This is the colab link for the current version of my code: here
below are just core parts of the code.
Constants:
DATASET_PATH = "/home/ik1ne/Downloads/numbers"
MODEL_PATH = "/home/ik1ne/Downloads"
BATCH_SIZE = 50
TRAIN_BATCHES = 180
TEST_BATCHES = 20
TOTAL_BATCHES = TRAIN_BATCHES+TEST_BATCHES
TOTAL_DATASET = BATCH_SIZE*TOTAL_BATCHES
BLANK_LABEL = 63
dataset generation:
!pip install captcha
from captcha.image import ImageCaptcha
import itertools
import os
import random
import string
if not os.path.exists(DATASET_PATH):
os.makedirs(DATASET_PATH)
characters = "0123456789"+string.ascii_lowercase + string.ascii_uppercase
while(len(list(Path(DATASET_PATH).glob('*'))) < TOTAL_BATCHES):
captcha_str = "".join(random.choice(characters) for x in range(6))
if captcha_str in list(Path(DATASET_PATH).glob('*')):
continue
ImageCaptcha().write(captcha_str, f"{DATASET_PATH}/{captcha_str}.png")
dataset:
def convert_strseq_to_numseq(s):
for c in s:
if c >= '0' and c <= '9':
return int(c)
elif c>='a' and c <='z':
return ord(c)-ord('a')+10
else:
return ord(c)-ord('A')+36
class CaptchaDataset(Dataset):
"""CAPTCHA dataset."""
def __init__(self, root_dir, transform=None):
self.root_dir = root_dir
self.image_paths = list(Path(root_dir).glob('*'))
self.transform = transform
def __getitem__(self, index):
image = Image.open(self.image_paths[index])
if self.transform:
image = self.transform(image)
label_sequence = [convert_strseq_to_numseq(c) for c in self.image_paths[index].stem]
return (image, torch.tensor(label_sequence))
def __len__(self):
return len(self.image_paths)
model:
class StackedLSTM(nn.Module):
def __init__(self, input_size=60, output_size=11, hidden_size=512, num_layers=2):
super(StackedLSTM, self).__init__()
self.hidden_size = hidden_size
self.num_layers = num_layers
self.dropout = nn.Dropout()
self.fc = nn.Linear(hidden_size, output_size)
self.lstm = nn.LSTM(input_size, hidden_size, num_layers)
def forward(self, inputs, hidden):
batch_size, seq_len, input_size = inputs.shape
outputs, hidden = self.lstm(inputs, hidden)
outputs = self.dropout(outputs)
outputs = torch.stack([self.fc(outputs[i]) for i in range(width)])
outputs = F.log_softmax(outputs, dim=2)
return outputs, hidden
def init_hidden(self, batch_size):
weight = next(self.parameters()).data
return (weight.new(self.num_layers, batch_size, self.hidden_size).zero_(),
weight.new(self.num_layers, batch_size, self.hidden_size).zero_())
net = StackedLSTM().to(device)
training:
net.train() # set network to training phase
epochs = 30
# for each pass of the training dataset
for epoch in range(epochs):
train_loss, train_correct, train_total = 0, 0, 0
h = net.init_hidden(BATCH_SIZE)
# for each batch of training examples
for batch_index, (inputs, targets) in enumerate(train_dataloader):
inputs, targets = inputs.to(device), targets.to(device)
h = tuple([each.data for each in h])
BATCH_SIZE, channels, height, width = inputs.shape
# reshape inputs: NxCxHxW -> WxNx(HxC)
inputs = (inputs
.permute(3, 0, 2, 1)
.contiguous()
.view((width, BATCH_SIZE, -1)))
optimizer.zero_grad() # zero the parameter gradients
outputs, h = net(inputs, h) # forward pass
# compare output with ground truth
input_lengths = torch.IntTensor(BATCH_SIZE).fill_(width)
target_lengths = torch.IntTensor([len(t) for t in targets])
loss = criterion(outputs, targets, input_lengths, target_lengths)
loss.backward() # backpropagation
nn.utils.clip_grad_norm_(net.parameters(), 10) # clip gradients
optimizer.step() # update network weights
# record statistics
prob, max_index = torch.max(outputs, dim=2)
train_loss += loss.item()
train_total += len(targets)
for i in range(BATCH_SIZE):
raw_pred = list(max_index[:, i].cpu().numpy())
pred = [c for c, _ in groupby(raw_pred) if c != BLANK_LABEL]
target = list(targets[i].cpu().numpy())
if pred == target:
train_correct += 1
# print statistics every 10 batches
if (batch_index + 1) % 10 == 0:
print(f'Epoch {epoch + 1}/{epochs}, ' +
f'Batch {batch_index + 1}/{len(train_dataloader)}, ' +
f'Train Loss: {(train_loss/1):.5f}, ' +
f'Train Accuracy: {(train_correct/train_total):.5f}')
train_loss, train_correct, train_total = 0, 0, 0
This error will occur when the index of blank is larger than the total number of classes, which equals number of chars + blank. What's more, the index starts from 0, instead of 1, so if you have 62 characters in total, their index should be 0-61 and the index of blank should be 62 instead of 63. (Or you can set blank as 0, other characters from 1-62)
You should also check the shape of the output tensor, it should has shape [T, B, C], where T is the time step length, B is the batch size, C is the class num, remember to add blank in to the class num or you will meet the problem
Most probably there is some problem with net shape when it's sent to CTC loss, but you should have provided the dataset to us to see the net's shape. It should be (T,N,C) , where T=input length, N=batch size, C=number of classes. And as I understand blank symbol id should in the 0..C range. Also, you should add blank symbol, for example '-' to the alphabet.