Related
I have two functions like this (code source of the functions is here):
# Target device for the experiments.
device = torch.device('cuda')

# MUTAG graph dataset rooted at /tmp/MUTAG, with node attributes enabled.
dataset = TUDataset(root='/tmp/MUTAG', name='MUTAG', use_node_attr=True)
loader = DataLoader(dataset, batch_size=32, shuffle=True)

# The very same dataset object stands in for every split (just for testing).
train_dataset = val_dataset = test_dataset = dataset

graph_train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
graph_val_loader = DataLoader(val_dataset, batch_size=8)

# Maps the layer name accepted by the models below to the geom_nn layer class.
gnn_layer_by_name = dict(
    GCN=geom_nn.GCNConv,
    GAT=geom_nn.GATConv,
    GraphConv=geom_nn.GraphConv,
)
class GCNLayer(nn.Module):
    """Graph-convolution layer operating on a dense, batched adjacency matrix.

    Node features are linearly projected, summed over each node's neighbours
    with a batched matrix product, and divided by the neighbour count.
    """

    def __init__(self, c_in, c_out):
        """
        Inputs:
            c_in - Dimension of the input node features
            c_out - Dimension of the output node features
        """
        super().__init__()
        self.projection = nn.Linear(c_in, c_out)

    def forward(self, node_feats, adj_matrix):
        """
        Inputs:
            node_feats - 3-D tensor [batch, nodes, c_in] (torch.bmm needs 3-D operands)
            adj_matrix - 3-D tensor [batch, nodes, nodes]; row i marks the neighbours of node i
        Returns:
            Tensor [batch, nodes, c_out] holding the neighbour-averaged projected features.
        """
        num_neighbours = adj_matrix.sum(dim=-1, keepdims=True)
        # A node without any neighbour (and no self-connection) has a count of 0;
        # dividing its all-zero aggregate by 0 would give NaN, so divide by 1 instead.
        num_neighbours = num_neighbours.masked_fill(num_neighbours == 0, 1)
        node_feats = self.projection(node_feats)
        node_feats = torch.bmm(adj_matrix, node_feats)
        return node_feats / num_neighbours
class GNNModel(nn.Module):
    """Stack of graph layers with an activation and dropout between them."""

    def __init__(self, c_in, c_hidden, c_out, num_layers, activation_function, optimizer_name, learning_rate, dp_rate_linear, layer_name="GCN", **kwargs):
        """
        Inputs:
            c_in - Dimension of input features
            c_hidden - Dimension of hidden features
            c_out - Dimension of the output features
            num_layers - Total number of graph layers (num_layers - 1 hidden ones plus the output layer)
            activation_function - String that is evaluated to obtain the activation module
            optimizer_name - Accepted for interface compatibility; not used in this class
            learning_rate - Accepted for interface compatibility; not used in this class
            dp_rate_linear - Dropout rate applied after every hidden graph layer
            layer_name - Key into gnn_layer_by_name selecting the graph layer
            kwargs - Additional arguments forwarded to every graph layer
        """
        super().__init__()
        gnn_layer = gnn_layer_by_name[layer_name]
        # SECURITY NOTE: eval() executes arbitrary code; only pass trusted
        # strings here (e.g. "nn.ReLU()").
        activation_function = eval(activation_function)  ##not great to use
        layers = []
        in_channels = c_in
        for _ in range(num_layers - 1):
            layers += [
                gnn_layer(in_channels=in_channels,
                          out_channels=c_hidden,
                          **kwargs),
                # The same activation instance is reused in every position.
                activation_function,
                nn.Dropout(p=dp_rate_linear),
            ]
            in_channels = c_hidden
        layers += [gnn_layer(in_channels=in_channels,
                             out_channels=c_out,
                             **kwargs)]
        self.layers = nn.ModuleList(layers)

    def forward(self, x, edge_index):
        """
        Inputs:
            x - Input features per node
            edge_index - Graph connectivity passed to every message-passing layer
        Returns:
            Output of the last layer in the stack.
        """
        for l in self.layers:
            # Graph layers need the connectivity as well; activation and
            # dropout modules only take the features.
            if isinstance(l, geom_nn.MessagePassing):
                x = l(x, edge_index)
            else:
                x = l(x)
        return x
class GraphGNNModel(nn.Module):
    """Graph-level model: GNNModel encoder, per-graph mean pooling, linear head."""

    def __init__(self, c_in, c_hidden, c_out, dp_rate_linear, **kwargs):
        """
        Inputs:
            c_in - Dimension of input features
            c_hidden - Dimension of hidden features (also the encoder's output size)
            c_out - Dimension of the head's output
            dp_rate_linear - Dropout rate used in the encoder and before the linear head
            kwargs - Remaining arguments forwarded to GNNModel
        """
        super().__init__()
        # The encoder emits c_hidden features so the head can map them to c_out.
        self.GNN = GNNModel(
            c_in=c_in,
            c_hidden=c_hidden,
            c_out=c_hidden,
            dp_rate_linear=dp_rate_linear,
            **kwargs
        )
        head_modules = [
            nn.Dropout(p=dp_rate_linear),
            nn.Linear(c_hidden, c_out),
        ]
        self.head = nn.Sequential(*head_modules)

    def forward(self, x, edge_index, batch_idx):
        """
        Inputs:
            x - Input features per node
            edge_index - Graph connectivity handed to the encoder
            batch_idx - Passed to global_mean_pool to group node rows when averaging
        """
        node_embeddings = self.GNN(x, edge_index)
        graph_embeddings = geom_nn.global_mean_pool(node_embeddings, batch_idx)
        return self.head(graph_embeddings)
As you can see, I don't really need GNNModel and GraphGNNModel to be two separate classes; the second one just adds a sequential head to the end of the first.
I tried combining the functions by doing:
class GNNModel(nn.Module):
    """Attempted merge of the graph-layer stack and the linear head into one class."""

    def __init__(self, c_in, c_hidden, c_out, num_layers, activation_function, optimizer_name, learning_rate, dp_rate_linear,layer_name="GCN" ,**kwargs):
        """
        Inputs:
            c_in - Dimension of input features
            c_hidden - Dimension of hidden features
            c_out - Dimension of the output features. Usually number of classes in classification
            num_layers - Number of "hidden" graph layers
            layer_name - String of the graph layer to use
            dp_rate_linear - Dropout rate to apply throughout the network
            kwargs - Additional arguments for the graph layer (e.g. number of heads for GAT; i'm not using gat here)
            activation_function - Activation function, given as a string that is passed to eval()
            optimizer_name, learning_rate - Accepted but not used in this class
        """
        super().__init__()
        gnn_layer = gnn_layer_by_name[layer_name]
        layers = []
        # eval() runs arbitrary code; only trusted strings should be passed.
        activation_function = eval(activation_function) ##not great to use
        in_channels, out_channels = c_in, c_hidden
        for l_idx in range(num_layers-1):
            layers += [
                gnn_layer(in_channels=in_channels,
                          out_channels=out_channels,
                          **kwargs),
                activation_function,
                nn.Dropout(p=dp_rate_linear)
            ]
            in_channels = c_hidden
        # NOTE(review): the last graph layer emits c_out features, but the head
        # below expects c_hidden inputs; in the two-class version the encoder
        # was built with c_out=c_hidden. Confirm which width is intended.
        layers += [gnn_layer(in_channels=in_channels,
                             out_channels=c_out,
                             **kwargs)]
        self.layers = nn.ModuleList(layers)
        self.head = nn.Sequential(
            nn.Dropout(p=dp_rate_linear),
            nn.Linear(c_hidden, c_out)
        )

    # NOTE(review): batch_idx is used in the body but is not a parameter, so a
    # call with (x, edge_index, batch_idx) raises the TypeError quoted below.
    def forward(self, x, edge_index):
        for l in self.layers:
            # Graph layers also receive the connectivity; other modules only x.
            if isinstance(l, geom_nn.MessagePassing): #passing data between conv
                x = l(x, edge_index) # apply the graph layer to the features and edges
            else:
                x = l(x)
        # NOTE(review): self.GNN is never assigned in __init__ of this class;
        # the loop above already applies the graph layers.
        x = self.GNN(x, edge_index)
        x = geom_nn.global_mean_pool(x, batch_idx)
        x = self.head(x)
        return x
But I get the error:
TypeError: forward() takes 3 positional arguments but 4 were given
Could someone show me the correct way to combine these? (The exact explanation of the code is in the "Graph level tasks / graph classification" section of here.)
Try adding batch_idx as a parameter of your new forward function. I also noticed some other inconsistencies: for example, where is geom_nn being passed to the function? You probably want to use self.geom_nn, and for that you need to fix the __init__() part as well.
def forward(self, x, edge_index, batch_idx):  # batch_idx must be accepted here
    """Run the graph layers, mean-pool using batch_idx, then apply the head.

    Inputs:
        x - Input features per node
        edge_index - Graph connectivity passed to every message-passing layer
        batch_idx - Passed to global_mean_pool to group node rows when averaging
    """
    for l in self.layers:
        if isinstance(l, geom_nn.MessagePassing):  # graph layers also need the edges
            x = l(x, edge_index)
        else:
            x = l(x)
    # The loop above already is the GNN stack: the merged class defines no
    # self.GNN attribute, so the former `self.GNN(x, edge_index)` call is gone.
    # NOTE(review): the merged __init__ in the question builds its last graph
    # layer with out_channels=c_out while self.head expects c_hidden inputs;
    # that layer probably needs out_channels=c_hidden. Verify.
    x = geom_nn.global_mean_pool(x, batch_idx)  # here batch_idx is used
    x = self.head(x)
    return x
I get an error when I try to run predictions on my data with a model loaded from saved weights. My program structure follows this repository, with a few small enhancements to the model.
Here is my edited code in models/faster_rcnn.py:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Layer, Lambda, Input, Conv2D, TimeDistributed, Dense, Flatten, BatchNormalization, Dropout
from ..utils import bbox_utils, train_utils
class Decoder(Layer):
    """Turn Faster R-CNN head predictions into final boxes, labels and scores.

    Boxes are decoded from the predicted deltas for every label, background
    predictions are zeroed out, and non max suppression keeps the best boxes.
    inputs:
        roi_bboxes = (batch_size, roi_bbox_size, [y1, x1, y2, x2])
        pred_deltas = (batch_size, roi_bbox_size, total_labels * [delta_y, delta_x, delta_h, delta_w])
        pred_label_probs = (batch_size, roi_bbox_size, total_labels)

    outputs:
        pred_bboxes = (batch_size, top_n, [y1, x1, y2, x2])
        pred_labels = (batch_size, top_n)
            1 to total label number
        pred_scores = (batch_size, top_n)
    """

    def __init__(self, variances, total_labels, max_total_size=200, score_threshold=0.67, **kwargs):
        super(Decoder, self).__init__(**kwargs)
        self.variances, self.total_labels = variances, total_labels
        self.max_total_size, self.score_threshold = max_total_size, score_threshold

    def get_config(self):
        """Return the base layer config extended with the constructor arguments."""
        layer_config = super(Decoder, self).get_config()
        layer_config["variances"] = self.variances
        layer_config["total_labels"] = self.total_labels
        layer_config["max_total_size"] = self.max_total_size
        layer_config["score_threshold"] = self.score_threshold
        return layer_config

    def call(self, inputs):
        roi_bboxes, raw_deltas, label_probs = inputs[0], inputs[1], inputs[2]
        n_batch = tf.shape(raw_deltas)[0]
        # One 4-vector of deltas per (roi, label), scaled by the variances.
        per_label_deltas = tf.reshape(raw_deltas, (n_batch, -1, self.total_labels, 4))
        per_label_deltas = per_label_deltas * self.variances
        # Repeat every roi once per label so it lines up with the deltas.
        tiled_rois = tf.tile(tf.expand_dims(roi_bboxes, -2), (1, 1, self.total_labels, 1))
        decoded_bboxes = bbox_utils.get_bboxes_from_deltas(tiled_rois, per_label_deltas)
        # Rois whose most likely label is index 0 get all-zero scores.
        best_label = tf.expand_dims(tf.argmax(label_probs, -1), -1)
        is_foreground = tf.not_equal(best_label, 0)
        scores = tf.where(is_foreground, label_probs, tf.zeros_like(label_probs))
        nms_result = bbox_utils.non_max_suppression(
            decoded_bboxes, scores,
            max_output_size_per_class=self.max_total_size,
            max_total_size=self.max_total_size,
            score_threshold=self.score_threshold)
        final_bboxes, final_scores, final_labels, _ = nms_result
        return final_bboxes, final_labels, final_scores
class RoIBBox(Layer):
    """Generating bounding boxes from rpn predictions.
    First calculating the boxes from predicted deltas and label probs.
    Then applied non max suppression and selecting "train or test nms_topn" boxes.
    inputs:
        rpn_bbox_deltas = (batch_size, img_output_height, img_output_width, anchor_count * [delta_y, delta_x, delta_h, delta_w])
            img_output_height and img_output_width are calculated to the base model feature map
        rpn_labels = (batch_size, img_output_height, img_output_width, anchor_count)

    outputs:
        roi_bboxes = (batch_size, train/test_nms_topn, [y1, x1, y2, x2])
    """

    def __init__(self, anchors, mode, hyper_params, **kwargs):
        super(RoIBBox, self).__init__(**kwargs)
        self.hyper_params = hyper_params
        self.mode = mode
        # Anchors are stored once as a float32 constant tensor.
        self.anchors = tf.constant(anchors, dtype=tf.float32)

    def get_config(self):
        """Return the base layer config extended with hyper_params, anchors and mode."""
        config = super(RoIBBox, self).get_config()
        # NOTE(review): anchors are exported as a numpy array, which is
        # presumably not JSON serializable; confirm if the config is dumped to JSON.
        config.update({"hyper_params": self.hyper_params, "anchors": self.anchors.numpy(), "mode": self.mode})
        return config

    def call(self, inputs):
        rpn_bbox_deltas = inputs[0]
        rpn_labels = inputs[1]
        anchors = self.anchors
        # Read the settings this call depends on.
        pre_nms_topn = self.hyper_params["pre_nms_topn"]
        # Any mode other than "training" uses the test-time box count.
        post_nms_topn = self.hyper_params["train_nms_topn"] if self.mode == "training" else self.hyper_params["test_nms_topn"]
        nms_iou_threshold = self.hyper_params["nms_iou_threshold"]
        variances = self.hyper_params["variances"]
        total_anchors = anchors.shape[0]
        batch_size = tf.shape(rpn_bbox_deltas)[0]
        # Flatten the spatial/anchor axes into one axis of total_anchors entries.
        rpn_bbox_deltas = tf.reshape(rpn_bbox_deltas, (batch_size, total_anchors, 4))
        rpn_labels = tf.reshape(rpn_labels, (batch_size, total_anchors))
        # Scale the deltas by the variances, then decode them against the anchors.
        rpn_bbox_deltas *= variances
        rpn_bboxes = bbox_utils.get_bboxes_from_deltas(anchors, rpn_bbox_deltas)
        # Indices of the pre_nms_topn highest label scores per batch item.
        _, pre_indices = tf.nn.top_k(rpn_labels, pre_nms_topn)
        # Keep only the boxes and scores at those indices.
        pre_roi_bboxes = tf.gather(rpn_bboxes, pre_indices, batch_dims=1)
        pre_roi_labels = tf.gather(rpn_labels, pre_indices, batch_dims=1)
        # Insert an axis of size 1 before calling non_max_suppression
        # (presumably its class axis; verify against bbox_utils).
        pre_roi_bboxes = tf.reshape(pre_roi_bboxes, (batch_size, pre_nms_topn, 1, 4))
        pre_roi_labels = tf.reshape(pre_roi_labels, (batch_size, pre_nms_topn, 1))
        # Only the boxes returned by non max suppression are used.
        roi_bboxes, _, _, _ = bbox_utils.non_max_suppression(pre_roi_bboxes, pre_roi_labels,
                                                             max_output_size_per_class=post_nms_topn,
                                                             max_total_size=post_nms_topn,
                                                             iou_threshold=nms_iou_threshold)
        # Block gradients from flowing back through the selected boxes.
        return tf.stop_gradient(roi_bboxes)
class RoIDelta(Layer):
    """Calculating faster rcnn actual bounding box deltas and labels.
    This layer only running on the training phase.
    inputs:
        roi_bboxes = (batch_size, nms_topn, [y1, x1, y2, x2])
        gt_boxes = (batch_size, padded_gt_boxes_size, [y1, x1, y2, x2])
        gt_labels = (batch_size, padded_gt_boxes_size)

    outputs:
        roi_bbox_deltas = (batch_size, train_nms_topn * total_labels, [delta_y, delta_x, delta_h, delta_w])
        roi_bbox_labels = (batch_size, train_nms_topn, total_labels)
    """

    def __init__(self, hyper_params, **kwargs):
        super(RoIDelta, self).__init__(**kwargs)
        self.hyper_params = hyper_params

    def get_config(self):
        """Return the base layer config extended with hyper_params."""
        config = super(RoIDelta, self).get_config()
        config.update({"hyper_params": self.hyper_params})
        return config

    def call(self, inputs):
        roi_bboxes = inputs[0]
        gt_boxes = inputs[1]
        gt_labels = inputs[2]
        total_labels = self.hyper_params["total_labels"]
        total_pos_bboxes = self.hyper_params["total_pos_bboxes"]
        total_neg_bboxes = self.hyper_params["total_neg_bboxes"]
        variances = self.hyper_params["variances"]
        batch_size, total_bboxes = tf.shape(roi_bboxes)[0], tf.shape(roi_bboxes)[1]
        # Calculate iou values between each bboxes and ground truth boxes
        iou_map = bbox_utils.generate_iou_map(roi_bboxes, gt_boxes)
        # Get max index value for each row
        max_indices_each_gt_box = tf.argmax(iou_map, axis=2, output_type=tf.int32)
        # IoU map has iou values for every gt boxes and we merge these values column wise
        merged_iou_map = tf.reduce_max(iou_map, axis=2)
        # Positive candidates: best IoU above 0.67, then sub-selected by the
        # helper (presumably down to at most total_pos_bboxes; verify in train_utils).
        pos_mask = tf.greater(merged_iou_map, 0.67)
        pos_mask = train_utils.randomly_select_xyz_mask(pos_mask, tf.constant([total_pos_bboxes], dtype=tf.int32))
        # Negative candidates: best IoU strictly between 0.1 and 0.47.
        neg_mask = tf.logical_and(tf.less(merged_iou_map, 0.47), tf.greater(merged_iou_map, 0.1))
        neg_mask = train_utils.randomly_select_xyz_mask(neg_mask, tf.constant([total_neg_bboxes], dtype=tf.int32))
        # Best-matching gt box per roi; zeroed for rois that are not positive.
        gt_boxes_map = tf.gather(gt_boxes, max_indices_each_gt_box, batch_dims=1)
        expanded_gt_boxes = tf.where(tf.expand_dims(pos_mask, axis=-1), gt_boxes_map, tf.zeros_like(gt_boxes_map))
        # Label per roi: positives keep the matched gt label, negatives end up
        # as -1 + 1 = 0, and every other roi stays at -1.
        gt_labels_map = tf.gather(gt_labels, max_indices_each_gt_box, batch_dims=1)
        pos_gt_labels = tf.where(pos_mask, gt_labels_map, tf.constant(-1, dtype=tf.int32))
        neg_gt_labels = tf.cast(neg_mask, dtype=tf.int32)
        expanded_gt_labels = pos_gt_labels + neg_gt_labels
        # Regression targets, divided by the variances.
        roi_bbox_deltas = bbox_utils.get_deltas_from_bboxes(roi_bboxes, expanded_gt_boxes) / variances
        # tf.one_hot yields an all-zero row for the -1 label.
        roi_bbox_labels = tf.one_hot(expanded_gt_labels, total_labels)
        # Multiply by the one-hot labels so only the slot of the assigned label
        # keeps its deltas, then flatten the (roi, label) axes.
        scatter_indices = tf.tile(tf.expand_dims(roi_bbox_labels, -1), (1, 1, 1, 4))
        roi_bbox_deltas = scatter_indices * tf.expand_dims(roi_bbox_deltas, -2)
        roi_bbox_deltas = tf.reshape(roi_bbox_deltas, (batch_size, total_bboxes * total_labels, 4))
        # Targets are returned with gradients blocked.
        return tf.stop_gradient(roi_bbox_deltas), tf.stop_gradient(roi_bbox_labels)
class RoIPooling(Layer):
    """Bring the feature-map crop of every roi to one common size.

    Each bounding box is cropped out of the feature map and resized to the pooling size.
    inputs:
        feature_map = (batch_size, img_output_height, img_output_width, channels)
        roi_bboxes = (batch_size, train/test_nms_topn, [y1, x1, y2, x2])

    outputs:
        final_pooling_feature_map = (batch_size, train/test_nms_topn, pooling_size[0], pooling_size[1], channels)
            pooling_size usually (7, 7)
    """

    def __init__(self, hyper_params, **kwargs):
        super(RoIPooling, self).__init__(**kwargs)
        self.hyper_params = hyper_params

    def get_config(self):
        """Return the base layer config extended with hyper_params."""
        layer_config = super(RoIPooling, self).get_config()
        layer_config["hyper_params"] = self.hyper_params
        return layer_config

    def call(self, inputs):
        feature_map, roi_bboxes = inputs[0], inputs[1]
        pooling_size = self.hyper_params["pooling_size"]
        roi_shape = tf.shape(roi_bboxes)
        batch_size, total_bboxes = roi_shape[0], roi_shape[1]
        flat_box_count = batch_size * total_bboxes
        # For every flattened box, the index of the batch item it belongs to.
        batch_ids = tf.expand_dims(tf.range(batch_size), axis=1)
        box_batch_ids = tf.reshape(tf.tile(batch_ids, (1, total_bboxes)), (-1, ))
        flat_bboxes = tf.reshape(roi_bboxes, (flat_box_count, 4))
        # Crop to bounding box size then resize to pooling size
        pooled = tf.image.crop_and_resize(
            feature_map,
            flat_bboxes,
            box_batch_ids,
            pooling_size
        )
        # Restore the separate batch and box axes.
        pooled_shape = pooled.shape
        target_shape = (batch_size, total_bboxes, pooled_shape[1], pooled_shape[2], pooled_shape[3])
        return tf.reshape(pooled, target_shape)
def get_model_frcnn(feature_extractor, rpn_model, anchors, hyper_params, mode="training"):
    """Generating faster rcnn model on top of the given rpn model and hyper params.
    inputs:
        feature_extractor = feature extractor layer from the base model
        rpn_model = tf.keras.model generated rpn model
        anchors = (total_anchors, [y1, x1, y2, x2])
            these values in normalized format between [0, 1]
        hyper_params = dictionary
        mode = "training" or "inference"
            any value other than "training" builds the inference model

    outputs:
        frcnn_model = tf.keras.model
    """
    input_img = rpn_model.input
    rpn_reg_predictions, rpn_cls_predictions = rpn_model.output
    # Region proposals derived from the rpn outputs.
    roi_bboxes = RoIBBox(anchors, mode, hyper_params, name="roi_bboxes")([rpn_reg_predictions, rpn_cls_predictions])
    # Pool the feature map region of every proposal.
    roi_pooled = RoIPooling(hyper_params, name="roi_pooling")([feature_extractor.output, roi_bboxes])
    # Per-roi head: flatten, two 4096-unit dense layers, dropout after the second.
    output = TimeDistributed(Flatten(), name="frcnn_flatten")(roi_pooled)
    output = TimeDistributed(Dense(4096, activation="relu"), name="frcnn_fc1")(output)
    # output = TimeDistributed(Dropout(0.5), name="frcnn_dropout1")(output)
    output = TimeDistributed(Dense(4096, activation="relu"), name="frcnn_fc2")(output)
    output = TimeDistributed(Dropout(0.5), name="frcnn_dropout2")(output)
    frcnn_cls_predictions = TimeDistributed(Dense(hyper_params["total_labels"], activation="softmax"), name="frcnn_cls")(output)
    frcnn_reg_predictions = TimeDistributed(Dense(hyper_params["total_labels"] * 4, activation="linear"), name="frcnn_reg")(output)
    #
    if mode == "training":
        # Extra inputs that carry the ground truth and the rpn targets.
        input_gt_boxes = Input(shape=(None, 4), name="input_gt_boxes", dtype=tf.float32)
        input_gt_labels = Input(shape=(None, ), name="input_gt_labels", dtype=tf.int32)
        rpn_cls_actuals = Input(shape=(None, None, hyper_params["anchor_count"]), name="input_rpn_cls_actuals", dtype=tf.float32)
        rpn_reg_actuals = Input(shape=(None, 4), name="input_rpn_reg_actuals", dtype=tf.float32)
        frcnn_reg_actuals, frcnn_cls_actuals = RoIDelta(hyper_params, name="roi_deltas")(
                                                        [roi_bboxes, input_gt_boxes, input_gt_labels])
        # Each loss is computed by a Lambda layer named after the loss.
        loss_names = ["rpn_reg_loss", "rpn_cls_loss", "frcnn_reg_loss", "frcnn_cls_loss"]
        rpn_reg_loss_layer = Lambda(train_utils.reg_loss, name=loss_names[0])([rpn_reg_actuals, rpn_reg_predictions])
        rpn_cls_loss_layer = Lambda(train_utils.rpn_cls_loss, name=loss_names[1])([rpn_cls_actuals, rpn_cls_predictions])
        frcnn_reg_loss_layer = Lambda(train_utils.reg_loss, name=loss_names[2])([frcnn_reg_actuals, frcnn_reg_predictions])
        frcnn_cls_loss_layer = Lambda(train_utils.frcnn_cls_loss, name=loss_names[3])([frcnn_cls_actuals, frcnn_cls_predictions])
        # Five inputs, nine outputs (proposals, predictions and the four losses).
        frcnn_model = Model(inputs=[input_img, input_gt_boxes, input_gt_labels,
                                    rpn_reg_actuals, rpn_cls_actuals],
                            outputs=[roi_bboxes, rpn_reg_predictions, rpn_cls_predictions,
                                     frcnn_reg_predictions, frcnn_cls_predictions,
                                     rpn_reg_loss_layer, rpn_cls_loss_layer,
                                     frcnn_reg_loss_layer, frcnn_cls_loss_layer])
        # Register every loss layer output as both a model loss and a mean metric.
        for layer_name in loss_names:
            layer = frcnn_model.get_layer(layer_name)
            frcnn_model.add_loss(layer.output)
            frcnn_model.add_metric(layer.output, name=layer_name, aggregation="mean")
        #
    else:
        # Inference: one image input, three outputs produced by the Decoder.
        bboxes, labels, scores = Decoder(hyper_params["variances"], hyper_params["total_labels"], name="faster_rcnn_decoder")(
                                         [roi_bboxes, frcnn_reg_predictions, frcnn_cls_predictions])
        frcnn_model = Model(inputs=input_img, outputs=[bboxes, labels, scores])
    #
    return frcnn_model
def init_model_frcnn(model, hyper_params):
    """Call the model once on random dummy data so that it gets initialized.
    In this way, the training process can continue from where it left off.
    inputs:
        model = tf.keras.model
        hyper_params = dictionary
    """
    img_size = hyper_params["img_size"]
    dummy_img = tf.random.uniform((1, img_size, img_size, 3))
    fmap_size = hyper_params["feature_map_shape"]
    anchor_count = hyper_params["anchor_count"]
    total_anchors = fmap_size * fmap_size * anchor_count
    # One fake ground-truth box with a random label id below total_labels.
    dummy_gt_boxes = tf.random.uniform((1, 1, 4))
    dummy_gt_labels = tf.random.uniform((1, 1), maxval=hyper_params["total_labels"], dtype=tf.int32)
    # Fake rpn regression and classification targets.
    dummy_deltas = tf.random.uniform((1, total_anchors, 4))
    dummy_cls = tf.random.uniform((1, fmap_size, fmap_size, anchor_count), maxval=1, dtype=tf.float32)
    dummy_batch = [dummy_img, dummy_gt_boxes, dummy_gt_labels, dummy_deltas, dummy_cls]
    model(dummy_batch)
and this is code to test a model.
# Evaluation script: build the inference model, load the weights, predict on test_data.
batch_size = 4
# epochs and load_weights are not used by the statements shown here.
epochs = 10
load_weights = False
backbone = "vgg16"
hyper_params = train_utils.get_hyper_params(backbone)
labels = list(label_map_dict.keys()) # my custom label (pothole and crack)
# Index 0 is reserved for the background class.
labels = ["bg"] + labels
# Counting the items iterates over the whole dataset once.
test_total_item = len(list(test_data))
test_data = test_data.map(lambda data : data_utils.preprocessing_before_frcnn(
data, IMAGE_SIZE, IMAGE_SIZE))
test_data = test_data.padded_batch(
batch_size, padded_shapes=data_shapes, padding_values=padding_values)
load_path = io_utils.get_model_path("faster_rcnn", backbone)
rpn_model, feature_extractor = rpn_vgg16.get_model_vgg16(hyper_params)
# Any mode other than "training" makes get_model_frcnn build the inference model.
frcnn_test_model = faster_rcnn.get_model_frcnn(feature_extractor, rpn_model, anchors, hyper_params, mode="test")
frcnn_test_model.load_weights(load_path)
step_size = train_utils.get_step_size(test_total_item, batch_size)
# NOTE(review): presumably each dataset element is a 3-tuple (image, gt_boxes,
# gt_labels); Keras then reads it as (x, y, sample_weight), which would explain
# the sample_weight TypeError. Mapping the dataset to images only before
# predict() should avoid it. Verify against data_utils.
pred_bboxes, pred_labels, pred_scores = frcnn_test_model.predict(test_data, steps=step_size, verbose=1)
After I run my test code, the error happens like this:
TypeError Traceback (most recent call last)
<ipython-input-26-de9c8627623e> in <module>()
1 step_size = train_utils.get_step_size(test_total_item, batch_size)
----> 2 pred_bboxes, pred_labels, pred_scores = frcnn_test_model.predict(test_data, steps=step_size, verbose=1)
16 frames
/usr/local/lib/python3.7/dist-packages/tensorflow_core/python/autograph/impl/api.py in wrapper(*args, **kwargs)
235 except Exception as e: # pylint:disable=broad-except
236 if hasattr(e, 'ag_error_metadata'):
--> 237 raise e.ag_error_metadata.to_exception(e)
238 else:
239 raise
TypeError: in converted code:
/usr/local/lib/python3.7/dist-packages/tensorflow_core/python/keras/engine/training_v2.py:677 map_fn
batch_size=None)
/usr/local/lib/python3.7/dist-packages/tensorflow_core/python/keras/engine/training.py:2474 _standardize_tensors
sample_weight, feed_output_names)
/usr/local/lib/python3.7/dist-packages/tensorflow_core/python/keras/engine/training_utils.py:639 standardize_sample_weights
'sample_weight')
/usr/local/lib/python3.7/dist-packages/tensorflow_core/python/keras/engine/training_utils.py:629 standardize_sample_or_class_weights
str(x_weight))
TypeError: The model has multiple outputs, so `sample_weight` should be either a list or a dict. Provided `sample_weight` type not understood: Tensor("args_2:0", shape=(None, None), dtype=int32)
The current tensorflow version I used is Tensorflow 2.1.0
I'm trying to train the MLP mixer on a custom dataset based on this repository.
The code I have so far is shown below. How can I save the trained model so I can use it later on test images?
import torch
import numpy as np
from torch import nn
from einops.layers.torch import Rearrange
import glob
import cv2
from torch.utils.data import Dataset, DataLoader
class customDataset(Dataset):
    """Image-classification dataset read from one sub-directory per class.

    Every ``*.jpg`` file found in a sub-directory of the dataset root becomes
    one sample, labelled with the name of that sub-directory.
    """

    def __init__(self):
        self.imags_path = '/path_to_dataset/'
        file_list = glob.glob(self.imags_path + '*')
        # List of [image path, class name] pairs.
        self.data = []
        for class_path in file_list:
            # The last path component is taken as the class name.
            class_name = class_path.split('/')[-1]
            for img_path in glob.glob(class_path + '/*.jpg'):
                self.data.append([img_path, class_name])
        # Class name -> integer id; a directory name missing here raises
        # KeyError in __getitem__.
        self.class_map = {'dogs': 0, 'cats': 1}
        self.img_dim = (416, 416)

    def __len__(self):
        """Return the number of collected images."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load sample idx.

        Returns:
            (img_tensor, class_id): the resized image as a channel-first
            tensor and the class id as a tensor with one element.
        Raises:
            FileNotFoundError: if the image file cannot be read.
        """
        img_path, class_name = self.data[idx]
        img = cv2.imread(img_path)
        # cv2.imread signals an unreadable or missing file by returning None;
        # fail with a clear message instead of an obscure error in cv2.resize.
        if img is None:
            raise FileNotFoundError('Could not read image: ' + str(img_path))
        img = cv2.resize(img, self.img_dim)
        class_id = self.class_map[class_name]
        img_tensor = torch.from_numpy(img)
        # Height/width/channel -> channel/height/width.
        img_tensor = img_tensor.permute(2, 0, 1)
        class_id = torch.tensor([class_id])
        return img_tensor, class_id
class FeedForward(nn.Module):
    """Two-layer MLP: Linear -> GELU -> Dropout -> Linear -> Dropout.

    The last dimension is expanded from ``dim`` to ``hidden_dim`` and
    projected back to ``dim``.
    """

    def __init__(self, dim, hidden_dim, dropout = 0.):
        super().__init__()
        stages = [
            nn.Linear(dim, hidden_dim),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, dim),
            nn.Dropout(dropout),
        ]
        self.net = nn.Sequential(*stages)

    def forward(self, x):
        """Apply the MLP along the last dimension of x."""
        transformed = self.net(x)
        return transformed
class MixerBlock(nn.Module):
    """One mixer block: token mixing then channel mixing, each with a residual."""

    def __init__(self, dim, num_patch, token_dim, channel_dim, dropout = 0.):
        super().__init__()
        # Token mixing: normalize, swap the patch and channel axes, run the
        # MLP across patches, and swap the axes back.
        token_stages = [
            nn.LayerNorm(dim),
            Rearrange('b n d -> b d n'),
            FeedForward(num_patch, token_dim, dropout),
            Rearrange('b d n -> b n d'),
        ]
        self.token_mix = nn.Sequential(*token_stages)
        # Channel mixing: normalize and run the MLP across channels.
        channel_stages = [
            nn.LayerNorm(dim),
            FeedForward(dim, channel_dim, dropout),
        ]
        self.channel_mix = nn.Sequential(*channel_stages)

    def forward(self, x):
        """Return x after both residual mixing steps."""
        after_tokens = x + self.token_mix(x)
        after_channels = after_tokens + self.channel_mix(after_tokens)
        return after_channels
class MLPMixer(nn.Module):
    """MLP-Mixer classifier: patch embedding, mixer blocks, pooled linear head."""

    def __init__(self, in_channels, dim, num_classes, patch_size, image_size, depth, token_dim, channel_dim):
        super().__init__()
        assert image_size % patch_size == 0, 'Image dimensions must be divisible by the patch size.'
        patches_per_side = image_size // patch_size
        self.num_patch = patches_per_side ** 2
        # A convolution with kernel size and stride equal to patch_size embeds
        # every patch; the spatial grid is then flattened into one patch axis.
        embedding_stages = [
            nn.Conv2d(in_channels, dim, patch_size, patch_size),
            Rearrange('b c h w -> b (h w) c'),
        ]
        self.to_patch_embedding = nn.Sequential(*embedding_stages)
        self.mixer_blocks = nn.ModuleList(
            [MixerBlock(dim, self.num_patch, token_dim, channel_dim) for _ in range(depth)]
        )
        self.layer_norm = nn.LayerNorm(dim)
        self.mlp_head = nn.Sequential(nn.Linear(dim, num_classes))

    def forward(self, x):
        """Return the output of the linear head for a batch of images."""
        tokens = self.to_patch_embedding(x)
        for block in self.mixer_blocks:
            tokens = block(tokens)
        tokens = self.layer_norm(tokens)
        # Average over the patch axis before the head.
        pooled = tokens.mean(dim=1)
        return self.mlp_head(pooled)
if __name__ == '__main__':
    # `device` was used below without ever being defined in this script; pick
    # the GPU when one is available and fall back to the CPU otherwise.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    dataset = customDataset()
    train_loader = DataLoader(dataset, batch_size=1, shuffle=True)
    mixer_model = MLPMixer(in_channels=3,
                           image_size=416,
                           patch_size=16,
                           num_classes=2,
                           dim=512,
                           depth=8,
                           token_dim=256,
                           channel_dim=2048)
    # The model has to live on the same device as the batches fed to it.
    mixer_model = mixer_model.to(device)
    for i, data in enumerate(train_loader, 0):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        # Images come out of the dataset as integer tensors; the model needs floats.
        # NOTE(review): labels are cast to float as well; a classification loss
        # such as CrossEntropyLoss would need integer class ids. Confirm.
        inputs, labels = inputs.float(), labels.float()
        outputs = mixer_model(inputs)
Thanks.
I'm following the guide to Transformers and the colab project https://colab.research.google.com/drive/1XBP0Zh8K4g_n0A2p1UlGFf3dij0EX_Kt
but when I run the cell with the line multi_head = build_model() I get the error.
this is the output from the console:
NameError Traceback (most recent call
last) in ()
----> 1 multi_head = build_model()
5 frames in (x)
40 self.dropout = Dropout(attn_dropout)
41 def call(self, q, k, v, mask):
---> 42 attn = Lambda(lambda x:K.batch_dot(x[0],x[1],axes=[2,2])/self.temper)([q, k])
43 if mask is not None:
44 mmask = Lambda(lambda x:(-1e+10)*(1-x))(mask)
NameError: name 'K' is not defined
It just runs after the model architecture code, which the error refers to.
Can you see where this K should be defined?
import random, os, sys
import numpy as np
from tensorflow.keras.models import *
from tensorflow.keras.layers import *
from tensorflow.keras.callbacks import *
from tensorflow.keras.initializers import *
import tensorflow as tf
from tensorflow.python.keras.layers import Layer
try:
from dataloader import TokenList, pad_to_longest
# for transformer
except: pass
# Embedding dimension: the default d_model of Transformer and the width of the
# Embedding layer built in Transformer.compile().
embed_size = 60
class LayerNormalization(Layer):
    """Normalize over the last axis, with a learned scale (gamma) and shift (beta)."""

    def __init__(self, eps=1e-6, **kwargs):
        # eps is added to the standard deviation to avoid dividing by zero.
        self.eps = eps
        super(LayerNormalization, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create gamma (ones) and beta (zeros), both sized like the last input axis."""
        feature_shape = input_shape[-1:]
        self.gamma = self.add_weight(name='gamma', shape=feature_shape,
                                     initializer=Ones(), trainable=True)
        self.beta = self.add_weight(name='beta', shape=feature_shape,
                                    initializer=Zeros(), trainable=True)
        super(LayerNormalization, self).build(input_shape)

    def call(self, x):
        last_axis_mean = K.mean(x, axis=-1, keepdims=True)
        last_axis_std = K.std(x, axis=-1, keepdims=True)
        scaled = self.gamma * (x - last_axis_mean)
        return scaled / (last_axis_std + self.eps) + self.beta

    def compute_output_shape(self, input_shape):
        """The output has the same shape as the input."""
        return input_shape
class ScaledDotProductAttention():
    """Scaled dot-product attention assembled from Keras layers."""

    def __init__(self, d_model, attn_dropout=0.1):
        # Scores are divided by sqrt(d_model).
        self.temper = np.sqrt(d_model)
        self.dropout = Dropout(attn_dropout)

    def __call__(self, q, k, v, mask):
        """Return (output, attn) for queries q, keys k, values v and an optional mask."""
        scores = Lambda(lambda x:K.batch_dot(x[0],x[1],axes=[2,2])/self.temper)([q, k])
        if mask is not None:
            # Positions where the mask is 0 receive a large negative bias so
            # that softmax gives them (almost) no weight.
            bias = Lambda(lambda x:(-1e+10)*(1-x))(mask)
            scores = Add()([scores, bias])
        weights = Activation('softmax')(scores)
        weights = self.dropout(weights)
        context = Lambda(lambda x:K.batch_dot(x[0], x[1]))([weights, v])
        return context, weights
class MultiHeadAttention():
    """Multi-head attention built from Keras layers.

    __call__ returns (outputs, attn); outputs are layer-normalized when
    use_norm is true.
    """
    # mode 0 - big martixes, faster; mode 1 - more clear implementation
    def __init__(self, n_head, d_model, d_k, d_v, dropout, mode=0, use_norm=True):
        self.mode = mode
        self.n_head = n_head
        self.d_k = d_k
        self.d_v = d_v
        self.dropout = dropout
        if mode == 0:
            # One projection per q/k/v that covers all heads at once.
            self.qs_layer = Dense(n_head*d_k, use_bias=False)
            self.ks_layer = Dense(n_head*d_k, use_bias=False)
            self.vs_layer = Dense(n_head*d_v, use_bias=False)
        elif mode == 1:
            # A separate projection per head.
            self.qs_layers = []
            self.ks_layers = []
            self.vs_layers = []
            for _ in range(n_head):
                self.qs_layers.append(TimeDistributed(Dense(d_k, use_bias=False)))
                self.ks_layers.append(TimeDistributed(Dense(d_k, use_bias=False)))
                self.vs_layers.append(TimeDistributed(Dense(d_v, use_bias=False)))
        self.attention = ScaledDotProductAttention(d_model)
        self.layer_norm = LayerNormalization() if use_norm else None
        # Output projection back to d_model features.
        self.w_o = TimeDistributed(Dense(d_model))

    def __call__(self, q, k, v, mask=None):
        d_k, d_v = self.d_k, self.d_v
        n_head = self.n_head
        if self.mode == 0:
            qs = self.qs_layer(q) # [batch_size, len_q, n_head*d_k]
            ks = self.ks_layer(k)
            vs = self.vs_layer(v)

            # Move the head axis in front and fold it into the batch axis.
            # NOTE(review): d_k is also used when reshaping vs, so this
            # presumably requires d_v == d_k; confirm.
            def reshape1(x):
                s = tf.shape(x) # [batch_size, len_q, n_head * d_k]
                x = tf.reshape(x, [s[0], s[1], n_head, d_k])
                x = tf.transpose(x, [2, 0, 1, 3])
                x = tf.reshape(x, [-1, s[1], d_k]) # [n_head * batch_size, len_q, d_k]
                return x
            qs = Lambda(reshape1)(qs)
            ks = Lambda(reshape1)(ks)
            vs = Lambda(reshape1)(vs)
            if mask is not None:
                # Repeat the mask along the batch axis, n_head times.
                mask = Lambda(lambda x:K.repeat_elements(x, n_head, 0))(mask)
            head, attn = self.attention(qs, ks, vs, mask=mask)

            # Undo reshape1: split the heads off the batch axis and concatenate
            # them along the feature axis.
            def reshape2(x):
                s = tf.shape(x) # [n_head * batch_size, len_v, d_v]
                x = tf.reshape(x, [n_head, -1, s[1], s[2]])
                x = tf.transpose(x, [1, 2, 0, 3])
                x = tf.reshape(x, [-1, s[1], n_head*d_v]) # [batch_size, len_v, n_head * d_v]
                return x
            head = Lambda(reshape2)(head)
        elif self.mode == 1:
            heads = []; attns = []
            # Run attention head by head, then concatenate the results.
            for i in range(n_head):
                qs = self.qs_layers[i](q)
                ks = self.ks_layers[i](k)
                vs = self.vs_layers[i](v)
                head, attn = self.attention(qs, ks, vs, mask)
                heads.append(head); attns.append(attn)
            head = Concatenate()(heads) if n_head > 1 else heads[0]
            attn = Concatenate()(attns) if n_head > 1 else attns[0]
        outputs = self.w_o(head)
        outputs = Dropout(self.dropout)(outputs)
        if not self.layer_norm: return outputs, attn
        # The residual connection is disabled here.
        # outputs = Add()([outputs, q]) # sl: fix
        return self.layer_norm(outputs), attn
class PositionwiseFeedForward():
    """Feed-forward block of two kernel-size-1 convolutions, with residual and layer norm."""

    def __init__(self, d_hid, d_inner_hid, dropout=0.1):
        # Expand to d_inner_hid with ReLU, then project back to d_hid.
        self.w_1 = Conv1D(d_inner_hid, 1, activation='relu')
        self.w_2 = Conv1D(d_hid, 1)
        self.layer_norm = LayerNormalization()
        self.dropout = Dropout(dropout)

    def __call__(self, x):
        """Return layer_norm(dropout(w_2(w_1(x))) + x)."""
        hidden = self.w_1(x)
        projected = self.dropout(self.w_2(hidden))
        residual_sum = Add()([projected, x])
        return self.layer_norm(residual_sum)
class EncoderLayer():
    """Encoder layer: multi-head self-attention followed by the feed-forward block."""

    def __init__(self, d_model, d_inner_hid, n_head, d_k, d_v, dropout=0.1):
        self.self_att_layer = MultiHeadAttention(n_head, d_model, d_k, d_v, dropout=dropout)
        self.pos_ffn_layer  = PositionwiseFeedForward(d_model, d_inner_hid, dropout=dropout)

    def __call__(self, enc_input, mask=None):
        """Return (output, self-attention weights) for enc_input."""
        # Queries, keys and values are all the encoder input.
        attended, slf_attn = self.self_att_layer(enc_input, enc_input, enc_input, mask=mask)
        return self.pos_ffn_layer(attended), slf_attn
def GetPosEncodingMatrix(max_len, d_emb):
    """Build the (max_len, d_emb) sinusoidal position-encoding table.

    Row 0 is all zeros. For every position >= 1, even columns hold the sine
    and odd columns the cosine of position / 10000 ** (2 * (column // 2) / d_emb).
    """
    def angle_row(position):
        # Position 0 gets an all-zero row.
        if position == 0:
            return np.zeros(d_emb)
        return [position / np.power(10000, 2 * (dim // 2) / d_emb) for dim in range(d_emb)]

    table = np.array([angle_row(position) for position in range(max_len)])
    table[1:, 0::2] = np.sin(table[1:, 0::2])  # dim 2i
    table[1:, 1::2] = np.cos(table[1:, 1::2])  # dim 2i+1
    return table
def GetPadMask(q, k):
    """Return a float mask that is 1 where the key entry is non-zero.

    Assumes q and k are 2-D id tensors with 0 used for padding (TODO confirm
    against callers); the mask then has one row per query position.
    """
    query_ones = K.expand_dims(K.ones_like(q, 'float32'), -1)
    key_is_token = K.not_equal(k, 0)
    key_mask = K.cast(K.expand_dims(key_is_token, 1), 'float32')
    # The product of the ones column and the key row repeats the key mask
    # for every query position.
    return K.batch_dot(query_ones, key_mask, axes=[2,1])
def GetSubMask(s):
    """Return one lower-triangular matrix of ones per batch item.

    The matrix side is the size of axis 1 of s; position i may look at
    positions j <= i.
    """
    seq_len = tf.shape(s)[1]
    batch_dims = tf.shape(s)[:1]
    identity = tf.eye(seq_len, batch_shape=batch_dims)
    # The cumulative sum of the identity down the rows fills the lower triangle.
    return K.cumsum(identity, 1)
class Transformer():
    """Wrapper that builds and compiles a Keras model for a single sigmoid output.

    NOTE(review): the class relies on the names max_features, Encoder and (in
    compile) embedding_matrix from the enclosing module; they are not defined
    in the code shown here.
    """
    def __init__(self, len_limit, embedding_matrix, d_model=embed_size, \
              d_inner_hid=512, n_head=10, d_k=64, d_v=64, layers=2, dropout=0.1, \
              share_word_emb=False, **kwargs):
        self.name = 'Transformer'
        self.len_limit = len_limit
        self.src_loc_info = False # True # sl: fix later
        self.d_model = d_model
        self.decode_model = None
        d_emb = d_model
        # Frozen position embedding initialised from the sinusoidal table.
        pos_emb = Embedding(len_limit, d_emb, trainable=False, \
                            weights=[GetPosEncodingMatrix(len_limit, d_emb)])
        i_word_emb = Embedding(max_features, d_emb, weights=[embedding_matrix]) # Add Kaggle provided embedding here
        self.encoder = Encoder(d_model, d_inner_hid, n_head, d_k, d_v, layers, dropout, \
                               word_emb=i_word_emb, pos_emb=pos_emb)

    def get_pos_seq(self, x):
        """Return 1-based positions along axis 1, with 0 wherever x is 0."""
        mask = K.cast(K.not_equal(x, 0), 'int32')
        pos = K.cumsum(K.ones_like(x, 'int32'), 1)
        return pos * mask

    def compile(self, active_layers=999):
        """Build self.model (embedding, two BiLSTMs, attention, pooling, dense) and compile it.

        active_layers is accepted but not used, and self.encoder is not part
        of the model built here.
        """
        src_seq_input = Input(shape=(None, ))
        # NOTE(review): embedding_matrix here is the module-level name, not the
        # constructor argument (which is never stored on self); confirm.
        x = Embedding(max_features, embed_size, weights=[embedding_matrix])(src_seq_input)
        # LSTM before attention layers
        x = Bidirectional(LSTM(128, return_sequences=True))(x)
        x = Bidirectional(LSTM(64, return_sequences=True))(x)
        # Self-attention: queries, keys and values are all x.
        x, slf_attn = MultiHeadAttention(n_head=3, d_model=300, d_k=64, d_v=64, dropout=0.1)(x, x, x)
        # Average- and max-pool over the sequence axis, then concatenate.
        avg_pool = GlobalAveragePooling1D()(x)
        max_pool = GlobalMaxPooling1D()(x)
        conc = concatenate([avg_pool, max_pool])
        conc = Dense(64, activation="relu")(conc)
        x = Dense(1, activation="sigmoid")(conc)
        self.model = Model(inputs=src_seq_input, outputs=x)
        self.model.compile(optimizer = 'adam', loss = 'mean_squared_error', metrics=['accuracy'])
If you look at where K is being used you will see:
K.expand_dims
K.cumsum
K.batch_dot
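These are Keras backend functions. The code is missing the import `from keras import backend as K` (or, to match the `tensorflow.keras` imports used in this code, `from tensorflow.keras import backend as K`); `K` is the conventional alias for the backend module.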
class VAE(torch.nn.Module):
    """Fully connected variational autoencoder with batch normalization.

    The encoder maps input_size -> hidden_sizes[0] -> [1] -> [2] and emits two
    vectors of size hidden_sizes[3]; the decoder mirrors it back to input_size.
    """

    def __init__(self, input_size, hidden_sizes, batch_size):
        """
        Inputs:
            input_size - Number of features per sample
            hidden_sizes - Layer widths; indices 0..3 are used, and the last
                           entry is the width of the sampled noise
            batch_size - Stored on the instance; not used by the layers
        """
        super(VAE, self).__init__()
        self.input_size = input_size
        self.hidden_sizes = hidden_sizes
        self.batch_size = batch_size
        # Encoder
        self.fc = torch.nn.Linear(input_size, hidden_sizes[0])
        self.BN = torch.nn.BatchNorm1d(hidden_sizes[0])
        self.fc1 = torch.nn.Linear(hidden_sizes[0], hidden_sizes[1])
        self.BN1 = torch.nn.BatchNorm1d(hidden_sizes[1])
        self.fc2 = torch.nn.Linear(hidden_sizes[1], hidden_sizes[2])
        self.BN2 = torch.nn.BatchNorm1d(hidden_sizes[2])
        self.fc3_mu = torch.nn.Linear(hidden_sizes[2], hidden_sizes[3])
        self.fc3_sig = torch.nn.Linear(hidden_sizes[2], hidden_sizes[3])
        # Decoder
        self.fc4 = torch.nn.Linear(hidden_sizes[3], hidden_sizes[2])
        self.BN4 = torch.nn.BatchNorm1d(hidden_sizes[2])
        self.fc5 = torch.nn.Linear(hidden_sizes[2], hidden_sizes[1])
        self.BN5 = torch.nn.BatchNorm1d(hidden_sizes[1])
        self.fc6 = torch.nn.Linear(hidden_sizes[1], hidden_sizes[0])
        self.BN6 = torch.nn.BatchNorm1d(hidden_sizes[0])
        self.fc7 = torch.nn.Linear(hidden_sizes[0], input_size)

    def sample_z(self, x_size, mu, log_var):
        """Return mu + exp(log_var / 2) * eps with eps drawn from N(0, 1).

        Inputs:
            x_size - Number of rows of noise to draw (the batch size)
            mu, log_var - Tensors broadcastable against (x_size, hidden_sizes[-1])
        """
        # Draw the noise on mu's device and in mu's dtype so the arithmetic
        # below also works for models moved to the GPU.
        eps = torch.randn(x_size, self.hidden_sizes[-1], device=mu.device, dtype=mu.dtype)
        return mu + torch.exp(log_var / 2) * eps

    def forward(self, x):
        """Encode x, sample a latent vector and decode it.

        Inputs:
            x - 2-D tensor (batch, input_size); BatchNorm1d rejects 1-D input
        Returns:
            (reconstruction in [0, 1], mu, sig)
        """
        ###########
        # Encoder #
        ###########
        # Activations are called through fully qualified torch names, so the
        # class does not depend on an undeclared `nn` alias.
        out1 = torch.relu(self.BN(self.fc(x)))
        out2 = torch.relu(self.BN1(self.fc1(out1)))
        out3 = torch.relu(self.BN2(self.fc2(out2)))
        mu = self.fc3_mu(out3)
        sig = torch.nn.functional.softplus(self.fc3_sig(out3))

        ###########
        # Decoder #
        ###########
        # sample from the distro
        # NOTE(review): sig (a softplus output) is passed as sample_z's log_var
        # argument; confirm which parameterization is intended.
        sample = self.sample_z(x.size(0), mu, sig)
        out4 = torch.relu(self.BN4(self.fc4(sample)))
        out5 = torch.relu(self.BN5(self.fc5(out4)))
        out6 = torch.relu(self.BN6(self.fc6(out5)))
        out7 = torch.sigmoid(self.fc7(out6))
        return (out7, mu, sig)
# Build the model and switch it to evaluation mode before the forward pass.
vae = VAE(input_size, hidden_sizes, batch_size)
vae.eval()
# NOTE(review): X is presumably a single 1-D sample here; the BatchNorm1d
# layers need a leading batch dimension, which would explain the ValueError below.
x_sample, z_mu, z_var = vae(X)
The error is:
File "VAE_LongTensor.py", line 200, in <module>
x_sample, z_mu, z_var = vae(X)
ValueError: expected 2D or 3D input (got 1D input)
When you build an nn.Module in PyTorch for processing 1D signals, PyTorch actually expects the input to be 2D: the first dimension is the "mini-batch" dimension.
Thus you need to add a singleton dimension to your X:
x_sample, z_mu, z_var = vae(X[None, ...])