I got an error in the linear function
class MixModel(nn.Module):
def __init__(self,pre_trained='bert-base-uncased'):
super().__init__()
self.bert = AutoModel.from_pretrained('distilbert-base-uncased')
self.hidden_size = self.bert.config.hidden_size
self.conv = nn.Conv1d(in_channels=768, out_channels=256, kernel_size=5, padding='valid', stride=1)
self.relu = nn.ReLU()
self.pool = nn.MaxPool1d(kernel_size= 64- 5 + 1)
self.dropout = nn.Dropout(0.3)
self.clf = nn.Linear(self.hidden_size*2,6)
def forward(self,inputs, mask , labels):
cls_hs = self.bert(input_ids=inputs,attention_mask=mask, return_dict= False)
x=cls_hs[0]
print(cls_hs[0])
print(len(cls_hs[0]))
print(cls_hs[0].size())
#x = torch.cat(cls_hs,0) # x= [416, 64, 768]
x = x.permute(0, 2, 1)
x = self.conv(x)
x = self.relu(x)
x = self.pool(x)
x = self.dropout(x)
x = self.clf(x)
return x
The error is:
/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py in linear(input, weight, bias)
1846 if has_torch_function_variadic(input, weight, bias):
1847 return handle_torch_function(linear, (input, weight, bias), input, weight, bias=bias)
-> 1848 return torch._C._nn.linear(input, weight, bias)
1849
1850
RuntimeError: mat1 and mat2 shapes cannot be multiplied (65536x1 and 1536x6)
I am trying to concatenate a BERT model with a 1D CNN using PyTorch, as discussed here:
output from bert into cnn model
class MixModel(nn.Module):
def __init__(self,pre_trained='bert-base-uncased'):
super().__init__()
self.bert = AutoModel.from_pretrained('distilbert-base-uncased')
self.hidden_size = self.bert.config.hidden_size
self.conv = nn.Conv1d(in_channels=768, out_channels=256, kernel_size=5, padding='valid', stride=1)
self.relu = nn.ReLU()
self.pool = nn.MaxPool1d(kernel_size= 64- 5 + 1)
self.dropout = nn.Dropout(0.3)
self.clf1 = nn.Linear(256,256)
self.clf2 = nn.Linear(256,6)
How should I change the linear function?
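For reference, here is a minimal shape-tracing sketch (an assumption on my part, using a batch of 256 as in the related post below) of why the classifier receives a 65536x1 matrix and what in_features it would need instead:

import torch
import torch.nn as nn

# Hypothetical standalone trace of the shapes, not the original training code.
x = torch.randn(256, 64, 768)                 # (batch, seq_len, hidden) from BERT
x = x.permute(0, 2, 1)                        # (256, 768, 64): channels = hidden size
x = nn.Conv1d(768, 256, kernel_size=5)(x)     # (256, 256, 60)
x = nn.MaxPool1d(kernel_size=64 - 5 + 1)(x)   # (256, 256, 1)

# The linear layer sees a trailing dimension of 1, not hidden_size*2 = 1536,
# and PyTorch flattens the leading dims: 256 * 256 = 65536, hence
# "mat1 and mat2 shapes cannot be multiplied (65536x1 and 1536x6)".
x = x.squeeze(-1)                             # (256, 256)
logits = nn.Linear(256, 6)(x)                 # (256, 6)
print(logits.shape)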
Related
In this case, I want to use tf.matmul between the weights and a mathematical operation. I have the following custom layer:
rotationSpeed = 2* np.pi*70 #Hz
class PhysicalLayer(keras.layers.Layer):
def __init__(self, units=1):
super(PhysicalLayer, self).__init__()
self.units = units
def build(self, input_shape):
print(input_shape[0])
self.w = self.add_weight(
shape=(input_shape[-1], self.units),
initializer="random_normal",
trainable=True,
)
self.b = self.add_weight(
shape=(self.units,), initializer="random_normal", trainable=True
)
def call(self, inputs):
rotationSpeedSquare = tf.math.square(rotationSpeed)
maximumVibration = tf.math.reduce_max(inputs, axis = 1, keepdims = True)
stiff = rotationSpeedSquare/maximumVibration
return tf.matmul(stiff, self.w) + self.b
And the following model:
class modelMaximum(tf.keras.Model):
def __init__(self, num_classes=50):
super(modelMaximum, self).__init__()
self.dense1 = tf.keras.layers.Dense(num_classes, activation=tf.nn.relu)
self.physical = PhysicalLayer()
self.dense2 = tf.keras.layers.Dense(128, activation=tf.nn.relu)
self.dense3 = tf.keras.layers.Dense(64, activation=tf.nn.relu)
self.dense4 = tf.keras.layers.Dense(64, activation=tf.nn.relu)
self.dense5 = tf.keras.layers.Dense(32, activation=tf.nn.relu)
self.dense6 = tf.keras.layers.Dense(1)
def call(self, inputs):
x = self.dense1(inputs)
x= self.physical(x)
x = self.dense2(x)
x = self.dense3(x)
x = self.dense4(x)
x = self.dense5(x)
return self.dense6(x)
When I try to fit this model on the training set:
modelMax = modelMaximum()
modelMax.compile(optimizer='adam', loss='mae', metrics=[tf.keras.metrics.RootMeanSquaredError()])
modelMax.fit(train, trainRUL, batch_size=64, epochs=50, verbose=1)
I obtain the following error:
ValueError: Dimensions must be equal, but are 1 and 50 for '{{node model_maximum_1/physical_layer_1/MatMul}} = MatMul[T=DT_FLOAT, transpose_a=false, transpose_b=false](model_maximum_1/physical_layer_1/truediv, model_maximum_1/physical_layer_1/MatMul/ReadVariableOp)' with input shapes: [?,1], [50,1].
How should I fix this problem?
Thanks
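One hedged reading of the error: stiff has shape (batch, 1) because reduce_max collapses the feature axis, while self.w was built as (input_dim, units) = (50, 1), so the matmul dimensions disagree. A minimal sketch of that interpretation, building the kernel from the dimension stiff actually has (and casting the scalar to the input dtype), would be:

import numpy as np
import tensorflow as tf
from tensorflow import keras

rotationSpeed = 2 * np.pi * 70  # Hz

class PhysicalLayer(keras.layers.Layer):
    def __init__(self, units=1):
        super().__init__()
        self.units = units

    def build(self, input_shape):
        # The tensor fed to matmul is `stiff`, whose last dimension is 1,
        # so the kernel must be (1, units) rather than (input_dim, units).
        self.w = self.add_weight(shape=(1, self.units),
                                 initializer="random_normal", trainable=True)
        self.b = self.add_weight(shape=(self.units,),
                                 initializer="random_normal", trainable=True)

    def call(self, inputs):
        rotationSpeedSquare = tf.cast(tf.math.square(rotationSpeed), inputs.dtype)
        maximumVibration = tf.math.reduce_max(inputs, axis=1, keepdims=True)  # (batch, 1)
        stiff = rotationSpeedSquare / maximumVibration                        # (batch, 1)
        return tf.matmul(stiff, self.w) + self.b                              # (batch, units)

Whether a (1, units) kernel is what the physics intends is an assumption; the alternative is to reshape stiff so its last dimension matches input_shape[-1].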
I have a one hop GCN layer
class GCN_AISUMMER(nn.Module):
"""
"""
def __init__(self, in_features, out_features, bias=True):
super().__init__()
self.linear = nn.Linear(in_features, out_features, bias=bias)
def forward(self, X, A):
"""
A: adjacency matrix
X: graph signal
"""
L = create_graph_lapl_norm(A)
num_neighbours = L.sum(dim=-1, keepdims=True)
x = self.linear(X)
node_feats = torch.bmm(L, x)
node_feats = node_feats / num_neighbours
return node_feats
which is used in the following neural net
class GNN(nn.Module):
def __init__(self,
in_features = 12,
hidden_dim = 128,
classes = 2,
dropout = 0.5):
super(GNN, self).__init__()
self.conv1 = GCN_AISUMMER(in_features, hidden_dim)
self.conv2 = GCN_AISUMMER(hidden_dim, hidden_dim)
self.conv3 = GCN_AISUMMER(hidden_dim, hidden_dim)
self.fc = nn.Linear(hidden_dim, classes)
self.dropout = dropout
def forward(self, x,A):
x = self.conv1(x, A)
x = F.relu(x)
x = self.conv2(x, A)
x = F.relu(x)
x = self.conv3(x, A)
x = F.dropout(x, p=self.dropout, training=self.training)
# aggregate node embeddings
x = x.mean(dim=1)
# final classification layer
return self.fc(x)
I tried to print out the weights applied to the input data after training. I tried print(model.conv1.weight) and got AttributeError: 'GCN_AISUMMER' object has no attribute 'weight'.
print(model.trainable_weights) gave AttributeError: 'GNN' object has no attribute 'trainable_weights'.
I did get the weight of fc1 when I used print(model.fc1.weight), but what I want are the weights of the layer that acts on the input data after training.
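In PyTorch a submodule's parameters are reached by attribute access on the wrapping module, so here is a small sketch (assuming model is an instance of the GNN defined above) of how the first GCN layer's weights could be printed; note that trainable_weights is a Keras attribute, not a PyTorch one:

# Hedged sketch: GCN_AISUMMER wraps an nn.Linear, so its weights live one level
# deeper than model.conv1 itself.
print(model.conv1.linear.weight)   # weight matrix of the first GCN layer
print(model.conv1.linear.bias)

# Or list every trainable parameter with its name and shape:
for name, param in model.named_parameters():
    print(name, param.shape)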
I am trying to customize a Resnet 50 with an attention layer. Please find my code below:
IMAGE_SIZE = [224, 224]
resnet = ResNet50(input_shape=IMAGE_SIZE + [3], weights='imagenet', include_top=False)
# don't train existing weights
for layer in resnet.layers:
layer.trainable = False
import torch
import math
import torch.nn as nn
class BasicConv(nn.Module):
def __init__(self, in_planes, out_planes, kernel_size, stride=1, padding=0, dilation=1,
groups=1, relu=True, bn=True, bias=False):
super(BasicConv, self).__init__()
self.out_channels = out_planes
self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride,
padding=padding, dilation=dilation, groups=groups, bias=bias)
        self.bn = nn.BatchNorm2d(out_planes, eps=1e-5, momentum=0.01, affine=True) if bn else None
self.relu = nn.ReLU() if relu else None
def forward(self, x):
x = self.conv(x)
if self.bn is not None:
x = self.bn(x)
if self.relu is not None:
x = self.relu(x)
return x
class Flatten(nn.Module):
def forward(self, x):
return x.view(x.size(0), -1)
class ChannelGate(nn.Module):
def __init__(self, gate_channels, reduction_ratio=16, pool_types=['avg', 'max']):
super(ChannelGate, self).__init__()
self.gate_channels = gate_channels
self.mlp = nn.Sequential(
Flatten(),
nn.Linear(gate_channels, gate_channels // reduction_ratio),
nn.ReLU(),
nn.Linear(gate_channels // reduction_ratio, gate_channels)
)
self.pool_types = pool_types
def forward(self, x):
channel_att_sum = None
for pool_type in self.pool_types:
if pool_type=='avg':
                avg_pool = F.avg_pool2d(x, (x.size(2), x.size(3)), stride=(x.size(2), x.size(3)))
channel_att_raw = self.mlp( avg_pool )
elif pool_type=='max':
                max_pool = F.max_pool2d(x, (x.size(2), x.size(3)), stride=(x.size(2), x.size(3)))
channel_att_raw = self.mlp( max_pool )
elif pool_type=='lp':
                lp_pool = F.lp_pool2d(x, 2, (x.size(2), x.size(3)), stride=(x.size(2), x.size(3)))
channel_att_raw = self.mlp( lp_pool )
elif pool_type=='lse':
# LSE pool only
lse_pool = logsumexp_2d(x)
channel_att_raw = self.mlp( lse_pool )
if channel_att_sum is None:
channel_att_sum = channel_att_raw
else:
channel_att_sum = channel_att_sum + channel_att_raw
scale = F.sigmoid( channel_att_sum ).unsqueeze(2).unsqueeze(3).expand_as(x)
return x * scale
def logsumexp_2d(tensor):
tensor_flatten = tensor.view(tensor.size(0), tensor.size(1), -1)
s, _ = torch.max(tensor_flatten, dim=2, keepdim=True)
outputs = s + (tensor_flatten - s).exp().sum(dim=2, keepdim=True).log()
return outputs
class ChannelPool(nn.Module):
def forward(self, x):
        return torch.cat((torch.max(x, 1)[0].unsqueeze(1), torch.mean(x, 1).unsqueeze(1)), dim=1)
class SpatialGate(nn.Module):
def __init__(self):
super(SpatialGate, self).__init__()
kernel_size = 7
self.compress = ChannelPool()
self.spatial = BasicConv(2, 1, kernel_size, stride=1, padding=(kernel_size-1) // 2, relu=False)
def forward(self, x):
x_compress = self.compress(x)
x_out = self.spatial(x_compress)
scale = F.sigmoid(x_out) # broadcasting
return x * scale
class CBAM(nn.Module):
    def __init__(self, gate_channels, reduction_ratio=16, pool_types=['avg', 'max'], no_spatial=False):
super(CBAM, self).__init__()
self.ChannelGate = ChannelGate(gate_channels, reduction_ratio, pool_types)
self.no_spatial=no_spatial
if not no_spatial:
self.SpatialGate = SpatialGate()
def forward(self, x):
x_out = self.ChannelGate(x)
if not self.no_spatial:
x_out = self.SpatialGate(x_out)
return x_out
flat1 = Flatten()(resnet.output)
class1 = Dense(256, activation='relu')(flat1)
class1=BatchNormalization()(class1)
# receive 3D and output 3D
class2 = Dense(128, activation='relu')(class1)
class2=BatchNormalization()(class2)
class2=CBAM(128,8)(class2)
output = Dense(len(folders), activation='softmax')(class2)
I am getting the following error message while implementing the code:
flat1 = Flatten()(resnet.output)
in forward(self, x)
class Flatten(nn.Module):
def forward(self, x):
---> return x.view(x.size(0), -1)
AttributeError: 'KerasTensor' object has no attribute 'view'
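The underlying issue is that resnet.output is a Keras symbolic tensor, so it cannot be passed through PyTorch nn.Module objects such as the Flatten and CBAM classes above; the head has to be built from Keras/TF layers (or the whole model moved to PyTorch). As a hedged sketch, a Keras-only head, with the CBAM part omitted or re-implemented with Keras layers, could look like:

from tensorflow.keras.layers import Flatten, Dense, BatchNormalization
from tensorflow.keras.models import Model

flat1 = Flatten()(resnet.output)                 # Keras Flatten, not the PyTorch one above
class1 = Dense(256, activation='relu')(flat1)
class1 = BatchNormalization()(class1)
class2 = Dense(128, activation='relu')(class1)
class2 = BatchNormalization()(class2)
# A channel-attention block would have to be written with Keras layers to fit here.
output = Dense(len(folders), activation='softmax')(class2)   # `folders` as defined elsewhere in the script
model = Model(inputs=resnet.input, outputs=output)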
I am trying to concatenate a BERT model with a 1D CNN using PyTorch. I used this code, but I do not understand the meaning of in_channels and out_channels in nn.Conv1d.
The input shape into the CNN model is torch.Size([256, 64, 768]).
class MixModel(nn.Module):
def __init__(self,pre_trained='distilbert-base-uncased'):
super().__init__()
self.bert = AutoModel.from_pretrained('distilbert-base-uncased')
self.hidden_size = self.bert.config.hidden_size
self.conv = nn.Conv1d(in_channels=1, out_channels=256, kernel_size=5, padding='valid', stride=1)
self.relu = nn.ReLU()
self.pool = nn.MaxPool1d(kernel_size= 256- 5 + 1)
self.dropout = nn.Dropout(0.3)
self.clf = nn.Linear(self.hidden_size*2,6)
def forward(self,inputs, mask , labels):
cls_hs = self.bert(input_ids=inputs,attention_mask=mask, return_dict= False)
x=cls_hs
# x = torch.cat(cls_hs[0]) # x= [416, 64, 768]
x = self.conv(x)
x = self.relu(x)
x = self.pool(x)
x = self.dropout(x)
x = self.clf(x)
return x
Edit
I used the recommended answer and changed the parameters, but I got an error.
class MixModel(nn.Module):
def __init__(self,pre_trained='bert-base-uncased'):
super().__init__()
self.bert = AutoModel.from_pretrained('distilbert-base-uncased')
self.hidden_size = self.bert.config.hidden_size
self.conv = nn.Conv1d(in_channels=768, out_channels=256, kernel_size=5, padding='valid', stride=1)
self.relu = nn.ReLU()
self.pool = nn.MaxPool1d(kernel_size= 64- 5 + 1)
print(11)
self.dropout = nn.Dropout(0.3)
print(12)
self.clf = nn.Linear(self.hidden_size*2,6)
print(13)
def forward(self,inputs, mask , labels):
cls_hs = self.bert(input_ids=inputs,attention_mask=mask, return_dict= False)
x=cls_hs[0]
print(cls_hs[0])
print(len(cls_hs[0]))
print(cls_hs[0].size())
#x = torch.cat(cls_hs,0) # x= [416, 64, 768]
x = x.permute(0, 2, 1)
x = self.conv(x)
x = self.relu(x)
x = self.pool(x)
x = self.dropout(x)
x = self.clf(x)
return x
The error is:
/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py in linear(input, weight, bias)
1846 if has_torch_function_variadic(input, weight, bias):
1847 return handle_torch_function(linear, (input, weight, bias), input, weight, bias=bias)
-> 1848 return torch._C._nn.linear(input, weight, bias)
1849
1850
RuntimeError: mat1 and mat2 shapes cannot be multiplied (65536x1 and 1536x6)
The output of BERT (and many other transformer-based models) has shape batch x seq-len x feature-dim: that is, your input is a batch of 256 sequences of length 64 (probably with padding), where each token is represented by a feature vector of dimension 768.
In order to apply a 1D convolution along the sequence-length dimension, you first need to permute x to shape batch x dim x len:
x = x.permute(0, 2, 1)
Now you can apply nn.Conv1d, where in_channels is the feature dimension of x (768). The out_channels is up to you: it is the hidden dimension of your model.
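A minimal sketch of that flow (shapes assume the 256 x 64 x 768 BERT output mentioned in the question):

import torch
import torch.nn as nn

x = torch.randn(256, 64, 768)                  # (batch, seq_len, feature_dim)
x = x.permute(0, 2, 1)                         # (batch, feature_dim, seq_len) = (256, 768, 64)
conv = nn.Conv1d(in_channels=768, out_channels=256, kernel_size=5)
x = conv(x)                                    # (256, 256, 60): 768 input channels -> 256 output channels
print(x.shape)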
I used PyTorch to build a segmentation model that uses BatchNorm layers. I found that when I set model.eval() at test time, the test output is all zeros. If I don't set model.eval(), it performs well.
I searched for related questions and concluded that model.eval() freezes the BN statistics, but I am still confused about how to solve this problem.
My batch size is 1, and this is my model:
import torch
import torch.nn as nn
class Encode_Block(nn.Module):
def __init__(self, in_feat, out_feat):
super(Encode_Block, self).__init__()
self.conv1 = Res_Block(in_feat, out_feat)
self.conv2 = Res_Block_identity(out_feat, out_feat)
def forward(self, inputs):
outputs = self.conv1(inputs)
outputs = self.conv2(outputs)
return outputs
class Decode_Block(nn.Module):
def __init__(self, in_feat, out_feat):
super(Decode_Block, self).__init__()
self.conv1 = Res_Block(in_feat, out_feat)
self.conv2 = Res_Block_identity(out_feat, out_feat)
def forward(self, inputs):
outputs = self.conv1(inputs)
outputs = self.conv2(outputs)
return outputs
class Conv_Block(nn.Module):
def __init__(self, in_feat, out_feat):
super(Conv_Block, self).__init__()
self.conv1 = nn.Sequential(
nn.Conv2d(in_feat, out_feat, kernel_size=3, stride=1, padding=1),
nn.LeakyReLU(),
)
def forward(self, inputs):
outputs = self.conv1(inputs)
return outputs
def conv3x3(in_planes, out_planes, stride=1):
"""3x3 convolution with padding"""
return nn.Conv2d(
in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False
)
def conv1x1(in_planes, out_planes, stride=1):
"""1x1 convolution with padding"""
return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
class Res_Block(nn.Module):
def __init__(self, inplanes, planes, stride=1):
super(Res_Block, self).__init__()
self.conv_input = conv1x1(inplanes, planes)
self.conv1 = conv3x3(inplanes, planes, stride)
self.bn = nn.BatchNorm2d(planes)
self.relu = nn.LeakyReLU(inplace=True)
self.conv2 = conv3x3(planes, planes)
self.conv3 = conv1x1(planes, planes)
self.stride = stride
def forward(self, x):
residual = self.conv_input(x)
out = self.conv1(x)
out = self.bn(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn(out)
out += residual
out = self.relu(out)
return out
class Res_Block_identity(nn.Module):
def __init__(self, inplanes, planes, stride=1):
super(Res_Block_identity, self).__init__()
self.conv1 = conv3x3(inplanes, planes, stride)
self.bn = nn.BatchNorm2d(planes)
self.relu = nn.LeakyReLU(inplace=True)
self.conv2 = conv3x3(planes, planes)
self.conv3 = conv1x1(planes, planes)
self.stride = stride
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn(out)
out += residual
out = self.relu(out)
return out
class UpConcat(nn.Module):
def __init__(self, in_feat, out_feat):
super(UpConcat, self).__init__()
self.de_conv = nn.ConvTranspose2d(in_feat, out_feat, kernel_size=2, stride=2)
def forward(self, inputs, down_outputs):
outputs = self.de_conv(inputs)
out = torch.cat([down_outputs, outputs], 1)
return out
class Res_UNet(nn.Module):
def __init__(self, num_channels=1, num_classes=1):
super(Res_UNet, self).__init__()
flt = 64
self.down1 = Encode_Block(num_channels, flt)
self.down2 = Encode_Block(flt, flt * 2)
self.down3 = Encode_Block(flt * 2, flt * 4)
self.down4 = Encode_Block(flt * 4, flt * 8)
self.down_pool = nn.MaxPool2d(kernel_size=2)
self.bottom = Encode_Block(flt * 8, flt * 16)
self.up_cat1 = UpConcat(flt * 16, flt * 8)
self.up_conv1 = Decode_Block(flt * 16, flt * 8)
self.up_cat2 = UpConcat(flt * 8, flt * 4)
self.up_conv2 = Decode_Block(flt * 8, flt * 4)
self.up_cat3 = UpConcat(flt * 4, flt * 2)
self.up_conv3 = Decode_Block(flt * 4, flt * 2)
self.up_cat4 = UpConcat(flt * 2, flt)
self.up_conv4 = Decode_Block(flt * 2, flt)
self.final = nn.Sequential(
nn.Conv2d(flt, num_classes, kernel_size=1), nn.Sigmoid()
)
def forward(self, inputs):
down1_feat = self.down1(inputs)
pool1_feat = self.down_pool(down1_feat)
down2_feat = self.down2(pool1_feat)
pool2_feat = self.down_pool(down2_feat)
down3_feat = self.down3(pool2_feat)
pool3_feat = self.down_pool(down3_feat)
down4_feat = self.down4(pool3_feat)
pool4_feat = self.down_pool(down4_feat)
bottom_feat = self.bottom(pool4_feat)
up1_feat = self.up_cat1(bottom_feat, down4_feat)
up1_feat = self.up_conv1(up1_feat)
up2_feat = self.up_cat2(up1_feat, down3_feat)
up2_feat = self.up_conv2(up2_feat)
up3_feat = self.up_cat3(up2_feat, down2_feat)
up3_feat = self.up_conv3(up3_feat)
up4_feat = self.up_cat4(up3_feat, down1_feat)
up4_feat = self.up_conv4(up4_feat)
outputs = self.final(up4_feat)
return outputs
The model completely fails at segmentation after setting model.eval(), but performs well when model.eval() is removed. I am confused about this: is model.eval() necessary at test time?
BatchNorm layers keep running estimates of their computed mean and variance during training (model.train()), which are then used for normalization during evaluation (model.eval()).
Each layer has its own statistics for the mean and variance of its outputs/activations.
Since you are reusing your BatchNorm layer self.bn = nn.BatchNorm2d(planes) multiple times, the statistics get mixed up and don't represent the actual mean and variance at each point in the network.
So you should create a separate BatchNorm layer for each place where you use one.
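As a sketch (reusing the conv1x1/conv3x3 helpers from the question, and only showing Res_Block; Res_Block_identity would need the same change), that would look like:

import torch.nn as nn

class Res_Block(nn.Module):
    def __init__(self, inplanes, planes, stride=1):
        super().__init__()
        self.conv_input = conv1x1(inplanes, planes)
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)          # one BatchNorm per normalization site
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = conv1x1(planes, planes)
        self.bn3 = nn.BatchNorm2d(planes)
        self.relu = nn.LeakyReLU(inplace=True)

    def forward(self, x):
        residual = self.conv_input(x)
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out += residual
        return self.relu(out)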
EDIT: I just read that your batch_size is 1, which could also be the core of your problem: see Tensorflow and Batch Normalization with Batch Size==1 => Outputs all zeros