I want to create a model for hate speech detection. The idea is to add extra layers on top of an AlbertModel. My issue is in creating the ALBERT part of the model itself. I was basically following this guide (https://www.kaggle.com/dhruv1234/huggingface-tfbertmodel). I tried several different methods and all of them fail.
Below are my code and the exceptions; lines marked with '*' are where the exceptions are thrown. Everything I tried is in the code, with each method throwing a different error.
If you can help me fix any of the methods I tried, that would be great, and if there is another way to do it that works, that would help too.
This is the link for the full repo:
https://github.com/eliesid123/NLP-HateSpeechDetection/tree/main/HateSpeechDetection
Source code:
import torch
from transformers import AlbertConfig, AlbertModel
from tensorflow.keras.optimizers import Adam
import tensorflow as tf
import numpy as np
class ModelBuilder:
    def __init__(self,
                 tokenizer,
                 dropout=0.5,
                 learningRate=12,
                 batchSize=9,
                 epochs=20,
                 validationSplit=0.2,
                 inputSize=64
                 ):
        self.Dropout = dropout
        self.LearningRate = learningRate
        self.Epochs = epochs
        self.BatchSize = batchSize
        self.InputSize = inputSize
        self.ValidationSplit = validationSplit
        self.AlbertModel = AlbertModel.from_pretrained('albert-xxlarge-v2')
        self.AlbertConfig = AlbertConfig()
        self.Tokenizer = tokenizer
        self.IsCompiled = False
        self.Model = None
        self.ModelSummary = None

    def Create(self, PRINT=False):
        # INITIAL KAGGLE GUIDE
        # input_ids = tf.keras.Input(shape=(self.InputSize,), dtype='int64')
        # attention_masks = tf.keras.Input(shape=(self.InputSize,), dtype='float')
        # output, layer = self.AlbertModel(input_ids, attention_masks)   # *
        # THIS IS BASED ON HUGGINGFACE DOCUMENTATION
        # tensor = np.ndarray(shape=(self.InputSize,), dtype='long')
        # input_ids = torch.tensor(tensor)
        # tensor = np.ndarray(shape=(self.InputSize,), dtype='float')
        # attention_masks = torch.tensor(tensor)
        # output = self.AlbertModel(input_ids, attention_masks)   # *
        # THIS IS FROM HUGGINGFACE DOCUMENTATION
        # sampleText = "some line............................................."
        # encoded = self.Tokenizer.EncodeSentece(sampleText)
        # input_ids = encoded['input_ids']
        # attention_masks = encoded['attention_mask']
        # output = self.AlbertModel(torch.tensor(encoded).unsqueeze(self.BatchSize))   # *
        output = self.AlbertModel(self.AlbertConfig)
        output = output[1]
        output = tf.keras.layers.Dense(128, activation='relu')(output)
        output = tf.keras.layers.Dropout(self.Dropout)(output)
        output = tf.keras.layers.Dense(64, activation='relu')(output)
        output = tf.keras.layers.Dropout(self.Dropout)(output)
        output = tf.keras.layers.Dense(32, activation='relu')(output)
        output = tf.keras.layers.Dropout(self.Dropout)(output)
        output = tf.keras.layers.Dense(1, activation='sigmoid')(output)
        model = tf.keras.models.Model(inputs=[input_ids, attention_masks], outputs=output)
        self.Model = model
        self.ModelSummary = model.summary()
        if PRINT:
            print(self.ModelSummary)
Exceptions:
Using tf.keras.Input

Exception has occurred: AttributeError
'KerasTensor' object has no attribute 'size'
File "/home/elie/Desktop/NLP/HateSpeechDetection/src/modelBuilder.py", line 44, in Create
output, layer = self.AlbertModel(input_ids,attention_masks)
File "/home/elie/Desktop/NLP/HateSpeechDetection/src/main.py", line 16, in main
model.Create(PRINT=True)
File "/home/elie/Desktop/NLP/HateSpeechDetection/src/main.py", line 21, in <module>
main()

Using torch.tensor

Exception has occurred: ValueError
not enough values to unpack (expected 2, got 1)
File "/home/elie/Desktop/NLP/HateSpeechDetection/src/modelBuilder.py", line 50, in Create
output = self.AlbertModel(input_ids,attention_masks)
File "/home/elie/Desktop/NLP/HateSpeechDetection/src/main.py", line 16, in main
model.Create(PRINT=True)
File "/home/elie/Desktop/NLP/HateSpeechDetection/src/main.py", line 21, in <module>
main()

Using AlbertTokenizer.encode_plus()

Exception has occurred: ValueError
could not determine the shape of object type 'BatchEncoding'
File "/home/elie/Desktop/NLP/HateSpeechDetection/src/modelBuilder.py", line 54, in Create
output = self.AlbertModel(torch.tensor(encoded).unsqueeze(self.BatchSize))
File "/home/elie/Desktop/NLP/HateSpeechDetection/src/main.py", line 16, in main
model.Create(PRINT=True)
File "/home/elie/Desktop/NLP/HateSpeechDetection/src/main.py", line 21, in <module>
main()

Using AlbertConfig

Exception has occurred: AttributeError
'AlbertConfig' object has no attribute 'size'
File "/home/elie/Desktop/NLP/HateSpeechDetection/src/modelBuilder.py", line 59, in Create
output = self.AlbertModel(self.AlbertConfig)
File "/home/elie/Desktop/NLP/HateSpeechDetection/src/main.py", line 16, in main
model.Create(PRINT=True)
File "/home/elie/Desktop/NLP/HateSpeechDetection/src/main.py", line 21, in <module>
main()
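For reference, a minimal sketch of what the Keras route could look like if the TensorFlow variant of the model (TFAlbertModel from transformers) is used instead of the PyTorch AlbertModel. The layer sizes mirror the Create() method above, but this is an illustrative assumption, not the original code:

import tensorflow as tf
from transformers import TFAlbertModel  # TensorFlow counterpart of AlbertModel

def build_albert_classifier(input_size=64, dropout=0.5):
    albert = TFAlbertModel.from_pretrained('albert-xxlarge-v2')
    input_ids = tf.keras.Input(shape=(input_size,), dtype='int32', name='input_ids')
    attention_masks = tf.keras.Input(shape=(input_size,), dtype='int32', name='attention_mask')
    # The TF model accepts Keras tensors; index 1 is the pooled [CLS] output
    # (exposed as .pooler_output in newer transformers versions).
    pooled = albert(input_ids, attention_mask=attention_masks)[1]
    x = tf.keras.layers.Dense(128, activation='relu')(pooled)
    x = tf.keras.layers.Dropout(dropout)(x)
    x = tf.keras.layers.Dense(64, activation='relu')(x)
    x = tf.keras.layers.Dropout(dropout)(x)
    x = tf.keras.layers.Dense(1, activation='sigmoid')(x)
    return tf.keras.models.Model(inputs=[input_ids, attention_masks], outputs=x)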
Tokenizer class
import numpy as np
from transformers import AlbertTokenizer

class Tokenizor():
    def __init__(self, maxLen=64) -> None:
        self.Tokenizer = AlbertTokenizer.from_pretrained('albert-xxlarge-v2')
        self.MaxLen = maxLen

    def EncodeAll(self, data):
        input_ids = []
        attention_masks = []
        for i in range(len(data)):
            encoded = self.EncodeSentece(data[i])
            input_ids.append(encoded['input_ids'])
            attention_masks.append(encoded['attention_mask'])
        return np.array(input_ids), np.array(attention_masks)

    def EncodeSentece(self, line):
        encoded = self.Tokenizer.encode_plus(
            line,
            add_special_tokens=True,
            max_length=self.MaxLen,
            pad_to_max_length=True,
            return_attention_mask=True,
        )
        return encoded
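For illustration, a small usage sketch of the Tokenizor class above (the sample sentences are made up):

tokenizer = Tokenizor(maxLen=64)
input_ids, attention_masks = tokenizer.EncodeAll(["you are awful", "have a nice day"])
print(input_ids.shape, attention_masks.shape)  # expected: (2, 64) (2, 64)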
Related
Hi, I'm currently learning to code with Python and have been following a tutorial series, which helped me write the code shown below. Apologies for it being so long, but I cannot pinpoint the line of code that is causing this error. I have removed a lot of the comments to reduce the amount of code posted.
import numpy as np
import urllib.request
import os
import gzip
import lasagne
import theano
import theano.tensor as T
def load_dataset():
    def download(filename, source="http://yann.lecun.com/exdb/mnist/"):
        print("downloading:", filename)
        urllib.request.urlretrieve(source+filename, filename)

    def load_mnist_images(filename):
        if not os.path.exists(filename):
            download(filename)
        with gzip.open(filename, "rb") as f:
            data = np.frombuffer(f.read(), np.uint8, offset=16)
        data = data.reshape(-1, 1, 28, 28)
        return data / np.float32(256)

    def load_mnist_labels(filename):
        if not os.path.exists(filename):
            download(filename)
        with gzip.open(filename, "rb") as f:
            data = np.frombuffer(f.read(), np.uint8, offset=8)
        return data

    x_train = load_mnist_images("train-images-idx3-ubyte.gz")
    y_train = load_mnist_labels("train-labels-idx1-ubyte.gz")
    x_test = load_mnist_images("t10k-images-idx3-ubyte.gz")
    y_test = load_mnist_labels("t10k-labels-idx1-ubyte.gz")
    return x_train, y_train, x_test, y_test

x_train, y_train, x_test, y_test = load_dataset()

###### creating the handwriting digit recognition code ######

def build_nn(input_var=None):
    l_in = lasagne.layers.InputLayer(shape=(None, 1, 28, 28), input_var=input_var)
    l_in_drop = lasagne.layers.DropoutLayer(l_in, p=0.2)
    l_hid1 = lasagne.layers.DenseLayer(l_in_drop, num_units=800,
                                       nonlinearity=lasagne.nonlinearities.rectify,
                                       W=lasagne.init.GlorotUniform())
    l_hid1_drop = lasagne.layers.DropoutLayer(l_hid1, p=0.5)
    l_hid2 = lasagne.layers.DenseLayer(l_hid1_drop, num_units=800,
                                       nonlinearity=lasagne.nonlinearities.rectify,
                                       W=lasagne.init.GlorotUniform())
    l_hid2_drop = lasagne.layers.DropoutLayer(l_hid2, p=0.5)
    l_out = lasagne.layers.DenseLayer(l_hid2_drop, num_units=10,
                                      nonlinearity=lasagne.nonlinearities.softmax)
    return l_out

input_var = T.tensor4("inputs")    # an empty 4d array
target_var = T.ivector("targets")  # an empty 1d int array to represent the labels
network = build_nn(input_var)      # call the func that initializes the neural network
prediction = lasagne.layers.get_output(network)
loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
loss = loss.mean()
params = lasagne.layers.get_all_params(network, trainable=True)
updates = lasagne.updates.nesterov_momentum(loss, params, learning_rate=0.01, momentum=0.9)
train_fn = theano.function([input_var, target_var], loss, updates=updates)
num_training_steps = 10
for step in range(num_training_steps):
    train_err = train_fn(x_train, y_train)
    print("current training step is " + str(step))
The error that's stopping this code is this:
Traceback (most recent call last):
File "C:\Users\Admin\.vscode\Practice codes\machine learning\deep learning\deep learning.py", line 125, in <module>
network = build_nn(input_var) # call the func that initializes the neural network
File "C:\Users\Admin\.vscode\Practice codes\machine learning\deep learning\deep learning.py", line 95, in build_nn
l_hid1 = lasagne.layers.DenseLayer(l_in_drop, num_units= 800,
File "C:\Users\Admin\AppData\Roaming\Python\Python38\site-packages\lasagne\layers\dense.py", line 103, in __init__
self.W = self.add_param(W, (num_inputs, num_units), name="W")
File "C:\Users\Admin\AppData\Roaming\Python\Python38\site-packages\lasagne\layers\base.py", line 234, in add_param
param = utils.create_param(spec, shape, name)
File "C:\Users\Admin\AppData\Roaming\Python\Python38\site-packages\lasagne\utils.py", line 393, in create_param
spec = theano.shared(spec, broadcastable=bcast)
File "C:\Users\Admin\AppData\Roaming\Python\Python38\site-packages\theano\compile\sharedvalue.py", line 284, in shared
raise TypeError('No suitable SharedVariable constructor could be found.'
TypeError: No suitable SharedVariable constructor could be found. Are you sure all kwargs are supported? We do not support the parameter dtype or type. value="[[ 0.04638761 -0.02959769 0.02330909 ... 0.01545383 0.04763002
0.05265676]
[ 0.02095251 -0.05393376 -0.04289599 ... -0.02409102 0.02824548
-0.00327342]
[ 0.02908951 -0.02853872 -0.05450716 ... -0.02296509 0.02495853
0.02486875]
...
[-0.03704383 0.0286258 0.01158947 ... -0.02583007 -0.04925423
-0.0470493 ]
[ 0.03230407 -0.00246115 -0.05074456 ... 0.00299953 0.01883504
0.01312843]
[-0.05762409 -0.05119916 -0.02820581 ... -0.05675326 0.00458562
0.04403118]]". parameters="{'broadcastable': (False, False)}"
If it helps, I'm using Python 3.8, Lasagne 0.2.dev1, and Theano 1.0.5.
Any help would be greatly appreciated; if you have any questions, feel free to ask.
Thanks in advance.
I am a beginner in NLP. While attempting this competition, https://www.kaggle.com/c/contradictory-my-dear-watson, I am using the model 'bert-base-multilingual-uncased' and the BERT tokenizer from the same checkpoint. I am also using a Kaggle TPU. This is the custom dataset class I created.
class SherlockDataset(torch.utils.data.Dataset):
    def __init__(self, premise, hypothesis, tokenizer, max_len, target=None):
        super(SherlockDataset, self).__init__()
        self.premise = premise
        self.hypothesis = hypothesis
        self.tokenizer = tokenizer
        self.max_len = max_len
        self.target = target

    def __len__(self):
        return len(self.premise)

    def __getitem__(self, item):
        sen1 = str(self.premise[item])
        sen2 = str(self.hypothesis[item])
        encode_dict = self.tokenizer.encode_plus(sen1,
                                                 sen2,
                                                 add_special_tokens=True,
                                                 max_len=self.max_len,
                                                 pad_to_max_len=True,
                                                 return_attention_mask=True,
                                                 return_tensors='pt'
                                                 )
        input_ids = encode_dict["input_ids"][0]
        token_type_ids = encode_dict["token_type_ids"][0]
        att_mask = encode_dict["attention_mask"][0]
        if self.target is not None:
            sample = {
                "input_ids": input_ids,
                "token_type_ids": token_type_ids,
                "att_mask": att_mask,
                "targets": self.target[item]
            }
        else:
            sample = {
                "input_ids": input_ids,
                "token_type_ids": token_type_ids,
                "att_mask": att_mask
            }
        return sample
And this is where the data is loaded from the dataloader during training:
def train_fn(model, dataloader, optimizer, criterion, scheduler=None):
    model.train()
    print("train")
    for idx, sample in enumerate(dataloader):
        '''
        input_ids = sample["input_ids"].to(config.DEVICE)
        token_type_ids = sample["token_type_ids"].to(config.DEVICE)
        att_mask = sample["att_mask"].to(config.DEVICE)
        targets = sample["targets"].to(config.DEVICE)
        '''
        print("train_out")
        input_ids = sample[0].to(config.DEVICE)
        token_type_ids = sample[1].to(config.DEVICE)
        att_mask = sample[2].to(config.DEVICE)
        targets = sample[3].to(config.DEVICE)
        optimizer.zero_grad()
        output = model(input_ids, token_type_ids, att_mask)
        output = np.argmax(output, axis=1)
        loss = criterion(outputs, targets)
        accuracy = accuracy_score(output, targets)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        xm.optimizer_step(optimizer, barrier=True)
        if scheduler is not None:
            scheduler.step()
        if idx % 50 == 0:
            print(f"idx : {idx}, TRAIN LOSS : {loss}")
I am facing this error again and again
RuntimeError: Caught RuntimeError in DataLoader worker process 0.
Original Traceback (most recent call last):
File "/opt/conda/lib/python3.7/site-packages/torch/utils/data/_utils/worker.py", line 178, in _worker_loop
data = fetcher.fetch(index)
File "/opt/conda/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py", line 47, in fetch
return self.collate_fn(data)
File "/opt/conda/lib/python3.7/site-packages/torch/utils/data/_utils/collate.py", line 79, in default_collate
return [default_collate(samples) for samples in transposed]
File "/opt/conda/lib/python3.7/site-packages/torch/utils/data/_utils/collate.py", line 79, in <listcomp>
return [default_collate(samples) for samples in transposed]
File "/opt/conda/lib/python3.7/site-packages/torch/utils/data/_utils/collate.py", line 55, in default_collate
return torch.stack(batch, 0, out=out)
RuntimeError: stack expects each tensor to be equal size, but got [47] at entry 0 and [36] at entry 1
I have tried changing num_workers values and changing batch sizes. I have checked the data, and none of the text in it is null, 0, or corrupt in any sense. I have also tried changing max_len in the tokenizer, but I am not able to find a solution to this problem. Please check and let me know how I can fix it.
data_loader = torch.utils.data.DataLoader(
    batch_size=batch_size,
    dataset=data,
    shuffle=shuffle,
    num_workers=0,
    collate_fn=lambda x: x
)
Using a collate_fn in the DataLoader should be able to solve the problem.
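As a sketch of that idea (a hypothetical helper, not part of the original answer), a collate_fn that pads every field to the longest sequence in the batch, so torch.stack no longer sees mismatched lengths:

import torch
from torch.nn.utils.rnn import pad_sequence

def pad_collate(batch):
    # Each element of batch is the dict returned by SherlockDataset.__getitem__
    out = {}
    for key in batch[0]:
        values = [sample[key] for sample in batch]
        if key == "targets":
            out[key] = torch.tensor(values)  # assumes integer class labels
        else:
            out[key] = pad_sequence(values, batch_first=True, padding_value=0)
    return out

# data_loader = torch.utils.data.DataLoader(dataset=data, batch_size=batch_size,
#                                           shuffle=shuffle, collate_fn=pad_collate)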
I want to load two models in TensorFlow, but I get this error every time; it seems I can only load one model per script. The error is here:
Traceback (most recent call last):
File "X:/fffan/NLP/Classify/TextMix/pred_back.py", line 92, in <module>
textrcnn = TextRCNN_pred(vocab_dir="../data/cnews_vocab.txt")
File "X:/fffan/NLP/Classify/TextMix/pred_back.py", line 54, in __init__
saver.restore(sess=self.session, save_path=model_path)
File "D:\Anaconda3\lib\site-packages\tensorflow\python\training\saver.py", line 1302, in restore
err, "a Variable name or other graph key that is missing")
tensorflow.python.framework.errors_impl.NotFoundError: Restoring from checkpoint failed. This is most likely due to a Variable name or other graph key that is missing from the checkpoint. Please ensure that you have not altered the graph expected based on the checkpoint. Original error:
2 root error(s) found.
(0) Not found: Key W_projection_cnn not found in checkpoint
[[node save_1/RestoreV2 (defined at X:/fffan/NLP/Classify/TextMix/pred_back.py:46) ]]
(1) Not found: Key W_projection_cnn not found in checkpoint
[[node save_1/RestoreV2 (defined at X:/fffan/NLP/Classify/TextMix/pred_back.py:46) ]]
[[save_1/RestoreV2/_57]]
0 successful operations.
0 derived errors ignored.
Original stack trace for 'save_1/RestoreV2':
File "X:/fffan/NLP/Classify/TextMix/pred_back.py", line 92, in <module>
textrcnn = TextRCNN_pred(vocab_dir="../data/cnews_vocab.txt")
File "X:/fffan/NLP/Classify/TextMix/pred_back.py", line 46, in __init__
saver = tf.train.Saver()
And my code is here:
class Textcnn_pred(object):
    def __init__(self, vocab_dir):
        self.input_x = tf.placeholder(tf.int32, [None, config.seq_length], name='input_x')
        self.words = tools.read_file(vocab_dir)
        self.vocab_size = len(self.words)
        self.textcnn = TextCNN(config, self.vocab_size, keep_prob=1.0)
        self.logits_cnn = self.textcnn.cnn(self.input_x)
        saver = tf.train.Saver()
        sess_config = tf.ConfigProto(allow_soft_placement=True)
        sess_config.gpu_options.per_process_gpu_memory_fraction = 0.4
        sess_config.gpu_options.allow_growth = True
        with tf.Session() as self.sess:
            self.sess = tf.Session(config=sess_config)
            model_path = 'checkpoints/TextCNN/TextCNNnet_2019-11-01-15-31-50.ckpt-4000'
            saver.restore(sess=self.sess, save_path=model_path)
            print("################ load TextCNN model down! ##########################")

    def _close(self):
        self.sess.close()

    def text(self, input):
        logit_cnn = self.sess.run([self.logits_cnn], feed_dict={self.input_x: input})
        return logit_cnn


class TextRCNN_pred(object):
    def __init__(self, vocab_dir):
        self.input_x = tf.placeholder(tf.int32, [None, config.seq_length], name='input_x')
        self.words = tools.read_file(vocab_dir)
        self.vocab_size = len(self.words)
        self.textrcnn = TRCM.TextRCNN(config, self.vocab_size, 1.0)
        self.logits_rcnn = self.textrcnn.RCNN_inference(self.input_x)
        saver = tf.train.Saver()
        sess_config = tf.ConfigProto(allow_soft_placement=True)
        sess_config.gpu_options.per_process_gpu_memory_fraction = 0.4
        sess_config.gpu_options.allow_growth = True
        with tf.Session() as self.session:
            self.session = tf.Session(config=sess_config)
            model_path = 'checkpoints/TextRCNN/TextRCNNnet_2019-11-05-17-04-36.ckpt-7500'
            saver.restore(sess=self.session, save_path=model_path)
            print("################ load TextRCNN model down! ##########################")

    def _close(self):
        self.session.close()

    def text(self, input):
        logit_rcnn = self.session.run([self.logits_rcnn], feed_dict={self.input_x: input})
        return logit_rcnn


if __name__ == '__main__':
    one_text = ['sentence_1', 'sentence_2']
    ### encode those 2 sentences
    test_X = encode_sentence(one_data=one_text, vocab_data="../data/cnews_vocab.txt", max_length=config.seq_length)
    textcnn = Textcnn_pred(vocab_dir="../data/cnews_vocab.txt")
    logit_CNN = textcnn.text(test_X)
    textrcnn = TextRCNN_pred(vocab_dir="../data/cnews_vocab.txt")
    logit_RCNN = textrcnn.text(test_X)
    logit = logit_CNN + logit_RCNN
PS: Each model is fine when I run it alone. For example, if I run only "logit_RCNN = textrcnn.text(test_X)", or only "logit_CNN = textcnn.text(test_X)", it works.
Can someone tell me how to debug this code? Thank you!
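For reference, one common pattern in TF1 for this situation is to give each model its own tf.Graph and tf.Session, so each tf.train.Saver only sees the variables of its own model. A minimal sketch, reusing the names from the post (config, tools, and TRCM are assumed to be the same modules as above):

class TextRCNN_pred(object):
    def __init__(self, vocab_dir):
        self.graph = tf.Graph()
        with self.graph.as_default():
            self.input_x = tf.placeholder(tf.int32, [None, config.seq_length], name='input_x')
            self.words = tools.read_file(vocab_dir)
            self.vocab_size = len(self.words)
            self.textrcnn = TRCM.TextRCNN(config, self.vocab_size, 1.0)
            self.logits_rcnn = self.textrcnn.RCNN_inference(self.input_x)
            self.saver = tf.train.Saver()  # only sees variables created in this graph
        self.session = tf.Session(graph=self.graph)
        model_path = 'checkpoints/TextRCNN/TextRCNNnet_2019-11-05-17-04-36.ckpt-7500'
        self.saver.restore(sess=self.session, save_path=model_path)

    def text(self, input):
        return self.session.run([self.logits_rcnn], feed_dict={self.input_x: input})

With the same change applied to Textcnn_pred, both checkpoints can be restored in one script without their variables colliding in the default graph.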
I'm having problems integrating a BERT embedding layer into a BiLSTM model for a word sense disambiguation task. My environment:
Windows 10
Python 3.6.4
TenorFlow 1.12
Keras 2.2.4
No virtual environments were used
PyCharm Professional 2019.2
The whole script
import os
import yaml
import numpy as np
from argparse import ArgumentParser
import tensorflow as tf
import tensorflow_hub as hub
from tensorflow.keras.layers import (LSTM, Add, Bidirectional, Dense, Input, TimeDistributed, Embedding)
from tensorflow.keras.preprocessing.sequence import pad_sequences
try:
    from bert.tokenization import FullTokenizer
except ModuleNotFoundError:
    os.system('pip install bert-tensorflow')
from tensorflow.keras.models import Model
from tensorflow.keras import backend as K
from tqdm import tqdm
from keras_bert import BertEmbeddingLayer
from model_utils import visualize_plot_mdl
from parsing_dataset import load_dataset
from utilities import configure_tf, initialize_logger
def parse_args():
    parser = ArgumentParser(description="WSD")
    parser.add_argument("--model_type", default='baseline', type=str,
                        help="""Choose the model: baseline: BiLSTM Model.
                                attention: Attention Stacked BiLSTM Model.
                                seq2seq: Seq2Seq Attention.""")
    return vars(parser.parse_args())


def train_model(mdl, data, epochs=1, batch_size=32):
    [train_input_ids, train_input_masks, train_segment_ids], train_labels = data
    history = mdl.fit([train_input_ids, train_input_masks, train_segment_ids],
                      train_labels, epochs=epochs, batch_size=batch_size)
    return history
def baseline_model(output_size):
    hidden_size = 128
    max_seq_len = 64
    in_id = Input(shape=(None,), name="input_ids")
    in_mask = Input(shape=(None,), name="input_masks")
    in_segment = Input(shape=(None,), name="segment_ids")
    bert_inputs = [in_id, in_mask, in_segment]
    bert_embedding = BertEmbeddingLayer()(bert_inputs)
    embedding_size = 768
    bilstm = Bidirectional(LSTM(hidden_size, dropout=0.2,
                                recurrent_dropout=0.2,
                                return_sequences=True))(bert_embedding)
    output = TimeDistributed(Dense(output_size, activation="softmax"))(bilstm)
    mdl = Model(inputs=bert_inputs, outputs=output, name="Bert_BiLSTM")
    mdl.compile(loss="sparse_categorical_crossentropy",
                optimizer='adadelta', metrics=["acc"])
    return mdl
def initialize_vars(sess):
    sess.run(tf.local_variables_initializer())
    sess.run(tf.global_variables_initializer())
    sess.run(tf.tables_initializer())
    K.set_session(sess)
class PaddingInputExample(object):
    """Fake example so the num input examples is a multiple of the batch size.

    When running eval/predict on the TPU, we need to pad the number of examples
    to be a multiple of the batch size, because the TPU requires a fixed batch
    size. The alternative is to drop the last batch, which is bad because it means
    the entire output data won't be generated.

    We use this class instead of `None` because treating `None` as padding
    batches could cause silent errors.
    """
class InputExample(object):
    """A single training/test example for simple sequence classification."""

    def __init__(self, guid, text_a, text_b=None, label=None):
        """Constructs a InputExample.

        Args:
          guid: Unique id for the example.
          text_a: string. The un-tokenized text of the first sequence. For single
            sequence tasks, only this sequence must be specified.
          text_b: (Optional) string. The un-tokenized text of the second sequence.
            Only must be specified for sequence pair tasks.
          label: (Optional) string. The label of the example. This should be
            specified for train and dev examples, but not for test examples.
        """
        self.guid = guid
        self.text_a = text_a
        self.text_b = text_b
        self.label = label
def create_tokenizer_from_hub_module(bert_path="https://tfhub.dev/google/bert_uncased_L-12_H-768_A-12/1"):
    """Get the vocab file and casing info from the Hub module."""
    bert_module = hub.Module(bert_path)
    tokenization_info = bert_module(signature="tokenization_info", as_dict=True)
    vocab_file, do_lower_case = sess.run(
        [
            tokenization_info["vocab_file"],
            tokenization_info["do_lower_case"],
        ]
    )
    return FullTokenizer(vocab_file=vocab_file, do_lower_case=do_lower_case)
def convert_single_example(tokenizer, example, max_seq_length=256):
    """Converts a single `InputExample` into a single `InputFeatures`."""
    if isinstance(example, PaddingInputExample):
        input_ids = [0] * max_seq_length
        input_mask = [0] * max_seq_length
        segment_ids = [0] * max_seq_length
        label = [0] * max_seq_length
        return input_ids, input_mask, segment_ids, label
    tokens_a = tokenizer.tokenize(example.text_a)
    if len(tokens_a) > max_seq_length - 2:
        tokens_a = tokens_a[0: (max_seq_length - 2)]
    tokens = []
    segment_ids = []
    tokens.append("[CLS]")
    segment_ids.append(0)
    example.label.append(0)
    for token in tokens_a:
        tokens.append(token)
        segment_ids.append(0)
    tokens.append("[SEP]")
    segment_ids.append(0)
    example.label.append(0)
    input_ids = tokenizer.convert_tokens_to_ids(tokens)
    # The mask has 1 for real tokens and 0 for padding tokens. Only real
    # tokens are attended to.
    input_mask = [1] * len(input_ids)
    # Zero-pad up to the sequence length.
    while len(input_ids) < max_seq_length:
        input_ids.append(0)
        input_mask.append(0)
        segment_ids.append(0)
        example.label.append(0)
    assert len(input_ids) == max_seq_length
    assert len(input_mask) == max_seq_length
    assert len(segment_ids) == max_seq_length
    return input_ids, input_mask, segment_ids, example.label
def convert_examples_to_features(tokenizer, examples, max_seq_length=256):
    """Convert a set of `InputExample`s to a list of `InputFeatures`."""
    input_ids, input_masks, segment_ids, labels = [], [], [], []
    for example in tqdm(examples, desc="Converting examples to features"):
        input_id, input_mask, segment_id, label = convert_single_example(tokenizer, example, max_seq_length)
        input_ids.append(np.array(input_id))
        input_masks.append(np.array(input_mask))
        segment_ids.append(np.array(segment_id))
        labels.append(np.array(label))
    return np.array(input_ids), np.array(input_masks), np.array(segment_ids), np.array(labels).reshape(-1, 1)
def convert_text_to_examples(texts, labels):
    """Create InputExamples"""
    InputExamples = []
    for text, label in zip(texts, labels):
        InputExamples.append(
            InputExample(guid=None, text_a=" ".join(text), text_b=None, label=label)
        )
    return InputExamples
# Initialize session
sess = tf.Session()
params = parse_args()
initialize_logger()
configure_tf()
# Load our config file
config_file_path = os.path.join(os.getcwd(), "config.yaml")
config_file = open(config_file_path)
config_params = yaml.load(config_file)
# This parameter allows train_x to stay in the form of words, so the keras-elmo layer can be used
elmo = config_params["use_elmo"]
dataset = load_dataset(elmo=elmo)
vocabulary_size = dataset.get("vocabulary_size")
output_size = dataset.get("output_size")
# Parse data in Bert format
max_seq_length = 64
train_x = dataset.get("train_x")
train_text = [' '.join(x) for x in train_x]
train_text = [' '.join(t.split()[0:max_seq_length]) for t in train_text]
train_text = np.array(train_text, dtype=object)[:, np.newaxis]
# print(train_text.shape) # (37184, 1)
train_labels = dataset.get("train_y")
# Instantiate tokenizer
tokenizer = create_tokenizer_from_hub_module()
# Convert data to InputExample format
train_examples = convert_text_to_examples(train_text, train_labels)
# Extract features
(train_input_ids, train_input_masks, train_segment_ids, train_labels) = convert_examples_to_features(tokenizer, train_examples, max_seq_length=max_seq_length)
bert_inputs = [train_input_ids, train_input_masks, train_segment_ids]
data = bert_inputs, train_labels
del dataset
model = baseline_model(output_size)
# Instantiate variables
initialize_vars(sess)
history = train_model(model, data)
The BertEmbeddingLayer() is imported from strongio/keras-bert, and I followed the approach in that file to integrate it into my work; however, I always get this error. Please check the traceback below (the exception is raised when building the model).
Traceback (most recent call last):
File "code/prova_bert.py", line 230, in <module>
model = baseline_model(output_size, max_seq_len, visualize=True)
File "code/prova_bert.py", line 165, in baseline_model
)(bert_embeddings)
File "C:\Users\Sheikh\AppData\Local\Programs\Python\Python36\Lib\site-packages\tensorflow\python\keras\layers\wrappers.py", line 473, in __call__
return super(Bidirectional, self).__call__(inputs, **kwargs)
File "C:\Users\Sheikh\AppData\Local\Programs\Python\Python36\Lib\site-packages\tensorflow\python\keras\engine\base_layer.py", line 746, in __call__
self.build(input_shapes)
File "C:\Users\Sheikh\AppData\Local\Programs\Python\Python36\Lib\site-packages\tensorflow\python\keras\layers\wrappers.py", line 612, in build
self.forward_layer.build(input_shape)
File "C:\Users\Sheikh\AppData\Local\Programs\Python\Python36\Lib\site-packages\tensorflow\python\keras\utils\tf_utils.py", line 149, in wrapper
output_shape = fn(instance, input_shape)
File "C:\Users\Sheikh\AppData\Local\Programs\Python\Python36\Lib\site-packages\tensorflow\python\keras\layers\recurrent.py", line 552, in build
self.cell.build(step_input_shape)
File "C:\Users\Sheikh\AppData\Local\Programs\Python\Python36\Lib\site-packages\tensorflow\python\keras\utils\tf_utils.py", line 149, in wrapper
output_shape = fn(instance, input_shape)
File "C:\Users\Sheikh\AppData\Local\Programs\Python\Python36\Lib\site-packages\tensorflow\python\keras\layers\recurrent.py", line 1934, in build
constraint=self.kernel_constraint)
File "C:\Users\Sheikh\AppData\Local\Programs\Python\Python36\Lib\site-packages\tensorflow\python\keras\engine\base_layer.py", line 609, in add_weight
aggregation=aggregation)
File "C:\Users\Sheikh\AppData\Local\Programs\Python\Python36\Lib\site-packages\tensorflow\python\training\checkpointable\base.py", line 639, in _add_variable_with_custom_getter
**kwargs_for_getter)
File "C:\Users\Sheikh\AppData\Local\Programs\Python\Python36\Lib\site-packages\tensorflow\python\keras\engine\base_layer.py", line 1977, in make_variable
aggregation=aggregation)
File "C:\Users\Sheikh\AppData\Local\Programs\Python\Python36\Lib\site-packages\tensorflow\python\ops\variables.py", line 183, in __call__
return cls._variable_v1_call(*args, **kwargs)
File "C:\Users\Sheikh\AppData\Local\Programs\Python\Python36\Lib\site-packages\tensorflow\python\ops\variables.py", line 146, in _variable_v1_call
aggregation=aggregation)
File "C:\Users\Sheikh\AppData\Local\Programs\Python\Python36\Lib\site-packages\tensorflow\python\ops\variables.py", line 125, in <lambda>
previous_getter = lambda **kwargs: default_variable_creator(None, **kwargs)
File "C:\Users\Sheikh\AppData\Local\Programs\Python\Python36\Lib\site-packages\tensorflow\python\ops\variable_scope.py", line 2437, in default_variable_creator
import_scope=import_scope)
File "C:\Users\Sheikh\AppData\Local\Programs\Python\Python36\Lib\site-packages\tensorflow\python\ops\variables.py", line 187, in __call__
return super(VariableMetaclass, cls).__call__(*args, **kwargs)
File "C:\Users\Sheikh\AppData\Local\Programs\Python\Python36\Lib\site-packages\tensorflow\python\ops\resource_variable_ops.py", line 297, in __init__
constraint=constraint)
File "C:\Users\Sheikh\AppData\Local\Programs\Python\Python36\Lib\site-packages\tensorflow\python\ops\resource_variable_ops.py", line 409, in _init_from_args
initial_value() if init_from_fn else initial_value,
File "C:\Users\Sheikh\AppData\Local\Programs\Python\Python36\Lib\site-packages\tensorflow\python\keras\engine\base_layer.py", line 1959, in <lambda>
shape, dtype=dtype, partition_info=partition_info)
File "C:\Users\Sheikh\AppData\Local\Programs\Python\Python36\Lib\site-packages\tensorflow\python\ops\init_ops.py", line 473, in __call__
scale /= max(1., (fan_in + fan_out) / 2.)
TypeError: unsupported operand type(s) for +: 'NoneType' and 'int'
Exception ignored in: <bound method BaseSession.__del__ of <tensorflow.python.client.session.Session object at 0x0000026396AD0630>>
Traceback (most recent call last):
File "C:\Users\Sheikh\AppData\Local\Programs\Python\Python36\Lib\site-packages\tensorflow\python\client\session.py", line 738, in __del__
TypeError: 'NoneType' object is not callable
Please refer to my issue on their repo, and for examples of the data being fed to the model please check this issue.
First of all, the result of "mean" or "first" pooling is not per token, so you have to change this in the call() function:
elif self.pooling == "mean":
    result = self.bert(inputs=bert_inputs, signature="tokens", as_dict=True)["sequence_output"]
    pooled = result
In build_model, change to:
embedding_size = 768
in_id = Input(shape=(max_seq_length,), name="input_ids")
in_mask = Input(shape=(max_seq_length,), name="input_masks")
in_segment = Input(shape=(max_seq_length,), name="segment_ids")
bert_inputs = [in_id, in_mask, in_segment]
bert_output = BertLayer(n_fine_tune_layers=12, pooling="mean")(bert_inputs)
bert_output = Reshape((max_seq_length, embedding_size))(bert_output)
bilstm = Bidirectional(LSTM(128, dropout=0.2,recurrent_dropout=0.2,return_sequences=True))(bert_output)
output = Dense(output_size, activation="softmax")(bilstm)
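To complete the picture, the rest of the model definition can stay essentially as in the question's baseline_model (a sketch, assuming the same variable names as the snippet above):

mdl = Model(inputs=bert_inputs, outputs=output, name="Bert_BiLSTM")
mdl.compile(loss="sparse_categorical_crossentropy", optimizer="adadelta", metrics=["acc"])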
I am trying to build a network, but I am getting the following error.
This is the building and training code.
The network was built following the code from this video tutorial.
I created the data using this GitHub repository.
The training set contains 24 classes of images and the test set contains the corresponding 24 classes of labels.
import os
import gzip  # used by the loaders below
import numpy as np
import lasagne
import theano
import theano.tensor as T

def load_mnist_images(filename):
    with gzip.open(filename, 'rb') as f:
        data = np.frombuffer(f.read(), np.uint8, offset=16)
    data = data.reshape(-1, 1, 28, 28)
    print(type(data))
    return data / np.float32(256)

def load_mnist_labels(filename):
    with gzip.open(filename, 'rb') as f:
        data = np.frombuffer(f.read(), np.uint8, offset=8)
    return data

def load_dataset():
    x_train = load_mnist_images('train-images-idx3-ubyte.gz')
    y_train = load_mnist_labels('train-labels-idx1-ubyte.gz')
    x_test = load_mnist_images('test-images-idx3-ubyte.gz')
    y_test = load_mnist_labels('test-labels-idx1-ubyte.gz')
    return x_train, y_train, x_test, y_test

def build_nn(input_var=None):
    l_in = lasagne.layers.InputLayer(shape=(None, 1, 28, 28), input_var=input_var)
    l_in_drop = lasagne.layers.DropoutLayer(l_in, p=0.2)
    l_hid1 = lasagne.layers.DenseLayer(l_in_drop, num_units=800,
                                       nonlinearity=lasagne.nonlinearities.rectify,
                                       W=lasagne.init.GlorotUniform())
    l_hid1_drop = lasagne.layers.DropoutLayer(l_hid1, p=0.5)
    l_hid2 = lasagne.layers.DenseLayer(l_hid1_drop, num_units=800,
                                       nonlinearity=lasagne.nonlinearities.rectify,
                                       W=lasagne.init.GlorotUniform())
    l_hid2_drop = lasagne.layers.DropoutLayer(l_hid2, p=0.5)
    l_out = lasagne.layers.DenseLayer(l_hid2_drop, num_units=24,
                                      nonlinearity=lasagne.nonlinearities.softmax)
    return l_out

if __name__ == "__main__":
    x_train, y_train, x_test, y_test = load_dataset()
    input_var = T.tensor4('inputs')
    target_var = T.tensor4('targets')
    print(target_var)
    network = build_nn(input_var)
    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    loss = loss.mean()
    params = lasagne.layers.get_all_params(network, trainable=True)
    updates = lasagne.updates.nesterov_momentum(loss, params, learning_rate=0.01, momentum=0.9)
    train_fn = theano.function([input_var, target_var], loss, updates=updates)
    num_training_steps = 10
    for steps in range(num_training_steps):
        train_err = train_fn(x_train, y_train)
        print("current step is " + str(steps))
And this is the error I get:
Traceback (most recent call last):
File "/home/hassan/JPG-PNG-to-MNIST-NN-Format/mnist_test.py", line 64, in <module>
loss = lasagne.objectives.categorical_crossentropy(prediction,target_var)
File "/home/hassan/anaconda3/envs/object-detection/lib/python3.7/site-packages/lasagne/objectives.py", line 181, in categorical_crossentropy
return theano.tensor.nnet.categorical_crossentropy(predictions, targets)
File "/home/hassan/anaconda3/envs/object-detection/lib/python3.7/site-packages/theano/tensor/nnet/nnet.py", line 2101, in categorical_crossentropy
raise TypeError('rank mismatch between coding and true distributions')
TypeError: rank mismatch between coding and true distributions
I solved it: there was a mistake in the code.
Instead of writing this:
target_var = T.ivector('targets')
I had written this:
target_var = T.tensor4('targets')