I am using keras to build a model to caption images(basically give them a description).But when I am executing this, I get an error right before the training starts. I am using tensorflow_gpu(2.0) and the latest keras version.This is the error I get(A little shortened)==>
Epoch 1/1
Traceback (most recent call last):
File "C:\Users\neelg\Documents\Atom_projects\Main\Img_cap.py", line 165, in <module>
model.fit_generator(generator, epochs=1, steps_per_epoch=steps, verbose=1)
File "C:\Users\neelg\TF2_GPU\lib\site-packages\keras\legacy\interfaces.py", line 91, in wrapper
return func(*args, **kwargs)
File "C:\Users\neelg\TF2_GPU\lib\site-packages\keras\engine\training.py", line 1732, in fit_generator
initial_epoch=initial_epoch)
File "C:\Users\neelg\TF2_GPU\lib\site-packages\keras\engine\training_generator.py", line 220, in fit_generator
reset_metrics=False)
File "C:\Users\neelg\TF2_GPU\lib\site-packages\keras\engine\training.py", line 1514, in train_on_batch
outputs = self.train_function(ins)
File "C:\Users\neelg\TF2_GPU\lib\site-packages\tensorflow_core\python\keras\backend.py", line 3734, in __call__
value = ops.convert_to_tensor(value, dtype=tensor.dtype)
File "C:\Users\neelg\TF2_GPU\lib\site-packages\tensorflow_core\python\framework\ops.py", line 1184, in convert_to_tensor
return convert_to_tensor_v2(value, dtype, preferred_dtype, name)
File "C:\Users\neelg\TF2_GPU\lib\site-packages\tensorflow_core\python\framework\ops.py", line 1242, in convert_to_tensor_v2
as_ref=False)
File "C:\Users\neelg\TF2_GPU\lib\site-packages\tensorflow_core\python\framework\ops.py", line 1296, in internal_convert_to_tensor
ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
File "C:\Users\neelg\TF2_GPU\lib\site-packages\tensorflow_core\python\framework\tensor_conversion_registry.py", line 52, in _default_conversion_function
return constant_op.constant(value, dtype, name=name)
File "C:\Users\neelg\TF2_GPU\lib\site-packages\tensorflow_core\python\framework\constant_op.py", line 227, in constant
allow_broadcast=True)
File "C:\Users\neelg\TF2_GPU\lib\site-packages\tensorflow_core\python\framework\constant_op.py", line 235, in _constant_impl
t = convert_to_eager_tensor(value, ctx, dtype)
File "C:\Users\neelg\TF2_GPU\lib\site-packages\tensorflow_core\python\framework\constant_op.py", line 96, in convert_to_eager_tensor
return ops.EagerTensor(value, ctx.device_name, dtype)
ValueError: setting an array element with a sequence.
The main issue is that only one line of the code is addressed here, rest other lines are inside the Tensorflow libraries.
BTW The line addressed by the error is:-
model.fit_generator(generator, epochs=1, steps_per_epoch=steps, verbose=1)
I think this may be a possible bug. If anybody has any requests of some extra information required, I can edit out the question. When googling, There are mentions that the 'Numpy array' is not structured or has a data_type not specified. However, My code contains no Numpy arrays so I am unsure How to proceed.
Thanx in advance
Here is the code(as requested by makis)Note that I am using a Flickr8k dataset which cannot be uploaded:=====>
#This an Image Captioning Model developed by Neel Gupta :)
# IMPORTS GOES HERE -----------------
#import tensorflow as tf # Even tho we don't need it, It activates CUDA Functionality
from numpy import array
from pickle import load
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical
from keras.utils import plot_model
from keras.models import Model
from keras.layers import Input
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Embedding
from keras.layers import Dropout
from keras.layers.merge import add
from keras.callbacks import ModelCheckpoint
from os import path
def load_doc(filename):
file = open(filename, 'r') #Opening the file.
text = file.read()
file.close()
return text
def load_set(filename):
doc = load_doc(filename) #Loading the document
dataset = list()
for line in doc.split('/n'): #Weeding out the empty lines
if len(line) < 1:
continue
identifier = line.split('.')[0]
dataset.append(identifier)
return set(dataset)
def load_clean_descriptions(filename, dataset):
doc = load_doc(filename)
descriptions = dict()
for line in doc.split('\n'):
# split line by white space
tokens = line.split()
# split id from description
image_id, image_desc = tokens[0], tokens[1:]
# skip images not in the set
if image_id in dataset:
# create list
if image_id not in descriptions:
descriptions[image_id] = list()
# wrap description in tokens
desc = '#Start# ' + ' '.join(image_desc) + ' #End#'
descriptions[image_id].append(desc)
return descriptions
def load_photo_features(filename, dataset):
all_features = load(open(filename, 'rb'))
features = {k: all_features[k] for k in dataset}
return features
# convert a dictionary of clean descriptions to a list of descriptions
def to_lines(descriptions):
all_desc = list()
for key in dict.keys(descriptions):
[all_desc.append(d) for d in descriptions[key]]
return all_desc
def create_tokenizer(descriptions):
lines = to_lines(descriptions) #Fitting tokenizer
tokenizer = Tokenizer()
tokenizer.fit_on_texts(lines)
return tokenizer
def max_length(descriptions):
lines = to_lines(descriptions)
return max(len(d.split()) for d in lines)
def create_sequences(tokenizer, max_length, desc_list, photo, vocab_size):
X1, X2, y = list(), list(), list()
for desc in desc_list:
# encode the sequence
seq = tokenizer.texts_to_sequences([desc])[0]
# split one sequence into multiple X,y pairs
for i in range(1, len(seq)):
# split into input and output pair
in_seq, out_seq = seq[:i], seq[i]
# pad input sequence
in_seq = pad_sequences([in_seq], maxlen=None)[0] #Removed maxlen argument
# encode output sequence
out_seq = to_categorical([out_seq], num_classes=vocab_size)[0]
# store
X1.append(photo)
X2.append(in_seq)
y.append(out_seq)
return array(X1), array(X2), array(y)
def define_model(vocab_size, max_length):
# feature extractor model
inputs1 = Input(shape=(4096,))
fe1 = Dropout(0.5)(inputs1)
fe2 = Dense(256, activation='relu')(fe1)
# sequence model
inputs2 = Input(shape=(1,)) #remove shape
se1 = Embedding(vocab_size, 256, mask_zero=True)(inputs2)
se2 = Dropout(0.5)(se1)
se3 = LSTM(256)(se2)
# decoder model
decoder1 = add([fe2, se3])
decoder2 = Dense(256, activation='relu')(decoder1)
outputs = Dense(vocab_size, activation='softmax')(decoder2)
# tie it together [image, seq] [word]
model = Model(inputs=[inputs1, inputs2], outputs=outputs)
# compile model
model.compile(loss='categorical_crossentropy', optimizer='adam')
# summarize model
model.summary()
#Uncomment the line below to deactivate/activate a graph being constructed==>
#plot_model(model, to_file='model.png', show_shapes=True)
return model
def data_generator(descriptions, photos, tokenizer, max_length, vocab_size):
# loop for ever over images
#This function is for saving ur RAM from utter destruction
while 1:
for key, desc_list in descriptions.items():
# retrieve the photo feature
photo = photos[key][0]
in_img, in_seq, out_word = create_sequences(tokenizer, max_length, desc_list, photo, vocab_size)
yield [[in_img, in_seq], out_word]
# HERE GOESETH THE IMPORTANT STUFF:-----------
filename = 'C:/Users/neelg/Documents/Atom_projects/Main/Flickr8k_text/Flickr_8k.trainImages.txt'
# Be sure to replace the file-name with ur own!!!
train = load_set(filename)
#print('Dataset:%d' % len (train))
train_descriptions = load_clean_descriptions('C:/Users/neelg/Documents/Atom_projects/Main/descriptions.txt', train) #File name of clean descriptions
#print('Descriptions: train=%d' % len(train_descriptions))
#photo features
train_features = load_photo_features('C:/Users/neelg/Documents/Atom_projects/Main/features.pkl', train)
#print('Photos: train=%d' % len(train_features))
print("Loaded photo features!")
#Setting up the Tokenizer--
tokenizer = create_tokenizer(train_descriptions)
vocab_size = len(tokenizer.word_index) + 1
#print('Vocabulary Size: %d' % vocab_size)
print('\n', "Created tokenizers")
#max_length = max_length(descriptions) #Getting the max_length
#THE MODEL
model = define_model(vocab_size, max_length)
print('\n', "model ready for some action!")
# train the model, run epochs manually and save after each epoch
epochs = 20
steps = len(train_descriptions)
# test the data generator
print("Giving the Data generator a workout :)")
generator = data_generator(train_descriptions, train_features, tokenizer, max_length, vocab_size)
inputs, outputs = next(generator)
print(inputs[0].shape)
print(inputs[1].shape)
print(outputs.shape)
for i in range(epochs):
generator = data_generator(train_descriptions, train_features, tokenizer, max_length, vocab_size)
model.fit_generator(generator, epochs=1, steps_per_epoch=steps, verbose=1)
print('Starting the training.....')
# save model
model.save('model_' + str(i) + '.h5')
In create_sequences, X2 has elements of different shapes. When you try to convert it to a Numpy array here.
return array(X1), array(X2), array(y)
it throws
ValueError: setting an array element with a sequence
Just returning X2, without converting it to Numpy arrays, should fix it.
Related
I have the following code to train a transformer-based model (huggingface) for a multi-head regression task (I call it multi-head because the model predicts multiple output scores, not only one).
# select device
device = 'cuda' if cuda.is_available() else 'cpu'
print("DEVICE: ", device)
MAX_LEN = 512
TRAIN_BATCH_SIZE = 8
VALID_BATCH_SIZE = 8
TRAIN_EPOCHS = 1
TEST_EPOCHS = 1
LEARNING_RATE = 2e-05
REG_DROPOUT = 0.1
DISPLAY_STEP_THRESHOLD = 100
MODEL_NAME_CHECKPOINT = 'bert-base-uncased'
MODEL_FOLDER = 'autotune_multihead_regression_model'
# *************** DATA *********************
# load data
train_data = pd.read_csv(f"derived_data/traindev_fold{args.fold}.csv")
test_data = pd.read_csv(f"derived_data/test_fold{args.fold}.csv")
train_data['labels'] = train_data[train_data.columns[2:]].values.tolist()
test_data['labels'] = test_data[test_data.columns[2:]].values.tolist()
print("train data shape: ", train_data.shape)
print("test data shape: ", test_data.shape)
# make datasets
train_dataset = Dataset.from_pandas(train_data)
test_dataset = Dataset.from_pandas(test_data)
dataset = DatasetDict({
'train': train_dataset,
'test': test_dataset
})
# initialize tokenizer
tokenizer = BertTokenizer.from_pretrained(MODEL_NAME_CHECKPOINT)
def preprocess_function(examples):
return tokenizer(examples["full_text"], truncation=True, padding="max_length")
# apply the preprocessing on the entire dataset
encoded_dataset = dataset.map(preprocess_function, batched=True)
# ******************** FINETUNING ********************#
class BERTClass(torch.nn.Module):
def __init__(self):
super(BERTClass, self).__init__()
self.l0 = transformers.BertModel.from_pretrained(MODEL_NAME_CHECKPOINT)
self.l1 = torch.nn.Linear(768, 6)
def forward(self, input_ids, attention_mask, labels):
""""Override the function forward. Note that keys not appearing here will be removed by trainer.
It does not matter if trainer is not used.
"""
# _, output_0 = self.l0(ids, attention_mask=mask, token_type_ids=token_type_ids, return_dict=False)
_, output_0 = self.l0(input_ids, attention_mask=attention_mask, return_dict=False)
output = self.l1(output_0)
return output
def model_init():
model = BERTClass()
return model
args = TrainingArguments(
MODEL_FOLDER,
evaluation_strategy="epoch",
save_strategy="epoch",
learning_rate=2e-5,
per_device_train_batch_size=TRAIN_BATCH_SIZE,
per_device_eval_batch_size=VALID_BATCH_SIZE,
num_train_epochs=5,
weight_decay=0.01,
load_best_model_at_end=True,
# this is an advantage in the sense that the last model is not necessarily the best one.
metric_for_best_model="mean_rmse",
logging_strategy="steps",
logging_steps=100,
push_to_hub=False
)
def mean_rmse(outputs, targets):
""""
:param
outputs: 2D list
targets: 2D list
:returns
a scalar real number
"""
delta = outputs - targets
delta = torch.sqrt((delta ** 2).mean(axis=0))
return delta.mean()
def compute_metrics(eval_pred):
predictions, labels = eval_pred
return {"mean_rmse": mean_rmse(predictions, labels)}
class RegressionTrainer(Trainer):
def compute_loss(self, model, inputs, return_outputs=False):
labels = inputs.get("labels")
outputs = model(**inputs)
loss = torch.nn.MSELoss()(outputs.squeeze(), labels.squeeze())
return (loss, outputs) if return_outputs else loss
temp_dataset = encoded_dataset["train"].select(range(100))
trainer = RegressionTrainer(
model_init(),
args,
train_dataset=encoded_dataset["train"],
eval_dataset=temp_dataset,
tokenizer=tokenizer,
compute_metrics=compute_metrics
)
trainer.train()
In the code, I use a customized model because I want to have the flexibility of the head. Also, I use Trainer to train the model because I want to use hyperparameter_search.
The target (labels) of the dataset is a (row) vector of 6 variables.
Now the training seems going well when I can see the loss decreasing.
However, the code crashes when it starts the evaluation.
In the code above, I use part of the training set for evaluation, and it throws:
***** Running Evaluation *****
Num examples = 100
Batch size = 8
Traceback (most recent call last):██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 13/13 [00:00<00:00, 13.35it/s]
File "autotune_multiregression_head_bert.py", line 152, in <module>
trainer.train()
File "/home/ubuntu/anaconda3/envs/pytorch_p37/lib/python3.7/site-packages/transformers/trainer.py", line 1504, in train
ignore_keys_for_eval=ignore_keys_for_eval,
File "/home/ubuntu/anaconda3/envs/pytorch_p37/lib/python3.7/site-packages/transformers/trainer.py", line 1834, in _inner_training_loop
self._maybe_log_save_evaluate(tr_loss, model, trial, epoch, ignore_keys_for_eval)
File "/home/ubuntu/anaconda3/envs/pytorch_p37/lib/python3.7/site-packages/transformers/trainer.py", line 2052, in _maybe_log_save_evaluate
metrics = self.evaluate(ignore_keys=ignore_keys_for_eval)
File "/home/ubuntu/anaconda3/envs/pytorch_p37/lib/python3.7/site-packages/transformers/trainer.py", line 2781, in evaluate
metric_key_prefix=metric_key_prefix,
File "/home/ubuntu/anaconda3/envs/pytorch_p37/lib/python3.7/site-packages/transformers/trainer.py", line 3059, in evaluation_loop
metrics = self.compute_metrics(EvalPrediction(predictions=all_preds, label_ids=all_labels))
File "autotune_multiregression_head_bert.py", line 130, in compute_metrics
return {"mcrmse": mcrmse_fn(predictions, labels)}
File "autotune_multiregression_head_bert.py", line 123, in mcrmse_fn
delta = outputs - targets
ValueError: operands could not be broadcast together with shapes (87,6) (100,6)
20%|
I got the same error when evaluating on the test set. I assume some examples got failed when evaluating and are not added to the final result, hence the final shape of outputs is not consistent with targets? (I debugged and see that outputs has 87 examples while targets has 100).
What has gone wrong?
I am using Keras Functional API from Tensorflow 2.2 to build a model that uses features columns. I followed the guide here and tutorial Classify structured data to make the code snippet:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import feature_column
from tensorflow.keras import layers
def df_to_dataset(dataframe, shuffle=True, batch_size=32):
dataframe = dataframe.copy()
labels = dataframe.pop('target')
ds = tf.data.Dataset.from_tensor_slices((dict(dataframe), labels))
if shuffle:
ds = ds.shuffle(buffer_size=len(dataframe))
ds = ds.batch(batch_size)
return ds
if __name__ == '__main__':
URL = 'https://storage.googleapis.com/applied-dl/heart.csv'
dataframe = pd.read_csv(URL)
batch_size = 5 # A small batch sized is used for demonstration purposes
train_ds = df_to_dataset(dataframe, batch_size=batch_size)
example_batch = next(iter(train_ds))
feature_columns = []
inputs = {'age': tf.keras.layers.Input(name='age', shape=(), dtype='float32'),
'thal': tf.keras.layers.Input(name='thal', shape=(), dtype='string')}
feature_columns.append(feature_column.numeric_column('age'))
thal = feature_column.categorical_column_with_vocabulary_list(
'thal', ['fixed', 'normal', 'reversible'])
thal_one_hot = feature_column.indicator_column(thal)
feature_columns.append(thal_one_hot)
x = layers.DenseFeatures(feature_columns)(inputs)
preds = layers.Dense(1)(x)
model = tf.keras.Model(inputs=inputs, outputs=preds)
print(model(example_batch))
Unfortunately this code fails with an error:
Traceback (most recent call last):
File "test.py", line 42, in <module>
print(model(example_batch))
File "C:\Python38\lib\site-packages\tensorflow\python\keras\engine\base_layer.py", line 968, in __call__
outputs = self.call(cast_inputs, *args, **kwargs)
File "C:\Python38\lib\site-packages\tensorflow\python\keras\engine\network.py", line 717, in call
return self._run_internal_graph(
File "C:\Python38\lib\site-packages\tensorflow\python\keras\engine\network.py", line 837, in _run_internal_graph
y = self._conform_to_reference_input(y, ref_input=x)
File "C:\Python38\lib\site-packages\tensorflow\python\keras\engine\network.py", line 961, in _conform_to_reference_input
tensor = math_ops.cast(tensor, dtype=ref_input.dtype)
File "C:\Python38\lib\site-packages\tensorflow\python\util\dispatch.py", line 180, in wrapper
return target(*args, **kwargs)
File "C:\Python38\lib\site-packages\tensorflow\python\ops\math_ops.py", line 789, in cast
x = gen_math_ops.cast(x, base_type, name=name)
File "C:\Python38\lib\site-packages\tensorflow\python\ops\gen_math_ops.py", line 1971, in cast
_ops.raise_from_not_ok_status(e, name)
File "C:\Python38\lib\site-packages\tensorflow\python\framework\ops.py", line 6653, in raise_from_not_ok_status
six.raise_from(core._status_to_exception(e.code, message), None)
File "<string>", line 3, in raise_from
tensorflow.python.framework.errors_impl.UnimplementedError: Cast int64 to string is not supported [Op:Cast]
When i drop category column 'thal' from feature_columns, it works without error. Unfortunately, i need to pass category column into my model. I don't know how to fix it.
There is no need to pass Inputs in the code, x = layers.DenseFeatures(feature_columns) because feature_columns already comprises the Feature Columns corresponding to all the Features.
Complete working code for Training the Model with heart.csv data is shown below:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import feature_column
from tensorflow.keras import layers
def df_to_dataset(dataframe, shuffle=True, batch_size=32):
dataframe = dataframe.copy()
labels = dataframe.pop('target')
ds = tf.data.Dataset.from_tensor_slices((dict(dataframe), labels))
if shuffle:
ds = ds.shuffle(buffer_size=len(dataframe))
ds = ds.batch(batch_size)
return ds
URL = 'https://storage.googleapis.com/applied-dl/heart.csv'
dataframe = pd.read_csv(URL)
print(dataframe.head())
batch_size = 5 # A small batch sized is used for demonstration purposes
train_ds = df_to_dataset(dataframe, batch_size=batch_size)
example_batch = next(iter(train_ds))
feature_columns = []
age = feature_column.numeric_column('age')
feature_columns.append(age)
thal = feature_column.categorical_column_with_vocabulary_list(
'thal', ['fixed', 'normal', 'reversible'])
thal_one_hot = feature_column.indicator_column(thal)
feature_columns.append(thal_one_hot)
print(feature_columns)
x = layers.DenseFeatures(feature_columns)
model = tf.keras.Sequential([
x,
layers.Dense(1)
])
model.compile(optimizer='adam',
loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
metrics=['accuracy'])
model.fit(train_ds,
epochs=5)
#print(model(example_batch))
Please let me know if you need any other information or if you face any other error, and I will be Happy to help you.
Hope this helps. Happy Learning!
example_batch contains both feature batch and label batch, and model expects only features as inputs.
Following code should work:
feature_batch, label_batch = next(iter(train_ds))
model(feature_batch)
or:
example_batch = next(iter(train_ds))[0]
model(example_batch)
I've problems integrating Bert Embedding Layer in a BiLSTM model for word sense disambiguation task,
Windows 10
Python 3.6.4
TenorFlow 1.12
Keras 2.2.4
No virtual environments were used
PyCharm Professional 2019.2
The whole script
import os
import yaml
import numpy as np
from argparse import ArgumentParser
import tensorflow as tf
import tensorflow_hub as hub
from tensorflow.keras.layers import (LSTM, Add, Bidirectional, Dense, Input, TimeDistributed, Embedding)
from tensorflow.keras.preprocessing.sequence import pad_sequences
try:
from bert.tokenization import FullTokenizer
except ModuleNotFoundError:
os.system('pip install bert-tensorflow')
from tensorflow.keras.models import Model
from tensorflow.keras import backend as K
from tqdm import tqdm
from keras_bert import BertEmbeddingLayer
from model_utils import visualize_plot_mdl
from parsing_dataset import load_dataset
from utilities import configure_tf, initialize_logger
def parse_args():
parser = ArgumentParser(description="WSD")
parser.add_argument("--model_type", default='baseline', type=str,
help="""Choose the model: baseline: BiLSTM Model.
attention: Attention Stacked BiLSTM Model.
seq2seq: Seq2Seq Attention.""")
return vars(parser.parse_args())
def train_model(mdl, data, epochs=1, batch_size=32):
[train_input_ids, train_input_masks, train_segment_ids], train_labels = data
history = mdl.fit([train_input_ids, train_input_masks, train_segment_ids],
train_labels, epochs=epochs, batch_size=batch_size)
return history
def baseline_model(output_size):
hidden_size = 128
max_seq_len = 64
in_id = Input(shape=(None,), name="input_ids")
in_mask = Input(shape=(None,), name="input_masks")
in_segment = Input(shape=(None,), name="segment_ids")
bert_inputs = [in_id, in_mask, in_segment]
bert_embedding = BertEmbeddingLayer()(bert_inputs)
embedding_size = 768
bilstm = Bidirectional(LSTM(hidden_size, dropout=0.2,
recurrent_dropout=0.2,
return_sequences=True
)
)(bert_embedding)
output = TimeDistributed(Dense(output_size, activation="softmax"))(bilstm)
mdl = Model(inputs=bert_inputs, outputs=output, name="Bert_BiLSTM")
mdl.compile(loss="sparse_categorical_crossentropy",
optimizer='adadelta', metrics=["acc"])
return mdl
def initialize_vars(sess):
sess.run(tf.local_variables_initializer())
sess.run(tf.global_variables_initializer())
sess.run(tf.tables_initializer())
K.set_session(sess)
class PaddingInputExample(object):
"""Fake example so the num input examples is a multiple of the batch size.
When running eval/predict on the TPU, we need to pad the number of examples
to be a multiple of the batch size, because the TPU requires a fixed batch
size. The alternative is to drop the last batch, which is bad because it means
the entire output data won't be generated.
We use this class instead of `None` because treating `None` as padding
batches could cause silent errors.
"""
class InputExample(object):
"""A single training/test example for simple sequence classification."""
def __init__(self, guid, text_a, text_b=None, label=None):
"""Constructs a InputExample.
Args:
guid: Unique id for the example.
text_a: string. The un-tokenized text of the first sequence. For single
sequence tasks, only this sequence must be specified.
text_b: (Optional) string. The un-tokenized text of the second sequence.
Only must be specified for sequence pair tasks.
label: (Optional) string. The label of the example. This should be
specified for train and dev examples, but not for test examples.
"""
self.guid = guid
self.text_a = text_a
self.text_b = text_b
self.label = label
def create_tokenizer_from_hub_module(bert_path="https://tfhub.dev/google/bert_uncased_L-12_H-768_A-12/1"):
"""Get the vocab file and casing info from the Hub module."""
bert_module = hub.Module(bert_path)
tokenization_info = bert_module(signature="tokenization_info", as_dict=True)
vocab_file, do_lower_case = sess.run(
[
tokenization_info["vocab_file"],
tokenization_info["do_lower_case"],
]
)
return FullTokenizer(vocab_file=vocab_file, do_lower_case=do_lower_case)
def convert_single_example(tokenizer, example, max_seq_length=256):
"""Converts a single `InputExample` into a single `InputFeatures`."""
if isinstance(example, PaddingInputExample):
input_ids = [0] * max_seq_length
input_mask = [0] * max_seq_length
segment_ids = [0] * max_seq_length
label = [0] * max_seq_length
return input_ids, input_mask, segment_ids, label
tokens_a = tokenizer.tokenize(example.text_a)
if len(tokens_a) > max_seq_length - 2:
tokens_a = tokens_a[0: (max_seq_length - 2)]
tokens = []
segment_ids = []
tokens.append("[CLS]")
segment_ids.append(0)
example.label.append(0)
for token in tokens_a:
tokens.append(token)
segment_ids.append(0)
tokens.append("[SEP]")
segment_ids.append(0)
example.label.append(0)
input_ids = tokenizer.convert_tokens_to_ids(tokens)
# The mask has 1 for real tokens and 0 for padding tokens. Only real
# tokens are attended to.
input_mask = [1] * len(input_ids)
# Zero-pad up to the sequence length.
while len(input_ids) < max_seq_length:
input_ids.append(0)
input_mask.append(0)
segment_ids.append(0)
example.label.append(0)
assert len(input_ids) == max_seq_length
assert len(input_mask) == max_seq_length
assert len(segment_ids) == max_seq_length
return input_ids, input_mask, segment_ids, example.label
def convert_examples_to_features(tokenizer, examples, max_seq_length=256):
"""Convert a set of `InputExample`s to a list of `InputFeatures`."""
input_ids, input_masks, segment_ids, labels = [], [], [], []
for example in tqdm(examples, desc="Converting examples to features"):
input_id, input_mask, segment_id, label = convert_single_example(tokenizer, example, max_seq_length)
input_ids.append(np.array(input_id))
input_masks.append(np.array(input_mask))
segment_ids.append(np.array(segment_id))
labels.append(np.array(label))
return np.array(input_ids), np.array(input_masks), np.array(segment_ids), np.array(labels).reshape(-1, 1)
def convert_text_to_examples(texts, labels):
"""Create InputExamples"""
InputExamples = []
for text, label in zip(texts, labels):
InputExamples.append(
InputExample(guid=None, text_a=" ".join(text), text_b=None, label=label)
)
return InputExamples
# Initialize session
sess = tf.Session()
params = parse_args()
initialize_logger()
configure_tf()
# Load our config file
config_file_path = os.path.join(os.getcwd(), "config.yaml")
config_file = open(config_file_path)
config_params = yaml.load(config_file)
# This parameter allow that train_x to be in form of words, to allow using of your keras-elmo layer
elmo = config_params["use_elmo"]
dataset = load_dataset(elmo=elmo)
vocabulary_size = dataset.get("vocabulary_size")
output_size = dataset.get("output_size")
# Parse data in Bert format
max_seq_length = 64
train_x = dataset.get("train_x")
train_text = [' '.join(x) for x in train_x]
train_text = [' '.join(t.split()[0:max_seq_length]) for t in train_text]
train_text = np.array(train_text, dtype=object)[:, np.newaxis]
# print(train_text.shape) # (37184, 1)
train_labels = dataset.get("train_y")
# Instantiate tokenizer
tokenizer = create_tokenizer_from_hub_module()
# Convert data to InputExample format
train_examples = convert_text_to_examples(train_text, train_labels)
# Extract features
(train_input_ids, train_input_masks, train_segment_ids, train_labels) = convert_examples_to_features(tokenizer, train_examples, max_seq_length=max_seq_length)
bert_inputs = [train_input_ids, train_input_masks, train_segment_ids]
data = bert_inputs, train_labels
del dataset
model = baseline_model(output_size)
# Instantiate variables
initialize_vars(sess)
history = train_model(model, data)
The layer BertEmbeddingLayer() is imported from strongio/keras-bert, as well as following the approach in the file to integrate my work however I always have this error, please check the traceback below (exception is raised when building the model)
Traceback (most recent call last):
File "code/prova_bert.py", line 230, in <module>
model = baseline_model(output_size, max_seq_len, visualize=True)
File "code/prova_bert.py", line 165, in baseline_model
)(bert_embeddings)
File "C:\Users\Sheikh\AppData\Local\Programs\Python\Python36\Lib\site-packages\tensorflow\python\keras\layers\wrappers.py", line 473, in __call__
return super(Bidirectional, self).__call__(inputs, **kwargs)
File "C:\Users\Sheikh\AppData\Local\Programs\Python\Python36\Lib\site-packages\tensorflow\python\keras\engine\base_layer.py", line 746, in __call__
self.build(input_shapes)
File "C:\Users\Sheikh\AppData\Local\Programs\Python\Python36\Lib\site-packages\tensorflow\python\keras\layers\wrappers.py", line 612, in build
self.forward_layer.build(input_shape)
File "C:\Users\Sheikh\AppData\Local\Programs\Python\Python36\Lib\site-packages\tensorflow\python\keras\utils\tf_utils.py", line 149, in wrapper
output_shape = fn(instance, input_shape)
File "C:\Users\Sheikh\AppData\Local\Programs\Python\Python36\Lib\site-packages\tensorflow\python\keras\layers\recurrent.py", line 552, in build
self.cell.build(step_input_shape)
File "C:\Users\Sheikh\AppData\Local\Programs\Python\Python36\Lib\site-packages\tensorflow\python\keras\utils\tf_utils.py", line 149, in wrapper
output_shape = fn(instance, input_shape)
File "C:\Users\Sheikh\AppData\Local\Programs\Python\Python36\Lib\site-packages\tensorflow\python\keras\layers\recurrent.py", line 1934, in build
constraint=self.kernel_constraint)
File "C:\Users\Sheikh\AppData\Local\Programs\Python\Python36\Lib\site-packages\tensorflow\python\keras\engine\base_layer.py", line 609, in add_weight
aggregation=aggregation)
File "C:\Users\Sheikh\AppData\Local\Programs\Python\Python36\Lib\site-packages\tensorflow\python\training\checkpointable\base.py", line 639, in _add_variable_with_custom_getter
**kwargs_for_getter)
File "C:\Users\Sheikh\AppData\Local\Programs\Python\Python36\Lib\site-packages\tensorflow\python\keras\engine\base_layer.py", line 1977, in make_variable
aggregation=aggregation)
File "C:\Users\Sheikh\AppData\Local\Programs\Python\Python36\Lib\site-packages\tensorflow\python\ops\variables.py", line 183, in __call__
return cls._variable_v1_call(*args, **kwargs)
File "C:\Users\Sheikh\AppData\Local\Programs\Python\Python36\Lib\site-packages\tensorflow\python\ops\variables.py", line 146, in _variable_v1_call
aggregation=aggregation)
File "C:\Users\Sheikh\AppData\Local\Programs\Python\Python36\Lib\site-packages\tensorflow\python\ops\variables.py", line 125, in <lambda>
previous_getter = lambda **kwargs: default_variable_creator(None, **kwargs)
File "C:\Users\Sheikh\AppData\Local\Programs\Python\Python36\Lib\site-packages\tensorflow\python\ops\variable_scope.py", line 2437, in default_variable_creator
import_scope=import_scope)
File "C:\Users\Sheikh\AppData\Local\Programs\Python\Python36\Lib\site-packages\tensorflow\python\ops\variables.py", line 187, in __call__
return super(VariableMetaclass, cls).__call__(*args, **kwargs)
File "C:\Users\Sheikh\AppData\Local\Programs\Python\Python36\Lib\site-packages\tensorflow\python\ops\resource_variable_ops.py", line 297, in __init__
constraint=constraint)
File "C:\Users\Sheikh\AppData\Local\Programs\Python\Python36\Lib\site-packages\tensorflow\python\ops\resource_variable_ops.py", line 409, in _init_from_args
initial_value() if init_from_fn else initial_value,
File "C:\Users\Sheikh\AppData\Local\Programs\Python\Python36\Lib\site-packages\tensorflow\python\keras\engine\base_layer.py", line 1959, in <lambda>
shape, dtype=dtype, partition_info=partition_info)
File "C:\Users\Sheikh\AppData\Local\Programs\Python\Python36\Lib\site-packages\tensorflow\python\ops\init_ops.py", line 473, in __call__
scale /= max(1., (fan_in + fan_out) / 2.)
TypeError: unsupported operand type(s) for +: 'NoneType' and 'int'
Exception ignored in: <bound method BaseSession.__del__ of <tensorflow.python.client.session.Session object at 0x0000026396AD0630>>
Traceback (most recent call last):
File "C:\Users\Sheikh\AppData\Local\Programs\Python\Python36\Lib\site-packages\tensorflow\python\client\session.py", line 738, in __del__
TypeError: 'NoneType' object is not callable
Please refer to my issue on their repo and for data examples being fed to the model please check this issue
First of all, the results by "mean" or "first" pooling is not for all the tokens, so you got to change in call() function:
elif self.pooling == "mean":
result = self.bert(inputs=bert_inputs, signature="tokens", as_dict=True)["sequence_output" ]
pooled = result
In build_model, change to:
embedding_size = 768
in_id = Input(shape=(max_seq_length,), name="input_ids")
in_mask = Input(shape=(max_seq_length,), name="input_masks")
in_segment = Input(shape=(max_seq_length,), name="segment_ids")
bert_inputs = [in_id, in_mask, in_segment]
bert_output = BertLayer(n_fine_tune_layers=12, pooling="mean")(bert_inputs)
bert_output = Reshape((max_seq_length, embedding_size))(bert_output)
bilstm = Bidirectional(LSTM(128, dropout=0.2,recurrent_dropout=0.2,return_sequences=True))(bert_output)
output = Dense(output_size, activation="softmax")(bilstm)
I have around 550K samples, each sample being 200x50x1. The size of this dataset is around 57GB.
I want to train a network on this set but I am having trouble reading it.
batch_size=8
def _read_py_function(filename,labels_slice):
with h5py.File(filename, 'r') as f:
data_slice = np.asarray(f['feats'])
print(data_slice.shape)
return data_slice, labels_slice
placeholder_files = tf.placeholder(tf.string, [None])
placeholder_labels = tf.placeholder(tf.int32, [None])
dataset = tf.data.Dataset.from_tensor_slices((placeholder_files,placeholder_labels))
dataset = dataset.map(
lambda filename, label: tuple(tf.py_func(
_read_py_function, [filename,label], [tf.uint8, tf.int32])))
dataset = dataset.shuffle(buffer_size=50000)
dataset = dataset.batch(batch_size)
iterator = tf.data.Iterator.from_structure(dataset.output_types, dataset.output_shapes)
data_X, data_y = iterator.get_next()
data_y = tf.cast(data_y, tf.int32)
net = conv_layer(inputs=data_X,num_outputs=8, kernel_size=3, stride=2, scope='rcl_0')
net = pool_layer(inputs=net,kernel_size=2,scope='pl_0')
net = dropout_layer(inputs=net,scope='dl_0')
net = flatten_layer(inputs=net,scope='flatten_0')
net = dense_layer(inputs=net,num_outputs=256,scope='dense_0')
net = dense_layer(inputs=net,num_outputs=64,scope='dense_1')
out = dense_layer(inputs=net,num_outputs=10,scope='dense_2')
And I run the session using :
sess.run(train_iterator, feed_dict = {placeholder_files: filenames, placeholder_labels: ytrain})
try:
while True:
_, loss, acc = sess.run([train_op, loss_op, accuracy_op])
train_loss += loss
train_accuracy += acc
except tf.errors.OutOfRangeError:
pass
But I am getting the error even before running the session :
Traceback (most recent call last):
File "SFCC-trial-134.py", line 297, in <module>
net = rcnn_layer(inputs=data_X,num_outputs=8, kernel_size=3, stride=2, scope='rcl_0')
File "SFCC-trial-134.py", line 123, in rcnn_layer
reuse=False)
File "SFCC-trial-134.py", line 109, in conv_layer
reuse = reuse
File "/home/priyam.jain/tensorflow-gpu-python3/lib/python3.5/site-packages/tensorflow/contrib/framework/python/ops/arg_scope.py", line 183, in func_with_args
return func(*args, **current_args)
File "/home/priyam.jain/tensorflow-gpu-python3/lib/python3.5/site-packages/tensorflow/contrib/layers/python/layers/layers.py", line 1154, in convolution2d
conv_dims=2)
File "/home/priyam.jain/tensorflow-gpu-python3/lib/python3.5/site-packages/tensorflow/contrib/framework/python/ops/arg_scope.py", line 183, in func_with_args
return func(*args, **current_args)
File "/home/priyam.jain/tensorflow-gpu-python3/lib/python3.5/site-packages/tensorflow/contrib/layers/python/layers/layers.py", line 1025, in convolution
(conv_dims + 2, input_rank))
TypeError: %d format: a number is required, not NoneType
I though about using TFRecords but had a hard time creating those. Couldn't find a good post where I learn to create them for my kind of dataset.
conv_layer is defined as follows :
def conv_layer(inputs, num_outputs, kernel_size, stride, normalizer_fn=None, activation_fn=nn.relu, trainable=True, scope='noname', reuse=False):
net = slim.conv2d(inputs = inputs,
num_outputs = num_outputs,
kernel_size = kernel_size,
stride = stride,
normalizer_fn = normalizer_fn,
activation_fn = activation_fn,
trainable = trainable,
scope = scope,
reuse = reuse
)
return net
Do not pass tf.py_func inside your map function. You can read the file image by passing the function name directly inside your map function. I am posing only the relevant parts of the code.
def _read_py_function(filename, label):
return tf.zeros((224, 224, 3), dtype=tf.float32), tf.ones((1,), dtype=tf.int32)
dataset = dataset.map(lambda filename, label: _read_py_function(filename, label))
Another change is your iterator will expect only floating point of input. So you will have to change your tf.uint8 type of output to float.
I'm attempting to train a regression model to predict attributes of music such as BPM. The model takes in spectrograms of audio snippets that are 256x128px png files and outputs a couple continuous values. I have the following code so far that I have developed based upon this guide on the tensorflow website:
import tensorflow as tf
import os
import random
import pathlib
AUTOTUNE = tf.data.experimental.AUTOTUNE
TRAINING_DATA_DIR = r'specgrams'
def gen_model():
model = tf.keras.models.Sequential([
tf.keras.layers.Flatten(input_shape=(256, 128, 3)),
tf.keras.layers.Dense(256, activation='relu'),
tf.keras.layers.Dense(2)
])
model.compile(optimizer=tf.keras.optimizers.RMSprop(0.001),
loss='mse',
metrics=['mse', 'mae'])
return model
def fetch_batch(batch_size=1000):
all_image_paths = []
all_image_labels = []
data_root = pathlib.Path(TRAINING_DATA_DIR)
files = data_root.iterdir()
for file in files:
file = str(file)
all_image_paths.append(os.path.abspath(file))
label = file[:-4].split('-')[2:]
label = float(label[0]) / 200, int(label[1]) / 1000.0
all_image_labels.append(label)
def preprocess_image(path):
img_raw = tf.io.read_file(path)
image = tf.image.decode_png(img_raw, channels=3)
image = tf.image.resize(image, [256, 128])
image /= 255.0
return image
def preprocess(path, label):
return preprocess_image(path), label
path_ds = tf.data.Dataset.from_tensor_slices(all_image_paths)
image_ds = path_ds.map(preprocess_image, num_parallel_calls=AUTOTUNE)
label_ds = tf.data.Dataset.from_tensor_slices(all_image_labels)
ds = tf.data.Dataset.zip((image_ds, label_ds))
ds = ds.shuffle(buffer_size=len(os.listdir(TRAINING_DATA_DIR)))
ds = ds.repeat()
ds = ds.batch(batch_size)
ds = ds.prefetch(buffer_size=AUTOTUNE)
return ds
ds = fetch_batch()
model = gen_model()
model.fit(ds, epochs=1, steps_per_epoch=10)
However I believe I have made a mistake with the structure of my model or how I am preprocessing the training data because I get an error about incorrect dimensions but I'm struggling to narrow down exactly where the issue is. I understand that the guide I followed was for classification problem as opposed to regression and my "labels" are an array of 2 value which is what is causing the problem but I'm not sure how to resolve this.
For context the filenames are in the format xxx-xxx-A-B.png where A and B are the two desired output values of the model. A is a floating-point value somewhere between 70 and 180 and B is an integer value between 0-1000. As such the label variable for each image looks something like this: (0.64, 0.319).
This is the error I am seeing when I attempt to execute the above script:
Traceback (most recent call last):
File "C:\Users\cainy\Desktop\BeatNet\training.py", line 60, in <module>
model.fit(ds, epochs=1, steps_per_epoch=3)
File "C:\Users\cainy\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\keras\engine\training.py", line 791, in fit
initial_epoch=initial_epoch)
File "C:\Users\cainy\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\keras\engine\training.py", line 1515, in fit_generator
steps_name='steps_per_epoch')
File "C:\Users\cainy\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\keras\engine\training_generator.py", line 257, in model_iteration
batch_outs = batch_function(*batch_data)
File "C:\Users\cainy\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\keras\engine\training.py", line 1259, in train_on_batch
outputs = self._fit_function(ins) # pylint: disable=not-callable
File "C:\Users\cainy\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\keras\backend.py", line 3217, in __call__
outputs = self._graph_fn(*converted_inputs)
File "C:\Users\cainy\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\eager\function.py", line 558, in __call__
return self._call_flat(args)
File "C:\Users\cainy\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\eager\function.py", line 627, in _call_flat
outputs = self._inference_function.call(ctx, args)
File "C:\Users\cainy\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\eager\function.py", line 415, in call
ctx=ctx)
File "C:\Users\cainy\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\eager\execute.py", line 66, in quick_execute
six.raise_from(core._status_to_exception(e.code, message), None)
File "<string>", line 3, in raise_from
tensorflow.python.framework.errors_impl.InvalidArgumentError: Can not squeeze dim[1], expected a dimension of 1, got 2
[[{{node metrics/accuracy/Squeeze}}]] [Op:__inference_keras_scratch_graph_734]
Edit: I have uploaded the source code to GitHub here.
You currently only have 1 output - a tensor with length 2 (per batch element). If you want to use/monitor separate losses you'll need to unstack it in both the model output and the labels.
I'm not sure if models.Sequential will be suitable, but you can definitely use the functional API:
def gen_model():
inputs = tf.keras.layers.Input(shape=(256, 128, 3), dtype=tf.float32)
x = inputs
x = tf.keras.layers.Dense(256, activation='relu')
x = tf.keras.layers.Dense(2)
a, b = tf.keras.layers.Lambda(tf.unstack, arguments=dict(axis=-1))(x)
model = tf.keras.models.Model(inputs=inputs, outputs=[a, b])
model.compile(optimizer=tf.keras.optimizers.RMSprop(0.001),
loss=['mse', 'mae'],
metrics=[['mse'], ['mae']])
return model
And in your preprocessing:
def preprocess(path, label):
return preprocess_image(path), tf.unstack(label, axis=-1)