I'm getting issues with my code and can't understand what to do next. Can anyone help me out?
# Importing the libraries
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, LSTM, SpatialDropout1D, Bidirectional
from tensorflow.keras import regularizers
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
import pickle
import re
# Importing the dataset
filename = "MoviePlots.csv"
data = pd.read_csv(filename, encoding= 'unicode_escape')
# Keeping only the necessary columns
data = data[['Plot']]
# Clean the data
data['Plot'] = data['Plot'].apply(lambda x: x.lower())
data['Plot'] = data['Plot'].apply(lambda x: re.sub(r'[^a-zA-Z0-9\s]', '', x))
# Create the tokenizer
tokenizer = Tokenizer(num_words=5000, split=" ")
tokenizer.fit_on_texts(data['Plot'].values)
# Save the tokenizer
with open('tokenizer.pickle', 'wb') as handle:
    pickle.dump(tokenizer, handle, protocol=pickle.HIGHEST_PROTOCOL)
# Create the sequences
X = tokenizer.texts_to_sequences(data['Plot'].values)
X = pad_sequences(X)
# Create the model
model = Sequential()
model.add(Embedding(5000, 256, input_length=X.shape[1]))
model.add(Bidirectional(LSTM(256, return_sequences=True, dropout=0.1, recurrent_dropout=0.1)))
model.add(LSTM(256, return_sequences=True, dropout=0.1, recurrent_dropout=0.1))
model.add(LSTM(256, dropout=0.1, recurrent_dropout=0.1))
model.add(Dense(256, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
model.add(Dense(5000, activation='softmax'))
# Compile the model
model.compile(loss='categorical_crossentropy', optimizer=Adam(learning_rate=0.01), metrics=['accuracy'])
# Train the model
model.fit(X, X, epochs=100, batch_size=128, verbose=1)
# Saving the model
model.save('visioniser.h5')
This is my code; the error is shown in the attached image. Can anyone please help me diagnose and solve this problem?
It appears that the error is happening in data['Plot'] = data['Plot'].apply(lambda x: x.lower()): you are calling apply on a column whose values are not all strings, so one of them doesn't have the lower method.
You could fix this by checking whether each value is actually a string:
data['Plot'] = data['Plot'].apply(lambda x: x.lower() if isinstance(x, str) else x)
or, instead of using a lambda function:
data['Plot'] = data['Plot'].str.lower()
since pandas' str.lower skips values that are not strings.
It seems like your Plot column holds some NaN values (treated as float by pandas), hence the error. Try casting the column to str with pandas.Series.astype before calling pandas.Series.apply:
data['Plot'] = data['Plot'].astype(str).apply(lambda x: x.lower())
Or simply use pandas.Series.str.lower:
data['Plot'] = data['Plot'].astype(str).str.lower()
The same goes for re.sub; you could use pandas.Series.replace (note the corrected a-zA-Z range):
data['Plot'] = data['Plot'].astype(str).replace(r'[^a-zA-Z0-9\s]', '', regex=True)
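Putting both suggestions together, a minimal sketch of the cleaning step (reusing the question's MoviePlots.csv file and Plot column) could look like this:
import pandas as pd
data = pd.read_csv("MoviePlots.csv", encoding="unicode_escape")
# Drop missing plots instead of turning NaN into the literal string "nan"
data = data[['Plot']].dropna()
data['Plot'] = data['Plot'].str.lower()
data['Plot'] = data['Plot'].str.replace(r'[^a-zA-Z0-9\s]', '', regex=True)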
Can someone help me with this? I'm new to machine learning, and I was trying to build a time series model, but when I try to train it with model.fit() this happens:
TypeError: Exception encountered when calling layer "lstm_6" (type LSTM)
Value passed to parameter 'a' has DataType string not in list of allowed values: bfloat16, float16, float32, float64, int32, int64, complex64, complex128
I'm doing this in Colab, and here is my code:
import numpy as np
import pandas as pd
from keras.layers import Dense, LSTM
import matplotlib.pyplot as plt
import tensorflow as tf
data = pd.read_csv('/content/daily-minimum-temperatures-in-me.csv')
data.head(20)
data.isnull().sum()
dates = data['Date'].values
temp = data['Daily minimum temperatures'].values
plt.figure(figsize=(15,5), dpi=100)
plt.plot(dates, temp)
plt.title('Temperature average', fontsize=16)
def windowed_dataset(series, window_size, batch_size, shuffle_buffer):
    # Add a feature axis, then build overlapping (window, next-value) pairs
    series = tf.expand_dims(series, axis=-1)
    ds = tf.data.Dataset.from_tensor_slices(series)
    ds = ds.window(window_size + 1, shift=1, drop_remainder=True)
    ds = ds.flat_map(lambda w: w.batch(window_size + 1))
    ds = ds.shuffle(shuffle_buffer)
    ds = ds.map(lambda w: (w[:-1], w[-1:]))
    return ds.batch(batch_size).prefetch(1)
train_set = windowed_dataset(temp, window_size=60, batch_size=100, shuffle_buffer=1000)
model = tf.keras.models.Sequential([
    tf.keras.layers.LSTM(60, return_sequences=True),
    tf.keras.layers.LSTM(60),
    tf.keras.layers.Dense(30, activation="relu"),
    tf.keras.layers.Dense(10, activation="relu"),
    tf.keras.layers.Dense(1),
])
optimizer = tf.keras.optimizers.SGD(learning_rate=1.0000e-04, momentum=0.9)
model.compile(loss=tf.keras.losses.Huber(),
              optimizer=optimizer,
              metrics=["mae"])
history = model.fit(train_set,epochs=100)
If you need to know the dataset, here is the link
I appreciate the help from anyone who answers. Thank you.
Whenever you are working, use print statements frequently to inspect your data. For example, print(temp.dtype) answers this question: you are reading all your data in string format, so your temp array still holds strings.
Use
data = pd.read_csv('/content/daily-minimum-temperatures-in-me.csv')
data = data._convert(numeric=True)
data.head(20)
This converts every column that can be parsed as numeric; hopefully it will work.
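Since _convert is a private pandas method and may disappear in newer versions, a hedged alternative sketch using the public pd.to_numeric (column name taken from the question) would be:
import numpy as np
import pandas as pd
data = pd.read_csv('/content/daily-minimum-temperatures-in-me.csv')
# Coerce the temperature column to floats; rows with stray characters become NaN
data['Daily minimum temperatures'] = pd.to_numeric(
    data['Daily minimum temperatures'], errors='coerce')
data = data.dropna(subset=['Daily minimum temperatures'])
temp = data['Daily minimum temperatures'].values.astype(np.float32)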
I'm trying to feed a collection of images through a neural network, but I can't figure out how to get a large collection of images into a TensorFlow model, because converting the collection into a NumPy array causes a memory error.
I should note that I am very new to TensorFlow.
import numpy as np
from skimage.io import imread_collection
from tensorflow import keras
from tensorflow.keras import layers
def gen(arr): return (i.reshape(400*600*3) for i in arr)  # Only used in Attempt2.
labelFile=open("lables_text_file.txt","r")
labels=labelFile.read()
labelFile.close()
labels=getTrain(labels)  # Converts to a tuple containing the labels in order (getTrain is defined elsewhere).
data = imread_collection("path_to_images/*.jpg", conserve_memory=True)
train=data[:-len(data)//4]
trainLabels=labels[:-len(data)//4]
test=data[-len(data)//4:]
testLabels=labels[-len(data)//4:]
#train = train.reshape(-1, 400*600*3) # Attempt1
#test = test.reshape(-1, 400*600*3) # Attempt1
#train = gen(train) # Attempt2
#test = gen(test) # Attempt2
trainLabels = keras.utils.to_categorical(trainLabels, 23)
testLabels = keras.utils.to_categorical(testLabels, 23)
model=keras.Sequential([keras.Input(shape=(400*600*3,)),
layers.Dense(600, name='hidden1', activation='relu'),
layers.Dense(400, name='hidden2', activation='relu'),
layers.Dense(46, name='hidden3', activation='relu'),
layers.Dense(23, activation="softmax")])
optimizer = keras.optimizers.Adam(learning_rate=0.0015)
model.compile(loss=keras.losses.CategoricalCrossentropy(), optimizer=optimizer, metrics=[keras.metrics.CategoricalAccuracy()])
model.fit(train,trainLabels,batch_size=128,epochs=8,validation_data=(test,testLabels), shuffle=True)
When I run the code as is, this is the result:
ValueError: Failed to find data adapter that can handle input: <class 'skimage.io.collection.ImageCollection'>, <class 'numpy.ndarray'>
When I try to use Attempt1, this is the result:
AttributeError: 'ImageCollection' object has no attribute 'reshape'
When I try to use Attempt2, this is the result:
ValueError: `y` argument is not supported when using python generator as input.
How can I pass the data to model.fit such that it will successfully train the neural network?
I think I may have solved the problems.
Working code:
import numpy as np
from skimage.io import imread_collection
from tensorflow import keras
from tensorflow.keras import layers
def gen(arr, labels):
    return ((arr[i].reshape(-1, 400*600*3), labels[i].reshape(-1, 23)) for i in range(len(arr)))
labelFile=open("lables_text_file.txt","r")
labels=labelFile.read()
labelFile.close()
labels=getTrain(labels)  # Converts to a tuple containing the labels in order (getTrain is defined elsewhere).
data = imread_collection("path_to_images/*.jpg", conserve_memory=True)
train=data[:-len(data)//4]
trainLabels=labels[:-len(data)//4]
test=data[-len(data)//4:]
testLabels=labels[-len(data)//4:]
#train = train.reshape(-1, 400*600*3) # Attempt1
#test = test.reshape(-1, 400*600*3) # Attempt1
trainLabels = keras.utils.to_categorical(trainLabels, 23)
testLabels = keras.utils.to_categorical(testLabels, 23)
train = gen(train,trainLabels) # Attempt2
test = gen(test,testLabels) # Attempt2
model=keras.Sequential([keras.Input(shape=(400*600*3,)),
layers.Dense(600, name='hidden1', activation='relu'),
layers.Dense(400, name='hidden2', activation='relu'),
layers.Dense(46, name='hidden3', activation='relu'),
layers.Dense(23, activation="softmax")])
optimizer = keras.optimizers.Adam(learning_rate=0.0015)
model.compile(loss=keras.losses.CategoricalCrossentropy(), optimizer=optimizer, metrics=[keras.metrics.CategoricalAccuracy()])
model.fit(train,None,batch_size=128,epochs=8,validation_data=(test,testLabels), shuffle=True)
The solution was to pass in a generator that yields two-tuples of (input, label) instead of passing the labels in directly. There were other problems, which I may add to this answer if I get the time.
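For reference, a cleaner way to feed such a generator in current TensorFlow is to wrap it in a tf.data.Dataset, which lets Keras handle batching and validation data uniformly. This is only a sketch, assuming TF 2.4+ (for output_signature) and the shapes from the question (400x600 RGB images, 23 classes); make_dataset is a hypothetical helper:
import tensorflow as tf
def make_dataset(images, labels, batch_size=128):
    # images: an iterable of (400, 600, 3) arrays; labels: one-hot (23,) rows
    def gen():
        for img, lab in zip(images, labels):
            yield img.reshape(400*600*3).astype("float32"), lab
    return tf.data.Dataset.from_generator(
        gen,
        output_signature=(
            tf.TensorSpec(shape=(400*600*3,), dtype=tf.float32),
            tf.TensorSpec(shape=(23,), dtype=tf.float32),
        ),
    ).batch(batch_size).prefetch(tf.data.AUTOTUNE)
train_ds = make_dataset(train, trainLabels)
test_ds = make_dataset(test, testLabels)
model.fit(train_ds, epochs=8, validation_data=test_ds)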
I am trying to run the code below. Everything goes well until I try to fit the training data and labels.
I keep getting the error below and could not find out why. Could you please help me?
UnimplementedError: Cast string to float is not supported
[[node metrics/accuracy/Cast (defined at :1)]] [Op:__inference_distributed_function_53201]
Function call stack: distributed_function
import numpy as np
import pandas as pd
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Dense, GRU, Embedding, CuDNNGRU, Activation
from tensorflow.python.keras.optimizers import Adam
from tensorflow.python.keras.preprocessing.text import Tokenizer
from tensorflow.python.keras.preprocessing.sequence import pad_sequences
import tensorflow as tf
datas=pd.read_csv('data.csv', sep='delimiter', engine='python')
targets=pd.read_csv('label.csv', sep='delimiter', engine='python')
data=datas['XDESCRIPTION'].values.tolist()
target=targets['YMode'].values.tolist()
cutoff=int(len(data)*0.80)
x_train,x_test=data[:cutoff],data[cutoff:]
y_train,y_test=target[:cutoff],target[cutoff:]
tokenizer=Tokenizer()
tokenizer.fit_on_texts(data)
tokenizer.fit_on_texts(target)
x_train_tokens=tokenizer.texts_to_sequences(x_train)
x_test_tokens=tokenizer.texts_to_sequences(x_test)  # needed below; presumably dropped when pasting
num_tokens=[len(tokens) for tokens in x_train_tokens + x_test_tokens]
num_tokens=np.array(num_tokens)
np.mean(num_tokens)
max_tokens=np.mean(num_tokens)+2*np.std(num_tokens)
max_tokens=int(max_tokens)
max_tokens
np.sum(num_tokens<max_tokens)/len(num_tokens)
x_train_pad=pad_sequences(x_train_tokens, maxlen=max_tokens)
x_test_pad=pad_sequences(x_test_tokens, maxlen=max_tokens)
idx=tokenizer.word_index
inverse_map=dict(zip(idx.values(),idx.keys()))
def tokens_to_string(tokens):
    words = [inverse_map[token] for token in tokens if token != 0]
    text = " ".join(words)
    return text
model=Sequential()
embedding_size=41
num_words=len(tokenizer.word_index)+1  # vocabulary size; not defined in the snippet as posted
model.add(Embedding(input_dim=num_words,output_dim=embedding_size,input_length=max_tokens))
model.add(GRU(units=16,return_sequences=True))
model.add(GRU(units=8,return_sequences=True))
model.add(GRU(units=4))
model.add(Dense(1,activation="sigmoid"))
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.fit(x=np.array(x_train_pad), y=np.array(y_train),epochs=2,batch_size=256)
Your y_train and y_test arrays are definitely string arrays. You can see that from these two lines:
target=targets['YMode'].values.tolist()
y_train,y_test=target[:cutoff],target[cutoff:]
If the data in your CSV file is numeric, you can cast the target array to int like this:
target = [int(lab) for lab in target]
However, if your data is categorical, you can solve the problem by label encoding your data:
from sklearn.preprocessing import LabelEncoder
target=targets['YMode'].values.tolist()
label_encoder = LabelEncoder()
Y = np.array(label_encoder.fit_transform(target))
y_train,y_test=Y[:cutoff],Y[cutoff:]
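As a toy illustration of what LabelEncoder does (the labels here are made up), it maps each distinct string to an integer and can invert the mapping later:
from sklearn.preprocessing import LabelEncoder
target = ['spam', 'ham', 'spam', 'eggs']
label_encoder = LabelEncoder()
encoded = label_encoder.fit_transform(target)
print(encoded)                                   # [2 1 2 0] -- classes sorted alphabetically
print(label_encoder.inverse_transform(encoded))  # ['spam' 'ham' 'spam' 'eggs']
Note that with more than two classes, the model's single sigmoid output and binary_crossentropy loss would also need to change (e.g. to a softmax over the classes), but that is beyond this particular error.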
@enumaris, thank you for your answer. I'll try to explain my approach a bit:
I pushed the video frames through a ResNet model and got feature shapes of (k, 2048). I split the data into train/validation and test folders. Then I wrote this script:
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Activation, Dropout, Dense
import tensorflow as tf
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import argparse
import random
import cv2
import os
dataTrain = []
labelsTrain = []
# Prepare the training data. The .txt files contain the name of the file
# and the label, which is 0, 1, or 2 based on which class the video
# belongs to (e.g. "nameVideo.npy 0").
with open('D:...\Data\/train_files.txt') as f:
    trainingList = f.readlines()  # the with-block closes the file automatically
for line in trainingList:
    npyFiles = line.split()
    loadTrainingData = np.load(npyFiles[0])
    dataTrain.append(loadTrainingData)
    labelsTrain.append(npyFiles[1])
dataNp = np.array(dataTrain, dtype=object)
labelsNp = np.array(labelsTrain, dtype=object)
dataVal = []
labelsVal = []
# Prepare the validation data
with open('D:\...\Data\/val_files.txt') as f:
    valList = f.readlines()
for line in valList:
    npyValFiles = line.split()
    loadValData = np.load(npyValFiles[0])
    dataVal.append(loadValData)
    labelsVal.append(npyValFiles[1])
print(len(dataVal))
model = Sequential()
model.add(LSTM(32,
               batch_input_shape=(None, None, 1),
               return_sequences=True))
model.add(LSTM(32, return_sequences=True))
model.add(LSTM(32))
model.add(Dense(10, activation='softmax'))
model.compile(loss='mean_absolute_error',
              optimizer='adam',
              metrics=['accuracy'])
model.summary()
history = model.fit(dataTrain, labelsTrain,
                    epochs=10,
                    validation_data=(dataVal, labelsVal))
Which results in the following error:
ValueError: Error when checking model input: the list of Numpy arrays that you are passing to your model is not the size the model expected. Expected to see 1 array(s), but instead got the following list of 3521 arrays.
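A sketch of one way past this particular error (it does not address the script's other issues) is to pad the per-video (k, 2048) feature arrays to a common length and stack them into a single 3-D float array, since model.fit expects one array rather than a list of 3521 of them; pad_features is a hypothetical helper, and the LSTM's batch_input_shape would then need its last dimension to be 2048 rather than 1:
max_len = max(x.shape[0] for x in dataTrain)
def pad_features(arrays, max_len, dim=2048):
    # Zero-pad each (k, dim) array to (max_len, dim) and stack to (n, max_len, dim)
    out = np.zeros((len(arrays), max_len, dim), dtype='float32')
    for i, a in enumerate(arrays):
        out[i, :a.shape[0], :] = a
    return out
X_train = pad_features(dataTrain, max_len)
y_train = np.array(labelsTrain, dtype='int32')  # the labels were read from text as strings
history = model.fit(X_train, y_train, epochs=10)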
I am running a Keras script (no direct call to Theano in my script) and I get the following error:
TypeError: ('An update must have the same type as the original shared
variable (shared_var=<TensorType(float32, matrix)>,
shared_var.type=TensorType(float32, matrix),
update_val=Elemwise{add,no_inplace}.0,
update_val.type=TensorType(float64, matrix)).',
'If the difference is related to the broadcast pattern,
you can call the tensor.unbroadcast(var, axis_to_unbroadcast[, ...])
function to remove broadcastable dimensions.')
I have seen this error from folks running Theano directly, but not through Keras. I'm not sure what I should do, since I am not dealing with tensors directly.
The problem was a change between Keras versions (I am currently using Keras 0.3.2 with Theano 0.8.0): what used to be fine does not work well with the new Keras version.
The following was the original code, and see the fix below.
from keras.models import Sequential
import keras.optimizers
from keras.layers.core import Dense, Dropout
from keras.layers.normalization import BatchNormalization
from keras.layers.advanced_activations import PReLU
from keras.layers.core import Activation
from keras.optimizers import SGD, Adam
from sklearn.preprocessing import StandardScaler
from sklearn.base import BaseEstimator, RegressorMixin
class NnRegression(BaseEstimator, RegressorMixin):
    def __init__(self, apply_standart_scaling=True,
                 dropx=[0.2, 0.5, 0.5], nb_neuronx=[50, 30], nb_epoch=105,
                 validation_split=0., verbose=1):
        self.apply_standart_scaling = apply_standart_scaling
        self.dropx = dropx
        self.nb_neuronx = nb_neuronx
        self.nb_epoch = nb_epoch
        self.validation_split = validation_split
        self.verbose = verbose

    def fit(self, X, y):
        nb_features = X.shape[1]
        self.standart_scaling = StandardScaler() if self.apply_standart_scaling else None
        if self.standart_scaling:
            X = self.standart_scaling.fit_transform(X)
        model = Sequential()
        model.add(Dropout(p=self.dropx[0], input_shape=(nb_features,)))
        model.add(Dense(output_dim=self.nb_neuronx[0], init='glorot_uniform'))
        model.add(PReLU())
        model.add(BatchNormalization(self.nb_neuronx[0]))
        model.add(Dropout(self.dropx[1]))
        model.add(Dense(self.nb_neuronx[1], init='glorot_uniform'))
        model.add(PReLU())
        model.add(BatchNormalization(self.nb_neuronx[0]))
        model.add(Dropout(self.dropx[2]))
        model.add(Dense(1, init='glorot_uniform'))
        nn_verbose = 1 if self.verbose > 0 else 0
        optz = keras.optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
        model.compile(optimizer=optz, loss='mse')
        model.fit(X, y, batch_size=16, nb_epoch=self.nb_epoch,
                  validation_split=self.validation_split, verbose=nn_verbose)
        self.model = model

    def predict(self, X):
        if self.standart_scaling:
            X = self.standart_scaling.transform(X)
        return self.model.predict_proba(X, verbose=0)
Well, it turns out that the problem is this single line of code:
model.add(BatchNormalization(self.nb_neuronx[0]))
It should actually be:
model.add(BatchNormalization())
because the number of neurons has no business being passed to the normalization layer (although this did not cause trouble in a previous Keras version).
This apparently causes Theano to generate new weights that are float64 instead of float32, which triggers the message above.
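A quick way to confirm this kind of dtype drift (model.get_weights() exists in both old and current Keras) is to inspect the weight arrays after building the model; a minimal sketch:
import numpy as np
for i, w in enumerate(model.get_weights()):
    print(i, w.shape, w.dtype)  # every array should be float32 when floatX='float32'
assert all(w.dtype == np.float32 for w in model.get_weights())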