pandas dataframe to tensorflow input - python

i want to use a pandas dataset as an input to a neural net.
my neural net model is:
def build_model():
model = Sequential()
model.add(Dense(128, activation = "relu"))
model.add(Dropout(0.2))
model.add(Dense(64, activation = "relu"))
model.add(Dropout(0.1))
model.add(Dense(32, activation = "softmax"))
model.compile(
optimizer='adam',
loss=['binary_crossentropy'],
metrics=['accuracy']
)
return model
tensorboard = TensorBoard(log_dir=f"logs/{time.time()}", histogram_freq=1)
model = build_model()
history = model.fit(
x_train,
y_train,
epochs=5,
batch_size=32,
validation_data=(
x_val,
y_val
),
callbacks=[
tensorboard
]
)
and i pass my dataframe as input as such:
y_val, x_val, y_train, x_train = test_data.drop(['gender',
'comorbidities_count', 'comorbidities_significant_count',
'medication_count'],axis=1),test_data.drop(['fried'],axis=1),training_data.drop([ 'gender', 'comorbidities_count', 'comorbidities_significant_count',
'medication_count'],axis=1),training_data.drop(['fried'],axis=1)
but i get this error:
ValueError: Please provide as model inputs either a single array or a list of arrays.
Does anyone know hot to turn this dataframe into an array so i can feed it? Or is there some other issue i am not in knowledge of?

Use
y_val, x_val, y_train, x_train = test_data.drop(['gender',
'comorbidities_count', 'comorbidities_significant_count',
'medication_count'],axis=1).to_numpy().astype(np.float32) ,test_data.drop(['fried'],axis=1).to_numpy().astype(np.float32) ,training_data.drop([ 'gender', 'comorbidities_count', 'comorbidities_significant_count',
'medication_count'],axis=1).to_numpy().astype(np.float32) ,training_data.drop(['fried'],axis=1).to_numpy().astype(np.float32)
The .to_numpy() function of a pd dataframe turns it into a numpy array.

Related

Unable to pass appropriate shape to tensor flow model with tf.data.Dataset.from_generator

When using tf.data.Dataset.from_generator API to generate train and test datasets. Not able to pass the appropriate shape to the Tensorflow model.
Following is my code
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import train_test_split
import pandas as pd
def fetchValuesFromDatabase(path):
df = pd.read_csv(path, header=None)
rows_from_csv = df.values[:]
rows_list = rows_from_csv.tolist()
rows_list = rows_list[1:]
def castFunction(val):
try:
return int(val)
except:
return int(float(val))
result_column = [list(map(lambda x: castFunction(x), value[-1])) for value in rows_list]
train_columns = [list(map(lambda x: castFunction(x), value[3:-1])) for value in rows_list]
print(train_columns)
X_train, X_test, y_train, y_test = train_test_split(train_columns, result_column, test_size=0.20, shuffle=True)
train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train)).batch(32)
test_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test)).batch(32)
return train_dataset, test_dataset
def createModel():
model = Sequential()
model.add(Dense(10, input_shape=(10,), activation='relu'))
model.add(Dense(8, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
return model
model = createModel()
train_dataset, test_dataset = fetchValuesFromDatabase("ModifiedHrTrainData.csv")
model.fit(train_dataset, epochs=10, validation_data=test_dataset)
Getting following error.
ValueError: Input 0 of layer sequential is incompatible with the layer: expected axis -1 of input shape to have value 10 but received input with shape [10, 1]
The same error doesn't occur if tf.data.Dataset.from_tensor_slices is used and passed to the model. Need help in achieving the same with tf.data.Dataset.from_generator API.
Following is the dataset link
https://mega.nz/file/DZkVWSTT#MhjiuFcDMbe80gZ34AkMCjWD3h3y87ytpn9q4AT1bu4
Please help me understand the issue.
Add keras_input function to your model tf.keras.Input(shape=(10,))`
model = tf.keras.models.Sequential()
model.add(tf.keras.Input(shape=(10,)))
model.add(Dense(10, activation='relu'))
model.add(Dense(8, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
return model

text binary classification error:logits and labels must have the same shape

I'm trying to bulid a model to classify my text to hate (1) or not (0) using nn.
Information about the data, it's consists of tweets and class label (hate (1) or not (0)):
sentences = df['comment']
y = df['isHate']
sentences_train, sentences_test, train_y, test_y = train_test_split(sentences, y, test_size=0.25, random_state=42)
the text get through a lot of Word Embeddings and I applied pad sequences on the tweets and LabelEncoder on the labels.
the problem is when I do the run I get this error:
ValueError: logits and labels must have the same shape ((None, 1) vs (None, 2))
the code of the model:
emb_dim = 16
model = Sequential()
model.add(Embedding(input_dim=vocab_size, output_dim= emb_dim, input_length=maxlen))
model.add(Flatten())
model.add(Dense(2, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='adam',
loss='binary_crossentropy',
metrics=['accuracy'])
model.summary()
the problem happened in this part:
history = model.fit(X_train, y_train,
batch_size=32,
epochs=15,
validation_data=(X_test, y_test))
Any help?
In your code:
model.add(Dense(1, activation='sigmoid'))
Your last dense layer has only 1 unit but your labels are one hot encoded which consist of 2 classes. So you need to change:
model.add(Dense(2, activation='softmax'))
You also need to change your loss function, because they are one-hot-encoded:
loss='categorical_crossentropy'

How can I extract Flatten Layer Output for each epoch?

model = Sequential()
model.add(Conv2D(50, (5,5), activation='relu', input_shape =(5,5,1), kernel_initializer='he_normal'))
model.add(Flatten())
model.add(Dense(1, activation='sigmoid'))
model.summary()
# compile the model
model.compile(loss='binary_crossentropy', optimizer= 'adam', metrics=['accuracy'])
model_checkpoint=ModelCheckpoint(r'C:\Users\globo\Desktop\Test_CNN\Results\Kernel5x5\Weights'+'\\'+test+'\model_test{epoch:02d}.h5',save_freq=1,save_weights_only=True)
# fit the model
history = model.fit(X_train, Y_train, epochs=10, batch_size=32, verbose=1, callbacks=[model_checkpoint], shuffle=True, validation_split=0.5)
I'm already extracting weights for each epoch with "ModelCheckpoint", but how can I extract flatten layer output for each epoch and save them?
doing this with sequential models is not feasible at all.
you should use functional API
inp = Input((5,5,1))
x = Conv2D(50, (5,5), activation='relu', kernel_initializer='he_normal')(inp)
xflatten = Flatten()(x)
out = Dense(1, activation='sigmoid')(xflatten)
main_model = Model(inp, out) # this works same as your model
flatten_model = Model(inp, xflatten) # and this only outputs the flatten layer and is not necessary to compile it because we won't train it, it just shows the output of a layer
main_model.compile(loss='binary_crossentropy', optimizer= 'adam', metrics=['accuracy'])
history = main_model.fit(X_train, Y_train, epochs=10, batch_size=32, verbose=1, callbacks=[model_checkpoint], shuffle=True, validation_split=0.5)
to see the flatten layers's output:
flatten_model.predict(X)

Why am I getting "Supported target types are: ('binary', 'multiclass'). Got 'continuous' instead." error?

I am writing this code and keep getting the Supported target types are: ('binary', 'multiclass'). Got 'continuous' instead. error no matter what I try. Do you see the problem within my code?
df = pd.read_csv('drain.csv')
values = df.values
seed = 7
numpy.random.seed(seed)
X = df.iloc[:,:2]
Y = df.iloc[:,2:]
def create_model():
# create model
model = Sequential()
model.add(Dense(12, input_dim=8, activation='relu'))
model.add(Dense(8, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
# Compile model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
return model
model = KerasClassifier(build_fn=create_model, epochs=10, batch_size=10, verbose=0)
# evaluate using 10-fold cross validation
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(model, X, Y, cv=kfold)
print(results.mean())
You need to convert your Y variables to binary, as specified here :
https://github.com/keras-team/keras/blob/master/examples/mnist_mlp.py
# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)
and then
history = model.fit(x_train, y_train,
batch_size=batch_size,
epochs=epochs,
verbose=1,
validation_data=(x_test, y_test))
Seems like you forgot the conversion to categorical step.

Shape mismatch in LSTM in keras

I am trying to run a LSTM using Keras on my custom features set. I have train and test features in separate files. Each csv file contains 11 columns with last column as class label. There are total 40 classes in my dataset. The problem is I am not able to figure out the correct input_shape to the first layer. I had explored all the stackoverflow and github but still not able to solve this
Below is my complete code.
import numpy
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
numpy.random.seed(7)
train_dataset = numpy.loadtxt("train.csv", delimiter=",")
X_train = train_dataset[:, 0:10]
y_train = train_dataset[:, 10]
test_dataset = numpy.loadtxt("test.csv", delimiter=",")
X_test = test_dataset[:, 0:10]
y_test = test_dataset[:, 10]
model = Sequential()
model.add(LSTM(32, return_sequences=True, input_shape=X_train.shape))
model.add(LSTM(32, return_sequences=True))
model.add(LSTM(32))
model.add(Dense(1, activation='softmax'))
model.compile(loss='mean_squared_error', optimizer='sgd', metrics=['accuracy'])
model.fit(X_train, y_train, batch_size=10, epochs=1)
score, acc = model.evaluate(X_test, y_test, batch_size=10)
print('Test score:', score)
print('Test accuracy:', acc * 100)
Whatever I change in input_shape parameter wither I get error in first LSTM layer of in fit method.
you don't have a time dimension in your input.
Input for RNN should be (batch_size, time_step, features) while your input has dimension (batch_size, features).
If you want to use your 10 columns one at a time you should reshape the array with
numpy.reshape(train_dataset, (-1, train_dataset.shape[1], 1))
Try this code:
train_dataset = numpy.loadtxt("train.csv", delimiter=",")
train_dataset = numpy.reshape(train_dataset, (-1, train_dataset.shape[1], 1))
X_train = train_dataset[:, 0:10]
y_train = train_dataset[:, 10]
test_dataset = numpy.loadtxt("test.csv", delimiter=",")
test_dataset = numpy.reshape(test_dataset, (-1, train_dataset.shape[1], 1))
X_test = test_dataset[:, 0:10]
y_test = test_dataset[:, 10]
model = Sequential()
model.add(LSTM(32, return_sequences=True, input_shape=(X_train.shape[1], 1)))
model.add(LSTM(32, return_sequences=True))
model.add(LSTM(32))
model.add(Dense(1, activation='softmax'))
model.compile(loss='mean_squared_error', optimizer='sgd', metrics=['accuracy'])
model.fit(X_train, y_train, batch_size=10, epochs=1)
score, acc = model.evaluate(X_test, y_test, batch_size=10)
print('Test score:', score)
print('Test accuracy:', acc * 100)

Categories

Resources