Pipeline implementation with sklearn PCA and TensorFlow - Python

It gives another error:
The first argument to `Layer.call` must always be passed.
I cannot solve the problem: input_dim cannot be set as a constant, because PCA and SelectKBest will cut down the number of input features. And if you can help with getting the results out of the pipeline, I will be very grateful.
Link to the data: https://1drv.ms/u/s!AlHgQsqCKEIPiIxzdyWE0BfBHNocTQ?e=cxuSuo
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

def modelReg(inpt, opt='adam', kInitializer='glorot_uniform', dropout=0.05):
    model = Sequential()
    model.add(Dense(1024, activation='relu', input_dim=inpt, kernel_initializer=kInitializer))
    model.add(Dense(1024, activation='relu', kernel_initializer=kInitializer))
    model.add(Dense(512, activation='relu', kernel_initializer=kInitializer))
    model.add(Dropout(dropout))
    model.add(Dense(1, activation='sigmoid', kernel_initializer=kInitializer))
    model.compile(loss='mse', optimizer=opt, metrics=['mse', 'mae'])
    return model
features = []
features.append(('pca', PCA(n_components=10)))
features.append(('select_best', SelectKBest(k=10)))
feature_union = FeatureUnion(features)

regressor = KerasRegressor(build_fn=modelReg(inpt), epochs=3, batch_size=500, verbose=1)

estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('feature_union', feature_union))
estimators.append(('regressor', regressor))
model = Pipeline(estimators)
model.fit(allData.drop(['VancouverH'], axis=1), allData['VancouverH'])

With KerasRegressor, arguments for the model-building function are written as keyword arguments to KerasRegressor itself, and the function object is passed uncalled:
kearsEstimator = ('kR', KerasRegressor(createModel, inpt=trainDataX.shape[1],
                                       epochs=5, batch_size=180, verbose=1))

Like this, not like this:

kearsEstimator = ('kR', KerasRegressor(createModel(inpt),
                                       epochs=5, batch_size=180, verbose=1))
Then the pipeline is passed to GridSearchCV, and the parameter names for the grid are written with the step-name prefix:
estimators = []
estimators.append(kearsEstimator)

param_grid = {
    'kR__optimizer': ['adam']  # 'RMSprop', 'Adam', 'Adamax', 'sgd'
}

grid = GridSearchCV(Pipeline(estimators), param_grid, cv=5)
grid.fit(trainDataX, trainDataY)
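
Applied to the original pipeline, a minimal sketch of the corrected version might look like the following. It assumes the modelReg and feature_union definitions above; inpt is set to 20 because FeatureUnion concatenates the 10 PCA components with the 10 features from SelectKBest, and 'regressor__opt' relies on the wrapper forwarding keyword arguments that match the build function's signature:

# sketch only: inpt=20 is derived from PCA(n_components=10) + SelectKBest(k=10)
regressor = KerasRegressor(modelReg, inpt=20, epochs=3, batch_size=500, verbose=1)

estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('feature_union', feature_union))
estimators.append(('regressor', regressor))

param_grid = {
    'regressor__opt': ['adam', 'RMSprop']  # forwarded to modelReg's opt argument
}
grid = GridSearchCV(Pipeline(estimators), param_grid, cv=5)
grid.fit(allData.drop(['VancouverH'], axis=1), allData['VancouverH'])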

Related

How to extract weights of the best ANN resulting from GridSearchCV?

After running hyperparameter tuning with GridSearchCV using the code below:
## Tuning the ANN
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import GridSearchCV
from keras.models import Sequential
from keras.layers import Dense

def build_regressor(hidden_nodes, hidden_layers, optimizer):
    regressor = Sequential()
    regressor.add(Dense(units=hidden_nodes, kernel_initializer='uniform', activation='relu', input_dim=7))
    for layer_size in range(hidden_layers):
        regressor.add(Dense(hidden_nodes, kernel_initializer='uniform', activation='relu'))
    regressor.add(Dense(units=1, kernel_initializer='uniform', activation='linear'))
    regressor.compile(optimizer=optimizer, loss='mse', metrics=['mse'])
    return regressor

regressor = KerasRegressor(build_fn=build_regressor, epochs=100)

# Create a dictionary of tuning parameters
parameters = {'hidden_nodes': list(range(2, 101)), 'hidden_layers': [1, 2, 3], 'batch_size': [25, 32], 'optimizer': ['adam', 'nadam', 'RMSprop', 'adamax']}

grid_search = GridSearchCV(estimator=regressor, param_grid=parameters, scoring='neg_mean_squared_error', cv=10, n_jobs=4)
grid_search = grid_search.fit(X_train, y_train)

best_parameters = grid_search.best_params_
best_score = grid_search.best_score_
best_model = grid_search.best_estimator_
best_model = grid_search.best_estimator_
Do we have any way to extract the weights of the best model from GridSearchCV?
Thank you so much in advance.
Since you want the model weights saved in a CSV file, you can do the following. Note that best_model here is the KerasRegressor wrapper, so the underlying Keras model is reached through its model attribute:

import numpy as np

# get_weights() returns [kernel, bias] for a Dense layer; index 0 is the kernel
weight = best_model.model.layers[0].get_weights()[0]
np.savetxt('weight.csv', weight, fmt='%s', delimiter=',')
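
If you want to keep the whole network rather than one layer's kernel, a short sketch under the same assumption about the wrapper's model attribute:

# save the complete best model (architecture + weights); 'best_model.h5' is an example filename
best_model.model.save('best_model.h5')

# or grab every layer's weight arrays at once
all_weights = best_model.model.get_weights()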

Grid Search fit not accepting list of tensors

I have a siamese network and I want to perform a grid search on it using GridSearchCV.
So I create a model using the following function:
def createMod(learn_rate=0.01, optimizer='Adam'):
    # K.clear_session()
    # network definition
    base_network = create_base_network(input_shape)

    input_a = Input(shape=input_shape)
    input_b = Input(shape=input_shape)

    # because we re-use the same instance `base_network`,
    # the weights of the network will be shared across the two branches
    processed_a = base_network(input_a)
    processed_b = base_network(input_b)

    distance = Lambda(euclidean_distance,
                      output_shape=eucl_dist_output_shape)([processed_a, processed_b])
    prediction = Dense(1, activation='sigmoid')(distance)
    model = Model([input_a, input_b], prediction)

    if optimizer == 'SGD':
        opt = SGD(lr=learn_rate)
    elif optimizer == 'RMSprop':
        opt = RMSprop(lr=learn_rate)
    else:
        opt = Adam(lr=learn_rate)

    model.compile(loss='binary_crossentropy', optimizer=opt, metrics=[accuracy])
    return model
And then I create the KerasClassifier and GridSearchCV as follows:

model = KerasClassifier(build_fn=createMod, verbose=0)
param_grid = dict(epochs=epochs, batch_size=batch_size, learn_rate=learn_rate, optimizer=optimizer)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=5)

X = [tr_pairs[:, 0], tr_pairs[:, 1]]
grid_result = grid.fit(X, tr_y)
However this throws the following value error:
ValueError: Found input variables with inconsistent numbers of samples: [2, 1054]
The shape of tr_pairs[:, 0] and tr_pairs[:, 1] is (1054, 6), and tr_y is (1054,).
The code for the base networks is:
def create_base_network(input_shape):
    K.clear_session()
    encoder = build_encoder(latent_dim, n_in)
    decoder = build_decoder(latent_dim, n_in)
    item = Input(shape=(n_in, ))
    encoded_repr = encoder(item)
    reconstructed_item = decoder(encoded_repr)
    autoencoder = Model(item, reconstructed_item)
    return autoencoder
The code for the encoder and decoder is:

def build_encoder(latent_dim, input_dim):
    input_layer = Input(shape=(input_dim, ))
    h = Dense(32, activation='relu', activity_regularizer=regularizers.l1(10e-5))(input_layer)
    h = Dropout(0.1)(h)
    h = Dense(64, activation='relu')(h)
    h = Dropout(0.1)(h)
    # h = Dense(128, activation='relu')(h)
    # h = Dropout(0.1)(h)
    latent_repr = Dense(latent_dim, activation='relu')(h)
    return Model(input_layer, latent_repr)

def build_decoder(latent_dim, input_dim):
    model = Sequential()
    # model.add(Dense(128, input_dim=latent_dim, activation='relu'))
    # model.add(Dropout(0.1))
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.1))
    model.add(Dense(32, activation='relu'))
    model.add(Dropout(0.1))
    model.add(Dense(input_dim, activation='relu'))

    z = Input(shape=(latent_dim,))
    item = model(z)
    return Model(z, item)
The code works when I use the normal Keras model's .fit function, but not here. Is there a problem somewhere in my code, or is it simply not possible to feed multiple inputs to grid search? And if that is the case, is there a way I can still perform the grid search?
This is a workaround to pass multiple inputs. GridSearchCV validates X as a single array-like, so a list of two (1054, 6) arrays is seen as only 2 samples, which is what the error reports. I create a dummy model that receives a SINGLE input of shape (n_sample, 2, 6) and then split it into two parts using Lambda layers. You can modify this according to your siamese structure.
def createMod(optimizer='Adam'):
    combi_input = Input((2, 6))                       # (n_sample, 2, 6)
    input_a = Lambda(lambda x: x[:, 0])(combi_input)  # (n_sample, 6)
    input_b = Lambda(lambda x: x[:, 1])(combi_input)  # (n_sample, 6)

    c = Concatenate()([input_a, input_b])
    x = Dense(32)(c)
    prediction = Dense(1, activation='sigmoid')(x)

    model = Model(combi_input, prediction)
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model

tr_pairs = np.random.uniform(0, 1, (1054, 2, 6))
tr_y = np.random.randint(0, 2, 1054)

model = tf.keras.wrappers.scikit_learn.KerasClassifier(build_fn=createMod, verbose=0)

batch_size = [10, 20]
epochs = [10, 5]
optimizer = ['adam', 'SGD']
param_grid = dict(batch_size=batch_size, epochs=epochs, optimizer=optimizer)

grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
grid_result = grid.fit(tr_pairs, tr_y)
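
To carry this back to the actual siamese model, a rough sketch, assuming the create_base_network, euclidean_distance, and eucl_dist_output_shape helpers from the question, and that the pairs are already stacked as one (n_sample, 2, 6) array (otherwise np.stack([pairs_a, pairs_b], axis=1) builds it):

def createSiameseMod(learn_rate=0.01, optimizer='Adam'):
    # single combined input, split into the two pair branches inside the graph
    combi_input = Input((2, 6))
    input_a = Lambda(lambda x: x[:, 0])(combi_input)
    input_b = Lambda(lambda x: x[:, 1])(combi_input)

    # shared base network, as in the original model
    base_network = create_base_network((6,))
    processed_a = base_network(input_a)
    processed_b = base_network(input_b)

    distance = Lambda(euclidean_distance,
                      output_shape=eucl_dist_output_shape)([processed_a, processed_b])
    prediction = Dense(1, activation='sigmoid')(distance)

    model = Model(combi_input, prediction)
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model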

How can I get probability values for each class with the predict method on an ANN model in Keras

I'm new to deep learning, and I need help getting individual probabilities for each class from a Keras artificial neural network (ANN) model. I have an exoplanet catalog dataset from PHL, and I'm trying to predict whether a planet is habitable, maybe habitable, or not habitable. For now I have tried an ANN with some important columns, like:
dataToLearn = data[["P_DISTANCE","S_HZ_OPT_MIN", "S_HZ_OPT_MAX", "S_HZ_CON_MIN", "S_HZ_CON_MAX", "P_TYPE", "P_ESI", "P_HABITABLE"]]
class_names = list(dataToLearn.columns)
I got rid of some NaN values with:
dataToLearn = dataToLearn.dropna(how='all')
dataToLearn = dataToLearn.dropna(subset=['P_TYPE', 'P_ESI'])
then preprocessed the data:
labelencoder_pType = LabelEncoder()
dataToLearn["P_TYPE"] = labelencoder_pType.fit_transform(dataToLearn["P_TYPE"])
onehotencoder = ColumnTransformer([("P_TYPE", OneHotEncoder(),[5])], remainder = "passthrough")
dataToLearn = onehotencoder.fit_transform(dataToLearn)
#Dummy Variable Trap
dataToLearn = dataToLearn[:,1:]
dataToLearn = pd.DataFrame(dataToLearn)
X = dataToLearn.iloc[:,:10].values
Y = dataToLearn.iloc[:,10].values
Y = pd.get_dummies(Y).values
x_train, x_test, y_train, y_test = train_test_split(X,Y,test_size = 0.35)
y_test = y_test.astype(np.float64)
y_train = y_train.astype(np.float64)
sc_X = ColumnTransformer([("",StandardScaler(),slice(0,10))])
x_train = sc_X.fit_transform(x_train)
x_test = sc_X.transform(x_test)
As you can see, I have one-hot encoded the output (Y) values, but I'm not sure whether I need to do that in multiclass problems. In the next step I built the classifier as below.
def build_classifier():
    classifier = Sequential()  # initialize neural network
    classifier.add(Dense(units=10, kernel_initializer='uniform', activation='relu', input_dim=x_train.shape[1]))
    classifier.add(Dense(units=8, kernel_initializer='uniform', activation='relu'))
    classifier.add(Dropout(0.3))
    classifier.add(Dense(units=8, kernel_initializer='uniform', activation='relu'))
    classifier.add(Dropout(0.3))
    classifier.add(Dense(units=3, kernel_initializer='uniform', activation='softmax'))
    classifier.compile(optimizer='RMSprop', loss='categorical_crossentropy', metrics=['accuracy'])
    return classifier

classifier = KerasClassifier(build_fn=build_classifier, batch_size=32, epochs=150)
accuracies = cross_val_score(estimator=classifier, X=x_train, y=y_train, cv=10, n_jobs=-1)
accuracyMean = accuracies.mean()
classifier.fit(x_train, y_train)
Then I predicted on x_test with:
y_pred = classifier.predict(x_test)
The problem is that the predicted array (y_pred) does not have the same dimension as y_test, which is one-hot encoded according to the three possibilities. In y_pred I always get 0 (not habitable) or 2 (maybe habitable), never 1 (habitable), and the output is a single column. I think the model's failure to predict the 1 (habitable) case comes from the rarity of that case in the dataset, but I still don't know why y_pred has a single-column shape, and I can't find a good explanation of how to do multi-class classification with a Keras ANN on the internet.
Try adding class_weight, assigning a high weight to class 1:

class_weight = {0: 1.,
                1: 50.,
                2: 2.}

# class_weight is passed straight through to the underlying Keras fit;
# a clf__ prefix is only needed when the classifier sits inside a Pipeline
classifier.fit(x_train, y_train, class_weight=class_weight)
To get the class probabilities from the wrapped Keras Sequential model:

y_pred = classifier.predict_proba(x_test)
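
On the single-column shape of y_pred: the scikit-learn wrapper's predict returns class indices (the argmax over the three output probabilities), not one-hot rows, so comparing against the one-hot y_test requires an argmax on that side too. A minimal sketch:

import numpy as np

probs = classifier.predict_proba(x_test)  # shape (n_samples, 3): one probability per class
pred_labels = np.argmax(probs, axis=1)    # predicted class index per sample
true_labels = np.argmax(y_test, axis=1)   # decode the one-hot test labels
accuracy = np.mean(pred_labels == true_labels)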

Parallel computing in Spyder

I am not able to get my code to run when using n_jobs = -1 (on the last line). I get the same message every time:
"BrokenProcessPool: A task has failed to un-serialize. Please ensure that the arguments of the function are all picklable."
The code works with n_jobs = 1, but I need all processors, as the code would otherwise take very long to execute.
I have tried using if __name__ == '__main__':, but I am not sure how to use it and cannot get the code to run.
I have tried for ages, to no avail. Any help is highly appreciated. Here is the relevant code:
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
from keras.models import Sequential
from keras.layers import Dense
def build_classifier():
    classifier = Sequential()
    classifier.add(Dense(units=6, kernel_initializer='uniform', activation='relu', input_dim=11))
    classifier.add(Dense(units=6, kernel_initializer='uniform', activation='relu'))
    classifier.add(Dense(units=1, kernel_initializer='uniform', activation='sigmoid'))
    classifier.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return classifier

classifier = KerasClassifier(build_fn=build_classifier, batch_size=10, epochs=100)
accuracies = cross_val_score(estimator=classifier, X=X_train, y=y_train, cv=10, n_jobs=-1)
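
One common cause in Spyder (and on Windows generally) is that joblib starts worker processes by re-importing the script, so the parallel call has to live under the main guard the question mentions. A minimal sketch of how that guard is typically applied, assuming the imports and build_classifier above and that X_train and y_train can be created without side effects:

if __name__ == '__main__':
    # only this block runs in the parent process; worker processes
    # re-import the module without re-executing the guarded code
    classifier = KerasClassifier(build_fn=build_classifier, batch_size=10, epochs=100)
    accuracies = cross_val_score(estimator=classifier, X=X_train, y=y_train, cv=10, n_jobs=-1)
    print(accuracies.mean())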

Assertion Error when compiling LSTM with Keras

I have the following code:
max_features = 5000
maxlen = 140
model = Sequential()
model.add(Embedding(max_features, 128))
model.add(LSTM(128, activation = 'sigmoid', inner_activation = 'hard_sigmoid', return_sequences = False))
model.add(Dense(input_dim = 128, output_dim = 2, activation = 'softmax'))
optimizer = Adam(lr = 0.001, beta_1 = 0.9, beta_2 = 0.999, epsilon = 1e-8)
model.compile(loss = 'categorical_crossentropy', optimizer = optimizer)
model.fit(x_train, y_train, batch_size = 64, nb_epoch = 10, verbose = 2)
y_test_pred = model.predict_classes(x_test)
But every time I run it, I get an error at the line
model.compile(loss = 'categorical_crossentropy', optimizer = optimizer)
which states:
AssertionError: The number of inputs given to the inner function of scan does not match the number of inputs given to scan.
Does anyone know what that means?
Answer by the OP:
I have fixed this problem; it turned out to be caused by an outdated Theano version. So if you are experiencing this problem, update your Theano module!
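
As a quick sanity check before and after upgrading, the installed version can be inspected from Python (a trivial sketch):

import theano
print(theano.__version__)  # compare against the latest release on PyPI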
