Pipeline implementation with sklearn PCA and TensorFlow - Python

It gives another error:
The first argument to `Layer.call` must always be passed.
I cannot solve the problem: input_dim cannot be set as a constant, because PCA and SelectKBest will cut down the number of input features. And if you can help with getting the results out of the pipeline, I will be very grateful.
Link to the data: https://1drv.ms/u/s!AlHgQsqCKEIPiIxzdyWE0BfBHNocTQ?e=cxuSuo
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

def modelReg(inpt, opt='adam', kInitializer='glorot_uniform', dropout=0.05):
    model = Sequential()
    model.add(Dense(1024, activation='relu', input_dim=inpt, kernel_initializer=kInitializer))
    model.add(Dense(1024, activation='relu', kernel_initializer=kInitializer))
    model.add(Dense(512, activation='relu', kernel_initializer=kInitializer))
    model.add(Dropout(dropout))
    model.add(Dense(1, activation='sigmoid', kernel_initializer=kInitializer))
    model.compile(loss='mse', optimizer=opt, metrics=['mse', 'mae'])
    return model
features = []
features.append(('pca', PCA(n_components=10)))
features.append(('select_best', SelectKBest(k=10)))
feature_union = FeatureUnion(features)

regressor = KerasRegressor(build_fn=modelReg(inpt), epochs=3, batch_size=500, verbose=1)

estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('feature_union', feature_union))
estimators.append(('regressor', regressor))
model = Pipeline(estimators)
model.fit(allData.drop(['VancouverH'], axis=1), allData['VancouverH'])

With KerasRegressor, arguments for the model-building function are written as keyword arguments to KerasRegressor itself, and the function object is passed uncalled:
kearsEstimator = ('kR', KerasRegressor(createModel, inpt=trainDataX.shape[1],
                                       epochs=5, batch_size=180, verbose=1))

Like this, not like this:

kearsEstimator = ('kR', KerasRegressor(createModel(inpt),
                                       epochs=5, batch_size=180, verbose=1))
Then the pipeline is passed to GridSearchCV, and the parameter names for the grid are written with the step-name prefix:
estimators = []
estimators.append(kearsEstimator)

param_grid = {
    'kR__optimizer': ['adam']  # 'RMSprop', 'Adam', 'Adamax', 'sgd'
}

grid = GridSearchCV(Pipeline(estimators), param_grid, cv=5)
grid.fit(trainDataX, trainDataY)
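
Applied to the original pipeline, a minimal sketch of the corrected version might look like the following. It assumes the modelReg and feature_union definitions above; inpt is set to 20 because FeatureUnion concatenates the 10 PCA components with the 10 features from SelectKBest, and 'regressor__opt' relies on the wrapper forwarding keyword arguments that match the build function's signature:

# sketch only: inpt=20 is derived from PCA(n_components=10) + SelectKBest(k=10)
regressor = KerasRegressor(modelReg, inpt=20, epochs=3, batch_size=500, verbose=1)

estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('feature_union', feature_union))
estimators.append(('regressor', regressor))

param_grid = {
    'regressor__opt': ['adam', 'RMSprop']  # forwarded to modelReg's opt argument
}
grid = GridSearchCV(Pipeline(estimators), param_grid, cv=5)
grid.fit(allData.drop(['VancouverH'], axis=1), allData['VancouverH'])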

Related

How to extract weights of the best ANN resulting from GridSearchCV?

After running hyperparameter tuning with GridSearchCV using the code below:
## Tuning the ANN
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import GridSearchCV
from keras.models import Sequential
from keras.layers import Dense

def build_regressor(hidden_nodes, hidden_layers, optimizer):
    regressor = Sequential()
    regressor.add(Dense(units=hidden_nodes, kernel_initializer='uniform', activation='relu', input_dim=7))
    for layer_size in range(hidden_layers):
        regressor.add(Dense(hidden_nodes, kernel_initializer='uniform', activation='relu'))
    regressor.add(Dense(units=1, kernel_initializer='uniform', activation='linear'))
    regressor.compile(optimizer=optimizer, loss='mse', metrics=['mse'])
    return regressor

regressor = KerasRegressor(build_fn=build_regressor, epochs=100)

# Create a dictionary of tuning parameters
parameters = {'hidden_nodes': list(range(2, 101)), 'hidden_layers': [1, 2, 3], 'batch_size': [25, 32], 'optimizer': ['adam', 'nadam', 'RMSprop', 'adamax']}

grid_search = GridSearchCV(estimator=regressor, param_grid=parameters, scoring='neg_mean_squared_error', cv=10, n_jobs=4)
grid_search = grid_search.fit(X_train, y_train)

best_parameters = grid_search.best_params_
best_score = grid_search.best_score_
best_model = grid_search.best_estimator_
best_model = grid_search.best_estimator_
Do we have any way to extract the weights of the best model from GridSearchCV?
Thank you so much in advance.
Since you want the model weights saved in a CSV file, you can do the following. Note that best_model here is the KerasRegressor wrapper, so the underlying Keras model is reached through its model attribute:

import numpy as np

# get_weights() returns [kernel, bias] for a Dense layer; index 0 is the kernel
weight = best_model.model.layers[0].get_weights()[0]
np.savetxt('weight.csv', weight, fmt='%s', delimiter=',')
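
If you want to keep the whole network rather than one layer's kernel, a short sketch under the same assumption about the wrapper's model attribute:

# save the complete best model (architecture + weights); 'best_model.h5' is an example filename
best_model.model.save('best_model.h5')

# or grab every layer's weight arrays at once
all_weights = best_model.model.get_weights()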

Grid Search fit not accepting list of tensors

I have a siamese network and I want to perform a grid search on it using GridSearchCV.
So I create a model using the following function:
def createMod(learn_rate=0.01, optimizer='Adam'):
    # K.clear_session()
    # network definition
    base_network = create_base_network(input_shape)

    input_a = Input(shape=input_shape)
    input_b = Input(shape=input_shape)

    # because we re-use the same instance `base_network`,
    # the weights of the network will be shared across the two branches
    processed_a = base_network(input_a)
    processed_b = base_network(input_b)

    distance = Lambda(euclidean_distance,
                      output_shape=eucl_dist_output_shape)([processed_a, processed_b])
    prediction = Dense(1, activation='sigmoid')(distance)
    model = Model([input_a, input_b], prediction)

    if optimizer == 'SGD':
        opt = SGD(lr=learn_rate)
    elif optimizer == 'RMSprop':
        opt = RMSprop(lr=learn_rate)
    else:
        opt = Adam(lr=learn_rate)

    model.compile(loss='binary_crossentropy', optimizer=opt, metrics=[accuracy])
    return model
And then I create the KerasClassifier and GridSearchCV as follows:

model = KerasClassifier(build_fn=createMod, verbose=0)
param_grid = dict(epochs=epochs, batch_size=batch_size, learn_rate=learn_rate, optimizer=optimizer)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=5)

X = [tr_pairs[:, 0], tr_pairs[:, 1]]
grid_result = grid.fit(X, tr_y)
However this throws the following value error:
ValueError: Found input variables with inconsistent numbers of samples: [2, 1054]
The shape of tr_pairs[:, 0] and tr_pairs[:, 1] is (1054, 6), and tr_y is (1054,).
The code for the base networks is:
def create_base_network(input_shape):
    K.clear_session()
    encoder = build_encoder(latent_dim, n_in)
    decoder = build_decoder(latent_dim, n_in)
    item = Input(shape=(n_in, ))
    encoded_repr = encoder(item)
    reconstructed_item = decoder(encoded_repr)
    autoencoder = Model(item, reconstructed_item)
    return autoencoder
The code for the encoder and decoder is:

def build_encoder(latent_dim, input_dim):
    input_layer = Input(shape=(input_dim, ))
    h = Dense(32, activation='relu', activity_regularizer=regularizers.l1(10e-5))(input_layer)
    h = Dropout(0.1)(h)
    h = Dense(64, activation='relu')(h)
    h = Dropout(0.1)(h)
    # h = Dense(128, activation='relu')(h)
    # h = Dropout(0.1)(h)
    latent_repr = Dense(latent_dim, activation='relu')(h)
    return Model(input_layer, latent_repr)

def build_decoder(latent_dim, input_dim):
    model = Sequential()
    # model.add(Dense(128, input_dim=latent_dim, activation='relu'))
    # model.add(Dropout(0.1))
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.1))
    model.add(Dense(32, activation='relu'))
    model.add(Dropout(0.1))
    model.add(Dense(input_dim, activation='relu'))

    z = Input(shape=(latent_dim,))
    item = model(z)
    return Model(z, item)
The code works when I use the normal Keras model's .fit function, but not here. Is there a problem somewhere in my code, or is it simply not possible to feed multiple inputs to grid search? And if that is the case, is there a way I can still perform the grid search?
This is a workaround to pass multiple inputs. GridSearchCV validates X as a single array-like, so a list of two (1054, 6) arrays is seen as only 2 samples, which is what the error reports. I create a dummy model that receives a SINGLE input of shape (n_sample, 2, 6) and then split it into two parts using Lambda layers. You can modify this according to your siamese structure.
def createMod(optimizer='Adam'):
    combi_input = Input((2, 6))                       # (n_sample, 2, 6)
    input_a = Lambda(lambda x: x[:, 0])(combi_input)  # (n_sample, 6)
    input_b = Lambda(lambda x: x[:, 1])(combi_input)  # (n_sample, 6)

    c = Concatenate()([input_a, input_b])
    x = Dense(32)(c)
    prediction = Dense(1, activation='sigmoid')(x)

    model = Model(combi_input, prediction)
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model

tr_pairs = np.random.uniform(0, 1, (1054, 2, 6))
tr_y = np.random.randint(0, 2, 1054)

model = tf.keras.wrappers.scikit_learn.KerasClassifier(build_fn=createMod, verbose=0)

batch_size = [10, 20]
epochs = [10, 5]
optimizer = ['adam', 'SGD']
param_grid = dict(batch_size=batch_size, epochs=epochs, optimizer=optimizer)

grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
grid_result = grid.fit(tr_pairs, tr_y)
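
To carry this back to the actual siamese model, a rough sketch, assuming the create_base_network, euclidean_distance, and eucl_dist_output_shape helpers from the question, and that the pairs are already stacked as one (n_sample, 2, 6) array (otherwise np.stack([pairs_a, pairs_b], axis=1) builds it):

def createSiameseMod(learn_rate=0.01, optimizer='Adam'):
    # single combined input, split into the two pair branches inside the graph
    combi_input = Input((2, 6))
    input_a = Lambda(lambda x: x[:, 0])(combi_input)
    input_b = Lambda(lambda x: x[:, 1])(combi_input)

    # shared base network, as in the original model
    base_network = create_base_network((6,))
    processed_a = base_network(input_a)
    processed_b = base_network(input_b)

    distance = Lambda(euclidean_distance,
                      output_shape=eucl_dist_output_shape)([processed_a, processed_b])
    prediction = Dense(1, activation='sigmoid')(distance)

    model = Model(combi_input, prediction)
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model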

How can I get probability values for each class with the predict method on an ANN model in Keras

I'm new to deep learning, and I need help getting individual probabilities for each class from a Keras artificial neural network (ANN) model. I have an exoplanet catalog dataset from PHL, and I'm trying to predict whether a planet is habitable, maybe habitable, or not habitable. For now I have tried an ANN with some important columns, like:
dataToLearn = data[["P_DISTANCE","S_HZ_OPT_MIN", "S_HZ_OPT_MAX", "S_HZ_CON_MIN", "S_HZ_CON_MAX", "P_TYPE", "P_ESI", "P_HABITABLE"]]
class_names = list(dataToLearn.columns)
I got rid of some NaN values with:
dataToLearn = dataToLearn.dropna(how='all')
dataToLearn = dataToLearn.dropna(subset=['P_TYPE', 'P_ESI'])
then preprocessed the data:
labelencoder_pType = LabelEncoder()
dataToLearn["P_TYPE"] = labelencoder_pType.fit_transform(dataToLearn["P_TYPE"])
onehotencoder = ColumnTransformer([("P_TYPE", OneHotEncoder(),[5])], remainder = "passthrough")
dataToLearn = onehotencoder.fit_transform(dataToLearn)
#Dummy Variable Trap
dataToLearn = dataToLearn[:,1:]
dataToLearn = pd.DataFrame(dataToLearn)
X = dataToLearn.iloc[:,:10].values
Y = dataToLearn.iloc[:,10].values
Y = pd.get_dummies(Y).values
x_train, x_test, y_train, y_test = train_test_split(X,Y,test_size = 0.35)
y_test = y_test.astype(np.float64)
y_train = y_train.astype(np.float64)
sc_X = ColumnTransformer([("",StandardScaler(),slice(0,10))])
x_train = sc_X.fit_transform(x_train)
x_test = sc_X.transform(x_test)
As you can see, I have one-hot encoded the output (Y) values, but I'm not sure whether I need to do that in multiclass problems. In the next step I built the classifier as below.
def build_classifier():
    classifier = Sequential()  # initialize neural network
    classifier.add(Dense(units=10, kernel_initializer='uniform', activation='relu', input_dim=x_train.shape[1]))
    classifier.add(Dense(units=8, kernel_initializer='uniform', activation='relu'))
    classifier.add(Dropout(0.3))
    classifier.add(Dense(units=8, kernel_initializer='uniform', activation='relu'))
    classifier.add(Dropout(0.3))
    classifier.add(Dense(units=3, kernel_initializer='uniform', activation='softmax'))
    classifier.compile(optimizer='RMSprop', loss='categorical_crossentropy', metrics=['accuracy'])
    return classifier

classifier = KerasClassifier(build_fn=build_classifier, batch_size=32, epochs=150)
accuracies = cross_val_score(estimator=classifier, X=x_train, y=y_train, cv=10, n_jobs=-1)
accuracyMean = accuracies.mean()
classifier.fit(x_train, y_train)
Then I predicted on x_test with:
y_pred = classifier.predict(x_test)
The problem is that the predicted array (y_pred) does not have the same dimension as y_test, which is one-hot encoded according to the three possibilities. In y_pred I always get 0 (not habitable) or 2 (maybe habitable), never 1 (habitable), and the output is a single column. I think the model's failure to predict the 1 (habitable) case comes from the rarity of that case in the dataset, but I still don't know why y_pred has a single-column shape, and I can't find a good explanation of how to do multi-class classification with a Keras ANN on the internet.
Try adding class_weight, assigning a high weight to class 1:

class_weight = {0: 1.,
                1: 50.,
                2: 2.}

# class_weight is passed straight through to the underlying Keras fit;
# a clf__ prefix is only needed when the classifier sits inside a Pipeline
classifier.fit(x_train, y_train, class_weight=class_weight)
To get the class probabilities from the wrapped Keras Sequential model:

y_pred = classifier.predict_proba(x_test)
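
On the single-column shape of y_pred: the scikit-learn wrapper's predict returns class indices (the argmax over the three output probabilities), not one-hot rows, so comparing against the one-hot y_test requires an argmax on that side too. A minimal sketch:

import numpy as np

probs = classifier.predict_proba(x_test)  # shape (n_samples, 3): one probability per class
pred_labels = np.argmax(probs, axis=1)    # predicted class index per sample
true_labels = np.argmax(y_test, axis=1)   # decode the one-hot test labels
accuracy = np.mean(pred_labels == true_labels)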

Parallel computing in Spyder

I am not able to get my code to run when using n_jobs = -1 (on the last line). I get the same message every time:
"BrokenProcessPool: A task has failed to un-serialize. Please ensure that the arguments of the function are all picklable."
The code works with n_jobs = 1, but I need all processors, as the code would otherwise take very long to execute.
I have tried using if __name__ == '__main__':, but I am not sure how to use it and cannot get the code to run.
I have tried for ages, to no avail. Any help is highly appreciated. Here is the relevant code:
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
from keras.models import Sequential
from keras.layers import Dense
def build_classifier():
    classifier = Sequential()
    classifier.add(Dense(units=6, kernel_initializer='uniform', activation='relu', input_dim=11))
    classifier.add(Dense(units=6, kernel_initializer='uniform', activation='relu'))
    classifier.add(Dense(units=1, kernel_initializer='uniform', activation='sigmoid'))
    classifier.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return classifier

classifier = KerasClassifier(build_fn=build_classifier, batch_size=10, epochs=100)
accuracies = cross_val_score(estimator=classifier, X=X_train, y=y_train, cv=10, n_jobs=-1)
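
One common cause in Spyder (and on Windows generally) is that joblib starts worker processes by re-importing the script, so the parallel call has to live under the main guard the question mentions. A minimal sketch of how that guard is typically applied, assuming the imports and build_classifier above and that X_train and y_train can be created without side effects:

if __name__ == '__main__':
    # only this block runs in the parent process; worker processes
    # re-import the module without re-executing the guarded code
    classifier = KerasClassifier(build_fn=build_classifier, batch_size=10, epochs=100)
    accuracies = cross_val_score(estimator=classifier, X=X_train, y=y_train, cv=10, n_jobs=-1)
    print(accuracies.mean())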

Assertion Error when compiling LSTM with Keras

I have the following code:
max_features = 5000
maxlen = 140
model = Sequential()
model.add(Embedding(max_features, 128))
model.add(LSTM(128, activation = 'sigmoid', inner_activation = 'hard_sigmoid', return_sequences = False))
model.add(Dense(input_dim = 128, output_dim = 2, activation = 'softmax'))
optimizer = Adam(lr = 0.001, beta_1 = 0.9, beta_2 = 0.999, epsilon = 1e-8)
model.compile(loss = 'categorical_crossentropy', optimizer = optimizer)
model.fit(x_train, y_train, batch_size = 64, nb_epoch = 10, verbose = 2)
y_test_pred = model.predict_classes(x_test)
But every time I run it, I get an error at the line
model.compile(loss = 'categorical_crossentropy', optimizer = optimizer)
which states:
AssertionError: The number of inputs given to the inner function of scan does not match the number of inputs given to scan.
Does anyone know what that means?
Answer by the OP:
I have fixed this problem; it turned out to be caused by an outdated Theano version. So if you are experiencing this problem, update your Theano module!
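
As a quick sanity check before and after upgrading, the installed version can be inspected from Python (a trivial sketch):

import theano
print(theano.__version__)  # compare against the latest release on PyPI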
