I am trying to use a voting model from the ensembling methods. How can I add cross-validation to it?
Thanks
model_1 = SGDClassifier(random_state=0)
model_2 = DecisionTreeClassifier(criterion='entropy', max_depth=12, max_leaf_nodes=35, splitter='best')
model_3 = GradientBoostingClassifier(n_estimators=100, random_state=42)
model_4 = MLPClassifier(random_state=42)
# model_3 = KNeighborsClassifier(n_neighbors=2)
X = df1.drop('product', axis = 1)
y = df1['product']
X_new = res_fit.transform(X)
#X_new =pd.DataFrame(X_new,columns = X.iloc[:,res_fit.support_].columns)
# y_pred = cross_val_predict(model, X_new, y, cv=10)  # my attempt; 'model' is undefined at this point -- see the sketch below
X_train, X_test, y_train, y_test = train_test_split(X_new, y, test_size=0.25, random_state=42)
model_5 = VotingClassifier([('SGD', model_1),
                            ('Tree', model_2),
                            ('GradBoost', model_3),
                            ('MLP', model_4)],
                           voting='hard')
for model in (model_1, model_2, model_3, model_4, model_5):
    model.fit(X_train, y_train)
    print(model.__class__.__name__, model.score(X_test, y_test))
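One straightforward way to add cross-validation is to score the whole voting ensemble with cross_val_score instead of (or alongside) the single train/test split. A minimal sketch, assuming X_new and y from above:

from sklearn.model_selection import cross_val_score, cross_val_predict

# 10-fold cross-validation of the voting ensemble
scores = cross_val_score(model_5, X_new, y, cv=10)
print("CV accuracy: %.3f +/- %.3f" % (scores.mean(), scores.std()))

# or collect out-of-fold predictions for every sample
y_pred = cross_val_predict(model_5, X_new, y, cv=10)

The same call works for each base model in the loop, so you can compare cross-validated scores instead of single-split scores.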
I want to predict the future price of Bitcoin using an LSTM, but as I am new to this domain I am stuck on how to print just the future price. Say I want to predict the price three months ahead: what should I do to get just the BTC closing price three months from now?
Any help would be really appreciated.
def prepare_data(df, target_col, window_len, zero_base, test_size):
    train_data, test_data = train_test_split(df, test_size=test_size)
    X_train = extract_window_data(train_data, window_len, zero_base)
    X_test = extract_window_data(test_data, window_len, zero_base)
    y_train = train_data[target_col][window_len:].values
    y_test = test_data[target_col][window_len:].values
    if zero_base:
        y_train = y_train / train_data[target_col][:-window_len].values - 1
        y_test = y_test / test_data[target_col][:-window_len].values - 1
    return train_data, test_data, X_train, X_test, y_train, y_test
def build_lstm_model(input_data, output_size, neurons=100, activ_func='linear',
                     dropout=0.2, loss='mse', optimizer='adam'):
    model = Sequential()
    model.add(LSTM(neurons, input_shape=(input_data.shape[1], input_data.shape[2])))
    model.add(Dropout(dropout))
    model.add(Dense(units=output_size))
    model.add(Activation(activ_func))
    model.compile(loss=loss, optimizer=optimizer)
    return model
np.random.seed(42)
window_len = 5
test_size = 0.2
zero_base = True
lstm_neurons = 100
epochs = 200
batch_size = 32
loss = 'mse'
dropout = 0.2
optimizer = 'adam'
train, test, X_train, X_test, y_train, y_test = prepare_data(
hist, target_col, window_len=window_len, zero_base=zero_base, test_size=test_size)
print(X_test)
model = build_lstm_model(
X_train, output_size=1, neurons=lstm_neurons, dropout=dropout, loss=loss,
optimizer=optimizer)
history = model.fit(
X_train, y_train, validation_data=(X_test, y_test), epochs=epochs, batch_size=batch_size, verbose=1, shuffle=True)
targets = test[target_col][window_len:]
preds = model.predict(X_test).squeeze()
preds = test[target_col].values[:-window_len] * (preds + 1)  # undo the zero_base scaling
preds = pd.Series(index=targets.index, data=preds)
print(test)
line_plot(targets, preds, 'actual', 'prediction', lw=3)
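Since the model only predicts one step ahead from a window_len-day window, getting a price three months out requires multi-step forecasting. A minimal recursive sketch, assuming daily data and that the model was trained on the closing price as its only feature (it reuses the zero_base scaling from prepare_data):

import numpy as np

n_days = 90  # roughly three months of daily steps
last_window = hist[target_col].values[-window_len:].copy()  # most recent real prices
future = []
for _ in range(n_days):
    base = last_window[0]
    x = (last_window / base - 1).reshape(1, window_len, 1)  # zero_base scaling
    pred = float(model.predict(x, verbose=0).squeeze())
    next_price = base * (pred + 1)                          # undo the scaling
    future.append(next_price)
    last_window = np.append(last_window[1:], next_price)    # slide the window
print("Predicted BTC close ~3 months out:", future[-1])

Keep in mind that errors compound quickly over 90 recursive steps, so treat the result as illustrative rather than a reliable forecast.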
I am trying to plot a confusion-matrix heatmap that contains both the percentages and the counts.
I use the code from this link: https://gist.github.com/mesquita/f6beffcc2579c6f3a97c9d93e278a9f1
The error message:
cm = confusion_matrix(y_true, y_pred, labels=labels)
  File "C:\Users\XX\anaconda3\envs\yamnet\lib\site-packages\sklearn\metrics\_classification.py", line 316, in confusion_matrix
    raise ValueError("At least one label specified must be in y_true")
ValueError: At least one label specified must be in y_true
This is my code:
encoder = LabelBinarizer()
print("encoder", encoder)
labels = encoder.fit_transform(y)
print("label ", labels)
print("y", y)
# Save the names of the classes for future using.
np.save(fname, encoder.classes_)
num_classes = len(np.unique(y))
# Generate the model
general_model = generate_model(num_classes, num_hidden=num_hidden,
                               activation=activation)
general_model.compile(optimizer=optimizer, loss='categorical_crossentropy',
                      metrics=['accuracy'])
# Create some callbacks
callbacks = [tf.keras.callbacks.ModelCheckpoint(filepath=fname, monitor='val_loss',
                                                save_best_only=True),
             tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.9,
                                                  patience=15, verbose=1,
                                                  min_lr=0.000001)]
X, labels = shuffle(X, labels)
X_train, X_test, y_train, y_test = train_test_split(X, labels, test_size=0.20)
history = general_model.fit(X_train, y_train, epochs=epochs, validation_split=0.20,
                            batch_size=batch_size, callbacks=callbacks, verbose=1)
score = general_model.evaluate(X_test, y_test, verbose=0)
print(f'Test loss: {score[0]} / Test accuracy: {score[1]}')
y_pred = general_model.predict(X_test)
y_pred = (y_pred.argmax(axis=1))
y_test = (y_test.argmax(axis=1))
cm_analysis(y_test, y_pred, "ConfusionMatrix", y, X, ymap=None, figsize=(17, 17))
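That error usually means the labels passed to confusion_matrix never occur in y_true: after argmax, y_test and y_pred hold integer class indices, while y (passed as the labels argument) holds the original class names. One way to reconcile them, sketched under the assumption that cm_analysis's fourth argument is the labels list (as in the linked gist), is to map the indices back to names with the fitted encoder:

# map integer indices back to the original class names
class_names = encoder.classes_
y_test_names = class_names[y_test]
y_pred_names = class_names[y_pred]

# now every label in the list can actually appear in y_true
cm_analysis(y_test_names, y_pred_names, "ConfusionMatrix", class_names,
            ymap=None, figsize=(17, 17))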
I am using a neural network. When I try to compare the predictions with the real values from the test set, I cannot do it because it will not let me put the predictions into a pandas Series. Basically, I am not able to get test_predictions.shape = (10092,) instead of (10092, 1). This extra "1" is causing me all the trouble. Can somebody help?
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.7, random_state=101)
model = keras.Sequential()
model.add(Dense(500,activation='relu'))
model.add(Dense(500,activation='relu'))
model.add(Dense(500,activation='relu'))
model.add(Dense(1))
model.compile(optimizer='rmsprop',loss = 'mse')
model.fit(X_train, y_train, epochs=100, batch_size=25, verbose=1, validation_split=0.2)
test_predictions = model.predict(X_test)
y_test = pd.Series(y_test)
test_predictions = pd.Series(test_predictions)
test_predictions = np.squeeze(test_predictions) should do the trick; it collapses all dimensions that have just one element.
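For context, a minimal sketch of where that fix slots into the code above:

import numpy as np
import pandas as pd

test_predictions = model.predict(X_test)         # shape (10092, 1)
test_predictions = np.squeeze(test_predictions)  # shape (10092,)
test_predictions = pd.Series(test_predictions)   # now this works

model.predict(X_test).flatten() on the NumPy array would achieve the same result.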
I want to apply the SVM classifier to a problem where the target vector has two classes. SVM raises a "bad input" error when I pass such a target vector. Is it possible to provide such input to an SVM? If not, how can I work around this issue?
Y = np.zeros((len(y), max(y)+1))
for i in range(len(y)):
    Y[i, y[i]] = 1
from sklearn.model_selection import KFold
kf = KFold(n_splits=3)
kf.get_n_splits(X)
print(kf)
# prints: KFold(n_splits=3, random_state=None, shuffle=False)
for train_index, test_index in kf.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = Y[train_index], Y[test_index]
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
classifier = SVC(kernel = 'linear', random_state = 0)
classifier.fit(X_train, y_train)
y_pred = classifier.predict(X_test)
Matrix Y is the one-hot encoding of y, one column per class (screenshot omitted).
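SVC expects y as a 1-D array of class labels, not a one-hot matrix, which is why it rejects Y as bad input. A minimal sketch of the workaround: collapse Y back to label indices with argmax (or simply keep using the original y) before fitting:

import numpy as np
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

y_labels = np.argmax(Y, axis=1)  # one-hot matrix back to 1-D class labels

kf = KFold(n_splits=3)
for train_index, test_index in kf.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y_labels[train_index], y_labels[test_index]
    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)
    classifier = SVC(kernel='linear', random_state=0)
    classifier.fit(X_train, y_train)
    y_pred = classifier.predict(X_test)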
I'm trying to print the accuracy score for an XGBoost multiclass classifier. However, I'm stuck on this error:
ValueError: Classification metrics can't handle a mix of
multilabel-indicator and binary targets
I think y_test must not be one-hot encoded when it is passed to accuracy_score()? But everything I've tried creates more errors. Any idea how I can get this to work?
Code:
X = X.reshape(X.shape[0], -1)
print(X.shape)
# Split the dataset
x_train, x_test, y_train, y_test = train_test_split(X, yy, test_size=0.2, random_state=42, stratify=y)
dtrain = xgb.DMatrix(data=x_train, label=y_train)
dtest = xgb.DMatrix(data=x_test, label=y_test)
eval_list = [(dtest, 'eval')]
# Set the training parameters
params = {
    'max_depth': 3,
    'objective': 'multi:softmax',
    'num_class': 3,
    'tree_method': 'gpu_hist'
}
# Train the model
model = xgb.train(params, dtrain, evals=eval_list, early_stopping_rounds=20, verbose_eval=True)
# Evaluate predictions
y_pred = model.predict(dtest)
predictions = [round(value) for value in y_pred]
accuracy = accuracy_score(y_test, predictions)
print("Accuracy: %.2f%%" % (accuracy * 100.0))
Adding argmax to y_test seemed to work:
accuracy = accuracy_score(y_test.argmax(axis=1), predictions)
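That works because y_test came from the one-hot encoded yy, so argmax recovers the class index, while a 'multi:softmax' objective already returns class indices, which also makes the rounding step redundant. A slightly cleaner equivalent, under that assumption:

accuracy = accuracy_score(y_test.argmax(axis=1), y_pred.astype(int))
print("Accuracy: %.2f%%" % (accuracy * 100.0))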