I want to add a colorbar to each of the subplots in the code below. My attempts so far add all the colorbars to the last subplot at the end of the loop.
print(__doc__)

import matplotlib.pyplot as plt
from sklearn.datasets import fetch_openml
from sklearn.neural_network import MLPClassifier

# fetch_mldata was removed from scikit-learn; fetch_openml is its replacement
mnist = fetch_openml('mnist_784', version=1, as_frame=False)

# rescale the data, use the traditional train/test split
X, y = mnist.data / 255., mnist.target
X_train, X_test = X[:60000], X[60000:]
y_train, y_test = y[:60000], y[60000:]

# mlp = MLPClassifier(hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
#                     solver='sgd', verbose=10, tol=1e-4, random_state=1)
mlp = MLPClassifier(hidden_layer_sizes=(50,), max_iter=10, alpha=1e-4,
                    solver='sgd', verbose=10, tol=1e-4, random_state=1,
                    learning_rate_init=.1)
mlp.fit(X_train, y_train)
print("Training set score: %f" % mlp.score(X_train, y_train))
print("Test set score: %f" % mlp.score(X_test, y_test))

fig, axes = plt.subplots(4, 4)
# use global min / max to ensure all weights are shown on the same scale
vmin, vmax = mlp.coefs_[0].min(), mlp.coefs_[0].max()
for coef, ax in zip(mlp.coefs_[0].T, axes.ravel()):
    ax.matshow(coef.reshape(28, 28), cmap=plt.cm.gray, vmin=.5 * vmin,
               vmax=.5 * vmax)
    ax.set_xticks(())
    ax.set_yticks(())
plt.show()
UPDATE:
Based on the link in the comment below, here is the code that adds a single colorbar to the right of the figure:
fig, axes = plt.subplots(4, 4)
# use global min / max to ensure all weights are shown on the same scale
vmin, vmax = mlp.coefs_[0].min(), mlp.coefs_[0].max()
for coef, ax in zip(mlp.coefs_[0].T, axes.ravel()):
    im = ax.matshow(coef.reshape(28, 28), cmap=plt.cm.gray,
                    vmin=.5 * vmin, vmax=.5 * vmax)
    ax.set_xticks(())
    ax.set_yticks(())

# make room on the right, then draw one shared colorbar there
fig.subplots_adjust(right=0.8)
cbar_ax = fig.add_axes([0.85, 0.15, 0.05, 0.7])
fig.colorbar(im, cax=cbar_ax)
plt.show()
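For the original goal of one colorbar per subplot, a minimal sketch (assuming the same mlp and loop as above) is to keep the mappable returned by matshow and pass the subplot's own axes to fig.colorbar:

fig, axes = plt.subplots(4, 4, figsize=(10, 10))
vmin, vmax = mlp.coefs_[0].min(), mlp.coefs_[0].max()
for coef, ax in zip(mlp.coefs_[0].T, axes.ravel()):
    im = ax.matshow(coef.reshape(28, 28), cmap=plt.cm.gray,
                    vmin=.5 * vmin, vmax=.5 * vmax)
    ax.set_xticks(())
    ax.set_yticks(())
    # attach a colorbar to this subplot only; matplotlib steals a strip of
    # space from this axes to draw it
    fig.colorbar(im, ax=ax)
plt.show()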
Based on the method plot_series in this notebook, I would like to plot a timeseries in 3D, where my points consist of x, y coordinates and time.
The problem appears when I try to plot the target point, which raises this exception:
18 if y_true is not None:
---> 19 ax.plot3D(n_steps+1, x_true, y_true, "bo", markersize=10, label="Target")
TypeError: object of type 'int' has no len()
My code is below. I have a 9-step timeseries, and I would like to plot the target point at the 10th step as well. How can I do this?
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure

def plot_series(x_train, y_train, n_steps=10, x_true=None, y_true=None,
                x_pred=None, y_pred=None, x_label="$time$", y_label="$x$",
                z_label="$y$", legend=True):
    figure(figsize=(8, 6), dpi=80)
    ax = plt.axes(projection='3d')
    time = np.arange(start=0, stop=len(x_train), step=1)
    # base plot
    ax.plot3D(time, x_train, y_train, ".-")
    if y_true is not None:
        ax.plot3D(n_steps+1, x_true, y_true, "bo", markersize=10, label="Target")
    if y_pred is not None:
        ax.plot3D(n_steps+1, x_pred, y_pred, "rx", markersize=10, label="Prediction")
    ax.grid(True)
    if x_label:
        ax.set_xlabel(x_label, fontsize=16)
    if y_label:
        ax.set_ylabel(y_label, fontsize=16, rotation=0)
    if z_label:
        ax.set_zlabel(z_label, fontsize=16, rotation=0)
    if legend and (y_true or y_pred):
        ax.legend(fontsize=14, loc="upper left")
# single timeseries from the training set
x_r = [0.58114803, 0.5591796, 0.59348005, 0.59550647, 0.61035596,
       0.4759958, 0.56246371, 0.51623335, 0.56018264]
y_r = [0.37528117, 0.52601401, 0.4105518, 0.41212707, 0.42236306,
       0.36568968, 0.53288641, 0.42619483, 0.48411763]
# target point for that timeseries on the training set
x_t = [0.60137904]
y_t = [0.37068267]

plot_series(x_r, y_r, 9, x_true=x_t, y_true=y_t)
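A likely fix (a sketch, using the variables above): plot3D expects sequences, so passing the scalar n_steps+1 as the first coordinate is what raises the TypeError. Wrapping it in a list of the same length as x_true and y_true makes the call work:

    if y_true is not None:
        # wrap the scalar in a list so all three arguments are sequences
        ax.plot3D([n_steps+1], x_true, y_true, "bo", markersize=10, label="Target")
    if y_pred is not None:
        ax.plot3D([n_steps+1], x_pred, y_pred, "rx", markersize=10, label="Prediction")

Note that since time runs from 0 to len(x_train)-1 = 8 here, the 10th step sits at index 9, so [n_steps] rather than [n_steps+1] may be the x position you actually want.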
When performing classification, we often want not only the predicted class label but also a probability, i.e. some measure of certainty or confidence in that label. Probabilities can be much more informative than labels, but to convey likelihood they need to be calibrated: the predicted probability should reflect the true frequency of the event. For instance, if 10 observations receive a probability of 0.8 and the probabilities are calibrated, we expect around 8 of them to belong to the positive class.
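As a minimal sketch of that idea (synthetic data, purely for illustration): if labels are sampled so that each observation is positive with exactly its predicted probability, calibration_curve returns per-bin mean probabilities and positive fractions that nearly coincide.

import numpy as np
from sklearn.calibration import calibration_curve

rng = np.random.default_rng(0)
probs = rng.uniform(0, 1, 10000)                      # predicted probabilities
y = (rng.uniform(0, 1, 10000) < probs).astype(int)    # labels drawn at exactly those rates
frac_pos, mean_pred = calibration_curve(y, probs, n_bins=10)
print(np.round(mean_pred, 2))  # mean predicted probability per bin
print(np.round(frac_pos, 2))   # observed fraction of positives, ~ the same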
sklearn's calibration_curve supports only binary classification. How can we extend it to multi-class problems and plot a Probability Calibration Curve when len(np.unique(y_true)) > 2? Here is my code, which plots the curve for binary classification.
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import (MultipleLocator, AutoMinorLocator)
import pathlib
from imblearn.pipeline import Pipeline
from sklearn.calibration import calibration_curve
from sklearn.metrics import brier_score_loss
def __plot_calibration_curve_binary(clf, X_test, y_test, n_bins, strategy, **kwargs):
    if 'probs' in kwargs:
        # use the probabilities supplied by the caller
        probs = kwargs['probs']
    else:
        # score the test set ourselves
        probs = clf.predict_proba(X_test)[:, 1]
    fraction_of_positives, mean_predicted_value = calibration_curve(
        y_test, probs, n_bins=n_bins, strategy=strategy)
    max_val = max(mean_predicted_value)

    if 'fig_size' in kwargs and 'dpi' in kwargs:
        fig, ax = plt.subplots(2, sharex=True,
                               gridspec_kw={'height_ratios': [2, 1], 'hspace': 0.05},
                               figsize=kwargs['fig_size'], dpi=kwargs['dpi'],
                               facecolor='white')
    else:
        fig, ax = plt.subplots(2, facecolor='white', sharex=True,
                               gridspec_kw={'height_ratios': [2, 1], 'hspace': 0.05})

    plt.rcParams["figure.facecolor"] = 'white'
    plt.rcParams["axes.facecolor"] = 'white'
    plt.rcParams["savefig.facecolor"] = 'white'

    ax[0].xaxis.set_major_locator(MultipleLocator(0.1))
    ax[1].xaxis.set_major_locator(MultipleLocator(0.1))
    ax[0].xaxis.set_major_formatter('{x:.1f}')
    ax[1].xaxis.set_major_formatter('{x:.1f}')
    ax[0].yaxis.set_major_locator(MultipleLocator(0.1))
    ax[0].yaxis.set_major_formatter('{x:.1f}')
    ax[0].tick_params(which='both', width=1)
    ax[0].tick_params(which='major', length=5)
    ax[0].grid(True, zorder=0)
    ax[1].grid(True, zorder=0)

    if type(clf) == Pipeline:
        estimator_name = type(clf['clf']).__name__
    else:
        estimator_name = type(clf).__name__

    # compute the Brier score for the legend
    brier_score = ' (Brier Score : ' + str(round(brier_score_loss(y_test, probs), 4)) + ')'

    # plot calibration curve
    ax[0].plot(mean_predicted_value, fraction_of_positives,
               label=estimator_name + brier_score, zorder=2)
    ax[0].scatter(mean_predicted_value, fraction_of_positives, zorder=3)
    # plot perfect calibration line
    ax[0].plot(np.linspace(0, max_val, n_bins), np.linspace(0, max_val, n_bins),
               linestyle='--', color='red', label='Perfect calibration', zorder=1)
    # plot number of observations per prediction interval
    ax[1].hist(probs, bins=n_bins, density=True, stacked=True, alpha=0.3, zorder=1)

    # add labels and legends
    ax[1].set_xlabel('Probability Predictions', fontsize=18)
    ax[0].set_ylabel('Fraction of positive examples', fontsize=18)
    ax[1].set_ylabel('Fraction of examples', fontsize=18)
    if 'title' in kwargs:
        ax[0].set_title(kwargs['title'], fontsize=18)
    else:
        ax[0].set_title('Probability Calibration Curve', fontsize=18)
    ax[0].legend(loc='upper left')
    ax[0].set_xlim([0.0, 1.0])
    ax[1].set_xlim([0.0, 1.0])
    ax[0].set_ylim([0.0, 1.0])
    plt.show()

    if 'save_fig_path' in kwargs:
        path = pathlib.Path(kwargs['save_fig_path'])
        path.parent.mkdir(parents=True, exist_ok=True)
        if 'dpi' in kwargs:
            fig.savefig(kwargs['save_fig_path'], dpi=kwargs['dpi'],
                        facecolor=fig.get_facecolor(), edgecolor='none')
        else:
            fig.savefig(kwargs['save_fig_path'],
                        facecolor=fig.get_facecolor(), edgecolor='none')
    return fig, ax
def __plot_calibration_curve_multiclass(clf, X_test, y_test, n_bins, strategy, **kwargs):
    print("Only binary classification is supported.")
def plot_calibration_curve(clf, X_test, y_test, n_bins=10, strategy='uniform', **kwargs):
    """
    Plots the probability calibration curve for the given model.

    Parameters
    ----------
    clf : estimator instance (sklearn.Pipeline, imblearn.Pipeline or a classifier)
        PRE-FITTED classifier or a PRE-FITTED Pipeline in which the last
        estimator is a classifier.
    X_test : pandas.DataFrame of shape (n_samples, n_features)
        Test values.
    y_test : pandas.Series of shape (n_samples,)
        Target values.
    n_bins : int, default=10
        Number of bins to discretize the [0, 1] interval.
        A bigger number requires more data. Bins with no samples (i.e. without
        corresponding values in probs) will not be returned, so the returned
        arrays may have fewer than n_bins values.
    strategy : {'uniform', 'quantile'}, default='uniform'
        Strategy used to define the widths of the bins.
    **kwargs : The following options are available with kwargs
        probs : array-like of shape (n_samples,)
            Probabilities of the positive class.
        fig_size : tuple
            Size (inches) of the plot.
        dpi : int, default=100
            Image DPI.
        title : str
            The title of the plot.
        save_fig_path : str
            Full path where to save the plot. Will generate the folders if
            they don't exist already.

    Returns
    -------
    fig : matplotlib.figure.Figure
        Figure from matplotlib.
    ax : matplotlib.axes.Axes
        Axes object from matplotlib.

    Example Syntax #1 : Plot calibration curve from estimator
    ---------------------------------------------------------
    fig, ax = plot_calibration_curve(rf_pipe, X_test, y_test, n_bins=10, strategy='uniform',
                                     fig_size=(12, 10), dpi=100,
                                     save_fig_path="dir1/dir2/calibration_curve.png")

    Example Syntax #2 : Plot the calibration curve using precomputed probabilities
    ------------------------------------------------------------------------------
    fig, ax = plot_calibration_curve(rf_pipe, X_test, y_test, n_bins=10, strategy='uniform',
                                     probs=probs, fig_size=(12, 10), dpi=100,
                                     save_fig_path="dir1/dir2/calibration_curve.png")
    """
    if len(y_test.unique()) == 2:
        fig, ax = __plot_calibration_curve_binary(clf, X_test, y_test,
                                                  n_bins=n_bins, strategy=strategy, **kwargs)
    else:
        fig, ax = __plot_calibration_curve_multiclass(clf, X_test, y_test,
                                                      n_bins=n_bins, strategy=strategy, **kwargs)
    return fig, ax
The output for the following call:
fig, ax = reporting.plot_calibration_curve(rf_pipe, X_test, y_test, n_bins=10, strategy='uniform',
                                           probs=probs, fig_size=(12, 10), dpi=100,
                                           save_fig_path="dir1/dir2/calibration_curve.png",
                                           title='Probability Calibration Curve')
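One way to extend this to the multi-class case (a sketch, not sklearn's own API: calibration_curve itself stays binary) is a one-vs-rest treatment: binarize the labels and draw one curve per class, using the matching column of predict_proba. The helper name plot_calibration_curve_multiclass_ovr is an assumption here; the column ordering follows clf.classes_.

import numpy as np
import matplotlib.pyplot as plt
from sklearn.calibration import calibration_curve
from sklearn.preprocessing import label_binarize

def plot_calibration_curve_multiclass_ovr(clf, X_test, y_test, n_bins=10, strategy='uniform'):
    # predict_proba columns are ordered like clf.classes_
    classes = clf.classes_
    probs = clf.predict_proba(X_test)                # (n_samples, n_classes)
    y_bin = label_binarize(y_test, classes=classes)  # (n_samples, n_classes)

    fig, ax = plt.subplots(figsize=(8, 6))
    for i, cls in enumerate(classes):
        # treat class `cls` as positive, everything else as negative
        frac_pos, mean_pred = calibration_curve(y_bin[:, i], probs[:, i],
                                                n_bins=n_bins, strategy=strategy)
        ax.plot(mean_pred, frac_pos, marker='o', label=f'class {cls}')
    ax.plot([0, 1], [0, 1], linestyle='--', color='red', label='Perfect calibration')
    ax.set_xlabel('Mean predicted probability')
    ax.set_ylabel('Fraction of positives')
    ax.set_title('One-vs-rest Probability Calibration Curves')
    ax.legend(loc='upper left')
    return fig, ax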
I have 3 regression models: Linear regression, Random Forest, and ANN.
To compare them, I am plotting two kinds of plots: residuals and actual vs. predicted.
Overall, the residual plots suggest that the models predict the data well, since the residuals are symmetric and follow the horizontal line. However, in the actual vs. predicted plots, none of the models come close to following the 45-degree line (which would indicate perfect prediction). What am I doing wrong? Please see the relevant code below:
# Separate features from the label (add .values) to use arrays instead of dataframes
X = dataset2.iloc[:, 0:-1]
y = dataset2.iloc[:, -1]
# Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)
# Scale the data
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# Train OLS ####################################
ols_regressor = sm.OLS(y_train, X_train).fit()
# Predictions
y_pred_ols_test = ols_regressor.predict(X_test)
y_pred_ols_train = ols_regressor.predict(X_train)
# Train RF ####################################
rf = RandomForestRegressor(bootstrap=False,
                           max_depth=100,
                           max_features=3,
                           min_samples_leaf=1,
                           min_samples_split=5,
                           n_estimators=800)
rf.fit(X_train, y_train)
# Predictions
y_pred_rf_test = rf.predict(X_test)
y_pred_rf_train = rf.predict(X_train)
# Train ANN ###################################
# ... TRAIN NN WITH KERAS AND TENSORFLOW ...
y_pred_nn_test = ann.predict(X_test)
y_pred_nn_train = ann.predict(X_train)
Here I am plotting the residuals:
fig, axes = plt.subplots(3, 1, figsize=(8, 12))
fig.suptitle('Residuals', fontweight="bold", fontsize=15)

# OLS #######################################
axes[0] = sns.residplot(ax=axes[0], x=y_pred_ols_test, y=y_test, data=dataset,
                        lowess=True,
                        color='darkcyan',
                        scatter_kws={'alpha': 0.5},
                        line_kws={'color': 'red', 'lw': 1, 'alpha': 0.8})
axes[0].set_xlim([-0.0050, 0.0005])
axes[0].set_ylim([-0.03, 0.03])
axes[0].set_title('Linear Regression')
axes[0].set_xlabel(' ')
axes[0].set_ylabel('Residuals')

# Random Forest ###############################
axes[1] = sns.residplot(ax=axes[1], x=y_pred_rf_test, y=y_test, data=dataset,
                        lowess=True,
                        color='darkcyan',
                        scatter_kws={'alpha': 0.5},
                        line_kws={'color': 'red', 'lw': 1, 'alpha': 0.8})
axes[1].set_xlim([-0.016, 0.006])
axes[1].set_ylim([-0.03, 0.03])
axes[1].set_title('Random Forest')
axes[1].set_xlabel(' ')
axes[1].set_ylabel('Residuals')

# Neural Network ###############################
axes[2] = sns.residplot(ax=axes[2], x=y_pred_nn_test, y=y_test, data=dataset,
                        lowess=True,
                        color='darkcyan',
                        scatter_kws={'alpha': 0.5},
                        line_kws={'color': 'red', 'lw': 1, 'alpha': 0.8})
axes[2].set_xlim([-0.018, -0.009])
axes[2].set_ylim([-0.03, 0.03])
axes[2].set_title('Artificial Neural Network')
axes[2].set_xlabel('Predicted values')
axes[2].set_ylabel('Residuals')
plt.show()
Then I draw the predicted vs. actual plot:
fig, axes = plt.subplots(3, 1, figsize=(8, 12))
fig.suptitle('Predicted vs Actual', fontweight="bold", fontsize=15)

# OLS #######################################
axes[0] = sns.regplot(ax=axes[0], x=y_pred_ols_test, y=y_test,
                      color='darkcyan',
                      scatter_kws={'alpha': 0.5},
                      line_kws={'color': 'red', 'lw': 1, 'alpha': 0.8})
x0 = -0.00415
x1 = 0.00025
y0 = -0.03
y1 = 0.03
# Plot 45 degree line
axes[0].plot([x0, x1], [y0, y1], ls="--", c="grey")
# Axes scale
axes[0].set_xlim([x0, x1])
axes[0].set_ylim([y0, y1])
# Labels
axes[0].set_title('Linear Regression')
axes[0].set_xlabel('')
axes[0].set_ylabel('True values')

# Random Forest ###############################
axes[1] = sns.regplot(ax=axes[1], x=y_pred_rf_test, y=y_test,
                      color='darkcyan',
                      scatter_kws={'alpha': 0.5},
                      line_kws={'color': 'red', 'lw': 1, 'alpha': 0.8})
x0 = -0.0125
x1 = 0.0045
y0 = -0.03
y1 = 0.03
# Plot 45 degree line
axes[1].plot([x0, x1], [y0, y1], ls="--", c="grey")
# Axes scale
axes[1].set_xlim([x0, x1])
axes[1].set_ylim([y0, y1])
axes[1].set_title('Random Forest')
axes[1].set_xlabel(' ')
axes[1].set_ylabel('True values')

# NN #########################################
axes[2] = sns.regplot(ax=axes[2], x=y_pred_nn_test, y=y_test,
                      color='darkcyan',
                      scatter_kws={'alpha': 0.5},
                      line_kws={'color': 'red', 'lw': 1, 'alpha': 0.8})
x0 = -0.0175
x1 = -0.0092
y0 = -0.03
y1 = 0.03
# Plot 45 degree line
axes[2].plot([x0, x1], [y0, y1], ls="--", c="grey")
# Axes scale
axes[2].set_xlim([x0, x1])
axes[2].set_ylim([y0, y1])
axes[2].set_title('Artificial Neural Network')
axes[2].set_xlabel('Predicted values')
axes[2].set_ylabel('True values')
plt.show()
In theory, if the residual plots are symmetric and horizontal, the actual vs. predicted plots should look good too, right? What am I missing here?
For the actual vs. predicted plots I also tried sns.scatterplot(), but the result was essentially the same.
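One thing worth checking, with a hedged sketch using the variables from the code above: because each subplot's x-limits and y-limits span different ranges, the dashed line drawn from (x0, y0) to (x1, y1) is the diagonal of the axes box, not the identity line y = x. A true 45-degree reference line needs the same limits on both axes, e.g. for the Random Forest panel:

import numpy as np

# shared limits so that y = x really sits at 45 degrees
lims = [min(np.min(y_test), np.min(y_pred_rf_test)),
        max(np.max(y_test), np.max(y_pred_rf_test))]
axes[1].plot(lims, lims, ls="--", c="grey")  # identity line y = x
axes[1].set_xlim(lims)
axes[1].set_ylim(lims)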
I compared the test-set results of 6 classifiers and got Random Forest with the highest accuracy and Naive Bayes (NB) with the lowest.
But when I compute the evaluation metrics (F1, precision and recall), NB gets the highest values among the rest.
Is this normal, or am I doing something wrong?
I used the same code to calculate the metrics for each classifier:
from sklearn import metrics
from sklearn.metrics import f1_score, precision_score, recall_score

RFclf = RandomForestClassifier(n_estimators=20)
# Train the model on the training set, then predict on both sets
RFclf.fit(X_train, y_train)
train_pred = RFclf.predict(X_train)
test_pred = RFclf.predict(X_test)
rftrain = metrics.accuracy_score(y_train, train_pred)
rftest = metrics.accuracy_score(y_test, test_pred)
print("Accuracy for Random Forest on the test set: ", rftest)
frftest = f1_score(y_test, test_pred)
prftest = precision_score(y_test, test_pred)
rrftest = recall_score(y_test, test_pred)
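As an aside (an assumption on my part, since the post doesn't say whether the task is binary): f1_score, precision_score and recall_score default to average='binary' and raise an error on multi-class targets, and with imbalanced classes accuracy can legitimately rank models differently from F1, precision and recall. A sketch of the averaged variants:

from sklearn.metrics import f1_score, precision_score, recall_score

# macro: unweighted mean over classes; use average='weighted' to weight by class support
frftest = f1_score(y_test, test_pred, average='macro')
prftest = precision_score(y_test, test_pred, average='macro')
rrftest = recall_score(y_test, test_pred, average='macro')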
and I used this code to draw the plot:
plt.figure(figsize=(14, 7))
ax = plt.subplot(111)

models = ['SVM ', 'NB', 'Decision Tree ', 'LR', 'KNN', 'RF']
model = np.arange(len(models))

values = [pSVMtest, pGAcc, pdesicionT, pLRAcc, pKNAcc, prftest]
plt.bar(model + 0.15, values, align='center', width=0.15, alpha=0.7,
        color='blue', label='precision')

values = [rSVMtest, rGAcc, rdesicionT, rLRAcc, rKNAcc, rrftest]
plt.bar(model + 0.3, values, align='center', width=0.15, alpha=0.7,
        color='green', label='recall')

values = [fSVMtest, fGAcc, fdesicionT, fLRAcc, fKNAcc, frftest]
plt.bar(model + 0.45, values, align='center', width=0.15, alpha=0.7,
        color='red', label='F1 score')

plt.xticks(model, models)
plt.ylabel('Performance Metrics for Different Models')
plt.title('Model')

# removing the axes on the top and right of the plot window
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.legend()
plt.show()
I am trying to use linear and polynomial regression for the data.
import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model
# cross_validation was removed from scikit-learn; train_test_split now lives
# in sklearn.model_selection
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline

def f(x):
    return np.sin(2 * np.pi * x)

x = np.random.uniform(0, 1, size=100)[:, np.newaxis]
y = f(x) + np.random.normal(scale=0.3, size=100)[:, np.newaxis]
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.5, random_state=0)

poly_model = make_pipeline(PolynomialFeatures(degree=2), linear_model.LinearRegression())
poly_model.fit(x_train, y_train)

linear_model_1 = linear_model.LinearRegression()
linear_model_1.fit(x_train, y_train)

fig = plt.figure()
ax = plt.axes()
ax.set(xlabel='X', ylabel='Y', title='X vs Y')
ax.scatter(x, y, alpha=0.5)
ax.plot(x_test, linear_model_1.predict(x_test), color='green', label='linear')
ax.plot(x_test, poly_model.predict(x_test), color='red', label='poly')
ax.legend()
With the above code, I get the following image:
But as you can see, the polynomial regression is not right.
I tried different approaches (e.g., not using make_pipeline), but with no success.
If I've understood you correctly, just sort x_test before passing it to predict(), and increase the polynomial degree to 3:
poly_model = make_pipeline(PolynomialFeatures(degree=3), linear_model.LinearRegression())
and
x_test.sort(axis=0)
With these adjustments I get the following plot:
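Putting the suggestion together (a sketch using the question's variables): matplotlib connects points in the order given, so an unsorted x_test produces the zigzag line; sorting first yields a smooth curve.

x_test.sort(axis=0)  # sort in place so the line is drawn left to right
ax.plot(x_test, linear_model_1.predict(x_test), color='green', label='linear')
ax.plot(x_test, poly_model.predict(x_test), color='red', label='poly')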
You can just call plot twice; each call adds a new line to the existing plot, e.g.:
ax.plot(x_test, model1.predict(x_test), color='red', linewidth=2)
ax.plot(x_test, model2.predict(x_test), color='green', linewidth=2)
In your case I'd do something like this:
from sklearn.pipeline import Pipeline

# note: don't rebind the name linear_model, as that shadows the sklearn module
lin_model = linear_model.LinearRegression(fit_intercept=False)
poly_model = Pipeline([('poly', PolynomialFeatures(degree=2)),
                       ('linear', linear_model.LinearRegression(fit_intercept=False))])
lin_model.fit(x_train, y_train)
poly_model.fit(x_train, y_train)
And then:
ax.plot(x_test, lin_model.predict(x_test), color='red', linewidth=2, label='linear')
ax.plot(x_test, poly_model.predict(x_test), color='green', linewidth=2, label='poly')
ax.legend()