Based on the method plot_series in this notebook, I would like to plot a time series in 3D, where each point consists of x, y coordinates and time.
My problem appears when I try to plot the target point, which raises the exception:
18 if y_true is not None:
---> 19 ax.plot3D(n_steps+1, x_true, y_true, "bo", markersize=10, label="Target")
TypeError: object of type 'int' has no len()
My code is below. I have a 9-step time series, and I would like to plot the target point at the 10th step as well. How can I do this?
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure

def plot_series(x_train, y_train, n_steps=10, x_true=None, y_true=None, x_pred=None, y_pred=None, x_label="$time$", y_label="$x$", z_label="$y$", legend=True):
    figure(figsize=(8, 6), dpi=80)
    ax = plt.axes(projection='3d')
    time = np.arange(start=0, stop=len(x_train), step=1)
    # base plot
    ax.plot3D(time, x_train, y_train, ".-")
    if y_true is not None:
        ax.plot3D(n_steps+1, x_true, y_true, "bo", markersize=10, label="Target")
    if y_pred is not None:
        ax.plot3D(n_steps+1, x_pred, y_pred, "rx", markersize=10, label="Prediction")
    ax.grid(True)
    if x_label:
        ax.set_xlabel(x_label, fontsize=16)
    if y_label:
        ax.set_ylabel(y_label, fontsize=16, rotation=0)
    if z_label:
        ax.set_zlabel(z_label, fontsize=16, rotation=0)
    if legend and (y_true or y_pred):
        ax.legend(fontsize=14, loc="upper left")
# single timeseries on the training set
x_r = [0.58114803, 0.5591796, 0.59348005, 0.59550647, 0.61035596, 0.4759958, 0.56246371, 0.51623335, 0.56018264]
y_r = [0.37528117, 0.52601401, 0.4105518, 0.41212707, 0.42236306, 0.36568968, 0.53288641, 0.42619483, 0.48411763]
# target point for that timeseries on the training set
x_t = [0.60137904]
y_t = [0.37068267]
plot_series(x_r, y_r, 9, x_true=x_t, y_true=y_t)
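The error happens because ax.plot3D expects sequences for all three coordinates, while n_steps+1 is a plain int (hence "object of type 'int' has no len()"). A minimal fix (a sketch, assuming the target belongs on the step right after the series, i.e. index 9 for a 9-step series starting at step 0) is to pass the time coordinate as a one-element list:

if y_true is not None:
    # time must be a sequence, just like x_true and y_true
    ax.plot3D([n_steps], x_true, y_true, "bo", markersize=10, label="Target")
if y_pred is not None:
    ax.plot3D([n_steps], x_pred, y_pred, "rx", markersize=10, label="Prediction")

Since time runs from 0 to len(x_train) - 1, the call plot_series(x_r, y_r, 9, ...) places the target at index 9, i.e. the 10th step; the original n_steps+1 would leave a one-step gap.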
When performing classification, we often want not only the predicted class label but also a probability, i.e. some measure of confidence in that label. Probabilities can be much more informative than labels, but to convey likelihood they need to be calibrated: a calibrated probability reflects the true likelihood. For instance, if 10 observations receive a probability of 0.8 and the probabilities are calibrated, we expect around 8 of them to belong to the positive class; in other words, the fraction of positive cases should match the predicted probability.
sklearn's calibration_curve supports only binary classification. How can we extend it to multi-class problems and plot a probability calibration curve when len(np.unique(y_true)) > 2? Here is my code that plots the curve for binary classification.
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import (MultipleLocator, AutoMinorLocator)
import pathlib
from imblearn.pipeline import Pipeline
from sklearn.calibration import calibration_curve
from sklearn.metrics import brier_score_loss
def __plot_calibration_curve_binary(clf, X_test, y_test, n_bins, strategy, **kwargs):
    if 'probs' in kwargs:
        # use the probabilities passed in by the caller
        probs = kwargs['probs']
    else:
        # score the test set with the classifier
        probs = clf.predict_proba(X_test)[:, 1]
    fraction_of_positives, mean_predicted_value = calibration_curve(y_test, probs, n_bins=n_bins, strategy=strategy)
    max_val = max(mean_predicted_value)
    if 'fig_size' in kwargs and 'dpi' in kwargs:
        fig, ax = plt.subplots(2, sharex=True, gridspec_kw={'height_ratios': [2, 1], 'hspace': 0.05}, figsize=kwargs['fig_size'], dpi=kwargs['dpi'], facecolor='white')
    else:
        fig, ax = plt.subplots(2, facecolor='white', sharex=True, gridspec_kw={'height_ratios': [2, 1], 'hspace': 0.05})
    plt.rcParams["figure.facecolor"] = 'white'
    plt.rcParams["axes.facecolor"] = 'white'
    plt.rcParams["savefig.facecolor"] = 'white'
    ax[0].xaxis.set_major_locator(MultipleLocator(0.1))
    ax[1].xaxis.set_major_locator(MultipleLocator(0.1))
    ax[0].xaxis.set_major_formatter('{x:.1f}')
    ax[1].xaxis.set_major_formatter('{x:.1f}')
    ax[0].yaxis.set_major_locator(MultipleLocator(0.1))
    ax[0].yaxis.set_major_formatter('{x:.1f}')
    ax[0].tick_params(which='both', width=1)
    ax[0].tick_params(which='major', length=5)
    ax[0].grid(True, zorder=0)
    ax[1].grid(True, zorder=0)
    if isinstance(clf, Pipeline):
        estimator_name = type(clf['clf']).__name__
    else:
        estimator_name = type(clf).__name__
    # report the Brier score alongside the estimator name
    brier_score = ' (Brier Score : ' + str(round(brier_score_loss(y_test, probs), 4)) + ')'
    # plot calibration curve
    ax[0].plot(mean_predicted_value, fraction_of_positives, label=estimator_name + brier_score, zorder=2)
    ax[0].scatter(mean_predicted_value, fraction_of_positives, zorder=3)
    # plot perfect calibration line
    ax[0].plot(np.linspace(0, max_val, n_bins), np.linspace(0, max_val, n_bins), linestyle='--', color='red', label='Perfect calibration', zorder=1)
    # plot number of observations per prediction interval
    ax[1].hist(probs, bins=n_bins, density=True, stacked=True, alpha=0.3, zorder=1)
    # add labels and legends
    ax[1].set_xlabel('Probability Predictions', fontsize=18)
    ax[0].set_ylabel('Fraction of positive examples', fontsize=18)
    ax[1].set_ylabel('Fraction of examples', fontsize=18)
    if 'title' in kwargs:
        ax[0].set_title(kwargs['title'], fontsize=18)
    else:
        ax[0].set_title('Probability Calibration Curve', fontsize=18)
    ax[0].legend(loc='upper left')
    ax[0].set_xlim([0.0, 1.0])
    ax[1].set_xlim([0.0, 1.0])
    ax[0].set_ylim([0.0, 1.0])
    plt.show()
    if 'save_fig_path' in kwargs:
        path = pathlib.Path(kwargs['save_fig_path'])
        path.parent.mkdir(parents=True, exist_ok=True)
        if 'dpi' in kwargs:
            fig.savefig(kwargs['save_fig_path'], dpi=kwargs['dpi'], facecolor=fig.get_facecolor(), edgecolor='none')
        else:
            fig.savefig(kwargs['save_fig_path'], facecolor=fig.get_facecolor(), edgecolor='none')
    return fig, ax
def __plot_calibration_curve_multiclass(clf, X_test, y_test, n_bins, strategy, **kwargs):
    print("Only binary classification is supported.")

def plot_calibration_curve(clf, X_test, y_test, n_bins=10, strategy='uniform', **kwargs):
    """
    Plots the probability calibration curve for the given model.

    Parameters
    ----------
    clf : estimator instance (sklearn Pipeline, imblearn Pipeline or a classifier)
        PRE-FITTED classifier or a PRE-FITTED Pipeline in which the last estimator is a classifier.
    X_test : pandas.DataFrame of shape (n_samples, n_features)
        Test values.
    y_test : pandas.Series of shape (n_samples,)
        Target values.
    n_bins : int, default=10
        Number of bins to discretize the [0, 1] interval.
        A bigger number requires more data. Bins with no samples (i.e. without
        corresponding values in probs) will not be returned, so the returned
        arrays may have fewer than n_bins values.
    strategy : {'uniform', 'quantile'}, default='uniform'
        Strategy used to define the widths of the bins.
    **kwargs : the following options are available via kwargs
        probs : array-like of shape (n_samples,)
            Probabilities of the positive class.
        fig_size : tuple
            Size (inches) of the plot.
        dpi : int, default=100
            Image DPI.
        title : str
            The title of the plot.
        save_fig_path : str
            Full path where to save the plot. Will create the folders if they don't exist already.

    Returns
    -------
    fig : matplotlib.figure.Figure
        Figure object from matplotlib.
    ax : matplotlib.axes.Axes
        Axes object from matplotlib.

    Example Syntax #1 : Plot the calibration curve from the estimator
    -----------------
    fig, ax = plot_calibration_curve(rf_pipe, X_test, y_test, n_bins=10, strategy='uniform',
                                     fig_size=(12, 10), dpi=100,
                                     save_fig_path="dir1/dir2/calibration_curve.png")

    Example Syntax #2 : Plot the calibration curve using pre-computed probabilities
    -----------------
    fig, ax = plot_calibration_curve(rf_pipe, X_test, y_test, n_bins=10, strategy='uniform',
                                     probs=probs, fig_size=(12, 10), dpi=100,
                                     save_fig_path="dir1/dir2/calibration_curve.png")
    """
    if len(y_test.unique()) == 2:
        fig, ax = __plot_calibration_curve_binary(clf, X_test, y_test, n_bins=n_bins, strategy=strategy, **kwargs)
    else:
        fig, ax = __plot_calibration_curve_multiclass(clf, X_test, y_test, n_bins=n_bins, strategy=strategy, **kwargs)
    return fig, ax
The output for the following call (output screenshot omitted):

fig, ax = reporting.plot_calibration_curve(rf_pipe, X_test, y_test, n_bins=10, strategy='uniform',
                                           probs=probs, fig_size=(12, 10), dpi=100,
                                           save_fig_path="dir1/dir2/calibration_curve.png",
                                           title='Probability Calibration Curve')
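One way to extend this to multi-class problems is to draw one one-vs-rest curve per class, reusing sklearn's binary calibration_curve. A minimal sketch (my approach, not a definitive implementation; it assumes clf is fitted and exposes predict_proba and classes_):

import numpy as np
import matplotlib.pyplot as plt
from sklearn.calibration import calibration_curve

def plot_calibration_curve_ovr(clf, X_test, y_test, n_bins=10, strategy='uniform'):
    """Plot one calibration curve per class, treating each class one-vs-rest."""
    probs = clf.predict_proba(X_test)  # shape (n_samples, n_classes)
    fig, ax = plt.subplots(figsize=(8, 6))
    for k, cls in enumerate(clf.classes_):
        # binarize: current class vs. everything else
        frac_pos, mean_pred = calibration_curve((y_test == cls).astype(int), probs[:, k],
                                                n_bins=n_bins, strategy=strategy)
        ax.plot(mean_pred, frac_pos, marker='o', label=f'class {cls}')
    ax.plot([0, 1], [0, 1], linestyle='--', color='red', label='Perfect calibration')
    ax.set_xlabel('Mean predicted probability', fontsize=18)
    ax.set_ylabel('Fraction of positive examples', fontsize=18)
    ax.legend(loc='upper left')
    return fig, ax

The idea is that each class's probability column can be calibrated against the binary indicator y_test == cls, which is exactly the binary case calibration_curve already supports.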
I'm training a KNN model and I want to plot two images per for-loop iteration, as shown in the image below:
What I need
On the left, I plot the boundary visualization of my model for a certain number of neighbours. On the right, I plot the confusion matrix.
To accomplish something along those lines I've written the following code:
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, plot_confusion_matrix

fig = plt.figure()
for i in range(1, 3):
    neigh = KNeighborsClassifier(n_neighbors=i)
    neigh.fit(X, y)
    y_pred = neigh.predict(X)
    acc = accuracy_score(y_pred, y)
    # Boundary
    ax1 = fig.add_subplot(1, 2, 1)
    visualize_classifier(neigh, X, y, ax=ax1)  # Defined by me
    # Plot confusion matrix. Defined by sklearn.metrics
    ax2 = fig.add_subplot(1, 2, 2)
    plot_confusion_matrix(neigh, X, y, cmap=plt.cm.Blues, values_format='.0f', ax=ax2)
    ax1.set_title(f'Neighbors = {i}.\nAccuracy = {acc:.4f}', fontsize=14)
    ax2.set_title(f'Neighbors = {i}.\nAccuracy = {acc:.4f}', fontsize=14)
    plt.tight_layout()
    plt.figure(i)
    plt.show()
The visualize_classifier() function:
import numpy as np

def visualize_classifier(model, X, y, ax=None, cmap='Dark2'):
    ax = ax or plt.gca()
    # Plot the training points
    ax.scatter(X.iloc[:, 0], X.iloc[:, 1], c=y, s=30, cmap=cmap,  # Changed to iloc.
               clim=(y.min(), y.max()), zorder=3, alpha=0.5)
    ax.axis('tight')
    ax.set_xlabel('x1')
    ax.set_ylabel('x2')
    # ax.axis('off')
    xlim = ax.get_xlim()
    ylim = ax.get_ylim()
    xx, yy = np.meshgrid(np.linspace(*xlim, num=200),
                         np.linspace(*ylim, num=200))
    Z = model.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)
    # Create a color plot with the results
    n_classes = len(np.unique(y))
    contours = ax.contourf(xx, yy, Z, alpha=0.3,
                           levels=np.arange(n_classes + 1) - 0.5,
                           cmap=cmap, clim=(y.min(), y.max()),
                           zorder=1)
    ax.set(xlim=xlim, ylim=ylim)
What I get (screenshots omitted):
As you can see, only the first loop iteration is plotted; the second one is not, and I can't figure out why.
Furthermore, I have the same title for the plot on the right and on the left. I would like to have only one title on top of both; how can this be accomplished?
Now, you might be wondering why I need to do this, and the answer is that I would like to see how the boundaries change depending on the number of neighbors. It's just to get a visual sense of the KNN algorithm.
Any suggestion would be much appreciated.
I was able to make it work. What I had wrong was the first line inside the for loop: I now assign plt.figure(i, figsize=(18, 8)) to the variable fig there, so a new figure is created on every iteration.
import seaborn as sns

for i in range(1, 30):
    fig = plt.figure(i, figsize=(18, 8))
    sns.set(font_scale=2.0)  # Adjust to fit
    neigh = KNeighborsClassifier(n_neighbors=i)
    neigh.fit(X, y)
    y_pred = neigh.predict(X)
    acc = accuracy_score(y_pred, y)
    # Boundary
    ax1 = fig.add_subplot(1, 2, 1)
    visualize_classifier(neigh, X, y, ax=ax1)  # Defined by me
    # Plot confusion matrix. Defined by sklearn.metrics
    ax2 = fig.add_subplot(1, 2, 2)
    plot_confusion_matrix(neigh, X, y, cmap=plt.cm.Blues, values_format='.0f', ax=ax2)
    fig.suptitle(f'Neighbors = {i}. Accuracy = {acc:.4f}', y=1)
    plt.show()
For the title I used: fig.suptitle(f'Neighbors = {i}. Accuracy = {acc:.4f}', y=1)
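As a side note (my variant, not from the original answer), the same layout can be created in one call with plt.subplots, which avoids managing figure numbers by hand:

for i in range(1, 30):
    # a new figure with two side-by-side axes on every iteration
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(18, 8))
    neigh = KNeighborsClassifier(n_neighbors=i).fit(X, y)
    acc = accuracy_score(neigh.predict(X), y)
    visualize_classifier(neigh, X, y, ax=ax1)
    plot_confusion_matrix(neigh, X, y, cmap=plt.cm.Blues, values_format='.0f', ax=ax2)
    fig.suptitle(f'Neighbors = {i}. Accuracy = {acc:.4f}', y=1)
    plt.show()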
The Problem:
I'm having trouble plotting and interpreting the results from my TensorFlow model. I've created my own CSV of [x, y, color] rows: a set of randomly scattered dots with a clear pattern in how the colors are arranged. I'm able to feed all the data into the model and train the neural network, but I can't seem to put it all together. I'm a bit new to this as a hobbyist.
Essentially, I want the ML algorithm to pick up the pattern from 100 data points and apply it to a test dataset of points to plot an approximation of the pattern.
The Code:
import tensorflow as tf

LABEL_COLUMN = "Color"
LABELS = [0, 1]

def get_dataset(data_url, **kwargs):
    dataset = tf.data.experimental.make_csv_dataset(
        data_url,
        batch_size=5,
        label_name=LABEL_COLUMN,
        na_value="?",
        num_epochs=1,
        ignore_errors=True,
        **kwargs)
    return dataset

project_data = get_dataset(data_url)
project_test_data = get_dataset(test_data_url)

def pack(features, label):
    return tf.stack(list(features.values()), axis=-1), label

packed_data = project_data.map(pack)
packed_test_data = project_test_data.map(pack)

model2 = tf.keras.Sequential([
    tf.keras.layers.Dense(128, activation="relu"),
    tf.keras.layers.Dense(128, activation="relu"),
    tf.keras.layers.Dense(1),
])

model2.compile(
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
    optimizer="adam",
    metrics=["accuracy"]
)

model2.fit(packed_data, epochs=100)
model_output = model2.predict(packed_test_data)
model_output.plot()
Gives the below error:
AttributeError: 'numpy.ndarray' object has no attribute 'plot'
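Incidentally, the error itself just says that model2.predict returns a plain numpy.ndarray, which has no .plot method; the predictions have to be plotted with matplotlib instead. A minimal sketch (assuming the test x/y coordinates are available as a hypothetical (n_samples, 2) NumPy array test_xy, which the original code doesn't show):

import matplotlib.pyplot as plt
import tensorflow as tf

logits = model2.predict(packed_test_data)    # raw logits, shape (n_samples, 1)
probs = tf.sigmoid(logits).numpy().ravel()   # logits -> probabilities
labels = (probs > 0.5).astype(int)           # probabilities -> 0/1 labels
# test_xy is a hypothetical (n_samples, 2) array of the test x/y columns
plt.scatter(test_xy[:, 0], test_xy[:, 1], c=labels, cmap='RdBu', s=30)
plt.show()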
Perhaps this function can be adapted to solve your problem?
(From https://jonchar.net/notebooks/Artificial-Neural-Network-with-Keras/)
import numpy as np
import matplotlib.pyplot as plt

def plot_decision_boundary(X, y, model, steps=1000, cmap='Paired'):
    """
    Function to plot the decision boundary and data points of a model.
    Data points are colored based on their actual label.
    """
    cmap = plt.get_cmap(cmap)
    # Define region of interest by data limits
    xmin, xmax = X[:, 0].min() - 1, X[:, 0].max() + 1
    ymin, ymax = X[:, 1].min() - 1, X[:, 1].max() + 1
    x_span = np.linspace(xmin, xmax, steps)
    y_span = np.linspace(ymin, ymax, steps)
    xx, yy = np.meshgrid(x_span, y_span)
    # Make predictions across region of interest
    labels = model.predict(np.c_[xx.ravel(), yy.ravel()])
    # Plot decision boundary in region of interest
    z = labels.reshape(xx.shape)
    fig, ax = plt.subplots()
    ax.contourf(xx, yy, z, cmap=cmap, alpha=0.5)
    # Plot the training data colored by the actual labels
    ax.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap, lw=0)
    return fig, ax
plot_decision_boundary(X, y, model, cmap='RdBu')
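One caveat (my note, not from the linked page): the Keras model above outputs raw logits, so model.predict returns continuous values and contourf draws a smooth gradient rather than two crisp regions. A hedged variant of the prediction step inside plot_decision_boundary that thresholds first:

# threshold the raw logits to hard 0/1 labels before contouring
# (logit > 0 is equivalent to sigmoid probability > 0.5)
labels = (model.predict(np.c_[xx.ravel(), yy.ravel()]) > 0).astype(int)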
So I have been attempting to view the decision boundary for my network, and for some reason, when I run it, it doesn't give me any output.
I took the function from here.
It doesn't give any error; the run just ends.
# Fit the model; keep the history to inspect training
history = model.fit(X, Y, validation_split=0.30, epochs=10, batch_size=1000, verbose=1)
# evaluate the model
scores = model.evaluate(X, Y)

def plot_decision_boundary(X, y, model, steps=1000, cmap='Paired'):
    """
    Function to plot the decision boundary and data points of a model.
    Data points are colored based on their actual label.
    """
    cmap = plt.get_cmap(cmap)
    # Define region of interest by data limits
    xmin, xmax = X[:, 0].min() - 1, X[:, 0].max() + 1
    ymin, ymax = X[:, 1].min() - 1, X[:, 1].max() + 1
    steps = 1000
    x_span = np.linspace(xmin, xmax, steps)
    y_span = np.linspace(ymin, ymax, steps)
    xx, yy = np.meshgrid(x_span, y_span)
    # Make predictions across region of interest
    labels = model.predict(np.c_[xx.ravel(), yy.ravel()])
    # Plot decision boundary in region of interest
    z = labels.reshape(xx.shape)
    fig, ax = plt.subplots()
    ax.contourf(xx, yy, z, cmap=cmap, alpha=0.5)
    # Get predicted labels on training data and plot
    train_labels = model.predict(X)
    ax.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap, lw=0)
    return fig, ax

plot_decision_boundary(X, Y, model, cmap='RdBu')
I haven't really made many changes to the function.
What am I missing here?
Your function plot_decision_boundary() constructs fig and ax objects, which are returned at the end. In your code, nothing picks up these objects when they are returned, and just because a function returns fig and ax does not mean they are automatically drawn.
The solution is simple: just call
plt.show()
after calling the decision boundary function.
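For example, using the question's own call:

fig, ax = plot_decision_boundary(X, Y, model, cmap='RdBu')
plt.show()  # actually render the figure (or use fig.savefig(...) to write it to a file instead)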
This part is often omitted in example code. I believe that is because there are several ways to generate the window and show the plot (you might also want to save the figure directly to a file, in which case you wouldn't need the show() call).
I basically want to add a colorbar to each of the subplots in the code below (link to the code). My attempts add all the colorbars at the end of the loop, in the last subplot.
print(__doc__)
import matplotlib.pyplot as plt
# fetch_mldata was removed from scikit-learn; fetch_openml is its replacement
from sklearn.datasets import fetch_openml
from sklearn.neural_network import MLPClassifier

mnist = fetch_openml('mnist_784', version=1, as_frame=False)
# rescale the data, use the traditional train/test split
X, y = mnist.data / 255., mnist.target
X_train, X_test = X[:60000], X[60000:]
y_train, y_test = y[:60000], y[60000:]

# mlp = MLPClassifier(hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
#                     solver='sgd', verbose=10, tol=1e-4, random_state=1)
mlp = MLPClassifier(hidden_layer_sizes=(50,), max_iter=10, alpha=1e-4,
                    solver='sgd', verbose=10, tol=1e-4, random_state=1,
                    learning_rate_init=.1)
mlp.fit(X_train, y_train)

print("Training set score: %f" % mlp.score(X_train, y_train))
print("Test set score: %f" % mlp.score(X_test, y_test))

fig, axes = plt.subplots(4, 4)
# use global min / max to ensure all weights are shown on the same scale
vmin, vmax = mlp.coefs_[0].min(), mlp.coefs_[0].max()
for coef, ax in zip(mlp.coefs_[0].T, axes.ravel()):
    ax.matshow(coef.reshape(28, 28), cmap=plt.cm.gray, vmin=.5 * vmin,
               vmax=.5 * vmax)
    ax.set_xticks(())
    ax.set_yticks(())
plt.show()
UPDATE:
Based on the link in the comment below, here is the code, which adds a single colorbar at the right of the figure:
fig, axes = plt.subplots(4, 4)
# use global min / max to ensure all weights are shown on the same scale
vmin, vmax = mlp.coefs_[0].min(), mlp.coefs_[0].max()
for coef, ax in zip(mlp.coefs_[0].T, axes.ravel()):
    im = ax.matshow(coef.reshape(28, 28), cmap=plt.cm.gray, vmin=.5 * vmin, vmax=.5 * vmax)
    ax.set_xticks(())
    ax.set_yticks(())
fig.subplots_adjust(right=0.8)
cbar_ax = fig.add_axes([0.85, 0.15, 0.05, 0.7])
fig.colorbar(im, cax=cbar_ax)
plt.show()
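If you actually want one colorbar per subplot, as originally asked, a minimal variant (my sketch, reusing the mlp fitted above) attaches a colorbar to each Axes inside the loop; the fraction and pad arguments just keep the bars from crowding the images:

fig, axes = plt.subplots(4, 4, figsize=(10, 10))
# use global min / max to ensure all weights are shown on the same scale
vmin, vmax = mlp.coefs_[0].min(), mlp.coefs_[0].max()
for coef, ax in zip(mlp.coefs_[0].T, axes.ravel()):
    im = ax.matshow(coef.reshape(28, 28), cmap=plt.cm.gray,
                    vmin=.5 * vmin, vmax=.5 * vmax)
    ax.set_xticks(())
    ax.set_yticks(())
    fig.colorbar(im, ax=ax, fraction=0.046, pad=0.04)  # one colorbar per subplot
plt.show()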