Wrong scatter label color in pyplot legend - python

I am making this bar plot:
... using this code segment:
my_cmap = plt.get_cmap('copper')
plt.figure()
plt.set_cmap(my_cmap)
plt.pcolormesh(xx, yy, Z)
labels = ['Negative', 'Negative (doubtful)', 'Positive (doubtful)', 'Positive' ]
for i in [0, 1, 2, 3] :
plt.scatter(clustered_training_data[y==i, 0], clustered_training_data[y==i, 1], c=my_cmap(i / 3.0), label=labels[i], s=50, marker='o', edgecolor='white', alpha=0.7)
plt.scatter(lda_trans_eval[q == -1, 0], lda_trans_eval[q == -1, 1], c='green', label='Your patient', s=80, marker='h', edgecolor='white')
plt.legend(prop={'size':8})
Only one (second) color is always blue, regardless of chosen color map. Corresponding data points are correctly colored in the plot and I can't see the reason why pyplot colors the second label differently.

I can't reproduce it with dummy data. Does this have the problem when you run it?
import matplotlib.pyplot as plt
import numpy as np
my_cmap = plt.get_cmap('copper')
fig = plt.figure(figsize=(5,5))
plt.set_cmap(my_cmap)
X = np.linspace(-1,5,100)
Y = np.linspace(-1,5,100)
X,Y = np.meshgrid(X,Y)
Z = (X**2 + Y**2)
Z = Z.astype(int)
Z += (X**2 + Y**2) < .5
ax = plt.pcolormesh(X, Y, Z)
for i in [0,1,2,3]:
plt.scatter([i],[i],c=my_cmap(i / 3.0),label='i=%s'%str(i),
edgecolor='white', alpha=0.7)
plt.scatter([],[],c=my_cmap(1/3.0), label='empty data')
plt.scatter([3],[1],c='green',label='Force color')
plt.legend(loc=2, prop={'size':8})
from os.path import realpath, basename
s = basename(realpath(__file__))
fig.savefig(s.split('.')[0])
plt.show()

This happened to me. I fixed it by using color instead of c.
plt.scatter(clustered_training_data[y==i, 0], clustered_training_data[y==i, 1], color=my_cmap(i / 3.0), label=labels[i], s=50, marker='o', edgecolor='white', alpha=0.7)

Related

Gradient 2D plot using contourf

I did a test code brigging something I saw on stack on different topic, and try to assemble it to make what I need : a filled curve with gradient.
After validate this test code I will make a subplot (4 plots for 4 weeks) with the same min/max for all plot (it's a power consumption).
My code :
from matplotlib import pyplot as plt
import numpy as np
# random x
x = range(100)
# smooth random y
y = 0
result = []
for _ in x:
result.append(y)
y += np.random.normal(loc=0, scale=1)#, size=len(x))
y = result
y = list(map(abs, y))
# creation of z for contour
z1 = min(y)
z3 = max(y)/(len(x)+1)
z2 = max(y)-z3
z = [[z] * len(x) for z in np.arange(z1,z2,z3)]
num_bars = len(x) # more bars = smoother gradient
# plt.contourf(x, y, z, num_bars, cmap='greys')
plt.contourf(x, y, z, num_bars, cmap='cool', levels=101)
background_color = 'w'
plt.fill_between(
x,
y,
y2=max(y),
color=background_color
)
But everytime I make the code run, the result display a different gradient scale, that is not smooth neither even straight right.
AND sometime the code is in error : TypeError: Length of y (100) must match number of rows in z (101)
I'm on it since too many time, turning around, and can't figure where I'm wrong...
I finally find something particularly cool, how to :
have both filled gradient curves in a different color (thanks to JohanC in this topic)
use x axis with datetime (thanks to Ffisegydd in this topic)
Here the code :
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import matplotlib.dates as mdates
np.random.seed(2022)
st_date = '2022-11-01 00:00:00'
st_date = pd.to_datetime(st_date)
en_date = st_date + pd.DateOffset(days=7)
x = pd.date_range(start=st_date,end=en_date,freq='30min')
x = mdates.date2num(x)
y = np.random.normal(0.01, 1, len(x)).cumsum()
fig, ax = plt.subplots(figsize=(18, 5))
ax.plot(x, y, color='grey')
########################
# positives fill
#######################
grad1 = ax.imshow(
np.linspace(0, 1, 256).reshape(-1, 1),
cmap='Blues',
vmin=-0.5,
aspect='auto',
extent=[x.min(), x.max(), 0, y.max()],
# extent=[x[0], x[1], 0, y.max()],
origin='lower'
)
poly_pos = ax.fill_between(x, y.min(), y, alpha=0.1)
grad1.set_clip_path(
poly_pos.get_paths()[0],
transform=ax.transData
)
poly_pos.remove()
########################
# negatives fill
#######################
grad2 = ax.imshow(
np.linspace(0, 1, 256).reshape(-1, 1),
cmap='Reds',
vmin=-0.5,
aspect='auto',
extent=[x.min(), x.max(), y.min(), 0],
origin='upper'
)
poly_neg = ax.fill_between(x, y, y.max(), alpha=0.1)
grad2.set_clip_path(
poly_neg.get_paths()[0],
transform=ax.transData
)
poly_neg.remove()
########################
# decorations and formatting plot
########################
ax.xaxis_date()
date_format = mdates.DateFormatter('%d-%b %H:%M')
ax.xaxis.set_major_formatter(date_format)
fig.autofmt_xdate()
ax.grid(True)

Gaussian curve fitting python

I have a problem fitting some date with Gaussian function. I tried to do it in multiple different ways but none of them worked. I need some ideas please. The data is attached (columns 2 and 3).
import pandas as pd
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from numpy import asarray as ar,exp
x = ar(range(19))
y = ar(0, 0, 0, 0, 0, 0, 0.01955, 1.163025, 19.7159833333333, 81.3119708333334,80.0329166666667,19.3835833333333, 0.03378, 0, 0, 0, 0, 0, 0)
#y = ar(007, 0.04, .175, .628, 1.89, 4.78,10.034,17.542, 25.589, 31.1, 31.544, 26.65, 18.74, 11.01, 5.39, 2.209, 0.74, 0.215. 0.049)
n = len(x)
mean = sum(x*y)/n
sigma = sum(y*(x-mean)**2)/n
def gaus(x,a,x0,sigma):
return a*exp(-(x-x0)**2/(2*sigma**2))
popt,pcov = curve_fit(gaus,x,y)
#popt,pcov = curve_fit(gaus,x,y,p0=[1,mean,sigma])
plt.scatter(x,y, color='blue')
plt.plot(x,y,label='data', marker='', color='blue', linestyle='-', linewidth=2)
plt.scatter(x,gaus(y,*popt), color='red')
plt.plot(x,gaus(y,*popt),label='fit', marker='', color='Red', linestyle='--', linewidth=2)
print(len(x))
print(mean,sigma)
plt.legend()
plt.xlabel('No of Resets', fontsize=20)
plt.ylabel('Frequency', fontsize=20)
plt.legend(loc='upper right')
plt.title('Gaussian Fit', fontsize=20)
plt.show()
I agree with #ddejohn.
However, you are calculating the mean and std wrongly. You could use the following approximation for the integral
import numpy as np
mean = (x*(y/y.sum())).sum()
sigma = np.sqrt(((y/y.sum())*(x-mean)**2).sum())
These should be used as initial guess for the fit as in your commented line, where you can also add a0 = y.max() for the amplitude.
popt,pcov = curve_fit(gaus,x,y,p0=[a0,mean,sigma])
Then plot as #ddejohn said maybe with more sample points
xx = np.linspace(x[0], x[-1], 100)
plt.plot(xx,gaus(xx,*popt),label='fit', marker='', color='Red', linestyle='--', linewidth=2)

Scope in Python subplot similar to MATLAB's stackedplot()

Is there a plot function available in Python that is same as MATLAB's stackedplot()?
stackedplot() in MATLAB can line plot several variables with the same X axis and are stacked vertically. Additionally, there is a scope in this plot that shows the value of all variables for a given X just by moving the cursor (please see the attached plot). I have been able to generate stacked subplots in Python with no issues, however, not able to add a scope like this that shows the value of all variables by moving the cursor. Is this feature available in Python?
This is a plot using MATLAB's stackedplot():
import pandas as pd
import numpy as np
from datetime import datetime, date, time
import matplotlib.pyplot as plt
import matplotlib
import matplotlib.transforms as transforms
import mplcursors
from collections import Counter
import collections
def flatten(x):
result = []
for el in x:
if isinstance(x, collections.Iterable) and not isinstance(el, str):
result.extend(flatten(el))
else:
result.append(el)
return result
def shared_scope(sel):
sel.annotation.set_visible(False) # hide the default annotation created by mplcursors
x = sel.target[0]
for ax in axes:
for plot in plotStore:
da = plot.get_ydata()
if type(da[0]) is np.datetime64: #pd.Timestamp
yData = matplotlib.dates.date2num(da) # to numerical values
vals = np.interp(x, plot.get_xdata(), yData)
dates = matplotlib.dates.num2date(vals) # to matplotlib dates
y = datetime.strftime(dates,'%Y-%m-%d %H:%M:%S') # to strings
annot = ax.annotate(f'{y:.30s}', (x, vals), xytext=(15, 10), textcoords='offset points',
bbox=dict(facecolor='tomato', edgecolor='black', boxstyle='round', alpha=0.5))
sel.extras.append(annot)
else:
y = np.interp(x, plot.get_xdata(), plot.get_ydata())
annot = ax.annotate(f'{y:.2f}', (x, y), xytext=(15, 10), textcoords='offset points', arrowprops=dict(arrowstyle="->",connectionstyle="angle,angleA=0,angleB=90,rad=10"),
bbox=dict(facecolor='tomato', edgecolor='black', boxstyle='round', alpha=0.5))
sel.extras.append(annot)
vline = ax.axvline(x, color='k', ls=':')
sel.extras.append(vline)
trans = transforms.blended_transform_factory(axes[0].transData, axes[0].transAxes)
text1 = axes[0].text(x, 1.01, f'{x:.2f}', ha='center', va='bottom', color='blue', clip_on=False, transform=trans)
sel.extras.append(text1)
# Data to plot
data = pd.DataFrame(columns = ['timeOfSample','Var1','Var2'])
data.timeOfSample = ['2020-05-10 09:09:02','2020-05-10 09:09:39','2020-05-10 09:40:07','2020-05-10 09:40:45','2020-05-12 09:50:45']
data['timeOfSample'] = pd.to_datetime(data['timeOfSample'])
data.Var1 = [10,50,100,5,25]
data.Var2 = [20,55,70,60,50]
variables = ['timeOfSample',['Var1','Var2']] # variables to plot - Var1 and Var2 to share a plot
nPlot = len(variables)
dataPts = np.arange(0, len(data[variables[0]]), 1) # x values for plots
plotStore = [0]*len(flatten(variables)) # to store all the plots for annotation purposes later
fig, axes = plt.subplots(nPlot,1,sharex=True)
k=0
for i in range(nPlot):
if np.size(variables[i])==1:
yData = data[variables[i]]
line, = axes[i].plot(dataPts,yData,label = variables[i])
plotStore[k]=line
k = k+1
else:
for j in range(np.size(variables[i])):
yData = data[variables[i][j]]
line, = axes[i].plot(dataPts,yData,label = variables[i][j])
plotStore[k]=line
k = k+1
axes[i].set_ylabel(variables[i])
cursor = mplcursors.cursor(plotStore, hover=True)
cursor.connect('add', shared_scope)
plt.xlabel('Samples')
plt.show()
mplcursors can be used to create annotations while hovering, moving texts and vertical bars. sel.extras.append(...) helps to automatically hide the elements that aren't needed anymore.
import matplotlib.pyplot as plt
import matplotlib.transforms as transforms
import mplcursors
import numpy as np
def shared_scope(sel):
x = sel.target[0]
annotation_text = f'x: {x:.2f}'
for ax, plot in zip(axes, all_plots):
y = np.interp(x, plot.get_xdata(), plot.get_ydata())
annotation_text += f'\n{plot.get_label()}: {y:.2f}'
vline = ax.axvline(x, color='k', ls=':')
sel.extras.append(vline)
sel.annotation.set_text(annotation_text)
trans = transforms.blended_transform_factory(axes[0].transData, axes[0].transAxes)
text1 = axes[0].text(x, 1.01, f'{x:.2f}', ha='center', va='bottom', color='blue', clip_on=False, transform=trans)
sel.extras.append(text1)
fig, axes = plt.subplots(figsize=(15, 10), nrows=3, sharex=True)
y1 = np.random.uniform(-1, 1, 100).cumsum()
y2 = np.random.uniform(-1, 1, 100).cumsum()
y3 = np.random.uniform(-1, 1, 100).cumsum()
all_y = [y1, y2, y3]
all_labels = ['Var1', 'Var2', 'Var3']
all_plots = [ax.plot(y, label=label)[0]
for ax, y, label in zip(axes, all_y, all_labels)]
for ax, label in zip(axes, all_labels):
ax.set_ylabel(label)
cursor = mplcursors.cursor(all_plots, hover=True)
cursor.connect('add', shared_scope)
plt.show()
Here is a version with separate annotations per subplot:
import matplotlib.pyplot as plt
import matplotlib.transforms as transforms
import mplcursors
import numpy as np
def shared_scope(sel):
sel.annotation.set_visible(False) # hide the default annotation created by mplcursors
x = sel.target[0]
for ax, plot in zip(axes, all_plots):
y = np.interp(x, plot.get_xdata(), plot.get_ydata())
vline = ax.axvline(x, color='k', ls=':')
sel.extras.append(vline)
annot = ax.annotate(f'{y:.2f}', (x, y), xytext=(5, 0), textcoords='offset points',
bbox=dict(facecolor='tomato', edgecolor='black', boxstyle='round', alpha=0.5))
sel.extras.append(annot)
trans = transforms.blended_transform_factory(axes[0].transData, axes[0].transAxes)
text1 = axes[0].text(x, 1.01, f'{x:.2f}', ha='center', va='bottom', color='blue', clip_on=False, transform=trans)
sel.extras.append(text1)
fig, axes = plt.subplots(figsize=(15, 10), nrows=3, sharex=True)
y1 = np.random.uniform(-1, 1, 100).cumsum()
y2 = np.random.uniform(-1, 1, 100).cumsum()
y3 = np.random.uniform(-1, 1, 100).cumsum()
all_y = [y1, y2, y3]
all_labels = ['Var1', 'Var2', 'Var3']
all_plots = [ax.plot(y, label=label)[0]
for ax, y, label in zip(axes, all_y, all_labels)]
for ax, label in zip(axes, all_labels):
ax.set_ylabel(label)
cursor = mplcursors.cursor(all_plots, hover=True)
cursor.connect('add', shared_scope)
plt.show()

Plot a 3D Boundary Decision in Python

I'm trying to plot a 3D Decision Boundary, but it does not seem to be working the way it looks, see how it is:
I want it to appear as in this example here:
I do not know how to explain, but in the example above it literally looks like a "wall". And this is what I want to do in my code.
Then follow my code:
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.set_title('Hello World')
ax.set_xlim(-1, 1)
ax.set_ylim(-1, 1)
ax.set_zlim(-1, 1)
ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_zlabel('Z')
w = [3,2,1]
x = 1
y = 1
z = 1
x_plan = (- w[1] * y - w[2] * z) / w[0]
y_plan = (- w[0] * x - w[2] * z) / w[1]
z_plan = (- w[0] * x - w[1] * y) / w[2]
ax.plot3D([x_plan, 1, 1], [1, y_plan, 1], [1, 1, z_plan], "lightblue")
plt.show()
P.S.: I'm using:
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
I believe that the problem should be in the calculation, or else in the:
ax.plot3D([x_plan, 1, 1], [1, y_plan, 1], [1, 1, z_plan], "lightblue")
P.S.2: I know that my Boundary Decision is not separating the data correctly, but at the moment this is a detail for me, later I will fix it.
To plot a 3d surface you actually need to use plt3d.plot_surface, see reference.
As an example, this piece of code will generate the following image (Notice the comment on plt3d.plot_surface line):
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import numpy as np
def randrange(n, vmin, vmax):
'''
Helper function to make an array of random numbers having shape (n, )
with each number distributed Uniform(vmin, vmax).
'''
return (vmax - vmin)*np.random.rand(n) + vmin
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
n = 10
for c, m, zlow, zhigh in [('r', 'o', 0, 100)]:
xs = randrange(n, 0, 50)
ys = randrange(n, 0, 50)
zs = randrange(n, zlow, zhigh)
ax.scatter(xs, ys, zs, c=c, marker=m)
for c, m, zlow, zhigh in [('b', '^', 0, 100)]:
xs = randrange(n, 60, 100)
ys = randrange(n, 60, 100)
zs = randrange(n, zlow, zhigh)
ax.scatter(xs, ys, zs, c=c, marker=m)
ax.set_xlabel('X Label')
ax.set_ylabel('Y Label')
ax.set_zlabel('Z Label')
xm,ym = np.meshgrid(xs, ys)
ax.plot_surface(xm, ym, xm, color='green', alpha=0.5) # Data values as 2D arrays as stated in reference - The first 3 arguments is what you need to change in order to turn your plane into a boundary decision plane.
plt.show()

Prevent overlapping labels and graphs in matplotlib

I have the following code:
from mpl_toolkits.axes_grid.axislines import SubplotZero
from matplotlib.transforms import BlendedGenericTransform
from matplotlib import patches
import matplotlib.pyplot as plt
import numpy
if 1:
fig = plt.figure(1)
ax = SubplotZero(fig, 111)
fig.add_subplot(ax)
ax.axhline(linewidth=1.7, color="k")
ax.axvline(linewidth=1.7, color="k")
plt.xticks([])
plt.yticks([])
ax.text(0, 1.05, r'$y$', transform=BlendedGenericTransform(ax.transData, ax.transAxes), ha='center')
ax.text(1.05, 0, r'$x$', transform=BlendedGenericTransform(ax.transAxes, ax.transData), va='center')
for direction in ["xzero", "yzero"]:
ax.axis[direction].set_axisline_style("-|>")
ax.axis[direction].set_visible(True)
for direction in ["left", "right", "bottom", "top"]:
ax.axis[direction].set_visible(False)
x = numpy.linspace(-1.499999999, 5, 1000000)
yy = numpy.log(2*x + 3)/2 + 3
ax.plot(x, yy, linewidth=1.2, color="black")
plt.ylim(-2, 5)
plt.xlim(-5, 5)
plt.text((numpy.e**(-6) - 3)/2, 0, r'$(\frac{1}{2} (e^{-6} - 3), 0)$', position=((numpy.e**(-6) - 3)/2 + 0.1, 0.1))
plt.plot((numpy.e**(-6) - 3)/2, 0, 'ko')
plt.text(0, numpy.log(3)/2 + 3, r'$(0, \frac{1}{2} \log_e{\left (3 \right )} + 3)$', position=(0.1, numpy.log(3)/2 + 3 + 0.1))
plt.plot(0, numpy.log(3)/2 + 3, 'ko')
plt.savefig('AnswersSA1a.png')
that produces this graph:
The two labelled axis intercepts have significant overlap with the axes as well as the graph itself. I know this could be solved by manually placing the text at a separate point. However - is there a way to have matplotlib smartly place the label so it doesn't overlap?

Categories

Resources