Related
I did a test code brigging something I saw on stack on different topic, and try to assemble it to make what I need : a filled curve with gradient.
After validate this test code I will make a subplot (4 plots for 4 weeks) with the same min/max for all plot (it's a power consumption).
My code :
from matplotlib import pyplot as plt
import numpy as np
# random x
x = range(100)
# smooth random y
y = 0
result = []
for _ in x:
result.append(y)
y += np.random.normal(loc=0, scale=1)#, size=len(x))
y = result
y = list(map(abs, y))
# creation of z for contour
z1 = min(y)
z3 = max(y)/(len(x)+1)
z2 = max(y)-z3
z = [[z] * len(x) for z in np.arange(z1,z2,z3)]
num_bars = len(x) # more bars = smoother gradient
# plt.contourf(x, y, z, num_bars, cmap='greys')
plt.contourf(x, y, z, num_bars, cmap='cool', levels=101)
background_color = 'w'
plt.fill_between(
x,
y,
y2=max(y),
color=background_color
)
But everytime I make the code run, the result display a different gradient scale, that is not smooth neither even straight right.
AND sometime the code is in error : TypeError: Length of y (100) must match number of rows in z (101)
I'm on it since too many time, turning around, and can't figure where I'm wrong...
I finally find something particularly cool, how to :
have both filled gradient curves in a different color (thanks to JohanC in this topic)
use x axis with datetime (thanks to Ffisegydd in this topic)
Here the code :
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import matplotlib.dates as mdates
np.random.seed(2022)
st_date = '2022-11-01 00:00:00'
st_date = pd.to_datetime(st_date)
en_date = st_date + pd.DateOffset(days=7)
x = pd.date_range(start=st_date,end=en_date,freq='30min')
x = mdates.date2num(x)
y = np.random.normal(0.01, 1, len(x)).cumsum()
fig, ax = plt.subplots(figsize=(18, 5))
ax.plot(x, y, color='grey')
########################
# positives fill
#######################
grad1 = ax.imshow(
np.linspace(0, 1, 256).reshape(-1, 1),
cmap='Blues',
vmin=-0.5,
aspect='auto',
extent=[x.min(), x.max(), 0, y.max()],
# extent=[x[0], x[1], 0, y.max()],
origin='lower'
)
poly_pos = ax.fill_between(x, y.min(), y, alpha=0.1)
grad1.set_clip_path(
poly_pos.get_paths()[0],
transform=ax.transData
)
poly_pos.remove()
########################
# negatives fill
#######################
grad2 = ax.imshow(
np.linspace(0, 1, 256).reshape(-1, 1),
cmap='Reds',
vmin=-0.5,
aspect='auto',
extent=[x.min(), x.max(), y.min(), 0],
origin='upper'
)
poly_neg = ax.fill_between(x, y, y.max(), alpha=0.1)
grad2.set_clip_path(
poly_neg.get_paths()[0],
transform=ax.transData
)
poly_neg.remove()
########################
# decorations and formatting plot
########################
ax.xaxis_date()
date_format = mdates.DateFormatter('%d-%b %H:%M')
ax.xaxis.set_major_formatter(date_format)
fig.autofmt_xdate()
ax.grid(True)
I have some 2D data that I am smoothing using:
from scipy.stats import gaussian_kde
kde = gaussian_kde(data)
but what if my data isn't Gaussian/tophat/the other options? Mine looks more elliptical before smoothing, so should I really have a different bandwidth in x and then y? The variance in one direction is a lot higher, and also the values of the x axis are higher, so it feels like a simple Gaussian might miss something?
This is what I get with your defined X and Y. Seems good. Were you expecting something different?
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
def generate(n):
# generate data
np.random.seed(42)
x = np.random.normal(size=n, loc=1, scale=0.01)
np.random.seed(1)
y = np.random.normal(size=n, loc=200, scale=100)
return x, y
x, y = generate(100)
xmin = x.min()
xmax = x.max()
ymin = y.min()
ymax = y.max()
X, Y = np.mgrid[xmin:xmax:100j, ymin:ymax:100j]
positions = np.vstack([X.ravel(), Y.ravel()])
values = np.vstack([x, y])
kernel = stats.gaussian_kde(values)
Z = np.reshape(kernel(positions).T, X.shape)
fig, ax = plt.subplots(figsize=(7, 7))
ax.imshow(np.rot90(Z), cmap=plt.cm.gist_earth_r,
extent=[xmin, xmax, ymin, ymax],
aspect='auto', alpha=.75
)
ax.plot(x, y, 'ko', ms=5)
ax.set_xlim([xmin, xmax])
ax.set_ylim([ymin, ymax])
plt.show()
The distributions of x and y are Gaussian.
You can verify with seaborn too
import pandas as pd
import seaborn as sns
# I pass a DataFrame because passing
# (x,y) alone will be soon deprecated
g = sns.jointplot(data=pd.DataFrame({'x':x, 'y':y}), x='x', y='y')
g.plot_joint(sns.kdeplot, color="r", zorder=0, levels=6)
update
Kernel Density Estimate of 2-dimensional data is done separately along each axis and then join together.
Let's make an example with the dataset we already used.
As we can see in the seaborn jointplot, you have not only the estimated 2d-kde but also marginal distributions of x and y (the histograms).
So, step by step, let's estimate the density of x and y and then evaluate the density over a linearspace
kde_x = sps.gaussian_kde(x)
kde_x_space = np.linspace(x.min(), x.max(), 100)
kde_x_eval = kde_x.evaluate(kde_x_space)
kde_x_eval /= kde_x_eval.sum()
kde_y = sps.gaussian_kde(y)
kde_y_space = np.linspace(y.min(), y.max(), 100)
kde_y_eval = kde_y.evaluate(kde_y_space)
kde_y_eval /= kde_y_eval.sum()
fig, ax = plt.subplots(1, 2, figsize=(12, 4))
ax[0].plot(kde_x_space, kde_x_eval, 'k.')
ax[0].set(title='KDE of x')
ax[1].plot(kde_y_space, kde_y_eval, 'k.')
ax[1].set(title='KDE of y')
plt.show()
So we now have the marginal distributions of x and y. These are probability density functions so, the joint-probability of x and y can be seen as the intersection of independent events x and y, thus we can multiply the estimated probability density of x and y in a 2d-matrix and plot on 3d projection
# Grid of x and y
X, Y = np.meshgrid(kde_x_space, kde_y_space)
# Grid of probability density
kX, kY = np.meshgrid(kde_x_eval, kde_y_eval)
# Intersection
Z = kX * kY
fig, ax = plt.subplots(
2, 2,
subplot_kw={"projection": "3d"},
figsize=(10, 10))
for i, (elev, anim, title) in enumerate(zip([10, 10, 25, 25],
[0, -90, 25, -25],
['y axis', 'x axis', 'view 1', 'view 2']
)):
# Plot the surface.
surf = ax.flat[i].plot_surface(X, Y, Z, cmap=plt.cm.gist_earth_r,
linewidth=0, antialiased=False, alpha=.75)
ax.flat[i].scatter(x, y, zs=0, zdir='z', c='k')
ax.flat[i].set(
xlabel='x', ylabel='y',
title=title
)
ax.flat[i].view_init(elev=elev, azim=anim)
plt.show()
This is a very simple and naif method but only to have an idea on how it works and why x and y scales don't matter for a 2d-KDE.
I want to visualize a function that is discontinuous at a certain value of x (=2).
However I do not get what I hoped. My code is the following:
x = np.arange(-1, 4, 0.1)
y = 2 * x**2 / (x - 2)
df = pd.DataFrame({"x" : x , "y" : y})
% matplotlib inline
import matplotlib
from matplotlib import pyplot as plt
# Set up the graph
plt.xlabel('x')
plt.ylabel('y')
plt.xticks(np.arange(-1,4, 0.5))
plt.yticks(np.arange(-8, 8, 0.5))
plt.axhline()
plt.axvline()
plt.grid()
# Plot the function
plt.plot(df["x"], df["y"], color='red')
axes = plt.gca()
xmin = -1
xmax = 4
ymin = -8
ymax = 8
axes.set_xlim([xmin,xmax])
axes.set_ylim([ymin,ymax])
plt.axvline(2)
plt.show()
What I get is the following:
Why the y values for x > 2 do not appear?
The problem is that you've set your ylim in such a way that it cuts off the values for x > 2.
Consider the output of your function for inputs greater than 2:
f = lambda x: 2 * x**2 / (x - 2)
print([f(i) for i in np.arange(2.1, 4, 0.2)])
#[88.199999999999918, 35.266666666666644, 24.999999999999989, 20.828571428571422,
# 18.688888888888883, 17.472727272727269, 16.753846153846151, 16.333333333333332,
# 16.105882352941176, 16.010526315789473]
If you changed the ylim constraints, you'll see the plot below:
# Set up the graph
plt.xlabel('x')
plt.ylabel('y')
#plt.xticks(np.arange(-1,4, 0.5))
#plt.yticks(np.arange(-8, 8, 0.5))
plt.axhline()
plt.axvline()
plt.grid()
# Plot the function
plt.plot(df["x"], df["y"], color='red')
axes = plt.gca()
xmin = -1
xmax = 4
ymin = -8
ymax = 100
axes.set_xlim([xmin,xmax])
axes.set_ylim([ymin,ymax])
plt.axvline(2)
plt.show()
How to center the bar plot to show the difference of a certain column?
I have the following bar plot, done with matplotlib :
Note how the barplot is really bad. The difference between each bar cant really be seen properly. So what I want, is to use the red bar as the origin in the y-axis. That way, the other bars would show the difference (blue_bar(i) - redbar).
In other words, I want the value of the red bar in the y-axis to be the y-origin of the plot.
Again, in another words, the red bar is the accuracy obtained by my academic work. I want to plot the other article results compared/ IN RELATION to mine.
I made the following picture using paint.net to illustrate what I want.
Any other ideas/suggestions are really appreciated.
Appendix :
I used the following code to produce the first graphic :
import numpy as np
import random
from matplotlib import pyplot as plt
accuracies = [0.9630, 0.9597, 0.9563, 0.9533, 0.9527, 0.9480, 0.9477, 0.9472, 0.9472, 0.9466, 0.9452, 0.9452, 0.9442,
0.9440, 0.9434, 0.9420, 0.9407, 0.9407, 0.9391, 0.9377, 0.9185, 0.9268]
sensitividades = [0.7680, 0.7200, 0.8173, 0.7569, 0.7406, 0.7354, 0.7746, 0.7344, 0.7067, 0.7410, 0.7370, 0.7321,
0.7357]
especificidades = [0.9827, 0.9733, 0.9816, 0.9807, 0.9789, 0.9724, 0.9764, 0.9801, 0.9751, 0.9521, 0.9487, 0.9694]
accuracies = [x * 100 for x in accuracies]
y = accuracies
N = len(y)
x = range(N)
width = 1 / 1.1
fig, ax = plt.subplots(1, 1)
ax.grid(zorder=0)
# Plot other articles
ax.bar(x, y, width, color="blue", zorder=3)
# Plot my work
ax.bar(x[len(x) - 1] + 1, 95.30, width, color="red", zorder=3)
plt.title('Accuracy of each article')
plt.xlabel('Article')
plt.ylabel('Accuracy')
plt.savefig('foo.png')
plt.show()
You could either set the y limits closer to the interesting values:
import numpy as np
import random
from matplotlib import pyplot as plt
accuracies = [0.9630, 0.9597, 0.9563, 0.9533, 0.9527, 0.9480, 0.9477, 0.9472, 0.9472, 0.9466, 0.9452, 0.9452, 0.9442,
0.9440, 0.9434, 0.9420, 0.9407, 0.9407, 0.9391, 0.9377, 0.9185, 0.9268]
sensitividades = [0.7680, 0.7200, 0.8173, 0.7569, 0.7406, 0.7354, 0.7746, 0.7344, 0.7067, 0.7410, 0.7370, 0.7321,
0.7357]
especificidades = [0.9827, 0.9733, 0.9816, 0.9807, 0.9789, 0.9724, 0.9764, 0.9801, 0.9751, 0.9521, 0.9487, 0.9694]
accuracies = [x * 100 for x in accuracies]
my_acc = 95.30
y = accuracies
N = len(y)
x = range(N)
width = 1 / 1.1
fig, ax = plt.subplots(1, 1)
ax.grid(zorder=0)
# Plot other articles
ax.bar(x, y, width, color="blue", zorder=3)
# Plot my work
ax.bar(x[len(x) - 1] + 1, my_acc, width, color="red", zorder=3)
plt.title('Accuracy of each article')
plt.ylim(min(y) - 0.5, max(y) +0.5)
plt.xlabel('Article')
plt.ylabel('Accuracy')
plt.savefig('foo2.png')
plt.show()
Or you could plot it around zero, with your result being the new origin (but you will have to indicate by how much you shifted the origin somewhere in the legend or somewhere else):
import numpy as np
import random
from matplotlib import pyplot as plt
accuracies = [0.9630, 0.9597, 0.9563, 0.9533, 0.9527, 0.9480, 0.9477, 0.9472, 0.9472, 0.9466, 0.9452, 0.9452, 0.9442,
0.9440, 0.9434, 0.9420, 0.9407, 0.9407, 0.9391, 0.9377, 0.9185, 0.9268]
sensitividades = [0.7680, 0.7200, 0.8173, 0.7569, 0.7406, 0.7354, 0.7746, 0.7344, 0.7067, 0.7410, 0.7370, 0.7321,
0.7357]
especificidades = [0.9827, 0.9733, 0.9816, 0.9807, 0.9789, 0.9724, 0.9764, 0.9801, 0.9751, 0.9521, 0.9487, 0.9694]
accuracies = [x * 100 for x in accuracies]
my_acc = 95.30
y = np.asarray(accuracies) - my_acc
N = len(y)
x = range(N)
width = 1 / 1.1
fig, ax = plt.subplots(1, 1)
ax.grid(zorder=0)
# Plot other articles
bars = ax.bar(x, y, width, color="blue", zorder=3)
# Plot my work
# ax.bar(x[len(x) - 1] + 1, my_acc, width, color="red", zorder=3)
plt.title('Accuracy of each article')
plt.yticks([0, -0.3, -1.3, -2.3, -3.3, 0.7, 1.7], [95.30, 95, 94, 93, 92, 96, 97])
plt.xlabel('Article')
plt.ylabel('Accuracy')
plt.ylim(min(y) - 0.5, max(y) + 0.7)
def autolabel(rects):
for i in range(len(rects)):
rect = rects[i]
height = rect.get_height()
if (height >= 0):
ax.text(rect.get_x() + rect.get_width()/2.,
0.3 + height,'[{}]'.format( i), ha='center', va='bottom',
fontsize=7.5)
if (height < 0):
ax.text(rect.get_x() + rect.get_width()/2.,
height - 0.3,'[{}]'.format( i), ha='center', va='bottom',
fontsize=7.5)
autolabel(bars)
plt.savefig('foo.png')
plt.show()
Of course, your own result would not appear in the second plot, since it would have height zero.
I actually think the way you have it represented now is actually best -- meaning there isn't a huge difference in accuracy on a cursory level.
However, if you want to set the value of the red bar as the origin, try this:
...
plt.title('Accuracy of each article')
plt.xlabel('Article')
plt.ylabel('Accuracy')
plt.ylim(95.30) # Sets the value of the red bar as the origin.
plt.savefig('foo.png')
plt.show()
Perhaps setting the minimum value of lowest accuracy of the article might make this graph more digestible.
...
plt.title('Accuracy of each article')
plt.xlabel('Article')
plt.ylabel('Accuracy')
plt.ylim(min(accuracies), 100) # Sets the value of minimum accuracy as the origin and the max value as 100.
plt.savefig('foo.png')
plt.show()
I can't get matplotlib to make a contour plot with equal x and y axes. Note that I try defining the figure with equal figsize (10,10) and also try 'equal' in the axis function. Neither work, as is show by the "circle" that is a oval:
import matplotlib.pyplot as plt
from matplotlib.colors import BoundaryNorm
from matplotlib.ticker import MaxNLocator
import numpy as np
# generate 2 2d grids for the x & y bounds
y, x = np.mgrid[slice(0, 1 + 0.1, 0.1),
slice(0, 1 + 0.1, 0.1)]
z = y + x
z = z[:-1, :-1]
levels = MaxNLocator(nbins=15).tick_values(z.min(), z.max())
# pick the desired colormap, sensible levels, and define a normalization
# instance which takes data values and translates those into levels.
plt.figure(num=None, figsize=(10,10))
cmap = plt.get_cmap('nipy_spectral')
norm = BoundaryNorm(levels, ncolors=cmap.N, clip=True)
# contours are *point* based plots, so convert our bound into point centers
plt.contour(x[:-1, :-1] + 0.1 / 2.,
y[:-1, :-1] + 0.1 / 2., z, levels=levels,
cmap=cmap, zorder=1)
plt.colorbar()
plt.axis([0, 1, 0, 1],'equal')
plt.tight_layout()
circle=plt.Circle((0.4,0.5),.1,color='k',fill=False)
plt.gca().add_artist(circle)
plt.savefig('not_square')
To get a square plot try plt.axis('square');
def f(x, y):
return x ** 2 + y ** 2
x = np.linspace(-2, 2, 50)
y = np.linspace(-2, 2, 50)
X, Y = np.meshgrid(x, y)
Z = f(X, Y)
plt.contour(X, Y, Z, colors='black');
plt.axis('square');