Matplotlib: Shared axis for imshow images - python

I'm trying to plot multiple images with Matplotlib's imshow() method, and have them share a single y axis. Although the images have the same number of y pixels, the images don't end up the same height.
Demonstration code;
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import poisson
def ibp_oneparam(alpha, N):
"""One-parameter IBP"""
# First customer
Z = np.array([np.ones(poisson(alpha).rvs(1))], dtype=int)
# ith customer
for i in range(2, N+1):
# Customer walks along previously sampled dishes
z_i = []
for previously_sampled_dish in Z.T:
m_k = np.sum(previously_sampled_dish)
if np.random.rand() >= m_k / i:
# Customer decides to sample this dish
z_i.append(1.0)
else:
# Customer decides to skip this dish
z_i.append(0.0)
# Customer decides to try some new dishes
z_i.extend(np.ones(poisson(alpha / i).rvs(1)))
z_i = np.array(z_i)
# Add this customer to Z
Z_new = np.zeros((
Z.shape[0] + 1,
max(Z.shape[1], len(z_i))
))
Z_new[0:Z.shape[0], 0:Z.shape[1]] = Z
Z = Z_new
Z[i-1, :] = z_i
return Z
np.random.seed(3)
N = 10
alpha = 2.0
#plt.figure(dpi=100)
fig, (ax1, ax2, ax3) = plt.subplots(
1,
3,
dpi=100,
sharey=True
)
Z = ibp_oneparam(alpha, N)
plt.sca(ax1)
plt.imshow(
Z,
extent=(0.5, Z.shape[1] + 0.5, len(Z) + 0.5, 0.5),
cmap='Greys_r'
)
plt.ylabel("Customers")
plt.xlabel("Dishes")
plt.xticks(range(1, Z.shape[1] + 1))
plt.yticks(range(1, Z.shape[0] + 1))
Z = ibp_oneparam(alpha, N)
plt.sca(ax2)
plt.imshow(
Z,
extent=(0.5, Z.shape[1] + 0.5, len(Z) + 0.5, 0.5),
cmap='Greys_r'
)
plt.xlabel("Dishes")
plt.xticks(range(1, Z.shape[1] + 1))
Z = ibp_oneparam(alpha, N)
plt.sca(ax3)
plt.imshow(
Z,
extent=(0.5, Z.shape[1] + 0.5, len(Z) + 0.5, 0.5),
cmap='Greys_r'
)
plt.xlabel("Dishes")
plt.xticks(range(1, Z.shape[1] + 1))
plt.show()
Output;
I expect these images to each have the same height, and have varying widths. How can I achieve this?
Aside: The code above is demonstrating the Indian Buffet Process. For the purposes of this post, consider the three images to be random binary matrices with the same number of rows, but variable numbers of columns.
Thank you,

I got a decent result with grid-spec width_ratios.
"""fig, (ax1, ax2, ax3) = plt.subplots(
1,
3,
dpi=100,
sharey=True,
constrained_layout=True
)"""
# I commented the above code and replaced with below.
import matplotlib.gridspec as gridspec
fig = plt.figure(constrained_layout=True)
gs = gridspec.GridSpec(ncols=3, nrows=1, figure=fig, width_ratios=[7./4.,1,6./4.])
ax1 = fig.add_subplot(gs[0,0])
ax2 = fig.add_subplot(gs[0,1])
ax3 = fig.add_subplot(gs[0,2])
It's some what counter intuitive that you need to use width ratios to adjust the heights but in the context of a grid with multiple rows it makes sense that you can only scale columns independently by width. and rows independently by height.
https://matplotlib.org/tutorials/intermediate/gridspec.html

Related

Rank line plot customization

Currently I'm trying to plot a graph showing the rank of some equipment in operation, the rank goes from 1 to 300 (1 is the best, 300 is the worst) over a few days (df columns). What I'm trying to do, is a graph similar to this:
And what I got is this:
I would like to make the lines inclined as it is on the first graph instead of vertical, but I can't figure it out how. I found the base for the first graph on this question here and I started the code from there, this is what I end up having:
import matplotlib.pyplot as plt
import matplotlib.ticker as plticker
import numpy as np
def energy_rank(data, marker_width=.5, color='blue'):
y_data = np.repeat(data, 2)
x_data = np.empty_like(y_data)
x_data[0::2] = np.arange(1, len(data)+1) - (marker_width/2)
x_data[1::2] = np.arange(1, len(data)+1) + (marker_width/2)
lines = []
lines.append(plt.Line2D(x_data, y_data, lw=0.8, linestyle='dashed', color=color,alpha=1,marker='.'))
for x in range(0,len(data)*2, 2):
lines.append(plt.Line2D(x_data[x:x+2], y_data[x:x+2], lw=2, linestyle='solid', color=color))
return lines
head = 8
dfPlot = vazio.sort_values(dia, ascending = True).head(head)
data = dfPlot.to_numpy()
colorsHEX=('#FE5815','#001A70','#2F5C22','#B01338','#00030D','#2DE1FC','#2E020C','#B81D8C')
artists = []
for row, color in zip(data, colorsHEX):
artists.extend(energy_rank(row, color=color))
eixoXDatas = pd.to_datetime(list(vazio.columns),format='%d/%m/%y').strftime('%d/%b')
fig, ax = plt.subplots()
plt.xticks(np.arange(len(vazio.columns)),
eixoXDatas,
rotation = 35,
fontsize = 14)
plt.yticks(fontsize = 14)
plt.xlabel('Dias', fontsize=18)
plt.ylabel('Ranking', fontsize=18)
fig = plt.gcf()
fig.set_size_inches(16, 8)
for artist in artists:
ax.add_artist(artist)
ax.set_ybound([0,15])
ax.set_ylim(ax.get_ylim()[::-1])
ax.set_xbound([-0.1,float(len(vazio.columns))+2.5])
plt.yticks(np.arange(1,16,step=1))
ax.grid(axis='y',alpha=0.5)
lastDay = vazio.sort_values(vazio.iloc[:,-1:].columns.values[0], ascending = True).iloc[:,-1:]
lastDay = lastDay.head(head)
for inverter, pos in lastDay.iterrows():
ax.annotate(inverter, xy =(plt.gca().get_xlim()[1]-2.4, pos), color=colorsHEX[int(pos)-1])
I tried implementing on energy_rank function, removing the +/- parts on x_data but I only could end up with inclined lines with dots instead of the horizontal lines. Can anyone help me out how can I mantain the horziontal lines and instead of vertical dashed lines, implement inclined lines as the example above?
I imagine that is vertical because the points change on top of the x ticks. If you observe the 1st image, the horizontal bars are centralized on each x tick, so the lines "have some room" to be inclined.
vazio dataframe is as follows (contains the rank of each equipment):
Equipment 21-03-27 21-03-28 21-03-29 21-03-30 21-03-31 21-04-01 21-04-02
P01-INV-1-1 1 1 1 1 1 2 2
P01-INV-1-2 2 2 4 4 5 1 1
P01-INV-1-3 4 4 3 5 6 10 10
Here is an adaption of your energy_rank function creating horizontal line segments together with their connections. The line drawing part is inspired by this tutorial example. Optionally the area below the lines can be filled.
import matplotlib.pyplot as plt
from matplotlib.collections import LineCollection
import numpy as np
def energy_rank(data, marker_width=.5, color='blue', ax=None, fill=False):
ax = ax or plt.gca()
y = data
x = np.arange(1, len(data) + 1)
segments1 = np.array([x - marker_width / 2, y, x + marker_width / 2, y]).T.reshape(-1, 2, 2)
lc1 = LineCollection(segments1, color=color)
lc1.set_linewidth(2)
lc1.set_linestyle('-')
lines_hor = ax.add_collection(lc1)
segments2 = np.array([x[:-1] + marker_width / 2, y[:-1], x[1:] - marker_width / 2, y[1:]]).T.reshape(-1, 2, 2)
lc2 = LineCollection(segments2, color=color)
lc2.set_linewidth(0.5)
lc2.set_linestyle('--')
lines_connect = ax.add_collection(lc2)
if fill:
ax.fill_between(segments1.reshape(-1,2)[:,0], segments1.reshape(-1,2)[:,1],
color=color, alpha=0.05)
return lines_hor, lines_connect
fig, ax = plt.subplots()
M, N = 5, 25
y = np.random.uniform(-2, 2, (M, N)).cumsum(axis=1)
y += np.random.uniform(0.5, 2, (M, 1)) - y.min(axis=1, keepdims=True)
colorsHEX = ('#FE5815', '#001A70', '#2F5C22', '#B01338', '#00030D')
for yi, color in zip(y, colorsHEX):
energy_rank(yi, ax=ax, color=color)
ax.set_xlim(0, N + 1)
ax.set_ylim(0, y.max() + 1)
plt.show()

normal distribution curve doesn't fit well over histogram in subplots using matplotlib

I am using "plt.subplots(2, 2, sharex=True, sharey=True)" to draw a 2*2 subplots. Each subplot has two Y axis and contains normal distribution curve over a histogram. Noting I particularly set "sharex=True, sharey=True" here in order to make all subplots share the same X axis and Y axis.
After running my code, everything is fine except the second, three, and fourth subplots where the normal distribution curve doesn't fit the histogram very well (please see the figure here)
I did googling but failed to get this issue solved. However, if I set "sharex=True, sharey=False" in my code, then the figure looks correct, but all subplots use their own Y axix which isn't what I want. Please see the figure here
Hope this issue can be fixed by experts in StackOverflow. Many thanks in advance!
Below is my code:
import matplotlib.pyplot as plt
from scipy.stats import norm
def align_yaxis(ax1, v1, ax2, v2):
#adjust ax2 ylimit so that v2 in ax2 is aligned to v1 in ax1
_, y1 = ax1.transData.transform((0, v1))
_, y2 = ax2.transData.transform((0, v2))
inv = ax2.transData.inverted()
_, dy = inv.transform((0, 0)) - inv.transform((0, y1-y2))
miny, maxy = ax2.get_ylim()
ax2.set_ylim(miny+dy, maxy+dy)
def drawSingle(myax, mydf , title, offset):
num_bins = 200
xs = mydf["gap"]
x = np.linspace(-1,1,1000)
mu =np.mean(x)
sigma =np.std(xs)
n, bins, patche = myax.hist(xs, num_bins, alpha=0.8, facecolor='blue', density=False)
myax.set_ylabel('frequency',color="black",fontsize=12, weight = "bold")
myax.set_xlabel('X', fontsize=12, weight = "bold",horizontalalignment='center')
ax_twin = myax.twinx()
y_normcurve = norm.pdf(bins, mu, sigma)
ax_twin.plot(bins, y_normcurve, 'r--')
align_yaxis(myax,0,ax_twin,0)
peakpoint = norm.pdf(mu,loc=mu,scale=sigma)
plt.vlines(mu, 0, peakpoint, 'y', '--', label='example')
ax_twin.set_ylabel("probablility dense",color="black",fontsize=12, weight = "bold")
def drawSubplots(mydf1,mydf2,mydf3,mydf4, pos1,pos2,pos3,pos4, title, filename):
plt.rcParams['figure.figsize'] = (18,15 )
my_x_ticks = np.arange(-0.8, 0.8,0.1)
rows, cols = 2, 2
fig, ax = plt.subplots(2, 2, sharex=True, sharey=True)
drawSingle(ax[0][0], mydf1, "Subplot1", pos1)
drawSingle(ax[0][1], mydf2, "Subplot2", pos2)
drawSingle(ax[1][0], mydf3, "Subplot3", pos3)
drawSingle(ax[1][1], mydf4, "Subplot4", pos4)
plt.text(-1, -1, title, horizontalalignment='center', fontsize=18)
plt.show()
drawSubplots(df1, df2,df3,df4,3.2,3.1,2.7,2.85,"test9", "test9")
Here is an attempt to:
have the left y-axes being "frequency" (which is very uninformative in the case of the current bin widths) and shared among the 4 subplots
have the right y-axes be a "probability density"; note how the top of all gaussians is around y=0.02 (the twin axes can only be set at the end because the shared y axes can be updated via later subplots)
have the histogram and the normal curve aligned
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from scipy.stats import norm
def drawSingle(myax, mydf, title):
num_bins = 200
xs = mydf["gap"]
x = np.linspace(-1, 1, 1000)
mu = np.mean(x)
sigma = np.std(xs)
n, bins, patches = myax.hist(xs, num_bins, alpha=0.8, facecolor='blue', density=False)
myax.set_ylabel('frequency', color="black", fontsize=12, weight="bold")
myax.set_xlabel('X', fontsize=12, weight="bold", horizontalalignment='center')
normalization_factor = len(xs) * (bins[1] - bins[0])
y_normcurve = norm.pdf(x, mu, sigma) * normalization_factor
myax.plot(x, y_normcurve, 'r--')
myax.vlines(mu, 0, y_normcurve.max(), 'y', '--', color='lime', label='example')
return normalization_factor
def drawSubplots(mydf1, mydf2, mydf3, mydf4, title):
plt.rcParams['figure.figsize'] = (18, 15)
fig, ax = plt.subplots(nrows=2, ncols=2, sharex=True, sharey=True)
dfs = [mydf1, mydf2, mydf3, mydf4]
norm_factors = [drawSingle(ax_i, df, title)
for ax_i, df, title in zip(ax.ravel(), dfs, ["Subplot1", "Subplot2", "Subplot3", "Subplot4"])]
for ax_i, norm_factor in zip(ax.ravel(), norm_factors):
ax_twin = ax_i.twinx()
ymax = ax_i.get_ylim()[1]
ax_twin.set_ylim(0, ymax / norm_factor)
plt.suptitle(title, fontsize=18)
plt.tight_layout()
plt.show()
df1, df2, df3, df4 = [pd.DataFrame({"gap": np.random.normal(0, 0.2, n)}) for n in [6000, 4000, 1800, 1200]]
drawSubplots(df1, df2, df3, df4, "Title")
Many thanks JohanC, you are amazing.
Based on your code, I just added a few lines of code within drawSubplots function in order to make 95% of the Gaussian curve area shaded between the lower bound and upper bound for each subplot. The following is my try. It seems that ax_twin.fill_between doesn't work normally here. As you could see from the figure that the shaded area is out of the Gaussian curve enter image description here. What I want is only to shade the area under the Gaussian curve between the lower bound and upper bound. If you don't mind, would you please check it out my mistake? Thank you very much!
import matplotlib.pyplot as plt
import math
from scipy.stats import norm
def align_yaxis(ax1, v1, ax2, v2):
#adjust ax2 ylimit so that v2 in ax2 is aligned to v1 in ax1
_, y1 = ax1.transData.transform((0, v1))
_, y2 = ax2.transData.transform((0, v2))
inv = ax2.transData.inverted()
_, dy = inv.transform((0, 0)) - inv.transform((0, y1-y2))
miny, maxy = ax2.get_ylim()
ax2.set_ylim(miny+dy, maxy+dy)
def drawSingle(myax, mydf , title):
num_bins = 200
xs = mydf["gap"]
x = np.linspace(-1,1,1000)
mu =np.mean(xs)
sigma =np.std(xs)
n, bins, patches = myax.hist(xs, num_bins, alpha=0.8, facecolor='blue', density=False)
myax.set_ylabel('Frequency', color="black", fontsize=12, weight="bold")
myax.set_xlabel(title, fontsize=12, weight="bold", horizontalalignment='center')
normalization_factor = len(xs) * (bins[1] - bins[0])
y_normcurve = norm.pdf(x, mu, sigma) * normalization_factor
myax.plot(x, y_normcurve, 'r--')
myax.vlines(mu, 0, y_normcurve.max(), 'y', '--', color='lime', label='example')
plt.xlim(-0.8,0.8)
my_x_ticks = np.arange(-0.8, 0.8,0.1)
plt.xticks(my_x_ticks)
return normalization_factor, mu, sigma
def drawSubplots(mydf1,mydf2,mydf3,mydf4, title):
plt.rcParams['figure.figsize'] = (18,15 )
norm_factors = []
mus = []
sigmas = []
my_x_ticks = np.arange(-0.8, 0.8,0.1)
rows, cols = 2, 2
fig, ax = plt.subplots(nrows=rows, ncols=cols, sharex=True, sharey=True)
dfs = [mydf1, mydf2, mydf3, mydf4]
#norm_factors = [drawSingle(ax_i, df, title)
#for ax_i, df, title in zip(ax.ravel(), dfs, ["Subplot1", "Subplot2", "Subplot3", "Subplot4"])]
for ax_i, df, title in zip(ax.ravel(), dfs, ["Subplot1", "Subplot2", "Subplot3", "Subplot4"]):
norm_factor, mu, sigma = drawSingle(ax_i, df, title)
norm_factors.append(norm_factor)
mus.append(mu)
sigmas.append(sigma)
for ax_i, norm_factor, mu, sigma in zip(ax.ravel(), norm_factors, mus, sigmas ):
ax_twin = ax_i.twinx()
xmax = ax_i.get_xlim()[1]
ax_twin.set_ylim(0, xmax / norm_factor)
ax_twin.set_ylabel("probablility dense",color="black",fontsize=12, weight = "bold")
CI_95_lower = mu - (1.96*sigma)
CI_95_upper = mu + (1.96*sigma)
px_shaded = np.arange(CI_95_lower,CI_95_upper,0.1)
ax_twin.fill_between(px_shaded,norm.pdf(px_shaded,loc=mu,scale=sigma) * norm_factor,alpha=0.75, color='pink')
area_shaded_95_CI = norm.cdf(x=CI_95_upper, loc=mu, scale=sigma)-norm.cdf(x=CI_95_lower, loc=mu, scale=sigma)
ax_twin.text(-0.06,0.01,str(round(area_shaded_95_CI*100,1))+"%", fontsize=20)
ax_twin.annotate(s=f'lower bound= {CI_95_lower:.3f}',xy=(CI_95_lower,norm.pdf(CI_95_lower,loc=mu,scale=sigma)),xytext=(-0.75,0.01),weight='bold',color='blue',\
arrowprops=dict(arrowstyle='-|>',connectionstyle='arc3',color='green'),\
fontsize=12
)
ax_twin.annotate(s=f'upper bound= {CI_95_upper:.3f}',xy=(CI_95_upper,norm.pdf(CI_95_upper,loc=mu,scale=sigma)),xytext=(0.28,0.01),weight='bold',color='blue',\
arrowprops=dict(arrowstyle='-|>',connectionstyle='arc3',color='green'),\
fontsize=12
)
ax_twin.text(0.05, 0.03, r"$\mu=" + f'{mu:.6f}' + ", \sigma=" + f'{sigma:.6f}' + "$" + ", confidence interval=95%" ,
horizontalalignment='center', fontsize=15)
plt.suptitle(title, fontsize=18)
plt.tight_layout()
plt.show()
df1, df2, df3, df4 = [pd.DataFrame({"gap": np.random.normal(0, 0.2, n)}) for n in [6000, 4000, 1800, 1200]]
drawSubplots(df1, df2, df3, df4, "Title")

Matplotlib - Maintain consistent subplot size for different layouts

I am facing what I thought would be a simple problem, but I am struggling to find a simple and scalable solution. Basically, I would like to make some figure in Matplotlib with different numbers of subplots and different layouts for each figure.
The specific requirement that I have for these figures is that I want all subplots, across all figures to have the same exact size.
The simplest solution that I have tried would be to scale the figsize according to the number of subplots that I have:
import numpy as np
import matplotlib.pyplot as plt
x = np.linspace(0, 2 * np.pi, 400)
y = np.sin(x ** 2)
fig, ax = plt.subplots(2, 2, figsize=(10,6))
for i in ax.flatten():
i.plot(x, y)
plt.savefig('f1.pdf')
fig, ax = plt.subplots(3, 2, figsize=(10,9))
for i in ax.flatten():
i.plot(x, y)
plt.savefig('f2.pdf')
fig, ax = plt.subplots(2, 3, figsize=(15,6))
for i in ax.flatten():
i.plot(x, y)
plt.savefig('f3.pdf')
So in the code above, for the 2x2 layout, the figsize is set at 10in x 6in and, for instance, for the 3x2 layout at 10in x 9in.
This makes the subplots in each figure be quite similar in terms of their size, but not exactly the same (I check that by importing each figure in Adobe Illustrator and checking the axes dimensions).
Is there an easy and scalable approach that I can use to ensure the same subplot size in each figure for any arbitrary number of subplots and their layout? I would assume something where instead of specifying the figsize, I set the subplot size instead, but I have not figured anything out yet...
Any help will be appreciated!
You may want to use an AxesDivider. The following example creates all axes 3.5" wide (Size.Fixed(3.5)) x 2.0" high (Size.Fixed(2)) and evenly (Size.Scaled(1) for all pads) splits the remaining space for the padding.
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import Divider, Size
x = np.linspace(0, 2 * np.pi, 400)
y = np.sin(x ** 2)
sc = Size.Scaled(1)
fh = Size.Fixed(3.5)
fv = Size.Fixed(2)
fig, ax = plt.subplots(2, 2, figsize=(10,6))
h = [sc, fh] * 2 + [sc]
v = [sc, fv] * 2 + [sc]
divider = Divider(fig, (0.0, 0.0, 1., 1.), h, v)
for i in range(2):
for j in range(2):
ax[i,j].set_axes_locator(divider.new_locator(nx=2*i+1, ny=2*j+1))
for i in ax.flatten():
i.plot(x, y)
plt.savefig('f1.pdf')
fig, ax = plt.subplots(3, 2, figsize=(10,9))
h = [sc, fh] * 2 + [sc]
v = [sc, fv] * 3 + [sc]
divider = Divider(fig, (0.0, 0.0, 1., 1.), h, v)
for i in range(3):
for j in range(2):
ax[i,j].set_axes_locator(divider.new_locator(nx=2*j+1, ny=2*i+1))
for i in ax.flatten():
i.plot(x, y)
plt.savefig('f2.pdf')
fig, ax = plt.subplots(2, 3, figsize=(15,6))
h = [sc, fh] * 3 + [sc]
v = [sc, fv] * 2 + [sc]
divider = Divider(fig, (0.0, 0.0, 1., 1.), h, v)
for i in range(2):
for j in range(3):
ax[i,j].set_axes_locator(divider.new_locator(nx=2*j+1, ny=2*i+1))
for i in ax.flatten():
i.plot(x, y)
plt.savefig('f3.pdf')

SciPy Bernoulli random number generation, different behavior within a for loop for different sample sizes

The purpose of this code is to demonstrate CLT.
If I do the following:
num_samples = 10000
sample_means = np.empty(num_samples)
for i in range(num_samples):
mean = np.mean(st.bernoulli.rvs(p=0.5, size=100))
sample_means[i] = mean
sample_demeaned = np.subtract(sample_means, 0.5)
denominator = np.divide(0.5, np.sqrt(100))
z_ed = np.divide(sample_demeaned, denominator)
plt.hist(z_ed, bins=40, edgecolor='k', density=True)
x = np.linspace(st.norm.ppf(0.001), st.norm.ppf(0.999), 10000)
y = st.norm.pdf(x)
plt.plot(x, y, color='red')
I get:
However, if I try to do it with a for loop for different sample sizes:
num_samples = 10000
sample_sizes = np.array([5, 20, 75, 100])
sample_std_means = np.empty(shape=(num_samples, len(sample_sizes)))
for col, size in enumerate(sample_sizes):
sample_means = np.empty(num_samples)
for i in range(num_samples):
mean = np.mean(st.bernoulli.rvs(p=0.5, size=size))
sample_means[i] = mean
sample_demeaned = np.subtract(sample_means, 0.5)
denominator = np.divide(0.5, np.sqrt(size))
z_ed = np.divide(sample_demeaned, denominator)
sample_std_means[:, col] = sample_means
And then plot each of them in a 2x2 grid:
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(10, 7))
x = np.linspace(st.norm.ppf(0.001), st.norm.ppf(0.999), 10000)
y = st.norm.pdf(x)
for i, ax in enumerate(axes.flatten()):
ax.hist(sample_std_means[i], bins=40, edgecolor='k', color='midnightblue')
ax.set_ylabel('Density')
ax.set_xlabel(f'n = {sample_sizes[i]}')
ax.plot(x, y, color='red')
ax.set_xlim((-3, 3))
plt.show()
I get the following image:
I cannot debug the discrepancy here. Any help is highly appreciated.
Please note that scipy.stats and numpy have been imported as st and np respectively in both code blocks.
First, note that one numpy's strong points is that it allows operations which mix arrays and single numbers. This is called broadcasting. So, for example sample_demeaned = np.subtract(sample_means, 0.5) can be written more concise as sample_demeaned = sample_means - 0.5.
Several issues are going wrong:
sample_std_means[:, col] = sample_means should use the just calculated z_ed instead of sample_means.
ax.hist(sample_std_means[i], ...) uses the i'th row of the array. That row only contains 4 elements. You'd want sample_std_means[;,i] to take the i'th column.
The pdf is drawn in its normalized form (with an area below the curve equal to one). However, the histogram's height is proportional to the number of samples. Its total area is num_samples * bin_width, where the histogram's default bin width is the length from the first to the last element divided by the number of bins. To get both the pdf and histogram with similar sizes, either the histogram should be normalized (using density=True) or the pdf should be multiplied by the expected area of the histogram.
import numpy as np
import scipy.stats as st
import matplotlib.pyplot as plt
num_samples = 10000
sample_sizes = np.array([5, 20, 75, 100])
sample_std_means = np.empty(shape=(num_samples, len(sample_sizes)))
for col, size in enumerate(sample_sizes):
sample_means = np.empty(num_samples)
for i in range(num_samples):
sample_means[i] = np.mean(st.bernoulli.rvs(p=0.5, size=size))
sample_demeaned = sample_means - 0.5
z_ed = sample_demeaned / (0.5 / np.sqrt(size))
sample_std_means[:, col] = z_ed
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(10, 7))
x = np.linspace(st.norm.ppf(0.001), st.norm.ppf(0.999), 1000)
y = st.norm.pdf(x)
for i, ax in enumerate(axes.flatten()):
ax.hist(sample_std_means[:, i], bins=40, edgecolor='k', color='midnightblue', density=True)
ax.set_ylabel('Density')
ax.set_xlabel(f'n = {sample_sizes[i]}')
# bin_width = (sample_std_means[:, i].max() - sample_std_means[:, i].min()) / 40
# ax.plot(x, y * num_samples * bin_width, color='red')
ax.plot(x, y, color='red')
ax.set_xlim((-3, 3))
plt.show()
Now note the weird empty bars in the histograms. A histogram works best for continuous distributions. But the mean of n Bernoulli trials can have at most n+1 different outcomes. When all trials would be True, the mean would be n/n = 1. When all would be False, the mean would be 0. Combined, the possible means are 0, 1/n, 2/n, ..., 1. The histogram of such a discrete distribution should take these values into account for the boundaries between the bins.
The following code creates a scatter plot, using the position of the means and a random y-value to visualize how many there are per x. Also, the position of the bin boundaries is calculated and visualized by dotted vertical lines.
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(10, 7))
for i, ax in enumerate(axes.flatten()):
ax.scatter(sample_std_means[:, i], np.random.uniform(0, 1, num_samples), color='r', alpha=0.5, lw=0, s=1)
# there are n+1 possible mean values for n bernoulli trials
# n+2 boundaries will be needed to separate the bins
bins = np.arange(-1, sample_sizes[i]+1) / sample_sizes[i]
bins += (bins[1] - bins[0]) / 2 # shift half a bin
bins -= 0.5 # subtract the mean
bins /= (0.5 / np.sqrt(sample_sizes[i])) # correction factor
for b in bins:
ax.axvline(b, color='g', ls=':')
ax.set_xlabel(f'n = {sample_sizes[i]}')
ax.set_xlim((-3, 3))
And here are the histograms using these bins:
ax.hist(sample_std_means[:, i], bins=bins, edgecolor='k', color='midnightblue', density=True)

Bulls eye plot of image

I want to plot the Bull's Eye diagram of an image. I tried these codes
Shade 'cells' in polar plot with matplotlib
For Bull's Eye diagram I want to use different colors. Is there is any way to set this colors? In color = choice(['navy','maroon','lightgreen']) colors are repeating according as for loop iterates.
Does anyone know if in matplotlib there is a function correponding to matlab bullseye()?
To provide something similar to the matlab function you mentioned, I drafted some code (shared on git hub as well) for the creation of bullseyes with any number of sector for each radial subdivision. The colour is given by the colormap and by the values below the sector. Moreover, this allows for the modification the centering of the radial subdivision.
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
import matplotlib.ticker as ticker
def bulls_eye(ax, data, cmap=None, norm=None, raidal_subdivisions=(2, 8, 8, 11),
centered=(True, False, False, True), add_nomenclatures=True, cell_resolution=128,
pfi_where_to_save=None, colors_bound='-k'):
"""
Clockwise, from smaller radius to bigger radius.
:param ax:
:param data:
:param cmap:
:param norm:
:param raidal_subdivisions:
:param centered:
:param add_nomenclatures:
:param cell_resolution:
:param pfi_where_to_save:
:return:
"""
line_width = 1.5
data = np.array(data).ravel()
if cmap is None:
cmap = plt.cm.viridis
if norm is None:
norm = mpl.colors.Normalize(vmin=data.min(), vmax=data.max())
theta = np.linspace(0, 2*np.pi, 768)
r = np.linspace(0, 1, len(raidal_subdivisions)+1)
nomenclatures = []
if isinstance(add_nomenclatures, bool):
if add_nomenclatures:
nomenclatures = range(1, sum(raidal_subdivisions)+1)
elif isinstance(add_nomenclatures, list) or isinstance(add_nomenclatures, tuple):
assert len(add_nomenclatures) == sum(raidal_subdivisions)
nomenclatures = add_nomenclatures[:]
add_nomenclatures = True
# Create the circular bounds
line_width_circular = line_width
for i in range(r.shape[0]):
if i == range(r.shape[0])[-1]:
line_width_circular = int(line_width / 2.)
ax.plot(theta, np.repeat(r[i], theta.shape), colors_bound, lw=line_width_circular)
# iterate over cells divided by radial subdivision
for rs_id, rs in enumerate(raidal_subdivisions):
for i in range(rs):
cell_id = sum(raidal_subdivisions[:rs_id]) + i
theta_i = - i * 2 * np.pi / rs + np.pi / 2
if not centered[rs_id]:
theta_i += (2 * np.pi / rs) / 2
theta_i_plus_one = theta_i - 2 * np.pi / rs # clockwise
# Create colour fillings for each cell:
theta_interval = np.linspace(theta_i, theta_i_plus_one, cell_resolution)
r_interval = np.array([r[rs_id], r[rs_id+1]])
angle = np.repeat(theta_interval[:, np.newaxis], 2, axis=1)
radius = np.repeat(r_interval[:, np.newaxis], cell_resolution, axis=1).T
z = np.ones((cell_resolution, 2)) * data[cell_id]
ax.pcolormesh(angle, radius, z, cmap=cmap, norm=norm)
# Create radial bounds
if rs > 1:
ax.plot([theta_i, theta_i], [r[rs_id], r[rs_id+1]], colors_bound, lw=line_width)
# Add centered nomenclatures if needed
if add_nomenclatures:
if rs == 1 and rs_id ==0:
cell_center = (0, 0)
else:
cell_center = ((theta_i + theta_i_plus_one) / 2., r[rs_id] + .5 * r[1] )
if isinstance(nomenclatures[0], (int, long, float, complex)):
ax.annotate(r"${:.3g}$".format(nomenclatures[cell_id]), xy=cell_center,
xytext=(cell_center[0], cell_center[1]),
horizontalalignment='center', verticalalignment='center', size=8)
else:
ax.annotate(nomenclatures[cell_id], xy=cell_center,
xytext=(cell_center[0], cell_center[1]),
horizontalalignment='center', verticalalignment='center', size=12)
ax.grid(False)
ax.set_ylim([0, 1])
ax.set_yticklabels([])
ax.set_xticklabels([])
if pfi_where_to_save is not None:
plt.savefig(pfi_where_to_save, format='pdf', dpi=200)
def multi_bull_eyes(multi_data, cbar=None, cmaps=None, normalisations=None,
global_title=None, canvas_title='title', titles=None, units=None, raidal_subdivisions=(2, 8, 8, 11),
centered=(True, False, False, True), add_nomenclatures=(True, True, True, True),
pfi_where_to_save=None, show=True):
plt.clf()
n_fig = len(multi_data)
if cbar is None:
cbar = [True] * n_fig
if cmaps is None:
cmaps = [mpl.cm.viridis] * n_fig
if normalisations is None:
normalisations = [mpl.colors.Normalize(vmin=np.min(multi_data[i]), vmax=np.max(multi_data[i]))
for i in range(n_fig)]
if titles is None:
titles = ['Title {}'.format(i) for i in range(n_fig)]
h_space = 0.15 / n_fig
h_dim_fig = .8
w_dim_fig = .8 / n_fig
def fmt(x, pos):
# a, b = '{:.2e}'.format(x).split('e')
# b = int(b)
# return r'${} \times 10^{{{}}}$'.format(a, b)
return r"${:.4g}$".format(x)
# Make a figure and axes with dimensions as desired.
fig = plt.figure(figsize=(3 * n_fig, 4))
fig.canvas.set_window_title(canvas_title)
if global_title is not None:
plt.suptitle(global_title)
for n in range(n_fig):
origin_fig = (h_space * (n + 1) + w_dim_fig * n, 0.15)
ax = fig.add_axes([origin_fig[0], origin_fig[1], w_dim_fig, h_dim_fig], polar=True)
bulls_eye(ax, multi_data[n], cmap=cmaps[n], norm=normalisations[n], raidal_subdivisions=raidal_subdivisions,
centered=centered, add_nomenclatures=add_nomenclatures[n])
ax.set_title(titles[n], size=10)
if cbar[n]:
origin_cbar = (h_space * (n + 1) + w_dim_fig * n, .15)
axl = fig.add_axes([origin_cbar[0], origin_cbar[1], w_dim_fig, .05])
cb1 = mpl.colorbar.ColorbarBase(axl, cmap=cmaps[n], norm=normalisations[n], orientation='horizontal',
format=ticker.FuncFormatter(fmt))
cb1.ax.tick_params(labelsize=8)
if units is not None:
cb1.set_label(units[n])
if pfi_where_to_save is not None:
plt.savefig(pfi_where_to_save, format='pdf', dpi=330)
if show:
plt.show()
if __name__ == '__main__':
# Very dummy data:
data = np.array(range(29)) + 1
# TEST bull-eye three-fold
if True:
fig, ax = plt.subplots(figsize=(12, 8), nrows=1, ncols=3,
subplot_kw=dict(projection='polar'))
fig.canvas.set_window_title('Left Ventricle Bulls Eyes')
# First one:
cmap = mpl.cm.viridis
norm = mpl.colors.Normalize(vmin=1, vmax=29)
bulls_eye(ax[0], data, cmap=cmap, norm=norm)
ax[0].set_title('Bulls Eye ')
axl = fig.add_axes([0.14, 0.15, 0.2, 0.05])
cb1 = mpl.colorbar.ColorbarBase(axl, cmap=cmap, norm=norm, orientation='horizontal')
cb1.set_label('Some Units')
# Second one
cmap2 = mpl.cm.cool
norm2 = mpl.colors.Normalize(vmin=1, vmax=29)
bulls_eye(ax[1], data, cmap=cmap2, norm=norm2)
ax[1].set_title('Bulls Eye ')
axl2 = fig.add_axes([0.41, 0.15, 0.2, 0.05])
cb2 = mpl.colorbar.ColorbarBase(axl2, cmap=cmap2, norm=norm2, orientation='horizontal')
cb2.set_label('Some other units')
# Third one
cmap3 = mpl.cm.winter
norm3 = mpl.colors.Normalize(vmin=1, vmax=29)
bulls_eye(ax[2], data, cmap=cmap3, norm=norm3)
ax[2].set_title('Bulls Eye third')
axl3 = fig.add_axes([0.69, 0.15, 0.2, 0.05])
cb3 = mpl.colorbar.ColorbarBase(axl3, cmap=cmap3, norm=norm3, orientation='horizontal')
cb3.set_label('Some more units')
plt.show()
if True:
fig = plt.figure(figsize=(5, 7))
fig.canvas.set_window_title('Bulls Eyes - segmentation assessment')
# First and only:
cmap = mpl.cm.viridis
norm = mpl.colors.Normalize(vmin=1, vmax=29)
ax = fig.add_axes([0.1, 0.2, 0.8, 0.7], polar=True)
bulls_eye(ax, data, cmap=cmap, norm=norm)
ax.set_title('Bulls Eye')
axl = fig.add_axes([0.1, 0.15, 0.8, 0.05])
cb1 = mpl.colorbar.ColorbarBase(axl, cmap=cmap, norm=norm, orientation='horizontal')
cb1.set_label('Some Units')
plt.show()
if True:
multi_data = [range(1,17), list( 0.000000001 * np.array(range(1,17))), list( 0.001 * np.array(range(1,17)))]
print multi_data
multi_bull_eyes(multi_data, raidal_subdivisions=(3,3,4,6),
centered=(True, True, True, True), add_nomenclatures=[True]*3)
plt.show(block=True)
It is proposed within my code LabelsManager on github, that you are welcome to use.
Here is an example, for raidal_subdivisions=(2, 8, 8, 11), and centered=(True, False, False, True):
I don't think that there is a specific bullseye method in matplotlib, but comparing what the matlab function does and the right plot in this answer, I think that you should be able to get what you want playing a bit with the radius coordinates. E.g. (borrowing from the aforesaid answer)
import numpy as np
import matplotlib.pyplot as plt
theta, r = np.mgrid[0:2*np.pi:20j, 0.2:1:10j]
z = np.random.random(theta.size).reshape(theta.shape)
fig, ax = plt.subplots(ncols=1, subplot_kw=dict(projection='polar'))
ax.pcolormesh(theta, r, z)
ax.set_yticklabels([])
ax.set_ylim([0, 1])
plt.show()
plots
I don't understand exactly what you mean with the question about the colors. ax.pcolormesh accept either a colormap or a matplotlib color. If you want specific colors you can play with those two parameters and/or create your own colormap.

Categories

Resources