Divide cell of heatmap in multiple rows - python

I'm working with a heatmap in which each cell has 3 rows of data. I now want to divide each cell into 3 rows, one for each row of data,
so that each row has its own color according to its value.
I tried adapting the approach from the following link, but I had no success dividing the cells into 3 rows:
How to create a heatmap where each cell is divided into 4 triangles?
For that reason I am asking here for help with this modification. Below is the code I have modified, which comes from the link mentioned above:
from matplotlib import pyplot as plt
import numpy as np
M, N = 4,4
values = np.random.uniform(9, 10, (N * 1, M * 2))
fig, ax = plt.subplots()
#ax.imshow(values, extent=[-0.5, M - 0.5, N - 0.5,-0.5], cmap='autumn_r')
ax.imshow(values, extent=[-0.5,M - 0.5, N - 0.5,-0.5], cmap='autumn_r')
ax.set_xticks(np.arange(0, 4))
ax.set_xticks(np.arange(-0.5, M), minor=True)
ax.set_yticks(np.arange(0, 4))
ax.set_yticks(np.arange(-0.5, N), minor=True)
ax.grid(which='minor', lw=6, color='w', clip_on=True)
ax.grid(which='major', lw=2, color='w', clip_on=True)
ax.tick_params(length=0)
for s in ax.spines:
ax.spines[s].set_visible(True)
plt.show()
I appreciate all the help, regards!

When dividing the cells into 2, major tick positions can be used both to set the labels and position subdivision lines. To divide into 3 or more, it probably is easier to explicitly draw horizontal and vertical lines.
Here is some example code:
from matplotlib import pyplot as plt
import numpy as np
M, N = 4, 4 # M columns and N rows of large cells
K, L = 1, 3 # K columns and L rows to subdivide each of the cells
values = np.random.uniform(9, 10, (N * L, M * K))
fig, ax = plt.subplots()
ax.imshow(values, extent=[-0.5, M - 0.5, N - 0.5, -0.5], cmap='autumn_r')
# positions for the labels
ax.set_xticks(np.arange(0, M))
ax.set_yticks(np.arange(0, N))
# thin lines between the sub cells
for i in range(M):
for j in range(1, K):
ax.axvline(i - 0.5 + j / K, color='white', lw=2)
for i in range(N):
for j in range(1, L):
ax.axhline(i - 0.5 + j / L, color='white', lw=2)
# thick line between the large cells
# use clip_on=False and hide the spines to avoid that the border cells look different
for i in range(M + 1):
ax.axvline(i - 0.5, color='skyblue', lw=4, clip_on=False)
for i in range(N + 1):
ax.axhline(i - 0.5, color='skyblue', lw=4, clip_on=False)
ax.tick_params(length=0)
for s in ax.spines:
ax.spines[s].set_visible(False)
plt.show()

Related

Taking quartile values of a list of lists, and plotting them

My code below, when ran, plots 100 curves on a plot for random values of a parameter in an ODE system. However, I am trying to take a 'cross section' of every time point, so I'd have all 100 values at that point, and then take the upper and lower quartiles and median of those values (which in the code would be an array or list), and plot those:
import numpy as np
from scipy.integrate import odeint
import matplotlib.pyplot as plt
import random
N = 1000
I0, R0 = 1, 0
S0 = N - I0 - R0
J0 = I0
beta, gamma = 2/7, 1/7
t = np.linspace(0, 100,100)
empty = []
for i in range(100):
empty.append(random.uniform(1.5, 2.5)*gamma)
def deriv(y, t, N, beta, gamma):
    """Return the time derivatives (dS, dI, dR, dJ) of the model state.

    y is unpacked as (S, I, R, J). t is part of the callback signature
    used with odeint but does not enter the computation.
    """
    S, I, R, J = y
    # Flow of new infections: it leaves S and enters both I and J.
    new_infections = (beta * S * I) / N
    # Flow of recoveries: it leaves I and enters R.
    recoveries = gamma * I
    return -new_infections, new_infections - recoveries, recoveries, new_infections
solns = []
for empt in empty:
ces = odeint(deriv, (S0, I0, R0, J0), t, args=(N, empt, gamma))
solns.append(ces)
J_diffs = []
for sol in solns:
S, I, R, J = sol.T
J_diffs.append(np.diff(J))
fig = plt.figure(facecolor='w')
ax = fig.add_subplot(111, facecolor='#dddddd', axisbelow=True)
ax.set_xlabel('Time in days')
ax.set_ylabel('Number')
# Pass the visibility flag positionally: the ``b`` keyword was renamed to
# ``visible`` in Matplotlib 3.5 and later removed, so ``b=True`` fails on
# current releases while the positional form works on old and new versions.
ax.grid(True, which='major', c='w', lw=2, ls='-')
xcoords = [2.5, 97.5]
# np.diff yields one value fewer than its input, hence t[1:] on the x axis.
for J_diff in J_diffs:
    ax.plot(t[1:], J_diff, 'blue', alpha=1, lw=2)
What I have tried, is using:
for val in solns:
ax.plot(t[1:], np.percentile(val,25), 'blue', alpha=1, lw=2)
but this gives a dimension error, as it only retrieves 1 value from solns, not all of them.
I then tried
for t in solns:
ax.plot(t[1:], np.percentile(val,25), 'blue', alpha=1, lw=2)
as I thought it makes more sense to select the timepoints corresponding to each value, but this returns the same error. I believe I am missing the logical approach on how to achieve this. The end result is to have 3 curves, one displaying a curve made of the 97.5 percentile of all values, one for the 2.5, and one for 0.5th. Where Have I gone wrong in my approach?
You can find the percentiles first, then plot them. Use something like this:
def plot_percentile(data, percentile):
    """Plot, for every time point, the given percentile taken across all runs.

    data is a sequence of equally long series, one per run (e.g. J_diffs).
    percentile is passed straight to np.percentile.
    """
    # Stack the runs so axis 0 indexes the run; reducing over axis 0 leaves
    # one percentile value per time point instead of one value per run.
    curve = np.percentile(np.asarray(data), percentile, axis=0)
    # Right-align with t so that series shortened by np.diff line up with t[1:].
    ax.plot(t[-len(curve):], curve, 'blue', alpha=1, lw=2)


plot_percentile(J_diffs, 97.5)
plot_percentile(J_diffs, 2.5)
plot_percentile(J_diffs, 0.5)
This code puts all three curves on one graph, which isn't what you wanted, but I think you could adapt it from here (use subplots, and pass the subplot index to plot_percentile() as an argument or something similar).

How to distribute points evenly on a circle in matplotlib?

I'm trying to create 10 points distributed on a circle, just like a watch but with 10 numbers instead of 12. and also be able to use the dots/points to plot lines between them.
This is the code I used to create a circle but can't figure out how to make them 10 points, and also how to use the points' coordinates in my code.
import matplotlib.pyplot as plt
import numpy as np
# T = testing
# myList = testing.digitcounter(18, 20)
def circle_points(r, n):
    """Build one (count, 2) array of x/y coordinates per (radius, count) pair.

    r and n are iterated in lockstep. Each circle's angles run from 0 to
    2*pi with both ends included, so the last angle is a full turn past
    the first.
    """
    def ring(radius, count):
        # Angles include the endpoint 2*pi (np.linspace default).
        angles = np.linspace(0, 2*np.pi, count)
        return np.column_stack((radius * np.cos(angles), radius * np.sin(angles)))

    return [ring(radius, count) for radius, count in zip(r, n)]
r = [0.15]
n = [15]
circles = circle_points(r, n)
fig, ax = plt.subplots()
for circle in circles:
ax.scatter(circle[:, 0], circle[:, 1])
ax.set_aspect('equal')
plt.show()
With t = np.linspace(0, 2*np.pi, 10, endpoint=False), you can create 10 angles equally distributed over a circle. The default endpoint=True would have the first and last angle coincide. To have the first point at the top, and going clockwise, interchange cos and sin in the formula.
To plot a continuous line between digits, you could use ax.plot(circle[digits, 0], circle[digits, 1]), with digits a numpy array of integers between 0 and 9. Note that this will contain a line of zero length when two subsequent digits would be equal.
import matplotlib.pyplot as plt
import numpy as np
def circle_points(r, n):
    """Return, per (radius, count) pair, a (count, 2) array of points on a circle.

    The angles are evenly spaced over a full turn with the endpoint
    excluded, so no two points coincide. x uses sin and y uses cos, which
    puts the first point at the top and makes the points run clockwise.
    """
    result = []
    for radius, count in zip(r, n):
        angles = np.linspace(0, 2 * np.pi, count, endpoint=False)
        xy = np.stack((radius * np.sin(angles), radius * np.cos(angles)), axis=1)
        result.append(xy)
    return result
digits = np.random.randint(0, 10, 7) # 7 random digits between 0 and 9
circle10 = circle_points([0.15], [10])[0] # 10 points on a circle
circle10 = circle10[(np.arange(10) - 3) % 10, :] # with 3 at the top
fig, ax = plt.subplots()
ax.scatter(circle10[:, 0], circle10[:, 1], color='crimson')
ax.plot(circle10[digits, 0], circle10[digits, 1], color='dodgerblue', lw=3)
for i, (x, y) in enumerate(circle10):
ax.text(x * 1.1, y * 1.1, i, ha='center', va='center', color='crimson')
ax.set_aspect('equal')
ax.margins(x=0.1, y=0.1) # extra margins, because the text isn't taken into account for the default margins
ax.set_title("".join([f'{d}' for d in digits]), size=16)
ax.axis('off')
plt.show()
Similarly, a 17-pointed star could be drawn with:
N = 17
circle = circle_points([0.15], [N])[0]
for i in range(N):
ax.plot([circle[i, 0], circle[(i + 6) % N, 0]],
[circle[i, 1], circle[(i + 6) % N, 1]],
color='dodgerblue', lw=3)

Matplotlib - Maintain consistent subplot size for different layouts

I am facing what I thought would be a simple problem, but I am struggling to find a simple and scalable solution. Basically, I would like to make some figures in Matplotlib with different numbers of subplots and different layouts for each figure.
The specific requirement that I have for these figures is that I want all subplots, across all figures to have the same exact size.
The simplest solution that I have tried would be to scale the figsize according to the number of subplots that I have:
import numpy as np
import matplotlib.pyplot as plt
x = np.linspace(0, 2 * np.pi, 400)
y = np.sin(x ** 2)
fig, ax = plt.subplots(2, 2, figsize=(10,6))
for i in ax.flatten():
i.plot(x, y)
plt.savefig('f1.pdf')
fig, ax = plt.subplots(3, 2, figsize=(10,9))
for i in ax.flatten():
i.plot(x, y)
plt.savefig('f2.pdf')
fig, ax = plt.subplots(2, 3, figsize=(15,6))
for i in ax.flatten():
i.plot(x, y)
plt.savefig('f3.pdf')
So in the code above, for the 2x2 layout, the figsize is set at 10in x 6in and, for instance, for the 3x2 layout at 10in x 9in.
This makes the subplots in each figure be quite similar in terms of their size, but not exactly the same (I check that by importing each figure in Adobe Illustrator and checking the axes dimensions).
Is there an easy and scalable approach that I can use to ensure the same subplot size in each figure for any arbitrary number of subplots and their layout? I would assume something where instead of specifying the figsize, I set the subplot size instead, but I have not figured anything out yet...
Any help will be appreciated!
You may want to use an AxesDivider. The following example creates all axes 3.5" wide (Size.Fixed(3.5)) x 2.0" high (Size.Fixed(2)) and evenly (Size.Scaled(1) for all pads) splits the remaining space for the padding.
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import Divider, Size
x = np.linspace(0, 2 * np.pi, 400)
y = np.sin(x ** 2)
sc = Size.Scaled(1)
fh = Size.Fixed(3.5)
fv = Size.Fixed(2)
fig, ax = plt.subplots(2, 2, figsize=(10,6))
h = [sc, fh] * 2 + [sc]
v = [sc, fv] * 2 + [sc]
divider = Divider(fig, (0.0, 0.0, 1., 1.), h, v)
# Odd indices in h/v are the fixed-size cells; even indices are the scaled pads.
# The column index j picks the horizontal cell (nx) and the row index i the
# vertical cell (ny), matching the 3x2 and 2x3 figures in this example.
for i in range(2):
    for j in range(2):
        ax[i,j].set_axes_locator(divider.new_locator(nx=2*j+1, ny=2*i+1))
for i in ax.flatten():
    i.plot(x, y)
plt.savefig('f1.pdf')
fig, ax = plt.subplots(3, 2, figsize=(10,9))
h = [sc, fh] * 2 + [sc]
v = [sc, fv] * 3 + [sc]
divider = Divider(fig, (0.0, 0.0, 1., 1.), h, v)
for i in range(3):
for j in range(2):
ax[i,j].set_axes_locator(divider.new_locator(nx=2*j+1, ny=2*i+1))
for i in ax.flatten():
i.plot(x, y)
plt.savefig('f2.pdf')
fig, ax = plt.subplots(2, 3, figsize=(15,6))
h = [sc, fh] * 3 + [sc]
v = [sc, fv] * 2 + [sc]
divider = Divider(fig, (0.0, 0.0, 1., 1.), h, v)
for i in range(2):
for j in range(3):
ax[i,j].set_axes_locator(divider.new_locator(nx=2*j+1, ny=2*i+1))
for i in ax.flatten():
i.plot(x, y)
plt.savefig('f3.pdf')

SciPy Bernoulli random number generation, different behavior within a for loop for different sample sizes

The purpose of this code is to demonstrate CLT.
If I do the following:
num_samples = 10000
sample_means = np.empty(num_samples)
for i in range(num_samples):
mean = np.mean(st.bernoulli.rvs(p=0.5, size=100))
sample_means[i] = mean
sample_demeaned = np.subtract(sample_means, 0.5)
denominator = np.divide(0.5, np.sqrt(100))
z_ed = np.divide(sample_demeaned, denominator)
plt.hist(z_ed, bins=40, edgecolor='k', density=True)
x = np.linspace(st.norm.ppf(0.001), st.norm.ppf(0.999), 10000)
y = st.norm.pdf(x)
plt.plot(x, y, color='red')
I get:
However, if I try to do it with a for loop for different sample sizes:
num_samples = 10000
sample_sizes = np.array([5, 20, 75, 100])
sample_std_means = np.empty(shape=(num_samples, len(sample_sizes)))
for col, size in enumerate(sample_sizes):
sample_means = np.empty(num_samples)
for i in range(num_samples):
mean = np.mean(st.bernoulli.rvs(p=0.5, size=size))
sample_means[i] = mean
sample_demeaned = np.subtract(sample_means, 0.5)
denominator = np.divide(0.5, np.sqrt(size))
z_ed = np.divide(sample_demeaned, denominator)
sample_std_means[:, col] = sample_means
And then plot each of them in a 2x2 grid:
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(10, 7))
x = np.linspace(st.norm.ppf(0.001), st.norm.ppf(0.999), 10000)
y = st.norm.pdf(x)
for i, ax in enumerate(axes.flatten()):
ax.hist(sample_std_means[i], bins=40, edgecolor='k', color='midnightblue')
ax.set_ylabel('Density')
ax.set_xlabel(f'n = {sample_sizes[i]}')
ax.plot(x, y, color='red')
ax.set_xlim((-3, 3))
plt.show()
I get the following image:
I cannot debug the discrepancy here. Any help is highly appreciated.
Please note that scipy.stats and numpy have been imported as st and np respectively in both code blocks.
First, note that one of numpy's strong points is that it allows operations which mix arrays and single numbers. This is called broadcasting. So, for example, sample_demeaned = np.subtract(sample_means, 0.5) can be written more concisely as sample_demeaned = sample_means - 0.5.
Several issues are going wrong:
sample_std_means[:, col] = sample_means should use the just calculated z_ed instead of sample_means.
ax.hist(sample_std_means[i], ...) uses the i'th row of the array. That row only contains 4 elements. You'd want sample_std_means[:, i] to take the i'th column.
The pdf is drawn in its normalized form (with an area below the curve equal to one). However, the histogram's height is proportional to the number of samples. Its total area is num_samples * bin_width, where the histogram's default bin width is the length from the first to the last element divided by the number of bins. To get both the pdf and histogram with similar sizes, either the histogram should be normalized (using density=True) or the pdf should be multiplied by the expected area of the histogram.
import numpy as np
import scipy.stats as st
import matplotlib.pyplot as plt
num_samples = 10000
sample_sizes = np.array([5, 20, 75, 100])
# One column of standardized sample means per sample size.
sample_std_means = np.empty(shape=(num_samples, len(sample_sizes)))
for col, size in enumerate(sample_sizes):
    # Draw every experiment for this sample size in a single call: one row
    # per experiment and `size` Bernoulli trials per row. This replaces
    # num_samples separate rvs() calls with one vectorized draw.
    trials = st.bernoulli.rvs(p=0.5, size=(num_samples, size))
    sample_means = trials.mean(axis=1)
    # Standardize: a Bernoulli(0.5) mean has expectation 0.5 and standard
    # deviation 0.5 / sqrt(size).
    sample_demeaned = sample_means - 0.5
    z_ed = sample_demeaned / (0.5 / np.sqrt(size))
    sample_std_means[:, col] = z_ed
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(10, 7))
x = np.linspace(st.norm.ppf(0.001), st.norm.ppf(0.999), 1000)
y = st.norm.pdf(x)
for i, ax in enumerate(axes.flatten()):
ax.hist(sample_std_means[:, i], bins=40, edgecolor='k', color='midnightblue', density=True)
ax.set_ylabel('Density')
ax.set_xlabel(f'n = {sample_sizes[i]}')
# bin_width = (sample_std_means[:, i].max() - sample_std_means[:, i].min()) / 40
# ax.plot(x, y * num_samples * bin_width, color='red')
ax.plot(x, y, color='red')
ax.set_xlim((-3, 3))
plt.show()
Now note the weird empty bars in the histograms. A histogram works best for continuous distributions. But the mean of n Bernoulli trials can have at most n+1 different outcomes. When all trials would be True, the mean would be n/n = 1. When all would be False, the mean would be 0. Combined, the possible means are 0, 1/n, 2/n, ..., 1. The histogram of such a discrete distribution should take these values into account for the boundaries between the bins.
The following code creates a scatter plot, using the position of the means and a random y-value to visualize how many there are per x. Also, the position of the bin boundaries is calculated and visualized by dotted vertical lines.
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(10, 7))
for i, ax in enumerate(axes.flatten()):
ax.scatter(sample_std_means[:, i], np.random.uniform(0, 1, num_samples), color='r', alpha=0.5, lw=0, s=1)
# there are n+1 possible mean values for n bernoulli trials
# n+2 boundaries will be needed to separate the bins
bins = np.arange(-1, sample_sizes[i]+1) / sample_sizes[i]
bins += (bins[1] - bins[0]) / 2 # shift half a bin
bins -= 0.5 # subtract the mean
bins /= (0.5 / np.sqrt(sample_sizes[i])) # correction factor
for b in bins:
ax.axvline(b, color='g', ls=':')
ax.set_xlabel(f'n = {sample_sizes[i]}')
ax.set_xlim((-3, 3))
And here are the histograms using these bins:
ax.hist(sample_std_means[:, i], bins=bins, edgecolor='k', color='midnightblue', density=True)

Matplotlib: Shared axis for imshow images

I'm trying to plot multiple images with Matplotlib's imshow() method, and have them share a single y axis. Although the images have the same number of y pixels, the images don't end up the same height.
Demonstration code;
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import poisson
def ibp_oneparam(alpha, N):
    """Sample a binary customer-by-dish matrix from the one-parameter IBP.

    Parameters
    ----------
    alpha : float
        Rate of the Poisson draws for new dishes: alpha for the first
        customer and alpha / i for customer i.
    N : int
        Number of customers.

    Returns
    -------
    numpy.ndarray
        One row per customer and one column per dish; an entry is 1 when
        that customer sampled that dish and 0 otherwise.
    """
    # First customer
    Z = np.array([np.ones(poisson(alpha).rvs(1))], dtype=int)

    # ith customer
    for i in range(2, N+1):

        # Customer walks along previously sampled dishes
        z_i = []
        for previously_sampled_dish in Z.T:
            # m_k: how many earlier customers took this dish
            m_k = np.sum(previously_sampled_dish)
            # The dish is taken with probability m_k / i, i.e. when the
            # uniform draw falls BELOW that ratio. Using >= here would
            # select the dish with probability 1 - m_k / i instead.
            if np.random.rand() < m_k / i:
                # Customer decides to sample this dish
                z_i.append(1.0)
            else:
                # Customer decides to skip this dish
                z_i.append(0.0)

        # Customer decides to try some new dishes
        z_i.extend(np.ones(poisson(alpha / i).rvs(1)))
        z_i = np.array(z_i)

        # Add this customer to Z: grow by one row and widen to fit the
        # newly introduced dishes; earlier rows stay 0 in the new columns.
        Z_new = np.zeros((
            Z.shape[0] + 1,
            max(Z.shape[1], len(z_i))
        ))
        Z_new[0:Z.shape[0], 0:Z.shape[1]] = Z
        Z = Z_new
        Z[i-1, :] = z_i

    return Z
np.random.seed(3)
N = 10
alpha = 2.0
#plt.figure(dpi=100)
fig, (ax1, ax2, ax3) = plt.subplots(
1,
3,
dpi=100,
sharey=True
)
Z = ibp_oneparam(alpha, N)
plt.sca(ax1)
plt.imshow(
Z,
extent=(0.5, Z.shape[1] + 0.5, len(Z) + 0.5, 0.5),
cmap='Greys_r'
)
plt.ylabel("Customers")
plt.xlabel("Dishes")
plt.xticks(range(1, Z.shape[1] + 1))
plt.yticks(range(1, Z.shape[0] + 1))
Z = ibp_oneparam(alpha, N)
plt.sca(ax2)
plt.imshow(
Z,
extent=(0.5, Z.shape[1] + 0.5, len(Z) + 0.5, 0.5),
cmap='Greys_r'
)
plt.xlabel("Dishes")
plt.xticks(range(1, Z.shape[1] + 1))
Z = ibp_oneparam(alpha, N)
plt.sca(ax3)
plt.imshow(
Z,
extent=(0.5, Z.shape[1] + 0.5, len(Z) + 0.5, 0.5),
cmap='Greys_r'
)
plt.xlabel("Dishes")
plt.xticks(range(1, Z.shape[1] + 1))
plt.show()
Output;
I expect these images to each have the same height, and have varying widths. How can I achieve this?
Aside: The code above is demonstrating the Indian Buffet Process. For the purposes of this post, consider the three images to be random binary matrices with the same number of rows, but variable numbers of columns.
Thank you,
I got a decent result with grid-spec width_ratios.
"""fig, (ax1, ax2, ax3) = plt.subplots(
1,
3,
dpi=100,
sharey=True,
constrained_layout=True
)"""
# I commented the above code and replaced with below.
import matplotlib.gridspec as gridspec
fig = plt.figure(constrained_layout=True)
gs = gridspec.GridSpec(ncols=3, nrows=1, figure=fig, width_ratios=[7./4.,1,6./4.])
ax1 = fig.add_subplot(gs[0,0])
ax2 = fig.add_subplot(gs[0,1])
ax3 = fig.add_subplot(gs[0,2])
It's somewhat counterintuitive that you need to use width ratios to adjust the heights, but in the context of a grid with multiple rows it makes sense that you can only scale columns independently by width, and rows independently by height.
https://matplotlib.org/tutorials/intermediate/gridspec.html

Categories

Resources