Adding colorbars to clustered heatmaps - python

I am trying to replicate this type of plot (heatmap with colorbars as leaves)
This is what I've done so far
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.cluster.hierarchy import dendrogram, linkage
import scipy.cluster.hierarchy as sch
import scipy.spatial.distance as ssd
#read data
fid_df = pd.read_csv(fid_file, index_col=[0])
# scale data
def scale(x):
return np.math.log2(x+1)
fid_df = fid_df.applymap(scale)
# clustering colums
data_1D_X = ssd.pdist(fid_df.T, 'euclidean')
X = sch.linkage(data_1D_X, method='ward')
# clustering rows
data_1D_Y = ssd.pdist(fid_df, 'cityblock')
Y = linkage(data_1D_Y, method='ward')
#plot first dendrogram
fig = plt.figure(figsize=(8, 8))
ax1 = fig.add_axes([0.09, 0.1, 0.2, 0.6])
Z1 = sch.dendrogram(Y, orientation='left')
ax1.set_xticks([])
ax1.set_yticks([])
# second dendrogram.
ax2 = fig.add_axes([0.3, 0.71, 0.6, 0.2])
Z2 = sch.dendrogram(X)
ax2.set_xticks([])
ax2.set_yticks([])
# plot matrix
axmatrix = fig.add_axes([0.3, 0.1, 0.6, 0.6])
# sorts based of clustering
idx1 = Z1['leaves']
idx2 = Z2['leaves']
D = fid_df.values[idx1, :]
D = D[:, idx2]
im = axmatrix.matshow(D, aspect='auto', origin='lower', cmap=plt.cm.YlGnBu)
axmatrix.set_xticks([])
axmatrix.set_yticks([])
Example:
However, I need to add colorbars that would show the initial groups of rows and columns. Any idea how to do this?

Something like this?
import matplotlib.pyplot as plt
import numpy as np
fig = plt.figure()
ax1 = fig.add_axes((0, 0, 1, 0.9))
ax2 = fig.add_axes((0, 0.9, 1, 0.1))
gridY, gridX = np.mgrid[0:10:11 * 1j, 0:10:11 * 1j]
ax1.pcolormesh(gridX, gridY, np.sqrt(gridX ** 2 + gridY ** 2))
randCol = ['red', 'blue']
for value in np.linspace(0, 10, 1001):
ax2.axvline(value, color=randCol[np.random.default_rng().integers(2)])
ax2.set_xlim((0, 10))
ax2.tick_params(labelbottom=False, bottom=False, labelleft=False, left=False)
fig.savefig('so.png', bbox_inches='tight')

Related

Integrating a histogram in a bootstrap simulation graph

I have a dataframe with 1000 simulations of a portfolio's returns. I am able to graph the simulations and do the respective histogram separately, but I have absolutely no idea how to merge them in order to resemble the following image:
please take this example of data in order to facilitate answers:
import numpy as np
import pandas as pd
def simulate_panel(T, N):
"""" This function simulates return paths"""
dates = pd.date_range("20210218", periods=T, freq='D')
columns = []
for i in range(N):
columns.append(str(i+1))
return pd.DataFrame(np.random.normal(0, 0.01, size=(T, N)), index=dates,
columns=columns)
df=(1+simulate_panel(1000,1000)).cumprod()
df.plot(figsize=(8,6),title=('Bootstrap'), legend=False)
Thank you very much in advance.
To color the curves via their last value, they can be drawn one-by-one. With a colormap and a norm, the value can be converted to the appropriate color. Using some transparency (alpha), the most visited positions will be colored stronger.
In a second subplot, a vertical histogram can be drawn, with the bars colored similarly.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
def simulate_panel(T, N):
"""" This function simulates return paths"""
dates = pd.date_range("20210218", periods=T, freq='D')
columns = [(str(i + 1)) for i in range(N)]
return pd.DataFrame(np.random.normal(0, 0.01, size=(T, N)), index=dates, columns=columns)
df = (1 + simulate_panel(1000, 1000)).cumprod()
fig, (ax1, ax2) = plt.subplots(ncols=2, sharey=True, figsize=(12, 4),
gridspec_kw={'width_ratios': [5, 1], 'wspace': 0})
data = df.to_numpy().T
cmap = plt.cm.get_cmap('turbo')
norm = plt.Normalize(min(data[:, -1]), max(data[:, -1]))
for row in data:
ax1.plot(df.index, row, c=cmap(norm(row[-1])), alpha=0.1)
ax1.margins(x=0)
_, bin_edges, bars = ax2.hist(data[:, -1], bins=20, orientation='horizontal')
for x0, x1, bar in zip(bin_edges[:-1], bin_edges[1:], bars):
bar.set_color(cmap(norm((x0 + x1) / 2)))
ax2.tick_params(left=False)
plt.tight_layout()
plt.show()
You can use GridSpec to set up axes for line chart and the histogram next to each other:
import matplotlib.pyplot as plt
import matplotlib.cm as cm
# layout
fig = plt.figure()
gs = fig.add_gridspec(1, 2, wspace=0, width_ratios=[9, 1])
ax = gs.subplots(sharey=True)
# line chart
z = df.iloc[-1]
df.plot(figsize=(8,6), title=('Bootstrap'), legend=False, ax=ax[0],
color=cm.RdYlBu_r((z - z.min()) / (z.max() - z.min())))
# histogram
n_bins = 20
cnt, bins, patches = ax[1].hist(
z, np.linspace(z.min(), z.max(), n_bins),
ec='k', orientation='horizontal')
colors = cm.RdYlBu_r((bins - z.min()) / (z.max() - z.min()))
for i, p in enumerate(patches):
p.set_color(colors[i])

matplotlib.widgets.Slider with fill_between

I'm trying to Add the slider in the plot similar to the slider demo example.
I'm plotting fill_between which gives PolyCollection object.
Although I tried with plot too which give Line2D object as shown picture below, but plot doesn't update as expected as in demo.
code
import numpy as np
import scipy.stats as ss
import matplotlib.pyplot as plt
import matplotlib.widgets as widgets
def get_pdf(mu, sigma=1, offset=4):
o = sigma * offset
x = np.linspace(mu - o, mu + o, 100)
rv = ss.norm(mu, sigma)
return x, rv.pdf(x)
fig, ax = plt.subplots()
plt.subplots_adjust(bottom=0.25)
ax.fill_between(*get_pdf(0, 1), alpha=0.7)
# t = plt.fill_between(*get_pdf(2, 1), alpha=0.7) # this gives ployCollection
t = ax.plot(*get_pdf(2, 1), label='treatment', alpha=0.7)
a = plt.axes([0.25, 0.1, 0.5, 0.03])
slider = widgets.Slider(a, "shift", 0, 10, valinit=2, valstep=1)
def update(val):
x, y = get_pdf(val)
t[0].set_ydata(y)
fig.canvas.draw_idle()
slider.on_changed(update)
plt.show()
To update the line plot, t[0].set_xdata(x) needs to be set, as it is different for each call. In this particular case, get_pdf each time returns the same y.
Updating the coordinates of the polyCollection generated by fill_between doesn't seem to be possible. However, you can delete and recreate it at every update. Note that this is slower than just updating the coordinates.
import numpy as np
import scipy.stats as ss
import matplotlib.pyplot as plt
import matplotlib.widgets as widgets
def get_pdf(mu, sigma=1, offset=4):
o = sigma * offset
x = np.linspace(mu - o, mu + o, 100)
rv = ss.norm(mu, sigma)
return x, rv.pdf(x)
fig, ax = plt.subplots()
plt.subplots_adjust(bottom=0.25)
ax.fill_between(*get_pdf(0, 1), alpha=0.7)
t = ax.fill_between(*get_pdf(2), color='crimson', alpha=0.7)
a = plt.axes([0.25, 0.1, 0.5, 0.03])
slider = widgets.Slider(a, "shift", 0, 10, valinit=2, valstep=1)
def update(val):
global t
t.remove()
t = ax.fill_between(*get_pdf(val), color='crimson', alpha=0.7)
fig.canvas.draw_idle()
slider.on_changed(update)
plt.show()

Plotting two cross section intensity at the same time in one figure

I have an array of shape(512,512).
Looks like, (row=x, column=y, density=z=the number of the array)
[[0.012825 0.020408 0.022976 ... 0.015938 0.02165 0.024357]
[0.036332 0.031904 0.025462 ... 0.031095 0.019812 0.024523]
[0.015831 0.027392 0.031939 ... 0.016249 0.01697 0.028686]
...
[0.024545 0.011895 0.022235 ... 0.033226 0.03223 0.030235]]
I had already drawn it into a 2D density plot. My goal is to find the center of the circle and draw a vertical and horizontal cross-section in one figure.
Now, I have the trouble to find the center of the circle and combine two cross-sections in one figure.
Please help.
This is my code:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.ndimage
data = pd.read_csv('D:/BFP.csv', header=None)
# create data
data = np.array(data)
print(data)
#plot data
side = np.linspace(-1.5,1.5,512)
x,y = np.meshgrid(side,side)
z = [[data[i][j] for i in range(len(data[0]))]for j in range(len(data))]
#-- Extract the line...
# Make a line with "num" points...
x0, y0 = 270, 0 # These are in _pixel_ coordinates!!
x1, y1 = 270, 500
num = 512
x_, y_ = np.linspace(x0, x1, num), np.linspace(y0, y1, num)
# Extract the values along the line, using cubic interpolation
zi = scipy.ndimage.map_coordinates(z, np.vstack((x_,y_)))
#-- Plot...
fig, axes = plt.subplots(nrows=2)
axes[0].imshow(z,origin='lower')
axes[0].plot([x0, x1], [y0, y1], 'ro-')
#axes[0].axis('image')
axes[1].plot(zi)
plt.savefig('D:/vertical.png')
plt.show()
image here:
I cannot help you with finding the center of the circle, but you can create a nice visualization of the cross section by creating 3 axes in a grid. Usually, I would use GridSpec for this, but imhsow has a tendency to mess up the relative size of the axes to maintain square pixels. Thankfully, the AxesGrid toolkit can help.
The base of the code is inspired by this matplotlib example.
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable
from scipy.stats import multivariate_normal
import scipy
fig, main_ax = plt.subplots(figsize=(5, 5))
divider = make_axes_locatable(main_ax)
top_ax = divider.append_axes("top", 1.05, pad=0.1, sharex=main_ax)
right_ax = divider.append_axes("right", 1.05, pad=0.1, sharey=main_ax)
# make some labels invisible
top_ax.xaxis.set_tick_params(labelbottom=False)
right_ax.yaxis.set_tick_params(labelleft=False)
main_ax.set_xlabel('dim 1')
main_ax.set_ylabel('dim 2')
top_ax.set_ylabel('Z profile')
right_ax.set_xlabel('Z profile')
x, y = np.mgrid[-1:1:.01, -1:1:.01]
pos = np.empty(x.shape + (2,))
pos[:, :, 0] = x; pos[:, :, 1] = y
rv = multivariate_normal([-0.2, 0.2], [[1, 1.5], [0.25, 0.25]])
z = rv.pdf(pos)
z_max = z.max()
cur_x = 110
cur_y = 40
main_ax.imshow(z, origin='lower')
main_ax.autoscale(enable=False)
right_ax.autoscale(enable=False)
top_ax.autoscale(enable=False)
right_ax.set_xlim(right=z_max)
top_ax.set_ylim(top=z_max)
v_line = main_ax.axvline(cur_x, color='r')
h_line = main_ax.axhline(cur_y, color='g')
v_prof, = right_ax.plot(z[:,int(cur_x)],np.arange(x.shape[1]), 'r-')
h_prof, = top_ax.plot(np.arange(x.shape[0]),z[int(cur_y),:], 'g-')
plt.show()
Just for fun, you can even make it interactive
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable
from scipy.stats import multivariate_normal
import scipy
fig, main_ax = plt.subplots(figsize=(5, 5))
divider = make_axes_locatable(main_ax)
top_ax = divider.append_axes("top", 1.05, pad=0.1, sharex=main_ax)
right_ax = divider.append_axes("right", 1.05, pad=0.1, sharey=main_ax)
# make some labels invisible
top_ax.xaxis.set_tick_params(labelbottom=False)
right_ax.yaxis.set_tick_params(labelleft=False)
main_ax.set_xlabel('dim 1')
main_ax.set_ylabel('dim 2')
top_ax.set_ylabel('Z profile')
right_ax.set_xlabel('Z profile')
x, y = np.mgrid[-1:1:.01, -1:1:.01]
pos = np.empty(x.shape + (2,))
pos[:, :, 0] = x; pos[:, :, 1] = y
rv = multivariate_normal([-0.2, 0.2], [[1, 1.5], [0.25, 0.25]])
z = rv.pdf(pos)
z_max = z.max()
main_ax.imshow(z, origin='lower')
main_ax.autoscale(enable=False)
right_ax.autoscale(enable=False)
top_ax.autoscale(enable=False)
right_ax.set_xlim(right=z_max)
top_ax.set_ylim(top=z_max)
v_line = main_ax.axvline(np.nan, color='r')
h_line = main_ax.axhline(np.nan, color='g')
v_prof, = right_ax.plot(np.zeros(x.shape[1]),np.arange(x.shape[1]), 'r-')
h_prof, = top_ax.plot(np.arange(x.shape[0]),np.zeros(x.shape[0]), 'g-')
def on_move(event):
if event.inaxes is main_ax:
cur_x = event.xdata
cur_y = event.ydata
v_line.set_xdata([cur_x,cur_x])
h_line.set_ydata([cur_y,cur_y])
v_prof.set_xdata(z[:,int(cur_x)])
h_prof.set_ydata(z[int(cur_y),:])
fig.canvas.draw_idle()
fig.canvas.mpl_connect('motion_notify_event', on_move)
plt.show()
NB: the lag is just due to the convertion in gif, the update is much smoother on my machine

Matplotlib heatmap for multiple timeseries to show distribution over time

I have n_series recordings with the same frames 0, 1, 2, 3,... and would like to make a 2D contour out of it.
I've found that I can very easily do the following:
import matplotlib.pyplot as plt
import numpy as np
series_len = 1000
n_series = 10
y = np.random.normal(0, 0.15, series_len * n_series)
x = np.tile(np.arange(0, series_len, 1), n_series)
heatmap, xbins, ybins = np.histogram2d(x, y, bins=20)
plt.contourf(heatmap.T)
plt.show()
But since this just gives a 20x20 histogram, I have no idea how my intensities are distributed in the outputted plot (e.g. roughly zero-centered), nor how to fix the ticks.
What I'd like is this ('shopped):
Try set_xticklabels:
series_len = 1000
n_series = 10
fig, ax = plt.subplots(figsize=(10,6))
np.random.seed(1)
y = np.random.normal(0, 0.15, series_len * n_series)
x = np.tile(np.arange(0, series_len, 1), n_series)
heatmap, xs, ys = np.histogram2d(x, y, bins=20)
fig, ax = plt.subplots(figsize=(10,6))
ax.contourf(heatmap.T)
# the actual x-axis and y-axis are from 0 to 19
# we want to put 11 ticks on the axis
ax.set_xticks(np.linspace(0,19,11))
ax.set_xticklabels(range(0,1001,100))
ax.set_yticks(np.linspace(0,19,11))
ax.set_yticklabels(['{:.3f}'.format(y) for y in ys[::2]])
plt.show()
Output:
IIUC, did you want something like this:
import matplotlib.pyplot as plt
import numpy as np
series_len = 1000
n_series = 10
y = np.random.normal(0, 0.15, series_len * n_series)
x = np.tile(np.arange(0, series_len, 1), n_series)
heatmap, xlabels, ylabels = np.histogram2d(x, y, bins=20)
plt.contourf(xlabels[:-1], ylabels[:-1], heatmap.T)
plt.colorbar()
plt.show()
Output:
Okay, found an answer myself which makes the process much simpler than it appears to be. Simply resize the heatmap by 1 in both directions using skimage will make everything follow along nicely.
import matplotlib.pyplot as plt
import numpy as np
import skimage.transform
series_len = 1000
n_series = 10
bins = 20
y = np.random.normal(0, 0.15, series_len * n_series)
x = np.tile(np.arange(0, series_len, 1), n_series)
heatmap, xlabels, ylabels = np.histogram2d(x, y, bins=bins)
heatmap = skimage.transform.resize(heatmap, output_shape = (bins+1, bins+1), mode = "symmetric")
plt.contourf(xlabels, ylabels, heatmap.T)
plt.xlim(0, 1000)
plt.ylim(-0.5, 0.5)
plt.show()

Matplotlib imshow/matshow display values on plot

I am trying to create a 10x10 grid using either imshow or matshow in Matplotlib. The function below takes a numpy array as input, and plots the grid. However, I'd like to have values from the array also displayed inside the cells defined by the grid. So far I could not find a proper way to do it. I can use plt.text to place things over the grid, but this requires coordinates of each cell, totally inconvenient. Is there a better way to do what I am trying to accomplish?
Thanks!
NOTE: The code below does not take the values from the array yet, I was just playing with plt.text.
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import colors
board = np.zeros((10, 10))
def visBoard(board):
cmap = colors.ListedColormap(['white', 'red'])
bounds=[0,0.5,1]
norm = colors.BoundaryNorm(bounds, cmap.N)
plt.figure(figsize=(4,4))
plt.matshow(board, cmap=cmap, norm=norm, interpolation='none', vmin=0, vmax=1)
plt.xticks(np.arange(0.5,10.5), [])
plt.yticks(np.arange(0.5,10.5), [])
plt.text(-0.1, 0.2, 'x')
plt.text(0.9, 0.2, 'o')
plt.text(1.9, 0.2, 'x')
plt.grid()
visBoard(board)
Output:
Can you do something like:
import numpy as np
import matplotlib.pyplot as plt
fig, ax = plt.subplots()
min_val, max_val = 0, 10
ind_array = np.arange(min_val + 0.5, max_val + 0.5, 1.0)
x, y = np.meshgrid(ind_array, ind_array)
for i, (x_val, y_val) in enumerate(zip(x.flatten(), y.flatten())):
c = 'x' if i%2 else 'o'
ax.text(x_val, y_val, c, va='center', ha='center')
#alternatively, you could do something like
#for x_val, y_val in zip(x.flatten(), y.flatten()):
# c = 'x' if (x_val + y_val)%2 else 'o'
ax.set_xlim(min_val, max_val)
ax.set_ylim(min_val, max_val)
ax.set_xticks(np.arange(max_val))
ax.set_yticks(np.arange(max_val))
ax.grid()
Edit:
Here is an updated example with an imshow background.
import numpy as np
import matplotlib.pyplot as plt
fig, ax = plt.subplots()
min_val, max_val, diff = 0., 10., 1.
#imshow portion
N_points = (max_val - min_val) / diff
imshow_data = np.random.rand(N_points, N_points)
ax.imshow(imshow_data, interpolation='nearest')
#text portion
ind_array = np.arange(min_val, max_val, diff)
x, y = np.meshgrid(ind_array, ind_array)
for x_val, y_val in zip(x.flatten(), y.flatten()):
c = 'x' if (x_val + y_val)%2 else 'o'
ax.text(x_val, y_val, c, va='center', ha='center')
#set tick marks for grid
ax.set_xticks(np.arange(min_val-diff/2, max_val-diff/2))
ax.set_yticks(np.arange(min_val-diff/2, max_val-diff/2))
ax.set_xticklabels([])
ax.set_yticklabels([])
ax.set_xlim(min_val-diff/2, max_val-diff/2)
ax.set_ylim(min_val-diff/2, max_val-diff/2)
ax.grid()
plt.show()
For your graph you should should try with pyplot.table:
import matplotlib.pyplot as plt
import numpy as np
board = np.zeros((10, 10))
board[0,0] = 1
board[0,1] = -1
board[0,2] = 1
def visBoard(board):
data = np.empty(board.shape,dtype=np.str)
data[:,:] = ' '
data[board==1.0] = 'X'
data[board==-1.0] = 'O'
plt.axis('off')
size = np.ones(board.shape[0])/board.shape[0]
plt.table(cellText=data,loc='center',colWidths=size,cellLoc='center',bbox=[0,0,1,1])
plt.show()
visBoard(board)
Some elaboration on the code of #wflynny making it into a function that takes any matrix no matter what size and plots its values.
import numpy as np
import matplotlib.pyplot as plt
cols = np.random.randint(low=1,high=30)
rows = np.random.randint(low=1,high=30)
X = np.random.rand(rows,cols)
def plotMat(X):
fig, ax = plt.subplots()
#imshow portion
ax.imshow(X, interpolation='nearest')
#text portion
diff = 1.
min_val = 0.
rows = X.shape[0]
cols = X.shape[1]
col_array = np.arange(min_val, cols, diff)
row_array = np.arange(min_val, rows, diff)
x, y = np.meshgrid(col_array, row_array)
for col_val, row_val in zip(x.flatten(), y.flatten()):
c = '+' if X[row_val.astype(int),col_val.astype(int)] < 0.5 else '-'
ax.text(col_val, row_val, c, va='center', ha='center')
#set tick marks for grid
ax.set_xticks(np.arange(min_val-diff/2, cols-diff/2))
ax.set_yticks(np.arange(min_val-diff/2, rows-diff/2))
ax.set_xticklabels([])
ax.set_yticklabels([])
ax.set_xlim(min_val-diff/2, cols-diff/2)
ax.set_ylim(min_val-diff/2, rows-diff/2)
ax.grid()
plt.show()
plotMat(X)

Categories

Resources