Integrating a histogram in a bootstrap simulation graph - python

I have a dataframe with 1000 simulations of a portfolio's returns. I am able to graph the simulations and do the respective histogram separately, but I have absolutely no idea how to merge them in order to resemble the following image:
please take this example of data in order to facilitate answers:
import numpy as np
import pandas as pd
def simulate_panel(T, N):
"""" This function simulates return paths"""
dates = pd.date_range("20210218", periods=T, freq='D')
columns = []
for i in range(N):
columns.append(str(i+1))
return pd.DataFrame(np.random.normal(0, 0.01, size=(T, N)), index=dates,
columns=columns)
df=(1+simulate_panel(1000,1000)).cumprod()
df.plot(figsize=(8,6),title=('Bootstrap'), legend=False)
Thank you very much in advance.

To color the curves via their last value, they can be drawn one-by-one. With a colormap and a norm, the value can be converted to the appropriate color. Using some transparency (alpha), the most visited positions will be colored stronger.
In a second subplot, a vertical histogram can be drawn, with the bars colored similarly.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
def simulate_panel(T, N):
"""" This function simulates return paths"""
dates = pd.date_range("20210218", periods=T, freq='D')
columns = [(str(i + 1)) for i in range(N)]
return pd.DataFrame(np.random.normal(0, 0.01, size=(T, N)), index=dates, columns=columns)
df = (1 + simulate_panel(1000, 1000)).cumprod()
fig, (ax1, ax2) = plt.subplots(ncols=2, sharey=True, figsize=(12, 4),
gridspec_kw={'width_ratios': [5, 1], 'wspace': 0})
data = df.to_numpy().T
cmap = plt.cm.get_cmap('turbo')
norm = plt.Normalize(min(data[:, -1]), max(data[:, -1]))
for row in data:
ax1.plot(df.index, row, c=cmap(norm(row[-1])), alpha=0.1)
ax1.margins(x=0)
_, bin_edges, bars = ax2.hist(data[:, -1], bins=20, orientation='horizontal')
for x0, x1, bar in zip(bin_edges[:-1], bin_edges[1:], bars):
bar.set_color(cmap(norm((x0 + x1) / 2)))
ax2.tick_params(left=False)
plt.tight_layout()
plt.show()

You can use GridSpec to set up axes for line chart and the histogram next to each other:
import matplotlib.pyplot as plt
import matplotlib.cm as cm
# layout
fig = plt.figure()
gs = fig.add_gridspec(1, 2, wspace=0, width_ratios=[9, 1])
ax = gs.subplots(sharey=True)
# line chart
z = df.iloc[-1]
df.plot(figsize=(8,6), title=('Bootstrap'), legend=False, ax=ax[0],
color=cm.RdYlBu_r((z - z.min()) / (z.max() - z.min())))
# histogram
n_bins = 20
cnt, bins, patches = ax[1].hist(
z, np.linspace(z.min(), z.max(), n_bins),
ec='k', orientation='horizontal')
colors = cm.RdYlBu_r((bins - z.min()) / (z.max() - z.min()))
for i, p in enumerate(patches):
p.set_color(colors[i])

Related

Vertical "broken" bar plot with arrays as bar heights and color coding

I am trying to create a bar plot that looks like this:
x axis is the number of detectors hit in coincidence (i.e. multiplicity)
for each multiplicity i have several events. The y axis contains the average pulse height of each event.The colors should correspond to the number of hits which have the shown pulse heights and appeared in events with the respective multiplicity
I have a dictionary that has multiplicities as keys and arrays of the avarage pulse heights as values. :
averages = {2 : [...],
3 : [...],
4 : [...],
5 : [...],
6 : [...],}
for key in averages:
plt.bar(key,averages[key] ,width = 0.8)
i only know how to produce the simple version of a bar chart that looks like this:
can someone tell me how to make the bars "broken to show all pulse heights and add the color coding?
Not entirely clear but I think you want something like this
import seaborn as sns
from scipy import stats
import matplotlib as mpl
import matplotlib.pyplot as plt
# Create some fake data that looks roughly like what you have
tips = sns.load_dataset("tips")
weights = stats.gaussian_kde(tips["total_bill"])(tips["total_bill"])
tips = tips.sample(frac=50, weights=weights, replace=True)
days = []
segments = []
counts = []
for day, x in tips["total_bill"].groupby(tips["day"]):
days.append(day)
segments.append(np.sort(x.unique()))
counts.append(x.value_counts().sort_index())
# Map from counts to colors
norm = mpl.colors.Normalize(0, np.concatenate(counts).max())
colors = [mpl.cm.viridis(norm(c)) for c in counts]
f, ax = plt.subplots()
# Draw each horizontal line
events = ax.eventplot(segments, colors=colors, orientation="vertical", zorder=.5)
events[0].set_norm(norm)
f.colorbar(events[0])
# Add the mean/std for each x position
sns.pointplot(data=tips, x="day", y="total_bill", ci="sd", order=days, join=False, color=".1")
I took the question to need each horizontal line to represent each data value, but if you're satisfied with a histogram, this is two function calls in seaborn (>=0.11)
sns.histplot(
data=tips, x="day", y="total_bill",
discrete=(True, False), binwidth=(1, .5),
cmap="viridis", cbar=True, zorder=.5, alpha=.75,
)
sns.pointplot(
data=tips, x="day", y="total_bill",
ci="sd", order=days, join=False, color=".1",
)
Here is a solution which uses imshow to produce the columnwise "color histograms":
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
# Create dummy data
coincidences = [2, 3, 4, 5, 6]
n_list = [10000, 8000, 6000, 4000, 2000]
mu_list = np.array([200, 300, 400, 500, 600])
scale = 100
averages = {c: np.random.normal(loc=mu_list[i], scale=scale, size=n_list[i])
for i, c in enumerate(coincidences)}
# Calculate histogram for each column
bins = np.linspace(0, 1000, 1000)
hist_img = np.array([np.histogram(averages[c], bins=bins)[0]
for c in coincidences]).T
# Create Normalized colormap
# norm = mpl.colors.Normalize()
norm = mpl.colors.LogNorm(vmin=1, vmax=hist_img.max())
sm = mpl.cm.ScalarMappable(cmap='viridis', norm=norm)
# Use colormap for img_hist and make zeros transparent
hist_img2 = sm.to_rgba(hist_img, bytes=True)
hist_img2[hist_img == 0, 3] = 0
# Plot
fig, ax = plt.subplots()
cc = ax.imshow(hist_img2, aspect='auto', interpolation='none', origin='lower',
extent=[1.5, 6.5, 0, 1000])
plt.colorbar(sm)
mean = [np.mean(averages[c]) for c in coincidences]
std = [np.std(averages[c]) for c in coincidences]
ax.errorbar(coincidences, mean, yerr=std, ls='', c='k', capsize=3, label='std')
ax.plot(coincidences, mean, ls='', marker='o', c='b', label='mean')
ax.legend()

Frequency Distribution Plot: change x-axis to interval

Dear People of the Internet
I have calculated a frequency distribution and I would now like to plot it in a certain manner. So far I have calculated and plotted the frequency distribution, but I couldn't find a solution for the endproduct I am looking for. My code with an example dataset for now is:
import matplotlib.pyplot as plt
from scipy import stats
import numpy as np
import pandas as pd
# example data
rng = np.random.RandomState(seed=12345)
a1 = stats.norm.rvs(size=1000, random_state=rng)
res = stats.relfreq(a1, numbins=34)
x = res.lowerlimit + np.linspace(0, res.binsize*res.frequency.size, res.frequency.size)
# plotting
fig = plt.figure(figsize=(6, 3))
ax = fig.add_subplot(1, 1, 1)
ax.bar(x, res.frequency, width=res.binsize)
ax.set_title('Frequency Distribution of 1D Vix Returns')
ax.set_xlim([x.min(), x.max()])
ax.set_xticks(ax.get_xticks()[::1])
plt.show()
As a last step, I would like to plot the x-Axis just as in the attached picture. Instead of single number I would like to have the interval. I couldn't find a source in which this matter is resolved. Has anyone encountered the same problem or knows any source which has a solution to it? Thanks in advance
Have a look at this nice answer:
https://stackoverflow.com/a/6353051/10372616.
I added the code to your current plot.
import matplotlib.pyplot as plt
from scipy import stats # ????
import numpy as np
import pandas as pd # ????
# example data
rng = np.random.RandomState(seed=12345)
a1 = stats.norm.rvs(size=1000, random_state=rng)
res = stats.relfreq(a1, numbins=34)
x = res.lowerlimit + np.linspace(0, res.binsize*res.frequency.size, res.frequency.size)
# plotting
fig = plt.figure(figsize=(6, 3))
ax = fig.add_subplot(1, 1, 1)
ax.bar(x, res.frequency, width=res.binsize)
ax.set_title('Frequency Distribution of 1D Vix Returns')
ax.set_xlim([x.min(), x.max()])
ax.set_xticks(ax.get_xticks()[::1])
# Change traditional tick labels to range labels
# ----------------------------------------------------------------
ax.set_xticklabels([]) # hide your previous x tick labels
bins = ax.get_xticks()[::1]
bin_centers = 0.5 * np.diff(bins) + bins[:-1]
for a, b, x in zip(bins, bins[1:], bin_centers):
label = '{:0.0f} to {:0.0f}'.format(a, b)
ax.annotate(label, xy=(x, 0), xycoords=('data', 'axes fraction'),
xytext=(0, -10), textcoords='offset points', va='top', ha='center', rotation=90)
plt.show()
Before:
After:

Not getting the heatmap in the background using Matplotlib Python

I have tried this and got the result as in the image:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.colors import LinearSegmentedColormap
cmap = LinearSegmentedColormap.from_list("", ["red","grey","green"])
df = pd.read_csv('t.csv', header=0)
fig = plt.figure()
ax1 = fig.add_subplot(111)
ax = ax1.twiny()
# Scatter plot of positive points, coloured blue (C0)
ax.scatter(np.argwhere(df['real'] > 0), df.loc[df['real'] > 0, 'real'], color='C2')
# Scatter plot of negative points, coloured red (C3)
ax.scatter(np.argwhere(df['real'] < 0), df.loc[df['real'] < 0, 'real'], color='C3')
# Scatter neutral values in grey (C7)
ax.scatter(np.argwhere(df['real'] == 0), df.loc[df['real'] == 0, 'real'], color='C7')
ax.set_ylim([df['real'].min(), df['real'].max()])
index = len(df.index)
ymin = df['prediction'].min()
ymax= df['prediction'].max()
ax1.imshow([np.arange(index),df['prediction']],cmap=cmap,
extent=(0,index-1,ymin, ymax), alpha=0.8)
plt.show()
Image:
I was expecting one output where the color is placed according to the figure. I am getting green color and no reds or greys.
I want to get the image or contours spread as the values are. How I can do that? See the following image, something similar:
Please let me know how I can achieve this. The data I used is here: t.csv
For a live version, have a look at Tensorflow Playground
There are essentially 2 tasks required in a solution like this:
Plot the heatmap as the background;
Plot the scatter data;
Output:
Source code:
import numpy as np
import matplotlib.pyplot as plt
###
# Plot heatmap in the background
###
# Setting up input values
x = np.arange(-6.0, 6.0, 0.1)
y = np.arange(-6.0, 6.0, 0.1)
X, Y = np.meshgrid(x, y)
# plot heatmap colorspace in the background
fig, ax = plt.subplots(nrows=1)
im = ax.imshow(X, cmap=plt.cm.get_cmap('RdBu'), extent=(-6, 6, -6, 6), interpolation='bilinear')
cax = fig.add_axes([0.21, 0.95, 0.6, 0.03]) # [left, bottom, width, height]
fig.colorbar(im, cax=cax, orientation='horizontal') # add colorbar at the top
###
# Plot data as scatter
###
# generate the points
num_samples = 150
theta = np.linspace(0, 2 * np.pi, num_samples)
# generate inner points
circle_r = 2
r = circle_r * np.random.rand(num_samples)
inner_x, inner_y = r * np.cos(theta), r * np.sin(theta)
# generate outter points
circle_r = 4
r = circle_r + np.random.rand(num_samples)
outter_x, outter_y = r * np.cos(theta), r * np.sin(theta)
# plot data
ax.scatter(inner_x, inner_y, s=30, marker='o', color='royalblue', edgecolors='white', linewidths=0.8)
ax.scatter(outter_x, outter_y, s=30, marker='o', color='crimson', edgecolors='white', linewidths=0.8)
ax.set_ylim([-6,6])
ax.set_xlim([-6,6])
plt.show()
To keep things simple, I kept the colorbar range (-6, 6) to match the data range.
I'm sure this code can be changed to suit your specific needs. Good luck!
Here is a possible solution.
A few notes and questions:
What are the 'prediction' values in your data file? They do not seem to correlate with the values in the 'real' column.
Why do you create a second axis? What is represented on the bottom X-axis in your plot? I removed the second axis and labelled the remaining axes (index and real).
When you slice a pandas DataFrame, the index comes with it. You don't need to create a separate index (argwhere and arange(index) in your code). I simplified the first part of the code, where scatterplots are produced.
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.colors import LinearSegmentedColormap
cmap = LinearSegmentedColormap.from_list("", ["red","grey","green"])
df = pd.read_csv('t.csv', header=0)
print(df)
fig = plt.figure()
ax = fig.add_subplot(111)
# Data limits
xmin = 0
xmax = df.shape[0]
ymin = df['real'].min()
ymax = df['real'].max()
# Scatter plots
gt0 = df.loc[df['real'] > 0, 'real']
lt0 = df.loc[df['real'] < 0, 'real']
eq0 = df.loc[df['real'] == 0, 'real']
ax.scatter(gt0.index, gt0.values, edgecolor='white', color='C2')
ax.scatter(lt0.index, lt0.values, edgecolor='white', color='C3')
ax.scatter(eq0.index, eq0.values, edgecolor='white', color='C7')
ax.set_ylim((ymin, ymax))
ax.set_xlabel('index')
ax.set_ylabel('real')
# We want 0 to be in the middle of the colourbar,
# because gray is defined as df['real'] == 0
if abs(ymax) > abs(ymin):
lim = abs(ymax)
else:
lim = abs(ymin)
# Create a gradient that runs from -lim to lim in N number of steps,
# where N is the number of colour steps in the cmap.
grad = np.arange(-lim, lim, 2*lim/cmap.N)
# Arrays plotted with imshow must be 2D arrays. In this case it will be
# 1 pixel wide and N pixels tall. Set the aspect ratio to auto so that
# each pixel is stretched out to the full width of the frame.
grad = np.expand_dims(grad, axis=1)
im = ax.imshow(grad, cmap=cmap, aspect='auto', alpha=1, origin='bottom',
extent=(xmin, xmax, -lim, lim))
fig.colorbar(im, label='real')
plt.show()
This gives the following result:

How to add cross (X) on a heatmap cells like with R language?

I would like to add cross (X) on heatmap cells (depending on significance level, but the question is on adding the X).
Like in R-language (sig.level = XXX).
See the Python and R code used and the corresponding output images.
Thank you for your help.
# Draw the heatmap with the mask and correct aspect ratio
sns.heatmap(corr, mask=mask, cmap=cmap, center=0, vmin=-1, vmax=1, square=True, linewidths=0.5, fmt=".2f",
cbar_kws={"shrink": .65, "orientation": "horizontal", "ticks":np.arange(-1, 1+1, 0.2)},
annot = True, annot_kws={"weight": 'bold', "size":15})
corrplot(cor(subset (wqw, select =
c(fixed.acidity:quality,ratio.sulfur.dioxide))),
# compute the p matrix
p.mat = cor.mtest(subset
(wqw, select = c(fixed.acidity:quality,ratio.sulfur.dioxide))),
# significance level 0.01
sig.level = 0.01,
# Method to display : color (could be corcle, ...)
method = "color",
# color palette
col = colorRampPalette(c("#BB4444", "#EE9988",
"#FFFFFF", "#77AADD", "#4477AA"))(200),
)
```
The easy solution is to add a scatter plot with an X-shaped marker to cross out the unwanted cells.
import numpy as np; np.random.seed(42)
import matplotlib.pyplot as plt
data = np.random.rand(10,10)
mask = np.zeros_like(data)
mask[np.triu_indices_from(mask)] = True
data_masked = np.ma.array(data, mask=mask)
fig, ax = plt.subplots()
im = ax.imshow(data_masked, cmap="YlGnBu", origin="upper")
fig.colorbar(im)
ax.scatter(*np.argwhere(data_masked.T < 0.4).T, marker="x", color="black", s=100)
plt.show()
The drawback of this is that the markersize (s) is independent of the number of cells and needs to be adjusted for different figure sizes.
An alternative is hence to draw some lines (an X are two crossed lines) at the respective positions. Here we create a function crossout(points, ax=None, scale=1, **kwargs), where scale is the percentage the lines shall take from each cell.
import numpy as np; np.random.seed(42)
import matplotlib.pyplot as plt
from matplotlib.collections import LineCollection
def crossout(points, ax=None, scale=1, **kwargs):
ax = ax or plt.gca()
l = np.array([[[1,1],[-1,-1]]])*scale/2.
r = np.array([[[-1,1],[1,-1]]])*scale/2.
p = np.atleast_3d(points).transpose(0,2,1)
c = LineCollection(np.concatenate((l+p,r+p), axis=0), **kwargs)
ax.add_collection(c)
return c
data = np.random.rand(10,10)
mask = np.zeros_like(data)
mask[np.triu_indices_from(mask)] = True
data_masked = np.ma.array(data, mask=mask)
fig, ax = plt.subplots()
im = ax.imshow(data_masked, cmap="YlGnBu", origin="upper")
fig.colorbar(im)
crossout(np.argwhere(data_masked.T < 0.4), ax=ax, scale=0.8, color="black")
plt.show()
For scale=0.8 this looks like
Note that for a pcolormesh plot or a seaborn heatmap (which uses pcolormesh internally), one would need to add 0.5 to the data, i.e.
np.argwhere(data_masked.T < 0.4)+0.5

Matplotlib imshow/matshow display values on plot

I am trying to create a 10x10 grid using either imshow or matshow in Matplotlib. The function below takes a numpy array as input, and plots the grid. However, I'd like to have values from the array also displayed inside the cells defined by the grid. So far I could not find a proper way to do it. I can use plt.text to place things over the grid, but this requires coordinates of each cell, totally inconvenient. Is there a better way to do what I am trying to accomplish?
Thanks!
NOTE: The code below does not take the values from the array yet, I was just playing with plt.text.
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import colors
board = np.zeros((10, 10))
def visBoard(board):
cmap = colors.ListedColormap(['white', 'red'])
bounds=[0,0.5,1]
norm = colors.BoundaryNorm(bounds, cmap.N)
plt.figure(figsize=(4,4))
plt.matshow(board, cmap=cmap, norm=norm, interpolation='none', vmin=0, vmax=1)
plt.xticks(np.arange(0.5,10.5), [])
plt.yticks(np.arange(0.5,10.5), [])
plt.text(-0.1, 0.2, 'x')
plt.text(0.9, 0.2, 'o')
plt.text(1.9, 0.2, 'x')
plt.grid()
visBoard(board)
Output:
Can you do something like:
import numpy as np
import matplotlib.pyplot as plt
fig, ax = plt.subplots()
min_val, max_val = 0, 10
ind_array = np.arange(min_val + 0.5, max_val + 0.5, 1.0)
x, y = np.meshgrid(ind_array, ind_array)
for i, (x_val, y_val) in enumerate(zip(x.flatten(), y.flatten())):
c = 'x' if i%2 else 'o'
ax.text(x_val, y_val, c, va='center', ha='center')
#alternatively, you could do something like
#for x_val, y_val in zip(x.flatten(), y.flatten()):
# c = 'x' if (x_val + y_val)%2 else 'o'
ax.set_xlim(min_val, max_val)
ax.set_ylim(min_val, max_val)
ax.set_xticks(np.arange(max_val))
ax.set_yticks(np.arange(max_val))
ax.grid()
Edit:
Here is an updated example with an imshow background.
import numpy as np
import matplotlib.pyplot as plt
fig, ax = plt.subplots()
min_val, max_val, diff = 0., 10., 1.
#imshow portion
N_points = (max_val - min_val) / diff
imshow_data = np.random.rand(N_points, N_points)
ax.imshow(imshow_data, interpolation='nearest')
#text portion
ind_array = np.arange(min_val, max_val, diff)
x, y = np.meshgrid(ind_array, ind_array)
for x_val, y_val in zip(x.flatten(), y.flatten()):
c = 'x' if (x_val + y_val)%2 else 'o'
ax.text(x_val, y_val, c, va='center', ha='center')
#set tick marks for grid
ax.set_xticks(np.arange(min_val-diff/2, max_val-diff/2))
ax.set_yticks(np.arange(min_val-diff/2, max_val-diff/2))
ax.set_xticklabels([])
ax.set_yticklabels([])
ax.set_xlim(min_val-diff/2, max_val-diff/2)
ax.set_ylim(min_val-diff/2, max_val-diff/2)
ax.grid()
plt.show()
For your graph you should should try with pyplot.table:
import matplotlib.pyplot as plt
import numpy as np
board = np.zeros((10, 10))
board[0,0] = 1
board[0,1] = -1
board[0,2] = 1
def visBoard(board):
data = np.empty(board.shape,dtype=np.str)
data[:,:] = ' '
data[board==1.0] = 'X'
data[board==-1.0] = 'O'
plt.axis('off')
size = np.ones(board.shape[0])/board.shape[0]
plt.table(cellText=data,loc='center',colWidths=size,cellLoc='center',bbox=[0,0,1,1])
plt.show()
visBoard(board)
Some elaboration on the code of #wflynny making it into a function that takes any matrix no matter what size and plots its values.
import numpy as np
import matplotlib.pyplot as plt
cols = np.random.randint(low=1,high=30)
rows = np.random.randint(low=1,high=30)
X = np.random.rand(rows,cols)
def plotMat(X):
fig, ax = plt.subplots()
#imshow portion
ax.imshow(X, interpolation='nearest')
#text portion
diff = 1.
min_val = 0.
rows = X.shape[0]
cols = X.shape[1]
col_array = np.arange(min_val, cols, diff)
row_array = np.arange(min_val, rows, diff)
x, y = np.meshgrid(col_array, row_array)
for col_val, row_val in zip(x.flatten(), y.flatten()):
c = '+' if X[row_val.astype(int),col_val.astype(int)] < 0.5 else '-'
ax.text(col_val, row_val, c, va='center', ha='center')
#set tick marks for grid
ax.set_xticks(np.arange(min_val-diff/2, cols-diff/2))
ax.set_yticks(np.arange(min_val-diff/2, rows-diff/2))
ax.set_xticklabels([])
ax.set_yticklabels([])
ax.set_xlim(min_val-diff/2, cols-diff/2)
ax.set_ylim(min_val-diff/2, rows-diff/2)
ax.grid()
plt.show()
plotMat(X)

Categories

Resources