Creating a 2-D data plot with "vertical" marginal histograms - python

How can I create, in Python, a bivariate data plot with "vertical" marginal histograms like this?
Say that the data is generated via:
import numpy as np
from scipy.stats import multivariate_normal

# Parameters of the bivariate Gaussian to sample from.
mean = np.array([0, 0])
cov = np.array([
    [1, 0.5],
    [0.5, 2],
])
# Draw 1000 samples from the frozen distribution.
distribution = multivariate_normal(mean, cov)
data = distribution.rvs(size=1000)

Here is sample code which shows how one can do this:
import math
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import rc, rcParams
from numpy.linalg import eigh
from scipy.stats import multivariate_normal, norm
from mpl_toolkits.mplot3d import Axes3D

# Render every text element through LaTeX in bold math mode.
rcParams['text.latex.preamble'] = r'\boldmath'
rc('text', usetex=True)

# Sample a correlated bivariate Gaussian and build its two marginal densities.
mean = np.array([0, 0])
cov = np.array([[1, 0.3], [0.3, .5]])
np.random.seed(0)
mvn_rvs = multivariate_normal(mean, cov).rvs(800)
pdf_x = norm(mean[0], np.sqrt(cov[0, 0])).pdf
pdf_y = norm(mean[1], np.sqrt(cov[1, 1])).pdf
rv_x = mvn_rvs[:, 0]
rv_y = mvn_rvs[:, 1]
x = np.linspace(-3, 3, 101)
y = np.linspace(-3, 3, 100)
X, Y = np.meshgrid(x, y)
fontsize = 30

fig = plt.figure()
# Figure.gca() no longer accepts keyword arguments in current Matplotlib,
# so the 3-D axes are created explicitly.
ax = fig.add_subplot(111, projection='3d')
# Scale the x and y components of the projection matrix by 1.15.
ax.get_proj = lambda: np.dot(Axes3D.get_proj(ax), np.diag([1.15, 1.15, 1, 1]))

# Marginals drawn "vertically": P_y on the plane x = x.min(), P_x on the
# plane y = y.max().
ax.plot(y, pdf_y(y), zs=x.min(), zdir='x', linewidth=3, label="$\\mathsf{P_y(y)}$")
ax.plot(x, pdf_x(x), zs=y.max(), zdir='y', linewidth=3, label='$\\mathsf{P_x(x)}$')
leg = plt.legend(fontsize=fontsize, ncol=2, frameon=False, bbox_to_anchor=(-0.10, 1.1275),
                 loc='upper left', handlelength=0.7, handletextpad=0.5, columnspacing=2.4)

# Grid line width, transparent panes and transparent axis lines.
grid_linewidth = 1.15
transparent = (1.0, 1.0, 1.0, 0.0)
for axis in (ax.xaxis, ax.yaxis, ax.zaxis):
    axis._axinfo["grid"]['linewidth'] = grid_linewidth
for axis in (ax.xaxis, ax.yaxis, ax.zaxis):
    axis.set_pane_color(transparent)
# ax.w_xaxis / w_yaxis / w_zaxis were aliases of ax.xaxis / yaxis / zaxis and
# have been removed from Matplotlib; use the plain attributes.
for axis in (ax.xaxis, ax.yaxis, ax.zaxis):
    axis.line.set_color(transparent)

labelpad = -5
ax.set_xlabel("$\\mathsf{x}$", fontsize=fontsize, labelpad=labelpad)
ax.set_ylabel("$\\mathsf{y}$", fontsize=fontsize, labelpad=labelpad)
labelsize = 10
ax.xaxis.set_rotate_label(False)
ax.yaxis.set_rotate_label(False)
ax.set_zlim(bottom=0)
ax.set_xlim(-3, 3)
ax.set_ylim(-3, 3)
ax.xaxis.set_ticklabels([])
ax.xaxis.set_visible(False)
ax.yaxis.set_ticklabels([])
ax.zaxis.set_ticklabels([])

# Contour of the joint density at height `target`.
# With q = v^T Sigma^{-1} v (v measured from the mean, which is zero here) the
# density is exp(-q / 2) / (2 pi sqrt(det Sigma)) and
# det Sigma = sx2 * sy2 * (1 - rho^2), so density == target gives
# q = gamma = log(1 / (4 pi^2 * sx2 * sy2 * (1 - rho^2) * target^2)).
# sx2 and sy2 already are variances, so they must not be squared again.
sx2 = cov[0, 0]
sy2 = cov[1, 1]
rho = cov[0, 1] / np.sqrt(sx2 * sy2)
Sigma = cov
target = 0.1
gamma = math.log(1 / (4 * (np.pi**2) * sx2 * sy2 * (1 - rho**2) * (target**2)))
eigenvalues, P = eigh(np.linalg.inv(Sigma))
# In the eigenbasis the contour is an axis-aligned ellipse with semi-axes
# sqrt(gamma / eigenvalue); parametrise it with thetas from 0 to 2pi.
thetas = np.linspace(0, 2*np.pi, 10000)
uv = (np.sqrt(gamma) / np.sqrt(eigenvalues)) * np.column_stack((np.cos(thetas), np.sin(thetas)))
# Back to the original coordinates: Sigma^{-1} = P diag(eigenvalues) P^T, so a
# point u in the eigenbasis corresponds to P @ u. Done for all rows at once.
orig_coord = uv @ P.T

# Samples and the contour both lie in the z = 0 plane.
ax.plot(rv_x, rv_y, 0*rv_x, ' o', c='g', markersize=1.1)
ax.plot(orig_coord[:, 0], orig_coord[:, 1],
        0 * np.ones_like(orig_coord[:, 0]), c='r', linewidth=3)
ax.view_init(azim=-45, elev=20)
plt.show()

Related

Set xticks relative to the plot coordinates

Is there a simple way to set two xticks at even distances from the xmin and xmax, for two plots with different ranges on the x-axis?
# Example:
import matplotlib.gridspec as gridspec
from matplotlib import pyplot as plt

fig = plt.figure(figsize=(6, 4), constrained_layout=True)
gs = gridspec.GridSpec(ncols=2, nrows=1, figure=fig)

# Two x-series with very different ranges, sharing the same y values.
x1 = [1, 0.6, 0.4, 0.3, 0.25, 0.24, 0.23]
x2 = [0.1, 0.14, 0.15, 0.16, 0.166, 0.1666, 0.1666]
y = [1, 2, 3, 4, 5, 6, 7]

# xticks
number_of_xticks = 2

# Plot 1 and Plot 2: one panel per x-series, each with its own MaxNLocator.
panels = []
for column, xs in enumerate((x1, x2)):
    panel = fig.add_subplot(gs[0, column])
    panel.plot(xs, y)
    panel.xaxis.set_major_locator(plt.MaxNLocator(number_of_xticks))
    panels.append(panel)
ax0, ax1 = panels

plt.show()
Example code does not work because xticks are at different distances from the xmin and xmax in the two plots:
You could try specifying the relative distance along the x-range:
# xticks: tick positions given as fractions of each plot's x-range
tick_fractions = [1/4, 3/4]
And then calculate the tick positions based on each x-range:
# `x` is the data plotted on the axes `ax` (stand-ins for x1/ax0 and x2/ax1
# in the full script).
mini = min(x)
maxi = max(x)
dist = maxi - mini
# Each tick sits at mini plus the requested fraction of the data range.
ax.set_xticks([mini + f * dist for f in tick_fractions])
So full script would look like:
# Example:
import matplotlib.gridspec as gridspec
from matplotlib import pyplot as plt

fig = plt.figure(figsize=(6, 4), constrained_layout=True)
gs = gridspec.GridSpec(ncols=2, nrows=1, figure=fig)

# Two x-series with very different ranges, sharing the same y values.
x1 = [1, 0.6, 0.4, 0.3, 0.25, 0.24, 0.23]
x2 = [0.1, 0.14, 0.15, 0.16, 0.166, 0.1666, 0.1666]
y = [1, 2, 3, 4, 5, 6, 7]

# xticks: positions as fractions of each panel's data range
tick_fractions = [1/4, 3/4]

# Plot 1 and Plot 2: same recipe for both panels.
panels = []
for column, xs in enumerate((x1, x2)):
    panel = fig.add_subplot(gs[0, column])
    panel.plot(xs, y)
    mini, maxi = min(xs), max(xs)
    dist = maxi - mini
    panel.set_xticks([mini + f * dist for f in tick_fractions])
    panels.append(panel)
ax0, ax1 = panels

plt.show()
You could add a call to round somewhere if you want to limit the decimals.

How to remove square brackets in legend of scatterplot?

Is there a way to remove the square brackets from the legend of a line of regression in scatter plot? Here is my code:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression

# Three observations; the predictor is reshaped into a single column.
x = np.array([0.5, 2.5, 4.5]).reshape((-1, 1))
y = np.array([1.19, 1.67, 2.01])

# The target is also passed as a single column.
model = LinearRegression().fit(x, y.reshape((-1, 1)))
r_sq = model.score(x, y)
intercept = model.intercept_
slope = model.coef_
print('coefficient of determination:', r_sq)
print('intercept:', intercept)
print('slope:', slope)

y_predict = intercept + slope * x
print('predicted response:', y_predict, sep='\n')

# Predict on a finer grid from 0.5 to 4.5 in steps of 0.5.
x_all = np.linspace(0.5, 4.5, 9).reshape((-1, 1))
y_all = model.predict(x_all)
print(x_all)
print(y_all)
x1 = x_all[0, :]
y1 = y_all[0, :]

# Predicted points as black markers.
plt.plot(x_all, y_all, 'o', color='black', markersize=10)
plt.xlabel('Factor 1', fontsize=16)
plt.ylabel('Factor 2', fontsize=16)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)

# Straight line through the predicted points, labelled with the fitted model.
m, b = np.polyfit(x_all.flatten(), y_all.flatten(), 1)
legend_label = 'y = {}+{}x, R² = {}'.format(
    np.round(intercept, 2), np.round(slope, 2), np.round(r_sq, 2))
plt.plot(x, m*x + b, linestyle='-', color='black', label=legend_label)
plt.legend(loc=(0.05, 0.85), fontsize=20)
This gives the following plot:
However nothing I try lets me remove them, any suggestions?
Just pass the scalars instead of the whole array in label:
# .item() turns each one-element array into a plain Python scalar before it
# is formatted into the label.
intercept_value = np.round(intercept, 2).item()
slope_value = np.round(slope, 2).item()
legend_label = 'y = {}+{}x, R² = {}'.format(
    intercept_value, slope_value, np.round(r_sq, 2))
plt.plot(x, m*x + b, linestyle='-', color='black', label=legend_label)
Output:

Setting value for x-axis and y-axis

I have code like this, which produces a figure whose x-axis and y-axis both run from 1 to 200. I would like both axes to run from -1.5 to 1.5 instead, with ticks spaced 0.5 apart.
I have already tried "plt.xticks" and "set_xlim", but I still cannot get it to work.
import matplotlib.pyplot as plt
import numpy as np
from mpl_toolkits.axes_grid1 import make_axes_locatable
from scipy.stats import multivariate_normal

# Main image axes with a profile axes stacked on top, sharing the x-axis.
fig, main_ax = plt.subplots(figsize=(5, 5))
divider = make_axes_locatable(main_ax)
top_ax = divider.append_axes("top", 1.05, pad=0.1, sharex=main_ax)
top_ax.xaxis.set_tick_params(labelbottom=False)
main_ax.set_xlabel('dim 1')
main_ax.set_ylabel('dim 2')
top_ax.set_ylabel('Z profile')

# Evaluate the density on a grid over [-1, 1) x [-1, 1) with step 0.01.
x, y = np.mgrid[-1:1:.01, -1:1:.01]
pos = np.dstack((x, y))
# NOTE(review): this covariance matrix is not symmetric - confirm it is intended.
rv = multivariate_normal([-0.2, 0.2], [[1, 1.5], [0.25, 0.25]])
z = rv.pdf(pos)
z_max = z.max()
plt.set_cmap(plt.cm.gist_earth)

# Pixel indices of the cross-section drawn in the profile axes.
cur_x = cur_y = 100
main_ax.imshow(z, origin='lower')
for axes in (main_ax, top_ax):
    axes.autoscale(enable=False)
top_ax.set_ylim(top=z_max)
v_line = main_ax.axvline(cur_x, color='r')
h_line = main_ax.axhline(cur_y, color='y')
n_rows, n_cols = x.shape
v_prof, = top_ax.plot(np.arange(n_cols)[::-1], z[:, int(cur_x)], 'r-')
h_prof, = top_ax.plot(np.arange(n_rows), z[int(cur_y), :], 'y-')
plt.show()
You can simply use this code.
import matplotlib.pyplot as plt
import numpy as np
from mpl_toolkits.axes_grid1 import make_axes_locatable
from scipy.stats import multivariate_normal

# Main image axes with a profile axes stacked on top, sharing the x-axis.
fig, main_ax = plt.subplots(figsize=(5, 5))
divider = make_axes_locatable(main_ax)
top_ax = divider.append_axes("top", 1.05, pad=0.1, sharex=main_ax)
top_ax.xaxis.set_tick_params(labelbottom=False)
main_ax.set_xlabel('dim 1')
main_ax.set_ylabel('dim 2')
top_ax.set_ylabel('Z profile')

# Evaluate the density on a grid over [-1, 1) x [-1, 1) with step 0.01.
x, y = np.mgrid[-1:1:.01, -1:1:.01]
pos = np.dstack((x, y))
# NOTE(review): this covariance matrix is not symmetric - confirm it is intended.
rv = multivariate_normal([-0.2, 0.2], [[1, 1.5], [0.25, 0.25]])
z = rv.pdf(pos)
z_max = z.max()
plt.set_cmap(plt.cm.gist_earth)

# Relabel pixel positions 0..200 as -1.5..1.5 (seven ticks, 0.5 apart).
tick_positions = np.linspace(0, 200, 7)
tick_values = np.linspace(-1.5, 1.5, 7)
# For y axis
main_ax.set_yticks(tick_positions)
main_ax.set_yticklabels(tick_values)
# For x axis
plt.xticks(tick_positions, tick_values)

# Pixel indices of the cross-section drawn in the profile axes.
cur_x = cur_y = 100
main_ax.imshow(z, origin='lower')
for axes in (main_ax, top_ax):
    axes.autoscale(enable=False)
top_ax.set_ylim(top=z_max)
v_line = main_ax.axvline(cur_x, color='r')
h_line = main_ax.axhline(cur_y, color='y')
n_rows, n_cols = x.shape
v_prof, = top_ax.plot(np.arange(n_cols)[::-1], z[:, int(cur_x)], 'r-')
h_prof, = top_ax.plot(np.arange(n_rows), z[int(cur_y), :], 'y-')

A random array within some bounds

I have an array params with errors e_params, and bounds params_bounds that the values of the array must stay within:
# Central value of each parameter.
params = [0.2, 0.2]
# Error of each parameter (used as the standard deviation of the draw below).
e_params = [0.1, 0.05]
# One (lower, upper) pair per parameter.
params_bounds = [(0.0, 1.0), (0.0, 1.0)]
I want to draw a random Gaussian realisation of params as follows:
import numpy as np

# One Gaussian draw per parameter: centred on params, with width e_params.
params_mc = np.random.normal(loc=params, scale=e_params)
Is there any way to make sure that the result params_mc is within the upper and lower bounds specified by params_bounds?
Thanks for any help here.
You can use numpy.clip to clip values within given bounds. First generate arrays of minimums and maximums you need, something like:
>>> lower_bound = numpy.asarray(param_bounds)[:, 0]
>>> upper_bound = numpy.asarray(param_bounds)[:, 1]
Now clip your result:
>>> # Limit every drawn value to its own [lower_bound, upper_bound] interval.
>>> numpy.clip(params_mc, lower_bound, upper_bound)
(Untested code, your mileage may vary)
Perhaps you are looking for a truncated normal distribution.
Using scipy.stats.truncnorm,
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as stats

lower, upper = (0.0, 0.0), (1.0, 1.0)
mu, sigma = np.array([0.2, 0.2]), np.array([0.1, 0.05])

# truncnorm takes its limits standardised: (bound - loc) / scale.
std_lower = (lower - mu) / sigma
std_upper = (upper - mu) / sigma
X = stats.truncnorm(std_lower, std_upper, loc=mu, scale=sigma)
data = X.rvs((10000, 2))

# One semi-transparent, normalised histogram per parameter.
fig, ax = plt.subplots()
for column in data.T:
    ax.hist(column, density=True, alpha=0.5, bins=20)
plt.show()
yields
Here's another way to visualize the sample. The code is mainly taken from the matplotlib gallery:
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import numpy as np
import scipy.stats as stats

lower, upper = (0.0, 0.0), (1.0, 1.0)
mu, sigma = np.array([0.2, 0.2]), np.array([0.1, 0.05])

# truncnorm takes its limits standardised: (bound - loc) / scale.
std_lower = (lower - mu) / sigma
std_upper = (upper - mu) / sigma
X = stats.truncnorm(std_lower, std_upper, loc=mu, scale=sigma)
data = X.rvs((10000, 2))
x, y = data[:, 0], data[:, 1]

nullfmt = mticker.NullFormatter()  # formatter that prints no tick labels

# Axes rectangles as [left, bottom, width, height] in figure fractions.
left, width = 0.1, 0.65
bottom, height = 0.1, 0.65
bottom_h = left_h = left + width + 0.02
rect_scatter = [left, bottom, width, height]
rect_histx = [left, bottom_h, width, 0.2]
rect_histy = [left_h, bottom, 0.2, height]

# Square figure holding the scatter plot and its two marginal histograms.
plt.figure(1, figsize=(8, 8))
axScatter = plt.axes(rect_scatter)
axHistx = plt.axes(rect_histx)
axHisty = plt.axes(rect_histy)

# Hide the tick labels on the axis each histogram shares with the scatter plot.
axHistx.xaxis.set_major_formatter(nullfmt)
axHisty.yaxis.set_major_formatter(nullfmt)

# Central scatter plot with fixed limits.
axScatter.scatter(x, y)
axScatter.set_xlim((-0.1, 0.7))
axScatter.set_ylim((-0.1, 0.5))

# Marginal histograms, then aligned with the scatter plot's limits.
bins = 20
axHistx.hist(x, bins=bins)
axHisty.hist(y, bins=bins, orientation='horizontal')
axHistx.set_xlim(axScatter.get_xlim())
axHisty.set_ylim(axScatter.get_ylim())

plt.show()
Just a quick idea: you could use np.clip() to do this pretty easily!
# Clamp each drawn value to its own (lower, upper) pair. The result goes back
# into params_mc, so params_bounds keeps holding the bounds.
params_mc = [np.clip(value, lower, upper)
             for value, (lower, upper) in zip(params_mc, params_bounds)]

Matplotlib: same height for colorbar as for plot [duplicate]

This question already has answers here:
Set Matplotlib colorbar size to match graph
(9 answers)
Closed 8 years ago.
I'm plotting some 2D data as shown. The axes aspect should be equal and the axes range should differ.
import numpy
import matplotlib.pyplot as plt
import matplotlib.mlab as mlab


def bivariate_normal(X, Y, sigmax=1.0, sigmay=1.0, mux=0.0, muy=0.0):
    """Return the density of an uncorrelated bivariate Gaussian at (X, Y).

    Stand-in for ``matplotlib.mlab.bivariate_normal``, which was removed in
    Matplotlib 3.1; the argument order matches the removed function.
    """
    z = ((X - mux) / sigmax) ** 2 + ((Y - muy) / sigmay) ** 2
    return numpy.exp(-z / 2) / (2 * numpy.pi * sigmax * sigmay)


#Generate data
delta = 0.025
x = numpy.arange(-5.0, 5.0, delta)
y = numpy.arange(-5.0, 5.0, delta)
X, Y = numpy.meshgrid(x, y)
Z1 = bivariate_normal(X, Y, 1.0, 1.0, 0.0, 0.0)
Z2 = bivariate_normal(X, Y, 1.5, 0.5, 1, 1)
# difference of Gaussians
Z = 10.0 * (Z2 - Z1)
#Plot
fig = plt.figure()
ax1 = fig.add_subplot(1, 1, 1, aspect='equal')
PC = ax1.pcolor(X, Y, Z)
CF = ax1.contour(X, Y, Z, 50, colors = "black")
plt.xlim(-4.0, 4.0)
plt.ylim(-2.0, 2.0)
# Colorbar in an automatically created axes; the contour levels are drawn on it.
cbar = plt.colorbar(PC)
cbar.add_lines(CF)
plt.show()
How can I make the colorbar have the same height as the plotted data?
You can do this using make_axes_locatable:
import numpy
import matplotlib.pyplot as plt
import matplotlib.mlab as mlab
from mpl_toolkits.axes_grid1 import make_axes_locatable


def bivariate_normal(X, Y, sigmax=1.0, sigmay=1.0, mux=0.0, muy=0.0):
    """Return the density of an uncorrelated bivariate Gaussian at (X, Y).

    Stand-in for ``matplotlib.mlab.bivariate_normal``, which was removed in
    Matplotlib 3.1; the argument order matches the removed function.
    """
    z = ((X - mux) / sigmax) ** 2 + ((Y - muy) / sigmay) ** 2
    return numpy.exp(-z / 2) / (2 * numpy.pi * sigmax * sigmay)


#Generate data
delta = 0.025
x = numpy.arange(-5.0, 5.0, delta)
y = numpy.arange(-5.0, 5.0, delta)
X, Y = numpy.meshgrid(x, y)
Z1 = bivariate_normal(X, Y, 1.0, 1.0, 0.0, 0.0)
Z2 = bivariate_normal(X, Y, 1.5, 0.5, 1, 1)
# difference of Gaussians
Z = 10.0 * (Z2 - Z1)
#Plot
fig = plt.figure()
ax1 = fig.add_subplot(1, 1, 1, aspect='equal')
PC = ax1.pcolor(X, Y, Z)
CF = ax1.contour(X, Y, Z, 50, colors = "black")
plt.xlim(-4.0, 4.0)
plt.ylim(-2.0, 2.0)
# Append the colorbar axes to the right of ax1 through the axes divider and
# draw the colorbar into it.
divider = make_axes_locatable(ax1)
cax1 = divider.append_axes("right", size="5%", pad=0.05)
cbar = plt.colorbar(PC, cax = cax1)
cbar.add_lines(CF)
plt.show()

Categories

Resources