A random array within some bounds - python

I have an array params with errors e_params, and bounds params_bounds that the array values must stay within:
params = [0.2, 0.2]
e_params = [0.1, 0.05]
params_bounds = [(0.0, 1.0), (0.0, 1.0)]
I want to draw a random Gaussian realisation of params as follows:
import numpy as np
params_mc = np.random.normal(params, e_params)
Is there any way to make sure that the result params_mc is within the upper and lower bounds specified by params_bounds?
Thanks for any help here.

You can use numpy.clip to clip values within given bounds. First generate arrays of minimums and maximums you need, something like:
>>> lower_bound = numpy.asarray(param_bounds)[:, 0]
>>> upper_bound = numpy.asarray(param_bounds)[:, 1]
Now clip your result:
>>> numpy.clip(params_mc, lower_bound, upper_bound)
(Untested code, your mileage may vary)

Perhaps you are looking for a truncated normal distribution.
Using scipy.stats.truncnorm,
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
lower, upper = (0.0, 0.0), (1.0, 1.0)
mu, sigma = np.array([0.2, 0.2]), np.array([0.1, 0.05])
X = stats.truncnorm(
(lower - mu) / sigma, (upper - mu) / sigma, loc=mu, scale=sigma)
data = X.rvs((10000, 2))
fig, ax = plt.subplots()
ax.hist(data[:, 0], density=True, alpha=0.5, bins=20)
ax.hist(data[:, 1], density=True, alpha=0.5, bins=20)
plt.show()
yields
Here's another way to visualize the sample. The code is mainly taken from the matplotlib gallery:
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
lower, upper = (0.0, 0.0), (1.0, 1.0)
mu, sigma = np.array([0.2, 0.2]), np.array([0.1, 0.05])
X = stats.truncnorm(
(lower - mu) / sigma, (upper - mu) / sigma, loc=mu, scale=sigma)
data = X.rvs((10000, 2))
x, y = data.T
nullfmt = mticker.NullFormatter() # no labels
# definitions for the axes
left, width = 0.1, 0.65
bottom, height = 0.1, 0.65
bottom_h = left_h = left + width + 0.02
rect_scatter = [left, bottom, width, height]
rect_histx = [left, bottom_h, width, 0.2]
rect_histy = [left_h, bottom, 0.2, height]
# start with a rectangular Figure
plt.figure(1, figsize=(8, 8))
axScatter = plt.axes(rect_scatter)
axHistx = plt.axes(rect_histx)
axHisty = plt.axes(rect_histy)
# no labels
axHistx.xaxis.set_major_formatter(nullfmt)
axHisty.yaxis.set_major_formatter(nullfmt)
# the scatter plot:
axScatter.scatter(x, y)
axScatter.set_xlim((-0.1, 0.7))
axScatter.set_ylim((-0.1, 0.5))
bins = 20
axHistx.hist(x, bins=bins)
axHisty.hist(y, bins=bins, orientation='horizontal')
axHistx.set_xlim(axScatter.get_xlim())
axHisty.set_ylim(axScatter.get_ylim())
plt.show()

Just a quick idea, you could use np.clip() to do this pretty easily!
# Clip every sampled value to its own (lower, upper) interval. The result is
# stored back in params_mc so that params_bounds keeps holding the bounds.
params_mc = [np.clip(value, low, high) for value, (low, high) in zip(params_mc, params_bounds)]

Related

Creating a 2-D data plot with "vertical" marginal histograms

How can I create in python a bi-variate data plot with "vertical" marginal histograms like this?:
Say that the data is generated via:
from scipy.stats import multivariate_normal
import numpy as np
mean = np.array([0, 0])
cov = np.array([[1, 0.5], [0.5, 2]])
data = multivariate_normal(mean, cov).rvs(1000)
Here is sample code which shows how one can do this:
import math
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import rc, rcParams
from numpy.linalg import eigh
from scipy.stats import multivariate_normal, norm
from mpl_toolkits.mplot3d import Axes3D
rcParams['text.latex.preamble'] = r'\boldmath'
rc('text', usetex=True)
mean = np.array([0,0])
cov = np.array([[1, 0.3], [0.3, .5]])
np.random.seed(0)
mvn_rvs = multivariate_normal(mean, cov).rvs(800)
pdf_x = norm(mean[0], np.sqrt(cov[0,0])).pdf
pdf_y = norm(mean[1], np.sqrt(cov[1,1])).pdf
rv_x = mvn_rvs[:, 0]
rv_y = mvn_rvs[:, 1]
x = np.linspace(-3, 3, 101)
y = np.linspace(-3, 3, 100)
X, Y = np.meshgrid(x, y)
fontsize = 30
fig = plt.figure()
# Figure.gca() no longer accepts keyword arguments in current matplotlib;
# add_subplot is the supported way to create the 3D axes on a fresh figure.
ax = fig.add_subplot(111, projection='3d')
ax.get_proj = lambda: np.dot(Axes3D.get_proj(ax), np.diag([1.15, 1.15, 1, 1]))
ax.plot(y, pdf_y(y), zs=x.min(), zdir='x', linewidth=3, label="$\\mathsf{P_y(y)}$")
ax.plot(x, pdf_x(x), zs=y.max(), zdir='y', linewidth=3, label='$\\mathsf{P_x(x)}$')
leg = plt.legend(fontsize=fontsize, ncol=2, frameon=False, bbox_to_anchor=(-0.10, 1.1275),
loc='upper left', handlelength=0.7, handletextpad=0.5, columnspacing=2.4)
grid_linewidth = 1.15
ax.xaxis._axinfo["grid"]['linewidth'] = grid_linewidth
ax.yaxis._axinfo["grid"]['linewidth'] = grid_linewidth
ax.zaxis._axinfo["grid"]['linewidth'] = grid_linewidth
ax.xaxis.set_pane_color((1.0, 1.0, 1.0, 0.0))
ax.yaxis.set_pane_color((1.0, 1.0, 1.0, 0.0))
ax.zaxis.set_pane_color((1.0, 1.0, 1.0, 0.0))
# Make the three axis lines fully transparent. The w_xaxis/w_yaxis/w_zaxis
# aliases were removed from matplotlib; xaxis/yaxis/zaxis are the same objects.
ax.xaxis.line.set_color((1.0, 1.0, 1.0, 0.0))
ax.yaxis.line.set_color((1.0, 1.0, 1.0, 0.0))
ax.zaxis.line.set_color((1.0, 1.0, 1.0, 0.0))
labelpad = -5
ax.set_xlabel("$\\mathsf{x}$", fontsize=fontsize, labelpad=labelpad)
ax.set_ylabel("$\\mathsf{y}$", fontsize=fontsize, labelpad=labelpad)
labelsize = 10
ax.xaxis.set_rotate_label(False)
ax.yaxis.set_rotate_label(False)
ax.set_zlim(bottom=0)
ax.set_xlim(-3, 3)
ax.set_ylim(-3, 3)
ax.xaxis.set_ticklabels([])
ax.xaxis.set_visible(False)
ax.yaxis.set_ticklabels([])
ax.zaxis.set_ticklabels([])
sx2 = cov[0, 0]
sy2 = cov[1, 1]
rho = cov[0, 1] / np.sqrt(sx2 * sy2)
Sigma = cov
target = 0.1
gamma = math.log(1 / (4*(np.pi**2)*(sx2**2)*(sy2**2)*(1 - rho**2)*(target**2)))
eigenvalues, P = eigh(np.linalg.inv(Sigma))
# Compute u and v as per link using thetas from 0 to 2pi
thetas = np.linspace(0, 2*np.pi, 10000)
uv = (np.sqrt(gamma) / np.sqrt(eigenvalues)) * np.hstack((np.cos(thetas).reshape(-1,1), np.sin(thetas).reshape(-1, 1)))
# Map every (u, v) row back to the original coordinates: row i is inv(P) @ uv[i].
# Doing this as one matrix product inverts P once instead of twice per point,
# and the result automatically has as many rows as uv.
orig_coord = np.matmul(uv, np.linalg.inv(P).T)
ax.plot(rv_x, rv_y, 0*rv_x, ' o', c='g', markersize=1.1) # "RdBu_r")
ax.plot(orig_coord[:, 0], orig_coord[:, 1],
0 * np.ones_like(orig_coord[:, 0]), c='r', linewidth=3)
ax.view_init(azim=-45, elev=20)

How to create a bar plot with a logarithmic x-axis and gaps between the bars?

I want to plot a beautiful bar plot using the data mentioned in the script. Additionally, the x-axis should be logarithmic and there must be gaps between the bars.
I tried the script as below:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
fig = plt.figure()
x = [0.000001,0.00001,0.0001,0.001,0.01,0.1,1.0]
height = [5.3,1.8,8.24,5.8,2.8,3.3,4.2]
width = 0.000001
plt.bar(x, height, width, color='b' )
plt.xscale("log")
plt.savefig('SimpleBar.png')
plt.show()
However, the x-axis values are not plotted as expected.
With a log scale x-axis, you can't set constant widths for the bars. E.g. the first bar would go between 0 and 0.000002, (0 is at minus infinity on a log scale).
You could use the x-positions for the left edge of the bars, and the next x-position for the right edge:
import matplotlib.pyplot as plt
import numpy as np
fig = plt.figure()
x = [0.000001, 0.00001, 0.0001, 0.001, 0.01, 0.1, 1.0]
height = [5.3, 1.8, 8.24, 5.8, 2.8, 3.3, 4.2]
plt.xscale("log")
widths = np.diff(x + [x[-1] * 10])
plt.bar(x, height, widths, align='edge', facecolor='dodgerblue', edgecolor='white', lw=2)
plt.show()
If you want to "center" the bars around the original x-values, you need to calculate the start and end positions of each bar in log space. The easiest way to get more spacing between the bars, is to set a thicker white border.
import matplotlib.pyplot as plt
import numpy as np
x = [0.000001, 0.00001, 0.0001, 0.001, 0.01, 0.1, 1.0]
height = [5.3, 1.8, 8.24, 5.8, 2.8, 3.3, 4.2]
plt.xscale("log")
padded_x = [x[0] / 10] + x + [x[-1] * 10]
centers = [np.sqrt(x0 * x1) for x0, x1 in zip(padded_x[:-1], padded_x[1:])]
widths = np.diff(centers)
plt.bar(centers[:-1], height, widths, align='edge', facecolor='dodgerblue', edgecolor='white', lw=4)
plt.margins(x=0.01)
plt.show()
You can also have a configurable width if you calculate the new left and right positions for each bar:
import matplotlib.pyplot as plt
import numpy as np
x = [0.000001, 0.00001, 0.0001, 0.001, 0.01, 0.1, 1.0]
height = [5.3, 1.8, 8.24, 5.8, 2.8, 3.3, 4.2]
plt.xscale("log")
padded_x = [x[0] / 10] + x + [x[-1] * 10]
width = 0.3 # 1 for full width, closer to 0 for thinner bars
lefts = [x1 ** (1 - width / 2) * x0 ** (width / 2) for x0, x1 in zip(padded_x[:-2], padded_x[1:-1])]
rights = [x0 ** (1 - width / 2) * x1 ** (width / 2) for x0, x1 in zip(padded_x[1:-1], padded_x[2:])]
widths = [r - l for l, r in zip(lefts, rights)]
plt.bar(lefts, height, widths, align='edge', facecolor='dodgerblue', lw=0)
plt.show()

plotting data on a hexagonal figure

I want to build a graph that will look like this, - for each point I have a single value and there is a maximum that reaches the border.
All I can find is how to have hexbin in a scatterplot with seaborn or similar - any ideas, is there some ready solution maybe or I would need to code my way through it?
You could use tripcolor to show 6 shaded triangles. Scaling the outer vectors can adapt the triangles to show the desired proportions.
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.tri as tri
proportions = [0.6, 0.75, 0.8, 0.9, 0.7, 0.8]
labels = ['alpha', 'beta', 'gamma', 'delta', 'epsilon', 'zeta']
N = len(proportions)
proportions = np.append(proportions, 1)
theta = np.linspace(0, 2 * np.pi, N, endpoint=False)
x = np.append(np.sin(theta), 0)
y = np.append(np.cos(theta), 0)
triangles = [[N, i, (i + 1) % N] for i in range(N)]
triang_backgr = tri.Triangulation(x, y, triangles)
triang_foregr = tri.Triangulation(x * proportions, y * proportions, triangles)
cmap = plt.cm.rainbow_r # or plt.cm.hsv ?
colors = np.linspace(0, 1, N + 1)
plt.tripcolor(triang_backgr, colors, cmap=cmap, shading='gouraud', alpha=0.4)
plt.tripcolor(triang_foregr, colors, cmap=cmap, shading='gouraud', alpha=0.8)
plt.triplot(triang_backgr, color='white', lw=2)
for label, color, xi, yi in zip(labels, colors, x, y):
plt.text(xi * 1.05, yi * 1.05, label, # color=cmap(color),
ha='left' if xi > 0.1 else 'right' if xi < -0.1 else 'center',
va='bottom' if yi > 0.1 else 'top' if yi < -0.1 else 'center')
plt.axis('off')
plt.gca().set_aspect('equal')
plt.show()
The code allows for different numbers of triangles. Here are examples with 5 or 6 triangles:

Adding colorbars to clustered heatmaps

I am trying to replicate this type of plot (heatmap with colorbars as leaves)
This is what I've done so far
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.cluster.hierarchy import dendrogram, linkage
import scipy.cluster.hierarchy as sch
import scipy.spatial.distance as ssd
#read data
fid_df = pd.read_csv(fid_file, index_col=[0])
# scale data
def scale(x):
    """Return log2(x + 1) for a single scalar value.

    The +1 shift maps zero to 0.0. Raises ValueError (from math.log2) when
    x + 1 is not positive.
    """
    # np.math was only an alias of the stdlib math module and no longer exists
    # in NumPy 2.x, so call math.log2 directly.
    import math
    return math.log2(x + 1)
fid_df = fid_df.applymap(scale)
# clustering columns
data_1D_X = ssd.pdist(fid_df.T, 'euclidean')
X = sch.linkage(data_1D_X, method='ward')
# clustering rows
data_1D_Y = ssd.pdist(fid_df, 'cityblock')
Y = linkage(data_1D_Y, method='ward')
#plot first dendrogram
fig = plt.figure(figsize=(8, 8))
ax1 = fig.add_axes([0.09, 0.1, 0.2, 0.6])
Z1 = sch.dendrogram(Y, orientation='left')
ax1.set_xticks([])
ax1.set_yticks([])
# second dendrogram.
ax2 = fig.add_axes([0.3, 0.71, 0.6, 0.2])
Z2 = sch.dendrogram(X)
ax2.set_xticks([])
ax2.set_yticks([])
# plot matrix
axmatrix = fig.add_axes([0.3, 0.1, 0.6, 0.6])
# sorts based of clustering
idx1 = Z1['leaves']
idx2 = Z2['leaves']
D = fid_df.values[idx1, :]
D = D[:, idx2]
im = axmatrix.matshow(D, aspect='auto', origin='lower', cmap=plt.cm.YlGnBu)
axmatrix.set_xticks([])
axmatrix.set_yticks([])
Example:
However, I need to add colorbars that would show the initial groups of rows and columns. Any idea how to do this?
Something like this?
import matplotlib.pyplot as plt
import numpy as np
fig = plt.figure()
ax1 = fig.add_axes((0, 0, 1, 0.9))
ax2 = fig.add_axes((0, 0.9, 1, 0.1))
gridY, gridX = np.mgrid[0:10:11 * 1j, 0:10:11 * 1j]
ax1.pcolormesh(gridX, gridY, np.sqrt(gridX ** 2 + gridY ** 2))
randCol = ['red', 'blue']
# Build the random generator once; constructing a new one for each of the
# 1001 stripes is needless work.
rng = np.random.default_rng()
for value in np.linspace(0, 10, 1001):
    # Draw one vertical stripe in a randomly chosen colour.
    ax2.axvline(value, color=randCol[rng.integers(2)])
ax2.set_xlim((0, 10))
ax2.tick_params(labelbottom=False, bottom=False, labelleft=False, left=False)
fig.savefig('so.png', bbox_inches='tight')

nonlinear colormap, matplotlib

Are there any colormaps or is there a simple way to transform a matplotlib colormap to provide a much bigger color range near 0.5 and a smaller one at the extremes? I am creating a bunch of subplots, one of which has color values of about 10 times the others, so its values dominate and the rest of the plots all look the same. For a simple example say we have:
import numpy as np
import matplotlib.pyplot as plt
x = np.linspace(1,10,10)
y = np.linspace(1,10,10)
t1 = np.random.normal(2,0.3,10)
t2 = np.random.normal(9,0.01,10)
t2_max = max(t2)
plt.figure(figsize=(22.0, 15.50))
p = plt.subplot(1,2,1)
colors = plt.cm.Accent(t1/t2_max)
p.scatter(x, y, edgecolors=colors, s=15, linewidths=4)
p = plt.subplot(1,2,2)
colors = plt.cm.Accent(t2/t2_max)
p.scatter(x, y, edgecolors=colors, s=15, linewidths=4)
plt.subplots_adjust(left=0.2)
cbar_ax = plt.axes([0.10, 0.15, 0.05, 0.7])
sm = plt.cm.ScalarMappable(cmap=plt.cm.Accent, norm=plt.Normalize(vmin=0, vmax=t2_max))
sm._A = []
cbar = plt.colorbar(sm,cax=cbar_ax)
plt.show()
There is much more variation in t1 than in t2, however the variation cannot be seen because of the high values of t2. What I want is a map that will provide a larger color gradient around the mean of t1 without transforming the data itself. I have found one solution here http://protracted-matter.blogspot.co.nz/2012/08/nonlinear-colormap-in-matplotlib.html but can't get it to work for my scatter plots.
EDIT:
Based on the answer below, the class can be modified to handle negative numbers and fixed boundaries:
import numpy as np
import matplotlib.pyplot as plt
x = y = np.linspace(1, 10, 10)
t1mean, t2mean = -6, 9
sigma1, sigma2 = .3, .01
t1 = np.random.normal(t1mean, sigma1, 10)
t2 = np.random.normal(t2mean, sigma2, 10)
class nlcmap(object):
    """Nonlinear wrapper around a colormap.

    The given levels are mapped onto equally spaced positions between the
    smallest and the largest level, so regions where the levels are dense
    receive a larger share of the underlying colormap.

    Parameters
    ----------
    cmap : callable colormap object exposing ``N`` and ``monochrome``.
    levels : increasing sequence of data values (may be negative).
    """

    def __init__(self, cmap, levels):
        self.cmap = cmap
        self.N = cmap.N
        self.monochrome = self.cmap.monochrome
        self.levels = np.asarray(levels, dtype='float64')
        self._x = self.levels
        self.levmax = self.levels.max()
        self.levmin = self.levels.min()
        # Equally spaced positions, one per level, over the same value range.
        self.transformed_levels = np.linspace(self.levmin, self.levmax,
                                              len(self.levels))

    def __call__(self, xi, alpha=1.0, **kw):
        """Return the colour(s) of the wrapped colormap for data value(s) xi."""
        yi = np.interp(xi, self._x, self.transformed_levels)
        # Normalise to [0, 1] relative to levmin. The previous
        # ``yi / (levmax - levmin) + 0.5`` was only right when levmin == -levmax.
        return self.cmap((yi - self.levmin) / (self.levmax - self.levmin), alpha)
# Fixed boundaries of the colour scale; they are also the outermost levels.
tmax = 10
tmin = -10
# The choice of the levels depends on the data: here five levels are placed
# within two standard deviations around each of the two sample means.
levels = np.concatenate((
    [tmin, tmax],
    np.linspace(t1mean - 2 * sigma1, t1mean + 2 * sigma1, 5),
    np.linspace(t2mean - 2 * sigma2, t2mean + 2 * sigma2, 5),
))
levels = levels[levels <= tmax]
levels.sort()
# print is a function on Python 3; the statement form is a SyntaxError there.
print(levels)
cmap_nonlin = nlcmap(plt.cm.jet, levels)
fig, (ax1, ax2) = plt.subplots(1, 2)
ax1.scatter(x, y, edgecolors=cmap_nonlin(t1), s=15, linewidths=4)
ax2.scatter(x, y, edgecolors=cmap_nonlin(t2), s=15, linewidths=4)
fig.subplots_adjust(left=.25)
cbar_ax = fig.add_axes([0.10, 0.15, 0.05, 0.7])
#for the colorbar we map the original colormap, not the nonlinear one:
sm = plt.cm.ScalarMappable(cmap=plt.cm.jet,
norm=plt.Normalize(vmin=tmin, vmax=tmax))
sm._A = []
cbar = fig.colorbar(sm, cax=cbar_ax)
# here we relabel the linear colorbar ticks to match the nonlinear ticks
cbar.set_ticks(cmap_nonlin.transformed_levels)
cbar.set_ticklabels(["%.2f" % lev for lev in levels])
plt.show()
Your link provides quite a good solution for the colormap. I edited it a bit, but it contained all that was necessary. You need to pick some sensible levels for your nonlinear colormap. I used two ranges centered around the mean values, each spanning ±4 standard deviations of your sample. By changing that to another number you obtain a different local gradient in the color around the two mean values.
For the colorbar, you can either:
- leave the colors nonlinearly spaced with linearly spaced labels, or
- have linearly spaced colors with nonlinearly spaced labels.
The second allows greater resolution when looking at the data, looks nicer and is implemented below:
import numpy as np
import matplotlib.pyplot as plt
x = y = np.linspace(1, 10, 10)
t1mean, t2mean = 2, 9
sigma1, sigma2 = .3, .01
t1 = np.random.normal(t1mean, sigma1, 10)
t2 = np.random.normal(t2mean, sigma2, 10)
class nlcmap(object):
    """Colormap wrapper that spreads the given levels evenly over the colours.

    Each level is assigned an equally spaced position between 0 and the
    largest level; data values are interpolated between those positions and
    then looked up in the wrapped colormap.
    """

    def __init__(self, cmap, levels):
        self.cmap = cmap
        self.N = cmap.N
        self.monochrome = cmap.monochrome
        level_array = np.asarray(levels, dtype='float64')
        # levels and _x deliberately refer to the same array.
        self.levels = self._x = level_array
        self.levmax = level_array.max()
        self.transformed_levels = np.linspace(
            0.0, self.levmax, num=len(level_array))

    def __call__(self, xi, alpha=1.0, **kw):
        """Look up the colour(s) for data value(s) xi with the given alpha."""
        stretched = np.interp(xi, self._x, self.transformed_levels)
        fraction = stretched / self.levmax
        return self.cmap(fraction, alpha)
tmax = max(t1.max(), t2.max())
#the choice of the levels depends on the data:
levels = np.concatenate((
[0, tmax],
np.linspace(t1mean - 4 * sigma1, t1mean + 4 * sigma1, 5),
np.linspace(t2mean - 4 * sigma2, t2mean + 4 * sigma2, 5),
))
levels = levels[levels <= tmax]
levels.sort()
cmap_nonlin = nlcmap(plt.cm.jet, levels)
fig, (ax1, ax2) = plt.subplots(1, 2)
ax1.scatter(x, y, edgecolors=cmap_nonlin(t1), s=15, linewidths=4)
ax2.scatter(x, y, edgecolors=cmap_nonlin(t2), s=15, linewidths=4)
fig.subplots_adjust(left=.25)
cbar_ax = fig.add_axes([0.10, 0.15, 0.05, 0.7])
#for the colorbar we map the original colormap, not the nonlinear one:
sm = plt.cm.ScalarMappable(cmap=plt.cm.jet,
norm=plt.Normalize(vmin=0, vmax=tmax))
sm._A = []
cbar = fig.colorbar(sm, cax=cbar_ax)
# here we relabel the linear colorbar ticks to match the nonlinear ticks
cbar.set_ticks(cmap_nonlin.transformed_levels)
cbar.set_ticklabels(["%.2f" % lev for lev in levels])
plt.show()
In the result, notice that the ticks of the colorbar are NOT equispaced:
You could use LinearSegmentedColormap:
With this, you need to set up a color lookup table within a dictionary e.g. 'cdict' below.
cdict = {'red': [(0.0, 0.0, 0.0),
(0.15, 0.01, 0.01),
(0.35, 1.0, 1.0),
(1.0, 1.0, 1.0)],
'green': [(0.0, 0.0, 0.0),
(1.0, 0.0, 1.0)],
'blue': [(0.0, 0.0, 1.0),
(0.9, 0.01, 0.01),
(1.0, 0.0, 1.0)]}
This shows the transitions between values. I have set red to vary a lot around the values of t1/t2_max (0.15 to 0.35) and blue to vary a lot around the values of t2/t2_max (0.9 to 1.0). Green does nothing. I'd recommend reading the docs to see how this works. (Note that this could be automated to vary around your values.) I then tweaked your code to show the graph:
import matplotlib.colors as col
my_cmap = col.LinearSegmentedColormap('my_colormap', cdict)
plt.figure(figsize=(22.0, 15.50))
p = plt.subplot(1,2,1)
colors = my_cmap(t1/t2_max)
p.scatter(x, y, edgecolors=colors, s=15, linewidths=4)
p = plt.subplot(1,2,2)
colors = my_cmap(t2/t2_max)
p.scatter(x, y, edgecolors=colors, s=15, linewidths=4)
plt.subplots_adjust(left=0.2)
cbar_ax = plt.axes([0.10, 0.15, 0.05, 0.7])
sm = plt.cm.ScalarMappable(cmap=my_cmap, norm=plt.Normalize(vmin=0, vmax=t2_max))
sm._A = []
cbar = plt.colorbar(sm,cax=cbar_ax)
plt.show()

Categories

Resources