putting limits to x,y,z interpolated heatmap in matplotlib - python

I'm looking to plot a heatmap for which I have the value (=heatmap color) z at each couple of spatial x,y coordinates but I want to mark out the z values between [z0,z1] with z0=0.0 and z1=0.4 while some of interpolated z values are under and above those boundaries.
from numpy.random import uniform, seed
from matplotlib.mlab import griddata
import matplotlib.pyplot as plt
import numpy as np
# make up data.
#npts = int(raw_input('enter # of random points to plot:'))
seed(0)
npts = 200
x = uniform(-2, 2, npts)
y = uniform(-2, 2, npts)
z = x*np.exp(-x**2 - y**2)
# define grid.
xi = np.linspace(0, 1, 1000)
yi = np.linspace(0, 1, 1000)
# grid the data.
zi = griddata(x, y, z, xi, yi, interp='linear')
# contour the gridded data, plotting dots at the nonuniform data points.
CS = plt.contourf(xi, yi, zi, 15, cmap=plt.cm.rainbow,
vmax=abs(zi).max(), vmin=-abs(zi).max())
plt.colorbar() # draw colorbar
# plot data points.
plt.show()
I would like to restrict the colorbar and heatmap color from 0.0 to 0.4 (so avoid in the heatmap and in the colorbar valies under 0.0 and above 0.4).
How to do that? Thanks

You can set the values in a numpy array to None to leave them unplotted. For example,
zmin = 0.0
zmax = 0.4
zi[(zi<zmin) | (zi>zmax)] = None
CS = plt.contourf(xi, yi, zi, 15, cmap=plt.cm.rainbow,
vmax=zmax, vmin=zmin)

Related

Summing overlapping bubbles with gradient in python

I want to plot a map of specific sites to interpret their effects on the surrounding city environment. To do this, I would like to plot the sites as bubbles, with a decreasing gradient towards the edge of the circle, and where the gradient of the overlapping circles is the sum.
As an example I've used this:
# libraries
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
# create data
x = np.random.rand(15)
y = x+np.random.rand(15)
z = x+np.random.rand(15)
z=z*z
# Change color with c and alpha. I map the color to the X axis value.
plt.scatter(x, y, s=1500, c=z, cmap="Blues", alpha=0.4, edgecolors="grey", linewidth=1)
# Add titles (main and on axis)
plt.xlabel("the X axis")
plt.ylabel("the Y axis")
plt.title("A colored bubble plot")
plt.show();
which produces:
However, the color of the circles does not decay, nor do they seem to sum the intended way.
Is there any smart way to do this, or could it possibly be easier with some kind of heatmap solution, or using grids and a decaying effect on adjacent tiles?
Here is an approach with densities placed at each x and y, enlarged by the z value.
Depending on the distance to each x,y position a quantity is added.
import matplotlib.pyplot as plt
import numpy as np
from numpy.linalg import norm # calculate the length of a vector
# import seaborn as sns
# create data
x = np.random.rand(15)
y = x+np.random.rand(15)
z = x+np.random.rand(15)
z=z*z
fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(12,5))
# Change color with c and alpha. I map the color to the X axis value.
ax1.scatter(x, y, s=1500, c=z, cmap="Blues", alpha=0.4, edgecolors="grey", linewidth=1)
ax1.set_xlabel("the X axis")
ax1.set_ylabel("the Y axis")
ax1.set_title("A colored bubble plot")
centers = np.dstack((x, y))[0]
xmin = min(x)-0.2
xmax = max(x)+0.2
ymin = min(y)-0.2
ymax = max(y)+0.2
zmin = min(z)
zmax = max(z)
xx, yy = np.meshgrid(np.linspace(xmin, xmax, 100),
np.linspace(ymin, ymax, 100))
xy = np.dstack((xx, yy))
zz = np.zeros_like(xx)
for ci, zi in zip(centers, z):
sigma = zi / zmax * 0.3
sigma2 = sigma ** 2
zz += np.exp(- norm(xy - ci, axis=-1) ** 2 / sigma2 / 2)
img = ax2.imshow(zz, extent=[xmin, xmax, ymin, ymax], origin='lower', aspect='auto', cmap='Blues')
#plt.colorbar(img, ax=ax2)
ax2.set_xlabel("the X axis")
ax2.set_ylabel("the Y axis")
ax2.set_title("Density depending on z")
plt.show()
The plot compares the two approaches using the same random data.

Get coordinates from points density with KDE

I have a set of points (> 1k) in this form:
y,x
173.549,308.176
173.549,313.328
213.26,419.588
Using KDE, i can plot points density with pcolormesh and contourf. This is an example result, plotting points too:
This is the code i used to have the plot:
import matplotlib.pyplot as plt
import matplotlib
import numpy as np
from scipy.stats.kde import gaussian_kde
x, y = np.genfromtxt('terzinoSX.csv', delimiter=',', unpack=True)
y = y[np.logical_not(np.isnan(y))]
x = x[np.logical_not(np.isnan(x))]
k = gaussian_kde(np.vstack([x, y]))
xi, yi = np.mgrid[x.min():x.max():x.size**0.5*1j,y.min():y.max():y.size**0.5*1j]
zi = k(np.vstack([xi.flatten(), yi.flatten()]))
fig = plt.figure(figsize=(7,4))
ax2 = fig.add_subplot(111)
#alpha=0.5 will make the plots semitransparent
#ax1.pcolormesh(yi, xi, zi.reshape(xi.shape), alpha=0.5)
ax2.contourf(yi, xi, zi.reshape(xi.shape), alpha=0.5)
plt.axis('off')
ax2.plot(y,x, "o")
ax2.set_xlim(0, 740)
ax2.set_ylim(515, 0)
#overlay soccer field
im = plt.imread('statszone_football_pitch.png')
ax2.imshow(im, extent=[0, 740, 0, 515], aspect='auto')
fig.savefig('test.png', bbox_inches='tight')
I would like to have one point representing coordinates of most populated zone (middle point for example), like a middle point over the "red" zone. Is it possible in some way?
I solved this by adding these lines that calculate the point in the most populated area:
xy = np.vstack([x,y])
kde = stats.gaussian_kde(xy)
density = kde(xy)
pts = xy.T[np.argmax(density)]
You can use np.argmax to get the coordinates of the maximum. For example:
kde = compute_my_kde() # Returns a two-dimensional array
y, x = np.argmax(kde) # x and y are swapped because matplotlib displays images as a matrix (first index is rows, second index is colums)
plt.imshow(kde) # Show the kde
plt.scatter(x, y) # Show the maximum point

Matplotlib: Making 2D Gaussian contours with transparent outermost layer

So I have used matplotlib cookbook to generate the following grayscale gaussian contours:
import numpy as np
from scipy.interpolate import griddata
import matplotlib.pyplot as plt
import numpy.ma as ma
from numpy.random import uniform, seed
from matplotlib import cm
def gauss(x,y,Sigma,mu):
X=np.vstack((x,y)).T
mat_multi=np.dot((X-mu[None,...]).dot(np.linalg.inv(Sigma)),(X-mu[None,...]).T)
return np.diag(np.exp(-1*(mat_multi)))
def plot_countour(x,y,z):
# define grid.
xi = np.linspace(-2.1,2.1,100)
yi = np.linspace(-2.1,2.1,100)
## grid the data.
zi = griddata((x, y), z, (xi[None,:], yi[:,None]), method='cubic')
# contour the gridded data, plotting dots at the randomly spaced data points.
CS = plt.contour(xi,yi,zi,6,linewidths=0.5,colors='k')
#CS = plt.contourf(xi,yi,zi,15,cmap=plt.cm.jet)
CS = plt.contourf(xi,yi,zi,6,cmap=cm.Greys_r)
#plt.colorbar() # draw colorbar
# plot data points.
#plt.scatter(x,y,marker='o',c='b',s=5)
plt.xlim(-2,2)
plt.ylim(-2,2)
plt.title('griddata test (%d points)' % npts)
plt.show()
# make up some randomly distributed data
seed(1234)
npts = 1000
x = uniform(-2,2,npts)
y = uniform(-2,2,npts)
z = gauss(x,y,Sigma=np.asarray([[1.,.5],[0.5,1.]]),mu=np.asarray([0.,0.]))
plot_countour(x,y,z)
However I want the outermost layer to be colourless so I could export the image consisting only of the few circular contours of the Gaussian. Is there any way of manipulating this code to do that?
Try using levels.
def plot_countour(x,y,z):
# define grid.
xi = np.linspace(-2.1, 2.1, 100)
yi = np.linspace(-2.1, 2.1, 100)
## grid the data.
zi = griddata((x, y), z, (xi[None,:], yi[:,None]), method='cubic')
levels = [0.2, 0.4, 0.6, 0.8, 1.0]
# contour the gridded data, plotting dots at the randomly spaced data points.
CS = plt.contour(xi,yi,zi,len(levels),linewidths=0.5,colors='k', levels=levels)
#CS = plt.contourf(xi,yi,zi,15,cmap=plt.cm.jet)
CS = plt.contourf(xi,yi,zi,len(levels),cmap=cm.Greys_r, levels=levels)
plt.colorbar() # draw colorbar
# plot data points.
# plt.scatter(x, y, marker='o', c='b', s=5)
plt.xlim(-2, 2)
plt.ylim(-2, 2)
plt.title('griddata test (%d points)' % npts)
plt.show()
# make up some randomly distributed data
seed(1234)
npts = 1000
x = uniform(-2, 2, npts)
y = uniform(-2, 2, npts)
z = gauss(x, y, Sigma=np.asarray([[1.,.5],[0.5,1.]]), mu=np.asarray([0.,0.]))
plot_countour(x, y, z)

How to plot a 3D density map in python with matplotlib

I have a large dataset of (x,y,z) protein positions and would like to plot areas of high occupancy as a heatmap. Ideally the output should look similiar to the volumetric visualisation below, but I'm not sure how to achieve this with matplotlib.
My initial idea was to display my positions as a 3D scatter plot and color their density via a KDE. I coded this up as follows with test data:
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
mu, sigma = 0, 0.1
x = np.random.normal(mu, sigma, 1000)
y = np.random.normal(mu, sigma, 1000)
z = np.random.normal(mu, sigma, 1000)
xyz = np.vstack([x,y,z])
density = stats.gaussian_kde(xyz)(xyz)
idx = density.argsort()
x, y, z, density = x[idx], y[idx], z[idx], density[idx]
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(x, y, z, c=density)
plt.show()
This works well! However, my real data contains many thousands of data points and calculating the kde and the scatter plot becomes extremely slow.
A small sample of my real data:
My research would suggest that a better option is to evaluate the gaussian kde on a grid. I’m just not sure how to this in 3D:
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
mu, sigma = 0, 0.1
x = np.random.normal(mu, sigma, 1000)
y = np.random.normal(mu, sigma, 1000)
nbins = 50
xy = np.vstack([x,y])
density = stats.gaussian_kde(xy)
xi, yi = np.mgrid[x.min():x.max():nbins*1j, y.min():y.max():nbins*1j]
di = density(np.vstack([xi.flatten(), yi.flatten()]))
fig = plt.figure()
ax = fig.add_subplot(111)
ax.pcolormesh(xi, yi, di.reshape(xi.shape))
plt.show()
Thanks to mwaskon for suggesting the mayavi library.
I recreated the density scatter plot in mayavi as follows:
import numpy as np
from scipy import stats
from mayavi import mlab
mu, sigma = 0, 0.1
x = 10*np.random.normal(mu, sigma, 5000)
y = 10*np.random.normal(mu, sigma, 5000)
z = 10*np.random.normal(mu, sigma, 5000)
xyz = np.vstack([x,y,z])
kde = stats.gaussian_kde(xyz)
density = kde(xyz)
# Plot scatter with mayavi
figure = mlab.figure('DensityPlot')
pts = mlab.points3d(x, y, z, density, scale_mode='none', scale_factor=0.07)
mlab.axes()
mlab.show()
Setting the scale_mode to 'none' prevents glyphs from being scaled in proportion to the density vector. In addition for large datasets, I disabled scene rendering and used a mask to reduce the number of points.
# Plot scatter with mayavi
figure = mlab.figure('DensityPlot')
figure.scene.disable_render = True
pts = mlab.points3d(x, y, z, density, scale_mode='none', scale_factor=0.07)
mask = pts.glyph.mask_points
mask.maximum_number_of_points = x.size
mask.on_ratio = 1
pts.glyph.mask_input_points = True
figure.scene.disable_render = False
mlab.axes()
mlab.show()
Next, to evaluate the gaussian kde on a grid:
import numpy as np
from scipy import stats
from mayavi import mlab
mu, sigma = 0, 0.1
x = 10*np.random.normal(mu, sigma, 5000)
y = 10*np.random.normal(mu, sigma, 5000)
z = 10*np.random.normal(mu, sigma, 5000)
xyz = np.vstack([x,y,z])
kde = stats.gaussian_kde(xyz)
# Evaluate kde on a grid
xmin, ymin, zmin = x.min(), y.min(), z.min()
xmax, ymax, zmax = x.max(), y.max(), z.max()
xi, yi, zi = np.mgrid[xmin:xmax:30j, ymin:ymax:30j, zmin:zmax:30j]
coords = np.vstack([item.ravel() for item in [xi, yi, zi]])
density = kde(coords).reshape(xi.shape)
# Plot scatter with mayavi
figure = mlab.figure('DensityPlot')
grid = mlab.pipeline.scalar_field(xi, yi, zi, density)
min = density.min()
max=density.max()
mlab.pipeline.volume(grid, vmin=min, vmax=min + .5*(max-min))
mlab.axes()
mlab.show()
As a final improvement I sped up the evaluation of kensity density function by calling the kde function in parallel.
import numpy as np
from scipy import stats
from mayavi import mlab
import multiprocessing
def calc_kde(data):
return kde(data.T)
mu, sigma = 0, 0.1
x = 10*np.random.normal(mu, sigma, 5000)
y = 10*np.random.normal(mu, sigma, 5000)
z = 10*np.random.normal(mu, sigma, 5000)
xyz = np.vstack([x,y,z])
kde = stats.gaussian_kde(xyz)
# Evaluate kde on a grid
xmin, ymin, zmin = x.min(), y.min(), z.min()
xmax, ymax, zmax = x.max(), y.max(), z.max()
xi, yi, zi = np.mgrid[xmin:xmax:30j, ymin:ymax:30j, zmin:zmax:30j]
coords = np.vstack([item.ravel() for item in [xi, yi, zi]])
# Multiprocessing
cores = multiprocessing.cpu_count()
pool = multiprocessing.Pool(processes=cores)
results = pool.map(calc_kde, np.array_split(coords.T, 2))
density = np.concatenate(results).reshape(xi.shape)
# Plot scatter with mayavi
figure = mlab.figure('DensityPlot')
grid = mlab.pipeline.scalar_field(xi, yi, zi, density)
min = density.min()
max=density.max()
mlab.pipeline.volume(grid, vmin=min, vmax=min + .5*(max-min))
mlab.axes()
mlab.show()

Matplotlib contour from xyz data: griddata invalid index

I'm trying to do a contour plot using matplotlib of a file with the following format:
x1 y1 z1
x2 y2 z2
etc
I can load it with numpy.loadtxt to get the vectors. So far, no trouble.
I read this to learn how to plot, and can reproduce it by copy paste, so i'm sure nothin is wrong with my installation:
http://matplotlib.org/examples/pylab_examples/griddata_demo.html
I understand I have to input x and y as vector and z as an array ,which can be done with griddata. This is also what i find on this site.
The documentation says:
zi = griddata(x,y,z,xi,yi) fits a surface of the form z = f*(*x, y) to the data in the (usually) nonuniformly spaced vectors (x, y, z). griddata() interpolates this surface at the points specified by (xi, yi) to produce zi. xi and yi must describe a regular grid, can be either 1D or 2D, but must be monotonically increasing.
For the sake of the example, I have written this code:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.mlab as ml
x=np.linspace(1.,10.,20)
y=np.linspace(1.,10.,20)
z=np.linspace(1.,2.,20)
xi=np.linspace(1.,10.,10)
yi=np.linspace(1.,10.,10)
zi = ml.griddata(x,y,z,xi,yi)
However, I get the following error when it comes to the griddata:
IndexError: invalid index
So, I tried to modify a bit the exemple of the doc like following:
from matplotlib.mlab import griddata
import matplotlib.pyplot as plt
import numpy as np
x = np.linspace(-2.1,2.1,300)
y = np.linspace(-2.1,2.1,300)
z = x*np.exp(-x**2-y**2)
# define grid.
xi = np.linspace(-2.1,2.1,100)
yi = np.linspace(-2.1,2.1,200)
# grid the data.
zi = griddata(x,y,z,xi,yi,interp='linear')
And I get the same error. I don't understand what's going wrong.
Thanks for your help.
Consider:
x = np.linspace(1., 10., 20)
y = np.linspace(1., 10., 20)
z = np.linspace(1., 2., 20)
This means we know the z-values at certain points along the line x=y.
From there,
zi = ml.griddata(x,y,z,xi,yi)
is asking mlab.griddata to extrapolate the values of z for all points in a rectangular grid.
We've given a lot of information about how z varies along this line, but no information about how z varies in the perpendicular direction (away from the x = y line). An error is being raised because mlab.griddata refuses to guess.
You'll get better results if your initial x, y data are distributed more randomly:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.mlab as ml
ndata = 10
ny, nx = 100, 200
xmin, xmax = 1, 10
ymin, ymax = 1, 10
# x = np.linspace(1, 10, ndata)
# y = np.linspace(1, 10, ndata)
x = np.random.randint(xmin, xmax, ndata)
y = np.random.randint(ymin, ymax, ndata)
z = np.random.random(ndata)
xi = np.linspace(xmin, xmax, nx)
yi = np.linspace(ymin, ymax, ny)
zi = ml.griddata(x, y, z, xi, yi)
plt.contour(xi, yi, zi, 15, linewidths = 0.5, colors = 'k')
plt.pcolormesh(xi, yi, zi, cmap = plt.get_cmap('rainbow'))
plt.colorbar()
plt.scatter(x, y, marker = 'o', c = 'b', s = 5, zorder = 10)
plt.xlim(xmin, xmax)
plt.ylim(ymin, ymax)
plt.show()
If you want mlab.griddata to extrapolate data along the line x=y to the entire grid in an arbitrary way, you could add two extra boundary points (xmin, ymax, z[0]) and (xmax,ymin,z[-1]):
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.mlab as ml
np.random.seed(8)
ndata = 10
ny, nx = 100, 200
xmin, xmax = 1, 10
ymin, ymax = 1, 10
x = np.linspace(1, 10, ndata)
y = np.linspace(1, 10, ndata)
z = np.random.random(ndata)
x = np.r_[x,xmin,xmax]
y = np.r_[y,ymax,ymin]
z = np.r_[z,z[0],z[-1]]
xi = np.linspace(xmin, xmax, nx)
yi = np.linspace(ymin, ymax, ny)
# Requires installation of natgrid
# http://sourceforge.net/projects/matplotlib/files/matplotlib-toolkits/
zi = ml.griddata(x, y, z, xi, yi, interp='nn')
# Or, without natgrid:
# zi = ml.griddata(x, y, z, xi, yi, interp='linear')
plt.contour(xi, yi, zi, 15, linewidths = 0.5, colors = 'k')
plt.pcolormesh(xi, yi, zi, cmap = plt.get_cmap('rainbow'))
plt.colorbar()
plt.scatter(x, y, marker = 'o', c = 'b', s = 10, zorder = 10)
plt.xlim(xmin, xmax)
plt.ylim(ymin, ymax)
plt.show()
ok, I finally found the solution to plot it. For those interested, here is the trick: use the griddata from Scipy with the 'nearest' method.
from scipy.interpolate import griddata
import numpy as np
import matplotlib.pyplot as plt
x=np.linspace(1.,10.,20)
y=np.linspace(1.,10.,20)
z=z = np.random.random(20)
xi=np.linspace(1.,10.,10)
yi=np.linspace(1.,10.,10)
X,Y= np.meshgrid(xi,yi)
Z = griddata((x, y), z, (X, Y),method='nearest')
plt.contourf(X,Y,Z)

Categories

Resources