2D histogram plot from data file - python

How can you make a 2d histogram (or surface) plot out of a given 2d numpy array where the first index is the label for the 'x' dimension, the 2nd index for the 'y' dimension and the third is the value to be plotted? Example:
[[4.80000e+01 5.12000e+02 8.03447e+03]
[4.80000e+01 2.56000e+02 9.24963e+03]
[4.80000e+01 1.28000e+02 9.02154e+03]
[4.80000e+01 6.40000e+01 8.96437e+03]
[4.80000e+01 3.20000e+01 7.98477e+03]
[4.80000e+01 1.60000e+01 1.07688e+04]
[4.80000e+01 8.00000e+00 1.96227e+04]
[4.80000e+01 4.00000e+00 3.51944e+04]
[5.20000e+01 5.12000e+02 7.53994e+03]
[5.20000e+01 2.56000e+02 7.54521e+03]
[5.20000e+01 1.28000e+02 8.12631e+03]
[5.20000e+01 6.40000e+01 8.12542e+03]
[5.20000e+01 3.20000e+01 7.49664e+03]
[5.20000e+01 1.60000e+01 9.92450e+03]
...
The whole 2d area to be plotted is (18,8).
Since matplotlib seems to accept the data in a 2d array (meshgrid/contour) I was trying to first stuff the values into a newly created 2d numpy array with a loop but failed at getting the indices looked up. Like
xlu=np.unique(d[:,0])
ylu=np.unique(d[:,1])
z=np.empty((xdim,ydim))
for x in d:
z[ np.where(xlu==x[0])[0] , np.where(ylu==x[1])[0] ] = z[2]
Is there a quicker/more elegant way to do get to a 2d histogram/surface plot?

plt.tricontourf(d[:,0], d[:,1], d[:,2]) creates filled contours from x, y and z data which aren't organised as a grid.
plt.hist2d(d[:,0], d[:,1], weights=d[:,2]) creates a 2d histogram where the weight of each x,y is given by z.
from matplotlib import pyplot as plt
import numpy as np
d = np.random.randn(1000, 10, 3).cumsum(axis=0).reshape(-1, 3)
fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(12, 4))
sm1 = ax1.tricontourf(d[:, 0], d[:, 1], d[:, 2], cmap='hot')
plt.colorbar(sm1, ax=ax1)
ax1.set_title('plt.tricontourf()')
_, _, _, sm2 = ax2.hist2d(d[:, 0], d[:, 1], weights=d[:, 2], bins=40, cmap='hot')
ax2.set_title('plt.hist2d()')
plt.colorbar(sm2, ax=ax2)
plt.tight_layout()
plt.show()

It seems that you need to interpolate your data since it is irregularly spaced.
Here is a snippet from official documentation (link).
import matplotlib.tri as tri
import numpy as np
np.random.seed(19680801)
npts = 200
ngridx = 100
ngridy = 200
x = np.random.uniform(-2, 2, npts)
y = np.random.uniform(-2, 2, npts)
z = x * np.exp(-x**2 - y**2)
fig, (ax1, ax2) = plt.subplots(nrows=2)
# -----------------------
# Interpolation on a grid
# -----------------------
# A contour plot of irregularly spaced data coordinates
# via interpolation on a grid.
# Create grid values first.
xi = np.linspace(-2.1, 2.1, ngridx)
yi = np.linspace(-2.1, 2.1, ngridy)
# Linearly interpolate the data (x, y) on a grid defined by (xi, yi).
triang = tri.Triangulation(x, y)
interpolator = tri.LinearTriInterpolator(triang, z)
Xi, Yi = np.meshgrid(xi, yi)
zi = interpolator(Xi, Yi)
# Note that scipy.interpolate provides means to interpolate data on a grid
# as well. The following would be an alternative to the four lines above:
#from scipy.interpolate import griddata
#zi = griddata((x, y), z, (xi[None,:], yi[:,None]), method='linear')
ax1.contour(xi, yi, zi, levels=14, linewidths=0.5, colors='k')
What it essentially does, it interpolates the values of your function to a regular grid using trilinear interpolation. There are other ways to do it depending on your goals.

Related

3D surface graph with matplotlib using dataframe columns to input the data

I have a spreadsheet file that I would like to input to create a 3D surface graph using Matplotlib in Python.
I used plot_trisurf and it worked, but I need the projections of the contour profiles onto the graph that I can get with the surface function, like this example.
I'm struggling to arrange my Z data in a 2D array that I can use to input in the plot_surface method. I tried a lot of things, but none seems to work.
Here it is what I have working, using plot_trisurf
import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
import pandas as pd
df=pd.read_excel ("/Users/carolethais/Desktop/Dissertação Carol/Códigos/Resultados/res_02_0.5.xlsx")
fig = plt.figure()
ax = fig.gca(projection='3d')
# I got the graph using trisurf
graf=ax.plot_trisurf(df["Diametro"],df["Comprimento"], df["temp_out"], cmap=matplotlib.cm.coolwarm)
ax.set_xlim(0, 0.5)
ax.set_ylim(0, 100)
ax.set_zlim(25,40)
fig.colorbar(graf, shrink=0.5, aspect=15)
ax.set_xlabel('Diâmetro (m)')
ax.set_ylabel('Comprimento (m)')
ax.set_zlabel('Temperatura de Saída (ºC)')
plt.show()
This is a part of my df, dataframe:
Diametro Comprimento temp_out
0 0.334294 0.787092 34.801994
1 0.334294 8.187065 32.465551
2 0.334294 26.155976 29.206090
3 0.334294 43.648591 27.792126
4 0.334294 60.768219 27.163233
... ... ... ...
59995 0.437266 14.113660 31.947302
59996 0.437266 25.208851 30.317583
59997 0.437266 33.823035 29.405461
59998 0.437266 57.724209 27.891616
59999 0.437266 62.455890 27.709298
I tried this approach to use the imported data with plot_surface, but what I got was indeed a graph but it didn't work, here it's the way the graph looked with this approach:
Thank you so much
A different approach, based on re-gridding the data, that doesn't require that the original data is specified on a regular grid [deeply inspired by this example;-].
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.tri as tri
from mpl_toolkits.mplot3d import Axes3D
np.random.seed(19880808)
# compute the sombrero over a cloud of random points
npts = 10000
x, y = np.random.uniform(-5, 5, npts), np.random.uniform(-5, 5, npts)
z = np.cos(1.5*np.sqrt(x*x + y*y))/(1+0.33*(x*x+y*y))
# prepare the interpolator
triang = tri.Triangulation(x, y)
interpolator = tri.LinearTriInterpolator(triang, z)
# do the interpolation
xi = yi = np.linspace(-5, 5, 101)
Xi, Yi = np.meshgrid(xi, yi)
Zi = interpolator(Xi, Yi)
# plotting
fig = plt.figure()
ax = fig.gca(projection='3d')
norm = plt.Normalize(-1,1)
ax.plot_surface(Xi, Yi, Zi,
cmap='inferno',
norm=plt.Normalize(-1,1))
plt.show()
plot_trisurf expects x, y, z as 1D arrays while plot_surface expects X, Y, Z as 2D arrays or as x, y, Z with x, y being 1D array and Z a 2D array.
Your data consists of 3 1D arrays, so plotting them with plot_trisurf is immediate but you need to use plot_surface to be able to project the isolines on the coordinate planes... You need to reshape your data.
It seems that you have 60000 data points, in the following I assume that you have a regular grid 300 points in the x direction and 200 points in y — but what is important is the idea of regular grid.
The code below shows
the use of plot_trisurf (with a coarser mesh), similar to your code;
the correct use of reshaping and its application in plot_surface;
note that the number of rows in reshaping corresponds to the number
of points in y and the number of columns to the number of points in x;
and 4. incorrect use of reshaping, the resulting subplots are somehow
similar to the plot you showed, maybe you just need to fix the number
of row and columns.
import matplotlib.pyplot as plt
import numpy as np
from mpl_toolkits.mplot3d import Axes3D
x, y = np.arange(30)/3.-5, np.arange(20)/2.-5
x, y = (arr.flatten() for arr in np.meshgrid(x, y))
z = np.cos(1.5*np.sqrt(x*x + y*y))/(1+0.1*(x*x+y*y))
fig, axes = plt.subplots(2, 2, subplot_kw={"projection" : "3d"})
axes = iter(axes.flatten())
ax = next(axes)
ax.plot_trisurf(x,y,z, cmap='Reds')
ax.set_title('Trisurf')
X, Y, Z = (arr.reshape(20,30) for arr in (x,y,z))
ax = next(axes)
ax.plot_surface(X,Y,Z, cmap='Reds')
ax.set_title('Surface 20×30')
X, Y, Z = (arr.reshape(30,20) for arr in (x,y,z))
ax = next(axes)
ax.plot_surface(X,Y,Z, cmap='Reds')
ax.set_title('Surface 30×20')
X, Y, Z = (arr.reshape(40,15) for arr in (x,y,z))
ax = next(axes)
ax.plot_surface(X,Y,Z, cmap='Reds')
ax.set_title('Surface 40×15')
plt.tight_layout()
plt.show()

Get coordinates from points density with KDE

I have a set of points (> 1k) in this form:
y,x
173.549,308.176
173.549,313.328
213.26,419.588
Using KDE, i can plot points density with pcolormesh and contourf. This is an example result, plotting points too:
This is the code i used to have the plot:
import matplotlib.pyplot as plt
import matplotlib
import numpy as np
from scipy.stats.kde import gaussian_kde
x, y = np.genfromtxt('terzinoSX.csv', delimiter=',', unpack=True)
y = y[np.logical_not(np.isnan(y))]
x = x[np.logical_not(np.isnan(x))]
k = gaussian_kde(np.vstack([x, y]))
xi, yi = np.mgrid[x.min():x.max():x.size**0.5*1j,y.min():y.max():y.size**0.5*1j]
zi = k(np.vstack([xi.flatten(), yi.flatten()]))
fig = plt.figure(figsize=(7,4))
ax2 = fig.add_subplot(111)
#alpha=0.5 will make the plots semitransparent
#ax1.pcolormesh(yi, xi, zi.reshape(xi.shape), alpha=0.5)
ax2.contourf(yi, xi, zi.reshape(xi.shape), alpha=0.5)
plt.axis('off')
ax2.plot(y,x, "o")
ax2.set_xlim(0, 740)
ax2.set_ylim(515, 0)
#overlay soccer field
im = plt.imread('statszone_football_pitch.png')
ax2.imshow(im, extent=[0, 740, 0, 515], aspect='auto')
fig.savefig('test.png', bbox_inches='tight')
I would like to have one point representing coordinates of most populated zone (middle point for example), like a middle point over the "red" zone. Is it possible in some way?
I solved this by adding these lines that calculate the point in the most populated area:
xy = np.vstack([x,y])
kde = stats.gaussian_kde(xy)
density = kde(xy)
pts = xy.T[np.argmax(density)]
You can use np.argmax to get the coordinates of the maximum. For example:
kde = compute_my_kde() # Returns a two-dimensional array
y, x = np.argmax(kde) # x and y are swapped because matplotlib displays images as a matrix (first index is rows, second index is colums)
plt.imshow(kde) # Show the kde
plt.scatter(x, y) # Show the maximum point

Plotting a 2D heatmap

Using Matplotlib, I want to plot a 2D heat map. My data is an n-by-n Numpy array, each with a value between 0 and 1. So for the (i, j) element of this array, I want to plot a square at the (i, j) coordinate in my heat map, whose color is proportional to the element's value in the array.
How can I do this?
The imshow() function with parameters interpolation='nearest' and cmap='hot' should do what you want.
Please review the interpolation parameter details, and see Interpolations for imshow and Image antialiasing.
import matplotlib.pyplot as plt
import numpy as np
a = np.random.random((16, 16))
plt.imshow(a, cmap='hot', interpolation='nearest')
plt.show()
Seaborn is a high-level API for matplotlib, which takes care of a lot of the manual work.
seaborn.heatmap automatically plots a gradient at the side of the chart etc.
import numpy as np
import seaborn as sns
import matplotlib.pylab as plt
uniform_data = np.random.rand(10, 12)
ax = sns.heatmap(uniform_data, linewidth=0.5)
plt.show()
You can even plot upper / lower left / right triangles of square matrices. For example, a correlation matrix, which is square and is symmetric, so plotting all values would be redundant.
corr = np.corrcoef(np.random.randn(10, 200))
mask = np.zeros_like(corr)
mask[np.triu_indices_from(mask)] = True
with sns.axes_style("white"):
ax = sns.heatmap(corr, mask=mask, vmax=.3, square=True, cmap="YlGnBu")
plt.show()
I would use matplotlib's pcolor/pcolormesh function since it allows nonuniform spacing of the data.
Example taken from matplotlib:
import matplotlib.pyplot as plt
import numpy as np
# generate 2 2d grids for the x & y bounds
y, x = np.meshgrid(np.linspace(-3, 3, 100), np.linspace(-3, 3, 100))
z = (1 - x / 2. + x ** 5 + y ** 3) * np.exp(-x ** 2 - y ** 2)
# x and y are bounds, so z should be the value *inside* those bounds.
# Therefore, remove the last value from the z array.
z = z[:-1, :-1]
z_min, z_max = -np.abs(z).max(), np.abs(z).max()
fig, ax = plt.subplots()
c = ax.pcolormesh(x, y, z, cmap='RdBu', vmin=z_min, vmax=z_max)
ax.set_title('pcolormesh')
# set the limits of the plot to the limits of the data
ax.axis([x.min(), x.max(), y.min(), y.max()])
fig.colorbar(c, ax=ax)
plt.show()
For a 2d numpy array, simply use imshow() may help you:
import matplotlib.pyplot as plt
import numpy as np
def heatmap2d(arr: np.ndarray):
plt.imshow(arr, cmap='viridis')
plt.colorbar()
plt.show()
test_array = np.arange(100 * 100).reshape(100, 100)
heatmap2d(test_array)
This code produces a continuous heatmap.
You can choose another built-in colormap from here.
Here's how to do it from a csv:
import numpy as np
import matplotlib.pyplot as plt
from scipy.interpolate import griddata
# Load data from CSV
dat = np.genfromtxt('dat.xyz', delimiter=' ',skip_header=0)
X_dat = dat[:,0]
Y_dat = dat[:,1]
Z_dat = dat[:,2]
# Convert from pandas dataframes to numpy arrays
X, Y, Z, = np.array([]), np.array([]), np.array([])
for i in range(len(X_dat)):
X = np.append(X, X_dat[i])
Y = np.append(Y, Y_dat[i])
Z = np.append(Z, Z_dat[i])
# create x-y points to be used in heatmap
xi = np.linspace(X.min(), X.max(), 1000)
yi = np.linspace(Y.min(), Y.max(), 1000)
# Interpolate for plotting
zi = griddata((X, Y), Z, (xi[None,:], yi[:,None]), method='cubic')
# I control the range of my colorbar by removing data
# outside of my range of interest
zmin = 3
zmax = 12
zi[(zi<zmin) | (zi>zmax)] = None
# Create the contour plot
CS = plt.contourf(xi, yi, zi, 15, cmap=plt.cm.rainbow,
vmax=zmax, vmin=zmin)
plt.colorbar()
plt.show()
where dat.xyz is in the form
x1 y1 z1
x2 y2 z2
...
Use matshow() which is a wrapper around imshow to set useful defaults for displaying a matrix.
a = np.diag(range(15))
plt.matshow(a)
https://matplotlib.org/stable/api/_as_gen/matplotlib.axes.Axes.matshow.html
This is just a convenience function wrapping imshow to set useful defaults for displaying a matrix. In particular:
Set origin='upper'.
Set interpolation='nearest'.
Set aspect='equal'.
Ticks are placed to the left and above.
Ticks are formatted to show integer indices.
Here is a new python package to plot complex heatmaps with different kinds of row/columns annotations in Python: https://github.com/DingWB/PyComplexHeatmap

How to use scipy.interpolate.interp2d for a vector of data?

I have a table of measured values for a quantity that depends on two parameters.
So say I have a function fuelConsumption(speed, temperature), for which data on a mesh are known.
Now I want to interpolate the expected fuelConsumption for a lot of measured data points (speed, temperature) from a pandas.DataFrame (and return a vector with the values for each data point).
I am currently using SciPy's interpolate.interp2d for interpolation, but when passing the parameters as two vectors [s1,s2] and [t1,t2] (only two ordered values for simplicity) it will construct a mesh and return:
[[f(s1,t1), f(s2,t1)], [f(s1,t2), f(s2,t2)]]
The result I am hoping to get is:
[f(s1,t1), f(s2, t2)]
How can I interpolate to get the output I want?
From scipy v0.14 onwards you can use scipy.interpolate.RectBivariateSpline with grid=False:
import numpy as np
from scipy.interpolate import RectBivariateSpline
from matplotlib import pyplot as plt
x, y = np.ogrid[-1:1:10j,-1:1:10j]
z = (x + y)*np.exp(-6.0 * (x * x + y * y))
spl = RectBivariateSpline(x, y, z)
xi = np.linspace(-1, 1, 50)
yi = np.linspace(-1, 1, 50)
zi = spl(xi, yi, grid=False)
fig, ax = plt.subplots(1, 1)
ax.hold(True)
ax.imshow(z, cmap=plt.cm.coolwarm, origin='lower', extent=(-1, 1, -1, 1))
ax.scatter(xi, yi, s=60, c=zi, cmap=plt.cm.coolwarm)

putting limits to x,y,z interpolated heatmap in matplotlib

I'm looking to plot a heatmap for which I have the value (=heatmap color) z at each couple of spatial x,y coordinates but I want to mark out the z values between [z0,z1] with z0=0.0 and z1=0.4 while some of interpolated z values are under and above those boundaries.
from numpy.random import uniform, seed
from matplotlib.mlab import griddata
import matplotlib.pyplot as plt
import numpy as np
# make up data.
#npts = int(raw_input('enter # of random points to plot:'))
seed(0)
npts = 200
x = uniform(-2, 2, npts)
y = uniform(-2, 2, npts)
z = x*np.exp(-x**2 - y**2)
# define grid.
xi = np.linspace(0, 1, 1000)
yi = np.linspace(0, 1, 1000)
# grid the data.
zi = griddata(x, y, z, xi, yi, interp='linear')
# contour the gridded data, plotting dots at the nonuniform data points.
CS = plt.contourf(xi, yi, zi, 15, cmap=plt.cm.rainbow,
vmax=abs(zi).max(), vmin=-abs(zi).max())
plt.colorbar() # draw colorbar
# plot data points.
plt.show()
I would like to restrict the colorbar and heatmap color from 0.0 to 0.4 (so avoid in the heatmap and in the colorbar valies under 0.0 and above 0.4).
How to do that? Thanks
You can set the values in a numpy array to None to leave them unplotted. For example,
zmin = 0.0
zmax = 0.4
zi[(zi<zmin) | (zi>zmax)] = None
CS = plt.contourf(xi, yi, zi, 15, cmap=plt.cm.rainbow,
vmax=zmax, vmin=zmin)

Categories

Resources