scipy griddata produces nan values between samples

scipy griddata produces nan values between samples - python

I'm trying to interpolate grid points based on unstructured samples. My samples are taken from a log space between 0.01 and 10 (x axis) and between 1e-8 and 1 (y axis). When I run this code:
from scipy.interpolate import griddata
data = pd.read_csv('data.csv')
param1, param2, errors = data['param1'].values, data['param2'].values, data['error'].values
x = np.linspace(param1.min(), param1.max(), 100, endpoint=True)
y = np.linspace(param2.min(), param2.max(), 100, endpoint=True)
X, Y = np.meshgrid(x, y)
Z = griddata((param1, param2), errors, (X, Y), method='linear')
fig, ax = plt.subplots(figsize=(10, 7))
cax = ax.contourf(X, Y, Z, 25, cmap='hot')
ax.scatter(param1, param2, s=1, color='black', alpha=0.4)
ax.set(xscale='log', yscale='log')
cbar = fig.colorbar(cax)
fig.tight_layout()
I get this result.The white area shows NaN values. Both x and y axes are in log scale:
Even though there are samples in the white area (scatter points prove that), griddata produces NaNs. There are no NaNs/infs in the data. Am I missing something or it's just a bug in Scipy?
data.csv

This is due to the linear spacing of your X-Y interpolation grid, and logarithmic scaling of axes. This is fairly easily fixed by geometrically ("logarithmically") spacing the interpolation grid.
One can also interpolate in log-space; IMO this gives a better looking result, but it may not be valid.
Here's a more-coarsely-sampled version of your figure, showing how the interpolation grid points are "clumped up" to the top right in the log-scaled plot. Here the top row of axes is shows where the data is finite, the bottom row is the "real" plot:
You can see the extreme left and extreme bottom points of a linearly-spaced sample grid are (just!) outside set of values; this is especially bad because the next closest lines of points are visually far away due to the logarithmic scaling.
Here's a result with the interpolation grid geometrically spaced, and interpolation also done in that space.
You can run the code below to view the other two variants.
from itertools import product
import numpy as np
import matplotlib.pyplot as plt
from scipy.interpolate import griddata
import pandas as pd
CMAP = None
# crude, to make interpolation grid visible
NX = 11
NY = 11
def plot_general(log_grid=False, log_interp=False):
data = pd.read_csv('data.csv')
param1, param2, errors = data['param1'].values, data['param2'].values, data['error'].values
if log_grid:
x = np.geomspace(param1.min(), param1.max(), NX)
y = np.geomspace(param2.min(), param2.max(), NY)
else:
x = np.linspace(param1.min(), param1.max(), NX)
y = np.linspace(param2.min(), param2.max(), NY)
X, Y = np.meshgrid(x, y)
if log_interp:
Z = griddata((np.log10(param1), np.log10(param2)), errors, (np.log10(X), np.log10(Y)), method='linear')
else:
Z = griddata((param1, param2), errors, (X, Y), method='linear')
fZ = np.isfinite(Z)
fig, ax = plt.subplots(2, 2)
ax[0,0].contourf(X, Y, fZ, levels=[0.5,1.5])
ax[0,0].scatter(param1, param2, s=1, color='black')
ax[0,0].plot(X.flat, Y.flat, '.', color='blue')
ax[0,1].contourf(X, Y, fZ, levels=[0.5,1.5])
ax[0,1].scatter(param1, param2, s=1, color='black')
ax[0,1].plot(X.flat, Y.flat, '.', color='blue')
ax[0,1].set(xscale='log', yscale='log')
ax[1,0].contourf(X, Y, Z, levels=25, cmap=CMAP)
ax[1,0].scatter(param1, param2, s=1, color='black')
ax[1,0].plot(X.flat, Y.flat, '.', color='blue')
ax[1,1].contourf(X, Y, Z, levels=25, cmap=CMAP)
ax[1,1].scatter(param1, param2, s=1, color='black')
ax[1,1].set(xscale='log', yscale='log')
ax[1,1].plot(X.flat, Y.flat, '.', color='blue')
fig.suptitle(f'{log_grid=}, {log_interp=}')
fig.tight_layout()
return fig
plt.close('all')
for log_grid, log_interp in product([False, True],
[False, True]):
fig = plot_general(log_grid, log_interp)
#if you want to save results:
#fig.savefig(f'log_grid{log_grid}-log_interp{log_interp}.png')

Related

Make 3D triangle lines more visible in pyplot

I'd like to make a triangle plot in matplotlib with a mostly-transparent surface. I'm running the example code at https://matplotlib.org/mpl_examples/mplot3d/trisurf3d_demo.py:
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import numpy as np
n_radii = 8
n_angles = 36
# Make radii and angles spaces (radius r=0 omitted to eliminate duplication).
radii = np.linspace(0.125, 1.0, n_radii)
angles = np.linspace(0, 2*np.pi, n_angles, endpoint=False)
# Repeat all angles for each radius.
angles = np.repeat(angles[..., np.newaxis], n_radii, axis=1)
# Convert polar (radii, angles) coords to cartesian (x, y) coords.
# (0, 0) is manually added at this stage, so there will be no duplicate
# points in the (x, y) plane.
x = np.append(0, (radii*np.cos(angles)).flatten())
y = np.append(0, (radii*np.sin(angles)).flatten())
# Compute z to make the pringle surface.
z = np.sin(-x*y)
fig = plt.figure()
ax = fig.gca(projection='3d')
ax.plot_trisurf(x, y, z, linewidth=0.2, antialiased=True)
plt.show()
I can set
ax.plot_trisurf(x, y, z, linewidth=0.2, alpha = 0.2, antialiased=True)
to set the opacity to 0.2, but then the lines disappear. Furthermore, when I change the linewidth, even without the alpha, I see no change in the thickness of the lines between the points. How can I have a triangle plot where the faces are mostly transparent and the lines are clearly visible?

Summing overlapping bubbles with gradient in python

I want to plot a map of specific sites to interpret their effects on the surrounding city environment. To do this, I would like to plot the sites as bubbles, with a decreasing gradient towards the edge of the circle, and where the gradient of the overlapping circles is the sum.
As an example I've used this:
# libraries
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
# create data
x = np.random.rand(15)
y = x+np.random.rand(15)
z = x+np.random.rand(15)
z=z*z
# Change color with c and alpha. I map the color to the X axis value.
plt.scatter(x, y, s=1500, c=z, cmap="Blues", alpha=0.4, edgecolors="grey", linewidth=1)
# Add titles (main and on axis)
plt.xlabel("the X axis")
plt.ylabel("the Y axis")
plt.title("A colored bubble plot")
plt.show();
which produces:
However, the color of the circles does not decay, nor do they seem to sum the intended way.
Is there any smart way to do this, or could it possibly be easier with some kind of heatmap solution, or using grids and a decaying effect on adjacent tiles?

Here is an approach with densities placed at each x and y, enlarged by the z value.
Depending on the distance to each x,y position a quantity is added.
import matplotlib.pyplot as plt
import numpy as np
from numpy.linalg import norm # calculate the length of a vector
# import seaborn as sns
# create data
x = np.random.rand(15)
y = x+np.random.rand(15)
z = x+np.random.rand(15)
z=z*z
fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(12,5))
# Change color with c and alpha. I map the color to the X axis value.
ax1.scatter(x, y, s=1500, c=z, cmap="Blues", alpha=0.4, edgecolors="grey", linewidth=1)
ax1.set_xlabel("the X axis")
ax1.set_ylabel("the Y axis")
ax1.set_title("A colored bubble plot")
centers = np.dstack((x, y))[0]
xmin = min(x)-0.2
xmax = max(x)+0.2
ymin = min(y)-0.2
ymax = max(y)+0.2
zmin = min(z)
zmax = max(z)
xx, yy = np.meshgrid(np.linspace(xmin, xmax, 100),
np.linspace(ymin, ymax, 100))
xy = np.dstack((xx, yy))
zz = np.zeros_like(xx)
for ci, zi in zip(centers, z):
sigma = zi / zmax * 0.3
sigma2 = sigma ** 2
zz += np.exp(- norm(xy - ci, axis=-1) ** 2 / sigma2 / 2)
img = ax2.imshow(zz, extent=[xmin, xmax, ymin, ymax], origin='lower', aspect='auto', cmap='Blues')
#plt.colorbar(img, ax=ax2)
ax2.set_xlabel("the X axis")
ax2.set_ylabel("the Y axis")
ax2.set_title("Density depending on z")
plt.show()
The plot compares the two approaches using the same random data.

Get coordinates from points density with KDE

I have a set of points (> 1k) in this form:
y,x
173.549,308.176
173.549,313.328
213.26,419.588
Using KDE, i can plot points density with pcolormesh and contourf. This is an example result, plotting points too:
This is the code i used to have the plot:
import matplotlib.pyplot as plt
import matplotlib
import numpy as np
from scipy.stats.kde import gaussian_kde
x, y = np.genfromtxt('terzinoSX.csv', delimiter=',', unpack=True)
y = y[np.logical_not(np.isnan(y))]
x = x[np.logical_not(np.isnan(x))]
k = gaussian_kde(np.vstack([x, y]))
xi, yi = np.mgrid[x.min():x.max():x.size**0.5*1j,y.min():y.max():y.size**0.5*1j]
zi = k(np.vstack([xi.flatten(), yi.flatten()]))
fig = plt.figure(figsize=(7,4))
ax2 = fig.add_subplot(111)
#alpha=0.5 will make the plots semitransparent
#ax1.pcolormesh(yi, xi, zi.reshape(xi.shape), alpha=0.5)
ax2.contourf(yi, xi, zi.reshape(xi.shape), alpha=0.5)
plt.axis('off')
ax2.plot(y,x, "o")
ax2.set_xlim(0, 740)
ax2.set_ylim(515, 0)
#overlay soccer field
im = plt.imread('statszone_football_pitch.png')
ax2.imshow(im, extent=[0, 740, 0, 515], aspect='auto')
fig.savefig('test.png', bbox_inches='tight')
I would like to have one point representing coordinates of most populated zone (middle point for example), like a middle point over the "red" zone. Is it possible in some way?

I solved this by adding these lines that calculate the point in the most populated area:
xy = np.vstack([x,y])
kde = stats.gaussian_kde(xy)
density = kde(xy)
pts = xy.T[np.argmax(density)]

You can use np.argmax to get the coordinates of the maximum. For example:
kde = compute_my_kde() # Returns a two-dimensional array
y, x = np.argmax(kde) # x and y are swapped because matplotlib displays images as a matrix (first index is rows, second index is colums)
plt.imshow(kde) # Show the kde
plt.scatter(x, y) # Show the maximum point

Draw seamless distribution of tweets

I've collected tweets from twitter now I'm trying to draw the distribution of tweets geographically. To do that, I divide the entire square area into small square and count number of tweets in each square. Finally, I use matplotlib to draw the following figure:
ax.plot_surface(X, Y, Z, rstride=1, cstride=1, alpha=0.3, cmap='Accent')
The problem is that the elevation map is not smooth. I'd like a way to draw smooth curve from the data. One example for that in 2D is when we have a histogram of image, we can draw smooth curve over the distribution as follows:
So my question is that is there a way to draw a smooth surface from the discrete data?

Expanding on my answer, here's what you can get with resampling and smoothing (gaussian_filter())/spline interpolation (RectBivariateSpline). Note that it would be nice of you to provide a template code that plots your graph, but since you haven't, I had to improvise.
import numpy
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
def plot(name, method):
numpy.random.seed(123)
x = numpy.linspace(0, 50, 51)
X, Y = numpy.meshgrid(x, x)
Z = numpy.zeros((x.size, x.size))
for n in range(50):
i = numpy.random.randint(0, x.size)
j = numpy.random.randint(0, x.size)
Z[i, j] = numpy.abs(numpy.random.normal()) * 1000
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
if method == 0:
# regular plot
ax.plot_surface(X, Y, Z, rstride=1, cstride=1, alpha=0.3, cmap='Accent')
else:
# create a finer grid
resample_coeff = 2
Z2 = numpy.repeat(Z, resample_coeff, 0).repeat(resample_coeff, 1)
x2 = numpy.linspace(x[0], x[-1], x.size * resample_coeff)
X2, Y2 = numpy.meshgrid(x2, x2)
if method == 1:
# smoothing
from scipy.ndimage.filters import gaussian_filter
Z2 = gaussian_filter(Z2, 1)
elif method == 2:
# interpolation
from scipy.interpolate import RectBivariateSpline
spline = RectBivariateSpline(
x, x, Z, bbox=[x[0], x[-1], x[0], x[-1]])
Z2 = spline.ev(X2, Y2)
ax.plot_surface(X2, Y2, Z2, rstride=1, cstride=1, alpha=0.3, cmap='Accent')
fig.savefig(name)
if __name__ == '__main__':
plot('t0.png', 0)
plot('t1.png', 1)
plot('t2.png', 2)
Initial graph:
Smoothing:
Interpolation (notice the negative regions; that's polynomial interpolation for you):

Shifting the triangles order to match colormap

This question is a sequel of a previous one but regarding this time the colormap and the order of the triangle. I want to interpolate experimental data over a surface so as to enable a continuous colormap with however the surface known only at its corner node. To interpolate, I put a canonical example which works quite well but fails on real data.
Indeed as shown in the example below, the initial triangulation results in two triangles with a huge gap between them, cf first picture. When the interpolation is done, it doesn't get any better and the colormap is also lost, cf. second picture. The best so far is by interverting z and y to get adjacent triangles from the beginning which results in a successful interpolation. However as you might notice in the third picture, the surface is tilted by 90° which is normal since I switch y for z and vice-versa.
However when I switch back y and z in the tri_surf function with ax.plot_trisurf(new.x, new_z, new.y, **kwargs), the colormap doesn't follow, cf. picture 4.
I thought of rotating the colormap in somehow or generate new triangles from the interpolated ones with triang = tri.Triangulation(new.x, new_z) but without any success. So any idea or hint about properly doing the initial triangulation with two adjacent triangles, as for the third picture, but with the surface oriented correclty and ultimately the colormap proportional to the Y-value.
import numpy
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
from matplotlib import cm
import matplotlib.tri as tri
x=numpy.array([0.00498316, 0.00498316, 0.00996632, 0.00996632])
y=numpy.array([-0.00037677, -0.00027191, -0.00078681, -0.00088475])
z=numpy.array([0., -0.0049926, 0., -0.00744763])
# Initial Triangle
fig = plt.figure()
ax = Axes3D(fig)
triang = tri.Triangulation(x, y)
norm = plt.Normalize(vmax=y.max(), vmin=y.min())
ax.plot_trisurf(x, y, z, triangles=triang.triangles)
# Interpolated Triangle
fig = plt.figure()
ax = Axes3D(fig)
triang = tri.Triangulation(x, y)
refiner = tri.UniformTriRefiner(triang)
interpolator = tri.LinearTriInterpolator(triang, z)
new, new_z = refiner.refine_field(z, interpolator, subdiv=4)
kwargs = dict(triangles=new.triangles, cmap=cm.jet, norm=norm, linewidth=0, antialiased=False)
ax.plot_trisurf(new.x, new.y, new_z, **kwargs)
# Best so far
fig = plt.figure()
ax = Axes3D(fig)
triang = tri.Triangulation(x, z)
refiner = tri.UniformTriRefiner(triang)
interpolator = tri.LinearTriInterpolator(triang, y)
new, new_z = refiner.refine_field(y, interpolator, subdiv=4)
kwargs = dict(triangles=new.triangles, cmap=cm.jet, norm=norm, linewidth=0, antialiased=False)
ax.plot_trisurf(new.x, new.y, new_z, **kwargs)
plt.show()

Apparently the automatic triangulation doesn't produce the right triangles for you, but you can specify how you want your triangles manually:
triang = tri.Triangulation(x, y, [[3,2,1],[1,2,0]])
# alternatively:
triang = tri.Triangulation(x, y, [[3,2,0],[1,3,0]])
These two ways give rather different results:
However, now the interpolation becomes awkward, because for some (x,y) there are multiple z-values.. One way of bypassing this issue is interpolating and plotting the 2 large triangles separately:
import numpy
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
from matplotlib import cm
import matplotlib.tri as tri
def plot_refined_tri(x, y, z, ax, subdiv=4, **kwargs):
triang = tri.Triangulation(x, y)
refiner = tri.UniformTriRefiner(triang)
interpolator = tri.LinearTriInterpolator(triang, z)
new, new_z = refiner.refine_field(z, interpolator, subdiv=subdiv)
ax.plot_trisurf(new.x, new.y, new_z, triangles=new.triangles, **kwargs)
x=numpy.array([0.00498316, 0.00498316, 0.00996632, 0.00996632])
y=numpy.array([-0.00037677, -0.00027191, -0.00078681, -0.00088475])
z=numpy.array([0., -0.0049926, 0., -0.00744763])
fig = plt.figure()
ax = Axes3D(fig)
# note: I normalized on z-values to "fix" the colormap
norm = plt.Normalize(vmax=z.max(), vmin=z.min())
kwargs = kwargs = dict(linewidth=0.2, cmap=cm.jet, norm=norm)
idx = [3,2,1]
plot_refined_tri(x[idx], y[idx], z[idx], ax, **kwargs)
idx = [1,2,0]
plot_refined_tri(x[idx], y[idx], z[idx], ax, **kwargs)
plt.show()
Result:

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

scipy griddata produces nan values between samples - python

Related

Make 3D triangle lines more visible in pyplot

Summing overlapping bubbles with gradient in python

Get coordinates from points density with KDE

Draw seamless distribution of tweets

Shifting the triangles order to match colormap

Categories

Resources