Plot three lists using plot_trisurf but got IndexError - python

I want to plot two surface plots from four lists, which are all independent from each other.
I tried this:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
import mpl_toolkits.mplot3d as Axes3D
X = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
Y = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
Z1 = [0.735, 0.735, 0.735, 0.735, 0.735, 0.735, 0.735, 0.735, 0.735, 0.735] # the elements here happens to be the same, but could be different
Z2 = [0.8623, 0.9461, 0.9341, 0.976, 0.982, 0.976, 0.976, 0.976, 0.976, 0.976]
X = np.array(X)
Y = np.array(Y)
Z1 = np.array(Z1)
Z2 = np.array(Z2)
X, Y = np.meshgrid(X,Y)
fig = plt.figure(figsize=(8,8))
ax1 = fig.add_subplot(121,projection='3d')
surf1 = ax1.plot_trisurf(X.flatten(),Y.flatten(),Z1.flatten(),cmap = cm.jet, antialiased=True)
ax2 = fig.add_subplot(122,projection='3d')
surf2 = ax2.plot_trisurf(X.flatten(),Y.flatten(),Z2.flatten(),cmap = cm.jet, antialiased=True)
However, an IndexError pops out saying that "index 40 is out of bounds for axis 0 with size 10".
Given that all lists have length 10, I thought the "index 40" may be due to meshgrid, but when I deleted that line, there is another error in "qhull Delauney triangulation calculation".
From my search, it seemed like a majority of 3d plots are function-based, i.e. z=f(x,y). I am not sure if the errors here are because such a relationship does not exist in my case.
Thank you in advance.

Meshgrid changes the shape of X,Y:
>>> X, Y = np.meshgrid(X,Y)
>>> X.shape, Y.shape
(10,10) (10,10)
But Z1 and Z2 still have the shapes (10,). So to provide the same dimensionality along all axes, mesh Z1 and Z2:
X, Y = np.meshgrid(X,Y)
Z1,Z2 = np.meshgrid(Z1, Z2)
And you will get the correct plot:

Related

Confused about plotting interpolated 2D data with matplotlib

I have some unstructured 2D data that I would like to interpolate on a unit offset grid (ie grid indices start at 1 not 0) using scipy and plot using matplotlib. The code is below
import numpy as np
import matplotlib.pyplot as plt
import scipy.interpolate
# X Y Z
data = [[ 9, 2, 2.0],
[ 3, 3, 5.0],
[ 6, 4, 1.0],
[ 2, 6, 3.0],
[10, 7, 4.5],
[ 5, 8, 2.0]]
data = np.array(data)
coords = data[:, 0:2]
zvals = data[:, 2]
# Create the grid on which to interpolate (unit offset)
nx = 10
ny = 10
x = np.arange(nx)
x += 1
y = np.arange(ny)
y += 1
grid_x, grid_y = np.meshgrid(x, y, indexing='xy')
# Interpolate
grid_z1 = scipy.interpolate.griddata(coords, zvals, (grid_x, grid_y), method='linear')
# Plot the results
fig, axs = plt.subplots()
plt.imshow(grid_z1)
plt.plot(coords[:,0], coords[:,1], 'k.', ms=10)
plt.show()
The point data seem to be in the right place but matplotlib seems to be plotting the gridded data as zero-offset not unit-offset. I am obviously missing something - just not sure what. Thanks in advance!
Update
fig, axs = plt.subplots()
plt.imshow(grid_z1, origin='lower', extent=[1, 10, 1, 10])
plt.plot(coords[:,0], coords[:,1], 'k.', ms=10)
plt.show()
IIUC, you want to define xlim and ylim equal to the plot and not the heatmap:
plt.plot(coords[:,0], coords[:,1], 'k.', ms=10)
ax = plt.gca()
xlim, ylim = ax.get_xlim(), ax.get_ylim()
plt.imshow(grid_z1)
ax.set_xlim(xlim)
ax.set_ylim(ylim)
Output:

Implementation of cost function in linear regression

I am trying to implement the cost function on a simple training dataset and visualise the cost function in 3D.
The shape of my cost function is not as it is supposed to be.
This is my code:
import matplotlib.pyplot as plt
import numpy as np
from mpl_toolkits.mplot3d.axes3d import Axes3D
import pandas as pd
from scipy.interpolate import griddata
def create_array(start, end, resolution):
return np.linspace(start, end, int((end - start)/resolution + 1))
def f(x,a,b):
x = np.array(x)
return a*x+b # or Theta_1 * x + Theta_0
def get_J(x, y, a, b):
x = np.array(x)
y = np.array(y)
# return 1/(2*len(y)) * sum(pow(f(x,a,b) - y, 2))
# Simple implementation
sum = 0
for i in range(0, len(x)):
sum+= (f(x[i],a,b) - y[i])**2
return 1/(2*len(y))*sum
# Training set
x = np.array([0,1,2,3])
y = np.array([0,1,2,3])
Theta_0 = create_array(-20, 10, 0.5)
Theta_1 = create_array(-20, 10, 0.5)
X,Y = np.meshgrid(Theta_0, Theta_1)
X=X.flatten()
Y=Y.flatten()
J = [get_J(x, y, X[i], Y[i]) for i in range(0,len(X))]
# simple set to verify 3D plotting is doing as expetected - OK
# X = [10, 0, -10,-20, 10, 0, -10,-20, 10, 0,-10, -20, 10, 0, -10,-20]
# Y = [-20, -20, -20, -20, -10, -10, -10, -10, 0, 0, 0, 0, 10, 10, 10, 10]
# J = [50, 25, 26, 60, 24, 10, 11, 26, 10, 0, 2, 11, 52, 26, 27, 63]
# Create the graphing elements
xyz = {'x': X, 'y': Y, 'z': J}
# put the data into a pandas DataFrame (this is what my data looks like)
df = pd.DataFrame(xyz, index=range(len(xyz['x'])))
# re-create the 2D-arrays
x1 = np.linspace(df['x'].min(), df['x'].max(), len(df['x'].unique()))
y1 = np.linspace(df['y'].min(), df['y'].max(), len(df['y'].unique()))
x2, y2 = np.meshgrid(x1, y1)
z2 = griddata((df['x'], df['y']), df['z'], (x2, y2), method='cubic')
fig = plt.figure(figsize =(14, 9))
ax = Axes3D(fig)
surf = ax.plot_surface(x2, y2, z2, rstride=1, cstride=1, cmap=plt.get_cmap('coolwarm'),linewidth=0, antialiased=False)
plt.gca().invert_xaxis()
ax.set_xlabel('\u03B81', fontweight ='bold')
ax.set_ylabel('\u03B80', fontweight ='bold')
ax.set_zlabel('J (\u03B81, \u03B80)', fontweight ='bold')
fig.colorbar(surf, shrink=0.5, aspect=5)
plt.show()
The 3D plot has the following shape:
when it is supposed to have this shape:
If you take paper and pencil and analytically derive the J you have implemented, you arrive at something like this:
a = theta_1: -20 ... 10
b = theta_0: -20 ... 10
J(a,b) ~ b^2 + (a+b-1)^2 + (2a+b-2)^2 + (3a+b-3)^2
This basically means that a and b are coupled like a+b. The a+b like terms are squared and a plot of (a+b)^2 looks like this (made with gnuplot):
The reference plot has another form which looks more like a and b being independent, as in a^2 + b^2, lets plot this:
So we should be able to reproduce the reference plot if J has the form
J(a, b) ~ a^2 + b^2 + (other terms except a*b)
The form of J is given by the training set x and y. I leave it to you to show analytically that the values in x build the coupling between a and b. For y, I play with the values and arrive at:
x = np.array([-1, 1])
y = np.array([1, -4])
This is the simplest setting I can think of. There are many more possibilities.
I'm not that deep into machine learning and the meaning of these values. My knowledge basically comes from here. So if I'm wrong please let me know.
Now I get the following image, and I think it is quite close to the reference one, at least the shape:
As a summary: I don't think there is a bug in your implementation. I think, you have plotted different data.

Why don't the contour lines show up on this plot?

I'm trying to create a contour plot to show a height field at time 0, represented by h_list[0, :, :]. When I run the code, an empty plot shows up with the correct axes and scales, but no contour lines. The shape of h_list is [250, 99, 99], but I am only calling the first x index, which is shape [99, 99]. This matches the others.
My code is this:
h_list = np.array(h_list)
con = plt.figure()
ax = con.add_subplot(111)
x = np.linspace(0, 1000, 99)
y = np.linspace(0, 1000, 99)
X, Y = np.meshgrid(x, y)
Z = h_list[0, :, :]
ax.contour(X, Y, Z)
ax.set_xlim(0, 1000)
ax.set_ylim(0, 1000)
The shapes of all variables are the same and I've set the appropriate limits so I'm not sure why matplotlib isn't recognizing my data. I do not get any compiling errors, just no graph. Any ideas?
Edit: Z has shape [99, 99], which is the same as X and Y.

How can I give specific x values to `scipy.interpolate.splev`?

How can I interpolate a hysteresis loop at specific x points? Multiple related questions/answers are available on SOF regarding B-spline interpolation using scipy.interpolate.splprep (other questions here or here). However, I have hundreds of hysteresis loops at very similar (but not exactly same) x positions and I would like to perform B-spline interpolation on all of them at specific x coordinates.
Taking a previous example:
import numpy as np
from scipy import interpolate
from matplotlib import pyplot as plt
x = np.array([23, 24, 24, 25, 25])
y = np.array([13, 12, 13, 12, 13])
# append the starting x,y coordinates
x = np.r_[x, x[0]]
y = np.r_[y, y[0]]
# fit splines to x=f(u) and y=g(u), treating both as periodic. also note that s=0
# is needed in order to force the spline fit to pass through all the input points.
tck, u = interpolate.splprep([x, y], s=0, per=True)
# evaluate the spline fits for 1000 evenly spaced distance values
xi, yi = interpolate.splev(np.linspace(0, 1, 1000), tck)
# plot the result
fig, ax = plt.subplots(1, 1)
ax.plot(x, y, 'or')
ax.plot(xi, yi, '-b')
plt.show()
Is it possible to provide specific x values to interpolate.splev? I get unexpected results:
x2, y2 = interpolate.splev(np.linspace(start=23, stop=25, num=30), tck)
fig, ax = plt.subplots(1, 1)
ax.plot(x, y, 'or')
ax.plot(x2, y2, '-b')
plt.show()
The b-spline gives x and y positions for a given u (between 0 and 1).
Getting y positions for a given x position involves solving for the inverse. As there can be many y's corresponding to one x (in the given example there are places with 4 y's, for example at x=24).
A simple way to get a list of (x,y)'s for x between two limits, is to create a filter:
import numpy as np
from scipy import interpolate
from matplotlib import pyplot as plt
x = np.array([23, 24, 24, 25, 25])
y = np.array([13, 12, 13, 12, 13])
# append the starting x,y coordinates
x = np.r_[x, x[0]]
y = np.r_[y, y[0]]
tck, u = interpolate.splprep([x, y], s=0, per=True)
# evaluate the spline fits for 1000 evenly spaced distance values
xi, yi = interpolate.splev(np.linspace(0, 1, 1000), tck)
# plot the result
fig, ax = plt.subplots(1, 1)
ax.plot(x, y, 'or')
ax.plot(xi, yi, '-b')
filter = (xi >= 24) & (xi <= 25)
x2 = xi[filter]
y2 = yi[filter]
ax.scatter(x2, y2, color='c')
plt.show()

Limit/mask matplotlib contour to data area

I have a pandas DataFrame with non-uniformly spaced data points given by an x, y and z column, where x and y are pairs of variables and z is the dependent variable. For example:
import matplotlib.pyplot as plt
from matploblib.mlab import griddata
import numpy as np
import pandas as pd
df = pd.DataFrame({'x':[0, 0, 1, 1, 3, 3, 3, 4, 4, 4],
'y':[0, 1, 0, 1, 0.2, 0.7, 1.4, 0.2, 1.4, 2],
'z':[50, 40, 40, 30, 30, 30, 20, 20, 20, 10]})
x = df['x']
y = df['y']
z = df['z']
I want to do a contour plot of the dependent variable z over x and y. For this, I create a new grid to interpolate the data on using matplotlib.mlab's griddata function.
xi = np.linspace(x.min(), x.max(), 100)
yi = np.linspace(y.min(), y.max(), 100)
z_grid = griddata(x, y, z, xi, yi, interp='linear')
plt.contourf(xi, yi, z_grid, 15)
plt.scatter(x, y, color='k') # The original data points
plt.show()
While this works, the output is not what I want. I do not want griddata to interpolate outside of the boundaries given by the min and max values of the x and y data. The following plots are what shows up after calling plt.show(), and then highlighted in purple what area of the data I want to have interpolated and contoured. The contour outside the purple line is supposed to be blank. How could I go about masking the outlying data?
The linked question does unfortunately not answer my question, as I don't have a clear mathematical way to define the conditions on which to do a triangulation. Is it possible to define a condition to mask the data based on the data alone, taking the above Dataframe as an example?
As seen in the answer to this question one may introduce a condition to mask the values.
The sentence from the question
"I do not want griddata to interpolate outside of the boundaries given by the min and max values of the x and y data." implies that there is some min/max condition present, which can be used.
Should that not be the case, one may clip the contour using a path. The points of this path need to be specified as there is no generic way of knowing which points should be the edges. The code below does this for three different possible paths.
import matplotlib.pyplot as plt
from matplotlib.path import Path
from matplotlib.patches import PathPatch
from matplotlib.mlab import griddata
import numpy as np
import pandas as pd
df = pd.DataFrame({'x':[0, 0, 1, 1, 3, 3, 3, 4, 4, 4],
'y':[0, 1, 0, 1, 0.2, 0.7, 1.4, 0.2, 1.4, 2],
'z':[50, 40, 40, 30, 30, 30, 20, 20, 20, 10]})
x = df['x']
y = df['y']
z = df['z']
xi = np.linspace(x.min(), x.max(), 100)
yi = np.linspace(y.min(), y.max(), 100)
z_grid = griddata(x, y, z, xi, yi, interp='linear')
clipindex = [ [0,2,4,7,8,9,6,3,1,0],
[0,2,4,7,5,8,9,6,3,1,0],
[0,2,4,7,8,9,6,5,3,1,0]]
fig, axes = plt.subplots(ncols=3, sharey=True)
for i, ax in enumerate(axes):
cont = ax.contourf(xi, yi, z_grid, 15)
ax.scatter(x, y, color='k') # The original data points
ax.plot(x[clipindex[i]], y[clipindex[i]], color="crimson")
clippath = Path(np.c_[x[clipindex[i]], y[clipindex[i]]])
patch = PathPatch(clippath, facecolor='none')
ax.add_patch(patch)
for c in cont.collections:
c.set_clip_path(patch)
plt.show()
Ernest's answer is a great solution, but very slow for lots of contours. Instead of clipping every one of them, I built a mask by constructing the complement polygon of the desired clipping mask.
Here is the code based on Ernest's accepted answer:
import numpy as np
import pandas as pd
import matplotlib.tri as tri
import matplotlib.pyplot as plt
from descartes import PolygonPatch
from shapely.geometry import Polygon
df = pd.DataFrame({'x':[0, 0, 1, 1, 3, 3, 3, 4, 4, 4],
'y':[0, 1, 0, 1, 0.2, 0.7, 1.4, 0.2, 1.4, 2],
'z':[50, 40, 40, 30, 30, 30, 20, 20, 20, 10]})
points = df[['x', 'y']]
values = df[['z']]
xi = np.linspace(points.x.min(), points.x.max(), 100)
yi = np.linspace(points.y.min(), points.y.max(), 100)
triang = tri.Triangulation(points.x, points.y)
interpolator = tri.LinearTriInterpolator(triang, values.z)
Xi, Yi = np.meshgrid(xi, yi)
zi = interpolator(Xi, Yi)
clipindex = [ [0,2,4,7,8,9,6,3,1,0],
[0,2,4,7,5,8,9,6,3,1,0],
[0,2,4,7,8,9,6,5,3,1,0]]
fig, axes = plt.subplots(ncols=3, sharey=True, figsize=(10,4))
for i, ax in enumerate(axes):
ax.set_xlim(-0.5, 4.5)
ax.set_ylim(-0.2, 2.2)
xlim = ax.get_xlim()
ylim = ax.get_ylim()
cont = ax.contourf(Xi, Yi, zi, 15)
ax.scatter(points.x, points.y, color='k', zorder=2) # The original data points
ax.plot(points.x[clipindex[i]], points.y[clipindex[i]], color="crimson", zorder=1)
#### 'Universe polygon':
ext_bound = Polygon([(xlim[0], ylim[0]), (xlim[0], ylim[1]), (xlim[1], ylim[1]), (xlim[1], ylim[0]), (xlim[0], ylim[0])])
#### Clipping mask as polygon:
inner_bound = Polygon([ (row.x, row.y) for idx, row in points.iloc[clipindex[i]].iterrows() ])
#### Mask as the symmetric difference of both polygons:
mask = ext_bound.symmetric_difference(inner_bound)
ax.add_patch(PolygonPatch(mask, facecolor='white', zorder=1, edgecolor='white'))
plt.show()

Categories

Resources