Confused about plotting interpolated 2D data with matplotlib - python

I have some unstructured 2D data that I would like to interpolate on a unit offset grid (ie grid indices start at 1 not 0) using scipy and plot using matplotlib. The code is below
import numpy as np
import matplotlib.pyplot as plt
import scipy.interpolate
# X Y Z
data = [[ 9, 2, 2.0],
[ 3, 3, 5.0],
[ 6, 4, 1.0],
[ 2, 6, 3.0],
[10, 7, 4.5],
[ 5, 8, 2.0]]
data = np.array(data)
coords = data[:, 0:2]
zvals = data[:, 2]
# Create the grid on which to interpolate (unit offset)
nx = 10
ny = 10
x = np.arange(nx)
x += 1
y = np.arange(ny)
y += 1
grid_x, grid_y = np.meshgrid(x, y, indexing='xy')
# Interpolate
grid_z1 = scipy.interpolate.griddata(coords, zvals, (grid_x, grid_y), method='linear')
# Plot the results
fig, axs = plt.subplots()
plt.imshow(grid_z1)
plt.plot(coords[:,0], coords[:,1], 'k.', ms=10)
plt.show()
The point data seem to be in the right place but matplotlib seems to be plotting the gridded data as zero-offset not unit-offset. I am obviously missing something - just not sure what. Thanks in advance!

Update
fig, axs = plt.subplots()
plt.imshow(grid_z1, origin='lower', extent=[1, 10, 1, 10])
plt.plot(coords[:,0], coords[:,1], 'k.', ms=10)
plt.show()
IIUC, you want to define xlim and ylim equal to the plot and not the heatmap:
plt.plot(coords[:,0], coords[:,1], 'k.', ms=10)
ax = plt.gca()
xlim, ylim = ax.get_xlim(), ax.get_ylim()
plt.imshow(grid_z1)
ax.set_xlim(xlim)
ax.set_ylim(ylim)
Output:

Related

How to make countour plot from dataframe [duplicate]

I want to draw a contour plot using table data.
I have 2 variables and response (3 columns).
I dont understand how to build this plot using it. I tried code below. But I had a next mistake: Input z must be 2D, not 1D.
feature_x = data.factor1
feature_y = data.factor2
# Creating 2-D grid of features
[X, Y] = np.meshgrid(feature_x, feature_y)
fig, ax = plt.subplots(1, 1)
Z = data.response
# plots filled contour plot
ax.contourf(X, Y, Z)
ax.set_title('Filled Contour Plot')
ax.set_xlabel('feature_x')
ax.set_ylabel('feature_y')
plt.show()
Data
To have a contour plot, z needs to be 2d matrix with all values for the points (x,y). You can think the data needed for a contour plot, as a DataFrame where index is x, columns are y and values are z. So z needs to be a 2d array of shape (x.size, y.size).
Since your z is not a 2d matrix but a 1d array, you cannot have a contour plot.
What you can do, for example, is a relplot with hue and/or size
import numpy as np
import pandas as pd
import seaborn as sns
x = np.array([1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3])
y = np.array([1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4])
z = np.array([249, 523, 603, 775, 577, 763, 808, 695, 642, 525, 795, 758])
df = pd.DataFrame({'x':x, 'y':y, 'z':z})
sns.relplot(
data=df,
x='x', y='y',
size='z', sizes=(10, 100),
hue='z',
palette='coolwarm',
);
EDIT
But... if you're looking for a continuous estimate, you can use gaussian_kde, for example
import scipy.stats as sps
import matplotlib.pyplot as plt
offset = .25
xmin = x.min()-offset
xmax = x.max()+offset
ymin = y.min()-offset
ymax = y.max()+offset
X, Y = np.mgrid[xmin:xmax:100j, ymin:ymax:100j]
positions = np.vstack([X.ravel(), Y.ravel()])
values = np.vstack([x, y])
kernel = sps.gaussian_kde(values, weights=z)
Z = np.reshape(kernel(positions).T, X.shape)
fig, ax = plt.subplots(figsize=(7, 7))
ax.imshow(np.rot90(Z), cmap=plt.cm.gist_earth_r,
extent=[xmin, xmax, ymin, ymax],
aspect='auto'
)
sns.scatterplot(
data=df,
x='x', y='y',
size='z', sizes=(10, 200),
color='k'
)
ax.set_xlim([xmin, xmax])
ax.set_ylim([ymin, ymax])
ax.legend(loc='upper left', bbox_to_anchor=(1,1))
plt.show()

How to create a step-plot with a gradient based on y-value?

In Python matplotlib, how can you get the line in a line or step plot to display a gradient based on the y-value?
Example plot (made in Tableau):
Code for step plot with a line that changes gradient according to x-value, adapted from this answer:
fig, ax = plt.subplots(figsize=(10, 4))
x = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
y = [2, 3, 9, 10, 2, 9, 0, 1, 9, 1, -8]
T = np.linspace(0,1,np.size(x))**2
s = 1
for i in range(0, len(x)-s, s):
ax.step(x[i:i+s+1], y[i:i+s+1], marker='.', color=(0.0,0.5,T[i]))
ax.tick_params(axis='both', colors='lightgray', labelsize=8)
The following code is inspired by the multicolored-line example from the matplotlib docs. First the horizontal line segments are drawn and colored using their y-value. The vertical segments are subdivided in small chunks to colored individually.
vmin of the norm is set a bit lower to avoid the too-light range of the colormap.
import matplotlib.pyplot as plt
from matplotlib.collections import LineCollection
import numpy as np
x = np.arange(50)
y = np.random.randint(-3, 4, x.size).cumsum()
fig, ax = plt.subplots()
norm = plt.Normalize(y.min() - y.ptp() * .2, y.max())
cmap = 'inferno_r' # 'Reds'
horizontal_lines = np.array([x[:-1], y[:-1], x[1:], y[:-1]]).T.reshape(-1, 2, 2)
hor_lc = LineCollection(horizontal_lines, cmap=cmap, norm=norm)
hor_lc.set_array(y[:-1])
ax.add_collection(hor_lc)
factor = 10
long_y0 = np.linspace(y[:-1], y[1:], factor)[:-1, :].T.ravel()
long_y1 = np.linspace(y[:-1], y[1:], factor)[1:, :].T.ravel()
long_x = np.repeat(x[1:], factor - 1)
vertical_lines = np.array([long_x, long_y0, long_x, long_y1]).T.reshape(-1, 2, 2)
ver_lc = LineCollection(vertical_lines, cmap=cmap, norm=norm)
ver_lc.set_array((long_y0 + long_y1) / 2)
ax.add_collection(ver_lc)
ax.scatter(x, y, c=y, cmap=cmap, norm=norm)
plt.autoscale() # needed in case the scatter plot would be omited
plt.show()
Here is another example, with a black background. In this case the darkest part of the colormap is avoided. The changed code parts are:
y = np.random.randint(-9, 10, x.size)
ax.patch.set_color('black')
norm = plt.Normalize(y.min(), y.max() + y.ptp() * .2)
cmap = 'plasma_r'
Here is an example with a TwoSlopeNorm and the blue-white-red colormap:
from matplotlib.colors import TwoSlopeNorm
y = np.random.uniform(-1, 1, x.size * 10).cumsum()[::10]
y = (y - y.min()) / y.ptp() * 15 - 5
norm = TwoSlopeNorm(vmin=-5, vcenter=0, vmax=10)
cmap = 'bwr'

dash from the point to x and y axes in matplotlib

as you can see, I want to make the dash connect to the x and y axes.
There is always a small gap.
I use matplotlib
the vline function, and I don't know how to use the transform parameters.
Using vlines and hlines from matplotlib.pyplot, you can specify your axes and your line limits:
from matplotlib import pyplot as plt
# Drawing example diagram
plt.scatter(x=11,y=0.891)
plt.xlim(5,20)
plt.xticks([5,8,11,14,17,20])
plt.ylim(0.780,0.9)
# Specifying lines, notice how despite setting xmin and ymin lower than your axes,
# the lines stop at each boundary
plt.vlines(x=11, ymin=0.7, ymax=0.891, colors='r',linestyles='dashed')
plt.hlines(y=0.891, xmin=4, xmax=11, colors='k',linestyles='dashed')
plt.show()
The result is beautiful, but the code not so good.
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.ticker as ticker
x = [i for i in range(5, 21, 3)]
# [5, 8, 11, 14, 17, 20]
y = [0.780, 0.865, 0.891, 0.875, 0.884, 0.870]
y_max_index = np.argmax(y)
# print(y_max_index)
# get the max point
x_max = x[y_max_index]
y_max = y[y_max_index]
fig, ax = plt.subplots()
ax.plot(x, y, marker='o', color='r')
# set x ticks as [5, 8, 11, 14, 17, 20]
my_x_ticks = x
plt.xticks(my_x_ticks)
# set x and y lim
axe_y_min, axe_y_max = ax.get_ylim()
axe_x_min, axe_x_max = ax.get_xlim()
ax.set_ylim(axe_y_min, axe_y_max)
ax.set_xlim(axe_x_min, axe_x_max)
plt.gca().yaxis.set_major_formatter(ticker.FormatStrFormatter('%.3f')) # set y axe format
anno_text = "(11, 0.891)"
plt.annotate(anno_text, xy=(x_max, y_max), xytext=(x_max+0.5, y_max)) # annotate
y_scale_trans = (y_max - axe_y_min) / (axe_y_max - axe_y_min)
x_scale_trans = (x_max - axe_x_min) / (axe_x_max - axe_x_min)
ax.vlines(x_max, 0, y_scale_trans, transform=ax.get_xaxis_transform(), colors='black', linestyles="dashed")
ax.hlines(y_max, 0, x_scale_trans, transform=ax.get_yaxis_transform(), colors='black', linestyles="dashed")
plt.ylabel("准确率")
plt.xlabel("滑动窗口大小")
plt.savefig("滑动窗口.pdf", dpi=100)
plt.show()
Here is a solution using plt.plot to draw the lines.
import matplotlib.pyplot as plt
import numpy as np
y = np.random.randint(1, 10, 10)
x = np.arange(len(y))
point = [x[2], y[2]]
plt.plot(x,y)
plt.plot((point[0], point[0]), (0, point[1]), '--')
plt.plot((0, point[0]), (point[1], point[1]), '--')
plt.xlim(0,10)
plt.ylim(0,10)

Problem with 3D wireframe visualization in matplotlib

I need to plot 3D data of the form z_i as function of (x_i, y_i) using a wireframe. I wrote the code below:
import numpy as np
from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import axes3d
import scipy.interpolate as spint
## Data to plot
sim_data = np.array([[ 20, 1, 8],
[ 20, 2, 7],
[ 20, 4, 7],
[ 20, 6, 6],
[ 20, 10, 6],
[ 50, 0.4, 15],
[ 50, 0.8, 11],
[ 50, 1, 10],
[ 50, 2, 8],
[ 50, 4, 7],
[ 50, 6, 7],
[ 50, 10, 7],
[100, 0.4, 22],
[100, 0.8, 15],
[100, 1, 13],
[100, 2, 10],
[100, 4, 8],
[100, 6, 7],
[100, 10, 7]])
x = sim_data[:, 0]
y = sim_data[:, 1]
z = sim_data[:, 2]
# Do trisurf plot
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.plot_trisurf(x, y, z)
ax.set_xlabel('Air flow')
ax.set_ylabel('Fuel rate')
ax.set_zlabel('Temp.')
ax.text2D(0.05, 0.95, "Trisurf plot", transform=ax.transAxes)
# Transform from vector to grid
X, Y = np.meshgrid(x, y)
xi = (X, Y)
Z = spint.griddata((x,y), z, xi)
# Do wireframe plot
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.plot_wireframe(X, Y, Z)
ax.set_xlabel('Air flow')
ax.set_ylabel('Fuel rate')
ax.set_zlabel('Temp.')
ax.text2D(0.05, 0.95, "Wireframe plot", transform=ax.transAxes)
# Do surface plot
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.plot_surface(X, Y, Z)
ax.set_xlabel('Air flow')
ax.set_ylabel('Fuel rate')
ax.set_zlabel('Temp.')
ax.text2D(0.05, 0.95, "Surface plot", transform=ax.transAxes)
But I get some annoying extra wires (marked with a red arrow):
How can I get rid of this arrow? I have the same problem while trying a surface plot by the way:
My goal is to have a plot similar to a trisurf plot like the one below, but with a wireframe visualization.
Many thanks in advance.
What's wrong with wireframe?
I am not sure, but I think the problem is in your data. It's small and if you look carefully you will see that it looks like a stack of three different lines (observations). Look at this plot:
it's definitely there are three parallel lines over there. I suppose, that's might cause confusion with plot_wireframe, as well as with plot from the previous image. I see 3 possible solutions:
Solution 1: Use plot
Ok, so the first solution is not to use plot_wireframe at all. Let's use good old one plot to build our own wires. But first, let's break our data into 3 lines data:
line1 = sim_data[0:5][::-1] # NOTE: the first line is shorter
line2 = sim_data[5:12][::-1]
line3 = sim_data[12:][::-1]
Plot them all!
# a helper function
def prepare_fig(fw=7, fh=7, view = (25, 30)):
fig = plt.figure(figsize=(fw, fh))
ax = fig.add_subplot(111, projection='3d')
ax.view_init(view[0], view[-1])
return ax
ax = prepare_fig()
ax.title.set_text('3 Lines')
for line in [line1, line2, line3]:
x, y, z = line[:, 0], line[:, 1], line[:, 2]
ax.plot(x, y, z, c='tab:blue', linewidth=3)
Ok, we fixed undesired links, now let's add parallel links (lines) to connect our main lines:
ax = prepare_fig()
ax.title.set_text('Paralel links')
for i in range(len(line3)):
x, y, z = [], [], []
if i < len(line1):
x.append(line1[:, 0][i])
y.append(line1[:, 1][i])
z.append(line1[:, 2][i])
else:
# line1 is shorter so we will put nan here (for now)
x.append(np.nan)
y.append(np.nan)
z.append(np.nan)
x.extend([line2[:, 0][i], line3[:, 0][i]])
y.extend([line2[:, 1][i], line3[:, 1][i]])
z.extend([line2[:, 2][i], line3[:, 2][i]])
ax.plot(x, y, z, c='tab:blue', linewidth=3)
Now all in one:
Final Code:
ax = prepare_fig()
ax.title.set_text('Handmade Wireframe (enclosed)')
line1 = sim_data[0:5][::-1]
line2 = sim_data[5:12][::-1]
line3 = sim_data[12:][::-1]
for line in [line1, line2, line3]:
x, y, z = line[:, 0], line[:, 1], line[:, 2]
ax.plot(x, y, z, c='tab:blue', linewidth=3)
for i in range(len(line3)):
x, y, z = [], [], []
if i < len(line1):
x.append(line1[:, 0][i])
y.append(line1[:, 1][i])
z.append(line1[:, 2][i])
else:
# put nan because line1 is shorter
# x.append(np.nan)
# y.append(np.nan)
# z.append(np.nan)
# Or you can just replace it with last line1 value
x.append(line1[:, 0][-1])
y.append(line1[:, 1][-1])
z.append(line1[:, 2][-1])
x.extend([line2[:, 0][i], line3[:, 0][i]])
y.extend([line2[:, 1][i], line3[:, 1][i]])
z.extend([line2[:, 2][i], line3[:, 2][i]])
ax.plot(x, y, z, c='tab:blue', linewidth=3)
Solution 2: Use plot_trisurf.
If triangles are acceptable, another solution is to transform trisurf to wireframe-like by some tweaking.
x = sim_data[:, 0]
y = sim_data[:, 1]
z = sim_data[:, 2]
ax = prepare_fig()
ax.title.set_text('Trisurf Wireframe')
trisurf = ax.plot_trisurf(x, y, z)
# turn of surface color, you can control it with alpha here:
trisurf.set_facecolor(mpl.colors.colorConverter.to_rgba('w', alpha=0.0))
# setting wire color
trisurf.set_edgecolor('tab:blue')
#setting wire width
trisurf.set_linewidth(3)
Solution 3: Use plot_wireframe and interpolation at linspace grid.
This might be solution if you want good looking smooth surface. You just need to generate new grid and then using scipy's spint.griddata to perform interpolation:
import scipy.interpolate as spint
x = sim_data[:, 0]
y = sim_data[:, 1]
z = sim_data[:, 2]
# generate new linear grid based on previous
X, Y = np.meshgrid(np.linspace(min(x), max(x), len(x)),
np.linspace(min(y), max(y), len(y)))
Z = spint.griddata((x, y), z, (X, Y))
ax = prepare_fig()
ax.title.set_text('Interpotation on Linspace Grid')
# ax.plot_wireframe(X, Y, Z, rstride=3, cstride=3)
ax.plot_surface(X, Y, Z, rstride=3, cstride=3)
And you will get something like this:

Limit/mask matplotlib contour to data area

I have a pandas DataFrame with non-uniformly spaced data points given by an x, y and z column, where x and y are pairs of variables and z is the dependent variable. For example:
import matplotlib.pyplot as plt
from matploblib.mlab import griddata
import numpy as np
import pandas as pd
df = pd.DataFrame({'x':[0, 0, 1, 1, 3, 3, 3, 4, 4, 4],
'y':[0, 1, 0, 1, 0.2, 0.7, 1.4, 0.2, 1.4, 2],
'z':[50, 40, 40, 30, 30, 30, 20, 20, 20, 10]})
x = df['x']
y = df['y']
z = df['z']
I want to do a contour plot of the dependent variable z over x and y. For this, I create a new grid to interpolate the data on using matplotlib.mlab's griddata function.
xi = np.linspace(x.min(), x.max(), 100)
yi = np.linspace(y.min(), y.max(), 100)
z_grid = griddata(x, y, z, xi, yi, interp='linear')
plt.contourf(xi, yi, z_grid, 15)
plt.scatter(x, y, color='k') # The original data points
plt.show()
While this works, the output is not what I want. I do not want griddata to interpolate outside of the boundaries given by the min and max values of the x and y data. The following plots are what shows up after calling plt.show(), and then highlighted in purple what area of the data I want to have interpolated and contoured. The contour outside the purple line is supposed to be blank. How could I go about masking the outlying data?
The linked question does unfortunately not answer my question, as I don't have a clear mathematical way to define the conditions on which to do a triangulation. Is it possible to define a condition to mask the data based on the data alone, taking the above Dataframe as an example?
As seen in the answer to this question one may introduce a condition to mask the values.
The sentence from the question
"I do not want griddata to interpolate outside of the boundaries given by the min and max values of the x and y data." implies that there is some min/max condition present, which can be used.
Should that not be the case, one may clip the contour using a path. The points of this path need to be specified as there is no generic way of knowing which points should be the edges. The code below does this for three different possible paths.
import matplotlib.pyplot as plt
from matplotlib.path import Path
from matplotlib.patches import PathPatch
from matplotlib.mlab import griddata
import numpy as np
import pandas as pd
df = pd.DataFrame({'x':[0, 0, 1, 1, 3, 3, 3, 4, 4, 4],
'y':[0, 1, 0, 1, 0.2, 0.7, 1.4, 0.2, 1.4, 2],
'z':[50, 40, 40, 30, 30, 30, 20, 20, 20, 10]})
x = df['x']
y = df['y']
z = df['z']
xi = np.linspace(x.min(), x.max(), 100)
yi = np.linspace(y.min(), y.max(), 100)
z_grid = griddata(x, y, z, xi, yi, interp='linear')
clipindex = [ [0,2,4,7,8,9,6,3,1,0],
[0,2,4,7,5,8,9,6,3,1,0],
[0,2,4,7,8,9,6,5,3,1,0]]
fig, axes = plt.subplots(ncols=3, sharey=True)
for i, ax in enumerate(axes):
cont = ax.contourf(xi, yi, z_grid, 15)
ax.scatter(x, y, color='k') # The original data points
ax.plot(x[clipindex[i]], y[clipindex[i]], color="crimson")
clippath = Path(np.c_[x[clipindex[i]], y[clipindex[i]]])
patch = PathPatch(clippath, facecolor='none')
ax.add_patch(patch)
for c in cont.collections:
c.set_clip_path(patch)
plt.show()
Ernest's answer is a great solution, but very slow for lots of contours. Instead of clipping every one of them, I built a mask by constructing the complement polygon of the desired clipping mask.
Here is the code based on Ernest's accepted answer:
import numpy as np
import pandas as pd
import matplotlib.tri as tri
import matplotlib.pyplot as plt
from descartes import PolygonPatch
from shapely.geometry import Polygon
df = pd.DataFrame({'x':[0, 0, 1, 1, 3, 3, 3, 4, 4, 4],
'y':[0, 1, 0, 1, 0.2, 0.7, 1.4, 0.2, 1.4, 2],
'z':[50, 40, 40, 30, 30, 30, 20, 20, 20, 10]})
points = df[['x', 'y']]
values = df[['z']]
xi = np.linspace(points.x.min(), points.x.max(), 100)
yi = np.linspace(points.y.min(), points.y.max(), 100)
triang = tri.Triangulation(points.x, points.y)
interpolator = tri.LinearTriInterpolator(triang, values.z)
Xi, Yi = np.meshgrid(xi, yi)
zi = interpolator(Xi, Yi)
clipindex = [ [0,2,4,7,8,9,6,3,1,0],
[0,2,4,7,5,8,9,6,3,1,0],
[0,2,4,7,8,9,6,5,3,1,0]]
fig, axes = plt.subplots(ncols=3, sharey=True, figsize=(10,4))
for i, ax in enumerate(axes):
ax.set_xlim(-0.5, 4.5)
ax.set_ylim(-0.2, 2.2)
xlim = ax.get_xlim()
ylim = ax.get_ylim()
cont = ax.contourf(Xi, Yi, zi, 15)
ax.scatter(points.x, points.y, color='k', zorder=2) # The original data points
ax.plot(points.x[clipindex[i]], points.y[clipindex[i]], color="crimson", zorder=1)
#### 'Universe polygon':
ext_bound = Polygon([(xlim[0], ylim[0]), (xlim[0], ylim[1]), (xlim[1], ylim[1]), (xlim[1], ylim[0]), (xlim[0], ylim[0])])
#### Clipping mask as polygon:
inner_bound = Polygon([ (row.x, row.y) for idx, row in points.iloc[clipindex[i]].iterrows() ])
#### Mask as the symmetric difference of both polygons:
mask = ext_bound.symmetric_difference(inner_bound)
ax.add_patch(PolygonPatch(mask, facecolor='white', zorder=1, edgecolor='white'))
plt.show()

Categories

Resources