Return z-value of xy coordinate - python
I have a set of xy coordinates that generate a contour. For the code below, these coordinates are from groups A and B in the df. I have also created a separate xy coordinate that is called from C1_X and C1_Y. However, this isn't used in generating the contour itself. It is a separate xy coordinate.
Question: Is it possible to return the z-value of the contour at the C1_X C1_Y coordinate?
I have found a separate question that is similar: multivariate spline interpolation in python scipy?. The figure in that question displays what I'm hoping to return but I just want the z-value for one xy coordinate.
The contour in this question is normalised so values fall between -1 and 1. I'm hoping to return the z-value for C1_X and C1_Y, which is the white scatter point seen in the figure beneath the code.
I have attempted to return the z-value for this point using:
# Attempt at returning the z-value for C1
f = RectBivariateSpline(X, Y, normPDF)
z = f(d['C1_X'], d['C1_Y'])
print(z)
But I'm returning an error: raise TypeError('x must be strictly increasing')
TypeError: x must be strictly increasing
I have commented out this function so the code runs.
Side note: This code is written for an animation.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as sts
import matplotlib.animation as animation
from mpl_toolkits.axes_grid1 import make_axes_locatable
from scipy.interpolate import RectBivariateSpline
# Axis/grid limits shared by both the x and y directions.
DATA_LIMITS = [0, 15]


def datalimits(*data):
    """Return the fixed plotting limits.

    Any positional arguments are accepted and ignored; the module-level
    ``DATA_LIMITS`` list is always returned.
    """
    limits = DATA_LIMITS
    return limits
def mvpdf(x, y, xlim, ylim, radius=1, velocity=0, scale=0, theta=0):
    """Evaluate a bivariate normal density centred at (x, y) on a grid.

    x, y: centre (mean) of the distribution.
    xlim, ylim: argument sequences unpacked into ``np.linspace`` to build
        the grid axes.
    radius, velocity, scale, theta: accepted but not used here.

    Returns the meshgrid arrays ``X`` and ``Y`` and the density sampled on
    them. The distribution uses scipy's default covariance.
    """
    axis_x = np.linspace(*xlim)
    axis_y = np.linspace(*ylim)
    X, Y = np.meshgrid(axis_x, axis_y)
    # Last axis holds the (x, y) pair for every grid node.
    grid_points = np.stack([X, Y], axis=2)
    distribution = sts.multivariate_normal([x, y])
    density = distribution.pdf(grid_points)
    return X, Y, density
def mvpdfs(xs, ys, xlim, ylim, radius=None, velocity=None, scale=None, theta=None):
    """Sum the densities of one distribution per (x, y) centre.

    xs, ys: paired sequences of centre coordinates.
    xlim, ylim: grid limits forwarded to ``mvpdf``.
    radius, velocity, scale, theta: accepted but not used here.

    Returns the grid arrays from the last ``mvpdf`` call and the
    element-wise sum of all the densities.
    """
    densities = []
    for centre_x, centre_y in zip(xs, ys):
        X, Y, density = mvpdf(centre_x, centre_y, xlim, ylim)
        densities.append(density)
    total = np.sum(densities, axis=0)
    return X, Y, total
# Create the figure and pin both axes to the shared data limits.
fig, ax = plt.subplots(figsize = (10,6))
ax.set_xlim(DATA_LIMITS)
ax.set_ylim(DATA_LIMITS)
# Empty marker-only line artists (red and blue); plotmvs fills in their data.
line_a, = ax.plot([], [], 'o', c='red', alpha = 0.5, markersize=5,zorder=3)
line_b, = ax.plot([], [], 'o', c='blue', alpha = 0.5, markersize=5,zorder=3)
# Empty white scatter artist; plotmvs sets its offsets from group 'C'.
scat = ax.scatter([], [], s=5**2,marker='o', c='white', alpha = 1,zorder=3)
lines=[line_a,line_b]
scats=[scat]
# Handle to the most recent contourf result; None until plotmvs assigns it.
cfs = None
def plotmvs(tdf, xlim=None, ylim=None, fig=fig, ax=ax):
    """Draw one animation frame: group markers plus the normalised contour.

    tdf: ``(time, frame_df)`` pair, as yielded by ``df.groupby('time')``;
        only the dataframe part is used.
    xlim, ylim: grid limits forwarded to ``mvpdfs``. When omitted they are
        taken from ``datalimits`` at call time. (Evaluating them in the
        signature referenced the global ``df`` before it was defined.)
    fig, ax: figure and axes to draw on.

    Returns the list of artists updated for this frame (contour
    collections, the white scatter and both group lines).
    """
    global cfs
    first_frame = cfs is None
    if not first_frame:
        # Drop the previous frame's filled contours before drawing new ones.
        # NOTE(review): ``ContourSet.collections`` is deprecated in recent
        # Matplotlib releases - confirm against the installed version.
        for tp in cfs.collections:
            tp.remove()
    df = tdf[1]
    if xlim is None:
        xlim = datalimits(df['X'])
    if ylim is None:
        ylim = datalimits(df['Y'])
    PDFs = []
    # groupby yields the groups in sorted order, pairing them with the two
    # line artists and then the scatter artist.
    for (group, gdf), group_line in zip(df.groupby('group'), lines + scats):
        if group in ('A', 'B'):
            group_line.set_data(*gdf[['X', 'Y']].values.T)
            X, Y, PDF = mvpdfs(gdf['X'].values, gdf['Y'].values, xlim=xlim, ylim=ylim)
            PDFs.append(PDF)
        elif group == 'C':
            # plot white scatter point from C1_X, C1_Y
            scat.set_offsets(gdf[['X', 'Y']].values)
    # Difference of the two group densities, divided by the larger of the
    # two peaks so that every value lies within [-1, 1].
    normPDF = (PDFs[0] - PDFs[1]) / max(PDFs[0].max(), PDFs[1].max())
    # Attempt at returning z-value for C1_X, C1_Y (kept disabled: it raises
    # "x must be strictly increasing" because X and Y are 2-D meshgrids):
    # f = RectBivariateSpline(X[::-1, :], Y[::-1, :], normPDF[::-1, :])
    # z = f(d['C1_X'], d['C1_Y'])
    # print(z)
    cfs = ax.contourf(X, Y, normPDF, cmap='jet', alpha=1, levels=np.linspace(-1, 1, 10), zorder=1)
    if first_frame:
        # The levels and colormap never change, so one colorbar serves every
        # frame. Appending a new colorbar axes per frame would keep shrinking
        # the main axes.
        divider = make_axes_locatable(ax)
        cax = divider.append_axes("right", size="5%", pad=0.1)
        cbar = fig.colorbar(cfs, ax=ax, cax=cax)
        cbar.set_ticks([-1, -0.8, -0.6, -0.4, -0.2, 0, 0.2, 0.4, 0.6, 0.8, 1])
    return cfs.collections + [scat] + [line_a, line_b]
''' Sample Dataframe '''
n = 1
time = range(n)
# One list per "<group><id>_<axis>" key; element i is the value at time i.
d = {
    'A1_X': [3],
    'A1_Y': [6],
    'A2_X': [6],
    'A2_Y': [10],
    'B1_X': [12],
    'B1_Y': [2],
    'B2_X': [14],
    'B2_Y': [4],
    'C1_X': [4],
    'C1_Y': [6],
}


def _index_for(key, t):
    """Build the (time, group_id, point_id, value_label) index for a key."""
    label = key.split('_')[0]
    axis = key.split('_')[1]
    return (t, label[0], int(label[1:]), axis)


# a list of tuples of the form ((time, group_id, point_id, value_label), value)
tuples = [
    (_index_for(key, t), values[i])
    for key, values in d.items()
    for i, t in enumerate(time)
]
# Unstacking the last index level turns the value labels into columns.
df = pd.Series(dict(tuples)).unstack(-1)
df.index.names = ['time', 'group', 'id']
#Code will eventually operate with multiple frames
# Timing values passed to FuncAnimation as ``interval`` and ``repeat_delay``.
interval_ms = 1000
delay_ms = 2000
# plotmvs is called once per (time, frame_df) pair produced by the groupby.
ani = animation.FuncAnimation(fig, plotmvs, frames=df.groupby('time'), interval=interval_ms, repeat_delay=delay_ms,)
plt.show()
I am hoping to return the z value for the white scatter point. Intended Output will display the normalised z value (-1,1) for C1_X,C1_Y.
Upon visual inspection this would be between 0.6 and 0.8.
Edit 2:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as sts
import matplotlib.animation as animation
from mpl_toolkits.axes_grid1 import make_axes_locatable
from scipy.interpolate import RectBivariateSpline
import matplotlib.transforms as transforms
# Axis/grid limits shared by both the x and y directions.
DATA_LIMITS = [-85, 85]


def datalimits(*data):
    """Return the fixed plotting limits.

    Any positional arguments are accepted and ignored; the module-level
    ``DATA_LIMITS`` list is always returned.
    """
    limits = DATA_LIMITS  # dmin - spad, dmax + spad
    return limits
def rot(theta):
    """Return the 2x2 matrix [[cos, -sin], [sin, cos]] for theta in degrees."""
    angle = np.deg2rad(theta)
    cos_t = np.cos(angle)
    sin_t = np.sin(angle)
    return np.array([[cos_t, -sin_t], [sin_t, cos_t]])
def getcov(radius=1, scale=1, theta=0):
    """Return a 2x2 covariance matrix stretched by ``scale`` and rotated.

    radius: base variance along both axes.
    scale: anisotropy; the first axis variance is ``radius * (scale + 1)``
        and the second is ``radius / (scale + 1)``.
    theta: rotation angle in degrees, passed to ``rot``.

    The axis-aligned covariance is rotated as ``R @ cov @ R.T``. Returning
    the bare rotation matrix would discard ``radius`` and ``scale`` and is
    not a valid covariance in general.
    """
    cov = np.array([
        [radius * (scale + 1), 0],
        [0, radius / (scale + 1)],
    ])
    r = rot(theta)
    return r @ cov @ r.T
def mvpdf(x, y, xlim, ylim, radius=1, velocity=0, scale=0, theta=0):
    """Evaluate one rotated, scaled bivariate normal density on a grid.

    x, y: nominal centre of the distribution.
    xlim, ylim: argument sequences unpacked into ``np.linspace`` to build
        the grid axes.
    radius, scale, theta: forwarded to ``getcov`` for the covariance.
    velocity: the centre is shifted by ``velocity / 2`` along the direction
        given by ``theta``.

    Returns the meshgrid arrays ``X`` and ``Y`` and the density sampled on
    them.
    """
    X, Y = np.meshgrid(np.linspace(*xlim), np.linspace(*ylim))
    XY = np.stack([X, Y], 2)
    # Rotate the (velocity/2, 0) offset and add it to the centre. Unpacking
    # rot(theta) on its own would replace x and y with matrix rows.
    x, y = rot(theta) @ np.array([velocity / 2, 0]) + np.array([x, y])
    cov = getcov(radius=radius, scale=scale, theta=theta)
    PDF = sts.multivariate_normal([x, y], cov).pdf(XY)
    return X, Y, PDF
def mvpdfs(xs, ys, xlim, ylim, radius=None, velocity=None, scale=None, theta=None):
    """Sum the densities of one distribution per (x, y) centre.

    xs, ys: paired sequences of centre coordinates.
    xlim, ylim: grid limits forwarded to ``mvpdf``.
    radius, velocity, scale, theta: optional per-point sequences indexed in
        step with ``xs``/``ys``. A missing sequence falls back to 0.5 for
        radius and 0 for the others.

    Returns the grid arrays from the last ``mvpdf`` call and the
    element-wise sum of all the densities.
    """
    densities = []
    for i, (centre_x, centre_y) in enumerate(zip(xs, ys)):
        X, Y, density = mvpdf(
            centre_x,
            centre_y,
            radius=0.5 if radius is None else radius[i],
            velocity=0 if velocity is None else velocity[i],
            scale=0 if scale is None else scale[i],
            theta=0 if theta is None else theta[i],
            xlim=xlim,
            ylim=ylim,
        )
        densities.append(density)
    total = np.sum(densities, axis=0)
    return X, Y, total
# Create the figure and pin both axes to the shared data limits.
fig, ax = plt.subplots(figsize = (10,6))
ax.set_xlim(DATA_LIMITS)
ax.set_ylim(DATA_LIMITS)
# Empty marker-only line artists (red and blue); plotmvs fills in their data.
line_a, = ax.plot([], [], 'o', c='red', alpha = 0.5, markersize=3,zorder=3)
line_b, = ax.plot([], [], 'o', c='blue', alpha = 0.5, markersize=3,zorder=3)
lines=[line_a,line_b] ## this is iterable!
# Horizontal translation of p/82 applied after the data transform.
# NOTE(review): presumably in inches via dpi_scale_trans - confirm.
offset = lambda p: transforms.ScaledTranslation(p/82.,0, plt.gcf().dpi_scale_trans)
trans = plt.gca().transData
# Empty white scatter artist drawn with the shifted transform; plotmvs sets
# its offsets from group 'C'.
scat = ax.scatter([], [], s=5,marker='o', c='white', alpha = 1,zorder=3,transform=trans+offset(+2) )
scats=[scat]
# Handle to the most recent contourf result; None until plotmvs assigns it.
cfs = None
def plotmvs(tdf, xlim=None, ylim=None, fig=fig, ax=ax):
    """Draw one animation frame and print the contour value at each C point.

    tdf: ``(time, frame_df)`` pair, as yielded by ``df.groupby('time')``;
        only the dataframe part is used.
    xlim, ylim: grid limits forwarded to ``mvpdfs``; taken from
        ``datalimits`` when omitted.
    fig, ax: figure and axes to draw on.

    Returns the list of artists updated for this frame (contour
    collections, the white scatter and both group lines).

    The z-value at every group 'C' point is obtained by spline
    interpolation of the normalised surface. The earlier brute-force search
    drew hundreds of extra contour sets onto the animated axes every frame
    and was handed lists instead of scalars.
    """
    global cfs
    first_frame = cfs is None
    if not first_frame:
        # Drop the previous frame's filled contours before drawing new ones.
        # NOTE(review): ``ContourSet.collections`` is deprecated in recent
        # Matplotlib releases - confirm against the installed version.
        for tp in cfs.collections:
            tp.remove()
    df = tdf[1]
    if xlim is None:
        xlim = datalimits(df['X'])
    if ylim is None:
        ylim = datalimits(df['Y'])
    PDFs = []
    query_points = ()
    # groupby yields the groups in sorted order, pairing them with the two
    # line artists and then the scatter artist.
    for (group, gdf), group_line in zip(df.groupby('group'), lines + scats):
        if group in ('A', 'B'):
            group_line.set_data(*gdf[['X', 'Y']].values.T)
            kwargs = {
                'radius': gdf['Radius'].values if 'Radius' in gdf else None,
                'velocity': gdf['Velocity'].values if 'Velocity' in gdf else None,
                'scale': gdf['Scaling'].values if 'Scaling' in gdf else None,
                'theta': gdf['Rotation'].values if 'Rotation' in gdf else None,
                'xlim': xlim,
                'ylim': ylim,
            }
            X, Y, PDF = mvpdfs(gdf['X'].values, gdf['Y'].values, **kwargs)
            PDFs.append(PDF)
        elif group == 'C':
            query_points = gdf[['X', 'Y']].values
            scat.set_offsets(query_points)
    # Difference of the two group densities, divided by the larger of the
    # two peaks so that every value lies within [-1, 1].
    normPDF = (PDFs[0] - PDFs[1]) / max(PDFs[0].max(), PDFs[1].max())
    # The spline wants 1-D, strictly increasing axes with z indexed
    # [x, y]. The meshgrid arrays are indexed [y, x], hence the slices and
    # the transpose.
    spline = RectBivariateSpline(X[0, :], Y[:, 0], normPDF.T)
    for point_x, point_y in query_points:
        print(float(spline.ev(point_x, point_y)))
    cfs = ax.contourf(X, Y, normPDF, cmap='viridis', alpha=1, levels=np.linspace(-1, 1, 10), zorder=1)
    if first_frame:
        # The levels and colormap never change, so one colorbar serves every
        # frame. Appending a new colorbar axes per frame would keep shrinking
        # the main axes.
        divider = make_axes_locatable(ax)
        cax = divider.append_axes("right", size="5%", pad=0.1)
        cbar = fig.colorbar(cfs, ax=ax, cax=cax)
        cbar.set_ticks([-1, -0.8, -0.6, -0.4, -0.2, 0, 0.2, 0.4, 0.6, 0.8, 1])
    return cfs.collections + [scat] + [line_a, line_b]
# Sample dataframe.
# One list per "<group><id>_<axis>" key; element i is the value at frame i.
d = {
    'A1_X': [3],
    'A1_Y': [6],
    'A2_X': [6],
    'A2_Y': [10],
    'B1_X': [12],
    'B1_Y': [2],
    'B2_X': [14],
    'B2_Y': [4],
    'C1_X': [4],
    'C1_Y': [6],
}
# Derive the frame count from the data. A hard-coded n larger than the
# shortest list raised IndexError while building the tuples below.
n = min(len(values) for values in d.values())
time = range(n)
# a list of tuples of the form ((time, group_id, point_id, value_label), value)
tuples = [
    ((t, k.split('_')[0][0], int(k.split('_')[0][1:]), k.split('_')[1]), v[i])
    for k, v in d.items()
    for i, t in enumerate(time)
]
# Unstacking the last index level turns the value labels into columns.
df = pd.Series(dict(tuples)).unstack(-1)
df.index.names = ['time', 'group', 'id']
#Code will eventually operate with multiple frames
# Timing values passed to FuncAnimation as ``interval`` and ``repeat_delay``.
interval_ms = 1000
delay_ms = 2000
# plotmvs is called once per (time, frame_df) pair produced by the groupby.
ani = animation.FuncAnimation(fig, plotmvs, frames=df.groupby('time'), interval=interval_ms, repeat_delay=delay_ms,)
plt.show()
If you have an arbitrary cloud of (X, Y, Z) points and you want to interpolate the z-coordinate of some (x, y) point, you have a number of different options. The simplest is probably to just use scipy.interpolate.interp2d to get the z-value:
f = interp2d(X.T, Y.T, Z.T)
z = f(x, y)
Since the grid you have appears to be regular, you may be better off using scipy.interpolate.RectBivariateSpline, which has a very similar interface, but is specifically made for regular grids:
f = RectBivariateSpline(X.T, Y.T, Z.T)
z = f(x, y)
Since you have a regular meshgrid, you can also do
f = RectBivariateSpline(X[0, :], Y[:, 0], Z.T)
z = f(x, y)
Notice that the dimensions are flipped between the plotting arrays and the interpolation arrays. Plotting treats axis 0 as rows, i.e. Y, while the interpolation functions treat axis 0 as X. Rather than transposing, you could also switch the X and Y inputs, leaving Z intact for a similar end result, e.g.:
f = RectBivariateSpline(Y, X, Z)
z = f(y, x)
Alternatively, you could change all your plotting code to swap the inputs as well, but that would be too much work at this point. Whatever you do, pick an approach and stick with it. As long as you do it consistently, they should all work.
If you use one of the scipy approaches (recommended), keep the object f around to interpolate any further points you might want.
If you want a more manual approach, you can do something like find the three closest (X, Y, Z) points to (x, y), and find the value of the plane between them at (x, y). For example:
def interp_point(x, y, X, Y, Z):
    """
    Interpolate z at (x, y) on the plane through the three nearest samples.

    x, y: scalar coordinates to interpolate at
    X, Y, Z: arrays of coordinates corresponding to function; they are
        flattened, so any matching shapes work

    Returns the z-value at (x, y) of the plane through the three samples
    closest to (x, y).

    If those three samples are collinear in the xy-plane, the plane is
    vertical and the final division is by zero.
    """
    X = np.ravel(X)
    Y = np.ravel(Y)
    Z = np.ravel(Z)
    # distances from x, y to all X, Y points
    dist = np.hypot(X - x, Y - y)
    # indices of the nearest points
    nearest3 = np.argpartition(dist, 2)[:3]
    # One row per point, columns (x, y, z). With coordinates on the rows
    # instead, the diff below would subtract X from Y rather than points.
    points = np.stack((X[nearest3], Y[nearest3], Z[nearest3]), axis=1)
    # compute 2 vectors in the plane
    vecs = np.diff(points, axis=0)
    # compute normal to plane
    plane = np.cross(vecs[0], vecs[1])
    # rhs of plane equation: normal . p0
    d = np.dot(plane, points[0])
    # Solve normal . (x, y, z) = d for z.
    z = (d - np.dot(plane[:2], [x, y])) / plane[-1]
    return z
print(interp_point(x, y, X.T, Y.T, Z.T))
Since your data is on a regular grid, it might be easier to do something like bilinear interpolation on the quad surrounding (x, y):
def interp_grid(x, y, X, Y, Z):
    """
    Bilinear interpolation on the grid cell surrounding (x, y).

    x, y: scalar coordinates to interpolate at
    X, Y, Z: arrays of coordinates corresponding to function, laid out with
        axis 0 running along x and axis 1 along y (i.e. the transposes of
        default ``np.meshgrid`` output). Both axes must be sorted in
        increasing order and hold at least two values.

    Returns the interpolated z-value. Points outside the grid are linearly
    extrapolated from the nearest edge cell.
    """
    X, Y = X[:, 0], Y[0, :]
    # Index of the upper cell edge on each axis, clamped so that both
    # idx - 1 and idx are valid even when the point is off the grid.
    r = int(np.clip(np.searchsorted(Y, y), 1, len(Y) - 1))
    c = int(np.clip(np.searchsorted(X, x), 1, len(X) - 1))
    # Z is indexed [x-index, y-index] to match how X and Y were extracted.
    z = (Z[c - 1, r - 1] * (X[c] - x) * (Y[r] - y) +
         Z[c, r - 1] * (x - X[c - 1]) * (Y[r] - y) +
         Z[c - 1, r] * (X[c] - x) * (y - Y[r - 1]) +
         Z[c, r] * (x - X[c - 1]) * (y - Y[r - 1])
         ) / ((X[c] - X[c - 1]) * (Y[r] - Y[r - 1]))
    return z
# Call the function by the name it is defined under (interp_grid).
print(interp_grid(x, y, X.T, Y.T, Z.T))
Here's an inelegant, brute force approach.* Assuming we have X, Y, and Z values, let's define a function that draws custom contour lines over and over until they intersect with the point at a user-defined level of precision (in your data, make Z = normPDF).
def get_contour_value_of_point(point_x, point_y, X, Y, Z, precision=10000):
    """Estimate Z at a point by tightening contour lines around it.

    point_x, point_y: scalar coordinates of the point.
    X, Y, Z: grid arrays as accepted by ``Axes.contour``.
    precision: the level is refined in steps of ``1 / precision``.

    Returns the last contour level whose paths still contain the point, or
    0 if the tightest containing level of the initial 100-level pass is 0.

    Raises ValueError when none of the initial contour levels contains the
    point.
    """
    point = (point_x, point_y)
    # Scratch figure used only for contour computation; closed on exit so
    # repeated calls do not accumulate open figures.
    fig, ax = plt.subplots()
    try:
        # NOTE(review): ``ContourSet.collections`` is deprecated in recent
        # Matplotlib releases - confirm against the installed version.
        CS = ax.contour(X, Y, Z, 100)
        containing_levels = [
            lev
            for cc, lev in zip(CS.collections, CS.levels)
            if any(pp.contains_point(point) for pp in cc.get_paths())
        ]
        if not containing_levels:
            raise ValueError("point is not contained by any contour level")
        tightest = max(containing_levels)
        if tightest == 0:
            return 0
        if tightest > 0:
            lev = tightest
            adj = 1. / precision
        else:
            lev = min(containing_levels)
            adj = -1. / precision
        while True:
            CS = ax.contour(X, Y, Z, [lev])
            paths = CS.collections[0].get_paths()
            # Stop once no path at this level contains the point. This also
            # ends the loop when the level has left the data range and
            # there are no paths at all.
            if not any(pp.contains_point(point) for pp in paths):
                break
            lev += adj
        return lev - adj
    finally:
        plt.close(fig)
In more detail: what this is doing is drawing an initial contour map with 100 levels, then finding the list of contour levels whose polygons contain the point in question. We then find the narrowest level (either the highest if the levels are positive or the lowest if the levels are negative). From there, we tighten the level by small steps (corresponding to your desired precision level), checking if the point is still within the polygons. When the point is no longer within the contour polygon, we know that we've found the right level (the last one to contain the point).
As an example, we can use a contour in Matplotlib's library:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Sample surface for the demonstration: twice the difference of two Gaussian
# bumps, evaluated on a regular grid.
delta = 0.025  # grid spacing used along both axes
x, y = np.arange(-3.0, 3.0, delta), np.arange(-2.0, 2.0, delta)
X, Y = np.meshgrid(x, y)
Z1 = np.exp(-np.square(X) - np.square(Y))          # bump centred on (0, 0)
Z2 = np.exp(-np.square(X - 1) - np.square(Y - 1))  # bump centred on (1, 1)
Z = 2 * (Z1 - Z2)
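With this setup, get_contour_value_of_point(0, -0.6, X, Y, Z) returns 1.338399999999998, which on a visual examination seems to match (evaluating the formula directly at that point gives Z ≈ 1.3385). A second point, inside the negative lobe, returns -1.48, which also seems to match. (The original text repeats the call with (0, -0.6) here, which cannot produce a different result; a point such as (1, 1.5), where the formula gives Z ≈ -1.48, is consistent with the reported value.) Plots below for visual verification.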
*I can't guarantee this will cover all use cases. It covered the ones I tried. I would test this fairly rigorously before getting it near any kind of production environment. I would expect there to be more elegant solutions than this (such as Mad Physicist's answer), but this was the one that occurred to me and seemed to work in a straightforward, if brute-force, way.
Related
'FuncAnimation' object has no attribute '_resize_id'
I am trying to plot a single pendulum using Eulers method and with given theta values and formula in python but I am getting an Attribute error on FuncAnimation saying 'FuncAnimation' object has no attribute '_resize_id'. Does anyone know what I'm doing wrong here? # Liður 2 import numpy as np import matplotlib.pyplot as plt from matplotlib.animation import FuncAnimation def ydot(t, y): g = 9.81 l = 1 z1 = y[1] z2 = -g/l*np.sin(y[0]) return np.array([z1, z2]) def eulerstep(t, x, h): return ([x[j]+h*ydot(t,x)[j] for j in range(len(x))]) def eulersmethod(Theta0, T, n): z = Theta0 h = T/n t = [i*h for i in range(n)] theta = [[],[]] for i in range(n): z = eulerstep(t[i], z, h) theta[0].append(z[0]) theta[1].append(z[1]) return(t, theta[0], theta[1]) def animate_pendulum(x, y, h): fig = plt.figure(figsize=(8,8)) ax = fig.add_subplot(autoscale_on = False, xlim=(-2.2, 2.2), ylim = (-2.2, 2.2)) ax.grid() line = ax.plot([],[], 'o', c='blue', lw=1) time_text = ax.text(0.05, 0.9, '', transform = ax.transAxes) def animate(i): xline = [0, x[1]] yline = [0, y[1]] line.set_data(xline, yline) time_text.set_text(f"time = {i*h:1f}s") return line, time_text ani = FuncAnimation( fig, animate, len(x), interval = h*1000, blit = True, repeat = False ) plt.show() def min(): L=2 T=20 n=500 h=T/n y_0 = [np.pi/12, 0] t, angle, velocity = eulersmethod(y_0, T, n) x, y = L*np.sin(angle[:]), -L*np.cos(angle[:]) animate_pendulum(x, y, h) min()
Axes.plot() returns a list of lines, see the docs. You Can unpack the list by adding a comma at the variable assignment. This code runs on my machine: # Liður 2 import numpy as np import matplotlib.pyplot as plt from matplotlib.animation import FuncAnimation def ydot(t, y): g = 9.81 l = 1 z1 = y[1] z2 = -g/l*np.sin(y[0]) return np.array([z1, z2]) def eulerstep(t, x, h): return ([x[j]+h*ydot(t,x)[j] for j in range(len(x))]) def eulersmethod(Theta0, T, n): z = Theta0 h = T/n t = [i*h for i in range(n)] theta = [[],[]] for i in range(n): z = eulerstep(t[i], z, h) theta[0].append(z[0]) theta[1].append(z[1]) return(t, theta[0], theta[1]) def animate_pendulum(x, y, h): fig = plt.figure(figsize=(8,8)) ax = fig.add_subplot(autoscale_on = False, xlim=(-2.2, 2.2), ylim = (-2.2, 2.2)) ax.grid() line, = ax.plot([],[], 'o', c='blue', lw=1) time_text = ax.text(0.05, 0.9, '', transform = ax.transAxes) def animate(i): xline = [0, x[1]] yline = [0, y[1]] line.set_data(xline, yline) time_text.set_text(f"time = {i*h:1f}s") return line, time_text ani = FuncAnimation( fig, animate, len(x), interval = h*1000, blit = True, repeat = False ) plt.show() def min(): L=2 T=20 n=500 h=T/n y_0 = [np.pi/12, 0] t, angle, velocity = eulersmethod(y_0, T, n) x, y = L*np.sin(angle[:]), -L*np.cos(angle[:]) animate_pendulum(x, y, h) min()
3D Quadratic Plane of best fit
Can someone show me the code on how to make this work for 4th degree? import numpy as np import scipy.linalg import matplotlib.pyplot as plt # some 3-dim points x = [] y=[] z=[] data = np.c_[x,y,z] # regular grid covering the domain of the data mn = np.min(data, axis=0) mx = np.max(data, axis=0) X,Y = np.meshgrid(np.linspace(mn[0], mx[0], 20), np.linspace(mn[1], mx[1], 20)) XX = X.flatten() YY = Y.flatten() order = 2 # 1: linear, 2: quadratic if order == 1: # best-fit linear plane A = np.c_[data[:,0], data[:,1], np.ones(data.shape[0])] C,_,_,_ = scipy.linalg.lstsq(A, data[:,2]) # coefficients # evaluate it on grid # Z = C[0]*X + C[1]*Y + C[2] # or expressed using matrix/vector product Z = np.dot(np.c_[XX, YY, np.ones(XX.shape)], C).reshape(X.shape) elif order == 2: # best-fit quadratic curve # M = [ones(size(x)), x, y, x.*y, x.^2 y.^2] A = np.c_[np.ones(data.shape[0]), data[:,:2], np.prod(data[:,:2], axis=1), data[:,:2]**2] C,_,_,_ = scipy.linalg.lstsq(A, data[:,2]) # evaluate it on a grid Z = np.dot(np.c_[np.ones(XX.shape), XX, YY, XX*YY, XX**2, YY**2], C).reshape(X.shape) elif order == 3: # M = [ones(size(x)), x, y, x.^2, x.*y, y.^2, x.^3, x.^2.*y, x.*y.^2, y.^3] A = np.c_[np.ones(data.shape[0]), data[:,:2], data[:,0]**2, np.prod(data[:,:2], axis=1), \ data[:,1]**2, data[:,0]**3, np.prod(np.c_[data[:,0]**2,data[:,1]],axis=1), \ np.prod(np.c_[data[:,0],data[:,1]**2],axis=1), data[:,1]**3] C,_,_,_ = scipy.linalg.lstsq(A, data[:,2]) Z = np.dot(np.c_[np.ones(XX.shape), XX, YY, XX**2, XX*YY, YY**2, XX**3, XX**2*YY, XX*YY**2, YY**3], C).reshape(X.shape) # best-fit quadratic curve # plot points and fitted surface fig = plt.figure() ax = fig.gca(projection='3d') ax.plot_surface(X, Y, Z, rstride=1, cstride=1, alpha=0.2) ax.scatter(data[:,0], data[:,1], data[:,2], c='r', s=50) plt.xlabel('X') plt.ylabel('Y') ax.set_zlabel('Z') ax.axis('auto') ax.axis('tight') print(C) plt.show() I would also like an explanation after the fact if someone knows how accurate this method is. 
I would also like to know if there is a better way to find a plane of best fit. I am trying to find a plane that would best fit three wheel paths. The first wheel path is at y=0, the second at y=2, and the third at y=4. They all go from 0 to 94 in the x direction, but they all have different z values.
Inverse the spline interpolation SciPy
I have an array of X and Y data, and I also have a spline that outputs Y values based on X values. I need to get a spline that will perform the reverse operation, i.e. output the X value over the Y value. (interpolate.splrep(y, x, s=0) do not suggest) def f(x): return math.sin(x) def compare_func(func, a, b, eps): x_for_plot = list(np.arange(a, b, eps / 10)) x = list(np.arange(a, b, eps)) y_for_plot = [func(i) for i in x_for_plot] y = [func(i) for i in x] plt.plot(x_for_plot, y_for_plot, label='Orig') plt.scatter(x, y, label='Points', color='blue') spline = interpolate.splrep(x, y, s=0) y_interpolated = interpolate.splev(x_for_plot, spline, der=0) spline_inv = ??? x_calc = interpolate.splev(y_interpolated, spline_inv, der=0) print(x_calc) #x_calc and x_for_plot must be equal plt.plot(x_for_plot, y_interpolated, label='Interpolated') plt.xlabel('X') plt.ylabel('Y') plt.legend() plt.show() compare_func(f, -3, 3, 0.5)
How do I modify my contour plot to better display a region with a huge gradient?
I'm working on a contourplot with matplotlib and for my data I have a region where I have a strong gradient - Now I have the problem that matplotlib will display the regions with different colors, according to the selected colormap, and distribute the colors linear over the whole spectrum. Since 90% of my datapoints are within one end of the spectra, and only this small region acts completely differently, my contourplot looks kind of monochrome, as you can see in the attached picture Also, I've added some contours to make the differences in the values more visible. Since we have a huge gradient at a specific spot, there area lot of contours and it is super hard to see the underlying colors or the values. Is there a good way how to handle such "problematic" regions with matplotlib? Maybe to define another colormap there? I've tried to set some manual levels and to "cut out" the specific region, but it would be nice to find a way to display the value of this region Just to get a feeling: My minimal value to display is around 7, the maximum value is 145 and the average Here is the important part of my code: z = [] # z is a list of values that i've read in before from a file X = np.arange(0, 61, 1) Y = np.arange(0, 151, 1) z = z.reshape((len(Y), len(X))) blurred = gaussian_filter(z, sigma=2) # applies a gaussian filter to smooth the plot xx, yy = np.meshgrid(X, Y) # gets the grid for the plot procent = np.arange(np.min(z), np.max(z), 5) # levels for the contourlines newlevels = [5,10,15,20,30,40,50, 80, 100, 120, 140] # sets manual levels for the plot, where I've tried to set a stronger focus on the first part of the spectra plusmin = plt.contourf(xx,yy,z, origin='lower', extend='both', levels=procent,) levels = np.arange(np.min(z), np.max(z), 3) CS = plt.contourf(xx, yy, z, levels=newlevels, extend="both", cmap=cm.viridis) s = plt.contour(xx, yy, blurred, plusmin.levels, colors='white', linewidths=2) cbar = plt.colorbar(CS, fraction=0.042, pad=0.04) ax.clabel(s, 
fontsize=12, inline=1, colors ='white')
A solution might be to scale the colormap so that each colors is equally displayed. Here is a piece of code I use to handle this kind of problem. There is certainly more proper way to do it with matplotlib, but I do not know it. import matplotlib.pyplot as plt from matplotlib import colors import numpy as np import copy #----------------------- def repartition(z): """compute the repartition function of an array""" hist, bin_edges = np.histogram(z.flat[:], bins = 1000) x = 0.5 * (bin_edges[1:] + bin_edges[:-1]) y = np.cumsum(np.array(hist, float)) y = (y - y[0]) / (y[-1] - y[0]) return x, y #----------------------- def adjustcmap2data(Z, cmap, N = 16384): """scale the colormap so that all colors are equally displayed""" def cmap2xs(cmap): "convert cmap to matrix" sd = cmap._segmentdata xr = [tup[0] for tup in sd['red']] xg = [tup[0] for tup in sd['green']] xb = [tup[0] for tup in sd['blue']] return tuple([np.array(x) for x in xr, xg, xb]) def xs2cmap(cmap, xr, xg, xb): "convert matrix to cmap" sd = cmap._segmentdata for k in 'red', 'green', 'blue': sd[k] = list(sd[k]) for i in xrange(len(sd[k])): sd[k][i] = list(sd[k][i]) for i in xrange(len(sd['red'])): sd['red'][i] = (xr[i], sd['red'][i][1], sd['red'][i][2]) for i in xrange(len(sd['green'])): sd['green'][i] = (xg[i], sd['green'][i][1], sd['green'][i][2]) for i in xrange(len(sd['blue'])): sd['blue'][i] = (xb[i], sd['blue'][i][1], sd['blue'][i][2]) for k in 'red', 'green', 'blue': sd[k] = tuple(sd[k]) return colors.LinearSegmentedColormap('mycmap_%010.0f' % (np.random.randn() * 1.e10), sd, N) x, y = repartition(Z) x = (x - x[0]) / (x[-1] - x[0]) xr, xg, xb = cmap2xs(cmap) xrr = np.interp(xr, xp = y, fp = x) xgg = np.interp(xg, xp = y, fp = x) xbb = np.interp(xb, xp = y, fp = x) for x in xrr, xgg, xbb: x[x < 0.] = 0. x[x > 1.] = 1. x[0], x[-1] = 0., 1. 
x.sort() mycmap = xs2cmap(copy.deepcopy(cmap), xrr, xgg, xbb) return mycmap #--------------------- def fake_data(): """generate a fake dataset""" x = np.linspace(-1., 1., 256) y = np.linspace(-1., 1., 256) X, Y = np.meshgrid(x, y) Z = np.zeros_like(X) #create background noise for _ in xrange(100): x0 = np.random.randn() y0 = np.random.randn() Z += 0.05 * np.exp(-0.5 * (((X - x0) / 0.1) ** 2. + ((Y - y0) / 0.1) ** 2.)) #add strong peak Z += np.exp(-0.5 * (((X - 0.5) / 0.3) ** 2. + ((Y - 0.5) / 0.02) ** 2.)) return X, Y, Z #--------------------- if __name__ == "__main__": X, Y, Z = fake_data() plt.figure() cmap = plt.cm.spectral plt.pcolormesh(X, Y, Z, cmap = cmap) plt.colorbar() plt.contour(X, Y, Z, colors = "w") plt.gcf().show() plt.figure() scaledcmap = adjustcmap2data(Z, cmap = cmap) plt.pcolormesh(X, Y, Z, cmap = scaledcmap) plt.colorbar() plt.gcf().show() raw_input('pause') which should give you the following results 1) linear colorbar 2) scaled colorbar
Plotting a decision boundary separating 2 classes using Matplotlib's pyplot
I could really use a tip to help me plot a decision boundary to separate two classes of data. I created some sample data (from a Gaussian distribution) via Python NumPy. In this case, every data point is a 2D coordinate, i.e., a 1 column vector consisting of 2 rows. E.g., [ 1 2 ] Let's assume I have 2 classes, class1 and class2, and I created 100 data points for class1 and 100 data points for class2 via the code below (assigned to the variables x1_samples and x2_samples). mu_vec1 = np.array([0,0]) cov_mat1 = np.array([[2,0],[0,2]]) x1_samples = np.random.multivariate_normal(mu_vec1, cov_mat1, 100) mu_vec1 = mu_vec1.reshape(1,2).T # to 1-col vector mu_vec2 = np.array([1,2]) cov_mat2 = np.array([[1,0],[0,1]]) x2_samples = np.random.multivariate_normal(mu_vec2, cov_mat2, 100) mu_vec2 = mu_vec2.reshape(1,2).T When I plot the data points for each class, it would look like this: Now, I came up with an equation for a decision boundary to separate both classes and would like to add it to the plot. However, I am not really sure how I can plot this function: def decision_boundary(x_vec, mu_vec1, mu_vec2): g1 = (x_vec-mu_vec1).T.dot((x_vec-mu_vec1)) g2 = 2*( (x_vec-mu_vec2).T.dot((x_vec-mu_vec2)) ) return g1 - g2 I would really appreciate any help! EDIT: Intuitively (If I did my math right) I would expect the decision boundary to look somewhat like this red line when I plot the function...
Your question is more complicated than a simple plot : you need to draw the contour which will maximize the inter-class distance. Fortunately it's a well-studied field, particularly for SVM machine learning. The easiest method is to download the scikit-learn module, which provides a lot of cool methods to draw boundaries: scikit-learn: Support Vector Machines Code : # -*- coding: utf-8 -*- import numpy as np import matplotlib from matplotlib import pyplot as plt import scipy from sklearn import svm mu_vec1 = np.array([0,0]) cov_mat1 = np.array([[2,0],[0,2]]) x1_samples = np.random.multivariate_normal(mu_vec1, cov_mat1, 100) mu_vec1 = mu_vec1.reshape(1,2).T # to 1-col vector mu_vec2 = np.array([1,2]) cov_mat2 = np.array([[1,0],[0,1]]) x2_samples = np.random.multivariate_normal(mu_vec2, cov_mat2, 100) mu_vec2 = mu_vec2.reshape(1,2).T fig = plt.figure() plt.scatter(x1_samples[:,0],x1_samples[:,1], marker='+') plt.scatter(x2_samples[:,0],x2_samples[:,1], c= 'green', marker='o') X = np.concatenate((x1_samples,x2_samples), axis = 0) Y = np.array([0]*100 + [1]*100) C = 1.0 # SVM regularization parameter clf = svm.SVC(kernel = 'linear', gamma=0.7, C=C ) clf.fit(X, Y) Linear Plot w = clf.coef_[0] a = -w[0] / w[1] xx = np.linspace(-5, 5) yy = a * xx - (clf.intercept_[0]) / w[1] plt.plot(xx, yy, 'k-') MultiLinear Plot C = 1.0 # SVM regularization parameter clf = svm.SVC(kernel = 'rbf', gamma=0.7, C=C ) clf.fit(X, Y) h = .02 # step size in the mesh # create a mesh to plot in x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1 y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1 xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h)) # Plot the decision boundary. For that, we will assign a color to each # point in the mesh [x_min, m_max]x[y_min, y_max]. 
Z = clf.predict(np.c_[xx.ravel(), yy.ravel()]) # Put the result into a color plot Z = Z.reshape(xx.shape) plt.contour(xx, yy, Z, cmap=plt.cm.Paired) Implementation If you want to implement it yourself, you need to solve the following quadratic equation: The Wikipedia article Unfortunately, for non-linear boundaries like the one you draw, it's a difficult problem relying on a kernel trick but there isn't a clear cut solution.
Based on the way you've written decision_boundary you'll want to use the contour function, as Joe noted above. If you just want the boundary line, you can draw a single contour at the 0 level: f, ax = plt.subplots(figsize=(7, 7)) c1, c2 = "#3366AA", "#AA3333" ax.scatter(*x1_samples.T, c=c1, s=40) ax.scatter(*x2_samples.T, c=c2, marker="D", s=40) x_vec = np.linspace(*ax.get_xlim()) ax.contour(x_vec, x_vec, decision_boundary(x_vec, mu_vec1, mu_vec2), levels=[0], cmap="Greys_r") Which makes:
Those were some great suggestions, thanks a lot for your help! I ended up solving the equation analytically and this is the solution I ended up with (I just want to post it for future reference: # 2-category classification with random 2D-sample data # from a multivariate normal distribution import numpy as np from matplotlib import pyplot as plt def decision_boundary(x_1): """ Calculates the x_2 value for plotting the decision boundary.""" return 4 - np.sqrt(-x_1**2 + 4*x_1 + 6 + np.log(16)) # Generating a Gaussion dataset: # creating random vectors from the multivariate normal distribution # given mean and covariance mu_vec1 = np.array([0,0]) cov_mat1 = np.array([[2,0],[0,2]]) x1_samples = np.random.multivariate_normal(mu_vec1, cov_mat1, 100) mu_vec1 = mu_vec1.reshape(1,2).T # to 1-col vector mu_vec2 = np.array([1,2]) cov_mat2 = np.array([[1,0],[0,1]]) x2_samples = np.random.multivariate_normal(mu_vec2, cov_mat2, 100) mu_vec2 = mu_vec2.reshape(1,2).T # to 1-col vector # Main scatter plot and plot annotation f, ax = plt.subplots(figsize=(7, 7)) ax.scatter(x1_samples[:,0], x1_samples[:,1], marker='o', color='green', s=40, alpha=0.5) ax.scatter(x2_samples[:,0], x2_samples[:,1], marker='^', color='blue', s=40, alpha=0.5) plt.legend(['Class1 (w1)', 'Class2 (w2)'], loc='upper right') plt.title('Densities of 2 classes with 25 bivariate random patterns each') plt.ylabel('x2') plt.xlabel('x1') ftext = 'p(x|w1) ~ N(mu1=(0,0)^t, cov1=I)\np(x|w2) ~ N(mu2=(1,1)^t, cov2=I)' plt.figtext(.15,.8, ftext, fontsize=11, ha='left') # Adding decision boundary to plot x_1 = np.arange(-5, 5, 0.1) bound = decision_boundary(x_1) plt.plot(x_1, bound, 'r--', lw=3) x_vec = np.linspace(*ax.get_xlim()) x_1 = np.arange(0, 100, 0.05) plt.show() And the code can be found here EDIT: I also have a convenience function for plotting decision regions for classifiers that implement a fit and predict method, e.g., the classifiers in scikit-learn, which is useful if the solution cannot be found 
analytically. A more detailed description of how it works can be found here.
You can create your own equation for the boundary: where you have to find the positions x0 and y0, as well as the constants ai and bi for the radius equation. So, you have 2*(n+1)+2 variables. Using scipy.optimize.leastsq is straightforward for this type of problem. The code attached below builds the residual for the leastsq penalizing the points outside the boundary. The result for your problem, obtained with: x, y = find_boundary(x2_samples[:,0], x2_samples[:,1], n) ax.plot(x, y, '-k', lw=2.) x, y = find_boundary(x1_samples[:,0], x1_samples[:,1], n) ax.plot(x, y, '--k', lw=2.) using n=1: using n=2: using n=5: using n=7: import numpy as np from numpy import sin, cos, pi from scipy.optimize import leastsq def find_boundary(x, y, n, plot_pts=1000): def sines(theta): ans = np.array([sin(i*theta) for i in range(n+1)]) return ans def cosines(theta): ans = np.array([cos(i*theta) for i in range(n+1)]) return ans def residual(params, x, y): x0 = params[0] y0 = params[1] c = params[2:] r_pts = ((x-x0)**2 + (y-y0)**2)**0.5 thetas = np.arctan2((y-y0), (x-x0)) m = np.vstack((sines(thetas), cosines(thetas))).T r_bound = m.dot(c) delta = r_pts - r_bound delta[delta>0] *= 10 return delta # initial guess for x0 and y0 x0 = x.mean() y0 = y.mean() params = np.zeros(2 + 2*(n+1)) params[0] = x0 params[1] = y0 params[2:] += 1000 popt, pcov = leastsq(residual, x0=params, args=(x, y), ftol=1.e-12, xtol=1.e-12) thetas = np.linspace(0, 2*pi, plot_pts) m = np.vstack((sines(thetas), cosines(thetas))).T c = np.array(popt[2:]) r_bound = m.dot(c) x_bound = popt[0] + r_bound*cos(thetas) y_bound = popt[1] + r_bound*sin(thetas) return x_bound, y_bound
I like the mglearn library to draw decision boundaries. Here is one example from the book "Introduction to Machine Learning with Python" by A. Mueller: fig, axes = plt.subplots(1, 3, figsize=(10, 3)) for n_neighbors, ax in zip([1, 3, 9], axes): clf = KNeighborsClassifier(n_neighbors=n_neighbors).fit(X, y) mglearn.plots.plot_2d_separator(clf, X, fill=True, eps=0.5, ax=ax, alpha=.4) mglearn.discrete_scatter(X[:, 0], X[:, 1], y, ax=ax) ax.set_title("{} neighbor(s)".format(n_neighbors)) ax.set_xlabel("feature 0") ax.set_ylabel("feature 1") axes[0].legend(loc=3)
If you want to use scikit learn, you can write your code like this: import numpy as np import pandas as pd import matplotlib.pyplot as plt from sklearn.linear_model import LogisticRegression # read data data = pd.read_csv('ex2data1.txt', header=None) X = data[[0,1]].values y = data[2] # use LogisticRegression log_reg = LogisticRegression() log_reg.fit(X, y) # Coefficient of the features in the decision function. (from theta 1 to theta n) parameters = log_reg.coef_[0] # Intercept (a.k.a. bias) added to the decision function. (theta 0) parameter0 = log_reg.intercept_ # Plotting the decision boundary fig = plt.figure(figsize=(10,7)) x_values = [np.min(X[:, 1] -5 ), np.max(X[:, 1] +5 )] # calcul y values y_values = np.dot((-1./parameters[1]), (np.dot(parameters[0],x_values) + parameter0)) colors=['red' if l==0 else 'blue' for l in y] plt.scatter(X[:, 0], X[:, 1], label='Logistics regression', color=colors) plt.plot(x_values, y_values, label='Decision Boundary') plt.show() see: Building-a-Logistic-Regression-with-Scikit-learn
Just solved a very similar problem with a different approach (root finding) and wanted to post this alternative as answer here for future reference: def discr_func(x, y, cov_mat, mu_vec): """ Calculates the value of the discriminant function for a dx1 dimensional sample given covariance matrix and mean vector. Keyword arguments: x_vec: A dx1 dimensional numpy array representing the sample. cov_mat: numpy array of the covariance matrix. mu_vec: dx1 dimensional numpy array of the sample mean. Returns a float value as result of the discriminant function. """ x_vec = np.array([[x],[y]]) W_i = (-1/2) * np.linalg.inv(cov_mat) assert(W_i.shape[0] > 1 and W_i.shape[1] > 1), 'W_i must be a matrix' w_i = np.linalg.inv(cov_mat).dot(mu_vec) assert(w_i.shape[0] > 1 and w_i.shape[1] == 1), 'w_i must be a column vector' omega_i_p1 = (((-1/2) * (mu_vec).T).dot(np.linalg.inv(cov_mat))).dot(mu_vec) omega_i_p2 = (-1/2) * np.log(np.linalg.det(cov_mat)) omega_i = omega_i_p1 - omega_i_p2 assert(omega_i.shape == (1, 1)), 'omega_i must be a scalar' g = ((x_vec.T).dot(W_i)).dot(x_vec) + (w_i.T).dot(x_vec) + omega_i return float(g) #g1 = discr_func(x, y, cov_mat=cov_mat1, mu_vec=mu_vec_1) #g2 = discr_func(x, y, cov_mat=cov_mat2, mu_vec=mu_vec_2) x_est50 = list(np.arange(-6, 6, 0.1)) y_est50 = [] for i in x_est50: y_est50.append(scipy.optimize.bisect(lambda y: discr_func(i, y, cov_mat=cov_est_1, mu_vec=mu_est_1) - \ discr_func(i, y, cov_mat=cov_est_2, mu_vec=mu_est_2), -10,10)) y_est50 = [float(i) for i in y_est50] Here is the result: (blue the quadratic case, red the linear case (equal variances)
I know this question has been answered in a very thorough way analytically. I just wanted to share a possible 'hack' to the problem. It is unwieldy but gets the job done. Start by building a mesh grid of the 2d area and then based on the classifier just build a class map of the entire space. Subsequently detect changes in the decision made row-wise and store the edges points in a list and scatter plot the points. def disc(x): # returns the class of the point based on location x = [x,y] temp = 0.5 + 0.5*np.sign(disc0(x)-disc1(x)) # disc0() and disc1() are the discriminant functions of the respective classes return 0*temp + 1*(1-temp) num = 200 a = np.linspace(-4,4,num) b = np.linspace(-6,6,num) X,Y = np.meshgrid(a,b) def decColor(x,y): temp = np.zeros((num,num)) print x.shape, np.size(x,axis=0) for l in range(num): for m in range(num): p = np.array([x[l,m],y[l,m]]) #print p temp[l,m] = disc(p) return temp boundColorMap = decColor(X,Y) group = 0 boundary = [] for x in range(num): group = boundColorMap[x,0] for y in range(num): if boundColorMap[x,y]!=group: boundary.append([X[x,y],Y[x,y]]) group = boundColorMap[x,y] boundary = np.array(boundary) Sample Decision Boundary for a simple bivariate gaussian classifier
Given two bi-variate normal distributions, you can use Gaussian Discriminant Analysis (GDA) to come up with a decision boundary as the difference between the log of the 2 pdf's. Here's a way to do it using scipy multivariate_normal (the code is not optimized): import numpy as np import matplotlib.pyplot as plt from scipy.stats import multivariate_normal from numpy.linalg import norm from numpy.linalg import inv from scipy.spatial.distance import mahalanobis def normal_scatter(mean, cov, p): size = 100 sigma_x = cov[0,0] sigma_y = cov[1,1] mu_x = mean[0] mu_y = mean[1] x_ps, y_ps = np.random.multivariate_normal(mean, cov, size).T x,y = np.mgrid[mu_x-3*sigma_x:mu_x+3*sigma_x:1/size, mu_y-3*sigma_y:mu_y+3*sigma_y:1/size] grid = np.empty(x.shape + (2,)) grid[:, :, 0] = x; grid[:, :, 1] = y z = p*multivariate_normal.pdf(grid, mean, cov) return x_ps, y_ps, x,y,z # Dist 1 mu_1 = np.array([1, 1]) cov_1 = .5*np.array([[1, 0], [0, 1]]) p_1 = .5 x_ps, y_ps, x,y,z = normal_scatter(mu_1, cov_1, p_1) plt.plot(x_ps,y_ps,'x') plt.contour(x, y, z, cmap='Blues', levels=3) # Dist 2 mu_2 = np.array([2, 1]) #cov_2 = np.array([[2, -1], [-1, 1]]) cov_2 = cov_1 p_2 = .5 x_ps, y_ps, x,y,z = normal_scatter(mu_2, cov_2, p_2) plt.plot(x_ps,y_ps,'.') plt.contour(x, y, z, cmap='Oranges', levels=3) # Decision Boundary X = np.empty(x.shape + (2,)) X[:, :, 0] = x; X[:, :, 1] = y g = np.log(p_1*multivariate_normal.pdf(X, mu_1, cov_1)) - np.log(p_2*multivariate_normal.pdf(X, mu_2, cov_2)) plt.contour(x, y, g, [0]) plt.grid() plt.axhline(y=0, color='k') plt.axvline(x=0, color='k') plt.plot([mu_1[0], mu_2[0]], [mu_1[1], mu_2[1]], 'k') plt.show() If p_1 != p_2, then you get non-linear boundary. The decision boundary is given by g above. Then to plot the decision hyper-plane (line in 2D), you need to evaluate g for a 2D mesh, then get the contour which will give a separating line. You can also assume to have equal co-variance matrices for both distributions, which will give a linear decision boundary. 
In this case, you can replace the calculation of g in the above code with the following: W = inv(cov_1).dot(mu_1-mu_2) x_0 = 1/2*(mu_1+mu_2) - cov_1.dot(np.log(p_1/p_2)).dot((mu_1-mu_2)/mahalanobis(mu_1, mu_2, cov_1)) X = np.empty(x.shape + (2,)) X[:, :, 0] = x; X[:, :, 1] = y g = (X-x_0).dot(W)
i use this method from this book python-machine-learning-2nd.pdf URL from matplotlib.colors import ListedColormap import matplotlib.pyplot as plt def plot_decision_regions(X, y, classifier, test_idx=None, resolution=0.02): # setup marker generator and color map markers = ('s', 'x', 'o', '^', 'v') colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan') cmap = ListedColormap(colors[:len(np.unique(y))]) # plot the decision surface x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1 x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1 xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution), np.arange(x2_min, x2_max, resolution)) Z = classifier.predict(np.array([xx1.ravel(), xx2.ravel()]).T) Z = Z.reshape(xx1.shape) plt.contourf(xx1, xx2, Z, alpha=0.3, cmap=cmap) plt.xlim(xx1.min(), xx1.max()) plt.ylim(xx2.min(), xx2.max()) for idx, cl in enumerate(np.unique(y)): plt.scatter(x=X[y == cl, 0], y=X[y == cl, 1], alpha=0.8, c=colors[idx], marker=markers[idx], label=cl, edgecolor='black') # highlight test samples if test_idx: # plot all samples X_test, y_test = X[test_idx, :], y[test_idx] plt.scatter(X_test[:, 0], X_test[:, 1], c='', edgecolor='black', alpha=1.0, linewidth=1, marker='o', s=100, label='test set')
Since version 1.1, sklearn has a function for this: https://scikit-learn.org/stable/modules/generated/sklearn.inspection.DecisionBoundaryDisplay.html#sklearn.inspection.DecisionBoundaryDisplay