Multivariate KDE Scipy Stats - what if it's not Gaussian?

Multivariate KDE Scipy Stats - what if it's not Gaussian? - python

I have some 2D data that I am smoothing using:
from scipy.stats import gaussian_kde
kde = gaussian_kde(data)
but what if my data isn't Gaussian/tophat/the other options? Mine looks more elliptical before smoothing, so should I really have a different bandwidth in x and then y? The variance in one direction is a lot higher, and also the values of the x axis are higher, so it feels like a simple Gaussian might miss something?

This is what I get with your defined X and Y. Seems good. Were you expecting something different?
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
def generate(n):
# generate data
np.random.seed(42)
x = np.random.normal(size=n, loc=1, scale=0.01)
np.random.seed(1)
y = np.random.normal(size=n, loc=200, scale=100)
return x, y
x, y = generate(100)
xmin = x.min()
xmax = x.max()
ymin = y.min()
ymax = y.max()
X, Y = np.mgrid[xmin:xmax:100j, ymin:ymax:100j]
positions = np.vstack([X.ravel(), Y.ravel()])
values = np.vstack([x, y])
kernel = stats.gaussian_kde(values)
Z = np.reshape(kernel(positions).T, X.shape)
fig, ax = plt.subplots(figsize=(7, 7))
ax.imshow(np.rot90(Z), cmap=plt.cm.gist_earth_r,
extent=[xmin, xmax, ymin, ymax],
aspect='auto', alpha=.75
)
ax.plot(x, y, 'ko', ms=5)
ax.set_xlim([xmin, xmax])
ax.set_ylim([ymin, ymax])
plt.show()
The distributions of x and y are Gaussian.
You can verify with seaborn too
import pandas as pd
import seaborn as sns
# I pass a DataFrame because passing
# (x,y) alone will be soon deprecated
g = sns.jointplot(data=pd.DataFrame({'x':x, 'y':y}), x='x', y='y')
g.plot_joint(sns.kdeplot, color="r", zorder=0, levels=6)
update
Kernel Density Estimate of 2-dimensional data is done separately along each axis and then join together.
Let's make an example with the dataset we already used.
As we can see in the seaborn jointplot, you have not only the estimated 2d-kde but also marginal distributions of x and y (the histograms).
So, step by step, let's estimate the density of x and y and then evaluate the density over a linearspace
kde_x = sps.gaussian_kde(x)
kde_x_space = np.linspace(x.min(), x.max(), 100)
kde_x_eval = kde_x.evaluate(kde_x_space)
kde_x_eval /= kde_x_eval.sum()
kde_y = sps.gaussian_kde(y)
kde_y_space = np.linspace(y.min(), y.max(), 100)
kde_y_eval = kde_y.evaluate(kde_y_space)
kde_y_eval /= kde_y_eval.sum()
fig, ax = plt.subplots(1, 2, figsize=(12, 4))
ax[0].plot(kde_x_space, kde_x_eval, 'k.')
ax[0].set(title='KDE of x')
ax[1].plot(kde_y_space, kde_y_eval, 'k.')
ax[1].set(title='KDE of y')
plt.show()
So we now have the marginal distributions of x and y. These are probability density functions so, the joint-probability of x and y can be seen as the intersection of independent events x and y, thus we can multiply the estimated probability density of x and y in a 2d-matrix and plot on 3d projection
# Grid of x and y
X, Y = np.meshgrid(kde_x_space, kde_y_space)
# Grid of probability density
kX, kY = np.meshgrid(kde_x_eval, kde_y_eval)
# Intersection
Z = kX * kY
fig, ax = plt.subplots(
2, 2,
subplot_kw={"projection": "3d"},
figsize=(10, 10))
for i, (elev, anim, title) in enumerate(zip([10, 10, 25, 25],
[0, -90, 25, -25],
['y axis', 'x axis', 'view 1', 'view 2']
)):
# Plot the surface.
surf = ax.flat[i].plot_surface(X, Y, Z, cmap=plt.cm.gist_earth_r,
linewidth=0, antialiased=False, alpha=.75)
ax.flat[i].scatter(x, y, zs=0, zdir='z', c='k')
ax.flat[i].set(
xlabel='x', ylabel='y',
title=title
)
ax.flat[i].view_init(elev=elev, azim=anim)
plt.show()
This is a very simple and naif method but only to have an idea on how it works and why x and y scales don't matter for a 2d-KDE.

Related

Gradient 2D plot using contourf

I did a test code brigging something I saw on stack on different topic, and try to assemble it to make what I need : a filled curve with gradient.
After validate this test code I will make a subplot (4 plots for 4 weeks) with the same min/max for all plot (it's a power consumption).
My code :
from matplotlib import pyplot as plt
import numpy as np
# random x
x = range(100)
# smooth random y
y = 0
result = []
for _ in x:
result.append(y)
y += np.random.normal(loc=0, scale=1)#, size=len(x))
y = result
y = list(map(abs, y))
# creation of z for contour
z1 = min(y)
z3 = max(y)/(len(x)+1)
z2 = max(y)-z3
z = [[z] * len(x) for z in np.arange(z1,z2,z3)]
num_bars = len(x) # more bars = smoother gradient
# plt.contourf(x, y, z, num_bars, cmap='greys')
plt.contourf(x, y, z, num_bars, cmap='cool', levels=101)
background_color = 'w'
plt.fill_between(
x,
y,
y2=max(y),
color=background_color
)
But everytime I make the code run, the result display a different gradient scale, that is not smooth neither even straight right.
AND sometime the code is in error : TypeError: Length of y (100) must match number of rows in z (101)
I'm on it since too many time, turning around, and can't figure where I'm wrong...

I finally find something particularly cool, how to :
have both filled gradient curves in a different color (thanks to JohanC in this topic)
use x axis with datetime (thanks to Ffisegydd in this topic)
Here the code :
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import matplotlib.dates as mdates
np.random.seed(2022)
st_date = '2022-11-01 00:00:00'
st_date = pd.to_datetime(st_date)
en_date = st_date + pd.DateOffset(days=7)
x = pd.date_range(start=st_date,end=en_date,freq='30min')
x = mdates.date2num(x)
y = np.random.normal(0.01, 1, len(x)).cumsum()
fig, ax = plt.subplots(figsize=(18, 5))
ax.plot(x, y, color='grey')
########################
# positives fill
#######################
grad1 = ax.imshow(
np.linspace(0, 1, 256).reshape(-1, 1),
cmap='Blues',
vmin=-0.5,
aspect='auto',
extent=[x.min(), x.max(), 0, y.max()],
# extent=[x[0], x[1], 0, y.max()],
origin='lower'
)
poly_pos = ax.fill_between(x, y.min(), y, alpha=0.1)
grad1.set_clip_path(
poly_pos.get_paths()[0],
transform=ax.transData
)
poly_pos.remove()
########################
# negatives fill
#######################
grad2 = ax.imshow(
np.linspace(0, 1, 256).reshape(-1, 1),
cmap='Reds',
vmin=-0.5,
aspect='auto',
extent=[x.min(), x.max(), y.min(), 0],
origin='upper'
)
poly_neg = ax.fill_between(x, y, y.max(), alpha=0.1)
grad2.set_clip_path(
poly_neg.get_paths()[0],
transform=ax.transData
)
poly_neg.remove()
########################
# decorations and formatting plot
########################
ax.xaxis_date()
date_format = mdates.DateFormatter('%d-%b %H:%M')
ax.xaxis.set_major_formatter(date_format)
fig.autofmt_xdate()
ax.grid(True)

Summing overlapping bubbles with gradient in python

I want to plot a map of specific sites to interpret their effects on the surrounding city environment. To do this, I would like to plot the sites as bubbles, with a decreasing gradient towards the edge of the circle, and where the gradient of the overlapping circles is the sum.
As an example I've used this:
# libraries
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
# create data
x = np.random.rand(15)
y = x+np.random.rand(15)
z = x+np.random.rand(15)
z=z*z
# Change color with c and alpha. I map the color to the X axis value.
plt.scatter(x, y, s=1500, c=z, cmap="Blues", alpha=0.4, edgecolors="grey", linewidth=1)
# Add titles (main and on axis)
plt.xlabel("the X axis")
plt.ylabel("the Y axis")
plt.title("A colored bubble plot")
plt.show();
which produces:
However, the color of the circles does not decay, nor do they seem to sum the intended way.
Is there any smart way to do this, or could it possibly be easier with some kind of heatmap solution, or using grids and a decaying effect on adjacent tiles?

Here is an approach with densities placed at each x and y, enlarged by the z value.
Depending on the distance to each x,y position a quantity is added.
import matplotlib.pyplot as plt
import numpy as np
from numpy.linalg import norm # calculate the length of a vector
# import seaborn as sns
# create data
x = np.random.rand(15)
y = x+np.random.rand(15)
z = x+np.random.rand(15)
z=z*z
fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(12,5))
# Change color with c and alpha. I map the color to the X axis value.
ax1.scatter(x, y, s=1500, c=z, cmap="Blues", alpha=0.4, edgecolors="grey", linewidth=1)
ax1.set_xlabel("the X axis")
ax1.set_ylabel("the Y axis")
ax1.set_title("A colored bubble plot")
centers = np.dstack((x, y))[0]
xmin = min(x)-0.2
xmax = max(x)+0.2
ymin = min(y)-0.2
ymax = max(y)+0.2
zmin = min(z)
zmax = max(z)
xx, yy = np.meshgrid(np.linspace(xmin, xmax, 100),
np.linspace(ymin, ymax, 100))
xy = np.dstack((xx, yy))
zz = np.zeros_like(xx)
for ci, zi in zip(centers, z):
sigma = zi / zmax * 0.3
sigma2 = sigma ** 2
zz += np.exp(- norm(xy - ci, axis=-1) ** 2 / sigma2 / 2)
img = ax2.imshow(zz, extent=[xmin, xmax, ymin, ymax], origin='lower', aspect='auto', cmap='Blues')
#plt.colorbar(img, ax=ax2)
ax2.set_xlabel("the X axis")
ax2.set_ylabel("the Y axis")
ax2.set_title("Density depending on z")
plt.show()
The plot compares the two approaches using the same random data.

How to take into account the data's uncertainty (standard deviation) when fitting with scipy.linalg.lstsq?

I am trying to surface fit 3d data (z is a function of x and y). I have assymetrical error bars for each point. I would like the fit to take this uncertainty into account.
I am using scipy.linalg.lstsq(). It does not have any option for uncertainties in its arguments.
I am trying to adapt some code found on this page.
import numpy as np
import scipy.linalg
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
# Create data with x and y random over [-2, 2], and z a Gaussian function of x and y.
np.random.seed(12345)
x = 2 * (np.random.random(500) - 0.5)
y = 2 * (np.random.random(500) - 0.5)
def f(x, y):
return np.exp(-(x + y ** 2))
z = f(x, y)
data = np.c_[x,y,z]
# regular grid covering the domain of the data
mn = np.min(data, axis=0)
mx = np.max(data, axis=0)
X,Y = np.meshgrid(np.linspace(mn[0], mx[0], 20), np.linspace(mn[1], mx[1], 20))
XX = X.flatten()
YY = Y.flatten()
# best-fit quadratic curve (2nd-order)
A = np.c_[np.ones(data.shape[0]), data[:,:2], np.prod(data[:,:2], axis=1), data[:,:2]**2]
C,_,_,_ = scipy.linalg.lstsq(A, data[:,2])
# evaluate it on a grid
Z = np.dot(np.c_[np.ones(XX.shape), XX, YY, XX*YY, XX**2, YY**2], C).reshape(X.shape)
# plot points and fitted surface using Matplotlib
fig = plt.figure(figsize=(10, 10))
ax = fig.gca(projection='3d')
ax.plot_surface(X, Y, Z, rstride=1, cstride=1, alpha=0.2)
ax.scatter(data[:,0], data[:,1], data[:,2], c='r', s=50)
plt.xlabel('X')
plt.ylabel('Y')
ax.set_zlabel('Z')
ax.axis('equal')
ax.axis('tight')

Contourf on the faces of a Matplotlib cube

I am trying to 'paint' the faces of a cube with a contourf function using Python Matplotlib. Is this possible?
This is similar idea to what was done here but obviously I cannot use patches. Similarly, I don't think I can use add_collection3d like this as it only supports PolyCollection, LineColleciton and PatchCollection.
I have been trying to use contourf on a fig.gca(projection='3d'). Toy example below.
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import numpy as np
plt.close('all')
fig = plt.figure()
ax = fig.gca(projection='3d')
############################################
# plotting the 'top' layer works okay... #
############################################
X = np.linspace(-5, 5, 43)
Y = np.linspace(-5, 5, 28)
X, Y = np.meshgrid(X, Y)
varone=np.random.rand(75,28,43)
Z=varone[0,:,:]
cset = ax.contourf(X, Y, Z, zdir='z', offset=1,
levels=np.linspace(np.min(Z),np.max(Z),30),cmap='jet')
#see [1]
plt.show()
#################################################
# but now trying to plot a vertical slice.... #
#################################################
plt.close('all')
fig = plt.figure()
ax = fig.gca(projection='3d')
Z=varone[::-1,:,-1]
X = np.linspace(-5, 5, 28)
Y = np.linspace(-5, 5, 75)
X, Y = np.meshgrid(X, Y)
#this 'projection' doesn't result in what I want, I really just want to rotate it
cset = ax.contourf(X, Y, Z, offset=5,zdir='x',
levels=np.linspace(np.min(Z),np.max(Z),30),cmap='jet')
#here's what it should look like....
ax=fig.add_subplot(1, 2,1)
cs1=ax.contourf(X,Y,Z,levels=np.linspace(np.min(Z),np.max(Z),30),cmap='jet')
#see [2]
plt.show()
1 From the example, the top surface comes easily:
2 But I'm not sure how to do the sides. Left side of this plot is what the section should look like (but rotated)...
Open to other python approaches. The data I'm actually plotting are geophysical netcdf files.

You have to assign the data to the right axis. The zig-zag results from the fact that now you are at x = const and have your oscillation in the z-direction (from the random data, which is generated between 0 and 1).
If you you assign the matrixes differently in your example, you end up with the desired result:
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import numpy as np
plt.close('all')
fig = plt.figure()
ax = fig.gca(projection='3d')
X = np.linspace(-5, 5, 43)
Y = np.linspace(-5, 5, 28)
X, Y = np.meshgrid(X, Y)
varone=np.random.rand(75,28,43) * 5.0 - 10.0
Z=varone[0,:,:]
cset = [[],[],[]]
# this is the example that worked for you:
cset[0] = ax.contourf(X, Y, Z, zdir='z', offset=5,
levels=np.linspace(np.min(Z),np.max(Z),30),cmap='jet')
# now, for the x-constant face, assign the contour to the x-plot-variable:
cset[1] = ax.contourf(Z, Y, X, zdir='x', offset=5,
levels=np.linspace(np.min(Z),np.max(Z),30),cmap='jet')
# likewise, for the y-constant face, assign the contour to the y-plot-variable:
cset[2] = ax.contourf(X, Z, Y, zdir='y', offset=-5,
levels=np.linspace(np.min(Z),np.max(Z),30),cmap='jet')
# setting 3D-axis-limits:
ax.set_xlim3d(-5,5)
ax.set_ylim3d(-5,5)
ax.set_zlim3d(-5,5)
plt.show()
The result looks like this:

The answer given below is not fully satisfying. Indeed, planes in x, y and z direction reproduce the same field.
Hereafter, a function that allows to represent the correct field in each of the planes.
import numpy as np
import matplotlib.pyplot as plt
def plot_cube_faces(arr, ax):
"""
External faces representation of a 3D array with matplotlib
Parameters
----------
arr: numpy.ndarray()
3D array to handle
ax: Axes3D object
Axis to work with
"""
x0 = np.arange(arr.shape[0])
y0 = np.arange(arr.shape[1])
z0 = np.arange(arr.shape[2])
x, y, z = np.meshgrid(x0, y0, z0)
xmax, ymax, zmax = max(x0), max(y0), max(z0)
vmin, vmax = np.min(arr), np.max(arr)
ax.contourf(x[:, :, 0], y[:, :, 0], arr[:, :, -1].T,
zdir='z', offset=zmax, vmin=vmin, vmax=vmax)
ax.contourf(x[0, :, :].T, arr[:, 0, :].T, z[0, :, :].T,
zdir='y', offset=0, vmin=vmin, vmax=vmax)
ax.contourf(arr[-1, :, :].T, y[:, 0, :].T, z[:, 0, :].T,
zdir='x', offset=xmax, vmin=vmin, vmax=vmax)
x0 = np.arange(30)
y0 = np.arange(20)
z0 = np.arange(10)
x, y, z = np.meshgrid(x0, y0, z0)
arr = (x + y + z) // 10
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
plot_cube_faces(arr, ax)
plt.show()

Contour graph in python

How would I make a countour grid in python using matplotlib.pyplot, where the grid is one colour where the z variable is below zero and another when z is equal to or larger than zero? I'm not very familiar with matplotlib so if anyone can give me a simple way of doing this, that would be great.
So far I have:
x= np.arange(0,361)
y= np.arange(0,91)
X,Y = np.meshgrid(x,y)
area = funcarea(L,D,H,W,X,Y) #L,D,H and W are all constants defined elsewhere.
plt.figure()
plt.contourf(X,Y,area)
plt.show()

You can do this using the levels keyword in contourf.
import numpy as np
import matplotlib.pyplot as plt
fig, axs = plt.subplots(1,2)
x = np.linspace(0, 1, 100)
X, Y = np.meshgrid(x, x)
Z = np.sin(X)*np.sin(Y)
levels = np.linspace(-1, 1, 40)
zdata = np.sin(8*X)*np.sin(8*Y)
cs = axs[0].contourf(X, Y, zdata, levels=levels)
fig.colorbar(cs, ax=axs[0], format="%.2f")
cs = axs[1].contourf(X, Y, zdata, levels=[-1,0,1])
fig.colorbar(cs, ax=axs[1])
plt.show()
You can change the colors by choosing and different colormap; using vmin, vmax; etc.

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Multivariate KDE Scipy Stats - what if it's not Gaussian? - python

Related

Gradient 2D plot using contourf

Summing overlapping bubbles with gradient in python

How to take into account the data's uncertainty (standard deviation) when fitting with scipy.linalg.lstsq?

Contourf on the faces of a Matplotlib cube

Contour graph in python

Categories

Resources