extracting data from sns.kdeplot python - python

Is it possible to extract the data from a sns.kdeplot() before plotting?
ie. without using the function
y.get_lines()[0].get_data() post plotting

This can be done by extracting the line data from the matplotlib Axes object:
import numpy as np
from seaborn import kdeplot
my_data = np.random.randn(1000)
my_kde = kdeplot(my_data)
line = my_kde.lines[0]
x, y = line.get_data()
fig, ax = plt.subplots()
ax.plot(x[x>0], y[x>0])
alternatively the statsmodels way:
import statsmodels.api as sm
dens = sm.nonparametric.KDEUnivariate(np.random.randn(1000))
dens.fit()
x =np.linspace(0,1,100) #restrict range to (0,1)
y = dens.evaluate(x)
plt.plot(x,y)

Based on statsmodels's documentation:
import numpy as np
import seaborn as sns
import statsmodels.api as sm
import matplotlib.pyplot as plt
# generate bimodal disrtibution
X1 = np.random.normal(100, 10, 250)
X2 = np.random.normal(10, 20, 250)
X = np.concatenate([X1, X2])
# get density from seaborn
x, y = sns.kdeplot(X).lines[0].get_data()
# get density from statsmodel
kde = sm.nonparametric.KDEUnivariate(X).fit()
xx, yy = (kde.support, kde.density)
# compare outputs
plt.plot(x, y, label='from sns')
plt.plot(xx, yy, label='from statsmodels')
plt.legend()

Related

How to plot property distribution with interpolation?

I have a dataframe like this:
import random
import matplotlib.pyplot as plt
plt.style.use('ggplot')
fig = plt.figure(figsize=(16,8))
import pandas as pd
data = pd.DataFrame({"X":random.sample(range(530000, 560000), 60),
"Y":random.sample(range(8580000, 8620000), 60),
"PROPERTY":random.choices(range(0, 30), k=60)})
I saw an example where I could plot my PROPERTY along X and Y coordinates as a triangle spatial distribution:
x = data["X"]
y = data["Y"]
z = data["PROPERTY"]
# Plot Triangular Color Filled Contour
plt.tricontourf(x, y, z, cmap="rainbow")
plt.colorbar()
plt.tricontour(x, y, z)
# Set well shapes
plt.scatter(x, y, color='black')
plt.xlabel("X")
plt.ylabel("Y")
Althoug I would like to plot it as a different map type, not with these abrupt data transitions. Maybe like kriging or smooth interpolation like this example:
Anyone could show me an example?
I used the pykrige package to interpolate the point data into a grid field.
The code and output figure are here.
import random
import matplotlib.pyplot as plt
plt.style.use('ggplot')
fig = plt.figure(figsize=(6,4))
import pandas as pd
from pykrige import OrdinaryKriging
import numpy as np
random.seed(100)
data = pd.DataFrame({"X":random.sample(range(530000, 560000), 60),
"Y":random.sample(range(8580000, 8620000), 60),
"PROPERTY":random.choices(range(0, 30), k=60)})
x = data["X"]
y = data["Y"]
z = data["PROPERTY"]
x1 = np.linspace(530000.,560000,700)
y1 = np.linspace(8580000,8620000,400)
dict1= {'sill': 1, 'range': 6500.0, 'nugget': .1}
OK = OrdinaryKriging(x,y,z,variogram_model='gaussian',
variogram_parameters=dict1,nlags=6)
zgrid,ss = OK.execute('grid',x1,y1)
xgrid,ygrid = np.meshgrid(x1,y1)
# Plot Triangular Color Filled Contour
# plt.tricontourf(x, y, z, cmap="rainbow")
plt.contourf(xgrid, ygrid, zgrid, cmap="rainbow")
plt.colorbar()
# Set well shapes
plt.scatter(x, y, color='black')
plt.xlabel("X")
plt.ylabel("Y")

Mpl toolkits plot_surface not showing any output

I am trying to plot z transforms of some signals using the mpl_toolkits in python, but the output is totally blank. What am I doing wrong? The input numpy arrays have non-zero values. Here is my code:
import numpy as np
import matplotlib.pyplot as plt
import math
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
# initialize r and theta
r = 10
theta = r*np.linspace(-math.pi,math.pi,100)
theta = np.meshgrid(theta,theta)[0]
# calculate z
z = r*(np.cos(theta) + 1j*np.sin(theta))
# calculate z transform for first signal
xs1 = np.abs(z/(z-2))
# calculate z transform for second signal
xs2 = np.abs((np.power(z,3)+2*np.power(z,2)+3*z+3)/np.power(z,3))
# plot the transforms
fig1 = plt.figure(0)
ax1 = fig1.add_subplot(111, projection='3d')
fig2 = plt.figure(1)
ax2 = fig2.add_subplot(111, projection='3d')
ax1.plot_surface(z.real,z.imag,xs1,cmap = cm.coolwarm)
ax2.plot_surface(z.real,z.imag,xs2,cmap = cm.coolwarm)
plt.show()
Here is one of the output:

How to extend the regression line in plot?

I did a cubic regression on the data below. How can I plot the regression line with x value starting from 0 rather than the minimum x?
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
df = pd.DataFrame({'x':list(range(3,18)),'y':[-4,-2,0,3,5,8,12,17,21,23,24,25,26,26,24]})
x = df['x'].values.reshape(-1,1)
y = df['y'].values.reshape(-1,1)
cubic = PolynomialFeatures(degree=3)
x_cubic = cubic.fit_transform(x)
cubic.fit(x_cubic, y)
model = LinearRegression()
model.fit(x_cubic, y)
fig, ax = plt.subplots()
ax.scatter(x, y, color = 'blue')
pred = model.predict(cubic.fit_transform(x))
ax.plot(x, pred, color = 'red')
ax.set_xlim(0)
ax.set_ylim(-20)
This is what I have now.
How can I get a plot like this?
Try creating and extended x range like this and predicting with your existing model. Add this to the bottom of your code.
ex_x = np.arange(0,4).reshape(-1,1)
ex_pred = model.predict(cubic.fit_transform(ex_x))
ax.plot(ex_x, ex_pred, color='red', linestyle='--')
Output:

I'm trying to plot two functions on one plot

This is the code I have so far, I'm trying to set the y limit to be [0,4] and the x limit to be [-2,3]. I can take care of the plot titles myself but I can't figure out how to get these two functions on the same graph.
import math as m
from matplotlib import pylab as plt
import numpy as np
def fermi_dirac(x):
fermi_result = (1/(np.exp(x)+1))
return fermi_result
def bose_einstein(x):
bose_result = (1/(np.exp(x)-1))
return bose_result
Here is a template to get you going
import math as m
import matplotlib.pyplot as plt
import numpy as np
def fermi_dirac(x):
fermi_result = (1./(np.exp(x)+1))
return fermi_result
def bose_einstein(x):
bose_result = (1/(np.exp(x)-1))
return bose_result
x = np.linspace( -2,3, 100)
fd = fermi_dirac(x)
be = bose_einstein(x)
plt.figure()
plt.plot(x, fd, label='fermi dirac')
plt.plot(x, be, label ='bose einstein')
plt.legend(loc='best')
plt.show()
Here's what I did and it works fine with the exception of a divide by zero error for certain values (I'm assuming graphical asymptotes):
import matplotlib.pyplot as plt
import numpy as np
def fermi_dirac(x):
fermi_result = (1/(np.exp(x)+1))
return fermi_result
def bose_einstein(x):
bose_result = (1/(np.exp(x)-1))
return bose_result
f = plt.figure()
x_vals = range(-2,3)
plt.plot(x_vals, fermi_dirac(x_vals))
plt.plot(x_vals, bose_einstein(x_vals))
plt.show()
Here's the documentation for pyplot when you need more references: https://matplotlib.org/api/_as_gen/matplotlib.pyplot.html
To get those functions on the same plot, just use plt.plot(...) two times.
Reference: How to plot multiple functions on the same figure, in Matplotlib?
import math as m
from matplotlib import pylab as plt
import numpy as np
def fermi_dirac(x):
fermi_result = (1/(np.exp(x)+1))
return fermi_result
def bose_einstein(x):
bose_result = (1/(np.exp(x)-1))
return bose_result
x = np.linspace(-2, 3, 100)
y1 = fermi_dirac(x)
y2 = bose_einstein(x)
plt.plot(x, y1, 'r')
plt.plot(x, y2, 'b')
plt.ylim(0, 4)
plt.show()
Output:
Very simple, you just have to define an array of input values (that you can call x). Here's an example with 1000 such values, input as a line plot using both formulas and the axis ranges you provided:
x = np.linspace(-2, 3, 1000)
plt.xlim([-2, 3])
plt.ylim([0,4])
plt.plot(x, fermi_dirac(x), '-', x, bose_einstein(x), '--')
plt.show()

Partial shade of distribution plot using Seaborn

Following simple code:
import numpy as np
import seaborn as sns
dist = np.random.normal(loc=0, scale=1, size=1000)
ax = sns.kdeplot(dist, shade=True);
Yields the following image:
I would like to only shade everything right (or left to some x value). What is the simplest way? I am ready to use something other than Seaborn.
After calling ax = sns.kdeplot(dist, shade=True), the last line in ax.get_lines() corresponds to the kde density curve:
ax = sns.kdeplot(dist, shade=True)
line = ax.get_lines()[-1]
You can extract the data corresponding to that curve using line.get_data:
x, y = line.get_data()
Once you have the data, you can, for instance, shade the region corresponding to x > 0 by selecting those points and calling ax.fill_between:
mask = x > 0
x, y = x[mask], y[mask]
ax.fill_between(x, y1=y, alpha=0.5, facecolor='red')
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
dist = np.random.normal(loc=0, scale=1, size=1000)
ax = sns.kdeplot(dist, shade=True)
line = ax.get_lines()[-1]
x, y = line.get_data()
mask = x > 0
x, y = x[mask], y[mask]
ax.fill_between(x, y1=y, alpha=0.5, facecolor='red')
plt.show()
Using seaborn is often fine for standard plots, but when some customized requirements come into play, falling back to matplotlib is often easier.
So one may first calculate the kernel density estimate and then plot it in the region of interest.
import scipy.stats as stats
import numpy as np
import matplotlib.pyplot as plt
plt.style.use("seaborn-darkgrid")
dist = np.random.normal(loc=0, scale=1, size=1000)
kde = stats.gaussian_kde(dist)
# plot complete kde curve as line
pos = np.linspace(dist.min(), dist.max(), 101)
plt.plot(pos, kde(pos))
# plot shaded kde only right of x=0.5
shade = np.linspace(0.5,dist.max(), 101)
plt.fill_between(shade,kde(shade), alpha=0.5)
plt.ylim(0,None)
plt.show()

Categories

Resources