How to find point inside an ellipse? - python

I'm trying to find points inside the ellipse. It is not an 'ordinary' ellipse actually it is based on average and standard deviation which is much easier than calculating eigen values in order to find confidence interval
Function is not written by me here are the sources
https://matplotlib.org/devdocs/gallery/statistics/confidence_ellipse.html
https://carstenschelp.github.io/2018/09/14/Plot_Confidence_Ellipse_001.html
Here is the code:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Ellipse
import matplotlib.transforms as transforms
x = np.array([21.5,16.3,13.7,20.0,17.4,10.4,16.9,7.0,13.8,15.2,13.8,8.2,18.0,9.4,13.2,7.2,21.2,30.2,13.5,29.8,18.3,20.2,31.1,21.5,29.8,18.0,13.1,24.1,32.5,15.4,16.1,15.0,25.9,3.0,17.0,23.6,17.6,-11.8,22.2,26.6,17.8,20.6,23.0,28.0,25.3,22.1,22.4,16.3,22.0,12.1])
y = np.array([92.4,98.2,97.6,95.9,96.5,92.1,89.6,89.4,89.2,89.4,90.2,86.7,89.5,89.9,90.2,87.6,104.0,87.3,99.4,85.4,92.8,92.0,87.9,96.2,94.1,95.2,95.6,86.3,87.6,89.5,95.0,97.1,93.0,87.8,98.9,98.2,100.1,45.4,92.1,91.6,94.7,93.9,91.4,91.1,95.7,93.8,96.4,94.1,94.0,89.1])
#function obtained from matplotlib documentation
#https://matplotlib.org/devdocs/gallery/statistics/confidence_ellipse.html
def confidence_ellipse(x, y, ax, n_std=3.0, facecolor='none', **kwargs):
"""
Create a plot of the covariance confidence ellipse of *x* and *y*.
Parameters
----------
x, y : array-like, shape (n, )
Input data.
ax : matplotlib.axes.Axes
The axes object to draw the ellipse into.
n_std : float
The number of standard deviations to determine the ellipse's radiuses.
**kwargs
Forwarded to `~matplotlib.patches.Ellipse`
Returns
-------
matplotlib.patches.Ellipse
"""
if x.size != y.size:
raise ValueError("x and y must be the same size")
cov = np.cov(x, y)
pearson = cov[0, 1]/np.sqrt(cov[0, 0] * cov[1, 1])
# Using a special case to obtain the eigenvalues of this
# two-dimensionl dataset.
ell_radius_x = np.sqrt(1 + pearson)
ell_radius_y = np.sqrt(1 - pearson)
ellipse = Ellipse((0, 0), width=ell_radius_x * 2, height=ell_radius_y * 2,
facecolor=facecolor, **kwargs)
# Calculating the stdandard deviation of x from
# the squareroot of the variance and multiplying
# with the given number of standard deviations.
scale_x = np.sqrt(cov[0, 0]) * n_std
mean_x = np.mean(x)
# calculating the stdandard deviation of y ...
scale_y = np.sqrt(cov[1, 1]) * n_std
mean_y = np.mean(y)
transf = transforms.Affine2D() \
.rotate_deg(45) \
.scale(scale_x, scale_y) \
.translate(mean_x, mean_y)
ellipse.set_transform(transf + ax.transData)
return ax.add_patch(ellipse)
#implementation
fig, ax = plt.subplots(1, 1, figsize=(8, 4))
ax.scatter(x,y,s=5)
ellipse = confidence_ellipse(x,y,ax,n_std=2,edgecolor='red')
plt.show()
Afterwards I tried to find get center coordinates and the points inside the ellipse as below:
ellipse.get_center()
Out:(0,0)
ellipse.contains_point([21.5,92.4])#first points in x,y arrays
Out:False
ellipse.contains_point([0,0])#get_center() result
Out:False
Ellipse plot is working fine but I need every points coordinates inside the ellipse.
What I am doing wrong? I already checked similar questions but they didn't work either.

The confidence_ellipse example function only returns an object for drawing, and the contains point will only tell you if the point is on the ellipse.
What you probably want is something like:
import math
class distribution():
def __init__(self,x,y):
self.cov = np.cov(x, y)
self.mean = np.matrix( [np.mean(x), np.mean(y)])
def dist(self, x,y):
tmp = np.matrix([x,y])
diff = self.mean - tmp
dist = diff * np.linalg.inv(self.cov) * diff.T
return math.sqrt(dist)
def is_inside(self, x,y,nstd=2.0):
if (self.dist(x,y) < nstd):
return True
else:
return False
Then you can do :
d = distribution(x,y)
d.is_inside(24.1,86.3)
Returns true.
Then, for all the points:
points = np.array(list(zip(x, y)))
points_in = list(filter(lambda p: d.is_inside(p[0],p[1]), points))
points_out = list(filter(lambda p: not d.is_inside(p[0],p[1]), points))
x_in = [ x[0] for x in points_in]
y_in = [ x[1] for x in points_in]
x_out = [ x[0] for x in points_out]
y_out = [ x[1] for x in points_out]
fig2, ax2 = plt.subplots(1, 1, figsize=(8, 8))
ax2.scatter(x_in,y_in,s=5, facecolor="green")
ax2.scatter(x_out,y_out, s=5, facecolor="red")
ellipse = confidence_ellipse(x,y,ax2,n_std=2,edgecolor='red') # this presupposes that you still have the confidence_ellipse still defined
plt.show()
And your output should look something like this:
Where the red points are more than 2 standard deviations away, and the green ones are inside.

You can plot all the x,y label on the plot.
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Ellipse
import matplotlib.transforms as transforms
x = np.array([21.5,16.3,13.7,20.0,17.4,10.4,16.9,7.0,13.8,15.2,13.8,8.2,18.0,9.4,13.2,7.2,21.2,30.2,13.5,29.8,18.3,20.2,31.1,21.5,29.8,18.0,13.1,24.1,32.5,15.4,16.1,15.0,25.9,3.0,17.0,23.6,17.6,-11.8,22.2,26.6,17.8,20.6,23.0,28.0,25.3,22.1,22.4,16.3,22.0,12.1])
y = np.array([92.4,98.2,97.6,95.9,96.5,92.1,89.6,89.4,89.2,89.4,90.2,86.7,89.5,89.9,90.2,87.6,104.0,87.3,99.4,85.4,92.8,92.0,87.9,96.2,94.1,95.2,95.6,86.3,87.6,89.5,95.0,97.1,93.0,87.8,98.9,98.2,100.1,45.4,92.1,91.6,94.7,93.9,91.4,91.1,95.7,93.8,96.4,94.1,94.0,89.1])
#function obtained from matplotlib documentation
#https://matplotlib.org/devdocs/gallery/statistics/confidence_ellipse.html
def confidence_ellipse(x, y, ax, n_std=3.0, facecolor='none', **kwargs):
"""
Create a plot of the covariance confidence ellipse of *x* and *y*.
Parameters
----------
x, y : array-like, shape (n, )
Input data.
ax : matplotlib.axes.Axes
The axes object to draw the ellipse into.
n_std : float
The number of standard deviations to determine the ellipse's radiuses.
**kwargs
Forwarded to `~matplotlib.patches.Ellipse`
Returns
-------
matplotlib.patches.Ellipse
"""
if x.size != y.size:
raise ValueError("x and y must be the same size")
cov = np.cov(x, y)
pearson = cov[0, 1]/np.sqrt(cov[0, 0] * cov[1, 1])
# Using a special case to obtain the eigenvalues of this
# two-dimensionl dataset.
ell_radius_x = np.sqrt(1 + pearson)
ell_radius_y = np.sqrt(1 - pearson)
ellipse = Ellipse((0, 0), width=ell_radius_x * 2, height=ell_radius_y * 2,
facecolor=facecolor, **kwargs)
# Calculating the stdandard deviation of x from
# the squareroot of the variance and multiplying
# with the given number of standard deviations.
scale_x = np.sqrt(cov[0, 0]) * n_std
mean_x = np.mean(x)
# calculating the stdandard deviation of y ...
scale_y = np.sqrt(cov[1, 1]) * n_std
mean_y = np.mean(y)
transf = transforms.Affine2D() \
.rotate_deg(45) \
.scale(scale_x, scale_y) \
.translate(mean_x, mean_y)
ellipse.set_transform(transf + ax.transData)
return ax.add_patch(ellipse)
#implementation
fig, ax = plt.subplots(1, 1, figsize=(12, 8))
ax.scatter(x,y,s=15)
ellipse = confidence_ellipse(x,y,ax,n_std=2,edgecolor='red')
# zip joins x and y coordinates in pairs
for x,y in zip(x,y):
label = f"({x},{y})"
#label = "{:.2f}".format(y) # plot just y-value of the point
# print(label) # uncomment if you want to print the points for reference
plt.annotate(label, # this is the text
(x,y), # this is the point to label
textcoords="offset points", # how to position the text
xytext=(0,10), # distance from text to points (x,y)
ha='center') # horizontal alignment can be left, right or center
plt.show()
P.S. : You need to adjust your xytext accordingly. That is just the point where this label has plotted in the figure.
You can also print those values for your reference. Just put print(label) and it will print all the points for you.
(21.5,92.4)
(16.3,98.2)
(13.7,97.6)
(20.0,95.9)
(17.4,96.5)
(10.4,92.1)
(16.9,89.6)
(7.0,89.4)
(13.8,89.2)
(15.2,89.4)
(13.8,90.2)
(8.2,86.7)
(18.0,89.5)
(9.4,89.9)
(13.2,90.2)
(7.2,87.6)
(21.2,104.0)
(30.2,87.3)
(13.5,99.4)
(29.8,85.4)
(18.3,92.8)
(20.2,92.0)
(31.1,87.9)
(21.5,96.2)
(29.8,94.1)
(18.0,95.2)
(13.1,95.6)
(24.1,86.3)
(32.5,87.6)
(15.4,89.5)
(16.1,95.0)
(15.0,97.1)
(25.9,93.0)
(3.0,87.8)
(17.0,98.9)
(23.6,98.2)
(17.6,100.1)
(-11.8,45.4)
(22.2,92.1)
(26.6,91.6)
(17.8,94.7)
(20.6,93.9)
(23.0,91.4)
(28.0,91.1)
(25.3,95.7)
(22.1,93.8)
(22.4,96.4)
(16.3,94.1)
(22.0,94.0)
(12.1,89.1)

Related

Ploting multiple Gaussians from pandas file

I am trying to plot multiple gaussians on one plot with different heights, widths and centers from this type of dataframe:
hight(y)
fwhM(width)
centers(x)
24.122348
1.827472
98
24.828252
4.333549
186
26.810812
1.728494
276
25.997897
1.882424
373
24.503944
2.222210
471
27.488572
1.750039
604
31.556823
3.844592
683
27.920951
0.891394
792
27.009054
1.917744
897
Any idea on how to go about it?
We will reuse the Gaussian plotter as defined in
Creating gaussians of fixed width and std
(The code is repeated here)
Data
The following code generates the above dataframe.
data = [
(24.122348, 1.827472, 98),
(24.828252, 4.333549, 186),
(26.810812, 1.728494, 276),
(25.997897, 1.882424, 373),
(24.503944, 2.222210, 471),
(27.488572, 1.750039, 604),
(31.556823, 3.844592, 683),
(27.920951, 0.891394, 792),
(27.009054, 1.917744, 897),
]
df = pd.DataFrame(data, columns=["height", "fwhm", "center"])
Gaussian
Taken from the reference post above.
import matplotlib.cm as mpl_cm
import matplotlib.colors as mpl_colors
import matplotlib.pyplot as plt
import numpy as np
from scipy.spatial.distance import cdist
class Gaussian:
def __init__(self, size):
self.size = size
self.center = np.array(self.size) / 2
self.axis = self._calculate_axis()
def _calculate_axis(self):
"""
Generate a list of rows, columns over multiple axis.
Example:
Input: size=(5, 3)
Output: [array([0, 1, 2, 3, 4]), array([[0], [1], [2]])]
"""
axis = [np.arange(size).reshape(-1, *np.ones(idx, dtype=np.uint8))
for idx, size in enumerate(self.size)]
return axis
def update_size(self, size):
""" Update the size and calculate new centers and axis. """
self.size = size
self.center = np.array(self.size) / 2
self.axis = self._calculate_axis()
def create(self, dim=1, fwhm=3, center=None):
""" Generate a gaussian distribution on the center of a certain width. """
center = center if center is not None else self.center[:dim]
distance = sum((ax - ax_center) ** 2 for ax_center, ax in zip(center, self.axis))
distribution = np.exp(-4 * np.log(2) * distance / fwhm ** 2)
return distribution
def creates(self, dim=2, fwhm=3, centers: np.ndarray = None):
""" Combines multiple gaussian distributions based on multiple centers. """
centers = np.array(centers or np.array([self.center]).T).T
indices = np.indices(self.size).reshape(dim, -1).T
distance = np.min(cdist(indices, centers, metric='euclidean'), axis=1)
distance = np.power(distance.reshape(self.size), 2)
distribution = np.exp(-4 * np.log(2) * distance / fwhm ** 2)
return distribution
#staticmethod
def plot(distribution, show=True):
""" Plotter, in case you do not know the dimensions of your distribution, or want the same interface. """
if len(distribution.shape) == 1:
return Gaussian.plot1d(distribution, show)
if len(distribution.shape) == 2:
return Gaussian.plot2d(distribution, show)
if len(distribution.shape) == 3:
return Gaussian.plot3d(distribution, show)
raise ValueError(f"Trying to plot {len(distribution.shape)}-dimensional data, "
f"Only 1D, 2D, and 3D distributions are valid.")
#staticmethod
def plot1d(distribution, show=True, vmin=None, vmax=None, cmap=None):
norm = mpl_colors.Normalize(
vmin=vmin if vmin is not None else distribution.min(),
vmax=vmax if vmin is not None else distribution.max()
)
cmap = mpl_cm.ScalarMappable(norm=norm, cmap=cmap or mpl_cm.get_cmap('jet'))
cmap.set_array(distribution)
c = [cmap.to_rgba(value) for value in distribution] # defines the color
fig, ax = plt.subplots()
ax.scatter(np.arange(len(distribution)), distribution, c=c)
ax.plot(distribution)
fig.colorbar(cmap)
if show: plt.show()
return fig
#staticmethod
def plot2d(distribution, show=True):
fig, ax = plt.subplots()
img = ax.imshow(distribution, cmap='jet')
fig.colorbar(img)
if show: plt.show()
return fig
#staticmethod
def plot3d(distribution, show=True):
m, n, c = distribution.shape
x, y, z = np.mgrid[:m, :n, :c]
out = np.column_stack((x.ravel(), y.ravel(), z.ravel(), distribution.ravel()))
x, y, z, values = np.array(list(zip(*out)))
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
# Standalone colorbar, directly creating colorbar on fig results in strange artifacts.
img = ax.scatter([0, 0], [0, 0], [0, 0], c=[0, 1], cmap=mpl_cm.get_cmap('jet'))
img.set_visible = False
fig.colorbar(img)
ax.scatter(x, y, z, c=values, cmap=mpl_cm.get_cmap('jet'))
if show: plt.show()
return fig
Solution
Since it is unclear to me how you want to the Gaussian distributions to interact when they are in part of multiple widths, I will assume that you want the maximum value.
Then the main logic is that we can now generate a unique Gaussian distribution for every center with given full width half maximum (fwhm), and take the maximum of all the distrubutions.
distribution = np.zeros((1200,))
df = pd.DataFrame(data, columns=["height", "fwhm", "center"])
gaussian = Gaussian(size=distribution.shape)
for idx, row in df.iterrows():
distribution = np.maximum(distribution, gaussian.create(fwhm=row.fwhm, center=[row.center]))
gaussian.plot(distribution, show=True)
Result
Edit
Since the question now asks for a different distribution, you can adjust the code in the create (and creates) method with the following to get different types of distributions:
def create(self, dim=1, fwhm=3, center=None):
""" Generate a gaussian distribution on the center of a certain width. """
center = center if center is not None else self.center[:dim]
distance = sum((ax - ax_center) for ax_center, ax in zip(center, self.axis))
distribution = sps.beta.pdf(distance / max(distance), a=3, b=100)
return distribution
Where sps.beta comes from import scipy.stats as sps, and can be changed with a gamma distribution as well. e.g. distribution = sps.gamma.pdf(distance, 10, 40).
Note that the distance is no longer squared, and that the argument fwhm, could be replaced by the parameters required for the distribution.

Unable to plot an accurate tangent to a curvature in Python

I have a dataset for curvature and I need to find the tangent to the curve but unfortunately, this is a bit far from the curve. Kindly guide me the issue solution related to the problem. Thank you!
My code is as follows:
fig, ax1 = plt.subplots()
chData_m = efficient.get('Car.Road.y')
x_fit = chData_m.timestamps
y_fit = chData_m.samples
fittedParameters = np.polyfit(x_fit[:],y_fit[:],1)
f = plt.figure(figsize=(800/100.0, 600/100.0), dpi=100)
axes = f.add_subplot(111)
# first the raw data as a scatter plot
axes.plot(x_fit, y_fit, 'D')
# create data for the fitted equation plot
xModel = np.linspace(min(x_fit), max(x_fit))
yModel = np.polyval(fittedParameters, xModel)
# now the model as a line plot
axes.plot(xModel, yModel)
axes.set_xlabel('X Data') # X axis data label
axes.set_ylabel('Y Data') # Y axis data label
# polynomial derivative from numpy
deriv = np.polyder(fittedParameters)
# for plotting
minX = min(x_fit)
maxX = max(x_fit)
# value of derivative (slope) at a specific X value, so
# that a straight line tangent can be plotted at the point
# you might place this code in a loop to animate
pointVal = 10.0 # example X value
y_value_at_point = np.polyval(fittedParameters, pointVal)
slope_at_point = np.polyval(deriv, pointVal)
ylow = (minX - pointVal) * slope_at_point + y_value_at_point
yhigh = (maxX - pointVal) * slope_at_point + y_value_at_point
# now the tangent as a line plot
axes.plot([minX, maxX], [ylow, yhigh])
plt.show()
plt.close('all') # clean up after using pyplot
and the output is:
I am not sure how you wanted to use numpy polyfit/polyval to determine the tangent formula. I describe here a different approach. The advantage of this approach is that it does not have any assumptions about the nature of the function. The disadvantage is that it will not work for vertical tangents.
To be on the safe side, I have considered both cases, i.e., that the evaluated x-value is a data point in your series and that it is not. Some problems may arise because I see that you mention timestamps in your question without specifying their nature by providing a toy dataset - so, this version may or may not work with the datetime objects or timestamps of your original data:
import matplotlib.pyplot as plt
import numpy as np
#generate fake data with unique random x-values between 0 and 70
def func(x, a=0, b=100, c=1, n=3.5):
return a + (b/(1+(c/x)**n))
np.random.seed(123)
x = np.random.choice(range(700000), 100)/10000
x.sort()
y = func(x, 1, 2, 15, 2.4)
#data point to evaluate
xVal = 29
#plot original data
fig, ax = plt.subplots()
ax.plot(x, y, c="blue", label="data")
#calculate gradient
slope = np.gradient(y, x)
#determine slope and intercept at xVal
ind1 = (np.abs(x - xVal)).argmin()
#case 1 the value is a data point
if xVal == x[ind1]:
yVal, slopeVal = y[ind1], slope[ind1]
#case 2 the value lies between to data points
#in which case we approximate linearly from the two nearest data points
else:
if xVal < x[ind1]:
ind1, ind2 = ind1-1, ind1
else:
ind1, ind2 = ind1, ind1+1
yVal = y[ind1] + (y[ind2]-y[ind1]) * (xVal-x[ind1]) / (x[ind2]-x[ind1])
slopeVal = slope[ind1] + (slope[ind2]-slope[ind1]) * (xVal-x[ind1]) / (x[ind2]-x[ind1])
intercVal = yVal - slopeVal * xVal
ax.plot([x.min(), x.max()], [slopeVal*x.min()+intercVal, slopeVal*x.max()+intercVal], color="green",
label=f"tangent\nat point [{xVal:.1f}, {yVal:.1f}]\nwith slope {slopeVal:.2f}\nand intercept {intercVal:.2f}" )
ax.set_ylim(0.8 * y.min(), 1.2 * y.max())
ax.legend()
plt.show()

How to combine two outputs of KernelDensity from sklearn (Python)

I want to multiply two Kernel Density Estimates to get a composite likelihood ... The multiplication operator doesn't exist for sklearn.
I have KDR for data from 3 independent sources and I want multiply KDE from each sources. The below code is from https://scikit-learn.org/stable/auto_examples/neighbors/plot_species_kde.html#sphx-glr-auto-examples-neighbors-plot-species-kde-py and should run easily.
Although I made some small modification to get get two KDEs and then multiply them which fails.
Someone can help. Below code should run without error except the multiplication part.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_species_distributions
from sklearn.neighbors import KernelDensity
# if basemap is available, we'll use it.
# otherwise, we'll improvise later...
try:
from mpl_toolkits.basemap import Basemap
basemap = True
except ImportError:
basemap = False
def construct_grids(batch):
"""Construct the map grid from the batch object
Parameters
----------
batch : Batch object
The object returned by :func:`fetch_species_distributions`
Returns
-------
(xgrid, ygrid) : 1-D arrays
The grid corresponding to the values in batch.coverages
"""
# x,y coordinates for corner cells
xmin = batch.x_left_lower_corner + batch.grid_size
xmax = xmin + (batch.Nx * batch.grid_size)
ymin = batch.y_left_lower_corner + batch.grid_size
ymax = ymin + (batch.Ny * batch.grid_size)
# x coordinates of the grid cells
xgrid = np.arange(xmin, xmax, batch.grid_size)
# y coordinates of the grid cells
ygrid = np.arange(ymin, ymax, batch.grid_size)
return (xgrid, ygrid)
# Get matrices/arrays of species IDs and locations
data = fetch_species_distributions()
species_names = ['Bradypus Variegatus', 'Microryzomys Minutus']
Xtrain = np.vstack([data['train']['dd lat'], data['train']['dd long']]).T
ytrain = np.array([d.decode('ascii').startswith('micro')
for d in data['train']['species']], dtype='int')
Xtrain *= np.pi / 180. # Convert lat/long to radians
# Set up the data grid for the contour plot
xgrid, ygrid = construct_grids(data)
X, Y = np.meshgrid(xgrid[::5], ygrid[::5][::-1])
land_reference = data.coverages[6][::5, ::5]
land_mask = (land_reference > -9999).ravel()
xy = np.vstack([Y.ravel(), X.ravel()]).T
xy = xy[land_mask]
xy *= np.pi / 180.
# Plot map of South America with distributions of each species
fig = plt.figure()
fig.subplots_adjust(left=0.05, right=0.95, wspace=0.05)
kde0 = KernelDensity(bandwidth=0.04, metric='haversine',
kernel='gaussian', algorithm='ball_tree')
kde0.fit(Xtrain[ytrain == 0])
kde1 = KernelDensity(bandwidth=0.04, metric='haversine',
kernel='gaussian', algorithm='ball_tree')
kde1.fit(Xtrain[ytrain == 1])
kde01=kde0*kde1
plt.subplot(1, 1, 1)
# evaluate only on the land: -9999 indicates ocean
Z = np.full(land_mask.shape[0], -9999, dtype='int')
Z[land_mask] = np.exp(kde01.score_samples(xy))
Z = Z.reshape(X.shape)
# plot contours of the density
levels = np.linspace(0, Z.max(), 25)
plt.contourf(X, Y, Z, levels=levels, cmap=plt.cm.Reds)
plt.show()

PCA projection centroids and ellipsis

I'm currently working on my PhD, and I wondering if somebody using PCA projection have ideas on displaying some more information, that some library in R can print by default. See an example of STHDA
What the best way to do it?
I planned to ask for tips, but I found some answers myself to produce same result on Python.
What I have done is:
def confidence_ellipse(x, y, ax, n_std=3.0, facecolor='none', **kwargs):
"""
Create a plot of the covariance confidence ellipse of `x` and `y`
Parameters
----------
x, y : array_like, shape (n, )
Input data.
ax : matplotlib.axes.Axes
The axes object to draw the ellipse into.
n_std : float
The number of standard deviations to determine the ellipse's radiuses.
Returns
-------
matplotlib.patches.Ellipse
Other parameters
----------------
kwargs : `~matplotlib.patches.Patch` properties
"""
if x.size != y.size:
raise ValueError("x and y must be the same size")
cov = np.cov(x, y)
pearson = cov[0, 1] / np.sqrt(cov[0, 0] * cov[1, 1])
# Using a special case to obtain the eigenvalues of this
# two-dimensionl dataset.
ell_radius_x = np.sqrt(1 + pearson)
ell_radius_y = np.sqrt(1 - pearson)
ellipse = Ellipse((0, 0),
width=ell_radius_x * 2,
height=ell_radius_y * 2,
facecolor=facecolor,
**kwargs)
# Calculating the stdandard deviation of x from
# the squareroot of the variance and multiplying
# with the given number of standard deviations.
scale_x = np.sqrt(cov[0, 0]) * n_std
mean_x = np.mean(x)
# calculating the stdandard deviation of y ...
scale_y = np.sqrt(cov[1, 1]) * n_std
mean_y = np.mean(y)
transf = transforms.Affine2D() \
.rotate_deg(45) \
.scale(scale_x, scale_y) \
.translate(mean_x, mean_y)
ellipse.set_transform(transf + ax.transData)
return ax.add_patch(ellipse)
method = PCA(n_components=2, whiten=True) # project to 2 dimensions
projected = method.fit_transform(np.array(inputs[tags['datum']].tolist()))
figure = pyplot.figure()
axis = figure.add_subplot(111)
# Display data
for label in labels:
color = np.expand_dims(np.array(settings.get_color(label)), axis=0)
pyplot.scatter(projected[labels == label, 0], projected[labels == label, 1],
c=color, alpha=0.5, label=label, edgecolor='none')
# Centroids
for label in labels:
# Centroids
color = np.array(settings.get_color(label))
# Ellipsis
Views.confidence_ellipse(projected[labels == label, 0], projected[labels == label, 1], axis,
edgecolor=color, linewidth=3, zorder=0)
The confidence_ellipse came from matplotlib example.

How to create a plot with a repeating color pattern?

For my report, I'm creating a special color plot in jupyter notebook. There are two parameters, x and y.
import numpy as np
x = np.arange(-1,1,0.1)
y = np.arange(1,11,1)
with which I compute a third quantity. Here is an example to demonstrate the concept:
values = []
for i in range(len(y)) :
z = y[i] * x**3
# in my case the value z represents phases of oscillators
# so I will transform the computed values to the intervall [0,2pi)
values.append(z)
values = np.array(values) % 2*np.pi
I'm plotting y vs x. For each y = 1,2,3,4... there will be a horizontal line with total length two. For example: The coordinate (0.5,8) stands for a single point on line 8 at position x = 0.5 and z(0.5,8) is its associated value.
Now I want to represent each point on all ten lines with a unique color that is determined by z(x,y). Since z(x,y) takes only values in [0,2pi) I need a color scheme that starts at zero (for example z=0 corresponds to blue). For increasing z the color continuously changes and in the end at 2pi it takes the same color again (so at z ~ 2pi it becomes blue again).
Does someone know how this can be done in python?
The kind of structure for x, y and z you need, is easier using a meshgrid. Also, to have a lot of x-values between -1 and 1, np.linspace(-1,1,N) divides the range in N even intervals.
Using meshgrid, z can be calculated in one line using numpy's vectorization. This runs much faster.
To set a repeating color, a cyclic colormap such as hsv can be used. There the last color is the same as the starting color.
import numpy as np
from matplotlib import pyplot as plt
x, y = np.meshgrid(np.linspace(-1,1,100), np.arange(1,11,1))
z = (y * x**3) % 2*np.pi
plt.scatter(x, y, c=z, s=6, cmap='hsv')
plt.yticks(range(1,11))
plt.show()
Alternatively, a symmetric colormap could be built taken the colors from and existing map and combining them with the same colors in reverse order.
import numpy as np
from matplotlib import pyplot as plt
import matplotlib.colors as mcolors
colors_orig = plt.cm.viridis_r(np.linspace(0, 1, 128))
# combine the colors with the reversed array and build a new colormap
colors = np.vstack((colors_orig, colors_orig[::-1]))
symcmap = mcolors.LinearSegmentedColormap.from_list('symcmap', colors)
x, y = np.meshgrid(np.linspace(-1,1,100), np.arange(1,11,1))
z = (y * x**3) % 2*np.pi
plt.scatter(x, y, c=z, s=6, cmap=symcmap)
plt.yticks(range(1,11))
plt.show()
Multicolored lines are somewhat more complicated than just scatter plots. The docs have an example using LineCollections. Here is the adapted code. Note that the line segments are colored using their start point, so make sure there are enough x values. Also, the x and y limits aren't set automatically any more.
The code also adds a colorbar to illustrate how the colors map to the z values. Some interesting code from Jake VanderPlas shows how to create ticks for multiples of π.
import numpy as np
from matplotlib import pyplot as plt
from matplotlib.collections import LineCollection
# code from Jake VanderPlas
def format_func(value, tick_number):
# find number of multiples of pi/2
N = int(np.round(2 * value / np.pi))
if N == 0:
return "0"
elif N == 1:
return r"$\pi/2$"
elif N == 2:
return r"$\pi$"
elif N % 2 > 0:
return r"${0}\pi/2$".format(N)
else:
return r"${0}\pi$".format(N // 2)
x = np.linspace(-1, 1, 500)
y_max = 10
# Create a continuous norm to map from data points to colors
norm = plt.Normalize(0, 2 * np.pi)
for y in range(1, y_max + 1):
z = (y * x ** 3) % 2 * np.pi
y_array = y * np.ones_like(x)
points = np.array([x, y_array]).T.reshape(-1, 1, 2)
segments = np.concatenate([points[:-1], points[1:]], axis=1)
lc = LineCollection(segments, cmap='hsv', norm=norm)
lc.set_array(z) # Set the values used for colormapping
lc.set_linewidth(2)
line = plt.gca().add_collection(lc)
# plt.scatter(x, y_array, c=z, s=10, norm=norm, cmap='hsv')
cbar = plt.colorbar(line) # , ticks=[k*np.pi for k in np.arange(0, 2.001, 0.25)])
cbar.locator = plt.MultipleLocator(np.pi / 2)
cbar.minor_locator = plt.MultipleLocator(np.pi / 4)
cbar.formatter = plt.FuncFormatter(format_func)
cbar.ax.minorticks_on()
cbar.update_ticks()
plt.yticks(range(1, y_max + 1)) # one tick for every y
plt.xlim(x.min(), x.max()) # the LineCollection doesn't force the limits
plt.ylim(0.5, y_max + 0.5)
plt.show()

Categories

Resources