Unable to plot an accurate tangent to a curve in Python

I have a dataset describing a curve and I need to plot the tangent to it, but unfortunately the tangent ends up some distance away from the curve. Could you please point out what is going wrong? Thank you!
My code is as follows:
fig, ax1 = plt.subplots()
chData_m = efficient.get('Car.Road.y')
x_fit = chData_m.timestamps
y_fit = chData_m.samples
fittedParameters = np.polyfit(x_fit[:],y_fit[:],1)
f = plt.figure(figsize=(800/100.0, 600/100.0), dpi=100)
axes = f.add_subplot(111)
# first the raw data as a scatter plot
axes.plot(x_fit, y_fit, 'D')
# create data for the fitted equation plot
xModel = np.linspace(min(x_fit), max(x_fit))
yModel = np.polyval(fittedParameters, xModel)
# now the model as a line plot
axes.plot(xModel, yModel)
axes.set_xlabel('X Data') # X axis data label
axes.set_ylabel('Y Data') # Y axis data label
# polynomial derivative from numpy
deriv = np.polyder(fittedParameters)
# for plotting
minX = min(x_fit)
maxX = max(x_fit)
# value of derivative (slope) at a specific X value, so
# that a straight line tangent can be plotted at the point
# you might place this code in a loop to animate
pointVal = 10.0 # example X value
y_value_at_point = np.polyval(fittedParameters, pointVal)
slope_at_point = np.polyval(deriv, pointVal)
ylow = (minX - pointVal) * slope_at_point + y_value_at_point
yhigh = (maxX - pointVal) * slope_at_point + y_value_at_point
# now the tangent as a line plot
axes.plot([minX, maxX], [ylow, yhigh])
plt.show()
plt.close('all') # clean up after using pyplot
and the output is: (plot omitted; the plotted tangent lies visibly away from the curve)

I am not sure how you wanted to use numpy polyfit/polyval to determine the tangent formula. I describe here a different approach. The advantage of this approach is that it does not have any assumptions about the nature of the function. The disadvantage is that it will not work for vertical tangents.
To be on the safe side, I have considered both cases, i.e., that the evaluated x-value is a data point in your series and that it is not. Some problems may arise because you mention timestamps in your question without specifying their nature or providing a toy dataset, so this version may or may not work with the datetime objects or timestamps of your original data:
import matplotlib.pyplot as plt
import numpy as np
#generate fake data with unique random x-values between 0 and 70
def func(x, a=0, b=100, c=1, n=3.5):
    return a + (b/(1+(c/x)**n))
np.random.seed(123)
x = np.random.choice(range(700000), 100)/10000
x.sort()
y = func(x, 1, 2, 15, 2.4)
#data point to evaluate
xVal = 29
#plot original data
fig, ax = plt.subplots()
ax.plot(x, y, c="blue", label="data")
#calculate gradient
slope = np.gradient(y, x)
#determine slope and intercept at xVal
ind1 = (np.abs(x - xVal)).argmin()
#case 1 the value is a data point
if xVal == x[ind1]:
    yVal, slopeVal = y[ind1], slope[ind1]
#case 2 the value lies between two data points
#in which case we approximate linearly from the two nearest data points
else:
    if xVal < x[ind1]:
        ind1, ind2 = ind1-1, ind1
    else:
        ind1, ind2 = ind1, ind1+1
    yVal = y[ind1] + (y[ind2]-y[ind1]) * (xVal-x[ind1]) / (x[ind2]-x[ind1])
    slopeVal = slope[ind1] + (slope[ind2]-slope[ind1]) * (xVal-x[ind1]) / (x[ind2]-x[ind1])
intercVal = yVal - slopeVal * xVal
ax.plot([x.min(), x.max()], [slopeVal*x.min()+intercVal, slopeVal*x.max()+intercVal], color="green",
label=f"tangent\nat point [{xVal:.1f}, {yVal:.1f}]\nwith slope {slopeVal:.2f}\nand intercept {intercVal:.2f}" )
ax.set_ylim(0.8 * y.min(), 1.2 * y.max())
ax.legend()
plt.show()

Related

Gaussian Fit function anomaly

I have written code to fit a Gaussian function to a dataset with scipy curve_fit. There are a few different datasets: one with 19 points and one with 21 points, and each contains data in the ranges 0.5-0.7, 1.0-1.2 and 1.5-1.7.
Surprisingly, when I ran the code on the 19-point datasets, all three executed successfully, but for the 21-point datasets only the 1.5-1.7 range data got the right fit. All the others came out with a horribly wrong fit.
Here is the code.
#function declaration
def gauss(x, amp, mu, sigma):
    y = amp*np.exp(-(x-mu)**2/(2*sigma**2))
    return y
#fitting
popt, pcov = curve_fit(f = gauss, xdata = x, ydata = y)
#print(popt)
amp = popt[0]
mu = popt[1]
sigma = popt[2]
print(amp,mu,sigma)
#krypton value
krypton_y = amp/((math.exp(1))**2)
#print(krypton_y)
krypton_x1 = mu + math.sqrt((-2*(sigma**2))*math.log(krypton_y/amp))
krypton_x2 = mu - math.sqrt((-2*(sigma**2))*math.log(krypton_y/amp))
print(krypton_x1-krypton_x2)
#print(gauss([krypton_x1, krypton_x2], popt[0], popt[1], popt[2]))
#horizontal line
horizontal_x = np.arange(min(x)-0.01, max(x)+0.02, 0.01)
horizontal_y = np.repeat(0, len(horizontal_x))
#build fit set
x_test = np.arange(min(x), max(x), 0.0000001)
y_test = gauss(x_test, popt[0], popt[1], popt[2])
y_krypton = []
for i in horizontal_x:
    y_krypton.append(krypton_y)
#Vertical lines
vertical_y = np.arange(-20, amp+20, 0.01)
l = len(vertical_y)
vertical_mean = np.repeat(mu, l)
#fit data
fig = plt.figure()
fig = plt.scatter(x,y, label ='original data', color = 'red', marker = 'x')
fig = plt.plot(x_test, y_test, label = 'Gaussian fit curve')
fig = plt.plot(horizontal_x, y_krypton, color = '#830000', linewidth = 1)
fig = plt.plot(vertical_mean, vertical_y, color = '#0011ed')
fig = plt.xlabel('Distance in mm')
fig = plt.ylabel('Current in nA')
fig = plt.title('Intensity Profile for '+gas+' laser | Z = '+str(z)+'cm')
fig = plt.scatter(mu, amp, s = 25, color = '#0011ed')
fig = plt.scatter(krypton_x1, krypton_y, s = 25, color = '#830000')
fig = plt.scatter(krypton_x2, krypton_y, s = 25, color = '#830000')
plt.annotate('('+"{:.4f}".format(mu)+','+"{:.4f}".format(amp)+')', (mu, amp), xytext = (mu+0.002,amp+0.5))
plt.annotate('('+"{:.4f}".format(krypton_x1)+','+"{:.4f}".format(krypton_y)+')', (krypton_x1, krypton_y), xytext = (krypton_x1+0.002,krypton_y+0.5))
plt.annotate('('+"{:.4f}".format(krypton_x2)+','+"{:.4f}".format(krypton_y)+')', (krypton_x2, krypton_y), xytext = (krypton_x2+0.002,krypton_y+0.5))
plt.legend()
plt.margins(0)
plt.show()
I am also adding two images, the correct fit and the wrong fit.
In order to make clear the difficulty we will use an elementary regression method.
We see that the fitting involves ln(y), which is infinite at the points k<6 and k>16. Those points cannot be used in the numerical calculation. The point k=16 is also not reliable, because the small value y=0.001 is not accurate enough (only one significant digit). So we use only the points from k=6 to k=15 in the calculation that follows.
This shows that the non-significant points have to be eliminated. Of course, more sophisticated methods implemented in nonlinear regression packages, with iterative calculation, give better fits according to whatever criteria of fit are specified in the software.
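For reference, here is a minimal sketch of the linearisation idea described above, using invented data (your own x and y arrays would go in their place): since ln(y) = ln(amp) - (x-mu)^2/(2*sigma^2) is a quadratic in x, you can fit a parabola to (x, ln(y)) after masking out the non-significant points and recover amp, mu and sigma from its coefficients.
import numpy as np
# invented data for illustration only; substitute your measured x and y arrays
x = np.linspace(-0.1, 0.1, 21)
y = 12.0*np.exp(-(x - 0.01)**2/(2*0.03**2)) + np.random.normal(0, 0.05, x.size)
# discard points too close to zero for ln(y) to be meaningful
mask = y > 0.01*y.max()
c2, c1, c0 = np.polyfit(x[mask], np.log(y[mask]), 2)
# ln(y) = c2*x**2 + c1*x + c0, so recover the Gaussian parameters
sigma = np.sqrt(-1.0/(2.0*c2))
mu = -c1/(2.0*c2)
amp = np.exp(c0 - c1**2/(4.0*c2))
print(amp, mu, sigma)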

Large Dataset Polynomial Fitting Using Numpy

I'm trying to fit a second order polynomial to raw data and output the results using Matplotlib. There are about a million points in the data set that I'm trying to fit. It is supposed to be simple, with many examples available around the web. However for some reason I cannot get it right.
I get the following warning message:
RankWarning: Polyfit may be poorly conditioned
This is my output (plot omitted):
This is the output using Excel (plot omitted):
See below for my code. What am I missing??
xData = df['X']
yData = df['Y']
xTitle = 'X'
yTitle = 'Y'
title = ''
minX = 100
maxX = 300
minY = 500
maxY = 2200
title_font = {'fontname':'Arial', 'size':'30', 'color':'black', 'weight':'normal',
'verticalalignment':'bottom'} # Bottom vertical alignment for more space
axis_font = {'fontname':'Arial', 'size':'18'}
#Poly fit
# calculate polynomial
z = np.polyfit(xData, yData, 2)
f = np.poly1d(z)
print(f)
# calculate new x's and y's
x_new = xData
y_new = f(x_new)
#Plot
plt.scatter(xData, yData,c='#002776',edgecolors='none')
plt.plot(x_new,y_new,c='#C60C30')
plt.ylim([minY,maxY])
plt.xlim([minX,maxX])
plt.xlabel(xTitle,**axis_font)
plt.ylabel(yTitle,**axis_font)
plt.title(title,**title_font)
plt.show()
The array to plot must be sorted. Here is a comparison between plotting a sorted and an unsorted array. The plot in the unsorted case looks completely distorted; the fitted function is, of course, the same.
(In both cases the printed fit is -3.496 x² + 2.18 x + 17.26.)
import matplotlib.pyplot as plt
import numpy as np; np.random.seed(0)
x = (np.random.normal(size=300)+1)
fo = lambda x: -3*x**2+ 1.*x +20.
f = lambda x: fo(x) + (np.random.normal(size=len(x))-0.5)*4
y = f(x)
fig, (ax, ax2) = plt.subplots(1,2, figsize=(6,3))
ax.scatter(x,y)
ax2.scatter(x,y)
def fit(ax, x, y, sort=True):
    z = np.polyfit(x, y, 2)
    fit = np.poly1d(z)
    print(fit)
    ax.set_title("unsorted")
    if sort:
        x = np.sort(x)
        ax.set_title("sorted")
    ax.plot(x, fo(x), label="original func", color="k", alpha=0.6)
    ax.plot(x, fit(x), label="fit func", color="C3", alpha=1, lw=2.5)
    ax.legend()
fit(ax, x,y, sort=False)
fit(ax2, x,y, sort=True)
plt.show()
The problem is probably using a power basis for data that is displaced some distance from zero along the x axis. If you use the Polynomial class from numpy.polynomial it will scale and shift the data before the fit, which will help, and also keep track of the scale and shift used. Note that if you want the coefficients in the normal form you will need to convert to that form.
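As a rough illustration of that suggestion (the data below are invented so that x lies roughly in the 100-300 range of the question), the Polynomial class does the scaling and shifting for you:
import numpy as np
from numpy.polynomial import Polynomial
# invented data, roughly matching the 100-300 x range above
x = np.linspace(100, 300, 100000)
y = 0.02*(x - 200)**2 + 5*x + 600 + np.random.normal(0, 20, x.size)
# Polynomial.fit maps x to [-1, 1] internally, which avoids the RankWarning
p = Polynomial.fit(x, y, deg=2)
print(p)            # coefficients in the scaled/shifted (window) domain
print(p.convert())  # coefficients converted back to the original x domain
y_fit = p(x)        # evaluation applies the domain mapping automatically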

Generate Dataset from plot

Hello, I have come across a problem where I need to generate a dataset from a distribution shown on a scatter plot, where the data points are mostly centred around the centre of a circle and are also scattered within a particular radius of that circle. Any ideas on how to generate such datasets in Python?
One way of producing a distribution over a circular shape is to sample a one-dimensional distribution and then stretch it over the 2π circumference of a circle.
One could then decide to use a uniform or a normal distribution.
import matplotlib.pyplot as plt
import numpy as np
def dist(R=4., width=1., num=1000, uniform=True):
    if uniform:
        r = np.random.rand(num)*width + R
    else:
        r = np.random.normal(R, width, num)
    phi = np.linspace(0, 2.*np.pi, len(r))
    x = r * np.sin(phi)
    y = r * np.cos(phi)
    return x, y
fig, ax = plt.subplots(ncols=2, figsize=(9,4))
ax[0].set_title("uniform")
x,y = dist()
ax[0].plot(x,y, linestyle="", marker="o", markersize="2")
x,y = dist(0,1.2, 400)
ax[0].plot(x,y, linestyle="", marker="o", markersize="2")
ax[1].set_title("normal")
x,y = dist(4,0.4, uniform=False)
ax[1].plot(x,y, linestyle="", marker="o", markersize="2")
x,y = dist(0,0.6, uniform=False)
ax[1].plot(x,y, linestyle="", marker="o", markersize="2")
for a in ax:
    a.set_aspect("equal")
plt.show()
You can easily generalize random numbers with some distribution centered on a point, for example normal centered on the 0, 0.
x = np.random.normal(size=1000)
y = np.random.normal(size=1000)
plt.plot(x, y, 'o', alpha=0.6)
EDIT:
What we do is generate random points in polar coordinates. First we do a random for the angle (between 0 and 2 pi) and then we give the noise multiplying it by some random number.
n = 300
theta_out = np.random.uniform(low=0, high=2*np.pi, size=n)
noise_out = np.random.uniform(low=0.9, high=1.1, size=n)
x_out = np.cos(theta_out) * noise_out
y_out = np.sin(theta_out) * noise_out
theta_in = np.random.uniform(low=0, high=2*np.pi, size=n)
noise_in = np.random.uniform(low=0, high=0.5, size=n)
x_in = np.cos(theta_in) * noise_in
y_in = np.sin(theta_in) * noise_in
ax = plt.gca()
ax.set_aspect('equal')
plt.plot(x_out, y_out, 'o')
plt.plot(x_in, y_in, 'o')
Note that the density of points increases as the radius decreases.
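If, on the other hand, you wanted the inner cloud spread uniformly over the disc rather than concentrated at the centre, a common trick is to draw the radius as R*sqrt(u) with u uniform on [0, 1]. A minimal sketch with made-up parameters:
import numpy as np
import matplotlib.pyplot as plt
n = 300
theta = np.random.uniform(0, 2*np.pi, n)
r = 0.5*np.sqrt(np.random.uniform(0, 1, n))  # uniform density over a disc of radius 0.5
x_in = r*np.cos(theta)
y_in = r*np.sin(theta)
ax = plt.gca()
ax.set_aspect('equal')
plt.plot(x_in, y_in, 'o')
plt.show()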

Confidence regions of 1sigma for a 2D plot

I have two variables that I have plotted using matplotlib scatter function.
I would like to show the 68% confidence region by highlighting it in the plot. I know to show it in a histogram, but I don't know how to do it for a 2D plot like this (x vs y). In my case, the x is Mass and y is Ngal Mstar+2.
An example image of what I am looking for looks like this (image omitted):
Here they have shown the 68% confidence region in dark blue and the 95% confidence region in light blue.
Can it be achieved using one of the scipy.stats modules?
To plot a region between two curves, you could use pyplot.fill_between().
As for your confidence region, I was not sure what you wanted to achieve, so I exemplified with simultaneous confidence bands, by modifying the code from:
https://en.wikipedia.org/wiki/Confidence_and_prediction_bands#cite_note-2
import numpy as np
import matplotlib.pyplot as plt
import scipy.special as sp
## Sample size.
n = 50
## Predictor values.
XV = np.random.uniform(low=-4, high=4, size=n)
XV.sort()
## Design matrix.
X = np.ones((n,2))
X[:,1] = XV
## True coefficients.
beta = np.array([0, 1.], dtype=np.float64)
## True response values.
EY = np.dot(X, beta)
## Observed response values.
Y = EY + np.random.normal(size=n)*np.sqrt(20)
## Get the coefficient estimates.
u,s,vt = np.linalg.svd(X,0)
v = np.transpose(vt)
bhat = np.dot(v, np.dot(np.transpose(u), Y)/s)
## The fitted values.
Yhat = np.dot(X, bhat)
## The MSE and RMSE.
MSE = ((Y-EY)**2).sum()/(n-X.shape[1])
s = np.sqrt(MSE)
## These multipliers are used in constructing the intervals.
XtX = np.dot(np.transpose(X), X)
V = [np.dot(X[i,:], np.linalg.solve(XtX, X[i,:])) for i in range(n)]
V = np.array(V)
## The F quantile used in constructing the Scheffe interval.
QF = sp.fdtri(X.shape[1], n-X.shape[1], 0.95)
QF_2 = sp.fdtri(X.shape[1], n-X.shape[1], 0.68)
## The lower and upper bounds of the Scheffe band.
D = s*np.sqrt(X.shape[1]*QF*V)
LB,UB = Yhat-D,Yhat+D
D_2 = s*np.sqrt(X.shape[1]*QF_2*V)
LB_2,UB_2 = Yhat-D_2,Yhat+D_2
## Make the plot.
plt.clf()
plt.plot(XV, Y, 'o', ms=3, color='grey')
# plt.hold(True) is no longer needed; overplotting is the default (plt.hold was removed in Matplotlib 3.0)
a = plt.plot(XV, EY, '-', color='black', zorder = 4)
plt.fill_between(XV, LB_2, UB_2, where = UB_2 >= LB_2, facecolor='blue', alpha= 0.3, zorder = 0)
b = plt.plot(XV, LB_2, '-', color='blue', zorder=1)
plt.plot(XV, UB_2, '-', color='blue', zorder=1)
plt.fill_between(XV, LB, UB, where = UB >= LB, facecolor='blue', alpha= 0.3, zorder = 2)
b = plt.plot(XV, LB, '-', color='blue', zorder=3)
plt.plot(XV, UB, '-', color='blue', zorder=3)
d = plt.plot(XV, Yhat, '-', color='red',zorder=4)
plt.ylim([-8,8])
plt.xlim([-4,4])
plt.xlabel("X")
plt.ylabel("Y")
plt.show()
The output looks like this:
First of all, thank you @snake_charmer for your answer, but I have found a simpler way of solving the issue, using curve_fit from scipy.optimize.
I fit my data sample using curve_fit, which gives me my best-fit parameters. It also gives me the estimated covariance of the parameters; the diagonal of that matrix provides the variances of the parameter estimates. To compute one-standard-deviation errors on the parameters, we can use np.sqrt(np.diag(pcov)), where pcov is the covariance matrix.
def fitfunc(M, p1, p2):
    N = p1 + (M * p2)
    return N
The above is the fit function I use for the data.
Now to fit the data using curve_fit
popt_1,pcov_1 = curve_fit(fitfunc,logx,logn,p0=(10.0,1.0),maxfev=2000)
p1_1 = popt_1[0]
p1_2 = popt_1[1]
sigma1 = [np.sqrt(pcov_1[0,0]),np.sqrt(pcov_1[1,1])] #THE 1 SIGMA CONFIDENCE INTERVALS
residuals1 = (logy) - fitfunc((logx),p1_1,p1_2)
xi_sq_1 = sum(residuals1**2) #THE CHI-SQUARE OF THE FIT
curve_y_1 = fitfunc((logx),p1_1,p1_2)
fig = plt.figure()
ax1 = fig.add_subplot(111)
ax1.scatter(logx,logy,c='r',label='$0.0<z<0.5$')
ax1.plot(logx,curve_y_1,'y')
ax1.plot(logx,fitfunc(logx,p1_1+sigma1[0],p1_2+sigma1[1]),'m',label='68% conf limits')
ax1.plot(logx,fitfunc(logx,p1_1-sigma1[0],p1_2-sigma1[1]),'m')
So, just by taking the square root of the diagonal elements of the covariance matrix, I can obtain the 1 sigma confidence lines.

Inline labels in Matplotlib

In Matplotlib, it's not too tough to make a legend (example_legend(), below), but I think it's better style to put labels right on the curves being plotted (as in example_inline(), below). This can be very fiddly, because I have to specify coordinates by hand, and, if I re-format the plot, I probably have to reposition the labels. Is there a way to automatically generate labels on curves in Matplotlib? Bonus points for being able to orient the text at an angle corresponding to the angle of the curve.
import numpy as np
import matplotlib.pyplot as plt
def example_legend():
    plt.clf()
    x = np.linspace(0, 1, 101)
    y1 = np.sin(x * np.pi / 2)
    y2 = np.cos(x * np.pi / 2)
    plt.plot(x, y1, label='sin')
    plt.plot(x, y2, label='cos')
    plt.legend()

def example_inline():
    plt.clf()
    x = np.linspace(0, 1, 101)
    y1 = np.sin(x * np.pi / 2)
    y2 = np.cos(x * np.pi / 2)
    plt.plot(x, y1, label='sin')
    plt.plot(x, y2, label='cos')
    plt.text(0.08, 0.2, 'sin')
    plt.text(0.9, 0.2, 'cos')
Update: User cphyc has kindly created a Github repository for the code in this answer (see here), and bundled the code into a package which may be installed using pip install matplotlib-label-lines.
Pretty Picture:
In matplotlib it's pretty easy to label contour plots (either automatically or by manually placing labels with mouse clicks). There does not (yet) appear to be any equivalent capability to label data series in this fashion! There may be some semantic reason for not including this feature which I am missing.
Regardless, I have written the following module, which allows for semi-automatic plot labelling. It requires only numpy and a couple of functions from the standard math library.
Description
The default behaviour of the labelLines function is to space the labels evenly along the x axis (automatically placing at the correct y-value of course). If you want you can just pass an array of the x co-ordinates of each of the labels. You can even tweak the location of one label (as shown in the bottom right plot) and space the rest evenly if you like.
In addition, the labelLines function ignores lines that have not had a label assigned in the plot command (or, more precisely, lines whose label contains '_line').
Keyword arguments passed to labelLines or labelLine are passed on to the text function call (some keyword arguments are set by default if the calling code does not specify them).
Issues
Annotation bounding boxes sometimes interfere undesirably with other curves. As shown by the 1 and 10 annotations in the top left plot. I'm not even sure this can be avoided.
It would be nice to specify a y position instead sometimes.
It's still an iterative process to get annotations in the right location
It only works when the x-axis values are floats
Gotchas
By default, the labelLines function assumes that all data series span the range specified by the axis limits. Take a look at the blue curve in the top left plot of the pretty picture. If there were only data available for the x range 0.5-1, then we couldn't possibly place a label at the desired location (which is a little less than 0.2). See this question for a particularly nasty example. Right now, the code does not intelligently identify this scenario and re-arrange the labels; however, there is a reasonable workaround. The labelLines function takes the xvals argument: a list of x-values specified by the user instead of the default linear distribution across the width. So the user can decide which x-values to use for the label placement of each data series.
Also, I believe this is the first answer to complete the bonus objective of aligning the labels with the curve they're on. :)
label_lines.py:
from math import atan2,degrees
import numpy as np
#Label line with line2D label data
def labelLine(line,x,label=None,align=True,**kwargs):
    ax = line.axes
    xdata = line.get_xdata()
    ydata = line.get_ydata()

    if (x < xdata[0]) or (x > xdata[-1]):
        print('x label location is outside data range!')
        return

    #Find corresponding y co-ordinate and angle of the line
    ip = 1
    for i in range(len(xdata)):
        if x < xdata[i]:
            ip = i
            break

    y = ydata[ip-1] + (ydata[ip]-ydata[ip-1])*(x-xdata[ip-1])/(xdata[ip]-xdata[ip-1])

    if not label:
        label = line.get_label()

    if align:
        #Compute the slope
        dx = xdata[ip] - xdata[ip-1]
        dy = ydata[ip] - ydata[ip-1]
        ang = degrees(atan2(dy, dx))

        #Transform to screen co-ordinates
        pt = np.array([x, y]).reshape((1, 2))
        trans_angle = ax.transData.transform_angles(np.array((ang,)), pt)[0]
    else:
        trans_angle = 0

    #Set a bunch of keyword arguments
    if 'color' not in kwargs:
        kwargs['color'] = line.get_color()
    if ('horizontalalignment' not in kwargs) and ('ha' not in kwargs):
        kwargs['ha'] = 'center'
    if ('verticalalignment' not in kwargs) and ('va' not in kwargs):
        kwargs['va'] = 'center'
    if 'backgroundcolor' not in kwargs:
        kwargs['backgroundcolor'] = ax.get_facecolor()
    if 'clip_on' not in kwargs:
        kwargs['clip_on'] = True
    if 'zorder' not in kwargs:
        kwargs['zorder'] = 2.5

    ax.text(x, y, label, rotation=trans_angle, **kwargs)
def labelLines(lines,align=True,xvals=None,**kwargs):
    ax = lines[0].axes
    labLines = []
    labels = []

    #Take only the lines which have labels other than the default ones
    for line in lines:
        label = line.get_label()
        if "_line" not in label:
            labLines.append(line)
            labels.append(label)

    if xvals is None:
        xmin, xmax = ax.get_xlim()
        xvals = np.linspace(xmin, xmax, len(labLines)+2)[1:-1]

    for line, x, label in zip(labLines, xvals, labels):
        labelLine(line, x, label, align, **kwargs)
Test code to generate the pretty picture above:
from matplotlib import pyplot as plt
from scipy.stats import loglaplace,chi2
from labellines import *
X = np.linspace(0,1,500)
A = [1,2,5,10,20]
funcs = [np.arctan,np.sin,loglaplace(4).pdf,chi2(5).pdf]
plt.subplot(221)
for a in A:
    plt.plot(X, np.arctan(a*X), label=str(a))
labelLines(plt.gca().get_lines(), zorder=2.5)

plt.subplot(222)
for a in A:
    plt.plot(X, np.sin(a*X), label=str(a))
labelLines(plt.gca().get_lines(), align=False, fontsize=14)

plt.subplot(223)
for a in A:
    plt.plot(X, loglaplace(4).pdf(a*X), label=str(a))
xvals = [0.8, 0.55, 0.22, 0.104, 0.045]
labelLines(plt.gca().get_lines(), align=False, xvals=xvals, color='k')

plt.subplot(224)
for a in A:
    plt.plot(X, chi2(5).pdf(a*X), label=str(a))
lines = plt.gca().get_lines()
l1 = lines[-1]
labelLine(l1, 0.6, label=r'$Re=${}'.format(l1.get_label()), ha='left', va='bottom', align=False)
labelLines(lines[:-1], align=False)

plt.show()
@Jan Kuiken's answer is certainly well thought-out and thorough, but there are some caveats:
it does not work in all cases
it requires a fair amount of extra code
it may vary considerably from one plot to the next
A much simpler approach is to annotate the last point of each plot. The point can also be circled, for emphasis. This can be accomplished with one extra line:
import matplotlib.pyplot as plt
for i, (x, y) in enumerate(samples):
    plt.plot(x, y)
    plt.text(x[-1], y[-1], f'sample {i}')
A variant would be to use the method matplotlib.axes.Axes.annotate.
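For example, a minimal sketch of that variant (the samples list below is made up for illustration): annotate lets you offset the label by a few points from the line's end, and the end point can be circled for emphasis.
import numpy as np
import matplotlib.pyplot as plt
x = np.linspace(0, 1, 101)
samples = [(x, np.sin(x*np.pi/2)), (x, np.cos(x*np.pi/2))]  # made-up data
for i, (xs, ys) in enumerate(samples):
    plt.plot(xs, ys)
    plt.plot(xs[-1], ys[-1], 'o', mfc='none', ms=12)  # circle the last point for emphasis
    plt.annotate(f'sample {i}', xy=(xs[-1], ys[-1]),
                 xytext=(8, 0), textcoords='offset points', va='center')
plt.show()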
Nice question. A while ago I experimented a bit with this, but haven't used it a lot because it's still not bulletproof. I divided the plot area into a 32x32 grid and calculated a 'potential field' for the best position of a label for each line according to the following rules:
white space is a good place for a label
Label should be near corresponding line
Label should be away from the other lines
The code was something like this:
import matplotlib.pyplot as plt
import numpy as np
from scipy import ndimage
def my_legend(axis=None):
    if axis is None:
        axis = plt.gca()

    N = 32
    Nlines = len(axis.lines)
    print(Nlines)

    xmin, xmax = axis.get_xlim()
    ymin, ymax = axis.get_ylim()

    # the 'point of presence' matrix
    pop = np.zeros((Nlines, N, N), dtype=float)

    for l in range(Nlines):
        # get xy data and scale it to the NxN squares
        xy = axis.lines[l].get_xydata()
        xy = (xy - [xmin, ymin]) / ([xmax-xmin, ymax-ymin]) * N
        xy = xy.astype(np.int32)
        # mask stuff outside plot
        mask = (xy[:, 0] >= 0) & (xy[:, 0] < N) & (xy[:, 1] >= 0) & (xy[:, 1] < N)
        xy = xy[mask]
        # add to pop
        for p in xy:
            pop[l][tuple(p)] = 1.0

    # find whitespace, nice place for labels
    ws = 1.0 - (np.sum(pop, axis=0) > 0) * 1.0
    # don't use the borders
    ws[:, 0] = 0
    ws[:, N-1] = 0
    ws[0, :] = 0
    ws[N-1, :] = 0

    # blur the pop's
    for l in range(Nlines):
        pop[l] = ndimage.gaussian_filter(pop[l], sigma=N/5)

    for l in range(Nlines):
        # positive weights for current line, negative weight for others....
        w = -0.3 * np.ones(Nlines, dtype=float)
        w[l] = 0.5

        # calculate a field
        p = ws + np.sum(w[:, np.newaxis, np.newaxis] * pop, axis=0)

        plt.figure()
        plt.imshow(p, interpolation='nearest')
        plt.title(axis.lines[l].get_label())

        pos = np.argmax(p)  # note, argmax flattens the array first
        best_x, best_y = (pos // N, pos % N)
        x = xmin + (xmax-xmin) * best_x / N
        y = ymin + (ymax-ymin) * best_y / N

        axis.text(x, y, axis.lines[l].get_label(),
                  horizontalalignment='center',
                  verticalalignment='center')
plt.close('all')
x = np.linspace(0, 1, 101)
y1 = np.sin(x * np.pi / 2)
y2 = np.cos(x * np.pi / 2)
y3 = x * x
plt.plot(x, y1, 'b', label='blue')
plt.plot(x, y2, 'r', label='red')
plt.plot(x, y3, 'g', label='green')
my_legend()
plt.show()
And the resulting plot:
matplotx (which I wrote) has line_labels() which plots the labels to the right of the lines. It's also smart enough to avoid overlaps when too many lines are concentrated in one spot. (See stargraph for examples.) It does that by solving a particular non-negative-least-squares problem on the target positions of the labels. Anyway, in many cases where there's no overlap to begin with, such as the example below, that's not even necessary.
import matplotlib.pyplot as plt
import matplotx
import numpy as np
# create data
rng = np.random.default_rng(0)
offsets = [1.0, 1.50, 1.60]
labels = ["no balancing", "CRV-27", "CRV-27*"]
x0 = np.linspace(0.0, 3.0, 100)
y = [offset * x0 / (x0 + 1) + 0.1 * rng.random(len(x0)) for offset in offsets]
# plot
with plt.style.context(matplotx.styles.dufte):
    for yy, label in zip(y, labels):
        plt.plot(x0, yy, label=label)
    plt.xlabel("distance [m]")
    matplotx.ylabel_top("voltage [V]")  # move ylabel to the top, rotate
    matplotx.line_labels()  # line labels to the right
    plt.show()
    # plt.savefig("out.png", bbox_inches="tight")
A simpler approach, like the one Ioannis Filippidis uses:
import matplotlib.pyplot as plt
import numpy as np
# evenly sampled time at 200ms intervals
tMin = -1; tMax = 10
t = np.arange(tMin, tMax, 0.1)
# red dashes, blue points default
plt.plot(t, 22*t, 'r--', t, t**2, 'b')
factor = 3/4; offset = 20  # text position in view
textPosition = [(tMax+tMin)*factor, 22*(tMax+tMin)*factor]
plt.text(textPosition[0], textPosition[1]+offset, '22 t', color='red', fontsize=20)
textPosition = [(tMax+tMin)*factor, ((tMax+tMin)*factor)**2+20]
plt.text(textPosition[0], textPosition[1]+offset, 't^2', bbox=dict(facecolor='blue', alpha=0.5), fontsize=20)
plt.show()
Python 3 code on SageCell
