How to remove square brackets in legend of scatterplot? - python

Is there a way to remove the square brackets from the legend entry of a regression line in a scatter plot? Here is my code:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
# Three observed points; x is reshaped into a single column.
x = np.array([0.5, 2.5, 4.5]).reshape((-1, 1))
y = np.array([1.19, 1.67, 2.01])
# The model is fitted against y reshaped into a column as well.
model = LinearRegression().fit(x, y.reshape((-1, 1)))
r_sq = model.score(x, y)
# NOTE(review): intercept_ and coef_ are presumably arrays here rather than
# plain scalars (the answer below calls .item() on them); formatting them
# directly into the legend label is what produces the square brackets.
intercept=model.intercept_
slope=model.coef_
print('coefficient of determination:', r_sq)
print('intercept:', intercept)
print('slope:', slope)
# Manual prediction from the fitted line for the three input points.
y_predict = intercept + slope * x
print('predicted response:', y_predict, sep='\n')
# Denser x grid (step 0.5) and the model's predictions on it.
x_all = np.array([0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5]).reshape((-1, 1))
y_all = model.predict(x_all)
#y_all=round(y_all, 2)
print(x_all)
print(y_all)
# First row of each array; x1 and y1 are not used again in this snippet.
x1 = x_all[0,:]
y1 = y_all[0,:]
# Predicted points drawn as large black circles.
plt.plot(x_all, y_all, 'o', color='black', markersize=10)
plt.xlabel('Factor 1', fontsize=16)
plt.ylabel('Factor 2', fontsize=16)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
# Degree-1 polynomial fit through the predicted points: m is the slope, b the intercept.
m, b = np.polyfit(x_all.flatten(), y_all.flatten(), 1)
# Regression line; the label is built from the rounded intercept, slope and R².
plt.plot(x, m*x + b, linestyle='-', color='black', label='y = {}+{}x, R² = {}'.format(np.round(intercept,2), np.round(slope,2), np.round(r_sq, 2)))
plt.legend(loc=(0.05, 0.85), fontsize=20)
This gives the following plot:
However nothing I try lets me remove them, any suggestions?

Just pass the scalars instead of the whole array in label:
# Reduce the one-element arrays to plain Python scalars first, so the legend
# text is formatted without square brackets.
intercept_value = np.round(intercept, 2).item()
slope_value = np.round(slope, 2).item()
r_squared = np.round(r_sq, 2)
legend_text = f'y = {intercept_value}+{slope_value}x, R² = {r_squared}'
plt.plot(x, m * x + b, linestyle='-', color='black', label=legend_text)
Output:

Related

Gaussian curve fitting python

I have a problem fitting some data with a Gaussian function. I tried to do it in multiple different ways, but none of them worked. I need some ideas, please. The data is attached (columns 2 and 3).
import pandas as pd
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from numpy import asarray as ar,exp
# Bin index (0..18) and the measured frequency for each bin.
x = ar(range(19))
# asarray() takes ONE array-like argument, so the values must be wrapped in a
# list; passing them as 19 separate positional arguments raises TypeError.
y = ar([0, 0, 0, 0, 0, 0, 0.01955, 1.163025, 19.7159833333333, 81.3119708333334,
        80.0329166666667, 19.3835833333333, 0.03378, 0, 0, 0, 0, 0, 0])
#y = ar(007, 0.04, .175, .628, 1.89, 4.78,10.034,17.542, 25.589, 31.1, 31.544, 26.65, 18.74, 11.01, 5.39, 2.209, 0.74, 0.215. 0.049)
n = len(x)
# NOTE(review): these estimates divide by the number of bins rather than by
# the total weight y.sum(), and sigma is left as a variance (no square root).
# They are kept as posted because the answer below discusses exactly this.
mean = sum(x*y)/n
sigma = sum(y*(x-mean)**2)/n
def gaus(x,a,x0,sigma):
    """Evaluate an unnormalised Gaussian at ``x``.

    Parameters:
        x: point(s) at which to evaluate; scalar or numpy array.
        a: peak amplitude (the value returned at ``x == x0``).
        x0: centre of the peak.
        sigma: width parameter; must be non-zero.

    Returns:
        ``a * exp(-(x - x0)**2 / (2 * sigma**2))``, with the same shape as ``x``.
    """
    return a*exp(-(x-x0)**2/(2*sigma**2))
# Fit without an initial guess; the variant with p0 is left commented out below.
popt,pcov = curve_fit(gaus,x,y)
#popt,pcov = curve_fit(gaus,x,y,p0=[1,mean,sigma])
# Raw data: blue markers joined by a solid blue line.
plt.scatter(x,y, color='blue')
plt.plot(x,y,label='data', marker='', color='blue', linestyle='-', linewidth=2)
# NOTE(review): the fitted curve is evaluated at y (the frequencies), not at x.
plt.scatter(x,gaus(y,*popt), color='red')
plt.plot(x,gaus(y,*popt),label='fit', marker='', color='Red', linestyle='--', linewidth=2)
print(len(x))
print(mean,sigma)
# legend() is called twice; the second call below also sets the location.
plt.legend()
plt.xlabel('No of Resets', fontsize=20)
plt.ylabel('Frequency', fontsize=20)
plt.legend(loc='upper right')
plt.title('Gaussian Fit', fontsize=20)
plt.show()
I agree with @ddejohn.
However, you are calculating the mean and standard deviation incorrectly. You could use the following approximation of the integral:
import numpy as np
# Treat the normalised frequencies as weights: the weighted average of x is
# the mean, and the square root of the weighted squared deviation is sigma.
weights = y / y.sum()
mean = (x * weights).sum()
sigma = np.sqrt((weights * (x - mean) ** 2).sum())
These should be used as the initial guess for the fit, as in your commented-out line, where you can also add a0 = y.max() as the guess for the amplitude.
# Amplitude guess: the tallest observed value. It is defined here because the
# call below uses a0, which was otherwise only mentioned in the text.
a0 = y.max()
popt,pcov = curve_fit(gaus,x,y,p0=[a0,mean,sigma])
Then plot as @ddejohn said, perhaps with more sample points:
# Evaluate the fitted curve on 100 evenly spaced points across the data range
# so that it is drawn smoothly.
xx = np.linspace(x[0], x[-1], num=100)
fit_style = dict(label='fit', marker='', color='Red', linestyle='--', linewidth=2)
plt.plot(xx, gaus(xx, *popt), **fit_style)

Creating a 2-D data plot with "vertical" marginal histograms

How can I create in python a bi-variate data plot with "vertical" marginal histograms like this?:
Say that the data is generated via:
from scipy.stats import multivariate_normal
import numpy as np
# Zero-mean bivariate normal with correlated components.
mean = np.array([0, 0])
cov = np.array([
    [1, 0.5],
    [0.5, 2],
])
# Draw 1000 samples; each row of `data` is one (x, y) point.
distribution = multivariate_normal(mean=mean, cov=cov)
data = distribution.rvs(size=1000)
Here is sample code which shows how one can do this:
import math
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import rc, rcParams
from numpy.linalg import eigh
from scipy.stats import multivariate_normal, norm
from mpl_toolkits.mplot3d import Axes3D
# Render all text through LaTeX, using bold math.
rcParams['text.latex.preamble'] = r'\boldmath'
rc('text', usetex=True)

# Distribution parameters and a reproducible sample of 800 points.
mean = np.array([0,0])
cov = np.array([[1, 0.3], [0.3, .5]])
np.random.seed(0)
mvn_rvs = multivariate_normal(mean, cov).rvs(800)

# Marginal densities of x and y, built from the diagonal of the covariance.
pdf_x = norm(mean[0], np.sqrt(cov[0,0])).pdf
pdf_y = norm(mean[1], np.sqrt(cov[1,1])).pdf
rv_x = mvn_rvs[:, 0]
rv_y = mvn_rvs[:, 1]
x = np.linspace(-3, 3, 101)
y = np.linspace(-3, 3, 100)
X, Y = np.meshgrid(x, y)

fontsize = 30
fig = plt.figure()
# Figure.gca() no longer accepts a projection keyword in current Matplotlib;
# add_subplot() is the supported way to create 3-D axes.
ax = fig.add_subplot(111, projection='3d')
# Scale the x and y axes by 1.15 in the projection matrix.
ax.get_proj = lambda: np.dot(Axes3D.get_proj(ax), np.diag([1.15, 1.15, 1, 1]))

# The "vertical" marginals: each 1-D density is drawn on one wall of the box.
ax.plot(y, pdf_y(y), zs=x.min(), zdir='x', linewidth=3, label="$\\mathsf{P_y(y)}$")
ax.plot(x, pdf_x(x), zs=y.max(), zdir='y', linewidth=3, label='$\\mathsf{P_x(x)}$')
leg = plt.legend(fontsize=fontsize, ncol=2, frameon=False, bbox_to_anchor=(-0.10, 1.1275),
                 loc='upper left', handlelength=0.7, handletextpad=0.5, columnspacing=2.4)

# Grid line width, transparent panes and transparent axis lines.
grid_linewidth = 1.15
ax.xaxis._axinfo["grid"]['linewidth'] = grid_linewidth
ax.yaxis._axinfo["grid"]['linewidth'] = grid_linewidth
ax.zaxis._axinfo["grid"]['linewidth'] = grid_linewidth
ax.xaxis.set_pane_color((1.0, 1.0, 1.0, 0.0))
ax.yaxis.set_pane_color((1.0, 1.0, 1.0, 0.0))
ax.zaxis.set_pane_color((1.0, 1.0, 1.0, 0.0))
# The w_xaxis / w_yaxis / w_zaxis aliases were removed from Matplotlib;
# xaxis / yaxis / zaxis are the same objects.
ax.xaxis.line.set_color((1.0, 1.0, 1.0, 0.0))
ax.yaxis.line.set_color((1.0, 1.0, 1.0, 0.0))
ax.zaxis.line.set_color((1.0, 1.0, 1.0, 0.0))

labelpad = -5
ax.set_xlabel("$\\mathsf{x}$", fontsize=fontsize, labelpad=labelpad)
ax.set_ylabel("$\\mathsf{y}$", fontsize=fontsize, labelpad=labelpad)
labelsize = 10
ax.xaxis.set_rotate_label(False)
ax.yaxis.set_rotate_label(False)
ax.set_zlim(bottom=0)
ax.set_xlim(-3, 3)
ax.set_ylim(-3, 3)
ax.xaxis.set_ticklabels([])
ax.xaxis.set_visible(False)
ax.yaxis.set_ticklabels([])
ax.zaxis.set_ticklabels([])

# --- Contour of constant joint density (an ellipse) on the floor -------------
sx2 = cov[0, 0]
sy2 = cov[1, 1]
rho = cov[0, 1] / np.sqrt(sx2 * sy2)
Sigma = cov
target = 0.1
# Setting the bivariate normal density equal to `target` gives
#   x^T Sigma^-1 x = gamma = ln(1 / (4 pi^2 sx^2 sy^2 (1 - rho^2) target^2)).
# sx2 and sy2 already ARE the variances, so they must not be squared again.
gamma = math.log(1 / (4*(np.pi**2)*sx2*sy2*(1 - rho**2)*(target**2)))
eigenvalues, P = eigh(np.linalg.inv(Sigma))
# Compute u and v as per link using thetas from 0 to 2pi
thetas = np.linspace(0, 2*np.pi, 10000)
uv = (np.sqrt(gamma) / np.sqrt(eigenvalues)) * np.hstack((np.cos(thetas).reshape(-1,1), np.sin(thetas).reshape(-1, 1)))
# eigh() returns eigenvectors as the COLUMNS of P (Sigma^-1 = P diag(w) P^T),
# so a point u in eigen-coordinates maps back to the original frame as P @ u.
# One matrix product handles every row; no per-point loop or inverse is needed.
orig_coord = uv @ P.T

# Sample points and the density contour, both drawn at z = 0.
ax.plot(rv_x, rv_y, 0*rv_x, ' o', c='g', markersize=1.1) # "RdBu_r")
ax.plot(orig_coord[:, 0], orig_coord[:, 1],
        0 * np.ones_like(orig_coord[:, 0]), c='r', linewidth=3)
ax.view_init(azim=-45, elev=20)

Plot a model with multiple curve_fit parameters

I have a model that describes a sum of Gaussians distributions:
# Two synthetic populations centred at 2 and 5, each with standard deviation 0.5.
s1 = np.random.normal(2, 0.5, size = (1000, 1))
s2 = np.random.normal(5, 0.5, size = (1000, 1))
mb = (np.concatenate((s1, s2), axis=0)).max()
Xi = np.arange(0,mb,0.1) #bins
# The per-population histograms must use the bin edges Xi; `X` is only
# defined further down, so using it here raised NameError.
#histogram population 1
Y11, bins1 = np.histogram(s1, Xi)
Y1 = Y11/Y11.sum()
X1 = bins1[:-1]
#histogram population 2
Y22, bins2 = np.histogram(s2, Xi)
Y2 = Y22/Y22.sum()
X2 = bins2[:-1]
#universe, with all mixed populations
S = np.concatenate((s1, s2), axis=0)
Yi, bins = np.histogram(S, Xi)
# Normalise counts to fractions; X holds the left edge of every bin.
Y = Yi/Yi.sum()
X = bins[:-1]
def gaussians(X, amp1, mean1, SD1, amp2, mean2, SD2):
    """Return the sum of two unnormalised Gaussians evaluated at ``X``.

    Parameters:
        X: point(s) at which to evaluate; scalar or numpy array.
        amp1, mean1, SD1: amplitude, centre and width of the first component.
        amp2, mean2, SD2: amplitude, centre and width of the second component.

    Returns:
        ``amp1*exp(-0.5*((X-mean1)/SD1)**2) + amp2*exp(-0.5*((X-mean2)/SD2)**2)``.
        Passing an amplitude of 0 removes that component from the sum.
    """
    A = amp1 * np.exp(-0.5*((X - mean1)/SD1)**2)
    B = amp2 * np.exp(-0.5*((X - mean2)/SD2)**2)
    return A + B
# Fit both components at once; p0 is (amp1, mean1, SD1, amp2, mean2, SD2).
params, pcov = curve_fit(gaussians, X,Y, p0=(1,2,1,1,5,1), maxfev=4000)
# numpy is available in this snippet as `np`; the bare name `numpy` is not
# defined at this point. `j` is not used by the plotting code below.
j = np.arange(0.1, mb, 0.1)
plt.figure(figsize=(10, 6)) #size of graph
# Histogram fractions as markers, fitted mixture as a blue line.
plt.plot(X, Y, 'o', linewidth=2)
plt.plot(X, gaussians(X ,params[0], params[1],params[2], params[3], params[4], params[5]),'b', linewidth=2)
plt.xlim([-.01, mb])
plt.ylim([0, 0.1])
plt.show()
This code plots a nice graph as follows:
I wonder how to plot each gaussian overlapped in the same graph from the parameters of my model function. I mean, something like this (made by hand):
For anyone looking for the answer, I figured out how to do it. You only need to set to zero the amplitude of each Gaussian that you don't want to plot:
# Name the six fitted values once so each call shows which amplitude is zeroed.
amp1, mean1, sd1, amp2, mean2, sd2 = params
# Full mixture as a wide, faint blue band.
plt.plot(X, gaussians(X, amp1, mean1, sd1, amp2, mean2, sd2), 'b', linewidth=8, alpha=0.1)
# First amplitude zeroed: only the second component remains (red).
plt.plot(X, gaussians(X, 0, mean1, sd1, amp2, mean2, sd2), 'r', linewidth=1)
# Second amplitude zeroed: only the first component remains (green).
plt.plot(X, gaussians(X, amp1, mean1, sd1, 0, mean2, sd2), 'g', linewidth=1)
plt.xlim([-.01, mb])
plt.ylim([0, 0.1])

How to use the datasets to fit the 3D-surface?

I am trying to fit this X, Y, Z datasets to an unknown surface.
Unfortunately, a linear fit is not good enough to describe the surface data. I think a polynomial fit might work in this case. The problem is that I do not know how to build the polynomial fitting function needed to perform the surface fit.
Any help would be great.
Thank you
import numpy as np
from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
# 3x3 grids of coordinates: X is constant along each row, Y repeats 3, 2, 1.
X = [[2, 2, 2], [1.5, 1.5, 1.5], [0.5, 0.5, 0.5]]
Y = [[3, 2, 1], [3, 2, 1], [3, 2, 1]]
Z = [[2.4, 2.5, 2.2], [2.4, 3, 2.5], [4, 3.3, 8]]
# ================= Plot figure ================= ##
Fontsize_set = {'size': 20}
fig = plt.figure(figsize=[8, 5], dpi=140, facecolor='w')
# Figure.gca() no longer accepts a projection keyword in current Matplotlib;
# add_subplot() is the supported way to create 3-D axes.
ax = fig.add_subplot(111, projection='3d')
ax.grid(color='y', linestyle='--', linewidth=0.5)
ax.tick_params(labelsize=20)
ax.set_xlim3d(0, 3)
ax.set_ylim3d(0, 6)
ax.set_zlim3d(0, 10)
ax.view_init(30, 45)
# Measured points as black markers.
ax.scatter(X, Y, Z, s=50, color='k', marker='o', linewidth=None, alpha=1)
# ax.plot_surface(X, Y, Z)
fig.tight_layout()
plt.show()
Here you go
=^..^=
Description in code:
import numpy as np
from scipy.optimize import curve_fit
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
# test function
def function(data, a, b, c):
    """Power-law surface model ``z = a * x**b * y**c``.

    Parameters:
        data: indexable whose element 0 is x and element 1 is y (scalars or
            numpy arrays).
        a: overall scale factor.
        b: exponent applied to x.
        c: exponent applied to y.

    Returns:
        The model value(s) for the given x and y.
    """
    x = data[0]
    y = data[1]
    return a * (x**b) * (y**c)
# setup test data
# NOTE(review): these nine triples look like the rows of the question's X, Y
# and Z grids, while the code below reads each triple as one (x, y, z) point —
# confirm this is the intended layout before relying on the fitted surface.
raw_data = (
    [2.0, 2.0, 2.0], [1.5, 1.5, 1.5], [0.5, 0.5, 0.5],
    [3.0, 2.0, 1.0], [3.0, 2.0, 1.0], [3.0, 2.0, 1.0],
    [2.4, 2.5, 2.2], [2.4, 3.0, 2.5], [4.0, 3.3, 8.0],
)
# convert data into proper format
# (one list per coordinate: element 0, 1 and 2 of every triple)
x_data = [item[0] for item in raw_data]
y_data = [item[1] for item in raw_data]
z_data = [item[2] for item in raw_data]
# get fit parameters from scipy curve fit
parameters, covariance = curve_fit(function, [x_data, y_data], z_data)
# create surface function model
# setup data points for calculating surface model
model_x_data = np.linspace(min(x_data), max(x_data), 30)
model_y_data = np.linspace(min(y_data), max(y_data), 30)
# create coordinate arrays for vectorized evaluations
X, Y = np.meshgrid(model_x_data, model_y_data)
# calculate Z coordinate array
Z = function(np.array([X, Y]), *parameters)
# setup figure object
fig = plt.figure()
# setup 3d object
# (Axes3D(fig) no longer attaches itself to the figure in current Matplotlib,
# which leaves the window empty; add_subplot() creates and registers the axes.)
ax = fig.add_subplot(111, projection='3d')
# plot surface
ax.plot_surface(X, Y, Z)
# plot input data
ax.scatter(x_data, y_data, z_data, color='red')
# set plot descriptions
ax.set_xlabel('X data')
ax.set_ylabel('Y data')
ax.set_zlabel('Z data')
plt.show()
Here is an additional graphics example with scatterplot, surface plot, and contour plot. You should be able to hold down the mouse button and rotate the 3D plots.
import numpy, scipy, scipy.optimize
import matplotlib
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm # to colormap 3D surfaces from blue to red
import matplotlib.pyplot as plt
# Figure size in pixels; the plot helpers divide these by 100 and use dpi=100.
graphWidth = 800 # units are pixels
graphHeight = 600 # units are pixels
# Number of contour levels passed to contour() in ContourPlot.
numberOfContourLines = 16
def SurfacePlot(func, data, fittedParameters):
    """Show the fitted surface together with the data points in a 3-D plot.

    Parameters:
        func: model callable invoked as ``func(array([X, Y]), *fittedParameters)``.
        data: sequence whose elements 0, 1 and 2 are the x, y and z samples.
        fittedParameters: model parameters, unpacked into ``func``.

    The figure is displayed with ``plt.show()`` and all figures are closed
    afterwards.
    """
    f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)

    # Create the 3-D axes through the figure: Axes3D(f) no longer attaches
    # itself in current Matplotlib, and calling pyplot.grid() before any axes
    # exist would create an unwanted 2-D axes underneath.
    axes = f.add_subplot(111, projection='3d')
    axes.grid(True)

    x_data = data[0]
    y_data = data[1]
    z_data = data[2]

    # Evaluate the model on a 20x20 grid spanning the range of the data.
    xModel = numpy.linspace(min(x_data), max(x_data), 20)
    yModel = numpy.linspace(min(y_data), max(y_data), 20)
    X, Y = numpy.meshgrid(xModel, yModel)
    Z = func(numpy.array([X, Y]), *fittedParameters)

    axes.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=cm.coolwarm, linewidth=1, antialiased=True)
    axes.scatter(x_data, y_data, z_data) # show data along with plotted surface

    axes.set_title('Surface Plot (click-drag with mouse)') # add a title for surface plot
    axes.set_xlabel('X Data') # X axis data label
    axes.set_ylabel('Y Data') # Y axis data label
    axes.set_zlabel('Z Data') # Z axis data label

    plt.show()
    plt.close('all') # clean up after using pyplot or else there can be memory and process problems
def ContourPlot(func, data, fittedParameters):
    """Show a 2-D contour plot of the fitted model with the data points on top.

    Parameters:
        func: model callable invoked as ``func(array([X, Y]), *fittedParameters)``.
        data: sequence whose elements 0, 1 and 2 are the x, y and z samples
            (z is read but not drawn).
        fittedParameters: model parameters, unpacked into ``func``.

    The figure is displayed with ``plt.show()`` and all figures are closed
    afterwards.
    """
    f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
    axes = f.add_subplot(111)

    x_data = data[0]
    y_data = data[1]
    z_data = data[2]

    # Evaluate the model on a 20x20 grid spanning the range of the data.
    xModel = numpy.linspace(min(x_data), max(x_data), 20)
    yModel = numpy.linspace(min(y_data), max(y_data), 20)
    X, Y = numpy.meshgrid(xModel, yModel)
    Z = func(numpy.array([X, Y]), *fittedParameters)

    axes.plot(x_data, y_data, 'o')

    axes.set_title('Contour Plot') # add a title for contour plot
    axes.set_xlabel('X Data') # X axis data label
    axes.set_ylabel('Y Data') # Y axis data label

    # Draw on the axes created above rather than through pyplot's implicit
    # "current axes"; the result is the same and the target is explicit.
    CS = axes.contour(X, Y, Z, numberOfContourLines, colors='k')
    axes.clabel(CS, inline=1, fontsize=10) # labels for contours

    plt.show()
    plt.close('all') # clean up after using pyplot or else there can be memory and process problems
def ScatterPlot(data):
    """Show the raw (x, y, z) samples as a 3-D scatter plot.

    Parameters:
        data: sequence whose elements 0, 1 and 2 are the x, y and z samples.

    The figure is displayed with ``plt.show()`` and all figures are closed
    afterwards.
    """
    f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)

    # Create the 3-D axes through the figure: Axes3D(f) no longer attaches
    # itself in current Matplotlib, and calling pyplot.grid() before any axes
    # exist would create an unwanted 2-D axes underneath.
    axes = f.add_subplot(111, projection='3d')
    axes.grid(True)

    x_data = data[0]
    y_data = data[1]
    z_data = data[2]

    axes.scatter(x_data, y_data, z_data)

    axes.set_title('Scatter Plot (click-drag with mouse)')
    axes.set_xlabel('X Data')
    axes.set_ylabel('Y Data')
    axes.set_zlabel('Z Data')

    plt.show()
    plt.close('all') # clean up after using pyplot or else there can be memory and process problems
def func(data, a, b, c):
    """Planar surface model ``z = a*x + b*y + c``.

    Parameters:
        data: indexable whose element 0 is x and element 1 is y (scalars or
            numpy arrays); any further elements are ignored.
        a: coefficient of x.
        b: coefficient of y.
        c: constant offset.

    Returns:
        The model value(s) for the given x and y.
    """
    x = data[0]
    y = data[1]
    return (a * x) + (y * b) + c
if __name__ == "__main__":
    # Sample data; func() reads only the first two entries of `data`.
    xData = numpy.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0])
    yData = numpy.array([11.0, 12.1, 13.0, 14.1, 15.0, 16.1, 17.0, 18.1, 90.0])
    zData = numpy.array([1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.0, 9.9])

    data = [xData, yData, zData]

    initialParameters = [1.0, 1.0, 1.0] # these are the same as scipy default values in this example

    # here a non-linear surface fit is made with scipy's curve_fit()
    fittedParameters, pcov = scipy.optimize.curve_fit(func, [xData, yData], zData, p0 = initialParameters)

    ScatterPlot(data)
    SurfacePlot(func, data, fittedParameters)
    ContourPlot(func, data, fittedParameters)

    print('fitted prameters', fittedParameters)

    # Goodness of fit: residuals, RMSE, and R-squared computed as
    # 1 - var(residuals) / var(observed z).
    modelPredictions = func(data, *fittedParameters)

    absError = modelPredictions - zData

    SE = numpy.square(absError) # squared errors
    MSE = numpy.mean(SE) # mean squared errors
    RMSE = numpy.sqrt(MSE) # Root Mean Squared Error, RMSE
    Rsquared = 1.0 - (numpy.var(absError) / numpy.var(zData))
    print('RMSE:', RMSE)
    print('R-squared:', Rsquared)

Matplotlib: same height for colorbar as for plot [duplicate]

This question already has answers here:
Set Matplotlib colorbar size to match graph
(9 answers)
Closed 8 years ago.
I'm plotting some 2D data as shown. The axes aspect should be equal and the axes range should differ.
import numpy
import matplotlib.pyplot as plt
import matplotlib.mlab as mlab
#Generate data
def _bivariate_normal(X, Y, sigmax=1.0, sigmay=1.0, mux=0.0, muy=0.0):
    """Density of an uncorrelated bivariate normal evaluated at (X, Y).

    Stands in for ``matplotlib.mlab.bivariate_normal``, which has been
    removed from Matplotlib. Takes the same leading arguments.
    """
    z = ((X - mux) / sigmax) ** 2 + ((Y - muy) / sigmay) ** 2
    return numpy.exp(-z / 2.0) / (2.0 * numpy.pi * sigmax * sigmay)

delta = 0.025
x = numpy.arange(-5.0, 5.0, delta)
y = numpy.arange(-5.0, 5.0, delta)
X, Y = numpy.meshgrid(x, y)
Z1 = _bivariate_normal(X, Y, 1.0, 1.0, 0.0, 0.0)
Z2 = _bivariate_normal(X, Y, 1.5, 0.5, 1, 1)
# difference of Gaussians
Z = 10.0 * (Z2 - Z1)
#Plot
fig = plt.figure()
ax1 = fig.add_subplot(1, 1, 1, aspect='equal')
# Filled pseudocolor map with 50 black contour lines on top.
PC = ax1.pcolor(X, Y, Z)
CF = ax1.contour(X, Y, Z, 50, colors = "black")
plt.xlim(-4.0, 4.0)
plt.ylim(-2.0, 2.0)
# Default colorbar placement, with the contour levels marked on it.
cbar = plt.colorbar(PC)
cbar.add_lines(CF)
plt.show()
How can I make the colorbar have the same height as the plotted data?
You can do this using make_axes_locatable:
import numpy
import matplotlib.pyplot as plt
import matplotlib.mlab as mlab
from mpl_toolkits.axes_grid1 import make_axes_locatable
#Generate data
def _bivariate_normal(X, Y, sigmax=1.0, sigmay=1.0, mux=0.0, muy=0.0):
    """Density of an uncorrelated bivariate normal evaluated at (X, Y).

    Stands in for ``matplotlib.mlab.bivariate_normal``, which has been
    removed from Matplotlib. Takes the same leading arguments.
    """
    z = ((X - mux) / sigmax) ** 2 + ((Y - muy) / sigmay) ** 2
    return numpy.exp(-z / 2.0) / (2.0 * numpy.pi * sigmax * sigmay)

delta = 0.025
x = numpy.arange(-5.0, 5.0, delta)
y = numpy.arange(-5.0, 5.0, delta)
X, Y = numpy.meshgrid(x, y)
Z1 = _bivariate_normal(X, Y, 1.0, 1.0, 0.0, 0.0)
Z2 = _bivariate_normal(X, Y, 1.5, 0.5, 1, 1)
# difference of Gaussians
Z = 10.0 * (Z2 - Z1)
#Plot
fig = plt.figure()
ax1 = fig.add_subplot(1, 1, 1, aspect='equal')
# Filled pseudocolor map with 50 black contour lines on top.
PC = ax1.pcolor(X, Y, Z)
CF = ax1.contour(X, Y, Z, 50, colors = "black")
plt.xlim(-4.0, 4.0)
plt.ylim(-2.0, 2.0)
# Append a dedicated colorbar axes to the right of ax1 (5% of its width,
# 0.05 padding) and draw the colorbar into it via `cax`.
divider = make_axes_locatable(ax1)
cax1 = divider.append_axes("right", size="5%", pad=0.05)
cbar = plt.colorbar(PC, cax = cax1)
cbar.add_lines(CF)
plt.show()

Categories

Resources