Related
Consider the following dataset with random data:
test_dataset = np.array([ -2.09601881, -4.26602684, 1.09105452, -4.59559669,
1.05865251, -0.93076762, -14.70398945, -18.01937129,
4.64126152, -10.34178822, -9.46058493, -5.66864965,
-3.17562022, 15.7030379 , 10.59675205, -5.80882413,
-24.00604149, -4.81518663, -1.94333927, 1.18142171,
12.72030312, 3.84917581, -0.4468796 , 11.91828567,
-17.99171774, 9.35108712, -5.57233376, 5.77547128,
5.49296099, -10.96132844, -18.75174336, 5.27843303,
25.73548956, -21.58043021, -14.24734733, 12.57886018,
-22.10002076, 1.72207555, -6.0411867 , -3.63568527,
7.26542117, -0.21449529, -6.64974714, -0.94574606,
-4.23339431, 16.76199734, -12.42195793, 18.965854 ,
-23.85336123, -15.55104466, 6.17215868, 7.34993316,
8.62461351, -16.30482638, -16.35601099, 1.96857833,
18.74440399, -22.48374434, -10.895831 , -10.14393648,
-17.62768751, 4.83388855, 20.1578181 , 6.04299626,
0.97198296, -3.40889754, -10.62734293, 1.70240472,
20.4203839 , 10.26751364, 15.47859675, -10.97940064,
1.82728251, 4.22894717, 8.31502887, -5.48502811,
-1.09244874, -11.32072796, -24.88520436, -7.42108403,
19.4200716 , 4.82704045, -12.46290135, -15.18466755,
6.37714692, -11.06825059, 5.10898588, -9.07485484,
1.63946084, -12.2270078 , 12.63776832, -25.03916909,
2.42972082, -14.22890171, 18.2199446 , 6.9819771 ,
-12.07795089, 2.59948596, -16.90206575, 6.35192719,
7.33823106, -23.69653447, -11.66091871, -19.40251179,
-12.64863792, 11.04004231, 13.7247356 , -16.36107329,
20.43227515, 17.97334692, 16.92675175, -5.62051239,
-8.66304184, -8.40848514, -23.20919855, 0.96808137,
-5.03287253, -3.13212582, 18.81155666, -8.27988284,
3.85708447, 12.43039322, 17.98003878, 18.11009997,
-3.74294421, -16.62276121, 9.4446743 , 2.2060981 ,
8.34853736, 14.79144713, -1.91113975, -5.17061419,
4.53451746, 8.19090358, 7.98343201, 11.44592322,
-16.9132677 , -25.92554857, 10.10638432, -8.09236786,
20.8878207 , 19.52368296, 0.85858125, 2.61760415,
9.21360649, -8.1192651 , -6.94829273, 2.73562447,
13.40981323, -9.05018331, -17.77563166, -21.03927199,
4.10415845, -1.31550732, 5.68284828, 15.08670773,
-19.78675315, 12.94697869, -11.51797637, 1.91485992,
16.69417993, -16.04271622, -1.14028558, 9.79830109,
-18.58386093, -7.52963269, -10.10059878, -25.2194216 ,
-0.10598426, -15.77641532, -14.15999125, 14.35011271,
11.15178588, -14.43856266, 15.84015226, -3.41221883,
11.90724469, 0.57782081, 18.82127466, -6.01068727,
-19.83684476, 2.20091942, -1.38707755, -8.62821053,
-11.89000913, -11.69539815, 5.70242019, -3.83781841,
5.35894135, -0.30995954, 21.76661212, 8.52974329,
-9.13065082, -11.06209 , -12.00654618, 2.769838 ,
-12.21579496, -27.2686534 , -4.58538197, -6.94388425])
I'd like to plot a normalized histogram of it, so in the plt.hist options I set density=True:
import numpy as np
import matplotlib.pyplot as plt
# plt.hist returns the bar heights (here densities, because density=True),
# the bin edges, and the drawn patches.
data1, bins, _ = plt.hist(test_dataset, density=True);
# np.trapz is called without x/dx, so it assumes unit spacing between samples;
# the real bin widths (np.diff(bins)) are not taken into account here.
print(np.trapz(data1))
# Plain sum of the bar heights, again without multiplying by the bin widths.
print(sum(data1))
which outputs the following histogram:
0.18206124014272715
0.18866449755723017
From matplotlib documentation:
The density parameter, which normalizes bin heights so that the integral of the histogram is 1. The resulting histogram is an approximation of the probability density function.
But from my example it is clearly seen that the integral of the histogram is NOT 1 and strongly depends on the number of bins: if I specify it for example to be 40 the sum will increase:
# Same histogram, now with 40 bins as described in the text above.
data1, bins, _ = plt.hist(test_dataset, bins=40, density=True)
# Both quantities ignore the bin widths (unit spacing is assumed), which is
# why they change with the number of bins.
print(np.trapz(data1))
print(sum(data1))
0.7508847002777762
0.7546579902289207
Is the description in the documentation incorrect, or am I misunderstanding something here?
You are not calculating the area. The area of a histogram is the sum of each bar's height multiplied by its bin width, so in your example it should be calculated as follows:
# Area = sum of (bar height * bin width). Compare with a tolerance, because a
# floating-point sum is not guaranteed to equal 1 exactly.
np.isclose(np.sum(data1 * np.diff(bins)), 1.0)
I have the coordinates of four points in the following image. I want to make a mask from the rectangular area defined by these four coordinates. How can I do this? I have tried the following code.
# Binary view of the rotated image: every non-zero pixel is set to 255.
back = np.zeros(rotated_img.shape)
mask = np.zeros(rotated_img.shape)
back[rotated_img > 0] = 255
plt.imshow(back)
plt.show()

# Collect the extent (min/max row and column) of every labelled region in img.
x_lows, x_highs, y_lows, y_highs = [], [], [], []
for lb in label.keys():
    idx = np.where(img == lb)  # idx[0] holds row indices, idx[1] column indices
    y_lows.append(np.min(idx[0]))
    y_highs.append(np.max(idx[0]))
    x_lows.append(np.min(idx[1]))
    x_highs.append(np.max(idx[1]))

# Corners of the box enclosing all regions, ordered
# (x_min, y_min), (x_max, y_min), (x_max, y_max), (x_min, y_max).
x_lo, x_hi = min(x_lows), max(x_highs)
y_lo, y_hi = min(y_lows), max(y_highs)
x1, x2, x3, x4 = x_lo, x_hi, x_hi, x_lo
y1, y2, y3, y4 = y_lo, y_lo, y_hi, y_hi
coords = np.array([[x1, y1], [x2, y2], [x3, y3], [x4, y4]], dtype="float32")
print(coords)

# Append a column of ones (homogeneous coordinates) and map each corner with M.
# NOTE(review): M is defined elsewhere; presumably the affine matrix that
# produced rotated_img -- confirm.
coord_mat = np.concatenate([coords, np.ones((4, 1))], axis=1)
new_coords = [np.dot(M, c) for c in coord_mat]

# Overlay the transformed corners on the binary image and print them.
plt.imshow(back)
for c in new_coords:
    plt.plot(c[0], c[1], 'go--', linewidth=2, markersize=12)
    print(c[0], c[1])
plt.show()
Output:
[[ 0. 0.]
[731. 0.]
[731. 127.]
[ 0. 127.]]
-0.4652977851045641 365.5743741577959
632.5992723813201 0.07437415779594403
696.0992723813201 110.05960043841964
63.03470221489543 475.5596004384196
If by mask you just mean a numpy array that is 0 outside of the rectangle and 1 inside (or conversely, or whatever values you prefer), then cv2 can do that for you, it's just drawing a polygon. Using values from your output:
import cv2
import numpy as np  # the code below refers to the module as `np`
from matplotlib import pyplot as plt

# Blank single-channel canvas that will receive the mask.
x = np.zeros((475, 696), dtype=np.uint8)
# The four transformed corners; converting to int32 drops the fractional part,
# and the reshape gives the (N, 1, 2) layout used for cv2 polygons.
pts = np.array([[-0.4652977851045641, 365.5743741577959],
                [632.5992723813201, 0.07437415779594403],
                [696.0992723813201, 110.05960043841964],
                [63.03470221489543, 475.5596004384196]], np.int32).reshape(-1, 1, 2)
# Fill the polygon with 255 and display the resulting mask.
plt.imshow(cv2.fillPoly(x, [pts], 255), cmap='gray')
This returns:
I'm working on an astrophysics project where I need to measure the density(ne) of the gas in the center of the galaxy by two methods(A and S). I made a plot of ne_s x ne_a and I want to try an exponential fit in this plot. The problems are the following:
the errors in the data are asymmetrical and, apparently, scipy.odr does not accept this type of error. When the errors are included, 'ValueError: could not convert we to a suitable array' is raised.
even if I do not include the errors the fit still does not work.
The code used(errors in the fit not included):
import numpy as np
import matplotlib.pyplot as plt
ne_s = np.array([ 134.70722125, 316.27850769, 403.37221974, 579.91067991,
1103.06258335, 1147.23685549, 115.00820933, 476.42659337,
667.61690967, 403.30988606, 282.08007264, 479.98058352,
897.64247885, 214.75999934, 213.22512064, 491.81749573,
743.68513419, 374.37957281, 362.136037 , 893.88595455])
dne_s_max = np.array([23.6619623 , 5.85802097, 12.02456923, 1.50211648, 5.15987014,
10.3830146 , 10.5274528 , 0.82928872, 2.18586603, 31.95014727,
6.53134179, 2.38392559, 32.2838402 , 5.43629034, 1.02316579,
6.60281602, 14.53943481, 9.16809221, 6.84052648, 12.87655997])
dne_s_min = np.array([21.94513608, 5.80578938, 11.8303456 , 1.49856527, 5.1265976 ,
10.2523836 , 10.12663739, 0.82824884, 2.17914616, 30.55846643,
6.45691351, 2.37446669, 30.87025015, 5.37271061, 1.02087355,
6.5358395 , 14.21332643, 9.0523711 , 6.77187898, 12.64596461])
ne_a = np.array([ 890.61498788, 2872.03715706, 10222.33463389, 1946.48193766,
6695.25304235, 2107.36471192, 891.72010662, 3988.87511761,
11328.9670489 , 1097.38904905, 2896.62668843, 4849.57809801,
5615.96780935, 1415.18564794, 1204.00022768, 3616.05423907,
15638.52683391, 3300.6039601 , 775.28841051, 12325.54379524])
dne_a_max = np.array([1082.33639266, 571.57094375, 2396.39839075, 458.32058555,
796.79916236, 665.95370946, 2262.73423374, 1006.65192577,
1761.9251987 , 1718.78400914, 579.65477159, 245.54811362,
1652.50314639, 401.37677822, 178.03620792, 725.26490794,
6625.62353545, 908.21490446, 719.01117673, 2098.24809312])
dne_a_min = np.array([ 865.33019015, 518.08880981, 1877.85283954, 412.91242092,
724.38681574, 582.52644162, 870.14392196, 866.63643893,
1478.1792513 , 1076.64135559, 521.08794554, 236.2457763 ,
1349.36104495, 362.72343267, 169.23314057, 646.39803115,
4139.5768453 , 789.04878324, 620.55523654, 1720.06369942])
# Pair the lower and upper uncertainties so that errorbar draws asymmetric bars
# (first entry: lower errors, second entry: upper errors).
dne_s = [dne_s_min, dne_s_max]
dne_a = [dne_a_min, dne_a_max]

fig, ax = plt.subplots(1, 1)
# Error bars only (no connecting line), with the data points drawn on top.
ax.errorbar(
    ne_s,
    ne_a,
    xerr=dne_s,
    yerr=dne_a,
    linestyle='none',
    linewidth=0.7,
    capsize=5,
    color='crimson',
)
ax.scatter(ne_s, ne_a, s=15, color='black')
ax.set_ylabel('$n_e(A)$')
ax.set_xlabel('$n_e(S)$')
from scipy.odr import Data, RealData, Model, ODR
def f(B, x):
    """Evaluate the offset exponential model B[0] + B[1] * exp(B[2] * x).

    Args:
        B: Sequence of three parameters (offset, amplitude, rate).
        x: Scalar or NumPy array at which the model is evaluated.

    Returns:
        The model value(s), with the same shape as ``x``.
    """
    return B[0] + B[1] * np.exp(B[2] * x)
# Orthogonal distance regression of ne_a against ne_s with the model f.
# No uncertainties are passed here. scipy.odr's RealData takes a single
# standard deviation per point (sx, sy), so asymmetric error bars would have
# to be symmetrised (e.g. averaged) before they can be supplied.
exponential = Model(f)
data = RealData(ne_s, ne_a)
odr = ODR(data, exponential, beta0=[1, 200, 3e-3])
out = odr.run()

# Evaluate the fitted curve on a sorted, evenly spaced grid: plotting against
# the unsorted ne_s would join the points in storage order and zig-zag.
x_fit = np.linspace(ne_s.min(), ne_s.max(), 200)
ax.plot(x_fit, f(out.beta, x_fit), linewidth=0.7)
Which results in:
And the actual plot is:
So what am I missing here? Did I apply the odr routine erroneously? What should I do to make the fit work properly? And how can I make scipy.odr accept asymmetrical errors?
Important to add that I don't know too much about scipy.odr, I just adapted the documentation example to an exponential function.
Appreciate the help. Let me know if more information is necessary.
I have data like this:
X = array([ 24.41, 54.98, 89.57, 114.26, 133.61, 202.14, 250.99, 321.31,
333.47, 373.79, 422.02, 447.41, 522.47, 549.53, 20.15, 39.12,
73.42, 134.03, 179.86, 262.52, 337.23, 432.68, 253.24, 346.62,
450.1 , 552.22, 656.2 , 33.84, 60.41, 94.88, 147.73, 206.76,
237.12, 372.72, 495.47, 544.47, 28.93, 49.87, 85.15, 143.84,
226.86, 339.15, 393.32, 524.7 , 623.86, 39.22, 96.44, 156.92,
223.88, 271.78, 349.52, 429.66, 523.03, 622.05, 748.29, 646.89,
749.27, 851.37, 851.61])
y = array([ 0.70168044, 4.93931985, 8.71831269, 10.84590729, 12.22458808,
15.46380214, 16.61898425, 17.29600649, 17.34369784, 17.43434118,
17.50907445, 17.57419685, 18.00322011, 18.26260499, 0.03686716,
2.85433237, 7.0779359 , 12.25192523, 14.65463193, 16.79352551,
17.35594282, 17.53284075, 16.65553712, 17.38224061, 17.58297862,
18.29143563, 19.71214346, 2.10666383, 5.59990814, 9.21325511,
13.08716841, 15.60686344, 16.36464679, 17.43271999, 17.80134835,
18.20983513, 1.38643181, 4.29326544, 8.28990266, 12.86092195,
16.1416266 , 17.36179504, 17.46194981, 18.02244612, 19.22640164,
2.86822848, 9.35464796, 13.58885705, 16.07082828, 16.91213557,
17.38928103, 17.52563605, 18.00801144, 19.19976288, 20.8797045 ,
19.5713721 , 20.88735117, 20.40458438, 20.39937509])
When I plot them using:
import matplotlib.pyplot as plt
# Red circular markers at every (X, y) sample.
plt.plot(X, y, "ro", markersize=6)
# Line through the same samples, joined in the order they are stored in X and y
# (no sorting is applied here).
plt.plot(X, y)
I get:
My expectation would be one line that connects from red point to red point, but no matter how I tweak the parameters a subset of points get connected by straight lines. I am reading through the documentation and can't figure out what parameters to tweak to stop these lines from appearing.
Sort your data before doing the line plotting:
# Permutation that puts X in ascending order; apply it to both arrays so the
# line is drawn from left to right.
order = np.argsort(X)
X_sorted, y_sorted = X[order], y[order]

plt.plot(X, y, "ro", markersize=6)
plt.plot(X_sorted, y_sorted)
plt.show()
If you don't do this, the lines will be drawn in the order you have in your data - as you see in your plot.
I have written some code to plot the longitude and latitude of towns/cities in the UK, with the 3D bar height representing the population of these places. I am also trying to colour-code the bars using a colormap so that the variation in population can be seen more easily. However, my code doesn't seem to follow the colormap; instead, all the bars have very similar colours, and I am not sure where I have gone wrong.
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import numpy as np
pop = [189120, 91297, 107123, 107355, 94782, 87590, 142968, 1085810, 117963, 147663, 194189, 187503, 349561, 229700, 535907, 145818, 335145, 110507, 116447, 86011, 88483, 119441, 325949, 106943, 92363, 255394, 109805, 144170, 109185, 468720, 113507, 120046, 104157, 589900, 136362, 88243, 88134, 88855, 91053, 94932, 120256, 162949, 284321, 144957, 474632, 443760, 100160, 552267, 8173941, 211228, 107627, 510746, 174700, 171750, 268064, 128060, 215173, 186682, 289301, 86552, 96555, 159994, 161707, 234982, 154718, 238137, 97886, 95580, 218705, 107926, 109691, 134022, 103886, 518090, 155298, 123187, 253651, 175547, 91703, 102885, 89663, 105878, 270726, 174286, 109015, 179485, 182441, 142723, 99251, 165456, 131982, 91930, 218791, 83641, 103608, 105367, 265178, 100153, 109120, 152841]
lat = [57.14369, 53.55, 51.56844, 51.26249, 51.37795, 52.13459, 53.39337, 52.48142, 53.75, 53.81667, 53.58333, 50.72048, 53.79391, 50.82838, 51.45523, 52.2, 51.48, 51.73575, 51.9, 53.1905, 53.25, 51.88921, 52.40656, 51.11303, 54.52429, 52.92277, 53.52327, 56.5, 50.76871, 55.95206, 50.7236, 54.96209, 51.38914, 55.86515, 51.86568, 53.56539, 53.71667, 54.68611, 50.85519, 51.75369, 51.62907, 53.64904, 53.7446, 52.05917, 53.79648, 52.6386, 53.22683, 53.41058, 51.50853, 51.87967, 51.26667, 53.48095, 54.57623, 52.04172, 54.97328, 51.58774, 52.25, 52.62783, 52.9536, 52.52323, 53.54051, 51.75222, 52.57364, 50.37153, 50.71667, 50.79899, 53.76667, 51.58571, 51.45625, 53.61766, 53.43012, 53.42519, 53.48771, 53.38297, 51.50949, 52.41426, 50.90395, 51.53782, 53.64779, 53.45, 51.90224, 53.40979, 53.00415, 54.90465, 52.56667, 51.62079, 51.55797, 52.67659, 53.68331, 53.39254, 51.65531, 52.51868, 51.50853, 51.34603, 53.53333, 51.31903, 52.58547, 52.18935, 50.81448, 53.95763]
long = [-2.09814, -1.48333, 0.45782, -1.08708, -2.35907, -0.46632, -3.01479, -1.89983, -2.48333, -3.05, -2.43333, -1.8795, -1.75206, -0.13947, -2.59665, 0.11667, -3.18, 0.46958, -2.08333, -2.89189, -1.41667, 0.90421, -1.51217, -0.18312, -1.55039, -1.47663, -1.13691, -2.96667, 0.28453, -3.19648, -3.52751, -1.60168, 0.54863, -4.25763, -2.2431, -0.07553, -1.85, -1.2125, 0.57292, -0.47517, -0.74934, -1.78416, -0.33525, 1.15545, -1.54785, -1.13169, -0.53792, -2.97794, -0.12574, -0.41748, 0.51667, -2.23743, -1.23483, -0.75583, -1.61396, -2.99835, -0.88333, 1.29834, -1.15047, -1.46523, -2.1183, -1.25596, -0.24777, -4.14305, -2.0, -1.09125, -2.71667, 0.60459, -0.97113, -2.1552, -1.35678, -2.32443, -2.29042, -1.4659, -0.59541, -1.78094, -1.40428, 0.71433, -3.00648, -2.73333, -0.20256, -2.15761, -2.18538, -1.38222, -1.81667, -3.94323, -1.78116, -2.44926, -1.49768, -2.58024, -0.39602, -1.9945, -0.12574, -2.97665, -2.61667, -0.55893, -2.12296, -2.22001, -0.37126, -1.08271]
fig = plt.figure()
# Create the 3D axes through the figure so they are actually attached to it;
# a bare Axes3D(fig) is no longer added to the figure by recent Matplotlib.
ax = fig.add_subplot(projection='3d')

# Bar positions (longitude, latitude) and log10 of the population.
X, Y, Z = np.array(long), np.array(lat), np.log10(np.array(pop))
# Colours are looked up from Z scaled by its maximum, i.e. without stretching
# the values over the full 0..1 range of the colormap.
colours = plt.cm.rainbow_r(Z / np.log10(max(pop)))
plot1 = ax.bar3d(X, Y, Z, dx=0.2, dy=0.2, dz=Z / 3, color=colours)

ax.set_xlabel('\nLongitude (\u00B0)')
ax.set_ylabel('\nLatitude (\u00B0)')
ax.set_zlabel('\nlog\u2081\u2080(Population)')
ax.set_zlim3d(4, 7)
ax.view_init(elev=70, azim=280)

# Colour bar built from a standalone mappable that carries the Z values.
colourMap = plt.cm.ScalarMappable(cmap=plt.cm.rainbow_r)
colourMap.set_array(Z)
# Keep the Colorbar object itself (set_label returns None) and tell Matplotlib
# which axes to take the space from.
colBar = fig.colorbar(colourMap, ax=ax)
colBar.set_label('log\u2081\u2080(Population)')
plt.show()
The code produces this plot:
I think that the majority of the bars should be red/orange in colour, with only the major cities being yellow/green/blue...
(Z/np.log10(max(pop))).min() is 0.7. So all values are indeed in the upper range of the colormap.
You probably want to normalize your data before giving it to the colormap:
# Scale Z by its maximum once, then stretch that range onto 0..1 so the whole
# colormap is used.
scaled = Z / np.log10(max(pop))
norm = plt.Normalize(scaled.min(), scaled.max())
colours = plt.cm.rainbow_r(norm(scaled))