Unstable results from Bayesian statistical approach(pymc3) - python

While studying the Bayesian statistical approach I found that my model gives unstable results. I have run many experiments with different samplers (Metropolis and NUTS) and different values of the sampling parameters `tune` and `draws`. Besides comparing against the correct answer, which I know, I also used autocorrplot and an acceptance-rate check on the model trace to assess the results. Could you please explain what is wrong? How can I trust this approach given what I see? Below are my data and code:
%matplotlib inline
from IPython.core.pylabtools import figsize
import numpy as np
from matplotlib import pyplot as plt
figsize(11, 9)
import scipy.stats as stats
import pymc3 as pm
import theano.tensor as tt
import random
import pandas as pd
count_data = [ 13. , 24. , 8. , 24. ,
7. , 35. , 14. , 11. ,
15. , 11. , 22. , 22. ,
11. , 57. , 11. , 19. ,
29. , 6. , 19. , 12. ,
22. , 12. , 18. , 72. ,
32. , 9. , 7. , 13. ,
19. , 23. , 27. , 20. ,
6. , 17. , 13. , 10. ,
14. , 6. , 16. , 15. ,
7. , 2. , 15. , 15. ,
19. , 70. , 49. , 7. ,
53. , 22. , 21. , 31. ,
19. , 11. , 18. , 20. ,
12. , 35. , 17. , 23. ,
17. , 4. , 2. , 31. ,
30. , 13. , 27. , 0. ,
39. , 37. , 5. , 14. ,
13. , 22. , 44.99931616, 55.99931616,
39.99931616, 55.99931616, 38.99931616, 66.99931616,
45.99931616, 42.99931616, 46.99931616, 42.99931616,
53.99931616, 53.99931616, 42.99931616, 88.99931616,
42.99931616, 50.99931616, 60.99931616, 37.99931616,
50.99931616, 43.99931616, 53.99931616, 43.99931616,
49.99931616, 103.99931616, 63.99931616, 40.99931616,
38.99931616, 44.99931616, 50.99931616, 54.99931616,
58.99931616, 51.99931616, 37.99931616, 48.99931616,
44.99931616, 41.99931616, 45.99931616, 37.99931616,
47.99931616, 46.99931616]
figsize(12.5, 3.5)
n_count_data = len(count_data)
plt.bar(np.arange(n_count_data), count_data, color="#348ABD")
plt.xlabel("Time (days)")
plt.ylabel("count of text-msgs received")
plt.title("Did the user's texting habits change over time?")
plt.xlim(0, n_count_data);
# Three-rate Poisson change-point model: the daily count follows rate
# lambda_1 up to day tau1, lambda_2 up to day tau2 and lambda_3 afterwards.
with pm.Model() as model:
    # count_data is a plain Python list, which has no .mean() method;
    # np.mean accepts any sequence.
    alpha = 1.0 / np.mean(count_data)

    # Exponential priors on the three rates, scaled by the mean count.
    lambda_1 = pm.Exponential("lambda_1", alpha)
    lambda_2 = pm.Exponential("lambda_2", alpha)
    lambda_3 = pm.Exponential("lambda_3", alpha)

    # Force tau1 <= tau2.  With two independent uniform change points the
    # switch below never selects lambda_2 whenever tau1 > tau2, so the
    # posterior has an extra mode in which lambda_2 is unconstrained by the
    # data and the sampler can wander between the two configurations.
    tau1 = pm.DiscreteUniform("tau1", lower=0, upper=n_count_data - 1)
    tau2 = pm.DiscreteUniform("tau2", lower=tau1, upper=n_count_data - 1)

    idx = np.arange(n_count_data)  # day index
    lambda_ = pm.math.switch(
        tau2 >= idx,
        pm.math.switch(tau1 >= idx, lambda_1, lambda_2),
        lambda_3,
    )

    observation = pm.Poisson("obs", lambda_, observed=count_data)

    # Sampling must happen inside the model context.
    trace = pm.sample(5000, tune=20000)
lambda_1_samples = trace['lambda_1']
lambda_2_samples = trace['lambda_2']
lambda_3_samples = trace['lambda_3']
tau_samples1 = trace['tau1']
tau_samples2 = trace['tau2']
figsize(16.5, 12)
#histogram of the samples:
lambda_all = np.hstack([lambda_1_samples, lambda_2_samples, lambda_3_samples])
tau_all = np.hstack([tau_samples1, tau_samples2])
la_min = np.min(lambda_all) - 1
la_max = np.max(lambda_all) + 1
ta_min = np.min(tau_all) - 1
ta_max = np.max(tau_all) + 1
# One histogram panel per rate parameter (rows 1-3 of a 5-row figure).
# All three panels share the same x-range so the posteriors can be compared.
# Raw strings are used for every label: "\l" is not a valid escape sequence
# in a normal string literal.
lambda_panels = [
    (511, 1, lambda_1_samples, "#A60628"),
    (512, 2, lambda_2_samples, "#7A68A6"),
    (513, 3, lambda_3_samples, "#7A68A6"),
]
for position, number, samples, colour in lambda_panels:
    ax = plt.subplot(position)
    ax.set_autoscaley_on(False)
    plt.hist(samples, histtype='stepfilled', bins=30, alpha=0.85,
             label=rf"posterior of $\lambda_{number}$", color=colour,
             density=True)
    plt.legend(loc="upper left")
    if number == 1:
        # The figure title is attached to the top panel only.
        plt.title(r"""Posterior distributions of the variables
$\lambda_1,\;\lambda_2,\;\lambda_3,\;\tau1,\;\tau2$""")
    plt.xlim([la_min, la_max])
    plt.xlabel(rf"$\lambda_{number}$ value")
plt.subplot(514)
w = 1.0 / tau_samples1.shape[0] * np.ones_like(tau_samples1)
plt.hist(tau_samples1, bins=2*len(np.unique(tau_samples1)), alpha=1,
label=r"posterior of $\tau1$",
color="#467821", weights=w, rwidth=2.)
plt.xticks(np.arange(n_count_data))
plt.legend(loc="upper left")
plt.ylim([0, 1.0])
plt.xlim([35, len(count_data)-20])
plt.xlabel(r"$\tau1$ (in days)")
plt.subplot(515)
w = 1.0 / tau_samples2.shape[0] * np.ones_like(tau_samples2)
plt.hist(tau_samples2, bins=2*len(np.unique(tau_samples2)), alpha=1,
label=r"posterior of $\tau2$",
color="#467821", weights=w, rwidth=2.)
plt.xticks(np.arange(n_count_data))
plt.legend(loc="upper left")
plt.ylim([0, 1.0])
plt.xlim([35, len(count_data)-20])
plt.xlabel(r"$\tau2$ (in days)")
plt.ylabel("probability");
with model:
_ = pm.plots.autocorrplot(trace,figsize=(17,15))
# Rough acceptance rate per variable: the fraction of consecutive draws whose
# value changed.  n draws contain n - 1 transitions, so that is the
# denominator (dividing by n slightly under-reports the rate).
for var_name in ("tau1", "tau2", "lambda_1", "lambda_2", "lambda_3"):
    samples = trace[var_name]
    n_transitions = samples.shape[0] - 1
    accept = np.sum(samples[1:] != samples[:-1])
    # Guard against a trace with fewer than two draws.
    rate = accept / n_transitions if n_transitions > 0 else float("nan")
    print(f"Acceptance Rate for {var_name}: ", rate)

Related

Code for rolling filter in python behaving oddly (Numpy Arrays)

I wanted to implement a simple rolling average filter in Python using numpy.sum().
Given that x is a NumPy array of the first 30 natural numbers (x = np.linspace(1, 30, 30)),
np.sum(x[8:10]) returns 19, which is right since x[8] = 9 and x[9] = 10.
But when I do the following I get unexpected values (here I chose 2 as the size of the window).
import numpy as np
# Input: the floats 1.0 .. 30.0.
x = np.linspace(1, 30, 30)
print(x)
y = x # intended as a duplicate, but this only binds a second name to the same array; no copy is made
z= len(x)
# For every index from 2 on, store the sum of the two preceding elements.
# Because y and x are the same array, each write to y[i] also changes x[i],
# so later windows add up values that were already overwritten.
for i in range(2,z):
y[i] = np.sum(x[i-2:i])
print(y/2,"\n")
print(y,"\n")
print(x)
The result i get is
[ 1. 2. 3. 4. 5. 6. 7. 8. 9. 10. 11. 12. 13. 14. 15. 16. 17. 18.
19. 20. 21. 22. 23. 24. 25. 26. 27. 28. 29. 30.]
[5.000000e-01 1.000000e+00 1.500000e+00 2.500000e+00 4.000000e+00
6.500000e+00 1.050000e+01 1.700000e+01 2.750000e+01 4.450000e+01
7.200000e+01 1.165000e+02 1.885000e+02 3.050000e+02 4.935000e+02
7.985000e+02 1.292000e+03 2.090500e+03 3.382500e+03 5.473000e+03
8.855500e+03 1.432850e+04 2.318400e+04 3.751250e+04 6.069650e+04
9.820900e+04 1.589055e+05 2.571145e+05 4.160200e+05 6.731345e+05]
[1.000000e+00 2.000000e+00 3.000000e+00 5.000000e+00 8.000000e+00
1.300000e+01 2.100000e+01 3.400000e+01 5.500000e+01 8.900000e+01
1.440000e+02 2.330000e+02 3.770000e+02 6.100000e+02 9.870000e+02
1.597000e+03 2.584000e+03 4.181000e+03 6.765000e+03 1.094600e+04
1.771100e+04 2.865700e+04 4.636800e+04 7.502500e+04 1.213930e+05
1.964180e+05 3.178110e+05 5.142290e+05 8.320400e+05 1.346269e+06]
[1.000000e+00 2.000000e+00 3.000000e+00 5.000000e+00 8.000000e+00
1.300000e+01 2.100000e+01 3.400000e+01 5.500000e+01 8.900000e+01
1.440000e+02 2.330000e+02 3.770000e+02 6.100000e+02 9.870000e+02
1.597000e+03 2.584000e+03 4.181000e+03 6.765000e+03 1.094600e+04
1.771100e+04 2.865700e+04 4.636800e+04 7.502500e+04 1.213930e+05
1.964180e+05 3.178110e+05 5.142290e+05 8.320400e+05 1.346269e+06]
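Try this; it should solve your problem. `y = x` does not copy the array: it makes `y` a second name for the same data, so every assignment to `y[i]` also overwrites `x[i]` and the following windows sum the already-modified values. Make an independent copy instead: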
y = x.copy()

Unique trend Curve Fitting

I have data like this:
x = np.array([ 0. , 3. , 3.3 , 10. , 18. , 43. , 80. ,
120. , 165. , 210. , 260. , 310. , 360. , 410. ,
460. , 510. , 560. , 610. , 660. , 710. , 760. ,
809.5 , 859. , 908.5 , 958. , 1007.5 , 1057. , 1106.5 ,
1156. , 1205.5 , 1255. , 1304.5 , 1354. , 1403.5 , 1453. ,
1502.5 , 1552. , 1601.5 , 1651. , 1700.5 , 1750. , 1799.5 ,
1849. , 1898.5 , 1948. , 1997.5 , 2047. , 2096.5 , 2146. ,
2195.5 , 2245. , 2294.5 , 2344. , 2393.5 , 2443. , 2492.5 ,
2542. , 2591.5 , 2640. , 2690. , 2740. , 2789.67, 2839.33,
2891.5 ])
# Measured values, one per entry of x (64 points).
# np.array is spelled out: a bare `array` is not defined by `import numpy as np`.
y = np.array([ 1.45  ,  1.65  ,  5.8   ,  6.8   ,  8.0355,  8.0379,  8.04  ,
               8.0505,  8.175 ,  8.3007,  8.4822,  8.665 ,  8.8476,  9.0302,
               9.528 ,  9.6962,  9.864 , 10.032 , 10.2   , 10.9222, 11.0553,
              11.1355, 11.2228, 11.3068, 11.3897, 11.4704, 11.5493, 11.6265,
              11.702 , 11.7768, 11.8491, 11.9208, 11.9891, 12.0571, 12.1247,
              12.1912, 12.2558, 12.3181, 12.3813, 12.4427, 12.503 , 12.5638,
              12.6226, 12.6807, 12.7384, 12.7956, 12.8524, 12.9093, 12.9663,
              13.0226, 13.0786, 13.1337, 13.1895, 13.2465, 13.3017, 13.3584,
              13.4156, 13.4741, 13.5311, 13.5899, 13.6498, 13.6533, 13.657 ,
              13.6601])
and look like this :
I need to fit a curve to this trend. I am using a moving average (MA) for smoothing, and it looks like this:
where the magenta curve is the MA. I am also using a 5th-order polynomial, and it looks like this:
where the blue curve is the polynomial fit. I have tried higher orders, but the result gets worse. How can I get a result whose first point is at (0,0) and that looks like the black curve?
This is my code :
import numpy as np
from scipy import interpolate
def movingaverage(interval, window_size):
    """Smooth *interval* with an unweighted moving average.

    Parameters
    ----------
    interval : array_like
        One-dimensional sequence of values to smooth.
    window_size : int or float
        Number of samples in the averaging window; truncated with ``int``.

    Returns
    -------
    numpy.ndarray
        The convolution of *interval* with a uniform window, computed with
        ``mode='same'``.  Near the ends the window only partially overlaps
        the data, so the edge values are not true averages.

    Raises
    ------
    ValueError
        If the truncated window size is smaller than 1.
    """
    width = int(window_size)
    if width < 1:
        raise ValueError("window_size must be at least 1")
    # Normalise by the truncated width so the weights always sum to 1, even
    # when a non-integer window_size is passed.
    window = np.ones(width) / float(width)
    return np.convolve(interval, window, 'same')
# Smooth the data with a 2-point moving average.
y_av = movingaverage(y, 2)
# Regular grid (step 30) on which the fitted curve is evaluated.
X = np.arange(0, np.max(x), 30)
# Linear interpolation of the smoothed data on the regular grid.
yinter = interpolate.interp1d(x, y_av)(X)
# 5th-order polynomial fitted to the smoothed data.
z = np.poly1d(np.polyfit(x, y_av, 5))
Y = z(X)
plt.figure(1)
# NOTE(review): the original plotted undefined names xm/ym; y_av has one
# value per entry of x, so x/y are assumed to be what was meant.
plt.plot(x, y, '*-r')      # raw data
plt.plot(x, y_av, '.-m')   # moving average
plt.plot(X, Y, '*-b')      # polynomial fit
To do this, you should fit an analytical function (with parameters) chosen on the basis of some assumption about the data, not necessarily a polynomial. You can use curve_fit from scipy.optimize to find the unknown parameters of that analytical function that best fit your input data.
For example:
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
# your analytical function (theoretical function) with parameters: a, b (or more)
def your_analytical_func(x, a, b):
    """Example model for curve fitting: ``a * log(x + b)``.

    *x* is the independent variable (scalar or array); *a* and *b* are the
    parameters to be fitted.  Replace the expression with whatever
    theoretical function suits the data.
    """
    return a * np.log(x + b)  # this is just for example
# or using anonymous (lambda) function
# your_analytical_func = lambda x, a, b: a * np.log(x + b)
# Fit for the parameters a, b (or more) of the function your_analytical_func:
popt, pcov = curve_fit(your_analytical_func, x, y)
plt.plot(x, y, 'r.', label='incoming data')
plt.plot(x, your_analytical_func(x, *popt), '-', color="black", label='fit: your_analytical_func(x, a=%5.3f, b=%5.3f)' % tuple(popt))
plt.legend()

Irregularly spaced heatmap

I would like to create a heatmap based on the matrix and bounds below and then plot the data (x1, x2) to see dots falling into the respective classes.
The best I could do was use seaborn, but since the classes on both axes are not evenly spaced, it's hard to read the values corresponding to a dot, on both axes.
Is there a way (using seaborn or any other library) to have the cell sizes of the heatmap proportional to the numbers of bounds1, bounds2, so that the values on the axis respect the right proportions?
Below my example code.
Thanks in advance!
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns
import numpy as np
bounds1 = [ 0. , 3. , 27.25 , 51.5 , 75.75 , 100. ]
bounds2 = [ 0. , 127., 165., 334. , 522. , 837., 1036., 1316., 1396., 3000]
matrix = [[0.3 , 0.5 , 0.7 , 0.9 , 1. , 0.9 , 0.7 , 0.4 , 0.3 , 0.3 ],
[0.22725, 0.37875, 0.53025, 0.68175, 0.7575, 0.68175, 0.53025, 0.303, 0.22725, 0.22725],
[0.1545 , 0.2575 , 0.3605 , 0.4635 , 0.515 , 0.4635 , 0.3605 , 0.206, 0.1545 , 0.1545 ],
[0.08175, 0.13625, 0.19075, 0.24525, 0.2725, 0.24525, 0.19075, 0.109, 0.08175, 0.08175],
[0.009 , 0.015 , 0.021 , 0.027 , 0.03 , 0.027 , 0.021 , 0.012, 0.009 , 0.009 ],
[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ]]
x2 = np.array([1.7765000e+00, 3.9435000e+00, 4.5005002e+00, 4.5005002e+00,
5.0325003e+00, 6.0124998e+00, 7.0035005e+00, 8.5289993e+00,
1.0150000e+01, 1.1111500e+01, 1.2193500e+01, 1.2193500e+01,
1.2193500e+01, 1.3665500e+01, 1.4780001e+01, 1.5908000e+01,
1.7007000e+01, 1.8597000e+01, 2.0439001e+01, 2.2047001e+01,
2.4724501e+01, 2.7719501e+01, 3.0307501e+01, 3.3042500e+01,
3.6326000e+01, 3.8622997e+01, 4.1292500e+01, 4.4293495e+01,
4.7881500e+01, 5.1105499e+01, 5.3708996e+01, 5.6908497e+01,
5.9103497e+01, 6.1926003e+01, 6.6175499e+01, 6.9841499e+01,
7.3534996e+01, 7.8712997e+01, 8.3992500e+01, 8.7227493e+01,
9.1489487e+01, 9.6500992e+01, 1.0068549e+02, 1.0625399e+02,
1.1245149e+02, 1.1828050e+02, 1.2343950e+02, 1.2875299e+02,
1.3531699e+02, 1.4146500e+02, 1.4726399e+02, 1.5307101e+02,
1.5917000e+02, 1.6554350e+02, 1.7167050e+02, 1.7897350e+02,
1.8766650e+02, 1.9705751e+02, 2.0610300e+02, 2.1421350e+02,
2.2146150e+02, 2.2975949e+02, 2.3886848e+02, 2.4766153e+02,
2.5618802e+02, 2.6506250e+02, 2.7528250e+02, 2.8465201e+02,
2.9246451e+02, 3.0088300e+02, 3.1069800e+02, 3.2031000e+02,
3.2950650e+02, 3.3929001e+02, 3.4919598e+02, 3.5904755e+02,
3.6873303e+02, 3.7849451e+02, 3.8831549e+02, 3.9915201e+02,
4.1044501e+02, 4.2201651e+02, 4.3467300e+02, 4.4735904e+02,
4.5926651e+02, 4.7117001e+02, 4.8231406e+02, 4.9426105e+02,
5.0784149e+02, 5.2100049e+02, 5.3492249e+02, 5.4818701e+02,
5.6144202e+02, 5.7350153e+02, 5.8634998e+02, 5.9905096e+02,
6.1240802e+02, 6.2555353e+02, 6.3893542e+02, 6.5263202e+02,
6.6708154e+02, 6.8029950e+02, 6.9236456e+02, 7.0441150e+02,
7.1579163e+02, 7.2795203e+02, 7.4106995e+02, 7.5507953e+02,
7.6881946e+02, 7.8363702e+02, 7.9864905e+02, 8.1473901e+02,
8.3018762e+02, 8.4492249e+02, 8.6007306e+02, 8.7455353e+02,
8.8938556e+02, 9.0509601e+02, 9.2196307e+02, 9.3774091e+02,
9.5391345e+02, 9.7015198e+02, 9.8671466e+02, 1.0042726e+03,
1.0209606e+03, 1.0379355e+03, 1.0547625e+03, 1.0726985e+03,
1.0912705e+03, 1.1100559e+03, 1.1288949e+03, 1.1476450e+03,
1.1654260e+03, 1.1823262e+03, 1.1997356e+03, 1.2171041e+03,
1.2353951e+03, 1.2535184e+03, 1.2718250e+03, 1.2903676e+03,
1.3086545e+03, 1.3270005e+03, 1.3444775e+03, 1.3612805e+03,
1.3784171e+03, 1.3958615e+03, 1.4131825e+03, 1.4311034e+03,
1.4489685e+03, 1.4677334e+03, 1.4869026e+03, 1.5062087e+03,
1.5258719e+03, 1.5452015e+03, 1.5653271e+03, 1.5853635e+03,
1.6053860e+03, 1.6247255e+03, 1.6436824e+03, 1.6632330e+03,
1.6819221e+03, 1.7011276e+03, 1.7198782e+03, 1.7383060e+03,
1.7565670e+03, 1.7749023e+03, 1.7950280e+03, 1.8149988e+03,
1.8360586e+03, 1.8572985e+03, 1.8782219e+03, 1.8991390e+03,
1.9200371e+03, 1.9395586e+03, 1.9595035e+03, 1.9790668e+03,
1.9995455e+03, 2.0203715e+03, 2.0416791e+03, 2.0616587e+03,
2.0819294e+03, 2.1032202e+03, 2.1253989e+03, 2.1470112e+03,
2.1686660e+03, 2.1908926e+03, 2.2129436e+03, 2.2349995e+03,
2.2567026e+03, 2.2784224e+03, 2.2997925e+03, 2.3198750e+03,
2.3393770e+03, 2.3588149e+03, 2.3783970e+03, 2.3988135e+03,
2.4175618e+03, 2.4363840e+03, 2.4572385e+03, 2.4773455e+03,
2.4965142e+03, 2.5157107e+03, 2.5354666e+03, 2.5554331e+03,
2.5757551e+03, 2.5955181e+03, 2.6157085e+03, 2.6348906e+03,
2.6535190e+03, 2.6727512e+03, 2.6923147e+03, 2.7118843e+03])
x1 = np.array([28.427988 , 28.891748 , 30.134018 , 29.833858 , 30.540195 ,
31.762226 , 32.163025 , 31.623648 , 31.964993 , 32.73733 ,
32.562325 , 32.89953 , 33.064743 , 32.76882 , 32.1024 ,
32.171394 , 33.363426 , 34.328148 , 36.24527 , 35.877434 ,
35.29762 , 35.193832 , 35.61119 , 36.50994 , 35.615444 ,
35.2758 , 34.447975 , 34.183205 , 35.781815 , 35.510662 ,
35.277668 , 35.26543 , 34.944313 , 35.301414 , 34.63578 ,
34.36223 , 35.496872 , 35.488243 , 35.494583 , 35.21087 ,
34.275524 , 33.945126 , 33.63986 , 33.904293 , 33.553017 ,
34.348408 , 33.84105 , 32.8437 , 32.19287 , 31.688663 ,
32.035015 , 31.641226 , 31.138266 , 30.629492 , 30.111526 ,
29.571909 , 29.244211 , 28.42031 , 27.908197 , 27.316568 ,
26.909412 , 25.928982 , 25.03047 , 24.354822 , 23.54626 ,
22.88031 , 23.000391 , 22.300774 , 21.988918 , 21.467094 ,
21.730871 , 23.060678 , 22.910374 , 24.45383 , 23.610855 ,
24.594006 , 24.263508 , 25.077124 , 23.9773 , 22.611958 ,
21.88306 , 21.014484 , 19.674965 , 18.745205 , 20.225956 ,
19.433172 , 19.451014 , 18.264421 , 17.588757 , 16.837574 ,
17.252535 , 18.967127 , 19.111462 , 19.90994 , 19.15653 ,
18.49522 , 17.376019 , 17.35794 , 16.200405 , 17.9445 ,
18.545986 , 17.69698 , 20.665318 , 20.90071 , 20.32658 ,
21.27805 , 21.145922 , 19.32898 , 19.160307 , 18.60541 ,
18.902897 , 18.843922 , 17.890692 , 18.197395 , 17.662706 ,
18.578962 , 18.898802 , 18.435923 , 17.644451 , 16.393314 ,
15.570944 , 16.779602 , 15.74104 , 15.041967 , 14.544464 ,
15.014386 , 14.156769 , 13.591232 , 12.386208 , 11.133551 ,
10.472783 , 9.7923355 , 10.571391 , 11.245247 , 10.063455 ,
10.742685 , 8.819294 , 8.141182 , 6.9487176 , 6.3410373 ,
7.033326 , 6.5856943 , 6.0214376 , 6.6087174 , 9.583405 ,
9.4608135 , 9.183213 , 10.673293 , 9.477165 , 8.667246 ,
7.3392615 , 6.2609572 , 5.5752296 , 4.4312773 , 4.0997415 ,
4.127005 , 4.072541 , 3.5704772 , 2.7370691 , 2.3750854 ,
2.0708292 , 3.4086852 , 3.8237891 , 3.9072614 , 3.1760776 ,
2.4963813 , 1.5232614 , 0.931248 , 0.49159998, 0.21676798,
0.874704 , 2.0560641 , 1.5494559 , 3.0944476 , 2.6151357 ,
2.7285278 , 3.4450078 , 3.4614875 , 5.779072 , 8.063728 ,
7.7077436 , 7.8576636 , 7.4494233 , 6.5933595 , 6.1667037 ,
4.9452477 , 5.6894236 , 6.0578876 , 5.9922714 , 5.060448 ,
6.074832 , 6.7870073 , 5.7388477 , 5.8681116 , 4.7604475 ,
4.2740316 , 3.785328 , 4.060576 , 4.9203672 , 5.355184 ,
4.793792 , 3.8007674 , 3.6115997 , 2.7794237 , 2.5385118 ,
5.1410074 , 5.5506234 , 7.638063 , 7.512544 , 6.617264 ,
6.5637918 , 6.452815 ])
# define colormap
N=5 # number of desired color bins
cmap = plt.cm.RdYlGn_r
cmaplist = [cmap(i) for i in range(cmap.N)]
cmap = cmap.from_list('Custom cmap', cmaplist, cmap.N)
# define the bins and normalize
bounds = np.linspace(0, 1, N+1)
norm = matplotlib.colors.BoundaryNorm(bounds, cmap.N)
fig, ax = plt.subplots(figsize=(15,10))
ax = sns.heatmap(matrix, cmap=cmap, norm=norm, ax=ax, linewidths=0.1)
ax.invert_yaxis()
ax.tick_params(axis = 'both', which = 'major')
ax.set_xticklabels(bounds2)
ax.set_yticklabels(bounds1)
cbar = ax.collections[0].colorbar
cbar.set_ticks(bounds)
cbar.set_ticklabels(bounds)
x2_idx = np.interp(x2, bounds2, range(len(bounds2)))+0.5 # +0.5: to shift data start to bin center
x1_idx = np.interp(x1, bounds1, range(len(bounds1)))+0.5 # +0.5: to shift data start to bin center
ax.plot(x2_idx, x1_idx, color='black', marker='o')
plt.show()
plt.close()
pcolormesh can be handy here. It allows assigning colors to an unevenly-spaced grid. Note that the 6 boundaries in bounds1 delimit 5 rows, so the last row of matrix will be ignored. Similarly, the 10 boundaries in bounds2 delimit 9 columns, ignoring the last one. You might want to add an extra boundary at the start or end if you need all cells.
The x-ticks can be rotated to avoid overlapping.
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns
import numpy as np
bounds1 = [ 0. , 3. , 27.25 , 51.5 , 75.75 , 100. ]
bounds2 = [ 0. , 127., 165., 334. , 522. , 837., 1036., 1316., 1396., 3000]
matrix = [[0.3 , 0.5 , 0.7 , 0.9 , 1. , 0.9 , 0.7 , 0.4 , 0.3 , 0.3 ],
[0.22725, 0.37875, 0.53025, 0.68175, 0.7575, 0.68175, 0.53025, 0.303, 0.22725, 0.22725],
[0.1545 , 0.2575 , 0.3605 , 0.4635 , 0.515 , 0.4635 , 0.3605 , 0.206, 0.1545 , 0.1545 ],
[0.08175, 0.13625, 0.19075, 0.24525, 0.2725, 0.24525, 0.19075, 0.109, 0.08175, 0.08175],
[0.009 , 0.015 , 0.021 , 0.027 , 0.03 , 0.027 , 0.021 , 0.012, 0.009 , 0.009 ],
[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ]]
x2 = np.array([1.7765000e+00, 3.9435000e+00, 4.5005002e+00, 4.5005002e+00, 5.0325003e+00, 6.0124998e+00, 7.0035005e+00, 8.5289993e+00, 1.0150000e+01, 1.1111500e+01, 1.2193500e+01, 1.2193500e+01, 1.2193500e+01, 1.3665500e+01, 1.4780001e+01, 1.5908000e+01, 1.7007000e+01, 1.8597000e+01, 2.0439001e+01, 2.2047001e+01, 2.4724501e+01, 2.7719501e+01, 3.0307501e+01, 3.3042500e+01, 3.6326000e+01, 3.8622997e+01, 4.1292500e+01, 4.4293495e+01, 4.7881500e+01, 5.1105499e+01, 5.3708996e+01, 5.6908497e+01, 5.9103497e+01, 6.1926003e+01, 6.6175499e+01, 6.9841499e+01, 7.3534996e+01, 7.8712997e+01, 8.3992500e+01, 8.7227493e+01, 9.1489487e+01, 9.6500992e+01, 1.0068549e+02, 1.0625399e+02, 1.1245149e+02, 1.1828050e+02, 1.2343950e+02, 1.2875299e+02, 1.3531699e+02, 1.4146500e+02, 1.4726399e+02, 1.5307101e+02, 1.5917000e+02, 1.6554350e+02, 1.7167050e+02, 1.7897350e+02, 1.8766650e+02, 1.9705751e+02, 2.0610300e+02, 2.1421350e+02, 2.2146150e+02, 2.2975949e+02, 2.3886848e+02, 2.4766153e+02, 2.5618802e+02, 2.6506250e+02, 2.7528250e+02, 2.8465201e+02, 2.9246451e+02, 3.0088300e+02, 3.1069800e+02, 3.2031000e+02, 3.2950650e+02, 3.3929001e+02, 3.4919598e+02, 3.5904755e+02, 3.6873303e+02, 3.7849451e+02, 3.8831549e+02, 3.9915201e+02, 4.1044501e+02, 4.2201651e+02, 4.3467300e+02, 4.4735904e+02, 4.5926651e+02, 4.7117001e+02, 4.8231406e+02, 4.9426105e+02, 5.0784149e+02, 5.2100049e+02, 5.3492249e+02, 5.4818701e+02, 5.6144202e+02, 5.7350153e+02, 5.8634998e+02, 5.9905096e+02, 6.1240802e+02, 6.2555353e+02, 6.3893542e+02, 6.5263202e+02, 6.6708154e+02, 6.8029950e+02, 6.9236456e+02, 7.0441150e+02, 7.1579163e+02, 7.2795203e+02, 7.4106995e+02, 7.5507953e+02, 7.6881946e+02, 7.8363702e+02, 7.9864905e+02, 8.1473901e+02, 8.3018762e+02, 8.4492249e+02, 8.6007306e+02, 8.7455353e+02, 8.8938556e+02, 9.0509601e+02, 9.2196307e+02, 9.3774091e+02, 9.5391345e+02, 9.7015198e+02, 9.8671466e+02, 1.0042726e+03, 1.0209606e+03, 1.0379355e+03, 1.0547625e+03, 1.0726985e+03, 1.0912705e+03, 1.1100559e+03, 1.1288949e+03, 1.1476450e+03, 
1.1654260e+03, 1.1823262e+03, 1.1997356e+03, 1.2171041e+03, 1.2353951e+03, 1.2535184e+03, 1.2718250e+03, 1.2903676e+03, 1.3086545e+03, 1.3270005e+03, 1.3444775e+03, 1.3612805e+03, 1.3784171e+03, 1.3958615e+03, 1.4131825e+03, 1.4311034e+03, 1.4489685e+03, 1.4677334e+03, 1.4869026e+03, 1.5062087e+03, 1.5258719e+03, 1.5452015e+03, 1.5653271e+03, 1.5853635e+03, 1.6053860e+03, 1.6247255e+03, 1.6436824e+03, 1.6632330e+03, 1.6819221e+03, 1.7011276e+03, 1.7198782e+03, 1.7383060e+03, 1.7565670e+03, 1.7749023e+03, 1.7950280e+03, 1.8149988e+03, 1.8360586e+03, 1.8572985e+03, 1.8782219e+03, 1.8991390e+03, 1.9200371e+03, 1.9395586e+03, 1.9595035e+03, 1.9790668e+03, 1.9995455e+03, 2.0203715e+03, 2.0416791e+03, 2.0616587e+03, 2.0819294e+03, 2.1032202e+03, 2.1253989e+03, 2.1470112e+03, 2.1686660e+03, 2.1908926e+03, 2.2129436e+03, 2.2349995e+03, 2.2567026e+03, 2.2784224e+03, 2.2997925e+03, 2.3198750e+03, 2.3393770e+03, 2.3588149e+03, 2.3783970e+03, 2.3988135e+03, 2.4175618e+03, 2.4363840e+03, 2.4572385e+03, 2.4773455e+03, 2.4965142e+03, 2.5157107e+03, 2.5354666e+03, 2.5554331e+03, 2.5757551e+03, 2.5955181e+03, 2.6157085e+03, 2.6348906e+03, 2.6535190e+03, 2.6727512e+03, 2.6923147e+03, 2.7118843e+03])
x1 = np.array([28.427988, 28.891748, 30.134018, 29.833858, 30.540195, 31.762226, 32.163025, 31.623648, 31.964993, 32.73733, 32.562325, 32.89953, 33.064743, 32.76882, 32.1024, 32.171394, 33.363426, 34.328148, 36.24527, 35.877434, 35.29762, 35.193832, 35.61119, 36.50994, 35.615444, 35.2758, 34.447975, 34.183205, 35.781815, 35.510662, 35.277668, 35.26543, 34.944313, 35.301414, 34.63578, 34.36223, 35.496872, 35.488243, 35.494583, 35.21087, 34.275524, 33.945126, 33.63986, 33.904293, 33.553017, 34.348408, 33.84105, 32.8437, 32.19287, 31.688663, 32.035015, 31.641226, 31.138266, 30.629492, 30.111526, 29.571909, 29.244211, 28.42031, 27.908197, 27.316568, 26.909412, 25.928982, 25.03047, 24.354822, 23.54626, 22.88031, 23.000391, 22.300774, 21.988918, 21.467094, 21.730871, 23.060678, 22.910374, 24.45383, 23.610855, 24.594006, 24.263508, 25.077124, 23.9773, 22.611958, 21.88306, 21.014484, 19.674965, 18.745205, 20.225956, 19.433172, 19.451014, 18.264421, 17.588757, 16.837574, 17.252535, 18.967127, 19.111462, 19.90994, 19.15653, 18.49522, 17.376019, 17.35794, 16.200405, 17.9445, 18.545986, 17.69698, 20.665318, 20.90071, 20.32658, 21.27805, 21.145922, 19.32898, 19.160307, 18.60541, 18.902897, 18.843922, 17.890692, 18.197395, 17.662706, 18.578962, 18.898802, 18.435923, 17.644451, 16.393314, 15.570944, 16.779602, 15.74104, 15.041967, 14.544464, 15.014386, 14.156769, 13.591232, 12.386208, 11.133551, 10.472783, 9.7923355, 10.571391, 11.245247, 10.063455, 10.742685, 8.819294, 8.141182, 6.9487176, 6.3410373, 7.033326, 6.5856943, 6.0214376, 6.6087174, 9.583405, 9.4608135, 9.183213, 10.673293, 9.477165, 8.667246, 7.3392615, 6.2609572, 5.5752296, 4.4312773, 4.0997415, 4.127005, 4.072541, 3.5704772, 2.7370691, 2.3750854, 2.0708292, 3.4086852, 3.8237891, 3.9072614, 3.1760776, 2.4963813, 1.5232614, 0.931248, 0.49159998, 0.21676798, 0.874704, 2.0560641, 1.5494559, 3.0944476, 2.6151357, 2.7285278, 3.4450078, 3.4614875, 5.779072, 8.063728, 7.7077436, 7.8576636, 7.4494233, 6.5933595, 6.1667037, 
4.9452477, 5.6894236, 6.0578876, 5.9922714, 5.060448, 6.074832, 6.7870073, 5.7388477, 5.8681116, 4.7604475, 4.2740316, 3.785328, 4.060576, 4.9203672, 5.355184, 4.793792, 3.8007674, 3.6115997, 2.7794237, 2.5385118, 5.1410074, 5.5506234, 7.638063, 7.512544, 6.617264, 6.5637918, 6.452815])
# define colormap
N = 5  # number of desired color bins
# pyplot.get_cmap(name, lut) returns the colormap resampled to N colors;
# the plt.cm.get_cmap alias is no longer available in recent matplotlib.
cmap = plt.get_cmap('RdYlGn_r', N)
# define the bins and normalize
bounds = np.linspace(0, 1, N + 1)
norm = matplotlib.colors.BoundaryNorm(bounds, cmap.N)
fig, ax = plt.subplots(figsize=(15, 10))
# n boundaries delimit n - 1 cells, so keep only the rows/columns that have
# both edges.  Doing the slice explicitly avoids relying on pcolormesh to
# drop the last row and column of an over-sized matrix.
cells = np.asarray(matrix)[:len(bounds1) - 1, :len(bounds2) - 1]
colormesh = ax.pcolormesh(bounds2, bounds1, cells, cmap=cmap, norm=norm, linewidths=0.1)
# Rotate the x tick labels so the unevenly spaced boundaries do not overlap.
ax.tick_params(axis='x', which='major', rotation=50)
ax.set_xticks(bounds2)
ax.set_yticks(bounds1)
cbar = fig.colorbar(colormesh, ax=ax)
cbar.set_ticks(bounds)
# The data are plotted in their own units; no index remapping is needed
# because the mesh uses the real boundary values as coordinates.
ax.plot(x2, x1, color='black', marker='o')
plt.show()

Creating new pdf based on mean, standard deviation and skewness

I have two time series with their mean and standard deviations and skewness computed.
How can I generate a new probability density function (pdf) based on the mean and standard deviation of the first time series but the skewness of the second time series?
# First time series (58 samples).  A single opening bracket is used so the
# list is balanced and flat, like ts2.
ts1 = [ 0.24795413,  0.51981795, -1.128888  , -0.3915509 , -0.36329997,
        0.88802624,  1.1253957 ,  0.14258218, -0.55445707, -0.79443187,
       -0.16986907,  1.5396615 ,  1.7831599 ,  0.48034644, -0.55335957,
       -0.1698935 , -0.82943815, -1.2654774 , -1.0019791 ,  0.2947774 ,
       -0.38959527, -1.193097  ,  0.19199276, -0.24384224,  1.0309464 ,
       -0.54779184, -1.2194977 , -0.56049407,  0.33375692,  0.09617996,
        1.500001  ,  1.3636391 , -0.6977335 ,  1.14598   , -0.31681216,
        0.14813781,  1.2058297 , -0.5362634 , -0.18963599, -0.82915914,
       -0.46238124, -0.5534528 ,  0.2932713 ,  0.03283417, -0.59320515,
       -0.15333033,  0.418383  ,  0.623021  ,  2.9942613 ,  1.1455983 ,
       -0.7301076 ,  0.47346616,  0.96548057,  1.7798176 ,  1.4704127 ,
        0.12636256, -0.54934335, -0.057479]
ts2 = [-0.20055497, -1.0812824 , 1.6640332 , -0.9939632 , 0.05211592,
1.4842485 , -0.3094573 , 0.35551405, 0.38313067, -1.0607151 ,
-0.24302316, -0.97898716, -1.3700265 , 0.5152137 , -0.49310505,
0.71188784, -0.51848483, 0.16944373, -0.5372622 , -0.60573167,
0.04732573, 0.34191012, -0.23362386, 1.5668747 , -0.91083133,
-0.7176449 , 1.4817567 , -0.2580676 , 1.0231726 , 0.10230541,
0.16726625, -1.4672999 , 0.40618753, -1.4019163 , -0.1177727 ,
-0.21126366, -0.9413029 , 0.3704642 , 0.11061847, 2.4474611 ,
-0.5796859 , -1.2674994 , -0.44353878, -1.2688683 , -0.5369506 ,
-0.53735554, 1.7200137 , 2.3995981 , -1.4367745 , -0.5959428 ,
0.63681364, 1.0187957 , -0.97254455, -0.50130427, -0.31367695,
0.43504715, -0.60908884, -1.176665]
import scipy.stats  # binds the name `scipy` used below

# Sample moments of the two series: one statement per line.
m1 = np.mean(ts1)              # mean of the first series
std1 = np.std(ts1)             # standard deviation of the first series
skw1 = scipy.stats.skew(ts1)   # skewness of the first series
skw2 = scipy.stats.skew(ts2)   # skewness of the second series
# m1 mean of pdf1, std1 standard deviation and skw1 is skewness
pdf1 = m1 std1 skw1
# how can I get pdf 2 to be something like this
pdf2 = m1, std1, sk2 ( here I am using different skewness but
same meand and standard deviation)
df = pd.DataFrame({'ts1': ts1, 'ts2': ts2})
df.describe()
Data Plot:
df.plot()
Probability Density Function (PDF):
See pandas.DataFrame.plot.density for further details.
df.plot.density()

Why is TensorFlow LinearRegressor predicting numbers that are too big?

In a simple example I try to use TensorFlow LinearRegressor but the result can't be correct. Any suggestions?
import tensorflow as tf
import numpy as np
x_data =np.array([ 44.57, 42.71, 119.25, 40.83, 46.87, 71.44,
113.5, 39.83, 39.48,
77.3, 53.32 , 21.68 , 113.55 , 40.1 , 77.39 , 46.01 ,
35.42 , 93.81,
84.71, 51.7 , 73.57, 102.21 , 98.05 , 99.53 ,
98.65 , 50.1, 108.4,
62.06, 48.34 , 71.45 , 53.21 , 72.57 , 48.14 ,
71.32 , 41.01 , 96.71,
112.09, 54.87 , 63.17 , 44.95])
y_data= np.array([ 127.42 , 121.09 , 294.53, 96.73, 125.04, 195.08,
287.84, 106.97, 107.94,
204.45, 116.09 , 57.64 , 296.82 , 123.5 , 180.11 , 116.81 ,
96.73 ,233.71,
237.07, 130. , 182.61 , 260.22, 238.86 , 238.02 ,
248.05, 101.41, 269.69,
156.43 , 121.27 , 172.64 , 139.62 , 203.87 , 134.78 ,
176.24 , 106.22 , 252.93,
282.96 ,141.95 ,161. , 123.42])
features = [tf.contrib.layers.real_valued_column("x", dimension=1)]
estimator = tf.contrib.learn.LinearRegressor(feature_columns=features,
model_dir='./linear_estimator')
input_fn = tf.contrib.learn.io.numpy_input_fn({"x":x_data}, y_data,
num_epochs=1000)
estimator.fit(input_fn=input_fn, steps=2000)
np.asarray([i for i in estimator.predict(x={'x': x_data})])
The result is
array([ 1539.31665039, 1476.55419922, 4059.26489258, 1413.11694336,
1616.92626953, 2445.9987793 , 3865.2409668 , 1379.37365723,
1367.56335449, 2643.734375 , 1834.57043457, 766.93310547,
3866.92822266, 1388.48425293, 2646.77124023, 1587.90698242,
1230.56567383, 3200.83569336, 2893.77197266, 1779.90625 ,
2517.87182617, 3484.27929688, 3343.9074707 , 3393.84741211,
3364.15332031, 1725.91699219, 3693.15039062, 2129.48681641,
1666.52893066, 2446.3359375 , 1830.85864258, 2484.12866211,
1659.78015137, 2441.94946289, 1419.19055176, 3298.69140625,
3817.6628418 , 1886.87243652, 2166.94165039, 1552.13916016], dtype=float32)
Now it works.
I deleted the export directory and have the new version 1.1.0.
Thanks for all answers.

Categories

Resources