When trying to plot these graphs, the line:
sub3.hist(x=np.log(df[i]), bins = 100, color="grey")
gives the error:
ValueError: supplied range of [-inf, -inf] is not finite
I don't understand this error and can't find any explanation of it online. Here is the full code. df and df_norm are pandas DataFrames with identical data, except that df_norm is min-max normalised.
tb = widgets.TabBar([str(c) for c in range(16)])
k = 0
for c in range(len(df_norm.columns)):
    with tb.output_to(c, select=(c < 3)):
        colours = ["orange", "green"]
        fig = plt.figure(figsize=(20, 5))
        plt.subplots_adjust(bottom=0., left=0, top=1., right=1)
        p = 0
        g = 1
        for i in df_norm.columns[k:k+2]:
            sub1 = fig.add_subplot(2, 3, g)
            sub1.hist(x=df[i], bins=100, alpha=0.3, color=colours[p])
            sub2 = fig.add_subplot(2, 3, g+1)
            sub2.hist(x=df_norm[i], bins=100, alpha=0.3, color=colours[p])
            sub3 = fig.add_subplot(2, 3, g+2)
            sub3.hist(x=np.log(df[i]), bins=100, color="grey")
            sub1.set_title(i)
            sub2.set_title('title ' + i)
            sub3.set_title('title ' + i)
            sub1.set_ylabel('label')
            p = p + 1
            k = k + 1
            g = g + 3
Edit, full stack trace:
ValueError Traceback (most recent call last)
<ipython-input-179-d6170fc0d99d> in <module>()
20 sub2.hist(x=df_norm[i], bins = 100, alpha=0.3, color=colours[p])
21 sub3 = fig.add_subplot(2,3,g+2) # two rows, two columns, second cell
---> 22 sub3.hist(x=np.log(df[i]), bins = 100, color="grey")
23 sub1.set_title(i)
24 sub2.set_title('title ' + i)
4 frames
<__array_function__ internals> in histogram(*args, **kwargs)
/usr/local/lib/python3.7/dist-packages/numpy/lib/histograms.py in _get_outer_edges(a, range)
314 if not (np.isfinite(first_edge) and np.isfinite(last_edge)):
315 raise ValueError(
--> 316 "supplied range of [{}, {}] is not finite".format(first_edge, last_edge))
317 elif a.size == 0:
318 # handle empty arrays. Can't determine range, so use 0-1.
ValueError: supplied range of [-inf, -inf] is not finite
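For what it's worth, np.histogram derives its default range from the data's minimum and maximum, so a range of [-inf, -inf] means every value in np.log(df[i]) is -inf, which happens exactly when that column of df is all zeros (np.log(0) evaluates to -inf). A minimal standalone sketch that reproduces the error and one way to guard against it (this does not use the original df, just an assumed all-zero column):

import numpy as np
import matplotlib.pyplot as plt

col = np.zeros(100)                    # an all-zero column reproduces the error
logged = np.log(col)                   # RuntimeWarning; result is an array of -inf
finite = logged[np.isfinite(logged)]   # drop -inf (and nan) before plotting
if finite.size:                        # plt.hist(logged) would raise the ValueError
    plt.hist(finite, bins=100, color="grey")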
I want to find the complex roots for z1 = -0.9 and z2 = 0.3 but for every phi between 0 and 4pi.
phi = np.linspace(0, 4*np.pi, 400, endpoint=False)
e = np.exp(1j*phi)
z1 = [-0.9, -0.25, -0.99, -0.9405, -0.76, -1.019898, -1.00]
z2 = [0.3, 0.25, 0.11, 0.0495, 0.04, 0.000102, 1.00]
#Coefficients
P = [e, -e*(2*z1[0] + 2*z2[0]), e*(z1[0]**2 + z2[0]**2 + 4*z1[0]*z2[0] - 1), -e*((2*z1[0]**2 * z2[0]) + (2*z1[0]*z2[0]**2)), (z1[0]*z2[0])*(z1[0]*z2[0] + 1)]
#Output
ROOT = np.roots(P)
print(ROOT)
I'm getting the error:
/opt/conda/lib/python3.7/site-packages/numpy/core/shape_base.py:65: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated.
If you meant to do this, you must specify 'dtype=object' when creating the ndarray.
ary = asanyarray(ary)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
/tmp/ipykernel_17/4090299359.py in <module>
18 #Outputting the roots
19
---> 20 ROOT = np.roots(P)
21
22 print(ROOT)
<__array_function__ internals> in roots(*args, **kwargs)
/opt/conda/lib/python3.7/site-packages/numpy/lib/polynomial.py in roots(p)
232
233 # find non-zero array entries
--> 234 non_zero = NX.nonzero(NX.ravel(p))[0]
235
236 # Return an empty array if polynomial is all zeros
<__array_function__ internals> in nonzero(*args, **kwargs)
/opt/conda/lib/python3.7/site-packages/numpy/core/fromnumeric.py in nonzero(a)
1919
1920 """
-> 1921 return _wrapfunc(a, 'nonzero')
1922
1923
/opt/conda/lib/python3.7/site-packages/numpy/core/fromnumeric.py in _wrapfunc(obj, method, *args, **kwds)
55
56 try:
---> 57 return bound(*args, **kwds)
58 except TypeError:
59 # A TypeError occurs if the object does have such a method in its
ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()
I also tried calling .all() on phi; I'm not sure how to fix the error, so I just placed it there hoping the error would go away:
phi = np.linspace(0, 4*np.pi, 400, endpoint=False).all()
With that I only get 4 roots, instead of 4 roots for every phi. How can I fix this? Any help would be appreciated, thanks.
import numpy as np
phi = np.linspace(0, 4*np.pi, 400, endpoint=False)
e = np.exp(1j*phi)
z1 = [-0.9, -0.25, -0.99, -0.9405, -0.76, -1.019898, -1.00]
z2 = [0.3, 0.25, 0.11, 0.0495, 0.04, 0.000102, 1.00]
#Coefficients
P = [e,
-e*(2*z1[0] + 2*z2[0]),
e*(z1[0]**2 + z2[0]**2 + 4*z1[0]*z2[0] - 1),
-e*((2*z1[0]**2 * z2[0]) + (2*z1[0]*z2[0]**2)),
(z1[0]*z2[0])*(z1[0]*z2[0] + 1)
]
# the last element of "P" is just a float, but the others are 400-element arrays;
# np.roots cannot handle such a ragged coefficient list in a single call
#Output
n_phi = len(phi)
ROOT = np.zeros((n_phi, 4), dtype=complex)
# apply "np.roots" for each point, one by one
for i in range(n_phi):
    # change new_P if the last element of "P" should be an array
    new_P = [P[0][i], P[1][i], P[2][i], P[3][i], P[4]]
    ROOT[i, :] = np.roots(new_P)
print(ROOT)
# For a more readable presentation you can use this:
# with np.printoptions(precision=2, linewidth=85):
# print(ROOT)
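As a quick sanity check after the loop, ROOT should now contain one set of four roots per phi value:

print(ROOT.shape)  # expected: (400, 4) - four roots for each of the 400 phi values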
My code below is raising the error "AssertionError: Total area is zero in defuzzification!". I'm honestly trying to understand what is wrong, but I'm at a dead end; if anyone has a solution it would be appreciated. The gist of the code is to use fuzzy logic in combination with VADER to classify whether a text is negative or positive.
x_p = np.arange(0, 1, 0.1)
x_n = np.arange(0, 1, 0.1)
x_op = np.arange(0, 10, 1)
p_lo = fuzz.trimf(x_p, [0, 0, 0.5])
p_md = fuzz.trimf(x_p, [0, 0.5, 1])
p_hi = fuzz.trimf(x_p, [0.5, 1, 1])
n_lo = fuzz.trimf(x_n, [0, 0, 0.5])
n_md = fuzz.trimf(x_n, [0, 0.5, 1])
n_hi = fuzz.trimf(x_n, [0.5, 1, 1])
op_Neg = fuzz.trimf(x_op, [0, 0, 5]) # Scale : Neg Neu Pos
op_Neu = fuzz.trimf(x_op, [0, 5, 10])
op_Pos = fuzz.trimf(x_op, [5, 10, 10])
sid = SentimentIntensityAnalyzer()
sentiment_val=[]
sentiment_doc=[]
for j in range(doclen):
    sentiment_doc.append(senti[j])
    ss = sid.polarity_scores(tweets[j])
    posscore = ss['pos']
    negscore = ss['neg']
    neuscore = ss['neu']
    compoundscore = ss['compound']
    print(str(j+1) + " {:-<65} {}".format(tweets[j], str(ss)))
    print("\nPositive Score for each tweet :")
    if posscore == 1:
        posscore = 0.9
    else:
        posscore = round(posscore, 1)
    print(posscore)
    print("\nNegative Score for each tweet :")
    if negscore == 1:
        negscore = 0.9
    else:
        negscore = round(negscore, 1)
    print(negscore)
    # We need the activation of our fuzzy membership functions at these values.
    p_level_lo = fuzz.interp_membership(x_p, p_lo, posscore)
    p_level_md = fuzz.interp_membership(x_p, p_md, posscore)
    p_level_hi = fuzz.interp_membership(x_p, p_hi, posscore)
    n_level_lo = fuzz.interp_membership(x_n, n_lo, negscore)
    n_level_md = fuzz.interp_membership(x_n, n_md, negscore)
    n_level_hi = fuzz.interp_membership(x_n, n_hi, negscore)
    # Now we apply the rules. Each rule ANDs a positive-score level with a
    # negative-score level, so we take the minimum of the two with np.fmin.
    active_rule1 = np.fmin(p_level_lo, n_level_lo)
    active_rule2 = np.fmin(p_level_md, n_level_lo)
    active_rule3 = np.fmin(p_level_hi, n_level_lo)
    active_rule4 = np.fmin(p_level_lo, n_level_md)
    active_rule5 = np.fmin(p_level_md, n_level_md)
    active_rule6 = np.fmin(p_level_hi, n_level_md)
    active_rule7 = np.fmin(p_level_lo, n_level_hi)
    active_rule8 = np.fmin(p_level_md, n_level_hi)
    active_rule9 = np.fmin(p_level_hi, n_level_hi)
    # Apply the rule activations by clipping the top off the corresponding
    # output membership function with np.fmin
    n1 = np.fmax(active_rule4, active_rule7)
    n2 = np.fmax(n1, active_rule8)
    op_activation_lo = np.fmin(n2, op_Neg)
    neu1 = np.fmax(active_rule1, active_rule5)
    neu2 = np.fmax(neu1, active_rule9)
    op_activation_md = np.fmin(neu2, op_Neu)
    p1 = np.fmax(active_rule2, active_rule3)
    p2 = np.fmax(p1, active_rule6)
    op_activation_hi = np.fmin(p2, op_Pos)
    op0 = np.zeros_like(x_op)
    # Aggregate all three output membership functions together
    aggregated = np.fmax(op_activation_lo,
                         np.fmax(op_activation_md, op_activation_hi))
    # Calculate defuzzified result
    op = fuzz.defuzz(x_op, aggregated, 'centroid')
    output = round(op, 2)
    op_activation = fuzz.interp_membership(x_op, aggregated, op)  # for plot
    if 0 < output < 3.33:
        print("\nOutput after Defuzzification: Negative")
        sentiment.append("Negative")
        sentiment_val.append('0')
    elif 3.34 < output < 10:
        print("\nOutput after Defuzzification: Positive")
        sentiment.append("Positive")
        sentiment_val.append('1')
    print("Doc sentiment: " + str(senti[j]) + "\n")
The traceback is the following:
---------------------------------------------------------------------------
AssertionError Traceback (most recent call last)
/var/folders/1c/pf8ljm0n5d7_w36ty_m7hyhw0000gn/T/ipykernel_1538/2987240111.py in <module>
151
152 # Calculate defuzzified result
--> 153 op = fuzz.defuzz(x_op, aggregated, 'centroid')
154 output=round(op,2)
155
~/opt/anaconda3/lib/python3.9/site-packages/skfuzzy/defuzzify/defuzz.py in defuzz(x, mfx, mode)
246 if 'centroid' in mode or 'bisector' in mode:
247 zero_truth_degree = mfx.sum() == 0 # Approximation of total area
--> 248 assert not zero_truth_degree, 'Total area is zero in defuzzification!'
249
250 if 'centroid' in mode:
AssertionError: Total area is zero in defuzzification!
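For reference, the assertion fires exactly when the aggregated membership is identically zero (the mfx.sum() == 0 check visible in the traceback). A defensive sketch, assuming the aggregated array built above; the neutral fallback of 5.0 is an assumption (the midpoint of x_op), not part of the original code:

if aggregated.sum() == 0:
    op = 5.0  # hypothetical fallback: treat a zero-area aggregate as neutral
else:
    op = fuzz.defuzz(x_op, aggregated, 'centroid')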
This is the code:
import numpy as np
import pandas as pd
import statsmodels
from statsmodels.tsa.stattools import coint
# just set the seed for the random number generator
np.random.seed(107)
import matplotlib.pyplot as plt
Generate a fake security X and model its daily returns by drawing from a normal distribution. Then perform a cumulative sum to get the value of X on each day.
# Generate daily returns
Xreturns = np.random.normal(0, 1, 100)
# sum them and shift all the prices up
X = pd.Series(np.cumsum(Xreturns), name='X') + 50
X.plot(figsize=(15,7))
plt.show()
Generate Y, which has a deep economic link to X, so the price of Y should vary very similarly to X.
noise = np.random.normal(0, 1, 100)
Y = X + 5 + noise
Y.name = 'Y'
pd.concat([X, Y], axis=1).plot(figsize=(15,7))
plt.show()
Plot the ratio between the two:
(Y/X).plot(figsize=(15,7))
plt.axhline((Y/X).mean(), color='red', linestyle='--')
plt.xlabel('Time')
plt.legend(['Price Ratio', 'Mean'])
plt.show()
# compute the p-value of the cointegration test
# will inform us as to whether the ratio between the 2 timeseries is stationary
# around its mean
score, pvalue, _ = coint(X,Y)
print (pvalue)
ret1 = np.random.normal(1, 1, 100)
ret2 = np.random.normal(2, 1, 100)
X_diverging = pd.Series(np.cumsum(ret1), name='X')
Y_diverging = pd.Series(np.cumsum(ret2), name='Y')
pd.concat([X_diverging, Y_diverging], axis=1).plot(figsize=(15,7))
plt.show()
print 'Correlation: ' + str(X_diverging.corr(Y_diverging))
score, pvalue, _ = coint(X_diverging,Y_diverging)
print 'Cointegration test p-value: ' + str(pvalue)
Error Message:
File "", line 9
print 'Correlation: ' + str(X_diverging.corr(Y_diverging))
SyntaxError: invalid syntax
Y2 = pd.Series(np.random.normal(0, 1, 800), name='Y2') + 20
Y3 = Y2.copy()
Y3[0:100] = 30
Y3[100:200] = 10
Y3[200:300] = 30
Y3[300:400] = 10
Y3[400:500] = 30
Y3[500:600] = 10
Y3[600:700] = 30
Y3[700:800] = 10
Y2.plot(figsize=(15,7))
Y3.plot()
plt.ylim([0, 40])
plt.show()
# correlation is nearly zero
print 'Correlation: ' + str(Y2.corr(Y3))
score, pvalue, _ = coint(Y2,Y3)
print 'Cointegration test p-value: ' + str(pvalue)
Error message:
File "", line 14
print 'Correlation: ' + str(Y2.corr(Y3))
SyntaxError: invalid syntax
def find_cointegrated_pairs(data):
    n = data.shape[1]
    score_matrix = np.zeros((n, n))
    pvalue_matrix = np.ones((n, n))
    keys = data.keys()
    pairs = []
    for i in range(n):
        for j in range(i+1, n):
            S1 = data[keys[i]]
            S2 = data[keys[j]]
            result = coint(S1, S2)
            score = result[0]
            pvalue = result[1]
            score_matrix[i, j] = score
            pvalue_matrix[i, j] = pvalue
            if pvalue < 0.02:
                pairs.append((keys[i], keys[j]))
    return score_matrix, pvalue_matrix, pairs
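A hypothetical usage sketch, assuming data is the 'Adj Close' DataFrame loaded in the next snippet:

scores, pvalues, pairs = find_cointegrated_pairs(data)
print(pairs)  # (name_i, name_j) tuples whose cointegration p-value is below 0.02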
Run pip install auquan-toolbox and execute the following code snippet:
from backtester.dataSource.yahoo_data_source import YahooStockDataSource
from datetime import datetime
startDateStr = '2007/12/01'
endDateStr = '2017/12/01'
cachedFolderName = 'yahooData/'
dataSetId = 'testPairsTrading'
instrumentIds = ['SPY','AAPL','ADBE','SYMC','EBAY','MSFT','QCOM',
'HPQ','JNPR','AMD','IBM']
ds = YahooStockDataSource(cachedFolderName=cachedFolderName,
dataSetId=dataSetId,
instrumentIds=instrumentIds,
startDateStr=startDateStr,
endDateStr=endDateStr,
event='history')
data = ds.getBookDataByFeature()['Adj Close']
data.head(3)
Error message:
File "C:\ProgramData\Anaconda3\lib\site-packages\backtester\dataSource\data_source_utils.py", line 25, in getCookieForYahoo
return cookie, crumb # return a tuple of crumb and cookie
UnboundLocalError: local variable 'crumb' referenced before assignment
The complete article with code can be found here.
Any help is much appreciated. Thank you.
The errors about print are caused by missing parentheses: the article's code is Python 2, and in Python 3 print is a function. E.g. this error:
File "", line 9
print 'Correlation: ' + str(X_diverging.corr(Y_diverging))
SyntaxError: invalid syntax
is caused by line 9:
print 'Correlation: ' + str(X_diverging.corr(Y_diverging))
which should be:
print('Correlation: ' + str(X_diverging.corr(Y_diverging)))
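The same fix applies to the other Python 2 print statements in the posted code, e.g.:

print('Cointegration test p-value: ' + str(pvalue))
print('Correlation: ' + str(Y2.corr(Y3)))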
I am trying to use the fmin_l_bfgs_b function in Python to maximize the log-likelihood function below:
def loglik(x0):
    p = np.zeros((NCS, 1))  # vector to hold the probabilities for each observation
    data['v'] = (data.iloc[:, [3, 4]]).dot(x0)  # calculate deterministic utility
    for i in range(NCS):
        vv = data.v[(data.idcase == i + 1)]
        vy = data.v[(data.idcase == i + 1) & (data.depvar == 1)]
        p[i][0] = np.maximum(np.exp(vy) / sum(np.exp(vv)), 0.00000001)
        #print("p", p)
    ll = -sum(np.log(p))  # negative, since the negative of the log-likelihood is minimized
    return ll
The input data being used is:
data = pd.read_csv("drive/My Drive/example_data.csv") #read data
data.iloc[:, [3,4]] = data.iloc[:, [3,4]]/100 #scale costs
B = np.zeros((1,2)) #give starting values of beta; 1xK vector; 2alternatives so 1x2 vector
NCS = data['idcase'].nunique() # number of choice situations in the dataset
x0 = B.T
Estimation:
optim2 = fmin_l_bfgs_b(loglik, x0, fprime=None, args=(), approx_grad=0, bounds=None, m=10, factr=10000000.0, pgtol=1e-05, epsilon=1e-08,iprint=0, maxfun=15000, maxiter=15000, disp=None, callback=None)
However, I keep getting this:
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-77-2821f2269a8c> in <module>()
83 print('which is the same as maximizing the log-likelihood.')
84
---> 85 optim2 = fmin_l_bfgs_b(loglik, x0, fprime=None, args=(), approx_grad=0, bounds=None, m=10, factr=10000000.0, pgtol=1e-05, epsilon=1e-08, iprint=0, maxfun=15000, maxiter=15000, disp=None, callback=None)
86
87 print(optim2)
4 frames
/usr/local/lib/python3.6/dist-packages/scipy/optimize/optimize.py in __call__(self, x, *args)
64 self.x = numpy.asarray(x).copy()
65 fg = self.fun(x, *args)
---> 66 self.jac = fg[1]
67 return fg[0]
68
IndexError: index 1 is out of bounds for axis 0 with size 1
Can someone kindly advise me as to what to do? I am quite new to numerical optimization methods.
Thanks
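For context on the traceback: with fprime=None and approx_grad=0, fmin_l_bfgs_b assumes that loglik returns a (value, gradient) pair, so the wrapper's fg[1] lookup fails when only the value comes back. A minimal sketch of one way around this, assuming the loglik and x0 defined above (loglik may also need to return a plain scalar, e.g. float(ll), rather than a length-1 array):

from scipy.optimize import fmin_l_bfgs_b

# approx_grad=True lets SciPy estimate the gradient numerically,
# so loglik only has to return the objective value
optim2 = fmin_l_bfgs_b(loglik, x0.ravel(), approx_grad=True)
print(optim2)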
I am trying to learn PyMC3 and want to make a simple mixture-of-Gaussians example. I found this example and want to convert it to PyMC3, but I'm currently getting an error when trying to plot the traceplot.
n1 = 500
n2 = 200
n = n1+n2
mean1 = 21.8
mean2 = 42.0
precision = 0.1
sigma = np.sqrt(1 / precision)
# precision = 1/sigma^2
print "sigma1: %s" % sigma1
print "sigma2: %s" % sigma2
data1 = np.random.normal(mean1,sigma,n1)
data2 = np.random.normal(mean2,sigma,n2)
data = np.concatenate([data1 , data2])
#np.random.shuffle(data)
fig = plt.figure(figsize=(7, 7))
ax = fig.add_subplot(111, xlabel='x', ylabel='y', title='mixture of 2 guassians')
ax.plot(range(0,n1+n2), data, 'x', label='data')
plt.legend(loc=0)
with pm.Model() as model:
    # priors
    p = pm.Uniform("p", 0, 1)  # the fraction that comes from mean1 vs mean2
    ber = pm.Bernoulli("ber", p=p)  # produces 1 with proportion p
    precision = pm.Gamma('precision', alpha=0.1, beta=0.1)
    mean1 = pm.Normal("mean1", 0, 0.01)  # better to use Normals than Uniforms (unless you are certain the value is truncated at 0 and 200)
    mean2 = pm.Normal("mean2", 0, 0.01)
    mean = pm.Deterministic('mean', ber*mean1 + (1-ber)*mean2)
    process = pm.Normal('process', mu=mean, tau=precision, observed=data)

    # inference
    step = pm.Metropolis()
    trace = pm.sample(10000, step)
pm.traceplot(trace)
Error:
sigma1: 3.16227766017
sigma2: 1.69030850946
[-----------------100%-----------------] 10000 of 10000 complete in 4.4 sec
---------------------------------------------------------------------------
LinAlgError Traceback (most recent call last)
<ipython-input-10-eb728824de83> in <module>()
44 step = pm.Metropolis()
45 trace = pm.sample(10000, step)
---> 46 pm.traceplot(trace)
/usr/lib/python2.7/site-packages/pymc-3.0-py2.7.egg/pymc/plots.pyc in traceplot(trace, vars, figsize, lines, combined, grid)
70 ax[i, 0].set_xlim(mind - .5, maxd + .5)
71 else:
---> 72 kdeplot_op(ax[i, 0], d)
73 ax[i, 0].set_title(str(v))
74 ax[i, 0].grid(grid)
/usr/lib/python2.7/site-packages/pymc-3.0-py2.7.egg/pymc/plots.pyc in kdeplot_op(ax, data)
94 for i in range(data.shape[1]):
95 d = data[:, i]
---> 96 density = kde.gaussian_kde(d)
97 l = np.min(d)
98 u = np.max(d)
/usr/lib64/python2.7/site-packages/scipy/stats/kde.pyc in __init__(self, dataset, bw_method)
186
187 self.d, self.n = self.dataset.shape
--> 188 self.set_bandwidth(bw_method=bw_method)
189
190 def evaluate(self, points):
/usr/lib64/python2.7/site-packages/scipy/stats/kde.pyc in set_bandwidth(self, bw_method)
496 raise ValueError(msg)
497
--> 498 self._compute_covariance()
499
500 def _compute_covariance(self):
/usr/lib64/python2.7/site-packages/scipy/stats/kde.pyc in _compute_covariance(self)
507 self._data_covariance = atleast_2d(np.cov(self.dataset, rowvar=1,
508 bias=False))
--> 509 self._data_inv_cov = linalg.inv(self._data_covariance)
510
511 self.covariance = self._data_covariance * self.factor**2
/usr/lib64/python2.7/site-packages/scipy/linalg/basic.pyc in inv(a, overwrite_a, check_finite)
381 inv_a, info = getri(lu, piv, lwork=lwork, overwrite_lu=1)
382 if info > 0:
--> 383 raise LinAlgError("singular matrix")
384 if info < 0:
385 raise ValueError('illegal value in %d-th argument of internal '
LinAlgError: singular matrix
Thanks to Fonnesbeck for answering this on the github issue tracker:
https://github.com/pymc-devs/pymc3/issues/452
here is the updated code:
with pm.Model() as model:
    # priors
    p = pm.Uniform("p", 0, 1)  # the fraction that comes from mean1 vs mean2
    ber = pm.Bernoulli("ber", p=p, shape=len(data))  # produces 1 with proportion p
    sigma = pm.Uniform('sigma', 0, 100)
    precision = sigma**-2
    mean = pm.Normal("mean", 0, 0.01, shape=2)
    mu = pm.Deterministic('mu', mean[ber])
    process = pm.Normal('process', mu=mu, tau=precision, observed=data)

with model:
    step1 = pm.Metropolis([p, sigma, mean])
    step2 = pm.BinaryMetropolis([ber])
    trace = pm.sample(10000, [step1, step2])
You need to use BinaryMetropolis when inferring a Bernoulli random variable.
And an even simpler and quicker version is as follows:
import theano.tensor as tt  # needed for tt.stack below

with pm.Model() as model2:
    p = pm.Beta("p", 1., 1.)
    means = pm.Uniform('mean', 15, 60, shape=2)
    sigma = pm.Uniform('sigma', 0, 20, testval=5)
    process = pm.NormalMixture('obs', tt.stack([p, 1-p]), means, sd=sigma, observed=data)

with model2:
    step = pm.Metropolis()
    trace = pm.sample(10000, step=step)
I know this issue is old, but I have been trying different examples of PyMC3 usage to get used to modeling in PyMC3. The answer as given above does not work in the current version 1.0 of PyMC3 (it does not distinguish the two means correctly). The minimum changes I had to make in order to get it working were the following:
1)
# mean = pm.Normal("mean", 0, 0.01, shape=2 )
mean = pm.Uniform('mean', 15, 60, shape=2)
2)
# step2 = pm.BinaryMetropolis([ber])
step2 = pm.ElemwiseCategorical(vars=[ber], values=[0, 1])
Just in case anybody else is having a similar problem.
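Putting both changes into the earlier answer's model, the adjusted version (a sketch assembled from the snippets above, not independently verified) looks like this:

with pm.Model() as model:
    p = pm.Uniform("p", 0, 1)
    ber = pm.Bernoulli("ber", p=p, shape=len(data))
    sigma = pm.Uniform('sigma', 0, 100)
    precision = sigma**-2
    mean = pm.Uniform('mean', 15, 60, shape=2)  # change 1: Uniform instead of Normal
    mu = pm.Deterministic('mu', mean[ber])
    process = pm.Normal('process', mu=mu, tau=precision, observed=data)

    step1 = pm.Metropolis([p, sigma, mean])
    step2 = pm.ElemwiseCategorical(vars=[ber], values=[0, 1])  # change 2: instead of BinaryMetropolis
    trace = pm.sample(10000, [step1, step2])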