I am trying to run KNeighborsClassifer for multiple "k" as follows, but get an error on line where I am doing the "predictions". When I run the same code with k = 1, even multiple times, it works fine. There must be something I don't understand about the sklearn objects here. What is the problem with the code here? Thank you.
My code:
accuracy = []
f1score = []
predictions = []
for n in range(10):
vectorizer = CountVectorizer()
output = vectorizer.fit_transform(train_data)
output_dev = vectorizer.transform(dev_data)
neighbor = KNeighborsClassifier(n_neighbors = n)
neighbor.fit(output, train_labels)
predictions = neighbor.predict(output_dev)
accuracy.append(round(sum(predictions == dev_labels) * 1.0 / len(predictions), 2))
f1score.append(round(metrics.f1_score(dev_labels, predictions), 2))
print accuracy
print f1score
Error message:
UnboundLocalError Traceback (most recent call last)
<ipython-input-99-c3eaa2d9dd70> in <module>()
17 print f1score
18
---> 19 P3()
<ipython-input-99-c3eaa2d9dd70> in P3()
10 neighbor = KNeighborsClassifier(n_neighbors = n)
11 neighbor.fit(output, train_labels)
---> 12 predictions = neighbor.predict(output_dev)
13
14 accuracy.append(round(sum(predictions == dev_labels) * 1.0 / len(predictions), 2))
/Library/Python/2.7/site-packages/sklearn/neighbors/classification.pyc in predict(self, X)
160 for k, classes_k in enumerate(classes_):
161 if weights is None:
--> 162 mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
163 else:
164 mode, _ = weighted_mode(_y[neigh_ind, k], weights, axis=1)
/System/Library/Frameworks/Python.framework/Versions/2.7/Extras/lib/python/scipy/stats/stats.pyc in mode(a, axis)
658 oldcounts = np.maximum(counts, oldcounts)
659 oldmostfreq = mostfrequent
--> 660 return mostfrequent, oldcounts
661
662 def mask_to_limits(a, limits, inclusive):
UnboundLocalError: local variable 'mostfrequent' referenced before assignment
Related
I am receiving an error whilst looping through the batch
The error: expected Tensor as element 0 in argument 0, but got builtin_function_or_method
I followed a tutorial on Udemy, but I am not sure what the issue is.
The Code. As mentioned, I followed a tutorial on Udemy, but I cannot seem to find the mistake:
def sample(self, batch_size):
assert self.can_sample(batch_size)
batch = random.sample(self.memory, batch_size)
batch = zip(*batch)
return [torch.cat(items) for items in batch]
def can_sample(self, batch_size):
return len(self.memory) >= batch_size * 10
def deep_sarsa(q_network, policy, episodes, alpha = 0.001, batch_size = 32, gamma = 0.99, epsilon = 0.05):
optim = AdamW(q_network.parameters(), lr = alpha)
memory = ReplayMemory(capacity = 1e6)
stats = {'MSE Loss': [], 'Returns': []}
for episodes in tqdm(range(1, episodes + 1)):
state = env.reset()
ep_return = 0.
done = False
while not done:
action = policy(state, epsilon)
next_state, rewards, done, _ = env.step(action)
memory.insert([state, action, rewards, done, next_state])
if (memory.can_sample(batch_size)):
state_b, action_b, reward_b, done_b, next__state_b = memory.sample(batch_size)
qsa_b = q_network(state_b).gather(1, action_b)
next_action_b = policy(next_state_b, epsilon)
next_qsa_b = target_q_network(next_state_b).gather(1, next_action_b)
target_b = reward_b + -done_b * gamma * next_qsa_b
loss = F.mse_loss(qsa_b, target_b)
q_network.zero_grad() # eliminate the gradient that was computed - used to calculate new one
loss.backward()#kick off the backprop.
optim.step()
stats['MSE Loss'].append(loss.item())
state = next_state
# print(rewards)
ep_return += rewards().item()
stats['Returns'].append(ep_return)
if(episodes % 10 == 0):
target_q_network.load_state_dict(q_network.state.dict())
return stats
The error
TypeError Traceback (most recent call last)
Cell In[52], line 1
----> 1 stats = deep_sarsa(q_network, policy, 2000, epsilon = 0.01)
Cell In[51], line 24, in deep_sarsa(q_network, policy, episodes, alpha, batch_size, gamma, epsilon)
21 memory.insert([state, action, rewards, done, next_state])
23 if (memory.can_sample(batch_size)):
---> 24 state_b, action_b, reward_b, done_b, next__state_b = memory.sample(batch_size)
26 qsa_b = q_network(state_b).gather(1, action_b)
27 next_action_b = policy(next_state_b, epsilon)
Cell In[50], line 46, in ReplayMemory.sample(self, batch_size)
38 batch = zip(*batch)
41 #batch = torch.tensor(batch, dtype=torch.int8)
42 #torch.cat() -> concatinate the elements in a single tensor.
---> 46 return [torch.cat(items) for items in batch]
Cell In[50], line 46, in <listcomp>(.0)
38 batch = zip(*batch)
41 #batch = torch.tensor(batch, dtype=torch.int8)
42 #torch.cat() -> concatinate the elements in a single tensor.
---> 46 return [torch.cat(items) for items in batch]
TypeError: expected Tensor as element 0 in argument 0, but got builtin_function_or_method
I'm trying to code for undersampling negative class data (majority class in imbalanced dataset) by only keeping the support vectors. Thus in the process I need to rebuild the dataset by removing some of the not needed data granular support vector machine repetitive undersampling. I'm trying to rebuild the dataset in the def rebuild function.
import numpy as np
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix
class gsvm(object):
def __init__(self, C = 100,
T = 10,class_weight = 'balanced',
degree = 3,gamma='auto', kernel='rbf'
):
self.C = C
self.T = T
self.class_weight = class_weight
self.degree = degree
self.gamma = gamma
self.kernel = kernel
self.allSVC = SVC(C = self.C, class_weight=self.class_weight,
degree=self.degree, gamma = self.gamma,
kernel= self.kernel)
def rebuild(self, xTrain, yTrain, sv, xNLSV): # rebuild SVC
xNew = []
yNew = []
count = 0
for i in range(0, len(yTrain)):
if yTrain.iloc[i] == 1:
xNew.append(xTrain.iloc[i])
yNew.append(yTrain.iloc[i])
else:
if i not in sv:
xNew.append(xTrain.iloc[i])
yNew.append(yTrain.iloc[i])
count += 1
else:
xNLSV.append(xTrain.iloc[i])
return xNew, yNew, xNLSV, count
def fit(self, x, y):
#
xPos = []
xNeg = []
xTrain = []
yTrain = []
xlastTrain = []
ylastTrain = []
for i in range(0, len(y)):
if y.iloc[i] == 1:
xPos.append(x.iloc[i])
xlastTrain.append(x.iloc[i])
ylastTrain.append(y.iloc[i])
xTrain.append(x.iloc[i])
yTrain.append(y.iloc[i])
else:
xNeg.append(x.iloc[i])
xTrain.append(x.iloc[i])
yTrain.append(y.iloc[i])
xNLSV = []
iterRecord = 0
for i in range(0, self.T):
svc = SVC(C = self.C, class_weight=self.class_weight,
degree=self.degree, gamma = self.gamma,
kernel= self.kernel)
print (iterRecord)
iterRecord += 1
svc.fit(xTrain, yTrain)
sv = svc.support_ # This is support vector
xTrain, yTrain, xNLSV, lastMar = self.rebuild(xTrain, yTrain, sv, xNLSV) # rebuild sample
print (lastMar)
if lastMar < 0.1 * len(xPos):
break
for i in xNLSV:
xlastTrain.append(i)
ylastTrain.append(0)
self.allSVC.fit(xlastTrain, ylastTrain)
def predict(self, x):
return self.allSVC.predict(x)
The problem is, all of the data I used are formated as dataframe but when I tried to rebuild the sample, after fitting the class into svm, this line:
xTrain, yTrain, xNLSV, lastMar = self.rebuild(xTrain, yTrain, sv, xNLSV) # rebuild sample
I got the error below:
AttributeError Traceback (most recent call last)
Input In [15], in <cell line: 1>()
----> 1 gsvm().fit(X_train1,y_train1)
Input In [14], in gsvm.fit(self, x, y)
66 svc.fit(xTrain, yTrain)
67 sv = svc.support_ # This is support vector
---> 68 xTrain, yTrain, xNLSV, lastMar = self.rebuild(xTrain, yTrain, sv, xNLSV) # rebuild sample
69 print (lastMar)
70 if lastMar < 0.1 * len(xPos):
Input In [14], in gsvm.rebuild(self, xTrain, yTrain, sv, xNLSV)
24 count = 0
25 for i in range(0, len(yTrain)):
---> 26 if yTrain.iloc[i] == 1:
27 xNew.append(xTrain.iloc[i])
28 yNew.append(yTrain.iloc[i])
AttributeError: 'list' object has no attribute 'iloc'
Where do you think my dataset change from dataframe into list?
I'm having problems using functional api for estimating by maximizing
First I minimize the error vector by maximizing the probability layer loss, and then I want to use the mean vector layer to rank xc_hat similar embeddings.
The code is as follows:
import random as rdn
import tensorflow.compat.v2 as tf
tf.enable_v2_behavior()
import tensorflow_probability as tfp
import numpy as np
from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split
tfd = tfp.distributions
n_observations = 2000
n_features = 5
d_dim = 3
lr = 0.005
# Generate toy data
def make_relations(x_tr, y_tr, c_tr):
# Generate input data centers being labels (xc) for two random
# in-cluster (xa, xb)
xa = []
xc = []
xb = []
for l in y_tr:
kone = [k for k, lab in zip(x_tr, y_tr) if lab==l]
if len(kone) < 3:
continue
for i, x in enumerate(kone):
if np.isclose(x, c_tr[l]).all():
continue
kone_minus_x = kone.copy()
kone_minus_x.pop(i)
print
xa.append(x)
xc.append(c_tr[l])
xb.append(rdn.choice(kone_minus_x))
return np.vstack(xa), np.vstack(xb), np.vstack(xc)
X, Y, C = make_blobs(n_samples=n_observations,
n_features=n_features,
centers=int(n_observations*0.2),
return_centers=True)
x_a, x_b, x_c = make_relations(X, Y, C)
Xa_train, Xa_test = train_test_split(x_a, test_size=.4)
Xb_train, Xb_test = train_test_split(x_b, test_size=.4)
Xc_train, Xc_test = train_test_split(x_c, test_size=.4)
Xa_train = Xa_train[np.newaxis]
Xb_train = Xb_train[np.newaxis]
Xc_train = Xc_train[np.newaxis]
Xa_test = Xa_test[np.newaxis]
Xb_test = Xb_test[np.newaxis]
Xc_test = Xc_test[np.newaxis]
neg_log_likelihood = lambda y, rv_y: -rv_y.log_prob(y)
ones_train = tf.keras.backend.ones((1, Xc_train.shape[1], d_dim)).numpy()
ones_test = tf.keras.backend.ones((1, Xc_test.shape[1], d_dim)).numpy()
# Build model.
xa_xb = tf.keras.layers.Input(shape=(None, n_features), name='Xa-Xb')
L_xa_xb = tf.keras.layers.Dense(d_dim, activation='sigmoid', name='L_Xa-Xb')(xa_xb)
xb = tf.keras.layers.Input(shape=(None, n_features), name='Xb')
L_xb = tf.keras.layers.Dense(d_dim, activation='sigmoid', name='L_Xb')(xb)
mu = tf.keras.layers.Add(name='mean_vector')([L_xa_xb, L_xb])
xc = tf.keras.layers.Input(shape=(None, n_features), name='Xc')
L_xc = tf.keras.layers.Dense(d_dim, name='L_Xc')(xc)
error_vector = tf.keras.layers.Subtract(name='error_vector')([L_xc, mu])
p_xc_given_xa_xb = tfp.layers.DistributionLambda(
lambda t: tfd.Normal(loc=t, scale=tf.exp(t)), name='Gaussian')(error_vector)
model = tf.keras.Model(inputs=[xa_xb, xb, xc],
outputs=p_xc_given_xa_xb, name="inner_model")
model.compile(
optimizer=tf.optimizers.Adam(learning_rate=lr),
loss=neg_log_likelihood)
model.fit([Xa_train - Xb_train, Xb_train, Xc_train], ones_train,
validation_data=([Xa_test - Xb_test, Xb_test, Xc_test], ones_test),
epochs=1000,
verbose=True)
# After trained rebuild the part of the model I will use for prediction
xa_xb = model.get_layer('Xa-Xb')
L_xa_xb = model.get_layer('L_Xa-Xb')(xa_xb)
xb = model.get_layer('Xb')
L_xb = model.get_layer('L_Xb')(xb)
xc = model.get_layer('mean_vector')([L_xa_xb, L_xb])
model = tf.keras.Model(inputs=[xa_xb, xb],
outputs=xc, name="inner_model")
xc_hat = model([Xa_test - Xb_test, Xb_test])
The idea is to estimate xc However, I have the following error:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-57-d94b1e8a583c> in <module>()
2
3 xa_xb = model.get_layer('Xa-Xb')
----> 4 L_xa_xb = model.get_layer('L_Xa-Xb')(xa_xb)
5
6 xb = model.get_layer('Xb')
1 frames
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py in __call__(self, *args, **kwargs)
966 with base_layer_utils.autocast_context_manager(
967 self._compute_dtype):
--> 968 outputs = self.call(cast_inputs, *args, **kwargs)
969 self._handle_activity_regularization(inputs, outputs)
970 self._set_mask_metadata(inputs, outputs, input_masks)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/layers/core.py in call(self, inputs)
1178
1179 def call(self, inputs):
-> 1180 rank = inputs.shape.rank
1181 if rank is not None and rank > 2:
1182 # Broadcasting is required for the inputs.
AttributeError: 'InputLayer' object has no attribute 'shape'
Im usign Google Colaboratory
enter image description here
between the codes there are the following codes:
def regTreeEval(model, inDat):
if model is not None:
return float(model)
def modelTreeEval(model, inDat):
n = shape(inDat)[1]
X = mat(ones((1, n+1)))
X[:, 1: n+1] = inDat
# print X, model
xmodel = X*model
if xmodel is not None:
return float(X * model)
<pre><code>def modelErr(dataSet):
ws, X, Y = linearSolve(dataSet)
yHat = X * ws
# print corrcoef(yHat, Y, rowvar=0)
return sum(power(Y - yHat, 2))
def linearSolve(dataSet):
m,n = shape(dataSet)
X = mat(ones((m,n)))
Y = mat(ones((m,1)))
X[:,1:n] = dataSet[:,0:n-1]
Y = dataSet[:,-1]
xTx = X.T*X
if linalg.det(xTx) ==0.0:
raiseNameError("This matrix is singular, cannot do inverse")
ws = X.T*X.I*(X.T*Y)
return ws,X,Y` def modelErr(dataSet):
ws, X, Y = linearSolve(dataSet)
yHat = X * ws
# print corrcoef(yHat, Y, rowvar=0)
return sum(power(Y - yHat, 2))
def linearSolve(dataSet):
m,n = shape(dataSet)
X = mat(ones((m,n)))
Y = mat(ones((m,1)))
X[:,1:n] = dataSet[:,0:n-1]
Y = dataSet[:,-1]
xTx = X.T*X
if linalg.det(xTx) ==0.0:
raiseNameError("This matrix is singular, cannot do inverse")
ws = X.T*X.I*(X.T*Y)
return ws,X,Y
<pre><code>def modelTreeEval(model,inDat):
n=shape(inDat)[1]
X=mat(ones((1,n+1)))
X[:,1:n+1] = inDat
print('X',X,'\n','model',model)
return float(X*model)
---------------------------------------------------------------------------
<pre><code>
TypeError Traceback (most recent call last)
<ipython-input-50-c482daea3463> in <module>()
116 myDat = loadDataSet('F:/data4.txt')
117 myMat = mat(myDat)
--> 118 myTree = createTree(myMat, modelLeaf, modelErr)
119 print(myTree)
120
<ipython-input-42-60c716a5beb4> in createTree(dataSet, leafType, errType, ops)
12 """
13 #
---> 14 feat,val = chooseBestSplit(dataSet,leafType,errType,ops)
15 #
16 '''
<ipython-input-41-4305f5a748bb> in chooseBestSplit(dataSet, leafType, errType, ops)
76 s = set(dataSet[:,-1].T.tolist()[0])
77 if len(s) ==1:
---> 78 return None,leafType(dataSet)
79 m,n = shape(dataSet)
80 #
<ipython-input-43-6a297b0e048d> in modelLeaf(dataSet)
9 ###
10 """
---> 11 ws,X,Y = linearSolve(dataSet)
12 return ws
13
<ipython-input-43-6a297b0e048d> in linearSolve(dataSet)
42 X = mat(ones((m,n)))
43 Y = mat(ones((m,1)))
---> 44 X[:,1:n] = dataSet[:,0:n-1]
45 Y = dataSet[:,-1]
46 xTx = X.T*X
TypeError: float() argument must be a string or a number, not 'map'
I am converting from Python2.7 numba code into Python3.4. This function pairwise_distance converts the distance matrix from a multidimensional array X and Y.
However, I use a numba decorator #jit to speed up the code:
import numpy as np
from numba import double
from numba.decorators import jit
#jit(arg_types = [double[:,:], double[:,:]])
def pairwise_distance(X, D):
M = X.shape[0]
N = X.shape[1]
for i in range(M):
for j in range(M):
d = 0.0
for k in range(N):
tmp = X[i, k] - X[j, k]
d += tmp * tmp
D[i, j] = np.sqrt(d)
# calculate the pairwise distance between X and Y
X = np.random.random((1000, 3))
Y = np.empty((1000, 1000))
pairwise_distance(X, Y)
This outputs the following error:
KeyError: "Does not support option: 'arg_types'"
I am not entirely sure what this error means, or how one translates this from Python2.7 to be compatible with Python3.4
This is the error in full:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
/opt/local/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/numba/targets/options.py in from_dict(self, dic)
15 try:
---> 16 ctor = self.OPTIONS[k]
17 except KeyError:
KeyError: 'arg_types'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-15-2c486d04f659> in <module>()
19 X = np.random.random((1000, 3))
20 Y = np.empty((1000, 1000))
---> 21 pairwise_numba(X, Y)
/opt/local/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/numba/dispatcher.py in _compile_for_args(self, *args, **kws)
286 else:
287 real_args.append(self.typeof_pyval(a))
--> 288 return self.compile(tuple(real_args))
289
290 def inspect_llvm(self, signature=None):
/opt/local/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/numba/dispatcher.py in compile(self, sig)
504
505 self._cache_misses[sig] += 1
--> 506 cres = self._compiler.compile(args, return_type)
507 self.add_overload(cres)
508 self._cache.save_overload(sig, cres)
/opt/local/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/numba/dispatcher.py in compile(self, args, return_type)
76 def compile(self, args, return_type):
77 flags = compiler.Flags()
---> 78 self.targetdescr.options.parse_as_flags(flags, self.targetoptions)
79
80 impl = self._get_implementation(args, {})
/opt/local/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/numba/targets/options.py in parse_as_flags(cls, flags, options)
24 def parse_as_flags(cls, flags, options):
25 opt = cls()
---> 26 opt.from_dict(options)
27 opt.set_flags(flags)
28 return flags
/opt/local/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/numba/targets/options.py in from_dict(self, dic)
17 except KeyError:
18 fmt = "Does not support option: '%s'"
---> 19 raise KeyError(fmt % k)
20 else:
21 self.values[k] = ctor(v)
KeyError: "Does not support option: 'arg_types'"
When I use argtypes instead of arg_types, instead of getting a KeyError, I get a deprecation warning saying to use signature instead.
The following worked for me using python 3.5 and numba 0.25.0
import numpy as np
from numba import jit
#jit('void(double[:,:], double[:,:])')
def pairwise_distance(X, D):
M = X.shape[0]
N = X.shape[1]
for i in range(M):
for j in range(M):
d = 0.0
for k in range(N):
tmp = X[i, k] - X[j, k]
d += tmp * tmp
D[i, j] = np.sqrt(d)
# calculate the pairwise distance between X and Y
X = np.random.random((1000, 3))
Y = np.empty((1000, 1000))
pairwise_distance(X, Y)