Scikit-Multiflow - Cannot take a larger sample than population when 'replace'=False - python

So I was trying to run the following code, where x is a feature vector with shape (2381,) and y is a label with shape (1,) after being cast to a NumPy array.
from skmultiflow.meta import AdaptiveRandomForestClassifier
import numpy as np

import data

np.random.seed(1)


def main() -> None:
    dataset = data.get_full_dataset()
    metadata = data.get_metadata()
    training_batch = data.get_windows(dataset, metadata, data.get_initial_training_groups())
    streaming_batch = data.get_windows(dataset, metadata, data.get_incremental_learning_groups())
    initial_features = np.concatenate([dataset.feature_vectors for group, dataset in training_batch])
    initial_labels = np.concatenate([dataset.labels for group, dataset in training_batch])
    model = AdaptiveRandomForestClassifier()
    correct_count = 0
    n_samples = 0
    for x, y in zip(initial_features, initial_labels):
        y = np.asarray([y])
        y_prediction = model.predict(x)
        if y_prediction[0] == y:
            correct_count += 1
        model.partial_fit(x, y)
        n_samples += 1
    print(f"Accuracy: {correct_count / n_samples}")


if __name__ == "__main__":
    main()
However, I am getting the following error:
Traceback (most recent call last):
File "/home/nathan/Documents/Research/BodmasOnline/main.py", line 31, in <module>
main()
File "/home/nathan/Documents/Research/BodmasOnline/main.py", line 24, in main
model.partial_fit(x, y)
File "/home/nathan/Documents/Research/BodmasOnline/venv/lib/python3.10/site-packages/skmultiflow/meta/adaptive_random_forests.py", line 313, in partial_fit
self._partial_fit(X[i], y[i], self.classes, weight[i])
File "/home/nathan/Documents/Research/BodmasOnline/venv/lib/python3.10/site-packages/skmultiflow/meta/adaptive_random_forests.py", line 328, in _partial_fit
self.ensemble[i].partial_fit(np.asarray([X]), np.asarray([y]),
File "/home/nathan/Documents/Research/BodmasOnline/venv/lib/python3.10/site-packages/skmultiflow/meta/adaptive_random_forests.py", line 569, in partial_fit
self.classifier.partial_fit(X, y, classes=classes, sample_weight=sample_weight)
File "/home/nathan/Documents/Research/BodmasOnline/venv/lib/python3.10/site-packages/skmultiflow/trees/hoeffding_tree.py", line 394, in partial_fit
self._partial_fit(X[i], y[i], sample_weight[i])
File "/home/nathan/Documents/Research/BodmasOnline/venv/lib/python3.10/site-packages/skmultiflow/trees/hoeffding_tree.py", line 424, in _partial_fit
learning_node.learn_from_instance(X, y, sample_weight, self)
File "/home/nathan/Documents/Research/BodmasOnline/venv/lib/python3.10/site-packages/skmultiflow/trees/nodes/random_learning_node_nb_adaptive.py", line 54, in learn_from_instance
super().learn_from_instance(X, y, weight, ht)
File "/home/nathan/Documents/Research/BodmasOnline/venv/lib/python3.10/site-packages/skmultiflow/trees/nodes/random_learning_node_classification.py", line 58, in learn_from_instance
self.list_attributes = self._sample_features(get_dimensions(X)[1])
File "/home/nathan/Documents/Research/BodmasOnline/venv/lib/python3.10/site-packages/skmultiflow/trees/nodes/random_learning_node_classification.py", line 72, in _sample_features
return self.random_state.choice(
File "mtrand.pyx", line 965, in numpy.random.mtrand.RandomState.choice
ValueError: Cannot take a larger sample than population when 'replace=False'
Can anyone help me out?

Going to answer my own question, since scikit-multiflow does not necessarily have the best documentation. The feature vector x has to have dimensions (1, n), which in this case yields (1, 2381). This can be achieved programmatically as follows:
from skmultiflow.meta import AdaptiveRandomForestClassifier
import numpy as np

import data

np.random.seed(1)


def main() -> None:
    dataset = data.get_full_dataset()
    metadata = data.get_metadata()
    training_batch = data.get_windows(dataset, metadata, data.get_initial_training_groups())
    streaming_batch = data.get_windows(dataset, metadata, data.get_incremental_learning_groups())
    initial_features = np.concatenate([dataset.feature_vectors for group, dataset in training_batch])
    initial_labels = np.concatenate([dataset.labels for group, dataset in training_batch])
    model = AdaptiveRandomForestClassifier()
    correct_count = 0
    n_samples = 0
    for x, y in zip(initial_features, initial_labels):
        x = np.expand_dims(x, axis=0)
        y = np.asarray([y])
        y_prediction = model.predict(x)
        if y_prediction[0] == y:
            correct_count += 1
        model.partial_fit(x, y)
        n_samples += 1
    print(f"Accuracy: {correct_count / n_samples}")


if __name__ == "__main__":
    main()
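As a side note, the same reshaping can also be done with reshape instead of expand_dims. A minimal standalone sketch with a dummy feature vector (the data helpers above are specific to my project):

import numpy as np

x = np.zeros(2381, dtype=np.float32)   # a single 1-D feature vector, shape (2381,)
y = 0                                  # its label

x_2d = x.reshape(1, -1)                # same effect as np.expand_dims(x, axis=0): shape (1, 2381)
y_1d = np.asarray([y])                 # shape (1,), one label per row of x_2d

print(x_2d.shape, y_1d.shape)          # (1, 2381) (1,)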

Related

ValueError: Variable <tf.Variable 'TensorGraph/base_params/trainable_float32_1:0' shape=(1,) dtype=float32> has `None` for gradient

I'm trying to implement a prediction of low birth weight using Nengo and TensorFlow with an SNN model.
But I got the following ValueError (in Anaconda):
Traceback (most recent call last):
File "C:\Users\USER\NengoPRJ\nengo_lowbirth.py", line 95, in <module>
sim.fit(train_data, {out_p: train_labels}, epochs=epochs)
File "C:\ProgramData\Anaconda3\envs\tf210\lib\site-packages\nengo\utils\magic.py", line 181, in __call__
return self.wrapper(self.__wrapped__, self.instance, args, kwargs)
File "C:\ProgramData\Anaconda3\envs\tf210\lib\site-packages\nengo_dl\simulator.py", line 66, in require_open
return wrapped(*args, **kwargs)
File "C:\ProgramData\Anaconda3\envs\tf210\lib\site-packages\nengo_dl\simulator.py", line 869, in fit
"fit", x=x, y=y, n_steps=n_steps, stateful=stateful, **kwargs
File "C:\ProgramData\Anaconda3\envs\tf210\lib\site-packages\nengo\utils\magic.py", line 181, in __call__
return self.wrapper(self.__wrapped__, self.instance, args, kwargs)
File "C:\ProgramData\Anaconda3\envs\tf210\lib\site-packages\nengo_dl\simulator.py", line 50, in with_self
output = wrapped(*args, **kwargs)
File "C:\ProgramData\Anaconda3\envs\tf210\lib\site-packages\nengo_dl\simulator.py", line 1032, in _call_keras
outputs = getattr(self.keras_model, func_type)(**func_args)
File "C:\ProgramData\Anaconda3\envs\tf210\lib\site-packages\tensorflow_core\python\keras\engine\training.py", line 819, in fit
use_multiprocessing=use_multiprocessing)
File "C:\ProgramData\Anaconda3\envs\tf210\lib\site-packages\tensorflow_core\python\keras\engine\training_arrays.py", line 680, in fit
steps_name='steps_per_epoch')
File "C:\ProgramData\Anaconda3\envs\tf210\lib\site-packages\tensorflow_core\python\keras\engine\training_arrays.py", line 189, in model_iteration
f = _make_execution_function(model, mode)
File "C:\ProgramData\Anaconda3\envs\tf210\lib\site-packages\tensorflow_core\python\keras\engine\training_arrays.py", line 571, in _make_execution_function
return model._make_execution_function(mode)
File "C:\ProgramData\Anaconda3\envs\tf210\lib\site-packages\tensorflow_core\python\keras\engine\training.py", line 2125, in _make_execution_function
self._make_train_function()
File "C:\ProgramData\Anaconda3\envs\tf210\lib\site-packages\tensorflow_core\python\keras\engine\training.py", line 2057, in _make_train_function
params=self._collected_trainable_weights, loss=self.total_loss)
File "C:\ProgramData\Anaconda3\envs\tf210\lib\site-packages\tensorflow_core\python\keras\optimizer_v2\optimizer_v2.py", line 503, in get_updates
grads = self.get_gradients(loss, params)
File "C:\ProgramData\Anaconda3\envs\tf210\lib\site-packages\tensorflow_core\python\keras\optimizer_v2\optimizer_v2.py", line 397, in get_gradients
"K.argmax, K.round, K.eval.".format(param))
ValueError: Variable <tf.Variable 'TensorGraph/base_params/trainable_float32_1:0' shape=(1,) dtype=float32> has `None` for gradient. Please make sure that all of your ops have a gradient defined (i.e. are differentiable). Common ops without gradient: K.argmax, K.round, K.eval.
(in Google Colab):
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-4-205839bf5640> in <module>()
96 loss={out_p: tf.losses.SparseCategoricalCrossentropy(from_logits=True)})
97
---> 98 sim.fit(train_data, {out_p: train_labels}, epochs=epochs)
99
100
13 frames
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/optimizer_v2/optimizer_v2.py in get_gradients(self, loss, params)
467 "gradient defined (i.e. are differentiable). "
468 "Common ops without gradient: "
--> 469 "K.argmax, K.round, K.eval.".format(param))
470 grads = self._clip_gradients(grads)
471 return grads
ValueError: Variable <tf.Variable 'TensorGraph/base_params/trainable_float32_1:0' shape=(1,) dtype=float32> has `None` for gradient. Please make sure that all of your ops have a gradient defined (i.e. are differentiable). Common ops without gradient: K.argmax, K.round, K.eval.
I found many solutions on GitHub and Stack Overflow:
https://github.com/tensorflow/tensorflow/issues/1511
https://github.com/huggingface/transformers/issues/5427
but they couldn't resolve my error.
My code is as follows:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import random
import nengo
import nengo_dl
import requests

seed = 1
amp = 1
max_rates = 100
intercepts = 0
tau_rc = 0.02
noise_filter = 0.1  # noise_filter
train_data_rate = 0.85
learning_rate = 0.001
epochs = 5

np.random.seed(seed)
do_train = True

url = "https://github.com/nfmcclure/tensorflow_cookbook/raw/master/01_Introduction/07_Working_with_Data_Sources/birthweight_data/birthweight.dat"
birth_file = requests.get(url)
birth_all_data = birth_file.text.split('\r\n')
birth_header = [x for x in birth_all_data[0].split('\t') if len(x) >= 1]
birth_data = [[float(x) for x in y.split('\t') if len(x) >= 1] for y in birth_all_data[1:] if len(y) >= 1]
data_size = len(birth_data)

x_data = np.array([x[1:8] for x in birth_data])
y_data = np.array([y[0] for y in birth_data])

train_samples = round(data_size * train_data_rate)
train_indices = np.random.choice(data_size, train_samples, replace=False)
testset = set(range(data_size)) - set(train_indices)
test_indices = np.array(list(testset))

x_train = x_data[train_indices]
y_train = np.transpose([y_data[train_indices]])
x_test = x_data[test_indices]
y_test = np.transpose([y_data[test_indices]])

def normalize_cols(m):
    col_max = m.max(axis=0)
    col_min = m.min(axis=0)
    return (m - col_min) / (col_max - col_min)

x_train = np.nan_to_num(normalize_cols(x_train))
x_test = np.nan_to_num(normalize_cols(x_test))

##################################################
nfeatures = 7
#minibatch_size = 189 - train_samples
minibatch_size = 1

with nengo.Network(seed=seed) as net:
    net.config[nengo.Ensemble].max_rates = nengo.dists.Choice([max_rates])
    net.config[nengo.Ensemble].intercepts = nengo.dists.Choice([intercepts])
    neuron_type = nengo.LIF(amplitude=amp, tau_rc=tau_rc)
    nengo_dl.configure_settings(stateful=False)
    inp = nengo.Node([0] * nfeatures)
    ens = nengo.Ensemble(1, 1, neuron_type=neuron_type)
    x = nengo.Connection(inp, ens.neurons, transform=nengo_dl.dists.Glorot(), synapse=None)
    inp_p = nengo.Probe(inp)
    out_p = nengo.Probe(x, label="out_p")
    out_p_filt = nengo.Probe(x, synapse=noise_filter, label="out_p_filt")

sim = nengo_dl.Simulator(net, minibatch_size=minibatch_size)

n_steps = 20
train_data = np.reshape(x_train, (x_train.shape[0], 1, nfeatures))
train_labels = np.reshape(y_train, (y_train.shape[0], 1, 1))
test_data = np.tile(np.reshape(x_test, (x_test.shape[0], 1, nfeatures)), (1, n_steps, 1))
test_labels = np.tile(np.reshape(y_test, (y_test.shape[0], 1, 1)), (1, n_steps, 1))

def accuracy(outputs, targets):
    return 100 * tf.reduce_mean(tf.cast(tf.equal(tf.round(tf.sigmoid(outputs)), targets), tf.float32))

sim.compile(loss={out_p_filt: accuracy})
print("accuracy before training:", sim.evaluate(test_data, {out_p_filt: test_labels}, verbose=0)["loss"])

do_training = do_train
if do_training:
    sim.compile(optimizer=tf.optimizers.Adam(learning_rate=learning_rate),
                loss={out_p: tf.losses.SparseCategoricalCrossentropy(from_logits=True)})
    sim.fit(train_data, {out_p: train_labels}, epochs=epochs)
System:
python:3.7.7
tensorflow:2.2.0 and 2.1.0
nengo:3.0.0
nengo-dl:3.2.0
How can I solve this problem?
Thanks in advance.

Writing training model for CNN

I am writing the training code for TwoStream-IQA, which is a two-stream convolutional neural network. The model predicts the quality score for the patches being assessed through the two streams of the network. In the training below, I have used the test dataset provided in the GitHub link above.
The training code is as below:
import os
import time
import numpy as np
import argparse
import chainer
chainer.global_config.train = True
from chainer import cuda
from chainer import serializers
from chainer import optimizers
from chainer import iterators
from chainer import training
from chainer.training import extensions
from PIL import Image
from sklearn.feature_extraction.image import extract_patches
from model import Model

parser = argparse.ArgumentParser(description='train.py')
parser.add_argument('--model', '-m', default='',
                    help='path to the trained model')
parser.add_argument('--gpu', '-g', default=0, type=int, help='GPU ID')
args = parser.parse_args()

model = Model()
cuda.cudnn_enabled = True
cuda.check_cuda_available()
xp = cuda.cupy
model.to_gpu()

## prepare training data
test_label_path = 'data_list/test.txt'
test_img_path = 'data/live/'
test_Graimg_path = 'data/live_grad/'
save_model_path = '/models/nr_sana_2stream.model'
patches_per_img = 256
patchSize = 32

print('-------------Load data-------------')
final_train_set = []
with open(test_label_path, 'rt') as f:
    for l in f:
        line, la = l.strip().split()  # for debug
        tic = time.time()
        full_path = os.path.join(test_img_path, line)
        Grafull_path = os.path.join(test_Graimg_path, line)
        inputImage = Image.open(full_path)
        Graf = Image.open(Grafull_path)
        img = np.asarray(inputImage, dtype=np.float32)
        Gra = np.asarray(Graf, dtype=np.float32)
        img = img.transpose(2, 0, 1)
        Gra = Gra.transpose(2, 0, 1)
        img1 = np.zeros((1, 3, Gra.shape[1], Gra.shape[2]))
        img1[0, :, :, :] = img
        Gra1 = np.zeros((1, 3, Gra.shape[1], Gra.shape[2]))
        Gra1[0, :, :, :] = Gra
        patches = extract_patches(img, (3, patchSize, patchSize), patchSize)
        Grapatches = extract_patches(Gra, (3, patchSize, patchSize), patchSize)
        X = patches.reshape((-1, 3, patchSize, patchSize))
        GraX = Grapatches.reshape((-1, 3, patchSize, patchSize))
        temp_slice1 = [X[int(float(index))] for index in range(256)]
        temp_slice2 = [GraX[int(float(index))] for index in range(256)]
        ##############################################
        for j in range(len(temp_slice1)):
            temp_slice1[j] = xp.array(temp_slice1[j].astype(np.float32))
            temp_slice2[j] = xp.array(temp_slice2[j].astype(np.float32))
            final_train_set.append((
                np.asarray((temp_slice1[j], temp_slice2[j])).astype(np.float32),
                int(la)
            ))
        ##############################################

print('--------------Done!----------------')

print('--------------Iterator!----------------')
train_iter = iterators.SerialIterator(final_train_set, batch_size=4)
optimizer = optimizers.Adam()
optimizer.use_cleargrads()
optimizer.setup(model)
updater = training.StandardUpdater(train_iter, optimizer, device=0)

print('--------------Trainer!----------------')
trainer = training.Trainer(updater, (50, 'epoch'), out='result')
trainer.extend(extensions.LogReport())
trainer.extend(extensions.PrintReport(['epoch', 'iteration', 'main/loss', 'elapsed_time']))

print('--------------Running trainer!----------------')
trainer.run()
But the code produces an error at the line trainer.run():
-------------Load data-------------
--------------Done!----------------
--------------Iterator!----------------
--------------Trainer!----------------
--------------Running trainer!----------------
Exception in main training loop: Unsupported dtype object
Traceback (most recent call last):
File "/home/sanaalamgeer/anaconda3/lib/python3.6/site-packages/chainer/training/trainer.py", line 316, in run
update()
File "/home/sanaalamgeer/anaconda3/lib/python3.6/site-packages/chainer/training/updaters/standard_updater.py", line 149, in update
self.update_core()
File "/home/sanaalamgeer/anaconda3/lib/python3.6/site-packages/chainer/training/updaters/standard_updater.py", line 154, in update_core
in_arrays = self.converter(batch, self.device)
File "/home/sanaalamgeer/anaconda3/lib/python3.6/site-packages/chainer/dataset/convert.py", line 149, in concat_examples
return to_device(device, _concat_arrays(batch, padding))
File "/home/sanaalamgeer/anaconda3/lib/python3.6/site-packages/chainer/dataset/convert.py", line 37, in to_device
return cuda.to_gpu(x, device)
File "/home/sanaalamgeer/anaconda3/lib/python3.6/site-packages/chainer/backends/cuda.py", line 285, in to_gpu
return _array_to_gpu(array, device_, stream)
File "/home/sanaalamgeer/anaconda3/lib/python3.6/site-packages/chainer/backends/cuda.py", line 333, in _array_to_gpu
return cupy.asarray(array)
File "/home/sanaalamgeer/anaconda3/lib/python3.6/site-packages/cupy/creation/from_data.py", line 60, in asarray
return core.array(a, dtype, False)
File "cupy/core/core.pyx", line 2049, in cupy.core.core.array
File "cupy/core/core.pyx", line 2083, in cupy.core.core.array
Will finalize trainer extensions and updater before reraising the exception.
Traceback (most recent call last):
File "<ipython-input-69-12b84b41c6b9>", line 1, in <module>
runfile('/mnt/nas/sanaalamgeer/Projects/1/MyOwnChainer/Two-stream_IQA-master/train.py', wdir='/mnt/nas/sanaalamgeer/Projects/1/MyOwnChainer/Two-stream_IQA-master')
File "/home/sanaalamgeer/anaconda3/lib/python3.6/site-packages/spyder_kernels/customize/spydercustomize.py", line 668, in runfile
execfile(filename, namespace)
File "/home/sanaalamgeer/anaconda3/lib/python3.6/site-packages/spyder_kernels/customize/spydercustomize.py", line 108, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "/mnt/nas/sanaalamgeer/Projects/1/MyOwnChainer/Two-stream_IQA-master/train.py", line 129, in <module>
trainer.run()
File "/home/sanaalamgeer/anaconda3/lib/python3.6/site-packages/chainer/training/trainer.py", line 330, in run
six.reraise(*sys.exc_info())
File "/home/sanaalamgeer/anaconda3/lib/python3.6/site-packages/six.py", line 693, in reraise
raise value
File "/home/sanaalamgeer/anaconda3/lib/python3.6/site-packages/chainer/training/trainer.py", line 316, in run
update()
File "/home/sanaalamgeer/anaconda3/lib/python3.6/site-packages/chainer/training/updaters/standard_updater.py", line 149, in update
self.update_core()
File "/home/sanaalamgeer/anaconda3/lib/python3.6/site-packages/chainer/training/updaters/standard_updater.py", line 154, in update_core
in_arrays = self.converter(batch, self.device)
File "/home/sanaalamgeer/anaconda3/lib/python3.6/site-packages/chainer/dataset/convert.py", line 149, in concat_examples
return to_device(device, _concat_arrays(batch, padding))
File "/home/sanaalamgeer/anaconda3/lib/python3.6/site-packages/chainer/dataset/convert.py", line 37, in to_device
return cuda.to_gpu(x, device)
File "/home/sanaalamgeer/anaconda3/lib/python3.6/site-packages/chainer/backends/cuda.py", line 285, in to_gpu
return _array_to_gpu(array, device_, stream)
File "/home/sanaalamgeer/anaconda3/lib/python3.6/site-packages/chainer/backends/cuda.py", line 333, in _array_to_gpu
return cupy.asarray(array)
File "/home/sanaalamgeer/anaconda3/lib/python3.6/site-packages/cupy/creation/from_data.py", line 60, in asarray
return core.array(a, dtype, False)
File "cupy/core/core.pyx", line 2049, in cupy.core.core.array
File "cupy/core/core.pyx", line 2083, in cupy.core.core.array
ValueError: Unsupported dtype object
Maybe that's because I am arranging the training data wrong, since the model takes the training parameters as:
length = x_data.shape[0]
x1 = Variable(x_data[0:length:2])
x2 = Variable(x_data[1:length:2])
and y_data as:
t = xp.repeat(y_data[0:length:2], 1)
The variable final_train_set prepares a dataset of tuples (NumPy array, 66), where every NumPy array has dimensions (2, 3, 32, 32) and carries two kinds of patches of shape (3, 32, 32).
I have used the dataset from the GitHub link provided above.
I am a newbie in Chainer, please help!
In short, you called numpy.asarray inappropriately: numpy.asarray does not concatenate two cupy.ndarrays, whereas it does concatenate two numpy.ndarrays.
Your code in brief:
import numpy, cupy

final_train_set = []

N_PATCH_PER_IMAGE = 8

for i in range(10):
    label = 0
    temp_slice_1 = [numpy.zeros((3, 3)) for j in range(N_PATCH_PER_IMAGE)]
    temp_slice_2 = [numpy.zeros((3, 3)) for j in range(N_PATCH_PER_IMAGE)]
    for j in range(N_PATCH_PER_IMAGE):
        temp_slice_1[j] = cupy.array(temp_slice_1[j])
        temp_slice_2[j] = cupy.array(temp_slice_2[j])
        final_train_set.append(
            [
                # attempting to concatenate two cupy arrays by numpy.asarray
                numpy.asarray([temp_slice_1[j], temp_slice_2[j]]),
                label
            ]
        )
The bug
import numpy as np
import cupy as cp
print("two numpy arrays")
print(np.asarray([np.zeros(shape=(1,)), np.zeros(shape=(1,))]))
print(np.asarray([np.zeros(shape=(1,)), np.zeros(shape=(1,))]).dtype)
print()
print("two cupy arrays")
print(np.asarray([cp.zeros(shape=(1,)), cp.zeros(shape=(1,))]))
print(np.asarray([cp.zeros(shape=(1,)), cp.zeros(shape=(1,))]).dtype)
two numpy arrays
[[0.]
[0.]]
float64
two cupy arrays
[[array(0.)]
[array(0.)]]
object
Solution: comment out two lines
import numpy  # not import cupy here

for i in range(10):
    label = 0
    temp_slice_1 = [numpy.zeros((3, 3)) for j in range(N_PATCH_PER_IMAGE)]
    temp_slice_2 = [numpy.zeros((3, 3)) for j in range(N_PATCH_PER_IMAGE)]
    for j in range(N_PATCH_PER_IMAGE):
        # temp_slice_1[j] = cupy.array(temp_slice_1[j])  <- comment out!
        # temp_slice_2[j] = cupy.array(temp_slice_2[j])  <- comment out!
        final_train_set.append(
            [
                # concatenate two numpy arrays; usually cupy should not be used in the dataset
                numpy.asarray([temp_slice_1[j], temp_slice_2[j]]),
                label
            ]
        )
Footnote
In the code you presented, xp is not specified, so nobody could tell what it was. Please post the WHOLE BODY of your code, including the model, if you cannot isolate the problem.
I guess you might not be able to run the training code for another reason: in this code, all of the data is first brought into main memory while constructing final_train_set. If the number of images is huge, main memory will run out and a MemoryError will be raised. (In other words, if the number of images is small and your memory is large enough, the error will not happen.)
In that case, the following references (Chainer at a Glance and Dataset Abstraction) would help.
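As a rough illustration of that dataset abstraction, here is a minimal sketch (the class name and the entries format are made up for this example; the real image loading and patch extraction would go inside get_example):

import numpy as np
import chainer


class LazyPatchDataset(chainer.dataset.DatasetMixin):
    """Loads one example at a time instead of keeping every patch in RAM."""

    def __init__(self, entries):
        # entries: list of (image_path, grad_image_path, label) tuples
        self.entries = entries

    def __len__(self):
        return len(self.entries)

    def get_example(self, i):
        img_path, grad_path, label = self.entries[i]
        # Load and preprocess only this example here (left as a stub in this sketch).
        x = np.zeros((2, 3, 32, 32), dtype=np.float32)
        return x, np.int32(label)

Such a dataset can be passed to iterators.SerialIterator just like the plain list, and StandardUpdater(..., device=0) moves each batch to the GPU.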
DISCLAIMER: None of this code is written by me.
I found this GitHub repository that uses OpenCV, SciPy and a few other modules for the quality assessment. Here is the code:
# Python code for BRISQUE model
# Original paper title: No-Reference Image Quality Assessment in the Spatial Domain
# Link: http://ieeexplore.ieee.org/document/6272356/

import cv2
import numpy as np
from scipy import ndimage
import math

def get_gaussian_filter():
    [m, n] = [(ss - 1.0) / 2.0 for ss in (shape, shape)]
    [y, x] = np.ogrid[-m:m+1, -n:n+1]
    window = np.exp(-(x*x + y*y) / (2.0*sigma*sigma))
    window[window < np.finfo(window.dtype).eps*window.max()] = 0
    sum_window = window.sum()
    if sum_window != 0:
        window = np.divide(window, sum_window)
    return window

def lmom(X):
    (rows, cols) = X.shape
    if cols == 1:
        X = X.reshape(1, rows)
    n = rows
    X.sort()
    b = np.zeros(3)
    b0 = X.mean()
    for r in range(1, 4):
        Num = np.prod(np.tile(np.arange(r+1, n+1), (r, 1)) - np.tile(np.arange(1, r+1).reshape(r, 1), (1, n-r)), 0)
        Num = Num.astype(np.float)
        Den = np.prod(np.tile(n, (1, r)) - np.arange(1, r+1), 1)
        b[r-1] = 1.0/n * sum(Num/Den * X[0, r:])
    L = np.zeros(4)
    L[0] = b0
    L[1] = 2*b[0] - b0
    L[2] = 6*b[1] - 6*b[0] + b0
    L[3] = 20*b[2] - 30*b[1] + 12*b[0] - b0
    return L

def compute_features(im):
    im = im.astype(np.float)
    window = get_gaussian_filter()
    scalenum = 2
    feat = []
    for itr_scale in range(scalenum):
        mu = cv2.filter2D(im, cv2.CV_64F, window, borderType=cv2.BORDER_CONSTANT)
        mu_sq = mu * mu
        sigma = np.sqrt(abs(cv2.filter2D(im*im, cv2.CV_64F, window, borderType=cv2.BORDER_CONSTANT) - mu_sq))
        structdis = (im - mu) / (sigma + 1)
        structdis_col_vector = np.reshape(structdis.transpose(), (structdis.size, 1))
        L = lmom(structdis.reshape(structdis.size, 1))
        feat = np.append(feat, [L[1], L[3]])
        shifts = [[0, 1], [1, 0], [1, 1], [-1, 1]]
        for itr_shift in shifts:
            shifted_structdis = np.roll(structdis, itr_shift[0], axis=0)
            shifted_structdis = np.roll(shifted_structdis, itr_shift[1], axis=1)
            shifted_structdis_col_vector = np.reshape(shifted_structdis.T, (shifted_structdis.size, 1))
            pair = structdis_col_vector * shifted_structdis_col_vector
            L = lmom(pair.reshape(pair.size, 1))
            feat = np.append(feat, L)
        im = cv2.resize(im, (0, 0), fx=0.5, fy=0.5, interpolation=cv2.INTER_CUBIC)
    return feat

im = ndimage.imread('example.bmp', flatten=True)
feat = compute_features(im)
print feat
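Note that get_gaussian_filter refers to shape and sigma without defining them, so they have to exist at module level before compute_features is called. As an assumption on my part (the original BRISQUE implementation uses a 7x7 window with sigma 7/6), something like the following would need to be added near the top:

shape = 7        # assumed Gaussian window size
sigma = 7.0 / 6  # assumed standard deviation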

shapes (401,1) and (401,1) not aligned: 1 (dim 1) != 401 (dim 0)

I am implementing a one-vs-all classifier; however, I got the error "shapes (401,1) and (401,1) not aligned: 1 (dim 1) != 401 (dim 0)", and the traceback is below:
Traceback (most recent call last):
File "<ipython-input-1-682bb50c2435>", line 1, in <module>
runfile('/Users/alvin/Documents/GitDemo/ML_Basic_Imple/Coursera_ML_Python/ex3/Multi_classify_oneVSall.py', wdir='/Users/alvin/Documents/GitDemo/ML_Basic_Imple/Coursera_ML_Python/ex3')
File "/Users/alvin/Documents/tools/anaconda3/lib/python3.6/site-packages/spyder/utils/site/sitecustomize.py", line 710, in runfile
execfile(filename, namespace)
File "/Users/alvin/Documents/tools/anaconda3/lib/python3.6/site-packages/spyder/utils/site/sitecustomize.py", line 101, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "/Users/alvin/Documents/GitDemo/ML_Basic_Imple/Coursera_ML_Python/ex3/Multi_classify_oneVSall.py", line 124, in <module>
trained_theta = training_OnevsAll_theta(X,y,10,0.1)
File "/Users/alvin/Documents/GitDemo/ML_Basic_Imple/Coursera_ML_Python/ex3/Multi_classify_oneVSall.py", line 119, in training_OnevsAll_theta
theta,cost = opt_Cost(initial_theta,X,y,lamada)
File "/Users/alvin/Documents/GitDemo/ML_Basic_Imple/Coursera_ML_Python/ex3/Multi_classify_oneVSall.py", line 96, in opt_Cost
res = optimize.fmin_bfgs(LR_Costfunction, theta, fprime=Gradient, args=(X,y,lamada) )
File "/Users/alvin/Documents/tools/anaconda3/lib/python3.6/site-packages/scipy/optimize/optimize.py", line 859, in fmin_bfgs
res = _minimize_bfgs(f, x0, args, fprime, callback=callback, **opts)
File "/Users/alvin/Documents/tools/anaconda3/lib/python3.6/site-packages/scipy/optimize/optimize.py", line 934, in _minimize_bfgs
old_fval, old_old_fval, amin=1e-100, amax=1e100)
File "/Users/alvin/Documents/tools/anaconda3/lib/python3.6/site-packages/scipy/optimize/optimize.py", line 765, in _line_search_wolfe12
**kwargs)
File "/Users/alvin/Documents/tools/anaconda3/lib/python3.6/site-packages/scipy/optimize/linesearch.py", line 97, in line_search_wolfe1
derphi0 = np.dot(gfk, pk)
ValueError: shapes (401,1) and (401,1) not aligned: 1 (dim 1) != 401 (dim 0)
Could you find any problem in my code below?
Thank you for your patience!
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import scipy.io
import scipy.misc
import matplotlib.cm as cm  # Used to display images in a specific colormap
import random
from scipy.special import expit

datapath = 'data/ex3data1.mat'
data = scipy.io.loadmat(datapath)
X = data['X']
y = data['y']
print(X.shape)
print(y.shape)

def _display_data():
    all_fig = np.zeros((10*20, 10*20))
    index_of_samples = random.sample(range(X.shape[0]), 100)
    row, col = 0, 0
    for i in index_of_samples:
        if col == 10:
            row += 1
            col = 0
        fig = X[i].reshape(20, 20).T
        all_fig[row*20:(row+1)*20, col*20:(col+1)*20] = fig
        col += 1
    plt.figure(figsize=(8, 8))
    img = scipy.misc.toimage(all_fig)
    plt.imshow(img, cmap=plt.cm.gray_r)

_display_data()

# ============ Part 2a: Vectorize Logistic Regression ============
def hpy_sigmod_fucntion(X_inter, theta_inter):
    return expit(np.dot(X_inter, theta_inter))

def LR_Costfunction(theta_inter, X_inter, y, lamada=0.):
    m = X_inter.shape[0]
    hyp = hpy_sigmod_fucntion(X_inter, theta_inter)
    reg = np.dot(theta_inter.T, theta_inter) * (lamada / (2 * m))
    J = np.dot(y.T, np.log(hyp)) + np.dot((1 - y.T), np.log(1 - hyp))
    return J + reg

def Gradient(theta_inter, X_inter, y, lamada=0.):
    m = X_inter.shape[0]
    hyp = hpy_sigmod_fucntion(X_inter, theta_inter)
    hyp = np.asarray(hyp).reshape(hyp.shape[0], 1)
    h_y = hyp - y  # 5000 * 1
    reg = theta_inter[1:] * (lamada / m)
    reg = np.asarray(reg).reshape(reg.shape[0], 1)
    grad = (1 / m) * np.dot(X_inter.T, h_y)  # 401 * 1
    grad[1:] = grad[1:] + reg
    return grad  # 401 * 1

def opt_Cost(theta, X, y, lamada=0.):
    from scipy import optimize
    res = optimize.fmin_bfgs(LR_Costfunction, theta, fprime=Gradient, args=(X, y, lamada))
    return result[0], result[1]
The function below may be where the problem is.
Are there any restrictions when using the fmin functions?
def training_OnevsAll_theta(X, y, num_labels, lamada=0.):
    m = X.shape[0]
    n = X.shape[1]
    all_theta = np.zeros((num_labels, n+1))
    X = np.hstack((np.ones((m, 1)), X))
    for c in range(num_labels):
        print("Training theta for class %d" % c)
        initial_theta = np.zeros((n+1, 1))
        theta, cost = opt_Cost(initial_theta, X, y, lamada)
        all_theta[c] = theta
    print("Finished!")

trained_theta = training_OnevsAll_theta(X, y, 10, 0.1)
Thank you!
Aha, I found the answer in matrices are not aligned Error: Python SciPy fmin_bfgs.
Actually, the incorrectly shaped input gradient is what makes the problem occur, so I followed that answer and added the code below before return grad:
grad = np.ndarray.flatten(grad)
And it works!
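For anyone wondering why the flattening matters: the traceback shows fmin_bfgs computing np.dot(gfk, pk) between the gradient and the search direction, and that dot only works when both are 1-D. A minimal standalone sketch with toy values (not the exercise data):

import numpy as np

gfk = np.ones((401, 1))              # gradient returned as a column vector
pk = -gfk                            # search direction derived from it

# np.dot(gfk, pk) raises:
# ValueError: shapes (401,1) and (401,1) not aligned: 1 (dim 1) != 401 (dim 0)

gfk_flat = np.ndarray.flatten(gfk)   # shape (401,)
pk_flat = -gfk_flat
print(np.dot(gfk_flat, pk_flat))     # works: a plain scalar (-401.0)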

Python classification technique naive bayes

I am doing research on classification techniques. I found code online for Naive Bayes classification in Python; I have shared it below, but I am getting errors in it. Please help me solve them. The software I am using is Anaconda with Python 3.6.
The code is as follows:
import csv

def loadCsv(filename):
    lines = csv.reader(open(filename))
    dataset = list(lines)
    for i in range(len(dataset)):
        dataset[i] = [float(x) for x in dataset[i]]
    return dataset

import random

def splitDataset(dataset, splitRatio):
    trainSize = int(len(dataset) * splitRatio)
    trainSet = []
    copy = list(dataset)
    while len(trainSet) < trainSize:
        index = random.randrange(len(copy))
        trainSet.append(copy.pop(index))
    return [trainSet, copy]

def separateByClass(dataset):
    separated = {}
    for i in range(len(dataset)):
        vector = dataset[i]
        if (vector[-1] not in separated):
            separated[vector[-1]] = []
        separated[vector[-1]].append(vector)
    return separated

import math

def mean(numbers):
    return sum(numbers) / float(len(numbers))

def stdev(numbers):
    avg = mean(numbers)
    variance = sum([pow(x-avg, 2) for x in numbers]) / float(len(numbers)-1)
    return math.sqrt(variance)

def summarize(dataset):
    summaries = [(mean(attribute), stdev(attribute)) for attribute in zip(*dataset)]
    del summaries[-1]
    return summaries

def summarizeByClass(dataset):
    separated = separateByClass(dataset)
    summaries = {}
    for classValue, instances in separated.iteritems():
        summaries[classValue] = summarize(instances)
    return summaries

def calculateProbability(x, mean, stdev):
    exponent = math.exp(-(math.pow(x-mean, 2) / (2*math.pow(stdev, 2))))
    return (1 / (math.sqrt(2*math.pi) * stdev)) * exponent

def calculateClassProbabilities(summaries, inputVector):
    probabilities = {}
    for classValue, classSummaries in summaries.iteritems():
        probabilities[classValue] = 1
        for i in range(len(classSummaries)):
            mean, stdev = classSummaries[i]
            x = inputVector[i]
            probabilities[classValue] *= calculateProbability(x, mean, stdev)
    return probabilities

def predict(summaries, inputVector):
    probabilities = calculateClassProbabilities(summaries, inputVector)
    bestLabel, bestProb = None, -1
    for classValue, probability in probabilities.iteritems():
        if bestLabel is None or probability > bestProb:
            bestProb = probability
            bestLabel = classValue
    return bestLabel

def getPredictions(summaries, testSet):
    predictions = []
    for i in range(len(testSet)):
        result = predict(summaries, testSet[i])
        predictions.append(result)
    return predictions

def getAccuracy(testSet, predictions):
    correct = 0
    for x in range(len(testSet)):
        if testSet[x][-1] == predictions[x]:
            correct += 1
    return (correct / float(len(testSet))) * 100.0

def main():
    splitRatio = 0.67
    filename = 'E:\iris.data.csv'
    dataset = loadCsv(filename)
    trainingSet, testSet = splitDataset(dataset, splitRatio)
    print('Split {0} rows into train = {1} and test = {2} rows'). format(len(dataset), len(trainingSet), len(testSet))
    summaries = summarizeByClass(trainingSet)
    predictions = getPredictions(summaries, testSet)
    accuracy = getAccuracy(testSet, predictions)
    print('Accuracy: {0}%').format(accuracy)

main()
And I am getting this as my traceback:
runfile('C:/Users/Lenovo/Desktop/Naive .py', wdir='C:/Users/Lenovo/Desktop')
Traceback (most recent call last):
File "<ipython-input-11-c6b2508abccc>", line 1, in <module>
runfile('C:/Users/Lenovo/Desktop/Naive .py', wdir='C:/Users/Lenovo/Desktop')
File "C:\Users\Lenovo\Anaconda3\lib\site-packages\spyder\utils\site\sitecustomize.py", line 710, in runfile
execfile(filename, namespace)
File "C:\Users\Lenovo\Anaconda3\lib\site-packages\spyder\utils\site\sitecustomize.py", line 101, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "C:/Users/Lenovo/Desktop/Naive .py", line 109, in <module>
main()
File "C:/Users/Lenovo/Desktop/Naive .py", line 101, in main
dataset = loadCsv(filename)
File "C:/Users/Lenovo/Desktop/Naive .py", line 7, in loadCsv
dataset[i] = [float(x) for x in dataset[i]]
File "C:/Users/Lenovo/Desktop/Naive .py", line 7, in <listcomp>
dataset[i] = [float(x) for x in dataset[i]]
ValueError: could not convert string to float: 'Iris-setosa'
Please help me solve the problem. Thank you in advance

ValueError: x and y must have same first dimension

I get an error message
ValueError: x and y must have same first dimension.
Here is the code:
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import numpy as np

date, bid, ask = np.loadtxt('GBPUSD1d.txt', unpack=True,
                            delimiter=',', converters={0: mdates.strpdate2num('%Y%m%d%H%M%S')})

avgLine = ((bid+ask)/2)

patternAr = []
performanceAr = []
patForRec = []
eachPattern = []

def percentChange(startPoint, currentPoint):
    return ((float(currentPoint) - startPoint) / abs(startPoint)) * 100.00

def patternStorage():
    patStartTime = time.time()
    x = (len(avgLine)) - 30
    y = 11
    while y < x:
        pattern = []
        p1 = percentChange(avgLine[y-10], avgLine[y-9])
        ...
        p10 = percentChange(avgLine[y-10], avgLine[y])
        outcomeRange = avgLine[y+20:y+30]
        currentPoint = avgLine[y]
        try:
            avgOutcome = reduce(lambda x, y: x + y, outcomeRange) / len(outcomeRange)
        except Exception, e:
            print str(e)
            avgOutcome = 0
        futureOutcome = percentChange(currentPoint, avgOutcome)
        pattern.append(p1)
        pattern.append(p2)
        pattern.append(p3)
        pattern.append(p3)
        pattern.append(p4)
        pattern.append(p5)
        pattern.append(p6)
        pattern.append(p7)
        pattern.append(p8)
        pattern.append(p9)
        pattern.append(p10)
        patternAr.append(pattern)
        performanceAr.append(futureOutcome)
        y += 1
    patEndTime = time.time()
    print len(patternAr)
    print len(performanceAr)
    print 'Patten storage took:', patEndTime - patStartTime, 'seconds'

def currentPattern():
    cp1 = percentChange(avgLine[-11], avgLine[-10])
    ...
    cp10 = percentChange(avgLine[-11], avgLine[-1])
    patForRec.append(cp1)
    ...
    patForRec.append(cp10)
    print patForRec

def patternRecognition():
    for eachPattern in patternAr:
        sim1 = 100.00 - abs(percentChange(eachPattern[0], patForRec[0]))
        ...
        sim10 = 100.00 - abs(percentChange(eachPattern[9], patForRec[9]))
        howSim = ((sim1+sim2+sim3+sim4+sim5+sim6+sim7+sim8+sim9+sim10)) / float(10)
        if howSim > 70:
            patdex = patternAr.index(eachPattern)
            print 'predicted outcome', performanceAr[patdex]
            xp = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
            fig = plt.figure()
            plt.plot(xp, patForRec)
            plt.plot(xp, eachPattern)
            plt.show()

patternStorage()
currentPattern()
patternRecognition()
print (len(patForRec))
print (len(eachPattern))
Full error message
Traceback (most recent call last):
File "C:\Python27\ANN.py", line 165, in <module>
patternRecognition()
File "C:\Python27\ANN.py", line 131, in patternRecognition
plt.plot(xp, eachPattern)
File "C:\Python27\lib\site-packages\matplotlib\pyplot.py", line 3093, in plot
ret = ax.plot(*args, **kwargs)
File "C:\Python27\lib\site-packages\matplotlib\axes\_axes.py", line 1373, in plot
for line in self._get_lines(*args, **kwargs):
File "C:\Python27\lib\site-packages\matplotlib\axes\_base.py", line 303, in _grab_next_args
for seg in self._plot_args(remaining, kwargs):
File "C:\Python27\lib\site-packages\matplotlib\axes\_base.py", line 281, in _plot_args
x, y = self._xy_from_xy(x, y)
File "C:\Python27\lib\site-packages\matplotlib\axes\_base.py", line 223, in _xy_from_xy
raise ValueError("x and y must have same first dimension")
ValueError: x and y must have same first dimension
The problem is that eachPattern has 11 elements in it, whereas xp has 10. The reason is probably lines 52 and 53 in the patternStorage function of your code, where you append p3 to your list twice:
pattern.append(p3)
pattern.append(p3)
If you get rid of one of these, the graph plots fine. Note that the plot is inside a loop, so it is drawn multiple times; I don't know if you intended that.
If you do more of this work inside loops, so you write less repeated code by hand, this sort of problem, where you accidentally do something twice, will happen less.
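For example, as a rough sketch of that suggestion (using the same avgLine and y as in the question), building the pattern in a loop removes the chance of a duplicated append:

pattern = []
for offset in range(9, -1, -1):
    # computes p1 .. p10 in one place, so nothing can be appended twice
    pattern.append(percentChange(avgLine[y - 10], avgLine[y - offset]))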
