ValueError: x and y must have same first dimension

ValueError: x and y must have same first dimension - python

I get an error message
ValueError: x and y must have same first dimension.
Here is the code:
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import numpy as np
date,bid,ask = np.loadtxt('GBPUSD1d.txt', unpack=True,
delimiter =',',converters={0:mdates.strpdate2num('%Y%m%d%H%M%S')})
avgLine = ((bid+ask)/2)
patternAr = []
performanceAr = []
patForRec = []
eachPattern = []
def percentChange(startPoint, currentPoint):
    """Return the percentage change from startPoint to currentPoint.

    The difference is divided by abs(startPoint), so the sign of the result
    follows the direction of the move even when startPoint is negative.
    With plain Python numbers a startPoint of zero raises ZeroDivisionError.
    """
    return ((float(currentPoint) - startPoint) / abs(startPoint)) * 100.00
def patternStorage():
patStartTime = time.time()
x = (len(avgLine))-30
y = 11
while y < x:
pattern = []
p1 = percentChange(avgLine[y-10], avgLine[y-9])
...
p10 = percentChange(avgLine[y-10], avgLine[y])
outcomeRange = avgLine[y+20:y+30]
currentPoint = avgLine[y]
try:
avgOutcome = reduce(lambda x, y: x + y, outcomeRange) / len(outcomeRange)
except Exception, e:
print str(e)
avgOutcome = 0
futureOutcome = percentChange(currentPoint, avgOutcome)
pattern.append(p1)
pattern.append(p2)
pattern.append(p3)
pattern.append(p3)
pattern.append(p4)
pattern.append(p5)
pattern.append(p6)
pattern.append(p7)
pattern.append(p8)
pattern.append(p9)
pattern.append(p10)
patternAr.append(pattern)
performanceAr.append(futureOutcome)
y += 1
patEndTime = time.time()
print len (patternAr)
print len (performanceAr)
print 'Patten storage took:', patEndTime - patStartTime, 'seconds'
def currentPattern():
cp1 = percentChange(avgLine[-11], avgLine[-10])
...
cp10 = percentChange(avgLine[-11], avgLine[-1])
patForRec.append(cp1)
...
patForRec.append(cp10)
print patForRec
def patternRecognition():
for eachPattern in patternAr:
sim1 = 100.00 - abs(percentChange(eachPattern[0], patForRec[0]))
...
sim10 = 100.00 - abs(percentChange(eachPattern[9], patForRec[9]))
howSim =((sim1+sim2+sim3+sim4+sim5+sim6+sim7+sim8+sim9+sim10))/float(10)
if howSim > 70:
patdex = patternAr.index(eachPattern)
print 'predicted outcome',performanceAr[patdex]
xp = [1,2,3,4,5,6,7,8,9,10]
fig = plt.figure()
plt.plot(xp, patForRec)
plt.plot(xp, eachPattern)
plt.show()
patternStorage()
currentPattern()
patternRecognition()
print (len(patForRec))
print (len(eachPattern))
Full error message
Traceback (most recent call last):
File "C:\Python27\ANN.py", line 165, in <module>
patternRecognition()
File "C:\Python27\ANN.py", line 131, in patternRecognition
plt.plot(xp, eachPattern)
File "C:\Python27\lib\site-packages\matplotlib\pyplot.py", line 3093, in plot
ret = ax.plot(*args, **kwargs)
File "C:\Python27\lib\site-packages\matplotlib\axes\_axes.py", line 1373, in plot
for line in self._get_lines(*args, **kwargs):
File "C:\Python27\lib\site-packages\matplotlib\axes\_base.py", line 303, in _grab_next_args
for seg in self._plot_args(remaining, kwargs):
File "C:\Python27\lib\site-packages\matplotlib\axes\_base.py", line 281, in _plot_args
x, y = self._xy_from_xy(x, y)
File "C:\Python27\lib\site-packages\matplotlib\axes\_base.py", line 223, in _xy_from_xy
raise ValueError("x and y must have same first dimension")
ValueError: x and y must have same first dimension

The problem is that eachPattern has 11 elements in it, whereas xp has only 10. The reason is probably on lines 52 and 53 of your code, in the patternStorage function, where you append p3 to your list twice:
pattern.append(p3)
pattern.append(p3)
if you get rid of one of these the graph plots fine. Though it is stored in a loop to plot multiple times, don't know if you wanted to do that...
If you try and do more things inside loops, so you have to write less code, this sort of problem where you accidentally do something twice will happen less.

Related

Scikit-Multiflow - Cannot take a larger sample than population when 'replace'=False

So I was trying to run the following code, where x is a feature vector with dimensions (2381,) and y is a label with dimension (1,) after being cast to a Numpy array.
from skmultiflow.meta import AdaptiveRandomForestClassifier
import numpy as np
import data
np.random.seed(1)
def main() -> None:
dataset = data.get_full_dataset()
metadata = data.get_metadata()
training_batch = data.get_windows(dataset, metadata, data.get_initial_training_groups())
streaming_batch = data.get_windows(dataset, metadata, data.get_incremental_learning_groups())
initial_features = np.concatenate([dataset.feature_vectors for group, dataset in training_batch])
initial_labels = np.concatenate([dataset.labels for group, dataset in training_batch])
model = AdaptiveRandomForestClassifier()
correct_count = 0
n_samples = 0
for x, y in zip(initial_features, initial_labels):
y = np.asarray([y])
y_prediction = model.predict(x)
if y_prediction[0] == y:
correct_count += 1
model.partial_fit(x, y)
n_samples += 1
print(f"Accuracy: {correct_count / n_samples}")
if __name__ == "__main__":
main()
However, I get the following error:
Traceback (most recent call last):
File "/home/nathan/Documents/Research/BodmasOnline/main.py", line 31, in <module>
main()
File "/home/nathan/Documents/Research/BodmasOnline/main.py", line 24, in main
model.partial_fit(x, y)
File "/home/nathan/Documents/Research/BodmasOnline/venv/lib/python3.10/site-packages/skmultiflow/meta/adaptive_random_forests.py", line 313, in partial_fit
self._partial_fit(X[i], y[i], self.classes, weight[i])
File "/home/nathan/Documents/Research/BodmasOnline/venv/lib/python3.10/site-packages/skmultiflow/meta/adaptive_random_forests.py", line 328, in _partial_fit
self.ensemble[i].partial_fit(np.asarray([X]), np.asarray([y]),
File "/home/nathan/Documents/Research/BodmasOnline/venv/lib/python3.10/site-packages/skmultiflow/meta/adaptive_random_forests.py", line 569, in partial_fit
self.classifier.partial_fit(X, y, classes=classes, sample_weight=sample_weight)
File "/home/nathan/Documents/Research/BodmasOnline/venv/lib/python3.10/site-packages/skmultiflow/trees/hoeffding_tree.py", line 394, in partial_fit
self._partial_fit(X[i], y[i], sample_weight[i])
File "/home/nathan/Documents/Research/BodmasOnline/venv/lib/python3.10/site-packages/skmultiflow/trees/hoeffding_tree.py", line 424, in _partial_fit
learning_node.learn_from_instance(X, y, sample_weight, self)
File "/home/nathan/Documents/Research/BodmasOnline/venv/lib/python3.10/site-packages/skmultiflow/trees/nodes/random_learning_node_nb_adaptive.py", line 54, in learn_from_instance
super().learn_from_instance(X, y, weight, ht)
File "/home/nathan/Documents/Research/BodmasOnline/venv/lib/python3.10/site-packages/skmultiflow/trees/nodes/random_learning_node_classification.py", line 58, in learn_from_instance
self.list_attributes = self._sample_features(get_dimensions(X)[1])
File "/home/nathan/Documents/Research/BodmasOnline/venv/lib/python3.10/site-packages/skmultiflow/trees/nodes/random_learning_node_classification.py", line 72, in _sample_features
return self.random_state.choice(
File "mtrand.pyx", line 965, in numpy.random.mtrand.RandomState.choice
ValueError: Cannot take a larger sample than population when 'replace=False'
Can anyone help me out?

Going to answer my own question, since scikit-multiflow does not necessarily have the best documentation. The feature vector x has to have dimensions (1, n), which in this case yields (1, 2381). This can be achieved programmatically as follows:
from skmultiflow.meta import AdaptiveRandomForestClassifier
import numpy as np
import data
np.random.seed(1)
def main() -> None:
    """Run a test-then-train loop over the initial training windows.

    Every sample is first predicted with the current model, the hit is
    tallied, and only afterwards is the sample used to update the model.
    The running accuracy is printed once all samples have been consumed.
    """
    dataset = data.get_full_dataset()
    metadata = data.get_metadata()
    training_batch = data.get_windows(dataset, metadata, data.get_initial_training_groups())
    streaming_batch = data.get_windows(dataset, metadata, data.get_incremental_learning_groups())
    # Use a distinct loop name so the comprehension does not shadow `dataset`.
    initial_features = np.concatenate([window.feature_vectors for _group, window in training_batch])
    initial_labels = np.concatenate([window.labels for _group, window in training_batch])
    model = AdaptiveRandomForestClassifier()
    correct_count = 0
    n_samples = 0
    for x, y in zip(initial_features, initial_labels):
        # The model expects a 2-D feature array of shape (1, n_features),
        # not the flat (n_features,) vector yielded by iterating the batch.
        x = np.expand_dims(x, axis=0)
        y = np.asarray([y])
        y_prediction = model.predict(x)
        if y_prediction[0] == y:
            correct_count += 1
        model.partial_fit(x, y)
        n_samples += 1
    print(f"Accuracy: {correct_count / n_samples}")


if __name__ == "__main__":
    main()

Writing training model for CNN

I am writing the training code for TwoStream-IQA which is a two-stream convolutional neural network. This model predicts the quality score for the patches being assessed through two streams of the network. In the training below, I have used test dataset provided in the GitHub link above.
The training code is as below:
import os
import time
import numpy as np
import argparse
import chainer
chainer.global_config.train=True
from chainer import cuda
from chainer import serializers
from chainer import optimizers
from chainer import iterators
from chainer import training
from chainer.training import extensions
from PIL import Image
from sklearn.feature_extraction.image import extract_patches
from model import Model
parser = argparse.ArgumentParser(description='train.py')
parser.add_argument('--model', '-m', default='',
help='path to the trained model')
parser.add_argument('--gpu', '-g', default=0, type=int, help='GPU ID')
args = parser.parse_args()
model = Model()
cuda.cudnn_enabled = True
cuda.check_cuda_available()
xp = cuda.cupy
model.to_gpu()
## prepare training data
test_label_path = 'data_list/test.txt'
test_img_path = 'data/live/'
test_Graimg_path = 'data/live_grad/'
save_model_path = '/models/nr_sana_2stream.model'
patches_per_img = 256
patchSize = 32
print('-------------Load data-------------')
final_train_set = []
with open(test_label_path, 'rt') as f:
for l in f:
line, la = l.strip().split() # for debug
tic = time.time()
full_path = os.path.join(test_img_path, line)
Grafull_path = os.path.join(test_Graimg_path, line)
inputImage = Image.open(full_path)
Graf = Image.open(Grafull_path)
img = np.asarray(inputImage, dtype=np.float32)
Gra = np.asarray(Graf, dtype=np.float32)
img = img.transpose(2, 0, 1)
Gra = Gra.transpose(2, 0, 1)
img1 = np.zeros((1, 3, Gra.shape[1], Gra.shape[2]))
img1[0, :, :, :] = img
Gra1 = np.zeros((1, 3, Gra.shape[1], Gra.shape[2]))
Gra1[0, :, :, :] = Gra
patches = extract_patches(img, (3, patchSize, patchSize), patchSize)
Grapatches = extract_patches(Gra, (3, patchSize, patchSize), patchSize)
X = patches.reshape((-1, 3, patchSize, patchSize))
GraX = Grapatches.reshape((-1, 3, patchSize, patchSize))
temp_slice1 = [X[int(float(index))] for index in range(256)]
temp_slice2 = [GraX[int(float(index))] for index in range(256)]
##############################################
for j in range(len(temp_slice1)):
temp_slice1[j] = xp.array(temp_slice1[j].astype(np.float32))
temp_slice2[j] = xp.array(temp_slice2[j].astype(np.float32))
final_train_set.append((
np.asarray((temp_slice1[j], temp_slice2[j])).astype(np.float32),
int(la)
))
##############################################
print('--------------Done!----------------')
print('--------------Iterator!----------------')
train_iter = iterators.SerialIterator(final_train_set, batch_size=4)
optimizer = optimizers.Adam()
optimizer.use_cleargrads()
optimizer.setup(model)
updater = training.StandardUpdater(train_iter, optimizer, device=0)
print('--------------Trainer!----------------')
trainer = training.Trainer(updater, (50, 'epoch'), out='result')
trainer.extend(extensions.LogReport())
trainer.extend(extensions.PrintReport(['epoch', 'iteration', 'main/loss', 'elapsed_time']))
print('--------------Running trainer!----------------')
trainer.run()
But the code produces the following error at the line trainer.run():
-------------Load data-------------
--------------Done!----------------
--------------Iterator!----------------
--------------Trainer!----------------
--------------Running trainer!----------------
Exception in main training loop: Unsupported dtype object
Traceback (most recent call last):
File "/home/sanaalamgeer/anaconda3/lib/python3.6/site-packages/chainer/training/trainer.py", line 316, in run
update()
File "/home/sanaalamgeer/anaconda3/lib/python3.6/site-packages/chainer/training/updaters/standard_updater.py", line 149, in update
self.update_core()
File "/home/sanaalamgeer/anaconda3/lib/python3.6/site-packages/chainer/training/updaters/standard_updater.py", line 154, in update_core
in_arrays = self.converter(batch, self.device)
File "/home/sanaalamgeer/anaconda3/lib/python3.6/site-packages/chainer/dataset/convert.py", line 149, in concat_examples
return to_device(device, _concat_arrays(batch, padding))
File "/home/sanaalamgeer/anaconda3/lib/python3.6/site-packages/chainer/dataset/convert.py", line 37, in to_device
return cuda.to_gpu(x, device)
File "/home/sanaalamgeer/anaconda3/lib/python3.6/site-packages/chainer/backends/cuda.py", line 285, in to_gpu
return _array_to_gpu(array, device_, stream)
File "/home/sanaalamgeer/anaconda3/lib/python3.6/site-packages/chainer/backends/cuda.py", line 333, in _array_to_gpu
return cupy.asarray(array)
File "/home/sanaalamgeer/anaconda3/lib/python3.6/site-packages/cupy/creation/from_data.py", line 60, in asarray
return core.array(a, dtype, False)
File "cupy/core/core.pyx", line 2049, in cupy.core.core.array
File "cupy/core/core.pyx", line 2083, in cupy.core.core.array
Will finalize trainer extensions and updater before reraising the exception.
Traceback (most recent call last):
File "<ipython-input-69-12b84b41c6b9>", line 1, in <module>
runfile('/mnt/nas/sanaalamgeer/Projects/1/MyOwnChainer/Two-stream_IQA-master/train.py', wdir='/mnt/nas/sanaalamgeer/Projects/1/MyOwnChainer/Two-stream_IQA-master')
File "/home/sanaalamgeer/anaconda3/lib/python3.6/site-packages/spyder_kernels/customize/spydercustomize.py", line 668, in runfile
execfile(filename, namespace)
File "/home/sanaalamgeer/anaconda3/lib/python3.6/site-packages/spyder_kernels/customize/spydercustomize.py", line 108, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "/mnt/nas/sanaalamgeer/Projects/1/MyOwnChainer/Two-stream_IQA-master/train.py", line 129, in <module>
trainer.run()
File "/home/sanaalamgeer/anaconda3/lib/python3.6/site-packages/chainer/training/trainer.py", line 330, in run
six.reraise(*sys.exc_info())
File "/home/sanaalamgeer/anaconda3/lib/python3.6/site-packages/six.py", line 693, in reraise
raise value
File "/home/sanaalamgeer/anaconda3/lib/python3.6/site-packages/chainer/training/trainer.py", line 316, in run
update()
File "/home/sanaalamgeer/anaconda3/lib/python3.6/site-packages/chainer/training/updaters/standard_updater.py", line 149, in update
self.update_core()
File "/home/sanaalamgeer/anaconda3/lib/python3.6/site-packages/chainer/training/updaters/standard_updater.py", line 154, in update_core
in_arrays = self.converter(batch, self.device)
File "/home/sanaalamgeer/anaconda3/lib/python3.6/site-packages/chainer/dataset/convert.py", line 149, in concat_examples
return to_device(device, _concat_arrays(batch, padding))
File "/home/sanaalamgeer/anaconda3/lib/python3.6/site-packages/chainer/dataset/convert.py", line 37, in to_device
return cuda.to_gpu(x, device)
File "/home/sanaalamgeer/anaconda3/lib/python3.6/site-packages/chainer/backends/cuda.py", line 285, in to_gpu
return _array_to_gpu(array, device_, stream)
File "/home/sanaalamgeer/anaconda3/lib/python3.6/site-packages/chainer/backends/cuda.py", line 333, in _array_to_gpu
return cupy.asarray(array)
File "/home/sanaalamgeer/anaconda3/lib/python3.6/site-packages/cupy/creation/from_data.py", line 60, in asarray
return core.array(a, dtype, False)
File "cupy/core/core.pyx", line 2049, in cupy.core.core.array
File "cupy/core/core.pyx", line 2083, in cupy.core.core.array
ValueError: Unsupported dtype object
Maybe that's because I am arranging the training data incorrectly, since the model takes its training parameters as:
length = x_data.shape[0]
x1 = Variable(x_data[0:length:2])
x2 = Variable(x_data[1:length:2])
and y_data as:
t = xp.repeat(y_data[0:length:2], 1)
The variable final_train_set prepares the dataset as tuples of the form (NumPy array, 66), where every NumPy array has dimensions (2, 3, 32, 32) and carries two types of patches, each of shape (3, 32, 32).
I have used dataset from the github link provided above.
I am a newbie in Chainer. Please help!

In very short, you inappropriately called numpy.asarray: numpy.asarray does not concatenate two cupy.ndarrays, while it concatenates two numpy.ndarrays.
Your code in brief:
import numpy, cupy
final_train_set = []
N_PATCH_PER_IMAGE = 8
for i in range(10):
label = 0
temp_slice_1 = [numpy.zeros((3, 3)) for j in range(N_PATCH_PER_IMAGE)]
temp_slice_2 = [numpy.zeros((3, 3)) for j in range(N_PATCH_PER_IMAGE)]
for j in range(N_PATCH_PER_IMAGE):
temp_slice_1[j] = cupy.array(temp_slice_1[j])
temp_slice_2[j] = cupy.array(temp_slice_2[j])
final_train_set.append(
[
# attempting to concatenate two cupy arrays by numpy.asarray
numpy.asarray([temp_slice_1[j], temp_slice_2[j]]),
label
]
)
The bug
import numpy as np
import cupy as cp
print("two numpy arrays")
print(np.asarray([np.zeros(shape=(1,)), np.zeros(shape=(1,))]))
print(np.asarray([np.zeros(shape=(1,)), np.zeros(shape=(1,))]).dtype)
print()
print("two cupy arrays")
print(np.asarray([cp.zeros(shape=(1,)), cp.zeros(shape=(1,))]))
print(np.asarray([cp.zeros(shape=(1,)), cp.zeros(shape=(1,))]).dtype)
two numpy arrays
[[0.]
[0.]]
float64
two cupy arrays
[[array(0.)]
[array(0.)]]
object
Solution: comment out two lines
import numpy # not import cupy here
for i in range(10):
label = 0
temp_slice_1 = [numpy.zeros((3, 3)) for j in range(N_PATCH_PER_IMAGE)]
temp_slice_2 = [numpy.zeros((3, 3)) for j in range(N_PATCH_PER_IMAGE)]
for j in range(N_PATCH_PER_IMAGE):
# temp_slice_1[j] = cupy.array(temp_slice_1[j]) <- comment out!
# temp_slice_2[j] = cupy.array(temp_slice_2[j]) <- comment out!
final_train_set.append(
[
# concatenate two numpy arrays: usually cupy should not be used in dataset
numpy.asarray([temp_slice_1[j], temp_slice_2[j]]),
label
]
)
Footnote
In the code you presented, xp is not specified, so nobody could give you an answer. Please post the WHOLE BODY of your code, including the model, if you are not able to isolate the problem.
I guess you might not be able to run the training code for another reason as well. In this code, the data is first loaded into main memory during the construction of final_train_set. If the number of images is huge, main memory will run out and a MemoryError will be raised. (In other words, if the number of images is small and your memory is large enough, the error will not happen.)
In that case, the following references (Chainer at glance and Dataset Abstraction) would help.

DISCLAIMER: None of this code is written by me
I found this GitHub repository, which uses OpenCV, SciPy and a few other modules for quality assessment. Here is the code:
# Python code for BRISQUE model
# Original paper title: No-Reference Image Quality Assessment in the Spatial Domain
# Link: http://ieeexplore.ieee.org/document/6272356/
import cv2
import numpy as np
from scipy import ndimage
import math
def get_gaussian_filter(shape=7, sigma=7.0 / 6.0):
    """Return a normalized square 2-D Gaussian window.

    Parameters
    ----------
    shape : int
        Side length of the (shape x shape) window.
    sigma : float
        Standard deviation of the Gaussian, in pixels.

    Returns
    -------
    numpy.ndarray
        Array of shape (shape, shape) whose entries sum to 1 (unless every
        entry was zeroed out, in which case it is returned unnormalized).

    NOTE(review): the original body used `shape` and `sigma` without defining
    them. The defaults here assume the 7x7, sigma = 7/6 window usually used
    for BRISQUE -- confirm against the reference implementation.
    """
    half_h, half_w = [(ss - 1.0) / 2.0 for ss in (shape, shape)]
    y, x = np.ogrid[-half_h:half_h + 1, -half_w:half_w + 1]
    window = np.exp(-(x * x + y * y) / (2.0 * sigma * sigma))
    # Drop values that are negligible relative to the peak.
    window[window < np.finfo(window.dtype).eps * window.max()] = 0
    sum_window = window.sum()
    if sum_window != 0:
        window = np.divide(window, sum_window)
    return window
def lmom(X):
    """Return the first four sample L-moments of a column vector.

    Parameters
    ----------
    X : numpy.ndarray
        2-D array; the intended input is a column vector of shape (n, 1).
        The input is not modified.

    Returns
    -------
    numpy.ndarray
        Length-4 array [L1, L2, L3, L4] built from the sample mean and the
        weighted sums b[0..2] computed over the sorted data.
    """
    (rows, cols) = X.shape
    if cols == 1:
        X = X.reshape(1, rows)
    n = rows
    # np.sort returns a sorted copy. The original X.sort() sorted in place,
    # which also reordered the caller's array through the reshape view.
    X = np.sort(X)
    b = np.zeros(3)
    b0 = X.mean()
    for r in range(1, 4):
        Num = np.prod(
            np.tile(np.arange(r + 1, n + 1), (r, 1))
            - np.tile(np.arange(1, r + 1).reshape(r, 1), (1, n - r)),
            0,
        )
        # The np.float alias no longer exists in current NumPy releases.
        Num = Num.astype(float)
        Den = np.prod(np.tile(n, (1, r)) - np.arange(1, r + 1), 1)
        b[r - 1] = 1.0 / n * np.sum(Num / Den * X[0, r:])
    L = np.zeros(4)
    L[0] = b0
    L[1] = 2 * b[0] - b0
    L[2] = 6 * b[1] - 6 * b[0] + b0
    L[3] = 20 * b[2] - 30 * b[1] + 12 * b[0] - b0
    return L
def compute_features(im):
    """Compute the feature vector of a grayscale image over two scales.

    For each of the two scales the image is locally normalized with the
    Gaussian window, two L-moments of the normalized image are appended,
    followed by the four L-moments of its product with each of four shifted
    copies. The image is then halved in size for the next scale.

    Parameters
    ----------
    im : numpy.ndarray
        2-D image array; it is converted to floating point first.

    Returns
    -------
    numpy.ndarray
        1-D array holding 18 values per scale (36 in total).
    """
    # The np.float alias no longer exists in current NumPy releases.
    im = im.astype(float)
    window = get_gaussian_filter()
    scalenum = 2
    feat = []
    for itr_scale in range(scalenum):
        mu = cv2.filter2D(im, cv2.CV_64F, window, borderType=cv2.BORDER_CONSTANT)
        mu_sq = mu * mu
        sigma = np.sqrt(abs(cv2.filter2D(im * im, cv2.CV_64F, window, borderType=cv2.BORDER_CONSTANT) - mu_sq))
        # Locally normalized image; the +1 keeps the division finite.
        structdis = (im - mu) / (sigma + 1)
        structdis_col_vector = np.reshape(structdis.transpose(), (structdis.size, 1))
        # NOTE(review): the reshape below is a view of structdis. If lmom
        # sorts its argument in place, structdis is reordered before the
        # shifts further down -- confirm that lmom works on a copy.
        L = lmom(structdis.reshape(structdis.size, 1))
        feat = np.append(feat, [L[1], L[3]])
        shifts = [[0, 1], [1, 0], [1, 1], [-1, 1]]
        for itr_shift in shifts:
            shifted_structdis = np.roll(structdis, itr_shift[0], axis=0)
            shifted_structdis = np.roll(shifted_structdis, itr_shift[1], axis=1)
            shifted_structdis_col_vector = np.reshape(shifted_structdis.T, (shifted_structdis.size, 1))
            pair = structdis_col_vector * shifted_structdis_col_vector
            L = lmom(pair.reshape(pair.size, 1))
            feat = np.append(feat, L)
        # Halve the resolution for the next scale.
        im = cv2.resize(im, (0, 0), fx=0.5, fy=0.5, interpolation=cv2.INTER_CUBIC)
    return feat
im = ndimage.imread('example.bmp', flatten=True)
feat = compute_features(im)
print feat

shapes (401,1) and (401,1) not aligned: 1 (dim 1) != 401 (dim 0)

I am implementing a one-vs-all classifier; however, I got the error "shapes (401,1) and (401,1) not aligned: 1 (dim 1) != 401 (dim 0)". The traceback is below:
Traceback (most recent call last):
File "<ipython-input-1-682bb50c2435>", line 1, in <module>
runfile('/Users/alvin/Documents/GitDemo/ML_Basic_Imple/Coursera_ML_Python/ex3/Multi_classify_oneVSall.py', wdir='/Users/alvin/Documents/GitDemo/ML_Basic_Imple/Coursera_ML_Python/ex3')
File "/Users/alvin/Documents/tools/anaconda3/lib/python3.6/site-packages/spyder/utils/site/sitecustomize.py", line 710, in runfile
execfile(filename, namespace)
File "/Users/alvin/Documents/tools/anaconda3/lib/python3.6/site-packages/spyder/utils/site/sitecustomize.py", line 101, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "/Users/alvin/Documents/GitDemo/ML_Basic_Imple/Coursera_ML_Python/ex3/Multi_classify_oneVSall.py", line 124, in <module>
trained_theta = training_OnevsAll_theta(X,y,10,0.1)
File "/Users/alvin/Documents/GitDemo/ML_Basic_Imple/Coursera_ML_Python/ex3/Multi_classify_oneVSall.py", line 119, in training_OnevsAll_theta
theta,cost = opt_Cost(initial_theta,X,y,lamada)
File "/Users/alvin/Documents/GitDemo/ML_Basic_Imple/Coursera_ML_Python/ex3/Multi_classify_oneVSall.py", line 96, in opt_Cost
res = optimize.fmin_bfgs(LR_Costfunction, theta, fprime=Gradient, args=(X,y,lamada) )
File "/Users/alvin/Documents/tools/anaconda3/lib/python3.6/site-packages/scipy/optimize/optimize.py", line 859, in fmin_bfgs
res = _minimize_bfgs(f, x0, args, fprime, callback=callback, **opts)
File "/Users/alvin/Documents/tools/anaconda3/lib/python3.6/site-packages/scipy/optimize/optimize.py", line 934, in _minimize_bfgs
old_fval, old_old_fval, amin=1e-100, amax=1e100)
File "/Users/alvin/Documents/tools/anaconda3/lib/python3.6/site-packages/scipy/optimize/optimize.py", line 765, in _line_search_wolfe12
**kwargs)
File "/Users/alvin/Documents/tools/anaconda3/lib/python3.6/site-packages/scipy/optimize/linesearch.py", line 97, in line_search_wolfe1
derphi0 = np.dot(gfk, pk)
ValueError: shapes (401,1) and (401,1) not aligned: 1 (dim 1) != 401 (dim 0)
Could you find any problem in my below code?
Thank you for your patience!
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import scipy.io
import scipy.misc
import matplotlib.cm as cm # Used to display images in a specific colormap
import random
from scipy.special import expit
datapath = 'data/ex3data1.mat'
data = scipy.io.loadmat(datapath)
X = data['X']
y = data['y']
print(X.shape)
print(y.shape)
def _display_data():
all_fig = np.zeros((10*20,10*20))
index_of_samples = random.sample(range(X.shape[0]),100)
row, col = 0, 0
for i in index_of_samples:
if col == 10:
row += 1
col = 0
fig = X[i].reshape(20,20).T
all_fig[row * 20:(row+1)*20,col * 20:(col+1)*20] = fig
col += 1
plt.figure(figsize=(8,8))
img = scipy.misc.toimage(all_fig)
plt.imshow(img, cmap = plt.cm.gray_r)
_display_data()
# ============ Part 2a: Vectorize Logistic Regression ============
def hpy_sigmod_fucntion(X_inter, theta_inter):
    """Return the logistic hypothesis: the sigmoid of X_inter dot theta_inter."""
    return expit(np.dot(X_inter, theta_inter))
def LR_Costfunction(theta_inter,X_inter,y,lamada=0.):
m = X_inter.shape[0]
hyp = hpy_sigmod_fucntion(X_inter,theta_inter)
reg = np.dot(theta_inter.T,theta_inter) * (lamada / (2 * m))
J = np.dot(y.T,np.log(hyp))+np.dot((1 - y.T),np.log(1 - hyp))
return J + reg
def Gradient(theta_inter,X_inter,y,lamada=0.):
m = X_inter.shape[0]
hyp = hpy_sigmod_fucntion(X_inter,theta_inter)
hyp = np.asarray(hyp).reshape(hyp.shape[0],1)
h_y = hyp - y # 5000 * 1
reg = theta_inter[1:] * (lamada / m)
reg = np.asarray(reg).reshape(reg.shape[0],1)
grad = (1 / m) * np.dot(X_inter.T,h_y) # 401 * 1
grad[1:] = grad[1:] + reg
return grad # 401 * 1
def opt_Cost(theta,X,y,lamada=0.):
from scipy import optimize
res = optimize.fmin_bfgs(LR_Costfunction, theta, fprime=Gradient, args=(X,y,lamada) )
return result[0], result[1]
The function below may be where the problem arises.
Are there any restrictions when using the fmin functions?
def training_OnevsAll_theta(X,y,num_labels,lamada=0.):
m = X.shape[0]
n = X.shape[1]
all_theta = np.zeros((num_labels,n+1))
X = np.hstack((np.ones((m,1)),X))
for c in range(num_labels):
print("Training theta for class %d" %c)
initial_theta = np.zeros((n+1,1))
theta,cost = opt_Cost(initial_theta,X,y,lamada)
all_theta[c] = theta
print("Finished!")
trained_theta = training_OnevsAll_theta(X,y,10,0.1)
Thank you!

Aha, I found the answer in the question "matrices are not aligned Error: Python SciPy fmin_bfgs".
The incorrectly shaped gradient causes the problem, so I followed that answer and added the line below before 'return grad':
grad = np.ndarray.flatten(grad)
And It works!

IndexError returned on curve_fit: error on function call?

I am trying to use curve_fit given this function
def F(xy, *p):
    """Evaluate a constant plus a sum of cosine/sine harmonics in x and y.

    Parameters
    ----------
    xy : pair
        The (x, y) coordinates, unpacked as ``x, y = xy``; they are scaled
        by pi/180 before being passed to cos/sin.
    *p : floats or a single sequence
        Flattened to c = [c0, then four coefficients per harmonic:
        cos-x, cos-y, sin-x, sin-y].

    Returns
    -------
    c0 plus the harmonic terms, evaluated at the given coordinates.
    """
    x, y = xy
    c = np.array(p).ravel()
    # Floor division keeps n an int under true division as well, so it can
    # be passed to range().
    n = (len(c) - 1) // 4
    omega = pi / 180.0
    z = c[0]
    for t in range(n):
        z += c[4 * t + 1] * (cos((t + 1) * omega * x))
        z += c[4 * t + 2] * (cos((t + 1) * omega * y))
        z += c[4 * t + 3] * (sin((t + 1) * omega * x))
        z += c[4 * t + 4] * (sin((t + 1) * omega * y))
    return z
def G(xy, *p):
    """Evaluate a sum of 2-D Gaussian-shaped terms.

    Parameters
    ----------
    xy : pair
        The (x, y) coordinates, unpacked as ``x, y = xy``.
    *p : floats or a single sequence
        Flattened to groups of seven values per term, in the order
        (a, cx, mx, sx, cy, my, sy).

    Returns
    -------
    Sum over the terms of
    a * exp(-(cx*(x-mx)**2 / (2*sx**2)) - (cy*(y-my)**2 / (2*sy**2))),
    or 0 when no complete group of seven values is given.
    """
    x, y = xy
    c = np.array(p).ravel()
    # Floor division keeps the term count an int under true division as
    # well, so it can be passed to range().
    ngm = len(c) // 7
    z = 0
    for t in range(ngm):
        a = c[7 * t]
        cx = c[7 * t + 1]
        mx = c[7 * t + 2]
        sx = c[7 * t + 3]
        cy = c[7 * t + 4]
        my = c[7 * t + 5]
        sy = c[7 * t + 6]
        z += a * np.exp(-((cx * (x - mx) ** 2) / (2 * (sx ** 2))) - ((cy * (y - my) ** 2) / (2 * (sy ** 2))))
    return z
def FG(xy,*p):
x,y = xy
c = np.array(p).ravel()
nf = int(c[0])
ng = int(c[1])
print nf,ng
pf = [c[i] for i in range(2,4*nf+3)]
pg = [c[i] for i in range(4*nf+3,4*nf+7*ng+3)]
z1 = F(xy,pf)
z2 = G(xy,pg)
return z1+z2
pfit,cov = opt.curve_fit(FG,xy,z,p,bounds=bounds)
I am sure that the shape of both p and bounds are appropriate. I tried printing nf and ng, and they are properly printed until after some number of iterations (around after 20th function call, not the same in every run), where the values changed significantly.
After the 20th (or more) run, it returns the following error:
File "/Users/pensieve/calcs/3D_AA/0_codes/fitpkgs.py", line 144, in FGfit
pfit,cov = opt.curve_fit(FG,xy,z,p,bounds=bounds)
File "/Library/Python/2.7/site-packages/scipy-0.18.1-py2.7-macosx-10.10-intel.egg/scipy/optimize/minpack.py", line 683, in curve_fit
**kwargs)
File "/Library/Python/2.7/site-packages/scipy-0.18.1-py2.7-macosx-10.10-intel.egg/scipy/optimize/_lsq/least_squares.py", line 878, in least_squares
tr_options.copy(), verbose)
File "/Library/Python/2.7/site-packages/scipy-0.18.1-py2.7-macosx-10.10-intel.egg/scipy/optimize/_lsq/trf.py", line 128, in trf
loss_function, tr_solver, tr_options, verbose)
File "/Library/Python/2.7/site-packages/scipy-0.18.1-py2.7-macosx-10.10-intel.egg/scipy/optimize/_lsq/trf.py", line 341, in trf_bounds
f_new = fun(x_new)
File "/Library/Python/2.7/site-packages/scipy-0.18.1-py2.7-macosx-10.10-intel.egg/scipy/optimize/_lsq/least_squares.py", line 764, in fun_wrapped
return np.atleast_1d(fun(x, *args, **kwargs))
File "/Library/Python/2.7/site-packages/scipy-0.18.1-py2.7-macosx-10.10-intel.egg/scipy/optimize/minpack.py", line 455, in func_wrapped
return func(xdata, *params) - ydata
File "/Users/pensieve/calcs/3D_AA/0_codes/fitfunctions.py", line 65, in FG
pgm = [c[i] for i in range(4*nf+3,4*nf+7*ng+3)]
IndexError: index out of bounds
For reference, I use scipy 0.18.1.

subplot matplotlib wrong syntax

I am using matplotlib to create subplots in a loop. For instance, I would like to plot 49 data sets as subplots, and following the documentation I implemented it this way:
import numpy as np
import matplotlib.pyplot as plt
X1=list(range(0,10000,1))
X1 = [ x/float(10) for x in X1 ]
nb_mix = 2
parameters = []
for i in range(49):
param = []
Y = [0] * len(X1)
for j in range(nb_mix):
mean = 5* (1 + (np.random.rand() * 2 - 1 ) * 0.5 )
var = 10* (1 + np.random.rand() * 2 - 1 )
scale = 5* ( 1 + (np.random.rand() * 2 - 1) * 0.5 )
Y = [ Y[k] + scale * np.exp(-((X1[k] - mean)/float(var))**2) for k in range(len(X1)) ]
param = param + [[mean, var, scale]]
ax = plt.subplot(7, 7, i + 1)
ax.plot(X1, Y)
parameters = parameters + [param]
ax.show()
However, I get an index out of range error from i=0 onwards.
What should I change to make it work?
-- this is the error i get
Traceback (most recent call last):
File "F:\WORK\SOLVEUR\ALGOCODE\PYTHON_\DataSets\DataSets_DONLP2_gaussians.py", line 167, in <module>
ax = plt.subplot(7, 7, i + 1)
File "C:\Python27\lib\site-packages\matplotlib\pyplot.py", line 766, in subplot
a = fig.add_subplot(*args, **kwargs)
File "C:\Python27\lib\site-packages\matplotlib\figure.py", line 777, in add_subplot
a = subplot_class_factory(projection_class)(self, *args, **kwargs)
File "C:\Python27\lib\site-packages\matplotlib\axes.py", line 8364, in __init__
self._subplotspec = GridSpec(rows, cols)[int(num)-1]
File "C:\Python27\lib\site-packages\matplotlib\gridspec.py", line 175, in __getitem__
raise IndexError("index out of range")
IndexError: index out of range
Thanks

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

ValueError: x and y must have same first dimension - python

Related

Scikit-Multiflow - Cannot take a larger sample than population when 'replace'=False

Writing training model for CNN

shapes (401,1) and (401,1) not aligned: 1 (dim 1) != 401 (dim 0)

IndexError returned on curve_fit: error on function call?

subplot matplotlib wrong syntax

Categories

Resources