output operand requires a reduction, but reduction is not enabled Python - python

import numpy as np
from numpy.linalg import solve,norm,cond,inv,pinv
import math
import matplotlib.pyplot as plt
from scipy.linalg import toeplitz
from numpy.random import rand
c = np.zeros(512)
c[0] = 2
c[1] = -1
a = c
A = toeplitz(c,a)
cond_A = cond(A,2)
# creating 10 random vectors 512 x 1
b = rand(10,512)
# making b into unit vector
for i in range (10):
b[i]= b[i]/norm(b[i],2)
# creating 10 random del_b vectors
del_b = [rand(10,512), rand(10,512), rand(10,512), rand(10,512), rand(10,512), rand(10,512), rand(10,512), rand(10,512), rand(10,512), rand(10,512)]
# del_b = 10 sets of 10 vectors (512x1) whose norm is 0.01,0.02 ~0.1
for i in range(10):
for j in range(10):
del_b[i][j] = del_b[i][j]/(norm(del_b[i][j],2)/((float(j+1)/100)))
x_in = [np.zeros(512), np.zeros(512), np.zeros(512), np.zeros(512), np.zeros(512), np.zeros(512), np.zeros(512), np.zeros(512), np.zeros(512), np.zeros(512)]
x2 = np.zeros((10,10,512))
for i in range(10):
x_in[i] = A.transpose()*b[i]
for i in range(10):
for j in range(10):
x2[i][j] = ((A.transpose()*(b[i]+del_b[i][j]))
LAST line is giving me the error. ( output operand requires a reduction, but reduction is not enabled)
How do i fix it?
I'm new to python and please let me know if there is easier way to do this
Thanks

The error you're seeing is because of a mismatch in the dimensions of what you have created, but your code is also quite inefficient with all the looping and not making use of Numpy's automatic broadcasting optimally. I've rewritten the code to do what it seems you want:
import numpy as np
from numpy.linalg import solve,norm,cond,inv,pinv
import math
import matplotlib.pyplot as plt
from scipy.linalg import toeplitz
from numpy.random import rand
# These should probably get more sensible names
Nvec = 10 # number of vectors in b
Nlevels = 11 # number of perturbation norm levels
Nd = 512 # dimension of the vector space
c = np.zeros(Nd)
c[0] = 2
c[1] = -1
a = c
# NOTE: I'm assuming you want A to be a matrix
A = np.asmatrix(toeplitz(c, a))
cond_A = cond(A,2)
# create Nvec random vectors Nd x 1
# Note: packing the vectors in the columns makes the next step easier
b = rand(Nd, Nvec)
# normalise each column of b to be a unit vector
b /= norm(b, axis=0)
# create Nlevels of Nd x Nvec random del_b vectors
del_b = rand(Nd, Nvec, Nlevels)
# del_b = 10 sets of 10 vectors (512x1) whose norm is 0.01,0.02 ~0.1
targetnorms = np.linspace(0.01, 0.1, Nlevels)
# cause the norms in the Nlevels dimension to be equal to the target norms
del_b /= norm(del_b, axis=0)[None, :, :]/targetnorms[None, None, :]
# Straight linear transformation - make sure you actually want the transpose
x_in = A.T*b
# same linear transformation on perturbed versions of b
x2 = np.zeros((Nd, Nvec, Nlevels))
for i in range(Nlevels):
x2[:, :, i] = A.T*(b + del_b[:, :, i])

Related

Why isn't this Linear Regression line a straight line?

I have points with x and y coordinates I want to fit a straight line to with Linear Regression but I get a jagged looking line.
I am attemting to use LinearRegression from sklearn.
To create the points run a for loop that randomly crates one hundred points into an array that is 100 x 2 in shape. I slice the left side of it for the xs and the right side of it for the ys.
I expect to have a straight line when I print m.predict.
import numpy as np
import matplotlib.pyplot as plt
import random
from sklearn.linear_model import LinearRegression
X = []
adder = 0
for z in range(100):
r = random.random() * 20
r2 = random.random() * 15
X.append([r+adder-0.4, r2+adder])
adder += 0.6
X = np.array(X)
plt.scatter(X[:,0], X[:,1], s=10)
plt.show()
m = LinearRegression()
m.fit(X[:,0].reshape(1, -1), X[:,1].reshape(1, -1))
plt.plot(m.predict(X[:,0].reshape(1, -1))[0])
I am not good with numpy but, I think it is because the use of reshape() function to convert X[:,0] and X[:,1] from 1D to 2D, the resulting 2D array contains only one element, instead of creating a 2D array of len(X[:,0]) and len(X[:,1]) respectively. And resulting into an undesired regressor.
I am able to recreate this model using pandas and able to plot the desired result. Code as follows
import numpy as np
import matplotlib.pyplot as plt
import random
from sklearn.linear_model import LinearRegression
import pandas as pd
X = []
adder = 0
for z in range(100):
r = random.random() * 20
r2 = random.random() * 15
X.append([r+adder-0.4, r2+adder])
adder += 0.6
X = np.array(X)
y_train = pd.DataFrame(X[:,1],columns=['y'])
X_train = pd.DataFrame(X[:,0],columns=['X'])
//plt.scatter(X_train, y_train, s=10)
//plt.show()
m = LinearRegression()
m.fit(X_train, y_train)
plt.scatter(X_train,y_train)
plt.plot(X_train,m.predict(X_train),color='red')

A faster way to compute percentage correlation between two filter functions

I wrote this function to compute the normalized percentage correlation between two filter functions (with one shifted). The function works but takes about 8 to 12 minutes depending on the number of elements in nbs. I would like to know if there is another way to make this operation faster. Here is my code below:
import numpy as np
DT = 0.08
def corr_g(*nbs, Np=10000, sf = 0.5):
wb = 0.25 # bandwidth in Hz
freq = (1/DT)*np.linspace(-0.5,0.5-1/Np,Np) # frequency vector
dCg_norms = np.zeros((Np,len(nbs)))
for idx, nb in enumerate(nbs): # nb is the filter parameter
d_k_vector = np.linspace(-Np*sf, Np*sf, Np) # indices vector
dCg = d_k_vector*0 # array to hold correlation
g = ((1+np.exp(-nb))**2)/((1+np.exp(-nb*(freq+wb)/wb))*(1+np.exp(nb*(freq-wb)/wb))) # filter function
for index2, d_k in enumerate(d_k_vector): # loop through the new indices vector
for index, sth in enumerate(g):
# form a new array from g using the indices vector use only values within the limits of g. Then do a dot product operation
if (index+d_k) < Np and (index+d_k) >= 0:
dCg[index2] += g[index] * g[index+int(d_k)]
dCg_norm = dCg/np.max(dCg)*100 # normalized correlation
dCg_norms[:,idx] = dCg_norm # add to allocated array
return dCg_norms
my_arr = corr_g(*[2,4,8,16])
import matplotlib.pyplot as plt
Np = 10000
DT = 0.08
d_k_vector = np.linspace(-5000, 5000, Np)
plt.plot(d_k_vector/(10000*DT)/0.25,my_arr[:,1])
You should not calculate correlation yourself, better use np.correlate(vector, 'same'). There are small differences between your result and mine and I am pretty sure error is on your side.
def corr_g2(*nbs, Np=10000, sf = 0.5):
wb = 0.25 # bandwidth in Hz
freq = (1/DT)*np.linspace(-0.5,0.5-1/Np,Np) # frequency vector
dCg_norms = np.zeros((Np,len(nbs)))
for idx, nb in enumerate(nbs): # nb is the filter parameter
g = ((1+np.exp(-nb))**2)/((1+np.exp(-nb*(freq+wb)/wb))*(1+np.exp(nb*(freq-wb)/wb))) # filter function
dCg = np.correlate(g, g, 'same')
dCg_norm = dCg/np.max(dCg)*100 # normalized correlation
dCg_norms[:,idx] = dCg_norm # add to allocated array
return dCg_norms
def main():
my_arr = corr_g(*[2,4], Np=Np)
my_arr2 = corr_g2(*[2,4], Np=Np)
# import matplotlib.pyplot as plt
# d_k_vector = np.linspace(-Np / 2, Np / 2 - 1, Np)
# plt.plot(d_k_vector/(10000*DT)/0.25,my_arr[:,1])
# plt.plot(d_k_vector/(10000*DT)/0.25,my_arr2[:,1])
# plt.show()
if __name__ == '__main__':
main()
Profiling results for Np=1000:
Line # Hits Time Per Hit % Time Line Contents
==============================================================
39 #do_profile()
40 def main():
41 1 14419637.0 14419637.0 100.0 my_arr = corr_g(*[2,4], Np=Np)
42 1 1598.0 1598.0 0.0 my_arr2 = corr_g2(*[2,4], Np=Np)

Zero Padding a Matrix in Python

I have a 5x20 Matrix, I intend to use Matrix Function and Transformations like Fourier Transform which can only be used for Symmetric Matrix. How can I convert the 5x20 Matrix to 20x20 Matrix by Zero Padding?
import numpy as np
a = np.array([[1,1,1,1],[2,2,2,2]])
b=np.zeros((20,20))
result = np.zeros_like(b)
x = 0
y = 0
result[x:a.shape[0],y:a.shape[1]] = a
print(result)
You can use the above logic to implement the padding.
One more approach.
Here a is the array that you already have.
import numpy as np
a = np.random.rand(5,20)
za = np.zeros((1, 20))
while a.shape[0] < 20:
a = np.concatenate((a,za))
Are you using numpy? If you do you can do it by slicing the arrays
import numpy as np
random_array_5_20 = np.random.rand(5, 20)
padded_array_20_20 = np.zeros((20, 20))
padded_array_20_20[:5, :20] = random_array_5_20

Fitting a quadratic function in python without numpy polyfit

I am trying to fit a quadratic function to some data, and I'm trying to do this without using numpy's polyfit function.
Mathematically I tried to follow this website https://neutrium.net/mathematics/least-squares-fitting-of-a-polynomial/ but somehow I don't think that I'm doing it right. If anyone could assist me that would be great, or If you could suggest another way to do it that would also be awesome.
What I've tried so far:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
ones = np.ones(3)
A = np.array( ((0,1),(1,1),(2,1)))
xfeature = A.T[0]
squaredfeature = A.T[0] ** 2
b = np.array( (1,2,0), ndmin=2 ).T
b = b.reshape(3)
features = np.concatenate((np.vstack(ones), np.vstack(xfeature), np.vstack(squaredfeature)), axis = 1)
featuresc = features.copy()
print(features)
m_det = np.linalg.det(features)
print(m_det)
determinants = []
for i in range(3):
featuresc.T[i] = b
print(featuresc)
det = np.linalg.det(featuresc)
determinants.append(det)
print(det)
featuresc = features.copy()
determinants = determinants / m_det
print(determinants)
plt.scatter(A.T[0],b)
u = np.linspace(0,3,100)
plt.plot(u, u**2*determinants[2] + u*determinants[1] + determinants[0] )
p2 = np.polyfit(A.T[0],b,2)
plt.plot(u, np.polyval(p2,u), 'b--')
plt.show()
As you can see my curve doesn't compare well to nnumpy's polyfit curve.
Update:
I went through my code and removed all the stupid mistakes and now it works, when I try to fit it over 3 points, but I have no idea how to fit over more than three points.
This is the new code:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
ones = np.ones(3)
A = np.array( ((0,1),(1,1),(2,1)))
xfeature = A.T[0]
squaredfeature = A.T[0] ** 2
b = np.array( (1,2,0), ndmin=2 ).T
b = b.reshape(3)
features = np.concatenate((np.vstack(ones), np.vstack(xfeature), np.vstack(squaredfeature)), axis = 1)
featuresc = features.copy()
print(features)
m_det = np.linalg.det(features)
print(m_det)
determinants = []
for i in range(3):
featuresc.T[i] = b
print(featuresc)
det = np.linalg.det(featuresc)
determinants.append(det)
print(det)
featuresc = features.copy()
determinants = determinants / m_det
print(determinants)
plt.scatter(A.T[0],b)
u = np.linspace(0,3,100)
plt.plot(u, u**2*determinants[2] + u*determinants[1] + determinants[0] )
p2 = np.polyfit(A.T[0],b,2)
plt.plot(u, np.polyval(p2,u), 'r--')
plt.show()
Instead using Cramer's Rule, actually solve the system using least squares. Remember that Cramer's Rule will only work if the total number of points you have equals the desired order of polynomial plus 1.
If you don't have this, then Cramer's Rule will not work as you're trying to find an exact solution to the problem. If you have more points, the method is unsuitable as we will create an overdetermined system of equations.
To adapt this to more points, numpy.linalg.lstsq would be a better fit as it solves the solution to the Ax = b by computing the vector x that minimizes the Euclidean norm using the matrix A. Therefore, remove the y values from the last column of the features matrix and solve for the coefficients and use numpy.linalg.lstsq to solve for the coefficients:
import numpy as np
import matplotlib.pyplot as plt
ones = np.ones(4)
xfeature = np.asarray([0,1,2,3])
squaredfeature = xfeature ** 2
b = np.asarray([1,2,0,3])
features = np.concatenate((np.vstack(ones),np.vstack(xfeature),np.vstack(squaredfeature)), axis = 1) # Change - remove the y values
determinants = np.linalg.lstsq(features, b)[0] # Change - use least squares
plt.scatter(xfeature,b)
u = np.linspace(0,3,100)
plt.plot(u, u**2*determinants[2] + u*determinants[1] + determinants[0] )
plt.show()
I get this plot now, which matches what the dashed curve is in your graph, also matching what numpy.polyfit gives you:

How can I map a vectorized function to a numpy array without using a for loop?

So here's what I already have:
import numpy as np
import matplotlib.pyplot as plt
def monteCarloPi(n):
np.random.seed() #seed the random number generator
y = np.random.rand(n)*2 - 1 #n random samples on (-1,1)
x = np.linspace(-1,1,n) #x axis to plot against
square = np.array([x,y]) #collecting axes as a single object
mask1 = ((x**2 + y**2) < 1) #filters
hits = np.sum(mask1) #calculating approximation
ratio = hits/n
pi_approx = ratio * 4
return pi_approx
Here is what I would like to do:
x = np.arange(100,1000)
y = monteCarloPi(x)
plt.scatter(x,y)
However, when I run the above code block, I get the following error:
---------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-52-bf4dcedaa309> in <module>()
1 x = np.arange(100,1000)
----> 2 y = monteCarloPi(x)
3 plt.scatter(x,y)
<ipython-input-51-8d5b36e22d4b> in monteCarloPi(n)
1 def monteCarloPi(n):
2 np.random.seed() #seed the random number generator
----> 3 y = np.random.rand(n)*2 - 1 #n random samples on (-1,1)
4 x = np.linspace(-1,1,n) #x axis to plot against
5
mtrand.pyx in mtrand.RandomState.rand()
mtrand.pyx in mtrand.RandomState.random_sample()
mtrand.pyx in mtrand.cont0_array()
TypeError: only integer scalar arrays can be converted to a scalar index
Based on my understanding of how broadcasting works in numpy, this should work. I could just use a for loop but that gets really slow really quickly as the number of samples goes up.
halp
Here is one option, where the maximum sample size is based, then subsampling occurs if start>0 (error handling not included).
import numpy as np
import matplotlib.pyplot as plt
def monteCarloPi(n,start=0,stride=1):
np.random.seed() # seed the random number generator
y = np.random.rand(n)*2 - 1 # n random samples on (-1,1)
x = np.linspace(-1,1,n) # x axis to plot against
mask = ( x**2 + y**2 ) < 1 # masking
samples = {}
inds = arange(n)
for k in range(n-start,n+1,stride):
sub_inds = np.random.choice(inds,k,replace=False)
sub_mask = mask[sub_inds]
sub_hits = np.sum(sub_mask)
ratio = sub_hits/n
pi_approx = ratio * 4
samples[k]=pi_approx
return pi_approx
This still requires a for loop, but it's handled inside the method quickly, since you're subsampling from one large random sample. To recreate your original call (running from n=100 to n=1000 [note that I am going up to n=1000 here]):
estimates = monteCarloPi(1000,start=900)
plt.plot(estimates.keys(),estimates.values())
You could of course pass the original x=arange(100,1001), but then there would need to be error checking in the method (to make sure an array or list was passed), and then n would be equal to the last element of x (n=x[-1]), and finally, the looping would be done over the elements of x (for k in x:).

Categories

Resources