I'm getting hung up trying to create a contour plot from three [1, m] numpy matrices. When I try to plot:
plt.contour(reg.theta[0,:], reg.theta[1,:], reg.J[0,:])
ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()
I think this has something to do with the fact that
In[167]: shape(reg.theta[0,:])
Out[167]: (1, 15000)
and contour wants something along the lines of (15000,)
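For reference, here is a minimal sketch of that shape mismatch (sizes taken from the output above): a row sliced from an np.matrix stays two-dimensional, so converting to a plain ndarray and ravelling gives the (15000,) shape. Note that plt.contour additionally expects its Z argument to be 2-D over a grid, not a 1-D sequence.

import numpy as np

theta_row = np.matrix(np.ones((2, 15000)))[0, :]   # shape (1, 15000), like reg.theta[0, :]
flat = np.asarray(theta_row).ravel()               # shape (15000,)
print(flat.shape)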
I've tried to create a meshgrid similar to how I would in MATLAB:
X, Y = meshgrid(reg.theta[0,:], reg.theta[1,:])
but that raises
ValueError: total size of new array must be unchanged
However, I have no idea how to interpret this.
Please advise, Stack Overflow!
Here is my complete code, tested on Python 2.7:
from numpy import *
import matplotlib.pyplot as plt

class LinearRegression:
    def __init__(self, data):
        self.data_loc = data
        self.max_it = 15000
        self.theta = matrix(ones((2, self.max_it)))
        self.alpha = .01
        self.J = matrix(zeros((1, self.max_it)))

    def importData(self):
        Data = loadtxt(self.data_loc, delimiter=',')
        rawData = matrix(Data)
        x = rawData[:, 0]
        y = rawData[:, 1]
        [m, n] = shape(x)
        x_0 = matrix(ones((m, 1)))
        x = concatenate((x_0, x), axis=1)
        return x, y, m, n

    def center(self, x):
        x = x / mean(x)
        return x

    def scale(self, x):
        x = x / std(x)
        return x

    def funct(self, m, b):
        return lambda x: m*x + b

    def cost(self, x, y, m, mx, b):
        f = self.funct(mx, b)
        err = power((f(x) - y), 2).sum()
        err = err / (2*len(x))
        return err

    def gradientDesc(self, x, y, m):
        for i in range(0, self.max_it - 1):
            error = (1.0/m) * transpose(x) * ((x * self.theta[:, i]) - y)
            delta = self.theta[:, i] - self.alpha*error
            self.J[0, i] = self.cost(x, y, m, self.theta[1, i], self.theta[0, i])
            self.theta[:, i+1] = delta
        print('Calculation Complete')
        return self.theta, error

reg = LinearRegression('/users/michael/documents/machine_learning/ex1/ex1data1.txt')
x, y, m, n = reg.importData()
theta, error = reg.gradientDesc(x, y, m)
print(reg.theta[:, reg.max_it - 1])

XX = linspace(0, m, m)
J = reg.theta[0, reg.max_it-1] + reg.theta[1, reg.max_it-1] * XX
plt.plot(XX, J)
plt.scatter(x[:, 1], y)
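One way to get an actual contour plot, sketched under assumptions: evaluate J on a (theta0, theta1) grid using the class's own cost method, rather than along the iteration history. The grid ranges below are made up and would need tuning to the data:

t0 = linspace(-10, 10, 100)     # assumed range for the intercept theta0
t1 = linspace(-1, 4, 100)       # assumed range for the slope theta1
T0, T1 = meshgrid(t0, t1)
JJ = zeros(T0.shape)
for i in range(T0.shape[0]):
    for j in range(T0.shape[1]):
        JJ[i, j] = reg.cost(x, y, m, T1[i, j], T0[i, j])
plt.contour(T0, T1, JJ)
plt.show()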
I'd like to move points in X & Y with 1D noise. To clarify, I don't want each point to move by a unique random number, but rather to apply one larger noise function over the whole line, with its gradients moving the points. The noise would serve as a multiplier for a move amount and would be a value between -1 and 1. For example, if the noise value was 0.8, it would multiply the X & Y of the points by that amount.
How would I go about this?
This is what I have so far (the black line is the original line). I think it's wrong, because the frequency is 1 but there appear to be multiple waves in the noise.
import numpy as np
import matplotlib.pyplot as plt
import random
import math
from enum import Enum

# PerlinNoise by alexandr-gnrk
class Interp(Enum):
    LINEAR = 1
    COSINE = 2
    CUBIC = 3

class PerlinNoise():
    def __init__(self,
            seed, amplitude=1, frequency=1,
            octaves=1, interp=Interp.COSINE, use_fade=False):
        self.seed = random.Random(seed).random()
        self.amplitude = amplitude
        self.frequency = frequency
        self.octaves = octaves
        self.interp = interp
        self.use_fade = use_fade
        self.mem_x = dict()

    def __noise(self, x):
        # memoize the lattice noise values to improve performance
        if x not in self.mem_x:
            self.mem_x[x] = random.Random(self.seed + x).uniform(-1, 1)
        return self.mem_x[x]

    def __interpolated_noise(self, x):
        prev_x = int(x)        # previous integer
        next_x = prev_x + 1    # next integer
        frac_x = x - prev_x    # fractional part of x
        if self.use_fade:
            frac_x = self.__fade(frac_x)
        # interpolate between the lattice values around x
        if self.interp is Interp.LINEAR:
            res = self.__linear_interp(
                self.__noise(prev_x),
                self.__noise(next_x),
                frac_x)
        elif self.interp is Interp.COSINE:
            res = self.__cosine_interp(
                self.__noise(prev_x),
                self.__noise(next_x),
                frac_x)
        else:
            res = self.__cubic_interp(
                self.__noise(prev_x - 1),
                self.__noise(prev_x),
                self.__noise(next_x),
                self.__noise(next_x + 1),
                frac_x)
        return res

    def get(self, x):
        frequency = self.frequency
        amplitude = self.amplitude
        result = 0
        for _ in range(self.octaves):
            result += self.__interpolated_noise(x * frequency) * amplitude
            frequency *= 2
            amplitude /= 2
        return result

    def __linear_interp(self, a, b, x):
        return a + x * (b - a)

    def __cosine_interp(self, a, b, x):
        x2 = (1 - math.cos(x * math.pi)) / 2
        return a * (1 - x2) + b * x2

    def __cubic_interp(self, v0, v1, v2, v3, x):
        p = (v3 - v2) - (v0 - v1)
        q = (v0 - v1) - p
        r = v2 - v0
        s = v1
        return p * x**3 + q * x**2 + r * x + s

    def __fade(self, x):
        # useful only for linear interpolation
        return (6 * x**5) - (15 * x**4) + (10 * x**3)

x = np.linspace(10, 10, 20)   # constant x: a vertical line at x = 10
y = np.linspace(0, 10, 20)
seed = 10

gen_x = PerlinNoise(seed=seed, amplitude=5, frequency=1, octaves=1, interp=Interp.CUBIC, use_fade=True)
noise_x = np.array([gen_x.get(pos) for pos in y])

fig, ax = plt.subplots(1)
ax.set_aspect("equal")
ax.plot(x, y, linewidth=2, color="k")
ax.scatter(x, y, s=20, zorder=4, color="k")
ax.plot(x + noise_x, y, linewidth=2, color="blue")
ax.scatter(x + noise_x, y, s=80, zorder=4, color="red")
plt.show()
Thank you!
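One hedged guess at the multiple-waves observation: get() samples the lattice noise at x * frequency, so a line spanning 10 units with frequency=1 crosses roughly 10 independent noise cells. A minimal sketch, assuming the goal is one broad wave over the whole line, scales the frequency by the span:

span = y.max() - y.min()    # 10.0 for this line
gen_x = PerlinNoise(seed=seed, amplitude=5, frequency=1.0 / span,
                    octaves=1, interp=Interp.CUBIC, use_fade=True)
noise_x = np.array([gen_x.get(pos) for pos in y])   # now a single smooth bump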
I am writing a program that operates out of a main() function. I am unable to change the main() function (this is for a class assignment). How would I go about carrying a variable over from one function to the next without changing main()?
def read_data(fname):
    file = fname
    x = []
    y = []
    with open(file) as f:
        for line in f:
            xi, yi = [float(x) for x in line.split(",")]
            x.append(xi)
            y.append(yi)
    return x, y

def compute_m_and_b(x, y):
    sx, sy, sx2, sxy, sy2 = 0, 0, 0, 0, 0
    for i in range(len(x)):
        sx += x[i]
        sy += y[i]
        sx2 += (x[i] ** 2)
        sy2 += (y[i] ** 2)
        sxy += (x[i] * y[i])
    m = (sxy * len(x) - sx * sy) / (sx2 * len(x) - sx**2)
    b = (sy - m * sx) / len(x)
    return m, b

def compute_fx_residual(x, y, m, b):
    fx = []
    for xi in x:
        fx.append(m * xi + b)
    residual = []
    for i in range(len(y)):
        residual.append(y[i] - fx[i])
    return fx, residual

def compute_sum_of_squared_residuals(residual):
    least_squares_r = 0
    for i in range(len(y)):   # problem: y is not defined in this function
        least_squares_r += (residual[i]) ** 2
    return least_squares_r

def compute_total_sum_of_squares(y):
    sum_squares = 0
    ymean = sum(y) / len(y)
    for i in range(len(y)):
        sum_squares += (yi - ymean) ** 2   # problem: yi is not defined in this function
    return sum_squares
As you can see, I am restricted to using only the variables listed in each function's parameter list, so variables calculated in earlier functions are undefined in later ones. How can I bring them in without changing main()?
Please let me know if I should be clearer. I can provide more examples, but I wanted to maintain some brevity.
EDIT: here is the main function:
def main():
    fname = input("Enter Input Filename: ")
    x, y = regress.read_data(fname)
    print()
    print("Input File: ", fname)
    print("Data points: ", len(x))
    # compute coefficients m and b
    m, b = regress.compute_m_and_b(x, y)
    # compute fx and residual
    fx, residual = regress.compute_fx_residual(x, y, m, b)
    # compute sum of squared residuals
    least_squares_r = regress.compute_sum_of_squared_residuals(residual)
    # compute sum of squares
    sum_squares = regress.compute_total_sum_of_squares(y)
    # compute coefficient of determination
    coeff_of_d = regress.compute_coeff_of_determination(least_squares_r, sum_squares)
    regress.print_least_squares(x, y, m, b, fx, residual, least_squares_r, sum_squares, coeff_of_d)
    # compute pearson coefficient
    pearson_r = regress.compute_pearson_coefficient(x, y)
    regress.print_pearson(pearson_r)
    return

main()
You haven't provided the main function so it's unclear how you're currently using it.
Looks to me like you can just take the variables returned by each function and pass them into the next:
fname = "some/path/to/file.txt"
x, y = read_data(fname)
m, b = compute_m_and_b(x, y)
fx, residual = compute_fx_residual(x, y, m, b)
least_squares_r = compute_sum_of_squared_residuals(residual)
sum_squares = compute_total_sum_of_squares(y)
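For the two helpers that reference undefined names, the usual fix is to use only the parameters each function receives; a sketch of both, rewritten accordingly:

def compute_sum_of_squared_residuals(residual):
    least_squares_r = 0
    for r in residual:      # iterate over the parameter, not a global y
        least_squares_r += r ** 2
    return least_squares_r

def compute_total_sum_of_squares(y):
    sum_squares = 0
    ymean = sum(y) / len(y)
    for yi in y:            # yi is defined by the loop itself
        sum_squares += (yi - ymean) ** 2
    return sum_squares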
I am trying to solve a boundary-value problem (p. 221) with SciPy's solve_bvp. The code is:
import numpy as np
from scipy.integrate import solve_bvp

def fun(x, y):
    tmp = np.vstack((np.cos(y[2]), np.sin(y[2]), y[3], (y[4]*np.cos(y[2])), 0))
    return tmp

def bc(ya, yb):
    tmp = np.array([ya[0], ya[1], ya[3], yb[1], yb[2] + (np.pi/2)])
    return tmp

x = np.linspace(0, 0.5, 400)
y = np.zeros((5, x.size))
sol = solve_bvp(fun, bc, x, y)
I am getting an error
ValueError: all the input array dimensions except for the concatenation axis must match exactly
Any ideas about the source of the error? A simpler problem works without one:
import numpy as np
from scipy.integrate import solve_bvp

def fun(x, y):
    tmp = np.vstack((y[1], -np.exp(y[0])))
    print(tmp)
    return tmp

def bc(ya, yb):
    tmp = np.array([ya[0], yb[0]])
    print(tmp)
    return tmp

x = np.linspace(0, 1, 5)
y_1 = np.zeros((2, x.size))
res_1 = solve_bvp(fun, bc, x, y_1)
The scalar 0 in np.vstack((..., (y[4]*np.cos(y[2])), 0)) is the problem: it is not broadcast to the common (m,) shape of the other rows, so the stack fails. Giving it the right length fixes this:
import numpy as np
from scipy.integrate import solve_bvp

def fun(x, y):
    tmp = np.vstack((np.cos(y[2]), np.sin(y[2]), y[3], (y[4]*np.cos(y[2])), np.zeros(y[3].shape)))
    return tmp

def bc(ya, yb):
    tmp = np.array([ya[0], ya[1], ya[3], yb[1], yb[2] + (np.pi/2)])
    return tmp

x = np.linspace(0, 0.5, 400)
y = np.zeros((5, x.size))
sol = solve_bvp(fun, bc, x, y)
runs without issues.
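An equivalent spelling of that last row, assuming y keeps its (5, m) layout, is np.zeros_like, which copies both the shape and dtype of a neighbouring row:

def fun(x, y):
    # same system; the last row now inherits its shape from y[3]
    return np.vstack((np.cos(y[2]), np.sin(y[2]), y[3],
                      y[4]*np.cos(y[2]), np.zeros_like(y[3])))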
I am trying to calculate the error rate of the training data I'm using. I believe I'm calculating the error incorrectly. The formula is:
E_train = (1/N) * sum over n of (y(x_n) - t_n)^2
where y(x) is the fitted polynomial:
y(x) = w_0 + w_1*x + ... + w_M*x^M
I am calculating this in the function fitPoly(M). I believe I am incorrectly calculating y(x_n), but I don't know what else to do.
Below is the Minimal, Complete, and Verifiable example.
import numpy as np
import matplotlib.pyplot as plt

dataTrain = [
    [2.362761180904257019e-01, -4.108125266714775847e+00],
    [4.324296163702689988e-01, -9.869308732049049127e+00],
    [6.023323504115264404e-01, -6.684279243433971729e+00],
    [3.305079685397107614e-01, -7.897042003779912278e+00],
    [9.952423271981121200e-01, 3.710086310489402628e+00],
    [8.308127402955634011e-02, 1.828266768673480147e+00],
    [1.855495407116576345e-01, 1.039713135916495501e+00],
    [7.088332047815845138e-01, -9.783208407540947560e-01],
    [9.475723071629885697e-01, 1.137746192425550085e+01],
    [2.343475721257285427e-01, 3.098019704040922750e+00],
    [9.338350584099475160e-02, 2.316408265530458976e+00],
    [2.107903139601833287e-01, -1.550451474833406396e+00],
    [9.509966727520677843e-01, 9.295029459100994984e+00],
    [7.164931165416982273e-01, 1.041025972594300075e+00],
    [2.965557300301902011e-03, -1.060607693351102121e+01]]

def strip(L, xt):
    ret = []
    for i in L:
        ret.append(i[xt])
    return ret

x1 = strip(dataTrain, 0)
y1 = strip(dataTrain, 1)

# HELP HERE
def getY(m, w, D):
    y = w[0]
    y += np.sum(w[1:] * D[:m])
    return y
# HELP ABOVE

def dataMatrix(X, M):
    Z = []
    for x in range(len(X)):
        row = []
        for m in range(M + 1):
            row.append(X[x][0] ** m)
        Z.append(row)
    return Z

def fitPoly(M):
    t = []
    for i in dataTrain:
        t.append(i[1])
    w, _, _, _ = np.linalg.lstsq(dataMatrix(dataTrain, M), t)
    w = w[::-1]
    errTrain = np.sum(np.subtract(t, getY(M, w, x1)) ** 2) / len(x1)
    print('errTrain: %s' % (errTrain))
    return [w, errTrain]

# fitPoly(8)

def plotPoly(w):
    plt.ylim(-15, 15)
    x, y = zip(*dataTrain)
    plt.plot(x, y, 'bo')
    xw = np.arange(0, 1, .001)
    yw = np.polyval(w, xw)
    plt.plot(xw, yw, 'r')

# plotPoly(fitPoly(3)[0])

def bestPoly():
    m = 0
    plt.figure(1)
    plt.xlim(0, 16)
    plt.ylim(0, 250)
    plt.xlabel('M')
    plt.ylabel('Error')
    plt.suptitle('Question 3: training and test error')
    while m < 16:
        plt.figure(0)
        plt.subplot(4, 4, m + 1)
        plotPoly(fitPoly(m)[0])
        plt.figure(1)
        plt.plot(fitPoly(m)[1])
        # plt.plot(fitPoly(m)[2])
        m += 1
    plt.figure(3)
    plt.xlabel('t')
    plt.ylabel('x')
    plt.suptitle('Question 3: best-fitting polynomial (degree = 8)')
    plotPoly(fitPoly(8)[0])
    print('Best M: %d\nBest w: %s\nTraining error: %s' % (8, fitPoly(8)[0], fitPoly(8)[1]))

bestPoly()
Updated: This solution uses NumPy's np.interp, which connects the points as a kind of "best fit". We then use your error function to measure the difference between this interpolated line and the y values predicted by the polynomial of each degree.
import numpy as np
import matplotlib.pyplot as plt
import itertools

dataTrain = [
    [2.362761180904257019e-01, -4.108125266714775847e+00],
    [4.324296163702689988e-01, -9.869308732049049127e+00],
    [6.023323504115264404e-01, -6.684279243433971729e+00],
    [3.305079685397107614e-01, -7.897042003779912278e+00],
    [9.952423271981121200e-01, 3.710086310489402628e+00],
    [8.308127402955634011e-02, 1.828266768673480147e+00],
    [1.855495407116576345e-01, 1.039713135916495501e+00],
    [7.088332047815845138e-01, -9.783208407540947560e-01],
    [9.475723071629885697e-01, 1.137746192425550085e+01],
    [2.343475721257285427e-01, 3.098019704040922750e+00],
    [9.338350584099475160e-02, 2.316408265530458976e+00],
    [2.107903139601833287e-01, -1.550451474833406396e+00],
    [9.509966727520677843e-01, 9.295029459100994984e+00],
    [7.164931165416982273e-01, 1.041025972594300075e+00],
    [2.965557300301902011e-03, -1.060607693351102121e+01]
]

data = np.array(dataTrain)
data = data[data[:, 0].argsort()]
X, y = data[:, 0], data[:, 1]

fig, ax = plt.subplots(4, 4)
indices = list(itertools.product([0, 1, 2, 3], repeat=2))

for i, loc in enumerate(indices, start=1):
    xx = np.linspace(X.min(), X.max(), 1000)
    yy = np.interp(xx, X, y)
    w = np.polyfit(X, y, i)
    y_pred = np.polyval(w, xx)

    ax[loc].scatter(X, y)
    ax[loc].plot(xx, y_pred)
    ax[loc].plot(xx, yy, 'r--')

    error = np.square(yy - y_pred).sum() / X.shape[0]
    print(error)

plt.show()
This prints out:
2092.19807848
1043.9400277
1166.94550318
252.238810889
225.798905379
155.785478366
125.662973726
143.787869281
6553.66570273
10805.6609259
15577.8686283
13536.1755299
108074.871771
213513916823.0
472673224393.0
1.01198058355e+12
Visually, it plots a 4x4 grid of panels, one per degree, each showing the data scatter, the polynomial fit, and the interpolated line (red, dashed).
From here, it's just a matter of saving those errors to a list and finding the minimum.
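A minimal sketch of that last step, reusing X, y, xx, and yy from the loop above:

errors = []
for degree in range(1, 17):
    w = np.polyfit(X, y, degree)
    y_pred = np.polyval(w, xx)
    errors.append(np.square(yy - y_pred).sum() / X.shape[0])

best_degree = int(np.argmin(errors)) + 1    # +1 because degrees start at 1
print(best_degree)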
Here is my contribution:
def pol_y(x, w):
    y = 0
    power = 0
    for i in w:
        y += i * (x ** power)
        power += 1
    return y
M is included implicitly because it is the final index of w. So if w = [0, 0, 1], then pol_y(x, w) is the same as f(x) = x^2.
If you want to map the 1st column of dataTrain:
get_Y = [pol_y(i, w) for i in x1]
The error may be calculated by:
vec_error = [(y1[i] - get_Y[i]) ** 2 for i in range(len(y1))]
train_error = np.sum(vec_error) / len(y1)
Hope this helps.
First off, thank you everybody for the amazing work on Stack Overflow... you guys are amazing and have helped me out quite a few times already. Regarding my problem: I have a series of vectors in the format (VectorX, VectorY, StartingpointX, StartingpointY):
data = [(-0.15304757819399128, -0.034405679205349315, -5.42877197265625, 53.412933349609375), (-0.30532995491023485, -0.21523935094046465, -63.36669921875, 91.832427978515625), (-0.15872430479453215, -0.077999419482978283, -67.805389404296875, 81.001983642578125), (-0.36415549211687903, -0.33757147194808113, -59.015228271484375, 82.976226806640625), (0.0, 0.0, 0.0, 0.0), (-0.052973530805275004, 0.098212384392411423, 19.02667236328125, -13.72125244140625), (-0.34318724086483599, 0.17123742336019632, 80.0394287109375, 108.58499145507812), (0.19410169197834648, -0.17635303976555861, -55.603790283203125, -76.298828125), (-0.38774018337716143, -0.0824692384322816, -44.59942626953125, 68.402496337890625), (0.062202543524108478, -0.37219011831012949, -79.828826904296875, -10.764404296875), (-0.56582988168383963, 0.14872365390732512, 39.67657470703125, 97.303192138671875), (0.12496832467900276, -0.12216653754859408, 24.65948486328125, -30.92584228515625)]
This is how I plot the vector field:
import math
import numpy as np
import matplotlib.pyplot as plt

def main():
    # format the data
    numdata = len(data)
    x = np.zeros(numdata)
    y = np.zeros(numdata)
    u = np.zeros(numdata)
    v = np.zeros(numdata)
    z = np.zeros(numdata)   # vector lengths (missing in the original)
    for i, el in enumerate(data):
        x[i] = el[2]
        y[i] = el[3]
        # length of vector
        z[i] = math.sqrt(el[0]**2 + el[1]**2)
        u[i] = el[0]
        v[i] = el[1]
    # plot
    plt.quiver(x, y, u, v)
    # showing the length with color
    plt.scatter(x, y, c=z)
    plt.show()

main()
I want to create a polynomial function that fits a continuous vector field over the whole area. After some research I found the following functions for fitting polynomials in two dimensions. The problem is that they only accept a single scalar value per point as the quantity to be fitted.
import itertools
import numpy as np

def polyfit2d(x, y, z, order=3):
    ncols = (order + 1)**2
    G = np.zeros((x.size, ncols))
    ij = itertools.product(range(order+1), range(order+1))
    for k, (i, j) in enumerate(ij):
        G[:, k] = x**i * y**j
    m, _, _, _ = np.linalg.lstsq(G, z)
    return m

def polyval2d(x, y, m):
    order = int(np.sqrt(len(m))) - 1
    ij = itertools.product(range(order+1), range(order+1))
    z = np.zeros_like(x)
    for a, (i, j) in zip(m, ij):
        z += a * x**i * y**j
    return z
Also, when I tried to fit the one-dimensional lengths of the vectors, the values returned from polyval2d were completely off. Does anybody know a method to get a fitted function that will return a vector (x, y) for any point in the grid?
Thank you!
A polynomial fit to a 2-D vector field consists of two bivariate polynomials: one for the x-component and one for the y-component. In other words, your final fitted polynomial will look something like:
P(x,y) = ( x + x*y, 1 + x + y )
So you will have to call polyfit2d twice. Here is an example:
import numpy as np
import itertools

def polyfit2d(x, y, z, order=3):
    ncols = (order + 1)**2
    G = np.zeros((x.size, ncols))
    ij = itertools.product(range(order+1), range(order+1))
    for k, (i, j) in enumerate(ij):
        G[:, k] = x**i * y**j
    m, _, _, _ = np.linalg.lstsq(G, z)
    return m

def fmt1(x, i):
    if i == 0:
        return ""
    elif i == 1:
        return x
    else:
        return x + '^' + str(i)

def fmt2(i, j):
    if i == 0:
        return fmt1('y', j)
    elif j == 0:
        return fmt1('x', i)
    else:
        return fmt1('x', i) + fmt1('y', j)

def fmtpoly2(m, order):
    for (i, j), c in zip(itertools.product(range(order+1), range(order+1)), m):
        yield "%f %s" % (c, fmt2(i, j))

xs = np.array([0, 1, 2, 3])
ys = np.array([0, 1, 2, 3])
zx = np.array([0, 2, 6, 12])
zy = np.array([1, 3, 5, 7])

mx = polyfit2d(xs, ys, zx, 2)
print("x-component(x,y) = ", ' + '.join(fmtpoly2(mx, 2)))
my = polyfit2d(xs, ys, zy, 2)
print("y-component(x,y) = ", ' + '.join(fmtpoly2(my, 2)))
In this example our vector field is:
at (0,0): (0,1)
at (1,1): (2,3)
at (2,2): (6,5)
at (3,3): (12,7)
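As a quick check, along the diagonal x = y = t the target components reduce to t^2 + t = (0, 2, 6, 12) and 1 + 2t = (1, 3, 5, 7), which is consistent with the fitted P(x,y) = (x + x*y, 1 + x + y) above.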
Also, I think I found a bug in polyval2d; this version gives more accurate results. With integer inputs, np.zeros_like(x) makes z an integer array, so the in-place z += ... truncates each term, whereas z = z + ... upcasts to float:
def polyval2d(x, y, m):
    order = int(np.sqrt(len(m))) - 1
    ij = itertools.product(range(order+1), range(order+1))
    z = np.zeros_like(x)
    for a, (i, j) in zip(m, ij):
        z = z + a * x**i * y**j
    return z
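To recover a vector for an arbitrary grid point, evaluate both coefficient sets; a minimal sketch using the functions above (the query point is made up):

xq = np.array([1.5])
yq = np.array([1.5])
vx = polyval2d(xq, yq, mx)   # x-component of the fitted field
vy = polyval2d(xq, yq, my)   # y-component
print(vx, vy)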