Iterate every csv column and predict value using linear regression - python

I am using a loop to grab the value from every csv row and run it through linear_regression_model for prediction. The desired output is, for every row in the csv, the predicted value from the model, like:
4.500
4.256
3.909
4.565
...
4.433
Here is what I did:
def prediction_loop():
    for index, row in ml_sample.iterrows():
        print(row['column'])
        new_data = OrderedDict(['column', row])
        new_data = pd.Series(new_data).values.reshape(1, -1)
        print(linear_regression_model.predict(new_data))
The actual output I get is:
Traceback (most recent call last):
new_data = OrderedDict(['column', row])
ValueError: too many values to unpack (expected 2)
In the csv there are 87 rows and 1 column.
How can I optimise the code?
Thank you

If I understand the question correctly, this can be done very efficiently without the aid of any external modules. We just need a trivial class to manage the statistics. The assumption is that the input file contains one numerical value per line; those values are Y and the implied line number is X. Try this:
class Stats:
    def __init__(self):
        self.n = 0
        self.sx = 0
        self.sy = 0
        self.sxx = 0
        self.syy = 0
        self.sxy = 0

    def add(self, x, y):
        self.sx += x
        self.sy += y
        self.sxx += x * x
        self.syy += y * y
        self.sxy += x * y
        self.n += 1

    def r(self):  # correlation coefficient
        return (self.n * self.sxy - self.sx * self.sy) / ((self.n * self.sxx - self.sx * self.sx) * (self.n * self.syy - self.sy * self.sy)) ** 0.5

    def b(self):  # slope
        return (self.n * self.sxy - self.sx * self.sy) / (self.n * self.sxx - self.sx * self.sx)

    def a(self):  # intercept
        return self.my() - self.b() * self.mx()

    def mx(self):  # mean x
        assert self.n > 0
        return self.sx / self.n

    def my(self):  # mean y
        assert self.n > 0
        return self.sy / self.n

    def y(self, x):  # estimate of y for given x
        return x * self.b() + self.a()

stats = Stats()
with open('lr.txt') as data:
    for i, line in enumerate(data):
        stats.add(i, float(line.split()[0]))

print(f'r={stats.r():.4f} slope={stats.b():.4f} intercept={stats.a():.4f}')
for x in range(stats.n):
    print(f'Estimate for {x} = {stats.y(x):.2f}')
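For completeness, if you want to stay with pandas and the already-fitted model from the question: OrderedDict(['column', row]) raises the ValueError because OrderedDict expects an iterable of (key, value) pairs, so it tries to unpack the six-character string 'column' into two items. A minimal sketch, assuming linear_regression_model is a fitted scikit-learn estimator and ml_sample is the DataFrame from the question; no Python-level loop over rows is needed, because predict accepts the whole column at once:

# ml_sample[['column']] keeps a 2-D shape, which is what scikit-learn's
# predict() expects; one call covers all 87 rows.
predictions = linear_regression_model.predict(ml_sample[['column']].values)
for p in predictions:
    print(f'{p:.3f}')  # 4.500, 4.256, ... one value per csv row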

Related

Taking derivative using sparse matrix

I’ve been trying to solve the following problem for around two days, but have not had success.
I want to calculate the derivative using a sparse matrix, but the result isn't correct. I think there is a mistake in the solution function, but I cannot find it.
import numpy as np

class Cahn_Hillard(object):
    def __init__(self, n, X, T, dt, skip):
        self.n = n
        self.X = X
        self.dx = 1 / (n - 1)
        self.T = T
        self.dt = dt
        self.N = int(self.T / self.dt)

    def __call__(self):
        central = self.forward_diff_sparse()
        itr = int(self.T / self.dt)
        for i in range(0, itr + 1):
            if i == 0:
                c = np.random.uniform(0, 1, (self.n, 1))
            else:
                c_next = self.solution(c, central)
                c = c_next
            print(i)
        return c

    def forward_diff_sparse(self):
        sparse_mat = np.eye(self.n) - np.eye(self.n, k=-1)
        return sparse_mat

    def solution(self, c, central):
        # calculate derivative of concentration
        deriv = central.dot(c) / self.dx
        # calculate diffusion coefficient
        D_eff = (1 - 2 * self.X * c * (1 - c)) * deriv
        Diff = central.dot(D_eff) / self.dx
        # calculate the next step concentration
        next_c = c + self.dt * Diff
        return next_c
It would be great if you could help me.
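One side note on the sparse part, separate from whether the numerical scheme itself is right: np.eye builds dense arrays, so forward_diff_sparse is not actually sparse. A minimal sketch of a truly sparse forward-difference operator using scipy.sparse (the standalone function here is illustrative, not from the question):

import numpy as np
from scipy import sparse

def forward_diff_sparse(n, dx):
    # 1 on the main diagonal, -1 on the first sub-diagonal:
    # the sparse equivalent of np.eye(n) - np.eye(n, k=-1)
    return sparse.diags([1.0, -1.0], [0, -1], shape=(n, n), format='csr') / dx

c = np.random.uniform(0, 1, (5, 1))
print(forward_diff_sparse(5, dx=0.25).dot(c))  # same .dot(c) usage as in solution()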

Unsupported Operand in Python yet overloaded operator is defined

I'm working on a couple of basic math classes for performing line collisions in 2D.
Here is my python code:
collisions.py
import math

class Vector:
    def __init__(self, x, y):
        self._x = x
        self._y = y

    def __str__(self):
        return '(%d, %d)' % (self._x, self._y)

    def __add__(self, other):
        return Vector(self._x + other._x, self._y + other._y)

    def __sub__(self, other):
        return Vector(self._x - other._x, self._y - other._y)

    def __mul__(self, val):
        return Vector(self._x * val, self._y * val)

    # same as dot product
    #def __mul__(self, other):
    #    return self.dot(other)

    # same as cross product
    #def __pow__(self, other):
    #    return self.cross(other)

    def x(self):
        return self._x

    def y(self):
        return self._y

    def cross(self, other):
        return (self._x * other._y - self._y * other._x)

    def dot(self, other):
        return (self._x * other._x + self._y * other._y)

class Line:
    def __init__(self, p1, p2):
        self._p1 = p1
        self._p2 = p2

    def slope(self):
        if self._p1.x() == self._p2.x():
            if self._p1.y() == self._p2.y():
                return str("both points coincide")
            else:
                if self._p1.y() < self._p2.y():
                    return float('inf')
                else:
                    return float('-inf')
        self._m = float((self._p2.y() - self._p1.y()) / (self._p2.x() - self._p1.x()))
        return self._m

    def p1(self):
        return self._p1

    def p2(self):
        return self._p2

def LineIntersect(l1, l2):
    a = l1.p1()
    b = l1.p2()
    c = l2.p1()
    d = l2.p2()
    #r = l1.p2() - l1.p1()
    r = b - a
    #s = l2.p2() - l2.p1()
    s = d - c
    d = r.cross(s)
    #u = ((l2.p2.x() - l1.p1.x()) * r.y() - (l2.p2.y() - l1.p1.y()) * r.x()) / d
    u = ((c.x() - a.x()) * r.y() - (c.y() - a.y()) * r.x()) / d
    #t = ((l2.p2.x() - l1.p1.x()) * s.y() - (l2.p2.y() - l1.p1.y()) * s.x()) / d
    t = ((c.x() - a.x()) * s.y() - (c.y() - a.y()) * s.x()) / d
    if 0 <= u <= 1 and 0 <= t <= 1:
        return (a + t * r)
    else:
        return False

l1 = Line(Vector(0, 0), Vector(3, 3))
l2 = Line(Vector(3, 0), Vector(0, 3))
intersected = LineIntersect(l1, l2)
print(intersected)
When I run this from the Windows 7 command prompt using Python 3.7's interpreter via python collisions.py, it generates this error message:
D:\Dev\Languages\Python\Projects\Test>python collisions.py
Traceback (most recent call last):
File "collisions.py", line 88, in <module>
intersected = LineIntersect(l1, l2)
File "collisions.py", line 81, in LineIntersect
return (a + t * r)
TypeError: unsupported operand type(s) for *: 'float' and 'Vector'
I'm more at home in the C languages, specifically C++. I've been learning Python for about a year now and I'm starting to get used to it, but with things like this, I understand the error message, I just don't know how to resolve it. I'm used to a strongly typed, compiled language rather than an interpreted language like Python.
I assumed that I had the operator within the Vector class overloaded so I could multiply vectors by scalars, yet Python tells me it is an unsupported operand. What can I do to resolve this issue? As a side note, if you notice any other potential bugs or issues with the code, please don't hesitate to mention them in a comment. I'm still learning this language!
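For what it's worth, the cause is evaluation order: t * r calls float.__mul__ first, which returns NotImplemented for a Vector operand, and Python then looks for __rmul__ on Vector, which is not defined. A minimal standalone sketch of the fix (toy class, not the full code above):

class Vec:
    def __init__(self, x, y):
        self.x, self.y = x, y

    def __mul__(self, val):  # handles Vec * scalar
        return Vec(self.x * val, self.y * val)

    __rmul__ = __mul__  # handles scalar * Vec by delegating to __mul__

    def __str__(self):
        return '(%s, %s)' % (self.x, self.y)

print(0.5 * Vec(3, 3))  # (1.5, 1.5) instead of a TypeError

Alternatively, writing r * t instead of t * r inside LineIntersect sidesteps the issue without touching the class.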

Python script to calculate hypotenuse and angle measure based on user inputs and classes

So far, finding the hypotenuse ("side c") works without problems. The problem is when I try to calculate the angle; I am pretty sure it's returning a wrong value.
import math
from math import sqrt

class Triangle:
    def __init__(self, side_a, side_b):
        self.side_a = side_a
        self.side_b = side_b

    def SidesCalculate(self):
        side_c = sqrt(self.side_a ** 2 + self.side_b ** 2)
        return side_c

    def AnglesCalculate(self, side_c):
        x = math.sin(self.side_a / side_c)
        math.asin(x)
        x = round(x * 180 / math.pi)
        return x

g = Triangle(side_a=int(input("Enter side a: ")), side_b=int(input("Enter side b: ")))
print("side c =", + g.SidesCalculate())
print("angle is =", + g.AnglesCalculate(side_c=True), '°')
Here is a way to do what you want.
First, when you compute side_c, save it as an attribute:

def SidesCalculate(self):
    self.side_c = sqrt(self.side_a ** 2 + self.side_b ** 2)
    return self.side_c

Second, give the parameter side_c a default value of None. (You also have a math error in the following function, pointed out by CFLS: it should call math.asin, not math.sin.)

def AnglesCalculate(self, side_c=None):
    if side_c is None:
        side_c = self.side_c
    x = math.asin(self.side_a / side_c)
    x = round(x * 180 / math.pi)
    return x
Now, if you want the class to pick up the previously computed side_c, you can do this:

g = Triangle(side_a=int(input("Enter side a: ")), side_b=int(input("Enter side b: ")))
print("side c =", g.SidesCalculate())
print("angle is =", g.AnglesCalculate(), '°')
For reference, the corrected AnglesCalculate on its own, with math.asin doing the work (this is the core fix):

def AnglesCalculate(self, side_c):
    x = math.asin(self.side_a / side_c)
    x = round(x * 180 / math.pi)
    return x
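A quick sanity check of the patched class, using a 3-4-5 triangle (asin(3/5) ≈ 36.87°, which rounds to 37):

g = Triangle(side_a=3, side_b=4)
print("side c =", g.SidesCalculate())          # side c = 5.0
print("angle is =", g.AnglesCalculate(), '°')  # angle is = 37 °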

How can I use the data in another object of the same class?

Let's say I have two objects, Cell_i-1 and Cell_i.
There are some data in each cell: n, N, Q, alpha, S, R, and y.
However, some values of the present cell Cell_i require values from the previous cell Cell_i-1, such as:
alpha_i = 1 if n_i-1 <= Q_i
alpha_i = 5 if n_i-1 > Q_i
y_i = min(S_i-1, R_i)
How can I use the values in the previous cell to compute the values in the present cell?
import numpy as np
import pandas as pd
import array as arr

class cell:
    def __init__(self, qmax, n, N, delta_t=0.2, delta_l=0.5):
        self.qmax = qmax
        self.Q = self.qmax * delta_t
        self.n = n
        self.N = N
        self.alpha = []
        self.S = []
        self.R = []
        self.y = []

    def current(self):
        x = np.minimum(self.Q, self.n)
        self.S.append(x)
        print(self.S)

    def update(self):
        ...  # cut off in the question
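The question is cut off at update, but one straightforward pattern is to pass the previous cell object into the method that updates the present cell. A minimal sketch under that assumption (attribute names follow the question; the R definition is a placeholder, since the question does not show how R is computed):

import numpy as np

class Cell:
    def __init__(self, qmax, n, N, delta_t=0.2):
        self.Q = qmax * delta_t
        self.n = n
        self.N = N
        self.S = np.minimum(self.Q, self.n)
        self.R = self.Q  # placeholder: the question does not define R

    def update(self, prev):
        # prev is the Cell_i-1 object; read its attributes directly
        self.alpha = 1 if prev.n <= self.Q else 5
        self.y = min(prev.S, self.R)

cells = [Cell(qmax=2.0, n=0.5, N=10), Cell(qmax=2.0, n=0.8, N=10)]
cells[1].update(cells[0])  # Cell_i pulls n and S from Cell_i-1
print(cells[1].alpha, cells[1].y)  # 5 0.4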

De Casteljau algorithm ignore a control point

I wrote an implementation of the De Casteljau algorithm to create a Bezier curve. My problem is that the function ignores the second control point; it does calculate some kind of curve, but not the correct one.
def DeCasteljau(CNTRL_P, t):
    ReP = points.point()
    Ret = points.point()
    n = len(CNTRL_P)
    k = 0
    tmp = 0
    while k < n:
        tmp = (((1 - t)**((n-1) - k)) * (t**k))
        ReP.addP(CNTRL_P[k])
        #ReP.Prnt()
        ReP.mulP(tmp)
        Ret.addP(ReP)
        ReP.Clr()  # ReP => (0,0)
        tmp = 0
        k = k + 1
    return Ret
For example: CNTRL_P = [P0, P1, P2]
It ignores P1
class point():
    def __init__(self, X=0, Y=0):
        self.x = X
        self.y = Y

    def addP(self, P1):
        self.x = self.x + P1.getX()
        self.y = self.y + P1.getY()

    def subP(self, C=0, D=0):
        self.x = self.x - C
        self.y = self.y - D

    def mulP(self, C):
        self.x = self.x * C
        self.y = self.y * C

    def getX(self):
        return self.x

    def getY(self):
        return self.y

    def Prnt(self):
        print("X:", self.x, "Y:", self.y)

    def Clr(self):  # reset to (0, 0)
        self.x = 0
        self.y = 0
The implementation looks faulty: it computes the Bernstein weights (1-t)^(n-1-k) * t^k but leaves out the binomial coefficients, so with three points the middle one only gets weight (1-t)*t instead of 2*(1-t)*t, which is why P1 seems to be ignored. There is also no recursion, and repeated linear interpolation is the heart of De Casteljau's algorithm.
Does this give you better results?
def DeCasteljau2(CNTRL_P, t):
    tmp_points = CNTRL_P[:]
    while len(tmp_points) > 1:
        for k in range(len(tmp_points) - 1):
            ReP = point()
            ReP2 = point()
            ReP.addP(tmp_points[k])
            ReP2.addP(tmp_points[k + 1])
            ReP.mulP(1 - t)
            ReP2.mulP(t)
            ReP.addP(ReP2)
            tmp_points[k] = ReP
        tmp_points.pop()
    return tmp_points[0]
This is the result after each iteration:
P0 | P1 | P2
P0*(1-t) + P1*t | P1*(1-t) + P2*t
(P0*(1-t) + P1*t)*(1-t) + (P1*(1-t) + P2*t)*t
You repeat the algorithm until only one point is left. Each point P(n) in the next iteration is the result of P(n)*(1-t) + P(n+1)*t from the previous one. At each iteration the last point is discarded, since there is no next point to multiply and add with.
Wikipedia can probably explain it better than I can: see the De Casteljau's algorithm article.
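A quick check of DeCasteljau2 with the point class above: for P0=(0,0), P1=(1,2), P2=(2,0), the curve at t=0.5 should pass through (1.0, 1.0), and the middle point now clearly influences the result:

P0, P1, P2 = point(0, 0), point(1, 2), point(2, 0)
mid = DeCasteljau2([P0, P1, P2], 0.5)
mid.Prnt()  # X: 1.0 Y: 1.0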
