Python, memory error, csv file too large [duplicate] - python

This question already has answers here:
Reading a huge .csv file
(7 answers)
Closed 8 years ago.
I have a problem with a Python module that cannot handle importing a big data file (the file targets.csv weighs nearly 1 GB).
The error happens when this line is loaded:
targets = [(name, float(X), float(Y), float(Z), float(BG))
for name, X, Y, Z, BG in csv.reader(open('targets.csv'))]
traceback:
Traceback (most recent call last):
File "C:\Users\gary\Documents\EPSON STUDIES\colors_text_D65.py", line 41, in <module>
for name, X, Y, Z, BG in csv.reader(open('targets.csv'))]
MemoryError
I was wondering if there's a way to open the file targets.csv line by line? And I am also wondering if this would slow down the process?
This module is already pretty slow...
Thanks!
import geometry
import csv
import numpy as np
import random
import cv2
S = 0
img = cv2.imread("MAP.tif", -1)
height, width = img.shape
pixx = height * width
iterr = float(pixx / 1000)
accomplished = 0
temp = 0
ppm = file("epson gamut.ppm", 'w')
ppm.write("P3" + "\n" + str(width) + " " + str(height) + "\n" + "255" + "\n")
# PPM file header
all_colors = [(name, float(X), float(Y), float(Z))
for name, X, Y, Z in csv.reader(open('XYZcolorlist_D65.csv'))]
# background is marked SUPPORT
support_i = [i for i, color in enumerate(all_colors) if color[0] == '255 255 255']
if len(support_i)>0:
support = np.array(all_colors[support_i[0]][1:])
del all_colors[support_i[0]]
else:
support = None
tg, hull_i = geometry.tetgen_of_hull([(X,Y,Z) for name, X, Y, Z in all_colors])
colors = [all_colors[i] for i in hull_i]
print ("thrown out: "
+ ", ".join(set(zip(*all_colors)[0]).difference(zip(*colors)[0])))
targets = [(name, float(X), float(Y), float(Z), float(BG))
for name, X, Y, Z, BG in csv.reader(open('targets.csv'))]
for target in targets:
name, X, Y, Z, BG = target
target_point = support + (np.array([X,Y,Z]) - support)/(1-BG)
tet_i, bcoords = geometry.containing_tet(tg, target_point)
if tet_i == None:
#print str("out")
ppm.write(str("255 255 255") + "\n")
print "out"
temp += 1
if temp >= iterr:
accomplished += temp
print str(100 * accomplished / (float(pixx))) + str(" %")
temp = 0
continue
# not in gamut
else:
A = bcoords[0]
B = bcoords[1]
C = bcoords[2]
D = bcoords[3]
R = random.uniform(0,1)
names = [colors[i][0] for i in tg.tets[tet_i]]
if R <= A:
S = names[0]
elif R <= A+B:
S = names[1]
elif R <= A+B+C:
S = names[2]
else:
S = names[3]
ppm.write(str(S) + "\n")
temp += 1
if temp >= iterr:
accomplished += temp
print str(100 * accomplished / (float(pixx))) + str(" %")
temp = 0
print "done"
ppm.close()

csv.reader() already reads the lines one at a time. However, you're collecting all of the lines into a list first. You should process the lines one at a time. One approach is to switch to a generator, for example:
targets = ((name, float(X), float(Y), float(Z), float(BG))
for name, X, Y, Z, BG in csv.reader(open('targets.csv')))
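(Switching from square brackets to parens changes targets from a list comprehension to a generator expression, so each row is converted only when the for loop asks for it instead of all rows being held in memory at once.)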

Related

Python: printed object has a type but returned object is NoneType? [duplicate]

This question already has answers here:
Why does my recursive function return None?
(4 answers)
Closed 12 months ago.
I have a function that returns a tuple containing a NumPy array and a list. At the end of the function I print out the array and the list and both look correct. Then I print their types and these also look correct. But when I return them, I get a NoneType error. I am very confused as to why this is happening. Code below. adjust_param is a helper function. The TypeError is asserted in the return line of optimize_theta.
def adjust_param(R, delta, i, theta):
thetaplus = theta.copy()
thetaminus = theta.copy()
thetaplus[i*2] += delta
thetaplus[i*2+1] += delta
thetaminus[i*2] -= delta
thetaminus[i*2+1] -= delta
y = Remp(q_data, labels, R, num_samples, theta)
yplus = Remp(q_data, labels, R, num_samples, thetaplus)
yminus = Remp(q_data, labels, R, num_samples, thetaminus)
if (yplus < y and yplus < yminus and yplus != -1):
return thetaplus, yplus
elif (yminus < y and yminus < yplus and yminus != -1):
return thetaminus, yminus
else:
return theta, y
def optimize_theta(N, R, delta, i, theta, risk):
if N == 0:
print("Theta : " + str(type(theta)))
print("= " + str(theta))
print()
print("Risk : " + str(type(risk)))
print("= " + str(risk))
return theta, risk
else:
theta_new, risk_new = adjust_param(R, delta, i, theta)
if i == (len(theta)/2)-1:
#print("N = " + str(N-1))
#print("theta = " + str(theta))
risk_copy = risk.copy()
risk_copy.append(risk_new)
optimize_theta(N-1, R, delta, 0, theta_new, risk_copy)
else:
optimize_theta(N, R, delta, i+1, theta_new, risk)
Output:
Theta : <class 'numpy.ndarray'>
= [0.85885111 0.86066499 0.47482528 0.13555158 0.87249245 0.02604654
0.2906744 0.34618303]
Risk : <class 'list'>
= [0.6273510217403618]
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-307-8b43b528fee2> in <module>
----> 1 theta, risk = optimize_theta(N, R, delta, 0, theta0, [])
TypeError: cannot unpack non-iterable NoneType object
Any insight would be much appreciated. Thank you!
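You must explicitly return the results of the recursive calls in the else section of optimize_theta. Without a return statement there, the value produced by the innermost call is discarded and the outer calls implicitly return None.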
def optimize_theta(N, R, delta, i, theta, risk):
    """Recursively adjust ``theta`` and return ``(theta, risk)``.

    ``i`` selects the parameter pair handed to ``adjust_param``; when the
    last pair (index ``len(theta)/2 - 1``) has been processed, the new risk
    is appended to a copy of ``risk``, ``N`` is decremented and ``i`` starts
    again at 0. The recursion stops when ``N`` reaches 0.

    Every branch returns a value: the recursive calls are returned so that
    the result of the innermost call propagates back to the original caller
    instead of being dropped (which would make the function return None).
    """
    if N == 0:
        print("Theta : " + str(type(theta)))
        print("= " + str(theta))
        print()
        print("Risk : " + str(type(risk)))
        print("= " + str(risk))
        return theta, risk
    else:
        theta_new, risk_new = adjust_param(R, delta, i, theta)
        if i == (len(theta)/2)-1:
            # Copy so the caller's risk list is not mutated.
            risk_copy = risk.copy()
            risk_copy.append(risk_new)
            return optimize_theta(N-1, R, delta, 0, theta_new, risk_copy)
        else:
            return optimize_theta(N, R, delta, i+1, theta_new, risk)

While conversion, invalid literal for float

I'm trying to train my own data on the Yolo network, but before that I have to convert the bounding boxes co-ordinates to the form it wants.
The file contents are like this:
0
53 19 163 116
and I'm trying to convert it to the form the network works with the following.
The code is:
import os
from os import walk, getcwd
from PIL import Image
classes = ["stopsign"]
def convert(size, box):
dw = 1./size[0]
dh = 1./size[1]
x = (box[0] + box[1])/2.0
y = (box[2] + box[3])/2.0
w = box[1] - box[0]
h = box[3] - box[2]
x = x*dw
w = w*dw
y = y*dh
h = h*dh
return (x,y,w,h)
"""-------------------------------------------------------------------
"""
""" Configure Paths"""
mypath = "/home/decentmakeover2/BBox-Label-Tool/Labels/002/"
outpath = "/home/decentmakeover2/output/"
cls = "stopsign"
if cls not in classes:
exit(0)
cls_id = classes.index(cls)
wd = getcwd()
list_file = open('%s/%s_list.txt'%(wd, cls), 'w')
""" Get input text file list """
txt_name_list = []
for (dirpath, dirnames, filenames) in walk(mypath):
txt_name_list.extend(filenames)
break
print(txt_name_list)
""" Process """
for txt_name in txt_name_list:
#txt_file = open("Labels/stop_sign/001.txt", "r")
""" Open input text files """
txt_path = mypath + txt_name
print("Input:" + txt_path)
txt_file = open(txt_path, "r")
lines = txt_file.read().split('\r\n') #for ubuntu, use "\r\n"
instead of "\n"
""" Open output text files """
txt_outpath = outpath + txt_name
print("Output:" + txt_outpath)
txt_outfile = open(txt_outpath, "w")
""" Convert the data to YOLO format """
ct = 0
for line in lines:
#print('lenth of line is: ')
#print(len(line))
#print('\n')
if(len(line) >= 2):
ct = ct + 1
print(line + "\n")
elems = line.split(' ')
print(elems)
xmin = elems[0]
xmax = elems[2]
ymin = elems[1]
ymax = elems[3]
#
img_path = str('%s/images/%s/%s.JPEG'%(wd, cls,
os.path.splitext(txt_name)[0]))
#t = magic.from_file(img_path)
#wh= re.search('(\d+) x (\d+)', t).groups()
im=Image.open(img_path)
w= int(im.size[0])
h= int(im.size[1])
#w = int(xmax) - int(xmin)
#h = int(ymax) - int(ymin)
# print(xmin)
print(w, h)
b = (float(xmin), float(xmax), float(ymin), float(ymax))
bb = convert((w,h), b)
print(bb)
txt_outfile.write(str(cls_id) + " " + " ".join([str(a) for
a
in bb]) + '\n')
""" Save those images with bb into list"""
if(ct != 0):
list_file.write('%s/images/%s/%s.JPEG\n'%(wd, cls,
os.path.splitext(txt_name)[0]))
list_file.close()
and i get the error:
first it prints out all the file names and the content of the data then,
['0\n53', '19', '163', '116\n']
(262, 192)
Traceback (most recent call last):
File "text.py", line 84, in <module>
b = (float(xmin), float(xmax), float(ymin), float(ymax))
ValueError: invalid literal for float(): 0
53
I'm not really sure what to do here.
Any suggestions?
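As seen in the error message, your first element is '0\n53', whereas it should be '0' followed by '53'; that is why it cannot be parsed as a float. The file uses '\n' line endings, so splitting on '\r\n' leaves the lines joined together. Splitting on '\n' (or using splitlines()) should work.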

Seeking advice for classifying 3d mobile data

Project
I'm working on a project whose end goal will be to classify user input/behaviour on a smartphone, based on readings of 3D (accelerometer and gyroscope) movements (displacement, velocity, acceleration and jerk) as well as 2D interactions (velocity, acceleration, number of "touches" etc). My classifier will have to output whether the user is engaged/frustrated/perhaps other emotions subject to this paper. These aren't necessary at this step however.
Data
The smartphone produces JSON files, with the position of the phone in terms of x, y and z, as well as the timestamp. There's a new timestamp every 20milliseconds.
Idea
I've done some Python tutorials and produced the code at the bottom so far. I've been advised to use NumPy and SciPy to make my life easier. Aside from the obvious kinematics, I need to include a filtering process. Google suggested Kalman filters.
Question
If anyone has experience in the matter, can they recommend an approach to this, perhaps you've encountered a similar project with a nice methodology.
import sqlalchemy
import json
import ReferenceFrame, get_motion_params, dynamicsymbols, symbols
con = sqlalchemy.create_engine('postgresql+psycopg2://postgres:#localhost/airlib')
meta = sqlalchemy.MetaData(bind=con, reflect=True)
dataObjects = []
dataAccObjects = []
dataGyroObjects = []
displacementChangeInX = []
displacementChangeInY = []
displacementChangeInZ = []
t = [] #time
v = [] #velocity
a = [] #acceleration
j = [] #jerk
results = meta.tables['rawmobiledata']
class DataEntry(object):
item = 0
time = 0
x = 0
y = 0
z = 0
def __init__(self, item, time, x, y, z):
self.item = item
self.time = time
self.x = x
self.y = y
self.z = z
def make_accelerometerEntry(item, time, x, y, z):
dataAcc = DataEntry(item ,time, x, y, z)
#print str(dataAccObjects)
return dataAcc
def make_gyroEntry(item, time, x, y, z):
dataGyro = DataEntry(item, time, x, y, z)
#print str(dataGyroObjects)
return dataGyro
for row in con.execute(results.select()):
r = row[1]
r = json.dumps(r)
loaded_r = json.loads(r)
#print(loaded_r)
if loaded_r['sensor'] == 'accelerometer':
for item in range(0, 250):
time = loaded_r['data'][item]['time']
x = loaded_r['data'][item]['x']
y = loaded_r['data'][item]['y']
z = loaded_r['data'][item]['z']
dataAccObjects.append(make_accelerometerEntry(item, time, x, y, z))
#print "this is an accelerometerObj"
elif loaded_r['sensor'] == 'gyroscope':
for item in range(0, 250):
time = loaded_r['data'][item]['time']
x = loaded_r['data'][item]['x']
y = loaded_r['data'][item]['y']
z = loaded_r['data'][item]['z']
dataGyroObjects.append(make_gyroEntry(item, time, x, y, z))
#print "gyroObj with time " + str(time) + ", reading " + str(item+1) + "/250, test#" + str((row[0]/2)+1)
for row in con.execute(results.select()):
r = row[1]
r = json.dumps(r)
loaded_r = json.loads(r)
if loaded_r['sensor'] == 'gyroscope':
for item in range (0,250):
#rate of change
# ti = |t2-t1|
t[item] = abs(int(dataGyroObjects[item].time) - int(dataGyroObjects[item+1].time))
#calculate change in displacement over the 3 axes
displacementChangeInX[item] = abs(int(dataGyroObjects[item].x) - int(dataGyroObjects[item+1].x))
displacementChangeInY[item] = abs(int(dataGyroObjects[item].y) - int(dataGyroObjects[item+1].y))
displacementChangeInZ[item] = abs(int(dataGyroObjects[item].z) - int(dataGyroObjects[item+1].z))
# v = dx/dt
v[item] = abs(t[item] - 0)
#calculate acceleration
# a = dv/dx
#calculate jerk
# j = da/dt

Python- name not defined [duplicate]

This question already has answers here:
Why doesn't calling a string method (such as .replace or .strip) modify (mutate) the string?
(3 answers)
Closed 7 years ago.
I am having trouble getting my code to run. I keep getting the error that my x variable such as 'hsGPA' is not defined. Below is my code. I've tried the solutions posted on the other thread and none have helped, so please don't mark this as a duplicate. THANKS!
def readData(fileName):
hsGPA = [] #High School GPA
mathSAT = [] #Math SAT scores
crSAT = [] #Verbal SAT scores
collegeGPA = [] #College GPA
FullList=[]
inputFile = open(fileName, 'r', encoding = 'utf-8')
for line in inputFile:
FullList=line.split(',')
hsGPA.append(float(FullList[0]))
mathSAT.append(int(FullList[1]))
crSAT.append(int(FullList[2]))
collegeGPA.append(float(FullList[3]))
return hsGPA, mathSAT, crSAT, collegeGPA
def plotData(hsGPA, mathSAT, crSAT, collegeGPA):
GPA1 = [] #High School GPA
Score1 = [] #Math SAT scores
Score2= [] #Verbal SAT scores
GPA2 = [] #College GPA
hsGPA, mathGPA, crSAT, collegeGPA = readData('SAT.txt')
pyplot.figure(1)
pyplot.subplot(4,1,1)
for line in range(len(hsGPA)):
GPA1.append(line)
pyplot.plot(GPA1,hsGPA)
pyplot.subplot(4,1,2)
for line in range(len(mathSAT)):
Score1.append(line)
pyplot.plot(Score1,mathSAT)
pyplot.subplot(4,1,3)
for line in range(len(crSAT)):
Score2.append(line)
pyplot.plot(Score2,crSAT)
pyplot.subplot(4,1,4)
for line in range(len(collegeGPA)):
GPA2.append(line)
pyplot.plot(GPA2,collegeGPA)
pyplot.show()
def LinearRegression(xList, yList):
    '''
    This function finds the constants in y = m*x + b, the least-squares
    linear regression formula.
    xList - a list of the x values
    yList - a list of the y values
    m - the slope of the line
    b - where the line intercepts the y axis

    Returns the tuple (m, b).
    Raises ZeroDivisionError when xList is empty or all x values are equal
    (the slope is undefined in that case).
    '''
    n = len(xList)
    sumX = 0
    sumXX = 0
    sumXY = 0
    sumY = 0
    # the components needed to find m and b
    for index, x in enumerate(xList):
        y = yList[index]
        sumX += x
        sumXY += x * y
        sumXX += x ** 2
        sumY += y
    # actually implements the formula:
    #   m = (n*sum(xy) - sum(x)*sum(y)) / (n*sum(x^2) - sum(x)^2)
    # n multiplies only the sum(xy) and sum(x^2) terms, not the whole difference.
    m = (n * sumXY - sumX * sumY) / (n * sumXX - sumX ** 2)
    b = (sumY - (m * sumX)) / n
    return m, b
def plotRegression(x,y, xLabel, yLabel):
ScoreT = []
pyplot.scatter(x,y)
m,b = linearRegression(xList,yList)
minX = min(x)
maxX = max(x)
pyplot.plot([minX, maxX], [m * minX + b, m * maxX + b], color ='red')
pyplot.xlabel(xLabel)
pyplot.ylabel(yLabel)
pyplot.show()
for index in range(len(mathSAT)):
sumscore = mathSAT[index] + crSAT[index]
ScoreT.append(sumscore)
return ScoreT
def rSquared(x, y, m, b):
    """Return the coefficient of determination R^2 of the line y = m*x + b.

    x, y - lists of the observed x and y values
    m, b - slope and intercept of the fitted line

    R^2 = 1 - sumS / sumT, where sumS is the sum of squared residuals
    (observed y minus predicted y) and sumT is the sum of squared
    deviations of y from its mean.
    Raises ZeroDivisionError when y is empty or all y values are equal.
    """
    # Residual sum of squares against the supplied line.
    sumS = 0
    for xValue, yValue in zip(x, y):
        sumS += (yValue - ((m * xValue) + b)) ** 2
    # The mean must be computed from the complete sum before it is used.
    meanY = sum(y) / len(y)
    sumT = 0
    for yValue in y:
        sumT += (yValue - meanY) ** 2
    return 1 - (sumS / sumT)
def main():
print(readData('SAT.txt'))
plotData(*readData('SAT.txt'))
plotRegression(hsGPA,collegeGPA, 'highGPA', 'collegeGPA')
plotRegression(mathSAT,collegeGPA, 'highGPA' , 'collegeGPA')
plotRegression(crSAT,collegeGPA, 'highGPA' , 'collegeGPA')
plotRegression(ScoreT,collegeGPA, 'highGPA' , 'collegeGPA')
main()
It's giving the error in main, after plotRegression for each of the x variables. Please Help! Thanks!
Try this:
def plotRegression(x, y, xLabel, yLabel):
    """Scatter-plot y against x and overlay the fitted regression line.

    x, y - lists of values to plot
    xLabel, yLabel - axis labels
    """
    # I deleted ScoreT = [] here
    pyplot.scatter(x, y)
    # The fitting function in this script is spelled LinearRegression
    # (capital L); calling linearRegression raises NameError.
    m, b = LinearRegression(x, y)
    minX = min(x)
    maxX = max(x)
    # Two end points are enough to draw a straight line.
    pyplot.plot([minX, maxX], [m * minX + b, m * maxX + b], color='red')
    pyplot.xlabel(xLabel)
    pyplot.ylabel(yLabel)
    pyplot.show()
    # I deleted the loop and return statement here
# ....
def main():
data = readData('SAT.txt')
print(data)
plotData(*data)
hsGPA, mathSAT, crSAT, collegeGPA = data
# added ScoreT calculation here
ScoreT = [sum(x) for x in zip(mathSAT, crSAT)]
plotRegression(hsGPA,collegeGPA, 'highGPA', 'collegeGPA')
plotRegression(mathSAT,collegeGPA, 'highGPA' , 'collegeGPA')
plotRegression(crSAT,collegeGPA, 'highGPA' , 'collegeGPA')
plotRegression(ScoreT,collegeGPA, 'highGPA' , 'collegeGPA')
In your main(), hsGPA is never defined. It is defined inside another function and is not shared in the global context, so main cannot access it.
You need to get it from readData()'s return value.

Python Scatter plot

Got this question from 'how to think like a computer scientist' course:
Interpret the data file labdata.txt such that each line contains a an x,y coordinate pair. Write a function called plotRegression that reads the data from this file and uses a turtle to plot those points and a best fit line according to the following formulas:
$$y = \bar{y} + m\,(x - \bar{x})$$
$$m = \frac{\sum x_i y_i - n\,\bar{x}\,\bar{y}}{\sum x_i^2 - n\,\bar{x}^2}$$
http://interactivepython.org/runestone/static/thinkcspy/Files/Exercises.html?lastPosition=1308
My code doesn't seem to be working and I can't figure out why. It looks like Python is interpreting the data as str as opposed to float.
def plotregression(t):
labfile = open('labdata.txt','r')
sumx = 0
sumy = 0
count = 0
sumprod = 0
sumsqrx =0
sumsqrnx = 0
for i in labfile:
points = i.split()
print (points)
t.up()
t.setpos(points[0],points[1])
t.stamp()
sumx = sumx + int(points[0])
sumy = sumy + int(points[1])
prod = points[0]*int(points[1])
sumprod = sumprod + prod
count += 1
sqrx = int(points[0])**2
sumsqrx = sumsqrx + sqrx
sqrnx = int(points[0])**(-2)
sumsqrnx = sumsqrnx + sqrnx
avgx = sumx/count
avgy = sumy/count
m = (sumprod - count(avgx*avgy))/sumsqrx- (count(avgx**2))
print(m)
for bestline in labfile:
line = bestline.split()
y= avgy + m(int(line[0])-avgx)
t.down()
t.setpos(0,0)
t.setpos(line[0],y)
plotregression(kj)
Appreciate your help.
Thnx
I actually worked out the problem myself and it finally seems to be doing what I'm telling it to. But I would love to know if I can cut out any unnecessary lines of code. I think it's a bit too long and that I'm missing something which would make this simpler to do.
import turtle
wn= turtle.Screen()
kj = turtle.Turtle()
kj.shape('circle')
kj.turtlesize(0.2)
kj.color('blue')
kj.speed(1)
def plotregression(t):
sumx = 0
sumy = 0
count = 0
sumprod = 0
sumsqrx =0
labfile = open('labdata.txt','r')
for i in labfile:
points = i.split()
print (points)
t.up()
t.setpos(int(points[0]),int(points[1]))
t.stamp()
sumx = sumx + int(points[0])
sumy = sumy + int(points[1])
prod = int(points[0])*int(points[1])
sumprod = sumprod + prod
count += 1
sqrx = int(points[0])**2
sumsqrx = sumsqrx + sqrx
avgx = sumx/count
avgy = sumy/count
m = (sumprod - count*(avgx*avgy))/(sumsqrx- (count*(avgx**2)))
print('M is: ',m )
labfile.close()
labfile = open('labdata.txt','r')
besttfit = open('bestfit.txt','w')
for bestline in labfile:
line = bestline.split()
y = avgy + m*(int(line[0])-avgx)
print('y is:' ,y)
besttfit.write((line[0])+'\t'+str(y)+'\n')
labfile.close()
besttfit.close()
bestfitline = open('bestfit.txt','r')
for regline in bestfitline:
reg = regline.split()
t.goto(float(reg[0]),float(reg[1]))
t.down()
t.write('Best fit line')
bestfitline.close()
wn.setworldcoordinates(-10,-10,120,120)
figure = plotregression(kj)
wn.exitonclick()
please let me know if i can cut down anywhere
I was solving the same problem from the interactive Python course. Here is how I did it.
import turtle
def plotRegression(data):
    """Plot the points in ``data`` with a turtle and draw the best-fit line.

    data - a sequence of (x, y) pairs; each value may be a number or a
           numeric string (e.g. the result of ``line.split()``).

    The slope is m = (sum(x*y) - n*x_bar*y_bar) / (sum(x^2) - n*x_bar^2)
    and the line is y = y_bar + m*(x - x_bar).
    """
    win = turtle.Screen()
    win.bgcolor('pink')
    t = turtle.Turtle()
    t.shape('circle')
    t.turtlesize(0.2)

    # Read from the ``data`` argument, not from a module-level variable.
    points = [(float(pair[0]), float(pair[1])) for pair in data]
    x_list = [x for x, _ in points]
    y_list = [y for _, y in points]
    n = len(points)

    x_bar = sum(x_list) / n
    y_bar = sum(y_list) / n
    x_list_square_sum = sum(x ** 2 for x in x_list)
    xy_list_sum = sum(x * y for x, y in points)
    m = (xy_list_sum - n * x_bar * y_bar) / (x_list_square_sum - n * x_bar ** 2)

    # best y
    y_best = [y_bar + m * (x - x_bar) for x in x_list]

    # plot points
    win.setworldcoordinates(0, 0, max(x_list), max(y_list))
    for x, y in points:
        t.penup()
        t.setposition(x, y)
        t.stamp()

    # plot best y: move to the first fitted point with the pen up, then draw
    t.penup()
    t.setposition(0, 0)
    t.color('blue')
    for x, y in zip(x_list, y_best):
        t.setposition(x, y)
        t.pendown()
    win.exitonclick()
with open('files/labdata.txt', 'r') as f:
plot_data = [aline.split() for aline in f]
plotRegression(plot_data)
I am about 5 years too late but here is my two cents.
The problem might be in the line:
t.setpos(points[0],points[1])
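This is telling the turtle to go to the string values of points[0] and points[1] instead of to numbers.
For example, if points[0] stores the value "50" and points[1] holds the value "60", the turtle receives the strings "50" and "60"; strings do not behave like numbers (for instance, "50" + "60" would return the string "5060", not 110).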
This line might have problems as well:
prod = points[0]*int(points[1])
This is multiplying the string value in points[0] by the integer value in points[1].
In this case, using the previous values, points[0] would be "50" and int(points[1]) would be 60. That is 60 and not "60". Multiplying the string "50" by the integer 60 repeats the string 60 times instead of producing the number 3000, so both values need to be converted with int() first.
Here is how I worked out the problem:
import turtle
import math
import statistics as stats
def get_line(means, slope, xlist):
    """Return a list of best y values.

    means - tuple (xmean, ymean)
    slope - the slope m of the best-fit line
    xlist - the x coordinates to evaluate

    Each value is y = ymean + slope * (x - xmean); the x mean is
    subtracted (not added) so the line passes through (xmean, ymean).
    """
    xmean = means[0]
    ymean = means[1]
    return [ymean + slope * (x - xmean) for x in xlist]
def get_mtop(xlist, ylist, n, means):
    """Return top half of m expression.

    Computes sum(x*y) - n * xmean * ymean, where ``means`` is the tuple
    (xmean, ymean) and ``n`` is the number of points.
    """
    xbyy_sum = sum(x * y for x, y in zip(xlist, ylist))
    nby_means = n * (means[0] * means[1])
    return xbyy_sum - nby_means
def get_mbot(xlist, n, means):
    """Return bottom half of m expression.

    Computes sum(x**2) - n * xmean**2, where ``means`` is the tuple
    (xmean, ymean) and ``n`` is the number of points.
    """
    sqr_sum = sum(x ** 2 for x in xlist)
    nbymean_sqr = n * means[0] ** 2
    return sqr_sum - nbymean_sqr
def get_mean(xlist, ylist):
    """Compute the mean of each coordinate list.

    Returns the pair (xmean, ymean), using the statistics module imported
    as ``stats``.
    """
    return stats.mean(xlist), stats.mean(ylist)
def plotRegression(input_file, input_turtle):
    """Draw the plot regression.

    input_file   - path of a text file with one whitespace-separated
                   integer "x y" pair per line
    input_turtle - the turtle used to stamp the points and draw the line

    Stamps every point, prints the means and the slope, then draws the
    values returned by get_line in green.
    """
    # Get a list of xcoor and a list of ycoor; the with-block closes the file.
    xcoor = []
    ycoor = []
    with open(input_file, 'r') as infile:
        for line in infile:
            coor = line.split()
            if not coor:
                # Skip blank lines (e.g. a trailing newline at end of file).
                continue
            xcoor.append(int(coor[0]))
            ycoor.append(int(coor[1]))

    input_turtle.shape("circle")
    input_turtle.penup()

    # Plot and count the points
    num_points = 0
    for x, y in zip(xcoor, ycoor):
        input_turtle.goto(x, y)
        input_turtle.stamp()
        num_points += 1

    # Get the mean values of the xcoor and ycoor lists
    means_tup = get_mean(xcoor, ycoor)
    print(means_tup)

    # Get the value for M
    mtop = get_mtop(xcoor, ycoor, num_points, means_tup)
    mbot = get_mbot(xcoor, num_points, means_tup)
    m = mtop / mbot
    print(m)

    # Draw the line: move to the first fitted point with the pen still up,
    # then lower the pen and visit every fitted point.
    yline = get_line(means_tup, m, xcoor)
    input_turtle.color("green")
    input_turtle.goto(xcoor[0], yline[0])
    input_turtle.pendown()
    for x, y in zip(xcoor, yline):
        print(x, y)
        input_turtle.goto(x, y)
    input_turtle.hideturtle()
def main():
    """Set up the screen and a turtle, then run the regression plot."""
    # Screen: world coordinates 0..100 on both axes, black background.
    screen = turtle.Screen()
    screen.setworldcoordinates(0, 0, 100, 100)
    screen.bgcolor("black")

    # The drawing turtle.
    pen = turtle.Turtle()
    pen.color("purple")

    # Plot the data file and wait for a click before closing the window.
    data_path = """C:\\Users\\user\\pathtofile\\labdata.txt"""
    plotRegression(data_path, pen)
    screen.exitonclick()
if __name__ == "__main__":
main()
I don't know if this is the correct slope, but it seems to be in the right direction. Hopefully this helps someone who has the same problem.

Categories

Resources