How to add two lists of dictionaries to jsonify? - python

I have 2 lists full of dictionaries in a FLASK app code. I'd like to return a response of type JSON with the contents of the two lists.
I'm trying to calculate distances between two locations by their LONGITUDE/LATITUDE properties from an input CSV file, and to respond with a JSON type file filled with the results.
Below is the processing and POST part of the application.
When I run it - I receive a TypeError : unhashable type: 'list' on the last line - return jsonify({points : points, links : links}).
##app.route("/get_address", methods = ['POST'])
def process_file(points):
def upload_file():
if request.method == 'POST':
points = request.files['file']
points.save(secure_filename(points.filename))
return points
def calculate_distance(lat1, lon1, lat2, lon2):
    """Return the haversine (great-circle) distance between two points.

    Args:
        lat1, lon1: latitude/longitude of the first point, in degrees.
        lat2, lon2: latitude/longitude of the second point, in degrees.

    Returns:
        The distance in metres (the radius below is 6371 km * 1000).
    """
    # approximate radius of earth in metres (the value is 6371 km, not mm)
    radius = 6371.0 * 1000
    half_dlat = radians(lat2 - lat1) / 2
    half_dlon = radians(lon2 - lon1) / 2
    a = sin(half_dlat) ** 2 \
        + cos(radians(lat1)) * cos(radians(lat2)) * sin(half_dlon) ** 2
    # Rounding can push `a` a hair outside [0, 1] for identical or antipodal
    # points; clamp so sqrt(1 - a) never receives a negative argument.
    a = min(1.0, max(0.0, a))
    c = 2 * atan2(sqrt(a), sqrt(1 - a))
    return radius * c
points = upload_file()
with open(points, newline='') as csvfile:
reader = csv.DictReader(csvfile)
points = []
links = []
for row in reader:
p = {"name": list(row.items())[0][1], "address (La/Lo)": (list(row.items())[1][1], list(row.items())[2][1])}
points.append(p)
l = []
for point in points:
for v in point.values():
l.append(v)
links = []
for i in range(0, len(l) -1 , 2):
if(i>0):
for j in range(i-2, 0, -2):
links.append({'name': l[i] + l[j], 'distance': calculate_distance(float(l[i+1][0]), float(l[i+1][1]), float(l[j+1][0]), float(l[j+1][1]))})
else:
for j in range(i+2, len(l), 2):
links.append({'name': l[i] + l[j], 'distance': calculate_distance(float(l[i+1][0]), float(l[i+1][1]), float(l[j+1][0]), float(l[j+1][1]))})
return jsonify({points : points, links : links})

The keys should be strings:
return jsonify({"points": points, "links": links})
{points : points, links : links} would be the right thing in Javascript.
Alternatively you can make a dict with string keys like so:
dict(points=points, links=links)
According to the jsonify docs, you can do this directly:
return jsonify(points=points, links=links)

Related

How to read data in a class to solve a tabu search problem

I'm trying to learn search algorithms in preparation for my master's thesis. I have a TSP problem in which I want to find the shortest route that visits all the states. I'm using a .txt file named cities__coordinates.txt that contains the coordinates of every state. To read the data, I found this source code, which has a class for reading the data:
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import copy
import re
import math
class Data():
    '''
    Container for one routing instance in the Solomon text format.

    After read_data() the instance holds, per node, the coordinates,
    demand, time window and service time, plus a node-to-node distance
    matrix computed from the coordinates.
    '''
    def __init__(self):
        self.customerNum = 0   # the number of customers
        self.nodeNum = 0       # the sum of customers and depots
        self.vehicleNum = 0
        self.capacity = 0
        self.cor_X = []
        self.cor_Y = []
        self.demand = []
        self.readyTime = []
        self.dueTime = []
        self.serviceTime = []
        self.disMatrix = {}

    def read_data(self, path, customerNum, depotNum):
        '''
        function to read solomon data from a .txt file, notice that it must be
        a solomon dataset: line 5 holds "vehicle-number capacity" and the node
        rows start at line 10 with the columns
        "id x y demand ready-time due-time service-time".
        INPUT
            # path : path of the data file (this argument is now honoured;
            #        the file name used to be hard-coded)
            # customerNum : the number of customers
            # depotNum : the number of depots
        OutPut : none (the attributes of this object are filled in)
        RAISES
            # IndexError : a node row has fewer than 7 columns, or the file
            #              has fewer node rows than customerNum + depotNum
        '''
        self.customerNum = customerNum
        self.nodeNum = customerNum + depotNum
        # Start from empty lists so a second call does not append to old data.
        self.cor_X, self.cor_Y, self.demand = [], [], []
        self.readyTime, self.dueTime, self.serviceTime = [], [], []
        # `with` closes the file even when a row fails to parse.
        with open(path, 'r') as f:
            for count, line in enumerate(f, start=1):
                # split() ignores leading/trailing blanks and the newline, so
                # a last line without "\n" no longer loses its final character.
                fields = line.split()
                if count == 5:
                    self.vehicleNum = float(fields[0])
                    self.capacity = float(fields[1])
                elif 10 <= count <= 10 + customerNum:
                    if len(fields) < 7:
                        raise IndexError(
                            'line %d of %r has %d column(s); expected 7 '
                            '(id x y demand ready due service)'
                            % (count, path, len(fields)))
                    self.cor_X.append(float(fields[1]))
                    self.cor_Y.append(float(fields[2]))
                    self.demand.append(float(fields[3]))
                    self.readyTime.append(float(fields[4]))
                    self.dueTime.append(float(fields[5]))
                    self.serviceTime.append(float(fields[6]))
        # compute the distance matrix (Euclidean distance truncated to int)
        self.disMatrix = {}
        for i in range(0, self.nodeNum):
            dis_temp = {}
            for j in range(0, self.nodeNum):
                dis_temp[j] = int(math.hypot(self.cor_X[i] - self.cor_X[j],
                                             self.cor_Y[i] - self.cor_Y[j]))
            self.disMatrix[i] = dis_temp

    def plot_nodes(self):
        '''
        Description: function to plot every node at its coordinates; the
        first node is drawn in red, the others in gray.
        '''
        Graph = nx.DiGraph()
        nodes_name = [str(x) for x in range(self.nodeNum)]
        Graph.add_nodes_from(nodes_name)
        cor_xy = np.array([self.cor_X, self.cor_Y]).T.astype(int)
        pos_location = {nodes_name[i]: x for i, x in enumerate(cor_xy)}
        nodes_color_dict = ['r'] + ['gray'] * (self.nodeNum - 1)
        nx.draw_networkx(Graph, pos_location, node_size=200,
                         node_color=nodes_color_dict, labels=None)
        # pyplot.show() takes no figure/graph argument.
        plt.show()

    def plot_route(self, route, color='k'):
        '''
        Description: function to plot the closed tour 0 -> route... -> 0.
        INPUT
            # route : non-empty sequence of node indices (without node 0)
            # color : edge color handed to networkx
        '''
        Graph = nx.DiGraph()
        nodes_name = [0]
        cor_xy = [[self.cor_X[0], self.cor_Y[0]]]
        edges = [[0, route[0]]]
        previous = None
        for i in route:
            nodes_name.append(i)
            cor_xy.append([self.cor_X[i], self.cor_Y[i]])
            if previous is not None:
                # edge between two consecutive stops of the route
                edges.append([previous, i])
            previous = i
        edges.append([route[-1], 0])
        Graph.add_nodes_from(nodes_name)
        Graph.add_edges_from(edges)
        pos_location = {nodes_name[i]: x for i, x in enumerate(cor_xy)}
        nodes_color_dict = ['r'] + ['gray'] * (len(route))
        nx.draw_networkx(Graph, pos_location, node_size=200,
                         node_color=nodes_color_dict, edge_color=color,
                         labels=None)
        # pyplot.show() takes no figure/graph argument.
        plt.show()
In the read_data function I changed the path so that it points to my .txt file. The code that computes all the distances and runs the tabu search (and everything else) is here:
from itertools import combinations
import os,sys,copy
import numpy as np
import time
from Datareader import Data
import matplotlib.pyplot as plt
class Tabu():
    """Tabu-search heuristic for a closed tour that starts and ends at node 0."""

    def __init__(self,disMatrix,max_iters=200,maxTabuSize=20):
        """parameters definition

        disMatrix   : disMatrix[a][b] is the distance from node a to node b
        max_iters   : number of iterations performed by each tabu_search() call
        maxTabuSize : maximum number of moves remembered in the tabu list
        """
        self.disMatrix = disMatrix
        self.maxTabuSize = maxTabuSize
        self.max_iters = max_iters
        self.tabu_list = []

    def get_route_distance(self, route):
        '''
        Description: function to calculate total distance of a route. evaluate function.
        parameters: route : list of node indices (node 0 excluded)
        return : total distance of 0 -> route... -> 0
        '''
        stops = [0, *route, 0]  # add the start and end point
        return sum(self.disMatrix[a][b] for a, b in zip(stops, stops[1:]))

    def exchange(self, s1, s2, arr):
        """
        function to Swap positions of two elements in an arr
        Args: int,int,list
            s1 : target 1
            s2 : target 2
            arr : target array (left unmodified)
        Ouput: list
            a new list with s1 and s2 swapped
        """
        current_list = list(arr)
        index1, index2 = current_list.index(s1), current_list.index(s2)  # get index
        current_list[index1], current_list[index2] = current_list[index2], current_list[index1]
        return current_list

    def generate_initial_solution(self, num=10, mode='greedy'):
        """
        function to get the initial solution,there two different way to generate route_init.
        Args:
            num : int
                the number of points
            mode : string
                "greedy" : advance step by step, always moving to the nearest
                           unvisited node, starting from node 0
                "random" : randomly generate a series number
        Ouput: list
            s_init : initial solution route_init
        Raises:
            ValueError : mode is neither "greedy" nor "random"
        """
        if mode == 'greedy':
            route_init = [0]
            for _ in range(num):
                # Measure from the node reached last, not from the loop
                # counter, so the route really follows nearest neighbours.
                current = route_init[-1]
                candidates = [j for j in range(num + 1) if j not in route_init]
                route_init.append(
                    min(candidates, key=lambda j: self.disMatrix[current][j]))
            route_init.remove(0)
            return route_init
        if mode == 'random':
            route_init = np.arange(1, num + 1)  # init solution from 1 to num
            np.random.shuffle(route_init)       # shuffle the list randomly
            return route_init.tolist()
        raise ValueError("mode must be 'greedy' or 'random', got %r" % (mode,))

    def tabu_search(self, s_init):
        """tabu search

        Args:
            s_init : list, the starting route
        Ouput: (list, list)
            s_best : the best route found
            routes : s_init followed by the best candidate after every iteration
        """
        s_best = s_init
        bestCandidate = list(s_best)
        best_candidate_cost = self.get_route_distance(bestCandidate)
        s_best_cost = best_candidate_cost
        routes = [s_best]
        temp_tabu = None
        # Iterate over a range instead of counting self.max_iters down, so the
        # object can be searched again and a negative value cannot loop forever.
        for _ in range(self.max_iters):
            for s in combinations(s_best, 2):
                if s in self.tabu_list:
                    continue
                sCandidate = self.exchange(s[0], s[1], s_best)  # exchange number to generate candidates
                candidate_cost = self.get_route_distance(sCandidate)
                if candidate_cost < best_candidate_cost:
                    bestCandidate, best_candidate_cost = sCandidate, candidate_cost
                    temp_tabu = s
            if best_candidate_cost < s_best_cost:  # record the best solution
                s_best, s_best_cost = bestCandidate, best_candidate_cost
            # Only real moves are remembered (no empty placeholder entry).
            if temp_tabu is not None and temp_tabu not in self.tabu_list:
                self.tabu_list.append(temp_tabu)
            if len(self.tabu_list) > self.maxTabuSize:
                self.tabu_list.pop(0)
            routes.append(bestCandidate)
        return s_best, routes
if __name__ == "__main__":
    data = Data()
    data.read_data(path='cities__coordinates.txt',customerNum=100,depotNum=1) # change the path
    # Tabu parameters:
    #   disMatrix   : the distance matrix from 0 to X, where 0 represents the
    #                 starting and stopping point.
    #                 for example: disMatrix = [[0,3,4,...
    #                                           1,0,5,...
    #                                           3,5,0,...]]
    #                 means the distance from 0 to 0 is 0, from 0 to 1 is 3,
    #                 ... from 1 to 2 is 5 ...
    #   max_iters   : maximum iterations
    #   maxTabuSize : maximum size of the tabu list
    tsp = Tabu(disMatrix=data.disMatrix,max_iters=10,maxTabuSize=10)
    # two different way to generate initial solution
    # num : the number of points
    s_init = tsp.generate_initial_solution(num=10,mode='greedy') # mode = "greedy" or "random"
    print('init route : ' , s_init)
    print('init distance : ' , tsp.get_route_distance(s_init))
    start = time.time()
    best_route , routes = tsp.tabu_search(s_init) # tabu search
    end = time.time()
    print('best route : ' , best_route)
    print('best best_distance : ' , tsp.get_route_distance(best_route))
    print('the time cost : ',end - start )
    # plot the result changes with iterations
    results = [tsp.get_route_distance(i) for i in routes]
    plt.plot(np.arange(len(results)) , results)
    plt.show()
    # plot the route
    data.plot_route(best_route)
when I execute it, it takes a little time and then it shows me this error :
Traceback (most recent call last):
File "C:/Users/malle/OneDrive/Desktop/TS.py", line 100, in <module>
data.read_data(path='cities__coordinates.txt',customerNum=100,depotNum=1) # change the path
File "C:/Users/malle/OneDrive/Desktop\Datareader.py", line 49, in read_data
self.cor_X.append(float(str[2]))
IndexError: list index out of range
anyone can help to resolve this problem please ?

How to avoid out of memory python?

I'm new to Python and Ubuntu. My process gets killed while running my Python code. The input file is around 2.7 GB, and I have 16 GB of RAM and a 1 TB hard drive ... What should I do to avoid this? From what I found while searching, it seems to be an out-of-memory problem.
I used this command
free -mh
I got
total used free shared buff/cache available
Mem: 15G 2.5G 9.7G 148M 3.3G 12G
Swap: 4.0G 2.0G 2.0G
the code link I tried Link
import numpy as np
import matplotlib.pyplot as plt
class ProcessData(object):
    '''
    Helpers that turn a file of pairwise distances into per-point density
    values (cut-off or Gaussian kernel), nearest-denser-point distances and
    a density/distance scatter plot.
    '''

    def data_process(self, folder):
        '''
        Load pairwise distances from a whitespace-separated text file.

        :folder: data file path; every non-blank line holds "i j distance"
        :rtype: dict pair distance, keyed by (i, j) and (j, i), with
                (k, k) -> 0 added for every k in 1..MAX id
                MAX id number
        '''
        # NOTE: two tuple-keyed entries are stored per input line, so the
        # dict needs far more memory than the file occupies on disk.
        distance = dict()
        max_pt = 0
        with open(folder, 'r') as data:
            for line in data:
                fields = line.split()
                if not fields:
                    # tolerate blank lines (e.g. a trailing empty line)
                    continue
                i, j, dis = fields
                i, j, dis = int(i), int(j), float(dis)
                distance[(i, j)] = dis
                distance[(j, i)] = dis
                max_pt = max(i, j, max_pt)
        for num in range(1, max_pt + 1):
            distance[(num, num)] = 0
        return distance, max_pt

    def entropy(self, distance, maxid, factor):
        '''
        :distance: dict with pair: dist
        :factor: impact factor
        :maxid: max elem number
        :rtype: entropy H in data field
        '''
        potential = dict()
        for i in range(1, maxid + 1):
            potential[i] = sum(np.exp(-pow(distance[(i, j)] / factor, 2))
                               for j in range(1, maxid + 1))
        z = sum(potential.values())
        H = 0
        for i in range(1, maxid + 1):
            x = potential[i] / z
            H += x * np.log(x)
        return -H

    def threshold(self, dist, max_id):
        '''
        Scan 50 factors in [0.330, 0.430] and keep the one with the
        smallest entropy.

        :rtype: factor value makes H smallest, scaled by 3 / sqrt(2)
        :raises ValueError: no scanned factor produced a usable entropy
        '''
        # Start from +inf rather than a fixed 10.0 so that data sets whose
        # entropy never drops below 10 still yield a threshold.
        entro = float('inf')
        thresh = None
        # given data:
        # 0.02139999999999999 7.203581306901208
        # 0.02149999999999999 7.203577254067677
        # 0.02159999999999999 7.203577734107922
        # generate data:
        # 0.367020, 6.943842
        # 0.368959, 6.943840
        # 0.370898, 6.943841
        scape = np.linspace(0.330, 0.430, 50)
        # for general data use the following line instead:
        # scape = np.linspace(0.001, 1.001, 100)
        for factor in scape:
            value = self.entropy(dist, max_id, factor)
            print('factor: {0:.6f}, entropy: {1:.8f}'.format(factor, value))
            if value and value < entro:
                entro, thresh = value, factor
        if thresh is None:
            raise ValueError('no factor in the scanned range gave a usable entropy')
        thresh = 3 * thresh / pow(2, 0.5)
        print('current: ', entro, thresh)
        # given data: 7.203577254067677 0.04560838738653229
        # generate data: 6.943840312796875 0.7828967189629044
        return thresh

    def CutOff(self, distance, max_id, threshold):
        '''
        Count, for every point, how many points lie closer than threshold.

        :rtype: list with Cut-off kernel values by desc
        '''
        cut_off = dict()
        for i in range(1, max_id + 1):
            cut_off[i] = sum(0 if distance[(i, j)] - threshold >= 0 else 1
                             for j in range(1, max_id + 1))
        sorted_cutoff = sorted(cut_off.items(), key=lambda k: k[1], reverse=True)
        return sorted_cutoff

    def Guasse(self, distance, max_id, threshold):
        '''
        :rtype: list with Gaussian kernel values by desc
        '''
        guasse = dict()
        for i in range(1, max_id + 1):
            guasse[i] = sum(np.exp(-pow((distance[(i, j)] / threshold), 2))
                            for j in range(1, max_id + 1))
        sorted_guasse = sorted(guasse.items(), key=lambda k: k[1], reverse=True)
        return sorted_guasse

    def min_distance(self, distance, srt_dens, maxid):
        '''
        :srt_dens: desc sorted list with density values (point, density)
        :rtype: min distance dict
                min number dict
        '''
        nearest_dist = dict()
        nearest_num = dict()
        # The densest point has no denser neighbour: it gets its largest
        # distance to any point and the neighbour id 0.
        h_dens = srt_dens[0][0]
        nearest_num[h_dens] = 0
        max_dist = -1
        for i in range(1, maxid + 1):
            max_dist = max(distance[(h_dens, i)], max_dist)
        nearest_dist[h_dens] = max_dist
        for j in range(1, len(srt_dens)):
            # NOTE(review): the search starts from 1, so distances >= 1 are
            # never recorded - presumably the data is scaled below 1; confirm.
            min_dist, min_num = 1, 0
            current_num = srt_dens[j][0]
            for k in srt_dens[0:j]:
                current_dist = distance[(current_num, k[0])]
                if current_dist < min_dist:
                    min_dist, min_num = current_dist, k[0]
            nearest_dist[current_num] = min_dist
            nearest_num[current_num] = min_num
        return nearest_dist, nearest_num

    def make_pair(self, srt_dens, min_dist, maxid):
        '''
        :rtype: pair dict with {point: [density, min dist]}
                refer factor dict with {point: density * dist}
                None (after printing how many are missing) when srt_dens
                does not cover exactly maxid points
        '''
        # convert list to dict
        dens_dict = {elem[0]: elem[1] for elem in srt_dens}
        if len(dens_dict) != maxid:
            # Subtract the number of entries, not the dict itself, and
            # actually format the count into the message.
            print('missing %d value' % (maxid - len(dens_dict)))
            return None
        pair_dict = dict()
        refer_dict = dict()
        for key, density in dens_dict.items():
            pair_dict[key] = [density, min_dist[key]]
            refer_dict[key] = density * min_dist[key]
        return pair_dict, refer_dict

    def show_pair_info(self, pair, threshold):
        '''
        Scatter density against min distance and save the figure as
        './images/Decision Graph Cutoff test.png'.
        '''
        # Points sharing a density value collapse onto one entry here.
        show_dict = dict()
        for p in pair.values():
            show_dict[p[0]] = p[1]
        tmp = sorted(show_dict.items())
        dens, mdis = zip(*tmp)
        plt.scatter(dens, mdis)
        plt.xlabel(r'$\rho$')
        plt.ylabel(r'$\delta$')
        plt.title(r'$d_c=$' + str(threshold))
        plt.savefig('./images/Decision Graph Cutoff test.png')
        plt.close()
I tried to track the problem down using fil-profile, and it reported a problem at line 11, which is inside data_process.
An issue could be f.readlines() as it creates a complete list.
So if COOR_DATA is very large then you should only create memory for one line at a time, so try changing:
with open(COOR_DATA, 'r', encoding='utf-8') as f:
lines = f.readlines()
coords = dict()
for line in lines:
To:
with open(COOR_DATA, 'r', encoding='utf-8') as f:
coords = dict()
for line in f:
See https://docs.python.org/3/tutorial/inputoutput.html#methods-of-file-objects

Sending a Polynomial to PARI/GP from Python (ctypes)

I would like to call the nfroots({nf}; x) function of PARI/GP from Python (see function no. 3.13.135 on page 371 in this link). The problem is that I can't work out how to send the algebraic expression or polynomial that needs to be passed, for example x^2-7x+12. Here is a very simple example of what gp can do with a quadratic polynomial:
> V = readvec("coeff.txt");
> print(V)
[1,-7,12]
> P = Pol(V); # I get following error when I use Pol in my code: func=self._FuncPtr((name_or_ordinal, self)) AttributeError: function 'pol' not found
> print(P)
x^2 -7*x +12
> print(nfroots(,P))
>4, 3
From the answer of Stephan Schlecht (click here), I manage to write -
from ctypes import *
pari = cdll.LoadLibrary("C:\\Program Files\\Pari64-2-11-3\\libpari.dll")
pari.stoi.restype = POINTER(c_long)
pari.cgetg.restype = POINTER(POINTER(c_long))
pari.nfroots.restype = POINTER(POINTER(c_long))
pari.pari_init(2 ** 19, 0)
def t_vec(numbers):
l = len(numbers) + 1
p1 = pari.cgetg(c_long(l), c_long(10)) #t_POL = 10,
for i in range(1, l):
p1[i] = pari.stoi(c_long(numbers[i - 1]))
return p1
def main():
h = "x^2-7x+12"
res = pari.nfroots(t_vec(h))
for i in range(1, len(res)):
print(pari.itos(res[i]))
if __name__ == '__main__':
main()
Note that PARI objects have to be created through a specific process (see the answer of Stephan Schlecht). I changed the type value to t_POL = 10, but the code didn't work. How can I execute the above PARI/GP code from Python?
One solution could be:
use gtopoly, return type is POINTER(c_long)
return type of nfroots is POINTER(POINTER(c_long))
output of result with .pari_printf
Code
from ctypes import *
pari = cdll.LoadLibrary("libpari.so")
pari.stoi.restype = POINTER(c_long)
pari.cgetg.restype = POINTER(POINTER(c_long))
pari.gtopoly.restype = POINTER(c_long)
pari.nfroots.restype = POINTER(POINTER(c_long))
(t_VEC, t_COL, t_MAT) = (17, 18, 19) # incomplete
precision = c_long(38)
pari.pari_init(2 ** 19, 0)
def t_vec(numbers):
    """Allocate a PARI object of type t_VEC and fill it with the given integers.

    The object is allocated with ``len(numbers) + 1`` words; slots 1..len(numbers)
    receive ``stoi`` of each number, slot 0 is left as cgetg set it up.
    """
    size = len(numbers) + 1
    vec = pari.cgetg(c_long(size), c_long(t_VEC))
    for slot, number in enumerate(numbers, start=1):
        vec[slot] = pari.stoi(c_long(number))
    return vec
def main():
    """Turn the coefficients (1, -7, 12) into a polynomial and print its nfroots result."""
    coefficients = (1, -7, 12)
    coeff_vector = t_vec(coefficients)
    polynomial = pari.gtopoly(coeff_vector, c_long(-1))
    roots = pari.nfroots(None, polynomial)
    # %Ps is handed to pari_printf together with the result object
    output_format = bytes("%Ps\n", "utf8")
    pari.pari_printf(output_format, roots)
if __name__ == '__main__':
main()
Test
If you run the program you get the desired output in the debug console:
[3, 4]
Conversions
With glength one can determine the length of a vector, see
https://pari.math.u-bordeaux.fr/dochtml/html/Conversions_and_similar_elementary_functions_or_commands.html#length
With itos a long can be returned if the parameter is of type t_INT, see section 4.4.6 of https://pari.math.u-bordeaux.fr/pub/pari/manuals/2.7.6/libpari.pdf.
In code it would look like this:
pari.glength.restype = c_long
pari.itos.restype = c_long
...
print("elements as long (only if of type t_INT): ")
for i in range(1, pari.glength(res) + 1):
print(pari.itos(res[i]))
GENtostr gives a string representation of the argument. It could be used like so:
pari.GENtostr.restype = c_char_p
...
print("elements as generic strings: ")
for i in range(1, pari.glength(res) + 1):
print(pari.GENtostr(res[i]).decode("utf-8"))
There are many more conversion options, see the two links above.

Parsing data from a file

I have been provided with a file containing data on recorded sightings of species, which is laid out in the format;
"Species", "\t", "Latitude", "\t", "Longitude"
I need to define a function that will load the data from the file into a list, splitting every line into three components: species name, latitude and longitude.
This is what i have but it is not working:
def LineToList(FileName):
    """Return the lines of FileName as a list of strings without trailing whitespace.

    The file is closed even if reading fails.  The per-character loop that
    built an unused EntryList has been dropped; the returned value is still
    the list of stripped lines.
    """
    with open(FileName, "r") as FileIn:
        DataList = [Line.rstrip() for Line in FileIn]
    return DataList
LineToList("Mammal.txt")
print(DataList[1])
I need the data on each line to be separated so that i can use it later to calculate where the species was located within a certain distance of a given location.
Sample Data:
Myotis nattereri 54.07663633 -1.006446707
Myotis nattereri 54.25637837 -1.002130504
Myotis nattereri 54.25637837 -1.002130504
I am trying to print one line of the data set to test whether it is splitting correctly, but nothing shows up in the shell.
Update:
This is the code i am working with now;
def LineToList(FileName):
    """Load a tab-separated sightings file into a list of entries.

    Every non-blank line becomes one list of its tab-separated fields,
    e.g. [species, latitude, longitude]; all fields are kept as strings.
    The whole file is read (the function used to return from inside the
    loop after the first line) and it is closed when reading finishes.
    """
    EntryList = []
    with open(FileName, "r") as FileIn:
        for Line in FileIn:
            Line = Line.rstrip()
            if not Line:
                # skip empty lines instead of producing a [''] entry
                continue
            EntryList.append(Line.split("\t"))
    return EntryList
def CalculateDistance(Lat1, Lon1, Lat2, Lon2):
    """Return the haversine distance between two points given in degrees.

    Each argument may be a number or a numeric string (it is passed through
    float()).  The result is in the unit of the radius constant 6372.797
    used below (kilometres for an Earth radius).
    """
    # math.radians replaces the truncated constant 0.017453293 (pi / 180).
    Lat1 = math.radians(float(Lat1))
    Lon1 = math.radians(float(Lon1))
    Lat2 = math.radians(float(Lat2))
    Lon2 = math.radians(float(Lon2))
    nDLat = Lat1 - Lat2
    nDLon = Lon1 - Lon2
    nA = (math.sin(nDLat / 2) ** 2) + math.cos(Lat1) * math.cos(Lat2) * (math.sin(nDLon / 2) ** 2)
    # Rounding can push nA marginally outside [0, 1]; clamp it so that
    # sqrt(1 - nA) cannot raise a math domain error.
    nA = min(1.0, max(0.0, nA))
    nC = 2 * math.atan2(math.sqrt(nA), math.sqrt(1 - nA))
    nD = 6372.797 * nC
    return nD
DataList = LineToList("Mammal.txt")
for Line in DataList:
LocationCount = 0
CalculateDistance(Entry[1], Entry[2], 54.988056, -1.619444)
if CalculateDistance <= 10:
LocationCount += 1
print("Number Recordings within Location Range:", LocationCount)
When running the programme come up with an error:
CalculateDistance(Entry[1], Entry[2], 54.988056, -1.619444) NameError: name 'Entry' is not defined
I saw "Biological Sciences" in your profile and just because of that i would recommend you to take a closer look at Pandas module.
It can be very easy:
import pandas as pd
df = pd.read_csv('mammal.txt', sep='\t',
names=['species','lattitude','longitude'],
header=None)
print(df)
Output:
species lattitude longitude
0 Myotis nattereri 54.076636 -1.006447
1 Myotis nattereri 54.256378 -1.002131
2 Myotis nattereri 54.256378 -1.002131
Your DataList variable is local to the LineToList function; you have to assign to another variable at file scope:
DataList = LineToList("Mammal.txt")
print(DataList[1])
I think you have a regular tab-delimited CSV that csv.reader can easily parse for you.
import csv
DataList = [row for row in csv.reader(open('Mammal.txt'), dialect='excel-tab')]
for data in DataList:
print(data)
This results in
['Myotis nattereri', '54.07663633', '-1.006446707']
['Myotis nattereri', '54.25637837', '-1.002130504']
['Myotis nattereri', '54.25637837', '-1.002130504']

Python- name not defined [duplicate]

This question already has answers here:
Why doesn't calling a string method (such as .replace or .strip) modify (mutate) the string?
(3 answers)
Closed 7 years ago.
I am having trouble getting my code to run. I keep getting an error saying that one of my x variables, such as 'hsGPA', is not defined. Below is my code. I've tried the solutions posted on the other thread and none have helped, so please don't mark this as a duplicate. THANKS!
def readData(fileName):
    """Read a comma-separated score file into four parallel lists.

    Each non-blank line must hold, in this order: high-school GPA (float),
    math SAT score (int), verbal SAT score (int) and college GPA (float).

    Returns:
        The tuple (hsGPA, mathSAT, crSAT, collegeGPA).
    """
    hsGPA = []       # High School GPA
    mathSAT = []     # Math SAT scores
    crSAT = []       # Verbal SAT scores
    collegeGPA = []  # College GPA
    # `with` closes the file even when a line fails to parse.
    with open(fileName, 'r', encoding='utf-8') as inputFile:
        for line in inputFile:
            if not line.strip():
                # ignore blank lines such as a trailing empty line
                continue
            FullList = line.split(',')
            hsGPA.append(float(FullList[0]))
            mathSAT.append(int(FullList[1]))
            crSAT.append(int(FullList[2]))
            collegeGPA.append(float(FullList[3]))
    return hsGPA, mathSAT, crSAT, collegeGPA
def plotData(hsGPA, mathSAT, crSAT, collegeGPA):
    """Plot the four series in figure 1, one per row of a 4x1 subplot grid.

    Every series is drawn against its record index (0, 1, 2, ...).  The
    arguments themselves are plotted; the function used to discard them by
    re-reading 'SAT.txt' and unpacking into a misspelled name.
    """
    pyplot.figure(1)
    series = (hsGPA, mathSAT, crSAT, collegeGPA)
    for row, values in enumerate(series, start=1):
        pyplot.subplot(4, 1, row)
        pyplot.plot(list(range(len(values))), values)
    pyplot.show()
def LinearRegression(xList, yList):
    '''
    This function finds the constants in the y = mx+b, or linear regression
    formula, by least squares.
    xList - a list of the x values
    yList - a list of the y values (at least as long as xList)
    m - the slope of the line
    b - where the line intercepts the y axis
    Returns the tuple (m, b).
    Raises IndexError when yList is shorter than xList and
    ZeroDivisionError when xList is empty or all x values are equal.
    '''
    n = len(xList)
    if len(yList) < n:
        raise IndexError('yList is shorter than xList')
    sumX = 0
    sumXX = 0
    sumXY = 0
    sumY = 0
    for x, y in zip(xList, yList):
        sumX += x
        sumXY += x * y
        sumXX += x ** 2
        sumY += y
    # the components needed to find m and b:
    # m = (n*Sxy - Sx*Sy) / (n*Sxx - Sx^2); n multiplies only the first
    # term of numerator and denominator.
    m = (n * sumXY - sumX * sumY) / (n * sumXX - sumX ** 2)
    b = (sumY - (m * sumX)) / n
    return m, b
def plotRegression(x,y, xLabel, yLabel):
ScoreT = []
pyplot.scatter(x,y)
m,b = linearRegression(xList,yList)
minX = min(x)
maxX = max(x)
pyplot.plot([minX, maxX], [m * minX + b, m * maxX + b], color ='red')
pyplot.xlabel(xLabel)
pyplot.ylabel(yLabel)
pyplot.show()
for index in range(len(mathSAT)):
sumscore = mathSAT[index] + crSAT[index]
ScoreT.append(sumscore)
return ScoreT
def rSquared(x, y, m, b):
    '''
    Return the coefficient of determination R^2 of the line y = m*x + b.
    x - a list of the x values
    y - a list of the y values
    m - the slope of the line
    b - where the line intercepts the y axis
    Raises ZeroDivisionError when all y values are equal (or y is empty).
    '''
    n = len(x)
    # The mean must be known before any deviation from it is summed.
    MeanY = sum(y) / len(y)
    # residual sum of squares around the fitted line
    sumS = sum((y[index] - ((m * x[index]) + b)) ** 2 for index in range(n))
    # total sum of squares around the mean
    sumT = sum((value - MeanY) ** 2 for value in y)
    return 1 - (sumS / sumT)
def main():
print(readData('SAT.txt'))
plotData(*readData('SAT.txt'))
plotRegression(hsGPA,collegeGPA, 'highGPA', 'collegeGPA')
plotRegression(mathSAT,collegeGPA, 'highGPA' , 'collegeGPA')
plotRegression(crSAT,collegeGPA, 'highGPA' , 'collegeGPA')
plotRegression(ScoreT,collegeGPA, 'highGPA' , 'collegeGPA')
main()
It's giving the error in main, after plotRegression for each of the x variables. Please Help! Thanks!
Try this:
def plotRegression(x,y, xLabel, yLabel):
    """Scatter x against y and draw the fitted regression line in red."""
    # I deleted ScoreT = [] here
    pyplot.scatter(x,y)
    # The function is defined as LinearRegression (capital L).
    m,b = LinearRegression(x,y)
    minX = min(x)
    maxX = max(x)
    pyplot.plot([minX, maxX], [m * minX + b, m * maxX + b], color ='red')
    pyplot.xlabel(xLabel)
    pyplot.ylabel(yLabel)
    pyplot.show()
    # I deleted the loop and return statement here
# ....
def main():
    """Read SAT.txt once, plot the raw series, then one regression per predictor."""
    data = readData('SAT.txt')
    print(data)
    plotData(*data)
    hsGPA, mathSAT, crSAT, collegeGPA = data
    # added ScoreT calculation here
    ScoreT = [sum(x) for x in zip(mathSAT, crSAT)]
    # Label each x axis with the series actually plotted on it.
    plotRegression(hsGPA,collegeGPA, 'highGPA', 'collegeGPA')
    plotRegression(mathSAT,collegeGPA, 'mathSAT' , 'collegeGPA')
    plotRegression(crSAT,collegeGPA, 'crSAT' , 'collegeGPA')
    plotRegression(ScoreT,collegeGPA, 'mathSAT + crSAT' , 'collegeGPA')
In your main(), hsGPA is never defined. It's defined inside other function and is not shared in the global context. So main cannot access it.
You need to get it from readData()'s return value.

Categories

Resources