I am trying to run a small Python script for data mining and I am getting the following error.
ERROR:
File "prediction.py", line 10
data=pd.read_csv(file_name)
^
IndentationError: expected an indented block
I am a beginner, please help.
My Code:
# Required Packages
import csv
import sys
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import datasets, linear_model
# Function to get data
def get_data(file_name):
data = pd.read_csv(file_name)
flash_x_parameter = []
flash_y_parameter = []
arrow_x_parameter = []
arrow_y_parameter = []
for x1,y1,x2,y2 in zip(data[‘flash_episode_number’],data[‘flash_us_viewers’],data[‘arrow_episode_number’],data[‘arrow_us_viewers’]):
flash_x_parameter.append([float(x1)])
flash_y_parameter.append(float(y1))
arrow_x_parameter.append([float(x2)])
arrow_y_parameter.append(float(y2))
return flash_x_parameter,flash_y_parameter,arrow_x_parameter,arrow_y_parameter
# Function to know which Tv show will have more viewers
def more_viewers(x1,y1,x2,y2):
regr1 = linear_model.LinearRegression()
regr1.fit(x1, y1)
predicted_value1 = regr1.predict(9)
print predicted_value1
regr2 = linear_model.LinearRegression()
regr2.fit(x2, y2)
predicted_value2 = regr2.predict(9)
#print predicted_value1
#print predicted_value2
if predicted_value1 > predicted_value2:
print "The Flash Tv Show will have more viewers for next week"
else:
print "Arrow Tv Show will have more viewers for next week"
x1,y1,x2,y2 = get_data(‘input.csv’)
#print x1,y1,x2,y2
more_viewers(x1,y1,x2,y2)`
Indentation matters in python.
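You're getting the error because every statement that belongs to a function has to be indented below its `def` line. Line 10 of your file is the first statement of `get_data`, so it (and the rest of the function body) must be indented like this: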
# Function to get data
def get_data(file_name):
    # Everything that belongs to the function is indented one level (4 spaces).
    data = pd.read_csv(file_name)
    flash_x_parameter = []
    flash_y_parameter = []
    ...
In python you have to indent code for any nested block so python can recognize what belongs to global code, function code and any inner block code:
def get_data(file_name):
    data = pd.read_csv(file_name)
    ....
global code again
This should take care of the indentation issue. Please also read the indentation section of PEP 8, the Python style guide, which says:
Use 4 spaces per indentation level.
Continuation lines should align wrapped elements either vertically
using Python's implicit line joining inside parentheses, brackets and
braces, or using a hanging indent [6] . When using a hanging indent
the following should be considered; there should be no arguments on
the first line and further indentation should be used to clearly
distinguish itself as a continuation line.
# Required Packages
import csv
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import datasets, linear_model
# Function to get data
def get_data(file_name):
    """Read the viewer statistics of both TV shows from a CSV file.

    The file must contain the columns ``flash_episode_number``,
    ``flash_us_viewers``, ``arrow_episode_number`` and ``arrow_us_viewers``.

    Args:
        file_name: Path of the CSV file, passed straight to ``pd.read_csv``.

    Returns:
        A 4-tuple ``(flash_x, flash_y, arrow_x, arrow_y)``. Each ``x`` is a
        list of one-element lists (``[[1.0], [2.0], ...]``), i.e. one row per
        sample and one column per feature; each ``y`` is a flat list of
        floats.
    """
    data = pd.read_csv(file_name)
    # Column names use plain ASCII quotes; typographic quotes are a SyntaxError.
    # The x values are wrapped in a list so that every sample is a 1-feature row.
    flash_x_parameter = [[float(x)] for x in data['flash_episode_number']]
    flash_y_parameter = [float(y) for y in data['flash_us_viewers']]
    arrow_x_parameter = [[float(x)] for x in data['arrow_episode_number']]
    arrow_y_parameter = [float(y) for y in data['arrow_us_viewers']]
    return flash_x_parameter, flash_y_parameter, arrow_x_parameter, arrow_y_parameter
# Function to know which Tv show will have more viewers
def more_viewers(x1, y1, x2, y2, next_episode=9):
    """Fit one linear regression per show and print which one is predicted to lead.

    Args:
        x1: Episode numbers of The Flash, one single-element row per episode.
        y1: Viewer counts of The Flash, one value per episode.
        x2: Episode numbers of Arrow, same layout as ``x1``.
        y2: Viewer counts of Arrow, same layout as ``y1``.
        next_episode: Episode number to predict the viewers for (default 9,
            the value that used to be hard-coded).

    Returns:
        None. The Flash prediction and the verdict are printed.
    """
    # predict() expects a 2-D "samples x features" array, so the single
    # episode number is wrapped as one row with one column.
    sample = [[next_episode]]

    regr1 = linear_model.LinearRegression()
    regr1.fit(x1, y1)
    predicted_value1 = regr1.predict(sample)
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(predicted_value1)

    regr2 = linear_model.LinearRegression()
    regr2.fit(x2, y2)
    predicted_value2 = regr2.predict(sample)

    # Each prediction is a one-element array; compare the scalars explicitly.
    if predicted_value1[0] > predicted_value2[0]:
        print("The Flash Tv Show will have more viewers for next week")
    else:
        print("Arrow Tv Show will have more viewers for next week")
# Load the viewer data of both shows and report the predicted leader.
# The file name uses plain ASCII quotes; typographic quotes are a SyntaxError.
x1, y1, x2, y2 = get_data('input.csv')
# print(x1, y1, x2, y2)  # uncomment to inspect the parsed data
more_viewers(x1, y1, x2, y2)
Related
I have the following function, which takes two arguments, psi and lam, and returns one array, y.
lam=np.arange(0,1,0.1)
psi=np.deg2rad(np.arange(0,361,1))
def test(psi,lam):
y=[]
for i in range(len(lam)):
sin_psi = np.sin(psi)
cos_psi = np.cos(psi)
sin_beta = lam*sin_psi
cos_beta = np.sqrt(1.0 - sin_beta**2)
ssin_pb = sin_psi*sin_beta
y.append((lam*(cos_psi/cos_beta)**2 - ssin_pb)/cos_beta + cos_psi)
plt.plot(psi,y[i])
return y
I would like the function to return range(len(lam))=10 plots of y on the vertical axis against psi on x axis.
However, it seems to be only plotting the same curve multiple times. Not sure what I am missing?
import matplotlib.pyplot as plt
import numpy as np
lam = np.arange(0, 1, 0.1)
psi = np.deg2rad(np.arange(0, 361, 1))


def test(angle, var):
    """Evaluate the curve for the given angle(s) and one parameter value.

    Args:
        angle: Angle or array of angles in radians.
        var: The single parameter value for which the curve is evaluated.

    Returns:
        ``(var*(cos/cos_beta)**2 - sin*sin_beta)/cos_beta + cos`` evaluated
        element-wise, where ``sin``/``cos`` are taken of ``angle``,
        ``sin_beta = var*sin`` and ``cos_beta = sqrt(1 - sin_beta**2)``.
    """
    # Use the `angle` argument, not the module-level `psi`, so the function
    # really works for whatever angles the caller passes in.
    sin_psi = np.sin(angle)
    cos_psi = np.cos(angle)
    sin_beta = var * sin_psi
    cos_beta = np.sqrt(1.0 - sin_beta**2)
    ssin_pb = sin_psi * sin_beta
    return (var * (cos_psi / cos_beta)**2 - ssin_pb) / cos_beta + cos_psi


# One curve per value in lam, all drawn on the same axes.
for curr_lam in lam:
    plt.plot(psi, test(psi, curr_lam))
# Show the figure once, after every curve has been drawn.
plt.show()
I moved the variable outside of the function, this way you may also use it for other cases. The only other thing is that you should call plt.show() after you're done drawing.
Your code has several problems, the main one being that the return statement was inside the loop, which ended the function after the first iteration. Imitating your code structure as closely as possible, we can rewrite the code as:
import numpy as np
import matplotlib.pyplot as plt
def test(psi, lam):
    """Plot one curve per value in `lam` against `psi` and return the curves.

    Args:
        psi: Array of angles in radians, used as the x axis of every curve.
        lam: Iterable of parameter values; one curve is computed per value.

    Returns:
        A list with one array per value in `lam`, in the same order.

    Side effects:
        Every curve is drawn with ``plt.plot`` and the figure is shown with
        ``plt.show()`` before the function returns.
    """
    # sin/cos of psi do not depend on the current lam value: compute them once.
    sin_psi = np.sin(psi)
    cos_psi = np.cos(psi)
    y = []
    for curr_lam in lam:
        sin_beta = curr_lam * sin_psi
        cos_beta = np.sqrt(1.0 - sin_beta**2)
        ssin_pb = sin_psi * sin_beta
        val = (curr_lam * (cos_psi / cos_beta)**2 - ssin_pb) / cos_beta + cos_psi
        y.append(val)
        plt.plot(psi, val)
    plt.show()
    # The return sits after the loop so that every lam value is processed.
    return y
# Parameter values 0.0, 0.1, ..., 0.9 and the angles 0..360 degrees in radians.
lam = np.arange(0, 1, 0.1)
psi = np.radians(np.arange(361))
# Draw all curves and print the computed arrays.
y = test(psi, lam)
print(y)
Sample output:
As Johan mentioned in the comments, you should also directly iterate over list/arrays. If you need to combine arrays, use
for x1, x2 in zip(arr1, arr2):
If you absolutely need the index value, use
for i, x in enumerate(arr):
I have these randomly generated data:
import numpy as np
import pandas as pd
# Fixed seed so the three random segments are reproducible.
np.random.seed(42)
# Low, high, low: three segments of five integers each.
y1 = np.random.randint(low=5, high=10, size=5)
y2 = np.random.randint(low=15, high=20, size=5)
y3 = np.random.randint(low=5, high=10, size=5)
# Join the segments in order and build the running total.
y = np.concatenate((y1, y2, y3))
y_cum = np.cumsum(y)
df_test = pd.DataFrame({'x': range(len(y)), 'y_cum': y_cum, 'y': y})
if we plot these data we can see an "elbow" (at 4 in the x-axis) and a "knee" (at 9 in the x-axis)
import matplotlib.pyplot as plt
plt.plot(df_test['x'], df_test['y_cum'])
I am using from kneed import KneeLocator to detect them.
I use the following code to detect the elbow:
kneedle = KneeLocator(df_test['x'], df_test['y'], curve='convex', direction='increasing', online=False, S=1)
elbow_point = kneedle.elbow
elbow_point
and the following code to detect the knee:
kneedle = KneeLocator(df_test['x'], df_test['y'], curve='concave', direction='increasing', online=False, S=1)
elbow_point = kneedle.elbow
elbow_point
The first one gives as output 13 and the second gives as output 1, which are not the correct values
I am a bit lost why these values pop up. I have looked into the source code but I couldnt figure it out.
Any ideas ?
I get the following error (last line is important) for the code below:
Warning (from warnings module):
File "C:/[file_location]/itteration 4.py", line 12
avgNug = reduce(lambda x, y: x + y, eachPix[:3])/len(eachPix[:3])
RuntimeWarning: overflow encountered in ubyte_scalars
Traceback (most recent call last):
File "C:/[file_location]/itteration 4.py", line 45, in
threshold(iar4)
File "C:/[file_location]/itteration 4.py", line 13, in threshold
balanceAr.append(avgNum)
NameError: global name 'balanceAr' is not defined
I've tried writing "global" before it and defining it outside the function it is used in, with several different syntaxes for the "global" declaration.
The code is taken from the sentdex video https://www.youtube.com/watch?v=nych18rsXKU where this code works.
I'm using the same Python version as him, and I'm assuming the same libraries, since this is the fourth program from the playlist, and the previous 3 worked fine.
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import time
def threshold(imageArray):
balaceAr = []
newAr = imageArray
for eachRow in imageArray:
for eachPix in eachRow:
avgNug = reduce(lambda x, y: x + y, eachPix[:3])/len(eachPix[:3])
balanceAr.append(avgNum)
balance = reduce(lambda x, y: x + y, balanceAr)/len(balanceAr)
for eachRow in newAr:
for eachPix in eachRow:
if reduce(lambda x, y: x + y, eachPix[:3])/len(eachPix[:3]) > balance:
#eachPix 0,1,2,3 = 255
else:
#eachPix 0,1,2 = 0
eachPix[3] = 255
return newAr
'''in the original code this part is not commented, and there's also a i, i2 and i3
i4 = Image.open('images/sentdex.png')
iar4 = np.array(i4)'''
threshold(iar4)
'''same explanation as previous comment, only coordinates in 2nd () are 0,0;4,0;0,3
fig = plt.figure()
ax4 = plt.subplot2grid((8,6), (4,3), rowspan=4, colspan=3)
ax4.imshow(iar4)
'''
plt.show()
#P.S. I had to write " " on all lines that didn't have it for stackoverflow
# to interpret it as code, even if it was in the "code" section
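You have a typo in the declaration `balaceAr = []`.
Change it to `balanceAr = []`. Note that `avgNug` two lines further down has the same kind of typo: the value is appended as `avgNum`, so rename it as well or you will get another NameError.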
Below your function definition:
balaceAr = [] # <===== Typo
Check for typos before posting next time.
I am trying to plot a .dat file from an stellar catalog using this code
try:
import pyfits
noPyfits=False
except:
noPyfits=True
import matplotlib.pyplot as plt
import numpy as np
f2 = open('/home/mcditoos/Desktop/Astrophysics_programs/Data_LAFT/ESPECTROS/165401.dat', 'r')
lines = f2.readlines()
f2.close()
x1 = []
y1 = []
for line in lines:
p = line.split()
x1.append(float(p[0]))
y1.append(float(p[1]))
xv = np.array(x1)
yv = np.array(y1)
plt.plot(xv, yv)
plt.show()
however i get the following error:
x1.append(float(p[0]))
IndexError: list index out of range
Also, I wanted to know whether there is any way of making the program open the next .dat file given an input.
I may not understand fully your question but why don't you use
X, Y = numpy.genfromtxt('yourfile', dtype='str')
X = X.astype('float')
Y = Y.astype('float')
If in your file you have 2 columns you can transpose your table with
X, Y = numpy.genfromtxt('yourfile', dtype='str').T
I am new to matplotlib, and I have a very simple (I'm guessing) question.
I have some data that need to be represented in a rectangle of 50x70 "units" (they're feet, actually representing a room) but I don't seem to be able to get matplotlib drawing a rectangle with the same scale on both axis and keeping the 50x70 "dimensions" at the same time.
I've tried the following:
import json
import matplotlib
import os
import sys
import traceback
import matplotlib.pyplot as plt
# Default input file; can be overridden by the first command-line argument.
DATA_FILE = os.path.join(os.path.expanduser("~"), "results.json")
# (x extent, y extent) of the area to draw, used below as the axis limits.
FLOOR_DIMENSIONS = (50, 70)

if __name__ == "__main__":
    # Local import: the scatter call below uses numpy, which the file-level
    # imports do not provide.
    import numpy

    # sys.argv[0] is the script itself; the first user argument is sys.argv[1].
    if len(sys.argv) > 1:
        DATA_FILE = os.path.abspath(sys.argv[1])
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("Gonna see what happens with file %s" % DATA_FILE)
    try:
        with open(DATA_FILE, 'r') as f:
            result_dict = json.load(f)
    except (IOError, OSError, ValueError) as e:
        # Best effort: report the problem and carry on with no data, which
        # still shows the empty, correctly sized plot.
        print(("Received %s %s when trying to parse json from %s\n"
               "Showing traceback: %s") % (type(e), e, DATA_FILE, traceback.format_exc()))
        result_dict = {}

    # One scatter series per entry; entries with fewer than 3 points are skipped.
    for data in result_dict.values():
        if len(data) < 3:
            continue
        x_s = [d['x'] for d in data]
        y_s = [d['y'] for d in data]
        # NOTE(review): `c` is a (5, 1) array regardless of the number of
        # points -- presumably meant as a random colour per series; confirm
        # it is accepted by the matplotlib version in use.
        plt.scatter(x_s, y_s, marker='o', c=numpy.random.rand(5,1), s=15)

    # Pin the visible area to the full floor, even where there is no data.
    plt.xlim([0, FLOOR_DIMENSIONS[0]])
    plt.ylim([0, FLOOR_DIMENSIONS[1]])
    #plt.axis('equal')
    plt.show()
    sys.exit(0)
Doing that, I get:
This draws my data inside a square, changing the X-Y scale (X spans 50 units and Y spans 70, therefore Y looks "shrunk").
Another option I tried was uncommenting the line saying plt.axis('equal'), but that "cuts" the Y axis (doesn't start in 0 and finishes in 70, but starts in 15 and ends in 55, probably because there's no data with y < 15 and y > 55)
But I don't want that either, I want the "canvas" starting in Y=0 and ending in Y=70, and if there's no data just show an empty space.
What I need is to draw something like this:
which I got by manually re-sizing the window where the plot was rendered :-D
Thank you in advance!
Add plt.axis('scaled').
edit: axis('image') may be better for your needs.
More axis settings can be found in the documentation.
import matplotlib.pyplot as plt
import numpy as np
# Demo data: x positions 0..49 and random y values shifted up by 15, so some
# points can lie above the upper y limit enforced below.
xs = np.arange(50)
ys = 15 + 70 * np.random.random(50)
plt.scatter(xs, ys)
# Request the 'image' axis mode first, then pin the visible area explicitly
# to x in [0, 50] and y in [0, 70].
plt.axis('image')
plt.axis([0, 50, 0, 70])
plt.show()
gives:
In the updated example I know the ys actually has a maximum of ~85, the offset was just to demonstrate proper axis enforcement.