MatPlot lib keeps giving "out of range error"

MatPlot lib keeps giving "out of range error" - python

I'm trying to make a pokedex that stores the names and stats of all the pokemon in a .csv file and reads from that file when a pokemon is looked up. It should also show an image of the pokemon that was looked up.
The code is as follows:
import pandas as pd
import matplotlib.pyplot as plt
import os
import glob
import natsort
# Pokedex lookup script: loads every image from the image folder, reads pokemon.csv,
# then asks the user to search by name (A) or by ID (B) and shows the matching image.
# reading the images dataset
dir1 = r"C:\Users\yash1\Desktop\pokedex\pokemon_images"
# '*g' matches every file whose name ends in "g" (for example .png, .jpg, .jpeg)
path1 = os.path.join(dir1, '*g')
files = glob.glob(path1)
# natural sort of the paths, so that numbered file names are ordered by their number
files1 = natsort.natsorted(files, reverse=False)
# imag[k] holds the pixel data of the k-th file in sorted order
imag = []
for x in files1:
img = plt.imread(x)
imag.append(img)
# reading the details dataset
data = pd.read_csv('pokemon.csv')
print("Pokedex\n")
print("Welcome Pokemon Lovers\n")
print("Search for a pokemon\n")
df1 =input("<A>Search by pokemon name\n<B>Search by pokemon ID\n(select A or B)\n")
# NOTE(review): str.upper() returns a new string and the result is discarded here,
# so a lowercase "a" or "b" matches neither branch below.
df1.upper()
if(df1=="A"):
print("Enter the name of the pokemon")
name = input()
# NOTE(review): same issue - the lowered/stripped text is not assigned back to name.
name.lower().strip()
# where() keeps the shape of the frame; rows that do not match become NaN
dt = data[:].where(data['pokemon']==name)
# st (rows with a non-null id) is computed but not used again in this branch
st = dt[dt['id'].notnull()]
# index labels of the rows whose name matched
idx = dt.index[dt['pokemon'] == name]
# NOTE(review): idx is an Index of matching labels, not a single number, so this
# comparison is element-wise; it only has a clear truth value when exactly one row matched.
if idx > 721:
exit(0)
# NOTE(review): idx[1] is the SECOND matching label. With a single match this raises
# "IndexError: index 1 is out of bounds for axis 0 with size 1".
# Presumably the row label is also the position of the image in imag - TODO confirm
# that the sorted image order matches the row order of the CSV.
plt.imshow(imag[idx[1]])
plt.axis("off") # turns off axes
plt.axis("tight") # gets rid of white border
plt.axis("image") # square up the image instead of filling the "figure" space
plt.show()
elif(df1=="B"):
print("Enter the ID of the pokemon")
ID = int(input())
# same masking approach as the name branch, keyed on the id column
tt = data[:].where(data['id']==ID)
idx1 = tt.index[tt['id']==ID]
qt = tt[tt['id'].notnull()]
# print every column of the first matching row as "column : value"
for i in qt.columns:
print(i," : ",qt[i][idx1[0]])
# NOTE(review): idx1 is an Index as well; see the note on the name branch.
if idx1>721:
exit(0)
# the imshow call is commented out, so no image is drawn in this branch
#plt.imshow(imag[idx1[0]])
plt.axis("off") # turns off axes
plt.axis("tight") # gets rid of white border
plt.axis("image") # square up the image instead of filling the "figure" space
plt.show()
when I run it it gives me an error like this:
Traceback (most recent call last):
File "G:/pythonProject/main.py", line 33, in <module>
plt.imshow(imag[idx[1]])
File "C:\python\lib\site-packages\pandas\core\indexes\base.py", line 4604, in __getitem__
return getitem(key)
IndexError: index 1 is out of bounds for axis 0 with size 1
How do I fix the out-of-bounds index? I even replaced the index in the brackets with 0 and it still didn't work.

Related

Read values from .csv file based on user input

I am making a pokedex where the names and stats of pokemon are stored in a .csv file. Based on user input, it should read the .csv and output the values that belong to that input. If the user inputs the name "pikachu", it should read all the stats and display them:
25,"pikachu",25,4,60,112,"electric",NA,55,40,35,50,50,90,"static",NA,"lightning-rod","#F8D030",NA,NA,"ground","fairy","25.png",1,172,10,8,"quadruped"
Instead, it only shows the image. The code is below:
import pandas as pd
import matplotlib.pyplot as plt
import os
import glob
import natsort
# Pokedex lookup script: loads every image from the image folder, reads pokemon.csv,
# then asks the user to search by name (A) or by ID (B) and shows the matching image.
# reading the images dataset
dir1 = r"G:\pythonProject\pokemon_images" #73
# '*g' matches every file whose name ends in "g" (for example .png, .jpg, .jpeg)
path1 = os.path.join(dir1, '*g')
files = glob.glob(path1)
# natural sort of the paths, so that numbered file names are ordered by their number
files1 = natsort.natsorted(files, reverse=False)
# imag[k] holds the pixel data of the k-th file in sorted order
imag = []
for x in files1:
img = plt.imread(x)
imag.append(img)
# reading the details dataset
data = pd.read_csv('pokemon.csv')
print("Pokedex\n")
print("Welcome Pokemon Lovers\n")
print("Search for a pokemon\n")
df1 =input("<A>Search by pokemon name\n<B>Search by pokemon ID\n(select A or B)\n")
# NOTE(review): str.upper() returns a new string and the result is discarded here,
# so a lowercase "a" or "b" matches neither branch below.
df1.upper()
if(df1=="A"):
print("Enter the name of the pokemon")
name = input()
# NOTE(review): the lowered/stripped text is not assigned back to name.
name.lower().strip()
# where() keeps the shape of the frame; rows that do not match become NaN
dt = data[:].where(data['pokemon']==name)
# st (rows with a non-null id) is computed but not used again in this branch
st = dt[dt['id'].notnull()]
# index labels of the rows whose name matched
idx = dt.index[dt['pokemon'] == name]
# NOTE(review): idx is an Index of matching labels, not a single number, so this
# comparison is element-wise; it only has a clear truth value when exactly one row matched.
if idx > 721:
exit(0)
# NOTE(review): this branch only draws the image. Nothing here prints the columns of
# the matched row, unlike the loop over qt.columns in the ID branch below.
# idx[-1] is the label of the last matching row; presumably it is also the position of
# the image in imag - TODO confirm the image order matches the CSV row order.
plt.imshow(imag[idx[-1]])
plt.axis("off") # turns off axes
plt.axis("tight") # gets rid of white border
plt.axis("image") # square up the image instead of filling the "figure" space
plt.show()
elif(df1=="B"):
print("Enter the ID of the pokemon")
ID = int(input())
# same masking approach as the name branch, keyed on the id column
tt = data[:].where(data['id']==ID)
idx1 = tt.index[tt['id']==ID]
qt = tt[tt['id'].notnull()]
# print every column of the last matching row as "column : value"
for i in qt.columns:
print(i," : ",qt[i][idx1[-1]])
# NOTE(review): idx1 is an Index as well; see the note on the name branch.
if idx1>721:
exit(0)
plt.imshow(imag[idx1[-1]])
plt.axis("off") # turns off axes
plt.axis("tight") # gets rid of white border
plt.axis("image") # square up the image instead of filling the "figure" space
plt.show()

Problems with red pixel and loop check

I am new here and also new to Python (^_^')
I have a question about my code.
This is an infinite loop: when my code finds a red pixel in a saved screenshot, it sends me a message. It works if there is a red pixel, but if I test it with another red pixel, or delete the last detected red pixel and then re-use it, my code stops working with this error:
Warning (from warnings module):
File "C:\Users\Desktop\DCN.py", line 126
comparison_dcn = check_dcn == control_dcn
DeprecationWarning: elementwise comparison failed; this will raise an error in the future.
Traceback (most recent call last):
File "C:\Users\Desktop\DCN.py", line 127, in <module>
equal_dcn= comparison_dcn.all()
AttributeError: 'bool' object has no attribute 'all'
My idea was to create a numpy array that stores the coordinates (x,y) and to check whether they already exist in this array, so that the same pixel is not reported to me twice...
I tried to figure out the problem, but it is beyond my current Python experience....
I hope my english is understandable XD
Can someone kindly help me with my code and explain my issue?
#libaries
import mss
import mss.tools
from PIL import image
import psutil
import time
import cv2
import numpy as np
#global variables
# loop is never reassigned in this listing, so the while loop below runs until the process is stopped
loop = 1
# coordinates reported last time; starts as the 1x2 array [[0, 0]]
check_dcn = np.column_stack((0,0))
# 0 until the first detection has been stored, 1 afterwards
counter_dcn = 0
while loop == 1 :
#detect red pixel
def detect_color(rgb, filename):
    """Report whether the image stored at *filename* contains a pixel of colour *rgb*.

    The image is converted to RGBA and only the first three channels of each
    pixel are compared with ``rgb[0]``, ``rgb[1]`` and ``rgb[2]``; alpha is ignored.

    Returns:
        True as soon as one matching pixel is found, otherwise False.
    """
    # NOTE(review): this needs PIL's ``Image`` module in scope; the listing imports
    # ``image`` (lowercase) from PIL, which does not bind that name.
    pixels = Image.open(filename).convert('RGBA').getdata()
    return any(
        px[0] == rgb[0] and px[1] == rgb[1] and px[2] == rgb[2]
        for px in pixels
    )
# One pass of the loop: grab a region of the screen, save it as example.png, look for
# the alarm colour and compare the matching coordinates with those reported last time.
with mss.mss() as sct:
# The screen part to capture
monitor = {"top": 190, "left": 0, "width": 1920, "height": 840}
# the template has no placeholders, so format() leaves "example.png" unchanged
output = "example.png".format(**monitor)
# Grab the data
sct_img = sct.grab(monitor)
# Save to the picture file
mss.tools.to_png(sct_img.rgb, sct_img.size, output=output)
# the screenshot is scanned here and once more in the if below
print (detect_color((255,102,102), 'example.png')) #dcn red pixel
#dcn alarm detected
if detect_color((255,102,102), 'example.png'):
pixel_img = cv2.imread('example.png')
pop = [102,102,255] #BGR order
# NOTE(review): np.where returns one index array per axis of the 2-D mask, first axis
# first. For an image indexed [row, column] X would hold rows and Y columns - confirm
# this is the intended (x, y) order.
X,Y = np.where(np.all(pixel_img == pop, axis = 2)) #coordinates
# one row per matching pixel: [X, Y]
control_dcn = np.column_stack((X,Y)) #assign coordinates
print(control_dcn) #test
# first detection: remember the coordinates so the comparison below has a baseline
if counter_dcn == 0:
counter_dcn = 1
check_dcn = control_dcn
print("first round dcn") #test
print(check_dcn) #test
###looking for solution here to empty comparison_dcn
# NOTE(review): when the two arrays have different shapes (a different number of
# matching pixels) the == cannot be done element-wise and yields a plain bool with a
# DeprecationWarning; .all() on that bool is the reported AttributeError.
# np.array_equal(check_dcn, control_dcn) would return a bool for any pair of shapes.
comparison_dcn = check_dcn == control_dcn
equal_dcn= comparison_dcn.all()
if equal_dcn:
print("red pixel alread reported,waiting 20 seconds") #test
time.sleep(20)
else:
# NOTE(review): np.column_stack takes ONE sequence of arrays; compare the call
# np.column_stack((X,Y)) above. Passing X and Y as two arguments looks like a bug.
check_dcn = np.column_stack(X,Y)
print("red pixel added,waiting 5 seconds") #test
print(check_dcn) #test
time.sleep(5)
else:
print("Nothing, waiting 10 seconds")
time.sleep(10)

TypeError When Adding another value to GeoJsonToolTip

I'm trying to make a map with the number of noise complaints for each zipcode and everything runs fine, but I can't get the count number to appear on the map when I hover over each area. I tried making it into an int as the error suggested, but nothing seems to work.
import pandas as pd
# Build a choropleth of 2020 noise complaints per zip code and save it as index.html.
df2020 = pd.read_csv('/Users/kenia/Desktop/CSCI 233 Seminar Project/311_Noise_Complaints.csv',sep=',', low_memory = False)
# keep the rows whose 'Created Date' has "2020" at character positions 6-9
# (assumes the year sits there, e.g. an MM/DD/YYYY layout - TODO confirm)
df2020=df2020[df2020['Created Date'].str[6:10] == '2020']
# missing zip codes become 0 so that the column can be cast to int
df2020['Incident Zip'].fillna(0, inplace=True)
df2020['Incident Zip'] = df2020['Incident Zip'].astype(int)
# one row per zip code with the number of complaints for it
df2020_zip = df2020['Incident Zip'].value_counts().to_frame().reset_index()
df2020_zip.columns = ['postal_code', 'counts']
# presumably cast to str to match the postalCode property used in key_on below - verify
df2020_zip['postal_code'] = df2020_zip['postal_code'].astype(str)
df2020_zip['counts'] = df2020_zip['counts'].astype(int)
import folium
nycMap = folium.Map(location=[40.693943, -73.985880], zoom_start=10)
# zipLines is only the PATH of the GeoJSON file (a plain string), not its parsed content
zipLines = '/Users/kenia/Desktop/CSCI 233 Seminar Project/zipMap.geojson.json'
# repeats the cast already done above
df2020_zip['counts'] = df2020_zip['counts'].astype(int)
# NOTE(review): zipLines is a str, so zipLines['counts'] raises
# "TypeError: string indices must be integers". The counts already live in df2020_zip.
df2020_zip['counts'] = pd.Series(zipLines['counts'])
count_col = df2020_zip['counts']
# colour-scale edges at the 0/20/40/60/80/100 % quantiles of the counts
bins = list(df2020_zip['counts'].quantile([0,0.2,0.4,0.6,0.8,1]))
choropleth = folium.Choropleth(geo_data = zipLines,
data=df2020_zip,
columns=['postal_code', 'counts'],
key_on='feature.properties.postalCode',
fill_color='OrRd',
fill_opacity=0.7,
line_opacity=1.0,
bins = bins,
highlight=True,
legend_name="Noise Frequency in 2020"
).add_to(nycMap)
folium.LayerControl().add_to(nycMap)
# NOTE(review): tooltip fields are looked up by name in each GeoJSON feature's
# properties. 'count_col' is the name of a Python variable here; presumably the counts
# have to be written into the feature properties before they can be shown - verify.
choropleth.geojson.add_child(
folium.features.GeoJsonTooltip(['postalCode','PO_NAME','count_col'])
)
nycMap.save(outfile='index.html')
Error:
Traceback (most recent call last):
File "/Users/kenia/Desktop/throwaway.py", line 20, in <module>
df2020_zip['counts'] = pd.Series(zipLines['counts'])
TypeError: string indices must be integers
Dataset: https://data.cityofnewyork.us/Social-Services/311-Noise-Complaints/p5f6-bkga
Zipcode GeoJson: https://data.beta.nyc/dataset/nyc-zip-code-tabulation-areas/resource/6df127b1-6d04-4bb7-b983-07402a2c3f90?view_id=b34c6552-9fdb-4f95-8810-0588ad1a4cc8

Trying to parse Word Documents and getting PdfReadError: EOF marker not found

I am testing some Python code to loop through resumes, open each, parse each, and create a comprehensive report based on the contents of each resume. Here is the code that I am running.
#importing all required libraries
import PyPDF2
import os
from os import listdir
from os.path import isfile, join
from io import StringIO
import pandas as pd
from collections import Counter
import en_core_web_sm
nlp = en_core_web_sm.load()
from spacy.matcher import PhraseMatcher
#Function to read resumes from the folder one by one
# mypath: folder that holds the resumes (written with a trailing separator)
mypath='C:\\path_to_resumes\\' #enter your path here where you saved the resumes
# Full paths of all regular files directly inside mypath. There is no filter on the
# file extension, so every file in the folder is handed to the PDF reader further down.
onlyfiles = [os.path.join(mypath, f) for f in os.listdir(mypath) if os.path.isfile(os.path.join(mypath, f))]
def pdfextract(file):
    """Extract the text of every page of the PDF stored at *file*.

    Each page's text is printed as it is read and collected in page order.

    Args:
        file: Path of the PDF file to read.

    Returns:
        A list with one string per page.

    Raises:
        PyPDF2's ``PdfReadError`` when *file* cannot be parsed as a PDF
        (for example "EOF marker not found" for a non-PDF file).
    """
    # The context manager closes the handle even when PyPDF2 rejects the file;
    # the pages are read inside the block because the reader pulls from the stream.
    with open(file, 'rb') as stream:
        fileReader = PyPDF2.PdfFileReader(stream)
        text = []
        for count in range(fileReader.getNumPages()):
            t = fileReader.getPage(count).extractText()
            print(t)
            text.append(t)
    return text
#function to read resume ends
#function that does phrase matching and builds a candidate profile
# Build a keyword profile for one resume: extract its text, match it against the
# category phrase lists from the template CSV, and return a DataFrame with the columns
# 'Candidate Name', 'Subject', 'Keyword' and 'Count'.
def create_profile(file):
text = pdfextract(file)
# pdfextract returns a list of page strings; str() of that list is its repr, in which
# line breaks appear as the two characters backslash + n - the next line strips those
text = str(text)
text = text.replace("\\n", "")
text = text.lower()
#below is the csv where we have all the keywords, you can customize your own
keyword_dict = pd.read_csv('D:/NLP_Resume/resume/template_new.csv')
# one list of spaCy docs per category column; empty (NaN) cells are dropped
stats_words = [nlp(text) for text in keyword_dict['Statistics'].dropna(axis = 0)]
NLP_words = [nlp(text) for text in keyword_dict['NLP'].dropna(axis = 0)]
ML_words = [nlp(text) for text in keyword_dict['Machine Learning'].dropna(axis = 0)]
DL_words = [nlp(text) for text in keyword_dict['Deep Learning'].dropna(axis = 0)]
R_words = [nlp(text) for text in keyword_dict['R Language'].dropna(axis = 0)]
python_words = [nlp(text) for text in keyword_dict['Python Language'].dropna(axis = 0)]
Data_Engineering_words = [nlp(text) for text in keyword_dict['Data Engineering'].dropna(axis = 0)]
matcher = PhraseMatcher(nlp.vocab)
# NOTE(review): add(key, None, *patterns) looks like the older spaCy call form; newer
# releases are believed to expect add(key, patterns) - confirm against the installed version
matcher.add('Stats', None, *stats_words)
matcher.add('NLP', None, *NLP_words)
matcher.add('ML', None, *ML_words)
matcher.add('DL', None, *DL_words)
matcher.add('R', None, *R_words)
matcher.add('Python', None, *python_words)
matcher.add('DE', None, *Data_Engineering_words)
doc = nlp(text)
# d collects one (category, matched text) tuple per match
d = []
matches = matcher(doc)
for match_id, start, end in matches:
rule_id = nlp.vocab.strings[match_id] # get the unicode ID, i.e. 'COLOR'
span = doc[start : end] # get the matched slice of the doc
d.append((rule_id, span.text))
# one line per distinct (category, phrase) pair, formatted "Subject Keyword (count)"
keywords = "\n".join(f'{i[0]} {i[1]} ({j})' for i,j in Counter(d).items())
## converting string of keywords to dataframe
# NOTE(review): the text is parsed back through read_csv; what happens when nothing
# matched (keywords == "") or when a line contains a comma is not handled here - verify
df = pd.read_csv(StringIO(keywords),names = ['Keywords_List'])
# split at the first space: category | rest of the line
df1 = pd.DataFrame(df.Keywords_List.str.split(' ',1).tolist(),columns = ['Subject','Keyword'])
# split the rest at the first "(": phrase | "count)"
df2 = pd.DataFrame(df1.Keyword.str.split('(',1).tolist(),columns = ['Keyword', 'Count'])
df3 = pd.concat([df1['Subject'],df2['Keyword'], df2['Count']], axis =1)
# drop the closing parenthesis that is left over from the "(count)" suffix
df3['Count'] = df3['Count'].apply(lambda x: x.rstrip(")"))
# candidate name = file name without extension, up to the first underscore, lowercased
base = os.path.basename(file)
filename = os.path.splitext(base)[0]
name = filename.split('_')
name2 = name[0]
name2 = name2.lower()
## converting str to dataframe
name3 = pd.read_csv(StringIO(name2),names = ['Candidate Name'])
# columns are placed side by side; the name column comes from a one-line input, so its
# remaining rows are filled with the first value on the next line
dataf = pd.concat([name3['Candidate Name'], df3['Subject'], df3['Keyword'], df3['Count']], axis = 1)
dataf['Candidate Name'].fillna(dataf['Candidate Name'].iloc[0], inplace = True)
return(dataf)
#function ends
#code to execute/call the above functions
# Driver: build one profile per file in onlyfiles, stack them into final_database,
# then plot a stacked horizontal bar chart of keyword counts per candidate and category.
final_database=pd.DataFrame()
i = 0
while i < len(onlyfiles):
file = onlyfiles[i]
# every path in onlyfiles is processed, whatever kind of file it is
dat = create_profile(file)
# NOTE(review): DataFrame.append was removed in pandas 2.0; pd.concat is the
# replacement - confirm the installed pandas version
final_database = final_database.append(dat)
i +=1
print(final_database)
#code to count words under each category and visualize it through Matplotlib
# number of keywords per (candidate, subject), with the subjects spread into columns
final_database2 = final_database['Keyword'].groupby([final_database['Candidate Name'], final_database['Subject']]).count().unstack()
final_database2.reset_index(inplace = True)
# combinations without any keyword count as 0
final_database2.fillna(0,inplace=True)
# all columns except the first (the candidate name), indexed by candidate name
new_data = final_database2.iloc[:,1:]
new_data.index = final_database2['Candidate Name']
#execute the below line if you want to see the candidate profile in a csv format
#sample2=new_data.to_csv('sample.csv')
import matplotlib.pyplot as plt
plt.rcParams.update({'font.size': 10})
ax = new_data.plot.barh(title="Resume keywords by category", legend=False, figsize=(25,7), stacked=True)
# one "category: count" label per (column, row) cell, built column by column;
# presumably this is also the order of ax.patches - TODO confirm
labels = []
for j in new_data.columns:
for i in new_data.index:
label = str(j)+": " + str(new_data.loc[i][j])
labels.append(label)
patches = ax.patches
for label, rect in zip(labels, patches):
width = rect.get_width()
# only bar segments with a positive width get a label, centred inside the segment
if width > 0:
x = rect.get_x()
y = rect.get_y()
height = rect.get_height()
ax.text(x + width/2., y + height/2., label, ha='center', va='center')
plt.show()
In the folder, I have '.doc' and '.docx' files. Everything seems to work fine, up until this point, directly below. When I get here, the code throws an error. Here is the troublesome code. The weird thing is, that it looks like some kind of PDF error, but I'm iterating only through '.doc' and '.docx' files.
# Driver loop: build one profile per file in onlyfiles and stack them into final_database.
final_database=pd.DataFrame()
i = 0
while i < len(onlyfiles):
file = onlyfiles[i]
# every path in onlyfiles is processed, whatever kind of file it is
dat = create_profile(file)
# NOTE(review): DataFrame.append was removed in pandas 2.0; pd.concat is the
# replacement - confirm the installed pandas version
final_database = final_database.append(dat)
i +=1
print(final_database)
Here is the StackTrace:
Traceback (most recent call last):
File "<ipython-input-2-c63fca79d39f>", line 5, in <module>
dat = create_profile(file)
File "<ipython-input-1-cdc3bf75cd26>", line 34, in create_profile
text = pdfextract(file)
File "<ipython-input-1-cdc3bf75cd26>", line 17, in pdfextract
fileReader = PyPDF2.PdfFileReader(open(file,'rb'))
File "C:\Users\ryans\Anaconda3\lib\site-packages\PyPDF2\pdf.py", line 1084, in __init__
self.read(stream)
File "C:\Users\ryans\Anaconda3\lib\site-packages\PyPDF2\pdf.py", line 1696, in read
raise utils.PdfReadError("EOF marker not found")
PdfReadError: EOF marker not found
The code comes from here.
https://towardsdatascience.com/do-the-keywords-in-your-resume-aptly-represent-what-type-of-data-scientist-you-are-59134105ba0d

You are using the PyPDF2 package, which is used to read and manipulate PDF files. In the Towards Data Science article that you mentioned, all the resumes the author worked with were in PDF format.
If your resumes are in doc/docx format, you should explore the python-docx library (note that it reads .docx files; legacy .doc files have to be converted first):
https://python-docx.readthedocs.io/en/latest/index.html

updating a Slider min - max range in runtime in matplotlib [duplicate]

I am trying to write a small bit of code that interactively deletes selected slices in an image series using matplotlib. I have created a button 'delete' which stores a number of indices to be deleted when the button 'update' is selected. However, I am currently unable to reset the range of my slider widget, i.e. removing the number of deleted slices from valmax. What is the pythonic solution to this problem?
Here is my code:
import dicom
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.widgets import Slider, Button
# 0-based index of the frame currently shown; module-level state shared with the viewer callbacks
frame = 0
#store indices of slices to be deleted
delete_list = []
def main():
    """Create a random 16-frame stack of 256x256 images and open it in the viewer."""
    volume = np.random.rand(16,256,256)
    frame_count = volume.shape[0]
    # one 2-D array per frame, in frame order
    slices = [volume[k,:,:] for k in range(frame_count)]
    # Visualize it
    viewer = VolumeViewer(slices, frame_count)
    viewer.show()
# Interactive viewer for a list of 2-D frames: a slider selects the frame, "Delete"
# queues the current frame for removal and "Update" removes the queued frames.
class VolumeViewer(object):
# raw_dicom_stack: list of 2-D arrays (one per frame); nframes: number of frames
def __init__(self, raw_dicom_stack, nframes):
global delete_list
self.raw_dicom_stack = raw_dicom_stack
self.nframes = nframes
# reference to the module-level list; not read again anywhere in this class
self.delete_list = delete_list
# Setup the axes.
self.fig, self.ax = plt.subplots()
self.slider_ax = self.fig.add_axes([0.2, 0.03, 0.65, 0.03])
self.delete_ax = self.fig.add_axes([0.85,0.84,0.1,0.04])
self.update_ax = self.fig.add_axes([0.85,0.78,0.1,0.04])
self.register_ax = self.fig.add_axes([0.85,0.72,0.1,0.04])
self.add_ax = self.fig.add_axes([0.85,0.66,0.1,0.04])
# Make the slider
# range 1..nframes (1-based); the total in the label is fixed into valfmt here
self.slider = Slider(self.slider_ax, 'Frame', 1, self.nframes,
valinit=1, valfmt='%1d/{}'.format(self.nframes))
self.slider.on_changed(self.update)
#Make the buttons
self.del_button = Button(self.delete_ax, 'Delete')
self.del_button.on_clicked(self.delete)
self.upd_button = Button(self.update_ax, 'Update')
self.upd_button.on_clicked(self.img_update)
# the Register and Add buttons are created without a click handler
self.reg_button = Button(self.register_ax, 'Register')
self.add_button = Button(self.add_ax, "Add")
# Plot the first slice of the image
self.im = self.ax.imshow(np.array(raw_dicom_stack[0]))
# Slider callback: show the frame that belongs to the (1-based) slider value.
def update(self, value):
global frame
# slider values start at 1, list indices at 0
frame = int(np.round(value - 1))
# Update the image data
dat = np.array(self.raw_dicom_stack[frame])
self.im.set_data(dat)
# Reset the image scaling bounds (this may not be necessary for you)
self.im.set_clim([dat.min(), dat.max()])
# Redraw the plot
self.fig.canvas.draw()
# "Delete" button callback: queue the frame currently shown for deletion.
def delete(self,event):
global frame
global delete_list
delete_list.append(frame)
print 'Frame %s has been added to list of slices to be deleted' %str(frame+1)
print 'Please click update to delete these slices and show updated image series \n'
#Duplicates are removed from the delete list later, in img_update
# "Update" button callback: remove all queued frames from the stack.
def img_update(self,event):
#function deletes image stacks and updates viewer
global delete_list
#Remove duplicates from list and sort into numerical order
# this rebinds the module-level name to a new list; self.delete_list still refers to the old one
delete_list = list(set(delete_list))
delete_list.sort()
#Make sure delete_list is not empty
if not delete_list:
print "Delete list is empty, no slices to delete"
#Loop through delete list in reverse numerical order and remove slices from series
else:
# highest index first, so earlier removals do not shift the indices still to be removed
for i in reversed(delete_list):
self.raw_dicom_stack.pop(i)
print 'Slice %i removed from dicom series \n' %(i+1)
#Can now remove contents from delete_list
del delete_list[:]
#Update slider range
# NOTE(review): only self.nframes changes here; the Slider built in __init__ keeps its
# original valmax, axis limits and label format, so its range still shows the old count
self.nframes = len(self.raw_dicom_stack)
# Open the figure window.
def show(self):
plt.show()
if __name__ == '__main__':
main()

In order to update a slider range you may set the min and max value of it directly,
slider.valmin = 3
slider.valmax = 7
In order to reflect this change in the slider axes you need to set the limits of the axes,
slider.ax.set_xlim(slider.valmin,slider.valmax)
A complete example, where typing in any digit changes the valmin of the slider to that value.
import matplotlib.pyplot as plt
import matplotlib.widgets
# Figure with the main plot on top and a thin axes underneath that hosts the slider.
fig, (ax,sliderax) = plt.subplots(nrows=2,gridspec_kw=dict(height_ratios=[1,.05]))
ax.plot(range(11))
ax.set_xlim(5,None)
ax.set_title("Type number to set minimum slider value")


def update_range(val):
    """Slider callback: use the slider value as the lower x-limit of the plot."""
    ax.set_xlim(val,None)


def update_slider(evt):
    """Key-press handler: a digit key becomes the slider's new minimum value.

    Keys that cannot be read as an integer (letters, modifier keys, ``None``)
    are ignored.
    """
    print(evt.key)
    try:
        val = int(evt.key)
    except (TypeError, ValueError):
        # Not a digit key. Only the conversion is guarded, so real errors in
        # the update below are no longer swallowed.
        return
    slider.valmin = val
    # The slider draws itself in its own axes, so the axes limits must follow valmin.
    slider.ax.set_xlim(slider.valmin,None)
    if val > slider.val:
        # set_val moves the handle and notifies the on_changed callbacks,
        # which calls update_range(val).
        slider.set_val(val)
    fig.canvas.draw_idle()


# valinit is passed by keyword: newer matplotlib releases accept it only that way.
slider=matplotlib.widgets.Slider(sliderax,"xlim",0,10,valinit=5)
slider.on_changed(update_range)
fig.canvas.mpl_connect('key_press_event', update_slider)
plt.show()

It looks like the slider does not have a way to update the range (api). I would suggest setting the range of the slider to be [0,1] and doing
frame = int(self.nframes * value)
On a somewhat related note, I would have made frame an instance variable (a data attribute) instead of a global variable (tutorial).

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

MatPlot lib keeps giving "out of range error" - python

Related

Read values from .csv file based on user input

Problems with red pixel and loop check

TypeError When Adding another value to GeoJsonToolTip

Trying to parse Word Documents and getting PdfReadError: EOF marker not found

updating a Slider min - max range in runtime in matplotlib [duplicate]

Categories

Resources