how to draw rectangles using list in python - python

# Walk the annotation lines and the image file names in lockstep; each line is
# split on commas and its fields are used as an image number plus a box.
for line, images_files in zip(lines, image_list):
info = line.split(',')
image_index = [int(info[0])]
# NOTE(review): each coordinate is wrapped in a one-element list here, so the
# int(...) calls in plt.Rectangle below receive a list and raise
# "TypeError: int() argument must be a string or a number, not 'list'".
box_coordiante1 = [info[2]]
box_coordiante2 = [info[3]]
box_coordiante3 = [info[4]]
box_coordiante4 = [info[5]]
prev_image_num = 1
for image_number in image_index: # visit every parsed image number
if prev_image_num != image_number: # the image number differs from the previous one (e.g. 2 or 3 after a run of 1s)
prev_image_num = image_number # remember the new number as the previous one (it started at 1)
# Earlier attempt at collecting fields 2..5 into one list, kept for reference:
#box_coordinate = [] # empty box_coordinate
#box_coordinate.append(info[2:6])
#print box_coordinate
# box_coordinate.append() #insert 2 to 6 axis
rect = plt.Rectangle((int(box_coordiante1), int(box_coordiante2)), int(box_coordiante3), int(box_coordiante4), linewidth=1, edgecolor='r', facecolor='none')
ax.add_patch(rect)
im = cv2.imread(images_files)
# Reverse the channel order of the loaded image (index 2, 1, 0).
im = im[:, :, (2, 1, 0)]
# Display the image, pause briefly, then clear the axes.
plt.imshow(im)
plt.draw()
plt.pause(0.1)
plt.cla()
I am supposed to draw boxes on each picture.
To show the boxes on each picture,
I guess I need to gather the locations of all the boxes for that picture and draw them at the same time.
So I tried passing lists to plt.Rectangle,
but it said "TypeError: int() argument must be a string or a number, not 'list'"
Are there other ways??

Umm, I just did this. I don't know if this is what you wanted though.
# NOTE(review): the two loop headers below read "for unit for range(...)",
# which is not valid Python; the second "for" should be "in".
x = 10
y = 10
a = []
for unit for range(x):
a.append(0)
for unit for range(y):
print(a)

I'm not very familiar with Python, but it seems like you want a plain number in the variables image_index and box_coordinateN. It looks like you're assigning single-element arrays to them. Try changing:
image_index = [int(info[0])]  # list containing one element: int(info[0])
box_coordiante1 = [info[2]]
box_coordiante2 = [info[3]]
box_coordiante3 = [info[4]]
box_coordiante4 = [info[5]]
to:
image_index = int(info[0])  # number: int(info[0])
box_coordiante1 = info[2]
box_coordiante2 = info[3]
box_coordiante3 = info[4]
box_coordiante4 = info[5]

The answer above is carelessly sloppy and incorrect Python.
It must be rewritten and corrected as follows:
# Build one row of x zeros, then print that row y times.
x = 10
y = 10
a = []
for unit in range(x):
    a.append(0)
for unit in range(y):
    print(a)

Related

Python compiler giving IndexError when trying to find the similarity matrix

I am trying to make a movie recommendation system which requires me to find the user-user similarity matrix for the top 100 users.
On running the code I get:
similarMatrix[row] = top100_similar
IndexError: index 663 is out of bounds for axis 0 with size 617
Code:
def getUser_UserSimilarity(sparseMatrix, top = 100):
    """Compute cosine similarities for the first ``top`` users and plot timings.

    For each of the first ``top`` distinct row indices that have a non-zero
    entry in ``sparseMatrix``, the cosine similarity of that row against the
    whole matrix is computed and the ``top`` largest values are stored in the
    result row with the same index. Per-user and cumulative timings are
    plotted before returning.

    Parameters
    ----------
    sparseMatrix : sparse matrix supporting ``nonzero()`` and ``getrow()``
    top : int
        Number of users to process and number of similarities kept per user.

    Returns
    -------
    numpy.ndarray
        The similarity matrix (rows that were not processed stay zero).
    """
    startTimestamp20 = datetime.now()
    # nonzero() returns the row indices and the column indices of every
    # non-zero entry; only the distinct row indices are needed here.
    row_index, col_index = sparseMatrix.nonzero()
    rows = np.unique(row_index)
    # NOTE(review): the shape is hard-coded to 617 x 100. A row index >= 617
    # (or top != 100) makes the assignment below fail with an IndexError /
    # shape mismatch; sizing from sparseMatrix.shape[0] and top avoids that.
    similarMatrix = np.zeros(61700).reshape(617,100)
    timeTaken = []
    howManyDone = 0
    for row in rows[:top]:
        howManyDone += 1
        startTimestamp = datetime.now().timestamp()  # seconds since the epoch
        sim = cosine_similarity(sparseMatrix.getrow(row), sparseMatrix).ravel()
        # argsort is ascending, so the last ``top`` positions hold the largest values.
        top100_similar_indices = sim.argsort()[-top:]
        top100_similar = sim[top100_similar_indices]
        similarMatrix[row] = top100_similar
        timeforOne = datetime.now().timestamp() - startTimestamp
        timeTaken.append(timeforOne)
        if howManyDone % 20 == 0:
            print("Time elapsed for {} users = {}sec".format(howManyDone, (datetime.now() - startTimestamp20)))
    print("Average Time taken to compute similarity matrix for 1 user = "+str(sum(timeTaken)/len(timeTaken))+"seconds")

    # Plot the per-user time and the running total.
    fig = plt.figure(figsize = (12,8))
    plt.plot(timeTaken, label = 'Time Taken For Each User')
    plt.plot(np.cumsum(timeTaken), label='Cumulative Time')
    plt.legend(loc='upper left', fontsize = 15)
    plt.xlabel('Users', fontsize = 20)
    plt.ylabel('Time(Seconds)', fontsize = 20)
    plt.tick_params(labelsize = 15)
    plt.show()
    return similarMatrix
simMatrix = getUser_UserSimilarity(TrainUISparseData, 100)
Please tell me where exactly I need to make the changes.
The error is due to the following line
similarMatrix = np.zeros(61700).reshape(617,100)
Your similarMatrix has fewer rows than your sparseMatrix. That's why you are getting the IndexError.
You need to make the dimensions of similarMatrix equal to the dimensions of sparseMatrix. So modify the code as below
similarMatrix = np.zeros(sparseMatrix.shape[0]*100).reshape(sparseMatrix.shape[0],100)
Or, for a simpler structure:
# One row per row of sparseMatrix, 100 similarity slots per row.
n_cols = 100
n_rows = sparseMatrix.shape[0]
similarMatrix = np.zeros((n_rows, n_cols))

Having trouble getting subplots to show up correctly with larger data sets

I am having trouble getting subplots to show up correctly with larger data sets.
I am ok with having the figure grow for my application. I am also ok with having the figure grow such that all the graphs would be about the size of the ones showing up in the small data set example if that is possible. (anaconda3/v4.2.0/python)
# Draw one subplot per (row, column) cell of a rowcnt x colcnt grid and save
# the whole figure as a PNG in datafolder.
plt.rcParams['figure.autolayout']=True
figa, axa = plt.subplots(rowcnt, colcnt)
figa.suptitle("Users Disk Space Usage Over Time.\n")
ax_index = 0
for r in range(rowcnt):
    for c in range(colcnt):
        # NOTE(review): r * c is 0 for the whole first row and first column,
        # so several cells pick the same column of gr; r * colcnt + c is
        # probably what was intended -- confirm against len(gr.columns).
        n = r * c
        user = gr.columns[n]
        ur = gr[user]
        x = ur.index
        y = ur.values
        # Advance until both entries of the current colour pair are valid colours.
        while is_color_like(colorpairs[colorindex]) == False or is_color_like(colorpairs[colorindex+1]) == False :
            colorindex = int((colorindex + 2) % (len(colorpairs)/2))
        axa[r,c].plot(x, y, color=colorpairs[colorindex+1], alpha=0.6)
        plt.setp(axa[r,c].get_xticklabels(), rotation=30)
        if len(x) > 1:
            axa[r,c].fill_between(x, y, color=colorpairs[colorindex],alpha=0.4)
        axa[r,c].set_ylim(0,disksizebytes)
        axa[r,c].set_title(user)
        axa[r,c].set_xlabel('date')
        axa[r,c].set_ylabel('space used')
        axa[r,c].grid(True)
        i += 1
        # Move on to the next colour pair for the next subplot.
        colorindex = int((colorindex + 2) % (len(colorpairs)/2))
detailarryimage = "{}/detailarryimage.png".format(datafolder)
figa.savefig(detailarryimage)
Small Set Image
Large Set Image

ValueError: Invalid RGBA argument: What is the reason of this error?

I am trying to create a 2D colored bar chart
import numpy as np
import matplotlib.pyplot as plt
import pickle
from graphviz import Digraph
from torch.autograd import Variable
import argparse
def make_dot(var):
    '''
    Visualization of the computation graph
    Taken from : https://github.com/szagoruyko/functional-zoo/blob/master/visualize.py

    Starting from ``var.creator``, walks ``previous_functions`` recursively,
    adding one graph node per visited object and one edge per link.
    Returns the populated ``Digraph``.
    '''
    node_attr = dict(style='filled',
                     shape='box',
                     align='left',
                     fontsize='12',
                     ranksep='0.1',
                     height='0.2')
    dot = Digraph(node_attr=node_attr, graph_attr=dict(size="12,12"))
    seen = set()

    def add_nodes(var):
        # Each object is added once; ``seen`` stops repeated visits.
        if var not in seen:
            if isinstance(var, Variable):
                # Variables are labelled with their size, e.g. "(3, 4)".
                value = '('+(', ').join(['%d' % v for v in var.size()])+')'
                dot.node(str(id(var)), str(value), fillcolor='lightblue')
            else:
                # Anything else is labelled with its type name.
                dot.node(str(id(var)), str(type(var).__name__))
            seen.add(var)
            if hasattr(var, 'previous_functions'):
                for u in var.previous_functions:
                    dot.edge(str(id(u[0])), str(id(var)))
                    add_nodes(u[0])

    add_nodes(var.creator)
    return dot
def plot_trajectories(true_trajs, pred_trajs, nodesPresent, obs_length, name, plot_directory, withBackground=False):
    '''
    Plot true and predicted trajectories of every node and save the figure.

    Parameters
    ==========
    true_trajs : Numpy matrix of shape seq_length x numNodes x 2
    Contains the true trajectories of the nodes
    pred_trajs : Numpy matrix of shape seq_length x numNodes x 2
    Contains the predicted trajectories of the nodes
    nodesPresent : A list of lists, of size seq_length
    Each list contains the nodeIDs present at that time-step
    obs_length : Length of observed trajectory
    name : Name of the plot
    plot_directory : Directory the image is saved to when withBackground is false
    withBackground : Include background or not
    '''
    traj_length, numNodes, _ = true_trajs.shape
    # Initialize figure
    plt.figure()

    # Load the background
    # im = plt.imread('plot/background.png')
    # if withBackground:
    #     implot = plt.imshow(im)
    #     width_true = im.shape[0]
    #     height_true = im.shape[1]
    # if withBackground:
    #     width = width_true
    #     height = height_true
    # else:
    width = 1
    height = 1

    # traj_data maps node id -> [list of true positions, list of predicted positions].
    traj_data = {}
    for tstep in range(traj_length):
        pred_pos = pred_trajs[tstep, :]
        true_pos = true_trajs[tstep, :]
        for ped in range(numNodes):
            # An entry is only created while still inside the observed part.
            if ped not in traj_data and tstep < obs_length:
                traj_data[ped] = [[], []]
            if ped in nodesPresent[tstep]:
                traj_data[ped][0].append(true_pos[ped, :])
                traj_data[ped][1].append(pred_pos[ped, :])

    for j in traj_data:
        # NOTE(review): a (3, 1) array is not accepted by matplotlib as a
        # colour and raises "Invalid RGBA argument"; np.random.rand(3) yields
        # the expected (3,) shape.
        c = np.random.rand(3, 1)
        true_traj_ped = traj_data[j][0]  # List of [x,y] elements
        pred_traj_ped = traj_data[j][1]
        # Shift and scale each coordinate: (value + 1) / 2 * size.
        # Presumably the inputs are normalised to [-1, 1] -- TODO confirm.
        true_x = [(p[0]+1)/2*height for p in true_traj_ped]
        true_y = [(p[1]+1)/2*width for p in true_traj_ped]
        pred_x = [(p[0]+1)/2*height for p in pred_traj_ped]
        pred_y = [(p[1]+1)/2*width for p in pred_traj_ped]
        plt.plot(true_x, true_y, color=c, linestyle='solid', marker='o')
        plt.plot(pred_x, pred_y, color=c, linestyle='dashed', marker='x')

    if not withBackground:
        plt.ylim((1, 0))
        plt.xlim((0, 1))
    # plt.show()
    if withBackground:
        plt.savefig('plot_with_background/'+name+'.png')
    else:
        plt.savefig(plot_directory+'/'+name+'.png')
    plt.gcf().clear()
    plt.close()
def main():
    """Load the pickled results for one test dataset and plot every sequence."""
    parser = argparse.ArgumentParser()
    # Experiments
    parser.add_argument('--test_dataset', type=int, default=0,
                        help='test dataset index')
    # Parse the parameters
    args = parser.parse_args()

    # Save directory
    save_directory = 'save/'
    save_directory += str(args.test_dataset) + '/'
    plot_directory = 'plot/'

    # The context manager closes the file even if unpickling fails.
    # NOTE: pickle.load executes arbitrary code from the file; only load
    # result files you produced yourself.
    with open(save_directory+'/results.pkl', 'rb') as f:
        results = pickle.load(f)

    # print "Enter 0 (or) 1 for without/with background"
    # withBackground = int(input())
    withBackground = 1

    for i in range(len(results)):
        # print(i) is valid on both Python 2 and Python 3.
        print(i)
        name = 'sequence' + str(i)
        plot_trajectories(results[i][0], results[i][1], results[i][2], results[i][3], name, plot_directory, withBackground)


if __name__ == '__main__':
    main()
Now I am unable to debug the Invalid RGBA argument because I don't understand what is causing the error. I even tried to use random colors instead with colors = np.random.rand(91,91,4) and still the error persists.
I have checked Stack Overflow posts regarding Invalid RGBA argument (for example this, this, this and this) and none of them seems to answer my problem.
I want to know what could be causing this error. I am using the standard Anaconda distribution for Python on Ubuntu Mate 16.
Could it be that due to recent updates in Python, the solution as in the original Stack Overflow post becomes obsolete?
Just replace
c = np.random.rand(3, 1)
with this:
c = np.random.rand(3)
This removes the error: matplotlib expects a color given as an array to have shape (3,) or (4,), not (3, 1).

How to neaten up this code into a more Pythonic way?

I have plotted a box and whiskers plot for my data using the following code:
def make_labels(ax, boxplot):
    """Annotate a box plot with its median, quartiles, caps and fliers.

    ``boxplot`` is the dictionary of artists for the plot; the first box,
    median and flier artists and the first two caps are read. Each value is
    written with ``ax.text`` slightly to the right of the median line.
    """
    # Grab the artists that make up the box plot.
    iqr = boxplot['boxes'][0]
    caps = boxplot['caps']
    med = boxplot['medians'][0]
    fly = boxplot['fliers'][0]

    # Horizontal position: 10% of the median line's width past its right end.
    xpos = med.get_xdata()
    xoff = 0.1 * (xpos[1] - xpos[0])
    xlabel = xpos[1] + xoff

    # Vertical positions of the statistics.
    median = med.get_ydata()[1]
    pc25 = iqr.get_ydata().min()
    pc75 = iqr.get_ydata().max()
    capbottom = caps[0].get_ydata()[0]
    captop = caps[1].get_ydata()[0]

    # Write the labels.
    ax.text(xlabel, median, 'Median = {:6.3g}'.format(median), va='center')
    ax.text(xlabel, pc25, '25th percentile = {:6.3g}'.format(pc25), va='center')
    ax.text(xlabel, pc75, '75th percentile = {:6.3g}'.format(pc75), va='center')
    ax.text(xlabel, capbottom, 'Bottom cap = {:6.3g}'.format(capbottom), va='center')
    ax.text(xlabel, captop, 'Top cap = {:6.3g}'.format(captop), va='center')
    for flier in fly.get_ydata():
        ax.text(1 + xoff, flier, 'Flier = {:6.3g}'.format(flier), va='center')
and this gives me the following graph:
Now, what I want to do is to grab all the 'Flier' points that we can see in the graph and make it into a list and for that I did the following:
# Flier values collected by boxplots(); shared at module level.
fliers_data = []


def boxplots(boxplot):
    """Append the plot's fliers that lie beyond 1.5 * IQR to ``fliers_data``.

    ``boxplot`` is the dictionary of artists for the plot; the first box and
    the first flier artist are read. A flier is kept when it is strictly
    above ``Q3 + 1.5 * IQR`` or strictly below ``Q1 - 1.5 * IQR``.
    """
    iqr = boxplot['boxes'][0]
    fly = boxplot['fliers'][0]

    # Quartiles are the bottom and top edges of the box.
    pc25 = iqr.get_ydata().min()
    pc75 = iqr.get_ydata().max()
    inter_quart_range = pc75 - pc25
    max_q3 = pc75 + 1.5*inter_quart_range
    min_q1 = pc25 - 1.5*inter_quart_range

    fliers_data.extend(
        flier for flier in fly.get_ydata()
        if flier > max_q3 or flier < min_q1
    )
Now, I have 2 queries:
In both functions, there are a few lines that are similar. Is there a way I can define them once and use them in both the functions?
Can the second function be edited or neatened in a more efficient way?
I think it's mostly quite neat. The only things I can suggest are blank lines between the different parts of the functions, and maybe some comments to tell someone reading the code what each part does.
Something like this, for example:
def myfunction(x):
    """Return True for 10, the string "equals zero" for 0, otherwise False."""
    # checking if x equals 10
    if x == 10:
        return True

    # if equals 0 return string
    elif x == 0:
        return "equals zero"

    # else return false
    else:
        return False
Also, I think you can define any variables that both functions share outside and before the functions (say, at the very start of your code); they will still be accessible inside the functions.

K-Means clustering multidimensional data with a heatmap

I have been trying to implement k-means clustering with a heatmap, but have been unsuccessful.
Here is the initial data set:
https://raw.githubusercontent.com/gsprint23/cpts215/master/progassignments/files/simple.csv
And here is my code:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import math
import random
#%matplotlib inline
def truncate(f, n):
    """Round ``f`` down (toward negative infinity) to ``n`` decimal places."""
    return math.floor(f * 10 ** n) / 10 ** n
def chooseCenter(data, centers):
    """Pick ``centers`` starting centres from randomly chosen cells of ``data``.

    Each chosen cell value is truncated to two decimals before being stored.
    Returns the list of centre values.
    """
    length = data.shape
    cent = []
    while len(cent) < centers :
        x = random.randrange(0,length[0])
        y = random.randrange(0,length[1])
        # NOTE(review): the membership test uses the raw cell value but the
        # truncated value is what gets stored, so two cells that truncate to
        # the same number can both be accepted as centres.
        if data.iloc[x][y] not in cent:
            d = truncate(data.iloc[x][y],2)
            cent.append(d)
    return cent
def distance(val, center):
    """Return the distance between two scalars, i.e. ``|val - center|``."""
    return math.sqrt((val- center)**2)
def getDistances(centers, data):
    """Return, for every cell of ``data``, its distance to each centre.

    The result is a flat list with one entry per cell, visited row by row;
    each entry is a list holding the distance (truncated to three decimals)
    from that cell to every centre, in the order of ``centers``.
    """
    length = data.shape
    dist = []
    for i in range(length[0]):
        for j in range(length[1]):
            y = []
            for k in range(len(centers)):
                val = distance(data.iloc[i][j], centers[k])
                y.append(truncate(val,3))
            dist.append(y)
    return dist
def findClosest(data, dist):
    """Label every cell of ``data`` with the index of its nearest centre.

    ``dist`` is a flat list with one list of per-centre distances for each
    cell, in row-by-row order. The position of the smallest distance (the
    first one on ties) becomes the cell's label. Returns a DataFrame of
    labels with the same shape as ``data``.
    """
    indexes = [per_cell.index(min(per_cell)) for per_cell in dist]
    # Reshape the flat label list back into the shape of the data.
    length = data.shape
    n = np.array(indexes)
    n = pd.DataFrame(np.reshape(n, (length[0],length[1])))
    # Author's notes: keep re-running this until the labels stop changing,
    # then try the heatmap on the result; may still need some tweaking.
    return n
def computeNewCenter(data, close):
    """Return the mean value of the cells assigned to each cluster label.

    ``close`` holds a cluster label for every cell of ``data``. The means are
    truncated to three decimals.
    """
    # First pass: create an empty bucket for every label that occurs.
    # NOTE(review): close[0] / data[0] select the column labelled 0, so
    # len(...) is the number of rows, not of columns; for a non-square frame
    # the j range is wrong (shape[1] is probably what was meant).
    d = dict()
    for i in range(len(close)):
        for j in range(len(close[0])):
            d[close.iloc[i][j]] = []
    # Second pass: drop every data value into the bucket of its label.
    for i in range(len(data)):
        for j in range(len(data[0])):
            if close.iloc[i][j] in d:
                d[close.iloc[i][j]].append(data.iloc[i][j])
    # NOTE(review): the centres come out in the order the labels were first
    # inserted into the dict, which need not be label order 0, 1, 2, ...;
    # callers that index centres by label may be affected -- verify.
    newCenters = []
    for key, value in d.items():
        m = np.mean(value)
        newCenters.append(truncate(m, 3))
    return newCenters
def main():
    """Run the k-means loop on the sample CSV and draw a heatmap."""
    # Load the CSV, treat the first column as labels and flip the row order.
    data = np.array(pd.read_csv('https://raw.githubusercontent.com/gsprint23/cpts215/master/progassignments/files/simple.csv', header=None))
    data = data.T
    df = pd.DataFrame(data[1:], columns=data[0], dtype=float).T
    df = df.iloc[::-1]

    # Initial assignment from three randomly chosen centres.
    c = chooseCenter(df, 3)
    print(c)
    dist = getDistances(c, df)
    y = findClosest(df, dist)
    j = computeNewCenter(df, y)

    # Start from a frame that is replaced by the current labels on the first pass.
    length = df.shape
    oldFrame = pd.DataFrame(np.ndarray((length[0],length[1])))
    oldFrame = oldFrame.fillna(0)
    ct=0
    # Recompute centres and reassign until the labels stop changing.
    while y.equals(oldFrame) == False:
        ct+=1
        oldFrame = y.copy()
        c = computeNewCenter(df, oldFrame)
        dist = getDistances(c, df)
        y = findClosest(df, dist)

    # Gather the data values cluster by cluster (labels 1, then 2, then 0).
    l = []
    for label in (1, 2, 0):
        for i in range(len(y)):
            for j in range(len(y[0])):
                if y.iloc[i][j] == label:
                    l.append(df.iloc[i][j])
    # NOTE(review): the next line rebinds l to a new, uninitialised array, so
    # the values gathered above are discarded and the heatmap shows arbitrary
    # numbers; reshaping the gathered list instead is probably the intent.
    l = np.ndarray((length[0],length[1]))
    l = pd.DataFrame(l)
    print(l)
    hm = plt.pcolor(l, cmap=plt.cm.bwr)
    plt.colorbar(hm)


if __name__ == '__main__':
    main()
My line of thinking was this:
My current thought process was to first randomly choose the centers.
Then create a list of lists for each point for the distance to each center.
Find the index of the minimum distance for each point for each center.
Create a data frame of the same size as the data set and fill each index for each element with the index of the center the point is closest to.
Recompute the center by taking the mean of the points with the same center index
Repeat this process multiple times until the index data frame does not change.
Create a new data frame and add the points which have the same center point close together in the frame.
Then create the heatmap.
This did not seem to work though.
Just wondering, am I on the right track or am I completely off, and if I am on the right track which parts would I need to change in order to fix the issue. If not could you please point me on the right track.
Here is a comparison of the maps:
Here are the maps
The first one is the one my program generated while the second is the way it is supposed to look.
I know my problem lies in some part of the k-means clustering algorithm, and my guess is it is either in the reassignment stage where you reassign the points to the centroids and calculate the new centroids or in the stopping condition in that the algorithm does not run long enough. Also in the back of my head, something tells me that I am not doing this as efficiently as I could have and that I am missing something key. I have watched several videos on K-means clustering and understand it conceptually, I'm just having a hard time implementing it.

Categories

Resources