recursion error while using dfs to find connected components in image - python

As the title says I am trying to find all connected components in an image using recursive dfs.
I based the principle algorithm on the pseudo over here https://www.programiz.com/dsa/graph-dfs
What I get is a recursion depth exceedet error.
Usually I would troubleshoot this by checking the base case for the specific recursion, but I cant seem to find the issue.
Since every pixel is either zero or will be marked as visited at some point in time, I feel the recursion should terminate at some point.
Are there other ways for troubleshooting such recursion errors?
import cv2
import numpy as np
class Image:
def __init__(self, path):
self.img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
self.n, self.m = self.img.shape
self.visited = {(i, j): False for i in range(self.n)
for j in range(self.m)}
self.components = []
def threshold(self):
self.img[self.img >= 200] = 255
self.img[self.img < 200] = 0
def neighbours(self, px):
(i, j) = px
corners = [(i+x, j+y) for x in range(-1, 2) for y in range(-1, 2)
if (i+x, j+y) != (i, j)
and (0 <= i+x < self.n)
and (0 <= j+y < self.m)]
return corners
def dfs(self):
self.threshold()
component = 0
for i in range(self.n):
for j in range(self.m):
if not self.visited.get((i, j)) and self.img[i][j] == 255:
self.components.append([(i, j)])
self.explore((i, j), component)
component += 1
def explore(self, px, component):
self.visited[px] = True
self.components[component].append(px)
for neigh in self.neighbours(px):
if not self.visited.get(neigh) and self.img[neigh[0]][neigh[1]] == 255:
self.explore(neigh, component)
img = Image("dots.png")
img.dfs()
Solved
I had to set the maximum recursion depth
import sys
sys.setrecursionlimit(new_limit)

Related

Setting Enumerate Value to a Variable changes the output of a function

I'm new to the enumeration function, I've only just started to use it. In this code block, I'm enumerating "height" multiple times:
class Solution(object):
def maxArea(self, height):
"""
:type height: List[int]
:rtype: int
"""
if len(height) == 2:
return (min(height))
maxArea = -1
for i, a in enumerate(height):
maxIndex, maxDistance = i, 0
for j, s in enumerate(height):
distance = abs(j-i)
if distance>maxDistance and s>=a:
maxIndex, maxDistance = j, distance
area = abs(maxIndex - i) * a
maxArea = max(area,maxArea)
return maxArea
I thought a way to optimize this was to set enumerate(height) to a variable to decrease the amount of times the height was put into the enumerate function:
class Solution(object):
def maxArea(self, height):
"""
:type height: List[int]
:rtype: int
"""
if len(height) == 2:
return (min(height))
maxArea = -1
enumeratedHeight = enumerate(height)
for i, a in enumeratedHeight:
maxIndex, maxDistance = i, 0
for j, s in enumeratedHeight:
distance = abs(j-i)
if distance>maxDistance and s>=a:
maxIndex, maxDistance = j, distance
area = abs(maxIndex - i) * a
maxArea = max(area,maxArea)
return maxArea
I tested this new function on this case [1,8,6,2,5,4,8,3,7]. The before function correctly answered 49, but this one returns 8?
(this code is to solve Container With Most Water on Leetcode, I know it's inefficient, but I've been told to write inefficient code if I can't find an efficient solution)

Why k-means clustering give me different answers when initialized with different centroids?

I followed the pseudo code for k-means clustering to write this code. This code gives different answers when initialized the clusters' centroids with different values and none of those answers are correct. Can you help me please?
I tested with 15 nodes, tolerance = 0.00001 and iterations = 100000
Thanks in advance.
class kMeans:
def __init__(self, coordinates, tolerance, iter, nof):
self.grid = coordinates
self.N = coordinates.shape[0]
self.t = tolerance
self.nof = nof
self.f = None
def kMeans(self, nof):
assign = [0]*self.N
self.fac = np.empty([nof,2])
for i in range(nof):
for j in range(2):
self.fac[i,j] = self.grid[i+10,j]
for itr in range(iter):
for n in range(self.N):
distance = [0]*nof
for f in range(nof):
distance[f] = math.sqrt((self.grid[n,0]-self.fac[f,0])**2 + (self.grid[n,1]-self.fac[f,1])**2 )
assign[n] = np.argmin(distance)
for fa in range(nof):
l = []
x,y = 0,0
for asg in range(self.N):
if fa == assign[asg]:
l.append(asg)
x = np.mean(self.grid[l,0])
y = np.mean(self.grid[l,1])
if abs(x-self.fac[fa,0]) >= self.t:
self.fac[fa,0] = x
if abs(y-self.fac[fa,1]) >= self.t:
self.fac[fa,1] = y
continue
print('dist:',distance)
print('assign:',assign)
print('fac:',self.fac)
print('locate:', self.grid[l,1])
self.f = self.fac
return self.fac
'''

Python - high disk usage in SumTree

I've encountered some weird behaviour of my python program. Basically when I tried to create adn fill a SumTree of length larger than 1000, my disk usage increases a lot to ~300MB/s then the programme died.
I'm pretty sure there's no file r/w involved in this process, and the problem is with the add function. The code is shown below.
import numpy as np
class SumTree():
trans_idx = 0
def __init__(self, capacity):
self.num_samples = 0
self.capacity = capacity
self.tree = np.zeros(2 * capacity - 1)
self.transitions = np.empty(self.capacity, dtype=object)
def add(self, p, experience):
tree_idx = self.trans_idx + self.capacity - 1
self.transitions[self.trans_idx] = experience
self.transitions.append(experience)
self.update(tree_idx, p)
self.trans_idx += 1
if self.trans_idx >= self.capacity:
self.trans_idx = 0
self.num_samples = min(self.num_samples + 1, self.capacity)
def update(self, tree_idx, p):
diff = p - self.tree[tree_idx]
self.tree[tree_idx] = p
while tree_idx != 0:
tree_idx = (tree_idx - 1) // 2
self.tree[tree_idx] += diff
def get_leaf(self, value):
parent_idx = 0
while True:
childleft_idx = 2 * parent_idx + 1
childright_idx = childleft_idx + 1
if childleft_idx >= len(self.tree):
leaf_idx = parent_idx
break
else:
if value <= self.tree[childleft_idx]:
parent_idx = childleft_idx
else:
value -= self.tree[childleft_idx]
parent_idx = childright_idx
data_idx = leaf_idx - self.capacity + 1
return leaf_idx, self.tree[leaf_idx], self.transitions[data_idx]
#property
def total_p(self):
return self.tree[0] # the root
#property
def volume(self):
return self.num_samples # number of transistions stored
Here's an example where this SumTree object will be used:
def add(self, experience)
max_p = np.max(self.tree.tree[-self.tree.capacity:])
if max_p == 0:
max_p = 1.0
exp = self.Experience(*experience)
self.tree.add(max_p, exp)
where Experience is a named tuple and self.tree is a Sumtree instance, when I removed the last line the high disk usage disappears.
Can anyone help me with this?
I finally sort this out because each experience is a tuple of namedtuple and I'm creating another namedtuple Experience from it. Fixed by changing experience to a tuple of numpy arrays.

Issue with python __eq__ method in checking if 2 lists are equal

I have a python program in which I have a class called Vector and an empty list inside of that class which is being populated runtime.
Here is the init:
def __init__(self,n):
self.vector = [];
self.n = n;
for x in range(n):
self.vector.append(False);
And here is the eq:
def __eq__(self, other):
t = True
for x in range(self.n):
if self.vector[x] != other.vector[x]:
t = False;
return t
however, when I try to check if 2 objects of this type are equal, I always get true, even though I changed values inside of vector in Vector class.
Here is the code where I do the above:
vectors = []
n = tmp.size();
k = calculateCombinationCount(n,int(n/2))
for i in range(k):
for j in range(0,n-1):
if (tmp.vector[j] != tmp.vector[j+1]):
t = True
for x in vectors:
if x == tmp:
t = False;
if t:
vectors.append(tmp)
tmp.printVector();
tmp.swap(j,j+1);
I would appreciate any help that you can provide. Thank you :)
EDIT:
def swap(self,i,j):
tmp = self.vector[i]
self.vector[i] = self.vector[j]
self.vector[j] = tmp
def calculateCombinationCount(n,r):
k = factorial(n)/(factorial(int(r))*factorial(int(n-r)))
return int(k)
Right so I've updated your code to be much more pythonic (I can tell you come from another language, Java?).
from math import factorial
class Vector:
def __init__(self, size):
self.size = size
self.vector = [False] * size
def __eq__(self, other):
"""
Same if self.size == other.size
"""
assert self.size == other.size, (self.size, other.size)
return self.vector == other.vector
def print_vector(self):
print(self.vector)
def swap(self, i, j):
"""
More efficient and pythonic
"""
self.vector[i], self.vector[j] = self.vector[j], self.vector[i]
def calculate_combination_count(n, r):
"""
This is slow, I'd replace it with scipy.special.comb
https://docs.scipy.org/doc/scipy/reference/generated/scipy.special.comb.html#scipy.special.comb
"""
return factorial(n) // (factorial(r) * factorial(n-r))
tmp = Vector(10)
vectors = []
n = tmp.size
k = calculate_combination_count(n, n // 2)
for i in range(k):
for j in range(0, n-1):
if tmp.vector[j] != tmp.vector[j + 1]:
if not any(vec == tmp for vec in vectors): # much more efficient
vectors.append(tmp)
tmp.print_vector()
tmp.swap(j, j + 1)
else: # Just to prove why it doesn't work
print('tmp.vector is all False: {}'.format(not all(tmp.vector)))
This prints out tmp.vector is all False: True repeatedly. I think this is your problem.
If you

Sorting list of RBG triplets

I've been making a code for finding the dominant colors from an image.
I have a problem when I am printing the result (the result is in the form of a list with RGB triplets; [(244, 181, 28), (230, 146, 38)]).
I want the list to show the RGB-triplet with the most dominant color first but the result always gets printed out with the colors in random order.
Can anyone help me sort this list so I can have the most dominant first and second most and so on?
Here's the code I have so far:
class Cluster(object):
def __init__(self):
self.pixels = []
self.centroid = None
def addPoint(self, pixel):
self.pixels.append(pixel)
def setNewCentroid(self):
R = [colour[0] for colour in self.pixels]
G = [colour[1] for colour in self.pixels]
B = [colour[2] for colour in self.pixels]
R = sum(R) / len(R)
G = sum(G) / len(G)
B = sum(B) / len(B)
self.centroid = (R, G, B)
self.pixels = []
return self.centroid
class Kmeans(object):
def __init__(self, k=2, max_iterations=5, min_distance=2.0, size=200):
self.k = k
self.max_iterations = max_iterations
self.min_distance = min_distance
self.size = (size, size)
def run(self, image):
self.image = image
self.image.thumbnail(self.size)
self.pixels = numpy.array(image.getdata(), dtype=numpy.uint8)
self.clusters = [None for i in range(self.k)]
self.oldClusters = None
randomPixels = random.sample(self.pixels, self.k)
for idx in range(self.k):
self.clusters[idx] = Cluster()
self.clusters[idx].centroid = randomPixels[idx]
iterations = 0
while self.shouldExit(iterations) is False:
self.oldClusters = [cluster.centroid for cluster in self.clusters]
print iterations
for pixel in self.pixels:
self.assignClusters(pixel)
for cluster in self.clusters:
print len(cluster.pixels)
cluster.setNewCentroid()
iterations += 1
return [cluster.centroid for cluster in self.clusters]
def assignClusters(self, pixel):
shortest = float('Inf')
for cluster in self.clusters:
distance = self.calcDistance(cluster.centroid, pixel)
if distance < shortest:
shortest = distance
nearest = cluster
nearest.addPoint(pixel)
def calcDistance(self, a, b):
result = numpy.sqrt(sum((a - b) ** 2))
return result
def shouldExit(self, iterations):
if self.oldClusters is None:
return False
for idx in range(self.k):
dist = self.calcDistance(
numpy.array(self.clusters[idx].centroid),
numpy.array(self.oldClusters[idx])
)
if dist < self.min_distance:
return True
if iterations <= self.max_iterations:
return False
return True

Categories

Resources