I have a typical captcha image which contain only digits.
Ex.
i want to extract 78614 from this image.
I tried few library & code using OCR-Python. But its returning 0.
Sample Code-1
from captcha_solver import CaptchaSolver
solver = CaptchaSolver('browser')
with open('captcha.png', 'rb') as inp:
raw_data = inp.read()
print(solver.solve_captcha(raw_data))
Sample Code-2
from PIL import Image
def p(img, letter):
A = img.load()
B = letter.load()
mx = 1000000
max_x = 0
x = 0
for x in range(img.size[0] - letter.size[0]):
_sum = 0
for i in range(letter.size[0]):
for j in range(letter.size[1]):
_sum = _sum + abs(A[x+i, j][0] - B[i, j][0])
if _sum < mx :
mx = _sum
max_x = x
return mx, max_x
def ocr(im, threshold=200, alphabet="0123456789abcdef"):
img = Image.open(im)
img = img.convert("RGB")
box = (8, 8, 58, 18)
img = img.crop(box)
pixdata = img.load()
letters = Image.open(im)
ledata = letters.load()
# Clean the background noise, if color != white, then set to black.
for y in range(img.size[1]):
for x in range(img.size[0]):
if (pixdata[x, y][0] > threshold) \
and (pixdata[x, y][1] > threshold) \
and (pixdata[x, y][2] > threshold):
pixdata[x, y] = (255, 255, 255, 255)
else:
pixdata[x, y] = (0, 0, 0, 255)
counter = 0;
old_x = -1;
letterlist = []
for x in range(letters.size[0]):
black = True
for y in range(letters.size[1]):
if ledata[x, y][0] <> 0 :
black = False
break
if black :
if True :
box = (old_x + 1, 0, x, 10)
letter = letters.crop(box)
t = p(img, letter);
print counter, x, t
letterlist.append((t[0], alphabet[counter], t[1]))
old_x = x
counter += 1
box = (old_x + 1, 0, 140, 10)
letter = letters.crop(box)
t = p(img, letter)
letterlist.append((t[0], alphabet[counter], t[1]))
t = sorted(letterlist)
t = t[0:5] # 5-letter captcha
final = sorted(t, key=lambda e: e[2])
answer = ""
for l in final:
answer = answer + l[1]
return answer
print(ocr('captcha.png'))
Has anyone had the opportunity to get/extract text from such typical captcha?
You can use machine learning (neural networks) models to solve captchas and it will almost always outperform free OCR or any other method.
Here is a good starting point: https://medium.com/#ageitgey/how-to-break-a-captcha-system-in-15-minutes-with-machine-learning-dbebb035a710
Related
In the above image, how to get a clean line block, remove the block upper and down around the middle long strip broadband? I have tried projection, but failed.
def hProject(binary):
h, w = binary.shape
hprojection = np.zeros(binary.shape, dtype=np.uint8)
[red is the result][2]
h_h = [0]*h
for j in range(h):
for i in range(w):
if binary[j,i] == 255:
h_h[j] += 1
return h_h
def creat_T_rectangle(h, w, mode='x_up'):
assert mode in ['x_up', 'x_down', 'y_left', 'y_right']
if mode == 'x_up':
up_t = np.ones((h*2, w*3), np.uint8)
up_t[:h, :w] = 0
up_t[:h, 2*w:] = 0
return up_t, (0, h, w, 2*w) # y1, y2, x1, x2
elif mode == 'y_left':
left_t = np.ones((h*3, w*2), np.uint8)
left_t[:h, :w] = 0
left_t[2*h:, :w] = 0
return left_t, (h, 2*h, 0, w)
elif mode == 'x_down':
down_t = np.ones((h*2, w*3), np.uint8)
down_t[h:2*h, :w] = 0
down_t[h:2*h, 2*w:] = 0
return down_t, (h, 2*h, w, 2*w)
elif mode == 'y_right':
right_t = np.ones((h*3, w*2), np.uint8)
right_t[:h, w:2*w] = 0
right_t[2*h:, w:] = 0
return right_t, (h, 2*h, w, 2*w)
else:
raise NotImplementedError
def remove_around_rectangle(markers, bh, bw):
'''
markers:binary image, bh, bw = 5, 5 ...
'''
up_t, up_rect = creat_T_rectangle(bh, bw, mode='x_up')
down_t, down_rect = creat_T_rectangle(bh, bw, mode='x_down')
one_nums = up_t.sum()
i = bh
j = bw
while i < markers.shape[0]-bh:
while j < markers.shape[1]-2*bw:
block = markers[i-bh:i+bh, j-bw:j+2*bw]
inner_up = block * up_t
inner_down = block * down_t
if inner_down.sum()//255 == inner_up.sum()//255 == one_nums:
markers[down_rect[0]+i-bh:down_rect[1]+i-bh, down_rect[2]+j-bw:down_rect[3]+j-bw] = 0
else:
if inner_up.sum()//255 == one_nums:
markers[up_rect[0]+i-bh:up_rect[1]+i-bh, up_rect[2]+j-bw:up_rect[3]+j-bw] = 0
if inner_down.sum()//255 == one_nums:
markers[down_rect[0]+i-bh:down_rect[1]+i-bh, down_rect[2]+j-bw:down_rect[3]+j-bw] = 0
j += bw
i += 1
j = bw
left_t, left_rect = creat_T_rectangle(bh, bw, mode='y_left')
one_nums = left_t.sum()
right_t, right_rect = creat_T_rectangle(bh, bw, mode='y_right')
i = bh
j = bw
while i < markers.shape[0] - 2*bh:
while j < markers.shape[1] - bw:
block = markers[i-bh:i+2*bh, j-bw:j+bw]
inner_left = block * left_t
inner_right = block * right_t
if inner_left.sum()//255 == one_nums == inner_right.sum()//255 :
markers[left_rect[0]+i-bh:left_rect[1]+i-bh, left_rect[2]+j-bw:left_rect[3]+j-bw] = 0
else:
if inner_right.sum()//255 == one_nums:
markers[right_rect[0]+i-bh:right_rect[1]+i-bh, right_rect[2]+j-bw:right_rect[3]+j-bw] = 0
if inner_left.sum()//255 == one_nums :
markers[left_rect[0]+i-bh:left_rect[1]+i-bh, left_rect[2]+j-bw:left_rect[3]+j-bw] = 0
j += bw
i += 1
j = bw
return markers
the above is my code, but it is so slow.
I have a little project with OpenCV (python) where one of my steps is to take an x-ray image from the human body and convert it to a binary image where white pixels represent where some bone is present and black means there is no bone there.
Since sometimes "bone parts" can be darker than "non-bone parts" from another region, simple thresholding won't work. I also tried adaptive threshold and I couldn't see much difference.
I came up with a simple algorithm that applies a simple threshold for each row.
Here is the code:
def threshhold(image, val):
image = image.copy()
for row_idx in range(image.shape[0]):
max_row = image[row_idx].max()
min_row = image[row_idx].min()
tresh = np.median(image[row_idx]) + (val * (max_row - min_row))
# Or use np.mean instead of np.median
_, tresh = cv2.threshold(image[row_idx], tresh, 255, cv2.THRESH_BINARY)
image[row_idx] = tresh.ravel()
return image
And here is the code that does the same work but column-by-column instead of row-by-row:
def threshhold2(image, val):
image = image.copy()
for row_idx in range(image.shape[1]):
max_row = image[:, row_idx].max()
min_row = image[:, row_idx].min()
tresh = np.median(image[:, row_idx]) + (val * (max_row - min_row))
# Or use np.mean instead of np.median
_, tresh = cv2.threshold(image[:, row_idx], tresh, 255, cv2.THRESH_BINARY)
image[:, row_idx] = tresh.ravel()
return image
This method works pretty well with images like this:
Not quite well for this one but it is not that bad:
Very terrible:
Only the left half looks good
...
As you can see; this algorithm works well only for some images.
I will be glad to see more experienced people's ideas.
Images are not for me by the way.
Entire source code:
import os
import cv2
import numpy as np
files_to_see = os.listdir("data_set")
files_to_see.sort()
current_file = 0
print(files_to_see)
def slice(image, size):
out = []
x_count = image.shape[1] // size
y_count = image.shape[0] // size
for y_idx in range(y_count):
for x_idx in range(x_count):
out.append(
(
(y_idx, x_idx),
image[y_idx * size: (y_idx + 1) * size,
x_idx * size: (x_idx + 1) * size]
)
)
return y_count, x_count, out
def normalize(image):
image = image.copy()
min_pix = image.min()
max_pix = image.max()
for y in range(image.shape[0]):
for x in range(image.shape[1]):
val = image[y, x]
val -= min_pix
val *= 255 / (max_pix - min_pix)
image[y, x] = round(val)
# image -= min_pix
# image *= round(255 / (max_pix - min_pix))
return image
def threshhold(image, val, method):
image = image.copy()
for row_idx in range(image.shape[0]):
max_row = image[row_idx].max()
min_row = image[row_idx].min()
# tresh = np.median(image[row_idx]) + (val * (max_row - min_row))
tresh = method(image[row_idx]) + (val * (max_row - min_row))
_, tresh = cv2.threshold(image[row_idx], tresh, 255, cv2.THRESH_BINARY)
image[row_idx] = tresh.ravel()
return image
def threshhold2(image, val, method):
image = image.copy()
for row_idx in range(image.shape[1]):
max_row = image[:, row_idx].max()
min_row = image[:, row_idx].min()
tresh = method(image[:, row_idx]) + (val * (max_row - min_row))
_, tresh = cv2.threshold(image[:, row_idx], tresh, 255, cv2.THRESH_BINARY)
image[:, row_idx] = tresh.ravel()
return image
def recalculate_threshhold(v):
global original_current_image, thresh_current_image, y_c, x_c, slices
method = np.mean
if cv2.getTrackbarPos("method", "xb labeler") == 0:
method = np.median
thresh_current_image = threshhold2(original_current_image, cv2.getTrackbarPos("threshhold_value", "xb labeler") / 1000, method)
y_c, x_c, slices = slice(thresh_current_image, 128)
def thresh_current_image_mouse_event(event, x, y, flags, param):
if event == 1:
print(x // 128, y // 128)
cv2.imshow("slice", slices[(x // 128) + (y // 128) * x_c][1])
cv2.namedWindow("xb labeler")
cv2.createTrackbar("threshhold_value", "xb labeler", 0, 1000, recalculate_threshhold)
cv2.createTrackbar("method", "xb labeler", 0, 1, recalculate_threshhold)
cv2.namedWindow("thresh_current_image")
cv2.setMouseCallback("thresh_current_image", thresh_current_image_mouse_event)
def init():
global original_current_image, thresh_current_image, x_c, y_c, slices, files_to_see, current_file
original_current_image = cv2.imread("data_set/" + files_to_see[current_file], cv2.CV_8UC1)
original_current_image = cv2.resize(original_current_image, (512, 512))
original_current_image = normalize(original_current_image)
original_current_image = cv2.GaussianBlur(original_current_image, (5, 5), 10)
recalculate_threshhold(1)
y_c, x_c, slices = slice(thresh_current_image, 128)
init()
while True:
cv2.imshow("thresh_current_image", thresh_current_image)
cv2.imshow("xb labeler", original_current_image)
k = cv2.waitKey(1)
if k == ord('p'):
cv2.imwrite("ssq.png", thresh_current_image)
current_file += 1
init()
cv2.destroyAllWindows()
EDIT: Added original images:
I want to detect crop rows using aerial images(CRBD). I have done the necessary image processing like converting to grayscale, edge detection, skeletonization, Hough Transform(to identify and draw the lines), and I also set the accumulator angle to math.pi*4.0/180, which I varied time after time.
The algorithm works well at detection approximately 4 crop lines, I want to improve it so that it can detect variable number of crop rows, and it should be able to highlight this crop rows
Here is a link to the sample code I modified Here
import os
import os.path
import time
import cv2
import numpy as np
import math
### Setup ###
image_data_path = os.path.abspath('../8470p/CRBD/Images')
gt_data_path = os.path.abspath('../8470p/GT data')
image_out_path = os.path.abspath('../8470p/algorithm_1')
use_camera = False # whether or not to use the test images or camera
images_to_save = [2, 3, 4, 5] # which test images to save
timing = False # whether to time the test images
curr_image = 0 # global counter
HOUGH_RHO = 2 # Distance resolution of the accumulator in pixels
HOUGH_ANGLE = math.pi*4.0/18 # Angle resolution of the accumulator in radians
HOUGH_THRESH_MAX = 80 # Accumulator threshold parameter. Only those lines are
returned that get votes
HOUGH_THRESH_MIN = 10
HOUGH_THRESH_INCR = 1
NUMBER_OF_ROWS = 10 # how many crop rows to detect
THETA_SIM_THRESH = math.pi*(6.0/180) # How similar two rows can be
RHO_SIM_THRESH = 8 # How similar two rows can be
ANGLE_THRESH = math.pi*(30.0/180) # How steep angles the crop rows can be in
radians
def grayscale_transform(image_in):
'''Converts RGB to Grayscale and enhances green values'''
b, g, r = cv2.split(image_in)
return 2*g - r - b
def save_image(image_name, image_data):
'''Saves image if user requests before runtime'''
if curr_image in images_to_save:
image_name_new = os.path.join(image_out_path, "
{0}_{1}.jpg".format(image_name,
str(curr_image) ))
def skeletonize(image_in):
'''Inputs and grayscale image and outputs a binary skeleton image'''
size = np.size(image_in)
skel = np.zeros(image_in.shape, np.uint8)
ret, image_edit = cv2.threshold(image_in, 0, 255, cv2.THRESH_BINARY |
cv2.THRESH_OTSU)
element = cv2.getStructuringElement(cv2.MORPH_CROSS, (3,3))
done = False
while not done:
eroded = cv2.erode(image_edit, element)
temp = cv2.dilate(eroded, element)
temp = cv2.subtract(image_edit, temp)
skel = cv2.bitwise_or(skel, temp)
image_edit = eroded.copy()
zeros = size - cv2.countNonZero(image_edit)
if zeros == size:
done = True
return skel
def tuple_list_round(tuple_list, ndigits_1=0, ndigits_2=0):
'''Rounds each value in a list of tuples to the number of digits
specified
'''
new_list = []
for (value_1, value_2) in tuple_list:
new_list.append( (round(value_1, ndigits_1), round(value_2,
ndigits_2)) )
return new_list
def crop_point_hough(crop_points):
'''Iterates though Hough thresholds until optimal value found for
the desired number of crop rows. Also does filtering.
'''
height = len(crop_points)
width = len(crop_points[0])
hough_thresh = HOUGH_THRESH_MAX
rows_found = False
while hough_thresh > HOUGH_THRESH_MIN and not rows_found:
crop_line_data = cv2.HoughLines(crop_points, HOUGH_RHO, HOUGH_ANGLE,
hough_thresh)
crop_lines = np.zeros((height, width, 3), dtype=np.uint8)
crop_lines_hough = np.zeros((height, width, 3), dtype=np.uint8)
if crop_line_data is not None:
# get rid of duplicate lines. May become redundant if a similarity
threshold is done
crop_line_data_1 = tuple_list_round(crop_line_data[:,0,:],-1, 4)
crop_line_data_2 = []
x_offsets = []
crop_lines_hough = np.zeros((height, width, 3), dtype=np.uint8)
for (rho, theta) in crop_line_data_1:
a = math.cos(theta)
b = math.sin(theta)
x0 = a*rho
y0 = b*rho
point1 = (int(round(x0+1000*(-b))), int(round(y0+1000*(a))))
point2 = (int(round(x0-1000*(-b))), int(round(y0-1000*(a))))
cv2.line(crop_lines_hough, point1, point2, (0, 0, 255), 2)
for curr_index in range(len(crop_line_data_1)):
(rho, theta) = crop_line_data_1[curr_index]
is_faulty = False
if ((theta >= ANGLE_THRESH) and (theta <= math.pi-
ANGLE_THRESH)) or(theta <= 0.001):
is_faulty = True
else:
for (other_rho, other_theta) in
crop_line_data_1[curr_index+1:]:
if abs(theta - other_theta) < THETA_SIM_THRESH:
is_faulty = True
elif abs(rho - other_rho) < RHO_SIM_THRESH:
is_faulty = True
if not is_faulty:
crop_line_data_2.append( (rho, theta) )
for (rho, theta) in crop_line_data_2:
a = math.cos(theta)
b = math.sin(theta)
c = math.tan(theta)
x0 = a*rho
y0 = b*rho
point1 = (int(round(x0+1000*(-b))), int(round(y0+1000*(a))))
point2 = (int(round(x0-1000*(-b))), int(round(y0-1000*(a))))
cv2.line(crop_lines, point1, point2, (0, 0, 255), 2)
#cv2.circle(crop_lines, (np.clip(int(round(a*rho+c*
#(0.5*height))),0 ,239), 0), 4, (255,0,0), -1)
#cv2.circle(crop_lines, (np.clip(int(round(a*rho-c*
#(0.5*height))),0 ,239), height), 4, (255,0,0), -1)
cv2.circle(crop_lines, (np.clip(int(round(rho/a)),0 ,239), 0), 5,
(255,0,0), -1)
#cv2.circle(img,(447,63), 63, (0,0,255), -1)
x_offsets.append(np.clip(int(round(rho/a)),0 ,239))
cv2.line(crop_lines, point1, point2, (0, 0, 255), 2)
if len(crop_line_data_2) >= NUMBER_OF_ROWS:
rows_found = True
hough_thresh -= HOUGH_THRESH_INCR
if rows_found == False:
print(NUMBER_OF_ROWS, "rows_not_found")
x_offset = min (x_offsets)
width = max (x_offsets) - min (x_offsets)
return (crop_lines, crop_lines_hough, x_offset, width)
def crop_row_detect(image_in):
'''Inputs an image and outputs the lines'''
save_image('0_image_in', image_in)
### Grayscale Transform ###
image_edit = grayscale_transform(image_in)
save_image('1_image_gray', image_edit)
### Skeletonization ###
skeleton = skeletonize(image_edit)
save_image('2_image_skeleton', skeleton)
### Hough Transform ###
(crop_lines, crop_lines_hough, x_offset, width) =
crop_point_hough(skeleton)
save_image('3_image_hough',cv2.addWeighted(image_in, 1,
crop_lines_hough, 1, 0.0))
save_image('4_image_lines',cv2.addWeighted(image_in, 1,crop_lines,1,0.0))
return (crop_lines , x_offset, width)
def main():
if use_camera == False:
diff_times = []
for image_name in sorted(os.listdir(image_data_path)):
global curr_image
curr_image += 1
start_time = time.time()
image_path = os.path.join(image_data_path, image_name)
image_in = cv2.imread(image_path)
crop_lines = crop_row_detect(image_in)
if timing == False:
cv2.imshow(image_name, cv2.addWeighted(image_in, 1,
crop_lines, 1, 0.0))
print('Press any key to continue...')
cv2.waitKey()
cv2.destroyAllWindows()
### Timing ###
else:
diff_times.append(time.time() - start_time)
mean = 0
for diff_time in diff_times:
mean += diff_time
### Display Timing ###
print('max time = {0}'.format(max(diff_times)))
print('ave time = {0}'.format(1.0 * mean / len(diff_times)))
cv2.waitKey()
else: # use camera. Hasn't been tested on a farm.
capture = cv2.VideoCapture(0)
while cv2.waitKey(1) < 0:
_, image_in = capture.read()
(crop_lines, x_offset, width) = crop_row_detect(image_in)
cv2.imshow("Webcam", cv2.addWeighted(image_in, 1, crop_lines, 1,
0.0))
capture.release()
cv2.destroyAllWindows()
main()
Input Image
[![Input Image][1]][1]
Output Image
[![][4]][4]
Expected Output
[![Expected Output][5]][5]
I have tried thresholding with cv2.inRange() to find green lines, but am still not getting the desired out.
Also the algorithms seems to be only draw the crop_line_data_2 as shown in the Output Image, it doesn't draw the crop_line_data_1
def threshold_green(image_in):
hsv = cv2.cvtColor(image_in, cv2.COLOR_BGR2HSV)
## mask of green (36,25,25) ~ (86, 255,255)
# mask = cv2.inRange(hsv, (36, 25, 25), (86, 255,255))
mask = cv2.inRange(hsv, (36, 25, 25), (70, 255,255))
## slice the green
imask = mask>0
green = np.zeros_like(image_in, np.uint8)
green[imask] = image_in[imask]
return green
I want to draw a box around an image where there are black pixels.
this is my image:(quiz.JPG):
and i would like the image to be something like this:
but with the code below I get this(quiz2.JPG):
here is what i have tried:
from PIL import Image,ImageDraw
image = Image.open("quiz.JPG").convert('L') #open and convert to black and white
pixels = image.load()
width, height = image.size
black_pixels= []
for col in range(width):
for row in range(height):
pixel = pixels[col, row]
if pixel == (0): #select pixels that are black
black_pixels.append((col, row)) #add them to the list of black pixels
def sortCoordinates(lst): #this is just a bubble sort to get the coordinates in order of y values
length = len(lst) -1
solved = False
while not solved:
solved = True
for i in range(length):
if lst[i][1] > lst[i+1][1]:
solved = False
lst[i],lst[i+1] = lst[i+1],lst[i]
return lst
black_pixels = sortCoordinates(black_pixels)
def getShape(black_pixels): # function to generate box from the pixels given
firstPixelXValue = black_pixels[0][0]
lastPixelXValue = black_pixels[-1][0]
tallestPixel = 0
for pixel in black_pixels:
YValue = pixel[1]
if YValue > tallestPixel:
tallestPixel = YValue
topMostPixelYValue = tallestPixel
shortestPixel = pixel[1]
for pixel in black_pixels:
YValue = pixel[1]
if YValue < shortestPixel:
shortestPixel = YValue
bottomMostPixelYValue = shortestPixel
shape = [(firstPixelXValue-4, topMostPixelYValue+2), (lastPixelXValue+4,bottomMostPixelYValue-3)]
return shape
questions = {}
count = 0
count2 = 0
currentQuestionPixels = []
oldPixelY = black_pixels[0][1]
for newPixel in black_pixels:
newPixelY = newPixel[1]
if newPixelY > oldPixelY + 100 or len(black_pixels)-1 == count2: #run this if new question is found
questions[str(count)] = getShape(currentQuestionPixels)
currentQuestionPixels = []
count += 1
oldPixelY = newPixelY
currentQuestionPixels.append(newPixel)
count2 += 1
image = image.convert('RGB')
for question in questions: #draw shape around all questions found
shape = questions.get(question)
img1 = ImageDraw.Draw(image)
img1.rectangle(shape, outline ="red")
image.save("quiz2.JPG")
This should work, in theory, because I am getting all the black pixel coordinates and sorting them out into their Y values. Then I am looping through the black pixels and if the new pixel Y value is grater than the old pixel Y value + 100 or it is the last pixel it must be a new question.
So then I draw a box around all the questions
I've modified the getShape function
Check out the code below
from PIL import Image,ImageDraw
image = Image.open("quiz.JPG").convert('L') #open and convert to black and white
pixels = image.load()
width, height = image.size
black_pixels= []
for col in range(width):
for row in range(height):
pixel = pixels[col, row]
if pixel == (0): #select pixels that are black
black_pixels.append((col, row)) #add them to the list of black pixels
def sortCoordinates(lst): #this is just a bubble sort to get the coordinates in order of y values
length = len(lst) -1
solved = False
while not solved:
solved = True
for i in range(length):
if lst[i][1] > lst[i+1][1]:
solved = False
lst[i],lst[i+1] = lst[i+1],lst[i]
return lst
black_pixels = sortCoordinates(black_pixels)
def getShape(black_pixels): # function to generate box from the pixels given
first_x = 0
first_y = 0
last_x = 99999999
last_y = 99999999
for x, y in black_pixels:
if first_x < x :
first_x = x
if first_y < y :
first_y = y
if last_x > x :
last_x = x
if last_y > y :
last_y = y
shape = [(first_x-4, first_y+2), (last_x+4,last_y-3)]
return shape
questions = {}
count = 0
count2 = 0
currentQuestionPixels = []
oldPixelY = black_pixels[0][1]
for newPixel in black_pixels:
newPixelY = newPixel[1]
if newPixelY > oldPixelY + 100 or len(black_pixels)-1 == count2: #run this if new question is found
questions[str(count)] = getShape(currentQuestionPixels)
currentQuestionPixels = []
count += 1
oldPixelY = newPixelY
currentQuestionPixels.append(newPixel)
# print(currentQuestionPixels)
count2 += 1
image = image.convert('RGB')
for question in questions: #draw shape around all questions found
shape = questions.get(question)
print(shape)
img1 = ImageDraw.Draw(image)
img1.rectangle(shape, outline ="red")
image.save("quiz2.JPG")
I want to use the function cv2.connectedComponents to connect components on a binary image, like the following...
I have added the feature to cv2. connectedComponents to eliminate elements with a small number of pixels.
Unfortunately, the algorithm is extremely slow for large images due to the extension. Is there a way to rewrite the extension to speed up the algorithm?
import cv2
import numpy as np
def zerolistmaker(n):
listofzeros = [0] * n
return listofzeros
img = cv2.imread('files/motorway/gabor/eGaIy.jpg', 0)
img = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY)[1] # ensure binary
retval, labels = cv2.connectedComponents(img)
##################################################
# ENLARGEMENT
##################################################
sorted_labels = labels.ravel()
sorted_labels = np.sort(sorted_labels)
maxPixel = 50 # eliminate elements with less than maxPixel
# detect how often an element occurs
i=0
counter=0
counterlist = zerolistmaker(retval)
while i < len(sorted_labels):
if sorted_labels[i] == counter:
counterlist[counter] = counterlist[counter] + 1
else:
counter = counter + 1
i = i - 1
i = i + 1
# delete small pixel values
i=0
while i < len(counterlist):
if counterlist[i] < maxPixel:
counterlist[i] = 0
i = i + 1
i=0
counterlisthelper = []
while i < len(counterlist):
if counterlist[i] == 0:
counterlisthelper.append(i)
i = i + 1
i=0
j=0
k=0
while k < len(counterlisthelper):
while i < labels.shape[0]:
while j < labels.shape[1]:
if labels[i,j] == counterlisthelper[k]:
labels[i,j] = 0
else:
labels[i,j] = labels[i,j]
j = j + 1
j = 0
i = i + 1
i = 0
j = 0
k = k + 1
##################################################
##################################################
# Map component labels to hue val
label_hue = np.uint8(179*labels/np.max(labels))
blank_ch = 255*np.ones_like(label_hue)
labeled_img = cv2.merge([label_hue, blank_ch, blank_ch])
# cvt to BGR for display
labeled_img = cv2.cvtColor(labeled_img, cv2.COLOR_HSV2BGR)
# set bg label to black
labeled_img[label_hue==0] = 0
cv2.imshow('labeled.png', labeled_img)
cv2.waitKey()
In python, you should avoid deep loop. Prefer to use numpy other than python-loop.
Imporved:
##################################################
ts = time.time()
num = labels.max()
N = 50
## If the count of pixels less than a threshold, then set pixels to `0`.
for i in range(1, num+1):
pts = np.where(labels == i)
if len(pts[0]) < N:
labels[pts] = 0
print("Time passed: {:.3f} ms".format(1000*(time.time()-ts)))
# Time passed: 4.607 ms
##################################################
Result:
The whole code:
#!/usr/bin/python3
# 2018.01.17 22:36:20 CST
import cv2
import numpy as np
import time
img = cv2.imread('test.jpg', 0)
img = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY)[1] # ensure binary
retval, labels = cv2.connectedComponents(img)
##################################################
ts = time.time()
num = labels.max()
N = 50
for i in range(1, num+1):
pts = np.where(labels == i)
if len(pts[0]) < N:
labels[pts] = 0
print("Time passed: {:.3f} ms".format(1000*(time.time()-ts)))
# Time passed: 4.607 ms
##################################################
# Map component labels to hue val
label_hue = np.uint8(179*labels/np.max(labels))
blank_ch = 255*np.ones_like(label_hue)
labeled_img = cv2.merge([label_hue, blank_ch, blank_ch])
# cvt to BGR for display
labeled_img = cv2.cvtColor(labeled_img, cv2.COLOR_HSV2BGR)
# set bg label to black
labeled_img[label_hue==0] = 0
cv2.imshow('labeled.png', labeled_img)
cv2.imwrite("labeled.png", labeled_img)
cv2.waitKey()