Python - Detect a QR code from an image and crop using OpenCV - python

I'm working on a project using Python(3.7) and OpenCV in which I have an Image(captured using the camera) of a document with a QR code placed on it.
This QR code has 6 variables respectively as:
Size of QR code image
Top
Right
Bottom
Left
Unit
Latest Update:
Here are the steps I need to perform in the same order:
Detect the qr code & decode it to read size values
So, if the size of QR-code(image) is not equal to the size which is mentioned inside it then scale the image to equal both size values.
Then crop the image towards all sides from QR code image according to the values mentioned inside qr code.
I have tried this code:
def decodeAndCrop(inputImage):
print(str(inputImage))
image = cv2.imread(str(inputImage))
qrCodeDetector = cv2.QRCodeDetector()
decodedText, points, _ = qrCodeDetector.detectAndDecode(image)
qr_data = decodedText.split(",")
print("qr data from fucntion: {}".format(qr_data))
if points is not None:
pts = len(points)
# print(pts)
for i in range(pts):
nextPointIndex = (i + 1) % pts
if str(inputImage) == "scaled_img.jpg":
cv2.line(
image,
tuple(points[i][0]),
tuple(points[nextPointIndex][0]),
(255, 0, 0),
5,
)
print(points[i][0])
width = int(
math.sqrt(
(points[0][0][0] - points[1][0][0]) ** 2
+ (points[0][0][1] - points[1][0][1]) ** 2
)
)
height = int(
math.sqrt(
(points[1][0][0] - points[2][0][0]) ** 2
+ (points[1][0][1] - points[2][0][1]) ** 2
)
)
print("height and width after scaling: {} {}".format(height, width))
if not str(inputImage) == "scaled_img.jpg":
scaled_img = None
if width == qr_data[0] and height == qr_data[0]:
print("Sizes are equal")
# Add the extension values to points and crop
y = int(points[0][0][1]) - int(qr_data[1])
x = int(points[0][0][0]) - int(qr_data[4])
roi = image[
y : y + height + int(qr_data[3]), x : x + width + int(qr_data[2])
]
scaled_img = cv2.imwrite("scaled_img.jpg", roi)
return scaled_img
else:
print(
"Width and height "
+ str(width)
+ "x"
+ str(height)
+ " not equal to "
+ str(qr_data[0])
+ "x"
+ str(qr_data[0])
)
if height > int(qr_data[0]):
scale_width = int(width) - int(qr_data[0])
scale_height = int(height) - int(qr_data[0])
print(f"scaled width: {scale_width} scaled height: {scale_height}")
dimension = (scale_width, scale_height)
scaled_img = cv2.resize(
image, dimension, interpolation=cv2.INTER_AREA
)
print("new img dims: {}".format(scaled_img.shape))
cv2.imshow("scaled image:", scaled_img)
cv2.imwrite("scaled_img.jpg", scaled_img)
elif height < int(qr_data[0]):
scale_width = int(qr_data[0]) - width
scale_height = int(qr_data[0] - height)
print(f"scaled width: {scale_width} scaled height: {scale_height}")
dimension = (scale_width, scale_height)
scaled_img = cv2.resize(
image, dimension, interpolation=cv2.INTER_AREA
)
print("new img dims: {}".format(scaled_img.shape))
cv2.imshow("scaled image:", scaled_img)
cv2.imwrite("scaled_img.jpg", scaled_img)
cv2.imshow("final output:", roi)
return scaled_img
else:
y = int(points[0][0][1]) - int(qr_data[1])
x = int(points[0][0][0]) - int(qr_data[4])
print(" x and y")
print(x)
print(y)
roi = image[
y : y + height + int(qr_data[3]), x : x + width + int(qr_data[2])
]
final_img = cv2.imwrite("finalized_image.jpg", roi)
cv2.imshow("finalized image:", final_img)
return final_img
if __name__ == "__main__":
image_to_crop = decodeAndCrop("example_input_1.jpg")
final_image = decodeAndCrop("scaled_img.jpg")
cv2.imshow("Cropped:", image_to_crop)
# cv2.imshow("Final: ", final_image)
cv2.waitKey(0)
cv2.destroyAllWindows()
The code above gives an error as:
final_img = cv2.imwrite("finalized_image.jpg", roi)
cv2.error: OpenCV(4.2.0) /Users/travis/build/skvark/opencv-python/opencv/modules/imgcodecs/src/loadsave.cpp:715: error: (-215:Assertion failed) !_img.empty() in function 'imwrite'
End of Latest Update:
An example decoded information of a QR code is as: 100, 20, 40, 60, 20, px
Now, I need to detect the QR code from this document image and in the first step I need to compare the size of QR code in captured image of document with the size which is mentioned in the decoded information for example if in the captured image the size of the QR image is 90X90px and the size from decoded info is 100X100px we need to compare that.
Then, in the second step I have to crop the complete image by using the Top, Right, Bottom & Left variables accordingly. According to the above example we need to crop the image from the position of detected QR code to 20px Top, 40px Right, 60px Bottom and 20px Right. I have added an example Image below.
I have done to decode the QR code information but how can I take the detected QR code area as a seprate image and compare it's size with the mentioned size and then crop the Image accordingly?
Here's what I have tried so far:
import cv2
image = cv2.imread('/Users/abdul/PycharmProjects/QScanner/images/second.jpg')
qrCodeDetector = cv2.QRCodeDetector()
decodedText, points, _ = qrCodeDetector.detectAndDecode(image)
qr_data = decodedText.split(',')
qr_size = qr_data[0]
top = qr_data[1]
right = qr_data[2]
bottom = qr_data[3]
left = qr_data[4]
print(f'Size: {qr_size}' + str(qr_data[5]))
print(f'Top: {top}')
print(f'Right: {right}')
print(f'Bottom: {bottom}')
print(f'Left: {left}')
if points is not None:
pts = len(points)
print(pts)
for i in range(pts):
nextPointIndex = (i+1) % pts
cv2.line(image, tuple(points[i][0]), tuple(points[nextPointIndex][0]), (255,0,0), 5)
print(points[i][0])
print(decodedText)
cv2.imshow("Image", image)
cv2.waitKey(0)
cv2.destroyAllWindows()
else:
print("QR code not detected")
Here's an example Image:
and here's a sample of input image:

Here's a simple approach using thresholding, morphological operations, and contour filtering.
Obtain binary image. Load image, grayscale, Gaussian blur, Otsu's threshold
Connect individual QR contours. Create a rectangular structuring kernel with cv2.getStructuringElement() then perform morphological operations with cv2.MORPH_CLOSE.
Filter for QR code. Find contours
and filter using contour approximation, contour area, and aspect ratio.
Detected QR code
Extracted QR code
From here you can compare the QR code with your reference information
Code
import cv2
import numpy as np
# Load imgae, grayscale, Gaussian blur, Otsu's threshold
image = cv2.imread('1.jpg')
original = image.copy()
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (9,9), 0)
thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
# Morph close
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5,5))
close = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=2)
# Find contours and filter for QR code
cnts = cv2.findContours(close, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
peri = cv2.arcLength(c, True)
approx = cv2.approxPolyDP(c, 0.04 * peri, True)
x,y,w,h = cv2.boundingRect(approx)
area = cv2.contourArea(c)
ar = w / float(h)
if len(approx) == 4 and area > 1000 and (ar > .85 and ar < 1.3):
cv2.rectangle(image, (x, y), (x + w, y + h), (36,255,12), 3)
ROI = original[y:y+h, x:x+w]
cv2.imwrite('ROI.png', ROI)
cv2.imshow('thresh', thresh)
cv2.imshow('close', close)
cv2.imshow('image', image)
cv2.imshow('ROI', ROI)
cv2.waitKey()

I got the width and height data using points and compare it with the qr_data size. Then cropped the QR according to needed.
import cv2
import math
image = cv2.imread('/ur/image/directory/qr.jpg')
qrCodeDetector = cv2.QRCodeDetector()
decodedText, points, _ = qrCodeDetector.detectAndDecode(image)
qr_data = decodedText.split(',')
qr_size = qr_data[0]
top = qr_data[1]
right = qr_data[2]
bottom = qr_data[3]
left = qr_data[4]
if points is not None:
pts = len(points)
print(pts)
for i in range(pts):
nextPointIndex = (i+1) % pts
cv2.line(image, tuple(points[i][0]), tuple(points[nextPointIndex][0]), (255,0,0), 5)
print(points[i][0])
width = int(math.sqrt((points[0][0][0]-points[1][0][0])**2 + (points[0][0][1]-points[1][0][1])**2))
height = int(math.sqrt((points[1][0][0]-points[2][0][0])**2 + (points[1][0][1]-points[2][0][1])**2))
# Compare the size
if(width==qr_data[0] and height==qr_data[0]):
print("Sizes are equal")
else:
print("Width and height " + str(width) + "x" + str(height) + " not equal to "
+ str(qr_data[0]) + "x" + str(qr_data[0]))
# Add the extension values to points and crop
y = int(points[0][0][1]) - int(qr_data[1])
x = int(points[0][0][0]) - int(qr_data[4])
roi = image[y:y+height + int(qr_data[3]), x:x+width + int(qr_data[2])]
print(decodedText)
cv2.imshow("Image", image)
cv2.imshow("Crop", roi)
cv2.waitKey(0)
cv2.destroyAllWindows()
else:
print("QR code not detected")
Result:

So, you mainly have 3 problems here.
If the image is rotated with an angle \theta,
If the sheet is one a plane. (i.e., in the images, the upper line doesn't seem to be linear. But it should not be a big deal.)
The black borders. Will you always have those or may it be a different background? This is important because without cropping out those, you won't be able to get a reasonable result.
I improved your code a little bit and removed the border pixels:
import cv2
import matplotlib.pyplot as plt
import math
import numpy as np
image = cv2.imread('/Users/samettaspinar/Public/im.jpg')
qrCodeDetector = cv2.QRCodeDetector()
decodedText, points, _ = qrCodeDetector.detectAndDecode(image)
qr_data = decodedText.split(',')
qr_size = int(qr_data[0])
top = int(qr_data[1])
right = int(qr_data[2])
bottom = int(qr_data[3])
left = int(qr_data[4])
print(f'Size: {qr_size}' + str(qr_data[5]))
print(f'Top: {top}')
print(f'Right: {right}')
print(f'Bottom: {bottom}')
print(f'Left: {left}')
plt.imshow(image)
plt.show()
dists = [] #This is for estimating distances between corner points.
#I will average them to find ratio of pixels in image vs qr_size
#in the optimal case, all dists should be equal
if points is not None:
pts = len(points)
for i in range(pts):
p1 = points[i][0]
p2 = points[(i+1) % pts][0]
dists.append(math.sqrt((p1[0]-p2[0])**2 + (p1[1]-p2[1])**2))
print('line', tuple(p1), tuple(p2))
image = cv2.line(image, tuple(p1), tuple(p2), (255,0,0), 5)
else:
print("QR code not detected")
print('distances: ', dists)
# Remove the black border pixels. I had a simple idea for this
# Get the average intensity of the gray image
# If count the row average of the first half that are less than intensity/2.
# It approx gives number of black borders on the left. etc.
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
inten = np.mean(gray)
x = np.mean(gray, axis=0) # finds the vertical average
y = np.mean(gray, axis=1) # finds horizontal average
bl_left = np.sum([x[:int(col/2)] < inten/2])
bl_right = np.sum([x[int(col/2)+1:] < inten/2])
bl_top = np.sum([y[:int(row/2)] < inten/2])
bl_bottom = np.sum([y[int(row/2)+1:] < inten/2])
print('black margins: ', bl_left, bl_right, bl_top, bl_bottom)
# Estimate how many pixel you will crop out
ratio = np.mean(dists)/ int(qr_size)
print('actual px / qr_size in px: ', ratio)
row,col,dim = image.shape
top, left, right, bottom = int(top*ratio), int(left*ratio), int(right*ratio), int(bottom*ratio)
top += bl_top
left += bl_left
right += bl_right
bottom += bl_bottom
print('num pixels to be cropped: ', top, left, right, bottom)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
image2 = image[top:row-bottom, left:col-right, :]
plt.imshow(image2)
plt.show()
Notice that I ignored the rotation issue. If there is rotation, you can find the angle by calculating the tangents/arctan where I calculated the distances.

For QR detection and parsing
import cv2
import sys
filename = sys.argv[1]
# read the QRCODE image
#in case if QR code is not black/white it is better to convert it into grayscale
img = cv2.imread(filename, 0)# Zero means grayscale
img_origin = cv2.imread(filename)
# initialize the cv2 QRCode detector
detector = cv2.QRCodeDetector()
# detect and decode
data, bbox, straight_qrcode = detector.detectAndDecode(img)
# if there is a QR code
if bbox is not None:
print(f"QRCode data:\n{data}")
# display the image with lines
# length of bounding box
n_lines = len(bbox[0])#Cause bbox = [[[float, float]]], we need to convert fload into int and loop over the first element of array
bbox1 = bbox.astype(int) #Float to Int conversion
for i in range(n_lines):
# draw all lines
point1 = tuple(bbox1[0, [i][0]])
point2 = tuple(bbox1[0, [(i+1) % n_lines][0]])
cv2.line(img_origin, point1, point2, color=(255, 0, 0), thickness=2)
# display the result
cv2.imshow("img", img)
cv2.waitKey(0)
cv2.destroyAllWindows()
else:
print("QR code not detected")

Related

OpenCV Python remove object/pattern from images

I have been facing this problem from some days:
i need to remove this image/pattern from images like this or this using OpenCV.
I know that the problem is a Template Matching problem and I have to use filters (like canny) and and "slide" the template over the image, once this has been transformed by the filters.
I tried some solutions like this or this, but i had poor results, for example applying the second method I obtain this images
1
2
this is my code
import cv2
import numpy as np
# Resizes a image and maintains aspect ratio
def maintain_aspect_ratio_resize(image, width=None, height=None, inter=cv2.INTER_AREA):
# Grab the image size and initialize dimensions
dim = None
(h, w) = image.shape[:2]
# Return original image if no need to resize
if width is None and height is None:
return image
# We are resizing height if width is none
if width is None:
# Calculate the ratio of the height and construct the dimensions
r = height / float(h)
dim = (int(w * r), height)
# We are resizing width if height is none
else:
# Calculate the ratio of the 0idth and construct the dimensions
r = width / float(w)
dim = (width, int(h * r))
# Return the resized image
return cv2.resize(image, dim, interpolation=inter)
# Load template, convert to grayscale, perform canny edge detection
template = cv2.imread('C:\\Users\Quirino\Desktop\Reti\Bounding_box\Checkboard.jpg')
template = cv2.resize(template, (640,480))
template = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)
template = cv2.Canny(template, 50, 200)
(tH, tW) = template.shape[:2]
# cv2.imshow("template", template)
# Load original image, convert to grayscale
original_image = cv2.imread('F:\\ARCHAIDE\Appearance\Data_Archaide_Complete\MTL_G6\MTL_G6_MMO090.jpg')
# original_image = cv2.resize(original_image, (640,480))
final = original_image.copy()
gray = cv2.cvtColor(original_image, cv2.COLOR_BGR2GRAY)
found = None
# Dynamically rescale image for better template matching
for scale in np.linspace(0.2, 1.0, 20)[::-1]:
# Resize image to scale and keep track of ratio
resized = maintain_aspect_ratio_resize(gray, width=int(gray.shape[1] * scale))
r = gray.shape[1] / float(resized.shape[1])
# Stop if template image size is larger than resized image
if resized.shape[0] < tH or resized.shape[1] < tW:
break
# Detect edges in resized image and apply template matching
canny = cv2.Canny(resized, 50, 200)
detected = cv2.matchTemplate(canny, template, cv2.TM_CCOEFF)
(_, max_val, _, max_loc) = cv2.minMaxLoc(detected)
# Uncomment this section for visualization
'''
clone = np.dstack([canny, canny, canny])
cv2.rectangle(clone, (max_loc[0], max_loc[1]), (max_loc[0] + tW, max_loc[1] + tH), (0,255,0), 2)
cv2.imshow('visualize', clone)
cv2.waitKey(0)
'''
# Keep track of correlation value
# Higher correlation means better match
if found is None or max_val > found[0]:
found = (max_val, max_loc, r)
# Compute coordinates of bounding box
(_, max_loc, r) = found
(start_x, start_y) = (int(max_loc[0] * r), int(max_loc[1] * r))
(end_x, end_y) = (int((max_loc[0] + tW) * r), int((max_loc[1] + tH) * r))
original_image = cv2.resize(original_image, (640,480))
# Draw bounding box on ROI to remove
cv2.rectangle(original_image, (start_x, start_y), (end_x, end_y), (0,255,0), 2)
cv2.imshow('detected', original_image)
# Erase unwanted ROI (Fill ROI with white)
cv2.rectangle(final, (start_x, start_y), (end_x, end_y), (255,255,255), -1)
final = cv2.resize(final, (640,480))
cv2.imshow('final', final)
cv2.waitKey(0)
what could i try?
**20230207 EDIT
I tried the method below and it works great in the 80% of the images, but in some cases it doesn't recognize well the chess box and masks something else, for example you can see this or this and in other cases the chess box is recognized and covered only partially, like this
Here is one way to approach that in Python/OpenCV
Read the input
Threshold on the outer white region of the checkerboard pattern using cv2.inRange()
Get the external contours and keep the largest
Get the bounding box of the largest contour
Get the color just outside the 4 corners of the bounding box and get its average
Replace the bounding box region in a copy of the input with the average color
Save the results
Input:
import cv2
import numpy as np
# read the input
img = cv2.imread('checks_object.jpg')
# threshold on outer white area of checkerboard pattern
lower = (210,210,210)
upper = (255,255,255)
thresh = cv2.inRange(img, lower, upper)
# get external contours and keep largest
contours = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]
big_contour = max(contours, key=cv2.contourArea)
# get bounding box of big contour
x,y,w,h = cv2.boundingRect(big_contour)
# get the average color of the 4 pixels just outside of the bounding box corners
[[color1]] = img[y-1:y, x-1:x]
[[color2]] = img[y-1:y, x+w:x+w+1]
[[color3]] = img[y+h:y+h+1, x+w:x+w+1]
[[color4]] = img[y+h:y+h+1, x-1:x]
ave_color = (color1.astype(np.float32) + color2.astype(np.float32) + color3.astype(np.float32) + color4.astype(np.float32)) / 4
ave_color = ave_color.astype(np.uint8)
print(ave_color)
# fill color inside contour bounding box
result = img.copy()
result[y:y+h, x:x+w] = ave_color
# save results
cv2.imwrite('checks_object_color_filled.jpg', result)
# show results
cv2.imshow('thresh', thresh)
cv2.imshow('checks_color_filled', result)
cv2.waitKey(0)
Results:

Correct the object orientation in the image. Calculate the correct angle of rotation and correct the alignment of the object in the image

I have cropped images of electronic meter reading. Those readings are taken in random style. I need the orientation of the object(not the image) in the image to be aligned.
The detection of contours is not working. As there are lots of contours are formed in the image and in order to calculate the angle I need to select the right contour. Some times contour is not formed.
2.I want set of rotated images as shown in figure above. I tried some code of rotating image from the OpenCV. But due to two type of use case ( as we don't know from code that the reading style may be any of the two) The images are turned out as below.
Using the code below I am able to find the angle of rotation but for any one case. I need it to be done automatically for both type of cases. Also see the data set I have attached for other type of examples.
import cv2
import numpy as np
debug = True
# Display image
def display(img, frameName="OpenCV Image"):
if not debug:
return
h, w = img.shape[0:2]
neww = 800
newh = int(neww*(h/w))
img = cv2.resize(img, (neww, newh))
plt.imshow(img)
plt.show()
# cv2.imshow(frameName, img)
# cv2.waitKey(0)
#rotate the image with given theta value
def rotate(img, theta):
rows, cols = img.shape[0], img.shape[1]
image_center = (cols/2, rows/2)
M = cv2.getRotationMatrix2D(image_center,theta,1)
abs_cos = abs(M[0,0])
abs_sin = abs(M[0,1])
bound_w = int(rows * abs_sin + cols * abs_cos)
bound_h = int(rows * abs_cos + cols * abs_sin)
M[0, 2] += bound_w/2 - image_center[0]
M[1, 2] += bound_h/2 - image_center[1]
# rotate orignal image to show transformation
rotated = cv2.warpAffine(img,M,(bound_w,bound_h),borderValue=(255,255,255))
return rotated
def slope(x1, y1, x2, y2):
if x1 == x2:
return 0
slope = (y2-y1)/(x2-x1)
theta = np.rad2deg(np.arctan(slope))
return theta
def main(filePath):
img = cv2.imread(filePath)
(hi, wi) = img.shape[:2]
textImg = img.copy()
small = cv2.cvtColor(textImg, cv2.COLOR_BGR2GRAY)
# find the gradient map
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
grad = cv2.morphologyEx(small, cv2.MORPH_GRADIENT, kernel)
display(grad)
# Binarize the gradient image
_, bw = cv2.threshold(grad, 0.0, 255.0, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
display(bw)
# connect horizontally oriented regions
# kernal value (9,1) can be changed to improved the text detection
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (9, 1))
connected = cv2.morphologyEx(bw, cv2.MORPH_CLOSE, kernel)
display(connected)
# using RETR_EXTERNAL instead of RETR_CCOMP
# _ , contours, hierarchy = cv2.findContours(connected.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
contours, hierarchy = cv2.findContours(connected.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) #opencv >= 4.0
mask = np.zeros(bw.shape, dtype=np.uint8)
display(mask)
# cumulative theta value
cummTheta = 0
# number of detected text regions
ct = 0
flag=False
for idx in range(len(contours)):
x, y, w, h = cv2.boundingRect(contours[idx])
mask[y:y+h, x:x+w] = 0
# fill the contour
cv2.drawContours(mask, contours, idx, (255, 255, 255), -1)
display(mask)
# ratio of non-zero pixels in the filled region
r = float(cv2.countNonZero(mask[y:y+h, x:x+w])) / (w * h)
# assume at least 45% of the area is filled if it contains text
# if r > 0.39 and w > 8 and h > 8:
if (h/hi)>0.4 and (w/wi)>0.4:
flag=True
print(r,w,h)
# cv2.rectangle(textImg, (x1, y), (x+w-1, y+h-1), (0, 255, 0), 2)
rect = cv2.minAreaRect(contours[idx])
box = cv2.boxPoints(rect)
box = np.int0(box)
cv2.drawContours(textImg,[box],0,(0,0,255),2)
center = (int(rect[0][0]),int(rect[0][1]))
width = int(rect[1][0])
height = int(rect[1][1])
angle = int(rect[2])
print(angle)
print(width,height)
if width < height:
angle = 90+angle
print(angle,'final')
# we can filter theta as outlier based on other theta values
# this will help in excluding the rare text region with different orientation from ususla value
theta = slope(box[0][0], box[0][1], box[1][0], box[1][1])
cummTheta += theta
ct +=1
# print("Theta", theta)
# find the average of all cumulative theta value
# orientation = cummTheta/ct
print("Image orientation in degress: ", angle)
finalImage = rotate(img, angle)
display(textImg, "Detectd Text minimum bounding box")
display(finalImage)
out_path='cropped_corrected/rotated/'+filePath.split('\\')[-1]
print(out_path)
cv2.imwrite(out_path,finalImage)
print('image svaed here in rotated')
break
if not flag:
out_path='cropped_corrected/not_rotated/'+filePath.split('\\')[-1]
print(out_path)
cv2.imwrite(out_path,img)
print('image svaed here without rotated')
if __name__ == "__main__":
filePath = 'cropped/N3963001963.jpg'
main(filePath)
I am attaching some sample images that need to be rotated and the object inside the image needs to be aligned:

How can I find the center the hole with hough circles

For this image, I tried to use hough cirlce to find the center of the "black hole".
After playing with the parameters of cv2.HoughCircles for a long time, the following is the best I can get.
raw image:
# reproducible code for stackoverflow
import cv2
import os
import sys
from matplotlib import pyplot as plt
import numpy as np
# read image can turn it gray
img = cv2.imread(FILE)
cimg = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img_gray = dst = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
plt.figure(figsize = (18,18))
plt.imshow(cimg, cmap = "gray")
# removing noises
element = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
closing = cv2.morphologyEx(y, cv2.MORPH_CLOSE, element, iterations = 7)
plt.figure(figsize = (12,12))
plt.imshow(closing, cmap = "gray")
# try to find the circles
circles = cv2.HoughCircles(closing,cv2.HOUGH_GRADIENT,3,50,
param1=50,param2=30,minRadius=20,maxRadius=50)
circles = np.uint16(np.around(circles))
for i in circles[0,:]:
# draw the outer circle
cv2.circle(cimg,(i[0],i[1]),i[2],(0,255,0),2)
# draw the center of the circle
cv2.circle(cimg,(i[0],i[1]),2,(0,0,255),3)
plt.figure(figsize = (12,12))
plt.imshow(cimg)
Update::
The one with Canny:
edges = cv2.Canny(closing, 100, 300)
plt.figure(figsize = (12,12))
plt.imshow(edges, cmap = "gray")
circles = cv2.HoughCircles(edges,cv2.HOUGH_GRADIENT,2,50,
param1=50,param2=30,minRadius=20,maxRadius=60)
circles = np.uint16(np.around(circles))
cimg = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
for i in circles[0,:]:
# draw the outer circle
cv2.circle(cimg,(i[0],i[1]),i[2],(0,255,0),2)
# draw the center of the circle
cv2.circle(cimg,(i[0],i[1]),2,(0,0,255),3)
plt.figure(figsize = (12,12))
plt.imshow(cimg)
Still not the right circle that is wanted.
Update:
#crackanddie
Sometimes there is 6 or 9 in the identity number.
The circle in 6 or 9 is not very round.
Is there any way to filter that out?
This is an alternative method if you do not want to implement or fiddle with Hough's parameters. You must be sure there's at least one circle visible in your picture. The idea is to create a segmentation mask based on the CMYK color space and filter the blobs of interest by circularity and area. These are the steps:
Convert the image from BGR to CMYK
Threshold the K channel to get a binary mask
Filter blobs by circularity and area
Approximate the filtered blobs as circles
I'm choosing the CMYK color space because the circle is mostly black. The K (key) channel (in this case - black) should do a good job of representing the blob of interest, albeit, with some noise - as usual. Let's see the code:
# Imports:
import cv2
import numpy as np
# image path
path = "D://opencvImages//"
fileName = "dyj3O.jpg"
# load image
bgr = cv2.imread(path + fileName)
Alright, we need to convert the image from BGR to CMYK. OpenCV does not offer the conversion, so we need to do it manually. The formula is very straightforward. I'm just interested on the K channel, so I just calculate it like this:
# Make float and divide by 255:
bgrFloat = bgr.astype(np.float) / 255.
# Calculate K as (1 - whatever is biggest out of bgrFloat)
kChannel = 1 - np.max(bgrFloat, axis=2)
# Convert back to uint 8:
kChannel = 255 * kChannel
kChannel = kChannel.astype(np.uint8)
Gotta keep en eye on the data types, because there are float operations going on. This is the result:
As you see, the hole is almost 100% white, that's cool, we can threshold this image via Otsu like this:
# Compute binary mask of the hole via Otsu:
_, binaryImage = cv2.threshold(kChannel, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
Which gives you this nice binary mask:
Now, here comes the laborious part. Let's find contours on this image. For every contour/blob compute circularity and area. Use this info to filter noise and get the contour of interest, keep in mind that a perfect circle should have circularity close to 1.0. Once you get a contour of interest, approximate a circle to it. This is the process:
# Find the big contours/blobs on the filtered image:
contours, hierarchy = cv2.findContours(binaryImage, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)
# Store the detected circles here:
detectedCircles = []
# Look for the potential contours of interest:
for _, c in enumerate(contours):
# Get the blob's area and perimeter:
contourArea = cv2.contourArea(c)
contourPerimeter = cv2.arcLength(c, True)
# Compute circularity:
if contourPerimeter > 0:
circularity = (4 * 3.1416 * contourArea) / (pow(contourPerimeter, 2))
else:
circularity = 0.0
# Set the min threshold values to identify the
# blob of interest:
minCircularity = 0.7
minArea = 2000
if circularity >= minCircularity and contourArea >= minArea:
# Approximate the contour to a circle:
(x, y), radius = cv2.minEnclosingCircle(c)
# Compute the center and radius:
center = (int(x), int(y))
# Cast radius to in:
radius = int(radius)
# Store the center and radius:
detectedCircles.append([center, radius])
# Draw the circles:
cv2.circle(bgr, center, radius, (0, 255, 0), 2)
cv2.imshow("Detected Circles", bgr)
print("Circles Found: " + str(len(detectedCircles)))
Additionally, I have stored the circle (center and radius) in the detectedCircles list. This is the final result:
Circles Found: 1
Here it is:
import numpy as np
import cv2
def threshold_gray_const(image_, rang: tuple):
return cv2.inRange(image_, rang[0], rang[1])
def binary_or(image_1, image_2):
return cv2.bitwise_or(image_1, image_2)
def negate_image(image_):
return cv2.bitwise_not(image_)
def particle_filter(image_, power):
# Abdrakov's particle filter
nb_components, output, stats, centroids = cv2.connectedComponentsWithStats(image_, connectivity=8)
sizes = stats[1:, -1]
nb_components = nb_components - 1
min_size = power
img2 = np.zeros(output.shape, dtype=np.uint8)
for i in range(0, nb_components):
if sizes[i] >= min_size:
img_to_compare = threshold_gray_const(output, (i + 1, i + 1))
img2 = binary_or(img2, img_to_compare)
img2 = img2.astype(np.uint8)
return img2
def reject_borders(image_):
# Abdrakov's border rejecter
out_image = image_.copy()
h, w = image_.shape[:2]
for row in range(h):
if out_image[row, 0] == 255:
cv2.floodFill(out_image, None, (0, row), 0)
if out_image[row, w - 1] == 255:
cv2.floodFill(out_image, None, (w - 1, row), 0)
for col in range(w):
if out_image[0, col] == 255:
cv2.floodFill(out_image, None, (col, 0), 0)
if out_image[h - 1, col] == 255:
cv2.floodFill(out_image, None, (col, h - 1), 0)
return out_image
src = cv2.imread("your_image")
img_gray = dst = cv2.cvtColor(src, cv2.COLOR_BGR2GRAY)
element = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
closing = cv2.morphologyEx(img_gray, cv2.MORPH_CLOSE, element, iterations=2)
tv, thresh = cv2.threshold(closing, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
neg = negate_image(thresh)
rej = reject_borders(neg)
filtered = particle_filter(rej, 300)
edges = cv2.Canny(filtered, 100, 200)
circles = cv2.HoughCircles(edges, cv2.HOUGH_GRADIENT, 3, 50, param1=50, param2=30, minRadius=20, maxRadius=50)
circles = np.uint16(np.around(circles))
for i in circles[0, :]:
# draw the outer circle
cv2.circle(src, (i[0], i[1]), i[2], (0, 255, 0), 2)
# draw the center of the circle
cv2.circle(src, (i[0], i[1]), 2, (0, 0, 255), 3)
cv2.imshow("closing", closing)
cv2.imshow("edges", edges)
cv2.imshow("out", src)
cv2.waitKey(0)
I changed cv2.morphologyEx parameters a bit, because they were too strong. And after this noise removing I made a binary image using cv2.THRESH_OTSU parameter, negated it, rejected borders and filtered a bit. Then I used cv2.Canny to find edges and this 'cannied' image I passed into cv2.HoughCircles. If any questions - ask me :)
If you want to use a "thinking out of the box" solution then check this solution out. Remember this might have a few false positives in some cases and would only work in cases where circle contour is complete or joined.
import numpy as np
import cv2
import matplotlib.pyplot as plt
from math import pi
pi_eps = 0.1
rgb = cv2.imread('/path/to/your/image/find_circle.jpg')
gray = cv2.cvtColor(rgb, cv2.COLOR_BGR2GRAY)
th = cv2.adaptiveThreshold(gray,255, cv2.ADAPTIVE_THRESH_MEAN_C,cv2.THRESH_BINARY_INV,21,5)
contours, hier = cv2.findContours(th.copy(),cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE)
out_img = rgb.copy()
for i in range(len(contours)):
x,y,w,h = cv2.boundingRect(contours[i])
ar = min(w,h)/max(w,h)
# For a circle aspect ratio is close to 1.0
# In your use case circle diameter is between 40px-100px
if ar < 0.9 or \
w < 40 or w > 100:
continue
# P = 2 * PI * r
perimeter = cv2.arcLength(contours[i], True)
if perimeter == 0:
continue
# Second level confirmation could be done using PI = P * P / (4 * A)
# A = PI * r * r
area = cv2.contourArea(contours[i])
if area == 0:
continue
# d = (w+h) / 2 average diameter
# A contour is a circle if (P / d) = PI
ctr_pi = perimeter / ((w+h) / 2)
if abs(ctr_pi - pi) < pi_eps * pi:
cv2.circle(out_img, (int(x+w/2), int(y+h/2)), int(max(w,h)/2), (0, 255, 0), 1)
print("Center of the circle: ", x + w/2, y+h/2)
plt.imshow(out_img)

How to center the content/object of a binary image in python?

I have a code that computes the orientation of a figure. Based on this orientation the figure is then rotated until it is straightened out. This all works fine. What I am struggling with, is getting the center of the rotated figure to the center of the whole image. So the center point of the figure should match the center point of the whole image.
Input image:
code:
import cv2
import numpy as np
import matplotlib.pyplot as plt
path = "inputImage.png"
image=cv2.imread(path)
gray=cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)
thresh=cv2.threshold(gray,0,255,cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
contours,hierarchy = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_NONE)
cnt1 = contours[0]
cnt=cv2.convexHull(contours[0])
angle = cv2.minAreaRect(cnt)[-1]
print("Actual angle is:"+str(angle))
rect = cv2.minAreaRect(cnt)
p=np.array(rect[1])
if p[0] < p[1]:
print("Angle along the longer side:"+str(rect[-1] + 180))
act_angle=rect[-1]+180
else:
print("Angle along the longer side:"+str(rect[-1] + 90))
act_angle=rect[-1]+90
#act_angle gives the angle of the minAreaRect with the vertical
if act_angle < 90:
angle = (90 + angle)
print("angleless than -45")
# otherwise, just take the inverse of the angle to make
# it positive
else:
angle=act_angle-180
print("grter than 90")
# rotate the image to deskew it
(h, w) = image.shape[:2]
print(h,w)
center = (w // 2, h // 2)
print(center)
M = cv2.getRotationMatrix2D(center, angle, 1.0)
rotated = cv2.warpAffine(image, M, (w, h),flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)
plt.imshow(rotated)
cv2.imwrite("rotated.png", rotated)
With output:
As you can see the white figure is slightly placed to left, I want it to be perfectly centered.
Does anyone know how this can be done?
EDIT: I have tried #joe's suggestion and subtracted the centroid coordinates, from the center of the image by dividing the width and height of the picture by 2. From this I got an offset, this had to be added to the array that describes the image. But I don't know how I add the offset to the array. How would this work with the x and y coordinates?
The code:
img = cv2.imread("inputImage")
gray_image = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
ret,thresh = cv2.threshold(gray_image,127,255,0)
height, width = gray_image.shape
print(img.shape)
wi=(width/2)
he=(height/2)
print(wi,he)
M = cv2.moments(thresh)
cX = int(M["m10"] / M["m00"])
cY = int(M["m01"] / M["m00"])
offsetX = (wi-cX)
offsetY = (he-cY)
print(offsetX,offsetY)
print(cX,cY)
Here is one way in Python/OpenCV.
Get the bounding box for the white region from the contours. Compute the offset for the recentered region. Use numpy slicing to copy that to the center of a black background the size of the input.
Input:
import cv2
import numpy as np
# read image as grayscale
img = cv2.imread('white_shape.png', cv2.COLOR_BGR2GRAY)
# get shape
hh, ww = img.shape
# get contours (presumably just one around the nonzero pixels)
contours = cv2.findContours(img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]
for cntr in contours:
x,y,w,h = cv2.boundingRect(cntr)
# recenter
startx = (ww - w)//2
starty = (hh - h)//2
result = np.zeros_like(img)
result[starty:starty+h,startx:startx+w] = img[y:y+h,x:x+w]
# view result
cv2.imshow("RESULT", result)
cv2.waitKey(0)
cv2.destroyAllWindows()
# save reentered image
cv2.imwrite('white_shape_centered.png',result)
One approach is to obtain the bounding box coordinates of the binary object then crop the ROI using Numpy slicing. From here we calculate the new shifted coordinates then paste the ROI onto a new blank mask.
Code
import cv2
import numpy as np
# Load image as grayscale and obtain bounding box coordinates
image = cv2.imread('1.png', 0)
height, width = image.shape
x,y,w,h = cv2.boundingRect(image)
# Create new blank image and shift ROI to new coordinates
mask = np.zeros(image.shape, dtype=np.uint8)
ROI = image[y:y+h, x:x+w]
x = width//2 - ROI.shape[0]//2
y = height//2 - ROI.shape[1]//2
mask[y:y+h, x:x+w] = ROI
cv2.imshow('ROI', ROI)
cv2.imshow('mask', mask)
cv2.waitKey()
#NawinNarain, from this point onwards where you found out the relative shifts w.r.t. centroid of the image, it is very straightforward - You want to make an Affine matrix with this translations and apply cv2.warpAffine() to your image. That's -it.
T = np.float32([[1, 0, shift_x], [0, 1, shift_y]])
We then use warpAffine() to transform the image using the matrix, T
centered_image = cv2.warpAffine(image, T, (orig_width, orig_height))
This will transform your image so that the centroid is at the center. Hope this helps. The complete center image function will look like this:
def center_image(image):
height, width = image.shape
print(img.shape)
wi=(width/2)
he=(height/2)
print(wi,he)
ret,thresh = cv2.threshold(image,95,255,0)
M = cv2.moments(thresh)
cX = int(M["m10"] / M["m00"])
cY = int(M["m01"] / M["m00"])
offsetX = (wi-cX)
offsetY = (he-cY)
T = np.float32([[1, 0, offsetX], [0, 1, offsetY]])
centered_image = cv2.warpAffine(image, T, (width, height))
return centered_image

How to find rotate and crop a section of text in openCV, python

I'm in a struggle with a project that takes an image of a pretty clear font from say a label for example reads the "text region" and outputs it as a string using OCR tesseract for instance.
Now I've made quite some progress with the thing as I added varios global filters to get to a quite clear result but I'm struggling with finding method of filtering just the text out of there and then you have to think about rotating it to be as horizontal as possible and then after that the easy part should be to crop it.
May I have any leads to how to do that not using traning data and over complicating the system sins I only use a rasdpberry pi to do the computing?
Thanks for helping here's what I've came up with so far:
Original Image(Captured from PiCamera):
Adaptive thresh after shadow removal:
[
Glocad tresh after shadow removal:
Here's the code:
# import the necessary packages
from PIL import Image
import pytesseract
import argparse
import cv2
import os
import picamera
import time
import numpy as np
#preprocess = "tresh"
#Remaining textcorping and rotating:
import math
import json
from collections import defaultdict
from scipy.ndimage.filters import rank_filter
def dilate(ary, N, iterations):
"""Dilate using an NxN '+' sign shape. ary is np.uint8."""
kernel = np.zeros((N,N), dtype=np.uint8)
kernel[(N-1)/2,:] = 1
dilated_image = cv2.dilate(ary / 255, kernel, iterations=iterations)
kernel = np.zeros((N,N), dtype=np.uint8)
kernel[:,(N-1)/2] = 1
dilated_image = cv2.dilate(dilated_image, kernel, iterations=iterations)
return dilated_image
def props_for_contours(contours, ary):
"""Calculate bounding box & the number of set pixels for each contour."""
c_info = []
for c in contours:
x,y,w,h = cv2.boundingRect(c)
c_im = np.zeros(ary.shape)
cv2.drawContours(c_im, [c], 0, 255, -1)
c_info.append({
'x1': x,
'y1': y,
'x2': x + w - 1,
'y2': y + h - 1,
'sum': np.sum(ary * (c_im > 0))/255
})
return c_info
def union_crops(crop1, crop2):
"""Union two (x1, y1, x2, y2) rects."""
x11, y11, x21, y21 = crop1
x12, y12, x22, y22 = crop2
return min(x11, x12), min(y11, y12), max(x21, x22), max(y21, y22)
def intersect_crops(crop1, crop2):
x11, y11, x21, y21 = crop1
x12, y12, x22, y22 = crop2
return max(x11, x12), max(y11, y12), min(x21, x22), min(y21, y22)
def crop_area(crop):
x1, y1, x2, y2 = crop
return max(0, x2 - x1) * max(0, y2 - y1)
def find_border_components(contours, ary):
borders = []
area = ary.shape[0] * ary.shape[1]
for i, c in enumerate(contours):
x,y,w,h = cv2.boundingRect(c)
if w * h > 0.5 * area:
borders.append((i, x, y, x + w - 1, y + h - 1))
return borders
def angle_from_right(deg):
return min(deg % 90, 90 - (deg % 90))
def remove_border(contour, ary):
"""Remove everything outside a border contour."""
# Use a rotated rectangle (should be a good approximation of a border).
# If it's far from a right angle, it's probably two sides of a border and
# we should use the bounding box instead.
c_im = np.zeros(ary.shape)
r = cv2.minAreaRect(contour)
degs = r[2]
if angle_from_right(degs) <= 10.0:
box = cv2.cv.BoxPoints(r)
box = np.int0(box)
cv2.drawContours(c_im, [box], 0, 255, -1)
cv2.drawContours(c_im, [box], 0, 0, 4)
else:
x1, y1, x2, y2 = cv2.boundingRect(contour)
cv2.rectangle(c_im, (x1, y1), (x2, y2), 255, -1)
cv2.rectangle(c_im, (x1, y1), (x2, y2), 0, 4)
return np.minimum(c_im, ary)
def find_components(edges, max_components=16):
"""Dilate the image until there are just a few connected components.
Returns contours for these components."""
# Perform increasingly aggressive dilation until there are just a few
# connected components.
count = 21
dilation = 5
n = 1
while count > 16:
n += 1
dilated_image = dilate(edges, N=3, iterations=n)
contours, hierarchy = cv2.findContours(dilated_image, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
count = len(contours)
#print dilation
#Image.fromarray(edges).show()
#Image.fromarray(255 * dilated_image).show()
return contours
def find_optimal_components_subset(contours, edges):
"""Find a crop which strikes a good balance of coverage/compactness.
Returns an (x1, y1, x2, y2) tuple.
"""
c_info = props_for_contours(contours, edges)
c_info.sort(key=lambda x: -x['sum'])
total = np.sum(edges) / 255
area = edges.shape[0] * edges.shape[1]
c = c_info[0]
del c_info[0]
this_crop = c['x1'], c['y1'], c['x2'], c['y2']
crop = this_crop
covered_sum = c['sum']
while covered_sum < total:
changed = False
recall = 1.0 * covered_sum / total
prec = 1 - 1.0 * crop_area(crop) / area
f1 = 2 * (prec * recall / (prec + recall))
#print '----'
for i, c in enumerate(c_info):
this_crop = c['x1'], c['y1'], c['x2'], c['y2']
new_crop = union_crops(crop, this_crop)
new_sum = covered_sum + c['sum']
new_recall = 1.0 * new_sum / total
new_prec = 1 - 1.0 * crop_area(new_crop) / area
new_f1 = 2 * new_prec * new_recall / (new_prec + new_recall)
# Add this crop if it improves f1 score,
# _or_ it adds 25% of the remaining pixels for <15% crop expansion.
# ^^^ very ad-hoc! make this smoother
remaining_frac = c['sum'] / (total - covered_sum)
new_area_frac = 1.0 * crop_area(new_crop) / crop_area(crop) - 1
if new_f1 > f1 or (
remaining_frac > 0.25 and new_area_frac < 0.15):
print '%d %s -> %s / %s (%s), %s -> %s / %s (%s), %s -> %s' % (
i, covered_sum, new_sum, total, remaining_frac,
crop_area(crop), crop_area(new_crop), area, new_area_frac,
f1, new_f1)
crop = new_crop
covered_sum = new_sum
del c_info[i]
changed = True
break
if not changed:
break
return crop
def pad_crop(crop, contours, edges, border_contour, pad_px=15):
"""Slightly expand the crop to get full contours.
This will expand to include any contours it currently intersects, but will
not expand past a border.
"""
bx1, by1, bx2, by2 = 0, 0, edges.shape[0], edges.shape[1]
if border_contour is not None and len(border_contour) > 0:
c = props_for_contours([border_contour], edges)[0]
bx1, by1, bx2, by2 = c['x1'] + 5, c['y1'] + 5, c['x2'] - 5, c['y2'] - 5
def crop_in_border(crop):
x1, y1, x2, y2 = crop
x1 = max(x1 - pad_px, bx1)
y1 = max(y1 - pad_px, by1)
x2 = min(x2 + pad_px, bx2)
y2 = min(y2 + pad_px, by2)
return crop
crop = crop_in_border(crop)
c_info = props_for_contours(contours, edges)
changed = False
for c in c_info:
this_crop = c['x1'], c['y1'], c['x2'], c['y2']
this_area = crop_area(this_crop)
int_area = crop_area(intersect_crops(crop, this_crop))
new_crop = crop_in_border(union_crops(crop, this_crop))
if 0 < int_area < this_area and crop != new_crop:
print '%s -> %s' % (str(crop), str(new_crop))
changed = True
crop = new_crop
if changed:
return pad_crop(crop, contours, edges, border_contour, pad_px)
else:
return crop
def downscale_image(im, max_dim=2048):
"""Shrink im until its longest dimension is <= max_dim.
Returns new_image, scale (where scale <= 1).
"""
a, b = im.size
if max(a, b) <= max_dim:
return 1.0, im
scale = 1.0 * max_dim / max(a, b)
new_im = im.resize((int(a * scale), int(b * scale)), Image.ANTIALIAS)
return scale, new_im
def process_image(inputImg):
opnImg = Image.open(inputImg)
scale, im = downscale_image(opnImg)
edges = cv2.Canny(np.asarray(im), 100, 200)
# TODO: dilate image _before_ finding a border. This is crazy sensitive!
contours, hierarchy = cv2.findContours(edges, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
borders = find_border_components(contours, edges)
borders.sort(key=lambda (i, x1, y1, x2, y2): (x2 - x1) * (y2 - y1))
border_contour = None
if len(borders):
border_contour = contours[borders[0][0]]
edges = remove_border(border_contour, edges)
edges = 255 * (edges > 0).astype(np.uint8)
# Remove ~1px borders using a rank filter.
maxed_rows = rank_filter(edges, -4, size=(1, 20))
maxed_cols = rank_filter(edges, -4, size=(20, 1))
debordered = np.minimum(np.minimum(edges, maxed_rows), maxed_cols)
edges = debordered
contours = find_components(edges)
if len(contours) == 0:
print '%s -> (no text!)' % path
return
crop = find_optimal_components_subset(contours, edges)
crop = pad_crop(crop, contours, edges, border_contour)
crop = [int(x / scale) for x in crop] # upscale to the original image size.
#draw = ImageDraw.Draw(im)
#c_info = props_for_contours(contours, edges)
#for c in c_info:
# this_crop = c['x1'], c['y1'], c['x2'], c['y2']
# draw.rectangle(this_crop, outline='blue')
#draw.rectangle(crop, outline='red')
#im.save(out_path)
#draw.text((50, 50), path, fill='red')
#orig_im.save(out_path)
#im.show()
text_im = opnImg.crop(crop)
text_im.save('Cropted_and_rotated_image.jpg')
return text_im
'''
text_im.save(out_path)
print '%s -> %s' % (path, out_path)
'''
#Camera capturing stuff:
myCamera = picamera.PiCamera()
myCamera.vflip = True
myCamera.hflip = True
'''
myCamera.start_preview()
time.sleep(6)
myCamera.stop_preview()
'''
myCamera.capture("Captured_Image.png")
#End capturing persidure
imgAddr = '/home/pi/My_examples/Mechanical_display_converter/Example1.jpg'
#imgAddr = "Captured_Image.png"
# construct the argument parse and parse the arguments
#ap = argparse.ArgumentParser()
'''
ap.add_argument("-i", "--image", required=True,
help="path to input image to be OCR'd")
ap.add_argument("-p", "--preprocess", type=str, default="thresh",
help="type of preprocessing to be done")
args = vars(ap.parse_args())
'''
# load the example image and convert it to grayscale
img = cv2.imread(imgAddr)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
cv2.imshow('Step1_gray_filter', gray)
'''
# check to see if we should apply thresholding to preprocess the
# image
if args["preprocess"] == "thresh":
gray = cv2.threshold(gray, 0, 255,
cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
# make a check to see if median blurring should be done to remove
# noise
elif args["preprocess"] == "blur":
gray = cv2.medianBlur(gray, 3)
if preprocess == "thresh":
gray = cv2.threshold(gray, 150, 255,
cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
# make a check to see if median blurring should be done to remove
# noise
elif preprocess == "blur":
gray = cv2.medianBlur(gray, 3)
'''
rgb_planes = cv2.split(img)
result_planes = []
result_norm_planes = []
for plane in rgb_planes:
dilated_img = cv2.dilate(plane, np.ones((7,7), np.uint8))
bg_img = cv2.medianBlur(dilated_img, 21)
diff_img = 255 - cv2.absdiff(plane, bg_img)
norm_img = cv2.normalize(diff_img, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8UC1)
result_planes.append(diff_img)
result_norm_planes.append(norm_img)
result = cv2.merge(result_planes)
result_norm = cv2.merge(result_norm_planes)
cv2.imshow('shadows_out.png', result)
cv2.imshow('shadows_out_norm.png', result_norm)
grayUnShadowedImg = cv2.cvtColor(result, cv2.COLOR_BGR2GRAY)
cv2.imshow('Shadow_Gray_CVT', grayUnShadowedImg)
ret, threshUnShadowedImg = cv2.threshold(grayUnShadowedImg, 200, 255, cv2.THRESH_BINARY)
cv2.imshow('unShadowed_Thresh_filtering', threshUnShadowedImg)
#v2.imwrite('unShadowed_Thresh_filtering.jpg', threshUnShadowedImg)
#croptedunShadowedImg = process_image('unShadowed_Thresh_filtering.jpg')
adptThreshUnShadowedImg = cv2.adaptiveThreshold(grayUnShadowedImg, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 115, 1)
cv2.imshow('unShadowed_Adaptive_Thresh_filtering', adptThreshUnShadowedImg)
'''
blurFImg = cv2.GaussianBlur(adptThreshUnShadowedImg,(25,25), 0)
ret, f3Img = cv2.threshold(blurFImg,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
cv2.imshow('f3Img', f3Img )
'''
#OCR Stage:
'''
# write the grayscale image to disk as a temporary file so we can
# apply OCR to it
filename = "{}.png".format(os.getpid())
cv2.imwrite(filename, threshImg)
# load the image as a PIL/Pillow image, apply OCR, and then delete
# the temporary file
text = pytesseract.image_to_string(Image.open(filename))
os.remove(filename)
print("\n" + text)
'''
cv2.waitKey(0)
cv2.destroyAllWindows()
Tryed this source out as well but this doesn't seem to work and is not that clear to understand:
https://www.danvk.org/2015/01/07/finding-blocks-of-text-in-an-image-using-python-opencv-and-numpy.html
I have made an example to maybe give you an idea on how to proceede. I made it without your transformations of the image but you could do it with them if you would like.
What I did was to first transform the image to binary with cv2.THRESH_BINARY. Next I made a mask and drew the contours by limiting them with size (cv2.contourArea()) and ratio (got it from cv2.boundingRect()) for threshold. Then I conected all the contours that are near each other using cv2.morphologyEx() and a big kernel size (50x50).
Then I selected the biggest contour (text) and drew a rotated rectangle with cv2.minAreaRect() which got me the rotational angle.
Then I could rotate the image using cv2.getRotationMatrix2D() and cv2.warpAffine() and get a slightly bigger bounding box using the highest X, Y and lowest X,Y values of the rotated rectangle which I used to crop the image.
Then I serched again for contours and removed the noise (little contours) from the image and the result is a text with high contrast.
Final result:
This code is meant only to give an idea or another point of view to the problem and it may not work with other images (if they differ from the original too much) or at least you would have to adjust some parameters of code. Hope it helps. Cheers!
Code:
import cv2
import numpy as np
# Read image and search for contours.
img = cv2.imread('rotatec.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
_, threshold = cv2.threshold(gray, 150, 255, cv2.THRESH_BINARY)
contours, hierarchy = cv2.findContours(threshold,cv2.RETR_TREE,cv2.CHAIN_APPROX_NONE)
# Create first mask used for rotation.
mask = np.ones(img.shape, np.uint8)*255
# Draw contours on the mask with size and ratio of borders for threshold.
for cnt in contours:
size = cv2.contourArea(cnt)
x,y,w,h = cv2.boundingRect(cnt)
if 10000 > size > 500 and w*2.5 > h:
cv2.drawContours(mask, [cnt], -1, (0,0,0), -1)
# Connect neighbour contours and select the biggest one (text).
kernel = np.ones((50,50),np.uint8)
opening = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel)
gray_op = cv2.cvtColor(opening, cv2.COLOR_BGR2GRAY)
_, threshold_op = cv2.threshold(gray_op, 150, 255, cv2.THRESH_BINARY_INV)
contours_op, hierarchy_op = cv2.findContours(threshold_op, cv2.RETR_TREE,cv2.CHAIN_APPROX_NONE)
cnt = max(contours_op, key=cv2.contourArea)
# Create rotated rectangle to get the angle of rotation and the 4 points of the rectangle.
_, _, angle = rect = cv2.minAreaRect(cnt)
(h,w) = img.shape[:2]
(center) = (w//2,h//2)
# Rotate the image.
M = cv2.getRotationMatrix2D(center, angle, 1.0)
rotated = cv2.warpAffine(img, M, (int(w),int(h)), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_CONSTANT)
# Create bounding box for rotated text (use old points of rotated rectangle).
box = cv2.boxPoints(rect)
a, b, c, d = box = np.int0(box)
bound =[]
bound.append(a)
bound.append(b)
bound.append(c)
bound.append(d)
bound = np.array(bound)
(x1, y1) = (bound[:,0].min(), bound[:,1].min())
(x2, y2) = (bound[:,0].max(), bound[:,1].max())
cv2.drawContours(img,[box],0,(0,0,255),2)
# Crop the image and create new mask for the final image.
rotated = rotated[y1:y2, x1:x2]
mask_final = np.ones(rotated.shape, np.uint8)*255
# Remove noise from the final image.
gray_r = cv2.cvtColor(rotated, cv2.COLOR_BGR2GRAY)
_, threshold_r = cv2.threshold(gray_r, 150, 255, cv2.THRESH_BINARY_INV)
contours, hierarchy = cv2.findContours(threshold_r,cv2.RETR_TREE,cv2.CHAIN_APPROX_NONE)
for cnt in contours:
size = cv2.contourArea(cnt)
if size < 500:
cv2.drawContours(threshold_r, [cnt], -1, (0,0,0), -1)
# Invert black and white.
final_image = cv2.bitwise_not(threshold_r)
# Display results.
cv2.imshow('final', final_image)
cv2.imshow('rotated', rotated)
EDIT:
For text recognition I recomend you see this post from SO Simple Digit Recognition OCR in OpenCV-Python.
The result with the code from mentioned post:
EDIT:
This is my code implemented with the slightly modified code from the mentioned post. All steps are written in the comments. You should save the script and the training image to the same directory. This is my training image:
Code:
import cv2
import numpy as np
# Read image and search for contours.
img = cv2.imread('rotatec.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
_, threshold = cv2.threshold(gray, 150, 255, cv2.THRESH_BINARY)
contours, hierarchy = cv2.findContours(threshold,cv2.RETR_TREE,cv2.CHAIN_APPROX_NONE)
# Create first mask used for rotation.
mask = np.ones(img.shape, np.uint8)*255
# Draw contours on the mask with size and ratio of borders for threshold.
for cnt in contours:
size = cv2.contourArea(cnt)
x,y,w,h = cv2.boundingRect(cnt)
if 10000 > size > 500 and w*2.5 > h:
cv2.drawContours(mask, [cnt], -1, (0,0,0), -1)
# Connect neighbour contours and select the biggest one (text).
kernel = np.ones((50,50),np.uint8)
opening = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel)
gray_op = cv2.cvtColor(opening, cv2.COLOR_BGR2GRAY)
_, threshold_op = cv2.threshold(gray_op, 150, 255, cv2.THRESH_BINARY_INV)
contours_op, hierarchy_op = cv2.findContours(threshold_op, cv2.RETR_TREE,cv2.CHAIN_APPROX_NONE)
cnt = max(contours_op, key=cv2.contourArea)
# Create rotated rectangle to get the angle of rotation and the 4 points of the rectangle.
_, _, angle = rect = cv2.minAreaRect(cnt)
(h,w) = img.shape[:2]
(center) = (w//2,h//2)
# Rotate the image.
M = cv2.getRotationMatrix2D(center, angle, 1.0)
rotated = cv2.warpAffine(img, M, (int(w),int(h)), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_CONSTANT)
# Create bounding box for rotated text (use old points of rotated rectangle).
box = cv2.boxPoints(rect)
a, b, c, d = box = np.int0(box)
bound =[]
bound.append(a)
bound.append(b)
bound.append(c)
bound.append(d)
bound = np.array(bound)
(x1, y1) = (bound[:,0].min(), bound[:,1].min())
(x2, y2) = (bound[:,0].max(), bound[:,1].max())
cv2.drawContours(img,[box],0,(0,0,255),2)
# Crop the image and create new mask for the final image.
rotated = rotated[y1:y2, x1-10:x2]
mask_final = np.ones(rotated.shape, np.uint8)*255
# Remove noise from the final image.
gray_r = cv2.cvtColor(rotated, cv2.COLOR_BGR2GRAY)
_, threshold_r = cv2.threshold(gray_r, 150, 255, cv2.THRESH_BINARY_INV)
contours, hierarchy = cv2.findContours(threshold_r,cv2.RETR_TREE,cv2.CHAIN_APPROX_NONE)
for cnt in contours:
size = cv2.contourArea(cnt)
if size < 500:
cv2.drawContours(threshold_r, [cnt], -1, (0,0,0), -1)
# Invert black and white.
final_image = cv2.bitwise_not(threshold_r)
# Display results.
cv2.imwrite('rotated12.png', final_image)
# Import module for finding path to database.
from pathlib import Path
# This code executes once amd writes two files.
# If file exists it skips this step, else it runs again.
file = Path("generalresponses.data")
if file.is_file() == False:
# Reading the training image
im = cv2.imread('pitrain1.png')
im3 = im.copy()
gray = cv2.cvtColor(im,cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray,(5,5),0)
thresh = cv2.adaptiveThreshold(blur,255,1,1,11,2)
# Finding contour
_,contours,hierarchy = cv2.findContours(thresh,cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE)
# Creates array and list for appending data
samples = np.empty((0,100))
responses = []
# Value serving to increment the "automatic" learning
i = 0
# Iterating through contours and appending the array and list with "learned" values
for cnt in contours:
i+=1
[x,y,w,h] = cv2.boundingRect(cnt)
cv2.rectangle(im,(x,y),(x+w,y+h),(0,0,255),2)
roi = thresh[y:y+h,x:x+w] # Croping ROI to bounding rectangle
roismall = cv2.resize(roi,(10,10)) # Resizing ROI to smaller image
cv2.imshow('norm',im)
# Appending values based on the pitrain1.png image
if i < 36:
responses.append(int(45))
elif 35 < i < 80:
responses.append(int(48))
elif 79 < i < 125:
responses.append(int(57))
elif 124 < i < 160:
responses.append(int(56))
elif 159 < i < 205:
responses.append(int(55))
elif 204 < i < 250:
responses.append(int(54))
elif 249 < i < 295:
responses.append(int(53))
elif 294 < i < 340:
responses.append(int(52))
elif 339 < i < 385:
responses.append(int(51))
elif 384 < i < 430:
responses.append(int(50))
elif 429 < i < 485:
responses.append(int(49))
else:
break
sample = roismall.reshape((1,100))
samples = np.append(samples,sample,0)
# Reshaping and saving database
responses = np.array(responses)
responses = responses.reshape((responses.size,1))
print('end')
np.savetxt('generalsamples.data',samples)
np.savetxt('generalresponses.data',responses, fmt='%s')
################### Recognition ########################
# Dictionary for numbers and characters (in this sample code the only
# character is " - ")
number = {
48 : "0",
53 : "5",
52 : "4",
50 : "2",
45 : "-",
55 : "7",
51 : "3",
57 : "9",
56 : "8",
54 : "6",
49 : "1"
}
####### training part ###############
samples = np.loadtxt('generalsamples.data',np.float32)
responses = np.loadtxt('generalresponses.data',np.float32)
responses = responses.reshape((responses.size,1))
model = cv2.ml.KNearest_create()
model.train(samples,cv2.ml.ROW_SAMPLE,responses)
############################# testing part #########################
im = cv2.imread('rotated12.png')
out = np.zeros(im.shape,np.uint8)
gray = cv2.cvtColor(im,cv2.COLOR_BGR2GRAY)
thresh = cv2.adaptiveThreshold(gray,255,1,1,11,2)
contours,hierarchy = cv2.findContours(thresh,cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE)
for cnt in contours:
[x,y,w,h] = cv2.boundingRect(cnt)
cv2.rectangle(im,(x,y),(x+w,y+h),(0,255,0),2)
roi = thresh[y:y+h,x:x+w]
roismall = cv2.resize(roi,(10,10))
roismall = roismall.reshape((1,100))
roismall = np.float32(roismall)
retval, results, neigh_resp, dists = model.findNearest(roismall,k=5)
string = int((results[0][0]))
string2 = number.get(string)
print(string2)
cv2.putText(out,str(string2),(x,y+h),0,1,(0,255,0))
cv2.imshow('im',im)
cv2.imshow('out',out)
cv2.waitKey(0)
cv2.destroyAllWindows()
Result:
Sorry for begin a complete moron in it,
I'm realy trying to learn as much as I can about coding,everything that goes around the computer and openCV with the very little time I have But here's the edited code I've managed to get partly working:
from PIL import Image
import pytesseract
import os
import picamera
import time
import cv2
import numpy as np
# Read image and search for contours.
img = cv2.imread('Example1.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
_, threshold = cv2.threshold(gray, 150, 255, cv2.THRESH_BINARY)
contours, hierarchy = cv2.findContours(threshold,cv2.RETR_TREE,cv2.CHAIN_APPROX_NONE) #EDITED
# Create first mask used for rotation.
mask = np.ones(img.shape, np.uint8)*255
# Draw contours on the mask with size and ratio of borders for threshold.
for cnt in contours:
size = cv2.contourArea(cnt)
x,y,w,h = cv2.boundingRect(cnt)
if 10000 > size > 500 and w*2.5 > h:
cv2.drawContours(mask, [cnt], -1, (0,0,0), -1)
# Connect neighbour contours and select the biggest one (text).
kernel = np.ones((50,50),np.uint8)
opening = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel)
gray_op = cv2.cvtColor(opening, cv2.COLOR_BGR2GRAY)
_, threshold_op = cv2.threshold(gray_op, 150, 255, cv2.THRESH_BINARY_INV)
contours_op, hierarchy_op = cv2.findContours(threshold_op, cv2.RETR_TREE,cv2.CHAIN_APPROX_NONE)
cnt = max(contours_op, key=cv2.contourArea)
# Create rotated rectangle to get the angle of rotation and the 4 points of the rectangle.
_, _, angle = rect = cv2.minAreaRect(cnt)
(h,w) = img.shape[:2]
(center) = (w//2,h//2)
# Rotate the image.
M = cv2.getRotationMatrix2D(center, angle, 1.0)
rotated = cv2.warpAffine(img, M, (int(w),int(h)), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_CONSTANT)
# Create bounding box for rotated text (use old points of rotated rectangle).
box = cv2.cv.BoxPoints(rect) #edited
a, b, c, d = box = np.int0(box)
bound =[]
bound.append(a)
bound.append(b)
bound.append(c)
bound.append(d)
bound = np.array(bound)
(x1, y1) = (bound[:,0].min(), bound[:,1].min())
(x2, y2) = (bound[:,0].max(), bound[:,1].max())
cv2.drawContours(img,[box],0,(0,0,255),2)
# Crop the image and create new mask for the final image.
rotated = rotated[y1:y2, x1:x2]
mask_final = np.ones(rotated.shape, np.uint8)*255
# Remove noise from the final image.
gray_r = cv2.cvtColor(rotated, cv2.COLOR_BGR2GRAY)
_, threshold_r = cv2.threshold(gray_r, 150, 255, cv2.THRESH_BINARY_INV)
contours, hierarchy = cv2.findContours(threshold_r,cv2.RETR_TREE,cv2.CHAIN_APPROX_NONE)
for cnt in contours:
size = cv2.contourArea(cnt)
if size < 500:
cv2.drawContours(threshold_r, [cnt], -1, (0,0,0), -1)
# Invert black and white.
final_image = cv2.bitwise_not(threshold_r)
# Display results.
cv2.imshow('final', final_image)
cv2.imshow('rotated', rotated)
#OCR Stage:
# write the grayscale image to disk as a temporary file so we can
# apply OCR to it
filename = "{}.png".format(os.getpid())
cv2.imwrite('Final_proc.jpg', final_image)
# load the image as a PIL/Pillow image, apply OCR, and then delete
# the temporary file
text = pytesseract.image_to_string(Image.open('Final_proc.jpg'))
os.remove('Final_proc.jpg')
print("\n" + text)
cv2.waitKey(0)
cv2.destroyAllWindows()
When compiling it now it gives me this output:
[img]https://i.imgur.com/ImdKSCv.jpg[/img]
which is a little different from what you showed and compiled on the windows machine but still super close.
anyidea what happened? just after that this should be realy easy to dissect the code and learn it easily.
Again thank you very much for your time! :D
So for the python 3 and openCV 3 version of the code in order to make the img work with tesseract you'd need to add an around 20px white boarder to extend the image for somereason (I assume it's because the convolutional matrix scanning effort) according to my other post:
pytesseract struggling to recognize clean black and white pictures with font numbers and 7 seg digits(python)
and here's how you'd add the boarder:
how to add border around an image in opencv python
In one line of code:
outputImage = cv2.copyMakeBorder(
inputImage,
topBorderWidth,
bottomBorderWidth,
leftBorderWidth,
rightBorderWidth,
cv2.BORDER_CONSTANT,
value=color of border
)

Categories

Resources