Get white text on black background - python

I want to identify text in a set of images. There are some images with both white and black colored text.
I used otsu thresholding to binarize image
After contour identification and removal of non text regions I identified the required text region.
I need all the text in white color. But I don't know how to do it. I thought of using a bitwise operator but couldn't find a method. Can someone help me with this?
Expected output:
import cv2
import numpy as np
def process(img):
    """Display the detected text regions of ``<img>.jpg`` as a binary image.

    The grayscale image is Otsu-thresholded. Candidate regions are found as
    external contours of the closed, thresholded morphological gradient. The
    thresholded image is then masked so that only contours passing the
    fill-ratio, aspect-ratio and area checks remain. The result is shown in a
    window and the call blocks until a key is pressed.

    Args:
        img: Image name without extension; ``str(img) + '.jpg'`` is read.

    Raises:
        FileNotFoundError: If the image cannot be read.
    """
    # read image
    img_no = str(img)
    rgb = cv2.imread(img_no + '.jpg')
    if rgb is None:
        # cv2.imread reports failure by returning None rather than raising.
        raise FileNotFoundError(img_no + '.jpg')

    # convert image to grayscale
    gray = cv2.cvtColor(rgb, cv2.COLOR_BGR2GRAY)
    _, bw_copy = cv2.threshold(gray, 0.0, 255.0, cv2.THRESH_BINARY | cv2.THRESH_OTSU)

    # bilateral filter
    blur = cv2.bilateralFilter(gray, 5, 75, 75)

    # morphological gradient calculation
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
    grad = cv2.morphologyEx(blur, cv2.MORPH_GRADIENT, kernel)

    # binarization
    _, bw = cv2.threshold(grad, 0.0, 255.0, cv2.THRESH_BINARY | cv2.THRESH_OTSU)

    # closing
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 1))
    closed = cv2.morphologyEx(bw, cv2.MORPH_CLOSE, kernel)

    # finding contours; [-2:] copes with both the 2-value and the 3-value
    # return forms of cv2.findContours
    contours, _ = cv2.findContours(closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[-2:]
    mask = np.zeros(closed.shape, dtype=np.uint8)
    mask1 = np.zeros(bw_copy.shape, dtype=np.uint8)
    for idx, contour in enumerate(contours):
        x, y, w, h = cv2.boundingRect(contour)
        mask[y:y + h, x:x + w] = 0
        area = cv2.contourArea(contour)
        aspect_ratio = float(w) / h
        cv2.drawContours(mask, contours, idx, (255, 255, 255), -1)
        # fraction of the bounding box covered by the filled contour
        r = float(cv2.countNonZero(mask[y:y + h, x:x + w])) / (w * h)

        # identify region of interest
        if r > 0.34 and 0.52 < aspect_ratio < 13 and area > 145.0:
            cv2.drawContours(mask1, [contour], -1, (255, 255, 255), -1)

    # keep only the thresholded pixels that fall inside accepted contours
    result = cv2.bitwise_and(bw_copy, mask1)
    cv2.imshow('result', result)
    print(img_no + " Done")
    cv2.waitKey()
New Image
Accepted answer doesn't work with this picture.

At first glance this looks like a simple question but it is quite tricky to solve.
However you already have all the ingredients needed to solve the problem and only require a slight tweak to your algorithm.
Here are the gists:
What you need is an inverted image (wb_copy) of your thresholded image (bw_copy).
You have done a great job creating a mask
Run bitwise_and operation on both bw_copy and wb_copy with the mask above. You should get the result shown below.
As you can see, the answer you want is a bit of both images. For every font blob, count the non-zero pixels in both images and select the one with the higher count. Doing so will give the result you wanted.
Here are the modifications I made to the code
# finding contours; [-2:] copes with both the 2-value and the 3-value
# return forms of cv2.findContours
contours, _ = cv2.findContours(closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[-2:]
mask = np.zeros(closed.shape, dtype=np.uint8)
mask1 = np.zeros(bw_copy.shape, dtype=np.uint8)
# inverted copy of the thresholded image
wb_copy = cv2.bitwise_not(bw_copy)
new_bw = np.zeros(bw_copy.shape, dtype=np.uint8)

for idx, contour in enumerate(contours):
    x, y, w, h = cv2.boundingRect(contour)
    mask[y:y + h, x:x + w] = 0
    area = cv2.contourArea(contour)
    aspect_ratio = float(w) / h
    cv2.drawContours(mask, contours, idx, (255, 255, 255), -1)
    # fraction of the bounding box covered by the filled contour
    r = float(cv2.countNonZero(mask[y:y + h, x:x + w])) / (w * h)

    # identify region of interest
    if r > 0.34 and 0.52 < aspect_ratio < 13 and area > 145.0:
        cv2.drawContours(mask1, [contour], -1, (255, 255, 255), -1)

        box = (slice(y, y + h), slice(x, x + w))
        bw_count = cv2.countNonZero(cv2.bitwise_and(mask1[box], bw_copy[box]))
        wb_count = cv2.countNonZero(cv2.bitwise_and(mask1[box], wb_copy[box]))

        # copy the polarity that has more white pixels inside the blob;
        # slice assignment already copies, so no explicit np.copy is needed
        new_bw[box] = bw_copy[box] if bw_count > wb_count else wb_copy[box]

cv2.imshow('new_bw', new_bw)

INPUT IMAGE:
I did the following:
import cv2 as cv
import numpy as np
img_path = '10002.png'
img = cv.imread(img_path)
# dsize=None lets OpenCV derive the output size from the fx/fy scale factors
img = cv.resize(img, None, fx=0.4, fy=0.4)
img = cv.cvtColor(img, cv.COLOR_BGR2GRAY)

# Pure-black pixels become white (255); every other pixel becomes black (0).
# The image is kept as uint8: imshow multiplies floating-point images by 255,
# so a float32 image holding 0-255 values would be displayed saturated.
result = np.where(img == 0, 255, 0).astype('uint8')
result = cv.erode(result, kernel=np.ones(shape=(3, 3)), iterations=1)

cv.imshow('Image', img)
cv.imshow('Result', result)
cv.waitKey(0)
cv.destroyAllWindows()
OUTPUT IMAGE:
You can even invert the image using bitwise not and get black text on white background.

Related

Reliable program that can detect QR Codes without cv2.QRCodeDetector() or pyzbar library

I am having trouble finding a set of morphological operations that allow me to detect (only) the QR codes in various images using cv2.connectedComponentsWithStats() or cv2.findContours() (but I would prefer to solve this with cv2.connectedComponentsWithStats()).
The images I absolutely need the code to work on are the following:
I have been messing with 2 different codes, one using cv2.connectedComponentsWithStats() and the other cv2.findContours() and some other methods (based off nathancy's answer to Detect a QR code from an image and crop using OpenCV). To test I've been using the following codes:
Using cv2.connectedComponentsWithStats(): the problem with this code is that it captures more than the QR code in the 2nd image, as you can see below. It works great on the 1st image, and on the 3rd as well if it is scaled to 0.5; otherwise it also detects more than the QR code, like in the 2nd image.
import cv2
import numpy as np
# Alternative inputs:
# img = cv2.imread('Code-1.jpg'); scale = 1
# img = cv2.imread('Code-3.jpg'); scale = 0.5
img = cv2.imread('Code-2.jpg')
scale = 1
width = int(img.shape[1] * scale)
height = int(img.shape[0] * scale)
img = cv2.resize(img, (width, height))
og = img.copy()  # untouched copy to crop the ROI from

gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
gaussianblur = cv2.GaussianBlur(gray, (7, 7), 0)
otsuthresh = cv2.threshold(gaussianblur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
edges = cv2.Canny(otsuthresh, threshold1=100, threshold2=200)
# NOTE(review): (5, 5) is passed as the kernel itself, not as a kernel size;
# confirm whether np.ones((5, 5), np.uint8) was intended.
dilate = cv2.dilate(edges, (5, 5), iterations=1)

num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(dilate, 8, cv2.CV_32S)

ROI = None
for i in range(1, num_labels):  # label 0 is the background
    x = stats[i, cv2.CC_STAT_LEFT]
    y = stats[i, cv2.CC_STAT_TOP]
    w = stats[i, cv2.CC_STAT_WIDTH]
    h = stats[i, cv2.CC_STAT_HEIGHT]
    area = stats[i, cv2.CC_STAT_AREA]
    ratio = w / float(h)
    # keep roughly square components that do not span the whole image width
    if area > 500 and 0.95 < ratio < 1.05 and w < 0.99 * img.shape[1]:
        cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
        ROI = og[y:y + h, x:x + w]
        cv2.imwrite('ROI.png', ROI)

cv2.imshow('image', img)
if ROI is not None:
    # ROI stays None when no component passed the filter
    cv2.imshow('QR code', ROI)
cv2.waitKey(0)
Using cv2.findContours(), this one can't detect any of the QR codes in the images in which the code must not fail, but can detect in some other random images
import cv2
import numpy as np
# Alternative inputs:
# img = cv2.imread('Code-1.jpg'); scale = 1
# img = cv2.imread('Code-3.jpg'); scale = 0.5
img = cv2.imread('Code-2.jpg')
scale = 1
width = int(img.shape[1] * scale)
height = int(img.shape[0] * scale)
img = cv2.resize(img, (width, height))
og = img.copy()  # untouched copy to crop the ROI from

gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
gaussianblur = cv2.GaussianBlur(gray, (7, 7), 0)
otsuthresh = cv2.threshold(gaussianblur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
closed = cv2.morphologyEx(otsuthresh, cv2.MORPH_CLOSE, kernel, iterations=3)

# findContours returns 2 or 3 values depending on the OpenCV version
contours = cv2.findContours(closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]

ROI = None
for cnt in contours:
    perim = cv2.arcLength(cnt, True)
    approx = cv2.approxPolyDP(cnt, 0.05 * perim, True)
    x, y, w, h = cv2.boundingRect(approx)
    area = cv2.contourArea(cnt)
    ratio = w / float(h)
    # keep large, roughly square contours that approximate to a quadrilateral
    if len(approx) == 4 and area > 1000 and 0.80 < ratio < 1.2:
        cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 4)
        ROI = og[y:y + h, x:x + w]
        cv2.imwrite('ROI.png', ROI)

cv2.imshow('image', img)
if ROI is not None:
    # ROI stays None when no contour passed the filter
    cv2.imshow('QR code', ROI)
cv2.waitKey(0)
Thank you for reading and if I wasn't clear on something please let me know.
Filipe Almeida
Maybe, you could try QReader. It is just a wrapper of OpenCV, Pyzbar and other QR detection and image filtering methods, but it works quite out-of-the-box for those cases.
from qreader import QReader
from matplotlib import pyplot as plt
import cv2
if __name__ == '__main__':
    # Initialize QReader
    detector = QReader()

    for img_path in ('0oOAF.jpg', 'HXlS8.jpg', '5fFTo.jpg'):
        # Read the image (OpenCV loads BGR; convert to RGB for matplotlib)
        img = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2RGB)

        # Detect the QR bbox
        found, bbox = detector.detect(image=img)
        if found:
            # Draw the bbox
            x1, y1, x2, y2 = bbox
            cv2.rectangle(img=img, pt1=(x1, y1), pt2=(x2, y2), color=(0, 255, 0), thickness=2)

            # Save the image
            plt.imshow(img)
            plt.savefig(f"{img_path}-bbox.png")
That's the output it gives:

Opencv rectangle detection on noisy image

One question: is it possible to detect a rectangle in an image when it touches noise lines and other shapes?
This is my function to detect contours in the image:
def findContours(img_in):
    """Outline the largest contour of a downscaled copy of ``img_in``.

    The image is resized to a quarter of its size, passed through
    ``ip.findObjects``, blurred, edge-detected with Canny, and dilated then
    eroded. The contour with the largest area is drawn on the resized image
    and handed to ``re.rectangle``. Both images are shown and the call blocks
    until a key is pressed.

    Args:
        img_in: Input image array; only its first two dimensions are read.

    Returns:
        The processed edge image.
    """
    # ndarray.shape is (rows, cols, ...), i.e. height comes first
    height, width = img_in.shape[:2]
    resize_coeff = 0.25
    # cv2.resize takes the target size as (width, height)
    img_in = cv2.resize(img_in, (int(resize_coeff * width), int(resize_coeff * height)))
    # NOTE(review): `ip` and `re` are defined outside this snippet; presumably
    # project modules - confirm.
    img_in = ip.findObjects(img_in)
    blr = cv2.GaussianBlur(img_in, (9, 9), 0)
    img = cv2.Canny(blr, 50, 250, L2gradient=False)

    # dilate then erode with the same kernel
    kernel = np.ones((5, 5), np.uint8)
    img_dilate = cv2.dilate(img, kernel, iterations=1)
    img = cv2.erode(img_dilate, kernel, iterations=1)

    # [-2:] copes with both the 2-value and the 3-value return forms
    contours, _ = cv2.findContours(img, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2:]
    if len(contours) > 0:
        # max() on an empty sequence would raise, so only draw when something was found
        max_contour = max(contours, key=cv2.contourArea)
        cv2.drawContours(img_in, [max_contour], 0, (0, 0, 255), 2)
        re.rectangle(img, [max_contour])

    cv2.imshow("test", img_in)
    cv2.imshow("test1", img)
    cv2.waitKey()
    return img
I got this result:
The result I want:
When I use shape detection, the result has 15 angles instead of four. Function:
def rectangle(img, contours):
    """Label every contour that approximates to four vertices.

    Each contour is simplified with ``cv2.approxPolyDP`` using 1% of its
    perimeter as tolerance, and the vertex count is printed. For a 4-vertex
    result the bounding-box aspect ratio is printed and the text "rectangle"
    is drawn on ``img`` at the bounding box's top-left corner.

    Args:
        img: Image the label is drawn on (modified in place).
        contours: Iterable of contours as returned by ``cv2.findContours``.
    """
    for contour in contours:
        approx = cv2.approxPolyDP(contour, 0.01 * cv2.arcLength(contour, True), True)
        print(len(approx))
        if len(approx) != 4:
            continue
        print("Rect")
        x, y, w, h = cv2.boundingRect(approx)
        aspectRatio = float(w) / h
        print(aspectRatio)
        cv2.putText(img, "rectangle", (x, y), cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 0))
EDIT:
Original image:
What if you can remove noise around that shape? I think your mask is good for more processing:
import numpy as np
import sys
import cv2
# Load the mask
base_dir = sys.path[0]  # renamed from `dir`, which shadowed the builtin
im = cv2.imread(base_dir + '/img.png')
H, W = im.shape[:2]

# Make gray scale image
gry = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)

# Make binary image, then invert it
bw = cv2.threshold(gry, 127, 255, cv2.THRESH_BINARY)[1]
bw = ~bw

# Focus on edges
bw = cv2.erode(bw, np.ones((5, 5)))

# Use flood fill to remove noise
# (the floodFill mask must be 2 pixels wider and taller than the image)
cv2.floodFill(bw, np.zeros((H + 2, W + 2), np.uint8), (0, 0), 0)
bw = cv2.medianBlur(bw, 7)

# Remove remaining noise with another flood fill from the image centre
nonRectArea = bw.copy()
cv2.floodFill(nonRectArea, np.zeros((H + 2, W + 2), np.uint8), (W // 2, H // 2), 0)
bw[nonRectArea == 255] = 0

# Find contours and sort them by width. findContours may return a tuple,
# which has no .sort(), so build a sorted list instead.
cnts = cv2.findContours(bw, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)[-2]
cnts = sorted(cnts, key=lambda p: cv2.boundingRect(p)[2], reverse=True)

# Find biggest (widest) blob
x, y, w, h = cv2.boundingRect(cnts[0])
cv2.rectangle(im, (x, y), (x + w, y + h), 127, 1)

# Save output
cv2.imwrite(base_dir + '/img_1.png', im)
cv2.imwrite(base_dir + '/img_2.png', bw)
cv2.imwrite(base_dir + '/img_3.png', nonRectArea)

Extract the floor layout and threshold with OpenCV and Python

I've tried to use SSIM to extract the difference between two images to get only the floor area (image_a is the original and image_b has painted floor).
The output that was expected, is a threshold mask.
The problem I had was that the thresholding of ssim difference just didn't work in my case (example is shown below).
Can someone provide a better technique or theory of thresholding?
from skimage.measure import compare_ssim
import cv2
...
image_a = cv2.imread(first)
image_b = cv2.imread(second)
gray_a = cv2.cvtColor(image_a, cv2.COLOR_BGR2GRAY)
gray_b = cv2.cvtColor(image_b, cv2.COLOR_BGR2GRAY)

# full=True makes compare_ssim also return the per-pixel similarity image
_, diff = compare_ssim(gray_a, gray_b, full=True, gaussian_weights=True)
diff = (diff * 255).astype("uint8")

thresh = cv2.threshold(diff, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]

# findContours returns 2 or 3 values depending on the OpenCV version
contours = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]

if len(contours) > 0:
    # mark the bounding box of the largest-area contour on both images
    largest_contour = max(contours, key=cv2.contourArea)
    x, y, w, h = cv2.boundingRect(largest_contour)
    cv2.rectangle(image_a, (x, y), (x + w, y + h), (36, 255, 12), 2)
    cv2.rectangle(image_b, (x, y), (x + w, y + h), (36, 255, 12), 2)

cv2.imwrite('image_a.jpg', image_a)
cv2.imwrite('image_b.jpg', image_b)
cv2.imwrite('thresh.jpg', thresh)
image_a with max contour detected
image_b with max contour detected
thresh
A better result can be obtained by thresholding the mean of the difference between the given images.
def get_mask(img1, img2, thresh):
    """Build a 3-channel mask of the pixels where two images differ.

    The absolute difference of the images is averaged over axis 2. Pixels
    whose mean difference is at most ``thresh`` are set to 0 and the rest
    to 255.

    Args:
        img1: First image array; must have at least three dimensions.
        img2: Second image array with the same shape as ``img1``.
        thresh: Mean-difference threshold.

    Returns:
        The thresholded mean difference stacked three times along the depth
        axis, or ``None`` when the two shapes differ.
    """
    if img1.shape != img2.shape:
        return None
    diff = np.mean(cv2.absdiff(img1, img2), axis=2)
    diff[diff <= thresh] = 0
    diff[diff > thresh] = 255
    return np.dstack([diff] * 3)
Artifacts may appear in the resulting mask and can be reduced by applying Morphological Transformations.

TypeError: 'key' is an invalid keyword argument for this function

I'm trying to convert an image to text using opencv, but the code gives the following error:
contours.sort(key=lambda x: get_contour_precedence(x, img.shape[1]))
TypeError: 'key' is an invalid keyword argument for this function error.
Is there any way to fix it? This is the code:
import cv2
import pytesseract
import numpy as np
import PIL.Image as Image
# Path to the Tesseract executable used by pytesseract
pytesseract.pytesseract.tesseract_cmd = 'C:\\Program Files\\Tesseract-OCR\\tesseract'
def get_contour_precedence(contour, cols):
    """Return a sort key for ``contour`` based on its bounding box origin.

    The bounding box's x coordinate is rounded down to a multiple of
    ``tolerance_factor`` (20), multiplied by ``cols``, and the y coordinate
    is added.

    Args:
        contour: A contour accepted by ``cv2.boundingRect``.
        cols: Multiplier applied to the bucketed x coordinate; the caller
            passes the image width.

    Returns:
        The integer sort key.
    """
    tolerance_factor = 20
    origin = cv2.boundingRect(contour)
    return ((origin[0] // tolerance_factor) * tolerance_factor) * cols + origin[1]
img = cv2.imread("C:/Users/Akshatha/Desktop/text_detection_from_image/images/news1.jpg")
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
ret, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
kernel = np.ones((5, 5), np.uint8)
erosion = cv2.erode(thresh, kernel, iterations=1)
dilation = cv2.dilate(thresh, kernel, iterations=3)
# NOTE(review): with OpenCV 3.x findContours returns (image, contours,
# hierarchy), so this unpacking would bind `contours` to the image array.
# That is the likely cause of the TypeError raised by contours.sort(key=...)
# below - confirm against the installed OpenCV version.
(contours, heirarchy, _) = cv2.findContours(dilation, cv2.RETR_EXTERNAL,
                                            cv2.CHAIN_APPROX_SIMPLE)
heirarchy = heirarchy[0]
print("start")
print(np.array(heirarchy).shape, np.array(contours).shape)
print("stop")
contours.sort(key=lambda x: get_contour_precedence(x, img.shape[1]))
# print(contours[0])
idx = 0
textlist = []
i = 0
rect_list = []
for c in contours:
    # get the bounding rect
    x, y, w, h = cv2.boundingRect(c)
    rect_list.append((x, y, x + w, y + h))
    # draw a rectangle to visualize the bounding rect
    cv2.rectangle(img, (x, y), (x + w, y + h), (255, 255, 0), 3)
    roi = img[y:y + h, x:x + w]
    text = pytesseract.image_to_string(
        roi, lang='eng', config='--oem 1 --psm 6 -c preserve_interword_spaces=1 ')
    print(text)
    # number each region just above its bounding box
    cv2.putText(img, "#{}".format(i + 1), (x, y - 15),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 4)
    i += 1
cv2.namedWindow('Dilation', cv2.WINDOW_NORMAL)
cv2.imshow('Dilation', img)
cv2.waitKey(0)
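The sort() you are calling does not take a key argument. The unpacking `(contours, heirarchy, _) = cv2.findContours(...)` is the likely culprit: in OpenCV 3.x findContours returns (image, contours, hierarchy), so `contours` ends up being the image, a NumPy array, and ndarray.sort() has no key parameter. Unpack the values in the right order for your OpenCV version, or, if contours is an iterable, use sorted() instead like this: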
# sorted() returns a new list, so keep the result; use the same key as the question
contours = sorted(contours, key=lambda x: get_contour_precedence(x, img.shape[1]))
Note that this will return a list.

Python file write all the bounding box coordinates using OpenCV

My task:
My task is to extract bounding box coordinates of following image:
I have following code. I am trying to get these coordinates using roi, but I am not sure how to get them.
import cv2
import numpy as np
large = cv2.imread('1.jpg')
small = cv2.cvtColor(large, cv2.COLOR_BGR2GRAY)

kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
grad = cv2.morphologyEx(small, cv2.MORPH_GRADIENT, kernel)
_, bw = cv2.threshold(grad, 0.0, 255.0, cv2.THRESH_BINARY | cv2.THRESH_OTSU)

# horizontal 9x1 closing; presumably joins adjacent characters into one blob
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (9, 1))
connected = cv2.morphologyEx(bw, cv2.MORPH_CLOSE, kernel)

# [-2:] copes with both the 2-value and the 3-value return forms
contours, _ = cv2.findContours(connected.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[-2:]
mask = np.zeros(bw.shape, dtype=np.uint8)
for idx, contour in enumerate(contours):
    x, y, w, h = cv2.boundingRect(contour)
    mask[y:y+h, x:x+w] = 0
    cv2.drawContours(mask, contours, idx, (255, 255, 255), -1)
    # fraction of the bounding box covered by the filled contour
    r = float(cv2.countNonZero(mask[y:y+h, x:x+w])) / (w * h)
    if r > 0.45 and w > 8 and h > 8:
        cv2.rectangle(large, (x, y), (x+w-1, y+h-1), (0, 255, 0), 1)
        roi = large[y:y+h, x:x+w]
        print(roi)
Result should be something like this:
1675,1335,2338,1338,2337,1455,1674,1452. :Box1
3067,519,3604,521,3603,651,3066,648 :Box2
1017,721,1729,726,1728,857,1016,852 :Box3
I have referred:
Extract all bounding boxes using OpenCV Python . On this link they are extracting images inside bounding boxes when they already have annotated image with rectangular GUI as a input. I want to extract the detected regions into a text file. How do I do it?
x, y, w, h = cv2.boundingRect(contours[idx]) is the coordinates you want, then write it to a txt file:
...
with open("coords.txt", "w+") as file:
    for idx, contour in enumerate(contours):
        x, y, w, h = cv2.boundingRect(contour)
        mask[y:y+h, x:x+w] = 0
        # four corners starting at the top-left: (x, y), (x+w, y), (x+w, y+h), (x, y+h);
        # the trailing newline puts each box on its own line
        file.write("Box {0}: ({1},{2}), ({3},{4}), ({5},{6}), ({7},{8})\n".format(idx, x, y, x+w, y, x+w, y+h, x, y+h))
        cv2.drawContours(mask, contours, idx, (255, 255, 255), -1)
        r = float(cv2.countNonZero(mask[y:y+h, x:x+w])) / (w * h)
...
The result will contain 4 points for each box, like this.
Box 0: (360,259), (364,259), (364,261), (360,261)
Box 1: (380,258), (385,258), (385,262), (380,262)
Box 2: (365,258), (370,258), (370,262), (365,262)
Box 3: (386,256), (393,256), (393,260), (386,260)
Box 4: (358,256), (361,256), (361,258), (358,258)
import cv2
import numpy as np
# Load an image in grayscale
img = cv2.imread('image.jpg', cv2.IMREAD_GRAYSCALE)

# Perform OTSU thresholding (with THRESH_OTSU the threshold value is chosen
# automatically, so the value passed here is not used)
thresh, img_bin = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)

# Find contours; [-2] picks the contour list in both return forms
contours = cv2.findContours(img_bin, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

# Draw on a BGR copy: a green (0, 255, 0) box cannot be shown on a
# single-channel image
annotated = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)

# Iterate through all contours
for cnt in contours:
    # Get bounding box coordinates
    x, y, w, h = cv2.boundingRect(cnt)
    # Draw bounding box on the colour copy of the original image
    cv2.rectangle(annotated, (x, y), (x + w, y + h), (0, 255, 0), 2)

# Show the original image with bounding boxes
cv2.imshow("Bounding Boxes", annotated)
cv2.waitKey(0)
cv2.destroyAllWindows()

Categories

Resources