I'm trying to read cards and output card numbers and expiry date with OpenCV.
import cv2
import pytesseract
filename = 'image1.png'
img = cv2.imread(filename)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
canny = cv2.Canny(gray, 50, 150, apertureSize=3)
result = pytesseract.image_to_string(canny)
print(f"OCR Results: {result}")
cv2.imshow('img', img)
cv2.imshow('canny', canny)
if cv2.waitKey(0) & 0xff == 27:
cv2.destroyAllWindows()
Image before processing
Image after Canny
The result text does not look good. See the screenshot below:
Question: How can I properly handle the cards fonts well for better results. Any idea is highly appreciated.
Thanks.
It looks like the OCR is not working well when passing the edges of the text.
You better apply threshold instead of using Canny.
I suggest the following stages:
Convert from BGR to HSV color space, and get the S (saturation) color channel of HSV.
All gray pixels in S are zero, and colored pixels are above zero.
Convert to binary using automatic threshold (use cv2.THRESH_OTSU).
Crop the contour with the maximum size.
Because the image you posted contains some background.
Apply OCR on the cropped area.
Here is the code:
import numpy as np
import cv2
import imutils # https://pypi.org/project/imutils/
import pytesseract
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe' # I am using Windows
img = cv2.imread('image1.png') # Read input image
# Convert from BGR to HSV color space
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
# Get the saturation color channel - all gray pixels are zero, and colored pixels are above zero.
s = hsv[:, :, 1]
# Convert to binary using automatic threshold (use cv2.THRESH_OTSU)
ret, thresh = cv2.threshold(s, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
# Find contours (in inverted thresh)
cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
cnts = imutils.grab_contours(cnts)
# Find the contour with the maximum area.
c = max(cnts, key=cv2.contourArea)
# Get bounding rectangle
x, y, w, h = cv2.boundingRect(c)
# Crop the bounding rectangle out of thresh
thresh_card = thresh[y:y+h, x:x+w].copy()
# OCR
result = pytesseract.image_to_string(thresh_card)
print(f"OCR Results:\n {result}")
# Show images for debugging
cv2.imshow('s', s)
cv2.imshow('thresh', thresh)
cv2.imshow('thresh_card', thresh_card)
cv2.waitKey(0)
cv2.destroyAllWindows()
OCR Result:
Visa Classic
| By)
4000 1234 Sb18 9010
CARDHOLDER MARE
VISA
Still not perfect...
s:
thresh:
thresh_card:
Related
I try to use python, NumPy, and OpenCV to analyze the image below and just draw a circle on each object found. The idea here is not to identify the bug only identify any object that is different from the background.
Original Image:
Here is the code that I'm using.
import cv2
import numpy as np
img = cv2.imread('per.jpeg', cv2.IMREAD_GRAYSCALE)
if cv2.__version__.startswith('2.'):
detector = cv2.SimpleBlobDetector()
else:
detector = cv2.SimpleBlobDetector_create()
keypoints = detector.detect(img)
print(len(keypoints))
imgKeyPoints = cv2.drawKeypoints(img, keypoints, np.array([]), (0,0,255), cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
status = cv2.imwrite('teste.jpeg',imgKeyPoints)
print("Image written to file-system : ",status)
But the problem is that I'm getting only a greyscale image as result without any counting or red circle, as shown below:
Since I'm new to OpenCV and object recognition world I'm not able to identify what is wrong, and any help will be very appreciated.
Here is one way in Python/OpenCV.
Threshold on the bugs color in HSV colorspace. Then use morphology to clean up the threshold. Then get contours. Then find the minimum enclosing circle around each contour. Then bias the radius to make a bit larger and draw the circle around each bug.
Input:
import cv2
import numpy as np
# read image
img = cv2.imread('bugs.jpg')
# convert image to hsv colorspace
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
# threshold on bugs color
lower=(0,90,10)
upper=(100,250,170)
thresh = cv2.inRange(hsv, lower, upper)
# apply morphology to clean up
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3,3))
morph = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (6,6))
morph = cv2.morphologyEx(morph, cv2.MORPH_CLOSE, kernel)
# get external contours
contours = cv2.findContours(morph, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]
result = img.copy()
bias = 10
for cntr in contours:
center, radius = cv2.minEnclosingCircle(cntr)
cx = int(round(center[0]))
cy = int(round(center[1]))
rr = int(round(radius)) + bias
cv2.circle(result, (cx,cy), rr, (0, 0, 255), 2)
# save results
cv2.imwrite('bugs_threshold.jpg', thresh)
cv2.imwrite('bugs_cleaned.jpg', morph)
cv2.imwrite('bugs_circled.jpg', result)
# display results
cv2.imshow('thresh', thresh)
cv2.imshow('morph', morph)
cv2.imshow('result', result)
cv2.waitKey(0)
cv2.destroyAllWindows()
Threshold Image:
Morphology Cleaned Image:
Resulting Circles:
from pdf2image import convert_from_path
import cv2,numpy,os
def pil_to_cv2(image):
open_cv_image = numpy.array(image)
return open_cv_image[:, :, ::-1].copy()
images = convert_from_path('test.pdf')
cv_h=[pil_to_cv2(i) for i in images]
for img in cv_h:
#function_to_crop()
cv2.imwrite('modified.png', img)
How can I remove the extra whiteness from the image (top,sideways,under) without actually intercepting the drawing, The drawings from pdf are from different sizes so I can't crop the images by a fixed number.
Ideally,the output would look like this
Here is another way to do that in Python/OpenCV.
Read the image
Convert to gray and invert the polarity
Threshold
Apply morphology close to fill in holes and make one solid region
Get the outer contour and its bounding box
Use the bounding box to crop the image using Numpy slicing
Save the result
Input:
import cv2
import numpy as np
# read image
img = cv2.imread('multipower.png')
# convert to grayscale
gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
# invert gray image
gray = 255 - gray
# threshold
thresh = cv2.threshold(gray,0,255,cv2.THRESH_BINARY)[1]
# apply close and open morphology
kernel = np.ones((75,75), np.uint8)
mask = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
# get contours (presumably just one around the nonzero pixels)
contours = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]
cntr = contours[0]
x,y,w,h = cv2.boundingRect(cntr)
# draw contour on input
contour_img = img.copy()
cv2.drawContours(contour_img,[cntr],0,(0,0,255),3)
# crop to bounding rectangle
crop = img[y:y+h, x:x+w]
# save cropped image
cv2.imwrite('multipower_thresh.png',thresh)
cv2.imwrite('multipower_mask.png',mask)
cv2.imwrite('multipower_contour.png',contour_img)
cv2.imwrite('multipower_cropped.png',crop)
# show the images
cv2.imshow("THRESH", thresh)
cv2.imshow("MASK", mask)
cv2.imshow("CONTOUR", contour_img)
cv2.imshow("CROP", crop)
cv2.waitKey(0)
cv2.destroyAllWindows()
Thresholded Image:
Morphology closed image:
Contour image:
Result:
import cv2 as cv
import numpy as np
frame = cv.imread('7dcoI.png')
frame_gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)
mask=cv.threshold(frame_gray, 85, 255, cv.THRESH_BINARY )[1]
rows, cols = mask.shape
non_empty_columns = np.where(mask.min(axis=0)==0)[0]
non_empty_rows = np.where(mask.min(axis=1)==0)[0]
cropBox = (min(non_empty_rows), min(max(non_empty_rows), rows), min(non_empty_columns), min(max(non_empty_columns), cols))
cropped = frame[cropBox[0]:cropBox[1]+1, cropBox[2]:cropBox[3]+1 , :]
cv.imwrite('out_mask.png', cropped)
please help me, I need to insert an image on the substrate.
substrate:
It png, and in the area that is blank with cities, you must insert the image from edge to edge of the frame.
The problem is that I can't find an example of how to insert an image to the known coordinate points of the corners of a given substrate.
Pls help))
My test image
import cv2
import numpy as np
from skimage import io
frame = cv2.cvtColor(io.imread('as.png'), cv2.COLOR_RGB2BGR)
image = cv2.cvtColor(io.imread("Vw5Rc.jpg"), cv2.COLOR_RGB2BGR)
mask = 255 * np.uint8(np.all(frame == [0, 0, 0], axis=2))
contours, _ = cv2.findContours(mask, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
cnt = min(contours, key=cv2.contourArea)
(x, y, w, h) = cv2.boundingRect(cnt)
# Copy appropriately resized image to frame
frame[y:y+h, x:x+w] = cv2.resize(image, (w, h))
cv2.imwrite('frame.png', frame)
I'm trying to find the area where to insert the image by color, the red color of the area I can find, and if there is no color?
The static frame has a constant size.
Here is one way to do it in Python/OpenCV, if I understand what you want.
Read the substrate and trees images
Extract the alpha channel from the substrate
Extract the substrate image without the alpha channel
Use the alpha channel to color the base substrate image white where the alpha channel is black to correct a flaw in the base image
Threshold the alpha channel and invert it
Use morphology to remove the grid lines so that there is only one "outer" contour.
Extract the contour and its bounding box
Resize the trees image to the size of the bounding box.
Use numpy indexing and slicing to multiply the region of the substrate with the resized trees image.
Save the results.
Optionally, display the various images.
Substrate Image:
Trees Image:
import cv2
import numpy as np
# load substrate with alpha channel
substrate = cv2.imread("substrate.png", cv2.IMREAD_UNCHANGED)
hh, ww, cc = substrate.shape
# load colored image
trees = cv2.imread("trees.jpg")
# make img white where alpha is black to merge the alpha channel with the image
alpha = substrate[:,:,3]
img = substrate[:,:,0-2]
img[alpha==0] = 255
img = cv2.merge((img,img,img))
# threshold the img
ret, thresh = cv2.threshold(alpha,0,255,0)
# invert thresh
thresh = 255 - thresh
# make grid lines white in thresh so will get only one contour
kernel = np.ones((9,9), np.uint8)
thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
# find one outer contour
cntrs = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cntrs = cntrs[0] if len(cntrs) == 2 else cntrs[1]
# get bounding box of contour of white rectangle in thresh
for c in cntrs:
x,y,w,h = cv2.boundingRect(c)
#cv2.rectangle(img, (x,y), (x+w,y+h),(0, 0, 255), 2)
# resize trees
trees = cv2.resize(trees,(w,h),0,0)
# generate result
result = img.copy()
result[y:y+h, x:x+w] = img[y:y+h, x:x+w]/255 * trees
# write result to disk
cv2.imwrite("substrate_over_trees.jpg", result)
cv2.imshow("ALPHA", alpha)
cv2.imshow("IMG", img)
cv2.imshow("THRESH", thresh)
cv2.imshow("TREES", trees)
cv2.imshow("RESULT", result)
cv2.waitKey(0)
cv2.destroyAllWindows()
Result:
Note that there is distortion of the trees image, because its aspect ratio does not match the region of the substrate image corresponding to the contour bounding box. This can be changed to maintain the aspect ratio, but then the image will need to be padded to white or some other color to fill the remaining area of the bounding box.
I'm looking to extract the text from an image, The output I am receiving is not very accurate. I wonder if there's any additional steps I can take to process the image more to increase the accuracy of this OCR.
I've looked into some of the different ways to process the image and improve the OCR results. The image is quite small and I've been able to blow it up slightly, but to no avail.
The image will always be horizontal, no other text will be present other than the numbers. The maximum number will go up to 55000.
An example of the image in question:
After image processing, my image is scaled up by 4 on the X and Y axis. And some saturation is removed, although this does not improve the accuracy at all.
image = self._process(scale=6, iterations=2)
text = pytesseract.image_to_string(image, config="--psm 7")
My process method is doing the following:
# Resize and desaturate.
image = cv2.resize(image, None, fx=scale, fy=scale,
interpolation=cv2.INTER_CUBIC)
image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# Apply dilation and erosion.
kernel = np.ones((1, 1), np.uint8)
image = cv2.dilate(image, kernel, iterations=iterations)
image = cv2.erode(image, kernel, iterations=iterations)
return image
Expected: "10411"
The actual value is varied, usually an unrecognizable string, or some numbers are parsed but the accuracy rate is too low to be usable.
I don't have experience with OCR, but I think you're on the right track: increasing the image size so the algorithm has more pixels to work with and increasing the distinction between the numbers and the background.
Tricks I added: thresholding the image, which creates a mask where only the white pixels remain. There were a few white blobs that were not numbers, so I used findContours to color those unwanted blobs black.
Result:
Code:
import numpy as np
import cv2
# load image
image = cv2.imread('number.png')
# resize image
image = cv2.resize(image,None,fx=5, fy=5, interpolation = cv2.INTER_CUBIC)
# create grayscale
gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# perform threshold
retr, mask = cv2.threshold(gray_image, 230, 255, cv2.THRESH_BINARY)
# find contours
ret, contours, hier = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# draw black over the contours smaller than 200 - remove unwanted blobs
for cnt in contours:
# print contoursize to detemine threshold
print(cv2.contourArea(cnt))
if cv2.contourArea(cnt) < 200:
cv2.drawContours(mask, [cnt], 0, (0), -1)
#show image
cv2.imshow("Result", mask)
cv2.imshow("Image", image)
cv2.waitKey(0)
cv2.destroyAllWindows()
I'm building a simple OCR, I'm facing a problem of not being to crop the letters after segmenting them using OpenCV. Can anyone help me with a simple way to crop the letters?
Here's the segmenting code.
import cv2
import numpy as np
mser = cv2.MSER_create()
# original image
# -1 loads as-is so if it will be 3 or 4 channel as the original
image = cv2.imread('1.jpg', -1)
# mask defaulting to black for 3-channel and transparent for 4-channel
# (of course replace corners with yours)
mask = np.zeros(image.shape, dtype=np.uint8)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
vis = image.copy()
regions = mser.detectRegions(gray)
hulls = [cv2.convexHull(p.reshape(-1, 1, 2)) for p in regions[0]]
channel_count = image.shape[2] # i.e. 3 or 4 depending on your image
ignore_mask_color = (255,)*channel_count
cv2.fillConvexPoly(mask, hulls, ignore_mask_color)
# from Masterfool: use cv2.fillConvexPoly if you know it's convex
masked_image = cv2.bitwise_and(vis, hulls)
cv2.imwrite('img')
#for m in range(len(hulls)):
#masked_image = cv2.bitwise_and(vis, ignore_mask_color)
# save the result
#cv2.imwrite('img'+m, masked_image)
This results:
I need each letter to be cropped using the same hulls. Any help?
You can't crop and directly save the hulls as you can see them in the example you posted. Or, better, you can crop and paste them in a square/rectangle canvas. But it's not the answer you want for this question.
So, if you have all the text which is computer written, best option to begin is to apply cv2.findContours() to the image. There are also other specific tools you can use, but for now (and relatively to this question) use this.
import cv2
import numpy as np
#import image
image = cv2.imread('image.png')
#grayscale
gray = cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)
cv2.imshow('gray', gray)
cv2.waitKey(0)
#binary
ret,thresh = cv2.threshold(gray,127,255,cv2.THRESH_BINARY_INV)
cv2.imshow('second', thresh)
cv2.waitKey(0)
#dilation
kernel = np.ones((1,1), np.uint8)
img_dilation = cv2.dilate(thresh, kernel, iterations=1)
cv2.imshow('dilated', img_dilation)
cv2.waitKey(0)
#find contours
im2,ctrs, hier = cv2.findContours(img_dilation.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
#sort contours
sorted_ctrs = sorted(ctrs, key=lambda ctr: cv2.boundingRect(ctr)[0])
for i, ctr in enumerate(sorted_ctrs):
# Get bounding box
x, y, w, h = cv2.boundingRect(ctr)
# Getting ROI
roi = image[y:y+h, x:x+w]
# show ROI
#cv2.imshow('segment no:'+str(i),roi)
cv2.rectangle(image,(x,y),( x + w, y + h ),(0,255,0),2)
#cv2.waitKey(0)
if w > 15 and h > 15:
cv2.imwrite('roi{}.png'.format(i), roi)
cv2.imshow('marked areas',image)
cv2.waitKey(0)
You can tweak the kernel for more or less wide of the rectangle detection.