I have an image not of much good quality with a single letter in it. I need to extract the value from this
I tried doing this with open CV. the code works on good quality image but need help to extract from this image
from PIL import Image
import pytesseract
import argparse
import os
import cv2
import numpy as np
img = cv2.imread(r"/home/ubuntu/xyz/xyz.jpg")
img = cv2.resize(img, None, fx=1.5, fy=1.5, interpolation=cv2.INTER_CUBIC)
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
kernel = np.ones((1, 1), np.uint8)
img = cv2.dilate(img, kernel, iterations=1)
img = cv2.erode(img, kernel, iterations=1)
img = cv2.GaussianBlur(img, (5, 5), 0)
img = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)\[1\]
# Save the filtered image
cv2.imwrite(r"/home/ubuntu/xyz/rr.jpg", img)
# Read text with tesseract for python
result = pytesseract.image_to_string(img, lang="eng")
result
why u need Gaussian Blur in this situation
img = cv2.GaussianBlur(img, (5, 5), 0)
with a big window (5,5)
I think you can make a white border outside instead of resizing the image,
and you may use erosion technical to remove the noise from image
Related
I would like to read this captcha using pytesseract:
I follow the advice here: Use pytesseract OCR to recognize text from an image
My code is:
import pytesseract
import cv2
def captcha_to_string(picture):
image = cv2.imread(picture)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (3,3), 0)
thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
# Morph open to remove noise and invert image
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,3))
opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=1)
invert = 255 - opening
cv2.imwrite('thresh.jpg', thresh)
cv2.imwrite('opening.jpg', opening)
cv2.imwrite('invert.jpg', invert)
# Perform text extraction
text = pytesseract.image_to_string(invert, lang='eng', config='--psm 10 --oem 3 -c tessedit_char_whitelist=0123456789')
return text
But my code returns 8\n\x0c which is nonsense.
This is how thresh looks like:
This is how opening looks like:
This is how invert looks like:
Can you help me, how can I improve captcha_to_string function to read the captcha properly? Thanks a lot.
You are on the right way. Removing the noise (small black spots in the inverted image) looks like the way to extract the text successfully.
FYI, the configuration of pytessearct makes the outcome worse only. So, I removed it.
My approach is as follows:
import pytesseract
import cv2
import matplotlib.pyplot as plt
import numpy as np
def remove_noise(img,threshold):
"""
remove salt-and-pepper noise in a binary image
"""
filtered_img = np.zeros_like(img)
labels,stats= cv2.connectedComponentsWithStats(img.astype(np.uint8),connectivity=8)[1:3]
label_areas = stats[1:, cv2.CC_STAT_AREA]
for i,label_area in enumerate(label_areas):
if label_area > threshold:
filtered_img[labels==i+1] = 1
return filtered_img
def preprocess(img_path):
"""
convert the grayscale captcha image to a clean binary image
"""
img = cv2.imread(img_path,0)
blur = cv2.GaussianBlur(img, (3,3), 0)
thresh = cv2.threshold(blur, 150, 255, cv2.THRESH_BINARY_INV)[1]
filtered_img = 255-remove_noise(thresh,20)*255
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,3))
erosion = cv2.erode(filtered_img,kernel,iterations = 1)
return erosion
def extract_letters(img):
text = pytesseract.image_to_string(img)#,config='--psm 10 --oem 3 -c tessedit_char_whitelist=0123456789')
return text
img = preprocess('captcha.jpg')
text=extract_letters(img)
print(text)
plt.imshow(img,'gray')
plt.show()
This is the processed image.
And, the script returns 18L9R.
I'm trying to read some alphanumerical strings in python with pytesseract.
I pre-process the images to reduce noise and make them black and white, but I consistently have issues reading the digits inside the string.
original:
after cleanup:
Extracted text: WISOMW
Code used:
def convert(path):
image = cv2.imread(path)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (3, 3), 0)
thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
invert = 255 - thresh
cv2.imwrite("processed.jpg", invert)
# Perform text extraction
return pytesseract.image_to_string(invert, config="--psm 7")
I've tried different configuration options for tesseract:
oem: tried 1, 3
psm: tried different modes
tessedit_char_whitelist: limited to alphanumerical characters
I feel I'm missing something obvious given that it reliably reads the alpha characters. Any ideas of what can it be?
You were so close. A dilate helps increase white/decrease black. The resolution is low, so a small kernel is used for dilate. If you remove the _INV from your threshold step, you don't need to do another inversion.
import cv2
import numpy as np
import pytesseract
img = cv2.imread('wis9mw.jpg', cv2.IMREAD_GRAYSCALE )
img = cv2.GaussianBlur(img, (3, 3), 0)
img = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
kernel = np.ones((1,1), np.uint8)
img = cv2.dilate(img, kernel, iterations=1)
cv2.imwrite('processed.jpg', img)
text = pytesseract.image_to_string(img, config="--psm 6")
print(text)
gives
WIS9MW
I am trying to extract date from an image, but it is not working. I have more images with dates written in different colour.I have tried few preprocessing techniques like adaptive thresholding, erosion, dilation etc.
def cropright(img):
(h, w) = img.shape[:2]
crp = img[h-60:h, int((4*w)/7):w]
crp = cv2.resize(crp, (0, 0), fx=5, fy=5,interpolation=cv2.INTER_CUBIC)
return(crp)
def extract_text(img):
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
img = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
plt.imshow(img)
kernel = np.ones((1, 1), np.uint8)
img = cv2.dilate(img, kernel, iterations=1)
img = cv2.erode(img, kernel, iterations=1)
plt.imshow(img)
img = cv2.GaussianBlur(img, (5, 5), 0)
plt.imshow(img)
text = pytesseract.image_to_string(img,lang='eng')
return text
file = 'test.jpg'
img = cv2.imread("Request/" + file)
img = cropright(img)
plt.imshow(img)
text2 = extract_text(img)
print(text2)
Here is the image.I have more images with date written in different colours, so I need to develop a solution which will automatically work for all them
Image
I think by converting the image into gray scale you should be able to extract the date from the image irrespective of which color It is written in. I created an InstaFilters application that can apply filters to your images. The image is from that Web App. You can access it at https://share.streamlit.io/arkalsekar/instafilters/main/app.py
The code to apply a grayscale filter can be found on Github in the filters file:
https://github.com/arkalsekar/instafilters
You can even try these ones and believe that this should work. If it doesn't work then you can read this amazing post with some more intersting preprocessing techniques for OCR.
I am trying to run OCR on set of images that are similar but can vary in size. For some reason I cannot get a predictable result. Is there anything I can do do get better results.
Tesseract with or without cv2 preprocessing works beautifully on some images and fails on some and there is no pattern. Images are more or less similar.
Upper image represents processed image
def filter_img(img):
# Read pil image as cv2
img = np.array(img)
img = cv2.resize(img, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)
# Converting image to grayscale (important for applying threshold)
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
#Apply dilation and erosion to remove some noise
kernel = np.ones((1, 1), np.uint8)
# img = cv2.dilate(img, kernel, iterations=1)
img = cv2.erode(img, kernel, iterations=1)
# Apply blur to smooth out the edges
img = cv2.GaussianBlur(img, (5, 5), 0)
# img = cv.medianBlur(img,5)
# Apply threshold to get image with only b&w (binarization)
img = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
img = Image.fromarray(img)
img = ImageOps.expand(img,border=2,fill='black')
visualize.show_labeled_image(img,boxes)
return img
# Applying Tesseract OCR
def run_tesseract(img):
# Tesseract cmd setup
# pytesseract.pytesseract.tesseract_cmd = "tesseract"
whitelist = string.ascii_uppercase + string.digits + ".-"
parameters = '-c load_freq_dawg=0 -c tessedit_char_whitelist="{}"'.format(whitelist)
psm = 8
custom_oem_psm_config = "--dpi 300 --oem 3 --psm {psm} {parameters}".format(parameters=parameters, psm=psm)
try:
text = pytesseract.image_to_string(img, config=custom_oem_psm_config, timeout=2)
return text.strip()
except RuntimeError:
print ("TIMEOUT")
return ""
If your image format is highly consistent, you might consider using split images. And after ocr the image, use conditional judgments on the first letter or number for error-prone areas, such as 0 and O are confusing. Of course, all of the above is only valid if the image is highly consistent.
enter code here
import cv2
import numpy as np
import pytesseract
import matplotlib.pyplot as plt
pytesseract.pytesseract.tesseract_cmd = 'D://Program Files/Tesseract-
OCR/tesseract.exe'
img = cv2.imread('vATKQ.png')
img2 = img[100:250, 180:650] #split to region you want
plt.imshow(img2)
text=pytesseract.image_to_string(img2)
print(text)
I must write the program to detect width of object. I understand that without reference object it will be expressed in pixels but it's enough for me. The background will always be white. I have problem what i should to do right now.
I will be sow greatfull for Your help !
enter image description here
import numpy as np
import imutils
import cv2
import math
# Function to show array of images (intermediate results)
def show_images(images):
for i, img in enumerate(images):
cv2.imshow("image_" + str(i), img)
cv2.waitKey(0)
cv2.destroyAllWindows()
# Read image and preprocess
image = cv2.imread('44.jpg')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (9, 9), 0)
edged = cv2.Canny(blur, 50, 100)
edged = cv2.dilate(edged, None, iterations=1)
edged = cv2.erode(edged, None, iterations=1)
show_images([blur, edged])
#show_images([cnts, edged])
Welcome to Stack Overflow!
Since your're using OpenCV, finding the image dimensions is as simple as the code below.
import numpy as np
import cv2
img = cv2.imread('image.png')
dimension = img.shape
height = img.shape[0]
width = img.shape[1]
channels = img.shape[2]
Read more about this here: