I'm trying to read the 213 from this image, but I can't even get pytesseract to read everything.
Here is my best effort code:
import cv2
import pytesseract
# Load the image from disk
img = cv2.imread('gamepictures/text.png')
# Convert to grey. cvtColor is called on the top-level cv2 namespace because
# the nested cv2.cv2 module is not exposed by recent opencv-python releases.
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# Binarise with a mean adaptive threshold (block size 3, constant 15)
img = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 3, 15)
# OCR the thresholded image with page segmentation mode 6
txt = pytesseract.image_to_string(img, config='--psm 6')
print(txt)
# Display the thresholded image and wait for a key press
cv2.imshow("", img)
cv2.waitKey(0)
I have been trying to change the thresholding algorithm; I even tried Canny, but I can't get it to work.
So my questions are: how can I read everything?
And how can I read only the 213?
image
Something like this works:
import cv2
import pytesseract
# Load the image from disk
img = cv2.imread('gamepictures/text.png')
# Keep only rows 98-189 and columns 6-148 (all channels)
img = img[98:190, 6:149, :]
# Convert to grey. cvtColor is called on the top-level cv2 namespace because
# the nested cv2.cv2 module is not exposed by recent opencv-python releases.
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# Smooth before thresholding so the strokes become more continuous
img = cv2.GaussianBlur(img, (5, 5), 3)
# Binarise with a mean adaptive threshold (block size 7, constant -2)
img = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 7, -2)
# OCR with page segmentation mode 10, restricted to digits
txt = pytesseract.image_to_string(img, config='--psm 10 -c tessedit_char_whitelist=0123456789')
print(img.shape)
print(txt)
# Display the processed crop and wait for a key press
cv2.imshow("", img)
cv2.waitKey(0)
Basically I just sliced the image and played around with the parameters a bit. The GaussianBlur is there to make the image more continuous.
The -c tessedit_char_whitelist=0123456789 is optional and just makes sure that only numbers are read.
I want to recognize a image like this:
I am using the following config:
config="--psm 6 --oem 3 -c tessedit_char_whitelist=0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ,."
but when I try to convert that, I get the following:
1581
1
W
I think that the image shows really clearly what is written and think that there is a problem with pytesseract. Can you help?
Preprocessing the image to obtain a binary image before performing OCR seems to work. You could also try resizing the image so that more detail is visible.
Results
158.1
1
IT
import cv2
import pytesseract
# Tell pytesseract where the Tesseract executable lives
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

# Load the picture, go to grayscale, then binarise with Otsu's threshold
image = cv2.imread('1.png')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
otsu_flags = cv2.THRESH_BINARY + cv2.THRESH_OTSU
_, thresh = cv2.threshold(gray, 0, 255, otsu_flags)

# Run OCR on the binary image (page segmentation mode 6)
data = pytesseract.image_to_string(thresh, lang='eng', config='--psm 6')
print(data)

# Show the binary image until a key is pressed
cv2.imshow('thresh', thresh)
cv2.waitKey()
I want to automatically solve captchas like this one (all of them have a red background and white letters) with Pytesseract.
I have been trying to process the image so that Pytesseract can read it, but without success. It would be great to receive your ideas on how to process this image. Here is my code:
import cv2
import pytesseract
# Locations of the Tesseract data directory and executable
tessdata_dir_config = '--tessdata-dir "C:\\Program Files\\Tesseract-OCR\\tessdata"'
pytesseract.pytesseract.tesseract_cmd = 'C:\\Program Files\\Tesseract-OCR\\tesseract.exe'

# Load the captcha at twice its size, then derive a grayscale and an
# adaptively thresholded variant from it
img = cv2.resize(cv2.imread("captcha.png"), None, fx=2, fy=2)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
adaptive = cv2.adaptiveThreshold(
    gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 85, 20)

# (window title, image) pairs; the trailing comments are the OCR results
# reported for each variant
variants = [
    ("Captcha", img),        # Output: IMQW
    ("Gray", gray),          # Output: IMOW
    ("Adaptive", adaptive),  # Output: IMOW,
]

# First print the OCR text of every variant, then show all of them
for _, picture in variants:
    print(pytesseract.image_to_string(picture, config=tessdata_dir_config).strip())
for title, picture in variants:
    cv2.imshow(title, picture)
cv2.waitKey(7000)
I have a three-step solution
Resize
Closing
Threshold
Step-1: Resize
Resizing the image enables the OCR-algorithm to detect the character or digit strokes in the input image.
Step-2: Closing
Closing is a morphological operation that aims to remove small holes in the input image.
If we look carefully, the Q and W characters consist of lots of small holes.
Step-3: Threshold
We will apply simple thresholding to binarize the image. Our aim is to remove any leftover artifacts from the image.
Resize
Closing
Threshold
Result:
IMQW
Code:
import cv2
from pytesseract import image_to_string
# Read the captcha and work on its grayscale version
img = cv2.imread("QUfxY.png")
gry = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Step 1 - resize: double both dimensions
height, width = gry.shape[:2]
gry = cv2.resize(gry, (2 * width, 2 * height))

# Step 2 - closing (the kernel argument is left as None)
cls = cv2.morphologyEx(gry, cv2.MORPH_CLOSE, None)

# Step 3 - threshold: binarise with Otsu's method
_, thr = cv2.threshold(cls, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)

txt = image_to_string(thr)
print(txt)
I am using tesseract with python. It recognizes almost all of my images with 2 or more numbers or characters
I don't want to train tesseract with "only digits" because I am recognizing characters too.
But the attached image is not recognized by tesseract.
I think the problem is caused by that bold border.
After removing that, the digit got recognized correctly.
Above is the corrected image:
And here's the code if you are interested:
import cv2
import numpy as np
import pytesseract
def discard(image, max_size=100):
    """Blank out connected components that are too wide or too tall.

    The image is converted to uint8 and labelled with 4-connectivity. Every
    pixel belonging to a component whose bounding-box width or height exceeds
    *max_size* is set to 0.

    Args:
        image: Binary image to clean.
        max_size: Largest bounding-box width/height (in pixels) a component
            may have and still be kept. Defaults to 100.

    Returns:
        The uint8 image with the oversized components zeroed.
    """
    # NOTE(review): whether the caller's array is modified in place depends on
    # whether np.uint8(image) returns a copy - confirm before relying on it.
    image = np.uint8(image)
    _, labels, stats, _ = cv2.connectedComponentsWithStats(image, connectivity=4)
    too_wide = stats[:, cv2.CC_STAT_WIDTH] > max_size
    too_tall = stats[:, cv2.CC_STAT_HEIGHT] > max_size
    # Look up each pixel's label in the per-label verdict to get a pixel mask
    image[(too_wide | too_tall)[labels]] = 0
    return image
# Read the picture with imread flag 0
img = cv2.imread("check_img.jpg", 0)

# Binarization: invert, then map every value above 5 to 255
inverted = 255 - img
ret, thresh = cv2.threshold(inverted, 5, 255, cv2.THRESH_BINARY)

# Remove the long connected components, then median-filter the noise away
thresh = cv2.medianBlur(discard(thresh), 3)

# Invert again
thresh = 255 - thresh

# Show the cleaned image
cv2.imshow("img", thresh)

# Run Tesseract OCR on the cleaned image
custom_config = r'--oem 3 --psm 6'
text = pytesseract.image_to_string(thresh, config=custom_config)
print(text)

cv2.waitKey(0)
I need to use Pytesseract to extract text from this picture:
and the code:
from PIL import Image, ImageEnhance, ImageFilter
import pytesseract
path = 'pic.gif'
img = Image.open(path)
img = img.convert('RGBA')
pix = img.load()
# Binarise: a pixel with any colour channel below 102 becomes opaque black,
# every other pixel becomes opaque white
for y in range(img.size[1]):
    for x in range(img.size[0]):
        red, green, blue = pix[x, y][:3]
        if red < 102 or green < 102 or blue < 102:
            pix[x, y] = (0, 0, 0, 255)
        else:
            pix[x, y] = (255, 255, 255, 255)
# JPEG cannot store an alpha channel, so drop it before saving; writing the
# RGBA image directly raises OSError in current Pillow releases
img.convert('RGB').save('temp.jpg')
text = pytesseract.image_to_string(Image.open('temp.jpg'))
# os.remove('temp.jpg')
print(text)
and the "temp.jpg" is
Not bad, but the result of print is ,2 WW
That is not the right text, 2HHH, so how can I remove those black dots?
Here's a simple approach using OpenCV and Pytesseract OCR. To perform OCR on an image, it's important to preprocess the image. The idea is to obtain a processed image where the text to extract is in black with the background in white. To do this, we can convert to grayscale, apply a slight Gaussian blur, then Otsu's threshold to obtain a binary image. From here, we can apply morphological operations to remove noise. Finally, we invert the image. We perform text extraction using the --psm 6 configuration option to assume a single uniform block of text. Take a look here for more options.
Here's a visualization of the image processing pipeline:
Input image
Convert to grayscale -> Gaussian blur -> Otsu's threshold
Notice how there are tiny specks of noise; to remove them, we can perform morphological operations
Finally we invert the image
Result from Pytesseract OCR
2HHH
Code
import cv2
import pytesseract
# Tell pytesseract where the Tesseract executable lives
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

# Grayscale -> light Gaussian blur -> inverted Otsu threshold
image = cv2.imread('1.png')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (3, 3), 0)
_, thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

# Morphological opening with a 3x3 rectangle removes the tiny specks of
# noise; the result is then inverted for OCR
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=1)
invert = 255 - opening

# Extract the text (page segmentation mode 6)
data = pytesseract.image_to_string(invert, lang='eng', config='--psm 6')
print(data)

# Show every intermediate stage until a key is pressed
for window_name, stage in (('thresh', thresh), ('opening', opening), ('invert', invert)):
    cv2.imshow(window_name, stage)
cv2.waitKey()
Here is my solution:
import pytesseract
from PIL import Image, ImageEnhance, ImageFilter
# Start from the already binarised picture ("the second one")
im = Image.open("temp.jpg")

# Median-filter the image, then double its contrast
im = im.filter(ImageFilter.MedianFilter())
im = ImageEnhance.Contrast(im).enhance(2)

# Reduce to mode '1' and write the result to disk
im = im.convert('1')
im.save('temp2.jpg')

# OCR the saved file
cleaned = Image.open('temp2.jpg')
text = pytesseract.image_to_string(cleaned)
print(text)
I have a different pytesseract approach for our community.
Here is my approach
import pytesseract
from PIL import Image
# Page segmentation mode 10, OCR engine mode 3, digits only
ocr_config = '--psm 10 --oem 3 -c tessedit_char_whitelist=0123456789'
source_image = Image.open("temp.jpg")
text = pytesseract.image_to_string(source_image, lang='eng', config=ocr_config)
print(text)
To extract the text directly from the web, you can try the following implementation (making use of the first image):
import io
import requests
import pytesseract
from PIL import Image, ImageFilter, ImageEnhance
# Download the image. The timeout keeps the script from hanging on a stalled
# connection, and raise_for_status() turns an HTTP error into an exception
# instead of handing an error page to the image decoder.
response = requests.get('https://i.stack.imgur.com/HWLay.gif', timeout=10)
response.raise_for_status()
img = Image.open(io.BytesIO(response.content))
# Grayscale -> median filter -> doubled contrast -> mode '1'
img = img.convert('L')
img = img.filter(ImageFilter.MedianFilter())
enhancer = ImageEnhance.Contrast(img)
img = enhancer.enhance(2)
img = img.convert('1')
# Keep a copy of the preprocessed image on disk
img.save('image.jpg')
imagetext = pytesseract.image_to_string(img)
print(imagetext)
Here is my small improvement, which removes noise and arbitrary lines within a certain colour range.
import pytesseract
from PIL import Image, ImageEnhance, ImageFilter
im = Image.open(img)  # img is the path of the image
im = im.convert("RGBA")
# Keep every pixel that has a colour channel below 112 and turn all other
# pixels opaque white (4-tuple so it matches the RGBA mode)
newimdata = [
    item if (item[0] < 112 or item[1] < 112 or item[2] < 112) else (255, 255, 255, 255)
    for item in im.getdata()
]
im.putdata(newimdata)
# Median filter, doubled contrast, then reduce to mode '1'
im = im.filter(ImageFilter.MedianFilter())
enhancer = ImageEnhance.Contrast(im)
im = enhancer.enhance(2)
im = im.convert('1')
im.save('temp2.jpg')
# The page segmentation flag is spelled --psm; the single-dash -psm form is
# the legacy spelling and is not accepted by Tesseract 4 and later
text = pytesseract.image_to_string(
    Image.open('temp2.jpg'),
    config='-c tessedit_char_whitelist=0123456789abcdefghijklmnopqrstuvwxyz --psm 6',
    lang='eng',
)
print(text)
You only need to enlarge the picture with cv2.resize:
# Scale the picture up by a factor of 7 along both axes
scale = 7
image = cv2.resize(image, (0, 0), fx=scale, fy=scale)
my picture 200x40 -> HZUBS
resized same picture 1400x300 -> A 1234 (so, this is right)
and then,
# Binarise at 200, then smooth with an 11x11 Gaussian blur followed by a
# median blur of aperture 9
retval, image = cv2.threshold(image, 200, 255, cv2.THRESH_BINARY)
image = cv2.medianBlur(cv2.GaussianBlur(image, (11, 11), 0), 9)
and tune the parameters to improve the results.
Page segmentation modes:
0 Orientation and script detection (OSD) only.
1 Automatic page segmentation with OSD.
2 Automatic page segmentation, but no OSD, or OCR.
3 Fully automatic page segmentation, but no OSD. (Default)
4 Assume a single column of text of variable sizes.
5 Assume a single uniform block of vertically aligned text.
6 Assume a single uniform block of text.
7 Treat the image as a single text line.
8 Treat the image as a single word.
9 Treat the image as a single word in a circle.
10 Treat the image as a single character.
11 Sparse text. Find as much text as possible in no particular order.
12 Sparse text with OSD.
13 Raw line. Treat the image as a single text line,
bypassing hacks that are Tesseract-specific.
from PIL import Image, ImageEnhance, ImageFilter
import pytesseract
path = 'hhh.gif'
img = Image.open(path)
img = img.convert('RGBA')
pix = img.load()
# Binarise: a pixel with any colour channel below 102 becomes opaque black,
# every other pixel becomes opaque white
for y in range(img.size[1]):
    for x in range(img.size[0]):
        red, green, blue = pix[x, y][:3]
        if red < 102 or green < 102 or blue < 102:
            pix[x, y] = (0, 0, 0, 255)
        else:
            pix[x, y] = (255, 255, 255, 255)
# OCR the binarised image itself; re-opening 'hhh.gif' here would throw away
# all of the preprocessing done above
text = pytesseract.image_to_string(img.convert('RGB'))
print(text)