I have an image that was cropped from another image, and I want to give it as input to the image_to_string method:
import pytesseract
import cv2
num_plate = cv2.imread(r'E:\Images\car_plate222.jpeg', cv2.IMREAD_GRAYSCALE)
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
cv2.dilate(num_plate, (15, 15), num_plate)
print(pytesseract.image_to_string(num_plate))
Here's the photo (car plate):
I used dilation for better performance, but it doesn't give me the desired output (sometimes it returns an empty string and sometimes weird output).
Does anybody know what's wrong?
You must threshold the image before passing it to pytesseract. That increases the accuracy.
Here is a sample:
import cv2
import numpy as np
import pytesseract
from PIL import Image
# Grayscale image
img = Image.open('E:\\WorkDir\\KAVSEE\\Python\\test.jpg').convert('L')
ret,img = cv2.threshold(np.array(img), 125, 255, cv2.THRESH_BINARY)
# Older versions of pytesseract need a pillow image
# Convert back if needed
img = Image.fromarray(img.astype(np.uint8))
print(pytesseract.image_to_string(img))
Hope this helps :)
I tried to extract the numbers from the original image https://imgur.com/a/adMaKGy, but with no luck.
Output from pytesseract is: "[a ]:[4] G2):Go] [7 ):Ce J"
Thank you for any advice.
My code:
import pytesseract
import cv2
pytesseract.pytesseract.tesseract_cmd = 'folder /tesseract.exe'
img = cv2.imread("folder /test_image.png")
text = pytesseract.image_to_string(img)
print(text)
The pytesseract README says that OpenCV images are in BGR format while pytesseract assumes RGB format, so you need to convert it:
import cv2
import pytesseract
from PIL import Image  # needed for the second approach below
img_cv = cv2.imread(r'/<path_to_image>/digits.png')
# By default OpenCV stores images in BGR format and since pytesseract assumes RGB format,
# we need to convert from BGR to RGB format/mode:
img_rgb = cv2.cvtColor(img_cv, cv2.COLOR_BGR2RGB)
print(pytesseract.image_to_string(img_rgb))
# OR
img_rgb = Image.frombytes('RGB', img_cv.shape[:2], img_cv, 'raw', 'BGR', 0, 0)
print(pytesseract.image_to_string(img_rgb))
Hi, I'm trying to convert a series of images into an array and then convert each image from RGB to grayscale.
In my working folder I have a number of frame .png files; I need to read all these frames into an array and then convert each frame from RGB to grayscale.
For one frame my code is:
import numpy as np
import cv2 as cv
from PIL import Image
# Read image
Image = cv.imread('frame0.png')
# RGB to Gray Scale
GS = cv.cvtColor(Image, cv.COLOR_BGR2GRAY)
th, Gray = cv.threshold(GS, 128, 192, cv.THRESH_OTSU)
Any idea?
You can use os to list the files in a folder. With the "endswith" function you can filter by file format and pull them all in.
Here is working code:
import numpy as np
import cv2 as cv
import os
for file in os.listdir("/mydir"):  # images folder path
    if file.endswith((".png", ".jpg")):  # you can add formats
        # os.listdir returns bare filenames, so join them with the folder path
        img = cv.imread(os.path.join("/mydir", file))
        GS = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
        th, Gray = cv.threshold(GS, 128, 192, cv.THRESH_OTSU)
        cv.imwrite("converted-" + file, Gray)
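If you also want the frames collected into an array, as mentioned in the question, here is a minimal sketch building on the loop above (it assumes every frame has the same dimensions and reuses the same placeholder folder path):
import numpy as np
import cv2 as cv
import os

frames = []
for file in sorted(os.listdir("/mydir")):  # images folder path
    if file.endswith((".png", ".jpg")):
        img = cv.imread(os.path.join("/mydir", file))
        frames.append(cv.cvtColor(img, cv.COLOR_BGR2GRAY))

# Stack into one array of shape (num_frames, height, width);
# np.stack requires all frames to have the same dimensions.
frames = np.stack(frames)
print(frames.shape)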
I am trying to read some characters that appear on the screen, but none of my attempts has been successful. Example image here:
And here is my code:
import pytesseract as tess
tess.pytesseract.tesseract_cmd = r'C:\Users\myuser\AppData\Local\Tesseract-OCR\tesseract.exe'
from PIL import Image
img = Image.open(r'E:\images\numbers.PNG')
text = tess.image_to_string(img)
print(text)
The "garbage" output that displays is:
C NCES IC DICIIED)
CK STOO TEED
#©O®D#O#O#O#O®
I suppose this is happening because of the color of the numbers and the different background images they can appear on.
Unfortunately I do not know how to proceed further and how to get it working.
Can you please help? Your assistance is much appreciated!
Thanks!
I don't have Tesseract installed right now, but try with the result of this code:
import cv2
img = cv2.imread('img.png')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV, 3, 6)
cv2.imshow('threshold', thresh)
cv2.waitKey(0)
You can fine-tune it to achieve your result.
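If you do have Tesseract installed, a minimal sketch for passing the thresholded image on to pytesseract might look like the following. The --psm value and the digit whitelist are assumptions to tune, not values from the answer above, and whitelist support varies between Tesseract versions:
import cv2
import pytesseract

img = cv2.imread('img.png')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
                               cv2.THRESH_BINARY_INV, 3, 6)

# --psm 6 ("assume a single uniform block of text") and the whitelist are guesses;
# Tesseract usually prefers dark text on a light background, so you may also
# need cv2.bitwise_not(thresh) depending on the result.
text = pytesseract.image_to_string(
    thresh, config='--psm 6 -c tessedit_char_whitelist=0123456789')
print(text)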
I have an input image:
I am normalising the image and then binarising it using OTSU.
import cv2
import numpy as np
import sys
import os
import time
import imutils
from matplotlib import pyplot as plt
import re
import math
orgImg = cv2.imread(sys.argv[1])
orgHeight,orgWidth,_ = orgImg.shape
image = imutils.resize(orgImg,height=350)
rheight, rwidth,_= image.shape
kernel = np.ones((7,7), np.uint16)
# image = cv2.erode(image, kernel, iterations=1)
image11 = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# Copy image to new variable
g = image11.copy()
cv2.normalize(g, g, 70, 255, cv2.NORM_MINMAX)
ret,black_mask = cv2.threshold(g,110,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
cv2.imshow("image",black_mask)
cv2.imwrite(sys.argv[1]+"thresh.jpg",black_mask)
cv2.waitKey(0)
The output image is:
In the output there are many jagged edges. How do I remove these edges and get a perfect "Ca"?
Output should be something like this:
P.S. Please note that the output image does not necessarily have to be binary.
If you leave out the resize with imutils, the result will look like this:
I'm not sure where the extra black lines in your output image come from, because they are not created when I run your code directly.
I changed your code around and cleaned it up a bit; it now looks like this:
import cv2
import numpy as np
from PIL import Image
orgImg = cv2.imread("ca.png")
image11 = cv2.cvtColor(orgImg, cv2.COLOR_BGR2GRAY)
cv2.normalize(image11, image11, 70, 255, cv2.NORM_MINMAX)
ret, black_mask = cv2.threshold(image11, 110, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
cv2.imshow("Original image", orgImg)
cv2.imshow("Final image", black_mask)
cv2.waitKey(0)
cv2.imwrite("ca_blackmask.png", black_mask)
I have an image, and from the image I want to extract key and value pair details.
As an example, I want to extract the value of "MASTER-AIRWAYBILL NO:"
I have written code to extract the entire text from the image using Python, OpenCV and OCR, but I don't have any clue how to extract only the value of "MASTER-AIRWAYBILL NO:" from the resulting text.
Please find the code:
import cv2
import numpy as np
import pytesseract
from PIL import Image
print ("Hello")
src_path = "C:\\Users\\Venkatraman.R\\Desktop\\alpha_bill.jpg"
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files (x86)\Tesseract-OCR\tesseract.exe"
print (src_path)
# Read image with opencv
img = cv2.imread(src_path)
# Convert to gray
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# Apply dilation and erosion to remove some noise
kernel = np.ones((1, 1), np.uint8)
img = cv2.dilate(img, kernel, iterations=1)
img = cv2.erode(img, kernel, iterations=1)
# Write image after removed noise
cv2.imwrite(src_path + "removed_noise.png", img)
# Apply threshold to get image with only black and white
#img = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 31, 2)
# Write the image after apply opencv to do some ...
cv2.imwrite(src_path + "thres.png", img)
# Recognize text with tesseract for python
result = pytesseract.image_to_string(Image.open(src_path + "thres.png"))
# Remove template file
#os.remove(temp)
print ('--- Start recognize text from image ---')
print (result)
So the output should look like:
MASTER-AIRWAYBILL NO: 157-46637194
You can use pytesseract image_to_string() and a regex to extract the desired text, i.e.:
from PIL import Image
import pytesseract, re
f = "ocr.jpg"
t = pytesseract.image_to_string(Image.open(f))
m = re.findall(r"MASTER-AIRWAYBILL NO: [\d—-]+", t)
if m:
    print(m[0])
Output:
MASTER-AIRWAYBILL NO: 157—46637194
I'm using Python 2.7 and I also want to find the vendor name in the image. How should I find it?
m = re.findall(r"MASTER-AIRWAYBILL NO: [\d—-]+", t)
For the above line it's showing an error, and if I use m = re.findall(r'Vendor Name:[\d--]+', t) it also shows an error.
You can try this after installing Tesseract:
from PIL import Image
import pytesseract, re
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
t = pytesseract.image_to_string(Image.open("path"))
m = re.findall(r"Invoice No. [\d—-]+", t)
if m:
    print(m[0])
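For a text value such as a vendor name, the digits-and-dashes character class will not match anything. Here is a minimal sketch with a capture group instead; the label "Vendor Name" is an assumption about what the OCR output actually contains, so adjust it to whatever Tesseract reads from your image:
from PIL import Image
import pytesseract, re

t = pytesseract.image_to_string(Image.open("path"))
# Capture everything after the label up to the end of the line.
m = re.findall(r"Vendor Name[:.]?\s*(.+)", t)
if m:
    print(m[0].strip())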