I'm using Tesseract as my OCR to output the text from 198 images into a text file. The images can be found here. However, despite my best efforts, the results are poor.
I've followed the official ImproveQuality guidance: I've already tried all 13 page segmentation modes, I've tried rescaling the images (both up and down), I've converted the images to a binary threshold before passing them to Tesseract, and I've dilated the images slightly to remove white space.
The images are already oriented correctly, and there is no border.
Before I attempt to train Tesseract (the internet seems torn on whether that's a good idea), I thought I'd check on Stack whether there's anything I'm missing or doing wrong.
This is the exact code I'm running on the images found in the link above.
from pytesseract import pytesseract
import cv2 as cv
import glob

path_to_tesseract = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
path_to_images = glob.glob(r"C:\test\*.png")
pytesseract.tesseract_cmd = path_to_tesseract

def t(image):
    img = cv.imread(image)
    gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
    ret, thresh1 = cv.threshold(gray, 40, 255, cv.THRESH_BINARY)
    rect_kernel = cv.getStructuringElement(cv.MORPH_RECT, (12, 12))
    dilation = cv.dilate(thresh1, rect_kernel, iterations=3)
    contours, hierarchy = cv.findContours(dilation, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_NONE)
    im2 = img.copy()
    for cnt in contours:
        x, y, w, h = cv.boundingRect(cnt)
        # Draw the bounding box on the text area
        # (note: this draws on im2 before cropping, so the green border
        # ends up inside the crop; cropping from img instead would avoid that)
        rect = cv.rectangle(im2, (x, y), (x + w, y + h), (0, 255, 0), 2)
        # Crop the bounding box area
        cropped = im2[y:y + h, x:x + w]
        # Using tesseract on the cropped image area to get text
        # custom_config = r'-l eng+jpn+chin_sim+chin_tra+fil+ind+kor+mal+rus+san --psm 7'
        custom_config = r'-l eng --psm 7'
        text = pytesseract.image_to_string(cropped, config=custom_config)
        # Append the text to the file; the with-block closes it automatically
        with open('image_to_string.txt', 'a') as file:
            file.write(text)

for image in path_to_images:
    t(image)
And this is my output:
SPACE ENEMY(???)
L� Storm(???)
Kestrall>??)
sHONK: (77?)
Gatzkol???)
HEA? PP]
Gatzkol???)
Shoesss(???)
Admiral Coll???)
Hraderik(>??)
OWWRO(???)
Xaol???)
Eeoral???)
Frchy[???]
L� Archon(???)
�� Red Leader }3(???)
ta eae
MurderousErickal???)
Dupao-Vincel???)
Nicer_Dicer(???)
Space ranger?7[???)
Joarasik(???)
M.Gracia=O20[???)
Faspywntaenbesel(???)
COCOVEL(???)
GliuGl???)
Jose.nik(???)
SRRESF??P)
Kato Sikarius(???)
Gulford(???)
HBS???)
Shinekul???)
fag???)
rirarumpall???)
DudeliciOus(???)
FORE| Randgriorx(???)
Masked Darkness(>??)
Ahatvr(???)
Ugo.S[???)
LLL the Reaper(???)
aulohh(???)
Frimo Lupus(???)
Kaml7(???)
Ruds3{???)
ddaa67[???)
Lecroy606=(??7)
DNAfarm(???)
SRYCUAL? 22)
Malkyoril???)
Stok-Ed[???)
SEalths(???)
PHO?)
Aipex(???)
[VA] Aimino*[???)
HEYDOR(???)
HUU(F77)
xFlyingDuck[???)
Chuchler(???)
Than Th�ng(???)
Robert.Hul???)
DeadFixell>?>?)
Lovely deuteron(???)
Ribiribick[(???)
e1 BS???)
Shylock75(??7)
AMBUS(?PF)
Mall???)
xDuckyDuc(???)
Keepsi(???)
Montferrand[(>??)
KAIKG[???)
LoneRaptor(???)
stephanfritzell(???)
HYSELUFE(F >?)
Crazy Sugoi_YT[???)
OR???)
Gearworks[???)
Bese Rel? PP]
Ravenous Firefly(???)
JrohTheJolly(???)
Starman Smith(???)
Golactic-ALIEN(???)
Six(???)
/* Sand(???) |
Canto de Yemanjal???)
Lauuslliat(???)
Jakhammer(???)
Gormengast(???)
Soadhiro(???)
Lollita(???)
DigON(???)
Lucid Alien(???)
Dopper(???)
WHA Bl???)
Yukaze(???)
Jean???)
WHEE???)
see Si???)
Crapannr(???)
Maccinael>??)
Moomoo=Ihi[???)
% GREY WOLF #[???)
faljur(???)
Koboo(???)
Zombie Wolf(>??)
Gldman�i[???)
OBdoodlal???)
siug(???)
-Ppugan-(???)
FREI???)
Kattlinal???)
SRO? PP)
Drama Llamal???
ManapTretal???)
Shepard Lochel???)
CKa1E(??7)
L* Now???)
Firestarter(???)
Sweet Angel 666(7?7)
Coco Solo(???)
Lavender Town(???)
Lovaly triton(???]
BEhibachil???)
L� Fengsool???)
See???)
MoSterEhiald(???)
RAHIME(???)
James(???)
EByFreezel(???)
INSIDER(???)
EByFreezel(???)
susissorglos(???)
Entaroll>?>?)
Mason Escher(???)
FGT_[???)
L� Galactical???)
Eghk(???)
Hans Oktanel[>??)
Human Fly(???)
Unique eXe[???)
DURA FRSA PPP)
Ariokaar(???)
NoBecTBobstensl(???)
BALD DFP?)
L* Ganil???)
Mifray[???)
L� Timeworld(???)
Xesiest(???)
Dark Moon_[(???]
Xesiest(???)
Se ARAOAl>??)
HiimCoconut(???)
CRHAAAAA(???)
EA???)
REA RBAM? PP)
Be BOO???)
Hostile Target(???)
Lantian(???)
LucasS1��[(>??)
ScorpionMlE 7???)
-Arcadia-[???]
Incredible Axis(>>>)
�co cai) (P77)
Failname[???)
TonyTchopper[???)
MasterJedil[>>?)
TonyTchopper[???)
WG???)
DRAGON(???)
Roryt(???)
SlayingMantis(???)
SlayingMantis(???)
TiMeZEuRn(???)
TiMeZEuRn(???)
TiMeZEuRn(???)
Tezzarl(???)
JobForThosel???)
Wraithbourne[>??)
Choosdracan(???)
Spahrep(???)
EBsoumSS[(???)
Bossdread(???)
Capt EO[???)
Cybiz(???)
Atomic Samurail???)
L�herbe faux[???)
Baby YodatheEased(???)
Eeltzel???)
EeltzeBuBil???)
EVA Starwalker[???)
MightyWombat(???)
Moxas(???)
orchidlougR (7???)
RaptorOnal???)
KA__Suigetsul???)
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt

image_path = 'kyc_sample1.jpg'
plot_flag = True
save_output = True
out_folder = 'outs'
os.makedirs(out_folder, exist_ok=True)
image = cv2.imread(image_path)

def plot(image, cmap=None):
    plt.figure(figsize=(15, 15))
    plt.imshow(image, cmap=cmap)

if plot_flag:
    plot(image)

gray_scale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
th1, img_bin = cv2.threshold(gray_scale, 150, 225, cv2.THRESH_BINARY)
img_bin = ~img_bin
if plot_flag:
    plot(img_bin, 'gray')
if save_output:
    cv2.imwrite(os.path.join(out_folder, f'bin_{image_path}'), img_bin)

line_min_width = 15
kernal_h = np.ones((1, line_min_width), np.uint8)
kernal_v = np.ones((line_min_width, 1), np.uint8)

img_bin_h = cv2.morphologyEx(img_bin, cv2.MORPH_OPEN, kernal_h)
if plot_flag:
    plot(img_bin_h, 'gray')
if save_output:
    cv2.imwrite(os.path.join(out_folder, f'hor_{image_path}'), img_bin_h)

img_bin_v = cv2.morphologyEx(img_bin, cv2.MORPH_OPEN, kernal_v)
if plot_flag:
    plot(img_bin_v, 'gray')
if save_output:
    cv2.imwrite(os.path.join(out_folder, f'ver_{image_path}'), img_bin_v)

img_bin_final = img_bin_h | img_bin_v
if plot_flag:
    plot(img_bin_final, 'gray')
if save_output:
    cv2.imwrite(os.path.join(out_folder, f'merge_{image_path}'), img_bin_final)

def imshow_components(labels):
    ### creating an hsv image, with a unique hue value for each label
    label_hue = np.uint8(179 * labels / np.max(labels))
    ### making saturation and value to be 255
    empty_channel = 255 * np.ones_like(label_hue)
    labeled_img = cv2.merge([label_hue, empty_channel, empty_channel])
    ### converting the hsv image to BGR image
    labeled_img = cv2.cvtColor(labeled_img, cv2.COLOR_HSV2BGR)
    labeled_img[label_hue == 0] = 0
    ### returning the color image for visualising connected components
    return labeled_img

def detect_box(image, line_min_width=15):
    gray_scale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    th1, img_bin = cv2.threshold(gray_scale, 150, 225, cv2.THRESH_BINARY)
    kernal6h = np.ones((1, line_min_width), np.uint8)
    kernal6v = np.ones((line_min_width, 1), np.uint8)
    img_bin_h = cv2.morphologyEx(~img_bin, cv2.MORPH_OPEN, kernal6h)
    img_bin_v = cv2.morphologyEx(~img_bin, cv2.MORPH_OPEN, kernal6v)
    img_bin_final = img_bin_h | img_bin_v
    final_kernel = np.ones((3, 3), np.uint8)
    img_bin_final = cv2.dilate(img_bin_final, final_kernel, iterations=1)
    ret, labels, stats, centroids = cv2.connectedComponentsWithStats(~img_bin_final, connectivity=8, ltype=cv2.CV_32S)
    return stats, labels

image_path = 'kyc_sample1.jpg'
image = cv2.imread(image_path)
stats, labels = detect_box(image)
cc_out = imshow_components(labels)
for x, y, w, h, area in stats:
    cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 1)
if plot_flag:
    plot(cc_out)
    plot(image)
if save_output:
    cv2.imwrite(os.path.join(out_folder, f'cc_{image_path}'), cc_out)
    cv2.imwrite(os.path.join(out_folder, f'out_{image_path}'), image)
Required solution: PAN: 577634563744, Name: Mr William Smith Jons
Is there a way to OCR the text and get the required solution above from the image? The goal is to get the text that has a bounding box, together with its label. Is there a solution using Tesseract to extract only the text inside each bounding box and combine the letters into, for example, Mr William Smith Jons?
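For illustration only (this is not from the original post), one way to combine the two pieces is to OCR each detected cell separately and keep its box, assuming the detect_box function above; the file name and the --psm choice are assumptions:

import cv2
import pytesseract

image = cv2.imread('kyc_sample1.jpg')  # assumed sample file from above
stats, labels = detect_box(image)

cell_texts = []
for x, y, w, h, area in stats[1:]:  # stats[0] is the background component
    cell = image[y:y + h, x:x + w]
    # --psm 7 treats the crop as a single text line
    text = pytesseract.image_to_string(cell, config='--psm 7').strip()
    if text:
        cell_texts.append(((x, y, w, h), text))

# Joining adjacent cell strings would then give e.g. "Mr William Smith Jons"
print(cell_texts)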
I am trying to get specific information from a bill. I have used OCR with OpenCV so far, and here are the results:
import cv2
import pytesseract
import numpy as np

image = cv2.imread('1.png')

# get grayscale image
def get_grayscale(image):
    return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# noise removal
def remove_noise(image):
    return cv2.medianBlur(image, 5)

# thresholding
def thresholding(image):
    return cv2.threshold(image, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

# dilation
def dilate(image):
    kernel = np.ones((5, 5), np.uint8)
    return cv2.dilate(image, kernel, iterations=1)

# erosion
def erode(image):
    kernel = np.ones((5, 5), np.uint8)
    return cv2.erode(image, kernel, iterations=1)

# opening - erosion followed by dilation
def opening(image):
    kernel = np.ones((5, 5), np.uint8)
    return cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel)

# canny edge detection
def canny(image):
    return cv2.Canny(image, 100, 200)

# skew correction
def deskew(image):
    coords = np.column_stack(np.where(image > 0))
    angle = cv2.minAreaRect(coords)[-1]
    if angle < -45:
        angle = -(90 + angle)
    else:
        angle = -angle
    (h, w) = image.shape[:2]
    center = (w // 2, h // 2)
    M = cv2.getRotationMatrix2D(center, angle, 1.0)
    rotated = cv2.warpAffine(image, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)
    return rotated

# template matching
def match_template(image, template):
    return cv2.matchTemplate(image, template, cv2.TM_CCOEFF_NORMED)

gray = get_grayscale(image)
thresh = thresholding(gray)
opening = opening(gray)  # note: this rebinds the name `opening`, shadowing the function
canny = canny(gray)

cv2.imshow('res', gray)
cv2.waitKey(0)
cv2.destroyAllWindows()

# Adding custom options
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
custom_config = r'--oem 3 --psm 6'
pytesseract.image_to_string(gray, config=custom_config)
The output I got was:
Out[9]: 'aso en bosaanes sosesoen\nSee arr ee\n[internationale Spedition “works carrier:\nree Meese
Eaton oro\nSE Eesn Srey alata ascea\ntay See eae ror\nTBlaecaseew £2 saserzaz9gn [acs Sue Saeeats
Arve\noricore toptetschlBve ta\nbares eye creat tere\nLene et aan Ease\ncoon soos\nreaee\nbenenter
petachand AiG & co. x8\nese See ete Fests\nsee Sse\npearson | |\nen 7\nFeanséurt an main bawegoansn
|\npe |\nsor per tantace e/ear0003537\nEl = T=] | = [== |\nSta psa a4 fonstsanern\nLerper
atcnen\nwe\n20 ocd hoes ale 22ers wf\n30 ped londed on pwc aoasonnr #0\n35 ped londed on pwc 2008es00
#0\n64 pcs loaded on| PMC BO3BBART MD &\n[ental — |\n=\n|\nSJ |] Spscrinan copnapen as wtshan momen
ante\nart veins otetrich cata 60. RAS sem\n[re ote\n[\\gesoago |__| tars ena Detrich ea\nTon anine
Setrion cn a co. eta a5 scan\nSS aan ee ee\nee eS] -
esemen\ncision\n\x0c'
I need only specific information, like the name, shipping address, quantity, etc., not all the characters. Also, the output is all mashed up. Can anyone please help me with this? Any code or other help would be appreciated.
You can use pytesseract.image_to_pdf_or_hocr(), choosing hocr as the output format. This will contain bounding boxes at the character, word, and line level.
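As a rough sketch of how that hOCR output can be consumed (the file name and the BeautifulSoup parsing here are assumptions, not part of the original answer), the markup is plain XHTML, so the word boxes can be pulled out of the title attributes:

import pytesseract
from bs4 import BeautifulSoup

hocr = pytesseract.image_to_pdf_or_hocr('1.png', extension='hocr')
soup = BeautifulSoup(hocr, features='html.parser')

# Each ocrx_word span carries a title like "bbox 429 133 504 160; x_wconf 96"
for word in soup.find_all('span', {'class': 'ocrx_word'}):
    props = word['title'].split(';')
    bbox = [int(v) for v in props[0].split()[1:]]  # [x1, y1, x2, y2]
    print(bbox, word.get_text())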
I have the picture below, which I'm using with Tesseract OCR:
My code to process the picture is:
# HOCR
with image[450:6200, 840:3550] as cropped:
    imgPage = wi(image=cropped)
    imageBlob = imgPage.make_blob('png')
    horas = gerarHocr(imageBlob)

def gerarHocr(imageBlob):
    image = Image.open(io.BytesIO(imageBlob))
    markup = pytesseract.image_to_pdf_or_hocr(image, lang='por', extension='hocr', config='--psm 6')
    soup = BeautifulSoup(markup, features='html.parser')
    spans = soup.find_all('span', {'class': 'ocrx_word'})
    listHoras = []
    ...
    return listHoras
However, my OCR sometimes gets confused and reads an extra 8 next to a 3, returning 07:44/14:183 instead of 07:44/14:13, for example.
I think that if I remove the grey lines using Wand, I'll improve the confidence of the OCR.
How do I do that, please?
Thank you,
If the system is using ImageMagick-6, you can call Image.threshold(), but you might need to remove the transparency first.
with Image(filename='PWILE.png') as img:
    img.background_color = 'WHITE'
    img.alpha_channel = False
    img.threshold(threshold=0.5)
    img.save(filename='output_threshold.png')
If you're using ImageMagick-7 (anything above version 7.0.8-41), then Image.auto_threshold() will work.
with Image(filename='support/PWILE.png') as img:
    img.auto_threshold(method='otsu')
I would use cv2 and/or a numpy.array.
To convert light gray colors to white:
img[img > 128] = 255
To convert dark gray colors to black:
img[img < 128] = 0
import cv2
folder = '/home/user/images/'
# read it
img = cv2.imread(folder + 'old_img.png')
# convert to grayscale
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# reduce colors
img[ img > 128 ] = 255
img[ img < 128 ] = 0
# save it
cv2.imwrite(folder + 'new_img.png', img)
# display result
#cv2.imshow('window', img)
#cv2.waitKey(0) # press any key in window to close it
#cv2.destroyAllWindows()
Result
I am trying to perform edge detection on my images of soil grains using the holistically-nested edge detection (HED) method, as shown. However, with combined fine and coarse soil grains, the region of fine particles is not clear. My idea is to process the image piecewise: cut the image into smaller rectangular areas in both directions, run HED on every portion, and store the edged portions in a black copy of the image so that together they build up the full edge map.
I hit an error after repeating the HED algorithm in a for loop, dividing the width of the image into 5 portions and the height into 4 portions, but I can't fix that error.
Here is the algorithm used:
# import the necessary packages
import argparse
import cv2
import os
import easygui

path = easygui.fileopenbox()
print(path)
hdir = os.path.dirname(path)
print(hdir)
hfilename = os.path.basename(path)
print(hfilename)
hname = os.path.splitext(hfilename)[0]
print(hname)
houtname = hname + "_out.jpg"
print(houtname)
hout = os.path.sep.join([hdir, houtname])
print(hout)

# # construct the argument parser and parse the arguments
# ap = argparse.ArgumentParser()
# ap.add_argument("-d", "--edge-detector", type=str, required=True,
#     help="path to OpenCV's deep learning edge detector")
# ap.add_argument("-i", "--image", type=str, required=True,
#     help="path to input image")
# args = vars(ap.parse_args())

class CropLayer(object):
    def __init__(self, params, blobs):
        # initialize our starting and ending (x, y)-coordinates of
        # the crop
        self.startX = 0
        self.startY = 0
        self.endX = 0
        self.endY = 0

    def getMemoryShapes(self, inputs):
        # the crop layer will receive two inputs -- we need to crop
        # the first input blob to match the shape of the second one,
        # keeping the batch size and number of channels
        (inputShape, targetShape) = (inputs[0], inputs[1])
        (batchSize, numChannels) = (inputShape[0], inputShape[1])
        (H, W) = (targetShape[2], targetShape[3])
        # compute the starting and ending crop coordinates
        self.startX = int((inputShape[3] - targetShape[3]) / 2)
        self.startY = int((inputShape[2] - targetShape[2]) / 2)
        self.endX = self.startX + W
        self.endY = self.startY + H
        # return the shape of the volume (we'll perform the actual
        # crop during the forward pass)
        return [[batchSize, numChannels, H, W]]

    def forward(self, inputs):
        # use the derived (x, y)-coordinates to perform the crop
        return [inputs[0][:, :, self.startY:self.endY,
                          self.startX:self.endX]]

# load our serialized edge detector from disk
print("[INFO] loading edge detector...")
fpath = os.path.abspath(__file__)
fdir = os.path.dirname(fpath)
print(fdir)
protoPath = os.path.sep.join([fdir, "hed_model", "deploy.prototxt"])
print(protoPath)
modelPath = os.path.sep.join([fdir, "hed_model", "hed_pretrained_bsds.caffemodel"])
print(modelPath)
net = cv2.dnn.readNetFromCaffe(protoPath, modelPath)

# register our new layer with the model
cv2.dnn_registerLayer("Crop", CropLayer)

# load the input image and grab its dimensions
image = cv2.imread(r'D:\My work\MASTERS WORK\GSD files\Sample E photos\SampleE_#1_26pxfor1mm.jpg')
im_copy = image.copy() * 0
(H, W) = image.shape[:2]
# print(image.shape[:2])
# image.shape[:2] = (H*3, W*3)
# image = cv2.resize(image, 0.5)

h = 0
w = 0
for m in range(0, H, int(H / 5)):
    for n in range(0, W, int(W / 3)):
        gray = image[h:m, w:n]
        # convert the image to grayscale, blur it, and perform Canny
        # edge detection
        print("[INFO] performing Canny edge detection...")
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        blurred = cv2.GaussianBlur(gray, (5, 5), 0)
        canny = cv2.Canny(blurred, 30, 150)
        # construct a blob out of the input image for the Holistically-Nested
        # Edge Detector
        # cc = cv2.cvtColor(canny, cv2.COLOR_GRAY2BGR)
        # image = image + cc
        # mean = (104.00698793, 116.66876762, 122.67891434),
        blob = cv2.dnn.blobFromImage(image, scalefactor=1.0, size=((m - h), (n - w)),
                                     # mean=(230, 120, 50),
                                     mean=(104.00698793, 116.66876762, 122.67891434),
                                     swapRB=False, crop=False)
        print(blob)
        cv2.waitKey(0)
        # set the blob as the input to the network and perform a forward pass
        # to compute the edges
        print("[INFO] performing holistically-nested edge detection...")
        net.setInput(blob)
        hed = net.forward()
        hed = cv2.resize(hed[0, 0], ((m - h), (n - w)))
        hed = (255 * hed).astype("uint8")
        # Adding the edge detection for each portion to the copy image as follows
        im_copy = im_copy + hed
        h += int(H / 5)
        w += int(W / 4)

# show the output edge detection results for Canny and
# Holistically-Nested Edge Detection
cv2.imshow("Input", image)
cv2.imshow("Canny", canny)
cv2.imshow("HED", hed)
cv2.waitKey(0)
cv2.imshow('Frame', im_copy)
cv2.imwrite(hout, im_copy)
cv2.waitKey(0)
I then use this edged image in further analysis of the image.
The error I got when running the algorithm:
net = cv2.dnn.readNetFromCaffe(protoPath, modelPath)
cv2.error: OpenCV(4.1.1) C:\projects\opencv-python\opencv\modules\dnn\src\caffe\caffe_io.cpp:1121: error: (-2:Unspecified error) FAILED: fs.is_open(). Can't open "D:\My work\MASTERS WORK\hed_model\deploy.prototxt" in function 'cv::dnn::ReadProtoFromTextFile'
I'm creating a function (in Python) that receives a single image containing multiple human faces and returns multiple smaller images (one image per face). I am able to call cv2.imshow inside the function and see the expected smaller images, but when I attempt cv2.imshow from outside the function, it does not work (I can't see the smaller image, and I get a TypeError instead). I would appreciate some guidance.
def stills(user_image):
    #sub_frames = []
    fqp_image_src = (user_image)
    raw_pic = cv2.imread(fqp_image_src)
    mpic = cv2.resize(raw_pic, (0, 0), fx=0.30, fy=0.30)
    mpic_rgb = cv2.cvtColor(mpic, cv2.COLOR_BGR2RGB)
    face_boxes = haar_cascade_face.detectMultiScale(mpic_rgb, scaleFactor=1.2, minNeighbors=5)
    count = int(len(face_boxes))
    for i in range(count):
        face_box = face_boxes[i]
        final = cv2.rectangle(mpic, (face_box[0], face_box[1]),
                              ((face_box[0] + face_box[2]), (face_box[1] + face_box[3])), (0, 255, 0), 2)
        sub_frame = final[face_box[1]:(face_box[1] + face_box[3]), face_box[0]:(face_box[0] + face_box[2])]
        #sub_frames.append(sub_frame)
        cv2.imshow('frame', sub_frame)  # this works
        cv2.waitKey()
    return (sub_frame, final)

# calling the function
something = stills("abc.jpg")
cv2.imshow('frame', something)  # this does not work
cv2.waitKey()
TypeError: Expected cv::UMat for argument 'mat'
This will do what you expected, just with some simplification and with full file paths.
One of the key errors was giving detectMultiScale a colored image; the input should have a single channel (grayscale brightness).
In order to display a colored image with the faces in boxes, a copy of the image is needed: convert the copy to grayscale, run the detection on it, and use the resulting coordinates to draw on the colored image.
import cv2
import os

# Take as a global the dir in which this file lives
PATH = os.path.dirname(os.path.abspath(__file__))
haar_cascade_face = cv2.CascadeClassifier(os.path.join(PATH, 'haarcascade_frontalface_alt.xml'))

def stills(user_image):
    image = os.path.join(PATH, user_image)
    image = cv2.imread(image)
    image = cv2.resize(image, (0, 0), fx=0.30, fy=0.30)
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    face_boxes = haar_cascade_face.detectMultiScale(gray_image, scaleFactor=1.073, minNeighbors=8)
    final = image  # make the function always return an image
    sub_frames = []
    # Check if there are faces
    if len(face_boxes) > 0:
        for x, y, w, h in face_boxes:
            final = cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)
            sub_frame = image[y:y + h, x:x + w]
            sub_frames.append([x, y, x + w, y + h])
            cv2.imshow('sub_frame', sub_frame)
            # cv2.waitKey()  # no need to wait for the user
    else:
        print('No faces found')
    return (sub_frames, final)

if __name__ == '__main__':
    fragments, final = stills("abc.jpg")
    cv2.imshow('frame', final)
    cv2.waitKey()