Extract only specific information using OCR and OpenCV

Extract only specific information using OCR and OpenCV - python

I am trying to get specific information from a bill. I have used ocr till now and OpenCV and here are the results:
import cv2
import pytesseract
import numpy as np
image = cv2.imread('1.png')
# get grayscale image
def get_grayscale(image):
return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# noise removal
def remove_noise(image):
return cv2.medianBlur(image,5)
#thresholding
def thresholding(image):
return cv2.threshold(image, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
#dilation
def dilate(image):
kernel = np.ones((5,5),np.uint8)
return cv2.dilate(image, kernel, iterations = 1)
#erosion
def erode(image):
kernel = np.ones((5,5),np.uint8)
return cv2.erode(image, kernel, iterations = 1)
#opening - erosion followed by dilation
def opening(image):
kernel = np.ones((5,5),np.uint8)
return cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel)
#canny edge detection
def canny(image):
return cv2.Canny(image, 100, 200)
#skew correction
def deskew(image):
coords = np.column_stack(np.where(image > 0))
angle = cv2.minAreaRect(coords)[-1]
if angle < -45:
angle = -(90 + angle)
else:
angle = -angle
(h, w) = image.shape[:2]
center = (w // 2, h // 2)
M = cv2.getRotationMatrix2D(center, angle, 1.0)
rotated = cv2.warpAffine(image, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)
return rotated
#template matching
def match_template(image, template):
return cv2.matchTemplate(image, template, cv2.TM_CCOEFF_NORMED)
gray = get_grayscale(image)
thresh = thresholding(gray)
opening = opening(gray)
canny = canny(gray)
cv2.imshow('res', gray)
cv2.waitKey(0)
cv2.destroyAllWindows()
# Adding custom options
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
custom_config = r'--oem 3 --psm 6'
pytesseract.image_to_string(gray, config=custom_config)
the output I got was
Out[9]: 'aso en bosaanes sosesoen\nSee arr ee\n[internationale Spedition “works carrier:\nree Meese
Eaton oro\nSE Eesn Srey alata ascea\ntay See eae ror\nTBlaecaseew £2 saserzaz9gn [acs Sue Saeeats
Arve\noricore toptetschlBve ta\nbares eye creat tere\nLene et aan Ease\ncoon soos\nreaee\nbenenter
petachand AiG & co. x8\nese See ete Fests\nsee Sse\npearson | |\nen 7\nFeanséurt an main bawegoansn
|\npe |\nsor per tantace e/ear0003537\nEl = T=] | = [== |\nSta psa a4 fonstsanern\nLerper
atcnen\nwe\n20 ocd hoes ale 22ers wf\n30 ped londed on pwc aoasonnr #0\n35 ped londed on pwc 2008es00
#0\n64 pcs loaded on| PMC BO3BBART MD &\n[ental — |\n=\n|\nSJ |] Spscrinan copnapen as wtshan momen
ante\nart veins otetrich cata 60. RAS sem\n[re ote\n[\\gesoago |__| tars ena Detrich ea\nTon anine
Setrion cn a co. eta a5 scan\nSS aan ee ee\nee eS] -
esemen\ncision\n\x0c'
I need specific information only like the name, shipping address, quantity, etc, and not all the characters. Also, the output is all mashed up. Can anyone please help me with this? any code or any other help would be appreciated.

You can use pytesseract.image_to_pdf_or_hocr(), choosing hocr as output format. This will contain bounding boxes on the character, word, and line level.

Related

How do I address poor tesseract output despite clear images

I'm using tesseract as my OCR to output the text from 198 images into a text file. The images can be found here. However, despite my best efforts, the results are poor.
I've followed the official ImproveQuality guidance in that I have already tried all 13 different page segmentation methods, I've tried changing the scale of the images (both up and down), and I've converted the images into a binary threshold before passing them into tesseract, I've dilated the images slightly to remove white space.
The images are already orientated correctly, and there is no border.
Before I attempted to train tesseract (the internet seems torn on this being a good or bad idea) I thought I'd see if there was anything I was missing or doing wrong via Stack.
This is the exact code I'm running on the images found in the link above.
from pytesseract import pytesseract
import cv2 as cv
import glob
path_to_tesseract = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
path_to_images = glob.glob(r"C:\test\*.png")
pytesseract.tesseract_cmd = path_to_tesseract
def t(image):
img = cv.imread(image)
gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
ret, thresh1 = cv.threshold(gray, 40, 255, cv.THRESH_BINARY)
rect_kernel = cv.getStructuringElement(cv.MORPH_RECT, (12, 12))
dilation = cv.dilate(thresh1, rect_kernel, iterations = 3)
contours, hierarchy = cv.findContours(dilation, cv.RETR_EXTERNAL,cv.CHAIN_APPROX_NONE)
im2 = img.copy()
for cnt in contours:
x, y, w, h = cv.boundingRect(cnt)
# Draw the bounding box on the text area
rect=cv.rectangle(im2, (x, y), (x + w, y + h), (0, 255, 0), 2)
# Crop the bounding box area
cropped = im2[y:y + h, x:x + w]
# Using tesseract on the cropped image area to get text
# custom_config = r'-l eng+jpn+chin_sim+chin_tra+fil+ind+kor+mal+rus+san --psm 7'
custom_config = r'-l eng --psm 7'
text = pytesseract.image_to_string(cropped, config=custom_config)
# Adding the text to the file
with open('image_to_string.txt','a') as file:
file.write(text)
# Close the file
file.close
And this is my output:
SPACE ENEMY(???)
L� Storm(???)
Kestrall>??)
sHONK: (77?)
Gatzkol???)
HEA? PP]
Gatzkol???)
Shoesss(???)
Admiral Coll???)
Hraderik(>??)
OWWRO(???)
Xaol???)
Eeoral???)
Frchy[???]
L� Archon(???)
�� Red Leader }3(???)
ta eae
MurderousErickal???)
Dupao-Vincel???)
Nicer_Dicer(???)
Space ranger?7[???)
Joarasik(???)
M.Gracia=O20[???)
Faspywntaenbesel(???)
COCOVEL(???)
GliuGl???)
Jose.nik(???)
SRRESF??P)
Kato Sikarius(???)
Gulford(???)
HBS???)
Shinekul???)
fag???)
rirarumpall???)
DudeliciOus(???)
FORE| Randgriorx(???)
Masked Darkness(>??)
Ahatvr(???)
Ugo.S[???)
LLL the Reaper(???)
aulohh(???)
Frimo Lupus(???)
Kaml7(???)
Ruds3{???)
ddaa67[???)
Lecroy606=(??7)
DNAfarm(???)
SRYCUAL? 22)
Malkyoril???)
Stok-Ed[???)
SEalths(???)
PHO?)
Aipex(???)
[VA] Aimino*[???)
HEYDOR(???)
HUU(F77)
xFlyingDuck[???)
Chuchler(???)
Than Th�ng(???)
Robert.Hul???)
DeadFixell>?>?)
Lovely deuteron(???)
Ribiribick[(???)
e1 BS???)
Shylock75(??7)
AMBUS(?PF)
Mall???)
xDuckyDuc(???)
Keepsi(???)
Montferrand[(>??)
KAIKG[???)
LoneRaptor(???)
stephanfritzell(???)
HYSELUFE(F >?)
Crazy Sugoi_YT[???)
OR???)
Gearworks[???)
Bese Rel? PP]
Ravenous Firefly(???)
JrohTheJolly(???)
Starman Smith(???)
Golactic-ALIEN(???)
Six(???)
/* Sand(???) |
Canto de Yemanjal???)
Lauuslliat(???)
Jakhammer(???)
Gormengast(???)
Soadhiro(???)
Lollita(???)
DigON(???)
Lucid Alien(???)
Dopper(???)
WHA Bl???)
Yukaze(???)
Jean???)
WHEE???)
see Si???)
Crapannr(???)
Maccinael>??)
Moomoo=Ihi[???)
% GREY WOLF #[???)
faljur(???)
Koboo(???)
Zombie Wolf(>??)
Gldman�i[???)
OBdoodlal???)
siug(???)
-Ppugan-(???)
FREI???)
Kattlinal???)
SRO? PP)
Drama Llamal???
ManapTretal???)
Shepard Lochel???)
CKa1E(??7)
L* Now???)
Firestarter(???)
Sweet Angel 666(7?7)
Coco Solo(???)
Lavender Town(???)
Lovaly triton(???]
BEhibachil???)
L� Fengsool???)
See???)
MoSterEhiald(???)
RAHIME(???)
James(???)
EByFreezel(???)
INSIDER(???)
EByFreezel(???)
susissorglos(???)
Entaroll>?>?)
Mason Escher(???)
FGT_[???)
L� Galactical???)
Eghk(???)
Hans Oktanel[>??)
Human Fly(???)
Unique eXe[???)
DURA FRSA PPP)
Ariokaar(???)
NoBecTBobstensl(???)
BALD DFP?)
L* Ganil???)
Mifray[???)
L� Timeworld(???)
Xesiest(???)
Dark Moon_[(???]
Xesiest(???)
Se ARAOAl>??)
HiimCoconut(???)
CRHAAAAA(???)
EA???)
REA RBAM? PP)
Be BOO???)
Hostile Target(???)
Lantian(???)
LucasS1��[(>??)
ScorpionMlE 7???)
-Arcadia-[???]
Incredible Axis(>>>)
�co cai) (P77)
Failname[???)
TonyTchopper[???)
MasterJedil[>>?)
TonyTchopper[???)
WG???)
DRAGON(???)
Roryt(???)
SlayingMantis(???)
SlayingMantis(???)
TiMeZEuRn(???)
TiMeZEuRn(???)
TiMeZEuRn(???)
Tezzarl(???)
JobForThosel???)
Wraithbourne[>??)
Choosdracan(???)
Spahrep(???)
EBsoumSS[(???)
Bossdread(???)
Capt EO[???)
Cybiz(???)
Atomic Samurail???)
L�herbe faux[???)
Baby YodatheEased(???)
Eeltzel???)
EeltzeBuBil???)
EVA Starwalker[???)
MightyWombat(???)
Moxas(???)
orchidlougR (7???)
RaptorOnal???)
KA__Suigetsul???)

How to extract text from the specific bounding box images

import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
image_path='kyc_sample1.jpg'
plot_flag=True
save_output=True
out_folder='outs'
os.makedirs(out_folder,exist_ok=True)
image=cv2.imread(image_path)
def plot(image,cmap=None):
plt.figure(figsize=(15,15))
plt.imshow(image,cmap=cmap)
if plot_flag:
plot(image)
gray_scale=cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)
th1,img_bin = cv2.threshold(gray_scale,150,225,cv2.THRESH_BINARY)
img_bin=~img_bin
if plot_flag:
plot(img_bin,'gray')
if save_output:
cv2.imwrite(os.path.join(out_folder,f'bin_{image_path}'),img_bin)
line_min_width = 15
kernal_h = np.ones((1,line_min_width), np.uint8)
kernal_v = np.ones((line_min_width,1), np.uint8)
img_bin_h = cv2.morphologyEx(img_bin, cv2.MORPH_OPEN, kernal_h)
if plot_flag:
plot(img_bin_h,'gray')
if save_output:
cv2.imwrite(os.path.join(out_folder,f'hor_{image_path}'),img_bin_h)
img_bin_v = cv2.morphologyEx(img_bin, cv2.MORPH_OPEN, kernal_v)
if plot_flag:
plot(img_bin_v,'gray')
if save_output:
cv2.imwrite(os.path.join(out_folder,f'ver_{image_path}'),img_bin_v)
img_bin_final = img_bin_h | img_bin_v
if plot_flag:
plot(img_bin_final, 'gray')
if save_output:
cv2.imwrite(os.path.join(out_folder, f'merge_{image_path}'), img_bin_final)
def imshow_components(labels):
### creating a hsv image, with a unique hue value for each label
label_hue = np.uint8(179*labels/np.max(labels))
### making saturation and volume to be 255
empty_channel = 255*np.ones_like(label_hue)
labeled_img = cv2.merge([label_hue, empty_channel, empty_channel])
### converting the hsv image to BGR image
labeled_img = cv2.cvtColor(labeled_img, cv2.COLOR_HSV2BGR)
labeled_img[label_hue==0] = 0
### returning the color image for visualising Connected Componenets
return labeled_img
def detect_box(image,line_min_width=15):
gray_scale=cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)
th1,img_bin = cv2.threshold(gray_scale,150,225,cv2.THRESH_BINARY)
kernal6h = np.ones((1,line_min_width), np.uint8)
kernal6v = np.ones((line_min_width,1), np.uint8)
img_bin_h = cv2.morphologyEx(~img_bin, cv2.MORPH_OPEN, kernal6h)
img_bin_v = cv2.morphologyEx(~img_bin, cv2.MORPH_OPEN, kernal6v)
img_bin_final=img_bin_h|img_bin_v
final_kernel = np.ones((3,3), np.uint8)
img_bin_final=cv2.dilate(img_bin_final,final_kernel,iterations=1)
ret, labels, stats,centroids = cv2.connectedComponentsWithStats(~img_bin_final, connectivity=8, ltype=cv2.CV_32S)
return stats,labels
image_path='kyc_sample1.jpg'
image=cv2.imread(image_path)
stats,labels=detect_box(image)
cc_out=imshow_components(labels)
for x,y,w,h,area in stats:
cv2.rectangle(image,(x,y),(x+w,y+h),(0,255,0),1)
if plot_flag:
plot(cc_out)
plot(image)
if save_output:
cv2.imwrite(os.path.join(out_folder,f'cc_{image_path}'),cc_out)
cv2.imwrite(os.path.join(out_folder,f'out_{image_path}'),image)
Required solution: PAN: 577634563744, Name: Mr William Smith Jons
Is there any solution to ocr the text and get the required solution mentioned above from the image. The required solution is to get the text which has the bounding box with their labels. Is there any solution using tesseract to extract text which has bounding box and combine letter has Mr William Smith Jons

Disparity Map just shows contouring

The problem:
The goal is to create a disparity map for two parallel cameras. Currently the calculation itself is working, and I have a live disparitymap. It just shows contouring instead of information for every pixel, which is not what a disparity map should be doing.
.
What I have tried:
I tried the tsuka example, the lines are commented out, but they work. So this proves that the used functions work.
The result of my code is here: https://imgur.com/a/bIDmdkk (I probably don't have the reputation needed to upload images)
As can be seen in that image just the outline, the contour, of my face is visible. This contour reacts to my actual distance - with getting brighter or darker - but the rest of the image is dark.
With all parameters commented out (as is the example) it does now work either but has lots and lots of speckles laying over.
I also tried almost any combination of numDisparities and blocksize.
Changing the position of the cameras to one another alters the result but does not change it massively. I made sure to have them in a line with each other, looking in parallel.
Edit: I tinkered a bit and got this result: https://imgur.com/a/m2o9FOE compared to the previous result there are more features, but also more noise. (This one has fewer disparities and another color convertion)
SOLVED: [I tried running the stereo.compute within the while-loop with BGR-Images, but that does not work. The tsuka-example images are colored though, so there might be some case of wrong datatype that I do not see.
Everything is uint8 currently.] => I forgot that imread("",0) reads an image as grayscale. So everything behaves as it should in this regard.
.
So what is the difference between my left/right images and the ones resulting in https://docs.opencv.org/master/disparity_map.jpg ?
.
The code:
import numpy as np
import cv2 as cv
from matplotlib import pyplot as plt
cap1 = cv.VideoCapture(1)
cap3 = cv.VideoCapture(3)
#imgR = cv.imread('tsuL.png',0)
#imgL = cv.imread('tsuR.png',0)
#stereoTest = cv.StereoBM_create(numDisparities=16, blockSize=15)
#disparityTest = stereoTest.compute(imgL,imgR)
while True:
# save current camera image
ret1, frame1 = cap1.read()
ret3, frame3 = cap3.read()
# switch from BGR to gray
grayFrame1 = cv.cvtColor(frame1, cv.COLOR_BGR2GRAY)
grayFrame3 = cv.cvtColor(frame3, cv.COLOR_BGR2GRAY)
# disparity params
stereo = cv.StereoBM_create(numDisparities=128, blockSize=5)
stereo.setTextureThreshold(600)
#stereo.setSpeckleRange(4)
#stereo.setSpeckleWindowSize(9)
stereo.setMinDisparity(0)
# calculate both variants (Camera 1 Left, Camera 2 Right and Camera 1 right, Camera 2 left)
disparity = stereo.compute(grayFrame1,grayFrame3)
disparity2 = stereo.compute(grayFrame3,grayFrame1)
#res = cv.cvtColor(disparity,cv.COLOR_GRAY2BGR)
# Should have been 65535 from int16 to int8, but 4095 works..
div = 65535.0/16
res = cv.convertScaleAbs(disparity, alpha=(255.0/div))
res2= cv.convertScaleAbs(disparity2, alpha=(255.0/div))
# Show disparity map
cv.namedWindow("Disparity")
cv.moveWindow("Disparity", 450, 20)
cv.imshow('Disparity', np.hstack([res,res2]))
keyboard = cv.waitKey(30)
if keyboard == 'q' or keyboard == 27:
break
cap.release()
cv.destroyAllWindows()
New Code
I got the camera calibration data from boofcv and copied some lines from https://stackoverflow.com/a/29151300/13150965 to my code.
Schwarz S/W
Xc 311,0 323,3
Yc 257,1 261,9
fx 603,0 593,6
fy 604,3 596,5
skew
radial 1,43e-01 1,1e-01
-3,03e-01 -2,43e-01
tangential 1,37e-02 1,25e-02
-9,77e-03 -9,79e-04
These are the values I received for each Camera (Schwarz and S/W are just names for each camera, they have different cables, that's how I recognize them)
import numpy as np
import cv2 as cv
from matplotlib import pyplot as plt
cap1 = cv.VideoCapture(0)
cap3 = cv.VideoCapture(1)
cameraMatrix1 = np.array(
[[603.0, 0, 311.0],
[0, 604.3, 257.1],
[0, 0, 1]]
)
cameraMatrix2 = np.array(
[[593.6, 0, 323.3],
[0, 596.5, 261.9],
[0, 0, 1]]
)
distCoeffs1 = np.array([[0.143, -0.303, 0.0137, -0.00977, 0.0]])
distCoeffs2 = np.array([[0.11, -0.243, 0.0125, -0.000979, 0.0]])
R = np.array(
[[1.0, 0.0, 0.0],
[0.0, 1.0, 0.0],
[0.0, 0.0, 1.0]]
)
T = np.array(
[[98.0],
[0.0],
[0.0]]
)
# Params from camera calibration
camMats = [cameraMatrix1, cameraMatrix2]
distCoeffs = [distCoeffs1, distCoeffs2]
camSources = [0,1]
for src in camSources:
distCoeffs[src][0][4] = 0.0 # use only the first 2 values in distCoeffs
xOff = 450
div = 64.0
i = 0
while True:
# save current camera image
ret1, frame1 = cap1.read()
ret3, frame3 = cap3.read()
w, h = frame1.shape[:2]
# The rectification process
newCams = [0,0]
roi = [0,0]
frames = [frame1, frame3]
i = i + 1
if i > 10:
for src in camSources:
newCams[src], roi[src] = cv.getOptimalNewCameraMatrix(cameraMatrix = camMats[src],
distCoeffs = distCoeffs[src],
imageSize = (w,h),
alpha = 0)
rectFrames = [0,0]
for src in camSources:
rectFrames[src] = cv.undistort(frames[src], camMats[src], distCoeffs[src])
R1,R2,P1,P2,Q,roi1,roi2 = cv.stereoRectify(
cameraMatrix1 =camMats[0],
cameraMatrix2 =camMats[1],
distCoeffs1 =distCoeffs1,
distCoeffs2 =distCoeffs2,
imageSize = (w,h),
R=R,
T=T,
alpha=1
)
# show camera images
cv.namedWindow("RectFrames")
cv.moveWindow("RectFrames", xOff, 532)
cv.imshow('RectFrames', np.hstack([rectFrames[0],rectFrames[1]]))
# switch from BGR to gray
grayFrame1 = cv.cvtColor(rectFrames[0], cv.COLOR_BGR2GRAY)
grayFrame3 = cv.cvtColor(rectFrames[1], cv.COLOR_BGR2GRAY)
# disparity params
stereo = cv.StereoBM_create(numDisparities=16, blockSize=15)
# calculate both variants (Camera 1 Left, Camera 2 Right and Camera 1 right, Camera 2 left)
disparity = stereo.compute(grayFrame1,grayFrame3)
disparity2 = stereo.compute(grayFrame3,grayFrame1)
# Should have been 65535 from int16 to int8, but 4095 works..
res = cv.convertScaleAbs(disparity, alpha=(255.0/(div-1)))
res2= cv.convertScaleAbs(disparity2, alpha=(255.0/(div-1)))
# Show disparity map
cv.namedWindow("Disparity")
cv.moveWindow("Disparity", xOff, 20)
cv.imshow('Disparity', np.hstack([res,res2]))
keyboard = cv.waitKey(30)
if keyboard == 'q' or keyboard == 27:
break
cap.release()
cv.destroyAllWindows()
I can see, that the images are being undistorted. https://imgur.com/a/SBmv7IY
But I am still doing something wrong.
The R and T are made up, as they look parallel (No Rotation) and are 9.8cm apart from another.
The Values for R and T calculated via the script from StereoCalibration in OpenCV on Python resulted in the unity-matrix for R and an empty vector for T. The latter cannot be right.
I now got the R and T values for a given calibration of the cameras. But it does in fact not solve my problem. So either there is still an error in that calculation or this problem has to be solved differently.
I rewrote the entire script, to see at which step it misbehaves - and do tidy things up. At is stands, the calibration works up to the cv2.initUndistortRectifyMap , if I use this map with cv2.remap onto my camera image, I just get a black image.
import numpy as np
import cv2
from VideoCapture import Device
from PIL import Image
import glob
print("Importing Images")
image_listR = []
image_listL = []
w = 640
h = 480
for filename in glob.glob('StereoCalibrate\imageR*'): #assuming gif
im=Image.open(filename).convert('RGB')
cvim= np.array(im)
cvim = cvim[:, :, ::-1].copy()
image_listR.append(cvim)
for filename in glob.glob('StereoCalibrate\imageL*'): #assuming gif
im=Image.open(filename).convert('RGB')
cvim= np.array(im)
cvim = cvim[:, :, ::-1].copy()
image_listL.append(cvim)
imagesR = len(image_listR)
imagesL = len(image_listL)
print("Found {%d} images for Left camera" % imagesL)
print("Found {%d} images for Right camera" % imagesR)
if imagesR == imagesL:
print("Number of Images match")
else:
print("Number of Images do not match")
print("Using loaded images")
board_w = 8
board_h = 5
board_sz = (8,5)
board_n = board_w*board_h
# termination criteria
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 30, 0.001)
# Arrays to store object points and image points from all the images.
object_points = [] # 3d point in real world space
imagePoints1 = [] # 2d points in image plane.
imagePoints2 = [] # 2d points in image plane.
corners1 = []
corners2 = []
obj = np.zeros((5*8,3), np.float32)
obj[:,:2] = np.mgrid[0:8,0:5].T.reshape(-1,2)
vidStreamL = cv2.VideoCapture(1) # index of your camera
vidStreamR = cv2.VideoCapture(0) # index of your camera
success = 0
found1 = False
found2 = False
i=0
while (success < imagesR*0.9):
#Loop through the image list
if i >= imagesL:
i = 0
img1 = image_listL[i]
img2 = image_listR[i]
#Convert images to grayscale
gray1 = cv2.cvtColor(img1,cv2.COLOR_BGR2GRAY)
gray2 = cv2.cvtColor(img2,cv2.COLOR_BGR2GRAY)
#Check for Chessboard Pattern
found1, corners1 = cv2.findChessboardCorners(img1, board_sz)
found2, corners2 = cv2.findChessboardCorners(img2, board_sz)
#Draw Chessboard in image
if (found1):
cv2.cornerSubPix(gray1, corners1, (11, 11), (-1, -1),criteria)
cv2.drawChessboardCorners(gray1, board_sz, corners1, found1)
if (found2):
cv2.cornerSubPix(gray2, corners2, (11, 11), (-1, -1), criteria)
cv2.drawChessboardCorners(gray2, board_sz, corners2, found2)
#Show grayscale image with chessboard marker
cv2.imshow('image1', gray1)
cv2.imshow('image2', gray2)
if (found1 != 0 and found2 != 0):
#Remove successful detected images from list
image_listL.pop(i)
image_listR.pop(i)
imagesL-=1
imagePoints1.append(corners1);
imagePoints2.append(corners2);
object_points.append(obj);
success+=1
print("{", success, "} / {",imagesR*0.9,"} calibration images detected")
if (success >= imagesR*0.9):
break
i = i + 1
cv2.waitKey(1)
cv2.destroyAllWindows()
print("Calibrating")
cx1 = 327.0
cy1 = 247.9
fx1 = 608.3
fy1 = 607.7
rx1 = 0.129
ry1 = -0.269
tx1 = 0.00382
ty1 = -0.00151
camMat1 = np.array(
[[fx1, 0, cx1],
[0, fy1, cy1],
[0, 0, 1]])
cx2 = 329.8
cy2 = 249.0
fx2 = 601.7
fy2 = 601.1
rx2 = 0.149
ry2 = -0.322
tx2 = 0.0039
ty2 = -0.000837
camMat2 = np.array(
[[fx2, 0, cx2],
[0, fy2, cy2],
[0, 0, 1]])
disCoe1 = np.array([[0.0,0.0,0.0,0.0,0.0]])
disCoe2 = np.array([[0.0,0.0,0.0,0.0,0.0]])
R = np.zeros(shape=(3,3))
T = np.zeros(shape=(3,3))
E = np.zeros(shape=(3,3))
F = np.zeros(shape=(3,3))
retval, camMat1, disCoe1, camMat2, disCoe2, R, T, E, F = cv2.stereoCalibrate(object_points, imagePoints1, imagePoints2, camMat1, disCoe1, camMat2, disCoe2, (w, h), flags = cv2.CALIB_USE_INTRINSIC_GUESS)
print("Done Calibration\n")
R1 = np.zeros(shape=(3,3))
R2 = np.zeros(shape=(3,3))
P1 = np.zeros(shape=(3,4))
P2 = np.zeros(shape=(3,4))
print("T:")
print('\n'.join([' '.join(['{:4}'.format(item) for item in row])
for row in T]))
print("E:")
print('\n'.join([' '.join(['{:4}'.format(item) for item in row])
for row in E]))
print("F:")
print('\n'.join([' '.join(['{:4}'.format(item) for item in row])
for row in F]))
print("R:")
print('\n'.join([' '.join(['{:4}'.format(item) for item in row])
for row in R]))
print("CAM1:")
print('\n'.join([' '.join(['{:4}'.format(item) for item in row])
for row in camMat1]))
print("CAM2:")
print('\n'.join([' '.join(['{:4}'.format(item) for item in row])
for row in camMat2]))
print("DIS1:")
print('\n'.join([' '.join(['{:4}'.format(item) for item in row])
for row in disCoe1]))
print("DIS2:")
print('\n'.join([' '.join(['{:4}'.format(item) for item in row])
for row in disCoe2]))
print("Rectifying cameras")
cv2.stereoRectify(camMat1, disCoe1, camMat2, disCoe2,(w, h), R, T)
#print("Undistort image")
#map1x, map1y = cv2.initUndistortRectifyMap(camMat1, disCoe1, R1, camMat1, (w, h), cv2.CV_32FC1)
#map2x, map2y = cv2.initUndistortRectifyMap(camMat2, disCoe2, R2, camMat2, (w, h), cv2.CV_32FC1)
print("Settings complete\n")
i = 1
j = 1
while(True):
retL, img1 = vidStreamL.read()
retR, img2 = vidStreamR.read()
img1 = cv2.undistort(img1, camMat1, disCoe1)
img2 = cv2.undistort(img2, camMat2, disCoe2)
cv2.imshow("ImgCam", np.hstack([img1,img2]));
#imgU1 = np.zeros((h,w,3), np.uint8)
#imgU2 = np.zeros((h,w,3), np.uint8)
#imgU1 = cv2.remap(img1, map1x, map1y, cv2.INTER_LINEAR, imgU1, cv2.BORDER_CONSTANT, 0)
#imgU2 = cv2.remap(img2, map2x, map2y, cv2.INTER_LINEAR, imgU2, cv2.BORDER_CONSTANT, 0)
#cv2.imshow("ImageCam", np.hstack([imgU1,imgU2]));
#imgU1 = cv2.cvtColor(imgU1, cv2.COLOR_BGR2GRAY)
#imgU2 = cv2.cvtColor(imgU2, cv2.COLOR_BGR2GRAY)
img1 = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
img2 = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY)
stereo = cv2.StereoBM_create(numDisparities=16, blockSize=15)
disparity = stereo.compute(img1,img2)
disparit2 = stereo.compute(img2,img1)
res = cv2.convertScaleAbs(disparity, alpha=(255.0/512.0))
re2 = cv2.convertScaleAbs(disparit2, alpha=(255.0/512.0))
cv2.namedWindow("Disparity")
cv2.imshow('Disparity', np.hstack([res,re2]))
cv2.waitKey(1)
Output:
Importing Images
Found {90} images for Left camera
Found {90} images for Right camera
Number of Images match
Using loaded images
{ 1 } / { 81.0 } calibration images detected
{ 2 } / { 81.0 } calibration images detected
...
{ 81 } / { 81.0 } calibration images detected
Calibrating
Done Calibration
T:
-3.4549164747952514
-0.15507627811210184
-0.058176064658149625
E:
0.0009397723130476023 0.05762864132890782 -0.15527769659160615
-0.01780225919479015 0.01349075458635349 3.455334047732434
-0.008356129824974412 -3.458367965240172 0.010848591597549652
F:
3.59441069386539e-08 2.1966757991956236e-06 -0.0032581679670958268
-6.799554333159719e-07 5.135279707045414e-07 0.060534502577423176
6.856712419870922e-06 -0.061575681061419536 1.0
R:
0.9988149170858261 -0.0472903202575948 -0.01150595570860947
0.047251107481307925 0.998876350140538 -0.0036564971909233096
0.011665943966274269 0.0031084947887139625 0.9999271188499311
CAM1:
457.8949692862012 0.0 333.02411929079784
0.0 459.45537763505865 239.7961684844508
0.0 0.0 1.0
CAM2:
460.4374113961873 0.0 342.68117331116434
0.0 461.07367491328057 244.62051778708334
0.0 0.0 1.0
DIS1:
0.06391854958023913 -0.2191286122082927 -0.000947168228999159 0.004660285089171575 0.08044318478168837
DIS2:
0.011643796283126952 0.14239490114798584 0.001548517080560543 0.011862118627062223 -0.5191998209097282
Rectifying cameras
Settings complete

You missed the Calibration and Rectification process, which is the first step of a disparity algorithm.
Below steps help you get your disparity map:
Calibrate your camera and find the intrinsic and extrinsic of the camera.
With the available camera and distortion matrix from the calibration, rectify your images.
Pass the images to your algorithm.
Get the disparity map.
Note: raw disparity map will be bad in a textureless region.

Improve HED algorithm for edge detection

I am working on an image processing task using python which depends mainly in detecting the grains in the image of soil samples so the first step in the processing process is edge detection ,I use HED algorithm (holistically nested edge detection ) for this step rather than using other edge detection functions in python as canny or sobel .
However , I face a problem in detecting the grains of fine soil particles as sand samples images shown below . I am asking if there is modification can be done on the image or the algorithm to improve edge detection to get the borders of the grains or as maximum as possible of the grains.
This is the used algorithm and results of using this algorithm in edge detection.
# USAGE
# python detect_edges_image.py --edge-detector hed_model --image images/guitar.jpg
# import the necessary packages
import argparse
import cv2
import os
import easygui
import pandas as pd
path = easygui.fileopenbox()
print(path)
hdir = os.path.dirname(path)
print(hdir)
hfilename = os.path.basename(path)
print(hfilename)
hname = os.path.splitext(hfilename)[0]
print(hname)
houtname = hname+"_out.jpg"
print(houtname)
hout = os.path.sep.join([hdir,houtname])
print(hout)
# # construct the argument parser and parse the arguments
# ap = argparse.ArgumentParser()
# ap.add_argument("-d", "--edge-detector", type=str, required=True,
# help="path to OpenCV's deep learning edge detector")
# ap.add_argument("-i", "--image", type=str, required=True,
# help="path to input image")
# args = vars(ap.parse_args())
class CropLayer(object):
def __init__(self, params, blobs):
# initialize our starting and ending (x, y)-coordinates of
# the crop
self.startX = 0
self.startY = 0
self.endX = 0
self.endY = 0
def getMemoryShapes(self, inputs):
# the crop layer will receive two inputs -- we need to crop
# the first input blob to match the shape of the second one,
# keeping the batch size and number of channels
(inputShape, targetShape) = (inputs[0], inputs[1])
(batchSize, numChannels) = (inputShape[0], inputShape[1])
(H, W) = (targetShape[2], targetShape[3])
# compute the starting and ending crop coordinates
self.startX = int((inputShape[3] - targetShape[3]) / 2)
self.startY = int((inputShape[2] - targetShape[2]) / 2)
self.endX = self.startX + W
self.endY = self.startY + H
# return the shape of the volume (we'll perform the actual
# crop during the forward pass
return [[batchSize, numChannels, H, W]]
def forward(self, inputs):
# use the derived (x, y)-coordinates to perform the crop
return [inputs[0][:, :, self.startY:self.endY,
self.startX:self.endX]]
# load our serialized edge detector from disk
print("[INFO] loading edge detector...")
fpath = os.path.abspath(__file__)
fdir = os.path.dirname(fpath)
print(fdir)
protoPath = os.path.sep.join([fdir,"hed_model", "deploy.prototxt"])
print(protoPath)
modelPath = os.path.sep.join([fdir,"hed_model","hed_pretrained_bsds.caffemodel"])
print(modelPath)
net = cv2.dnn.readNetFromCaffe(protoPath, modelPath)
# register our new layer with the model
cv2.dnn_registerLayer("Crop", CropLayer)
# load the input image and grab its dimensions
image = cv2.imread('D:\My work\MASTERS WORK\SAND - UNIFORM\sand_180pxfor1cm(130,120,75).jpg')
# image =cv2.equalizeHist(img)
# image = cv2.pyrMeanShiftFiltering(image1,10,20)
(H, W) = image.shape[:2]
# print(image.shape[:2])
# image.shape[:2] =(H*3, W*3)ho
# image = cv2.resize(image,0.5)
# convert the image to grayscale, blur it, and perform Canny
# edge detection
print("[INFO] performing Canny edge detection...")
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blurred = cv2.GaussianBlur(gray, (5, 5), 0)
# blurred = cv2.addWeighted(gray,1.5,blurred,-0.5,0)
canny = cv2.Canny(blurred,30, 150)
# construct a blob out of the input image for the Holistically-Nested
# Edge Detector
# cc = cv2.cvtColor(canny, cv2.COLOR_GRAY2BGR)
# image = image+cc
# mean = (104.00698793, 116.66876762, 122.67891434),
blob = cv2.dnn.blobFromImage(image, scalefactor=1.0, size=(W, H),
# mean=(110,95,95),
# mean=(104.00698793, 116.66876762, 122.67891434),
# mean=(104, 116, 122),
mean=(130, 120, 75),
# mean=(145, 147, 180),
swapRB= False, crop=False)
print( blob)
cv2.waitKey(0)
# set the blob as the input to the network and perform a forward pass
# to compute the edges
print("[INFO] performing holistically-nested edge detection...")
net.setInput(blob)
hed = net.forward()
hed = cv2.resize(hed[0, 0], (W, H))
hed = (255 * hed).astype("uint8")
# show the output edge detection results for Canny and
# Holistically-Nested Edge Detection
cv2.imshow("Input", image)
cv2.imshow("Canny", canny)
cv2.imshow("HED", hed)
cv2.imwrite(hout, hed)
cv2.waitKey(0)

How to perform image convolution on an image using opencv python

I am trying to perform edge detection for my images of soil grains using holistically nested edge detection method HED as shown however when using combined fine and coarse soil grains , the region of fine particles is not clear so I suggest making image convolution by cutting the image into smaller rectangular areas in both directions and make HED for every portion of image and store them to black copy image so as to add the edged portions to this image .
I faced an error after repeating the algorithm of HED in a for loop by dividing the width of image to 5 portions and the height to 4 portions but I can't fix that error .
Here is the algorithm used
# import the necessary packages
import argparse
import cv2
import os
import easygui
path = easygui.fileopenbox()
print(path)
hdir = os.path.dirname(path)
print(hdir)
hfilename = os.path.basename(path)
print(hfilename)
hname = os.path.splitext(hfilename)[0]
print(hname)
houtname = hname+"_out.jpg"
print(houtname)
hout = os.path.sep.join([hdir,houtname])
print(hout)
# # construct the argument parser and parse the arguments
# ap = argparse.ArgumentParser()
# ap.add_argument("-d", "--edge-detector", type=str, required=True,
# help="path to OpenCV's deep learning edge detector")
# ap.add_argument("-i", "--image", type=str, required=True,
# help="path to input image")
# args = vars(ap.parse_args())
class CropLayer(object):
def __init__(self, params, blobs):
# initialize our starting and ending (x, y)-coordinates of
# the crop
self.startX = 0
self.startY = 0
self.endX = 0
self.endY = 0
def getMemoryShapes(self, inputs):
# the crop layer will receive two inputs -- we need to crop
# the first input blob to match the shape of the second one,
# keeping the batch size and number of channels
(inputShape, targetShape) = (inputs[0], inputs[1])
(batchSize, numChannels) = (inputShape[0], inputShape[1])
(H, W) = (targetShape[2], targetShape[3])
# compute the starting and ending crop coordinates
self.startX = int((inputShape[3] - targetShape[3]) / 2)
self.startY = int((inputShape[2] - targetShape[2]) / 2)
self.endX = self.startX + W
self.endY = self.startY + H
# return the shape of the volume (we'll perform the actual
# crop during the forward pass
return [[batchSize, numChannels, H, W]]
def forward(self, inputs):
# use the derived (x, y)-coordinates to perform the crop
return [inputs[0][:, :, self.startY:self.endY,
self.startX:self.endX]]
# load our serialized edge detector from disk
print("[INFO] loading edge detector...")
fpath = os.path.abspath(__file__)
fdir = os.path.dirname(fpath)
print(fdir)
protoPath = os.path.sep.join([fdir,"hed_model", "deploy.prototxt"])
print(protoPath)
modelPath = os.path.sep.join([fdir,"hed_model","hed_pretrained_bsds.caffemodel"])
print(modelPath)
net = cv2.dnn.readNetFromCaffe(protoPath, modelPath)
# register our new layer with the model
cv2.dnn_registerLayer("Crop", CropLayer)
# load the input image and grab its dimensions
image = cv2.imread('D:\My work\MASTERS WORK\GSD files\Sample E photos\SampleE_#1_26pxfor1mm.jpg')
im_copy = image.copy()*0
(H, W) = image.shape[:2]
# print(image.shape[:2])
# image.shape[:2] =(H*3, W*3)
# image = cv2.resize(image,0.5)
h=0
w=0
for m in range(0,H ,int(H/5)):
for n in range(0,W,int(W/3)):
gray = image[h:m,w:n]
# convert the image to grayscale, blur it, and perform Canny
# edge detection
print("[INFO] performing Canny edge detection...")
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blurred = cv2.GaussianBlur(gray, (5, 5), 0)
canny = cv2.Canny(blurred, 30, 150)
# construct a blob out of the input image for the Holistically-Nested
# Edge Detector
# cc = cv2.cvtColor(canny, cv2.COLOR_GRAY2BGR)
# image = image+cc
# mean = (104.00698793, 116.66876762, 122.67891434),
blob = cv2.dnn.blobFromImage(image, scalefactor=1.0, size=((m-h), (n-w)),
# mean=(230, 120, 50),
mean=(104.00698793, 116.66876762, 122.67891434),
swapRB=False, crop=False)
print( blob)
cv2.waitKey(0)
# set the blob as the input to the network and perform a forward pass
# to compute the edges
print("[INFO] performing holistically-nested edge detection...")
net.setInput(blob)
hed = net.forward()
hed = cv2.resize(hed[0, 0], ((m-h), (n-w)))
hed = (255 * hed).astype("uint8")
# Adding the edge detection for each portion to the copy image as follows
im_copy = im_copy + hed
h+=int(H/5)
w+=int(W/4)
# show the output edge detection results for Canny and
# Holistically-Nested Edge Detection
cv2.imshow("Input", image)
cv2.imshow("Canny", canny)
cv2.imshow("HED", hed)
cv2.waitKey(0)
cv2.imshow('Frame ',im_copy)
cv2.imwrite(hout, im_copy)
cv2v2.waitKey(0)
I then use this edged image in further analysis on the image .
The error I got using the algorithm
net = cv2.dnn.readNetFromCaffe(protoPath, modelPath)
cv2.error: OpenCV(4.1.1) C:\projects\opencv-python\opencv\modules\dnn\src\caffe\caffe_io.cpp:1121: error: (-2:Unspecified error) FAILED: fs.is_open(). Can't open "D:\My work\MASTERS WORK\hed_model\deploy.prototxt" in function 'cv::dnn::ReadProtoFromTextFile'

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Extract only specific information using OCR and OpenCV - python

You can use pytesseract.image_to_pdf_or_hocr(), choosing hocr as output format. This will contain bounding boxes on the character, word, and line level.

Related

How do I address poor tesseract output despite clear images

How to extract text from the specific bounding box images

Disparity Map just shows contouring

Improve HED algorithm for edge detection

How to perform image convolution on an image using opencv python

Categories

Resources