Extract text from pdf file - python

I want to extract certain words from pdf file, such as the words in the first column in the image, but the first letter of them is not recognized, perhaps because it is close to the edge of the table.
Is there a suggestion to extract the text correctly from a file as the displayed image??.
I used python tesseract lib.
image = cv2.imread('page' + str(j) + '.jpg')
rows,cols,_ = image.shape
image = cv2.resize(image, (np.int32(cols/2),np.int32(rows/2)))
# convert to gray and binarize
gray_img = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
binary_img = cv2.adaptiveThreshold(gray_img, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 9, 9)
# note: erosion and dilation works on white forground
binary_img = cv2.bitwise_not(binary_img)
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,3))
dilated_img = cv2.morphologyEx(binary_img, cv2.MORPH_DILATE ,kernel,iterations=2)
cnts, _ = cv2.findContours(dilated_img, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
rect_cnts = [] # Rectangular contours
for cnt in cnts:
rect_cnts.append(cnt)
rect_cnts = sorted(rect_cnts, key=cv2.contourArea, reverse=True)[:1]
for rec in rect_cnts:
# find bounding rectangle of biggest contour
box = cv2.boundingRect(rec)
x,y,w,h = box[:]
img = image[y:y+h,x:x+w]
cv2.imwrite('ROI_{}.png'.format(ROI_number), img)
ROI_number += 1
if j < len(d):
d[j] = pytesseract.image_to_data(img, output_type=Output.DICT,config = config, lang='eng') # extract words from image - store extracted words in d
else:
d.append(pytesseract.image_to_data(img, output_type=Output.DICT,config = config, lang='eng'))
n_boxes = len(d[j]['level'])
for i in range(n_boxes):
text = d[j]['text'][i]
text = text.replace("-", "").replace("/", "").replace("=", "").replace(",", "").replace(".", "").upper()
print(text)
for word in wordMap:
if text[len(text) - 1 : len(text)] == ',':
print("----- detect text ------")
print(text[len(text) - 1 : len(text)])
if word in text[0 : len(text) - 1]:
if not(word in im_list):
im_list.append(word)
else:
if word in text:
print("----- detect text ------")
print(text[len(text) - 1 : len(text)])
if not(word in im_list):
im_list.append(word)

Related

Template matching with updating templates from previous detection

I am trying to write a code where after matching with a given template, the detected part of that frame becomes the template for the next frame.
temp = "image.png"
while True:
try:
_, frame = cap.read()
copy = frame.copy()
w,h=temp.shape[:-1]
res=cv2.matchTemplate(frame,temp,cv2.TM_CCOEFF_NORMED)
threshold=0.75
#try:
loc=np.where(res>=threshold)
print(len(loc))
for pt in zip(*loc[::-1]):
#cv2.rectangle(img,pt,(pt[0]+w,pt[1]+h),(0,255,255),2)
point = pt
cropped_image = copy[point[1]:point[1]+h, point[0]:point[0]+w]
temp = cropped_image #update the template
but after writing this code the template matching is going in totally wrong direction, even though if i remove the "temp = cropped_image" then the cropped_image is actually good.
You can find x,y,w,h of the matched image with cv2.minMaxLoc()
import cv2
src = cv2.imread("source.png", cv2.IMREAD_GRAYSCALE)
templit = cv2.imread("initial_template.png", cv2.IMREAD_GRAYSCALE)
result = cv2.matchTemplate(src, templit, cv2.TM_SQDIFF_NORMED)
minVal, maxVal, minLoc, maxLoc = cv2.minMaxLoc(result)
x, y = minLoc
h, w = templit.shape
cropped_img = src[y: y + h, x: x + w]
//do template matching again with cropped_image

SVM always gives the same prediction

I have a page and I am trying to predict letters on the page by using SVM. However, the prediction result always the same. As a result, I am unable to read the page.
This is my code to detect the letters from the line images(I separated lines before):
def letters(img):
rect_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2,2))
morph_letters = cv2.morphologyEx(img, cv2.MORPH_CROSS, rect_kernel)
cv2.imshow("morph", morph_letters)
cnts = cv2.findContours(morph_letters.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
cnts = imutils.grab_contours(cnts)
lengthOfContours = len(cnts)
print("Number of counturs: ", lengthOfContours)
points = []
rects = []
for (i, c) in enumerate(cnts):
area = cv2.contourArea(c)
if area > 80:
print("area "+str(area))
(x, y, w, h) = cv2.boundingRect(c)
x = x - 3
y = y - 10
w = w + 4
h = h + 12
cv2.rectangle(img_letters, (x, y), (x + w, y + h), (0, 255, 0), 2)
rect = [x, y, w+x, h+y]
leftPoint = x
#print("leftPoint: ", leftPoint)
points.append(leftPoint)
points.sort()
rects.append([i,rect])
rects.sort(key=lambda x: x[1])
#print("rects: ", rects)
if i == lengthOfContours-1:
for i in range(len(rects)):
found_letter = thresh_adaptive[rects[i][1][1]:rects[i][1][3], rects[i][1][0]:rects[i][1][2]]
prediction(found_letter)
And this is my prediction code. It takes the letter image from def letters function, name: found_letter. However, the result is always the same, l.
def prediction(word_img):
pick = open('model-letters.sav', 'rb')
#pickle.dump(model, pick)
model = pickle.load(pick)
pick.close()
#prediction = model.predict(x_test)
#accuracy = model.score(x_test, y_test)
categories = ['a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z','A','B','C','D','E','F','G','H','I','J', 'K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z','1','2','3','4', '5','6','7','8','9','0']
img = word_img
try:
img = cv2.resize(img, (50,50))
img = np.array(img).flatten()
img = img.reshape(1, -1)
print("working")
prediction = model.predict(img)
letter = categories[prediction[0]]
print("Prediction ", prediction)
print("Prediction letter: ", letter)
#print("Accuracy: ",accuracy)
letterImg = img
cv2.imshow("letter", letterImg)
cv2.waitKey(0)
fh = open("letters.txt", "a")
fh.write(letter)
fh.close()
except:
print("error")
And this is my for loop to gives all the line images to def letters(img) function.
index = -1
for i in range(30,-1,-1):
index = index + 1
dir = "images-lines2/line" + str(i) + ".png"
img_orj = cv2.imread(dir)
img_orj = cv2.resize(img_orj, None, fx=1, fy=1.5, interpolation=cv2.INTER_AREA)
img_letters = img_orj.copy()
gray = cv2.cvtColor(img_orj, cv2.COLOR_BGR2GRAY)
img_lines = img_orj.copy()
blur_gaus = cv2.GaussianBlur(gray,(3,3),0)
kernel = np.ones((3,3), np.uint8)
thresh_adaptive = cv2.adaptiveThreshold(blur_gaus,255,cv2. ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV,7,11)
letter_img = letters(thresh_adaptive)
If you want to see cv2.imshow("letter", letterImg), this is the
output
Any help is appreciated.

Captcha crashes when selected. selenium Python

I am trying to download images using this code. However, The website comes up with a captcha. When I try to select the captcha it displays a broken PC image. Cannot figure a way past this. Is their a way to avoid the captcha altogether? Or select it somehow for clicking options via selenium. It's a long code but an MRE.
from selenium import webdriver
from bs4 import BeautifulSoup as soup
from datetime import date ,timedelta
import requests
import time
import base64
import cv2
import pytesseract
import xlsxwriter
import numpy as np
import pandas as pd
import os
import shutil
driver = webdriver.Chrome("chromedriver")
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
excel_name = ['Сите','Упис на основање','2б Ново со спојување на __','3б Ново со раздвојување од __','5б Ново со издвојување од__','Упис на промена','Документ за корекција','1б Присоединување во __','4б Превземање со раздвојување на __','5а Издвојување во ново__','6б Превземање од__','6а Издвојување во __','Документ за регистрирање на работно време','Документ за определување на главна приходна шифра и големина','Документ за евидентирање на казна/санкција','Документ за евидентирање на бришење на казна/санкција','Документ за евидентирање на стечај на друг субјект','Документ за евидентирање на заклучување на стечај на друг субјект','Упис на бришење','2а Спојување во ново__со бришење','4а Раздвојување со превземање во __ со бришење','3а Раздвојување на ново __ со бришење','1а Присоединување на __ со бришење','Судска Процедура - Стечај','Ликвидација','Претстечај (Претходна постапка)','Објава(Друго)','Објава(Стечајна постапка)','Објава(Ликвидациона постапка)','Вонсудска спогодба','Објава(Вонсудска спогодба)','Предбелешка']
#excel_name = ['Сите','Упис на основање','2б Ново со спојување на __','Упис на промена']
image_name = ['image', 'image0', 'image1', 'image2', 'image3', 'image4', 'image5', 'image6', 'image7', 'image8', 'image9', 'image10', 'image11', 'image12', 'image13', 'image14', 'image15', 'image16', 'image17', 'image18', 'image19', 'image20', 'image21', 'image22', 'image23', 'image24', 'image25', 'image26', 'image27', 'image28', 'image29', 'image30']
def get_text(data_number, image_name, excel_name):
workbook = xlsxwriter.Workbook(str(date.today() - timedelta(days=1)) + '-' + excel_name + '.xlsx')
worksheet = workbook.add_worksheet("content")
row = 0
print(image_name, data_number)
# Load image, grayscale, and Otsu's threshold
for i in range(data_number):
print('./images/' + str(date.today() - timedelta(days=3)) + '-' + image_name + str(i) + '.png')
image = cv2.imread('./images/' + str(date.today() - timedelta(days=3)) + '-' + image_name + str(i) + '.png')
try:
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
except:
continue
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
# Remove horizontal lines
horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (50, 1))
detect_horizontal = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, horizontal_kernel, iterations=2)
cnts = cv2.findContours(detect_horizontal, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
cv2.drawContours(thresh, [c], -1, (0, 0, 0), 2)
# Remove vertical lines
vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 15))
detect_vertical = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, vertical_kernel, iterations=2)
cnts = cv2.findContours(detect_vertical, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
cv2.drawContours(thresh, [c], -1, (0, 0, 0), 3)
# Dilate to connect text and remove dots
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (10, 1))
dilate = cv2.dilate(thresh, kernel, iterations=2)
cnts = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
area = cv2.contourArea(c)
if area < 500:
cv2.drawContours(dilate, [c], -1, (0, 0, 0), -1)
# Bitwise-and to reconstruct image
result = cv2.bitwise_and(image, image, mask=dilate)
result[dilate == 0] = (255, 255, 255)
# OCR
data = pytesseract.image_to_string(result, lang='mkd+eng', config='--psm 6')
# data = pytesseract.image_to_string(result,config='--psm 6')
#print(data)
worksheet.write(row, 0, data)
row = row + 1
workbook.close()
def sort_contours(cnts, method="left-to-right"):
# initialize the reverse flag and sort index
reverse = False
i = 0
# handle if we need to sort in reverse
if method == "right-to-left" or method == "bottom-to-top":
reverse = True
# handle if we are sorting against the y-coordinate rather than
# the x-coordinate of the bounding box
if method == "top-to-bottom" or method == "bottom-to-top":
i = 1
# construct the list of bounding boxes and sort them from top to
# bottom
boundingBoxes = [cv2.boundingRect(c) for c in cnts]
(cnts, boundingBoxes) = zip(*sorted(zip(cnts, boundingBoxes),
key=lambda b: b[1][i], reverse=reverse))
# return the list of sorted contours and bounding boxes
return (cnts, boundingBoxes)
def get_table(path):
image = cv2.imread(path, 0)
image_colour=cv2.imread(path)
ret, img = cv2.threshold(image, 240, 255, cv2.THRESH_BINARY)
img_inv = 255 - img
kernel_len = np.array(img).shape[1] // 100
# Defining a vertical kernel to detect all vertical lines of image
ver_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, kernel_len))
img_bin = img_inv
image_1 = cv2.erode(img_bin, ver_kernel, iterations=3)
vertical_lines = cv2.dilate(image_1, ver_kernel, iterations=3)
cv2.imwrite("vertical.jpg", vertical_lines)
hor_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (kernel_len, 1))
# A kernel of 2x2
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
image_2 = cv2.erode(img_bin, hor_kernel, iterations=3)
horizontal_lines = cv2.dilate(image_2, hor_kernel, iterations=3)
cv2.imwrite("horizontal.jpg", horizontal_lines)
img_vh = cv2.addWeighted(vertical_lines, 0.5, horizontal_lines, 0.5, 0.0)
# Eroding and thesholding the image
img_vh = cv2.erode(~img_vh, kernel, iterations=2)
thresh, img_vh = cv2.threshold(img_vh, 128, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
cv2.imwrite("img_vh.jpg", img_vh)
#bitxor = cv2.bitwise_xor(img, img_vh)
#bitnot = cv2.bitwise_not(bitxor)
contours, hierarchy = cv2.findContours(img_vh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
# Sort all the contours by top to bottom.
contours, boundingBoxes = sort_contours(contours, method="top-to-bottom")
# Creating a list of heights for all detected boxes
heights = [boundingBoxes[i][3] for i in range(len(boundingBoxes))]
# Get mean of heights
mean = np.mean(heights)
box = []
# Get position (x,y), width and height for every contour and show the contour on image
for c in contours:
x, y, w, h = cv2.boundingRect(c)
if (100 <w < 0.8*image.shape[1] and 40 < h < 500):
image = cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
box.append([x, y, w, h])
return arrange_rows(box,mean),image_colour
def arrange_rows(box,mean):
row = []
column = []
j = 0
# Sorting the boxes to their respective row and column
for i in range(len(box)):
if (i == 0):
column.append(box[i])
previous = box[i]
else:
if (box[i][1] <= previous[1] + mean / 2):
column.append(box[i])
previous = box[i]
if (i == len(box) - 1):
row.append(column)
else:
row.append(column)
column = []
previous = box[i]
column.append(box[i])
return row
def cell_ocr(im,rcts):
rcts = [sorted(c, key=lambda x: x[0]) for c in rcts]
output = []
for i, row in enumerate(rcts):
y, x, w, h = row[0]
y1, x1, w1, h1 = row[1]
finalimg = im[x:x + h, y:y + w]
finalimg_val = im[x1:x1 + h1, y1:y1 + w1]
resizing = cv2.resize(finalimg, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)
resizing_val = cv2.resize(finalimg_val, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)
out = pytesseract.image_to_string(resizing, lang='mkd+eng')
out_val = pytesseract.image_to_string(resizing_val, lang='mkd+eng')
output.append([out.strip(), out_val.strip()])
return output
def get_text_v(path="images", date_of=(date.today() - timedelta(days=1))):
type_dict = {}
for f in os.listdir(path):
print("Processing File : " + str(f) + " ...")
r, im = get_table(os.path.join(path,f))
output=cell_ocr(im,r)
try:
idx=[x[0] for x in output].index("Вид на упис")
attr_key = output[idx][1]
except ValueError:
attr_key = "custom"
if attr_key in type_dict:
grp_df=pd.DataFrame(output).groupby(0,as_index=False).agg(lambda x: ",".join([str(xc) for xc in x]))
type_dict[attr_key]=type_dict[attr_key].merge(grp_df, how="outer",on=0)
else:
type_dict[attr_key]=pd.DataFrame(output).groupby(0,as_index=False).agg(lambda x: ",".join([str(xc) for xc in x]))
type_dict.pop('Упис на промена', None) # this should delete the Упис на промена sheet
type_dict.pop('Упис на основање', None) # this should delete the Упис на основање sheet
type_dict.pop('Упис на основање', None) # this should delete the Упис на основање sheet
with pd.ExcelWriter("workbook"+str(date_of)+'.xlsx') as writer:
for k, v in type_dict.items():
v.transpose().to_excel(writer, sheet_name=k[:30], header=False, index=False)
return type_dict
def main():
count = 0
driver.get("http://crm.com.mk/mk/otvoreni-podatotsi/objavi-na-upisi-za-subjekti")
time.sleep(30)
for l in range(len(excel_name)):
print("visiting option : " + excel_name[l])
data_list = []
if (l < 1):
continue
today = str(date.today() - timedelta(days=3)).split('-')
get_date = today[2] + '.' + today[1] + '.' + today[0]
driver.find_element_by_xpath(
'//*[#id="content"]/cms-container/crm-template-fs-latestannouncement/crm-cnt-latestannouncement/crm-cnt-latestannouncement-list/div/crm-cnt-latestannouncement-list-oss/div[2]/div/div[1]/div[2]/div[1]/fieldset/span/select/option[' + str(
l + 1) + ']').click()
time.sleep(2)
driver.find_element_by_xpath(
'//*[#id="content"]/cms-container/crm-template-fs-latestannouncement/crm-cnt-latestannouncement/crm-cnt-latestannouncement-list/div/crm-cnt-latestannouncement-list-oss/div[2]/div/div[1]/div[2]/div[2]/fieldset/input').send_keys(
get_date)
time.sleep(2)
driver.find_element_by_xpath(
'//*[#id="content"]/cms-container/crm-template-fs-latestannouncement/crm-cnt-latestannouncement/crm-cnt-latestannouncement-list/div/crm-cnt-latestannouncement-list-oss/div[2]/div/div[2]/div/button[1]').click()
time.sleep(10)
page_content = soup(driver.page_source, 'html.parser')
if (page_content.find('table', {'class': 'table--mobile'}) != None):
if (page_content.find('ul', {'class': 'ngx-pagination'}) != None):
page_list = page_content.find('ul', {'class': 'ngx-pagination'}).findAll("li")
print(page_list[len(page_list) - 2].text.replace('page ', ''))
for i in range(int(page_list[len(page_list) - 2].text.replace('page ', ''))):
time.sleep(3)
driver.find_element_by_xpath(
'//*[#id="content"]/cms-container/crm-template-fs-latestannouncement/crm-cnt-latestannouncement/crm-cnt-latestannouncement-list/div/crm-cnt-latestannouncement-list-oss/div[4]/div/div/pagination-controls/pagination-template/ul/li[' + str(
i + 3) + ']').click()
time.sleep(3)
page_res = soup(driver.page_source, 'html.parser')
if (page_res.find('table', {'class': 'table--mobile'}) != None):
table_list = page_res.find('table', {'class': 'table--mobile'}).findAll('tr')
for j in range(len(table_list)):
if (j > 0):
tr_list = table_list[j].findAll('td')
data_list.append(tr_list[0].text)
else:
count = 1
if count == 1:
break
else:
table_list = page_content.find('table', {'class': 'table--mobile'}).findAll('tr')
for j in range(len(table_list)):
if (j > 0):
tr_list = table_list[j].findAll('td')
data_list.append(tr_list[0].text)
print("number of items found in option " + excel_name[l] + " : " + str(len(data_list)))
data_number = len(data_list)
if (data_number == 0):
driver.find_element_by_xpath(
'//*[#id="content"]/cms-container/crm-template-fs-latestannouncement/crm-cnt-latestannouncement/crm-cnt-latestannouncement-list/div/crm-cnt-latestannouncement-list-oss/div[2]/div/div[1]/div[2]/div[2]/fieldset/input').clear()
continue
for k in range(len(data_list)):
print("Downloading image number : " + str(k) + "/" + str(len(data_list)))
#if(k>2):
# break
driver.get("http://crm.com.mk/mk/otvoreni-podatotsi/objavi-na-upisi-za-subjekti?id=" + data_list[k] + "&p=1")
time.sleep(60)
page_cont = soup(driver.page_source, 'html.parser')
if (page_cont.find('div', {'class': 'row center'}) != None):
image_src = page_cont.find('div', {'class': 'row center'}).div.img['src']
try:
imagedata = base64.b64decode(image_src.replace('data:image/png;base64,', ''))
image = open("./images/" + str(date.today() - timedelta(days=)) + '-' + image_name[l] + str(k) + ".png", "wb")
image.write(imagedata)
image.close()
except:
print("An exception occurred on image " + str(k) +" with id : " + str(data_list[k]) )
driver.get("http://crm.com.mk/mk/otvoreni-podatotsi/objavi-na-upisi-za-subjekti")
time.sleep(20)
if excel_name[l]=="Упис на промена":
get_text(data_number, image_name[l], excel_name[l])
if excel_name[l]=="Упис на основање":
get_text(data_number, image_name[l], excel_name[l])
count = 0
driver.close()
main()
print("Generating workbook please wait ...")
get_text_v()
print("Workbook file generated !!")
print("Moving files from images to oldimages ...")
source_dir = 'images'
target_dir = 'oldimages'
file_names = os.listdir(source_dir)
for file_name in file_names:
print("moving file " + str(file_name) + " ...")
try:
shutil.move(os.path.join(source_dir, file_name), target_dir)
except:
print("An exception occurred, File already exist !!!")
print("Moving files from images to oldimages Done !!!")

opencv segment conntected characters

I am trying to do character detect for handwriting letters.
Upon recognizing itself, I use tesseract or opencv SVM, and works fine till now.
Everything works fine for segment letters until I hit those connected ones.
I use the following code to segment letters:
# -*- coding: utf-8 -*-
import numpy as np
import cv2
# from matplotlib import pyplot as plt
from os.path import dirname, join, basename
import sys
from glob import glob
trainpic=[]
targetdir = dirname(__file__)+'tmporigin'
#print glob(join(dirname(__file__)+'/cat','*.jpg'))
img = {}
debug = True
a_num = 0
for fn in glob(join(targetdir, '*')):
filename = basename(fn)
trainpic.append(cv2.imread(fn, 0))
img_rgb = cv2.imread(fn)
img = cv2.imread(fn, 0)
image_close = cv2.morphologyEx(img_rgb, cv2.MORPH_CLOSE, np.ones((1, 7), np.uint8))
#if debug:
# cv2.imshow('morphology', image_close)
# key = cv2.waitKey(0)
_, contours, hierarchy = cv2.findContours(img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
samples = np.empty((0, 100))
responses = []
# keys = [i for i in range(48, 58)]
tmp_list = []
tmpcount = 0
for cnt in contours:
print 'contourarea:%s' % cv2.contourArea(cnt)
if cv2.contourArea(cnt) > 130: # 50 300
[x, y, w, h] = cv2.boundingRect(cnt)
print 'boundingRect width:%s' % w
print 'boundingRect height:%s' % h
if h > 28:
cv2.rectangle(img_rgb, (x, y), (x+w, y+h), (0, 0, 255), 2)
roi = img[y:y+h, x:x+w]
roismall = cv2.resize(roi, (45, 55))
if debug:
cv2.imshow('norm', img_rgb)
key = cv2.waitKey(0)
# tmp_list.append(roi)
tmpfilename = fn if tmpcount == 0 else fn.rsplit('.', 1)[0] + '_' + str(tmpcount) + '.png'
cv2.imwrite(tmpfilename, roismall)
tmpcount += 1
else:
print 'contarea less, skip...'
# print img[num].shape
a_num += 1
print '%s images processed' % a_num
So, its fine to handle letters with inter space like this guy(split to D and B):
However, failed to segment connected letters like this:
I googled a lot for connected letters and find related links like these two:
enter link description here
enter link description here
I tried a lot, e.g. morphology dilate, erode, open, close, watershed .etc but didn't fix my problem.
I use opencv 3.2.0 and python 2.7.10 upon my Ubuntu desktop.
Any suggestion is great appreciated.
Thanks.
Wesley

Face Recognition with multiple faces don't detect multiple faces

I've written a small program, which detects faces and saves them to an Train file for the recognition.
I have some trouble with this algorithm. Sometimes it throws the error, that the LBPH::Train was feed with empty data, which is wrong.
OpenCV Error: Unsupported format or combination of formats (Empty training data was given. You'll need more than one sample to learn a model.) in cv::LBPH::train, file ........\opencv\modules\contrib\src\facerec.cpp, line 917
Traceback (most recent call last):
Moreover the algorithm detects multiple faces, but recognizes it just as the same face, which is wrong.
Could someone give me a hint on what I'm missing?
import cv2
import os
import numpy as np
import sys
i = 0
global allFaces
global first
first = True
allFaces = []
cap = cv2.VideoCapture(0)
faceCascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
recognizer = cv2.createLBPHFaceRecognizer()
font=cv2.cv.InitFont(cv2.cv.CV_FONT_HERSHEY_COMPLEX_SMALL,1,1,0,1)
id = 0
class Face:
def __init__(self, id, face):
self.id = id
self.face = face
self.gatheredFaces = []
def main(self):
print("main")
def getFace(self):
return self.face
def setKnownFace(self):
self.known = False
def getKownFace(self):
return self.knwon
def getId(self):
return self.id
def setFacesInfo(self, frame, face):
x,y,h,w = face
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
self.gatheredFaces.append(gray[y:y+h, x:x+w])
# count = 0
# while (count != 10):
# gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
# cv2.imshow("frame in set", frame)
# faces = faceCascade.detectMultiScale(gray)
# for face in faces:
# self.gatheredFaces.append(gray[y:y+h,x:x+w])
# cv2.imshow("gathered Faces", self.gatheredFaces[0])
# cv2.imwrite("dataSet/User"+ str(self.getId()) +".jpg", gray)
# count = count+1
# cv2.waitKey(30)
def getFacesInfo(self):
return self.gatheredFaces
def trainDetector(self):
faceSamples = []
Ids = []
print("laenge von gathered FAces")
print(len(allFaces[0].getFacesInfo()))
for (i) in range(len(allFaces)):
temp = allFaces[i].getFacesInfo()
for (j) in range(len(temp)):
imageNP = np.array(temp[j], 'uint8')
id = allFaces[i].getId()
faces = faceCascade.detectMultiScale(imageNP)
for (x,y,h,w) in faces:
faceSamples.append(imageNP)
Ids.append(id)
recognizer.train(faceSamples, np.array(Ids))
recognizer.save('recognizer/train.yml')
def updateDetector(self):
recognizer.load('recognizer/train.yml')
faceSamples = []
Ids = []
for (i) in range(len(allFaces)):
temp = allFaces[i].getFacesInfo()
for (j) in range(len(temp)):
imageNP = np.array(temp[j], 'uint8')
id = allFaces[i].getId()
faces = faceCascade.detectMultiScale(imageNP)
for (x,y,h,w) in faces:
faceSamples.append(imageNP)
Ids.append(id)
recognizer.update(faceSamples, np.array(Ids))
recognizer.save('recognizer/train.yml')
while True:
ret, frame = cap.read()
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
cv2.imshow("actual Frame", frame)
cv2.imshow("gray", gray)
faces = faceCascade.detectMultiScale(gray, 1.3, 5)
print(faces)
for face in faces:
x,y,h,w = face
temp = Face(id, frame[y:y+h,x:x+w])
allFaces.append(temp)
temp = None
id = id+1
###Detector
detector = cv2.SIFT()
FLANN_INDEX_KDTREE = 0
flannParam = dict(algorithm = FLANN_INDEX_KDTREE, tree = 5)
flann = cv2.FlannBasedMatcher(flannParam,{})
trainImg = allFaces[0].getFace()
trainKP, trainDecs = detector.detectAndCompute(trainImg, None)
if((len(allFaces)==1) and first):
print("only one object in allFaces")
for i in range(10):
print(i)
allFaces[0].setFacesInfo(frame, face)
allFaces[0].trainDetector()
first = False
else:
for(i) in range(len(allFaces)):
QueryImg = cv2.cvtColor(allFaces[i].getFace(), cv2.COLOR_BGR2GRAY)
queryKP, queryDesc = detector.detectAndCompute(QueryImg, None)
matches = flann.knnMatch(queryDesc, trainDecs, k = 2)
goodMatch = []
for m, n in matches:
if(m.distance < 0.75 * n.distance):
goodMatch.append(m)
if(len(goodMatch) > 30):
print("good match")
#allFaces[i].
tp = []
qp = []
for m in goodMatch:
tp.append(trainKP[m.trainIdx].pt)
qp.append(queryKP[m.queryIdx].pt)
tp, qp = np.float32((tp, qp))
H, status = cv2.findHomography(tp, qp, cv2.RANSAC, 3.0)
allFaces.pop(len(allFaces)-1)
break
else:
print ("bad match")
for i in range(10):
allFaces[len(allFaces)-1].setFacesInfo(frame, face)
allFaces[len(allFaces)-1].updateDetector()
cv2.waitKey(10)
for (x,y,w,h) in faces:
cv2.rectangle(frame, (x,y), (x+w,y+h), (0,0,255),2)
tempid, conf = recognizer.predict(gray[y:y+h,x:x+w])
cv2.cv.PutText(cv2.cv.fromarray(frame), str(tempid),(x,y+h),font,(0,0,255))
cv2.waitKey(30)
cv2.imshow("detectedFace", frame)
cv2.waitKey(30)

Categories

Resources