Inaccurate facial recognition using OpenCV when the image is resized - python

https://snag.gy/6MrLNi.jpg
The chin is a bit off in this photo.
https://snag.gy/ORZHSe.jpg
Not this one.
Difference in Code:
image = cv2.resize(image,(2170, 2894), interpolation = cv2.INTER_AREA)
The second one does not have this line.
Complete Source Code:
import cv2
import sys
import dlib
import numpy as np
from PIL import Image
import rawpy
# Get user supplied values
imagePath = sys.argv[1]
cascPath = "HS.xml"
pointOfInterestX = 200
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor("okgood.dat")
raw = rawpy.imread(imagePath)
rgb = raw.postprocess()
image = Image.fromarray(rgb)
#image.save("WOO.jpg")
open_cv_image = np.array(image)
open_cv_image = open_cv_image[:, :, ::-1].copy()
image = open_cv_image
image = cv2.resize(image,(2170, 2894), interpolation = cv2.INTER_AREA)
widthO, heightO = image.shape[:2]
faceCascade = cv2.CascadeClassifier(cascPath)
# Read the image
#image = cv2.imread(imagePath)
gray = cv2.cvtColor((image), cv2.COLOR_RGB2BGR)
#height, width = image.shape[:2]
# Detect faces in the image
faces = faceCascade.detectMultiScale(
gray,
scaleFactor=1.1,
minNeighbors=4,
minSize=(500, 500)
#flags = cv2.CV_HAAR_SCALE_IMAGE
)
newdigit = 0
def test():
for l in range(y, y+h):
for d in range(x, x+w):
# print(image[l,d])
font = cv2.FONT_HERSHEY_SIMPLEX
if all(item < 150 for item in image[l, d]):
cv2.putText(image,"here",(d,l), font, .2,(255,255,255),1,cv2.LINE_AA)
return l;
image[l,d] = [0,0,0]
###
### put hairline 121 pixels from the top.
###
def shape_to_np(shape, dtype="int"):
    """Convert a landmark detection into an ``(N, 2)`` NumPy array.

    Args:
        shape: Object exposing ``part(i)`` whose result has ``x`` and ``y``
            attributes (here: the value returned by the dlib shape
            predictor). If it also exposes ``num_parts`` that count is
            used; otherwise 68 landmarks are assumed, as before.
        dtype: NumPy dtype of the returned array.

    Returns:
        Array whose row ``i`` holds the ``(x, y)`` coordinates of landmark ``i``.
    """
    # Ask the detection how many landmarks it carries instead of hard-coding
    # 68, so predictors with a different landmark count also work.
    count = getattr(shape, "num_parts", 68)
    coords = np.zeros((count, 2), dtype=dtype)
    for i in range(count):
        point = shape.part(i)
        coords[i] = (point.x, point.y)
    return coords
two = 1
# Draw a rectangle around the faces
for (x, y, w, h) in faces:
print(str(len(faces)))
cv2.rectangle(image, (x, y), (x+w, y+h), (0, 255, 0), 2)
pointOfInterestX = test()
break
dets = detector(image, 1)
one = 0
pointOfEight = 0
for k, d in enumerate(dets):
shape = predictor(image, d)
shape = shape_to_np(shape)
for (x, y) in shape:
if one == 8:
pointOfEight = y
font = cv2.FONT_HERSHEY_SIMPLEX
cv2.putText(image,str(one),(x,y), font, .2,(255,255,255),1,cv2.LINE_AA)
one = one + 1
cv2.circle(image, (x, y), 1, (0, 0, 255), -1)
# loop over the (x, y)-coordinates for the facial landmarks
# and draw them on the image
new_dimensionX = heightO * 631 / (pointOfEight - pointOfInterestX)
new_dimensionY = widthO * 631 / (pointOfEight - pointOfInterestX)
print(str(new_dimensionY))
image = cv2.resize(image,(int(new_dimensionX), int(new_dimensionY)))
Rx = new_dimensionX / heightO
Ry = new_dimensionY / widthO
crop_img = image[int((pointOfInterestX * Rx)-121):int(new_dimensionY), 0:int(new_dimensionX-((Rx *pointOfInterestX)+121))]
font = cv2.FONT_HERSHEY_SIMPLEX
cv2.putText(image,"xxxx",(100,pointOfInterestX ), font, 4,(255,255,255),1,cv2.LINE_AA)
cv2.imshow("Faces found", crop_img)
cv2.imwrite("cropped.jpg", crop_img)
cv2.waitKey(0)
Towards the top you will see the line where I resize the image to 2170,2894. Like I said, with this line absent, the chin detection is accurate. With it, it is not. I need the chin detection accurate at this resolution.

Use dlib's own face detector and initialise the landmark detector with the ROI it returns. The ROI produced by dlib's face detector differs from the one produced by the OpenCV Haar cascade, and dlib's landmark detector was trained on ROIs from dlib's face detector, so it should work better with them.

Related

Keypoint detection not working when keypoints are a certain colour

I'm using keypoint detection to find text within a game.
The background in the below images is dynamic, it's always a vaguely moving star-lit sky that you can barely see.
The detection works well when the text is white:
However, when the text is purple (unpredictable when this happens) the detection fails entirely:
Both the object I'm looking to detect and the image I'm running detection on are identical: the screenshots are taken directly from within the game (i.e. the text shown above), and detection is then run on the exact same location the original screenshots were taken from.
The below code I've written using the official documentation I found here and here as a guide but it's very light on explaining itself.
Question: Is this an inherent limitation or is there something I can do to adjust to detect keypoints within the purple image?
import cv2 as cv
import win32gui, win32con, win32ui
import numpy as np
import glob
# Grab a 1920x1080 screenshot through the Win32 GDI API and return it as a
# contiguous (h, w, 3) uint8 NumPy array.
def get_haystack_image():
w, h = 1920, 1080
# NOTE(review): hwnd is None, which presumably selects the whole screen - confirm.
hwnd = None
# Acquire the source device context and a compatible in-memory one.
wDC = win32gui.GetWindowDC(hwnd)
dcObj = win32ui.CreateDCFromHandle(wDC)
cDC = dcObj.CreateCompatibleDC()
# Create a w x h bitmap and select it into the memory DC as the copy target.
dataBitMap = win32ui.CreateBitmap()
dataBitMap.CreateCompatibleBitmap(dcObj, w, h)
cDC.SelectObject(dataBitMap)
# Copy the w x h region starting at (0, 0) from the source DC.
cDC.BitBlt((0, 0), (w, h), dcObj, (0, 0), win32con.SRCCOPY)
# Read the raw bitmap bytes and view them as 4 bytes per pixel.
signedIntsArray = dataBitMap.GetBitmapBits(True)
img = np.frombuffer(signedIntsArray, dtype='uint8')
img.shape = (h, w, 4)
# Release every GDI object acquired above.
# NOTE(review): nothing here runs if an earlier call raises - consider try/finally.
dcObj.DeleteDC()
cDC.DeleteDC()
win32gui.ReleaseDC(hwnd, wDC)
win32gui.DeleteObject(dataBitMap.GetHandle())
# Drop the fourth channel (presumably alpha - confirm) and make the result contiguous.
img = img[...,:3]
img = np.ascontiguousarray(img)
return img
def loadImages(directory):
    """Read every path in ``directory`` and pair each image with its path.

    Args:
        directory: Iterable of image file paths (the caller passes the
            result of ``glob.glob``).

    Returns:
        List of ``(image, path)`` tuples in input order; ``image`` is
        whatever ``cv.imread(path, cv.IMREAD_UNCHANGED)`` returned.
    """
    return [(cv.imread(path, cv.IMREAD_UNCHANGED), path) for path in directory]
def preProcessNeedle(image_list):
    """Compute ORB keypoints and descriptors for every needle image.

    Args:
        image_list: Sequence of entries whose first element is the image
            (as produced by ``loadImages``).

    Returns:
        List of ``(keypoints, descriptors, image)`` tuples in input order.
    """
    # The detector configuration never changes, so build it once instead of
    # once per image.
    orb = cv.ORB_create(edgeThreshold=0, patchSize=32)
    needle_kp1_desc = []
    for entry in image_list:
        img = entry[0]
        keypoint_needle, descriptors_needle = orb.detectAndCompute(img, None)
        needle_kp1_desc.append((keypoint_needle, descriptors_needle, img))
    return needle_kp1_desc
def match_keypoints(descriptors_needle, keypoint_haystack, min_match_count):
    """Match needle ORB descriptors against a haystack image.

    Args:
        descriptors_needle: ORB descriptors of the needle image.
        keypoint_haystack: Despite the name, this is the haystack *image*;
            it is handed to ``detectAndCompute``.
        min_match_count: Number of ratio-test survivors that must be
            exceeded before match positions are reported.

    Returns:
        ``(keypoints_haystack, good, points)``. ``good`` holds the matches
        that passed the ratio test; ``points`` holds their haystack
        coordinates and stays empty unless ``len(good) > min_match_count``.
        When the matcher raises ``cv.error`` the result is ``(None, [], [])``,
        the same 3-tuple shape as the success path.
    """
    orbHaystack = cv.ORB_create(edgeThreshold=0, patchSize=32, nfeatures=3000)
    keypoints_haystack, descriptors_haystack = orbHaystack.detectAndCompute(keypoint_haystack, None)

    FLANN_INDEX_LSH = 6
    index_params = dict(algorithm=FLANN_INDEX_LSH, table_number=6, key_size=12, multi_probe_level=1)
    search_params = dict(checks=50)
    try:
        flann = cv.FlannBasedMatcher(index_params, search_params)
        matches = flann.knnMatch(descriptors_needle, descriptors_haystack, k=2)
    except cv.error:
        # Previously this path returned four values while the success path
        # returned three, so callers could not unpack both consistently.
        return None, [], []

    # Ratio test: keep a match only when it is clearly better than the
    # runner-up. Pairs with fewer than two candidates are skipped.
    good = [
        pair[0]
        for pair in matches
        if len(pair) == 2 and pair[0].distance < 0.7 * pair[1].distance
    ]

    points = []
    if len(good) > min_match_count:
        points = [keypoints_haystack[match.trainIdx].pt for match in good]
    return keypoints_haystack, good, points
def shipDetection(needle_kp1_desc):
    """Report whether any needle image is found in the current screen crop.

    Args:
        needle_kp1_desc: Sequence of ``(keypoints, descriptors, image)``
            entries, one per needle image.

    Returns:
        ``True`` as soon as one needle yields match points, else ``False``.
    """
    # Object Detection
    for entry in needle_kp1_desc:
        kp1, descriptors_needle, needle_img = entry[0], entry[1], entry[2]
        # get an updated image of the screen & crop it
        keypoint_haystack = get_haystack_image()
        keypoint_haystack = keypoint_haystack[40:110, 850:1000]
        # match_keypoints takes (descriptors, image, min_match_count); the
        # former call also passed kp1 positionally, which raised TypeError.
        result = match_keypoints(descriptors_needle, keypoint_haystack, min_match_count=40)
        # Only the first three values are used, so a longer tuple is tolerated.
        kp2, matches, match_points = result[0], result[1], result[2]
        if kp2 is None or matches is None:
            # The matcher failed for this needle; nothing to draw.
            continue
        # display the matches
        match_image = cv.drawMatches(needle_img, kp1, keypoint_haystack, kp2, matches, None)
        cv.imshow('Keypoint Search', match_image)
        cv.moveWindow("Keypoint Search",1940,30)
        cv.waitKey(1)
        if match_points:
            # removed code as irrelevant to detection but left comments in
            # find the center point of all the matched features
            # account for the width of the needle image that appears on the left
            # drawn the found center point on the output image
            # display the processed image
            cv.imshow('Keypoint Search', match_image)
            cv.waitKey(1)
            return True
    return False
ships_to_avoid = loadImages(glob.glob(r"C:\Users\*.png"))
needle_kp1_desc = preProcessNeedle(ships_to_avoid)
if shipDetection(needle_kp1_desc):
# do something with the output
Isolating the red channel, converting to grayscale and applying binary thresholding has normalised the results, they're all now a consistent "white" which my detection is successfully identifying.
apply_thresholding performs this pre-processing on a folder: it writes the processed images from image_dir to output_dir and then deletes the unprocessed images from image_dir.
def apply_thresholding():
    """Threshold the red channel of every PNG under ``image_dir``.

    Each readable ``.png`` found while walking ``image_dir`` is reduced to
    its red channel, converted to grayscale, binarised at 40 and written
    under the same file name to ``output_dir``. Afterwards the files
    directly inside ``image_dir`` are deleted, except those that could not
    be read and therefore were never converted.
    """
    # get directory path where the images are stored
    image_dir = r"C:\Users\pre"
    # get directory path where you want to save the images
    output_dir = r"C:\Users\post"
    unreadable = set()
    # Use the directory os.walk is currently in, so files in sub-folders get
    # their real path instead of being joined onto image_dir.
    for root, _, image_names in os.walk(image_dir):
        for image_name in image_names:
            # A suffix test, so names that merely contain ".png" are skipped.
            if not image_name.endswith('.png'):
                continue
            filepath = os.path.join(root, image_name)
            dstpath = os.path.join(output_dir, image_name)
            print(filepath, dstpath)
            image = cv.imread(filepath)
            if image is None:
                # imread returns None on failure; keep the file for inspection.
                unreadable.add(os.path.normpath(filepath))
                continue
            r = image.copy()
            # set blue and green channels to 0
            r[:, :, 0] = 0
            r[:, :, 1] = 0
            # convert to grayscale now we've dropped b and g channels
            gray = cv.cvtColor(r, cv.COLOR_BGR2GRAY)
            # Apply binary thresholding
            (T, thresh) = cv.threshold(gray, 40, 255, cv.THRESH_BINARY)
            # write the image in a different path with the same name
            cv.imwrite(dstpath, thresh)
    for f in glob.glob(r"C:\Users\pre\*"):
        # os.remove fails on directories, and unread inputs must survive.
        if os.path.isfile(f) and os.path.normpath(f) not in unreadable:
            os.remove(f)
I then applied the same channel isolation, grayscale conversion and binary thresholding to my detection area.
# Grab a 1920x1080 screenshot through the Win32 GDI API, keep only the red
# channel, and return a binary (thresholded at 40) single-channel image.
def get_haystack_image():
w, h = 1920, 1080
# NOTE(review): hwnd is None, which presumably selects the whole screen - confirm.
hwnd = None
# Acquire the source device context and a compatible in-memory one.
wDC = win32gui.GetWindowDC(hwnd)
dcObj = win32ui.CreateDCFromHandle(wDC)
cDC = dcObj.CreateCompatibleDC()
# Create a w x h bitmap and select it into the memory DC as the copy target.
dataBitMap = win32ui.CreateBitmap()
dataBitMap.CreateCompatibleBitmap(dcObj, w, h)
cDC.SelectObject(dataBitMap)
# Copy the w x h region starting at (0, 0) from the source DC.
cDC.BitBlt((0, 0), (w, h), dcObj, (0, 0), win32con.SRCCOPY)
# Read the raw bitmap bytes and view them as 4 bytes per pixel.
signedIntsArray = dataBitMap.GetBitmapBits(True)
img = np.frombuffer(signedIntsArray, dtype='uint8')
img.shape = (h, w, 4)
# Release every GDI object acquired above.
# NOTE(review): nothing here runs if an earlier call raises - consider try/finally.
dcObj.DeleteDC()
cDC.DeleteDC()
win32gui.ReleaseDC(hwnd, wDC)
win32gui.DeleteObject(dataBitMap.GetHandle())
# Drop the fourth channel (presumably alpha - confirm) and make the result contiguous.
img = img[...,:3]
img = np.ascontiguousarray(img)
# Work on a copy so the channel zeroing below does not touch img.
r = img.copy()
# set blue and green channels to 0
r[:, :, 0] = 0
r[:, :, 1] = 0
# convert to grayscale now we've dropped b and g channels
gray = cv.cvtColor(r, cv.COLOR_BGR2GRAY)
# Apply binary thresholding; this mirrors the pre-processing applied to the needle images.
(T, img) = cv.threshold(gray, 40, 255, cv.THRESH_BINARY)
return img

How To Draw a Triangle-Arrow With The Positions of Detected Objects

I am making a object detection project.
I have my code. And I have written it by following a tutorial. In the tutorial, the guy drew a rectangle in opencv for every single object which is detected.
But I want to change the rectangle to triangle or Arrow.
let me explain with code===>
In my function, I detect objects.
And here I draw rectangle for detected objects==>
cv2.rectangle(img, (x, y), (x+w,y+h), (255, 0 , 255), 2)
But I want to change this rectangle to a triangle, and I want to position the triangle above the object.
Just like in these images:::
This is the object detection with triangle
[![enter image description here][1]][1]
This is the thing that what I want to make instead of rectangle:::
[![enter image description here][2]][2]
How Can I make a triangle/arrow with positions of my detected objects?
All of my code is here==>
from os.path import sep
import cv2 as cv2
import numpy as np
import json
# Camera feed
cap_cam = cv2.VideoCapture(0)
ret, frame_cam = cap_cam.read()
hey = 0
print(cv2. __version__)
whT = 320
confThreshold =0.5
nmsThreshold= 0.2
classesFile = "coco.names"
classNames = []
with open(classesFile, 'rt') as f:
classNames = f.read().rstrip('\n').split('\n')
print(classNames)
## Model Files
modelConfiguration = "custom-yolov4-tiny-detector.cfg"
modelWeights = "custom-yolov4-tiny-detector_last.weights"
net = cv2.dnn.readNetFromDarknet(modelConfiguration, modelWeights)
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)
def findObjects(outputs, img):
    """Draw a box around each detection kept by NMS and set the ``hey`` flag.

    Args:
        outputs: Iterable of network output arrays; each row is read as
            ``[cx, cy, w, h, <unused>, class scores...]`` with the box
            values multiplied by the image width/height.
        img: Image the boxes are drawn on (modified in place).

    Side effects:
        Sets the module-level ``hey`` to 1 when at least one box survives
        non-maximum suppression, otherwise to 0.
    """
    global hey
    hT, wT, cT = img.shape
    bbox = []
    confs = []
    for output in outputs:
        for det in output:
            scores = det[5:]
            classId = np.argmax(scores)
            confidence = scores[classId]
            if confidence > confThreshold:
                w, h = int(det[2] * wT), int(det[3] * hT)
                # The network reports the box centre; convert to top-left.
                x, y = int((det[0] * wT) - w / 2), int((det[1] * hT) - h / 2)
                bbox.append([x, y, w, h])
                confs.append(float(confidence))
    indices = cv2.dnn.NMSBoxes(bbox, confs, confThreshold, nmsThreshold)
    hey = 0
    # Depending on the OpenCV version NMSBoxes yields an Nx1 array, a flat
    # array, or an empty tuple; flattening handles all three, whereas the
    # former `i = i[0]` only worked for the Nx1 form.
    for i in np.array(indices).flatten():
        x, y, w, h = bbox[i]
        cv2.rectangle(img, (x, y), (x + w, y + h), (255, 0, 255), 2)
        print('success')
        hey = 1
video_frame_counter = 0
# The output layer names do not change between frames, so resolve them once.
# getUnconnectedOutLayers returns Nx1 or flat indices depending on the OpenCV
# version; flattening accepts both.
layersNames = net.getLayerNames()
outputNames = [layersNames[i - 1] for i in np.array(net.getUnconnectedOutLayers()).flatten()]
cap_vid = None
while cap_cam.isOpened():
    img = cv2.imread('photos' + sep + 'lutfen.jpg')
    # The arrow video will be played here.
    # TODO: try an "if not detection" check here.
    blob = cv2.dnn.blobFromImage(img, 1 / 255, (whT, whT), [0, 0, 0], 1, crop=False)
    net.setInput(blob)
    outputs = net.forward(outputNames)
    findObjects(outputs, img)
    cv2.imshow('Image', img)

    # Video feed: choose the overlay clip from the detection flag.
    filename = None
    if hey == 1:
        filename = 'photos' + sep + 'Baslksz-3.mp4'
    elif hey == 0:
        filename = 'photos' + sep + 'vid2.mp4'
    if filename is not None:
        # Release the previous capture before opening the next one; the old
        # code leaked one VideoCapture per frame.
        if cap_vid is not None:
            cap_vid.release()
        cap_vid = cv2.VideoCapture(filename)
    print(hey)
    ret, frame_vid = cap_vid.read()

    # Resize the camera frame to the size of the video
    height = int(cap_vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    width = int(cap_vid.get(cv2.CAP_PROP_FRAME_WIDTH))

    # Capture the next frame from camera
    ret, frame_cam = cap_cam.read()
    if not ret:
        # Without a camera frame the resize below would raise.
        print('Cannot read from camera')
        break
    video_frame_counter += 1
    if video_frame_counter == cap_vid.get(cv2.CAP_PROP_FRAME_COUNT):
        video_frame_counter = 0
        cap_vid.set(cv2.CAP_PROP_POS_FRAMES, 0)
    frame_cam = cv2.resize(frame_cam, (width, height), interpolation = cv2.INTER_AREA)

    ret, frame_vid = cap_vid.read()
    if not ret:
        print('Cannot read from video stream')
        break

    # Blend the two images and show the result
    tr = 0.4 # transparency between 0-1, show camera if 0
    # np.float was removed from NumPy; np.float64 is the same type.
    frame = ((1-tr) * frame_cam.astype(np.float64) + tr * frame_vid.astype(np.float64)).astype(np.uint8)
    cv2.imshow('Transparent result', frame)
    if cv2.waitKey(1) == 27: # ESC is pressed
        break
cap_cam.release()
if cap_vid is not None:
    cap_vid.release()
cv2.destroyAllWindows()
The easy way
You can use the cv.arrowedLine() function that will draw something similar to what you want. For example, to draw a red arrow above your rectangle:
center_x = x + w//2
cv2.arrowedLine(img, (center_x, y-50), (center_x, y-5), (0,0,255), 2, 8, 0, 0.5)
which should give a result similar to the image below. Take a look at the OpenCV documentation for the description of the parameters of the function. You can change its size, thickness, color, etc.
Custom arrow shape
If you want more control over the shape of your arrow, you can define a contour (vertex by vertex) and use cv.drawContours() to render it. For example:
# define the arrow shape
shape = np.array([[[0,0],[-25,-25],[-10,-25],[-10,-50],
[10,-50],[10,-25],[25,-25]]])
# move it to the desired position
cx = x + w // 2
cy = y - 5
shape[:,:,0] += cx
shape[:,:,1] += cy
# draw it
cv2.drawContours(img, shape, -1, (0, 255, 0), -1)
This snippet will give you the image below. You can adjust the shape by altering the vertices in the shape array, or look at the documentation to change the way OpenCV draws it.

Bad character recognition with Pytesseract OCR for images with table structure

I use a code to locate text boxes and create a rectangle around them. This allows me to rebuild the grid around the table structure in the image.
However, even if the text box detection works very well, if I try to define the characters present in each rectangle, pytesseract does not identify them well and does not allow to find the original text.
Here is my Python code :
import os
import cv2
import imutils
import argparse
import numpy as np
import pytesseract
# This only works if there's only one table on a page
# Important parameters:
# - morph_size
# - min_text_height_limit
# - max_text_height_limit
# - cell_threshold
# - min_columns
def pre_process_image(img, save_in_file, morph_size=(8, 8)):
    """Turn a colour image into a binary image with text merged into blobs.

    Steps: grayscale conversion, inversion when the mean intensity is below
    127, contrast stretch, Otsu threshold, then a dilation of the inverted
    image with a ``morph_size`` rectangle.

    Args:
        img: BGR input image.
        save_in_file: Path to write the result to, or ``None`` to skip saving.
        morph_size: Size of the rectangular structuring element.

    Returns:
        The processed single-channel image.
    """
    # get rid of the color
    pre = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Negative: invert images whose mean intensity is on the dark side.
    if np.mean(pre) < 127:
        print("non")
        pre = cv2.bitwise_not(pre)

    # Contrast & Brightness control
    contrast = 2.0  # 0 to 3
    brightness = 0  # 0 to 100
    # One vectorised expression replaces the per-pixel Python loop; clipping
    # in float and truncating to uint8 gives the same values the loop stored.
    pre = np.clip(contrast * pre.astype(np.float64) + brightness, 0, 255).astype(np.uint8)

    # Otsu threshold.
    # NOTE(review): with THRESH_OTSU the 250 is presumably ignored and the
    # threshold chosen automatically - confirm against the OpenCV docs.
    pre = cv2.threshold(pre, 250, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]

    # dilate the text to make it solid spot
    struct = cv2.getStructuringElement(cv2.MORPH_RECT, morph_size)
    pre = ~cv2.dilate(~pre, struct, anchor=(-1, -1), iterations=1)

    if save_in_file is not None:
        cv2.imwrite(save_in_file, pre)
    return pre
def find_text_boxes(pre, min_text_height_limit=15, max_text_height_limit=40):
    """Return bounding boxes of contours whose height looks like text.

    Args:
        pre: Pre-processed single-channel image.
        min_text_height_limit: Exclusive lower bound on box height.
        max_text_height_limit: Exclusive upper bound on box height.

    Returns:
        List of ``(x, y, w, h)`` boxes in contour order.
    """
    # OpenCV 4 returns (contours, hierarchy); OpenCV 3 returned
    # (img, contours, hierarchy).
    contours, _hierarchy = cv2.findContours(pre, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    rects = (cv2.boundingRect(contour) for contour in contours)
    # Keep only the boxes whose height falls strictly inside the text range.
    return [
        rect for rect in rects
        if min_text_height_limit < rect[3] < max_text_height_limit
    ]
def find_table_in_boxes(boxes, cell_threshold=10, min_columns=2):
    """Group text boxes into table rows.

    Boxes whose ``y`` falls in the same ``cell_threshold``-high band are
    treated as one row.

    Args:
        boxes: Iterable of ``(x, y, w, h)`` boxes.
        cell_threshold: Height of the band used to cluster boxes into rows.
        min_columns: Rows with fewer boxes than this are discarded.

    Returns:
        List of rows ordered by the ``y`` of their first cell; each row is
        a list of boxes sorted in ascending tuple order (``x`` first).
    """
    # Clustering the bounding boxes by their vertical position. The former
    # per-column dict was written under the row key and never read, so it
    # has been removed.
    rows = {}
    for box in boxes:
        (x, y, w, h) = box
        rows.setdefault(y // cell_threshold, []).append(box)

    # Filtering out the clusters having fewer than min_columns cells and
    # sorting each remaining row by x coord.
    table_cells = [sorted(row) for row in rows.values() if len(row) >= min_columns]
    # Sorting rows by the y coord
    table_cells.sort(key=lambda r: r[0][1])
    return table_cells
def build_lines(table_cells):
    """Derive the grid lines of a table from its rows of cells.

    Args:
        table_cells: List of rows, each a list of ``(x, y, w, h)`` boxes,
            or ``None``.

    Returns:
        ``(hor_lines, ver_lines)``; every line is ``(x1, y1, x2, y2)``.
        Both lists are empty when there are no rows.
    """
    if table_cells is None or len(table_cells) == 0:
        return [], []

    first_row = table_cells[0]
    last_row = table_cells[-1]

    # Right edge: the row whose last cell is widest decides max_x.
    widest_row = max(table_cells, key=lambda row: row[-1][2])
    max_x = widest_row[-1][0] + widest_row[-1][2]
    # Bottom edge: the tallest cell of the last row decides max_y.
    tallest_cell = max(last_row, key=lambda cell: cell[3])
    max_y = tallest_cell[1] + tallest_cell[3]

    # One horizontal line along the top of each row, starting at its first cell.
    hor_lines = [(row[0][0], row[0][1], max_x, row[0][1]) for row in table_cells]
    # One vertical line at the left of each cell in the first row.
    ver_lines = [(cell[0], cell[1], cell[0], max_y) for cell in first_row]

    # Closing lines: the right border, then the bottom border.
    ver_lines.append((max_x, first_row[-1][1], max_x, max_y))
    hor_lines.append((first_row[0][0], max_y, max_x, max_y))
    return hor_lines, ver_lines
if __name__ == "__main__":
    # Detect text boxes in a table image, OCR each box with Tesseract, and
    # show and save the annotated results.
    ap = argparse.ArgumentParser()
    ap.add_argument("-i", "--image", required=True,
                    help="path to input image to be OCR'd")
    args = vars(ap.parse_args())

    in_file = os.path.join("images", args["image"])
    pre_file = os.path.join("images", "pre.png")
    out_file = os.path.join("images", "out.png")

    img = cv2.imread(in_file)
    if img is None:
        # imread returns None instead of raising when the file cannot be read.
        raise SystemExit("Could not read image: {}".format(in_file))

    # Pad the image so boxes touching the edge keep some margin.
    top, bottom, left, right = [25]*4
    img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_REPLICATE)
    orig = img.copy()

    pre_processed = pre_process_image(img, pre_file)
    text_boxes = find_text_boxes(pre_processed)
    cells = find_table_in_boxes(text_boxes)
    # Grid lines of the table; computed but not drawn at the moment.
    hor_lines, ver_lines = build_lines(cells)

    # Tesseract settings are the same for every box, so set them once.
    pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files (x86)\Tesseract-OCR\tesseract.exe'
    config = ("-l eng --psm 6")

    # Visualize the result
    vis = img.copy()
    results = []
    for box in text_boxes:
        (x, y, w, h) = box
        # Clamp at 0: a negative start would wrap around in NumPy slicing.
        startX = max(x - 2, 0)
        startY = max(y - 2, 0)
        endX = x + w
        endY = y + h
        cv2.rectangle(vis, (startX, startY), (endX, endY), (0, 255, 0), 1)
        # Images are indexed [row, column], i.e. [y, x]. The former
        # orig[startX:endX, startY:endY] cut out the wrong region, so
        # Tesseract never saw the text inside the box.
        roi = orig[startY:endY, startX:endX]
        text = pytesseract.image_to_string(roi, config=config)
        results.append(((startX, startY, endX, endY), text))

    # Report the boxes from top to bottom.
    results = sorted(results, key=lambda r: r[0][1])
    output = orig.copy()
    for ((startX, startY, endX, endY), text) in results:
        print("{}\n".format(text))
        # putText cannot render non-ASCII characters, so drop them.
        text = "".join([c if ord(c) < 128 else "" for c in text]).strip()
        cv2.rectangle(output, (startX, startY), (endX, endY), (0, 0, 255), 1)
        cv2.putText(output, text, (startX, startY - 20), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 3)

    cv2.imwrite(out_file, vis)
    cv2.imshow("Text Detection", output)
    cv2.waitKey(0)
Initial image :
Initial image
Preprocessed image with detection of text outlines to define the dimensions of rectangles :
Preprocessed image with detection of text outlines to define the dimensions of rectangles
Final image :
Final image
Result obtained by OCR:
"
a
ra
at
12
1
"
Thank you in advance for your help, hope my description is clear enough.
When performing OCR, it is extremely important to preprocess the image so that the foreground text is black and the background is white. In addition, enlarging the image can help improve the detection results. I've also found that adding a slight Gaussian blur improves accuracy before throwing it into Pytesseract. Here are the results with --psm 6, which treats the image as a single block of text. Look here for more configuration options.
Preprocessed enlarged, thresholded, and slightly blurred image
Results from Pytesseract OCR
Series Type Scan Range CTDIvol DLP Phantom
(mm) (mGy) — (mGy-cm) cm
1 Scout - - - -
1 Scout - - - -
2 Axial = 113.554-1272.929 11.22 269.35 Body 32
Total Exam DLP: = 269.35
1/1
Code
import cv2
import pytesseract
import imutils
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
image = cv2.imread('1.jpg')
image = imutils.resize(image, width=700)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
thresh = cv2.GaussianBlur(thresh, (3,3), 0)
data = pytesseract.image_to_string(thresh, lang='eng', config='--psm 6')
print(data)
cv2.imshow('thresh', thresh)
cv2.imwrite('thresh.png', thresh)
cv2.waitKey()

Read multi images in a folder python

I am trying to read multiple images in a folder and do some processing. I have code that extracts facial landmark coordinates, but I can apply it to only one image. I want the script to work with all images in the folder. I have read some solutions but they didn't work for me. Can you tell me how I can apply a loop for this?
This is my code:
import numpy as np
import cv2
import dlib
import os
from glob import glob
mouth_matrice= open("C:/Users/faruk/Desktop/matrices/mouth.txt","w")
lefteye_matrice= open("C:/Users/faruk/Desktop/matrices/lefteye.txt","w")
righteye_matrice= open("C:/Users/faruk/Desktop/matrices/righteye.txt","w")
cascPath = ("C:/opencv/sources/data/haarcascades_cuda/haarcascade_frontalface_default.xml")
all_matrice= open("C:/Users/faruk/Desktop/matrices/all.txt","w")
#imagePath = ("C:/Users/faruk/Desktop/Dataset/Testing/342_spontaneous_smile_4 (2-17-2018 8-37-58 PM)/342_spontaneous_smile_4 357.jpg")
mypath=os.path.join("c:", os.sep, "Users", "faruk", "Desktop", "Dataset","Testing2")
PREDICTOR_PATH = ("C:/Users/faruk/Desktop/Working projects/facial-landmarks/shape_predictor_68_face_landmarks.dat")
JAWLINE_POINTS = list(range(0, 17))
RIGHT_EYEBROW_POINTS = list(range(17, 22))
LEFT_EYEBROW_POINTS = list(range(22, 27))
NOSE_POINTS = list(range(27, 36))
#RIGHT_EYE_POINTS = list(range(36, 42))
RIGHT_EYE_POINTS = list([36,39])
ALL_POINTS= list([36,39,42,45,48,51,54,57])
##LEFT_EYE_POINTS = list(range(42, 48))
LEFT_EYE_POINTS = list([42, 45])
##MOUTH_OUTLINE_POINTS = list(range(48, 61))
MOUTH_OUTLINE_POINTS = list([48,51,54,57])
MOUTH_INNER_POINTS = list(range(61, 68))
# Create the haar cascade
faceCascade = cv2.CascadeClassifier(cascPath)
predictor = dlib.shape_predictor(PREDICTOR_PATH)
# Read the image
cv2.namedWindow('Landmarks found',cv2.WINDOW_NORMAL)
cv2.resizeWindow('Landmarks found', 800,800)
image = cv2.imread(imagePath)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# Detect faces in the image
faces = faceCascade.detectMultiScale(
gray,
scaleFactor=1.05,
minNeighbors=5,
minSize=(100, 100),
flags=cv2.CASCADE_SCALE_IMAGE
)
print("Found {0} faces!".format(len(faces)))
for (x, y, w, h) in faces:
cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)
# Converting the OpenCV rectangle coordinates to Dlib rectangle
dlib_rect = dlib.rectangle(int(x), int(y), int(x + w), int(y + h))
landmarks = np.matrix([[p.x, p.y]
for p in predictor(image, dlib_rect).parts()])
#landmarks_display = landmarks[LEFT_EYE_POINTS]
landmarks_display = np.matrix(landmarks[ALL_POINTS])
for idx, point in enumerate(landmarks_display):
pos = (point[0, 0], point[0, 1])
cv2.circle(image, pos, 2, color=(0, 255, 255), thickness=-1)
np.savetxt(all_matrice,landmarks_display,fmt='%.f',newline=',')
all_matrice.close()
# Draw a rectangle around the faces
cv2.imshow("Landmarks found", image)
cv2.waitKey(0)
You can use something like this to get paths of all images in a directory:
import os
# Folder with images
directory = 'c:/users/username/path/'
for filename in os.listdir(directory):
if filename.endswith(".jpg"):
image_path = os.path.join(directory, filename)
# Your code
continue
else:
continue
You need to add your code and process each path.
Hope this helps.
Edit:
I have no way to test it and it certainly needs a cleanup but might just work. Not sure what image extensions you want to include so i only included jpg.
import os
import numpy as np
import cv2
import dlib
# Chage directory path to the path of your image folder
directory = 'c:/users/admin/desktop/'
mouth_matrice= open("C:/Users/faruk/Desktop/matrices/mouth.txt","w")
lefteye_matrice= open("C:/Users/faruk/Desktop/matrices/lefteye.txt","w")
righteye_matrice= open("C:/Users/faruk/Desktop/matrices/righteye.txt","w")
cascPath = ("C:/opencv/sources/data/haarcascades_cuda/haarcascade_frontalface_default.xml")
all_matrice= open("C:/Users/faruk/Desktop/matrices/all.txt","w")
mypath=os.path.join("c:", os.sep, "Users", "faruk", "Desktop", "Dataset","Testing2")
PREDICTOR_PATH = ("C:/Users/faruk/Desktop/Working projects/facial-landmarks/shape_predictor_68_face_landmarks.dat")
JAWLINE_POINTS = list(range(0, 17))
RIGHT_EYEBROW_POINTS = list(range(17, 22))
LEFT_EYEBROW_POINTS = list(range(22, 27))
NOSE_POINTS = list(range(27, 36))
#RIGHT_EYE_POINTS = list(range(36, 42))
RIGHT_EYE_POINTS = list([36,39])
ALL_POINTS= list([36,39,42,45,48,51,54,57])
##LEFT_EYE_POINTS = list(range(42, 48))
LEFT_EYE_POINTS = list([42, 45])
##MOUTH_OUTLINE_POINTS = list(range(48, 61))
MOUTH_OUTLINE_POINTS = list([48,51,54,57])
MOUTH_INNER_POINTS = list(range(61, 68))
# Create the haar cascade
faceCascade = cv2.CascadeClassifier(cascPath)
predictor = dlib.shape_predictor(PREDICTOR_PATH)
# Run face detection and landmark extraction on every .jpg in `directory`,
# appending the selected landmark coordinates of each face to all_matrice.
for filename in os.listdir(directory):
    if not filename.endswith(".jpg"):
        continue
    imagePath = os.path.join(directory, filename)
    cv2.namedWindow('Landmarks found', cv2.WINDOW_NORMAL)
    cv2.resizeWindow('Landmarks found', 800, 800)
    image = cv2.imread(imagePath)
    if image is None:
        # imread returns None for unreadable files; cvtColor would raise.
        print("Could not read {0}".format(imagePath))
        continue
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Detect faces in the image
    faces = faceCascade.detectMultiScale(
        gray,
        scaleFactor=1.05,
        minNeighbors=5,
        minSize=(100, 100),
        flags=cv2.CASCADE_SCALE_IMAGE
    )
    print("Found {0} faces!".format(len(faces)))
    for (x, y, w, h) in faces:
        # Draw a rectangle around the face
        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)
        # Converting the OpenCV rectangle coordinates to Dlib rectangle
        dlib_rect = dlib.rectangle(int(x), int(y), int(x + w), int(y + h))
        landmarks = np.matrix([[p.x, p.y] for p in predictor(image, dlib_rect).parts()])
        landmarks_display = np.matrix(landmarks[ALL_POINTS])
        for idx, point in enumerate(landmarks_display):
            pos = (point[0, 0], point[0, 1])
            cv2.circle(image, pos, 2, color=(0, 255, 255), thickness=-1)
        np.savetxt(all_matrice, landmarks_display, fmt='%.f', newline=',')
    cv2.imshow("Landmarks found", image)
    cv2.waitKey(0)
# Close the output only after every image has been processed; closing it
# inside the loop made every later savetxt fail on a closed file.
all_matrice.close()
P.s You should try and learn basic programming concepts before you try to tackle something like face recognition or image processing.

Face Recoginition Python open cv

Is there any way i can make my own train set for face recognition in python ? To be more specific i want to make a train set like an AT&T Face database. I want my camera to take 20 images of each person(30 max) and store it in the separate folders by the name of each person.
import cv2, sys, numpy, os
size = 4
fn_haar = 'haarcascade_frontalface_default.xml'
fn_dir = 'att_faces'
fn_name = sys.argv[1]
path = os.path.join(fn_dir, fn_name)
if not os.path.isdir(path):
os.mkdir(path)
(im_width, im_height) = (112, 92)
haar_cascade = cv2.CascadeClassifier(fn_haar)
webcam = cv2.VideoCapture(0)
# The program loops until it has 20 images of the face.
count = 0
while count < 20:
(rval, im) = webcam.read()
im = cv2.flip(im, 1, 0)
gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
mini = cv2.resize(gray, (gray.shape[1] / size, gray.shape[0] / size))
faces = haar_cascade.detectMultiScale(mini)
faces = sorted(faces, key=lambda x: x[3])
if faces:
face_i = faces[0]
(x, y, w, h) = [v * size for v in face_i]
face = gray[y:y + h, x:x + w]
face_resize = cv2.resize(face, (im_width, im_height))
pin=sorted([int(n[:n.find('.')]) for n in os.listdir(path)
if n[0]!='.' ]+[0])[-1] + 1
cv2.imwrite('%s/%s.png' % (path, pin), face_resize)
cv2.rectangle(im, (x, y), (x + w, y + h), (0, 255, 0), 3)
cv2.putText(im, fn_name, (x - 10, y - 10), cv2.FONT_HERSHEY_PLAIN,
1,(0, 255, 0))
count += 1
cv2.imshow('OpenCV', im)
key = cv2.waitKey(10)
if key == 27:
break
For this, you just need to provide a path in which to save all the images, with a .png, .bmp or .jpg extension, numbered in sorted order.
# train.py
import cv2, sys, numpy, os
# Capture face samples from the webcam and store them as numbered PNG files
# in face_data/<name>, where <name> is the first command-line argument.
size = 4
fn_haar = 'haarcascade_frontalface_default.xml'
fn_dir = 'face_data'
# sys.argv[0] is the script itself; the person's name is the first argument.
fn_name = sys.argv[1]
path = os.path.join(fn_dir, fn_name)
# The directory listing below requires the folder to exist.
os.makedirs(path, exist_ok=True)
(im_width, im_height) = (112, 92)
haar_cascade = cv2.CascadeClassifier(fn_haar)
webcam = cv2.VideoCapture(0)

# Generate name for image file: one more than the highest existing number.
pin = max([int(n[:n.find('.')]) for n in os.listdir(path)
           if n[0] != '.'] + [0]) + 1

# Beginning message
print("\n\033[94mThe program will save 20 samples. \
Move your head around to increase while it runs.\033[0m\n")

# The program loops until it has 20 images of the face.
count = 0
pause = 0
count_max = 20
while count < count_max:
    # Loop until the camera is working
    rval = False
    while(not rval):
        # Put the image from the webcam into 'frame'
        (rval, frame) = webcam.read()
        if(not rval):
            print("Failed to open webcam. Trying again...")

    # Get image size
    height, width, channels = frame.shape
    # Flip frame
    frame = cv2.flip(frame, 1, 0)
    # Convert to grayscale
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    # Scale down for speed
    mini = cv2.resize(gray, (int(gray.shape[1] / size), int(gray.shape[0] / size)))
    # Detect faces
    faces = haar_cascade.detectMultiScale(mini)
    # We only consider the largest face. The list is sorted by ascending
    # height, so the largest is the last entry (faces[0] was the smallest).
    faces = sorted(faces, key=lambda x: x[3])
    if faces:
        face_i = faces[-1]
        # Detection ran on the downscaled image; scale the box back up.
        (x, y, w, h) = [v * size for v in face_i]
        face = gray[y:y + h, x:x + w]
        face_resize = cv2.resize(face, (im_width, im_height))
        # Draw rectangle and write name
        cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 3)
        cv2.putText(frame, fn_name, (x - 10, y - 10), cv2.FONT_HERSHEY_PLAIN,
                    1, (0, 255, 0))
        # Remove false positives
        if(w * 6 < width or h * 6 < height):
            print("Face too small")
        else:
            # To create diversity, only save every fifth detected image
            if(pause == 0):
                print("Saving training sample "+str(count+1)+"/"+str(count_max))
                # Save image file
                cv2.imwrite('%s/%s.png' % (path, pin), face_resize)
                pin += 1
                count += 1
                pause = 1
    # NOTE(review): assumed to run once per frame (the original indentation
    # was lost) - confirm.
    if(pause > 0):
        pause = (pause + 1) % 5
    cv2.imshow('OpenCV', frame)
    key = cv2.waitKey(10)
    if key == 27:
        break

# Free the camera and close the preview window.
webcam.release()
cv2.destroyAllWindows()
This code will help you get cropped face images from the webcam and store them in a directory named face_data for training purposes.
If you don't want to build your dataset from the webcam, you can instead do the following:
create a directory containing 5-6 sub-directories, e.g. Happy, Sad, Angry, Neutral, Calm, etc.
Download the images and put them in the corresponding folders for training purposes, then use this code.
## This program first checks whether a person's face exists in the given image;
## if it does, it crops the face
## out of the image and saves it to the given directory.
## Importing Modules
import cv2
import os
directory = "C:\\Users\\hp"
## directory where the images to be saved:
f_directory = "C:\\Users\\hp\\face_data/"
def facecrop(image):
    """Crop every detected face out of an image file and save the crops.

    Args:
        image: Path of the image file to process.

    The first face is written to ``f_directory`` under the input's file
    name; further faces get ``_1``, ``_2``, ... appended to the stem so
    they no longer overwrite one another. Unreadable images and OpenCV
    errors are reported and skipped.
    """
    ## OpenCV XML FILE for Frontal Facial Detection using HAAR CASCADES.
    facedata = "C:\\opencv\\build\\etc\\haarcascades\\haarcascade_frontalface_default.xml"
    cascade = cv2.CascadeClassifier(facedata)

    ## Reading the given Image with OpenCV
    img = cv2.imread(image)
    if img is None:
        # imread returns None when the file is missing or not an image.
        print("Skipping unreadable image: " + image)
        return

    # basename copes with both "/" and the platform's own separator, unlike
    # splitting on "/" alone.
    stem, ext = os.path.splitext(os.path.basename(image))
    try:
        faces = cascade.detectMultiScale(img)
        for idx, (x, y, w, h) in enumerate(faces):
            # Copy the crop before drawing so the rectangle is not saved.
            sub_face = img[y:y+h, x:x+w].copy()
            cv2.rectangle(img, (x, y), (x+w, y+h), (0, 255, 0), 2)
            suffix = "" if idx == 0 else "_" + str(idx)
            ## Change here the Desired directory.
            cv2.imwrite(f_directory + stem + suffix + ext, sub_face)
        print("Writing: " + image)
    except cv2.error as err:
        # Stay best-effort across a folder, but say what went wrong.
        print("Failed on " + image + ": " + str(err))


if __name__ == '__main__':
    # Process every entry of the input directory, printing a running index.
    for i, img_name in enumerate(os.listdir(directory)):
        print(i)
        # os.path.join inserts the separator that `directory + img` lacked.
        facecrop(os.path.join(directory, img_name))

Categories

Resources