I'm using keypoint detection to find text within a game.
The background in the images below is dynamic: it's always a vaguely moving, star-lit sky that you can barely see.
The detection works well when the text is white:
However, when the text is purple (it's unpredictable when this happens), the detection fails entirely:
The object I'm trying to detect and the image I'm running detection on are identical: the needle screenshots are taken directly from within the game (i.e. the text above), and detection is then run on the exact same location the original screenshots were taken from.
I wrote the code below using the official documentation I found here and here as a guide, but it's very light on explanation.
Question: Is this an inherent limitation, or is there something I can adjust to detect keypoints within the purple image?
import cv2 as cv
import win32gui, win32con, win32ui
import numpy as np
import glob
def get_haystack_image():
    w, h = 1920, 1080
    hwnd = None
    wDC = win32gui.GetWindowDC(hwnd)
    dcObj = win32ui.CreateDCFromHandle(wDC)
    cDC = dcObj.CreateCompatibleDC()
    dataBitMap = win32ui.CreateBitmap()
    dataBitMap.CreateCompatibleBitmap(dcObj, w, h)
    cDC.SelectObject(dataBitMap)
    cDC.BitBlt((0, 0), (w, h), dcObj, (0, 0), win32con.SRCCOPY)
    signedIntsArray = dataBitMap.GetBitmapBits(True)
    img = np.frombuffer(signedIntsArray, dtype='uint8')
    img.shape = (h, w, 4)
    dcObj.DeleteDC()
    cDC.DeleteDC()
    win32gui.ReleaseDC(hwnd, wDC)
    win32gui.DeleteObject(dataBitMap.GetHandle())
    img = img[..., :3]
    img = np.ascontiguousarray(img)
    return img
def loadImages(directory):
    # Initialise empty list
    image_list = []
    # Add images to the list
    for i in directory:
        img = cv.imread(i, cv.IMREAD_UNCHANGED)
        image_list.append((img, i))
    return image_list
def preProcessNeedle(image_list):
    needle_kp1_desc = []
    for i in image_list:
        img = i[0]
        orb = cv.ORB_create(edgeThreshold=0, patchSize=32)
        keypoint_needle, descriptors_needle = orb.detectAndCompute(img, None)
        needle_kp1_desc.append((keypoint_needle, descriptors_needle, img))
    return needle_kp1_desc
def match_keypoints(descriptors_needle, haystack_img, min_match_count):
    orbHaystack = cv.ORB_create(edgeThreshold=0, patchSize=32, nfeatures=3000)
    keypoints_haystack, descriptors_haystack = orbHaystack.detectAndCompute(haystack_img, None)
    FLANN_INDEX_LSH = 6
    index_params = dict(algorithm=FLANN_INDEX_LSH, table_number=6, key_size=12, multi_probe_level=1)
    search_params = dict(checks=50)
    try:
        flann = cv.FlannBasedMatcher(index_params, search_params)
        matches = flann.knnMatch(descriptors_needle, descriptors_haystack, k=2)
    except cv.error:
        return None, [], []
    good = []
    points = []
    for pair in matches:
        if len(pair) == 2:
            if pair[0].distance < 0.7 * pair[1].distance:
                good.append(pair[0])
    if len(good) > min_match_count:
        for match in good:
            points.append(keypoints_haystack[match.trainIdx].pt)
    return keypoints_haystack, good, points
def shipDetection(needle_kp1_desc):
    res = False
    # Object Detection
    for i, img in enumerate(needle_kp1_desc):
        kp1 = img[0]
        descriptors_needle = img[1]
        needle_img = img[2]
        # get an updated image of the screen & crop it
        keypoint_haystack = get_haystack_image()
        keypoint_haystack = keypoint_haystack[40:110, 850:1000]
        kp2, matches, match_points = match_keypoints(descriptors_needle, keypoint_haystack, min_match_count=40)
        # display the matches
        match_image = cv.drawMatches(needle_img, kp1, keypoint_haystack, kp2, matches, None)
        cv.imshow('Keypoint Search', match_image)
        cv.moveWindow("Keypoint Search", 1940, 30)
        cv.waitKey(1)
        if match_points:
            # removed code as irrelevant to detection but left comments in
            # find the center point of all the matched features
            # account for the width of the needle image that appears on the left
            # draw the found center point on the output image
            # display the processed image
            cv.imshow('Keypoint Search', match_image)
            cv.waitKey(1)
            res = True
            break
    return res
ships_to_avoid = loadImages(glob.glob(r"C:\Users\*.png"))
needle_kp1_desc = preProcessNeedle(ships_to_avoid)
if shipDetection(needle_kp1_desc):
    # do something with the output
    pass
Isolating the red channel, converting to grayscale and applying binary thresholding has normalised the results: they're all now a consistent white, which my detection successfully identifies.
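For reference, a minimal single-image sketch of that transform (the file path below is just a placeholder); the batch version follows:
import cv2 as cv
# isolate the red channel, then grayscale and binary-threshold the result
img = cv.imread(r"C:\Users\pre\needle.png")   # placeholder path
r = img.copy()
r[:, :, 0] = 0                                # zero the blue channel
r[:, :, 1] = 0                                # zero the green channel
gray = cv.cvtColor(r, cv.COLOR_BGR2GRAY)
(T, thresh) = cv.threshold(gray, 40, 255, cv.THRESH_BINARY)
cv.imshow('normalised', thresh)
cv.waitKey(0)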
apply_thresholding performs this pre-processing on a folder: it saves the processed images from image_dir to output_dir and then deletes the unprocessed images from image_dir.
import os

def apply_thresholding():
    # get directory path where the images are stored
    image_dir = r"C:\Users\pre"
    # get directory path where you want to save the images
    output_dir = r"C:\Users\post"
    # iterate through all the files in the image directory
    for _, _, image_names in os.walk(image_dir):
        # iterate through all the files in image_dir
        for image_name in image_names:
            # check for the .png extension
            if '.png' in image_name:
                # get image read path (the path should not contain spaces)
                filepath = os.path.join(image_dir, image_name)
                # get image write path
                dstpath = os.path.join(output_dir, image_name)
                print(filepath, dstpath)
                # read the image
                image = cv.imread(filepath)
                r = image.copy()
                # set blue and green channels to 0
                r[:, :, 0] = 0
                r[:, :, 1] = 0
                # convert to grayscale now we've dropped the b and g channels
                gray = cv.cvtColor(r, cv.COLOR_BGR2GRAY)
                # apply binary thresholding
                (T, thresh) = cv.threshold(gray, 40, 255, cv.THRESH_BINARY)
                # write the image to a different path with the same name
                cv.imwrite(dstpath, thresh)
    # delete the unprocessed images
    files = glob.glob(r"C:\Users\pre\*")
    for f in files:
        os.remove(f)
I then applied the same channel isolation, grayscale conversion and binary thresholding to my detection area.
def get_haystack_image():
    w, h = 1920, 1080
    hwnd = None
    wDC = win32gui.GetWindowDC(hwnd)
    dcObj = win32ui.CreateDCFromHandle(wDC)
    cDC = dcObj.CreateCompatibleDC()
    dataBitMap = win32ui.CreateBitmap()
    dataBitMap.CreateCompatibleBitmap(dcObj, w, h)
    cDC.SelectObject(dataBitMap)
    cDC.BitBlt((0, 0), (w, h), dcObj, (0, 0), win32con.SRCCOPY)
    signedIntsArray = dataBitMap.GetBitmapBits(True)
    img = np.frombuffer(signedIntsArray, dtype='uint8')
    img.shape = (h, w, 4)
    dcObj.DeleteDC()
    cDC.DeleteDC()
    win32gui.ReleaseDC(hwnd, wDC)
    win32gui.DeleteObject(dataBitMap.GetHandle())
    img = img[..., :3]
    img = np.ascontiguousarray(img)
    r = img.copy()
    # set blue and green channels to 0
    r[:, :, 0] = 0
    r[:, :, 1] = 0
    # convert to grayscale now we've dropped the b and g channels
    gray = cv.cvtColor(r, cv.COLOR_BGR2GRAY)
    # apply binary thresholding
    (T, img) = cv.threshold(gray, 40, 255, cv.THRESH_BINARY)
    return img
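For completeness, a minimal sketch of how the pieces fit together after this change, assuming the functions defined earlier and the post-processed needle folder written by apply_thresholding:
# run the ORB pipeline against the pre-processed needles and the
# thresholded haystack returned by the modified get_haystack_image()
ships_to_avoid = loadImages(glob.glob(r"C:\Users\post\*.png"))
needle_kp1_desc = preProcessNeedle(ships_to_avoid)
if shipDetection(needle_kp1_desc):
    print("needle detected in haystack")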
import cv2
import pytesseract
from PIL import Image

def resolve(img_path):
    image = Image.open(img_path)
    new_image = Image.new("RGBA", image.size, "WHITE")  # Create a white rgba background
    new_image.paste(image, (0, 0), image)               # Paste the image on the background
    new_image.convert('RGB').save(img_path, "JPEG")     # Save as JPEG
    enhancedImage = enhance(img_path)
    return pytesseract.image_to_string(img_path)

def enhance(img_path):
    image1 = cv2.imread(img_path)
    # print(image1)
    img = cv2.cvtColor(image1, cv2.COLOR_BGR2GRAY)
    ret, thresh1 = cv2.threshold(img, 180, 255, cv2.THRESH_BINARY_INV)
    # thresh = 50
    # im_bw = cv2.threshold(thresh3, thresh, 255, cv2.THRESH_BINARY)[1]
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (1, 2))
    erosion = cv2.erode(thresh1, kernel, iterations=1)
    return erosion
I'm trying to solve the captchas in the above images. I tried converting the transparent background to white and then enhancing the image, but the results are not correct.
Let me start with a potential problem in your code:
def enhance(img_path):
    image1 = cv2.imread(img_path)
Now, if you read it with imread the result will be:
You can't read it with pytesseract from the output image.
This is a known issue stated in this answer: cv2 imread transparency gone
As mentioned in the answer:
put a white image behind the transparent one and with that you solve the problem.
We will apply the same technique and now result will be:
As for the second image result will be:
We will be doing the following steps for efficiently reading from the output image:
Resize the image
Apply adaptive-threshold
For the first image the result will be:
For the second image the result will be:
Now when you read it with pytesseract using mode 6 (modes), the result will be:
3daab
b42yb
Code:
import cv2
from PIL import Image
from pytesseract import image_to_string

def save_transparent_image(image_path, save_name):
    image = Image.open(image_path).convert("RGBA")
    canvas = Image.new(mode='RGBA',
                       size=image.size, color=(255, 255, 255, 255))
    canvas.paste(image, mask=image)
    canvas.save(save_name, format="PNG")

img_lst = ["o3upS.png", "kGpYk.png"]

for i, img_name in enumerate(img_lst):
    save_image = "captcha" + str(i) + ".png"
    save_transparent_image(img_name, save_image)

    # Step 1: Resize the image
    img = cv2.imread(save_image)
    (h, w) = img.shape[:2]
    img = cv2.resize(img, (w*2, h*2))

    # Step 2: Apply adaptive-threshold
    gry = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    thr = cv2.adaptiveThreshold(gry, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
                                cv2.THRESH_BINARY, 33, 79)

    # Step 3: Read the threshold image
    txt = image_to_string(thr, config="--psm 6")
    txt = txt.split("\n")[0]
    print(txt)
Question: Will this code work for other captchas?
No, it won't, unless the captchas are similar to the given example. You need to change the adaptive-threshold's block-size and C parameters and see if it works.
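As a rough illustration only (the parameter ranges below are arbitrary), you could sweep a few block-size / C combinations on a new captcha and see which one tesseract reads best:
import cv2
from pytesseract import image_to_string

gry = cv2.cvtColor(cv2.imread("captcha0.png"), cv2.COLOR_BGR2GRAY)
for block_size in (11, 21, 33, 51):   # blockSize must be odd and greater than 1
    for c in (20, 40, 60, 79):
        thr = cv2.adaptiveThreshold(gry, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
                                    cv2.THRESH_BINARY, block_size, c)
        print(block_size, c, repr(image_to_string(thr, config="--psm 6").strip()))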
Hey, I started working with Tesseract OCR, but I'm having problems getting the text from really simple RGB images.
It works just fine with text2image images.
Here is my code:
from PIL import Image
import pytesseract
import argparse
import cv2
import os
import sys
class wordExtractor():
    def __init__(self, image_path):
        self.image_path = image_path
        pytesseract.pytesseract.tesseract_cmd = r'/home/yarin/tesseract/bin/debug/tesseract'
        #self.resize_image()

    def resize_image(self):
        basewidth = 800
        img = Image.open(self.image_path)
        wpercent = (basewidth/float(img.size[0]))
        hsize = int((float(img.size[1])*float(wpercent)))
        img = img.resize((basewidth, hsize), Image.ANTIALIAS)
        os.remove(self.image_path)
        img.save(self.image_path[:-4] + '.png')
        self.image_path = self.image_path[:-4] + '.png'

    def get_text(self, lang):
        # load the example image and convert it to grayscale
        image = cv2.imread(self.image_path)
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        # check to see if we should apply thresholding to preprocess the
        # image
        #if args["preprocess"] == "thresh":
        gray = cv2.threshold(gray, 0, 255,
                             cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
        # make a check to see if median blurring should be done to remove
        # noise
        #elif args["preprocess"] == "blur":
        #    gray = cv2.medianBlur(gray, 3)
        # write the grayscale image to disk as a temporary file so we can
        # apply OCR to it
        filename = "{}.png".format(os.getpid())
        cv2.imwrite(filename, gray)
        # load the image as a PIL/Pillow image, apply OCR, and then delete
        # the temporary file
        text = pytesseract.image_to_string(Image.open(filename), lang='eng')
        os.remove(filename)
        return text
        # show the output images
        #cv2.imshow("Image", image)
        #cv2.imshow("Output", gray)
        #cv2.waitKey(0)

w = wordExtractor('6.png')
print(w.get_text('eng'))
Tesseract returns an empty string for the following images:
Please show me how I can solve this. Thanks in advance!
After thresholding, you can use findContours to find the contour of each shape. Then you can filter the contours and paste every contour you are interested in onto a blank white image. By then, you will have the letters ready to process with tesseract. You can see the details in the code below.
import cv2
import numpy as np
import pytesseract

# img = cv2.imread("dwLFQ.png", cv2.IMREAD_COLOR)
img = cv2.imread("NfwY4.png", cv2.IMREAD_COLOR)
# img = cv2.imread("xTH6s.png", cv2.IMREAD_COLOR)

gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

items = cv2.findContours(thresh, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
contours = items[0] if len(items) == 2 else items[1]

base = np.zeros(thresh.shape, dtype=np.uint8)
base = cv2.bitwise_not(base)

max_area = 0
for i in range(len(contours)):
    x, y, w, h = cv2.boundingRect(contours[i])
    ratio = h / w
    area = cv2.contourArea(contours[i])
    cv2.drawContours(img, [contours[i]], 0, (255, 0, 0), 2)

    if 1 < ratio < 3:
        max_area = max(area, max_area)
        print("area: " + str(area) + ", max area: " + str(max_area) + ", ratio: " + str(ratio))
        # if 1000 < area < max_area / 2:
        if 1000 < area < 40000:
            mask = np.zeros(thresh.shape, dtype=np.uint8)
            cv2.drawContours(mask, [contours[i]], -1, color=255, thickness=-1)
            mean = cv2.mean(thresh, mask=mask)

            segment = np.zeros((h, w), dtype=np.uint8)
            segment[:h, :w] = thresh[y:y + h, x:x + w]

            if mean[0] > 150:
                # white, invert
                segment = cv2.bitwise_not(segment)

            base[y:y + h, x:x + w] = segment[:h, :w]
            cv2.imshow("base", base)
            cv2.drawContours(img, [contours[i]], 0, (255, 0, 0), 2)
            cv2.waitKey(0)

custom_config = r'-l eng --oem 3 --psm 6 -c tessedit_char_whitelist="ABCDEFGHIJKLMNOPQRSTUVWXYZ " '
text = pytesseract.image_to_string(base, config=custom_config)
print("detected: " + text)

cv2.imshow("img", img)
cv2.imshow("base", base)
cv2.waitKey(0)
cv2.destroyAllWindows()
Result
detected: NO
ENTRY
I use code to locate text boxes and create a rectangle around each of them. This allows me to rebuild the grid around the table structure in the image.
However, even though the text box detection works very well, when I try to read the characters present in each rectangle, pytesseract does not identify them well and I cannot recover the original text.
Here is my Python code:
import os
import cv2
import imutils
import argparse
import numpy as np
import pytesseract
# This only works if there's only one table on a page
# Important parameters:
# - morph_size
# - min_text_height_limit
# - max_text_height_limit
# - cell_threshold
# - min_columns
def pre_process_image(img, save_in_file, morph_size=(8, 8)):
    # get rid of the color
    pre = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    def img_estim(img, threshold=127):
        is_dark = np.mean(img) < threshold
        return True if is_dark else False

    # Negative
    if img_estim(pre):
        print("non")
        pre = cv2.bitwise_not(pre)

    # Contrast & Brightness control
    contrast = 2.0   # 0 to 3
    brightness = 0   # 0 to 100
    for y in range(pre.shape[0]):
        for x in range(pre.shape[1]):
            pre[y, x] = np.clip(contrast * pre[y, x] + brightness, 0, 255)

    # Otsu threshold
    pre = cv2.threshold(pre, 250, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]

    # dilate the text to make it a solid spot
    cpy = pre.copy()
    struct = cv2.getStructuringElement(cv2.MORPH_RECT, morph_size)
    cpy = cv2.dilate(~cpy, struct, anchor=(-1, -1), iterations=1)
    pre = ~cpy

    if save_in_file is not None:
        cv2.imwrite(save_in_file, pre)
    return pre
def find_text_boxes(pre, min_text_height_limit=15, max_text_height_limit=40):
    # Looking for the text spot contours
    # OpenCV 3
    # img, contours, hierarchy = cv2.findContours(pre, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    # OpenCV 4
    contours, hierarchy = cv2.findContours(pre, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)

    # Getting the text bounding boxes based on the text size assumptions
    boxes = []
    for contour in contours:
        box = cv2.boundingRect(contour)
        h = box[3]
        if min_text_height_limit < h < max_text_height_limit:
            boxes.append(box)
    return boxes
def find_table_in_boxes(boxes, cell_threshold=10, min_columns=2):
    rows = {}
    cols = {}

    # Clustering the bounding boxes by their positions
    for box in boxes:
        (x, y, w, h) = box
        col_key = x // cell_threshold
        row_key = y // cell_threshold
        cols[col_key] = [box] if col_key not in cols else cols[col_key] + [box]
        rows[row_key] = [box] if row_key not in rows else rows[row_key] + [box]

    # Filtering out the clusters having less than 2 cols
    table_cells = list(filter(lambda r: len(r) >= min_columns, rows.values()))
    # Sorting the row cells by x coord
    table_cells = [list(sorted(tb)) for tb in table_cells]
    # Sorting rows by the y coord
    table_cells = list(sorted(table_cells, key=lambda r: r[0][1]))

    return table_cells
def build_lines(table_cells):
    if table_cells is None or len(table_cells) <= 0:
        return [], []

    max_last_col_width_row = max(table_cells, key=lambda b: b[-1][2])
    max_x = max_last_col_width_row[-1][0] + max_last_col_width_row[-1][2]

    max_last_row_height_box = max(table_cells[-1], key=lambda b: b[3])
    max_y = max_last_row_height_box[1] + max_last_row_height_box[3]

    hor_lines = []
    ver_lines = []

    for box in table_cells:
        x = box[0][0]
        y = box[0][1]
        hor_lines.append((x, y, max_x, y))

    for box in table_cells[0]:
        x = box[0]
        y = box[1]
        ver_lines.append((x, y, x, max_y))

    (x, y, w, h) = table_cells[0][-1]
    ver_lines.append((max_x, y, max_x, max_y))
    (x, y, w, h) = table_cells[0][0]
    hor_lines.append((x, max_y, max_x, max_y))

    return hor_lines, ver_lines
if __name__ == "__main__":
    ap = argparse.ArgumentParser()
    ap.add_argument("-i", "--image", required=True,
                    help="path to input image to be OCR'd")
    # ap.add_argument("-east", "--east", type=str,
    #                 help="path to input EAST text detector")
    args = vars(ap.parse_args())

    in_file = os.path.join("images", args["image"])
    pre_file = os.path.join("images", "pre.png")
    out_file = os.path.join("images", "out.png")

    img = cv2.imread(os.path.join(in_file))
    top, bottom, left, right = [25] * 4
    img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_REPLICATE)
    orig = img.copy()

    pre_processed = pre_process_image(img, pre_file)
    text_boxes = find_text_boxes(pre_processed)
    cells = find_table_in_boxes(text_boxes)
    hor_lines, ver_lines = build_lines(cells)

    # (H, W) = img.shape[:2]
    # net = cv2.dnn.readNet(args["east"])
    # blob = cv2.dnn.blobFromImage(img, 1.0, (W, H), (123.68, 116.78, 103.94), swapRB=True, crop=False)
    # net.setInput(blob)

    # Visualize the result
    vis = img.copy()
    results = []

    for box in text_boxes:
        (x, y, w, h) = box
        startX = x - 2
        startY = y - 2
        endX = x + w
        endY = y + h
        cv2.rectangle(vis, (startX, startY), (endX, endY), (0, 255, 0), 1)
        # numpy slicing is [row (y), column (x)]
        roi = orig[startY:endY, startX:endX]
        config = ("-l eng --psm 6")
        pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files (x86)\Tesseract-OCR\tesseract.exe'
        text = pytesseract.image_to_string(roi, config=config)
        results.append(((startX, startY, endX, endY), text))

    results = sorted(results, key=lambda r: r[0][1])
    output = orig.copy()

    for ((startX, startY, endX, endY), text) in results:
        print("{}\n".format(text))
        text = "".join([c if ord(c) < 128 else "" for c in text]).strip()
        cv2.rectangle(output, (startX, startY), (endX, endY), (0, 0, 255), 1)
        cv2.putText(output, text, (startX, startY - 20), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 3)

    # for line in hor_lines:
    #     [x1, y1, x2, y2] = line
    #     cv2.line(vis, (x1, y1), (x2, y2), (0, 0, 255), 1)
    # for line in ver_lines:
    #     [x1, y1, x2, y2] = line
    #     cv2.line(vis, (x1, y1), (x2, y2), (0, 0, 255), 1)

    cv2.imwrite(out_file, vis)
    cv2.imshow("Text Detection", output)
    cv2.waitKey(0)
Initial image:
Preprocessed image with detection of text outlines used to define the dimensions of the rectangles:
Final image:
Result obtained by OCR:
"
a
ra
at
12
1
"
Thank you in advance for your help, hope my description is clear enough.
When performing OCR, it is extremely important to preprocess the image so that the foreground text is black and the background is white. In addition, enlarging the image can help improve the detection results. I've also found that adding a slight Gaussian blur improves accuracy before throwing it into Pytesseract. Here are the results with --psm 6 to treat the image as a single block of text. Look here for more configuration options.
Preprocessed enlarged, thresholded, and slightly blurred image
Results from Pytesseract OCR
Series Type Scan Range CTDIvol DLP Phantom
(mm) (mGy) — (mGy-cm) cm
1 Scout - - - -
1 Scout - - - -
2 Axial = 113.554-1272.929 11.22 269.35 Body 32
Total Exam DLP: = 269.35
1/1
Code
import cv2
import pytesseract
import imutils
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
image = cv2.imread('1.jpg')
image = imutils.resize(image, width=700)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
thresh = cv2.GaussianBlur(thresh, (3,3), 0)
data = pytesseract.image_to_string(thresh, lang='eng', config='--psm 6')
print(data)
cv2.imshow('thresh', thresh)
cv2.imwrite('thresh.png', thresh)
cv2.waitKey()
https://snag.gy/6MrLNi.jpg
The chin is a bit off in this photo.
https://snag.gy/ORZHSe.jpg
Not this one.
Difference in Code:
image = cv2.resize(image,(2170, 2894), interpolation = cv2.INTER_AREA)
The second one does not have this line.
Complete Source Code:
import cv2
import sys
import dlib
import numpy as np
from PIL import Image
import rawpy
# Get user supplied values
imagePath = sys.argv[1]
cascPath = "HS.xml"
pointOfInterestX = 200
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor("okgood.dat")
raw = rawpy.imread(imagePath)
rgb = raw.postprocess()
image = Image.fromarray(rgb)
#image.save("WOO.jpg")
open_cv_image = np.array(image)
open_cv_image = open_cv_image[:, :, ::-1].copy()
image = open_cv_image
image = cv2.resize(image,(2170, 2894), interpolation = cv2.INTER_AREA)
widthO, heightO = image.shape[:2]
faceCascade = cv2.CascadeClassifier(cascPath)
# Read the image
#image = cv2.imread(imagePath)
gray = cv2.cvtColor((image), cv2.COLOR_RGB2BGR)
#height, width = image.shape[:2]
# Detect faces in the image
faces = faceCascade.detectMultiScale(
    gray,
    scaleFactor=1.1,
    minNeighbors=4,
    minSize=(500, 500)
    #flags = cv2.CV_HAAR_SCALE_IMAGE
)
newdigit = 0
def test():
    for l in range(y, y+h):
        for d in range(x, x+w):
            # print(image[l,d])
            font = cv2.FONT_HERSHEY_SIMPLEX
            if all(item < 150 for item in image[l, d]):
                cv2.putText(image, "here", (d, l), font, .2, (255, 255, 255), 1, cv2.LINE_AA)
                return l
            image[l, d] = [0, 0, 0]
###
### put hairline 121 pixels from the top.
###
def shape_to_np(shape, dtype="int"):
    # initialize the list of (x, y)-coordinates
    coords = np.zeros((68, 2), dtype=dtype)
    # loop over the 68 facial landmarks and convert them
    # to a 2-tuple of (x, y)-coordinates
    for i in range(0, 68):
        coords[i] = (shape.part(i).x, shape.part(i).y)
    # return the list of (x, y)-coordinates
    return coords
two = 1
# Draw a rectangle around the faces
for (x, y, w, h) in faces:
    print(str(len(faces)))
    cv2.rectangle(image, (x, y), (x+w, y+h), (0, 255, 0), 2)
    pointOfInterestX = test()
    break
dets = detector(image, 1)
one = 0
pointOfEight = 0
for k, d in enumerate(dets):
    shape = predictor(image, d)
    shape = shape_to_np(shape)
    for (x, y) in shape:
        if one == 8:
            pointOfEight = y
        font = cv2.FONT_HERSHEY_SIMPLEX
        cv2.putText(image, str(one), (x, y), font, .2, (255, 255, 255), 1, cv2.LINE_AA)
        one = one + 1
        cv2.circle(image, (x, y), 1, (0, 0, 255), -1)
# loop over the (x, y)-coordinates for the facial landmarks
# and draw them on the image
new_dimensionX = heightO * 631 / (pointOfEight - pointOfInterestX)
new_dimensionY = widthO * 631 / (pointOfEight - pointOfInterestX)
print(str(new_dimensionY))
image = cv2.resize(image,(int(new_dimensionX), int(new_dimensionY)))
Rx = new_dimensionX / heightO
Ry = new_dimensionY / widthO
crop_img = image[int((pointOfInterestX * Rx)-121):int(new_dimensionY), 0:int(new_dimensionX-((Rx *pointOfInterestX)+121))]
font = cv2.FONT_HERSHEY_SIMPLEX
cv2.putText(image,"xxxx",(100,pointOfInterestX ), font, 4,(255,255,255),1,cv2.LINE_AA)
cv2.imshow("Faces found", crop_img)
cv2.imwrite("cropped.jpg", crop_img)
cv2.waitKey(0)
Towards the top you will see the line where I resize the image to 2170,2894. Like I said, with this line absent, the chin detection is accurate. With it, it is not. I need the chin detection accurate at this resolution.
Try using DLIB's face detector and initialize the landmark detector with the face-detector ROI. DLIB's detector ROI is different from the OpenCV Haar cascade one, and DLIB's landmark detector was trained using ROIs from DLIB's face detector, so it should work better with it.
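A minimal sketch of that suggestion (the image path is a placeholder; the shape-predictor file is the one already used in the question):
import cv2
import dlib

detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor("okgood.dat")

img = cv2.imread("photo.jpg")        # placeholder image path
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# feed DLIB's own face ROI straight into the landmark predictor
for rect in detector(gray, 1):
    shape = predictor(gray, rect)
    chin = shape.part(8)             # point 8 is the chin tip in the 68-point model
    cv2.circle(img, (chin.x, chin.y), 3, (0, 0, 255), -1)

cv2.imshow("landmarks", img)
cv2.waitKey(0)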