Change input in python opencv project from image to video - python

I have been trying to develop a program - written with python and using OpenCV -which counts the overall value of coins shown in the stream video in the real time. I should note that I am new to the opencv platform.
So, for now i have this code and it's working very well but with images, my question is how to change the input from image to a real time video stream.
Here is the code:
# import classifier
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
import math
import numpy as np
import argparse
import glob
import cv2
# construct argument parser and parse arguments
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--image", required=True, help="path to image")
args = vars(ap.parse_args())
image = cv2.imread(args["image"])
# resize image while retaining aspect ratio
d = 1024 / image.shape[1]
dim = (1024, int(image.shape[0] * d))
image = cv2.resize(image, dim, interpolation=cv2.INTER_AREA)
# create a copy of the image to display results
output = image.copy()
# convert image to grayscale
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# improve contrast accounting for differences in lighting conditions:
# create a CLAHE object to apply contrast limiting adaptive histogram equalization
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
gray = clahe.apply(gray)
def calcHistogram(img):
# create mask
m = np.zeros(img.shape[:2], dtype="uint8")
(w, h) = (int(img.shape[1] / 2), int(img.shape[0] / 2))
cv2.circle(m, (w, h), 60, 255, -1)
# calcHist expects a list of images, color channels, mask, bins, ranges
h = cv2.calcHist([img], [0, 1, 2], m, [8, 8, 8], [0, 256, 0, 256, 0, 256])
# return normalized "flattened" histogram
return cv2.normalize(h, h).flatten()
def calcHistFromFile(file):
img = cv2.imread(file)
return calcHistogram(img)
# define Enum class
class Enum(tuple): __getattr__ = tuple.index
# Enumerate material types for use in classifier
Material = Enum(('Copper', 'Brass', 'Euro1', 'Euro2'))
# locate sample image files
sample_images_copper = glob.glob("sample_images/copper/*")
sample_images_brass = glob.glob("sample_images/brass/*")
sample_images_euro1 = glob.glob("sample_images/euro1/*")
sample_images_euro2 = glob.glob("sample_images/euro2/*")
# define training data and labels
X = []
y = []
# compute and store training data and labels
for i in sample_images_copper:
X.append(calcHistFromFile(i))
y.append(Material.Copper)
for i in sample_images_brass:
X.append(calcHistFromFile(i))
y.append(Material.Brass)
for i in sample_images_euro1:
X.append(calcHistFromFile(i))
y.append(Material.Euro1)
for i in sample_images_euro2:
X.append(calcHistFromFile(i))
y.append(Material.Euro2)
# instantiate classifier
# Multi-layer Perceptron
# score: 0.974137931034
clf = MLPClassifier(solver="lbfgs")
# split samples into training and test data
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=.2)
# train and score classifier
clf.fit(X_train, y_train)
score = int(clf.score(X_test, y_test) * 100)
print("Classifier mean accuracy: ", score)
# blur the image using Gaussian blurring, where pixels closer to the center
# contribute more "weight" to the average, first argument is the source image,
# second argument is kernel size, third one is sigma (0 for autodetect)
# we use a 7x7 kernel and let OpenCV detect sigma
blurred = cv2.GaussianBlur(gray, (7, 7), 0)
# circles: A vector that stores x, y, r for each detected circle.
# src_gray: Input image (grayscale)
# CV_HOUGH_GRADIENT: Defines the detection method.
# dp = 2.2: The inverse ratio of resolution
# min_dist = 100: Minimum distance between detected centers
# param_1 = 200: Upper threshold for the internal Canny edge detector
# param_2 = 100*: Threshold for center detection.
# min_radius = 50: Minimum radius to be detected.
# max_radius = 120: Maximum radius to be detected.
circles = cv2.HoughCircles(blurred, cv2.HOUGH_GRADIENT, dp=2.2, minDist=100,
param1=200, param2=100, minRadius=50, maxRadius=120)
def predictMaterial(roi):
# calculate feature vector for region of interest
hist = calcHistogram(roi)
# predict material type
s = clf.predict([hist])
# return predicted material type
return Material[int(s)]
# todo: refactor
diameter = []
materials = []
coordinates = []
count = 0
if circles is not None:
# append radius to list of diameters (we don't bother to multiply by 2)
for (x, y, r) in circles[0, :]:
diameter.append(r)
# convert coordinates and radii to integers
circles = np.round(circles[0, :]).astype("int")
# loop over coordinates and radii of the circles
for (x, y, d) in circles:
count += 1
# add coordinates to list
coordinates.append((x, y))
# extract region of interest
roi = image[y - d:y + d, x - d:x + d]
# try recognition of material type and add result to list
material = predictMaterial(roi)
materials.append(material)
# write masked coin to file
if False:
m = np.zeros(roi.shape[:2], dtype="uint8")
w = int(roi.shape[1] / 2)
h = int(roi.shape[0] / 2)
cv2.circle(m, (w, h), d, (255), -1)
maskedCoin = cv2.bitwise_and(roi, roi, mask=m)
cv2.imwrite("extracted/01coin{}.png".format(count), maskedCoin)
# draw contour and results in the output image
cv2.circle(output, (x, y), d, (0, 255, 0), 2)
cv2.putText(output, material,
(x - 40, y), cv2.FONT_HERSHEY_PLAIN,
1.5, (0, 255, 0), thickness=2, lineType=cv2.LINE_AA)
# get biggest diameter
biggest = max(diameter)
i = diameter.index(biggest)
# scale everything according to maximum diameter
# todo: this should be chosen by the user
if materials[i] == "Euro2":
diameter = [x / biggest * 25.75 for x in diameter]
scaledTo = "Scaled to 2 Euro"
elif materials[i] == "Brass":
diameter = [x / biggest * 24.25 for x in diameter]
scaledTo = "Scaled to 50 Cent"
elif materials[i] == "Euro1":
diameter = [x / biggest * 23.25 for x in diameter]
scaledTo = "Scaled to 1 Euro"
elif materials[i] == "Copper":
diameter = [x / biggest * 21.25 for x in diameter]
scaledTo = "Scaled to 5 Cent"
else:
scaledTo = "unable to scale.."
i = 0
total = 0
while i < len(diameter):
d = diameter[i]
m = materials[i]
(x, y) = coordinates[i]
t = "Unknown"
# compare to known diameters with some margin for error
if math.isclose(d, 25.75, abs_tol=1.25) and m == "Euro2":
t = "2 Euro"
total += 200
elif math.isclose(d, 23.25, abs_tol=2.5) and m == "Euro1":
t = "1 Euro"
total += 100
elif math.isclose(d, 19.75, abs_tol=1.25) and m == "Brass":
t = "10 Cent"
total += 10
elif math.isclose(d, 22.25, abs_tol=1.0) and m == "Brass":
t = "20 Cent"
total += 20
elif math.isclose(d, 24.25, abs_tol=2.5) and m == "Brass":
t = "50 Cent"
total += 50
elif math.isclose(d, 16.25, abs_tol=1.25) and m == "Copper":
t = "1 Cent"
total += 1
elif math.isclose(d, 18.75, abs_tol=1.25) and m == "Copper":
t = "2 Cent"
total += 2
elif math.isclose(d, 21.25, abs_tol=2.5) and m == "Copper":
t = "5 Cent"
total += 5
# write result on output image
cv2.putText(output, t,
(x - 40, y + 22), cv2.FONT_HERSHEY_PLAIN,
1.5, (255, 255, 255), thickness=2, lineType=cv2.LINE_AA)
i += 1
# resize output image while retaining aspect ratio
d = 768 / output.shape[1]
dim = (768, int(output.shape[0] * d))
image = cv2.resize(image, dim, interpolation=cv2.INTER_AREA)
output = cv2.resize(output, dim, interpolation=cv2.INTER_AREA)
# write summary on output image
cv2.putText(output, scaledTo,
(5, output.shape[0] - 40), cv2.FONT_HERSHEY_PLAIN,
1.0, (0, 0, 255), lineType=cv2.LINE_AA)
cv2.putText(output, "Coins detected: {}, EUR {:2}".format(count, total / 100),
(5, output.shape[0] - 24), cv2.FONT_HERSHEY_PLAIN,
1.0, (0, 0, 255), lineType=cv2.LINE_AA)
cv2.putText(output, "Classifier mean accuracy: {}%".format(score),
(5, output.shape[0] - 8), cv2.FONT_HERSHEY_PLAIN,
1.0, (0, 0, 255), lineType=cv2.LINE_AA)
# show output and wait for key to terminate program
cv2.imshow("Output", np.hstack([image, output]))
cv2.waitKey(0)

Video stream is just a sequence of images. Here is an example of OpenCV:
import numpy as np
import cv2
cap = cv2.VideoCapture(0)
while(True):
# Capture frame-by-frame
ret, frame = cap.read()
# <Put your code for processing 1 image here>
# Display the resulting frame
cv2.imshow('frame',processed_frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
# When everything done, release the capture
cap.release()
cv2.destroyAllWindows()
PS. You shouldn't post all your code, it's hard for others to follow. Try to make a minimal, reproducible example.

Related

Correct the object orientation in the image. Calculate the correct angle of rotation and correct the alignment of the object in the image

I have cropped images of electronic meter reading. Those readings are taken in random style. I need the orientation of the object(not the image) in the image to be aligned.
The detection of contours is not working. As there are lots of contours are formed in the image and in order to calculate the angle I need to select the right contour. Some times contour is not formed.
2.I want set of rotated images as shown in figure above. I tried some code of rotating image from the OpenCV. But due to two type of use case ( as we don't know from code that the reading style may be any of the two) The images are turned out as below.
Using the code below I am able to find the angle of rotation but for any one case. I need it to be done automatically for both type of cases. Also see the data set I have attached for other type of examples.
import cv2
import numpy as np
debug = True
# Display image
def display(img, frameName="OpenCV Image"):
if not debug:
return
h, w = img.shape[0:2]
neww = 800
newh = int(neww*(h/w))
img = cv2.resize(img, (neww, newh))
plt.imshow(img)
plt.show()
# cv2.imshow(frameName, img)
# cv2.waitKey(0)
#rotate the image with given theta value
def rotate(img, theta):
rows, cols = img.shape[0], img.shape[1]
image_center = (cols/2, rows/2)
M = cv2.getRotationMatrix2D(image_center,theta,1)
abs_cos = abs(M[0,0])
abs_sin = abs(M[0,1])
bound_w = int(rows * abs_sin + cols * abs_cos)
bound_h = int(rows * abs_cos + cols * abs_sin)
M[0, 2] += bound_w/2 - image_center[0]
M[1, 2] += bound_h/2 - image_center[1]
# rotate orignal image to show transformation
rotated = cv2.warpAffine(img,M,(bound_w,bound_h),borderValue=(255,255,255))
return rotated
def slope(x1, y1, x2, y2):
if x1 == x2:
return 0
slope = (y2-y1)/(x2-x1)
theta = np.rad2deg(np.arctan(slope))
return theta
def main(filePath):
img = cv2.imread(filePath)
(hi, wi) = img.shape[:2]
textImg = img.copy()
small = cv2.cvtColor(textImg, cv2.COLOR_BGR2GRAY)
# find the gradient map
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
grad = cv2.morphologyEx(small, cv2.MORPH_GRADIENT, kernel)
display(grad)
# Binarize the gradient image
_, bw = cv2.threshold(grad, 0.0, 255.0, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
display(bw)
# connect horizontally oriented regions
# kernal value (9,1) can be changed to improved the text detection
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (9, 1))
connected = cv2.morphologyEx(bw, cv2.MORPH_CLOSE, kernel)
display(connected)
# using RETR_EXTERNAL instead of RETR_CCOMP
# _ , contours, hierarchy = cv2.findContours(connected.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
contours, hierarchy = cv2.findContours(connected.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) #opencv >= 4.0
mask = np.zeros(bw.shape, dtype=np.uint8)
display(mask)
# cumulative theta value
cummTheta = 0
# number of detected text regions
ct = 0
flag=False
for idx in range(len(contours)):
x, y, w, h = cv2.boundingRect(contours[idx])
mask[y:y+h, x:x+w] = 0
# fill the contour
cv2.drawContours(mask, contours, idx, (255, 255, 255), -1)
display(mask)
# ratio of non-zero pixels in the filled region
r = float(cv2.countNonZero(mask[y:y+h, x:x+w])) / (w * h)
# assume at least 45% of the area is filled if it contains text
# if r > 0.39 and w > 8 and h > 8:
if (h/hi)>0.4 and (w/wi)>0.4:
flag=True
print(r,w,h)
# cv2.rectangle(textImg, (x1, y), (x+w-1, y+h-1), (0, 255, 0), 2)
rect = cv2.minAreaRect(contours[idx])
box = cv2.boxPoints(rect)
box = np.int0(box)
cv2.drawContours(textImg,[box],0,(0,0,255),2)
center = (int(rect[0][0]),int(rect[0][1]))
width = int(rect[1][0])
height = int(rect[1][1])
angle = int(rect[2])
print(angle)
print(width,height)
if width < height:
angle = 90+angle
print(angle,'final')
# we can filter theta as outlier based on other theta values
# this will help in excluding the rare text region with different orientation from ususla value
theta = slope(box[0][0], box[0][1], box[1][0], box[1][1])
cummTheta += theta
ct +=1
# print("Theta", theta)
# find the average of all cumulative theta value
# orientation = cummTheta/ct
print("Image orientation in degress: ", angle)
finalImage = rotate(img, angle)
display(textImg, "Detectd Text minimum bounding box")
display(finalImage)
out_path='cropped_corrected/rotated/'+filePath.split('\\')[-1]
print(out_path)
cv2.imwrite(out_path,finalImage)
print('image svaed here in rotated')
break
if not flag:
out_path='cropped_corrected/not_rotated/'+filePath.split('\\')[-1]
print(out_path)
cv2.imwrite(out_path,img)
print('image svaed here without rotated')
if __name__ == "__main__":
filePath = 'cropped/N3963001963.jpg'
main(filePath)
I am attaching some sample images that need to be rotated and the object inside the image needs to be aligned:

Obtain original colours from the cropped image yolov3

This is the first time I am trying out YOLov3. I am trying to obtain the cropped image within the bounding box predicted by YOLOv3. I am able to obtain the cropped image. The problem is, the image is of a different colour than the original cropped image. I want to pass this cropped image into another model so that I can confirm that the cropped image does have fire in it.
I can't do that when the cropped image is different from the original. Any help would be appreciated.
Here is the piece of code for cropping the image:
import cv2
import numpy as np
import glob
import random
from scipy import ndimage
import matplotlib.pyplot as plt
from PIL import Image
# Load Yolo
net = cv2.dnn.readNet("D:\obj_detection\yolo_custom_detection\yolov3_training_last.weights", "D:\obj_detection\yolo_custom_detection\yolov3_testing.cfg")
# Name custom object
classes = ["fire"]
# Images path
images_path = glob.glob(r"D:\obj_detection\test_img\*.jpg")
layer_names = net.getLayerNames() #Get the name of all layers of the network.
output_layers = [layer_names[i[0] - 1] for i in net.getUnconnectedOutLayers()] #Get the index of the output layers.
#These two functions are used for getting the output layers (82,94,106).
colors = np.random.uniform(0, 255, size=(len(classes), 3))
# Insert here the path of your images
random.shuffle(images_path)
# loop through all the images
for img_path in images_path:
# Loading image
img = cv2.imread(img_path)
img = cv2.resize(img, None, fx=0.4, fy=0.4)
height, width, channels = img.shape
# Detecting objects
blob = cv2.dnn.blobFromImage(img, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
# Syntax: blob = cv2.dnn.blobFromImage(image, scalefactor=1.0, size, mean, swapRB=True)
net.setInput(blob)
outs = net.forward(output_layers)
# Showing informations on the screen
class_ids = []
confidences = []
boxes = []
for out in outs:
for detection in out:
scores = detection[5:]
class_id = np.argmax(scores)
confidence = scores[class_id]
if confidence > 0.3:
# Object detected
print(class_id)
center_x = int(detection[0] * width)
center_y = int(detection[1] * height)
w = int(detection[2] * width)
h = int(detection[3] * height)
# Rectangle coordinates
x = int(center_x - w / 2)
y = int(center_y - h / 2)
boxes.append([x, y, w, h])
confidences.append(float(confidence))
class_ids.append(class_id)
indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4) #Non max suppression
print(indexes)
font = cv2.FONT_HERSHEY_COMPLEX_SMALL
for i in range(len(boxes)):
if i in indexes:
x, y, w, h = boxes[i]
if x<0:
x = 0
if y<0:
y = 0
if w<0:
w = 0
if h<0:
h = 0
print("The box dimensions:",boxes[i])
print(img_path)
label = str(classes[class_ids[i]])
color = colors[class_ids[i]]
print("Dimensions of x:", x,",y:",y,",x+w:", x+w, ",y+h:", y+h)
cropped_image = img[y:y+h, x:x+w]
plt.imshow(cropped_image)
cv2.rectangle(img, (x, y), (x + w, y + h), color, 1)
cv2.putText(img, label, (x, y + 30), font, 0.8, color, 2)
img = cv2.resize(img, (700, 600))
cv2.imshow("Fire Detection", img)
cv2.resizeWindow("Fire Detection", 1600,1200)
key = cv2.waitKey(0)
if key == ord('q'):
break
cv2.destroyAllWindows()
The output I obtain is:
Original Image:
enter image description here
Cropped image:
enter image description here
How can I obtain the cropped image as it is in the original image (the same colour)?

Get the contour coordinates in a final predicted mask

I'm implementing a Unet model for nuclei segmentation. The model is working fine and the segmentation was successfully done. However, I want to save the contours on a json file to properly load it in a web app.
Here's my original image:
And here's the corresponding predicted mask.
I tried to use findContours on the mask but overlapped cells would be recognized as one. Note that overlapped cells got that green boundary to differentiate nucleis.
What I want is to get the coordinates of the single nuclei contours and save it as a json, like this:
{"1_0.jpeg-1":{"filename":"1_0.jpeg","size":-1,"regions":[{"shape_attributes":{"name":"polyline","all_points_x":[
216.0,
510.5,
215.5,
510.0,
216.0,
509.5,
216.5,
510.0,
216.0,
510.5
],"all_points_y":[]},"region_attributes":{}}],"file_attributes":{}}}
This is my predict function where I save the mask of each image-to-predict
if __name__ == '__main__':
t0 = timeit.default_timer()
args_models = ['best_resnet101_2_fold0.h5']
weights = [os.path.join(args.models_dir, m) for m in args_models]
models = []
for w in weights:
model = make_model(args.network, (None, None, 3))
print("Building model {} from weights {} ".format(args.network, w))
model.load_weights(w)
models.append(model)
os.makedirs(test_pred, exist_ok=True)
print('Predicting test')
for d in tqdm(listdir(test_folder)):
final_mask = None
for scale in range(1):
fid = d
print(path.join(test_folder, '{0}'.format(fid)))
img = cv2.imread(path.join(test_folder, '{0}'.format(fid)), cv2.IMREAD_COLOR)[...,::-1]
if final_mask is None:
final_mask = np.zeros((img.shape[0], img.shape[1], OUT_CHANNELS))
if scale == 1:
img = cv2.resize(img, None, fx=0.75, fy=0.75, interpolation=cv2.INTER_AREA)
elif scale == 2:
img = cv2.resize(img, None, fx=1.25, fy=1.25, interpolation=cv2.INTER_CUBIC)
x0 = 16
y0 = 16
x1 = 16
y1 = 16
if (img.shape[1] % 32) != 0:
x0 = int((32 - img.shape[1] % 32) / 2)
x1 = (32 - img.shape[1] % 32) - x0
x0 += 16
x1 += 16
if (img.shape[0] % 32) != 0:
y0 = int((32 - img.shape[0] % 32) / 2)
y1 = (32 - img.shape[0] % 32) - y0
y0 += 16
y1 += 16
img0 = np.pad(img, ((y0, y1), (x0, x1), (0, 0)), 'symmetric')
inp0 = []
inp1 = []
for flip in range(2):
for rot in range(4):
if flip > 0:
img = img0[::-1, ...]
else:
img = img0
if rot % 2 == 0:
inp0.append(np.rot90(img, k=rot))
else:
inp1.append(np.rot90(img, k=rot))
inp0 = np.asarray(inp0)
inp0 = preprocess_inputs(np.array(inp0, "float32"))
inp1 = np.asarray(inp1)
inp1 = preprocess_inputs(np.array(inp1, "float32"))
mask = np.zeros((img0.shape[0], img0.shape[1], OUT_CHANNELS))
for model in models:
pred0 = model.predict(inp0, batch_size=1)
pred1 = model.predict(inp1, batch_size=1)
j = -1
for flip in range(2):
for rot in range(4):
j += 1
if rot % 2 == 0:
pr = np.rot90(pred0[int(j / 2)], k=(4 - rot))
else:
pr = np.rot90(pred1[int(j / 2)], k=(4 - rot))
if flip > 0:
pr = pr[::-1, ...]
mask += pr # [..., :2]
mask /= (8 * len(models))
mask = mask[y0:mask.shape[0] - y1, x0:mask.shape[1] - x1, ...]
if scale > 0:
mask = cv2.resize(mask, (final_mask.shape[1], final_mask.shape[0]))
final_mask += mask
final_mask /= 1
if OUT_CHANNELS == 2:
final_mask = np.concatenate([final_mask, np.zeros_like(final_mask)[..., 0:1]], axis=-1)
final_mask = final_mask * 255
final_mask = final_mask.astype('uint8')
cv2.imwrite(path.join(test_pred, '{0}'.format(fid)), final_mask, [cv2.IMWRITE_PNG_COMPRESSION, 9])
elapsed = timeit.default_timer() - t0
print('Time: {:.3f} min'.format(elapsed / 60))
Do you have any idea how to get the coordinates of each classified nuclei? The json part should be easy but I don't get how can I get the countours' coordinates. Should I do it after the predicted mask is written? Or should I do it on my predict process?
Kind Regards
Coordinates points of a contour can be found by
mask = np.zeros(imgray.shape,np.uint8)
cv.drawContours(mask,[cnt],0,255,-1)
pixelpoints = np.transpose(np.nonzero(mask))
Have a look at https://docs.opencv.org/3.4/d1/d32/tutorial_py_contour_properties.html
[EDIT]
To separate the cells, you can remove green boundary by extracting the blue color alone.
I take the predicted mask image as input.
img = cv2.imread('1.jpg')
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
# define range of blue color in HSV
lower_blue = np.array([110,50,50])
upper_blue = np.array([130,255,255])
# Threshold the HSV image to get only blue colors
mask = cv2.inRange(hsv, lower_blue, upper_blue)
blue_only = cv2.bitwise_and(img,img, mask= mask)
im2, contours, hierarchy = cv2.findContours(mask,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
#draw contours with indexes and save coordinates to a txt file
with open('coords.txt', 'w+') as f:
for i,cnt in enumerate(contours):
cv2.drawContours(blue_only, cnt, -1, (0,0,255), 1)
cv2.putText(blue_only, str(i), (cnt[0][0][0], cnt[0][0][1]),cv2.FONT_HERSHEY_SIMPLEX, 1,(0,0,255), 1)
f.writelines("contour " + str(i) +" :" + str(cnt))
cv2.imshow('img',img)
cv2.imshow('mask',mask)
cv2.imshow('blue_only',blue_only)

How to find rotate and crop a section of text in openCV, python

I'm in a struggle with a project that takes an image of a pretty clear font from say a label for example reads the "text region" and outputs it as a string using OCR tesseract for instance.
Now I've made quite some progress with the thing as I added varios global filters to get to a quite clear result but I'm struggling with finding method of filtering just the text out of there and then you have to think about rotating it to be as horizontal as possible and then after that the easy part should be to crop it.
May I have any leads to how to do that not using traning data and over complicating the system sins I only use a rasdpberry pi to do the computing?
Thanks for helping here's what I've came up with so far:
Original Image(Captured from PiCamera):
Adaptive thresh after shadow removal:
[
Glocad tresh after shadow removal:
Here's the code:
# import the necessary packages
from PIL import Image
import pytesseract
import argparse
import cv2
import os
import picamera
import time
import numpy as np
#preprocess = "tresh"
#Remaining textcorping and rotating:
import math
import json
from collections import defaultdict
from scipy.ndimage.filters import rank_filter
def dilate(ary, N, iterations):
"""Dilate using an NxN '+' sign shape. ary is np.uint8."""
kernel = np.zeros((N,N), dtype=np.uint8)
kernel[(N-1)/2,:] = 1
dilated_image = cv2.dilate(ary / 255, kernel, iterations=iterations)
kernel = np.zeros((N,N), dtype=np.uint8)
kernel[:,(N-1)/2] = 1
dilated_image = cv2.dilate(dilated_image, kernel, iterations=iterations)
return dilated_image
def props_for_contours(contours, ary):
"""Calculate bounding box & the number of set pixels for each contour."""
c_info = []
for c in contours:
x,y,w,h = cv2.boundingRect(c)
c_im = np.zeros(ary.shape)
cv2.drawContours(c_im, [c], 0, 255, -1)
c_info.append({
'x1': x,
'y1': y,
'x2': x + w - 1,
'y2': y + h - 1,
'sum': np.sum(ary * (c_im > 0))/255
})
return c_info
def union_crops(crop1, crop2):
"""Union two (x1, y1, x2, y2) rects."""
x11, y11, x21, y21 = crop1
x12, y12, x22, y22 = crop2
return min(x11, x12), min(y11, y12), max(x21, x22), max(y21, y22)
def intersect_crops(crop1, crop2):
x11, y11, x21, y21 = crop1
x12, y12, x22, y22 = crop2
return max(x11, x12), max(y11, y12), min(x21, x22), min(y21, y22)
def crop_area(crop):
x1, y1, x2, y2 = crop
return max(0, x2 - x1) * max(0, y2 - y1)
def find_border_components(contours, ary):
borders = []
area = ary.shape[0] * ary.shape[1]
for i, c in enumerate(contours):
x,y,w,h = cv2.boundingRect(c)
if w * h > 0.5 * area:
borders.append((i, x, y, x + w - 1, y + h - 1))
return borders
def angle_from_right(deg):
return min(deg % 90, 90 - (deg % 90))
def remove_border(contour, ary):
"""Remove everything outside a border contour."""
# Use a rotated rectangle (should be a good approximation of a border).
# If it's far from a right angle, it's probably two sides of a border and
# we should use the bounding box instead.
c_im = np.zeros(ary.shape)
r = cv2.minAreaRect(contour)
degs = r[2]
if angle_from_right(degs) <= 10.0:
box = cv2.cv.BoxPoints(r)
box = np.int0(box)
cv2.drawContours(c_im, [box], 0, 255, -1)
cv2.drawContours(c_im, [box], 0, 0, 4)
else:
x1, y1, x2, y2 = cv2.boundingRect(contour)
cv2.rectangle(c_im, (x1, y1), (x2, y2), 255, -1)
cv2.rectangle(c_im, (x1, y1), (x2, y2), 0, 4)
return np.minimum(c_im, ary)
def find_components(edges, max_components=16):
"""Dilate the image until there are just a few connected components.
Returns contours for these components."""
# Perform increasingly aggressive dilation until there are just a few
# connected components.
count = 21
dilation = 5
n = 1
while count > 16:
n += 1
dilated_image = dilate(edges, N=3, iterations=n)
contours, hierarchy = cv2.findContours(dilated_image, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
count = len(contours)
#print dilation
#Image.fromarray(edges).show()
#Image.fromarray(255 * dilated_image).show()
return contours
def find_optimal_components_subset(contours, edges):
"""Find a crop which strikes a good balance of coverage/compactness.
Returns an (x1, y1, x2, y2) tuple.
"""
c_info = props_for_contours(contours, edges)
c_info.sort(key=lambda x: -x['sum'])
total = np.sum(edges) / 255
area = edges.shape[0] * edges.shape[1]
c = c_info[0]
del c_info[0]
this_crop = c['x1'], c['y1'], c['x2'], c['y2']
crop = this_crop
covered_sum = c['sum']
while covered_sum < total:
changed = False
recall = 1.0 * covered_sum / total
prec = 1 - 1.0 * crop_area(crop) / area
f1 = 2 * (prec * recall / (prec + recall))
#print '----'
for i, c in enumerate(c_info):
this_crop = c['x1'], c['y1'], c['x2'], c['y2']
new_crop = union_crops(crop, this_crop)
new_sum = covered_sum + c['sum']
new_recall = 1.0 * new_sum / total
new_prec = 1 - 1.0 * crop_area(new_crop) / area
new_f1 = 2 * new_prec * new_recall / (new_prec + new_recall)
# Add this crop if it improves f1 score,
# _or_ it adds 25% of the remaining pixels for <15% crop expansion.
# ^^^ very ad-hoc! make this smoother
remaining_frac = c['sum'] / (total - covered_sum)
new_area_frac = 1.0 * crop_area(new_crop) / crop_area(crop) - 1
if new_f1 > f1 or (
remaining_frac > 0.25 and new_area_frac < 0.15):
print '%d %s -> %s / %s (%s), %s -> %s / %s (%s), %s -> %s' % (
i, covered_sum, new_sum, total, remaining_frac,
crop_area(crop), crop_area(new_crop), area, new_area_frac,
f1, new_f1)
crop = new_crop
covered_sum = new_sum
del c_info[i]
changed = True
break
if not changed:
break
return crop
def pad_crop(crop, contours, edges, border_contour, pad_px=15):
"""Slightly expand the crop to get full contours.
This will expand to include any contours it currently intersects, but will
not expand past a border.
"""
bx1, by1, bx2, by2 = 0, 0, edges.shape[0], edges.shape[1]
if border_contour is not None and len(border_contour) > 0:
c = props_for_contours([border_contour], edges)[0]
bx1, by1, bx2, by2 = c['x1'] + 5, c['y1'] + 5, c['x2'] - 5, c['y2'] - 5
def crop_in_border(crop):
x1, y1, x2, y2 = crop
x1 = max(x1 - pad_px, bx1)
y1 = max(y1 - pad_px, by1)
x2 = min(x2 + pad_px, bx2)
y2 = min(y2 + pad_px, by2)
return crop
crop = crop_in_border(crop)
c_info = props_for_contours(contours, edges)
changed = False
for c in c_info:
this_crop = c['x1'], c['y1'], c['x2'], c['y2']
this_area = crop_area(this_crop)
int_area = crop_area(intersect_crops(crop, this_crop))
new_crop = crop_in_border(union_crops(crop, this_crop))
if 0 < int_area < this_area and crop != new_crop:
print '%s -> %s' % (str(crop), str(new_crop))
changed = True
crop = new_crop
if changed:
return pad_crop(crop, contours, edges, border_contour, pad_px)
else:
return crop
def downscale_image(im, max_dim=2048):
"""Shrink im until its longest dimension is <= max_dim.
Returns new_image, scale (where scale <= 1).
"""
a, b = im.size
if max(a, b) <= max_dim:
return 1.0, im
scale = 1.0 * max_dim / max(a, b)
new_im = im.resize((int(a * scale), int(b * scale)), Image.ANTIALIAS)
return scale, new_im
def process_image(inputImg):
opnImg = Image.open(inputImg)
scale, im = downscale_image(opnImg)
edges = cv2.Canny(np.asarray(im), 100, 200)
# TODO: dilate image _before_ finding a border. This is crazy sensitive!
contours, hierarchy = cv2.findContours(edges, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
borders = find_border_components(contours, edges)
borders.sort(key=lambda (i, x1, y1, x2, y2): (x2 - x1) * (y2 - y1))
border_contour = None
if len(borders):
border_contour = contours[borders[0][0]]
edges = remove_border(border_contour, edges)
edges = 255 * (edges > 0).astype(np.uint8)
# Remove ~1px borders using a rank filter.
maxed_rows = rank_filter(edges, -4, size=(1, 20))
maxed_cols = rank_filter(edges, -4, size=(20, 1))
debordered = np.minimum(np.minimum(edges, maxed_rows), maxed_cols)
edges = debordered
contours = find_components(edges)
if len(contours) == 0:
print '%s -> (no text!)' % path
return
crop = find_optimal_components_subset(contours, edges)
crop = pad_crop(crop, contours, edges, border_contour)
crop = [int(x / scale) for x in crop] # upscale to the original image size.
#draw = ImageDraw.Draw(im)
#c_info = props_for_contours(contours, edges)
#for c in c_info:
# this_crop = c['x1'], c['y1'], c['x2'], c['y2']
# draw.rectangle(this_crop, outline='blue')
#draw.rectangle(crop, outline='red')
#im.save(out_path)
#draw.text((50, 50), path, fill='red')
#orig_im.save(out_path)
#im.show()
text_im = opnImg.crop(crop)
text_im.save('Cropted_and_rotated_image.jpg')
return text_im
'''
text_im.save(out_path)
print '%s -> %s' % (path, out_path)
'''
#Camera capturing stuff:
myCamera = picamera.PiCamera()
myCamera.vflip = True
myCamera.hflip = True
'''
myCamera.start_preview()
time.sleep(6)
myCamera.stop_preview()
'''
myCamera.capture("Captured_Image.png")
#End capturing persidure
imgAddr = '/home/pi/My_examples/Mechanical_display_converter/Example1.jpg'
#imgAddr = "Captured_Image.png"
# construct the argument parse and parse the arguments
#ap = argparse.ArgumentParser()
'''
ap.add_argument("-i", "--image", required=True,
help="path to input image to be OCR'd")
ap.add_argument("-p", "--preprocess", type=str, default="thresh",
help="type of preprocessing to be done")
args = vars(ap.parse_args())
'''
# load the example image and convert it to grayscale
img = cv2.imread(imgAddr)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
cv2.imshow('Step1_gray_filter', gray)
'''
# check to see if we should apply thresholding to preprocess the
# image
if args["preprocess"] == "thresh":
gray = cv2.threshold(gray, 0, 255,
cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
# make a check to see if median blurring should be done to remove
# noise
elif args["preprocess"] == "blur":
gray = cv2.medianBlur(gray, 3)
if preprocess == "thresh":
gray = cv2.threshold(gray, 150, 255,
cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
# make a check to see if median blurring should be done to remove
# noise
elif preprocess == "blur":
gray = cv2.medianBlur(gray, 3)
'''
rgb_planes = cv2.split(img)
result_planes = []
result_norm_planes = []
for plane in rgb_planes:
dilated_img = cv2.dilate(plane, np.ones((7,7), np.uint8))
bg_img = cv2.medianBlur(dilated_img, 21)
diff_img = 255 - cv2.absdiff(plane, bg_img)
norm_img = cv2.normalize(diff_img, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8UC1)
result_planes.append(diff_img)
result_norm_planes.append(norm_img)
result = cv2.merge(result_planes)
result_norm = cv2.merge(result_norm_planes)
cv2.imshow('shadows_out.png', result)
cv2.imshow('shadows_out_norm.png', result_norm)
grayUnShadowedImg = cv2.cvtColor(result, cv2.COLOR_BGR2GRAY)
cv2.imshow('Shadow_Gray_CVT', grayUnShadowedImg)
ret, threshUnShadowedImg = cv2.threshold(grayUnShadowedImg, 200, 255, cv2.THRESH_BINARY)
cv2.imshow('unShadowed_Thresh_filtering', threshUnShadowedImg)
#v2.imwrite('unShadowed_Thresh_filtering.jpg', threshUnShadowedImg)
#croptedunShadowedImg = process_image('unShadowed_Thresh_filtering.jpg')
adptThreshUnShadowedImg = cv2.adaptiveThreshold(grayUnShadowedImg, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 115, 1)
cv2.imshow('unShadowed_Adaptive_Thresh_filtering', adptThreshUnShadowedImg)
'''
blurFImg = cv2.GaussianBlur(adptThreshUnShadowedImg,(25,25), 0)
ret, f3Img = cv2.threshold(blurFImg,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
cv2.imshow('f3Img', f3Img )
'''
#OCR Stage:
'''
# write the grayscale image to disk as a temporary file so we can
# apply OCR to it
filename = "{}.png".format(os.getpid())
cv2.imwrite(filename, threshImg)
# load the image as a PIL/Pillow image, apply OCR, and then delete
# the temporary file
text = pytesseract.image_to_string(Image.open(filename))
os.remove(filename)
print("\n" + text)
'''
cv2.waitKey(0)
cv2.destroyAllWindows()
Tryed this source out as well but this doesn't seem to work and is not that clear to understand:
https://www.danvk.org/2015/01/07/finding-blocks-of-text-in-an-image-using-python-opencv-and-numpy.html
I have made an example to maybe give you an idea on how to proceede. I made it without your transformations of the image but you could do it with them if you would like.
What I did was to first transform the image to binary with cv2.THRESH_BINARY. Next I made a mask and drew the contours by limiting them with size (cv2.contourArea()) and ratio (got it from cv2.boundingRect()) for threshold. Then I conected all the contours that are near each other using cv2.morphologyEx() and a big kernel size (50x50).
Then I selected the biggest contour (text) and drew a rotated rectangle with cv2.minAreaRect() which got me the rotational angle.
Then I could rotate the image using cv2.getRotationMatrix2D() and cv2.warpAffine() and get a slightly bigger bounding box using the highest X, Y and lowest X,Y values of the rotated rectangle which I used to crop the image.
Then I serched again for contours and removed the noise (little contours) from the image and the result is a text with high contrast.
Final result:
This code is meant only to give an idea or another point of view to the problem and it may not work with other images (if they differ from the original too much) or at least you would have to adjust some parameters of code. Hope it helps. Cheers!
Code:
import cv2
import numpy as np
# Read image and search for contours.
img = cv2.imread('rotatec.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
_, threshold = cv2.threshold(gray, 150, 255, cv2.THRESH_BINARY)
contours, hierarchy = cv2.findContours(threshold,cv2.RETR_TREE,cv2.CHAIN_APPROX_NONE)
# Create first mask used for rotation.
mask = np.ones(img.shape, np.uint8)*255
# Draw contours on the mask with size and ratio of borders for threshold.
for cnt in contours:
size = cv2.contourArea(cnt)
x,y,w,h = cv2.boundingRect(cnt)
if 10000 > size > 500 and w*2.5 > h:
cv2.drawContours(mask, [cnt], -1, (0,0,0), -1)
# Connect neighbour contours and select the biggest one (text).
kernel = np.ones((50,50),np.uint8)
opening = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel)
gray_op = cv2.cvtColor(opening, cv2.COLOR_BGR2GRAY)
_, threshold_op = cv2.threshold(gray_op, 150, 255, cv2.THRESH_BINARY_INV)
contours_op, hierarchy_op = cv2.findContours(threshold_op, cv2.RETR_TREE,cv2.CHAIN_APPROX_NONE)
cnt = max(contours_op, key=cv2.contourArea)
# Create rotated rectangle to get the angle of rotation and the 4 points of the rectangle.
_, _, angle = rect = cv2.minAreaRect(cnt)
(h,w) = img.shape[:2]
(center) = (w//2,h//2)
# Rotate the image.
M = cv2.getRotationMatrix2D(center, angle, 1.0)
rotated = cv2.warpAffine(img, M, (int(w),int(h)), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_CONSTANT)
# Create bounding box for rotated text (use old points of rotated rectangle).
box = cv2.boxPoints(rect)
a, b, c, d = box = np.int0(box)
bound =[]
bound.append(a)
bound.append(b)
bound.append(c)
bound.append(d)
bound = np.array(bound)
(x1, y1) = (bound[:,0].min(), bound[:,1].min())
(x2, y2) = (bound[:,0].max(), bound[:,1].max())
cv2.drawContours(img,[box],0,(0,0,255),2)
# Crop the image and create new mask for the final image.
rotated = rotated[y1:y2, x1:x2]
mask_final = np.ones(rotated.shape, np.uint8)*255
# Remove noise from the final image.
gray_r = cv2.cvtColor(rotated, cv2.COLOR_BGR2GRAY)
_, threshold_r = cv2.threshold(gray_r, 150, 255, cv2.THRESH_BINARY_INV)
contours, hierarchy = cv2.findContours(threshold_r,cv2.RETR_TREE,cv2.CHAIN_APPROX_NONE)
for cnt in contours:
size = cv2.contourArea(cnt)
if size < 500:
cv2.drawContours(threshold_r, [cnt], -1, (0,0,0), -1)
# Invert black and white.
final_image = cv2.bitwise_not(threshold_r)
# Display results.
cv2.imshow('final', final_image)
cv2.imshow('rotated', rotated)
EDIT:
For text recognition I recomend you see this post from SO Simple Digit Recognition OCR in OpenCV-Python.
The result with the code from mentioned post:
EDIT:
This is my code implemented with the slightly modified code from the mentioned post. All steps are written in the comments. You should save the script and the training image to the same directory. This is my training image:
Code:
import cv2
import numpy as np
# Read image and search for contours.
img = cv2.imread('rotatec.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
_, threshold = cv2.threshold(gray, 150, 255, cv2.THRESH_BINARY)
contours, hierarchy = cv2.findContours(threshold,cv2.RETR_TREE,cv2.CHAIN_APPROX_NONE)
# Create first mask used for rotation.
mask = np.ones(img.shape, np.uint8)*255
# Draw contours on the mask with size and ratio of borders for threshold.
for cnt in contours:
size = cv2.contourArea(cnt)
x,y,w,h = cv2.boundingRect(cnt)
if 10000 > size > 500 and w*2.5 > h:
cv2.drawContours(mask, [cnt], -1, (0,0,0), -1)
# Connect neighbour contours and select the biggest one (text).
kernel = np.ones((50,50),np.uint8)
opening = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel)
gray_op = cv2.cvtColor(opening, cv2.COLOR_BGR2GRAY)
_, threshold_op = cv2.threshold(gray_op, 150, 255, cv2.THRESH_BINARY_INV)
contours_op, hierarchy_op = cv2.findContours(threshold_op, cv2.RETR_TREE,cv2.CHAIN_APPROX_NONE)
cnt = max(contours_op, key=cv2.contourArea)
# Create rotated rectangle to get the angle of rotation and the 4 points of the rectangle.
_, _, angle = rect = cv2.minAreaRect(cnt)
(h,w) = img.shape[:2]
(center) = (w//2,h//2)
# Rotate the image.
M = cv2.getRotationMatrix2D(center, angle, 1.0)
rotated = cv2.warpAffine(img, M, (int(w),int(h)), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_CONSTANT)
# Create bounding box for rotated text (use old points of rotated rectangle).
box = cv2.boxPoints(rect)
a, b, c, d = box = np.int0(box)
bound =[]
bound.append(a)
bound.append(b)
bound.append(c)
bound.append(d)
bound = np.array(bound)
(x1, y1) = (bound[:,0].min(), bound[:,1].min())
(x2, y2) = (bound[:,0].max(), bound[:,1].max())
cv2.drawContours(img,[box],0,(0,0,255),2)
# Crop the image and create new mask for the final image.
rotated = rotated[y1:y2, x1-10:x2]
mask_final = np.ones(rotated.shape, np.uint8)*255
# Remove noise from the final image.
gray_r = cv2.cvtColor(rotated, cv2.COLOR_BGR2GRAY)
_, threshold_r = cv2.threshold(gray_r, 150, 255, cv2.THRESH_BINARY_INV)
contours, hierarchy = cv2.findContours(threshold_r,cv2.RETR_TREE,cv2.CHAIN_APPROX_NONE)
for cnt in contours:
size = cv2.contourArea(cnt)
if size < 500:
cv2.drawContours(threshold_r, [cnt], -1, (0,0,0), -1)
# Invert black and white.
final_image = cv2.bitwise_not(threshold_r)
# Display results.
cv2.imwrite('rotated12.png', final_image)
# Import module for finding path to database.
from pathlib import Path
# This code executes once amd writes two files.
# If file exists it skips this step, else it runs again.
file = Path("generalresponses.data")
if file.is_file() == False:
# Reading the training image
im = cv2.imread('pitrain1.png')
im3 = im.copy()
gray = cv2.cvtColor(im,cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray,(5,5),0)
thresh = cv2.adaptiveThreshold(blur,255,1,1,11,2)
# Finding contour
_,contours,hierarchy = cv2.findContours(thresh,cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE)
# Creates array and list for appending data
samples = np.empty((0,100))
responses = []
# Value serving to increment the "automatic" learning
i = 0
# Iterating through contours and appending the array and list with "learned" values
for cnt in contours:
i+=1
[x,y,w,h] = cv2.boundingRect(cnt)
cv2.rectangle(im,(x,y),(x+w,y+h),(0,0,255),2)
roi = thresh[y:y+h,x:x+w] # Croping ROI to bounding rectangle
roismall = cv2.resize(roi,(10,10)) # Resizing ROI to smaller image
cv2.imshow('norm',im)
# Appending values based on the pitrain1.png image
if i < 36:
responses.append(int(45))
elif 35 < i < 80:
responses.append(int(48))
elif 79 < i < 125:
responses.append(int(57))
elif 124 < i < 160:
responses.append(int(56))
elif 159 < i < 205:
responses.append(int(55))
elif 204 < i < 250:
responses.append(int(54))
elif 249 < i < 295:
responses.append(int(53))
elif 294 < i < 340:
responses.append(int(52))
elif 339 < i < 385:
responses.append(int(51))
elif 384 < i < 430:
responses.append(int(50))
elif 429 < i < 485:
responses.append(int(49))
else:
break
sample = roismall.reshape((1,100))
samples = np.append(samples,sample,0)
# Reshaping and saving database
responses = np.array(responses)
responses = responses.reshape((responses.size,1))
print('end')
np.savetxt('generalsamples.data',samples)
np.savetxt('generalresponses.data',responses, fmt='%s')
################### Recognition ########################
# Dictionary for numbers and characters (in this sample code the only
# character is " - ")
number = {
48 : "0",
53 : "5",
52 : "4",
50 : "2",
45 : "-",
55 : "7",
51 : "3",
57 : "9",
56 : "8",
54 : "6",
49 : "1"
}
####### training part ###############
samples = np.loadtxt('generalsamples.data',np.float32)
responses = np.loadtxt('generalresponses.data',np.float32)
responses = responses.reshape((responses.size,1))
model = cv2.ml.KNearest_create()
model.train(samples,cv2.ml.ROW_SAMPLE,responses)
############################# testing part #########################
im = cv2.imread('rotated12.png')
out = np.zeros(im.shape,np.uint8)
gray = cv2.cvtColor(im,cv2.COLOR_BGR2GRAY)
thresh = cv2.adaptiveThreshold(gray,255,1,1,11,2)
contours,hierarchy = cv2.findContours(thresh,cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE)
for cnt in contours:
[x,y,w,h] = cv2.boundingRect(cnt)
cv2.rectangle(im,(x,y),(x+w,y+h),(0,255,0),2)
roi = thresh[y:y+h,x:x+w]
roismall = cv2.resize(roi,(10,10))
roismall = roismall.reshape((1,100))
roismall = np.float32(roismall)
retval, results, neigh_resp, dists = model.findNearest(roismall,k=5)
string = int((results[0][0]))
string2 = number.get(string)
print(string2)
cv2.putText(out,str(string2),(x,y+h),0,1,(0,255,0))
cv2.imshow('im',im)
cv2.imshow('out',out)
cv2.waitKey(0)
cv2.destroyAllWindows()
Result:
Sorry for begin a complete moron in it,
I'm realy trying to learn as much as I can about coding,everything that goes around the computer and openCV with the very little time I have But here's the edited code I've managed to get partly working:
from PIL import Image
import pytesseract
import os
import picamera
import time
import cv2
import numpy as np
# Read image and search for contours.
img = cv2.imread('Example1.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
_, threshold = cv2.threshold(gray, 150, 255, cv2.THRESH_BINARY)
contours, hierarchy = cv2.findContours(threshold,cv2.RETR_TREE,cv2.CHAIN_APPROX_NONE) #EDITED
# Create first mask used for rotation.
mask = np.ones(img.shape, np.uint8)*255
# Draw contours on the mask with size and ratio of borders for threshold.
for cnt in contours:
size = cv2.contourArea(cnt)
x,y,w,h = cv2.boundingRect(cnt)
if 10000 > size > 500 and w*2.5 > h:
cv2.drawContours(mask, [cnt], -1, (0,0,0), -1)
# Connect neighbour contours and select the biggest one (text).
kernel = np.ones((50,50),np.uint8)
opening = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel)
gray_op = cv2.cvtColor(opening, cv2.COLOR_BGR2GRAY)
_, threshold_op = cv2.threshold(gray_op, 150, 255, cv2.THRESH_BINARY_INV)
contours_op, hierarchy_op = cv2.findContours(threshold_op, cv2.RETR_TREE,cv2.CHAIN_APPROX_NONE)
cnt = max(contours_op, key=cv2.contourArea)
# Create rotated rectangle to get the angle of rotation and the 4 points of the rectangle.
_, _, angle = rect = cv2.minAreaRect(cnt)
(h,w) = img.shape[:2]
(center) = (w//2,h//2)
# Rotate the image.
M = cv2.getRotationMatrix2D(center, angle, 1.0)
rotated = cv2.warpAffine(img, M, (int(w),int(h)), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_CONSTANT)
# Create bounding box for rotated text (use old points of rotated rectangle).
box = cv2.cv.BoxPoints(rect) #edited
a, b, c, d = box = np.int0(box)
bound =[]
bound.append(a)
bound.append(b)
bound.append(c)
bound.append(d)
bound = np.array(bound)
(x1, y1) = (bound[:,0].min(), bound[:,1].min())
(x2, y2) = (bound[:,0].max(), bound[:,1].max())
cv2.drawContours(img,[box],0,(0,0,255),2)
# Crop the image and create new mask for the final image.
rotated = rotated[y1:y2, x1:x2]
mask_final = np.ones(rotated.shape, np.uint8)*255
# Remove noise from the final image.
gray_r = cv2.cvtColor(rotated, cv2.COLOR_BGR2GRAY)
_, threshold_r = cv2.threshold(gray_r, 150, 255, cv2.THRESH_BINARY_INV)
contours, hierarchy = cv2.findContours(threshold_r,cv2.RETR_TREE,cv2.CHAIN_APPROX_NONE)
for cnt in contours:
size = cv2.contourArea(cnt)
if size < 500:
cv2.drawContours(threshold_r, [cnt], -1, (0,0,0), -1)
# Invert black and white.
final_image = cv2.bitwise_not(threshold_r)
# Display results.
cv2.imshow('final', final_image)
cv2.imshow('rotated', rotated)
#OCR Stage:
# write the grayscale image to disk as a temporary file so we can
# apply OCR to it
filename = "{}.png".format(os.getpid())
cv2.imwrite('Final_proc.jpg', final_image)
# load the image as a PIL/Pillow image, apply OCR, and then delete
# the temporary file
text = pytesseract.image_to_string(Image.open('Final_proc.jpg'))
os.remove('Final_proc.jpg')
print("\n" + text)
cv2.waitKey(0)
cv2.destroyAllWindows()
When compiling it now it gives me this output:
[img]https://i.imgur.com/ImdKSCv.jpg[/img]
which is a little different from what you showed and compiled on the windows machine but still super close.
anyidea what happened? just after that this should be realy easy to dissect the code and learn it easily.
Again thank you very much for your time! :D
So for the python 3 and openCV 3 version of the code in order to make the img work with tesseract you'd need to add an around 20px white boarder to extend the image for somereason (I assume it's because the convolutional matrix scanning effort) according to my other post:
pytesseract struggling to recognize clean black and white pictures with font numbers and 7 seg digits(python)
and here's how you'd add the boarder:
how to add border around an image in opencv python
In one line of code:
outputImage = cv2.copyMakeBorder(
inputImage,
topBorderWidth,
bottomBorderWidth,
leftBorderWidth,
rightBorderWidth,
cv2.BORDER_CONSTANT,
value=color of border
)

No output from Kalman filter in 2D object tracking

I'm trying to apply Kalman filter with opencv in python for tracking position of a ball. I can already detect it but there is still some noise I want to eliminate. There are two variables I measure - x and y position - and there are four variables I would like to get - x and y position and x and y velocity - but I get none. When I display x0, y0, vy and vx on the screen I get "[.0]".
Another problem is that I cannot apply control matrix to kalman.predict() function because I get the following error:
OpenCV Error: Assertion failed (a_size.width == len) in gemm, file /tmp/opencv3-20170518-8732-1bjq2j7/opencv-3.2.0/modules/core/src/matmul.cpp, line 1537
Traceback (most recent call last):
File "kalman.py", line 128, in <module>
kalmanout = kalman.predict(kalman.controlMatrix)
cv2.error: /tmp/opencv3-20170518-8732-1bjq2j7/opencv-3.2.0/modules/core/src/matmul.cpp:1537: error: (-215) a_size.width == len in function ge
This is the piece of code I'm using for Kalman filter (for control matrix application I use line kalmanout = kalman.predict(kalman.controlMatrix) at the end:
# import the necessary packages
from collections import deque
import numpy as np
import argparse
import imutils
import cv2
import time
# construct the argument parse and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-v", "--video",
help="path to the (optional) video file")
ap.add_argument("-b", "--buffer", type=int, default=10,
help="max buffer size")
ap.add_argument("-a", "--min-area", type=int, default=500, help="minimum area size")
args = vars(ap.parse_args())
# define the lower and upper boundaries of the "blue"
# ball in the HSV color space, then initialize the
# list of tracked points
greenLower = (48, 62, 88)
greenUpper = (151, 238, 255)
pts = deque(maxlen=args["buffer"])
tintervals = deque(maxlen=args["buffer"])
tPrev = 0;
pRad = 0
mapix = 0
mspeed = 0
# if a video path was not supplied, grab the reference
# to the webcam
if not args.get("video", False):
camera = cv2.VideoCapture(0)
# otherwise, grab a reference to the video file
else:
camera = cv2.VideoCapture(args["video"])
# keep looping
#initialize background subtraction
fgbg = cv2.createBackgroundSubtractorMOG2()
while True:
# grab the current frame
(grabbed, frame) = camera.read()
displayx = 0
# start counting time
tPrev = time.time()
# if we are viewing a video and we did not grab a frame,
# then we have reached the end of the video
if args.get("video") and not grabbed:
break
# resize the frame and apply background subtraction
frame = imutils.resize(frame, width=500)
mask = fgbg.apply(frame)
res = cv2.bitwise_and(frame, frame, mask = mask)
# blur the frame and convert it to the HSV
blurred = cv2.GaussianBlur(res, (11, 11), 0)
hsv = cv2.cvtColor(res, cv2.COLOR_BGR2HSV)
# construct a mask for the color "blue", then perform
# a series of dilations and erosions to remove any small
# blobs left in the mask
mask = cv2.inRange(hsv, greenLower, greenUpper)
mask = cv2.erode(mask, None, iterations=2)
mask = cv2.dilate(mask, None, iterations=2)
# find contours in the mask and initialize the current
# (x, y) center of the ball
cnts = cv2.findContours(mask.copy(), cv2.RETR_EXTERNAL,
cv2.CHAIN_APPROX_SIMPLE)[-2]
center = None
# only proceed if at least one contour was found
if len(cnts) > 0:
# find the largest contour in the mask, then use
# it to compute the minimum enclosing circle and
# centroid
c = max(cnts, key=cv2.contourArea)
((x, y), radius) = cv2.minEnclosingCircle(c)
pRad = radius
M = cv2.moments(c)
center = (int(M["m10"] / M["m00"]), int(M["m01"] / M["m00"]))
# only proceed if the radius meets a minimum size
if radius > 10:
# draw the circle and centroid on the frame,
# then update the list of tracked points
cv2.circle(frame, (int(x), int(y)), int(radius),
(0, 255, 255), 2)
cv2.circle(frame, center, 5, (0, 0, 255), -1)
# update time intervals queue
tintervals.appendleft(time.time() - tPrev)
# update the points queue
pts.appendleft(center)
# predict position of the ball
if (pRad > 0 and len(pts) > 5):
if pts[0] != None and pts[1] != None:
apix = 98.1/(0.032/pRad)
mapix = apix
y0 = pts[0][1]
x0 = pts[0][0]
kalmanin = np.array((2,1), np.float32) # measurement
kalmanout = np.zeros((4,1), np.float32) # tracked / prediction
kalmanin = np.array([[np.float32(x0)],[np.float32(y0)]])
tkalman = 0.01
kalman = cv2.KalmanFilter(4,2)
kalman.measurementMatrix = np.array([[1,0,0,0],[0,1,0,0]],np.float32)
kalman.transitionMatrix = np.array([[1,0,tkalman,0],[0,1,0,tkalman],[0,0,1,0],[0,0,0,1]],np.float32)
kalman.controlMatrix = np.array([[0],[0.5*(tkalman**2.0)], [0],[tkalman]],np.float32) * mapix
kalman.processNoiseCov = np.array([[1,0,0,0],[0,1,0,0],[0,0,1,0],[0,0,0,1]],np.float32) * 0.03
kalman.processNoiseCov = np.array([[1,0,0,0],[0,1,0,0],[0,0,1,0],[0,0,0,1]],np.float32) * 0.03
kalman.measurementNoiseCov = np.array([[1,0],[0,1]],np.float32) * 0.00009
kalman.correct(kalmanin)
kalmanout = kalman.predict(kalman.controlMatrix)
x0 = kalmanout[0]
y0 = kalmanout[1]
vx = kalmanout[2]
vy = kalmanout[3]
displayx = x0
listX = []
listY = []
for i in range(1, 11):
t = 0.01 * i
y = y0 + vy * t + (apix * (t ** 2)) / 2
x = x0 + vx * t
listX.append(int(x))
listY.append(int(y))
mspeed = vy
for i in range(0, 9):
cv2.line(frame, (listX[i], listY[i]), (listX[i+1], listY[i+1]), (255, 0, 0), 4)
# loop over the set of tracked points
for i in xrange(1, len(pts)):
# if either of the tracked points are None, ignore
# them
if pts[i - 1] is None or pts[i] is None:
continue
# otherwise, compute the thickness of the line and
# draw the connecting lines
thickness = int(np.sqrt(args["buffer"] / float(i + 1)) * 2.5)
cv2.line(frame, pts[i - 1], pts[i], (0, 0, 255), thickness)
cv2.putText(frame, "y axis speed: {}".format(displayx),
(120, frame.shape[0] - 70), cv2.FONT_HERSHEY_SIMPLEX,
0.5, (0, 0, 255), 1)
cv2.putText(frame, "radius in px: {}".format(pRad),
(120, frame.shape[0] - 30), cv2.FONT_HERSHEY_SIMPLEX,
0.5, (0, 0, 255), 1)
cv2.putText(frame, "apix: {}".format(mapix),
(120, frame.shape[0] - 10), cv2.FONT_HERSHEY_SIMPLEX,
0.5, (0, 0, 255), 1)
if (mapix != 0):
cv2.putText(frame, "radius in meters: {}".format((9.81*pRad)/mapix),
(120, frame.shape[0] - 50), cv2.FONT_HERSHEY_SIMPLEX,
0.5, (0, 0, 255), 1)
# shows x, y position, (newest input from pts)
cv2.putText(frame, "x, y: {}".format(pts[0]),
(10, frame.shape[0] - 10), cv2.FONT_HERSHEY_SIMPLEX,
0.35, (0, 0, 255), 1)
# show the frame to our screen
cv2.imshow("Frame", frame)
key = cv2.waitKey(1) & 0xFF
# if the 'q' key is pressed, stop the loop
if key == ord("q"):
break
# cleanup the camera and close any open windows
camera.release()
cv2.destroyAllWindows()
First of all I would move the initialization of the Kalman filter outside the loop. The main issue with your code is that you have set the control matrix. If I understand your task you are only observing the system, not controlling it. Just skip the kalman.controlMatrix initialization or set it to a zero matrix. In the loop you then just use
kalmanout = kalman.predict()
kalman.correct(kalmanin)

Categories

Resources