Extracting image from bounding box - selective search - python

I am learning how to use a selective search algorithm to create bounding boxes around objects in an image, extract the smaller images within each bounding box, and then run further analysis on those smaller images.
I am able to obtain the bounding boxes with the following code, but how do I save/extract/export the image within each bounding box?
import skimage.data
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import selectivesearch
import time
import io
import PIL
import scipy.misc
from skimage.io import imread
from PIL import Image
from skimage.transform import rescale, resize, downscale_local_mean
def main():
    # loading astronaut image
    # image = skimage.io.imread('/Users/vivek/Desktop/IMG_3350.JPG')
    # img = resize(image, (500,500), mode = 'reflect')
    img = skimage.io.imread('/Users/vivek/Downloads/IMG_3350_640x480.JPG')
    print('image loaded')

    # perform selective search
    print('initializing selective search')
    start = time.time()
    img_lbl, regions = selectivesearch.selective_search(
        img, scale=600, sigma=0.9, min_size=10)
    candidates = set()
    for r in regions:
        # excluding same rectangle (with different segments)
        if r['rect'] in candidates:
            continue
        # excluding regions smaller than 2000 pixels
        if r['size'] < 2000:
            continue
        # distorted rects
        x, y, w, h = r['rect']
        if w / h > 1.2 or h / w > 1.2:
            continue
        candidates.add(r['rect'])
    print('selective search complete')
    end = time.time()
    totalTime = end - start
    print('time taken to run this is : ' + str(totalTime))

    # draw rectangles on the original image
    fig, ax = plt.subplots(ncols=1, nrows=1, figsize=(6, 6))
    ax.imshow(img)
    for x, y, w, h in candidates:
        print(x, y, w, h)
        rect = mpatches.Rectangle(
            (x, y), w, h, fill=False, edgecolor='red', linewidth=1)
        ax.add_patch(rect)
    # plt.imsave("testerimage.jpg", None)
    plt.show()

if __name__ == "__main__":
    main()
Thanks in advance

You know how to get each rectangle using the lines
for x, y, w, h in candidates:
To get the image inside this rectangle, just slice the array (note that rows are indexed by y and columns by x):
imgRect = img[y:y+h, x:x+w]
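For example, a minimal sketch that writes each region to its own file with skimage.io.imsave (assuming img and candidates exist as in your main(); the region_{}.jpg naming is just an illustration):

import skimage.io

for i, (x, y, w, h) in enumerate(candidates):
    # Rows are indexed by y, columns by x.
    imgRect = img[y:y+h, x:x+w]
    skimage.io.imsave('region_{}.jpg'.format(i), imgRect)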

Related

How to make minimum rectangle box using contours in scikit-image?

I have an image of rice grains. My objective is to find the length/width of each grain and rotate it to a 90 degree angle. So far I have detected the rice grains by finding their contours using the function measure.find_contours in scikit-image. Now that I have the contours, I want to find the length/width of each grain, draw a rectangle around it, and rotate it. Any insights on how to do this using scikit-image?
My code is:
from pathlib import Path
import os, skimage
from skimage import io, color, filters, morphology, measure, draw
from matplotlib import pyplot as plt
import numpy as np

def Detect_Images():
    """Entering the directory name and join it with the current path"""
    directory = input('Enter directory: ')
    paths = os.getcwd() + '/' + directory
    image_name = []
    '''Detecting image names'''
    for path in Path(paths).rglob('*.jpg'):
        image_name.append(path.name)
    # print(image_name, paths)
    '''Reading images and converting them to Gray'''
    image_objects = []
    for file in image_name:
        image_objects.append(io.imread(str(paths) + '/' + str(file), as_gray=True))
    return image_name, image_objects

def Adaptive_Threshold(image_objects):
    """Applying adaptive thresholding"""
    thresholded_images = []
    for image in image_objects:
        blur = skimage.filters.gaussian(image)
        t = skimage.filters.threshold_otsu(blur)
        binary = blur > t
        arr = skimage.img_as_ubyte(binary)
        thresholded_images.append(arr)
    return thresholded_images

def Erosion(thresholded_images):
    """Performing erosion"""
    eroded_images = []
    kernel = np.ones((3, 3), np.uint8)
    for image in thresholded_images:
        eroded_images.append(morphology.erosion(image, kernel))
    return eroded_images

if __name__ == '__main__':
    image_name, image_objects = Detect_Images()
    thresholded_images = Adaptive_Threshold(image_objects)
    eroded_images = Erosion(thresholded_images)

    '''Finding contours'''
    contours = []
    figs = []
    for image in eroded_images:
        contours.append(measure.find_contours(image, 0.8))
        # io.imshow(image)
        ss, ax = plt.subplots()
        ax.imshow(image, cmap='gray')
        figs.append(ax)
    for i, j in zip(contours, image_name):
        print(j, ' has ', len(i), 'rice grains.')

    '''Drawing Contours'''
    for contour, fig in zip(contours, figs):
        a = fig
        for i in contour:
            a.plot(i[:, 1], i[:, 0], linewidth=2)
    plt.show()
And the image is:
Rice
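One possible direction, sketched here as an assumption (this is not an answer from the thread): skimage.measure.regionprops on a labeled mask gives each grain's major/minor axis lengths, which approximate length/width, and its orientation, which skimage.transform.rotate can use to straighten the grain.

import numpy as np
from skimage import measure, transform

# Assumes eroded_images[0] is one of the binary grain masks
# produced by Erosion() above.
mask = eroded_images[0] > 0
label_im = measure.label(mask)
for region in measure.regionprops(label_im):
    length = region.major_axis_length  # approximate grain length (pixels)
    width = region.minor_axis_length   # approximate grain width (pixels)
    print(f'grain {region.label}: length={length:.1f}, width={width:.1f}')
    # Crop the grain's bounding box and rotate so the long axis is vertical.
    minr, minc, maxr, maxc = region.bbox
    crop = mask[minr:maxr, minc:maxc].astype(float)
    # region.orientation is the angle (radians) between the row axis and the
    # major axis; the sign convention may need flipping for your images.
    upright = transform.rotate(crop, -np.degrees(region.orientation), resize=True)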

Images are changed to different colors (with Pillow), how do I get them back to the original colors?

I am trying to find the dominant color in a frame of a video. This works well; however, my frames somehow get converted into different colors. Yellow/pink becomes blue/purple-ish, but black and white stay the same (so the colors are not simply inverted).
Does anyone know where this comes from and how I can change it so that the original colors are kept? This is my code:
import cv2
from sklearn.cluster import KMeans
from collections import Counter
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.patches as patches

video = cv2.VideoCapture('video.mp4')

def show_blurred_image(image, dominant_color):
    frame_to_blur = Image.fromarray(image)
    blurred_frame = cv2.blur(image, (200,200))
    blurred_frame = Image.fromarray(blurred_frame)

    plt.subplot(121),plt.imshow(frame_to_blur),plt.title('Original')
    plt.xticks([]), plt.yticks([])
    plt.subplot(122),plt.imshow(blurred_frame),plt.title('Blurred')
    plt.xticks([]), plt.yticks([])

    R = round(dominant_color[0])
    G = round(dominant_color[1])
    B = round(dominant_color[2])
    custom_color = '#%02x%02x%02x' % (R, G, B)
    print(custom_color)
    rect = patches.Rectangle((1620,0),300,1080,linewidth=1,
                             fill = True,
                             edgecolor=custom_color,
                             facecolor=custom_color)
    ax = plt.gca()
    ax.add_patch(rect)
    plt.show()

def get_dominant_color(image, k=4, image_processing_size = None):
    """
    takes an image as input
    returns the dominant color of the image as a list

    dominant color is found by running k means on the
    pixels & returning the centroid of the largest cluster

    processing time is sped up by working with a smaller image;
    this resizing can be done with the image_processing_size param
    which takes a tuple of image dims as input

    >>> get_dominant_color(my_image, k=4, image_processing_size = (25, 25))
    [56.2423442, 34.0834233, 70.1234123]
    """
    # resize image if new dims provided
    if image_processing_size is not None:
        image = cv2.resize(image, image_processing_size,
                           interpolation = cv2.INTER_AREA)
    # reshape the image to be a list of pixels
    image = image.reshape((image.shape[0] * image.shape[1], 3))
    # cluster and assign labels to the pixels
    clt = KMeans(n_clusters = k)
    labels = clt.fit_predict(image)
    # count labels to find most popular
    label_counts = Counter(labels)
    # subset out most popular centroid
    dominant_color = clt.cluster_centers_[label_counts.most_common(1)[0][0]]
    return list(dominant_color)

dominant_colors = []
show_frame = 10
frame_nb = 0
while(video.isOpened()):
    ret, frame = video.read()
    if ret == True:
        if (frame_nb == show_frame):
            dominant_color = get_dominant_color(frame)
            show_blurred_image(frame, dominant_color)
        frame_nb += 1
    else:
        break
video.release()
cv2.destroyAllWindows()
OpenCV loads images in BGR format, while PIL and matplotlib work with RGB. If you want to use these libraries together, you need to convert the images to the right color space.
In your case:
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
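A minimal sketch of where that conversion could go in your read loop (assuming the rest of the code stays as in the question):

while video.isOpened():
    ret, frame = video.read()
    if not ret:
        break
    # Convert BGR (OpenCV) to RGB before handing the frame to
    # PIL / matplotlib / the k-means color analysis.
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    if frame_nb == show_frame:
        dominant_color = get_dominant_color(frame)
        show_blurred_image(frame, dominant_color)
    frame_nb += 1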

How to find a point after warpPerspective?

I have a set of coordinates/points that I found in the original image before warpPerspective. How do I get the corresponding points in the now cropped and perspective-corrected image?
For example:
import cv2 as cv
import numpy as np
import matplotlib.pyplot as plt
img = cv.imread('sudoku.png')
rows,cols,ch = img.shape
pts1 = np.float32([[56,65],[368,52],[28,387],[389,390]])
pts2 = np.float32([[0,0],[300,0],[0,300],[300,300]])
point = np.array([[10,10]])
M = cv.getPerspectiveTransform(pts1,pts2)
dst = cv.warpPerspective(img,M,(300,300))
plt.subplot(121),plt.imshow(img),plt.title('Input')
plt.subplot(122),plt.imshow(dst),plt.title('Output')
How do I map the coordinate [10,10] in img to the dst image?
You have to perform the same transformations (mathematically) as you have done on the image. In this case it means using cv2.perspectiveTransform (note that the input needs to have 1 row per point, 1 column, and 2 channels -- the first being X, the second the Y coordinate).
This function will transform all the input points; it doesn't perform any cropping. You will need to post-process the transformed coordinates and discard those that fall outside the crop area. In your case, you want to retain points where (0 <= x < 300) and (0 <= y < 300).
Sample code:
import cv2 as cv
import numpy as np
import matplotlib.pyplot as plt

img = cv.imread('sudoku.png')
rows,cols,ch = img.shape
pts1 = np.float32([[56,65],[368,52],[28,387],[389,390]])
pts2 = np.float32([[0,0],[300,0],[0,300],[300,300]])
points = np.float32([[[10, 10]], [[116,128]], [[254,261]]])

M = cv.getPerspectiveTransform(pts1,pts2)
dst = cv.warpPerspective(img,M,(300,300))

# Transform the points
transformed = cv.perspectiveTransform(points, M)

# Perform the cropping -- filter out points that fall outside the crop area
cropped = []
for pt in transformed:
    x, y = pt[0]
    if x >= 0 and x < dst.shape[1] and y >= 0 and y < dst.shape[0]:
        print("Valid point (%d, %d)" % (x, y))
        cropped.append([[x, y]])
    else:
        print("Out-of-bounds point (%d, %d)" % (x, y))

# Turn it back into a single numpy array in the same (N, 1, 2) point format
cropped = np.array(cropped, dtype=np.float32)

# Visualize
plt.subplot(121)
plt.imshow(img)
for pt in points:
    x, y = pt[0]
    plt.scatter(x, y, s=100, c='red', marker='x')
plt.title('Input')

plt.subplot(122)
plt.imshow(dst)
for pt in transformed:
    x, y = pt[0]
    plt.scatter(x, y, s=100, c='red', marker='x')
plt.title('Output')

plt.show()
Console Output:
Out-of-bounds point (-53, -63)
Valid point (63, 67)
Valid point (192, 194)
Visualization:

Connected Component Labeling Algorithm in Python

My work requires applying a local binary operator to images. For that I have already converted the images to grayscale and then implemented a connected-components analysis on the image as well.
Here is the code:
Adding Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from skimage.io import imread, imshow
from skimage.color import rgb2gray
from skimage.morphology import (erosion, dilation, closing, opening,area_closing, area_opening)
from skimage.measure import label, regionprops, regionprops_table
Rendering the image
plt.figure(figsize=(6,6))
painting = imread("E:/Project/for_annotation/Gupi Gain0032.jpg")
plt.imshow(painting);
plt.figure(figsize=(6,6))
Binarizing Image
gray_painting = rgb2gray(painting)
binarized = gray_painting<0.55
plt.imshow(binarized);
Declaring Kernel
square = np.array([[1, 1, 1],
                   [1, 1, 1],
                   [1, 1, 1]])
Dilation function
def multi_dil(im, num, element=square):
    for i in range(num):
        im = dilation(im, element)
    return im
Erosion function
def multi_ero(im, num, element=square):
    for i in range(num):
        im = erosion(im, element)
    return im
Functions Applied
plt.figure(figsize=(6,6))
multi_dilated = multi_dil(binarized, 7)
area_closed = area_closing(multi_dilated, 50000)
multi_eroded = multi_ero(area_closed, 7)
opened = opening(multi_eroded)
plt.imshow(opened);
Label function
plt.figure(figsize=(6,6))
label_im = label(opened)
regions = regionprops(label_im)
plt.imshow(label_im);
Extract features
properties = ['area','convex_area','bbox_area', 'extent', 'mean_intensity','solidity', 'eccentricity', 'orientation']
pd.DataFrame(regionprops_table(label_im, gray_painting,
                               properties=properties))
Filtering Regions
masks = []
bbox = []
list_of_index = []
for num, x in enumerate(regions):
    area = x.area
    convex_area = x.convex_area
    if (num != 0 and (area > 100) and (convex_area/area < 1.05)
            and (convex_area/area > 0.95)):
        masks.append(regions[num].convex_image)
        bbox.append(regions[num].bbox)
        list_of_index.append(num)
count = len(masks)
Extracting Images
fig, ax = plt.subplots(2, int(count/2), figsize=(15,8))
for axis, box, mask in zip(ax.flatten(), bbox, masks):
    red = painting[:,:,0][box[0]:box[2], box[1]:box[3]] * mask
    green = painting[:,:,1][box[0]:box[2], box[1]:box[3]] * mask
    blue = painting[:,:,2][box[0]:box[2], box[1]:box[3]] * mask
    image = np.dstack([red, green, blue])
    axis.imshow(image)
plt.tight_layout()

plt.figure(figsize=(6,6))
rgb_mask = np.zeros_like(label_im)
for x in list_of_index:
    rgb_mask += (label_im == x + 1).astype(int)
red = painting[:,:,0] * rgb_mask
green = painting[:,:,1] * rgb_mask
blue = painting[:,:,2] * rgb_mask
image = np.dstack([red, green, blue])
plt.imshow(image);
I am getting an error:
ValueError: Number of columns must be a positive integer, not 0
There is a possible approach which is not very far from what you attempted. Assume the background pixels are assigned the label 0, and the object pixels the value 1.
scan the image row by row;
when you meet a pixel 1, set a new label and perform a flood fill operation, replacing 1 by the new label.
Flood filling can be implemented very simply:
set the starting pixel to the new label;
recursively fill the eight neighbors, if they have a 1.
https://en.wikipedia.org/wiki/Flood_fill
The code of this version is pretty simple. But you will notice that it can easily overflow the stack because the number of pending fills can be as large as the image size.
def FloodFill(I, X, Y, Label):
    # Recursively relabel the 8-connected component of 1-pixels at (X, Y).
    I[X, Y] = Label
    for NX in (X - 1, X, X + 1):
        for NY in (Y - 1, Y, Y + 1):
            if 0 <= NX < I.shape[0] and 0 <= NY < I.shape[1] and I[NX, NY] == 1:
                FloodFill(I, NX, NY, Label)

def CCL(I):
    # Labels start at 2 so they cannot collide with the object value 1.
    Label = 1
    for X in range(I.shape[0]):
        for Y in range(I.shape[1]):
            if I[X, Y] == 1:
                Label += 1
                FloodFill(I, X, Y, Label)
So I would recommend the scanline version, which is a little more involved.
https://en.wikipedia.org/wiki/Flood_fill#Scanline_fill
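As a stopgap, a minimal sketch (my addition, not the scanline algorithm) that sidesteps the recursion limit by managing an explicit stack while keeping the same 8-way logic:

def FloodFillIterative(I, X, Y, Label):
    # Same 8-way fill, but with an explicit stack instead of recursion,
    # so large regions cannot overflow the call stack.
    stack = [(X, Y)]
    while stack:
        CX, CY = stack.pop()
        if I[CX, CY] != 1:
            continue
        I[CX, CY] = Label
        for NX in (CX - 1, CX, CX + 1):
            for NY in (CY - 1, CY, CY + 1):
                if 0 <= NX < I.shape[0] and 0 <= NY < I.shape[1] and I[NX, NY] == 1:
                    stack.append((NX, NY))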

10 Pin Bowling score capture

I want to use OCR to capture the bowling scores from the monitor at the lanes. I had a look at this sudoku solver, as I think it's pretty similar - numbers and grids, right? It has trouble finding the horizontal lines. Has anyone got any tips for pre-processing this image to make it easier to detect the lines (or the numbers)? Also, any tips for how to deal with the split (the orange ellipse around some of the 8s in the image)?
So far I have got the outline of the score area and cropped it.
import matplotlib
matplotlib.use('TkAgg')
from skimage import io
import numpy as np
import matplotlib.pyplot as plt
from skimage import measure
from skimage.color import rgb2gray
# import pytesseract
from matplotlib.path import Path
from qhd import *
def polygonArea(poly):
    """
    Return area of an unclosed polygon.

    :see: https://stackoverflow.com/a/451482
    :param poly: (n,2)-array
    """
    # we need a plain list for the following operations
    if isinstance(poly, np.ndarray):
        poly = poly.tolist()
    segments = zip(poly, poly[1:] + [poly[0]])
    return 0.5 * abs(sum(x0*y1 - x1*y0
                         for ((x0, y0), (x1, y1)) in segments))
filename = 'good.jpg'
image = io.imread(filename)
image = rgb2gray(image)
# Find contours at a constant value of 0.4
contours = measure.find_contours(image, 0.4)
# Display the image and plot all contours found
fig, ax = plt.subplots()
c = 0
biggest = None
biggest_size = 0
for n, contour in enumerate(contours):
    curr_size = polygonArea(contour)
    if curr_size > biggest_size:
        biggest = contour
        biggest_size = curr_size
biggest = qhull2D(biggest)
# Approximate that so we just get a rectangle.
biggest = measure.approximate_polygon(biggest, 500)
# vertices of the cropping polygon
yc = biggest[:,0]
xc = biggest[:,1]
xycrop = np.vstack((xc, yc)).T
# xy coordinates for each pixel in the image
nr, nc = image.shape
ygrid, xgrid = np.mgrid[:nr, :nc]
xypix = np.vstack((xgrid.ravel(), ygrid.ravel())).T
# construct a Path from the vertices
pth = Path(xycrop, closed=False)
# test which pixels fall within the path
mask = pth.contains_points(xypix)
# reshape to the same size as the image
mask = mask.reshape(image.shape)
# create a masked array
masked = np.ma.masked_array(image, ~mask)
# if you want to get rid of the blank space above and below the cropped
# region, use the min and max x, y values of the cropping polygon:
xmin, xmax = int(xc.min()), int(np.ceil(xc.max()))
ymin, ymax = int(yc.min()), int(np.ceil(yc.max()))
trimmed = masked[ymin:ymax, xmin:xmax]
plt.imshow(trimmed, cmap=plt.cm.gray), plt.title('trimmed')
plt.show()
https://imgur.com/LijB85I is an example of how the score is displayed.
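Not from the thread, but one common pre-processing direction, sketched under the assumption that trimmed is the cropped grayscale score area produced above: run an edge detector on the crop, then a probabilistic Hough transform to recover the long grid lines.

import numpy as np
import matplotlib.pyplot as plt
from skimage.feature import canny
from skimage.transform import probabilistic_hough_line

# Assumes `trimmed` is the masked, cropped grayscale image from above.
gray = np.ma.filled(trimmed, 0)
edges = canny(gray, sigma=2)
# Keep only long, nearly continuous segments -- the score-grid lines.
lines = probabilistic_hough_line(edges, threshold=10,
                                 line_length=100, line_gap=5)
plt.imshow(gray, cmap=plt.cm.gray)
for (x0, y0), (x1, y1) in lines:
    plt.plot((x0, x1), (y0, y1), 'r-')
plt.show()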
