I am using cvlib for object detection and I want to be able to save the cropped image based on the bbox coordinates.
I have this in my code:
import io
import numpy as np
import cv2
import cvlib as cv
from cvlib.object_detection import draw_bbox

def detect_object(img):
    # Open image
    image_stream = io.BytesIO(img)
    image_stream.seek(0)
    file_bytes = np.asarray(bytearray(image_stream.read()), dtype=np.uint8)
    frame = cv2.imdecode(file_bytes, cv2.IMREAD_COLOR)
    # Detection
    bbox, label, conf = cv.detect_common_objects(frame)
    output_image = draw_bbox(frame, bbox, label, conf)
    return output_image, bbox, label, conf
and when I print bbox I get:
[3, -23, 1231, 731]
So, I want to use these coordinates to crop the original image and save only the detected object defined by them.
Something like this:
crop = output_image[bbox[2]:bbox[1], bbox[0]:bbox[3],:]
cv2.imwrite("crop.png", crop)
But when I do this, I realise that the crop doesn't contain the desired object; the coordinates are wrong.
How can I fix it? Why am I getting negative coordinates?
My Image is 1280x720 and the desired object occupies approximately the entire image.
I solved it using the comment from Christoph Rackwitz as a reference, i.e. handling the offsets. The detector can return boxes that extend beyond the image borders, which is why the coordinates can be negative, so the box has to be clamped to the image bounds before cropping:
image_height, image_width, image_channels = output_image.shape
box_xmin, box_ymin, box_xmax, box_ymax = bbox
if box_xmin < 0:
    box_xmin = 0
if box_ymin < 0:
    box_ymin = 0
if box_xmax > image_width:
    box_xmax = image_width
if box_ymax > image_height:
    box_ymax = image_height
crop = output_image[box_ymin:box_ymax, box_xmin:box_xmax, :]
cv2.imwrite("crop.jpg", crop)
Have a look at the image; it will give you a better idea of what I want to achieve. I want to rotate the image and fill the black part of the image, just like in the required image.
import cv2
import numpy as np

# Read the image
img = cv2.imread("input.png")
# Get the image size
h, w = img.shape[:2]
# Define the rotation matrix
M = cv2.getRotationMatrix2D((w/2, h/2), 30, 1)
# Rotate the image
rotated = cv2.warpAffine(img, M, (w, h))
mask = np.zeros(rotated.shape[:2], dtype=np.uint8)
mask[np.where((rotated == [0, 0, 0]).all(axis=2))] = 255
img_show(mask)
From the code I am able to get the mask of the black regions. Now I want to replace these black regions with the image portion, as shown in image 1. Is there a better solution for how I can achieve this?
Use the borderMode parameter of warpAffine.
You want to pass the BORDER_WRAP value.
Here's the result. This does exactly what you described with your first picture.
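A minimal sketch of that call, assuming the same input.png and 30° rotation as in the question:

import cv2

img = cv2.imread("input.png")
h, w = img.shape[:2]
M = cv2.getRotationMatrix2D((w / 2, h / 2), 30, 1)
# BORDER_WRAP tiles the source image, so the corners that would otherwise
# be black are filled with wrapped-around image content
rotated = cv2.warpAffine(img, M, (w, h), borderMode=cv2.BORDER_WRAP)
cv2.imwrite("rotated_wrap.png", rotated)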
I have an approach. You can first create a larger image consisting of 3 * 3 times your original image. When you rotate this image and only cut out the center of this large image, you have your desired result.
import cv2
import numpy as np
# Read the image
img = cv2.imread("input.png")
# Get the image size of the original image
h, w = img.shape[:2]
# make a large image containing 3 copies of the original image in each direction
large_img = np.tile(img, [3,3,1])
cv2.imshow("large_img", large_img)
# Define the rotation matrix. Rotate around the center of the large image
M = cv2.getRotationMatrix2D((w*3/2, h*3/2), 30, 1)
# Rotate the image
rotated = cv2.warpAffine(large_img, M, (w*3, h*3))
# crop only the center of the image (rows are indexed by height, columns by width)
cropped_image = rotated[h:h*2, w:w*2, :]
cv2.imshow("cropped_image", cropped_image)
cv2.waitKey(0)
Here is my code:
import cv2
import json

# JSON file in which the EasyOCR annotations have been saved.
img = cv2.imread('dummy.jpg')
img1 = img.copy()
# Rotated because the annotations follow the vertical alignment of the image; I have matched the orientation.
img1 = cv2.rotate(img1, rotateCode=cv2.ROTATE_90_CLOCKWISE)
rects = []
with open('dummy.json') as jsn:
    jsn_dict = json.load(jsn)
for k in jsn_dict['textAnnotations']:
    vertices = k['boundingPoly']['vertices']
    cv2.rectangle(img1, list(vertices[2].values()), list(vertices[0].values()), [0, 255, 0], 10)
    # I want to put the predicted text on top of the bounding boxes vertically, because my image is rotated anti-clockwise.
    cv2.putText(img1, k['description'], list(vertices[0].values()), cv2.FONT_HERSHEY_SIMPLEX, 5, [0, 255, 0], 5)
I have the code mentioned above for labelling the recognized text. As a first step, I put the image into the OCR model and it returns some values for the image: for every detected text we get three values, namely the vertices of the bounding box, the recognized text, and the accuracy percentage. But my problem is that my image is rotated via its Exif orientation value, while cv2 reads it as if it had zero rotation, so my text is printed horizontally. I want to print the text on the image vertically. I have tried many times but could not resolve my problem. I hope I have explained it well.
Try this one
import cv2

def transparentOverlay(src, overlay, pos=(0, 0), scale=1):
    """
    :param src: Input Color Background Image
    :param overlay: transparent Image (BGRA)
    :param pos: position where the image is to be blit.
    :param scale: scale factor of the transparent image.
    :return: Resultant Image
    """
    overlay = cv2.resize(overlay, (0, 0), fx=scale, fy=scale)
    h, w, _ = overlay.shape  # Size of foreground
    rows, cols, _ = src.shape  # Size of background Image
    y, x = pos[0], pos[1]  # Position of foreground/overlay image

    # loop over all pixels and apply the blending equation
    for i in range(h):
        for j in range(w):
            if x + i >= rows or y + j >= cols:
                continue
            alpha = float(overlay[i][j][3] / 255.0)  # read the alpha channel
            src[x + i][y + j] = alpha * overlay[i][j][:3] + (1 - alpha) * src[x + i][y + j]
    return src

def addImageWatermark(LogoImage, MainImage, opacity, pos=(10, 100)):
    opacity = opacity / 100

    OriImg = cv2.imread(MainImage, -1)
    waterImg = cv2.imread(LogoImage, -1)

    tempImg = OriImg.copy()
    print(tempImg.shape)

    overlay = transparentOverlay(tempImg, waterImg, pos)
    output = OriImg.copy()

    # apply the overlay
    cv2.addWeighted(overlay, opacity, output, 1 - opacity, 0, output)

    cv2.imshow('Life2Coding', output)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

if __name__ == '__main__':
    addImageWatermark('./logo.png', './hanif.jpg', 100, (10, 100))
Rotate your image 90º clockwise, add the text, and rotate the image back to the original.
# Rotate 90º clockwise
img_rot = cv2.rotate(img1 , cv2.ROTATE_90_CLOCKWISE)
# Add your text here, adjusting x and y coordinates to the new orientation.
# The new adjusted coordinates will be:
# (x2, y2) = (original_height - y, x)
# [...]
# Rotate back to the original orientation
img1 = cv2.rotate(img_rot, cv2.ROTATE_90_COUNTERCLOCKWISE)
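Putting it together, a small sketch; here text, x, and y are placeholders for one recognized word and its original anchor point from your JSON, so adjust them to your data:

orig_h = img1.shape[0]
img_rot = cv2.rotate(img1, cv2.ROTATE_90_CLOCKWISE)
# text, x, y: one recognized word and its anchor in the un-rotated image (placeholders)
# map the anchor point (x, y) into the rotated frame
x2, y2 = orig_h - y, x
cv2.putText(img_rot, text, (x2, y2), cv2.FONT_HERSHEY_SIMPLEX, 5, [0, 255, 0], 5)
# rotate back so the text runs vertically in the original orientation
img1 = cv2.rotate(img_rot, cv2.ROTATE_90_COUNTERCLOCKWISE)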
I wrote my own affine transformation code, but I want to apply the new pixel coordinates I computed to my image and save the result. How can I do this? In other words, how can I write the image out using these new pixel coordinates? Here is the part of the code I couldn't get to work.
img = cv2.imread("high1.jpeg", 0)
new_pixel_coor = np.dot(coefficient_pixel_mat, unknown_parameters).astype(int)
a = len(new_pixel_coor) // 2
new_pixel_coor = new_pixel_coor.reshape((a, 2))
print("Pixel Coordinates", "\n", new_pixel_coor)
img2 = np.zeros((width, height, 3), dtype=np.uint)
# There is an error here; I could not find a way to save the image.
for w in range(width):
    for h in range(height):
        img[w][h] = img2[new_pixel_coor]
# Show and save the image
cv2.imshow('OutputImage.jpg', img2)
cv2.imwrite('OutputImage.jpg', img2)
My machine learning algorithm has already been trained on the 70,000 images in the MNIST database. I want to test it on an image not included in the MNIST dataset. However, my predict function cannot read the array representation of my test image.
How do I test my algorithm on an external image?
Why is my code failing?
PS I'm using python3
Error Received:
Traceback (most recent call last):
File "hello_world2.py", line 28, in <module>
print(sgd_clf.predict(arr))
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/sklearn/linear_model/base.py", line 336, in predict
scores = self.decision_function(X)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/sklearn/linear_model/base.py", line 317, in decision_function
% (X.shape[1], n_features))
ValueError: X has 15 features per sample; expecting 784
Code:
# Common Imports
import numpy as np
from sklearn.datasets import fetch_mldata
from sklearn.linear_model import SGDClassifier
from PIL import Image
from resizeimage import resizeimage
# loading and learning MNIST data
mnist = fetch_mldata('MNIST original')
x, y = mnist["data"], mnist["target"]
sgd_clf = SGDClassifier(random_state=42)
sgd_clf.fit(x, y)
# loading and converting to array a non-MNIST image of a "5", which is in the same folder
img = Image.open("5.png")
arr = np.array(img)
# trying to predict that the image is a "5"
img = Image.open("5.png")
img = img.convert('L') #makes it greyscale
img = resizeimage.resize_thumbnail(img, [28,28])
arr = np.array(img)
print(sgd_clf.predict(arr)) # ERROR... why????????? How do you fix it?????
It's not simply a matter of resizing; the image needs the digit centred, white on black, etc. I've been working on a function to do this job. This is the current version, which uses OpenCV. It could do with further improvement, including using PIL instead of OpenCV, but it should give an idea of the steps required.
import cv2
import numpy as np

def open_as_mnist(image_path):
    """
    Assume this is a colour or greyscale image of a digit which has not so far been preprocessed.
    Black and white
    Resize to 20 x 20 (digit in centre ideally)
    Sharpen
    Add white border to make it 28 x 28
    Convert to white on black
    """
    # open as greyscale
    image = cv2.imread(image_path, 0)

    # crop to the contour with the largest area (do_cropping is a separate helper, not shown here)
    cropped = do_cropping(image)

    # resize the image to 20 x 20
    resized20 = cv2.resize(cropped, (20, 20), interpolation=cv2.INTER_CUBIC)
    cv2.imwrite('1_resized.jpg', resized20)

    # gaussian filtering
    blurred = cv2.GaussianBlur(resized20, (3, 3), 0)

    # white digit on black background
    ret, thresh = cv2.threshold(blurred, 127, 255, cv2.THRESH_BINARY_INV)

    padded = to20by20(thresh)  # to20by20 is another helper, not shown here
    resized28 = padded_image(padded, 28)

    # normalize the image values to fit in the range [0, 1]
    norm_image = np.asarray(resized28, dtype=np.float32) / 255.

    # cv2.imshow('image', norm_image)
    # cv2.waitKey(0)

    # Flatten the image to a 1-D vector and return
    flat = norm_image.reshape(1, 28 * 28)
    # return flat

    # normalize pixels to 0 and 1. 0 is pure white, 1 is pure black.
    tva = [(255 - x) * 1.0 / 255.0 for x in flat]
    return tva
def padded_image(image, tosize):
    """
    This method adds padding to the image and makes it a tosize x tosize array,
    without losing the aspect ratio.
    Assumes the desired image is square.
    :param image: the input image as a numpy array
    :param tosize: the final dimensions
    """
    # image dimensions
    image_height, image_width = image.shape

    # if not already square then pad to square
    if image_height != image_width:
        # Add padding
        # The aim is to make an image of differing width and height a square image.
        # For that, the bigger of width and height is determined first.
        max_index = np.argmax([image_height, image_width])

        # if height is the bigger one, then add padding to width until width
        # becomes equal to height
        if max_index == 0:
            # order of padding is: top, bottom, left, right
            left = int((image_height - image_width) / 2)
            right = image_height - image_width - left
            padded_img = cv2.copyMakeBorder(image, 0, 0,
                                            left,
                                            right,
                                            cv2.BORDER_CONSTANT)
        # else if width is the bigger one, then add padding to height until height
        # becomes equal to width
        else:
            top = int((image_width - image_height) / 2)
            bottom = image_width - image_height - top
            padded_img = cv2.copyMakeBorder(image, top, bottom, 0, 0, cv2.BORDER_CONSTANT)
    else:
        padded_img = image

    # now that it's a square, add any additional padding required
    image_height, image_width = padded_img.shape
    padding = tosize - image_height

    # need to handle the case where the padding is not divisible by 2
    left = top = int(padding / 2)
    right = bottom = padding - left
    # pad the squared image (not the original), otherwise the final size is wrong
    resized = cv2.copyMakeBorder(padded_img, top, bottom, left, right, cv2.BORDER_CONSTANT)
    return resized
If you want to read a picture and then resize it, try:
In [1]: import PIL.Image as Image
In [2]: img = Image.open('2.jpg', mode='r')
In [3]: img.mode
Out[3]: 'RGB'
In [4]: img.size
Out[4]: (2880, 1800)
In [5]: img_new = img.resize([4000, 4000], Image.ANTIALIAS)
In [6]: img_new2 = img.resize([32, 32], Image.ANTIALIAS)
Docs are here
This is the 2.jpg, sorry, it is not a digit.
This picture is from the Internet, sorry, I forget the source.
If you encounter an image whose mode is 'RGBA', I recommend converting it to 'RGB' mode:
newimg = Image.new('RGB', img.size)
newimg.paste(img, mask=img.split()[3])
return newimg
Please try this:
img = Image.open("5.png")
img = img.resize((28,28))
img = img.convert('L') #makes it greyscale
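Whichever preprocessing you use, the classifier expects a single row of 28 * 28 = 784 features rather than a 28 x 28 array, which is what the ValueError is complaining about. A minimal sketch, assuming the sgd_clf trained above and a dark digit on a light background:

from PIL import Image
import numpy as np

img = Image.open("5.png").convert('L').resize((28, 28))
arr = np.array(img, dtype=np.float64)
# MNIST digits are white on black, so invert a black-on-white source image
arr = 255 - arr
# reshape the 28x28 image into one sample with 784 features
print(sgd_clf.predict(arr.reshape(1, -1)))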
Can someone tell me how to rotate only part of an image like this:
How do I find the coordinates / center of this image?
I can rotate the whole picture using this:
from PIL import Image
def rotate_image():
    img = Image.open("nime1.png")
    img.rotate(45).save("plus45.png")
    img.rotate(-45).save("minus45.png")
    img.rotate(90).save("90.png")
    img.transpose(Image.ROTATE_90).save("90_trans.png")
    img.rotate(180).save("180.png")

if __name__ == '__main__':
    rotate_image()
You can crop an area of the picture into a new variable. In this case, I cropped a 120x120 pixel box out of the original image. It is rotated by 90° and then pasted back onto the original.
from PIL import Image
img = Image.open('./image.jpg')
sub_image = img.crop(box=(200,0,320,120)).rotate(90)
img.paste(sub_image, box=(200,0))
So I thought about this a bit more and crafted a function that applies a circular mask to the cropped image before rotations. This allows an arbitrary angle without weird effects.
import numpy
from PIL import Image

def circle_rotate(image, x, y, radius, degree):
    # assumes the source image has an alpha channel (RGBA)
    img_arr = numpy.asarray(image)
    box = (x - radius, y - radius, x + radius + 1, y + radius + 1)
    crop = image.crop(box=box)
    crop_arr = numpy.asarray(crop)
    # build the circle mask
    mask = numpy.zeros((2 * radius + 1, 2 * radius + 1))
    for i in range(crop_arr.shape[0]):
        for j in range(crop_arr.shape[1]):
            if (i - radius) ** 2 + (j - radius) ** 2 <= radius ** 2:
                mask[i, j] = 1
    # create the new circular image
    sub_img_arr = numpy.empty(crop_arr.shape, dtype='uint8')
    sub_img_arr[:, :, :3] = crop_arr[:, :, :3]
    sub_img_arr[:, :, 3] = mask * 255
    sub_img = Image.fromarray(sub_img_arr, "RGBA").rotate(degree)
    i2 = image.copy()
    i2.paste(sub_img, box[:2], sub_img.convert('RGBA'))
    return i2
i2 = circle_rotate(img, 260, 60, 60, 45)
i2
You can solve this problem as such. Say you have img = Image.open("nime1.png"):
Create a copy of the image using img2 = img.copy()
Create a crop of img2 at the desired location using img2.crop(). You can read how to do this here.
Paste the crop back onto img at the appropriate location using img.paste(); see the sketch after the notes.
Notes:
To find the center coordinate, you can divide the width and height by 2 :)
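A minimal sketch of those steps, combining them with the crop/rotate/paste idea above; the box size and angle here are just examples:

from PIL import Image

img = Image.open("nime1.png")
w, h = img.size
cx, cy = w // 2, h // 2                      # center of the image
box = (cx - 60, cy - 60, cx + 60, cy + 60)   # example 120x120 region around the center
part = img.crop(box).rotate(45)              # crop the region and rotate it
img.paste(part, box[:2])                     # paste it back at the same location
img.save("partial_rotate.png")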