I have a transparent-background image with some non-transparent text, and I want to find the bounding boxes of each individual word in the text.
Here is the code that creates a transparent image, draws some text ("Hello World", for example), then applies an affine transform and thumbnails it:
from PIL import Image, ImageFont, ImageDraw, ImageOps
import numpy as np
fontcolor = (255,255,255)
fontsize = 180
# padding rate for setting the image size of font
fimg_padding = 1.1
# check code bbox padding rate
bbox_gap = fontsize * 0.05
# Rotation +- N degrees
# Choose a font type for output---
font = ImageFont.truetype('Fonts/Bebas.TTF', fontsize)
# the text is "Hello World"
code = "Hello world"
# Get the related info of font---
# font.getsize() was removed in Pillow 10; getbbox() is the replacement
left, top, right, bottom = font.getbbox(code)
code_w, code_h = right - left, bottom - top
# Setting the image size of font---
img_size = int((code_w) * fimg_padding)
# Create a RGBA image with transparent background
img = Image.new("RGBA", (img_size,img_size),(255,255,255,0))
d = ImageDraw.Draw(img)
# draw white text
code_x = (img_size-code_w)/2
code_y = (img_size-code_h)/2
d.text( ( code_x, code_y ), code, fontcolor, font=font)
# img.save('initial.png')
# Transform the image---
img = img_transform(img)
# crop image to the size equal to the bounding box of whole text
alpha = img.split()[-1]
img = img.crop(alpha.getbbox())
# resize the image
img.thumbnail((512,512), Image.LANCZOS) # ANTIALIAS was removed in Pillow 10
# img.save('myimage.png')
# what I want is to find all the bounding box of each individual word
boxes=find_all_bbx(img)
Here is the code for the affine transform (provided for anyone who wants to experiment with it):
def find_coeffs(pa, pb):
matrix = []
for p1, p2 in zip(pa, pb):
matrix.append([p1[0], p1[1], 1, 0, 0, 0, -p2[0]*p1[0], -p2[0]*p1[1]])
matrix.append([0, 0, 0, p1[0], p1[1], 1, -p2[1]*p1[0], -p2[1]*p1[1]])
A = np.matrix(matrix, dtype=float)  # np.float was removed in NumPy 1.24
B = np.array(pb).reshape(8)
res = np.dot(np.linalg.inv(A.T * A) * A.T, B)
return np.array(res).reshape(8)
def rand_degree(st,en,gap):
return (np.fix(np.random.random()* (en-st) * gap )+st)
def img_transform(img):
width, height = img.size
print(img.size)
m = -0.5
xshift = abs(m) * width
new_width = width + int(round(xshift))
img = img.transform((new_width, height), Image.AFFINE,
(1, m, -xshift if m > 0 else 0, 0, 1, 0), Image.BICUBIC)
range_n = width*0.2
gap_n = 1
x1 = rand_degree(0,range_n,gap_n)
y1 = rand_degree(0,range_n,gap_n)
x2 = rand_degree(width-range_n,width,gap_n)
y2 = rand_degree(0,range_n,gap_n)
x3 = rand_degree(width-range_n,width,gap_n)
y3 = rand_degree(height-range_n,height,gap_n)
x4 = rand_degree(0,range_n,gap_n)
y4 = rand_degree(height-range_n,height,gap_n)
coeffs = find_coeffs(
[(x1, y1), (x2, y2), (x3, y3), (x4, y4)],
[(0, 0), (width, 0), (new_width, height), (xshift, height)])
img = img.transform((width, height), Image.PERSPECTIVE, coeffs, Image.BICUBIC)
return img
How should find_all_bbx be implemented to find the bounding box of each individual word?
For example, one of the boxes surrounds the 'H' (you can download the image to see the partial result).
For what you want to do you need to label the individual words and then compute the bounding box of each object with the same label.
The most straightforward approach here is just taking the min and max positions of the pixels that make up that word.
The labeling is a little bit more difficult. For example, you could use a morphological operation to combine the letters of the words (morphological opening, see the PIL documentation) and then use ImageDraw.floodfill. Or you could try to anticipate the positions of the words from the position where you first draw the text (code_x and code_y), the chosen font, the size of the letters, and the spacing (this will be trickier, I think).
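Here is a minimal sketch of the labeling idea, assuming scipy is available. It is a variant of the morphological approach: a horizontal dilation merges the letters of each word, connected components are labeled, and each box is the min/max pixel positions within a label. The dilation width (15 px) is a guess you would tune to your font size and word spacing:
import numpy as np
from scipy import ndimage
def find_all_bbx(img):
    # Binary mask of the text, taken from the alpha channel
    alpha = np.array(img.split()[-1]) > 0
    # Merge letters of a word (but not separate words) with a horizontal dilation
    merged = ndimage.binary_dilation(alpha, structure=np.ones((1, 15)))
    labels, n = ndimage.label(merged)
    boxes = []
    for sl in ndimage.find_objects(labels):
        # Min/max positions of the original (undilated) pixels in each label
        ys, xs = np.nonzero(alpha[sl])
        y0, x0 = sl[0].start, sl[1].start
        # PIL-style box: (left, upper, right, lower)
        boxes.append((x0 + xs.min(), y0 + ys.min(),
                      x0 + xs.max() + 1, y0 + ys.max() + 1))
    return boxes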
Related
What I'm trying to do is generate equal diagonal lines in PIL. My approach is to first draw equal horizontal lines on a square image and then rotate it 45 degrees. But when I rotate it, the lines aren't big enough: there shouldn't be any black areas, and the lines should still be equal. It should also work with more colors.
code:
from PIL import Image, ImageDraw
import random
im = Image.new('RGB', (1000, 1000), (255, 255, 255))
draw = ImageDraw.Draw(im)
colors = [(255,0,255), (0,0,255)]
random.shuffle(colors)
length = len(colors)
amount = 1000 / length
x1 = 0
y1 = 0
x2 = 1000
y2 = 0
for color in colors:
shape = [(x1, y1 + amount // 2), (x2, y2 + amount // 2)]
draw.line(shape, fill=color, width=int(amount))
y1 += amount
y2 += amount
im.save("pre_diagonal.png")
colorimage = Image.open('pre_diagonal.png')
out = colorimage.rotate(45)
You can do it by first generating an image of vertical lines like I showed you in my answer to your other question, rotating that by 45°, and then cropping it. To avoid having areas of black, you need to generate an initial image that is large enough for the cropping.
In this case that's simply a square image whose sides are the length of the hypotenuse (diagonal) of the final target image.
Graphically, here's what I mean:
At any rate, here's the code that does it:
from math import hypot
from PIL import Image, ImageDraw
import random
IMG_WIDTH, IMG_HEIGHT = 1000, 1000
DIAG = round(hypot(IMG_WIDTH, IMG_HEIGHT))
img = Image.new('RGB', (DIAG, DIAG), (255, 255, 255))
draw = ImageDraw.Draw(img)
colors = [(255,0,255), (0,0,255)]
random.shuffle(colors)
length = len(colors) # Number of lines.
line_width = DIAG / length # Width of each.
difx = line_width / 2
x1, y1 = difx, 0
x2, y2 = difx, DIAG
for color in colors:
endpoints = (x1, y1), (x2, y2)
draw.line(endpoints, fill=color, width=round(line_width))
x1 += line_width
x2 += line_width
img = img.rotate(-45, resample=Image.Resampling.BICUBIC)
difx, dify = (DIAG-IMG_WIDTH) // 2, (DIAG-IMG_HEIGHT) // 2
img = img.crop((difx, dify, difx+IMG_WIDTH, dify+IMG_HEIGHT))
img.save('diagonal.png')
#img.show()
Here's the resulting image:
I am trying to create a GAN model that will remove watermarks. After doing some homework, I came across this Google AI Blog post, which makes things harder. Thus I need to create a dataset from these websites: Shutterstock, Adobe Stock, Fotolia, Canstock, and many more.
So, when I tried finding the same images using reverse image search, I found that the resolutions and the images themselves had changed, which makes it even harder.
Thus, I'm left with creating a custom dataset that applies the same watermarks as these websites, and that's why I need to recreate their watermarks on images from Unsplash and so on.
Can anyone please help me create the same kind of watermark we get from Shutterstock and Adobe Stock? It'd be a great help.
Note: I have gone through this link for watermarking with ImageMagick, but I need it in Python. If someone can show me a way of doing the same in Python, that'd be a great help.
EDIT 1: If you look at this example from Shutterstock, zoom in and you will find that not only the lines but also the text and rounded symbols are curved, and the name and rounded symbol have different opacities. That's what I want to replicate.
Here is one way to do that in Python/OpenCV.
Read the input
Create an image of the text
Rotate the text image
Tile out the rotated text image to the size of the input
Blend the tiled, rotated text image with the input image
Save the output
Input:
import cv2
import numpy as np
import math
text = "WATERMARK"
thickness = 2
scale = 0.75
pad = 5
angle = -45
blend = 0.25
def rotate_bound(image, angle):
# function to rotate an image
# from https://github.com/PyImageSearch/imutils/blob/master/imutils/convenience.py
# grab the dimensions of the image and then determine the center
(h, w) = image.shape[:2]
(cX, cY) = (w / 2, h / 2)
# grab the rotation matrix (applying the negative of the
# angle to rotate clockwise), then grab the sine and cosine
# (i.e., the rotation components of the matrix)
M = cv2.getRotationMatrix2D((cX, cY), -angle, 1.0)
cos = np.abs(M[0, 0])
sin = np.abs(M[0, 1])
# compute the new bounding dimensions of the image
nW = int((h * sin) + (w * cos))
nH = int((h * cos) + (w * sin))
# adjust the rotation matrix to take into account translation
M[0, 2] += (nW / 2) - cX
M[1, 2] += (nH / 2) - cY
# perform the actual rotation and return the image
return cv2.warpAffine(image, M, (nW, nH))
# read image
photo = cv2.imread('lena.jpg')
ph, pw = photo.shape[:2]
# determine size for text image
(wd, ht), baseLine = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, scale, thickness)
print (wd, ht, baseLine)
# add text to black background image padded all around
pad2 = 2 * pad
text_img = np.zeros((ht+pad2,wd+pad2,3), dtype=np.uint8)
text_img = cv2.putText(text_img, text, (pad,ht+pad), cv2.FONT_HERSHEY_SIMPLEX, scale, (255,255,255), thickness)
# rotate text image
text_rot = rotate_bound(text_img, angle)
th, tw = text_rot.shape[:2]
# tile the rotated text image to the size of the input
xrepeats = math.ceil(pw/tw)
yrepeats = math.ceil(ph/th)
print(yrepeats,xrepeats)
tiled_text = np.tile(text_rot, (yrepeats,xrepeats,1))[0:ph, 0:pw]
# combine the text with the image
result = cv2.addWeighted(photo, 1, tiled_text, blend, 0)
# save results
cv2.imwrite("text_img.png", text_img)
cv2.imwrite("text_img_rot.png", text_rot)
cv2.imwrite("lena_tiled_rotated_text_img.jpg", result)
# show the results
cv2.imshow("text_img", text_img)
cv2.imshow("text_rot", text_rot)
cv2.imshow("tiled_text", tiled_text)
cv2.imshow("result", result)
cv2.waitKey(0)
Text Image:
Rotated Text Image:
Result:
Here is another variation in Python/OpenCV that does outline font for the watermark. I have made the font size larger so that the outline is more visible.
import cv2
import numpy as np
import math
text = "WATERMARK"
thickness = 2
scale = 1.5
pad = 5
angle = -45
blend = 0.4
# function to rotate an image
def rotate_bound(image, angle):
# from https://github.com/PyImageSearch/imutils/blob/master/imutils/convenience.py
# grab the dimensions of the image and then determine the center
(h, w) = image.shape[:2]
(cX, cY) = (w / 2, h / 2)
# grab the rotation matrix (applying the negative of the
# angle to rotate clockwise), then grab the sine and cosine
# (i.e., the rotation components of the matrix)
M = cv2.getRotationMatrix2D((cX, cY), -angle, 1.0)
cos = np.abs(M[0, 0])
sin = np.abs(M[0, 1])
# compute the new bounding dimensions of the image
nW = int((h * sin) + (w * cos))
nH = int((h * cos) + (w * sin))
# adjust the rotation matrix to take into account translation
M[0, 2] += (nW / 2) - cX
M[1, 2] += (nH / 2) - cY
# perform the actual rotation and return the image
return cv2.warpAffine(image, M, (nW, nH))
# read image
photo = cv2.imread('lena.jpg')
ph, pw = photo.shape[:2]
# determine size for text image
(wd, ht), baseLine = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, scale, thickness)
print (wd, ht, baseLine)
# add text to black background image padded all around
# write thicker white text and then write over that with thinner gray text to make outline text
pad2 = 2 * pad
text_img = np.zeros((ht+pad2,wd+pad2,3), dtype=np.uint8)
text_img = cv2.putText(text_img, text, (pad,ht+pad), cv2.FONT_HERSHEY_SIMPLEX, scale, (256,256,256), thickness+3)
text_img = cv2.putText(text_img, text, (pad,ht+pad), cv2.FONT_HERSHEY_SIMPLEX, scale, (128,128,128), thickness)
# rotate text image
text_rot = rotate_bound(text_img, angle)
th, tw = text_rot.shape[:2]
# tile the rotated text image to the size of the input
xrepeats = math.ceil(pw/tw)
yrepeats = math.ceil(ph/th)
print(yrepeats,xrepeats)
tiled_text = np.tile(text_rot, (yrepeats,xrepeats,1))[0:ph, 0:pw]
# combine the text with the image
#result = cv2.addWeighted(photo, 1, tiled_text, blend, 0)
mask = blend * cv2.threshold(tiled_text, 0, 255, cv2.THRESH_BINARY)[1]
result = (mask * tiled_text.astype(np.float64) + (255-mask)*photo.astype(np.float64))/255
result = result.clip(0,255).astype(np.uint8)
# save results
cv2.imwrite("text_img.png", text_img)
cv2.imwrite("text_img_rot.png", text_rot)
cv2.imwrite("lena_tiled_rotated_text_img2.jpg", result)
# show the results
cv2.imshow("text_img", text_img)
cv2.imshow("text_rot", text_rot)
cv2.imshow("tiled_text", tiled_text)
cv2.imshow("result", result)
cv2.waitKey(0)
Result:
Here is my code:
import cv2
import json
# JSON file in which the EasyOCR annotations have been saved
img = cv2.imread('dummy.jpg')
img1 = img.copy()
# rotated, because the annotations follow the vertical orientation of the image; I have matched that orientation
img1=cv2.rotate(img1,rotateCode=cv2.ROTATE_90_CLOCKWISE)
rects = []
with open('dummy.json') as jsn:
jsn_dict = json.load(jsn)
for k in jsn_dict['textAnnotations']:
vertices= k['boundingPoly']['vertices']
cv2.rectangle(img1,list(vertices[2].values()),list(vertices[0].values()),[0,255,0],10)
# I want to put the predicted text on top of the bounding boxes vertically, because my image is rotated anticlockwise
cv2.putText(img1, k['description'], list(vertices[0].values()),cv2.FONT_HERSHEY_SIMPLEX,5,[0,255,0],5)
With the code mentioned above I am labelling the recognized text. First, I put the image into the OCR model, and it returns three values for every detected piece of text: the vertices of the bounding box, the recognized text, and a confidence percentage. But my problem is that my image was rotated by its Exif orientation value, while cv2 read it at a zero angle, so my text prints horizontally. I want to print the text on the image vertically. I have tried many times but could not resolve my problem. I hope I have explained it well.
Try this one
import cv2
def transparentOverlay(src, overlay, pos=(0, 0), scale=1):
"""
:param src: Input Color Background Image
:param overlay: transparent Image (BGRA)
:param pos: position where the image to be blit.
:param scale : scale factor of transparent image.
:return: Resultant Image
"""
overlay = cv2.resize(overlay, (0, 0), fx=scale, fy=scale)
h, w, _ = overlay.shape # Size of foreground
rows, cols, _ = src.shape # Size of background Image
y, x = pos[0], pos[1] # Position of foreground/overlay image
# loop over all pixels and apply the blending equation
for i in range(h):
for j in range(w):
if x + i >= rows or y + j >= cols:
continue
alpha = float(overlay[i][j][3] / 255.0) # read the alpha channel
src[x + i][y + j] = alpha * overlay[i][j][:3] + (1 - alpha) * src[x + i][y + j]
return src
def addImageWatermark(LogoImage,MainImage,opacity,pos=(10,100),):
opacity = opacity / 100
OriImg = cv2.imread(MainImage, -1)
waterImg = cv2.imread(LogoImage, -1)
tempImg = OriImg.copy()
print(tempImg.shape)
overlay = transparentOverlay(tempImg, waterImg, pos)
output = OriImg.copy()
# apply the overlay
cv2.addWeighted(overlay, opacity, output, 1 - opacity, 0, output)
cv2.imshow('Life2Coding', output)
cv2.waitKey(0)
cv2.destroyAllWindows()
if __name__ == '__main__':
addImageWatermark('./logo.png','./hanif.jpg',100,(10,100))
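For large images the per-pixel Python loop above is slow. Here is a vectorized sketch of the same alpha blend using NumPy broadcasting (my addition; it keeps the original's convention that pos[0] is the column offset and pos[1] the row offset):
import numpy as np
def transparentOverlayFast(src, overlay, pos=(0, 0)):
    col, row = pos
    # Clip the overlay to the background bounds
    h = min(overlay.shape[0], src.shape[0] - row)
    w = min(overlay.shape[1], src.shape[1] - col)
    alpha = overlay[:h, :w, 3:4] / 255.0  # shape (h, w, 1) broadcasts over BGR
    roi = src[row:row + h, col:col + w].astype(np.float64)
    blended = alpha * overlay[:h, :w, :3] + (1.0 - alpha) * roi
    src[row:row + h, col:col + w] = blended.astype(np.uint8)
    return src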
Rotate your image 90º clockwise, add the text, and rotate the image back to the original.
# Rotate 90º clockwise
img_rot = cv2.rotate(img1 , cv2.ROTATE_90_CLOCKWISE)
# Add your text here, adjusting x and y coordinates to the new orientation.
# The new adjusted coordinates will be:
# (x2, y2) = (original_height - y, x)
# [...]
# Rotate back
img1 = cv2.rotate(img_rot, cv2.ROTATE_90_COUNTERCLOCKWISE)
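A minimal end-to-end sketch of this approach (the text and the (x, y) position are hypothetical placeholders; the coordinate mapping follows the comment above):
import cv2
img_rot = cv2.rotate(img1, cv2.ROTATE_90_CLOCKWISE)
h = img1.shape[0]   # original image height
x, y = 100, 200     # hypothetical position in the original orientation
x2, y2 = h - y, x   # mapped into the rotated frame
cv2.putText(img_rot, "some text", (x2, y2),
            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
img1 = cv2.rotate(img_rot, cv2.ROTATE_90_COUNTERCLOCKWISE)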
How would I center-align (and middle-vertical-align) text when using PIL?
Deprecation Warning: textsize is deprecated and will be removed in Pillow 10 (2023-07-01). Use textbbox or textlength instead.
Code using textbbox instead of textsize.
from PIL import Image, ImageDraw, ImageFont
def create_image(size, bgColor, message, font, fontColor):
W, H = size
image = Image.new('RGB', size, bgColor)
draw = ImageDraw.Draw(image)
_, _, w, h = draw.textbbox((0, 0), message, font=font)
draw.text(((W-w)/2, (H-h)/2), message, font=font, fill=fontColor)
return image
myFont = ImageFont.truetype('Roboto-Regular.ttf', 16)
myMessage = 'Hello World'
myImage = create_image((300, 200), 'yellow', myMessage, myFont, 'black')
myImage.save('hello_world.png', "PNG")
Result
Use the Draw.textsize method to calculate the text size and re-calculate the position accordingly.
Here is an example:
from PIL import Image, ImageDraw
W, H = (300,200)
msg = "hello"
im = Image.new("RGBA",(W,H),"yellow")
draw = ImageDraw.Draw(im)
w, h = draw.textsize(msg)
draw.text(((W-w)/2,(H-h)/2), msg, fill="black")
im.save("hello.png", "PNG")
and the result:
If your fontsize is different, include the font like this:
myFont = ImageFont.truetype("my-font.ttf", 16)
draw.textsize(msg, font=myFont)
Here is some example code which uses textwrap to split a long line into pieces, and then uses the textsize method to compute the positions.
from PIL import Image, ImageDraw, ImageFont
import textwrap
astr = '''The rain in Spain falls mainly on the plains.'''
para = textwrap.wrap(astr, width=15)
MAX_W, MAX_H = 200, 200
im = Image.new('RGB', (MAX_W, MAX_H), (0, 0, 0, 0))
draw = ImageDraw.Draw(im)
font = ImageFont.truetype(
'/usr/share/fonts/truetype/msttcorefonts/Arial.ttf', 18)
current_h, pad = 50, 10
for line in para:
w, h = draw.textsize(line, font=font)
draw.text(((MAX_W - w) / 2, current_h), line, font=font)
current_h += h + pad
im.save('test.png')
One should note that the Draw.textsize method is inaccurate. I was working with low-resolution images, and after some testing it turned out that textsize considers every character to be 6 pixels wide, whereas an I takes at most 2 pixels and a W takes at least 8 pixels (in my case). So, depending on my text, it was or wasn't centered at all. Though I guess "6" was an average, so if you're working with long texts and big images, it should still be OK.
But now, if you want some real accuracy, you better use the getsize method of the font object you're going to use:
arial = ImageFont.truetype("arial.ttf", 9)
w,h = arial.getsize(msg)
draw.text(((W-w)/2,(H-h)/2), msg, font=arial, fill="black")
As used in Edilio's link.
A simple solution if you're using PIL 8.0.0 or above: text anchors
width, height = my_image.size  # image width and height
draw = ImageDraw.Draw(my_image)
draw.text((width/2, height/2), "my text", font=my_font, anchor="mm")
mm means to use the middle of the text as anchor, both horizontally and vertically.
See the anchors page for other kinds of anchoring. For example if you only want to center horizontally you may want to use ma.
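For instance, to center only horizontally near the top of the image ("m" centers horizontally, "a" anchors at the ascender line; my_font is assumed to be a loaded ImageFont):
draw.text((width / 2, 10), "top centered", font=my_font, anchor="ma")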
The PIL docs for ImageDraw.text are a good place to start, but don't answer your question.
Below is an example of how to center the text in an arbitrary bounding box, as opposed to the center of an image. The bounding box is defined as: (x1, y1) = upper left corner and (x2, y2) = lower right corner.
from PIL import Image, ImageDraw, ImageFont
# Create blank rectangle to write on
image = Image.new('RGB', (300, 300), (63, 63, 63, 0))
draw = ImageDraw.Draw(image)
message = 'Stuck in\nthe middle\nwith you'
bounding_box = [20, 30, 110, 160]
x1, y1, x2, y2 = bounding_box # For easy reading
font = ImageFont.truetype('Consolas.ttf', size=12)
# Calculate the width and height of the text to be drawn, given font size
w, h = draw.textsize(message, font=font)
# Calculate the mid points and offset by the upper left corner of the bounding box
x = (x2 - x1 - w)/2 + x1
y = (y2 - y1 - h)/2 + y1
# Write the text to the image, where (x,y) is the top left corner of the text
draw.text((x, y), message, align='center', font=font)
# Draw the bounding box to show that this works
draw.rectangle([x1, y1, x2, y2])
image.show()
image.save('text_center_multiline.png')
The output shows the text centered vertically and horizontally in the bounding box.
Whether you have a single or multiline message no longer matters, as PIL incorporated the align='center' parameter. However, it is for multiline text only. If the message is a single line, it needs to be manually centered. If the message is multiline, align='center' does the work for you on subsequent lines, but you still have to manually center the text block. Both of these cases are solved at once in the code above.
Use the textsize method (see docs) to figure out the dimensions of your text object before actually drawing it. Then draw it starting at the appropriate coordinates.
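A minimal sketch of that approach (W, H are the image size, as in the examples above; note that textsize has been deprecated since Pillow 9.2, so the textbbox equivalent is given in the comments):
w, h = draw.textsize(msg, font=font)
# Modern equivalent:
#   left, top, right, bottom = draw.textbbox((0, 0), msg, font=font)
#   w, h = right - left, bottom - top
draw.text(((W - w) / 2, (H - h) / 2), msg, font=font)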
All the other answers did NOT take text ascender into consideration.
Here's a backport of ImageDraw.text(..., anchor="mm"). I'm not sure if it's fully compatible with anchor="mm", because I haven't tested the other kwargs like spacing and stroke_width yet, but I assure you this offset fix works for me.
from PIL import ImageDraw
from PIL import __version__ as pil_ver
PILLOW_VERSION = tuple([int(_) for _ in pil_ver.split(".")[:3]])
def draw_anchor_mm_text(
im,
xy,
# args shared by ImageDraw.textsize() and .text()
text,
font=None,
spacing=4,
direction=None,
features=None,
language=None,
stroke_width=0,
# ImageDraw.text() exclusive args
**kwargs,
):
"""
Draw center middle-aligned text. Basically a backport of
ImageDraw.text(..., anchor="mm").
:param PIL.Image.Image im:
:param tuple xy: center of text
:param unicode text:
...
"""
draw = ImageDraw.Draw(im)
# Text anchor is firstly implemented in Pillow 8.0.0.
if PILLOW_VERSION >= (8, 0, 0):
kwargs.update(anchor="mm")
else:
kwargs.pop("anchor", None) # let it defaults to "la"
if font is None:
font = draw.getfont()
# anchor="mm" middle-middle coord xy -> "left-ascender" coord x'y'
# offset_y = ascender - top, https://stackoverflow.com/a/46220683/5101148
# WARN: ImageDraw.textsize() return text size with offset considered.
w, h = draw.textsize(
text,
font=font,
spacing=spacing,
direction=direction,
features=features,
language=language,
stroke_width=stroke_width,
)
offset = font.getoffset(text)
w, h = w - offset[0], h - offset[1]
xy = (xy[0] - w / 2 - offset[0], xy[1] - h / 2 - offset[1])
draw.text(
xy,
text,
font=font,
spacing=spacing,
direction=direction,
features=features,
language=language,
stroke_width=stroke_width,
**kwargs,
)
Refs
https://pillow.readthedocs.io/en/stable/handbook/text-anchors.html
https://github.com/python-pillow/Pillow/issues/4789
https://stackoverflow.com/a/46220683/5101148
https://github.com/python-pillow/Pillow/issues/2486
Using a combination of anchor="mm" and align="center" works wonders. Example:
draw.text(
xy=(width / 2, height / 2),
text="centered",
fill="#000000",
font=font,
anchor="mm",
align="center"
)
Note: Tested where font is an ImageFont class object constructed as such:
ImageFont.truetype('path/to/font.ttf', 32)
This is a simple example that adds text in the center of the image:
from PIL import Image, ImageDraw, ImageFilter
msg = "hello"
img = Image.open('image.jpg')
W, H = img.size
box_image = img.filter(ImageFilter.BoxBlur(4))
draw = ImageDraw.Draw(box_image)
w, h = draw.textsize(msg)
draw.text(((W - w) / 2, (H - h) / 2), msg, fill="black")
box_image.show()
If you are using the default font, you can use this simple calculation:
draw.text((newimage.width/2 - len(text)*3, 5), text, fill="black", align="center", anchor="mm")
The main thing is the X position: divide the image width by 2, take the length of the string, multiply it by 3, and subtract that from the half-width:
newimage.width/2 - len(text)*3  # this is the X position
Note: this is an estimate for the default font size; if you use a custom font, the multiplier must be changed accordingly (in the default case it is 3).
I am trying to turn an object detector for images into an object detector for videos, but I am getting multiple bounding boxes and I don't know why.
It seems like the first frame of the video has the correct number of bounding boxes, namely 1, but as it loops, the function draw_boxes outputs images that have multiple or overlapping bounding boxes.
If you can help, I will appreciate it. Thanks.
Here is an example frame:
And here is the code:
boxes = list()
for i in tqdm(range(nb_frames)):
_, frame = video_reader.read()
cv2.imwrite("framey.jpg", frame)
filename = "framey.jpg"
image, image_w, image_h = load_image_pixels(filename, (input_w, input_h))
yhat = model.predict(image)
for i in range(len(yhat)):
# decode the output of the network
boxes += decode_netout(yhat[i][0], anchors[i], class_threshold, input_h, input_w)
# correct the sizes of the bounding boxes for the shape of the image
correct_yolo_boxes(boxes, image_h, image_w, input_h, input_w)
# suppress non-maximal boxes
do_nms(boxes, 0.5)
# get the details of the detected objects
v_boxes, v_labels, v_scores = get_boxes(boxes, labels, class_threshold)
# draw what we found
imagex = draw_boxes(filename, v_boxes, v_labels, v_scores)
video_writer.write(imagex)
video_reader.release()
video_writer.release()
And here is the function that is spitting out the above image:
from matplotlib import pyplot
from matplotlib.patches import Rectangle
from keras.preprocessing.image import load_img, img_to_array
def draw_boxes(filename, v_boxes, v_labels, v_scores):
# load the image
data = pyplot.imread(filename)
# plot the image
pyplot.imshow(data)
# get the context for drawing boxes
ax = pyplot.gca()
# plot each box
for i in range(len(v_boxes)):
box = v_boxes[i]
# get coordinates
y1, x1, y2, x2 = box.ymin, box.xmin, box.ymax, box.xmax
# calculate width and height of the box
width, height = x2 - x1, y2 - y1
# create the shape
rect = Rectangle((x1, y1), width, height, fill=False, color='white')
# draw the box
ax.add_patch(rect)
# draw text and score in top left corner
label = "%s (%.3f)" % (v_labels[i], v_scores[i])
pyplot.text(x1, y1, label, color='white')
# show the plot
pyplot.savefig('detected.jpg')
filename = "detected.jpg"
image = load_img(filename)
image_array = img_to_array(image)
image_array = (image_array*255).astype(np.uint8)
return image_array
So, the error was in the draw_boxes function: the pyplot-based version kept drawing into the same implicit global figure on every frame, so boxes from earlier frames accumulated.
I changed draw_boxes to draw with OpenCV directly, and it worked.
def draw_bounding_boxes(image, v_boxes, v_labels, v_scores):
for i in range(len(v_boxes)):
box = v_boxes[i]
y1, x1, y2, x2 = box.ymin, box.xmin, box.ymax, box.xmax
width, height = x2 - x1, y2 - y1
label = "%s (%.3f)" % (v_labels[i], v_scores[i])
region = np.array([[x1 - 3, y1],
[x1-3, y1 - height-26],
[x1+width+13, y1-height-26],
[x1+width+13, y1]], dtype='int32')
cv2.rectangle(image, (x1, y1), (x2, y2), (255, 0, 0), 5)
cv2.fillPoly(image,[region], (255, 0, 0))
cv2.putText(image,
label,
(x1+13, y1-13),
cv2.FONT_HERSHEY_SIMPLEX,
1e-3 * image.shape[0],
(0,0,0),
2)
return image
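With this version you pass the frame array directly instead of round-tripping through a saved pyplot figure, so the call site in the main loop becomes something like this sketch:
imagex = draw_bounding_boxes(frame, v_boxes, v_labels, v_scores)
video_writer.write(imagex)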