convert Kitti labels to Yolo - python

Trying to convert Kitti label format to Yolo. But after converting the bbox is misplaced.
this is kitti bounding box
This is conversion code:
def convertToYoloBBox(bbox, size):
# Yolo uses bounding bbox coordinates and size relative to the image size.
# This is taken from https://pjreddie.com/media/files/voc_label.py .
dw = 1. / size[0]
dh = 1. / size[1]
x = (bbox[0] + bbox[1]) / 2.0
y = (bbox[2] + bbox[3]) / 2.0
w = bbox[1] - bbox[0]
h = bbox[3] - bbox[2]
x = x * dw
w = w * dw
y = y * dh
h = h * dh
return (x, y, w, h)
convert =convertToYoloBBox([kitti_bbox[0],kitti_bbox[1],kitti_bbox[2],kitti_bbox[3]],image.shape[:2])
The function does some normalization which is essential for yolo and outputs following:
(0.14763590391908976,
0.3397063758389261,
0.20452591656131477,
0.01810402684563757)
but when i try to check if the normalization is being done correctly with this code:
x = int(convert[0] * image.shape[0])
y = int(convert[1] * image.shape[1])
width = x+int(convert[2] * image.shape[0])
height = y+ int(convert[3] * image.shape[1])
cv.rectangle(image, (int(x), int(y)), (int(width), int(height)), (255,0,0), 2 )
the bounding box is misplaced:
Any suggestions ? Is conversion fucntion correct? or the problem is in the checking code ?

You got the centroid calculation wrong.
Kitti labels are given in the order of left, top, right, and bottom.
to get the centroid you have to do (left + right)/ 2 and (top + bottom)/2
so your code will become
x = (bbox[0] + bbox[2]) / 2.0
y = (bbox[1] + bbox[3]) / 2.0
w = bbox[2] - bbox[0]
h = bbox[3] - bbox[1]

Related

Wrong width calculations using OpenCV

I'm using a RealSense D455 camera and trying to detect objects and calculate the width of them. I found some code that does it for the height but when I try to change this the calculations are wrong. For height it's usually pretty accurate only showing small increases in height when wrong. But with the changed code it says for example an object that's ~40cm as 1-1,5 meters.
if score > 0.8 and class_ == 1: # 1 for human
left = box[1] * W
top = box[0] * H
right = box[3] * W
bottom = box[2] * H
width = right - left
height = bottom - top
bbox = (int(left), int(top), int(width), int(height))
heightB = bbox[1] + bbox[3]
p1 = (int(bbox[0]), int(bbox[1]))
p2 = (int(bbox[0] + bbox[2]), int(bbox[1] + bbox[3]))
# draw box
cv2.rectangle(color_image, p1, p2, (255,0,0), 2, 1)
# x,y,z of bounding box
obj_points = verts[int(bbox[1]):int(bbox[1] + bbox[3]), int(bbox[0]):int(bbox[0] + bbox[2])].reshape(-1, 3)
print(obj_points.shape)
zs = obj_points[:, 2]
z = np.median(zs)
ys = obj_points[:, 0]
ys = np.delete(ys, np.where(
(zs < z - 1) | (zs > z + 1))) # take only y for close z to prevent including background
my = np.amin(ys, initial=1)
My = np.amax(ys, initial=-1)
height = (My - my) # add next to rectangle print of height using cv library
height = float("{:.2f}".format(height))
print("[INFO] object height is: ", height, "[m]")
height_txt = str(height) + "[m]"
# Write some Text
font = cv2.FONT_HERSHEY_SIMPLEX
bottomLeftCornerOfText = (p1[0], p1[1] + 20)
fontScale = 1
fontColor = (255, 255, 255)
lineType = 2
cv2.putText(color_image, height_txt,
bottomLeftCornerOfText,
font,
fontScale,
fontColor,
lineType)
# Show images
cv2.namedWindow('RealSense', cv2.WINDOW_AUTOSIZE)
cv2.imshow('RealSense', color_image)
cv2.waitKey(1)
Object pointers are used, they split up the dimensions into their own array, so zs = obj_points[:, 2] will be for z ys = obj_points[:, 1] is for y. I thought just changing ys = obj_points[:, 1] to ys = obj_points[:, 0] would calculate width but aforementioned it does not work.
ys = np.delete(ys, np.where((zs < z - 1) | (zs > z + 1)))
This is is just to take out the outliers so as to not take into account background values.
This is the part that calculates the height, since the camera will be horizontal the height difference will be the width.
my = np.amin(ys, initial=1)
My = np.amax(ys, initial=-1)
height = (My - my) # add next to rectangle print of height using cv library
Since the camera is horizontal I can just the the length of Y. But this does not seem to work when I try the same for X.
If it's necessary this is the link to the original GitHub repo: https://github.com/IntelRealSense/librealsense/tree/master/wrappers/tensorflow I'm using Example2.

Turning Circle into Square. Solved: squircle package

OpenCV / Python related:
Given a photo of a round object, how can you output that object flattened, while adjusting for surface area? Here is an example image of an input:
Soccer ball
It is similar to adjusting for camera distortion (turning a round object into flat one), but in this case the distortion comes from the object itself and not the camera.
Distorted image:
Undistorted image:
Any suggestions would help. Thank you!
Edit: The package squircle is just what I needed, thank you fmw42!
Here is a solution in Python/OpenCV. It creates transformation maps that define the equations from output back to input and applies them using cv2.remap(). The equations come from https://arxiv.org/pdf/1509.06344.pdf for the Elliptical Grid Mapping approach.
Input:
import numpy as np
import cv2
import math
# References:
# https://arxiv.org/pdf/1509.06344.pdf
# http://squircular.blogspot.com/2015/09/mapping-circle-to-square.html
# Evaluate:
# u = x*sqrt(1-y**2/2)
# v = y*sqrt(1-x**2/2)
# u,v are input circle coordinates and x,y are output square coordinates
# read input
img = cv2.imread("rings.png")
# get dimensions and center
h, w = img.shape[:2]
xcent = w / 2
ycent = h / 2
# set up the maps as float32 from output square (x,y) to input circle (u,v)
map_u = np.zeros((h, w), np.float32)
map_v = np.zeros((h, w), np.float32)
# create u and v maps where x,y is measured from the center and scaled from -1 to 1
for y in range(h):
Y = (y - ycent)/ycent
for x in range(w):
X = (x - xcent)/xcent
map_u[y, x] = xcent * X * math.sqrt(1 - 0.5*Y**2) + xcent
map_v[y, x] = ycent * Y * math.sqrt(1 - 0.5*X**2) + ycent
# do the remap
result = cv2.remap(img, map_u, map_v, cv2.INTER_LINEAR, borderMode = cv2.BORDER_REFLECT_101, borderValue=(0,0,0))
# save results
cv2.imwrite("rings_circle2square.png", result)
# display images
cv2.imshow('img', img)
cv2.imshow('result', result)
cv2.waitKey(0)
cv2.destroyAllWindows()
Result:
Here is another example:
Input:
Result:
And here is a 3rd example:
Input:
Result:
ADDITION
Here is an alternate approach based upon the Simple Stretch equations in the reference above:
import numpy as np
import cv2
import math
# References:
# https://arxiv.org/pdf/1509.06344.pdf
# Simple stretch equations
# read input
img = cv2.imread("rings.png")
#img = cv2.imread("ICM.png")
#img = cv2.imread("soccerball_small.jpg")
# get dimensions and center
h, w = img.shape[:2]
xcent = w / 2
ycent = h / 2
# set up the maps as float32 from output square (x,y) to input circle (u,v)
map_u = np.zeros((h, w), np.float32)
map_v = np.zeros((h, w), np.float32)
# create u and v maps where x,y is measured from the center and scaled from -1 to 1
# note: copysign(1,x) is signum(x) and returns 1 ,0, or -1 depending upon sign of x
for y in range(h):
Y = (y - ycent)/ycent
for x in range(w):
X = (x - xcent)/xcent
X2 = X*X
Y2 = Y*Y
XY = X*Y
R = math.sqrt(X2+Y2)
if R == 0:
map_u[y, x] = xcent
map_v[y, x] = ycent
elif X2 >= Y2:
map_u[y, x] = xcent * math.copysign(1, X) * X2/R + xcent
map_v[y, x] = ycent * math.copysign(1, X) * XY/R + ycent
else:
map_u[y, x] = xcent * math.copysign(1, Y) * XY/R + xcent
map_v[y, x] = ycent * math.copysign(1, Y) * Y2/R + ycent
# do the remap
result = cv2.remap(img, map_u, map_v, cv2.INTER_LINEAR, borderMode = cv2.BORDER_REFLECT_101, borderValue=(0,0,0))
# save results
cv2.imwrite("rings_circle2square2.png", result)
#cv2.imwrite("ICM_circle2square2.png", result)
#cv2.imwrite("soccerball_small_circle2square2.png", result)
# display images
cv2.imshow('img', img)
cv2.imshow('result', result)
cv2.waitKey(0)
cv2.destroyAllWindows()
Input:
Result:
Input:
Result:
Input:
Result:

How to calculate bounding box coordinates?

I've created a trained yolov4 model, and I tried to test it as well.
My original image size was - width 1920 and height 1080.
For training I reduced it to 416*416. When I was tested I got a good result but I cannot understand the output values:
(left_x: 506 top_y: -376 width: 2076 height: 1179)
(how can a coordinate be negative or bigger than the image's size?)
I'm sure that there is a formula behind it but I wasn't be able to find it.
I searched inside the code (darknet.py) and bbox2points(bbox) function returned a bad result.
What am I missing?
Can you help me to find the bounding box’s coordinate in this example?
Code - darknet.py:
x, y, w, h = bbox
xmin = int(round(x - (w / 2)))
xmax = int(round(x + (w / 2)))
ymin = int(round(y - (h / 2)))
ymax = int(round(y + (h / 2)))
return xmin, xmax, ymin, ymax
those x, y, w, h are the same as it is outputted (left_x, top_y, width, height)
Code:
https://github.com/AlexeyAB/darknet/blob/master/darknet.py
This is how i transform the bounding boxes returned by the darknet_video.py to use it in opencv.
def __transform_boxes(boxes, image):
image_height, image_width, image_channels = image.shape
top_coordinates_x = int((boxes[0] - (boxes[2]) / 2) * (image_width / 416))
top_coordinates_y = int((boxes[1] - (boxes[3]) / 2) * (image_height / 416))
bottom_coordinates_x = int((boxes[0] + (boxes[2]) / 2) * (image_width / 416))
bottom_coordinates_y = int((boxes[1] + (boxes[3]) / 2) * (image_height / 416))
return bottom_coordinates_x, bottom_coordinates_y, top_coordinates_x, top_coordinates_y

I keep getting a white picture in this lens distortion correction program

I want to write a code that corrects the distortion and also helps defish a fisheye image.
I found a pseudocode for it here and I have tried to stick to it:
http://www.tannerhelland.com/4743/simple-algorithm-correcting-lens-distortion/
from PIL import Image
import numpy as np
im = Image.open('myimage.png')
img = Image.new("RGB",(512,512),'green')
im = im.convert("RGB")
pix_val = im.load()
pix_valNew = img.load()
width, height = im.size
strength = 1.5
zoom = 1.0
halfWidth = width/2
halfHeight = height/2
theta = -1
if strength == 0:
strength = 0.00001
correctionRadius = ((width**2 + height**2)/strength)**0.5
for x in range(512):
for y in range(512):
newX = x - halfWidth
newY = y - halfHeight
distance = (newX**2 + newY**2)**0.5
r = distance/correctionRadius
if r == 0:
theta = 1
else:
theta = np.arctan(r)/r
sourceX = (int)(halfWidth + theta * newX * zoom)
sourceY = (int)(halfHeight + theta * newY * zoom)
pix_valNew[x,y] = pix_val[sourceX,sourceY]
img.show()
I keep getting an image that is completely white and I am not able to troubleshoot it because I am completely new to it.
512x512 is the resolution of the image i want to "de-fish".
The logic as far as I understand is to find the location of a particular pixel in
the fisheye image and map it on its corresponding location in t he normal image
Someone asked for the pseudocode for which I did put the link but I am pasting it here as well. It is as Follows:
input:
strength as floating point >= 0. 0 = no change, high numbers equal stronger correction.
zoom as floating point >= 1. (1 = no change in zoom)
algorithm:
set halfWidth = imageWidth / 2
set halfHeight = imageHeight / 2
if strength = 0 then strength = 0.00001
set correctionRadius = squareroot(imageWidth ^ 2 + imageHeight ^ 2) / strength
for each pixel (x,y) in destinationImage
set newX = x - halfWidth
set newY = y - halfHeight
set distance = squareroot(newX ^ 2 + newY ^ 2)
set r = distance / correctionRadius
if r = 0 then
set theta = 1
else
set theta = arctangent(r) / r
set sourceX = halfWidth + theta * newX * zoom
set sourceY = halfHeight + theta * newY * zoom
set color of pixel (x, y) to color of source image pixel at (sourceX, sourceY)
Any form of help will be very much appreciated.
It appears that under some combinations of inputs, illegal indices for the source image are being calculated. A simple fix is to replace
pix_valNew[x,y] = pix_val[sourceX,sourceY]
with:
try:
pix_valNew[x,y] = pix_val[sourceX,sourceY]
except IndexError:
print('IndexError', x, y, sourceX, sourceY)
pix_valNew[x, y] = (0, 0, 0)
Also, just noticed that a line of your code:
correctionRadius = ((width**2 + height**2)/strength)**0.5
should be:
correctionRadius = ((width**2 + height**2)**0.5)/strength

How do I divide a given image into 8 unique pie segments?

I am pretty new to Python and want to do the following: I want to divide the following image into 8 pie segments:
I want it to look something like this (I made this in PowerPoint):
The background should be black and the edge of the figure should have an unique color as well as each pie segment.
EDIT: I have written a code that divides the whole image in 8 segments:
from PIL import Image, ImageDraw
im=Image.open('C:/Users/20191881/Documents/OGO Beeldanalyse/Python/asymmetrie/rotation.png')
fill = 255
draw = ImageDraw.Draw(im)
draw.line((0,0) + im.size, fill)
draw.line((0, im.size[1], im.size[0], 0), fill)
draw.line((0.5*im.size[0],0, 0.5*im.size[0], im.size[1]), fill)
draw.line((0, 0.5*im.size[1], im.size[0], 0.5*im.size[1]), fill)
del draw
im.show()
The output gives:
The only thing that is left to do is to find a way to make each black segment inside the border an unique color and also give all the white edge segments an unique color.
Your code divides the image in eight parts, that's correct, but with respect to the image center, you don't get eight "angular equally" pie segments like you show in your sketch.
Here would be my solution, only using Pillow and the math module:
import math
from PIL import Image, ImageDraw
def segment_color(i_color, n_colors):
r = int((192 - 64) / (n_colors - 1) * i_color + 64)
g = int((224 - 128) / (n_colors - 1) * i_color + 128)
b = 255
return (r, g, b)
# Load image; generate ImageDraw
im = Image.open('path_to/vgdrD.png').convert('RGB')
draw = ImageDraw.Draw(im)
# Number of pie segments (must be an even number)
n = 8
# Replace (all-white) edge with defined edge color
edge_color = (255, 128, 0)
pixels = im.load()
for y in range(im.height):
for x in range(im.width):
if pixels[x, y] == (255, 255, 255):
pixels[x, y] = edge_color
# Draw lines with defined line color
line_color = (0, 255, 0)
d = min(im.width, im.height) - 10
center = (int(im.width/2), int(im.height)/2)
for i in range(int(n/2)):
angle = 360 / n * i
x1 = math.cos(angle/180*math.pi) * d/2 + center[0]
y1 = math.sin(angle/180*math.pi) * d/2 + center[1]
x2 = math.cos((180+angle)/180*math.pi) * d/2 + center[0]
y2 = math.sin((180+angle)/180*math.pi) * d/2 + center[1]
draw.line([(x1, y1), (x2, y2)], line_color)
# Fill pie segments with defined segment colors
for i in range(n):
angle = 360 / n * i + 360 / n / 2
x = math.cos(angle/180*math.pi) * 20 + center[0]
y = math.sin(angle/180*math.pi) * 20 + center[1]
ImageDraw.floodfill(im, (x, y), segment_color(i, n))
im.save(str(n) + '_pie.png')
For n = 8 pie segments, the following result is produced:
The first step is to replace all white pixels in the original image with the desired edge color. Of course, the assumption here is, that there are no other (white) pixels in the image. Also, this might be better done using NumPy and vectorized code, but I wanted to keep the solution Pillow-only.
Next step is to draw the (green) lines. Here, I calculate the proper coordinates of the lines' start and end using sin and cos.
The last step is to flood fill the pie segments' area, cf. ImageDraw.floodfill. Therefore, I calculate the seed points the same way as before, but add an angular shift to hit a point exactly within the pie segment.
As you can see, n is variable in my solution (n must be even):
Of course, there are limitations regarding the angular resolution, most due to the small image.
Hope that helps!
EDIT: Here's a modified version to also allow for individually colored edges.
import math
from PIL import Image, ImageDraw
def segment_color(i_color, n_colors):
r = int((192 - 64) / (n_colors - 1) * i_color + 64)
g = int((224 - 128) / (n_colors - 1) * i_color + 128)
b = 255
return (r, g, b)
def edge_color(i_color, n_colors):
r = 255
g = 255 - int((224 - 32) / (n_colors - 1) * i_color + 32)
b = 255 - int((192 - 16) / (n_colors - 1) * i_color + 16)
return (r, g, b)
# Load image; generate ImageDraw
im = Image.open('images/vgdrD.png').convert('RGB')
draw = ImageDraw.Draw(im)
center = (int(im.width/2), int(im.height)/2)
# Number of pie segments (must be an even number)
n = 8
# Replace (all-white) edge with defined edge color
max_len = im.width + im.height
im_pix = im.load()
for i in range(n):
mask = Image.new('L', im.size, 0)
mask_draw = ImageDraw.Draw(mask)
angle = 360 / n * i
x1 = math.cos(angle/180*math.pi) * max_len + center[0]
y1 = math.sin(angle/180*math.pi) * max_len + center[1]
angle = 360 / n * (i+1)
x2 = math.cos(angle/180*math.pi) * max_len + center[0]
y2 = math.sin(angle/180*math.pi) * max_len + center[1]
mask_draw.polygon([center, (x1, y1), (x2, y2)], 255)
mask_pix = mask.load()
for y in range(im.height):
for x in range(im.width):
if (im_pix[x, y] == (255, 255, 255)) & (mask_pix[x, y] == 255):
im_pix[x, y] = edge_color(i, n)
# Draw lines with defined line color
line_color = (0, 255, 0)
d = min(im.width, im.height) - 10
for i in range(int(n/2)):
angle = 360 / n * i
x1 = math.cos(angle/180*math.pi) * d/2 + center[0]
y1 = math.sin(angle/180*math.pi) * d/2 + center[1]
x2 = math.cos((180+angle)/180*math.pi) * d/2 + center[0]
y2 = math.sin((180+angle)/180*math.pi) * d/2 + center[1]
draw.line([(x1, y1), (x2, y2)], line_color)
# Fill pie segments with defined segment colors
for i in range(n):
angle = 360 / n * i + 360 / n / 2
x = math.cos(angle/180*math.pi) * 20 + center[0]
y = math.sin(angle/180*math.pi) * 20 + center[1]
ImageDraw.floodfill(im, (x, y), segment_color(i, n))
im.save(str(n) + '_pie.png')
Binary masks for each pie segment are created, and all white pixels only within that binary mask are replaced with a defined edge color.
Using NumPy still seems favorable, but I was curious to do that in Pillow only.

Categories

Resources