given a contour, I can extract mean, and eigenvectors by performing PCA. Then I want to project all pixels inside a contour on an eigenvector. Below are my code and my images
my input images
Read image, extract contours, and draw the first component
import cv2 as cv
import numpy as np
import matplotlib.pyplot as plt
src = cv.imread(cv.samples.findFile('/Users/bryan/Desktop/lung.png'))
gray = cv.cvtColor(src, cv.COLOR_BGR2GRAY)
def drawAxis(img, p_, q_, colour, scale):
p = list(p_)
q = list(q_)
## [visualization1]
angle = atan2(p[1] - q[1], p[0] - q[0]) # angle in radians
hypotenuse = sqrt((p[1] - q[1]) * (p[1] - q[1]) + (p[0] - q[0]) * (p[0] - q[0]))
# Here we lengthen the arrow by a factor of scale
q[0] = p[0] - scale * hypotenuse * cos(angle)
q[1] = p[1] - scale * hypotenuse * sin(angle)
cv.line(img, (int(p[0]), int(p[1])), (int(q[0]), int(q[1])), colour, 1, cv.LINE_AA)
# create the arrow hooks
p[0] = q[0] + 9 * cos(angle + pi / 4)
p[1] = q[1] + 9 * sin(angle + pi / 4)
cv.line(img, (int(p[0]), int(p[1])), (int(q[0]), int(q[1])), colour, 1, cv.LINE_AA)
p[0] = q[0] + 9 * cos(angle - pi / 4)
p[1] = q[1] + 9 * sin(angle - pi / 4)
cv.line(img, (int(p[0]), int(p[1])), (int(q[0]), int(q[1])), colour, 1, cv.LINE_AA)
def getOrientation(pts, img, scale_factor=25):
## [pca]
# Construct a buffer used by the pca analysis
sz = len(pts)
data_pts = np.empty((sz, 2), dtype=np.float64)
for i in range(data_pts.shape[0]):
data_pts[i, 0] = pts[i, 0, 0]
data_pts[i, 1] = pts[i, 0, 1]
# Perform PCA analysis
mean = np.empty(0)
mean, eigenvectors, eigenvalues = cv.PCACompute2(data_pts, mean)
# Store the center of the object
cntr = (int(mean[0, 0]), int(mean[0, 1]))
## [pca]
## [visualization]
# Draw the principal components
cv.circle(img, cntr, 3, (255, 0, 255), 2)
p1 = (
cntr[0] + scale_factor * eigenvectors[0, 0],
cntr[1] + scale_factor * eigenvectors[0, 1])
p2 = (
cntr[0] - scale_factor * eigenvectors[1, 0],
cntr[1] - scale_factor * eigenvectors[1, 1])
drawAxis(img, cntr, p1, (0, 255, 0), 4)
## [visualization]
# doing projections along eigenvectors
dim1_ = []
for _ in data_pts:
p = make_vector_projection(_, np.array(p1))
dim1_.append(p.astype(int))
dim1 = np.array(dim1_)
dim2_ = []
for _ in data_pts:
p = make_vector_projection(_, np.array(p2))
dim2_.append(p.astype(int))
dim2 = np.array(dim2_)
return mean, eigenvectors, eigenvalues, p1, p2, dim1, dim2
for i, c in enumerate(contours):
mean, evecs, evalues, p1, p2, dim1, dim2 = getOrientation(c, src)
plt.figure(figsize=(10, 10))
plt.axis('equal')
plt.gca().invert_yaxis()
plt.imshow(src)
Look as I expected but I want to double-check, I compute angles between a random vector in the first dimension and the first eigenvector. I defined
def unit_vector(vector):
""" Returns the unit vector of the vector. """
return vector / np.linalg.norm(vector)
def angle_between(v1, v2):
""" Returns the angle in radians between vectors 'v1' and 'v2'::
>>> angle_between((1, 0, 0), (0, 1, 0))
1.5707963267948966
>>> angle_between((1, 0, 0), (1, 0, 0))
0.0
>>> angle_between((1, 0, 0), (-1, 0, 0))
3.141592653589793
"""
v1_u = unit_vector(v1)
v2_u = unit_vector(v2)
return np.arccos(np.clip(np.dot(v1_u, v2_u), -1.0, 1.0))
for i, c in enumerate(contours):
mean, evecs, evalues, p1, p2, dim1, dim2 = getOrientation(c, src)
draw_point = dim1[45].astype(int) # extract the first dimension
print(draw_point, evecs[0], angle_between(draw_point, p1))
print('#'* 10)
I got output like below, angle_between is in radian, close to zero means they are paralleled
[ 97 148] [ 0.14189901 -0.98988114] 0.002321780300502494
##########
[332 163] [-0.22199134 -0.97504864] 0.0006249775357550807
##########
My problem occurs when I want to plot projected points on eigenvectors. Points I plotted don't lie on green line (my eigenvector). My code is
point_colors = [
(0, 0, 255), # blue
(0, 255, 0) # green
]
for i, c in enumerate(contours):
mean, evecs, evalues, p1, p2, dim1, dim2 = getOrientation(c, src)
draw_point = dim1[45].astype(int)
print(draw_point, evecs[0], angle_between(draw_point, p1))
cv.circle(src, (draw_point[1], draw_point[0]), 7, point_colors[i], 2) # plot point
print('#'* 10)
and output
My question is, I want to plot all projected pixels on the eigenvector lines, but my calculation doesn't seem correct as points don't lie on the eigenvector lines. Could you help?
Related
I am trying to do ray tracing implementation using python but i am getting a white and black image for output. the shape is correct but the the image lacks colors. I am new to this type of programming so i dont really know what is causing the problem here.
My code
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.pyplot
def vector_nomalization(vector): #function to normalize a vector
return vector / np.linalg.norm(vector)
def intersection_with_sphere(center, radius, origin, direction_vector): #function to detect intersection between sphere and ray
b = 2 * np.dot(direction_vector, origin - center)
c = np.linalg.norm(origin - center) ** 2 - radius ** 2 # die länge von (origin - center)^2 - radius
delta = b **2 - 4 * c
if delta > 0:
t1 = (-b +np.sqrt(delta))/2
t2 = (-b - np.sqrt(delta))/2
if t1 > 0 and t2 > 0:
return min(t1,t2) #distance from origin to the nearest intersection point
return None
def closest_intersection(spheres, origin, direction): #find closest sphere that intersects with our ray
distances = [intersection_with_sphere(sphere['center'], sphere['radius'], origin, direction) for sphere in spheres]
closest_sphere = None
min_distance = np.inf
for index, distance in enumerate(distances):
if distance and distance < min_distance:
min_distance = distance
closest_sphere = spheres[index]
return closest_sphere, min_distance
def reflection_ray(vector, axis):
return vector - 2 * np.dot(vector, axis) * axis
spheres =[
{ 'center': np.array([-0.2, 0, -1]), 'radius': 0.7, 'ambient': np.array([0.1, 0, 0]), 'Diffuse': np.array([0.7, 0, 0]), 'specular': np.array([1, 1, 1]), 'shininess': 100, 'reflection': 0.5 },
{ 'center': np.array([0.1, -0.3, 0]), 'radius': 0.1, 'ambient': np.array([0.1, 0, 0.1]), 'Diffuse': np.array([0.7, 0, 0.7]), 'specular': np.array([1, 1, 1]), 'shininess': 100, 'reflection': 0.5 },
{ 'center': np.array([-0.3, 0, 0]), 'radius': 0.15, 'ambient': np.array([0, 0.1, 0]), 'Diffuse': np.array([0, 0.6, 0]), 'specular': np.array([1, 1, 1]), 'shininess': 100, 'reflection': 0.5 },
{ 'center': np.array([0, -9000, 0]), 'radius': 9000 - 0.7, 'ambient': np.array([0.1, 0.1, 0.1]), 'Diffuse': np.array([0.6, 0.6, 0.6]), 'specular': np.array([1, 1, 1]), 'shininess': 100, 'reflection': 0.5}
]
light = {'position': np.array([5, 5, 5]), 'ambient': np.array([1, 1, 1]), 'Diffuse': np.array([1, 1, 1]), 'specular': np.array([1, 1, 1])}
width = 300
height = 200
max_depth = 3
camera = np.array([0, 0, 1])
ratio = float(width) / height # ratio = image width / image height
screen = (
-1, 1 / ratio, 1, -1 / ratio) # screen defined by 4 numbers: left,top,right,bottom (x coordinate ranges from -1 to 1)
image = np.zeros((height, width, 3))
for i, y in enumerate(np.linspace(screen[1], screen[3], height)): # splits the screen into width and height points in x and y directions
for j, x in enumerate(np.linspace(screen[0], screen[2], width)):
pixel = np.array([x, y, 0])
origin = camera
direction_vector = vector_nomalization(pixel - origin) #oder auch ray
color = np.zeros((3))
reflection = 1
for k in range(max_depth):
#intersection?
closest_sphere, min_distance = closest_intersection(spheres, origin, direction_vector)
if closest_sphere is None:
break
#intersectionpoint between ray and closest sphere
intersection = origin + min_distance * direction_vector
normalized_to_surface = vector_nomalization(intersection - closest_sphere['center'])
shifted_point = intersection + 1e-5 * normalized_to_surface
lights_intersection = vector_nomalization(light['position'] - shifted_point)
_, min_distance = closest_intersection(spheres, shifted_point, lights_intersection)
lights_intersection_distance = np.linalg.norm(light['position']-intersection)
shadowed = min_distance < lights_intersection_distance
if shadowed:
break
#RGB
illumination = np.zeros((3))
#diffuse
illumination += closest_sphere['Diffuse'] * light['Diffuse'] * np.dot(lights_intersection,
normalized_to_surface)
#specular
camera_intersection = vector_nomalization(camera - intersection)
camera_light = vector_nomalization(lights_intersection + camera_intersection)
illumination += closest_sphere['specular'] * light['specular'] * np.dot(normalized_to_surface,
camera_light)
color += reflection + illumination
reflection *= closest_sphere['reflection']
origin = shifted_point
direction_vector = reflection_ray(direction_vector,normalized_to_surface)
image[i, j] = np.clip(color, 0, 1)
print("progress: %d/%d" % (i + 1, height))
plt.imsave('image.png', image)
This is what i am getting
and this is how its supposed to look like
Why you're getting only white (and black) is because of your 'reflection'
you have hard-coded reflection = 1 on line 66 - The devil's in the details, as they say!
And then on line 103: color += reflection + illumination. As reflection starts at 1 for every new iteration in the for j, x loop, you're making sure here that color is at least 1, 1, 1. So that's why you're getting white.
I am training a neural network to do Human Single Pose Estimation on the MPII dataset. Within it, many images contain more than one person and I need to crop the image in order to extract each single person.
Of each I have the position (or center) and the scale w.r.t. 200 px height.
This code does just what I need:
def get_transform(center, scale, res, rot=0):
# Generate transformation matrix
h = 200 * scale
t = np.zeros((3, 3))
t[0, 0] = float(res[1]) / h
t[1, 1] = float(res[0]) / h
t[0, 2] = res[1] * (-float(center[0]) / h + .5)
t[1, 2] = res[0] * (-float(center[1]) / h + .5)
t[2, 2] = 1
if not rot == 0:
rot = -rot # To match direction of rotation from cropping
rot_mat = np.zeros((3,3))
rot_rad = rot * np.pi / 180
sn,cs = np.sin(rot_rad), np.cos(rot_rad)
rot_mat[0,:2] = [cs, -sn]
rot_mat[1,:2] = [sn, cs]
rot_mat[2,2] = 1
# Need to rotate around center
t_mat = np.eye(3)
t_mat[0,2] = -res[1]/2
t_mat[1,2] = -res[0]/2
t_inv = t_mat.copy()
t_inv[:2,2] *= -1
t = np.dot(t_inv,np.dot(rot_mat,np.dot(t_mat,t)))
return t
def transform(pt, center, scale, res, invert=0, rot=0):
# Transform pixel location to different reference
t = get_transform(center, scale, res, rot=rot)
if invert:
t = np.linalg.inv(t)
new_pt = np.array([pt[0], pt[1], 1.]).T
new_pt = np.dot(t, new_pt)
return new_pt[:2].astype(int)
def crop(img, center, scale, res, rot=0):
# Upper left point
ul = np.array(transform([0, 0], center, scale, res, invert=1))
# Bottom right point
br = np.array(transform(res, center, scale, res, invert=1))
new_shape = [br[1] - ul[1], br[0] - ul[0]]
if len(img.shape) > 2:
new_shape += [img.shape[2]]
new_img = np.zeros(new_shape)
# Range to fill new array
new_x = max(0, -ul[0]), min(br[0], len(img[0])) - ul[0]
new_y = max(0, -ul[1]), min(br[1], len(img)) - ul[1]
# Range to sample from original image
old_x = max(0, ul[0]), min(len(img[0]), br[0])
old_y = max(0, ul[1]), min(len(img), br[1])
new_img[new_y[0]:new_y[1], new_x[0]:new_x[1]] = img[old_y[0]:old_y[1], old_x[0]:old_x[1]]
return cv2.resize(new_img, res)
However, I haven't figured out what kind of transformation matrix it is (the one that was created to derive ul or br).
Could someone explain to me what happens in these functions?
Thank you
The code creates 3D coordinates for the cube and then displays them on the 2D screen but you can still see the back faces of the cube. I just want to tell the code not to draw the points that are behind the faces in the 3D coordinates.
points = [(-1,-1,-1),(-1,-1,1),(-1,1,1),(-1,1,-1),
(1,-1,-1),(1,-1,1),(1,1,1),(1,1,-1), ]#coords for points
faces = [(3,0,4,7),(1,5,6,2),(2,1,0,3),(5,4,7,6),(3,2,6,7),(0,1,5,4)]
These are the coordinates of the points and which points should be joined to which
def flattenPoint(point):
(x, y, z) = (point[0], point[1], point[2])
xnew = x#x axis rotation
ynew = y * math.cos(rotatedanglex) - z * math.sin(rotatedanglex)
znew = y * math.sin(rotatedanglex) + z * math.cos(rotatedanglex)
xnew = znew * math.sin(rotatedangley) + xnew * math.cos(rotatedangley)
ynew = ynew #y axis rotation
znew = ynew * math.cos(rotatedangley) - xnew * math.sin(rotatedangley)
projectedY = int(height / 2 + ((ynew * distance) / (znew + distance)) * scale)
projectedX = int(width / 2 + ((xnew * distance) / (znew + distance)) * scale)
return (projectedX, projectedY, znew)
def createOutline(points):
a, b, c, d = points[0], points[1], points[2], points[3]
coords = ((b[0], b[1]), (a[0], a[1]), (d[0], d[1]),(c[0], c[1]))
pygame.draw.polygon(screen, blue, coords, 1)
'''
The FlattenPoint function rotates the 3D points and then turns them into 2D coordinates that are displayed.
'''
def createFace(points):
a, b, c, d = points[0], points[1], points[2], points[3]
coords = ((b[0], b[1]), (a[0], a[1]), (d[0], d[1]),(c[0], c[1]))
pygame.draw.polygon(screen, green, coords)
createFace joins up the 2D coordinates.
def render(points, faces):
coords = []
for point in points:
coords.append(flattenPoint(point))
screen.fill(screencolour)
for face in faces:
createFace((coords[face[0]], coords[face[1]], coords[face[2]], coords[face[3]]))
for face in faces:#must draw outline after all the faces have been drawn
createOutline((coords[face[0]], coords[face[1]], coords[face[2]],coords[face[3]]))
'''
Compute the normal vector to of a face and cull the faces where the normal vector points away from the view. The normal vector can be computed with the Cross product:
def cross(a, b):
return [a[1]*b[2] - a[2]*b[1], a[2]*b[0] - a[0]*b[2], a[0]*b[1] - a[1]*b[0]]
Use the cross product and cull the faces:
def createFace(points):
a, b, c, d = points[0], points[1], points[2], points[3]
v1 = b[0]-a[0], b[1]-a[1], b[2]-a[2]
v2 = c[0]-a[0], c[1]-a[1], c[2]-a[2]
n = cross(v1, v2)
if n[2] < 0:
return
coords = ((b[0], b[1]), (a[0], a[1]), (d[0], d[1]),(c[0], c[1]))
pygame.draw.polygon(screen, green, coords)
You have to ensure that the winding order of all the faces in counter clockwise. See also Face Culling and Back-face culling
Change the vertices and indices as follows:
points = [(-1,-1,-1),( 1,-1,-1), (1, 1,-1),(-1, 1,-1),
(-1,-1, 1),( 1,-1, 1), (1, 1, 1),(-1, 1, 1)]
faces = [(0,1,2,3),(5,4,7,6),(4,0,3,7),(1,5,6,2),(4,5,1,0),(3,2,6,7)]
However, I recommend to implement a depth test. See Pygame rotating cubes around axis and Does PyGame do 3d?.
Complete example:
import pygame
import math
points = [(-1,-1,-1),( 1,-1,-1), (1, 1,-1),(-1, 1,-1),
(-1,-1, 1),( 1,-1, 1), (1, 1, 1),(-1, 1, 1)]
faces = [(0,1,2,3),(5,4,7,6),(4,0,3,7),(1,5,6,2),(4,5,1,0),(3,2,6,7)]
def flattenPoint(point):
(x, y, z) = (point[0], point[1], point[2])
xnew = x#x axis rotation
ynew = y * math.cos(rotatedanglex) - z * math.sin(rotatedanglex)
znew = y * math.sin(rotatedanglex) + z * math.cos(rotatedanglex)
xnew = znew * math.sin(rotatedangley) + xnew * math.cos(rotatedangley)
ynew = ynew #y axis rotation
znew = ynew * math.cos(rotatedangley) - xnew * math.sin(rotatedangley)
projectedY = int(height / 2 + ((ynew * distance) / (znew + distance)) * scale)
projectedX = int(width / 2 + ((xnew * distance) / (znew + distance)) * scale)
return (projectedX, projectedY, znew)
def cross(a, b):
return [a[1]*b[2] - a[2]*b[1], a[2]*b[0] - a[0]*b[2], a[0]*b[1] - a[1]*b[0]]
def createOutline(points):
a, b, c, d = points[0], points[1], points[2], points[3]
v1 = b[0]-a[0], b[1]-a[1], b[2]-a[2]
v2 = c[0]-a[0], c[1]-a[1], c[2]-a[2]
n = cross(v1, v2)
if n[2] < 0:
return
coords = ((b[0], b[1]), (a[0], a[1]), (d[0], d[1]),(c[0], c[1]))
pygame.draw.polygon(screen, blue, coords, 3)
def createFace(points):
a, b, c, d = points[0], points[1], points[2], points[3]
v1 = b[0]-a[0], b[1]-a[1], b[2]-a[2]
v2 = c[0]-a[0], c[1]-a[1], c[2]-a[2]
n = cross(v1, v2)
if n[2] < 0:
return
coords = ((b[0], b[1]), (a[0], a[1]), (d[0], d[1]),(c[0], c[1]))
pygame.draw.polygon(screen, green, coords)
def render(points, faces):
coords = []
for point in points:
coords.append(flattenPoint(point))
screen.fill(screencolour)
for face in faces:
createFace((coords[face[0]], coords[face[1]], coords[face[2]], coords[face[3]]))
for face in faces:#must draw outline after all the faces have been drawn
createOutline((coords[face[0]], coords[face[1]], coords[face[2]],coords[face[3]]))
pygame.init()
screen = pygame.display.set_mode((500, 500))
clock = pygame.time.Clock()
rotatedanglex = 0.0
rotatedangley = 0.0
width, height = screen.get_size()
distance = 200.0
scale = 75.0
green = (0, 255, 0)
blue = (0, 0, 255)
screencolour = (0, 0, 0)
run = True
while run:
clock.tick(60)
for event in pygame.event.get():
if event.type == pygame.QUIT:
run = False
screen.fill(0)
render(points, faces)
pygame.display.flip()
rotatedanglex += 0.01
rotatedangley += 0.02
pygame.quit()
exit()
I am trying to detect the lines within an image using the Hough Transformation. Therefore I first create the accumulator like this:
from math import hypot, pi, cos, sin
from PIL import Image
import numpy as np
import cv2 as cv
import math
def hough(img):
thetaAxisSize = 460 #Width of the hough space image
rAxisSize = 360 #Height of the hough space image
rAxisSize= int(rAxisSize/2)*2 #we make sure that this number is even
img = im.load()
w, h = im.size
houghed_img = Image.new("L", (thetaAxisSize, rAxisSize), 0) #legt Bildgroesse fest
pixel_houghed_img = houghed_img.load()
max_radius = hypot(w, h)
d_theta = pi / thetaAxisSize
d_rho = max_radius / (rAxisSize/2)
#Accumulator
for x in range(0, w):
for y in range(0, h):
treshold = 255
col = img[x, y]
if col >= treshold: #determines for each pixel at (x,y) if there is enough evidence of a straight line at that pixel.
for vx in range(0, thetaAxisSize):
theta = d_theta * vx #angle between the x axis and the line connecting the origin with that closest point.
rho = x*cos(theta) + y*sin(theta) #distance from the origin to the closest point on the straight line
vy = rAxisSize/2 + int(rho/d_rho+0.5) #Berechne Y-Werte im hough space image
pixel_houghed_img[vx, vy] += 1 #voting
return houghed_imgcode here
And then call the function like this:
im = Image.open("img3.pgm").convert("L")
houghed_img = hough(im)
houghed_img.save("ho.bmp")
houghed_img.show()
The result seems to be okay:
So here comes the problem. I know want to find the top 3 highest values in the hough space and transform it back to 3 lines. The highest values should be the strongest lines.
Therefore I am first looking for the highest values within the pixel array and take the X and Y values of the maxima I found. From my understading this X and Y values are my rho and theta. I finding the maxima like this:
def find_maxima(houghed_img):
w, h = houghed_img.size
max_radius = hypot(w, h)
pixel_houghed_img = houghed_img.load()
max1, max2, max3 = 0, 0, 0
x1position, x2position, x3position = 0, 0, 0
y1position, y2position, y3position = 0, 0, 0
rho1, rho2, rho3 = 0, 0, 0
theta1, theta2, theta3 = 0, 0, 0
for x in range(1, w):
for y in range(1, h):
value = pixel_houghed_img[x, y]
if(value > max1):
max1 = value
x1position = x
y1position = y
rho1 = x
theta1 = y
elif(value > max2):
max2 = value
x2position = x
x3position = y
rho2 = x
theta2 = y
elif(value > max3):
max3 = value
x3position = x
y3position = y
rho3 = x
theta3 = y
print('max', max1, max2, max3)
print('rho', rho1, rho2, rho3)
print('theta', theta1, theta2, theta3)
# Results of the print:
# ('max', 255, 255, 255)
# ('rho', 1, 1, 1)
# ('theta', 183, 184, 186)
return rho1, theta1, rho2, theta2, rho3, theta3
And now I want to use this rho and theta values to draw the detected lines. I am doing this with the following code:
img_copy = np.ones(im.size)
rho1, theta1, rho2, theta2, rho3, theta3 = find_maxima(houghed_img)
a1 = math.cos(theta1)
b1 = math.sin(theta1)
x01 = a1 * rho1
y01 = b1 * rho1
pt11 = (int(x01 + 1000*(-b1)), int(y01 + 1000*(a1)))
pt21 = (int(x01 - 1000*(-b1)), int(y01 - 1000*(a1)))
cv.line(img_copy, pt11, pt21, (0,0,255), 3, cv.LINE_AA)
a2 = math.cos(theta2)
b2 = math.sin(theta2)
x02 = a2 * rho2
y02 = b2 * rho2
pt12 = (int(x02 + 1000*(-b2)), int(y02 + 1000*(a2)))
pt22 = (int(x02 - 1000*(-b2)), int(y02 - 1000*(a2)))
cv.line(img_copy, pt12, pt22, (0,0,255), 3, cv.LINE_AA)
a3 = math.cos(theta3)
b3 = math.sin(theta3)
x03 = a3 * rho3
y03 = b3 * rho3
pt13 = (int(x03 + 1000*(-b3)), int(y03 + 1000*(a3)))
pt23 = (int(x03 - 1000*(-b3)), int(y03 - 1000*(a3)))
cv.line(img_copy, pt13, pt23, (0,0,255), 3, cv.LINE_AA)
cv.imshow('lines', img_copy)
cv.waitKey(0)
cv.destroyAllWindows()
However, the result seems to be wrong:
So my assuption is that I either do something wrong when I declare the rho and theta values in the find_maxima() function, meaning that something is wrong with this:
max1 = value
x1position = x
y1position = y
rho1 = x
theta1 = y
OR that I am doing something wrong when translating the rho and theta value back to a line.
I would be very thankful if someone can help me with that!
Edit1: As request please finde the original Image where I want to finde the lines from below:
Edit2:
Thanks to the input of #Alessandro Jacopson and #Cris Luegno I was able to make some changes that definitely give me some hope!
In my def hough(img): I was setting the threshold to 255, which means that I only voted for white pixels, which is wrong since I want to look at the black pixels, since these pixels will indicate lines and not the white background of my image. So the calculation of the accumlator in def hough(img): looks like this now:
#Accumulator
for x in range(0, w):
for y in range(0, h):
treshold = 0
col = img[x, y]
if col <= treshold: #determines for each pixel at (x,y) if there is enough evidence of a straight line at that pixel.
for vx in range(0, thetaAxisSize):
theta = d_theta * vx #angle between the x axis and the line connecting the origin with that closest point.
rho = x*cos(theta) + y*sin(theta) #distance from the origin to the closest point on the straight line
vy = rAxisSize/2 + int(rho/d_rho+0.5) #Berechne Y-Werte im hough space image
pixel_houghed_img[vx, vy] += 1 #voting
return houghed_img
This leads to the following Accumulator and the following rho and thea values, when using the find_maxima() function:
# Results of the prints: (now top 8 instead of top 3)
# ('max', 155, 144, 142, 119, 119, 104, 103, 98)
# ('rho', 120, 264, 157, 121, 119, 198, 197, 197)
# ('theta', 416, 31, 458, 414, 417, 288, 291, 292)
The Lines that I can draw from this values look like this:
So this results are much more better but something seems to be still wrong. I have a strong suspicion that still something is wrong here:
for x in range(1, w):
for y in range(1, h):
value = pixel_houghed_img[x, y]
if(value > max1):
max1 = value
x1position = x
y1position = y
rho1 = value
theta1 = x
Here I am setting rho and theta equals [0...w] respectively [0...h]. I think that this is wrong since in the hough space values of X and why Y are not 0, 1,2,3... since we are in a another space. So I assume, that I have to multiply X and Y with something to bring them back in hough space. But this is just an assumption, maybe you guys can think of something else?
Again thank you very much to Alessandro and Cris for helping me out here!
Edit3: Working Code, thanks to #Cris Luengo
from math import hypot, pi, cos, sin
from PIL import Image
import numpy as np
import cv2 as cv
import math
def hough(img):
img = im.load()
w, h = im.size
thetaAxisSize = w #Width of the hough space image
rAxisSize = h #Height of the hough space image
rAxisSize= int(rAxisSize/2)*2 #we make sure that this number is even
houghed_img = Image.new("L", (thetaAxisSize, rAxisSize), 0) #legt Bildgroesse fest
pixel_houghed_img = houghed_img.load()
max_radius = hypot(w, h)
d_theta = pi / thetaAxisSize
d_rho = max_radius / (rAxisSize/2)
#Accumulator
for x in range(0, w):
for y in range(0, h):
treshold = 0
col = img[x, y]
if col <= treshold: #determines for each pixel at (x,y) if there is enough evidence of a straight line at that pixel.
for vx in range(0, thetaAxisSize):
theta = d_theta * vx #angle between the x axis and the line connecting the origin with that closest point.
rho = x*cos(theta) + y*sin(theta) #distance from the origin to the closest point on the straight line
vy = rAxisSize/2 + int(rho/d_rho+0.5) #Berechne Y-Werte im hough space image
pixel_houghed_img[vx, vy] += 1 #voting
return houghed_img, rAxisSize, d_rho, d_theta
def find_maxima(houghed_img, rAxisSize, d_rho, d_theta):
w, h = houghed_img.size
pixel_houghed_img = houghed_img.load()
maxNumbers = 9
ignoreRadius = 10
maxima = [0] * maxNumbers
rhos = [0] * maxNumbers
thetas = [0] * maxNumbers
for u in range(0, maxNumbers):
print('u:', u)
value = 0
xposition = 0
yposition = 0
#find maxima in the image
for x in range(0, w):
for y in range(0, h):
if(pixel_houghed_img[x,y] > value):
value = pixel_houghed_img[x, y]
xposition = x
yposition = y
#Save Maxima, rhos and thetas
maxima[u] = value
rhos[u] = (yposition - rAxisSize/2) * d_rho
thetas[u] = xposition * d_theta
pixel_houghed_img[xposition, yposition] = 0
#Delete the values around the found maxima
radius = ignoreRadius
for vx2 in range (-radius, radius): #checks the values around the center
for vy2 in range (-radius, radius): #checks the values around the center
x2 = xposition + vx2 #sets the spectated position on the shifted value
y2 = yposition + vy2
if not(x2 < 0 or x2 >= w):
if not(y2 < 0 or y2 >= h):
pixel_houghed_img[x2, y2] = 0
print(pixel_houghed_img[x2, y2])
print('max', maxima)
print('rho', rhos)
print('theta', thetas)
return maxima, rhos, thetas
im = Image.open("img5.pgm").convert("L")
houghed_img, rAxisSize, d_rho, d_theta = hough(im)
houghed_img.save("houghspace.bmp")
houghed_img.show()
img_copy = np.ones(im.size)
maxima, rhos, thetas = find_maxima(houghed_img, rAxisSize, d_rho, d_theta)
for t in range(0, len(maxima)):
a = math.cos(thetas[t])
b = math.sin(thetas[t])
x = a * rhos[t]
y = b * rhos[t]
pt1 = (int(x + 1000*(-b)), int(y + 1000*(a)))
pt2 = (int(x - 1000*(-b)), int(y - 1000*(a)))
cv.line(img_copy, pt1, pt2, (0,0,255), 3, cv.LINE_AA)
cv.imshow('lines', img_copy)
cv.waitKey(0)
cv.destroyAllWindows()
Original Image:
Accumulator:
Successful Line Detection:
This part of your code doesn't seem right:
max1 = value
x1position = x
y1position = y
rho1 = value
theta1 = x
If x and y are the two coordinates in the parameter space, they will correspond to rho and theta. Setting rho equal to the value makes no sense. I also don't know why you store x1position and y1position, since you don't use these variables.
Next, you need to transform these coordinates back to actual rho and theta values, inverting the transform you do when writing:
theta = d_theta * vx #angle between the x axis and the line connecting the origin with that closest point.
rho = x*cos(theta) + y*sin(theta) #distance from the origin to the closest point on the straight line
vy = rAxisSize/2 + int(rho/d_rho+0.5) #Berechne Y-Werte im hough space image
The inverse would be:
rho = (y - rAxisSize/2) * d_rho
theta = x * d_theta
First of all, following How to create a Minimal, Complete, and Verifiable example you should post or give a link to your image img3.pgm, if possible.
Then, you wrote that:
# Results of the print:
# ('max', 255, 255, 255)
# ('rho', 1, 1, 1)
# ('theta', 183, 184, 186)
so rho is the same for the three lines and theta is not so different varying between 183 and 186; so the three lines are almost equal each other and this fact does not depend on the method you use to get the line equation and draw it.
According to the tutorial Hough Line Transform it seems to me that your method for finding two points on a line is correct. That's is what the tutorial is suggesting and it seems to me equivalent to your code:
lines = cv2.HoughLines(edges,1,np.pi/180,200)
for rho,theta in lines[0]:
a = np.cos(theta)
b = np.sin(theta)
x0 = a*rho
y0 = b*rho
x1 = int(x0 + 1000*(-b))
y1 = int(y0 + 1000*(a))
x2 = int(x0 - 1000*(-b))
y2 = int(y0 - 1000*(a))
cv2.line(img,(x1,y1),(x2,y2),(0,0,255),2)
I suspect the peak finding algorithm may not be correct.
Your peak finding algorithm finds the location of the largest peak and then the two locations very close to that maximum.
For the sake of simplicity see what happens in just one dimension, a peak finding algorithm is expected to find three peak locations at x=-1, x=0 and x=1 and the peak values should be close to .25, .5 and 1.
import numpy as np
import matplotlib.pyplot as plt
x = np.linspace(-2, 2, 1000)
y = np.exp(-(x-1)**2/0.01)+.5*np.exp(-(x)**2/0.01)+.25*np.exp(-(x+1)**2/0.01)
max1, max2, max3 = 0, 0, 0
m1 = np.zeros(1000)
m2 = np.zeros(1000)
m3 = np.zeros(1000)
x1position, x2position, x3position = 0, 0, 0
for i in range(0,1000):
value = y[i]
if(value > max1):
max1 = value
x1position = x[i]
elif(value > max2):
max2 = value
x2position = x[i]
elif(value > max3):
max3 = value
x3position = x[i]
m1[i] = max1
m2[i] = max2
m3[i] = max3
print('xposition',x1position, x2position, x3position )
print('max', max1, max2, max3)
plt.figure()
plt.subplot(4,1,1)
plt.plot(x, y)
plt.ylabel('$y$')
plt.subplot(4,1,2)
plt.plot(x, m1)
plt.ylabel('$max_1$')
plt.subplot(4,1,3)
plt.plot(x, m2)
plt.ylabel('$max_2$')
plt.subplot(4,1,4)
plt.plot(x, m3)
plt.xlabel('$x$')
plt.ylabel('$max_3$')
plt.show()
the output is
('xposition', 0.99899899899899891, 1.0030030030030028, 1.0070070070070072)
('max', 0.99989980471948192, 0.99909860379824966, 0.99510221871862647)
and it is not what expected.
Here you have a visual trace of the program:
To detect multiple peaks in a 2D field you should have a look for example at this Peak detection in a 2D array
So I found this code on line and it does a random Bezier Curve which uses random points. I was trying to make it non random so that it would use static points I got it to use only 4 points which was easy. I have never used PIL before in python and in fact I am slowly learning python. And I have only really done front end work (html, javascript, css, etc) and I just wanted to know if some one can help me.
Here is the code I found on line:
# Random Bezier Curve using De Casteljau's algorithm
# http://en.wikipedia.org/wiki/Bezier_curve
# http://en.wikipedia.org/wiki/De_Casteljau%27s_algorithm
# FB - 201111244
import random
from PIL import Image, ImageDraw
imgx = 500
imgy = 500
image = Image.new("RGB", (imgx, imgy))
draw = ImageDraw.Draw(image)
def B(coorArr, i, j, t):
if j == 0:
return coorArr[i]
return B(coorArr, i, j - 1, t) * (1 - t) + B(coorArr, i + 1, j - 1, t) * t
n = 4 # number of control points
coorArrX = []
coorArrY = []
for k in range(n):
x = (0, imgx - 1)
y = (0, imgy - 1)
coorArrX.append(x)
coorArrY.append(y)
# plot the curve
numSteps = 10000
for k in range(numSteps):
t = float(k) / (numSteps - 1)
x = int(B(coorArrX, 0, n - 1, t))
y = int(B(coorArrY, 0, n - 1, t))
try:
image.putpixel((x, y), (0, 255, 0))
except:
pass
# plot the control points
cr = 3 # circle radius
for k in range(n):
x = coorArrX[k]
y = coorArrY[k]
try:
draw.ellipse((x - cr, y - cr, x + cr, y + cr), (255, 0, 0))
except:
pass
# image.save("BezierCurve.png", "PNG")
image.show() I add this so I can see it right away
Any help if at all would be great.
Ok The long detailed BS that began this all is below the long line. The resulting answer is here.
Your static points are x,y coordinates with the x values and y values placed in seperate arrays (coorArrx and coorArrY respectively) make sure to never use a value = imgx or imy.
# Random Bezier Curve using De Casteljau's algorithm
# http://en.wikipedia.org/wiki/Bezier_curve
# http://en.wikipedia.org/wiki/De_Casteljau%27s_algorithm
# FB - 201111244
import random
from PIL import Image, ImageDraw
imgx = 500
imgy = 500
image = Image.new("RGB", (imgx, imgy))
draw = ImageDraw.Draw(image)
def B(coorArr, i, j, t):
if j == 0:
return coorArr[i]
return B(coorArr, i, j - 1, t) * (1 - t) + B(coorArr, i + 1, j - 1, t) * t
# n = random.randint(3, 6) # number of control points
n=4
#coorArrX = []
#coorArrY = []
#for k in range(n):
# x = random.randint(0, imgx - 1)
# y = random.randint(0, imgy - 1)
# coorArrX.append(x)
# coorArrY.append(y)
coorArrX=[3,129,12,77]
coorArrY=[128,52,12,491]
# plot the curve
numSteps = 10000
for k in range(numSteps):
t = float(k) / (numSteps - 1)
x = int(B(coorArrX, 0, n - 1, t))
y = int(B(coorArrY, 0, n - 1, t))
try:
image.putpixel((x, y), (0, 255, 0))
except:
pass
# plot the control points
cr = 3 # circle radius
for k in range(n):
x = coorArrX[k]
y = coorArrY[k]
try:
draw.ellipse((x - cr, y - cr, x + cr, y + cr), (255, 0, 0))
except:
pass
image.show()
=.........................................................................................=
I am also something of a newcommer to all of this, and I REFUSE to look this up as I see it like you do...a learning experiencee.
But as I look at this code I see something strange
for k in range(n):
x = (0, imgx - 1)
y = (0, imgy - 1)
coorArrX.append(x)
coorArrY.append(y)
Are you sure this part is correct? imgx is defined as 500 elsewhere, and n is 4.
so this could read as
for k in range(4):
x = (0, 500 - 1)
y = (0, 500 - 1)
which (since these values never change at all in this code) means:
x = (0, 499)
y = (0, 499)
on every pass.
So each time they get to :
coorArrX.append(x)
coorArrY.append(y)
They simply keep adding new copies of the same data to the array, so when it is done the array looks like this (internally)
[(0, 499), (0, 499), (0, 499), (0,499)]
What makes this more confusing, is that coorArrX and coorArrY are A) Identical, and B) identical in their basic parts(that is each element is identical). Therefore, when you get to this part of the code:
# plot the control points
cr = 3 # circle radius
for k in range(n):
x = coorArrX[k]
y = coorArrY[k]
try:
draw.ellipse((x - cr, y - cr, x + cr, y + cr), (255, 0, 0))
except:
pass
and you substitute in the values in the arrays, you get:
# plot the control points
cr = 3 # circle radius
for k in range(n):
x = coorArrX[k]
y = coorArrY[k]
try:
draw.ellipse(((0, 499) - 3, (0, 499) - 3, (0, 499) + 3, (0, 499) + 3), (255, 0, 0))
except:
pass
Now this is the part that controls the drawing of the curved segments for the plot, but I do not see how centering an elispe on those impossible coordinate sets can draw anything?!
Broke down and did a copy paste test run. This code is purely bogus, either placed to dupe people into wasting time, or placed where OP found it for same reason.
But it was fun trying!!
From your description, the only problem seems to be about Python basics. I have rearranged the code as follows, so the only things that need to be touched are at bottom. Now, if you want to manually specify 4 control points, go ahead and do it (in the following code I have specified 4 of them myself as an example). You need to understand that, in the original code, coorArrX and coorArrY are just lists, which will hold 4 points each (x and y coordinates, respectively). If you are manually specifying them, there is no point in using a loop to write them. I hope this code is clear enough:
# Random Bezier Curve using De Casteljau's algorithm
# http://en.wikipedia.org/wiki/Bezier_curve
# http://en.wikipedia.org/wiki/De_Casteljau%27s_algorithm
# FB - 201111244
from PIL import Image, ImageDraw
def plot_curve(image, px, py, steps=1000, color=(0, 255, 0)):
def B(coord, i, j, t):
if j == 0:
return coord[i]
return (B(coord, i, j - 1, t) * (1 - t) +
B(coord, i + 1, j - 1, t) * t)
img = image.load()
for k in range(steps):
t = float(k) / (steps - 1)
x = int(B(px, 0, n - 1, t))
y = int(B(py, 0, n - 1, t))
try:
img[x, y] = color
except IndexError:
pass
def plot_control_points(image, px, py, radi=3, color=(255, 0, 0)):
draw = ImageDraw.Draw(image)
for x, y in zip(px, py):
draw.ellipse((x - radi, y - radi, x + radi, y + radi), color)
# Your fixed, manually specified, points.
n = 4
coord_x = [25, 220, 430, 410]
coord_y = [250, 10, 450, 40]
image = Image.new("RGB", (500, 500))
plot_curve(image, coord_x, coord_y)
plot_control_points(image, coord_x, coord_y)
image.save("BezierCurve.png")