How to improve a variable lens blur algorithm in Python OpenCV? - python

I want to emulate the blur of a cheap camera lens (like Holga).
Blur is very weak close to the photo center.
It becomes stronger toward the corners.
I wrote the code and it works in general.
Input image:
Result image:
But I feel that it could be done better and faster.
I've found a similar question but it still has no answer.
How to improve an algorithm speed and avoid iteration over pixels?
It's not the same as standard Gaussian or 2D filter blur with constant kernel size.
import cv2
import numpy as np
import requests
from tqdm import tqdm
import warnings
def blur(img=None, blur_radius=None, test=False):
    """Blur the lightness channel of an image, more strongly toward the corners.

    The image is converted to LAB and only the L channel is processed.  Every
    pixel is replaced by the mean of an elliptical neighbourhood whose radius
    grows linearly with the pixel's distance from the image centre: 0 at the
    centre, ``blur_radius`` at the corners.

    Args:
        img: BGR image array.  When ``None`` a sample image is downloaded.
        blur_radius: Neighbourhood radius (pixels) reached at the corners.
            Defaults to 3% of the longer image side.
        test: When true, intermediate images are written to the working
            directory and progress messages are printed.

    Returns:
        The blurred L channel, same shape and dtype as the original L channel.
    """
    # test image loading
    if img is None:
        print('test mode ON')
        print('loading image...')
        # NOTE: the sample URL is empty in this listing; fill it in before
        # calling blur() without an image.
        url = r''
        resp = requests.get(url, stream=True).raw
        img = np.asarray(bytearray(resp.read()), dtype="uint8")
        img = cv2.imdecode(img, cv2.IMREAD_COLOR)
        cv2.imwrite('img_input.png', img)
        print('image loaded')

    # channels splitting (only the lightness channel is used)
    img_lab = cv2.cvtColor(img, cv2.COLOR_BGR2LAB)
    lightness = cv2.split(img_lab)[0]
    if test:
        cv2.imwrite('l_channel.png', lightness)
        print('l channel saved')

    # make blur map: distance of every pixel centre from the image centre
    height, width = lightness.shape[:2]
    center = np.array([height / 2, width / 2])
    diag = ((height / 2) ** 2 + (width / 2) ** 2) ** 0.5
    blur_map = np.linalg.norm(
        np.indices(img.shape[:2]) - center[:, None, None] + 0.5,
        axis=0
    )
    if blur_radius is None:
        blur_radius = int(max(height, width) * 0.03)
    # scale so the map is ~0 at the centre and blur_radius at the corners
    blur_map = blur_map / diag
    blur_map = blur_map * blur_radius
    if test:
        blur_map_norm = cv2.normalize(blur_map, None, 0, 255, cv2.NORM_MINMAX, cv2.CV_32F)
        cv2.imwrite('blur_map.png', blur_map_norm)
        print('blur map saved')

    # very inefficient blur algorithm!!!
    l_blur = np.copy(lightness)
    kernels = {}  # radius -> elliptical weight mask, built once per radius
    for x in tqdm(range(width)):
        for y in range(height):
            radius = int(blur_map[y, x])
            if radius == 0:
                # l_blur already holds the unblurred value
                continue
            kernel = kernels.get(radius)
            if kernel is None:
                side = 2 * radius + 1
                kernel = cv2.getStructuringElement(
                    cv2.MORPH_ELLIPSE, (side, side)).astype(np.float64)
                kernels[radius] = kernel
            top, bottom = max(0, y - radius), min(height, y + radius + 1)
            left, right = max(0, x - radius), min(width, x + radius + 1)
            window = lightness[top:bottom, left:right]
            # Crop the kernel exactly like the window was clipped at the
            # image border so both always have the same shape.
            weights = kernel[top - y + radius:bottom - y + radius,
                             left - x + radius:right - x + radius]
            # Weighted mean over the ellipse only (divide by the weight sum,
            # not by the number of cells in the bounding box).
            l_blur[y, x] = np.rint((window * weights).sum() / weights.sum())

    if test:
        cv2.imwrite('l_blur.png', l_blur)
        print('l_blur saved')
        print('done')
    return l_blur

The only way to implement a filter where the kernel is different for every pixel is to create the kernel for each pixel and apply it in a loop, like OP's code does. The Fourier transform does not apply to this case. Python is a very slow language, the same algorithm implemented in a compiled language would be much faster. Unless there is some predefined structure in how the kernel is created at each pixel, there is no way to reduce the complexity of the algorithm.
For example, the uniform filter with a square kernel (commonly called the "box" filter) can be computed based on the integral image, using only 4 additions per pixel. This implementation should be able to choose a different kernel size at each pixel without any additional cost.
DIPlib has an implementation of an adaptive Gaussian filter [disclaimer: I'm an author of DIPlib, but I did not implement this functionality]. Here is the documentation.
This filter applies a Gaussian filter, but the Gaussian kernel is scaled and rotated differently at every pixel.
Lens blur is not a Gaussian, but it's not easy to see the difference by eye in most cases; the difference matters only if there is a very small dot with high contrast.
OP's case would be implemented as follows:
import diplib as dip
# Read the input image.
img = dip.ImageRead('examples/trui.ics')
# Blur map: quadratic function of the distance to the image centre,
# rescaled in place so that its maximum value is 1.
blur_map = dip.CreateRadiusSquareCoordinate(img.Sizes())
blur_map /= dip.Maximum(blur_map)
# Adaptive Gaussian with a maximum sigma of 5 pixels; blur_map scales that
# sigma per pixel (presumably the leading 0 is a constant kernel orientation --
# confirm against the DIPlib AdaptiveGauss documentation).
img_blur = dip.AdaptiveGauss(img, [0, blur_map], sigmas=[5])
(the blur_map here is defined differently, I chose a quadratic function of the distance to the center, because I think it looks really nice; use dip.CreateRadiusCoordinate() to reproduce OP's map).
I've chosen a maximum blur of 5 (this is the sigma, in pixels, of the Gaussian, not the footprint of the kernel), and blur_map here scales this sigma with a factor between 0 in the middle and 1 at the corners of the image.
Another interesting effect would be as follows, with increasing blur tangential to each circle centered in the middle of the image, with very little blur radially:
# Per-pixel angle map built from the image size (presumably the polar angle
# around the image centre -- confirm against the DIPlib documentation).
angle_map = dip.CreatePhiCoordinate(img.Sizes())
# Anisotropic kernel (sigmas 8 and 1) oriented by angle_map and scaled by
# blur_map; reuses img and blur_map from the previous snippet.
img_blur = dip.AdaptiveGauss(img, [angle_map, blur_map], sigmas=[8,1])

Here is one way to apply (uniform, non-varying) lens defocus blur in Python/OpenCV by transforming both the image and filter to the Fourier (frequency) domain.
Read the input
Take dft of input to transform to Fourier domain
Draw a white filled circle on a black background the size of the input as a mask (filter kernel). This is the defocus kernel in the spatial domain, i.e. a circular rect function.
Blur the circle slightly to anti-alias the edge
Roll the mask so that the center is at the origin (top left corner) and normalize so that the sum of values = 1
Take dft of mask to transform to Fourier domain where its amplitude profile is a jinc function.
Multiply the two dft images to apply the blur
Take the idft of the product to transform back to spatial domain
Get the magnitude of the real and imaginary components of the product, clip and convert to uint8 as the result
Save the result
import numpy as np
import cv2
# read input as grayscale
img = cv2.imread('lena_512_gray.png', cv2.IMREAD_GRAYSCALE)
if img is None:
    raise FileNotFoundError('lena_512_gray.png')
# do dft saving as complex output
dft_img = np.fft.fft2(img, axes=(0,1))
# create circle mask: white filled disc centred on a black background
radius = 32
mask = np.zeros_like(img)
cy = mask.shape[0] // 2
cx = mask.shape[1] // 2
cv2.circle(mask, (cx,cy), radius, 255, -1)
# blur the mask slightly to antialias
mask = cv2.GaussianBlur(mask, (3,3), 0)
# roll the mask so that its center lands on the origin (top-left corner);
# using -cy/-cx instead of a fixed 256 keeps this correct for any image size
mask_roll = np.roll(mask, (-cy,-cx), axis=(0,1))
# normalize to sum=1 so the filter preserves overall brightness
mask_norm = mask_roll / mask_roll.sum()
# take dft of mask
dft_mask_norm = np.fft.fft2(mask_norm, axes=(0,1))
# apply dft_mask to dft_img (multiplication here = convolution in space)
dft_shift_product = np.multiply(dft_img, dft_mask_norm)
# do idft saving as complex output
img_filtered = np.fft.ifft2(dft_shift_product, axes=(0,1))
# take the magnitude of the complex result, clip and convert to uint8
img_filtered = np.abs(img_filtered).clip(0,255).astype(np.uint8)
cv2.imshow("ORIGINAL", img)
cv2.imshow("MASK", mask)
cv2.imshow("FILTERED DFT/IFT ROUND TRIP", img_filtered)
# imshow only renders once the GUI event loop runs
cv2.waitKey(0)
cv2.destroyAllWindows()
# write result to disk
cv2.imwrite("lena_512_gray_mask.png", mask)
cv2.imwrite("lena_dft_numpy_lowpass_filtered_rad32.jpg", img_filtered)
Mask - Filter Kernel In Spatial Domain:
Result for Circle Radius=4:
Result for Circle Radius=8:
Result for Circle Radius=16:
Result for Circle Radius=32
Using OpenCV for the dft, etc rather than Numpy, the above becomes:
import numpy as np
import cv2
# read input as grayscale
img = cv2.imread('lena_512_gray.png', cv2.IMREAD_GRAYSCALE)
if img is None:
    raise FileNotFoundError('lena_512_gray.png')
# do dft saving as complex output
dft_img = cv2.dft(np.float32(img), flags = cv2.DFT_COMPLEX_OUTPUT)
# create circle mask: white filled disc centred on a black background
radius = 32
mask = np.zeros_like(img)
cy = mask.shape[0] // 2
cx = mask.shape[1] // 2
cv2.circle(mask, (cx,cy), radius, 255, -1)
# blur the mask slightly to antialias
mask = cv2.GaussianBlur(mask, (3,3), 0)
# roll the mask so that its center lands on the origin (top-left corner);
# using -cy/-cx instead of a fixed 256 keeps this correct for any image size
mask_roll = np.roll(mask, (-cy,-cx), axis=(0,1))
# normalize to sum=1 so the filter preserves overall brightness
mask_norm = mask_roll / mask_roll.sum()
# take dft of mask
dft_mask_norm = cv2.dft(np.float32(mask_norm), flags = cv2.DFT_COMPLEX_OUTPUT)
# apply dft_mask to dft_img
dft_product = cv2.mulSpectrums(dft_img, dft_mask_norm, 0)
# do idft with real output, then clip and convert to uint8
img_filtered = cv2.idft(dft_product, flags=cv2.DFT_SCALE+cv2.DFT_REAL_OUTPUT)
img_filtered = img_filtered.clip(0,255).astype(np.uint8)
cv2.imshow("ORIGINAL", img)
cv2.imshow("MASK", mask)
cv2.imshow("FILTERED DFT/IFT ROUND TRIP", img_filtered)
# imshow only renders once the GUI event loop runs
cv2.waitKey(0)
cv2.destroyAllWindows()
# write result to disk
cv2.imwrite("lena_512_gray_mask.png", mask)
cv2.imwrite("lena_dft_opencv_defocus_rad32.jpg", img_filtered)


creating a neon-glow with python numpy

I'm trying to create a neon-effect w/ a source image. I have included three images, the source, my current attempt & a target. The program takes the image, finds the white-edges, & calculates the distance from each pixel to the nearest white-edge (these parts both work fine); from there, I am struggling to find the right saturation and value parameters to create the neon-glow.
From the target image, what I need to happen is basically for the saturation to be 0 on a white-edge, then to dramatically increase the further away it gets from an edge; for value, I need it to be 1 on a white-edge, then to dramatically decrease. I can't figure out the best way to manipulate distance_image (which holds each pixel's distance from the nearest white-edge) such as to achieve these two results with saturation and value.
from PIL import Image
import cv2
import numpy as np
from scipy.ndimage import binary_erosion
from scipy.spatial import KDTree
def find_closest_distance(img):
    """Return each pixel's Euclidean distance to the nearest non-zero pixel.

    Args:
        img: Array whose non-zero entries mark the target ("white") pixels.

    Returns:
        Float array with the same shape as ``img``; entry ``[r, c]`` is the
        distance from ``(r, c)`` to the closest non-zero pixel of ``img``.

    Raises:
        ValueError: If ``img`` has no non-zero pixel to measure against.
    """
    # One row of (row, col) coordinates per non-zero pixel.
    white_pixel_points = np.argwhere(img)
    if white_pixel_points.size == 0:
        raise ValueError("img contains no non-zero pixels")
    tree = KDTree(white_pixel_points)
    # Coordinate grid shaped like img with a trailing axis holding (row, col),
    # so the query result comes back in image layout.
    img_meshgrid = np.moveaxis(np.indices(img.shape), 0, -1)
    distances, _ = tree.query(img_meshgrid)
    return distances
def find_edges(img):
    """Return the one-pixel outline of the white regions of ``img``.

    The image is eroded with a 3x3 structuring element; the eroded interior
    (scaled to 255) is subtracted from the pixel values, leaving the boundary.
    """
    pixels = np.array(img)
    structure = np.ones((3, 3))
    interior = binary_erosion(pixels, structure)
    return pixels - interior * 255
# Load the source image as 8-bit grayscale.
img = Image.open('a.png').convert('L')
edge_image = find_edges(img)
# Distance of every pixel to the nearest edge, normalised to [0, 1].
distance_image = find_closest_distance(edge_image)
max_dist = np.max(distance_image)
distance_image = distance_image / max_dist
# Build the HSV planes (OpenCV 8-bit convention: H in 0..180, S and V in 0..255).
hue = np.full(distance_image.shape, 0.44*180)
saturation = distance_image * 255
value = np.power(distance_image, 0.2)
value = 255 * (1 - value**2)
new_tups = np.dstack((hue, saturation, value)).astype('uint8')
# PIL expects RGB channel order, so convert HSV -> RGB (not BGR) before saving.
new_tups = cv2.cvtColor(new_tups, cv2.COLOR_HSV2RGB)
Image.fromarray(new_tups).save('out.png')
The following images show the source data (left), the current result (middle), and the desired result (right).
I think I would do this with convolution instead. Convolving an image with a Gaussian kernel is a common way to blur an image. You can do it in various ways, but maybe the easiest to use is scipy.ndimage.gaussian_filter. Here's one way to implement all this, see if you like the result.
from PIL import Image
from io import BytesIO
import requests
import numpy as np
# NOTE: the image URL is empty in this listing; fill it in before running.
r = requests.get('')
r.raise_for_status()
# Decode the downloaded bytes as an image.
img = Image.open(BytesIO(r.content))
# Keep the first channel only and scale it to [0, 1].
imarray = np.asarray(img)[..., 0] / 255
This is your first image, the white rectangles.
Now I'll make those outlines, do the blur, create the colour images, and combine them:
from scipy.ndimage import binary_erosion
from scipy.ndimage import gaussian_filter
# Shrink the white shapes by three erosion passes.
eroded = binary_erosion(imarray, iterations=3)
# Make the outlined rectangles: original minus the eroded interior.
outlines = imarray - eroded
# Convolve with a Gaussian to effect a blur.
blur = gaussian_filter(outlines, sigma=11)
# Tint both single-channel images neon green by broadcasting against an
# RGB triple (values in [0, 1]); the results have a trailing colour axis.
neon_green_rgb = [0.224, 1.0, 0.0784]
outlines = outlines[:, :, None] * neon_green_rgb
blur = blur[:, :, None] * neon_green_rgb
# Combine the images and constrain to [0, 1].
# blur_strength amplifies the blurred layer before it is added.
blur_strength = 3
glow = np.clip(outlines + blur_strength*blur, 0, 1)
And look at it:
import matplotlib.pyplot as plt
You'll want to adjust the sigma of the Gaussian (its width), the colours, blur strength, and so on. Hope it helps.
Here is one way to do that in Python/OpenCV.
Read the input
Convert to grayscale
Threshold to binary
Get edges of desired thickness using morphology gradient
Invert the edges so black on white background
Do distance transform
Stretch to full dynamic range
Normalize to range 0 to 1 by dividing by the maximum value
Attenuate using a power law to control distance roll-off (ramping)
Create a color image of the size of the input and the desired color
Multiply the attenuated image by the color image
Save results
import cv2
import numpy as np
import skimage.exposure
# read input
img = cv2.imread('rectangles.png')
# convert to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# threshold
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)[1]
# do morphology gradient to get edges and invert so black edges on white background
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,3))
edges = cv2.morphologyEx(thresh, cv2.MORPH_GRADIENT, kernel)
edges = 255 - edges
# get distance transform (distance of each white pixel to the nearest black edge pixel)
dist = edges.copy()
distance = cv2.distanceTransform(dist, distanceType=cv2.DIST_L2, maskSize=3)
print(np.amin(distance), np.amax(distance))
# stretch to the range 0..255 (no dtype conversion is done here)
stretch = skimage.exposure.rescale_intensity(distance, in_range='image', out_range=(0,255))
# invert so that the edges are brightest
stretch = 255 - stretch
max_stretch = np.amax(stretch)
# normalize to range 0 to 1 by dividing by max_stretch
stretch = (stretch/max_stretch)
# attenuate with power law; a larger exponent makes the glow fall off faster
# (named `power` so the builtin pow() is not shadowed)
power = 4
attenuate = np.power(stretch, power)
attenuate = cv2.merge([attenuate,attenuate,attenuate])
# create a green image the size of the input
color_img = np.full_like(img, (0,255,0), dtype=np.float32)
# multiply the color image with the attenuated distance image
glow = (color_img * attenuate).clip(0,255).astype(np.uint8)
# save results
cv2.imwrite('rectangles_edges.png', edges)
cv2.imwrite('rectangles_stretch.png', (255*stretch).clip(0,255).astype(np.uint8))
cv2.imwrite('rectangles_attenuate.png', (255*attenuate).clip(0,255).astype(np.uint8))
cv2.imwrite('rectangles_glow.png', glow)
# view results
cv2.imshow("EDGES", edges)
cv2.imshow("STRETCH", stretch)
cv2.imshow("ATTENUATE", attenuate)
cv2.imshow("RESULT", glow)
# imshow only renders once the GUI event loop runs
cv2.waitKey(0)
cv2.destroyAllWindows()
Edges (inverted):
Stretched Distance Transform:
Attenuated Distance Transform:
Glow Result:

How to fix or balance the image with an white color overlay?

Hi all, I have this image with a white overlay on it. I am trying to equalize the color, or in other words remove the white overlay so that the image is one uniform tone. I am new to image processing, and I thought I could extract the color channel and then apply histogram equalization to it, if that works. What would be the best approach to this? Thanks!
Here is a simple attempt to match the mean of the inner region with that of the outer region. It does not work terribly well because it is a global change and does not take into account brightness variation across the image. But you can play around with it to start.
It takes a mask image and gets the means of the inner and outer regions. Then gets the difference and subtracts from the inner region.
import cv2
import numpy as np
# load image
img = cv2.imread('writer.jpg', cv2.IMREAD_GRAYSCALE)
# rectangle coordinates of the overlaid (brighter) area
x = 61
y = 8
w = 663
h = 401
# create mask for inner area
mask = np.zeros_like(img, dtype=np.uint8)
mask[y:y+h, x:x+w] = 255
inner = mask == 255
# compute means of inner rectangle region and outer region
mean_inner = np.mean(img[inner])
mean_outer = np.mean(img[~inner])
# compute difference in mean values; bias shifts the inner region lighter/darker
bias = 0
diff = mean_inner - mean_outer + bias
# print mean of each
print("mean of inner region:", mean_inner)
print("mean of outer region:", mean_outer)
print("difference:", diff)
# subtract diff from img (cv2.subtract saturates at 0 instead of wrapping)
img_diff = cv2.subtract(img, float(diff))
# blend with original using mask
result = np.where(inner, img_diff, img)
# save resulting masked image
cv2.imwrite('writer_balanced.jpg', result)
# show results
cv2.imshow("IMAGE", img)
cv2.imshow("MASK", mask)
cv2.imshow("RESULT", result)
# imshow only renders once the GUI event loop runs
cv2.waitKey(0)
cv2.destroyAllWindows()
mean of inner region: 195.44008004122423
mean of outer region: 154.1415758021116
difference: 41.298504239112646
You can change the bias to make the inner region lighter or darker over all.
Here is the next order of improvement. Simply measure the darkest area inside and outside close to each other and then do the same for the brightest areas. Then compute a linear transformation of intensities from the measurements and apply to the image. Then use the mask to mix the two images.
This is similar to what @Christoph Rackwitz proposed, except that he did a very rigorous computation and I simply took some visual measurements.
So for example, here are the places that I measured:
import cv2
import numpy as np
# load image
img = cv2.imread('writer.jpg', cv2.IMREAD_GRAYSCALE)
# rectangle coordinates
x = 62
y = 8
w = 662
h = 401
# create mask for inner area
mask = np.zeros_like(img, dtype=np.uint8)
mask[y:y+h, x:x+w] = 255
# measure darkest and lightest neighboring regions inside and outside mask area of input
# darkest from chair in lower right
# brightest from background wall in upper left
# NOTE(review): in1, out1, in2 and out2 (the measured intensities inside and
# outside the mask) are never assigned in this listing -- they must be
# defined here before the code below can run.
# compute linear transformation equation coefficients
# let x1=in1, y1=out1, x2=in2, y2=out2
# y1=a*x1+b
# y2=a*x2+b
# y2-y1 = a*(x2-x1)
# a = (y2-y1)/(x2-x1)
# b = y2 - a*x2
x1 = in1
y1 = out1
x2 = in2
y2 = out2
a = (y2-y1)/(x2-x1)
b = y2 - a*x2
print("a:", a, "b:", b)
# process image with linear transformation
# (computed in float64, then clipped back into the uint8 range)
modified = (a * img.astype(np.float64) + b).clip(0,255).astype(np.uint8)
# blend with original using mask
result = np.where(mask==255, modified, img)
# save resulting masked image
cv2.imwrite('writer_balanced2.jpg', result)
# show results
# NOTE(review): no cv2.waitKey() follows these calls in this listing.
cv2.imshow("IMAGE", img)
cv2.imshow("MASK", mask)
cv2.imshow("RESULT", result)
here's my best attempt:
Grayscale histograms of the reference area, and the area to fix:
You see, the blending with white caused the histogram to be squeezed and moved rightward. An original value of 255 was mapped to 255, but the darker the original value, the more it is brightened. The mask area contains samples of presumably black (the backrest) that are also black in the untouched reference area, so we can estimate what's going on... original black was mapped to a grayscale value of ~88.
I find the minima in both spectra and use those for a linear mapping:
# Linear map that sends the darkest value of the overlaid area (fixmin) to the
# darkest value of the untouched reference area (refmin) and keeps 255 at 255.
# NOTE: mask is used as a boolean array (it is negated with ~ and used as an index).
refmax = 255
refmin = float(gray[~mask].min())
fixmax = 255
fixmin = float(gray[mask].min())
composite = im.copy()
# Do the arithmetic in floating point: subtracting fixmin from uint8 pixels
# would wrap around for any value darker than fixmin.
pixels = composite[mask].astype(np.float32)
remapped = (pixels - fixmin) / (fixmax - fixmin) * (refmax - refmin) + refmin
composite[mask] = np.clip(remapped, 0, 255)
And that's it.

How to display the Gaussian kernel used by OpenCV's `GaussianBlur()`?

I want to display the Gaussian kernel used by OpenCV's cv2.GaussianBlur(img, kernel_size, sigma), for explanation purposes.
I know how to demonstrate the image which results after applying the blur, and that is not my objective here.
My objective is to demonstrate the kernel automatically for any used sigma, and any used kernel size!
I have seen a code(mentioned down) but I prefer to use something more related to instruction used in OpenCV, rather than just a general mathematical dependent approach.
The expected output kernel is something like this:
import cv2
import numpy as np
# Read the image and convert OpenCV's BGR channel order to RGB
img_path = 'image.jpg'
img = cv2.imread(img_path)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
# Gaussian blur
# Kernel is only used for its shape: it supplies the 15x15 kernel size below.
Kernel = np.ones((15,15))
sigma = 2
Blurred_Image = cv2.GaussianBlur(img, (Kernel.shape[0], Kernel.shape[1]), sigma)
Gaussian Kernel Manual Code:
def dnorm(x, mu, sd):
    """Evaluate the normal probability density with mean ``mu`` and
    standard deviation ``sd`` at ``x``."""
    z = (x - mu) / sd
    scale = 1 / (np.sqrt(2 * np.pi) * sd)
    return scale * np.e ** (-np.power(z, 2) / 2)
def gaussian_kernel(size, sigma=1, verbose=False):
    """Build a ``size`` x ``size`` Gaussian kernel scaled so its peak is 1.

    A 1-D profile is sampled with ``dnorm`` at ``size`` evenly spaced offsets
    between ``-(size // 2)`` and ``size // 2``; the 2-D kernel is the outer
    product of that profile with itself.  With ``verbose`` the kernel is also
    drawn with matplotlib.
    """
    half = size // 2
    offsets = np.linspace(-half, half, size)
    profile = np.array([dnorm(offset, 0, sigma) for offset in offsets])
    kernel_2D = np.outer(profile, profile)
    # Rescale so that the largest entry equals 1.
    kernel_2D = kernel_2D * (1.0 / kernel_2D.max())
    if verbose:
        plt.imshow(kernel_2D, interpolation='none',cmap='gray')
    return kernel_2D
Here is one way in Python/OpenCV.
- Read the input
- Create a delta image (one white pixel in the center of a black background)
- Blur the image
- List item
- Resize the image to enlarge it
- Stretch the image to full dynamic range
- Save the result
import cv2
import numpy as np
import skimage.exposure as exposure
# create delta image: a single white pixel in the middle of a black image
dims = 30
dims2 = 30 // 2
delta = np.zeros((dims,dims,3), dtype=np.float32)
delta[dims2:dims2+1, dims2:dims2+1] = (255,255,255)
# blur image; blurring a delta reproduces the kernel itself
# (ksize (0,0) lets OpenCV derive the kernel size from sigma)
blur = cv2.GaussianBlur(delta, (0,0), sigmaX=5, sigmaY=5)
# resize 16x
dims16x = dims * 16
resized = cv2.resize(blur, (dims16x,dims16x), interpolation = cv2.INTER_AREA)
# stretch to full dynamic range
result = exposure.rescale_intensity(resized, in_range='image', out_range=(0,255)).astype(np.uint8)
# save image (the write call is absent from this listing; add
# cv2.imwrite(<path>, result) here if the kernel image should be kept)
# show the images
cv2.imshow("delta", delta)
cv2.imshow("result", result)
# imshow only renders once the GUI event loop runs
cv2.waitKey(0)
cv2.destroyAllWindows()
Delta image:

How to blur the image according to segmentation map

Forgive me if I am unable to explain well because I am not native speaker.
I am working on blurring the part of image according to the white part of segmentation map. For example here is my segmentation image ( bmp image ).
Now what I want is to blur the part of the original image where the pixels are white in the segmentation map. I wrote the following code to do so.
# Binarise the segmentation output and lay it out as a 512x512 grid.
# (mask and image_n come from earlier code that is not part of this listing.)
mask = mask >= 0.5
mask = np.reshape(mask, (512, 512))
mh, mw = 512, 512
# Replicate the mask into three identical channels.
mask_n = np.ones((mh, mw, 3))
mask_n[:,:,0] *= mask
mask_n[:,:,1] *= mask
mask_n[:,:,2] *= mask
# discard padded area
# (crop the centred region of the mask that matches the image size)
ih, iw, _ = image_n.shape
delta_h = mh - ih
delta_w = mw - iw
top = delta_h // 2
bottom = mh - (delta_h - top)
left = delta_w // 2
right = mw - (delta_w - left)
mask_n = mask_n[top:bottom, left:right, :]
# addWeighted
# NOTE(review): this box-blurs the scaled *mask* and adds it to the image;
# the image itself is never blurred.
image_n = image_n *1 + cv2.blur(mask_n * 0.8, (800, 800))
Please help me, Thanks.
You can do it in the following steps:
Load original image and mask image.
Blur the whole original image and save it in a different variable.
Use np.where() method to select the pixels from the mask where you want blurred values and then replace it.
See the sample code below:
import cv2
import numpy as np
img = cv2.imread("./image.png")
# Blur the whole image once; the mask then decides which version each pixel takes.
blurred_img = cv2.GaussianBlur(img, (21, 21), 0)
mask = cv2.imread("./mask.png")
# A pixel counts as "white" only when all of its channels are 255.  Deciding
# per pixel (not per channel) prevents mixing blurred and sharp channels in
# pixels that are only partly 255.
white = np.all(mask == 255, axis=-1, keepdims=True)
output = np.where(white, blurred_img, img)
cv2.imwrite("./output.png", output)
Here's an alternative to the solution proposed by @Chris Henri. It relies on scipy.ndimage.filters.gaussian_filter and NumPy's boolean indexing:
from skimage import io
import numpy as np
# gaussian_filter lives in scipy.ndimage; the scipy.ndimage.filters
# namespace is deprecated.
from scipy.ndimage import gaussian_filter
import matplotlib.pyplot as plt
# NOTE: the mask URL is empty in this listing; fill it in before running.
mask = io.imread('')
# Random grayscale test image with the same height and width as the mask.
img = np.random.random(size=mask.shape[:2])
# True where every channel of the mask is 255 (i.e. the pixel is white).
idx = mask.min(axis=-1) == 255
blurred = gaussian_filter(img, sigma=3)
# Zero out everything outside the white mask area.
blurred[~idx] = 0
fig, axs = plt.subplots(1, 3, figsize=(12, 4))
for ax, im in zip(axs, [img, mask, blurred]):
    ax.imshow(im, cmap='gray')
plt.show()
Here is yet another alternative. It is useful when you have a 2D segmentation array that gives the (mutually exclusive) object class of the pixel at every index (i, j), and a 3D image to which you want to apply the effect.
def gaussian_blur(image: np.ndarray,
                  segmentation: np.ndarray,
                  classes_of_interest: list,
                  gaussian_variance: float = 10) -> np.ndarray:
    """Add zero-mean Gaussian noise to the pixels of the selected classes.

    Despite its name, this function does not convolve the image: it perturbs
    the selected pixels with random noise whose strength is controlled by
    ``gaussian_variance``.

    Args:
        image: Image array with values in 0..255, shaped like ``segmentation``
            with an optional trailing channel axis.
        segmentation: Array of class labels, one per pixel.
        classes_of_interest: Labels whose pixels receive noise.
        gaussian_variance: Variance of the noise; 0 leaves the image unchanged.

    Returns:
        A new array; ``image`` itself is not modified.  Pixels whose class is
        not in ``classes_of_interest`` are copied unchanged.

    Raises:
        ValueError: If ``gaussian_variance`` is negative.
    """
    if gaussian_variance < 0:
        raise ValueError("gaussian_variance must be non-negative")
    image = np.asarray(image)
    # Select the pixels whose class is one of the requested classes.
    mask = np.isin(segmentation, classes_of_interest)
    # Broadcast the 2-D mask over the channel axis, whatever its length.
    if image.ndim == mask.ndim + 1:
        mask = mask[..., np.newaxis]
    sigma = np.sqrt(gaussian_variance)
    noise = np.random.normal(0, sigma, image.shape)
    # Add in floating point and clip before converting back to uint8, so
    # values pushed outside 0..255 saturate instead of wrapping around.
    noisy = np.clip(np.rint(image + noise), 0, 255).astype(np.uint8)
    return np.where(mask, noisy, image)
And here is a toy example:
import numpy as np
# Class labels that may appear in the fake segmentation.
possible_classes = [1,2,3]
#Setting up a toy example with a small image,
#shape (N, N, 3)
# (random integer values 0..255 stored as uint8)
img = np.floor(np.random.random(size=(8,8,3)) * 256).astype(np.uint8)
#Setting up a fake segmentation with 3 mutually exclusive possible classes,
#shape (N, N)
segmentation = np.random.choice(possible_classes, size=(8,8))
# Only the first two classes (1 and 2) are passed as classes of interest.
new_img_blurred = gaussian_blur(img,
segmentation= segmentation,
classes_of_interest= possible_classes[:2])

Writing robust (size invariant) circle detection (Watershed)

Edit: Quick Summary so far: I use the watershed algorithm but I have probably a problem with threshold. It didn't detect the brighter circles.
New: Fast radial symmetry transform approach, which didn't quite work either (Edit 6).
I want to detect circles with different sizes. The use case is to detect coins on an image and to extract them solely. -> Get the single coins as single image files.
For this I used the Hough Circle Transform of open-cv:
import sys
import cv2 as cv
import numpy as np
def main(argv):
    """Detect circles in an image with the Hough transform and display them.

    Args:
        argv: Command-line arguments; ``argv[0]``, when present, is the image
            path.  Otherwise a default sample path is used.

    Returns:
        0 on success, -1 when the image cannot be loaded.
    """
    ## [load]
    default_file = "data/newcommon_1euro.jpg"
    filename = argv[0] if len(argv) > 0 else default_file
    # Loads an image
    src = cv.imread(filename, cv.IMREAD_COLOR)
    # Check if image is loaded fine
    if src is None:
        print ('Error opening image!')
        print ('Usage: [image_name -- default ' + default_file + '] \n')
        return -1
    ## [load]
    ## [convert_to_gray]
    # Convert it to gray
    gray = cv.cvtColor(src, cv.COLOR_BGR2GRAY)
    ## [convert_to_gray]
    ## [reduce_noise]
    # Reduce the noise to avoid false circle detection
    gray = cv.medianBlur(gray, 5)
    ## [reduce_noise]
    ## [houghcircles]
    rows = gray.shape[0]
    circles = cv.HoughCircles(gray, cv.HOUGH_GRADIENT, 1, rows / 8,
                              param1=100, param2=30,
                              minRadius=0, maxRadius=120)
    ## [houghcircles]
    ## [draw]
    if circles is not None:
        circles = np.uint16(np.around(circles))
        for i in circles[0, :]:
            # Plain Python ints keep the drawing calls independent of how
            # the binding handles NumPy scalar types.
            center = (int(i[0]), int(i[1]))
            # circle center
            cv.circle(src, center, 1, (0, 100, 100), 3)
            # circle outline
            radius = int(i[2])
            cv.circle(src, center, radius, (255, 0, 255), 3)
    ## [draw]
    ## [display]
    cv.imshow("detected circles", src)
    # imshow only renders once the GUI event loop runs
    cv.waitKey(0)
    ## [display]
    return 0


if __name__ == "__main__":
    main(sys.argv[1:])
I tried all parameters (rows, param1, param2, minRadius, and maxRadius) to optimize the results. This worked very well for one specific image but other images with different sized coins didn't work.
# Same call as in main(), but the fourth argument is rows / 16 instead of
# rows / 8 (gray and rows are the variables computed in main()).
circles = cv.HoughCircles(gray, cv.HOUGH_GRADIENT, 1, rows / 16,
param1=100, param2=30,
minRadius=0, maxRadius=120)
With the same parameters:
Changed to rows/8
I also tried two other approaches of this thread: writing robust (color and size invariant) circle detection with opencv (based on Hough transform or other features)
The approach of fireant leads to this result:
The approach of fraxel didn't work either.
For the first approach: This happens with all different sizes and also the min and max radius.
How could I change the code, so that the coin size is not important or that it finds the parameters itself?
Thank you in advance for any help!
I tried the watershed algorithm of Open-cv, as suggested by Alexander Reynolds:
import numpy as np
import cv2 as cv
from matplotlib import pyplot as plt
img = cv.imread('data/P1190263.jpg')
if img is None:
    raise FileNotFoundError('data/P1190263.jpg')
gray = cv.cvtColor(img,cv.COLOR_BGR2GRAY)
# Otsu threshold, inverted: pixels darker than the threshold become white
ret, thresh = cv.threshold(gray,0,255,cv.THRESH_BINARY_INV+cv.THRESH_OTSU)
# noise removal
kernel = np.ones((3,3),np.uint8)
opening = cv.morphologyEx(thresh,cv.MORPH_OPEN,kernel, iterations = 2)
# sure background area
sure_bg = cv.dilate(opening,kernel,iterations=3)
# Finding sure foreground area: keep only pixels far from the background
dist_transform = cv.distanceTransform(opening,cv.DIST_L2,5)
ret, sure_fg = cv.threshold(dist_transform,0.7*dist_transform.max(),255,0)
# Finding unknown region
sure_fg = np.uint8(sure_fg)
unknown = cv.subtract(sure_bg,sure_fg)
# Marker labelling
ret, markers = cv.connectedComponents(sure_fg)
# Add one to all labels so that sure background is not 0, but 1
markers = markers+1
# Now, mark the region of unknown with zero
markers[unknown==255] = 0
markers = cv.watershed(img,markers)
# Pixels labelled -1 are painted [255,0,0] (blue in BGR order)
img[markers == -1] = [255,0,0]
cv.imshow("detected circles", img)
# imshow only renders once the GUI event loop runs
cv.waitKey(0)
cv.destroyAllWindows()
It works very well on the test image of the open-cv website:
But it performs very badly on my own images:
I can't really think of a good reason why it's not working on my images?
Edit 2:
As suggested I looked at the intermediate images. The thresh looks not good in my opinion. Next, there is no difference between opening and dist_transform. The corresponding sure_fg shows the detected images.
Edit 3:
I tried all distanceTypes and maskSizes I could find, but the results were quite the same (
Edit 4:
Furthermore, I tried to change the (first) threshold function. I used different threshold values instead of the OTSU Function. The best one was with 160, but it was far from good:
In the tutorial it looks like this:
It seems like the coins are somehow too bright to be detected by this algorithm, but I don't know how to improve it?
Edit 5:
Changing the overall contrast and brightness of the image (with cv.convertScaleAbs) didn't improve the results. Increasing the contrast should, however, increase the "difference" between foreground and background, at least on the normal image. But it even got worse. The corresponding threshold image didn't improve (it didn't get more white pixels).
Edit 6: I tried another approach, the fast radial symmetry transform (from here
import cv2
import numpy as np
def gradx(img):
    """Central-difference gradient of a 2-D image along its columns.

    The image is converted to signed integers first so the subtraction cannot
    wrap around.  The first and last columns, for which no central difference
    exists, are returned as zeros.
    """
    signed = img.astype('int')
    rows, cols = signed.shape
    zero_column = np.zeros((rows, 1))
    central = (signed[:, 2:] - signed[:, :-2]) / 2.0
    # Re-attach the two border columns that the slicing dropped.
    return np.concatenate((zero_column, central, zero_column), axis=1)
def grady(img):
    """Central-difference gradient of a 2-D image along its rows.

    The image is converted to signed integers first so the subtraction cannot
    wrap around.  The first and last rows, for which no central difference
    exists, are returned as zeros.
    """
    signed = img.astype('int')
    rows, cols = signed.shape
    zero_row = np.zeros((1, cols))
    central = (signed[2:, :] - signed[:-2, :]) / 2.0
    # Re-attach the two border rows that the slicing dropped.
    return np.concatenate((zero_row, central, zero_row), axis=0)
# Performs fast radial symmetry transform
# img: input image, grayscale
# radii: integer value for radius size in pixels (n in the original paper); also used to size gaussian kernel
# alpha: Strictness of symmetry transform (higher=more strict; 2 is good place to start)
# beta: gradient threshold parameter, float in [0,1]
# stdFactor: Standard deviation factor for gaussian kernel
# mode: BRIGHT, DARK, or BOTH
def frst(img, radii, alpha, beta, stdFactor, mode='BOTH'):
    """Compute the fast radial symmetry transform of a grayscale image.

    Every pixel whose gradient magnitude exceeds ``beta`` times the maximum
    votes for the pixel ``radii`` steps along its gradient (BRIGHT) and/or
    against it (DARK).  Vote counts and summed gradient magnitudes are
    normalised, combined and smoothed with a Gaussian.

    Returns:
        Float array of shape ``img.shape + 2 * radii`` per axis.  Index
        ``[r, c]`` corresponds to image pixel ``(r, c)``; the extra rows and
        columns at the end collect the votes that fell outside the image
        (negative target indices wrap around into that margin).
    """
    mode = mode.upper()
    assert mode in ['BRIGHT', 'DARK', 'BOTH']
    dark = mode in ('DARK', 'BOTH')
    bright = mode in ('BRIGHT', 'BOTH')
    workingDims = tuple((e + 2 * radii) for e in img.shape)
    # Set up the O (orientation count) and M (magnitude) working matrices.
    # int16 accumulators would truncate the float magnitudes and can overflow
    # when many strong gradients vote for the same pixel.
    O_n = np.zeros(workingDims, np.int32)
    M_n = np.zeros(workingDims, np.float64)
    # Calculate gradients
    gx = gradx(img)
    gy = grady(img)
    # Find gradient vector magnitude
    gnorms = np.sqrt(gx * gx + gy * gy)
    # Use beta to set threshold - speeds up transform significantly
    gthresh = np.amax(gnorms) * beta
    # Find x/y distance to affected pixels: unit gradient scaled to radii
    # (pixels with zero gradient get a zero offset)
    gpx = np.multiply(np.divide(gx, gnorms, out=np.zeros(gx.shape), where=gnorms != 0),
                      radii).round().astype(int)
    gpy = np.multiply(np.divide(gy, gnorms, out=np.zeros(gy.shape), where=gnorms != 0),
                      radii).round().astype(int)
    # All pixels with gradient above threshold
    rows, cols = np.nonzero(gnorms > gthresh)
    # gy is the gradient along rows and gx the gradient along columns, so the
    # row index moves by gpy and the column index by gpx.
    row_step = gpy[rows, cols]
    col_step = gpx[rows, cols]
    strength = gnorms[rows, cols]
    # np.add.at accumulates correctly when several pixels hit the same target.
    if bright:
        # Positively affected pixels
        ppve = (rows + row_step, cols + col_step)
        np.add.at(O_n, ppve, 1)
        np.add.at(M_n, ppve, strength)
    if dark:
        # Negatively affected pixels
        pnve = (rows - row_step, cols - col_step)
        np.add.at(O_n, pnve, -1)
        np.add.at(M_n, pnve, -strength)
    # Abs and normalize O matrix (left as zeros when nothing voted)
    O_n = np.abs(O_n).astype(np.float64)
    O_max = float(np.amax(O_n))
    if O_max > 0:
        O_n = O_n / O_max
    # Normalize M matrix
    M_max = float(np.amax(np.abs(M_n)))
    if M_max > 0:
        M_n = M_n / M_max
    # Elementwise multiplication
    F_n = np.multiply(np.power(O_n, alpha), M_n)
    # Gaussian blur; the kernel size must be odd
    kSize = int(np.ceil(radii / 2))
    kSize = kSize + 1 if kSize % 2 == 0 else kSize
    S = cv2.GaussianBlur(F_n, (kSize, kSize), int(radii * stdFactor))
    return S
img = cv2.imread('data/P1190263.jpg')
gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
result = frst(gray, 60, 2, 0, 1, mode='BOTH')
# frst returns small signed floating-point values, while imshow displays
# floating-point images by mapping 0..1 to black..white; rescale to that
# range so the response is visible.
display = cv2.normalize(result, None, 0, 1, cv2.NORM_MINMAX)
cv2.imshow("detected circles", display)
# imshow only renders once the GUI event loop runs
cv2.waitKey(0)
cv2.destroyAllWindows()
I only get this nearly black output (it has some very dark grey shadows). I don't know what to change and would be thankful for help!

