def normalize_brightness(img: Image) -> Image:
    """
    Normalize the brightness of the given Image img by:
    computing the average brightness of the picture:
    - this can be done by calculating the average brightness of each pixel
      in img (the average brightness of each pixel is the sum of the values
      of red, blue and green of the pixel, divided by 3 as a float division)
    - the average brightness of the picture is then the sum of all the
      pixel averages, divided by the product of the width and height of img
    find the factor, let's call it x, which we can multiply the
    average brightness by to get the value of 128.
    multiply the colors in each pixel by this factor x
    """
    img_width, img_height = img.size
    pixels = img.load()  # create the pixel map
    h = 0
    for i in range(img_width):
        for j in range(img_height):
            r, g, b = pixels[i, j]
            avg = sum(pixels[i, j]) / 3
            h += avg
    total_avg = int(h / (img_width * img_height))
    x = 128 // total_avg
    r, g, b = pixels[i, j]
    pixels[i, j] = (r * x, g * x, b * x)
    return img
I am a little lost as to what I am doing wrong. Can someone help?
You really should avoid for loops when processing images in Python whenever possible: they are seriously slow, verbose, harder to read and more likely to contain errors. Try to use vectorised Numpy functions, or OpenCV or PIL built-in functions.
#!/usr/bin/env python3
from PIL import Image
import numpy as np

def normalize(im):
    """Normalise brightness of image"""
    # Convert to Numpy array
    na = np.array(im, dtype=np.float32)
    # Calculate average brightness
    avg = na.mean()
    # Calculate factor x
    x = 128 / avg
    # Scale whole array as float since likely fractional
    na *= x
    # Clip to the valid 0..255 range before converting, else bright values wrap
    na = na.clip(0, 255)
    # Convert back to PIL Image and return
    return Image.fromarray(na.astype(np.uint8))

# Load image and normalize
im = Image.open('start.png').convert('RGB')
result = normalize(im)
result.save('result.png')
This code runs in around 800 microseconds on my machine whereas any version with a for loop requires around 70x longer.
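If you want to check the timing on your own machine, a rough sketch using the standard library (numbers will vary with image size and hardware):

import timeit
# average seconds per call over 100 runs of the vectorised version
print(timeit.timeit(lambda: normalize(im), number=100) / 100)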
Input image:
Result:
Your calculation code to get the factor seems okay: it processes every pixel to get the average of the per-pixel averages.
However, your modification code to adjust the brightness is not done within a similar loop, so it will operate on one pixel, and I'm not even sure that pixel is within the image. You should do that within a loop as well:
for i in range(img_width):
    for j in range(img_height):
        (r, g, b) = pixels[i, j]
        pixels[i, j] = (r * x, g * x, b * x)
This should replace the third-last and second-last lines of what you have at the moment (between x = ... and return ...). So what you would end up with is:
img_width, img_height = img.size
pixels = img.load()  # create the pixel map
h = 0
for i in range(img_width):
    for j in range(img_height):
        r, g, b = pixels[i, j]
        avg = sum(pixels[i, j]) / 3
        h += avg
total_avg = int(h / (img_width * img_height))
x = 128 // total_avg

# == New stuff below
for i in range(img_width):
    for j in range(img_height):
        (r, g, b) = pixels[i, j]
        pixels[i, j] = (r * x, g * x, b * x)
# == New stuff above

return img
A few other things to look into:
First, I'm not sure if returning img is the right thing to do here, unless pixels is a reference to (not copy of) the pixels in the image. You may want to check up on that as well.
Further, it may be possible that r * x (or g * x, b * x) gives you something more than 255 for certain input data sets. If that's the case, you probably want to clamp the values into the range 0..255 to ensure this doesn't happen. Something like (replacing the "new stuff" in the code above):
for i in range(img_width):
    for j in range(img_height):
        # Get original pixel.
        (r, g, b) = pixels[i, j]
        # Scale with upper limit.
        r = min(255, r * x)
        g = min(255, g * x)
        b = min(255, b * x)
        # Replace pixel with scaled one.
        pixels[i, j] = (r, g, b)
First, thanks to paxdiablo for sharing his answer.
I would just like to improve on the answer.
The calculation of the average can be condensed into a single list comprehension:
x = 128 // (sum([sum(pixels[i, j]) / 3 for i in range(img_width) for j in range(img_height)]) / (img_width * img_height))
So my complete answer will be:
Normalize the brightness of the given Image:

img_width, img_height = img.size
pixels = img.load()  # create the pixel map
x = 128 // (sum([sum(pixels[i, j]) / 3 for i in range(img_width) for j in range(img_height)]) / (img_width * img_height))
for i in range(img_width):
    for j in range(img_height):
        r, g, b = pixels[i, j]
        pixels[i, j] = (min(255, int(r * x)), min(255, int(g * x)), min(255, int(b * x)))
return img
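As a side note, an untested micro-tweak: a generator expression computes the same average without materializing the intermediate list:

total = sum(sum(pixels[i, j]) / 3
            for i in range(img_width)
            for j in range(img_height))
x = 128 // (total / (img_width * img_height))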
I am at a loss here, sorry if this question seems stupid.
I have the following code that should return a blurred image of a cat:
def convolve(image, kernel):  # IMPLEMENT HERE
    image_copy = image.copy()
    final_img = np.empty_like(image_copy)  # make empty array of same size
    for ix in range(len(image)):
        for iy in range(len(image[ix])):
            for iz in range(len(image[ix][iy])):
                # print(new_kernel[ix][iy])
                final_img[ix, iy, iz] = np.multiply(image[ix, iy, iz], kernel[ix % len(kernel), iy % len(kernel), iz]) * 1/9
    return final_img
new_kernel=np.ones((3,3,3))
new_kernel[0,1]=2
new_kernel[1,0]=2
new_kernel[1,2]=2
new_kernel[1,1]=4
new_kernel[2,1]=2
new_kernel[2]=3
plt.imshow(convolve(image, new_kernel))
Instead, it returns a darkened version of the image. How do I fix this?
Blurring an image is equivalent to reducing the image's features: we either reduce the number of pixels, reduce the sharpness of pixels, or both. That said, your function does neither; it simply goes over the image and rescales each channel value. Specifically, if our image is

white = [255, 255, 255]
black = [0, 0, 0]
image = [[white, white, white],
         [white, black, white],
         [white, white, white]]
Then your code will change it into

image = [[    white / 9, 2 * white / 9,     white / 9],
         [2 * white / 9, 4 * black / 9, 2 * white / 9],
         [3 * white / 9, 3 * white / 9, 3 * white / 9]]
What you are doing is reducing the value of each pixel. Since 0 is black and 255 is white, the closer a value is to 0 the darker the pixel, which is why your image appears darker.
Back to blurring, we really just need 2 things:
1. Convolution
Convolving an image with a kernel of size k x k x c means we are taking a piece of the image of size k x k x c and multiplying it element-wise by the kernel
Then in order to blur an image we need to sum the result of our convolution and divide by k x k
If k = 3, then convolving our image with the kernel of all ones can be visualized as shown below
We can control how blurry the image gets by not convolving all the pixels and skipping some, this will force the resolution to decrease.
2. Choosing the right kernel
The most basic blurring kernel is the all-ones kernel; it just averages out every pixel, but you can experiment with others (see the sketch below).
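For instance, here is a sketch of two kernels one might try; the weighted one is Gaussian-like with purely illustrative values, rescaled so the weights average to 1 to match the divide-by-9 inside get_average below:

import numpy as np

# all-ones averaging kernel for a 3-channel image
box_kernel = np.ones((3, 3, 3))

# Gaussian-like weighting: centre counts most, corners least
gauss_2d = np.array([[1, 2, 1],
                     [2, 4, 2],
                     [1, 2, 1]], dtype=np.float64)
# replicate across the 3 channels, then rescale so the mean weight is 1
gauss_kernel = np.repeat(gauss_2d[..., None], 3, axis=2) / gauss_2d.mean()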
Here is an implementation of a blurring convolution operator
import numpy as np

def blur(img, kernel, stride=3):
    h, w, c = img.shape
    new_img = []
    for i in range(0, h, stride):
        row = []
        for j in range(0, w, stride):
            neighborhood = get_neighbors(img, (i, j), c)
            average = get_average(neighborhood, kernel)
            row.append(average)
        new_img.append(row)
    return np.array(new_img)

### Helper functions ###

def get_neighbors(img, loc, c):
    """
    We find all the neighbors of the pixel located at loc = (x, y)
    within a 1 pixel radius
    Inputs
    :img: <np.ndarray> of size (h, w, c) representing the image
    :loc: <tuple<int>> = (x, y) indicating pixel location
    :c: <int> number of channels (1 for black and white, 3 for rgb images)
    Outputs
    :returns: <np.ndarray> of size (3, 3, c) representing the neighborhood of a pixel;
              if a pixel is on a boundary, we assume missing pixels are 0
    """
    h, w, _ = img.shape
    x, y = loc
    neighborhood = []
    for i in range(-1, 2):
        x_ = x + i
        row = []
        if 0 <= x_ < h:
            for j in range(-1, 2):
                y_ = y + j
                if 0 <= y_ < w: row.append(img[x_, y_])
                else: row.append(np.zeros(c, dtype=np.int32))
        else:
            row = np.zeros((3, c), dtype=np.int32)
        neighborhood.append(row)
    return np.array(neighborhood)

def get_average(neighborhood, kernel):
    """
    Computes the average of a neighborhood using given kernel
    Ex:
    If kernel is [[1 1 1]
                  [1 1 1]
                  [1 1 1]]
    Then image will become
    [[1 1 1]
     [1 0 1]  ->  [[8/9]]
     [1 1 1]]
    """
    k_h, k_w, _ = kernel.shape
    n = k_h * k_w
    avg = (neighborhood * kernel) // n
    avg = avg.sum(0, dtype=np.int32).sum(0, dtype=np.int32)
    return avg
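A minimal usage sketch, assuming image is the (h, w, 3) RGB array from the question (stride=1 keeps the original resolution instead of downsampling):

import numpy as np
import matplotlib.pyplot as plt

kernel = np.ones((3, 3, 3))              # all-ones averaging kernel
blurred = blur(image, kernel, stride=1)  # stride=1 keeps full resolution
plt.imshow(blurred.astype(np.uint8))
plt.show()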
I recommend you try to implement your own blurring convolution operation and use this only as a reference.
So, I need to build a homomorphic filter, but my code seems to be wrong. I don't know if it's the execution or some detail I don't know about in Python, but I do know that it's wrong. I'd love some insights on what I can do to improve it.
I'm using this image as input reference because it's in Rafael C. Gonzalez's DIP book, so I know what the output should look like. I'm even using the same parameters the book used in its filter, but it isn't working.
Gonzalez's Input and output, respectively:
My output:
My code is as follows:
# coding: utf-8
import cv2
import numpy as np
from matplotlib import pyplot as plt

tss = cv2.imread("The_Seventh_Seal_1.jpg", 0)
mc = cv2.imread("mussels_cave_050.JPG", 0)
sh = cv2.imread("shelter_homomorphic.bmp", 0)
pet = cv2.imread("pet.png", 0)

def filtro_gaussiano_livro(img, gl, gh, inc, Dz):
    im = np.copy(img)
    P = im.shape[0] / 2
    Q = im.shape[1] / 2
    h = np.zeros(im.shape)
    U, V = np.meshgrid(range(im.shape[0]), range(im.shape[1]), sparse=False, indexing='ij')
    d = ((U - P) ** 2 + (V - Q) ** 2).astype(float)
    d0 = Dz
    c = inc
    h = (gh - gl) * (1 - (np.exp(-c * (d / (d0 ** 2))))) + gl
    return h

def filtro_gaussiano(img, Dz):
    im = np.copy(img)
    P = im.shape[0] / 2
    Q = im.shape[1] / 2
    h = np.zeros(im.shape)
    U, V = np.meshgrid(range(im.shape[0]), range(im.shape[1]), sparse=False, indexing='ij')
    d = (((U - P) ** 2) + ((V - Q) ** 2)).astype(float)
    h = 1 - np.exp(-(d / (2 * (Dz ** 2))))
    return h

def uint8_conv(img):
    mat = np.copy(img)
    for i in range(mat.shape[0]):
        for j in range(mat.shape[1]):
            if mat[i, j] < 0:
                mat[i, j] = 0
            elif mat[i, j] > 255:
                mat[i, j] = 255
            else:
                mat[i, j] = mat[i, j]
    return np.uint8(mat)

def reescalona(img, min, max):
    mat = np.copy(img)
    ph = cv2.add(min, (
        cv2.divide((cv2.multiply((cv2.subtract(mat, np.min(mat))), (max - min))), (np.max(mat) - np.min(mat)))))
    rtn = np.uint8(ph)
    return rtn

def homomorfica(img, l, s):
    im = np.float64(np.copy(img))
    cv2.imshow("BORDER", im)
    if s == 0:
        f = filtro_gaussiano(im, l)
    elif s == 1:
        f = filtro_gaussiano_livro(im, 0.05, 3.5, 1, l)
    cv2.imshow("gauss " + str(s), f)
    im_log = np.log1p(im)
    Im_shift = np.fft.fftshift(np.fft.fft2(im_log))
    Im_fft_filt = np.multiply(f, Im_shift)
    cv2.imshow("FFT Shift", uint8_conv(np.real(Im_shift)))
    Im_filt = np.real(np.fft.ifft2(np.fft.ifftshift(Im_fft_filt)))
    Im = np.exp(Im_filt) - 1
    Im = reescalona(Im, 0, 255)
    return uint8_conv(Im)

# def notch(img):

raio = 2500
i = pet
a = homomorfica(i, raio, 0)
b = homomorfica(i, raio, 1)
cv2.imshow("Original image", i)
cv2.imshow("Standard homomorphic filter", a)
cv2.imshow("Book's homomorphic filter", b)
k = 0
while k != 27:
    k = cv2.waitKey(0)
cv2.destroyAllWindows()
Here is one way to do homomorphic filtering in the frequency domain using Python/Numpy/OpenCV.
I believe your issue is just your filtering. I will show two different filters below that vary in radius of the circle and Gaussian filtering.
Read the input as grayscale
Take the natural log of the input
Do FFT to real/imaginary components
Shift the FFT so DC point is in the center
Create a black circular mask on a white background of small radius
Apply Gaussian blur to the mask
Shift the FFT so DC point is at the top left corner
Do IFFT and convert to a simple real image
Take the exponential of the IFFT
Stretch that to the range 0 to 255
Save the result
import numpy as np
import cv2
# read input and convert to grayscale
img = cv2.imread('person.png', cv2.IMREAD_GRAYSCALE)
hh, ww = img.shape[:2]
# take ln of image
img_log = np.log(np.float64(img), dtype=np.float64)
# do dft saving as complex output
dft = np.fft.fft2(img_log, axes=(0,1))
# apply shift of origin to center of image
dft_shift = np.fft.fftshift(dft)
# create black circle on white background for high pass filter
#radius = 3
radius = 13
mask = np.zeros_like(img, dtype=np.float64)
cy = mask.shape[0] // 2
cx = mask.shape[1] // 2
cv2.circle(mask, (cx,cy), radius, 1, -1)
mask = 1 - mask
# antialias mask via blurring
#mask = cv2.GaussianBlur(mask, (7,7), 0)
mask = cv2.GaussianBlur(mask, (47,47), 0)
# apply mask to dft_shift
dft_shift_filtered = np.multiply(dft_shift,mask)
# shift origin from center to upper left corner
back_ishift = np.fft.ifftshift(dft_shift_filtered)
# do idft saving as complex
img_back = np.fft.ifft2(back_ishift, axes=(0,1))
# combine complex real and imaginary components to form (the magnitude for) the original image again
img_back = np.abs(img_back)
# apply exp to reverse the earlier log
img_homomorphic = np.exp(img_back, dtype=np.float64)
# scale result
img_homomorphic = cv2.normalize(img_homomorphic, None, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8U)
# write result to disk
cv2.imwrite("person_dft_numpy_mask.png", (255*mask).astype(np.uint8))
cv2.imwrite("person_dft_numpy_homomorphic.png", img_homomorphic)
cv2.imshow("ORIGINAL", img)
cv2.imshow("MASK", mask)
cv2.imshow("FILTERED DFT/IFT ROUND TRIP", img_back)
cv2.imshow("HOMOMORPHIC", img_homomorphic)
cv2.waitKey(0)
cv2.destroyAllWindows()
High Pass Filter Mask and Homomorphic Result for radius=3 and blur=7:
High Pass Filter Mask and Homomorphic Result for radius=13 and blur=47:
I am trying to create a mask for analyzing some of my images. In this mask I need to have at least 500 different numbers, but I can't just use a plain ndarray without converting it into an image, because I use OpenCV in the process of creating the mask. So if a number is larger than 255, it just gets changed to (number - 256).
def create_mask(x=20, y=12, l=480):
    s = int(l / y)
    lin_mask = np.zeros([l*2, s*x], np.uint8)
    color = 0
    for i in range(y*2):
        for j in range(x):
            lin_mask[i*s:(i+1)*s, j*s:(j+1)*s] = np.ones([s, s], np.uint8) * color
            color = color + 1
    m = cv2.warpPolar(lin_mask, (l*2, l*2), (l, l), l, cv2.WARP_INVERSE_MAP)
    t = int(l/2)
    m = m[t:t+l, t:t+l]
    return m
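For reference, the wrap-around described above is just uint8 arithmetic modulo 256; a tiny sketch:

import numpy as np

vals = np.array([255, 256, 300, 500])
print(vals.astype(np.uint8))  # [255   0  44 244] -- values wrap modulo 256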
I need to synthesize many FishEye images with different intrinsic matrices based on normal pictures. I am following the method mentioned in this paper.
Ideally, if the algorithm is correct, the ideal fish eye effect should look like this:
But when I used my algorithm to convert a picture
it looks like this
So below is my code's flow:
1. First, I read the raw image:
def read_img(image):
    img = ndimage.imread(image)  # this would return a 4-channel array: [R,G,B,255]
    img_shape = img.shape
    print(img_shape)
    # get the pixel coordinates
    w = img_shape[1]  # the width
    # print(w)
    h = img_shape[0]  # the height
    # print(h)
    uv_coord = []
    for u in range(w):
        for v in range(h):
            uv_coord.append([float(u), float(v)])  # this records the coords in the fashion of [x1,y1], [x1,y2], [x1,y3]...
    return np.array(uv_coord)
Then, based on the paper:
r(θ) = k1θ + k2θ^3 + k3θ^5 + k4θ^7, (1)
where the k's are the distortion coefficients
Given pixel coordinates (x,y) in the pinhole projection image, the corresponding image coordinates (x',y')in the fisheye can be computed as:
x'=r(θ) cos(ϕ), y' = r(θ) sin(ϕ), (2)
where ϕ = arctan((y − y0)/(x − x0)), and (x0, y0) are the coordinates of the principal point in the pinhole projection image.
And then the image coordinates (x', y') are converted into pixel coordinates (xf, yf):
xf = mu * x' + u0, yf = mv * y' + v0, (3)
where (u0, v0) are the coordinates of the principal point in the fisheye, and mu, mv denote the number of pixels per unit distance in the horizontal and vertical directions. So I am guessing these are just the [fx, fy] from the intrinsic matrix, and u0, v0 are the [cx, cy].
def add_distortion(sourceUV, dmatrix, Kmatrix):
    '''This function adds distortion to the given original image coords
    input arguments:
    dmatrix -- the distortion coefficients [k1,k2,k3,k4] for tweaking purposes
    Kmatrix -- [fx, fy, cx, cy, s]'''
    u = sourceUV[:, 0]  # width in x
    v = sourceUV[:, 1]  # height in y
    rho = np.sqrt(u**2 + v**2)
    # get theta
    theta = np.arctan(rho, np.full_like(u, 1))
    # rho_mat = np.array([rho, rho**3, rho**5, rho**7])
    rho_mat = np.array([theta, theta**3, theta**5, theta**7])
    # get: rho(theta) = k1*theta + k2*theta**3 + k3*theta**5 + k4*theta**7
    rho_d = dmatrix @ rho_mat
    # get phi
    phi = np.arctan2((v - Kmatrix[3]), (u - Kmatrix[2]))
    xd = rho_d * np.cos(phi)
    yd = rho_d * np.sin(phi)
    # convert the coords from the image plane back to pixel coords
    ud = Kmatrix[0] * (xd + Kmatrix[4] * yd) + Kmatrix[2]
    vd = Kmatrix[1] * yd + Kmatrix[3]
    return np.column_stack((ud, vd))
Then, after obtaining the distorted coordinates, I move the pixels as follows, which is where I think the problem might be:
def main():
    image_name = "original.png"
    img = cv2.imread(image_name)
    img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)  # cv2 reads the image as BGR
    w = img.shape[1]
    h = img.shape[0]
    uv_coord = read_img(image_name)
    # for adding distortion
    dmatrix = [-0.391942708316175, 0.012746418822063, -0.001374061848026, 0.005349692659231]
    # the intrinsic matrix of the original picture
    Kmatrix = np.array([9.842439e+02, 9.808141e+02, 1392/2, 2.331966e+02, 0.000000e+00])
    # Kmatrix = np.array([2234.23470710156, 2223.78349134123, 947.511596277837, 647.103139639432, -3.20443253476976])  # the distorted intrinsics
    uv = add_distortion(uv_coord, dmatrix, Kmatrix)
    i = 0
    dstimg = np.zeros_like(img)
    for x in range(w):  # iterate in the same order the coords were built
        for y in range(h):
            if i > (512 * 1392 - 1):
                break
            xu = uv[i][0]
            yu = uv[i][1]
            i += 1
            # if new pixel is in bounds copy from source pixel to destination pixel
            if 0 <= xu < img.shape[1] and 0 <= yu < img.shape[0]:
                dstimg[int(yu)][int(xu)] = img[int(y)][int(x)]
    img = Image.fromarray(dstimg, 'RGB')
    img.save('my.png')
    img.show()
However, this code does not perform in the way I want. Could you guys please help me with debugging it? I spent 3 days but I still could not see any problem with it. Thanks!!
Say you want to scale a transparent image but do not yet know the color(s) of the background you will composite it onto later. Unfortunately PIL seems to incorporate the color values of fully transparent pixels, leading to bad results. Is there a way to tell PIL-resize to ignore fully transparent pixels?
import PIL.Image
filename = "trans.png" # http://qrc-designer.com/stuff/trans.png
size = (25,25)
im = PIL.Image.open(filename)
print im.mode # RGBA
im = im.resize(size, PIL.Image.LINEAR) # the same with CUBIC, ANTIALIAS, transform
# im.show() # does not use alpha
im.save("resizelinear_"+filename)
# PIL scaled image has dark border
original image with (0,0,0,0) (black but fully transparent) background (left)
output image with black halo (middle)
proper output scaled with gimp (right)
edit: It looks like to achieve what I am looking for I would have to modify the sampling of the resize function itself such that it would ignore pixels with full transparency.
edit2: I have found a very ugly solution. It sets the color values of fully transparent pixels to the average of the surrounding non fully transparent pixels to minimize impact of fully transparent pixel colors while resizing. It is slow in the simple form but I will post it if there is no other solution. Might be possible to make it faster by using a dilate operation to only process the necessary pixels.
edit3: premultiplied alpha is the way to go - see Mark's answer
It appears that PIL doesn't do alpha pre-multiplication before resizing, which is necessary to get the proper results. Fortunately it's easy to do by brute force. You must then do the reverse to the resized result.
def premultiply(im):
    pixels = im.load()
    for y in range(im.size[1]):
        for x in range(im.size[0]):
            r, g, b, a = pixels[x, y]
            if a != 255:
                r = r * a // 255
                g = g * a // 255
                b = b * a // 255
                pixels[x, y] = (r, g, b, a)

def unmultiply(im):
    pixels = im.load()
    for y in range(im.size[1]):
        for x in range(im.size[0]):
            r, g, b, a = pixels[x, y]
            if a != 255 and a != 0:
                r = 255 if r >= a else 255 * r // a
                g = 255 if g >= a else 255 * g // a
                b = 255 if b >= a else 255 * b // a
                pixels[x, y] = (r, g, b, a)
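A minimal usage sketch tying the two together (assuming the trans.png from the question; both helpers modify their image in place):

import PIL.Image

im = PIL.Image.open("trans.png").convert("RGBA")
premultiply(im)                  # fold alpha into the color channels
im = im.resize((25, 25), PIL.Image.BILINEAR)
unmultiply(im)                   # undo the pre-multiplication on the result
im.save("resized_premultiplied_trans.png")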
Result:
You can resample each band individually:
im.load()
bands = im.split()
bands = [b.resize(size, Image.LINEAR) for b in bands]
im = Image.merge('RGBA', bands)
EDIT
Maybe by avoiding high transparency values, like so (needs numpy):
import numpy as np
# ...
im.load()
bands = list(im.split())
a = np.asarray(bands[-1])
a.flags.writeable = True
a[a != 0] = 1
bands[-1] = Image.fromarray(a)
bands = [b.resize(size, Image.LINEAR) for b in bands]
a = np.asarray(bands[-1])
a.flags.writeable = True
a[a != 0] = 255
bands[-1] = Image.fromarray(a)
im = Image.merge('RGBA', bands)
Maybe you can fill the whole image with the color you want, and only create the shape in the alpha channel? (See the sketch below.)
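A rough sketch of that idea, assuming a hypothetical shape_mask.png ("L" mode, white where the shape is); since every color pixel already holds the fill color, there is nothing wrong for the resize to bleed in:

import PIL.Image

shape_mask = PIL.Image.open("shape_mask.png").convert("L")        # hypothetical mask file
solid = PIL.Image.new("RGBA", shape_mask.size, (255, 0, 0, 255))  # desired fill color
solid.putalpha(shape_mask)  # the shape now lives only in the alpha channel
solid = solid.resize((25, 25), PIL.Image.BILINEAR)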
Sorry for answering myself, but this is the only working solution that I know of. It sets the color values of fully transparent pixels to the average of the surrounding non-fully-transparent pixels, to minimize the impact of fully transparent pixel colors while resizing. There are special cases where the proper result will not be achieved.
It is very ugly and slow. I'd be happy to accept your answer if you can come up with something better.
# might be possible to speed this up by only processing necessary pixels
# using scipy dilate, numpy where
import PIL.Image

filename = "trans.png"  # http://qrc-designer.com/stuff/trans.png
size = (25, 25)

import numpy as np

im = PIL.Image.open(filename)
npImRgba = np.asarray(im, dtype=np.uint8)
npImRgba2 = np.asarray(im, dtype=np.uint8)
npImRgba2.flags.writeable = True
lenY = npImRgba.shape[0]
lenX = npImRgba.shape[1]
for y in range(npImRgba.shape[0]):
    for x in range(npImRgba.shape[1]):
        if npImRgba[y, x, 3] != 0:  # only change completely transparent pixels
            continue
        colSum = np.zeros((3), dtype=np.uint16)
        i = 0
        for oy in [-1, 0, 1]:
            for ox in [-1, 0, 1]:
                if not oy and not ox:
                    continue
                iy = y + oy
                if iy < 0:
                    continue
                if iy >= lenY:
                    continue
                ix = x + ox
                if ix < 0:
                    continue
                if ix >= lenX:
                    continue
                col = npImRgba[iy, ix]
                if not col[3]:
                    continue
                colSum += col[:3]
                i += 1
        npImRgba2[y, x, :3] = colSum / i
im = PIL.Image.fromarray(npImRgba2)
im = im.transform(size, PIL.Image.EXTENT, (0, 0) + im.size, PIL.Image.LINEAR)
im.save("slime_" + filename)
Result:
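As hinted in the comment at the top of that code, the same idea can be vectorized. An untested sketch using scipy's convolve to average the opaque 8-neighbours of every fully transparent pixel (isolated transparent pixels are left at 0 instead of crashing):

import numpy as np
import PIL.Image
from scipy import ndimage

im = PIL.Image.open("trans.png")
rgba = np.asarray(im, dtype=np.float64).copy()
opaque = (rgba[..., 3] != 0).astype(np.float64)

# 8-neighbour kernel; out-of-bounds neighbours contribute zero
kernel = np.array([[1, 1, 1],
                   [1, 0, 1],
                   [1, 1, 1]], dtype=np.float64)
counts = ndimage.convolve(opaque, kernel, mode="constant")

for ch in range(3):
    sums = ndimage.convolve(rgba[..., ch] * opaque, kernel, mode="constant")
    avg = sums / np.maximum(counts, 1)  # avoid division by zero
    rgba[..., ch] = np.where(opaque == 0, avg, rgba[..., ch])

PIL.Image.fromarray(rgba.astype(np.uint8)).save("slime_fast_trans.png")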