I'm trying to implement the code from this paper for a university project. The idea is to insert an invisible watermark into a grayscale image, which can be extracted later to verify ownership of the image.
This is the code I wrote for the watermark embedding process:
import pywt
import numpy as np
import cv2
from PIL import Image
from math import sqrt, log10
from scipy.fftpack import dct, idct
def Get_MSB_LSB_Watermark () : #Function that separates the watermark into MSB and LSB images
MSBs = []
LSBs = []
for i in range (len(Watermark)) :
binary = '{:0>8}'.format(str(bin(Watermark[i]))[2:])
MSB = (binary[0:4])
LSB = (binary[4:])
MSB = int(MSB, 2)
LSB = int(LSB,2)
MSBs.append(MSB)
LSBs.append(LSB)
MSBs = np.array(MSBs)
LSBs = np.array(LSBs)
return MSBs.reshape(64,64), LSBs.reshape(64,64)
def split(array, nrows, ncols): #Split array into blocks of size nrows* ncols
r, h = array.shape
return (array.reshape(h//nrows, nrows, -1, ncols)
.swapaxes(1, 2)
.reshape(-1, nrows, ncols))
def unblockshaped(arr, h, w): #the inverse of the split function
n, nrows, ncols = arr.shape
return (arr.reshape(h//nrows, -1, nrows, ncols)
.swapaxes(1,2)
.reshape(h, w))
def ISVD (U,S,V): #the inverse of singular value decomposition
s = np.zeros(np.shape(U))
for i in range(4):
s[i, i] = S[i]
    recon_image = U @ s @ V
return recon_image
def Watermark_Embedding (blocks, watermark) :
Watermarked_blocks = []
k1 = []
k2 = []
#convert the watermark to a list
w = list(np.ndarray.flatten(watermark))
for i in range (len(blocks)) :
B = blocks[i]
        #Apply singular value decomposition to the block
U, s, V = np.linalg.svd(B)
#Modify the singular values of the block
P = s[1] - s[2]
delta = abs(w[i]) - P
s[1] = s[1] + delta
if s[0] >= s[1] :
k1.append(1)
else :
k1.append(-1)
#the inverse of SVD after watermark embedding
        reconstructed_B = ISVD(U, s, V)
        Watermarked_blocks.append(reconstructed_B)
for j in range(len(w)):
if w[j] >= 0:
k2.append(1)
else:
k2.append(-1)
return k1,k2, np.array(Watermarked_blocks)
def apply_dct(image_array):
    size = len(image_array[0])
all_subdct = np.empty((size, size))
for i in range (0, size, 4):
for j in range (0, size, 4):
subpixels = image_array[i:i+4, j:j+4]
subdct = dct(dct(subpixels.T, norm="ortho").T, norm="ortho")
all_subdct[i:i+4, j:j+4] = subdct
return all_subdct
def inverse_dct(all_subdct):
    size = len(all_subdct[0])
all_subidct = np.empty((size, size))
for i in range (0, size, 4):
for j in range (0, size, 4):
subidct = idct(idct(all_subdct[i:i+4, j:j+4].T, norm="ortho").T, norm="ortho")
all_subidct[i:i+4, j:j+4] = subidct
return all_subidct
#read watermark
Watermark = Image.open('Copyright.png').convert('L')
Watermark = list(Watermark.getdata())
#Separate the watermark into LSB and MSB images
Watermark1, Watermark2 = Get_MSB_LSB_Watermark()
#Apply discrete cosine transform to the two generated images
DCT_Watermark1 = apply_dct(Watermark1)
DCT_Watermark2 = apply_dct(Watermark2)
#read cover Image
Cover_Image = Image.open('10.png').convert('L')
#Apply 1-level discrete wavelet transform
LL1, (LH1, HL1, HH1) = pywt.dwt2(Cover_Image, 'haar')
#Split the LH1 and HL1 subbands into blocks of size 4*4
blocks_LH1 = split(LH1,4,4)
blocks_HL1 = split(HL1,4,4)
#Watermark Embedding in LH1 and HL1 and Keys generation
Key1, Key3, WatermarkedblocksLH1 = Watermark_Embedding(blocks_LH1,DCT_Watermark1)
Key2, Key4, WatermarkedblocksHL1 = Watermark_Embedding(blocks_HL1, DCT_Watermark2)
#Merge the watermarked blocks
reconstructed_LH1 = unblockshaped(WatermarkedblocksLH1, 256,256)
reconstructed_HL1 = unblockshaped(WatermarkedblocksHL1, 256,256)
#Apply the inverse discrete wavelet transform to get the watermarked image
IDWT = pywt.idwt2((LL1, (reconstructed_LH1, reconstructed_HL1, HH1)), 'haar')
cv2.imwrite('Watermarked_img.png', IDWT)
This is the code I wrote for the extraction process:
import pywt
from scipy import fftpack
import numpy as np
import cv2
from PIL import Image
import scipy
from math import sqrt, log10
from Watermark_Embedding import *
def Watermark_Extraction(blocks,key1, key2) :
Extracted_Watermark = []
for i in range(len(blocks)):
B = blocks[i]
#apply SVD on the Block
U, s, V = np.linalg.svd(B)
if key1[i] == 1 :
P = (s[1] - s[2])
Extracted_Watermark.append(P)
else :
P = (s[0] - s[2])
Extracted_Watermark.append(P)
for j in range(len(Extracted_Watermark)) :
if key2[j] == 1 :
Extracted_Watermark[j] = Extracted_Watermark[j]
else :
Extracted_Watermark[j] = - (Extracted_Watermark[j])
return np.array(Extracted_Watermark)
def Merge_W1_W2 ():
Merged_watermark = []
w1 = list(np.ndarray.flatten(IDCTW1))
w2 = list(np.ndarray.flatten(IDCTW2))
for i in range (len(w2)):
bw1 = '{:0>4}'.format((bin(int(abs(w1[i]))))[2:])
bw2 = '{:0>4}'.format((bin(int(abs(w2[i]))))[2:])
P = bw1+bw2
pixel = (int(P,2))
Merged_watermark.append(pixel)
return Merged_watermark
Watermarked_Image = Image.open('Watermarked_img.png')
LL1, (LH1, HL1, HH1) = pywt.dwt2(Watermarked_Image, 'haar')
blocks_LH1 = split(LH1,4,4)
blocks_HL1 = split(HL1,4,4)
W1 = Watermark_Extraction(blocks_LH1, Key1,Key3)
W2 = Watermark_Extraction(blocks_HL1, Key2, Key4)
W1 = W1.reshape(64,64)
W2 = W2.reshape(64,64)
IDCTW1 = inverse_dct(W1)
IDCTW2 = inverse_dct(W2)
Merged = np.array(Merge_W1_W2())
Merged = Merged.reshape(64,64)
cv2.imwrite('Extracted_Watermark.png', Merged)
The cover image of size 512*512:
The 64*64 watermark I used:
The watermarked image:
The extracted watermark I get:
I calculated the similarity between the two watermarks using SSIM:
from skimage.metrics import structural_similarity
original_watermark = cv2.imread('Copyright.png')
extracted_watermark = cv2.imread('Extracted_Watermark.png')
# Convert images to grayscale
original_watermark = cv2.cvtColor(original_watermark, cv2.COLOR_BGR2GRAY)
extracted_watermark = cv2.cvtColor(extracted_watermark, cv2.COLOR_BGR2GRAY)
# Compute SSIM between the two images
(score, diff) = structural_similarity(original_watermark, extracted_watermark, full=True)
print("SSIM = ", score)
I didn't apply any modification to the watermarked image, and the SSIM I got is 0.8445354561524052. However, the SSIM of the extracted watermark should be 0.99 according to the paper.
I don't know what's wrong with my code, and I have a deadline in two days, so I really need help.
Thanks in advance.
There are two issues:
In Merge_W1_W2 you are using int to convert float values to integers, but that introduces errors for numbers whose floating-point representation is not exact (e.g. 14.99999999999997); this can be fixed by using round instead.
Saving with cv2.imwrite('Watermarked_img.png', IDWT) is a lossy operation, because it rounds the values in IDWT to the nearest integer; if you use Watermarked_Image = IDWT instead, you will get back the exact same watermark image.
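A minimal sketch of both fixes, reusing the variable names from the code above:
# Fix 1: in Merge_W1_W2, round instead of truncating
bw1 = '{:0>4}'.format(bin(round(abs(w1[i])))[2:])
bw2 = '{:0>4}'.format(bin(round(abs(w2[i])))[2:])

# Fix 2: skip the lossy 8-bit PNG round-trip and hand the float
# array straight to the extraction step
Watermarked_Image = IDWT
LL1, (LH1, HL1, HH1) = pywt.dwt2(Watermarked_Image, 'haar')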
Continued from this question: How could you rewrite a list of lists so that "islands" of values are unique from one another?
Brief: How would you parse an image, for example:
in such a way that you identify the several clusters of distinct pixels and rewrite the file so that each cluster has a unique color, for example:
Here's how I have tried to implement it, with assistance from a few sources including Stack Overflow user @Rabinzel (detailed reasoning below the main code block):
from scipy import ndimage
import numpy as np
from PIL import Image
#set the file path to wherever your provinces.png is located
im = Image.open(r"C:\\Users\\scoop\\Desktop\\prov_test.png")
print('-------------------------------------------')
#DEBUGGING: simply prints the format, size, and mode of your file
print(im.format, im.size, im.mode)
#saves the width and depth of the file
im_xsize = im.size[0]
im_ysize = im.size[1]
#DEBUGGING: prints it
print(im_xsize, im_ysize)
#DEBUGGING: prints data bands, should be R, G, B
print(im.getbands())
#DEBUGGING: prints RGB value of pixel of choice
print(im.getpixel((0,0)))
print('-------------------------------------------')
#creates array for pixel RGBs
rgb_array = [[None] * im_ysize for length in range(0,im_xsize)]
#fills pixel RGB array
for x in range(0,im_xsize):
for y in range(0,im_ysize):
rgb_array[x][y] = im.getpixel((x,y))
#find unique clusters of identical RGB codes
def find_clusters(array):
clustered = np.empty_like(array)
unique_vals = np.unique(array)
cluster_count = 0
for val in unique_vals:
labelling, label_count = ndimage.label(array == val)
for k in range(1, label_count + 1):
clustered[labelling == k] = cluster_count
cluster_count += 1
return clustered, cluster_count
clusters, cluster_count = find_clusters(rgb_array)
print("Found {} clusters:".format(cluster_count))
#print(clusters)
#defining a list of unique colors
province_color_list = [[0] * 3 for length in range(0,cluster_count)]
#DEBUGGING
print('province count...', cluster_count)
#variables
r = 255
g = 0
b = 0
count = 0
#generating colors
for length in range(0,cluster_count):
province_color_list[length][0] = r
province_color_list[length][1] = g
province_color_list[length][2] = b
g += 25
b += 25
count += 1
if count >= 11:
r -= 1
g = 0
b = 0
count = 0
#DEBUGGING
print('# of colors... ', len(province_color_list))
print(province_color_list)
print('-------------------------------------------')
#writing colors to pixels
for x in range(0,im_xsize):
for y in range(0,im_ysize):
#places province color based on which province current pixel is assigned to
im.putpixel((x,y), (province_color_list[0][0], province_color_list[0][1], province_color_list[0][2]))
#im.save(r"C:\\Users\\scoop\\Desktop\\prov_test.png", im.format)
I load the image using PIL:
im = Image.open(r"C:\\Users\\scoop\\Desktop\\prov_test.png")
I create an array to more easily(?) access the image array, which stores each pixel's color as an RGB color code in tuple form. Then this method identifies the relevant pixel clusters.
rgb_array = [[None] * im_ysize for length in range(0,im_xsize)]
#fills pixel RGB array
for x in range(0,im_xsize):
for y in range(0,im_ysize):
rgb_array[x][y] = im.getpixel((x,y))
#find unique clusters of identical RGB codes
def find_clusters(array):
clustered = np.empty_like(array)
unique_vals = np.unique(array)
cluster_count = 0
for val in unique_vals:
labelling, label_count = ndimage.label(array == val)
for k in range(1, label_count + 1):
clustered[labelling == k] = cluster_count
cluster_count += 1
return clustered, cluster_count
clusters, cluster_count = find_clusters(rgb_array)
Then I create a list of unique RGB codes whose length equals the number of pixel clusters that exist.
province_color_list = [[0] * 3 for length in range(0,cluster_count)]
#DEBUGGING
print('province count...', cluster_count)
#variables
r = 255
g = 0
b = 0
count = 0
#generating colors
for length in range(0,cluster_count):
province_color_list[length][0] = r
province_color_list[length][1] = g
province_color_list[length][2] = b
g += 25
b += 25
count += 1
if count >= 11:
r -= 1
g = 0
b = 0
count = 0
and finally, I rewrite each pixel with the new RGB code associated with the unique cluster from earlier (and save the image).
#writing colors to pixels
for x in range(0,im_xsize):
for y in range(0,im_ysize):
#places province color based on which province current pixel is assigned to
im.putpixel((x,y), (province_color_list[clusters[x][y]][0], province_color_list[clusters[x][y]][1], province_color_list[clusters[x][y]][2]))
#im.save(r"C:\\Users\\scoop\\Desktop\\prov_test.png", im.format)
Unfortunately there are multiple issues with this script, and I get the feeling it has degenerated into a bit of nonsense. The chief issues seem to be accessing the RGB tuples of the PNG Image class and converting them to integers to identify them properly, as well as differentiating between distinct clusters rather than just distinct colors. I haven't even been able to get the script to write the image as anything but a flat color so far.
For reference, I hope to be able to scale this up to handle an image like this:
and give each of those little clusters a unique color. Any and all help appreciated.
OK, let's see if this works for you. If I understood correctly what you are trying to achieve, here is my (beginner) solution.
Essentially I take the image as a 3D array, find all unique colors in the picture and replace them with an integer (function: arr_to_int). Then I find all the clusters with the function find_clusters, and create a dictionary of new colors with as many entries as there are clusters (so every int of every cluster gets replaced with a color again).
At the end I replace all ints with colors again and save the picture.
This was the image I used to start with:
and that's the new picture I got as output:
If you change the way the specific colors you want to use are assigned to the clusters, I think I'm pretty close to what you are trying to achieve (hope so :) )
import numpy as np
import cv2
from scipy import ndimage
# array of BGR colors to single int
def arr_to_int(arr, col_mask):
out = np.ndarray(shape=arr.shape[:2], dtype=int)
out[:,:] = -1
for rgb, idx in col_mask.items():
out[(arr==rgb).all(2)] = idx
return out
# find unique clusters of identical RGB codes
def find_clusters(array):
clustered = np.empty_like(array)
unique_vals = np.unique(array)
cluster_count = 0
for val in unique_vals:
labelling, label_count = ndimage.label(array == val)
for k in range(1, label_count + 1):
clustered[labelling == k] = cluster_count
cluster_count += 1
return clustered, cluster_count
# Load image
im = cv2.imread("prov_test.png")
#im = cv2.resize(im, (2, 3)) #resize for debugging
#print('original image: \n', im, '\n')
#find all unique colors in image (cv2 presents in BGR format!!!)
unique_col_BGR = list(set(tuple(v) for m2d in im for v in m2d))
print('unique values: ', unique_col_BGR, '\n')
#create dict with BGR colors as keys and unique integers as values
mask_GBR_int = {color:idx for idx,color in enumerate(unique_col_BGR)}
print('mask dict: ', mask_GBR_int, '\n')
#change all color values in im to a single int (mask)
im_with_ints = arr_to_int(im, mask_GBR_int)
#print('pic with mask values: \n', im_with_ints, '\n')
# due to replacing an array of 3 values with a single int, the new array has one dimension less
print('orig pic resized shape', im.shape)
print('Mask int pic shape', im_with_ints.shape, '\n')
clusters, cluster_count = find_clusters(im_with_ints)
print(f'Found {cluster_count} clusters', '\n')
#print(clusters)
#create dict with length equal to the number of clusters, choosing colors from list_of_colors (picked at random from the internet)
list_of_colors = [[192,192,192],[128,128,128],[128,0,0],[128,128,0],[0,128,0],[128,0,128],[0,128,128],[0,0,128],[255,0,0],[0,255,0],[0,0,255],[255,255,0],[0,255,255],[255,0,255]]
new_color_dict = {idx:val for idx,val in enumerate(list_of_colors[:cluster_count])}
print('new_color_dict: ', new_color_dict,'\n')
#change arr with int to colors again
res = np.array([*new_color_dict.values()])[clusters]
#print('image array with new colors: \n', res)
cv2.imwrite("prov_test_output.png", res)
Output:
unique values: [(0, 255, 0), (255, 0, 0), (0, 0, 255), (0, 255, 255)]
mask dict: {(0, 255, 0): 0, (255, 0, 0): 1, (0, 0, 255): 2, (0, 255, 255): 3}
orig pic resized shape (100, 100, 3)
Mask int pic shape (100, 100)
Found 9 clusters
new_color_dict: {0: [192, 192, 192], 1: [128, 128, 128], 2: [128, 0, 0], 3: [128, 128, 0], 4: [0, 128, 0], 5: [128, 0, 128], 6: [0, 128, 128], 7: [0, 0, 128], 8: [255, 0, 0]}
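One caveat: the hard-coded list_of_colors only has 14 entries, so list_of_colors[:cluster_count] silently comes up short for images with more clusters. A sketch of one way to generate as many distinct colors as needed (evenly spaced hues converted from HSV to BGR with OpenCV; this replaces the hard-coded palette above):
def make_palette(n):
    # n evenly spaced hues at full saturation/value (OpenCV's hue range is 0-179)
    hsv = np.zeros((1, n, 3), dtype=np.uint8)
    hsv[0, :, 0] = np.linspace(0, 179, n, endpoint=False).astype(np.uint8)
    hsv[0, :, 1:] = 255
    return cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)[0].tolist()

list_of_colors = make_palette(cluster_count)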
I'm trying to improve the speed of my image manipulation as it's been too slow for actual use.
What I need to do is apply a complex transformation to the colour of every pixel of an image. The manipulation is basically applying a vector transform like T(r, g, b, a) => (r * x, g * x, b * y, a) - in layman's terms, a multiplication of the Red and Green values by one constant, a different multiplication for Blue, and keeping Alpha. But I also need to manipulate it differently if the RGB colour falls under some specific colours; in those cases the pixels must follow a dictionary/transformation table where RGB => newRGB, again keeping alpha.
The algorithm would be:
for each pixel in image:
if pixel[r, g, b] in special:
return special[pixel[r, g, b]] + pixel[a]
else:
return T(pixel)
It's simple, but the speed has been sub-optimal. I believe there's some way to do this with numpy vectors, but I could not find how.
Important details about the implementation:
I don't care about the original buffer/image (manipulation can be in place)
I can use wxPython, Pillow and NumPy
Order or dimension of the array is not important as long as the buffer keeps the length
The buffer is obtained from a wxPython Bitmap, and special and (RG|B)_pal are transformation tables; the end result will become a wxPython Bitmap too. They're obtained like this:
# buffer
bitmap = wx.Bitmap # it's valid wxBitmap here, this is just to let you know it exists
buf = bytearray(bitmap.GetWidth() * bitmap.GetHeight() * 4)
bitmap.CopyToBuffer(buf, wx.BitmapBufferFormat_RGBA)
self.RG_mult= 0.75
self.B_mult = 0.83
self.RG_pal = []
self.B_pal = []
for i in range(0, 256):
self.RG_pal.append(int(i * self.RG_mult))
self.B_pal.append(int(i * self.B_mult))
self.special = {
# RGB: new_RGB
# Implementation specific for the fastest access
# with buffer keys are 24bit numbers, with PIL keys are tuples
}
Implementations I tried include direct buffer manipulation:
for x in range(0, bitmap.GetWidth() * bitmap.GetHeight()):
index = x * 4
r = buf[index]
g = buf[index + 1]
b = buf[index + 2]
rgb = buf[index:index + 3]
if rgb in self.special:
special = self.special[rgb]
buf[index] = special[0]
buf[index + 1] = special[1]
buf[index + 2] = special[2]
else:
buf[index] = self.RG_pal[r]
buf[index + 1] = self.RG_pal[g]
buf[index + 2] = self.B_pal[b]
Use Pillow with getdata():
pil = Image.frombuffer("RGBA", (bitmap.GetWidth(), bitmap.GetHeight()), buf)
pil_buf = []
for colour in pil.getdata():
colour_idx = colour[0:3]
if (colour_idx in self.special):
special = self.special[colour_idx]
pil_buf.append((
special[0],
special[1],
special[2],
colour[3],
))
else:
pil_buf.append((
self.RG_pal[colour[0]],
self.RG_pal[colour[1]],
self.B_pal[colour[2]],
colour[3],
))
pil.putdata(pil_buf)
buf = pil.tobytes()
Pillow with point() and getdata() (the fastest I achieved, more than twice as fast as the others):
pil = Image.frombuffer("RGBA", (bitmap.GetWidth(), bitmap.GetHeight()), buf)
r, g, b, a = pil.split()
r = r.point(lambda r: r * self.RG_mult)
g = g.point(lambda g: g * self.RG_mult)
b = b.point(lambda b: b * self.B_mult)
pil = Image.merge("RGBA", (r, g, b, a))
i = 0
for colour in pil.getdata():
colour_idx = colour[0:3]
if (colour_idx in self.special):
special = self.special[colour_idx]
pil.putpixel(
(i % bitmap.GetWidth(), i // bitmap.GetWidth()),
(
special[0],
special[1],
special[2],
colour[3],
)
)
i += 1
buf = pil.tobytes()
I also tried working with numpy.where, but I could not get it to work. With numpy.apply_along_axis it worked, but the performance was terrible. In other numpy attempts I could not access the RGB values together, only as separate bands.
Pure Numpy Version
This first optimization relies on the fact that one probably has far fewer special colors than pixels. I use numpy to do all the inner loops. This works well with images of up to about 1 MP. If you have multiple images, I'd recommend the parallel approach.
Let's define a test case:
import requests
from io import BytesIO
from PIL import Image
import numpy as np
# Load some image, so we have the same
response = requests.get("https://upload.wikimedia.org/wikipedia/commons/4/41/Rick_Astley_Dallas.jpg")
# Make areas of known color
img = Image.open(BytesIO(response.content)).rotate(10, expand=True).rotate(-10,expand=True, fillcolor=(255,255,255)).convert('RGBA')
print("height: %d, width: %d (%.2f MP)"%(img.height, img.width, img.width*img.height/10e6))
height: 5034, width: 5792 (2.92 MP)
Define our special colors
specials = {
(4,1,6):(255,255,255),
(0, 0, 0):(255, 0, 255),
(255, 255, 255):(0, 255, 0)
}
Algorithm
def transform_map(img, specials, R_factor, G_factor, B_factor):
# Your transform
def transform(x, a):
a *= x
return a.clip(0, 255).astype(np.uint8)
# Convert to array
img_array = np.asarray(img)
# Extract channels
R = img_array.T[0]
G = img_array.T[1]
B = img_array.T[2]
A = img_array.T[3]
# Find Special colors
    # First, calculate a unique hash for each color
    color_hashes = (R + 2**8 * G + 2**16 * B)
    # Find indices of special colors
special_idxs = []
for k, v in specials.items():
key_arr = np.array(list(k))
val_arr = np.array(list(v))
spec_hash = key_arr[0] + 2**8 * key_arr[1] + 2**16 * key_arr[2]
special_idxs.append(
{
'mask': np.where(np.isin(color_hashes, spec_hash)),
'value': val_arr
}
)
# Apply transform to whole image
R = transform(R, R_factor)
G = transform(G, G_factor)
B = transform(B, B_factor)
# Replace values where special colors were found
for idx in special_idxs:
R[idx['mask']] = idx['value'][0]
G[idx['mask']] = idx['value'][1]
B[idx['mask']] = idx['value'][2]
return Image.fromarray(np.array([R,G,B,A]).T, mode='RGBA')
And finally some benchmarks on an Intel Core i5-6300U @ 2.40GHz:
import time
times = []
for i in range(10):
t0 = time.time()
# Test
transform_map(img, specials, 1.2, .9, 1.2)
#
t1 = time.time()
times.append(t1-t0)
np.round(times, 2)
print('average run time: %.2f +/-%.2f'%(np.mean(times), np.std(times)))
average run time: 9.72 +/-0.91
EDIT Parallelization
With the same setup as above, we can get a 2x speed increase on large images. (Small ones are faster without numba)
from numba import njit, prange
from numba.core import types
from numba.typed import Dict
# Map dict of special colors or transform over array of pixel values
@njit(parallel=True, locals={'px_hash': types.uint32})
def check_and_transform(img_array, d, T):
#Save Shape for later
shape = img_array.shape
# Flatten image for 1-d iteration
img_array_flat = img_array.reshape(-1,3).copy()
N = img_array_flat.shape[0]
# Replace or map
for i in prange(N):
px_hash = np.uint32(0)
px_hash += img_array_flat[i,0]
px_hash += types.uint32(2**8) * img_array_flat[i,1]
px_hash += types.uint32(2**16) * img_array_flat[i,2]
try:
img_array_flat[i] = d[px_hash]
except Exception:
img_array_flat[i] = (img_array_flat[i] * T).astype(np.uint8)
# return image
return img_array_flat.reshape(shape)
# Wrapper for function above
def map_or_transform_jit(image: Image, specials: dict, T: np.ndarray):
# assemble numba typed dict
d = Dict.empty(
key_type=types.uint32,
value_type=types.uint8[:],
)
for k, v in specials.items():
k = types.uint32(k[0] + 2**8 * k[1] + 2**16 * k[2])
v = np.array(v, dtype=np.uint8)
d[k] = v
# get rgb channels
    img_arr = np.array(image)
rgb = img_arr[:,:,:3].copy()
img_shape = img_arr.shape
# apply map
rgb = check_and_transform(rgb, d, T)
# set color channels
img_arr[:,:,:3] = rgb
return Image.fromarray(img_arr, mode='RGBA')
# Benchmark
import time
times = []
for i in range(10):
t0 = time.time()
# Test
test_img = map_or_transform_jit(img, specials, np.array([1, .5, .5]))
#
t1 = time.time()
times.append(t1-t0)
np.round(times, 2)
print('average run time: %.2f +/- %.2f'%(np.mean(times), np.std(times)))
test_img
average run time: 3.76 +/- 0.08
n = 3
array = np.ones((n, n)) / (n * n)
n = array.shape[0] * array.shape[1]
while(True):
ret, frame = cap.read()
if ret is True:
print("newframe")
gframe = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
dst = cv2.copyMakeBorder(gframe, 1, 1, 1, 1, borderType, None, None)
blur = cv2.blur(dst,(3,3))
if k == 1 :
lastframe = gframe
curframe = gframe
nextframe = gframe
newFrame = gframe
k = 0
else :
lf = ndimage.convolve(lastframe, array, mode='constant', cval= 0.0)
cf = ndimage.convolve(curframe, array, mode='constant', cval= 0.0)
nf = ndimage.convolve(nextframe, array, mode='constant', cval= 0.0)
lastframe = curframe
curframe = nextframe
nextframe = gframe
b = np.zeros((3, 528, 720))
b[0] = lf
b[1] = cf
b[2] = nf
result = np.mean(b, axis=0)
cv2.imshow('frame',result)
cv2.imshow('frame2',gframe)
I am trying to add all the pixel values of a 3x3 pixel neighbourhood and then average them. I need to do that for every pixel and every frame, and replace the primary pixel with the averaged one. However, the way I am trying to do it is really slow and not very accurate.
This sounds like a convolution.
import numpy as np
from scipy import ndimage
a = np.random.random((5, 5))
a
[[0.14742615 0.83548453 0.67433445 0.59162829 0.21160044]
[0.1700598 0.89074466 0.84155171 0.65092969 0.3842437 ]
[0.22662423 0.2266929 0.47757456 0.34480112 0.06261333]
[0.89402116 0.00101947 0.90503461 0.93112109 0.44817247]
[0.21788789 0.3338606 0.07323461 0.28944439 0.91217591]]
Convolution operation with window size 3x3
n = 3
k = np.ones((n, n)) / (n * n)
n = k.shape[0] * k.shape[1]
b = ndimage.convolve(a, k, mode='constant', cval=0.0)
b
[[0.22707946 0.39551126 0.49829704 0.3726987 0.2042669 ]
[0.27744803 0.49894366 0.61486021 0.47103081 0.24953517]
[0.26768469 0.51481368 0.58549664 0.56067136 0.31354238]
[0.21112292 0.37288334 0.39808704 0.4937969 0.33203648]
[0.16075435 0.26945093 0.28152386 0.39546479 0.28676821]]
Now you just have to do it for the current frame, and the two prior frames.
-------- EDIT: For three frames -----------
For 3D you could write a convolution function like in this post, but it's quite complex as it uses FFTs.
If you just want to average across three frames, you could do:
f1 = np.random.random((5, 5)) # Frame1
f2 = np.random.random((5, 5)) # Frame2
f3 = np.random.random((5, 5)) # Frame3
n = 3
k = np.ones((n, n)) / (n * n)
n = k.shape[0] * k.shape[1]
b0 = ndimage.convolve(f1, k, mode='constant', cval=0.0)
b1 = ndimage.convolve(f2, k, mode='constant', cval=0.0)
b2 = ndimage.convolve(f3, k, mode='constant', cval=0.0)
# Create a 3D Matrix, with each fame placed along the first dimension
b = np.zeros((3, 5, 5))
b[0] = b0
b[1] = b1
b[2] = b2
# Take the average across the first dimension (across frames)
result = np.mean(b, axis=0)
There probably is a more elegant solution than this, but it gets the job done.
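One such alternative (a sketch): since averaging the three blurred frames is itself a uniform mean over a 3x3x3 window of the stacked array, scipy.ndimage.uniform_filter can do both steps in one call:
b = np.stack([f1, f2, f3])  # shape (3, 5, 5)
# 3x3x3 mean filter with zero padding; the centre slice equals the
# mean of the three individually blurred frames
result = ndimage.uniform_filter(b, size=3, mode='constant', cval=0.0)[1]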
-------- EDIT: For Movies -----------
Based on all the questions in the comments I've decided to attempt to add some more code to help with implementation.
Firstly I'm starting out with these 7 consecutive stills from a movie:
I have not verified that the following code is bug proof or actually returns the correct result.
import cv2
import numpy as np
from scipy import ndimage
# this is a function to do previous code
def mean_frames(frames, kernel):
    b = np.zeros(frames.shape)
    for i in range(frames.shape[0]):
        b[i] = ndimage.convolve(frames[i], kernel, mode='constant', cval=0.0)
    b = np.mean(b, axis=0) / frames.shape[0]
    return b
mean_N = 3 # frames to average
# read in 1 file to get dimensions
im = cv2.imread(f'{root}1.png', cv2.IMREAD_GRAYSCALE)
# setup numpy matrix that will hold mean_N frames at a time
frames = np.zeros((mean_N, im.shape[0], im.shape[1]))
avg_frames = [] # list to store our 3 averaged frames
count = 0 # counter to position frames in 1st dim of 3D matrix for avg
k = np.ones((3, 3)) / (3 * 3) # kernel for 2D convolution
for j in range(1, 8): # 7 images
file_name = root + str(j) + '.png'
im = cv2.imread(file_name, cv2.IMREAD_GRAYSCALE)
frames[count, ::] = im # store in 3D matrix
# if loaded more than min req. for avg, we average
if j >= mean_N:
# average and store to list
avg_frames.append(mean_frames(frames, k))
# if the count is mean_N - 1, that means we need to replace
# the 0th matrix in frames so that we are doing a 'moving avg'
if count == (mean_N - 1):
count = 0
else:
count += 1 #increase position in 0th dim for 3D matrix storage
# output averaged frames
for i, f in enumerate(avg_frames):
cv2.imwrite(f'{path}output{i}.jpg', f)
Then looking at the folder, there are 5 files (as expected if we do a moving average of 3 frames over 7 stills).
Looking at before and after:
Image 3:
and averaged image #1:
The image is not only in grayscale (as expected) but also seems quite dark. Perhaps some brightening would make things look better/more apparent.
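One simple option is to stretch each averaged frame to the full 8-bit range before saving (a sketch, replacing the output loop above):
for i, f in enumerate(avg_frames):
    # rescale so the darkest pixel maps to 0 and the brightest to 255
    f = cv2.normalize(f, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)
    cv2.imwrite(f'{path}output{i}.jpg', f)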
Your question is very interesting.
I see that you use many loops in this function. Let's analyze the process, first for a single frame.
You want to add all the pixel values of a 3x3 neighborhood. I think image interpolation is very suitable for this case. In OpenCV, we use resize() to interpolate the pixels of an image, and INTER_NEAREST is best for this situation.
This is the formula for INTER_NEAREST.
Now you have the pixel-added image.
Then you want to do that for every pixel of every frame and replace the primary pixel with the averaged one. I think average filtering is a better solution here; the filter operates on every pixel.
Here is the code for a quick example.
Interpolation
img = cv2.resize(img, (img.shape[1] * 3, img.shape[0] * 3), interpolation=cv2.INTER_NEAREST)
Filter
img = cv2.blur(img, (3, 3))
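Putting the two steps together (a sketch; 'frame.png' is a hypothetical input file, and note that a cv2 image stores its dimensions in img.shape, not img.size):
import cv2

img = cv2.imread('frame.png', cv2.IMREAD_GRAYSCALE)  # hypothetical input
# upscale 3x with nearest-neighbour interpolation
img = cv2.resize(img, (img.shape[1] * 3, img.shape[0] * 3), interpolation=cv2.INTER_NEAREST)
# 3x3 average filter over every pixel
img = cv2.blur(img, (3, 3))
cv2.imwrite('frame_out.png', img)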
I'm working on a preprocessing function that takes DICOM files as input and returns a 3D np.array (image stack). The problem is that I need to keep the association between ImagePositionPatient[2] and the relative position of the processed slices in the output array.
For example, if a slice with ImagePositionPatient[2] == 5 is mapped to a processed slice at position 3 in the returned stack, I need to return another array that has 5 in the third position, and the same for all original slices. For slices created during processing by interpolation or padding, the array shall contain a placeholder value like -99999 instead.
I paste my code here.
EDIT: new simplified version
def lung_segmentation(patient_dir):
"""
Load the dicom files of a patient, build a 3D image of the scan, normalize it to (1mm x 1mm x 1mm) and segment
the lungs
:param patient_dir: directory of dcm files
:return: a numpy array of size (384, 288, 384)
"""
""" LOAD THE IMAGE """
# Initialize image and get dcm files
dcm_list = glob(patient_dir + '/*.dcm')
    img = np.zeros((len(dcm_list), 512, 512), dtype='float32')  # initialize an array
    # of len(dcm_list) zero matrices of size 512x512
z = []
# For each dcm file, get the corresponding slice, normalize HU values, and store the Z position of the slice
for i, f in enumerate(dcm_list):
dcm = dicom.read_file(f)
img[i] = float(dcm.RescaleSlope) * dcm.pixel_array.astype('float32') + float(dcm.RescaleIntercept)
z.append(dcm.ImagePositionPatient[-1])
# Get spacing and reorder slices
spacing = list(map(float, dcm.PixelSpacing)) + [np.median(np.diff(np.sort(z)))]
print("LO SPACING e: "+str(spacing))
# spacing = list(map(lambda dcm, z: dcm.PixelSpacing + [np.median(np.diff(np.sort(z)))]))
img = img[np.argsort(z)]
""" NORMALIZE HU AND RESOLUTION """
# Clip and normalize
    img = np.clip(img, -1024, 4000) # clip with minimum at -1024 and max at 4000
img = (img + 1024.) / (4000 + 1024.)
# Rescale 1mm x 1mm x 1mm
    new_shape = [int(x * y) for x, y in zip(img.shape, spacing[::-1])]
old_shape = img.shape
img = resize(img, new_shape, preserve_range=True)
    print('new shape computed: ' + str(img.shape) + ', computed from img_shape: ' + str(old_shape) + ' * ' + str(spacing[::-1]))
lungmask = np.zeros(img.shape) # WE NEED LUNGMASK FOR CODE BELOW
lungmask[int(img.shape[0]/2 - img.shape[0]/4) : int(img.shape[0]/2 + img.shape[0]/4),
int(img.shape[1]/2 - img.shape[1]/4) : int(img.shape[1]/2 + img.shape[1]/4),
int(img.shape[2]/2 - img.shape[2]/4) : int(img.shape[2]/2 + img.shape[2]/4)] = 1
    # I set some pixels to 1 so that the code below runs; feel free to change this
""" CENTER AND PAD TO GET SHAPE (384, 288, 384) """
# Center the image
sum_x = np.sum(lungmask, axis=(0, 1))
sum_y = np.sum(lungmask, axis=(0, 2))
sum_z = np.sum(lungmask, axis=(1, 2))
mx = np.nonzero(sum_x)[0][0]
Mx = len(sum_x) - np.nonzero(sum_x[::-1])[0][0]
my = np.nonzero(sum_y)[0][0]
My = len(sum_y) - np.nonzero(sum_y[::-1])[0][0]
mz = np.nonzero(sum_z)[0][0]
Mz = len(sum_z) - np.nonzero(sum_z[::-1])[0][0]
img = img * lungmask
img = img[mz:Mz, my:My, mx:Mx]
# Pad the image to (384, 288, 384)
nz, nr, nc = img.shape
pad1 = int((384 - nz) / 2)
pad2 = 384 - nz - pad1
pad3 = int((288 - nr) / 2)
pad4 = 288 - nr - pad3
pad5 = int((384 - nc) / 2)
pad6 = 384 - nc - pad5
# Crop images too big
if pad1 < 0:
        img = img[-pad1:384 - pad2]
pad1 = pad2 = 0
if img.shape[0] == 383:
pad1 = 1
if pad3 < 0:
        img = img[:, -pad3:288 - pad4]
pad3 = pad4 = 0
if img.shape[1] == 287:
pad3 = 1
if pad5 < 0:
img = img[:, :, -pad5:384 - pad6]
pad5 = pad6 = 0
if img.shape[2] == 383:
pad5 = 1
# Pad
img = np.pad(img, pad_width=((pad1 - 4, pad2 + 4), (pad3, pad4), (pad5, pad6)), mode='constant')
# The -4 / +4 is here for "historical" reasons, but it can be removed
return img
The reference library for the resize methods etc. is skimage.
I will try to give at least some hints towards the answer. As has been discussed in the comments, resizing may remove the processed data from the original positions due to the needed interpolation - so in the end you have to come up with a solution for that, either by changing the resizing target to a multiple of the actual resolution, or by returning the interpolated positions instead.
The basic idea is to have your positions array z be transformed the same as the images are in z direction. So for each operation in processing that changes the z location of the processed image, a similar operation has to be done for z.
Let's say you have 5 slices with a slice distance of 3mm:
>>> z
[0, 6, 3, 12, 9]
We can make a numpy array from it for easier handling:
z_out = np.array(z)
This corresponds to the unprocessed img list.
Now you sort the image list, so you have to also sort z_out:
img = img[np.argsort(z)]
z_out = np.sort(z_out)
>>> z_out
[0, 3, 6, 9, 12]
Next, the image is resized, introducing interpolated slices.
I will assume here that the resizing is done so that the slice distance is a multiple of the target resolution. In this case you have to calculate the number of interpolated slices, and fill the new position array with corresponding placeholder values:
slice_distance = int((max(z) - min(z)) / (len(z) - 1))
nr_interpolated = slice_distance - 1 # you may adapt this to your algorithm
index_diff = np.arange(len(z) - 1) # used to adapt the insertion index
for i in range(nr_interpolated):
index = index_diff * (i + 1) + 1 # insertion index for placeholders
z_out = np.insert(z_out, index, -99999) # insert placeholder for interpolated positions
This gives you the z array filled with the placeholder value where interpolated slices occur in the image array:
>>> z_out
[0, -99999, -99999, 3, -99999, -99999, 6, -99999, -99999, 9, -99999, -99999, 12]
Then you have to do the same padding as for the image in the z direction:
img = np.pad(img, pad_width=((pad1 - 4, pad2 + 4), (pad3, pad4), (pad5, pad6)), mode='constant')
# use 'minimum' so that the placeholder is used
z_out = np.pad(z_out, pad_width=(pad1 - 4, pad2 + 4), mode='minimum')
Assuming padding values of 1 and 3 for simplicity, this gives you:
>>> z_out
[-99999, 0, -99999, -99999, 3, -99999, -99999, 6, -99999, -99999, 9, -99999, -99999, 12, -99999, -99999, -99999]
If you have more transformations in the z direction, you have to make the corresponding changes to z_out. When you are done, you can return your position list together with the image list:
return img, z_out
As an aside: your code will only work as intended if your image has a transverse (axial) orientation; otherwise you have to calculate the z position array from Image Position Patient and Image Orientation Patient, instead of just using the z component of the image position.
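A sketch of that calculation (assuming pydicom-style datasets, as in the question's code): the slice position is the projection of Image Position (Patient) onto the slice normal, where the normal is the cross product of the row and column direction cosines from Image Orientation (Patient).
import numpy as np

def slice_position(dcm):
    # direction cosines of the image rows and columns
    iop = np.array(dcm.ImageOrientationPatient, dtype=float)
    row_cos, col_cos = iop[:3], iop[3:]
    # slice normal, perpendicular to the image plane
    normal = np.cross(row_cos, col_cos)
    # signed distance of the slice along the normal
    return float(np.dot(normal, np.array(dcm.ImagePositionPatient, dtype=float)))

# use instead of dcm.ImagePositionPatient[-1] when building z
z = [slice_position(dicom.read_file(f)) for f in dcm_list]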