Why the extracted watermark is not the same as the embedded one? - python

I'm trying to write the code of this paper paper for a university project. the idea is to insert an invisible watermark into a grayscale image, which can be extracted later to verify the image ownership.
This is the code I wrote for the watermark embedding process :
import pywt
import numpy as np
import cv2
from PIL import Image
from math import sqrt, log10
from scipy.fftpack import dct, idct
def Get_MSB_LSB_Watermark () : #Function that separates the watermark into MSB and LSB images
MSBs = []
LSBs = []
for i in range (len(Watermark)) :
binary = '{:0>8}'.format(str(bin(Watermark[i]))[2:])
MSB = (binary[0:4])
LSB = (binary[4:])
MSB = int(MSB, 2)
LSB = int(LSB,2)
MSBs.append(MSB)
LSBs.append(LSB)
MSBs = np.array(MSBs)
LSBs = np.array(LSBs)
return MSBs.reshape(64,64), LSBs.reshape(64,64)
def split(array, nrows, ncols): #Split array into blocks of size nrows* ncols
r, h = array.shape
return (array.reshape(h//nrows, nrows, -1, ncols)
.swapaxes(1, 2)
.reshape(-1, nrows, ncols))
def unblockshaped(arr, h, w): #the inverse of the split function
n, nrows, ncols = arr.shape
return (arr.reshape(h//nrows, -1, nrows, ncols)
.swapaxes(1,2)
.reshape(h, w))
def ISVD (U,S,V): #the inverse of singular value decomposition
s = np.zeros(np.shape(U))
for i in range(4):
s[i, i] = S[i]
recon_image = U # s # V
return recon_image
def Watermark_Embedding (blocks, watermark) :
Watermarked_blocks = []
k1 = []
k2 = []
#convert the watermark to a list
w = list(np.ndarray.flatten(watermark))
for i in range (len(blocks)) :
B = blocks[i]
#Aplly singular value decoposition to the block
U, s, V = np.linalg.svd(B)
#Modify the singular values of the block
P = s[1] - s[2]
delta = abs(w[i]) - P
s[1] = s[1] + delta
if s[0] >= s[1] :
k1.append(1)
else :
k1.append(-1)
#the inverse of SVD after watermark embedding
recunstructed_B = ISVD(U, s, V)
Watermarked_blocks.append(recunstructed_B)
for j in range(len(w)):
if w[j] >= 0:
k2.append(1)
else:
k2.append(-1)
return k1,k2, np.array(Watermarked_blocks)
def apply_dct(image_array):
size = image_array[0].__len__()
all_subdct = np.empty((size, size))
for i in range (0, size, 4):
for j in range (0, size, 4):
subpixels = image_array[i:i+4, j:j+4]
subdct = dct(dct(subpixels.T, norm="ortho").T, norm="ortho")
all_subdct[i:i+4, j:j+4] = subdct
return all_subdct
def inverse_dct(all_subdct):
size = all_subdct[0].__len__()
all_subidct = np.empty((size, size))
for i in range (0, size, 4):
for j in range (0, size, 4):
subidct = idct(idct(all_subdct[i:i+4, j:j+4].T, norm="ortho").T, norm="ortho")
all_subidct[i:i+4, j:j+4] = subidct
return all_subidct
#read watermark
Watermark = Image.open('Copyright.png').convert('L')
Watermark = list(Watermark.getdata())
#Separate the watermark into LSB and MSB images
Watermark1, Watermark2 = Get_MSB_LSB_Watermark()
#Apply descrete cosine Transform on the two generated images
DCT_Watermark1 = apply_dct(Watermark1)
DCT_Watermark2 = apply_dct(Watermark2)
#read cover Image
Cover_Image = Image.open('10.png').convert('L')
#Apply 1 level descrete wavelet transform
LL1, (LH1, HL1, HH1) = pywt.dwt2(Cover_Image, 'haar')
#Split the LH1 and HL1 subbands into blocks of size 4*4
blocks_LH1 = split(LH1,4,4)
blocks_HL1 = split(HL1,4,4)
#Watermark Embedding in LH1 and HL1 and Keys generation
Key1, Key3, WatermarkedblocksLH1 = Watermark_Embedding(blocks_LH1,DCT_Watermark1)
Key2 ,Key4, WatermarkedblocksHL1 = Watermark_Embedding(blocks_HL1,DCT_Watermark2)
#Merge the watermzrked Blocks
reconstructed_LH1 = unblockshaped(WatermarkedblocksLH1, 256,256)
reconstructed_HL1 = unblockshaped(WatermarkedblocksHL1, 256,256)
#Apply the inverse of descrete wavelet transform to get the watermarked image
IDWT = pywt.idwt2((LL1, (reconstructed_LH1, reconstructed_HL1, HH1)), 'haar')
cv2.imwrite('Watermarked_img.png', IDWT)
This is the code I wrote for the Extraction process :
import pywt
from scipy import fftpack
import numpy as np
import cv2
from PIL import Image
import scipy
from math import sqrt, log10
from Watermark_Embedding import *
def Watermark_Extraction(blocks,key1, key2) :
Extracted_Watermark = []
for i in range(len(blocks)):
B = blocks[i]
#apply SVD on the Block
U, s, V = np.linalg.svd(B)
if key1[i] == 1 :
P = (s[1] - s[2])
Extracted_Watermark.append(P)
else :
P = (s[0] - s[2])
Extracted_Watermark.append(P)
for j in range(len(Extracted_Watermark)) :
if key2[j] == 1 :
Extracted_Watermark[j] = Extracted_Watermark[j]
else :
Extracted_Watermark[j] = - (Extracted_Watermark[j])
return np.array(Extracted_Watermark)
def Merge_W1_W2 ():
Merged_watermark = []
w1 = list(np.ndarray.flatten(IDCTW1))
w2 = list(np.ndarray.flatten(IDCTW2))
for i in range (len(w2)):
bw1 = '{:0>4}'.format((bin(int(abs(w1[i]))))[2:])
bw2 = '{:0>4}'.format((bin(int(abs(w2[i]))))[2:])
P = bw1+bw2
pixel = (int(P,2))
Merged_watermark.append(pixel)
return Merged_watermark
Watermarked_Image = Image.open('Watermarked_img.png')
LL1, (LH1, HL1, HH1) = pywt.dwt2(Watermarked_Image, 'haar')
blocks_LH1 = split(LH1,4,4)
blocks_HL1 = split(HL1,4,4)
W1 = Watermark_Extraction(blocks_LH1, Key1,Key3)
W2 = Watermark_Extraction(blocks_HL1, Key2, Key4)
W1 = W1.reshape(64,64)
W2 = W2.reshape(64,64)
IDCTW1 = inverse_dct(W1)
IDCTW2 = inverse_dct(W2)
Merged = np.array(Merge_W1_W2())
Merged = Merged.reshape(64,64)
cv2.imwrite('Extracted_Watermark.png', Merged)
The cover Image of size 512*512:
The 64*64 watermark I used
The watermarked Image :
The extracted Watermark I get:
I calculated the similarity between the two watermarks using SSIM :
from skimage.metrics import structural_similarity
original_Watermark = cv2.imread('Copyright.png')
extracted_watermark = cv2.imread('Extracted_Watermark.png')
# Convert images to grayscale
original_watermark = cv2.cvtColor(original_Watermark, cv2.COLOR_BGR2GRAY)
extracted_Watermark = cv2.cvtColor(extracted_watermark, cv2.COLOR_BGR2GRAY)
# Compute SSIM between two images
(score, diff) = structural_similarity(original_Watermark, extracted_Watermark, full=True)
print("SSIM = ", score)
I didn't apply any modification on the watermarked image and The SSIM I got is 0.8445354561524052. however the SSIM of the extracted watermark should be 0.99 according to the paper.
I don't know what's wrong with my code and I have a deadline after two days so I really need help.
thanks in advance.

There are two issues:
In Merge_W1_W2 you are using int to convert from float to int but that introduces errors for numbers where the floating point representation is not exact (e.g. 14.99999999999997); this can be fixed by using round instead.
Saving cv2.imwrite('Watermarked_img.png', IDWT) is a lossy operation because it rounds the values in IDWT to the nearest integer; if you use Watermarked_Image = IDWT then you will get back the exact same watermark image.

Related

Quick pixel manipulation with Pillow and/or NumPy

I'm trying to improve the speed of my image manipulation as it's been too slow for actual use.
What I need to do is apply a complex transformation on the colour of every pixel on an image. The manipulation is basically apply a vector transform like T(r, g, b, a) => (r * x, g * x, b * y, a) or in layman's terms, it's a multiplication of Red and Green values by a constant, a different multiplication for Blue and keep Alpha. But I also need to manipulate it differently if the RGB colour falls under some specific colours, in those cases they must follow a dictionary/transformation table where RGB => newRGB again keeping alpha.
The algorithm would be:
for each pixel in image:
if pixel[r, g, b] in special:
return special[pixel[r, g, b]] + pixel[a]
else:
return T(pixel)
It's simple but speed has been sub-optimal. I believe there's some way using numpy vectors, but I could not find how.
Important details about the implementation:
I don't care about the original buffer/image (manipulation can be in place)
I can use wxPython, Pillow and NumPy
Order or dimension of the array is not important as long as the buffer keeps the length
The buffer is obtained from a wxPython Bitmap and special and (RG|B)_pal are transformation tables, the end result will become a wxPython Bitmap too. They're obtained like these:
# buffer
bitmap = wx.Bitmap # it's valid wxBitmap here, this is just to let you know it exists
buff = bytearray(bitmap.GetWidth() * bitmap.GetHeight() * 4)
bitmap.CopyToBuffer(buff, wx.BitmapBufferFormat_RGBA)
self.RG_mult= 0.75
self.B_mult = 0.83
self.RG_pal = []
self.B_pal = []
for i in range(0, 256):
self.RG_pal.append(int(i * self.RG_mult))
self.B_pal.append(int(i * self.B_mult))
self.special = {
# RGB: new_RGB
# Implementation specific for the fastest access
# with buffer keys are 24bit numbers, with PIL keys are tuples
}
Implementations I tried include direct buffer manipulation:
for x in range(0, bitmap.GetWidth() * bitmap.GetHeight()):
index = x * 4
r = buf[index]
g = buf[index + 1]
b = buf[index + 2]
rgb = buf[index:index + 3]
if rgb in self.special:
special = self.special[rgb]
buf[index] = special[0]
buf[index + 1] = special[1]
buf[index + 2] = special[2]
else:
buf[index] = self.RG_pal[r]
buf[index + 1] = self.RG_pal[g]
buf[index + 2] = self.B_pal[b]
Use Pillow with getdata():
pil = Image.frombuffer("RGBA", (bitmap.GetWidth(), bitmap.GetHeight()), buf)
pil_buf = []
for colour in pil.getdata():
colour_idx = colour[0:3]
if (colour_idx in self.special):
special = self.special[colour_idx]
pil_buf.append((
special[0],
special[1],
special[2],
colour[3],
))
else:
pil_buf.append((
self.RG_pal[colour[0]],
self.RG_pal[colour[1]],
self.B_pal[colour[2]],
colour[3],
))
pil.putdata(pil_buf)
buf = pil.tobytes()
Pillow with point() and getdata() (fastest I achieved, more than twice times faster than others)
pil = Image.frombuffer("RGBA", (bitmap.GetWidth(), bitmap.GetHeight()), buf)
r, g, b, a = pil.split()
r = r.point(lambda r: r * self.RG_mult)
g = g.point(lambda g: g * self.RG_mult)
b = b.point(lambda b: b * self.B_mult)
pil = Image.merge("RGBA", (r, g, b, a))
i = 0
for colour in pil.getdata():
colour_idx = colour[0:3]
if (colour_idx in self.special):
special = self.special[colour_idx]
pil.putpixel(
(i % bitmap.GetWidth(), i // bitmap.GetWidth()),
(
special[0],
special[1],
special[2],
colour[3],
)
)
i += 1
buf = pil.tobytes()
I also tried working with numpy.where but then I could not get it to work. With numpy.apply_along_axis it worked but the performance was terrible. Other tries with numpy I could not access the RGB together, only as separated bands.
Pure Numpy Version
This first optimization relies on the fact, that one probably has way less special colors than pixels. I use numpy to do all the inner loops. This works well with images of up to 1MP. If You have multiple images I'd recommend the parallel approach.
Let's define a test case:
import requests
from io import BytesIO
from PIL import Image
import numpy as np
# Load some image, so we have the same
response = requests.get("https://upload.wikimedia.org/wikipedia/commons/4/41/Rick_Astley_Dallas.jpg")
# Make areas of known color
img = Image.open(BytesIO(response.content)).rotate(10, expand=True).rotate(-10,expand=True, fillcolor=(255,255,255)).convert('RGBA')
print("height: %d, width: %d (%.2f MP)"%(img.height, img.width, img.width*img.height/10e6))
height: 5034, width: 5792 (2.92 MP)
Define our special colors
specials = {
(4,1,6):(255,255,255),
(0, 0, 0):(255, 0, 255),
(255, 255, 255):(0, 255, 0)
}
Algorithm
def transform_map(img, specials, R_factor, G_factor, B_factor):
# Your transform
def transform(x, a):
a *= x
return a.clip(0, 255).astype(np.uint8)
# Convert to array
img_array = np.asarray(img)
# Extract channels
R = img_array.T[0]
G = img_array.T[1]
B = img_array.T[2]
A = img_array.T[3]
# Find Special colors
# First, calculate a uniqe hash
color_hashes = (R + 2**8 * G + 2**16 * B)
# Find inidices of special colors
special_idxs = []
for k, v in specials.items():
key_arr = np.array(list(k))
val_arr = np.array(list(v))
spec_hash = key_arr[0] + 2**8 * key_arr[1] + 2**16 * key_arr[2]
special_idxs.append(
{
'mask': np.where(np.isin(color_hashes, spec_hash)),
'value': val_arr
}
)
# Apply transform to whole image
R = transform(R, R_factor)
G = transform(G, G_factor)
B = transform(B, B_factor)
# Replace values where special colors were found
for idx in special_idxs:
R[idx['mask']] = idx['value'][0]
G[idx['mask']] = idx['value'][1]
B[idx['mask']] = idx['value'][2]
return Image.fromarray(np.array([R,G,B,A]).T, mode='RGBA')
And finally some bench marks on a Intel Core i5-6300U # 2.40GHz
import time
times = []
for i in range(10):
t0 = time.time()
# Test
transform_map(img, specials, 1.2, .9, 1.2)
#
t1 = time.time()
times.append(t1-t0)
np.round(times, 2)
print('average run time: %.2f +/-%.2f'%(np.mean(times), np.std(times)))
average run time: 9.72 +/-0.91
EDIT Parallelization
With the same setup as above, we can get a 2x speed increase on large images. (Small ones are faster without numba)
from numba import njit, prange
from numba.core import types
from numba.typed import Dict
# Map dict of special colors or transform over array of pixel values
#njit(parallel=True, locals={'px_hash': types.uint32})
def check_and_transform(img_array, d, T):
#Save Shape for later
shape = img_array.shape
# Flatten image for 1-d iteration
img_array_flat = img_array.reshape(-1,3).copy()
N = img_array_flat.shape[0]
# Replace or map
for i in prange(N):
px_hash = np.uint32(0)
px_hash += img_array_flat[i,0]
px_hash += types.uint32(2**8) * img_array_flat[i,1]
px_hash += types.uint32(2**16) * img_array_flat[i,2]
try:
img_array_flat[i] = d[px_hash]
except Exception:
img_array_flat[i] = (img_array_flat[i] * T).astype(np.uint8)
# return image
return img_array_flat.reshape(shape)
# Wrapper for function above
def map_or_transform_jit(image: Image, specials: dict, T: np.ndarray):
# assemble numba typed dict
d = Dict.empty(
key_type=types.uint32,
value_type=types.uint8[:],
)
for k, v in specials.items():
k = types.uint32(k[0] + 2**8 * k[1] + 2**16 * k[2])
v = np.array(v, dtype=np.uint8)
d[k] = v
# get rgb channels
img_arr = np.array(img)
rgb = img_arr[:,:,:3].copy()
img_shape = img_arr.shape
# apply map
rgb = check_and_transform(rgb, d, T)
# set color channels
img_arr[:,:,:3] = rgb
return Image.fromarray(img_arr, mode='RGBA')
# Benchmark
import time
times = []
for i in range(10):
t0 = time.time()
# Test
test_img = map_or_transform_jit(img, specials, np.array([1, .5, .5]))
#
t1 = time.time()
times.append(t1-t0)
np.round(times, 2)
print('average run time: %.2f +/- %.2f'%(np.mean(times), np.std(times)))
test_img
average run time: 3.76 +/- 0.08

Storing pixel values of a greyscale video ,averaging them and then showing the resulting video

`n = 3
array = np.ones((n,n)) / (n*n)
n = array.shape[0] * array.shape1
while(True):
ret, frame = cap.read()
if ret is True:
print("newframe")
gframe = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
dst = cv2.copyMakeBorder(gframe, 1, 1, 1, 1, borderType, None, None)
blur = cv2.blur(dst,(3,3))
if k == 1 :
lastframe = gframe
curframe = gframe
nextframe = gframe
newFrame = gframe
k = 0
else :
lf = ndimage.convolve(lastframe, array, mode='constant', cval= 0.0)
cf = ndimage.convolve(curframe, array, mode='constant', cval= 0.0)
nf = ndimage.convolve(nextframe, array, mode='constant', cval= 0.0)
lastframe = curframe
curframe = nextframe
nextframe = gframe
b = np.zeros((3, 528, 720))
b[0] = lf
b[1] = cf
b[2] = nf
result = np.mean(b, axis=0)
cv2.imshow('frame',result)
cv2.imshow('frame2',gframe)
`enter image description here
I am trying to add all pixel values of a 3x3 pixel and then average them. I need to do that for every pixel and every frame and replace the primary pixel with the averaged one. However the way i am trying to do it makes it really slow and not really accurate.
This sounds like a convolution.
import numpy as np
from scipy import ndimage
a = np.random.random((5, 5))
a
[[0.14742615 0.83548453 0.67433445 0.59162829 0.21160044]
[0.1700598 0.89074466 0.84155171 0.65092969 0.3842437 ]
[0.22662423 0.2266929 0.47757456 0.34480112 0.06261333]
[0.89402116 0.00101947 0.90503461 0.93112109 0.44817247]
[0.21788789 0.3338606 0.07323461 0.28944439 0.91217591]]
Convolution operation with window size 3x3
n = 3
k = np.ones((n, n)) / (n * n)
n = k.shape[0] * k.shape[1]
b = ndimage.convolve(a, k, mode='constant', cval=0.0)
b
[[0.22707946 0.39551126 0.49829704 0.3726987 0.2042669 ]
[0.27744803 0.49894366 0.61486021 0.47103081 0.24953517]
[0.26768469 0.51481368 0.58549664 0.56067136 0.31354238]
[0.21112292 0.37288334 0.39808704 0.4937969 0.33203648]
[0.16075435 0.26945093 0.28152386 0.39546479 0.28676821]]
Now you just have to do it for the current frame, and the two prior frames.
-------- EDIT: For three frames -----------
For 3D you could write a convolution function like in this post, but its quite complex as it uses FFTs
If you just want to average across three frames, you could do:
f1 = np.random.random((5, 5)) # Frame1
f2 = np.random.random((5, 5)) # Frame2
f3 = np.random.random((5, 5)) # Frame3
n = 3
k = np.ones((n, n)) / (n * n)
n = k.shape[0] * k.shape[1]
b0 = ndimage.convolve(f1, k, mode='constant', cval=0.0)
b1 = ndimage.convolve(f2, k, mode='constant', cval=0.0)
b2 = ndimage.convolve(f3, k, mode='constant', cval=0.0)
# Create a 3D Matrix, with each fame placed along the first dimension
b = np.zeros((3, 5, 5))
b[0] = b0
b[1] = b1
b[2] = b2
# Take the average across the first dimension (across frames)
result = np.mean(b, axis=0)
There probably is a more elegant solution than this, but it gets the job done.
-------- EDIT: For Movies -----------
Based on all the questions in the comments I've decided to attempt to add some more code to help with implementation.
Firstly I'm starting out with these 7 consecutive stills from a movie:
I have not verified that the following code is bug proof or actually returns the correct result.
import cv2
import numpy as np
from scipy import ndimage
# this is a function to do previous code
def mean_frames(frames, kernel):
b = np.zeros(frames.shape)
for i in range(frames.shape[0]):
b[i] = ndimage.convolve(frames[i], k, mode='constant', cval=0.0)
b = np.mean(b, axis=0) / frames.shape[0]
return b
mean_N = 3 # frames to average
# read in 1 file to get dimenions
im = cv2.imread(f'{root}1.png', cv2.IMREAD_GRAYSCALE)
# setup numpy matrix that will hold mean_N frames at a time
frames = np.zeros((mean_N, im.shape[0], im.shape[1]))
avg_frames = [] # list to store our 3 averaged frames
count = 0 # counter to position frames in 1st dim of 3D matrix for avg
k = np.ones((3, 3)) / (3 * 3) # kernel for 2D convolution
for j in range(1, 7): # 7 images
file_name = root + str(j) + '.png'
im = cv2.imread(file_name, cv2.IMREAD_GRAYSCALE)
frames[count, ::] = im # store in 3D matrix
# if loaded more than min req. for avg, we average
if j >= mean_N:
# average and store to list
avg_frames.append(mean_frames(frames, k))
# if the count is mean_N - 1, that means we need to replace
# the 0th matrix in frames so that we are doing a 'moving avg'
if count == (mean_N - 1):
count = 0
else:
count += 1 #increase position in 0th dim for 3D matrix storage
# ouput averaged frames
for i, f in enumerate(avg_frames):
cv2.imwrite(f'{path}output{i}.jpg', f)
Then looking at the folder, there are 5 files (as expected if we did a moving average of 3 frames over 7 stills:
looking at before and after:
Image 3:
and averaged image #1:
The image not only is in gray scale (as expected) but seems quite dark. Perhaps some brightening would make things look better/more apparent.
Your question is very interesting.
I saw that you use many loops for activating this function. Let's process analysis.
Just for a frame.
You want to add all pixel values of a 3x3 pixel neighborhood. So I think Image interpolation is very suitable for this case. In OpenCV, we use resize() to interpolate pixel for image. So the INTER_NEAREST is best for this situation.
This is the formula for INTER_NEAREST.
Now you get the pixel added image.
Then you want to do that for every pixel and every frame and replace the primary pixel with the average one. And I think the Average filtering is a better solution.
The filter will work every pixel.
The code of a temporary example.
Interpolation
img = cv2.resize(img, (img.size[0]*3, img.size[1]*3), cv2.INTER_NEAREST)
Filter
img = cv2.blur(img, (3, 3))

Concatenating multiple images into one

This function receives a list of numpy arrays that consist of cropped parts of an image. The crops are all the same size, except for the right-most and bottom-most images which might be of smaller size.
predictions[2] would return the 3rd sub-image that was cropped from the original image. Each crop is a numpy array. There are WxH crops, enumerated from left to right, top to bottom (so if there are 4 sub-images constituting the width, the 5th image in predictions would be the first sub-image on the left from the 2nd row of sub-images).
crops contains the necessary information to find number of horizontal and vertical images that will constitute the reconstructed images. crops[2][3] will contain the 3rd from the top, 4th from the left image cropped.
The images contained by crops are of smaller dimension than the ones in predictions (I am basically making a model that increases the resolution of images). The reconstructed image if from the images in predictions, arranged in the same order as the ones in crops.
def reconstruct(predictions, crops):
if len(crops) != 0:
print("use crops")
# TODO: properly extract the size of the full image
width_length = 0
height_length = 0
full_image = np.empty(shape=(height_length, width_length))
print(full_image.shape)
# TODO: properly merge the crops back into a single image
for height in range(len(predictions[0])):
for width in range(len(predictions)):
# concatenate here
print(height, width)
return full_image
I was going to use numpy.concatenate, but according to other answers I've seen on SO it wouldn't be an efficient way of doing it (apparently numpy will just recreate a new variable in memory, copy the old one, and add the new data, etc.). So now I'm left wondering how to properly merge my multiple images into a single image. The current idea I was going for was to create a python list of the proper shape and progressively fill it with each numpy array's data, but even that I'm not sure if it's the proper idea.
Here is more or less the kind of bunch of images I'm trying to concatenate into a single image:
Here is the expected result:
And to help you out with understanding what more might be available to you, here is some more code:
def predict(args):
model = load_model(save_dir + '/' + args.model)
image = skimage.io.imread(tests_path + args.image)
predictions = []
images = []
crops = seq_crop(image) # crops into multiple sub-parts the image based on 'input_' constants
for i in range(len(crops)): # amount of vertical crops
for j in range(len(crops[0])): # amount of horizontal crops
current_image = crops[i][j]
images.append(current_image)
# Hack because GPU can only handle one image at a time
input_img = (np.expand_dims(images[p], 0)) # Add the image to a batch where it's the only member
predictions.append(model.predict(input_img)[0]) # returns a list of lists, one for each image in the batch
return predictions, image, crops
# adapted from: https://stackoverflow.com/a/52463034/9768291
def seq_crop(img):
"""
To crop the whole image in a list of sub-images of the same size.
Size comes from "input_" variables in the 'constants' (Evaluation).
Padding with 0 the Bottom and Right image.
:param img: input image
:return: list of sub-images with defined size
"""
width_shape = ceildiv(img.shape[1], input_width)
height_shape = ceildiv(img.shape[0], input_height)
sub_images = [] # will contain all the cropped sub-parts of the image
for j in range(height_shape):
horizontal = []
for i in range(width_shape):
horizontal.append(crop_precise(img, i*input_width, j*input_height, input_width, input_height))
sub_images.append(horizontal)
return sub_images
def crop_precise(img, coord_x, coord_y, width_length, height_length):
"""
To crop a precise portion of an image.
When trying to crop outside of the boundaries, the input to padded with zeros.
:param img: image to crop
:param coord_x: width coordinate (top left point)
:param coord_y: height coordinate (top left point)
:param width_length: width of the cropped portion starting from coord_x
:param height_length: height of the cropped portion starting from coord_y
:return: the cropped part of the image
"""
tmp_img = img[coord_y:coord_y + height_length, coord_x:coord_x + width_length]
return float_im(tmp_img) # From [0,255] to [0.,1.]
# from https://stackoverflow.com/a/17511341/9768291
def ceildiv(a, b):
"""
To get the ceiling of a division
:param a:
:param b:
:return:
"""
return -(-a // b)
if __name__ == '__main__':
preds, original, crops = predict(args) # returns the predictions along with the original
# TODO: reconstruct image
enhanced = reconstruct(preds, crops) # reconstructs the enhanced image from predictions
EDIT:
The answer worked. Here is the version I've used:
# adapted from https://stackoverflow.com/a/52733370/9768291
def reconstruct(predictions, crops):
# unflatten predictions
def nest(data, template):
data = iter(data)
return [[next(data) for _ in row] for row in template]
predictions = nest(predictions, crops)
H = np.cumsum([x[0].shape[0] for x in predictions])
W = np.cumsum([x.shape[1] for x in predictions[0]])
D = predictions[0][0]
recon = np.empty((H[-1], W[-1], D.shape[2]), D.dtype)
for rd, rs in zip(np.split(recon, H[:-1], 0), predictions):
for d, s in zip(np.split(rd, W[:-1], 1), rs):
d[...] = s
return recon
The most convenient is probably np.block
import numpy as np
from scipy import misc
import Image
# get example picture
data = misc.face()
# chop it up
I, J = map(np.arange, (200, 200), data.shape[:2], (200, 200))
chops = [np.split(row, J, axis=1) for row in np.split(data, I, axis=0)]
# do something with the bits
predictions = [chop-(i+j)*(chop>>3) for j, row in enumerate(chops) for i, chop in enumerate(row)]
# unflatten predictions
def nest(data, template):
data = iter(data)
return [[next(data) for _ in row] for row in template]
pred_lol = nest(predictions, chops)
# almost builtin reconstruction
def np_block_2D(chops):
return np.block([[[x] for x in row] for row in chops])
recon = np_block_2D(pred_lol)
Image.fromarray(recon).save('demo.png')
Reconstructed manipulated image:
But we can do faster than that by avoiding intermediary arrays. Instead, we copy into a preallocated array:
def speed_block_2D(chops):
H = np.cumsum([x[0].shape[0] for x in chops])
W = np.cumsum([x.shape[1] for x in chops[0]])
D = chops[0][0]
recon = np.empty((H[-1], W[-1], D.shape[2]), D.dtype)
for rd, rs in zip(np.split(recon, H[:-1], 0), chops):
for d, s in zip(np.split(rd, W[:-1], 1), rs):
d[...] = s
return recon
Timings, also including a generalized ND-ready variant of each method:
numpy 2D: 0.991 ms
prealloc 2D: 0.389 ms
numpy general: 1.021 ms
prealloc general: 0.448 ms
Code for general case and timings:
def np_block(chops):
d = 0
tl = chops
while isinstance(tl, list):
tl = tl[0]
d += 1
if d < tl.ndim:
def adjust_depth(L):
if isinstance(L, list):
return [adjust_depth(l) for l in L]
else:
ret = L
for j in range(d, tl.ndim):
ret = [ret]
return ret
chops = adjust_depth(chops)
return np.block(chops)
def speed_block(chops):
def line(src, i):
while isinstance(src, list):
src = src[0]
return src.shape[i]
def hyper(src, i):
src = iter(src)
fst = next(src)
if isinstance(fst, list):
res, dtype, szs = hyper(fst, i+1)
szs.append([res[i], *(line(s, i) for s in src)])
res[i] = sum(szs[-1])
return res, dtype, szs
res = np.array(fst.shape)
szs = [res[i], *(s.shape[i] for s in src)]
res[i] = sum(szs)
return res, fst.dtype, [szs]
shape, dtype, szs = hyper(chops, 0)
recon = np.empty(shape, dtype)
def cpchp(dst, src, i, szs=None):
szs = np.array(hyper(src, i)[2]) if szs is None else szs
dst = np.split(dst, np.cumsum(szs[-1][:-1]), i)
if isinstance(src[0], list):
szs = szs[:-1]
for ds, sr in zip(dst, src):
cpchp(ds, sr, i+1, szs)
szs = None
else:
for ds, sr in zip(dst, src):
ds[...] = sr
cpchp(recon, chops, 0, np.array(szs))
return recon
from timeit import timeit
T = (timeit(lambda: speed_block(pred_lol), number=1000),
timeit(lambda: np_block(pred_lol), number=1000),
timeit(lambda: speed_block_2D(pred_lol), number=1000),
timeit(lambda: np_block_2D(pred_lol), number=1000))
assert (np.all(speed_block(pred_lol)==np_block(pred_lol)) and
np.all(speed_block_2D(pred_lol)==np_block(pred_lol)) and
np.all(speed_block(pred_lol)==np_block_2D(pred_lol)))
print(f"""
numpy 2D: {T[3]:10.3f} ms
prealloc 2D: {T[2]:10.3f} ms
numpy general: {T[1]:10.3f} ms
prealloc general: {T[0]:10.3f} ms
""")

Over-percentage while creating PICKLE (Python)

To the following code happens a strange thing when executed: the percentage which shows completition is going over 100%. The code is running for about 45 minutes by now..
This is the code I adapted to Python 3 after the errors original one gave me.
'''
Read and pre-process SD19 characters text file.
Blog post : http://seeb0h.github.io/howto/preprocess-sd19-dataset-for-digits-learning/
Characters in txt file are in 128x128 images with much padded zeros.
It may be suitable for learning to have smaller, deskewed, trimmed, squared ones
Following preprocessing is applied to the dataset:
- Read glyph (see read_glyph())
- Moment-based image deskew (see deskew())
- Trim zeros rows and columns (see trim_padding())
- Resize image while keeping aspect ratio (see resize_with_constant_ratio())
- Pad zeros in order to get a square image (see pad_digit())
Extends original code from http://asciirain.com/wordpress/2013/04/08/exploring-sd19-glyph-recognition-with-randomforests/
Usage:
preprocess_sd19_text.py
'''
#
import os
import re
import sys
import pickle
import cv2
import numpy as np
import math
def read_glyph(_line):
"""Extract digit from the text file
Parameters
----------
_line : string
current line in SD19 text file
Returns
-------
digit : np.array
2D digit 128x128
label : int
the label
"""
match = re.search("^(\S+) (\d+)", _line)
label = match.group(1)
vector = list(match.group(2))
vector = [int(x) for x in vector]
label = ord(label)
label = str(symbol_map[label]) #changed from int to str
digit = np.array(vector, 'float32')
digit = (digit*-1.+1.).reshape(128, 128)
return digit, label
def deskew(img):
"""Deskew digit
Parameters
----------
img : np.array
2D digit array
Returns
-------
dst : Deskewed digit
"""
m = cv2.moments(img)
if abs(m['mu02']) < 1e-2:
return img.copy()
skew = m['mu11']/m['mu02']
rot_mat = np.float32([[1, skew, -0.5*max(img.shape[0], img.shape[1])*skew], [0, 1, 0]])
img = cv2.warpAffine(img, rot_mat, (img.shape[0], img.shape[1]), flags=cv2.WARP_INVERSE_MAP | cv2.INTER_LINEAR)
return img
def resize_with_constant_ratio(img, char_dim):
"""Resize image while keeping aspect ratio. Max dim is char_dim
pad_dim is applied in order to have derivative friendly image
Parameters
----------
img : np.array
2D digit array
char_dim : int
dst dim
Returns
-------
dst : resized digit
"""
roi_h = img.shape[0]
roi_w = img.shape[1]
max_dim = max(roi_w, roi_h)
pad_dim = 2
scale = float(char_dim-pad_dim) / max_dim
if roi_w >= roi_h:
new_w = int(char_dim-pad_dim)
new_h = int(roi_h * scale)
else:
new_w = int(roi_w * scale)
new_h = int(char_dim-pad_dim)
dst = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_LINEAR)
return dst
def trim_padding(img):
"""Trim zeros rows and columns
Parameters
----------
img : np.array
2D digit array
Returns
-------
dst : trimmed digit
"""
mask_row = np.all(np.equal(img, 0), axis=1)
dst = img[~mask_row]
mask_col = np.all(np.equal(dst, 0), axis=0)
dst = dst[:, ~mask_col]
return dst
def pad_digit(img, char_dim):
"""Pad zeros in order to get a square char_dimxchar_dim image
Parameters
----------
img : np.array
2D digit array
char_dim : int
image dim
Returns
-------
dst : padded digit
"""
pad_h = char_dim-img.shape[0]
pad_w = char_dim-img.shape[1]
pad_h_b = math.floor(pad_h/2)
pad_h_t = pad_h - pad_h_b
pad_w_r = math.floor(pad_w/2)
pad_w_l = pad_w - pad_w_r
dst = np.hstack(( img, np.zeros((img.shape[0], pad_w_r))))
dst = np.hstack(( np.zeros((dst.shape[0], pad_w_l)), dst))
dst = np.vstack(( dst, np.zeros((pad_h_b, dst.shape[1]))))
dst = np.vstack(( np.zeros((pad_h_t, dst.shape[1])), dst))
return dst
def print_overwrite(text):
"""Print with overwrite (for progression counter)
Parameters
----------
text : string
text to display
"""
delete = "\b" * (len (text)+1)
print ("{0}{1}".format(delete, text)),
if __name__ == '__main__':
print (__doc__)
sd19_filename = "sd19-binary_digits.txt"
data = open(sd19_filename, "r")
dataset = []
symbol_map = dict([(x, chr(x)) for x in list(range(48, 58)) + list(range(65, 91)) + list(range(97, 123))]) #added list() to every range
current_dir = os.curdir
num_records = 0
num_lines = 402953
char_dim=28
pickle_name = "SD19_" + str(char_dim) + "x" + str(char_dim) + "_"
for line in data:
num_records += 1
if num_records%20000 == 0:
with open(os.path.join(current_dir, pickle_name +\
str(num_records) + ".pickle"), 'wb') as f:
pickle.dump(dataset, f, pickle.HIGHEST_PROTOCOL)
print_overwrite("num_records : {}/{} - {:5.2f}%"\
.format(num_records, num_lines, num_records*1./num_lines*100))
digit, label = read_glyph(line)
digit_deskewed = deskew(digit)
digit_trimmed = trim_padding(digit_deskewed)
digit_resized = resize_with_constant_ratio(digit_trimmed, char_dim)
digit_padded = pad_digit(digit_resized, char_dim)
item = []
item.append((digit_padded*255).astype('uint8'))
item.append(label)
dataset.append(item)
with open(os.path.join(current_dir, pickle_name +\
str(num_lines) + ".pickle"), 'wb') as f:
pickle.dump(dataset, f, pickle.HIGHEST_PROTOCOL)
It is used to create PICKLE from a .txt file containing binary images. For more see here.
And my "error"...
By now it is at 135% and last PICKLE file is around 400mb...
Why it happens ? Also, it continue to create files (should have stopped at 400.000 or little more).

If statement to cycle through individual elements in array in Python

In my code, I am trying to import a grayscale image (2D array) and then solve for the optical density (OD) based off an empirical formula I came up with. The optical density has a relationship with the grayscale value where OD = 0.51*((f-22.08)/(176.09-f))**(1./-1.519) where f is the grayscale value of each element in the array. Then, I converted it into an RGB image.
My problem is I am trying to run each individual element of the image array into an if statement. It does not enter the statement though. What I want to do is increase the intensity of each individual element or pixel value in R, G, and B based on what condition is met with the optical density. Say if it has an OD value that falls between b and c, it adds [128,0,0] to each element that satisfies that criteria.
t = Image.open("IMG_1.jpg").convert('L') #grayscale image
f = array(t) #Convert test image into an array
OD = 0.51*((f-22.08)/(176.09-f))**(1./-1.519) #Empirical Rodbard formula
OD[np.isnan(OD)] = 0
def to_rgb5(im):
OD.resize((OD.shape[0], OD.shape[1], 1))
return np.repeat(OD.astype(np.uint8), 3, 2)
cmap = plt.get_cmap('jet')
rgba_img = cmap(OD)
rgb_img = np.delete(rgba_img, 3, 2)
a = 0.08
b = 0.11
c = 0.15
if np.all(OD < a):
background_noise = rgb_img
if np.all(OD < b):
small = rgb_img + [128, 0, 0]
elif np.all(OD >= c):
large = rgb_img + [0, 0, 128]
Red = f + small
Green = f
Blue = f + large
I was able to get it work using the np.where suggested by #Stuart
t = Image.open("IMG_1.jpg").convert('L') #grayscale image
f = array(t) #Convert test image into an array
OD = 0.51*((f-22.08)/(176.09-f))**(1./-1.519) #Empirical Rodbard formula
OD[np.isnan(OD)] = 0
thB = 0.02
ththin = 0.11
ththick = 0.15
GSb = 162
GSthin = 150
GSthick = 145
if np.where(OD < ththin):
thresholded = threshold(f, GSthin, GSb)
def to_rgb1(thresholded):
thresholded.resize((thresholded.shape[0], thresholded.shape[1], 1))
return np.repeat(thresholded.astype(np.uint8), 3, 2)
cmap = plt.get_cmap('jet')
rgba_img1 = cmap(thresholded)
rgb_img1 = np.delete(rgba_img1, 3, 2)
view = rgb_img1[:, :, 2]
view[view < 0.1] += 128
thin = rgb_img1
if np.where(OD > ththick):
thresholded2 = threshold(f, threshmax = GSthick)
def to_rgb2(thresholded2):
thresholded2.resize((thresholded2.shape[0], thresholded2.shape[1], 1))
return np.repeat(thresholded2.astype(np.uint8), 3, 2)
cmap = plt.get_cmap('jet')
rgba_img2 = cmap(thresholded2)
rgb_img2 = np.delete(rgba_img2, 3, 2)
view2 = rgb_img2[:, :, 0]
view2[view2 > 0] += 128
thick = rgb_img2

Categories

Resources