Concatenating multiple images into one - python

This function receives a list of numpy arrays that are cropped parts of an image. The crops are all the same size, except for the right-most and bottom-most crops, which may be smaller.
predictions[2] returns the 3rd sub-image cropped from the original image. Each crop is a numpy array. There are WxH crops, enumerated from left to right, top to bottom (so if 4 sub-images make up the width, the 5th entry in predictions is the left-most sub-image of the 2nd row of sub-images).
crops contains the information needed to determine how many horizontal and vertical sub-images make up the reconstructed image. crops[2][3] contains the cropped sub-image that is 3rd from the top and 4th from the left.
The images in crops have smaller dimensions than the ones in predictions (I am basically making a model that increases the resolution of images). The reconstructed image is built from the images in predictions, arranged in the same order as the ones in crops.
import numpy as np

def reconstruct(predictions, crops):
    if len(crops) != 0:
        print("use crops")

    # TODO: properly extract the size of the full image
    width_length = 0
    height_length = 0

    full_image = np.empty(shape=(height_length, width_length))
    print(full_image.shape)

    # TODO: properly merge the crops back into a single image
    for height in range(len(predictions[0])):
        for width in range(len(predictions)):
            # concatenate here
            print(height, width)

    return full_image
I was going to use numpy.concatenate, but according to other answers I've seen on SO it wouldn't be an efficient way of doing it (apparently numpy just allocates a new array, copies the old data into it, and then appends the new data, so repeated concatenation recopies everything over and over). So now I'm left wondering how to properly merge my multiple images into a single image. The current idea I was going for was to create a Python list of the proper shape and progressively fill it with each numpy array's data, but I'm not sure that's the right idea either.
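For what it's worth, here is a minimal sketch (with made-up 64x64 tile sizes and a 4x4 grid) of the difference between growing an array by repeated concatenation and copying into a preallocated buffer; the second version allocates once and touches each pixel only once:

import numpy as np

tiles = [np.random.rand(64, 64, 3) for _ in range(16)]  # hypothetical crops, row-major order

# Repeated concatenation: every call allocates a new array and recopies everything so far
row = tiles[0]
for t in tiles[1:4]:
    row = np.concatenate((row, t), axis=1)

# Preallocation: allocate the final array once, then assign each tile into its slot
out = np.empty((4 * 64, 4 * 64, 3))
for k, t in enumerate(tiles):
    r, c = divmod(k, 4)
    out[r * 64:(r + 1) * 64, c * 64:(c + 1) * 64] = t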
Here is, more or less, the kind of set of sub-images I'm trying to concatenate into a single image:
Here is the expected result:
And to help you out with understanding what more might be available to you, here is some more code:
def predict(args):
    model = load_model(save_dir + '/' + args.model)
    image = skimage.io.imread(tests_path + args.image)
    predictions = []
    images = []

    crops = seq_crop(image)  # crops the image into multiple sub-parts based on 'input_' constants

    for i in range(len(crops)):         # amount of vertical crops
        for j in range(len(crops[0])):  # amount of horizontal crops
            current_image = crops[i][j]
            images.append(current_image)

    # Hack because the GPU can only handle one image at a time
    for p in range(len(images)):
        input_img = np.expand_dims(images[p], 0)         # add the image to a batch where it's the only member
        predictions.append(model.predict(input_img)[0])  # model.predict returns a list of predictions, one per image in the batch

    return predictions, image, crops
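As an aside, if GPU memory allows, the one-image-at-a-time hack above can usually be replaced by a single batched call; a sketch, assuming all crops share the same shape (if the edge tiles are smaller, they would need padding first):

images_batch = np.stack(images, axis=0)          # shape (N, H, W, C)
predictions = list(model.predict(images_batch))  # one forward pass for the whole batch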
# adapted from: https://stackoverflow.com/a/52463034/9768291
def seq_crop(img):
    """
    Crops the whole image into a list of sub-images of the same size.
    The size comes from the "input_" variables in the 'constants' (Evaluation).
    The bottom and right images are padded with 0.
    :param img: input image
    :return: list of sub-images with defined size
    """
    width_shape = ceildiv(img.shape[1], input_width)
    height_shape = ceildiv(img.shape[0], input_height)
    sub_images = []  # will contain all the cropped sub-parts of the image

    for j in range(height_shape):
        horizontal = []
        for i in range(width_shape):
            horizontal.append(crop_precise(img, i * input_width, j * input_height, input_width, input_height))
        sub_images.append(horizontal)

    return sub_images
def crop_precise(img, coord_x, coord_y, width_length, height_length):
    """
    Crops a precise portion of an image.
    When trying to crop outside of the boundaries, the input is padded with zeros.
    :param img: image to crop
    :param coord_x: width coordinate (top left point)
    :param coord_y: height coordinate (top left point)
    :param width_length: width of the cropped portion starting from coord_x
    :param height_length: height of the cropped portion starting from coord_y
    :return: the cropped part of the image
    """
    tmp_img = img[coord_y:coord_y + height_length, coord_x:coord_x + width_length]
    return float_im(tmp_img)  # from [0,255] to [0.,1.]
# from https://stackoverflow.com/a/17511341/9768291
def ceildiv(a, b):
    """
    To get the ceiling of a division.
    :param a: numerator
    :param b: denominator
    :return: ceil(a / b) as an integer
    """
    return -(-a // b)
if __name__ == '__main__':
    preds, original, crops = predict(args)  # returns the predictions along with the original
    # TODO: reconstruct image
    enhanced = reconstruct(preds, crops)    # reconstructs the enhanced image from predictions
EDIT:
The answer worked. Here is the version I've used:
# adapted from https://stackoverflow.com/a/52733370/9768291
def reconstruct(predictions, crops):
    # unflatten predictions
    def nest(data, template):
        data = iter(data)
        return [[next(data) for _ in row] for row in template]
    predictions = nest(predictions, crops)

    H = np.cumsum([x[0].shape[0] for x in predictions])
    W = np.cumsum([x.shape[1] for x in predictions[0]])
    D = predictions[0][0]
    recon = np.empty((H[-1], W[-1], D.shape[2]), D.dtype)
    for rd, rs in zip(np.split(recon, H[:-1], 0), predictions):
        for d, s in zip(np.split(rd, W[:-1], 1), rs):
            d[...] = s
    return recon

The most convenient approach is probably np.block:
import numpy as np
from scipy import misc
from PIL import Image

# get example picture
data = misc.face()

# chop it up
I, J = map(np.arange, (200, 200), data.shape[:2], (200, 200))
chops = [np.split(row, J, axis=1) for row in np.split(data, I, axis=0)]

# do something with the bits
predictions = [chop - (i+j) * (chop >> 3) for j, row in enumerate(chops) for i, chop in enumerate(row)]

# unflatten predictions
def nest(data, template):
    data = iter(data)
    return [[next(data) for _ in row] for row in template]

pred_lol = nest(predictions, chops)

# almost builtin reconstruction
def np_block_2D(chops):
    return np.block([[[x] for x in row] for row in chops])

recon = np_block_2D(pred_lol)
Image.fromarray(recon).save('demo.png')
Reconstructed manipulated image:
But we can go faster than that by avoiding intermediate arrays. Instead, we copy into a preallocated array:
def speed_block_2D(chops):
    H = np.cumsum([x[0].shape[0] for x in chops])
    W = np.cumsum([x.shape[1] for x in chops[0]])
    D = chops[0][0]
    recon = np.empty((H[-1], W[-1], D.shape[2]), D.dtype)
    for rd, rs in zip(np.split(recon, H[:-1], 0), chops):
        for d, s in zip(np.split(rd, W[:-1], 1), rs):
            d[...] = s
    return recon
Timings, also including a generalized ND-ready variant of each method:
numpy 2D: 0.991 ms
prealloc 2D: 0.389 ms
numpy general: 1.021 ms
prealloc general: 0.448 ms
Code for general case and timings:
def np_block(chops):
    d = 0
    tl = chops
    while isinstance(tl, list):
        tl = tl[0]
        d += 1
    if d < tl.ndim:
        def adjust_depth(L):
            if isinstance(L, list):
                return [adjust_depth(l) for l in L]
            else:
                ret = L
                for j in range(d, tl.ndim):
                    ret = [ret]
                return ret
        chops = adjust_depth(chops)
    return np.block(chops)
def speed_block(chops):
    def line(src, i):
        while isinstance(src, list):
            src = src[0]
        return src.shape[i]
    def hyper(src, i):
        src = iter(src)
        fst = next(src)
        if isinstance(fst, list):
            res, dtype, szs = hyper(fst, i+1)
            szs.append([res[i], *(line(s, i) for s in src)])
            res[i] = sum(szs[-1])
            return res, dtype, szs
        res = np.array(fst.shape)
        szs = [res[i], *(s.shape[i] for s in src)]
        res[i] = sum(szs)
        return res, fst.dtype, [szs]
    shape, dtype, szs = hyper(chops, 0)
    recon = np.empty(shape, dtype)
    def cpchp(dst, src, i, szs=None):
        szs = np.array(hyper(src, i)[2]) if szs is None else szs
        dst = np.split(dst, np.cumsum(szs[-1][:-1]), i)
        if isinstance(src[0], list):
            szs = szs[:-1]
            for ds, sr in zip(dst, src):
                cpchp(ds, sr, i+1, szs)
                szs = None
        else:
            for ds, sr in zip(dst, src):
                ds[...] = sr
    cpchp(recon, chops, 0, np.array(szs))
    return recon
from timeit import timeit

T = (timeit(lambda: speed_block(pred_lol), number=1000),
     timeit(lambda: np_block(pred_lol), number=1000),
     timeit(lambda: speed_block_2D(pred_lol), number=1000),
     timeit(lambda: np_block_2D(pred_lol), number=1000))

assert (np.all(speed_block(pred_lol) == np_block(pred_lol)) and
        np.all(speed_block_2D(pred_lol) == np_block(pred_lol)) and
        np.all(speed_block(pred_lol) == np_block_2D(pred_lol)))

print(f"""
numpy 2D:         {T[3]:10.3f} ms
prealloc 2D:      {T[2]:10.3f} ms
numpy general:    {T[1]:10.3f} ms
prealloc general: {T[0]:10.3f} ms
""")

Related

Why the extracted watermark is not the same as the embedded one?

I'm trying to implement the method from this paper for a university project. The idea is to insert an invisible watermark into a grayscale image, which can be extracted later to verify image ownership.
This is the code I wrote for the watermark embedding process:
import pywt
import numpy as np
import cv2
from PIL import Image
from math import sqrt, log10
from scipy.fftpack import dct, idct

def Get_MSB_LSB_Watermark():  # function that separates the watermark into MSB and LSB images
    MSBs = []
    LSBs = []
    for i in range(len(Watermark)):
        binary = '{:0>8}'.format(str(bin(Watermark[i]))[2:])
        MSB = binary[0:4]
        LSB = binary[4:]
        MSB = int(MSB, 2)
        LSB = int(LSB, 2)
        MSBs.append(MSB)
        LSBs.append(LSB)
    MSBs = np.array(MSBs)
    LSBs = np.array(LSBs)
    return MSBs.reshape(64, 64), LSBs.reshape(64, 64)

def split(array, nrows, ncols):  # split array into blocks of size nrows * ncols
    r, h = array.shape
    return (array.reshape(h//nrows, nrows, -1, ncols)
                 .swapaxes(1, 2)
                 .reshape(-1, nrows, ncols))

def unblockshaped(arr, h, w):  # the inverse of the split function
    n, nrows, ncols = arr.shape
    return (arr.reshape(h//nrows, -1, nrows, ncols)
               .swapaxes(1, 2)
               .reshape(h, w))

def ISVD(U, S, V):  # the inverse of singular value decomposition
    s = np.zeros(np.shape(U))
    for i in range(4):
        s[i, i] = S[i]
    recon_image = U @ s @ V
    return recon_image

def Watermark_Embedding(blocks, watermark):
    Watermarked_blocks = []
    k1 = []
    k2 = []
    # convert the watermark to a list
    w = list(np.ndarray.flatten(watermark))
    for i in range(len(blocks)):
        B = blocks[i]
        # apply singular value decomposition to the block
        U, s, V = np.linalg.svd(B)
        # modify the singular values of the block
        P = s[1] - s[2]
        delta = abs(w[i]) - P
        s[1] = s[1] + delta
        if s[0] >= s[1]:
            k1.append(1)
        else:
            k1.append(-1)
        # the inverse of SVD after watermark embedding
        reconstructed_B = ISVD(U, s, V)
        Watermarked_blocks.append(reconstructed_B)
    for j in range(len(w)):
        if w[j] >= 0:
            k2.append(1)
        else:
            k2.append(-1)
    return k1, k2, np.array(Watermarked_blocks)

def apply_dct(image_array):
    size = len(image_array[0])
    all_subdct = np.empty((size, size))
    for i in range(0, size, 4):
        for j in range(0, size, 4):
            subpixels = image_array[i:i+4, j:j+4]
            subdct = dct(dct(subpixels.T, norm="ortho").T, norm="ortho")
            all_subdct[i:i+4, j:j+4] = subdct
    return all_subdct

def inverse_dct(all_subdct):
    size = len(all_subdct[0])
    all_subidct = np.empty((size, size))
    for i in range(0, size, 4):
        for j in range(0, size, 4):
            subidct = idct(idct(all_subdct[i:i+4, j:j+4].T, norm="ortho").T, norm="ortho")
            all_subidct[i:i+4, j:j+4] = subidct
    return all_subidct

# read watermark
Watermark = Image.open('Copyright.png').convert('L')
Watermark = list(Watermark.getdata())

# separate the watermark into LSB and MSB images
Watermark1, Watermark2 = Get_MSB_LSB_Watermark()

# apply discrete cosine transform on the two generated images
DCT_Watermark1 = apply_dct(Watermark1)
DCT_Watermark2 = apply_dct(Watermark2)

# read cover image
Cover_Image = Image.open('10.png').convert('L')

# apply 1-level discrete wavelet transform
LL1, (LH1, HL1, HH1) = pywt.dwt2(Cover_Image, 'haar')

# split the LH1 and HL1 subbands into blocks of size 4*4
blocks_LH1 = split(LH1, 4, 4)
blocks_HL1 = split(HL1, 4, 4)

# watermark embedding in LH1 and HL1, and key generation
Key1, Key3, WatermarkedblocksLH1 = Watermark_Embedding(blocks_LH1, DCT_Watermark1)
Key2, Key4, WatermarkedblocksHL1 = Watermark_Embedding(blocks_HL1, DCT_Watermark2)

# merge the watermarked blocks
reconstructed_LH1 = unblockshaped(WatermarkedblocksLH1, 256, 256)
reconstructed_HL1 = unblockshaped(WatermarkedblocksHL1, 256, 256)

# apply the inverse discrete wavelet transform to get the watermarked image
IDWT = pywt.idwt2((LL1, (reconstructed_LH1, reconstructed_HL1, HH1)), 'haar')
cv2.imwrite('Watermarked_img.png', IDWT)
This is the code I wrote for the extraction process:
import pywt
from scipy import fftpack
import numpy as np
import cv2
from PIL import Image
import scipy
from math import sqrt, log10
from Watermark_Embedding import *

def Watermark_Extraction(blocks, key1, key2):
    Extracted_Watermark = []
    for i in range(len(blocks)):
        B = blocks[i]
        # apply SVD on the block
        U, s, V = np.linalg.svd(B)
        if key1[i] == 1:
            P = s[1] - s[2]
            Extracted_Watermark.append(P)
        else:
            P = s[0] - s[2]
            Extracted_Watermark.append(P)
    for j in range(len(Extracted_Watermark)):
        if key2[j] == 1:
            Extracted_Watermark[j] = Extracted_Watermark[j]
        else:
            Extracted_Watermark[j] = -Extracted_Watermark[j]
    return np.array(Extracted_Watermark)

def Merge_W1_W2():
    Merged_watermark = []
    w1 = list(np.ndarray.flatten(IDCTW1))
    w2 = list(np.ndarray.flatten(IDCTW2))
    for i in range(len(w2)):
        bw1 = '{:0>4}'.format(bin(int(abs(w1[i])))[2:])
        bw2 = '{:0>4}'.format(bin(int(abs(w2[i])))[2:])
        P = bw1 + bw2
        pixel = int(P, 2)
        Merged_watermark.append(pixel)
    return Merged_watermark

Watermarked_Image = Image.open('Watermarked_img.png')
LL1, (LH1, HL1, HH1) = pywt.dwt2(Watermarked_Image, 'haar')
blocks_LH1 = split(LH1, 4, 4)
blocks_HL1 = split(HL1, 4, 4)
W1 = Watermark_Extraction(blocks_LH1, Key1, Key3)
W2 = Watermark_Extraction(blocks_HL1, Key2, Key4)
W1 = W1.reshape(64, 64)
W2 = W2.reshape(64, 64)
IDCTW1 = inverse_dct(W1)
IDCTW2 = inverse_dct(W2)
Merged = np.array(Merge_W1_W2())
Merged = Merged.reshape(64, 64)
cv2.imwrite('Extracted_Watermark.png', Merged)
The 512*512 cover image:
The 64*64 watermark I used:
The watermarked image:
The extracted watermark I get:
I calculated the similarity between the two watermarks using SSIM:
from skimage.metrics import structural_similarity
import cv2

original_watermark = cv2.imread('Copyright.png')
extracted_watermark = cv2.imread('Extracted_Watermark.png')

# convert images to grayscale
original_watermark = cv2.cvtColor(original_watermark, cv2.COLOR_BGR2GRAY)
extracted_watermark = cv2.cvtColor(extracted_watermark, cv2.COLOR_BGR2GRAY)

# compute SSIM between the two images
(score, diff) = structural_similarity(original_watermark, extracted_watermark, full=True)
print("SSIM = ", score)
I didn't apply any modification to the watermarked image, and the SSIM I got is 0.8445354561524052. According to the paper, however, the SSIM of the extracted watermark should be 0.99.
I don't know what's wrong with my code, and I have a deadline in two days, so I really need help.
Thanks in advance.
There are two issues:
In Merge_W1_W2 you are using int to convert from float to int, but int truncates, which introduces errors for numbers whose floating point representation is not exact (e.g. 14.99999999999997); this can be fixed by using round instead.
Saving with cv2.imwrite('Watermarked_img.png', IDWT) is a lossy operation because it rounds the values in IDWT to the nearest integer; if you instead pass IDWT directly to the extraction step (Watermarked_Image = IDWT), you will get back the exact same watermark image.
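A quick illustration of the first point, with a made-up value that sits a hair under 15:

x = 15 - 1e-14   # prints as 14.99999999999999
print(int(x))    # 14 -- int() truncates toward zero
print(round(x))  # 15 -- round() picks the nearest integer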

Pytorch Data Generator for extracting 2D images from many 3D cube

I'm struggling to create a data generator in PyTorch that extracts 2D images from many 3D cubes saved in .dat format.
There are a total of 200 3D cubes, each of shape 128*128*128. I want to extract 2D images from all of these cubes along the length and breadth.
For example, let a be a cube of size 128*128*128.
I want to extract all 2D images along the length, i.e. a[:, i, :], which gives 128 2D images, and similarly along the width, i.e. a[:, :, i], which gives another 128 2D images. That makes a total of 256 2D images per cube, and repeating this for all 200 cubes gives 51,200 2D images; a small sketch of this slicing follows.
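For concreteness, a minimal sketch of the slicing just described, with a random stand-in cube:

import numpy as np

a = np.random.rand(128, 128, 128)                       # stand-in for one cube
along_length = [a[:, i, :] for i in range(a.shape[1])]  # 128 2D images
along_width = [a[:, :, i] for i in range(a.shape[2])]   # 128 more 2D images
print(len(along_length) + len(along_width))             # 256 slices per cube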
So far I've tried a very basic implementation, which works fine but takes approximately 10 minutes to run. I'd like help creating a more optimal implementation, keeping time and space complexity in mind. My current approach has a time complexity of O(n²); can that be reduced?
Here is the current implementation:
from os.path import join as pjoin
import torch
import numpy as np
import os
from tqdm import tqdm
from torch.utils import data

class DataGenerator(data.Dataset):
    def __init__(self, is_transform=True, augmentations=None):
        self.is_transform = is_transform
        self.augmentations = augmentations
        self.dim = (128, 128, 128)

        seismicSections = []  # input
        faultSections = []    # ground truth

        for fileName in tqdm(os.listdir(pjoin('train', 'seis')), total=len(os.listdir(pjoin('train', 'seis')))):
            # .dat file contains the unrolled cube, so we need to reshape it
            unrolledVolSeismic = np.fromfile(pjoin('train', 'seis', fileName), dtype=np.single)
            # transpose the axes to get the height axis at axis=0, length at axis=1, and width at axis=2
            reshapedVolSeismic = np.transpose(unrolledVolSeismic.reshape(self.dim))

            unrolledVolFault = np.fromfile(pjoin('train', 'fault', fileName), dtype=np.single)
            reshapedVolFault = np.transpose(unrolledVolFault.reshape(self.dim))

            for idx in range(reshapedVolSeismic.shape[2]):
                seismicSections.append(reshapedVolSeismic[:, :, idx])
                faultSections.append(reshapedVolFault[:, :, idx])

            for idx in range(reshapedVolSeismic.shape[1]):
                seismicSections.append(reshapedVolSeismic[:, idx, :])
                faultSections.append(reshapedVolFault[:, idx, :])

        self.seismicSections = seismicSections
        self.faultSections = faultSections

    def __len__(self):
        return len(self.seismicSections)

    def __getitem__(self, index):
        X = self.seismicSections[index]
        Y = self.faultSections[index]
        return X, Y
Please Help!!!
Why not store only the 3D data in memory and let the __getitem__ method "slice" it on the fly?
class CachedVolumeDataset(Dataset):
    def __init__(self, ...):
        super(...)
        self._volumes_x = # a list of 200 128x128x128 volumes
        self._volumes_y = # a list of 200 128x128x128 volumes

    def __len__(self):
        return len(self._volumes_x) * (128 + 128)

    def __getitem__(self, index):
        # extract volume index from general index:
        vidx = index // (128 + 128)
        # extract slice index
        sidx = index % (128 + 128)
        if sidx < 128:
            # first dim
            x = self._volumes_x[vidx][:, :, sidx]
            y = self._volumes_y[vidx][:, :, sidx]
        else:
            sidx -= 128
            # second dim
            x = self._volumes_x[vidx][:, sidx, :]
            y = self._volumes_y[vidx][:, sidx, :]
        return torch.squeeze(x), torch.squeeze(y)
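A hedged usage sketch (the constructor arguments and volume loading are placeholders, exactly as in the class above): wrapping the dataset in a DataLoader then batches the slices as they are cut:

from torch.utils.data import DataLoader

dataset = CachedVolumeDataset(...)  # hypothetical: fill in however the volumes are loaded
loader = DataLoader(dataset, batch_size=16, shuffle=True, num_workers=4)
for x, y in loader:
    pass  # x and y are batches of 2D sections, sliced on the fly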

Quick pixel manipulation with Pillow and/or NumPy

I'm trying to improve the speed of my image manipulation as it's been too slow for actual use.
What I need to do is apply a complex transformation to the colour of every pixel in an image. The manipulation is basically a vector transform like T(r, g, b, a) => (r * x, g * x, b * y, a): in layman's terms, the red and green values are each multiplied by one constant, the blue value by a different one, and alpha is kept. But pixels whose RGB colour matches certain specific colours must be handled differently: those follow a dictionary/transformation table mapping RGB => newRGB, again keeping alpha.
The algorithm would be:
for each pixel in image:
    if pixel[r, g, b] in special:
        return special[pixel[r, g, b]] + pixel[a]
    else:
        return T(pixel)
It's simple, but its speed has been sub-optimal. I believe there's some way to do it with numpy vectorization, but I could not find out how.
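As a side note, the per-channel multiplications alone vectorize naturally as 256-entry lookup tables indexed by the channel values; a minimal sketch (the special-colour replacement, which the answers below cover, still needs masks on top of this):

import numpy as np

rg_lut = (np.arange(256) * 0.75).astype(np.uint8)  # one entry per possible channel value
b_lut = (np.arange(256) * 0.83).astype(np.uint8)

img = np.zeros((480, 640, 4), dtype=np.uint8)  # stand-in RGBA buffer
img[..., 0] = rg_lut[img[..., 0]]  # R
img[..., 1] = rg_lut[img[..., 1]]  # G
img[..., 2] = b_lut[img[..., 2]]   # B; alpha (channel 3) is left untouched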
Important details about the implementation:
I don't care about the original buffer/image (manipulation can be in place)
I can use wxPython, Pillow and NumPy
Order or dimension of the array is not important as long as the buffer keeps the length
The buffer is obtained from a wxPython Bitmap, and special and (RG|B)_pal are transformation tables; the end result will become a wxPython Bitmap too. They're obtained like this:
# buffer
bitmap = wx.Bitmap  # it's a valid wxBitmap here, this is just to let you know it exists
buf = bytearray(bitmap.GetWidth() * bitmap.GetHeight() * 4)
bitmap.CopyToBuffer(buf, wx.BitmapBufferFormat_RGBA)

self.RG_mult = 0.75
self.B_mult = 0.83

self.RG_pal = []
self.B_pal = []
for i in range(0, 256):
    self.RG_pal.append(int(i * self.RG_mult))
    self.B_pal.append(int(i * self.B_mult))

self.special = {
    # RGB: new_RGB
    # implementation specific for the fastest access
    # with buffer, keys are 24-bit numbers; with PIL, keys are tuples
}
Implementations I tried include direct buffer manipulation:
for x in range(0, bitmap.GetWidth() * bitmap.GetHeight()):
    index = x * 4
    r = buf[index]
    g = buf[index + 1]
    b = buf[index + 2]
    rgb = buf[index:index + 3]
    if rgb in self.special:
        special = self.special[rgb]
        buf[index] = special[0]
        buf[index + 1] = special[1]
        buf[index + 2] = special[2]
    else:
        buf[index] = self.RG_pal[r]
        buf[index + 1] = self.RG_pal[g]
        buf[index + 2] = self.B_pal[b]
Use Pillow with getdata():
pil = Image.frombuffer("RGBA", (bitmap.GetWidth(), bitmap.GetHeight()), buf)
pil_buf = []
for colour in pil.getdata():
    colour_idx = colour[0:3]
    if colour_idx in self.special:
        special = self.special[colour_idx]
        pil_buf.append((
            special[0],
            special[1],
            special[2],
            colour[3],
        ))
    else:
        pil_buf.append((
            self.RG_pal[colour[0]],
            self.RG_pal[colour[1]],
            self.B_pal[colour[2]],
            colour[3],
        ))
pil.putdata(pil_buf)
buf = pil.tobytes()
Pillow with point() and getdata() (the fastest I achieved, more than twice as fast as the others):
pil = Image.frombuffer("RGBA", (bitmap.GetWidth(), bitmap.GetHeight()), buf)
r, g, b, a = pil.split()
r = r.point(lambda r: r * self.RG_mult)
g = g.point(lambda g: g * self.RG_mult)
b = b.point(lambda b: b * self.B_mult)
pil = Image.merge("RGBA", (r, g, b, a))

i = 0
for colour in pil.getdata():
    colour_idx = colour[0:3]
    if colour_idx in self.special:
        special = self.special[colour_idx]
        pil.putpixel(
            (i % bitmap.GetWidth(), i // bitmap.GetWidth()),
            (
                special[0],
                special[1],
                special[2],
                colour[3],
            )
        )
    i += 1
buf = pil.tobytes()
I also tried working with numpy.where, but I could not get it to work. With numpy.apply_along_axis it worked, but the performance was terrible. In my other numpy attempts I could not access the RGB values together, only as separate bands.
Pure Numpy Version
This first optimization relies on the fact that one probably has far fewer special colors than pixels. I use numpy to do all the inner loops. This works well with images of up to 1 MP. If you have multiple images, I'd recommend the parallel approach below.
Let's define a test case:
import requests
from io import BytesIO
from PIL import Image
import numpy as np

# Load some image, so we have the same
response = requests.get("https://upload.wikimedia.org/wikipedia/commons/4/41/Rick_Astley_Dallas.jpg")
# Make areas of known color
img = Image.open(BytesIO(response.content)).rotate(10, expand=True).rotate(-10, expand=True, fillcolor=(255,255,255)).convert('RGBA')
print("height: %d, width: %d (%.2f MP)" % (img.height, img.width, img.width * img.height / 1e6))

height: 5034, width: 5792 (29.16 MP)
Define our special colors:
specials = {
    (4, 1, 6): (255, 255, 255),
    (0, 0, 0): (255, 0, 255),
    (255, 255, 255): (0, 255, 0)
}
Algorithm
def transform_map(img, specials, R_factor, G_factor, B_factor):
    # Your transform
    def transform(x, a):
        a *= x
        return a.clip(0, 255).astype(np.uint8)

    # Convert to array
    img_array = np.asarray(img)

    # Extract channels
    R = img_array.T[0]
    G = img_array.T[1]
    B = img_array.T[2]
    A = img_array.T[3]

    # Find special colors
    # First, calculate a unique hash per colour
    color_hashes = (R + 2**8 * G + 2**16 * B)
    # Find indices of special colors
    special_idxs = []
    for k, v in specials.items():
        key_arr = np.array(list(k))
        val_arr = np.array(list(v))
        spec_hash = key_arr[0] + 2**8 * key_arr[1] + 2**16 * key_arr[2]
        special_idxs.append(
            {
                'mask': np.where(np.isin(color_hashes, spec_hash)),
                'value': val_arr
            }
        )

    # Apply transform to whole image
    R = transform(R, R_factor)
    G = transform(G, G_factor)
    B = transform(B, B_factor)

    # Replace values where special colors were found
    for idx in special_idxs:
        R[idx['mask']] = idx['value'][0]
        G[idx['mask']] = idx['value'][1]
        B[idx['mask']] = idx['value'][2]

    return Image.fromarray(np.array([R, G, B, A]).T, mode='RGBA')
And finally some benchmarks on an Intel Core i5-6300U @ 2.40 GHz:
import time
times = []
for i in range(10):
    t0 = time.time()
    # Test
    transform_map(img, specials, 1.2, .9, 1.2)
    #
    t1 = time.time()
    times.append(t1 - t0)
np.round(times, 2)
print('average run time: %.2f +/-%.2f' % (np.mean(times), np.std(times)))
average run time: 9.72 +/-0.91
EDIT: Parallelization
With the same setup as above, we can get a 2x speed increase on large images (small ones are faster without numba).
from numba import njit, prange
from numba.core import types
from numba.typed import Dict

# Map dict of special colors or transform over array of pixel values
@njit(parallel=True, locals={'px_hash': types.uint32})
def check_and_transform(img_array, d, T):
    # save shape for later
    shape = img_array.shape
    # flatten image for 1-d iteration
    img_array_flat = img_array.reshape(-1, 3).copy()
    N = img_array_flat.shape[0]
    # replace or map
    for i in prange(N):
        px_hash = np.uint32(0)
        px_hash += img_array_flat[i, 0]
        px_hash += types.uint32(2**8) * img_array_flat[i, 1]
        px_hash += types.uint32(2**16) * img_array_flat[i, 2]
        try:
            img_array_flat[i] = d[px_hash]
        except Exception:
            img_array_flat[i] = (img_array_flat[i] * T).astype(np.uint8)
    # return image
    return img_array_flat.reshape(shape)

# Wrapper for the function above
def map_or_transform_jit(image: Image, specials: dict, T: np.ndarray):
    # assemble numba typed dict
    d = Dict.empty(
        key_type=types.uint32,
        value_type=types.uint8[:],
    )
    for k, v in specials.items():
        k = types.uint32(k[0] + 2**8 * k[1] + 2**16 * k[2])
        v = np.array(v, dtype=np.uint8)
        d[k] = v
    # get rgb channels
    img_arr = np.array(image)
    rgb = img_arr[:, :, :3].copy()
    img_shape = img_arr.shape
    # apply map
    rgb = check_and_transform(rgb, d, T)
    # set color channels
    img_arr[:, :, :3] = rgb
    return Image.fromarray(img_arr, mode='RGBA')
# Benchmark
import time
times = []
for i in range(10):
    t0 = time.time()
    # Test
    test_img = map_or_transform_jit(img, specials, np.array([1, .5, .5]))
    #
    t1 = time.time()
    times.append(t1 - t0)
np.round(times, 2)
print('average run time: %.2f +/- %.2f' % (np.mean(times), np.std(times)))
test_img
average run time: 3.76 +/- 0.08

How to use parsed TFRecords data?

I'm in the process of serializing a large dataset of images with different resolutions. When I create the TFRecords, I also store the shape of the serialized image, in this way:
def convert(folder_dirs, tfrecords_filename='.tfrecords'):
    with tf.python_io.TFRecordWriter(tfrecords_filename) as writer:
        for ix, img_path in enumerate(folder_dirs):
            data, annotation = read_image(img_path)
            image_shape = list(data.shape)
            img_raw = data.tostring()
            annotation_raw = list(annotation)
            example = tf.train.Example(features=tf.train.Features(feature={
                'shape': _int64_feature(image_shape),
                'image_raw': _bytes_feature(img_raw),
                'label_raw': _int64_feature(annotation_raw)}))
            serialized = example.SerializeToString()
            writer.write(serialized)
My question is, how do I use the serialized shape in order to reshape the raw bytes of the images?
def imgs_input_fn(filenames, perform_shuffle=False, repeat_count=1, batch_size=1):
    def _parse_function(serialized):
        features = \
            {
                'shape': tf.FixedLenFeature([], tf.string),
                'image_raw': tf.FixedLenFeature([], tf.string),
                'label_raw': tf.FixedLenFeature([], tf.string)
            }
        parsed_example = tf.parse_single_example(serialized=serialized,
                                                 features=features)
        shape = parsed_example['shape']  # <-- True image shape
        image_raw = parsed_example['image_raw']
        label = parsed_example['label_raw']
        image = tf.io.decode_raw(image_raw, tf.uint16)
        image = tf.reshape(image, [120, 120, 3])  # <-- Reshape needed
        image = tf.cast(image, tf.float32)
        d = dict(zip([input_name], [image])), label
        return image, label
    dataset = tf.data.TFRecordDataset(filenames=filenames)
    dataset = dataset.map(_parse_function)
    ...
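For what it's worth, a minimal sketch of one way to do the dynamic reshape, assuming the TF 1.x API used in the question and that the shape was written as a variable-length int64 list (as in convert above), so it can be parsed with tf.VarLenFeature and fed straight to tf.reshape:

def _parse_function(serialized):
    features = {
        'shape': tf.VarLenFeature(tf.int64),      # the int64 list written by convert()
        'image_raw': tf.FixedLenFeature([], tf.string)
    }
    parsed = tf.parse_single_example(serialized=serialized, features=features)
    shape = tf.sparse.to_dense(parsed['shape'])   # e.g. [H, W, C], different per example
    image = tf.io.decode_raw(parsed['image_raw'], tf.uint16)
    image = tf.reshape(image, shape)              # dynamic, per-example reshape
    return tf.cast(image, tf.float32)

Note that batching images of different resolutions afterwards would still need padded_batch or per-example resizing.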
Have you tried numpy and scipy.ndimage? Below are some conversion and preprocessing helpers (essentially Caffe's io.py) showing how raw buffers, shapes, and resizing fit together:
import numpy as np
import skimage.io
from scipy.ndimage import zoom
from skimage.transform import resize

try:
    # Python3 will most likely not be able to load protobuf
    from caffe.proto import caffe_pb2
except:
    import sys
    if sys.version_info >= (3, 0):
        print("Failed to include caffe_pb2, things might go wrong!")
    else:
        raise


## proto / datum / ndarray conversion

def blobproto_to_array(blob, return_diff=False):
    """
    Convert a blob proto to an array. In default, we will just return the data,
    unless return_diff is True, in which case we will return the diff.
    """
    # Read the data into an array
    if return_diff:
        data = np.array(blob.diff)
    else:
        data = np.array(blob.data)
    # Reshape the array
    if blob.HasField('num') or blob.HasField('channels') or blob.HasField('height') or blob.HasField('width'):
        # Use legacy 4D shape
        return data.reshape(blob.num, blob.channels, blob.height, blob.width)
    else:
        return data.reshape(blob.shape.dim)


def array_to_blobproto(arr, diff=None):
    """Converts a N-dimensional array to blob proto. If diff is given, also
    convert the diff. You need to make sure that arr and diff have the same
    shape, and this function does not do sanity check.
    """
    blob = caffe_pb2.BlobProto()
    blob.shape.dim.extend(arr.shape)
    blob.data.extend(arr.astype(float).flat)
    if diff is not None:
        blob.diff.extend(diff.astype(float).flat)
    return blob


def arraylist_to_blobprotovector_str(arraylist):
    """Converts a list of arrays to a serialized blobprotovec, which could be
    then passed to a network for processing.
    """
    vec = caffe_pb2.BlobProtoVector()
    vec.blobs.extend([array_to_blobproto(arr) for arr in arraylist])
    return vec.SerializeToString()


def blobprotovector_str_to_arraylist(str):
    """Converts a serialized blobprotovec to a list of arrays.
    """
    vec = caffe_pb2.BlobProtoVector()
    vec.ParseFromString(str)
    return [blobproto_to_array(blob) for blob in vec.blobs]


def array_to_datum(arr, label=None):
    """Converts a 3-dimensional array to datum. If the array has dtype uint8,
    the output data will be encoded as a string. Otherwise, the output data
    will be stored in float format.
    """
    if arr.ndim != 3:
        raise ValueError('Incorrect array shape.')
    datum = caffe_pb2.Datum()
    datum.channels, datum.height, datum.width = arr.shape
    if arr.dtype == np.uint8:
        datum.data = arr.tostring()
    else:
        datum.float_data.extend(arr.flat)
    if label is not None:
        datum.label = label
    return datum


def datum_to_array(datum):
    """Converts a datum to an array. Note that the label is not returned,
    as one can easily get it by calling datum.label.
    """
    if len(datum.data):
        return np.fromstring(datum.data, dtype=np.uint8).reshape(
            datum.channels, datum.height, datum.width)
    else:
        return np.array(datum.float_data).astype(float).reshape(
            datum.channels, datum.height, datum.width)


## Pre-processing

class Transformer:
    """
    Transform input for feeding into a Net.

    Note: this is mostly for illustrative purposes and it is likely better
    to define your own input preprocessing routine for your needs.

    Parameters
    ----------
    net : a Net for which the input should be prepared
    """
    def __init__(self, inputs):
        self.inputs = inputs
        self.transpose = {}
        self.channel_swap = {}
        self.raw_scale = {}
        self.mean = {}
        self.input_scale = {}

    def __check_input(self, in_):
        if in_ not in self.inputs:
            raise Exception('{} is not one of the net inputs: {}'.format(
                in_, self.inputs))

    def preprocess(self, in_, data):
        """
        Format input for Caffe:
        - convert to single
        - resize to input dimensions (preserving number of channels)
        - transpose dimensions to K x H x W
        - reorder channels (for instance color to BGR)
        - scale raw input (e.g. from [0, 1] to [0, 255] for ImageNet models)
        - subtract mean
        - scale feature

        Parameters
        ----------
        in_ : name of input blob to preprocess for
        data : (H' x W' x K) ndarray

        Returns
        -------
        caffe_in : (K x H x W) ndarray for input to a Net
        """
        self.__check_input(in_)
        caffe_in = data.astype(np.float32, copy=False)
        transpose = self.transpose.get(in_)
        channel_swap = self.channel_swap.get(in_)
        raw_scale = self.raw_scale.get(in_)
        mean = self.mean.get(in_)
        input_scale = self.input_scale.get(in_)
        in_dims = self.inputs[in_][2:]
        if caffe_in.shape[:2] != in_dims:
            caffe_in = resize_image(caffe_in, in_dims)
        if transpose is not None:
            caffe_in = caffe_in.transpose(transpose)
        if channel_swap is not None:
            caffe_in = caffe_in[channel_swap, :, :]
        if raw_scale is not None:
            caffe_in *= raw_scale
        if mean is not None:
            caffe_in -= mean
        if input_scale is not None:
            caffe_in *= input_scale
        return caffe_in

    def deprocess(self, in_, data):
        """
        Invert Caffe formatting; see preprocess().
        """
        self.__check_input(in_)
        decaf_in = data.copy().squeeze()
        transpose = self.transpose.get(in_)
        channel_swap = self.channel_swap.get(in_)
        raw_scale = self.raw_scale.get(in_)
        mean = self.mean.get(in_)
        input_scale = self.input_scale.get(in_)
        if input_scale is not None:
            decaf_in /= input_scale
        if mean is not None:
            decaf_in += mean
        if raw_scale is not None:
            decaf_in /= raw_scale
        if channel_swap is not None:
            decaf_in = decaf_in[np.argsort(channel_swap), :, :]
        if transpose is not None:
            decaf_in = decaf_in.transpose(np.argsort(transpose))
        return decaf_in

    def set_transpose(self, in_, order):
        """
        Set the input channel order for e.g. RGB to BGR conversion
        as needed for the reference ImageNet model.

        Parameters
        ----------
        in_ : which input to assign this channel order
        order : the order to transpose the dimensions
        """
        self.__check_input(in_)
        if len(order) != len(self.inputs[in_]) - 1:
            raise Exception('Transpose order needs to have the same number of '
                            'dimensions as the input.')
        self.transpose[in_] = order

    def set_channel_swap(self, in_, order):
        """
        Set the input channel order for e.g. RGB to BGR conversion
        as needed for the reference ImageNet model.
        N.B. this assumes the channels are the first dimension AFTER transpose.

        Parameters
        ----------
        in_ : which input to assign this channel order
        order : the order to take the channels.
            (2,1,0) maps RGB to BGR for example.
        """
        self.__check_input(in_)
        if len(order) != self.inputs[in_][1]:
            raise Exception('Channel swap needs to have the same number of '
                            'dimensions as the input channels.')
        self.channel_swap[in_] = order

    def set_raw_scale(self, in_, scale):
        """
        Set the scale of raw features s.t. the input blob = input * scale.
        While Python represents images in [0, 1], certain Caffe models
        like CaffeNet and AlexNet represent images in [0, 255] so the raw_scale
        of these models must be 255.

        Parameters
        ----------
        in_ : which input to assign this scale factor
        scale : scale coefficient
        """
        self.__check_input(in_)
        self.raw_scale[in_] = scale

    def set_mean(self, in_, mean):
        """
        Set the mean to subtract for centering the data.

        Parameters
        ----------
        in_ : which input to assign this mean.
        mean : mean ndarray (input dimensional or broadcastable)
        """
        self.__check_input(in_)
        ms = mean.shape
        if mean.ndim == 1:
            # broadcast channels
            if ms[0] != self.inputs[in_][1]:
                raise ValueError('Mean channels incompatible with input.')
            mean = mean[:, np.newaxis, np.newaxis]
        else:
            # elementwise mean
            if len(ms) == 2:
                ms = (1,) + ms
            if len(ms) != 3:
                raise ValueError('Mean shape invalid')
            if ms != self.inputs[in_][1:]:
                raise ValueError('Mean shape incompatible with input shape.')
        self.mean[in_] = mean

    def set_input_scale(self, in_, scale):
        """
        Set the scale of preprocessed inputs s.t. the blob = blob * scale.
        N.B. input_scale is done AFTER mean subtraction and other preprocessing
        while raw_scale is done BEFORE.

        Parameters
        ----------
        in_ : which input to assign this scale factor
        scale : scale coefficient
        """
        self.__check_input(in_)
        self.input_scale[in_] = scale


## Image IO

def load_image(filename, color=True):
    """
    Load an image converting from grayscale or alpha as needed.

    Parameters
    ----------
    filename : string
    color : boolean
        flag for color format. True (default) loads as RGB while False
        loads as intensity (if image is already grayscale).

    Returns
    -------
    image : an image with type np.float32 in range [0, 1]
        of size (H x W x 3) in RGB or
        of size (H x W x 1) in grayscale.
    """
    img = skimage.img_as_float(skimage.io.imread(filename, as_grey=not color)).astype(np.float32)
    if img.ndim == 2:
        img = img[:, :, np.newaxis]
        if color:
            img = np.tile(img, (1, 1, 3))
    elif img.shape[2] == 4:
        img = img[:, :, :3]
    return img


def resize_image(im, new_dims, interp_order=1):
    """
    Resize an image array with interpolation.

    Parameters
    ----------
    im : (H x W x K) ndarray
    new_dims : (height, width) tuple of new dimensions.
    interp_order : interpolation order, default is linear.

    Returns
    -------
    im : resized ndarray with shape (new_dims[0], new_dims[1], K)
    """
    if im.shape[-1] == 1 or im.shape[-1] == 3:
        im_min, im_max = im.min(), im.max()
        if im_max > im_min:
            # skimage is fast but only understands {1,3} channel images
            # in [0, 1].
            im_std = (im - im_min) / (im_max - im_min)
            resized_std = resize(im_std, new_dims, order=interp_order)
            resized_im = resized_std * (im_max - im_min) + im_min
        else:
            # the image is a constant -- avoid divide by 0
            ret = np.empty((new_dims[0], new_dims[1], im.shape[-1]),
                           dtype=np.float32)
            ret.fill(im_min)
            return ret
    else:
        # ndimage interpolates anything but more slowly.
        scale = tuple(np.array(new_dims, dtype=float) / np.array(im.shape[:2]))
        resized_im = zoom(im, scale + (1,), order=interp_order)
    return resized_im.astype(np.float32)


def oversample(images, crop_dims):
    """
    Crop images into the four corners, center, and their mirrored versions.

    Parameters
    ----------
    image : iterable of (H x W x K) ndarrays
    crop_dims : (height, width) tuple for the crops.

    Returns
    -------
    crops : (10*N x H x W x K) ndarray of crops for number of inputs N.
    """
    # Dimensions and center.
    im_shape = np.array(images[0].shape)
    crop_dims = np.array(crop_dims)
    im_center = im_shape[:2] / 2.0

    # Make crop coordinates
    h_indices = (0, im_shape[0] - crop_dims[0])
    w_indices = (0, im_shape[1] - crop_dims[1])
    crops_ix = np.empty((5, 4), dtype=int)
    curr = 0
    for i in h_indices:
        for j in w_indices:
            crops_ix[curr] = (i, j, i + crop_dims[0], j + crop_dims[1])
            curr += 1
    crops_ix[4] = np.tile(im_center, (1, 2)) + np.concatenate([
        -crop_dims / 2.0,
        crop_dims / 2.0
    ])
    crops_ix = np.tile(crops_ix, (2, 1))

    # Extract crops
    crops = np.empty((10 * len(images), crop_dims[0], crop_dims[1],
                      im_shape[-1]), dtype=np.float32)
    ix = 0
    for im in images:
        for crop in crops_ix:
            crops[ix] = im[crop[0]:crop[2], crop[1]:crop[3], :]
            ix += 1
        crops[ix-5:ix] = crops[ix-5:ix, :, ::-1, :]  # flip for mirrors
    return crops

How to Convert all pixel values of an image to a certain range -python

I have an RGB image with 12 distinct colours, but I do not know the colours (pixel values) beforehand. I want to convert all the pixel values to numbers between 0 and 11, each symbolising a unique colour of the original RGB image.
e.g. all [230,100,140] converted to [0,0,0], all [130,90,100] converted to [0,0,1], and so on, up to all [210,80,50] converted to [0,0,11].
A quick and dirty solution. Much can be improved; in particular, going through the whole image pixel by pixel is not very numpy nor very opencv, but I was too lazy to remember exactly how to threshold and replace RGB pixels.
import cv2
import numpy as np

# finding unique rows
# comes from this answer: http://stackoverflow.com/questions/8560440/removing-duplicate-columns-and-rows-from-a-numpy-2d-array
def unique_rows(a):
    a = np.ascontiguousarray(a)
    unique_a = np.unique(a.view([('', a.dtype)] * a.shape[1]))
    return unique_a.view(a.dtype).reshape((unique_a.shape[0], a.shape[1]))

img = cv2.imread(your_image)

# listing all pixels
pixels = []
for p in img:
    for k in p:
        pixels.append(k)

# finding all different colors
colors = unique_rows(pixels)

# comparing each color to every pixel
res = np.zeros(img.shape)
cpt = 0
for color in colors:
    for i in range(img.shape[0]):
        for j in range(img.shape[1]):
            if (img[i, j, :] == color).all():  # if pixel is this color
                res[i, j, :] = [0, 0, cpt]     # set the pixel to [0, 0, counter]
    cpt += 1
You can use np.unique with a bit of trickery:
import numpy as np

def safe_method(image, k):
    # a bit of black magic to make np.unique handle triplets
    out = np.zeros(image.shape[:-1], dtype=np.int32)
    out8 = out.view(np.int8)
    # should really check endianness here
    out8.reshape(image.shape[:-1] + (4,))[..., 1:] = image
    uniq, map_ = np.unique(out, return_inverse=True)
    assert uniq.size == k
    map_.shape = image.shape[:-1]
    # map_ contains the desired result. However, order of colours is most
    # probably different from original
    colours = uniq.view(np.uint8).reshape(-1, 4)[:, 1:]
    return colours, map_
However, if the number of pixels is much larger than the number of colours, the following heuristic algorithm may deliver huge speedups.
It tries to find a cheap hash function (such as only looking at the red channel), and if it succeeds it uses that to create a lookup table. If not, it falls back to the safe method above.
CHEAP_HASHES = [lambda x: x[..., 0], lambda x: x[..., 1], lambda x: x[..., 2]]

def fast_method(image, k):
    # find all colours
    chunk = int(4 * k * np.log(k)) + 1
    colours = set()
    for chunk_start in range(0, image.size // 3, chunk):
        colours |= set(
            map(tuple, image.reshape(-1, 3)[chunk_start:chunk_start+chunk]))
        if len(colours) == k:
            break
    colours = np.array(sorted(colours))
    # find hash method
    for method in CHEAP_HASHES:
        if len(set(method(colours))) == k:
            break
    else:
        return safe_method(image, k)
    # create lookup table
    hashed = method(colours)
    # should really provide for unexpected colours here
    lookup = np.empty((hashed.max() + 1,), int)
    lookup[hashed] = np.arange(k)
    return colours, lookup[method(image)]
Testing and timings:
from timeit import timeit

def create_image(k, M, N):
    colours = np.random.randint(0, 256, (k, 3)).astype(np.uint8)
    map_ = np.random.randint(0, k, (M, N))
    image = colours[map_, :]
    return colours, map_, image

k, M, N = 12, 1000, 1000
colours, map_, image = create_image(k, M, N)
for f in fast_method, safe_method:
    print('{:16s} {:10.6f} ms'.format(f.__name__, timeit(
        lambda: f(image, k), number=10) * 100))
    rec_colours, rec_map_ = f(image, k)
    print('solution correct:', np.all(rec_colours[rec_map_, :] == image))
Sample output (12 colours, 1000x1000 pixels):
fast_method 3.425885 ms
solution correct: True
safe_method 73.622813 ms
solution correct: True
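For reference, newer NumPy (1.13+) can produce the same colour-to-index mapping directly with np.unique over rows, without the view trickery, at some cost in speed compared to the methods above; a minimal sketch:

import numpy as np

def unique_axis_method(image, k):
    # unique rows of the flattened (N, 3) pixel array, plus the inverse mapping
    colours, map_ = np.unique(image.reshape(-1, 3), axis=0, return_inverse=True)
    assert colours.shape[0] == k
    return colours, map_.reshape(image.shape[:-1])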
