My code is the following:
import cv2; import numpy as np

class MyClass:
    def __init__(self, imagefile):
        self.image = cv2.imread(imagefile)
        #image details
        self.h, self.w = self.image.shape[:2]
        #self.bPoints, self.wPoints = np.array([[0,0]]),np.array([[0,0]])
        self.bPoints, self.wPoints = [], []
        #CAUTION! Points are of the form (y,x)
        # Point filtering
        for i in xrange(self.h):
            for j in xrange(self.w):
                if self.th2.item(i,j) == 0:
                    #self.bPoints = np.append([[i,j]], self.bPoints, axis=0)
                    self.bPoints.append((i,j))
                else:
                    self.wPoints.append((i,j))
                    #self.wPoints = np.append([[i,j]], self.wPoints, axis=0)
        #self.bPoints = self.bPoints[:len(self.bPoints) - 1]
        #self.wPoints = self.wPoints[:len(self.wPoints) - 1]
        self.bPoints, self.wPoints = np.array(self.bPoints), np.array(self.wPoints)
I want to find and separate the white from the black points. I have commented out the lines that show a possible (but very, very slow) solution via numpy. Can you recommend a better and faster solution? Any help is appreciated!
Thanks
I'm assuming self.th2 is a numpy array. This might take some adjustment if that is not the case. Basically, this uses the np.where function to determine all the indices which are 0 or 255.
import cv2; import numpy as np

class MyClass:
    def __init__(self, imagefile):
        self.image = cv2.imread(imagefile)
        #image details
        self.h, self.w = self.image.shape[:2]
        #self.bPoints, self.wPoints = np.array([[0,0]]),np.array([[0,0]])
        self.bPoints, self.wPoints = [], []
        #CAUTION! Points are of the form (y,x)
        # use the np.where method instead of a double loop.
        # make sure self.th2 is a numpy array
        indx = np.where(self.th2 == 0)
        for i, j in zip(indx[0], indx[1]):
            self.bPoints.append((i, j))
        indx = np.where(self.th2 == 255)
        for i, j in zip(indx[0], indx[1]):
            self.wPoints.append((i, j))
        # Point filtering
        #for i in xrange(self.h):
        #    for j in xrange(self.w):
        #        if self.th2.item(i,j) == 0:
        #            #self.bPoints = np.append([[i,j]], self.bPoints, axis=0)
        #            self.bPoints.append((i,j))
        #        else:
        #            self.wPoints.append((i,j))
        #            #self.wPoints = np.append([[i,j]], self.wPoints, axis=0)
        #self.bPoints = self.bPoints[:len(self.bPoints) - 1]
        #self.wPoints = self.wPoints[:len(self.wPoints) - 1]
        self.bPoints, self.wPoints = np.array(self.bPoints), np.array(self.wPoints)
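As a further simplification (a sketch, not part of the answer above): np.argwhere returns the same (row, col) pairs as a ready-made (N, 2) array, so the loop-and-append step can be dropped entirely. Here th2 is a small stand-in for self.th2:
import numpy as np

# stand-in for self.th2
th2 = np.array([[0, 255], [255, 0]], dtype=np.uint8)

# np.argwhere collects the (row, col) indices of the matching pixels
# directly into an (N, 2) array, no Python-level loop needed
bPoints = np.argwhere(th2 == 0)    # [[0 0] [1 1]]
wPoints = np.argwhere(th2 == 255)  # [[0 1] [1 0]]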
I'm trying to implement this paper for a university project. The idea is to insert an invisible watermark into a grayscale image, which can be extracted later to verify image ownership.
This is the code I wrote for the watermark embedding process:
import pywt
import numpy as np
import cv2
from PIL import Image
from math import sqrt, log10
from scipy.fftpack import dct, idct

def Get_MSB_LSB_Watermark():  # Function that separates the watermark into MSB and LSB images
    MSBs = []
    LSBs = []
    for i in range(len(Watermark)):
        binary = '{:0>8}'.format(str(bin(Watermark[i]))[2:])
        MSB = binary[0:4]
        LSB = binary[4:]
        MSB = int(MSB, 2)
        LSB = int(LSB, 2)
        MSBs.append(MSB)
        LSBs.append(LSB)
    MSBs = np.array(MSBs)
    LSBs = np.array(LSBs)
    return MSBs.reshape(64, 64), LSBs.reshape(64, 64)
def split(array, nrows, ncols):  # Split array into blocks of size nrows * ncols
    r, h = array.shape
    return (array.reshape(h//nrows, nrows, -1, ncols)
                 .swapaxes(1, 2)
                 .reshape(-1, nrows, ncols))

def unblockshaped(arr, h, w):  # the inverse of the split function
    n, nrows, ncols = arr.shape
    return (arr.reshape(h//nrows, -1, nrows, ncols)
               .swapaxes(1, 2)
               .reshape(h, w))

def ISVD(U, S, V):  # the inverse of singular value decomposition
    s = np.zeros(np.shape(U))
    for i in range(4):
        s[i, i] = S[i]
    recon_image = U @ s @ V
    return recon_image
def Watermark_Embedding(blocks, watermark):
    Watermarked_blocks = []
    k1 = []
    k2 = []
    # convert the watermark to a list
    w = list(np.ndarray.flatten(watermark))
    for i in range(len(blocks)):
        B = blocks[i]
        # apply singular value decomposition to the block
        U, s, V = np.linalg.svd(B)
        # modify the singular values of the block
        P = s[1] - s[2]
        delta = abs(w[i]) - P
        s[1] = s[1] + delta
        if s[0] >= s[1]:
            k1.append(1)
        else:
            k1.append(-1)
        # the inverse of SVD after watermark embedding
        reconstructed_B = ISVD(U, s, V)
        Watermarked_blocks.append(reconstructed_B)
    for j in range(len(w)):
        if w[j] >= 0:
            k2.append(1)
        else:
            k2.append(-1)
    return k1, k2, np.array(Watermarked_blocks)
def apply_dct(image_array):
    size = len(image_array[0])
    all_subdct = np.empty((size, size))
    for i in range(0, size, 4):
        for j in range(0, size, 4):
            subpixels = image_array[i:i+4, j:j+4]
            subdct = dct(dct(subpixels.T, norm="ortho").T, norm="ortho")
            all_subdct[i:i+4, j:j+4] = subdct
    return all_subdct

def inverse_dct(all_subdct):
    size = len(all_subdct[0])
    all_subidct = np.empty((size, size))
    for i in range(0, size, 4):
        for j in range(0, size, 4):
            subidct = idct(idct(all_subdct[i:i+4, j:j+4].T, norm="ortho").T, norm="ortho")
            all_subidct[i:i+4, j:j+4] = subidct
    return all_subidct
# read the watermark
Watermark = Image.open('Copyright.png').convert('L')
Watermark = list(Watermark.getdata())

# separate the watermark into LSB and MSB images
Watermark1, Watermark2 = Get_MSB_LSB_Watermark()

# apply the discrete cosine transform to the two generated images
DCT_Watermark1 = apply_dct(Watermark1)
DCT_Watermark2 = apply_dct(Watermark2)

# read the cover image
Cover_Image = Image.open('10.png').convert('L')

# apply a 1-level discrete wavelet transform
LL1, (LH1, HL1, HH1) = pywt.dwt2(Cover_Image, 'haar')

# split the LH1 and HL1 subbands into blocks of size 4*4
blocks_LH1 = split(LH1, 4, 4)
blocks_HL1 = split(HL1, 4, 4)

# watermark embedding in LH1 and HL1, and key generation
Key1, Key3, WatermarkedblocksLH1 = Watermark_Embedding(blocks_LH1, DCT_Watermark1)
Key2, Key4, WatermarkedblocksHL1 = Watermark_Embedding(blocks_HL1, DCT_Watermark2)

# merge the watermarked blocks
reconstructed_LH1 = unblockshaped(WatermarkedblocksLH1, 256, 256)
reconstructed_HL1 = unblockshaped(WatermarkedblocksHL1, 256, 256)

# apply the inverse discrete wavelet transform to get the watermarked image
IDWT = pywt.idwt2((LL1, (reconstructed_LH1, reconstructed_HL1, HH1)), 'haar')
cv2.imwrite('Watermarked_img.png', IDWT)
This is the code I wrote for the extraction process:
import pywt
from scipy import fftpack
import numpy as np
import cv2
from PIL import Image
import scipy
from math import sqrt, log10
from Watermark_Embedding import *

def Watermark_Extraction(blocks, key1, key2):
    Extracted_Watermark = []
    for i in range(len(blocks)):
        B = blocks[i]
        # apply SVD on the block
        U, s, V = np.linalg.svd(B)
        if key1[i] == 1:
            P = s[1] - s[2]
            Extracted_Watermark.append(P)
        else:
            P = s[0] - s[2]
            Extracted_Watermark.append(P)
    for j in range(len(Extracted_Watermark)):
        if key2[j] == 1:
            Extracted_Watermark[j] = Extracted_Watermark[j]
        else:
            Extracted_Watermark[j] = -Extracted_Watermark[j]
    return np.array(Extracted_Watermark)

def Merge_W1_W2():
    Merged_watermark = []
    w1 = list(np.ndarray.flatten(IDCTW1))
    w2 = list(np.ndarray.flatten(IDCTW2))
    for i in range(len(w2)):
        bw1 = '{:0>4}'.format(bin(int(abs(w1[i])))[2:])
        bw2 = '{:0>4}'.format(bin(int(abs(w2[i])))[2:])
        P = bw1 + bw2
        pixel = int(P, 2)
        Merged_watermark.append(pixel)
    return Merged_watermark

Watermarked_Image = Image.open('Watermarked_img.png')
LL1, (LH1, HL1, HH1) = pywt.dwt2(Watermarked_Image, 'haar')
blocks_LH1 = split(LH1, 4, 4)
blocks_HL1 = split(HL1, 4, 4)
W1 = Watermark_Extraction(blocks_LH1, Key1, Key3)
W2 = Watermark_Extraction(blocks_HL1, Key2, Key4)
W1 = W1.reshape(64, 64)
W2 = W2.reshape(64, 64)
IDCTW1 = inverse_dct(W1)
IDCTW2 = inverse_dct(W2)
Merged = np.array(Merge_W1_W2())
Merged = Merged.reshape(64, 64)
cv2.imwrite('Extracted_Watermark.png', Merged)
The cover image of size 512*512:
The 64*64 watermark I used:
The watermarked image:
The extracted watermark I get:
I calculated the similarity between the two watermarks using SSIM:
from skimage.metrics import structural_similarity
import cv2

original_watermark = cv2.imread('Copyright.png')
extracted_watermark = cv2.imread('Extracted_Watermark.png')

# convert both images to grayscale
original_watermark = cv2.cvtColor(original_watermark, cv2.COLOR_BGR2GRAY)
extracted_watermark = cv2.cvtColor(extracted_watermark, cv2.COLOR_BGR2GRAY)

# compute SSIM between the two grayscale images
(score, diff) = structural_similarity(original_watermark, extracted_watermark, full=True)
print("SSIM = ", score)
I didn't apply any modification to the watermarked image, and the SSIM I got is 0.8445354561524052; however, according to the paper, the SSIM of the extracted watermark should be 0.99.
I don't know what's wrong with my code, and I have a deadline in two days, so I really need help.
Thanks in advance.
There are two issues:
1. In Merge_W1_W2 you are using int to convert from float to int, but that truncates, which introduces errors for numbers whose floating-point representation is not exact (e.g. 14.99999999999997); this can be fixed by using round instead.
2. Saving with cv2.imwrite('Watermarked_img.png', IDWT) is a lossy operation, because it rounds the values in IDWT to the nearest integer; if you use Watermarked_Image = IDWT instead, you will get back the exact same watermark image.
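A minimal sketch of both fixes, reusing the variable names from the question's code:
# Fix 1: in Merge_W1_W2, round to the nearest integer instead of truncating,
# so e.g. 14.99999999999997 becomes 15 rather than 14
bw1 = '{:0>4}'.format(bin(round(abs(w1[i])))[2:])
bw2 = '{:0>4}'.format(bin(round(abs(w2[i])))[2:])

# Fix 2: skip the lossy PNG round-trip and hand the float array
# straight to the extraction step
Watermarked_Image = IDWT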
I've been looking for hours to find a similar question, but nothing has satisfied me.
My problem is: I have a PIL image (representing a canal) already converted into a numpy array (using the "L" mode of PIL), and I'd like to retrieve the white pixels whose neighbours are black (their indices, in fact), without using for loops (the image is really huge).
I thought of np.where, but I don't know how I should use it to solve my problem, and I also don't know whether it would be faster than using for loops (my aim is to reach this goal with the fastest solution).
I hope I'm clear enough, and I thank you in advance for your response!
EDIT: for example, with this image (a simple canal; it is already a black and white image, so the image.convert('L') isn't really useful here, but the code should be generic if possible), I'd do something like this:
import numpy as np
from PIL import Image

image = Image.open(canal)
image = image.convert("L")
array = np.asarray(image)

l = []
for i in range(1, len(array) - 1):
    for j in range(1, len(array[0]) - 1):
        if array[i][j] == 255 and (array[i+1][j] == 0 or array[i-1][j] == 0 or array[i][j+1] == 0 or array[i][j-1] == 0):
            l.append((i, j))
and I'd hope to obtain l as fast as possible :)
I've colored the pixels I need in red in the next image: here.
EDIT2: thank you all for the help, it worked!
You could use the numba just-in-time compiler to speed up your loop.
from numba import njit

@njit
def find_highlow_pixels(img):
    pixels = []
    for j in range(1, img.shape[0]-1):
        for i in range(1, img.shape[1]-1):
            if (
                img[j, i] == 255 and (
                    img[j-1, i] == 0 or img[j+1, i] == 0 or
                    img[j, i-1] == 0 or img[j, i+1] == 0
                )
            ):
                pixels.append((j, i))
    return pixels
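For reference, a minimal usage sketch (the file name is a placeholder, and find_highlow_pixels is the function above):
import numpy as np
from PIL import Image

# find_highlow_pixels as defined above
img = np.asarray(Image.open("canal.png").convert("L"))  # "canal.png" is a placeholder path
pixels = find_highlow_pixels(img)  # note: the first call also pays the JIT compilation cost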
Another possibility that came to my mind would be using the minimum filter. I would expect it to be slower than the first proposed solution, but it could be useful to build more on top of.
import numpy as np
from scipy.ndimage import minimum_filter

# create a footprint that only takes the 4 direct neighbours into account
neighbours = (np.arange(9) % 2 == 1).reshape(3, 3)

# create a mask of relevant pixels; img should be your image as an array
mask = np.logical_and(
    img == 255,
    minimum_filter(img, footprint=neighbours) == 0
)

# get the indices
indexes = np.where(mask)

# as a list of tuples
list(zip(*indexes))
If memory use is not a concern, I prefer manipulating masks, like the following.
# Step 1: generate two masks, of white and of black pixels.
mask_white = img == 255
mask_black = img == 0

# Step 2: apply an 8-neighbourhood dilation to the black mask.
# If you want to use numpy only, you need to implement the dilation yourself.

# define an 8-neighbourhood dilation
def dilate_8nb(m):
    index_row, index_col = np.where(m)
    ext_index_row = np.repeat(index_row, 9)
    ext_index_col = np.repeat(index_col, 9)
    ext_index_row.reshape(-1, 9)[:, :3] += 1
    ext_index_row.reshape(-1, 9)[:, -3:] -= 1
    ext_index_col.reshape(-1, 9)[:, ::3] += 1
    ext_index_col.reshape(-1, 9)[:, 2::3] -= 1
    ext_index_row = np.clip(ext_index_row, 0, m.shape[0]-1)
    ext_index_col = np.clip(ext_index_col, 0, m.shape[1]-1)
    ret = m.copy()
    ret[ext_index_row, ext_index_col] = True
    return ret

ext_mask_black = dilate_8nb(mask_black)
# or just use dilation from scipy:
# from scipy import ndimage
# ext_mask_black = ndimage.binary_dilation(mask_black, structure=ndimage.generate_binary_structure(2, 2))

# Step 3: take the intersection of mask_white and ext_mask_black
mask_target = mask_white & ext_mask_black

# Step 4: take the indices using np.where
l = np.where(mask_target)

# convert to a list of (i, j) tuples to match your loop's result
l = list(zip(*l))
I have 3 numpy matrices:
One contains pixel positions in X (x_pos), another pixel positions in Y (y_pos), and the last one contains pixel values (p_value).
I would like to use these 3 matrices to build a result image.
With loops I have this result:
#Resulting image
res = np.zeros((128,128,3), dtype = np.uint8)
for i in range(x_pos.shape[0]):
    for j in range(x_pos.shape[1]):
        # Get coordinates
        x = x_pos[i][j]
        y = y_pos[i][j]
        res[y,x] = p_value[i][j]
With large matrices (2048*2048) this code already takes a lot of time. Is it possible to optimize this code without using a nested loop?
Note that the positions in the x_pos and y_pos matrices do not necessarily follow each other; there may be holes or duplicate values.
It should be possible using np.meshgrid:
i = np.arange(0, x_pos.shape[0])
j = np.arange(0, x_pos.shape[1])
i_1, j_1 = np.meshgrid(i, j, indexing='ij')
res[y_pos.ravel(), x_pos.ravel()] = p_value[i_1.ravel(), j_1.ravel()]
First use consistent numpy 2d array indexing:
x = x_pos[i,j]
y = y_pos[i,j]
res[y,x] = p_value[i,j]
Now instead of scalar i,j use arrays, shaped so that they broadcast against each other:
i = np.arange(n)[:, None]; j = np.arange(m)
You didn't provide a [mcve] so I won't try to demonstrate that this works.
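Since x_pos, y_pos and p_value all share one shape, the index arrays can also be dropped entirely, and the whole nested loop becomes a single fancy-indexing assignment. A minimal sketch with made-up data:
import numpy as np

rng = np.random.default_rng(0)
x_pos = rng.integers(0, 128, (128, 128))
y_pos = rng.integers(0, 128, (128, 128))
p_value = rng.integers(0, 256, (128, 128)).astype(np.uint8)

res = np.zeros((128, 128, 3), dtype=np.uint8)
# pairs y_pos[i, j] with x_pos[i, j] element-wise; the extra axis lets each
# value broadcast over the 3 channels. With duplicate coordinates, which
# write wins is unspecified.
res[y_pos, x_pos] = p_value[:, :, None]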
Thanks to @hpaulj's and @ai2ys's answers, the problem is solved.
Here is a comparison of the results in terms of execution speed:
import numpy as np
import cv2
import time

m_size = 4096
m_x = np.random.randint(0, m_size, (m_size, m_size), dtype=np.uint16)
m_y = np.random.randint(0, m_size, (m_size, m_size), dtype=np.uint16)
p_value = np.ones((m_size, m_size), dtype=np.uint8)

# Meshgrid method:
out = np.zeros((m_size, m_size), dtype=np.uint8)
start = time.time()
i = np.arange(0, m_x.shape[0])
j = np.arange(0, m_x.shape[1])
i_1, j_1 = np.meshgrid(i, j, indexing='ij')
out[m_x.ravel(), m_y.ravel()] = p_value[i_1.ravel(), j_1.ravel()]
end = time.time()
print("Meshgrid: {} s".format(end - start))

# No for loop method:
out = np.zeros((m_size, m_size), dtype=np.uint8)
start = time.time()
# the column-vector/row-vector pair broadcasts to the full index grid
i = np.arange(m_x.shape[0])[:, None]
j = np.arange(m_y.shape[1])
x = m_x[i, j]
y = m_y[i, j]
out[x, y] = p_value[i, j]
end = time.time()
print("No loop: {} s".format(end - start))

# For loop method:
out = np.zeros((m_size, m_size), dtype=np.uint8)
start = time.time()
for i in range(m_x.shape[0]):
    for j in range(m_y.shape[1]):
        x = m_x[i, j]
        y = m_y[i, j]
        out[x, y] = p_value[i, j]
end = time.time()
print("Nested loop: {} s".format(end - start))
#Output:
Meshgrid: 0.4837045669555664 s
No loop: 0.3600656986236572 s
Nested loop: 13.10097336769104 s
import numpy as np
from PIL import Image

def isblack(rgb):
    return (rgb[0] == 0) and (rgb[1] == 0) and (rgb[2] == 0)

a = Image.open('image1.jpg')
a = np.array(a, dtype=np.int64)  # RGB image; a wide signed dtype so the subtraction below doesn't wrap
[h, w, chan] = np.shape(a)
filtsz = 9

# comparing the subimages a[50:59, 60:69] and a[100:109, 120:129], for example
srcTop = 50
srcLeft = 60
dstTop = 100
dstLeft = 120

ssd = 0  # sumOfSquareDifference
for i in range(filtsz):
    for j in range(filtsz):
        if not isblack(a[dstTop+i, dstLeft+j, :]):
            ssd += sum((a[dstTop+i, dstLeft+j] - a[srcTop+i, srcLeft+j])**2)
print(ssd)
The naive implementation is to loop over all pixels that satisfy the condition, then compute the sum.
However, this is very slow.
How can I make it faster? I'm looking for a way that uses indexing. For example, something like the following pseudocode:
selected = [not isblack(pixel) for pixel in image] # 2D array contains 0 if black, 1 if not black
diff = [(a[pixel] - b[pixel])**2 for pixel in a] # 2D array contains the square difference at each pixel
ssd = sum(diff * selected) # sum only positions that satisfy the condition
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt

img = Image.open("image/image1.jpg")

filtsz = 100  # increased from 9 to 100 for display purposes
srcTop = 50
srcLeft = 60
dstTop = 100
dstLeft = 120

npimg = np.array(img)

# indexing
subimg_src = npimg[srcTop:srcTop+filtsz, srcLeft:srcLeft+filtsz, :]
subimg_dst = npimg[dstTop:dstTop+filtsz, dstLeft:dstLeft+filtsz, :]

fig, ax = plt.subplots(1, 2)
ax[0].imshow(subimg_src)
ax[1].imshow(subimg_dst)

# channel axis = 2; keepdims=True so the mask broadcasts over the channels
selected = np.any(subimg_dst, axis=2, keepdims=True)  # True where the dst pixel is not black
# cast to a wide signed dtype so the uint8 subtraction doesn't wrap around
diff = subimg_src.astype(np.int64) - subimg_dst.astype(np.int64)
ssd = np.sum(diff**2 * selected)
print(ssd)
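To sanity-check the vectorized sum against the original loop, here is a small self-contained sketch with random data:
import numpy as np

rng = np.random.default_rng(1)
src = rng.integers(0, 256, (9, 9, 3)).astype(np.int64)
dst = rng.integers(0, 256, (9, 9, 3)).astype(np.int64)
dst[2:4, 3:5] = 0  # plant some black pixels

# loop version, mirroring the question's code
ssd_loop = 0
for i in range(9):
    for j in range(9):
        if dst[i, j].any():  # "not isblack"
            ssd_loop += np.sum((src[i, j] - dst[i, j])**2)

# vectorized version
selected = np.any(dst, axis=2, keepdims=True)
ssd_vec = np.sum((src - dst)**2 * selected)
assert ssd_loop == ssd_vec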
Example image:
I already asked a similar question which got answered, but now this is more detailed:
I need a really fast way to get all the important component stats of two arrays, where one array is labeled by opencv2 and gives the component areas for both arrays. The stats for all components, masked on the two arrays, should then be saved to a dictionary. My approach works, but it is much too slow. Is there something to avoid the loop, or a better approach than ndimage.labeled_comprehension?
from scipy import ndimage
import numpy as np
import cv2

def calculateMeanMaxMin(val):
    return np.array([np.mean(val), np.max(val), np.min(val)])

def getTheStatsForComponents(array1, array2):
    ret, thresholded = cv2.threshold(array2, 120, 255, cv2.THRESH_BINARY)
    thresholded = thresholded.astype(np.uint8)
    numLabels, labels, stats, centroids = cv2.connectedComponentsWithStats(thresholded, 8, cv2.CV_8UC1)
    allComponentStats = []
    meanmaxminArray2 = ndimage.labeled_comprehension(array2, labels, np.arange(1, numLabels+1), calculateMeanMaxMin, np.ndarray, 0)
    meanmaxminArray1 = ndimage.labeled_comprehension(array1, labels, np.arange(1, numLabels+1), calculateMeanMaxMin, np.ndarray, 0)
    for position, label in enumerate(range(1, numLabels)):
        currentLabel = np.uint8(labels == label)
        contour, _ = cv2.findContours(currentLabel, cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)
        (side1, side2) = cv2.minAreaRect(contour[0])[1]
        componentStat = stats[label]
        allstats = {'position': centroids[label, :], 'area': componentStat[4], 'height': componentStat[3],
                    'width': componentStat[2], 'meanArray1': meanmaxminArray1[position][0], 'maxArray1': meanmaxminArray1[position][1],
                    'minArray1': meanmaxminArray1[position][2], 'meanArray2': meanmaxminArray2[position][0], 'maxArray2': meanmaxminArray2[position][1],
                    'minArray2': meanmaxminArray2[position][2]}
        if side1 >= side2 and side1 > 0:
            allstats['elongation'] = np.float32(side2 / side1)
        elif side2 > side1 and side2 > 0:
            allstats['elongation'] = np.float32(side1 / side2)
        else:
            allstats['elongation'] = np.float32(0)
        allComponentStats.append(allstats)
    return allComponentStats
EDIT
The two arrays are 2d arrays:
array1= np.random.choice(255,(512,512)).astype(np.uint8)
array2= np.random.choice(255,(512,512)).astype(np.uint8)
EDIT2
A small example of two arrays and the labelArray with two components (1 and 2, plus background 0). Calculate the min, max and mean with ndimage.labeled_comprehension:
from scipy import ndimage
import numpy as np
labelArray = np.array([[0,1,1,1],[2,2,1,1],[2,2,0,1]])
data = np.array([[0.1,0.2,0.99,0.2],[0.34,0.43,0.87,0.33],[0.22,0.53,0.1,0.456]])
data2 = np.array([[0.1,0.2,0.99,0.2],[0.1,0.2,0.99,0.2],[0.1,0.2,0.99,0.2]])
numLabels = 2
minimumDataForAllLabels = ndimage.labeled_comprehension(data, labelArray, np.arange(1, numLabels+1), np.min, np.ndarray, 0)
minimumData2ForallLabels = ndimage.labeled_comprehension(data2, labelArray, np.arange(1, numLabels+1), np.min, np.ndarray, 0)
print(minimumDataForAllLabels)
print(minimumData2ForallLabels)
print(bin_and_do_simple_stats(labelArray.flatten(),data.flatten()))
Output:
[0.2 0.22] ##minimum of component 1 and 2 from data
[0.2 0.1] ##minimum of component 1 and 2 from data2
[0.1 0.2 0.22] ##minimum output of bin_and_do_simple_stats from data
labeled_comprehension is definitely slow.
At least the simple stats can be done much faster based on the linked post. For simplicity I'm only doing one data array, but as the procedure returns sort indices, it can easily be extended to multiple arrays (see the sketch after the code):
import numpy as np
from scipy import sparse

try:
    from stb_pthr import sort_to_bins as _stb_pthr
    HAVE_PYTHRAN = True
except:
    HAVE_PYTHRAN = False

# fallback if pythran is not available
def sort_to_bins_sparse(idx, data, mx=-1):
    if mx == -1:
        mx = idx.max() + 1
    aux = sparse.csr_matrix((data, idx, np.arange(len(idx)+1)), (len(idx), mx)).tocsc()
    return aux.data, aux.indices, aux.indptr

def sort_to_bins_pythran(idx, data, mx=-1):
    indices, indptr = _stb_pthr(idx, mx)
    return data[indices], indices, indptr

# pick the best available implementation
sort_to_bins = sort_to_bins_pythran if HAVE_PYTHRAN else sort_to_bins_sparse

# example data
idx = np.random.randint(0, 10, (100000))
data = np.random.random(100000)

# if possible, compare the two methods
if HAVE_PYTHRAN:
    dsp, isp, psp = sort_to_bins_sparse(idx, data)
    dph, iph, pph = sort_to_bins_pythran(idx, data)
    assert (dsp == dph).all()
    assert (isp == iph).all()
    assert (psp == pph).all()

# example of how to do simple vectorized calculations
def simple_stats(data, iptr):
    min = np.minimum.reduceat(data, iptr[:-1])
    mean = np.add.reduceat(data, iptr[:-1]) / np.diff(iptr)
    return min, mean

def bin_and_do_simple_stats(idx, data, mx=-1):
    data, indices, indptr = sort_to_bins(idx, data, mx)
    return simple_stats(data, indptr)

print("minima: {}\n mean values: {}".format(*bin_and_do_simple_stats(idx, data)))
If you have pythran (not required but a bit faster), compile this as <stb_pthr.py>:
import numpy as np

#pythran export sort_to_bins(int[:], int)
def sort_to_bins(idx, mx):
    if mx == -1:
        mx = idx.max() + 1
    # counting sort: count occurrences, prefix-sum to get bin starts,
    # then scatter the original positions into their bins
    cnts = np.zeros(mx + 2, int)
    for i in range(idx.size):
        cnts[idx[i] + 2] += 1
    for i in range(2, cnts.size):
        cnts[i] += cnts[i-1]
    res = np.empty_like(idx)
    for i in range(idx.size):
        res[cnts[idx[i] + 1]] = i
        cnts[idx[i] + 1] += 1
    return res, cnts[:-1]