Related
This can be a very simple question as I am still exploring Python. And for this issue I use numpy.
Updated 09/30/21: adopted and modified codes shown below for any potential future reference. I also added an elif in the loop for classes that have fewer counts than the wanted size. Some codes may be unnecessary tho.
new_array = test_array.copy()
uniques, counts = np.unique(new_array, return_counts=True)
print("classes:", uniques, "counts:", counts)
for unique, count in zip(uniques, counts):
#print (unique, count)
if unique != 0 and count > 3:
ids = np.random.choice(count, count-3, replace=False)
new_array[tuple(i[ids] for i in np.where(new_array == unique))] = 0
elif unique != 0 and count <= 3:
ids = np.random.choice(count, count, replace=False)
new_array[tuple(i[ids] for i in np.where(new_array == unique))] = unique
Below is original question.
Let's say I have a 2D array like this:
test_array = np.array([[0,0,0,0,0],
[1,1,1,1,1],
[0,0,0,0,0],
[2,2,2,4,4],
[4,4,4,2,2],
[0,0,0,0,0]])
print("existing classes:", np.unique(test_array))
# "existing classes: [0 1 2 4]"
Now I want to keep a fixed size (e.g. 2 values) in each class that != 0 (in this case two 1s, two 2s, and two 4s) and replace the rest with 0. Where the value being replaced is random with each run (or from a seed).
For example, with run 1 I will have
([[0,0,0,0,0],
[1,0,0,1,0],
[0,0,0,0,0],
[2,0,0,0,4],
[4,0,0,2,0],
[0,0,0,0,0]])
with another run it might be
([[0,0,0,0,0],
[1,1,0,0,0],
[0,0,0,0,0],
[2,0,2,0,4],
[4,0,0,0,0],
[0,0,0,0,0]])
etc. Could anyone help me with this?
My strategy is
Create a new array initialized to all zeros
Find the elements in each class
For each class
Randomly sample two of elements to keep
Set those elements of the new array to the class value
The trick is keeping the shape of the indexes appropriate so you retain the shape of the original array.
import numpy as np
test_array = np.array([[0,0,0,0,0],
[1,1,1,1,1],
[0,0,0,0,0],
[2,2,2,4,4],
[4,4,4,2,2],
[0,0,0,0,0]])
def sample_classes(arr, n_keep=2, random_state=42):
classes, counts = np.unique(test_array, return_counts=True)
rng = np.random.default_rng(random_state)
out = np.zeros_like(arr)
for klass, count in zip(classes, counts):
# Find locations of the class elements
indexes = np.nonzero(arr == klass)
# Sample up to n_keep elements of the class
keep_idx = rng.choice(count, n_keep, replace=False)
# Select the kept elements and reformat for indexing the output array and retaining its shape
keep_idx_reshape = tuple(ind[keep_idx] for ind in indexes)
out[keep_idx_reshape] = klass
return out
You can use it like
In [3]: sample_classes(test_array) [3/1174]
Out[3]:
array([[0, 0, 0, 0, 0],
[0, 1, 1, 0, 0],
[0, 0, 0, 0, 0],
[2, 0, 0, 4, 0],
[4, 0, 0, 2, 0],
[0, 0, 0, 0, 0]])
In [4]: sample_classes(test_array, n_keep=3)
Out[4]:
array([[0, 0, 0, 0, 0],
[1, 0, 1, 1, 0],
[0, 0, 0, 0, 0],
[0, 2, 0, 4, 0],
[4, 4, 0, 2, 2],
[0, 0, 0, 0, 0]])
In [5]: sample_classes(test_array, random_state=88)
Out[5]:
array([[0, 0, 0, 0, 0],
[0, 0, 1, 1, 0],
[0, 0, 0, 0, 0],
[0, 0, 0, 0, 0],
[4, 0, 4, 2, 2],
[0, 0, 0, 0, 0]])
In [6]: sample_classes(test_array, random_state=88, n_keep=4)
Out[6]:
array([[0, 0, 0, 0, 0],
[0, 1, 1, 1, 1],
[0, 0, 0, 0, 0],
[2, 2, 0, 4, 4],
[4, 4, 0, 2, 2],
[0, 0, 0, 0, 0]])
Here is my not-so-elegant solution:
def unique(arr, num=2, seed=None):
np.random.seed(seed)
vals = {}
for i, row in enumerate(arr):
for j, val in enumerate(row):
if val in vals and val != 0:
vals[val].append((i, j))
elif val != 0:
vals[val] = [(i, j)]
new = np.zeros_like(arr)
for val in vals:
np.random.shuffle(vals[val])
while len(vals[val]) > num:
vals[val].pop()
for row, col in vals[val]:
new[row,col] = val
return new
The following should be O(n log n) in array size
def keep_k_per_class(data,k,rng):
out = np.zeros_like(data)
unq,cnts = np.unique(data,return_counts=True)
assert (cnts >= k).all()
# calculate class boundaries from class sizes
CNTS = cnts.cumsum()
# indirectly group classes together by partial sorting
idx = data.ravel().argpartition(CNTS[:-1])
# the following lines implement simultaneous drawing without replacement
# from all classes
# lower boundaries of intervals to draw random numbers from
# for each class they start with the lower class boundary
# and from there grow one by one - together with the
# swapping out below this implements "without replacement"
lb = np.add.outer(np.arange(k),CNTS-cnts)
pick = rng.integers(lb,CNTS,lb.shape)
for l,p in zip(lb,pick):
# populate output array
out.ravel()[idx[p]] = unq
# swap out used indices so still available ones occupy a linear
# range (per class)
idx[p] = idx[l]
return out
Examples:
rng = np.random.default_rng()
>>>
>>> keep_k_per_class(test_array,2,rng)
array([[0, 0, 0, 0, 0],
[1, 1, 0, 0, 0],
[0, 0, 0, 0, 0],
[2, 0, 2, 0, 4],
[0, 4, 0, 0, 0],
[0, 0, 0, 0, 0]])
>>> keep_k_per_class(test_array,2,rng)
array([[0, 0, 0, 0, 0],
[1, 1, 0, 0, 0],
[0, 0, 0, 0, 0],
[0, 2, 0, 0, 0],
[4, 0, 4, 0, 2],
[0, 0, 0, 0, 0]])
and a large one
>>> BIG = np.add.outer(np.tile(test_array,(100,100)),np.arange(0,500,5))
>>> BIG.size
30000000
>>> res = keep_k_per_class(BIG,30,rng)
### takes ~4 sec
### check
>>> np.unique(np.bincount(res.ravel()),return_counts=True)
(array([ 0, 30, 29988030]), array([100, 399, 1]))
Given an integer n, create nxn nummy array such that all of the elements present in both its diagonals are 1 and all others are 0
Input: 4
Output
*[[1, 0, 0, 1],
[0, 1, 1, 0],
[0, 1, 1, 0],
[1, 0, 0, 1]]*
how do i achieve this array?
You can use the fill_diagonal to fill the elements in the principal diagonal and use it with np.fliplr to fill elements across the other diagonal. Refer link
import numpy as np
a = np.zeros((4, 4), int)
np.fill_diagonal(a, 1)
np.fill_diagonal(np.fliplr(a), 1)
Output :
array([[1, 0, 0, 1],
[0, 1, 1, 0],
[0, 1, 1, 0],
[1, 0, 0, 1]])
Create an identity matrix and its flipped view, then take the maximum of the two:
np.maximum(np.eye(5, dtype=int), np.fliplr(np.eye(5, dtype=int)))
#array([[1, 0, 0, 0, 1],
# [0, 1, 0, 1, 0],
# [0, 0, 1, 0, 0],
# [0, 1, 0, 1, 0],
# [1, 0, 0, 0, 1]])
Edited: changed [::-1] to np.fliplr (for better performance).
I would do (assuming n=5):
import numpy as np
d = np.diagflat(np.ones(5,int))
a = d | np.rot90(d)
print(a)
Output:
[[1 0 0 0 1]
[0 1 0 1 0]
[0 0 1 0 0]
[0 1 0 1 0]
[1 0 0 0 1]]
I harness fact that we could use | (binary OR) here for getting same effect as max, because arrays holds solely 0s and 1s.
I have an array of numbers between 0 and 3 and I want to create a 2D array of their binary digits.
in the future may be I need to have array of numbers between 0 and 7 or 0 to 15.
Currently my array is defined like this:
a = np.array([[0], [1], [2], [3]], dtype=np.uint8)
I used numpy unpackbits function:
b = np.unpackbits(a, axis=1)
and the result is this :
array([[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 1],
[0, 0, 0, 0, 0, 0, 1, 0],
[0, 0, 0, 0, 0, 0, 1, 1]], dtype=uint8)
As you can see it created a 2d array with 8 items in column while I'm looking for 2 columns 2d array.
here is my desired array:
array([[0, 0],
[0, 1],
[1, 0],
[1, 1]])
Is this related to data type uint8 ?
what is your idea?
One way of approaching the problem is to just adapt your b to match your desired output via a simple slicing, similarly to what suggested in #GrzegorzSkibinski answer:
import numpy as np
def gen_bits_by_val(values):
n = int(max(values)).bit_length()
return np.unpackbits(values, axis=1)[:, -n:].copy()
print(gen_bits_by_val(a))
# [[0 0]
# [0 1]
# [1 0]
# [1 1]]
Alternatively, you could create a look-up table, similarly to what suggested in #WarrenWeckesser answer, using the following:
import numpy as np
def gen_bits_by_num(n):
values = np.arange(2 ** n, dtype=np.uint8).reshape(-1, 1)
return np.unpackbits(values, axis=1)[:, -n:].copy()
bits2 = gen_bits_by_num(2)
print(bits2)
# [[0 0]
# [0 1]
# [1 0]
# [1 1]]
which allows for all kind of uses thereby indicated, e.g.:
bits4 = gen_bits_by_num(4)
print(bits4[[1, 3, 12]])
# [[0 0 0 1]
# [0 0 1 1]
# [1 1 0 0]]
EDIT
Considering #PaulPanzer answer the line:
return np.unpackbits(values, axis=1)[:, -n:]
has been replaced with:
return np.unpackbits(values, axis=1)[:, -n:].copy()
which is more memory efficient.
It could have been replaced with:
return np.unpackbits(values << (8 - n), axis=1, count=n)
with similar effects.
You can use the count keyword. It cuts from the right so you also have to shift bits before applying unpackbits.
b = np.unpackbits(a<<6, axis=1, count=2)
b
# array([[0, 0],
# [0, 1],
# [1, 0],
# [1, 1]], dtype=uint8)
This produces a "clean" array:
b.flags
# C_CONTIGUOUS : True
# F_CONTIGUOUS : False
# OWNDATA : True
# WRITEABLE : True
# ALIGNED : True
# WRITEBACKIFCOPY : False
# UPDATEIFCOPY : False
In contrast, slicing the full 8-column output of unpackbits is in a sense a memory leak because the discarded columns will stay in memory as long as the slice lives.
You can truncate b to keep just the columns since the first column with 1:
b=b[:, int(np.argwhere(b.max(axis=0)==1)[0]):]
For such a small number of bits, you can use a lookup table.
For example, here bits2 is an array with shape (4, 2) that holds the bits of the integers 0, 1, 2, and 3. Index bits2 with the values from a to get the bits:
In [43]: bits2 = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
In [44]: a = np.array([[0], [1], [2], [3]], dtype=np.uint8)
In [45]: bits2[a[:, 0]]
Out[45]:
array([[0, 0],
[0, 1],
[1, 0],
[1, 1]])
This works fine for 3 or 4 bits, too:
In [46]: bits4 = np.array([[0, 0, 0, 0], [0, 0, 0, 1], [0, 0, 1, 0], [0, 0, 1, 1], [0, 1, 0, 0], [
...: 0, 1, 0, 1], [0, 1, 1, 0], [0, 1, 1, 1], [1, 0, 0, 0], [1, 0, 0, 1], [1, 0, 1, 0], [1, 0,
...: 1, 1], [1, 1, 0, 0], [1, 1, 0, 1], [1, 1, 1, 0], [1, 1, 1, 1]])
In [47]: bits4
Out[47]:
array([[0, 0, 0, 0],
[0, 0, 0, 1],
[0, 0, 1, 0],
[0, 0, 1, 1],
[0, 1, 0, 0],
[0, 1, 0, 1],
[0, 1, 1, 0],
[0, 1, 1, 1],
[1, 0, 0, 0],
[1, 0, 0, 1],
[1, 0, 1, 0],
[1, 0, 1, 1],
[1, 1, 0, 0],
[1, 1, 0, 1],
[1, 1, 1, 0],
[1, 1, 1, 1]])
In [48]: x = np.array([0, 1, 5, 14, 9, 8, 15])
In [49]: bits4[x]
Out[49]:
array([[0, 0, 0, 0],
[0, 0, 0, 1],
[0, 1, 0, 1],
[1, 1, 1, 0],
[1, 0, 0, 1],
[1, 0, 0, 0],
[1, 1, 1, 1]])
I have a matrix M with values 0 through N within it. I'd like to unroll this matrix to create a new matrix A where each submatrix A[i, :, :] represents whether or not M == i.
The solution below uses a loop.
# Example Setup
import numpy as np
np.random.seed(0)
N = 5
M = np.random.randint(0, N, size=(5,5))
# Solution with Loop
A = np.zeros((N, M.shape[0], M.shape[1]))
for i in range(N):
A[i, :, :] = M == i
This yields:
M
array([[4, 0, 3, 3, 3],
[1, 3, 2, 4, 0],
[0, 4, 2, 1, 0],
[1, 1, 0, 1, 4],
[3, 0, 3, 0, 2]])
M.shape
# (5, 5)
A
array([[[0, 1, 0, 0, 0],
[0, 0, 0, 0, 1],
[1, 0, 0, 0, 1],
[0, 0, 1, 0, 0],
[0, 1, 0, 1, 0]],
...
[[1, 0, 0, 0, 0],
[0, 0, 0, 1, 0],
[0, 1, 0, 0, 0],
[0, 0, 0, 0, 1],
[0, 0, 0, 0, 0]]])
A.shape
# (5, 5, 5)
Is there a faster way, or a way to do it in a single numpy operation?
Broadcasted comparison is your friend:
B = (M[None, :] == np.arange(N)[:, None, None]).view(np.int8)
np.array_equal(A, B)
# True
The idea is to expand the dimensions in such a way that the comparison can be broadcasted in the manner desired.
As pointed out by #Alex Riley in the comments, you can use np.equal.outer to avoid having to do the indexing stuff yourself,
B = np.equal.outer(np.arange(N), M).view(np.int8)
np.array_equal(A, B)
# True
You can make use of some broadcasting here:
P = np.arange(N)
Y = np.broadcast_to(P[:, None], M.shape)
T = np.equal(M, Y[:, None]).astype(int)
Alternative using indices:
X, Y = np.indices(M.shape)
Z = np.equal(M, X[:, None]).astype(int)
You can index into the identity matrix like so
A = np.identity(N, int)[:, M]
or so
A = np.identity(N, int)[M.T].T
Or use the new (v1.15.0) put_along_axis
A = np.zeros((N,5,5), int)
np.put_along_axis(A, M[None], 1, 0)
Note if N is much larger than 5 then creating an NxN identity matrix may be considered wasteful. We can mitigate this using stride tricks:
def read_only_identity(N, dtype=float):
z = np.zeros(2*N-1, dtype)
s, = z.strides
z[N-1] = 1
return np.lib.stride_tricks.as_strided(z[N-1:], (N, N), (-s, s))
I have a matrix with some zero
x=np.array([[1,2,3,0],[4,0,5,0],[7,0,0,0],[0,9,8,0]])
>>> x
array([[1, 2, 3, 0],
[4, 0, 5, 0],
[7, 0, 0, 0],
[0, 9, 8, 0]])
And want to random value into only a position which is not zero. I can get the (row, col) position as tuple from np.where
pos = np.where(x!=0)
>>> (array([0, 0, 0, 1, 1, 2, 3, 3], dtype=int64), array([0, 1, 2, 0, 2, 0, 1, 2], dtype=int64))
Is there a way to use np.random (or something else) for the matrix x at position from posonly without changing where is zero?
# pseudocode
new_x = np.rand(x, at pos)
I assume you want to replace non-zero value with random integer number.
You can use the combination of numpy.place and numpy.random.randint functions.
>>> x=np.array([[1,2,3,0],[4,0,5,0],[7,0,0,0],[0,9,8,0]])
>>> x
array([[1, 2, 3, 0],
[4, 0, 5, 0],
[7, 0, 0, 0],
[0, 9, 8, 0]])
>>> lower_bound, upper_bound = 1, 5 # random function boundary
>>> np.place(x, x!=0, np.random.randint(lower_bound, upper_bound, np.count_nonzero(x)))
>>> x
array([[2, 2, 3, 0],
[1, 0, 3, 0],
[2, 0, 0, 0],
[0, 4, 3, 0]])
well you can use x.nonzero() which gives you all indices of array with nonzero values
and then then you just need to put random values at those indices
nz_indices = x.nonzero()
for i,j in zip(nz_indices[0],nz_indices[1]):
x[i][j] = np.random.randint(1500) #random number till 1500
you can find more about randint() here >> randint docs
How about something simple like this:
import numpy as np
x = np.array([[1, 2, 3, 0], [4, 0, 5, 0], [7, 0, 0, 0], [0, 9, 8, 0]])
w = x != 0
x[w] = np.random.randint(10, size=x.shape)[w]
print(x)
[[2 2 2 0]
[0 0 4 0]
[1 0 0 0]
[0 3 1 0]]
You could also do
x = np.random.randint(1, 10, size=x.shape) * (x != 0)
Just index with np.nonzero
i = np.nonzero(x)
x[i] = np.random.randint(1, 10, i[0].size)
Note for reference that np.nonzero(x) <=> np.where(x) <=> np.where(x != 0)