How to make a checkerboard in numpy? - python

I'm using numpy to initialize a pixel array to a gray checkerboard (the classic representation for "no pixels", or transparent). It seems like there ought to be a whizzy way to do it with numpy's amazing array assignment/slicing/dicing operations, but this is the best I've come up with:
w, h = 600, 800
sq = 15 # width of each checker-square
self.pix = numpy.zeros((w, h, 3), dtype=numpy.uint8)
# Make a checkerboard
row = [[(0x99,0x99,0x99),(0xAA,0xAA,0xAA)][(i//sq)%2] for i in range(w)]
self.pix[[i for i in range(h) if (i//sq)%2 == 0]] = row
row = [[(0xAA,0xAA,0xAA),(0x99,0x99,0x99)][(i//sq)%2] for i in range(w)]
self.pix[[i for i in range(h) if (i//sq)%2 == 1]] = row
It works, but I was hoping for something simpler.

def checkerboard(shape):
return np.indices(shape).sum(axis=0) % 2
Most compact, probably the fastest, and also the only solution posted that generalizes to n-dimensions.

I'd use the Kronecker product kron:
np.kron([[1, 0] * 4, [0, 1] * 4] * 4, np.ones((10, 10)))
The checkerboard in this example has 2*4=8 fields of size 10x10 in each direction.

this ought to do it
any size checkerboard you want (just pass in width and height, as w, h); also i have hard-coded cell height/width to 1, though of course this could also be parameterized so that an arbitrary value is passed in:
>>> import numpy as NP
>>> def build_checkerboard(w, h) :
re = NP.r_[ w*[0,1] ] # even-numbered rows
ro = NP.r_[ w*[1,0] ] # odd-numbered rows
return NP.row_stack(h*(re, ro))
>>> checkerboard = build_checkerboard(5, 5)
>>> checkerboard
Out[3]: array([[0, 1, 0, 1, 0, 1, 0, 1, 0, 1],
[1, 0, 1, 0, 1, 0, 1, 0, 1, 0],
[0, 1, 0, 1, 0, 1, 0, 1, 0, 1],
[1, 0, 1, 0, 1, 0, 1, 0, 1, 0],
[0, 1, 0, 1, 0, 1, 0, 1, 0, 1],
[1, 0, 1, 0, 1, 0, 1, 0, 1, 0],
[0, 1, 0, 1, 0, 1, 0, 1, 0, 1],
[1, 0, 1, 0, 1, 0, 1, 0, 1, 0],
[0, 1, 0, 1, 0, 1, 0, 1, 0, 1],
[1, 0, 1, 0, 1, 0, 1, 0, 1, 0]])
with this 2D array, it's simple to render an image of a checkerboard, like so:
>>> import matplotlib.pyplot as PLT
>>> fig, ax = PLT.subplots()
>>> ax.imshow(checkerboard, cmap=PLT.cm.gray, interpolation='nearest')
>>> PLT.show()

Here's another way to do it using ogrid which is a bit faster:
import numpy as np
import Image
w, h = 600, 800
sq = 15
color1 = (0xFF, 0x80, 0x00)
color2 = (0x80, 0xFF, 0x00)
def use_ogrid():
coords = np.ogrid[0:w, 0:h]
idx = (coords[0] // sq + coords[1] // sq) % 2
vals = np.array([color1, color2], dtype=np.uint8)
img = vals[idx]
return img
def use_fromfunction():
img = np.zeros((w, h, 3), dtype=np.uint8)
c = np.fromfunction(lambda x, y: ((x // sq) + (y // sq)) % 2, (w, h))
img[c == 0] = color1
img[c == 1] = color2
return img
if __name__ == '__main__':
for f in (use_ogrid, use_fromfunction):
img = f()
pilImage = Image.fromarray(img, 'RGB')
pilImage.save('{0}.png'.format(f.func_name))
Here are the timeit results:
% python -mtimeit -s"import test" "test.use_fromfunction()"
10 loops, best of 3: 307 msec per loop
% python -mtimeit -s"import test" "test.use_ogrid()"
10 loops, best of 3: 129 msec per loop

You can use Numpy's tile function to get checkerboard array of size n*m where n and m should be even numbers for the right result...
def CreateCheckboard(n,m):
list_0_1 = np.array([ [ 0, 1], [ 1, 0] ])
checkerboard = np.tile(list_0_1, ( n//2, m//2))
print(checkerboard.shape)
return checkerboard
CreateCheckboard(4,6)
which gives the output:
(4, 6)
array([[0, 1, 0, 1, 0, 1],
[1, 0, 1, 0, 1, 0],
[0, 1, 0, 1, 0, 1],
[1, 0, 1, 0, 1, 0]])

You can use the step of start:stop:step for slicing method to update a matrix horizontally and vertically:
Here x[1::2, ::2] picks every other element starting from the first element on the row and for every second row of the matrix.
import numpy as np
print("Checkerboard pattern:")
x = np.zeros((8,8),dtype=int)
# (odd_rows, even_columns)
x[1::2,::2] = 1
# (even_rows, odd_columns)
x[::2,1::2] = 1
print(x)

Late, but for posterity:
def check(w, h, c0, c1, blocksize):
tile = np.array([[c0,c1],[c1,c0]]).repeat(blocksize, axis=0).repeat(blocksize, axis=1)
grid = np.tile(tile, ( h/(2*blocksize)+1, w/(2*blocksize)+1, 1))
return grid[:h,:w]

I'm not sure if this is better than what I had:
c = numpy.fromfunction(lambda x,y: ((x//sq) + (y//sq)) % 2, (w,h))
self.chex = numpy.array((w,h,3))
self.chex[c == 0] = (0xAA, 0xAA, 0xAA)
self.chex[c == 1] = (0x99, 0x99, 0x99)

A perfplot analysis shows that the best (fastest, most readable, memory-efficient) solution is via slicing,
def slicing(n):
A = np.zeros((n, n), dtype=int)
A[1::2, ::2] = 1
A[::2, 1::2] = 1
return A
The stacking solution is a bit faster large matrices, but arguably less well readable. The top-voted answer is also the slowest.
Code to reproduce the plot:
import numpy as np
import perfplot
def indices(n):
return np.indices((n, n)).sum(axis=0) % 2
def slicing(n):
A = np.zeros((n, n), dtype=int)
A[1::2, ::2] = 1
A[::2, 1::2] = 1
return A
def tile(n):
return np.tile([[0, 1], [1, 0]], (n // 2, n // 2))
def stacking(n):
row0 = np.array(n // 2 * [0, 1] + (n % 2) * [0])
row1 = row0 ^ 1
return np.array(n // 2 * [row0, row1] + (n % 2) * [row0])
def ogrid(n):
coords = np.ogrid[0:n, 0:n]
return (coords[0] + coords[1]) % 2
b = perfplot.bench(
setup=lambda n: n,
kernels=[slicing, indices, tile, stacking, ogrid],
n_range=[2 ** k for k in range(14)],
xlabel="n",
)
b.save("out.png")
b.show()

Can't you use hstack and vstack? See here.
Like this:
>>> import numpy as np
>>> b = np.array([0]*4)
>>> b.shape = (2,2)
>>> w = b + 0xAA
>>> r1 = np.hstack((b,w,b,w,b,w,b))
>>> r2 = np.hstack((w,b,w,b,w,b,w))
>>> board = np.vstack((r1,r2,r1,r2,r1,r2,r1))

import numpy as np
a=np.array(([1,0]*4+[0,1]*4)*4).reshape((8,8))
print(a)
[[1 0 1 0 1 0 1 0]
[0 1 0 1 0 1 0 1]
[1 0 1 0 1 0 1 0]
[0 1 0 1 0 1 0 1]
[1 0 1 0 1 0 1 0]
[0 1 0 1 0 1 0 1]
[1 0 1 0 1 0 1 0]
[0 1 0 1 0 1 0 1]]

For those wanting arbitrarily sized squares/rectangles:
import numpy as np
# if you want X squares per axis, do squaresize=[i//X for i in boardsize]
def checkerboard(boardsize, squaresize):
return np.fromfunction(lambda i, j: (i//squaresize[0])%2 != (j//squaresize[1])%2, boardsize).astype(int)
print(checkerboard((10,15), (2,3)))
[[0 0 0 1 1 1 0 0 0 1 1 1 0 0 0]
[0 0 0 1 1 1 0 0 0 1 1 1 0 0 0]
[1 1 1 0 0 0 1 1 1 0 0 0 1 1 1]
[1 1 1 0 0 0 1 1 1 0 0 0 1 1 1]
[0 0 0 1 1 1 0 0 0 1 1 1 0 0 0]
[0 0 0 1 1 1 0 0 0 1 1 1 0 0 0]
[1 1 1 0 0 0 1 1 1 0 0 0 1 1 1]
[1 1 1 0 0 0 1 1 1 0 0 0 1 1 1]
[0 0 0 1 1 1 0 0 0 1 1 1 0 0 0]
[0 0 0 1 1 1 0 0 0 1 1 1 0 0 0]]

Replace n with an even number and you will get the answer.
import numpy as np
b = np.array([[0,1],[1,0]])
np.tile(b,(n, n))

Based on Eelco Hoogendoorn's answer, if you want a checkerboard with various tile sizes you can use this:
def checkerboard(shape, tile_size):
return (np.indices(shape) // tile_size).sum(axis=0) % 2

I modified hass's answer as follows.
import math
import numpy as np
def checkerboard(w, h, c0, c1, blocksize):
tile = np.array([[c0,c1],[c1,c0]]).repeat(blocksize, axis=0).repeat(blocksize, axis=1)
grid = np.tile(tile,(int(math.ceil((h+0.0)/(2*blocksize))),int(math.ceil((w+0.0)/(2*blocksize)))))
return grid[:h,:w]

Using tile function :
import numpy as np
n = int(input())
x = np.tile(arr,(n,n//2))
x[1::2, 0::2] = 1
x[0::2, 1::2] = 1
print(x)

Very very late, but I needed a solution that allows for a non-unit checker size on an arbitrarily sized checkerboard. Here's a simple and fast solution:
import numpy as np
def checkerboard(shape, dw):
"""Create checkerboard pattern, each square having width ``dw``.
Returns a numpy boolean array.
"""
# Create individual block
block = np.zeros((dw * 2, dw * 2), dtype=bool)
block[dw:, :dw] = 1
block[:dw, dw:] = 1
# Tile until we exceed the size of the mask, then trim
repeat = (np.array(shape) + dw * 2) // np.array(block.shape)
trim = tuple(slice(None, s) for s in shape)
checkers = np.tile(block, repeat)[trim]
assert checkers.shape == shape
return checkers
To convert the checkerboard squares to colors, you could do:
checkers = checkerboard(shape, dw)
img = np.empty_like(checkers, dtype=np.uint8)
img[checkers] = 0xAA
img[~checkers] = 0x99

import numpy as np
n = int(input())
arr = ([0, 1], [1,0])
print(np.tile(arr, (n//2,n//2)))
For input 6, output:
[[0 1 0 1 0 1]
[1 0 1 0 1 0]
[0 1 0 1 0 1]
[1 0 1 0 1 0]
[0 1 0 1 0 1]
[1 0 1 0 1 0]]

I recently want the same function and i modified doug's answer a little bit as follows:
def gen_checkerboard(grid_num, grid_size):
row_even = grid_num/2 * [0,1]
row_odd = grid_num/2 * [1,0]
checkerboard = numpy.row_stack(grid_num/2*(row_even, row_odd))
return checkerboard.repeat(grid_size, axis = 0).repeat(grid_size, axis = 1)

Simplest implementation of the same.
import numpy as np
n = int(input())
checkerboard = np.tile(np.array([[0,1],[1,0]]), (n//2, n//2))
print(checkerboard)

n = int(input())
import numpy as np
m=int(n/2)
a=np.array(([0,1]*m+[1,0]*m)*m).reshape((n,n))
print (a)
So if input is n = 4 then output would be like:
[[0 1 0 1]
[1 0 1 0]
[0 1 0 1]
[1 0 1 0]]

Simplest way to write checkboard matrix using tile()
array = np.tile([0,1],n//2)
array1 = np.tile([1,0],n//2)
finalArray = np.array([array, array1], np.int32)
finalArray = np.tile(finalArray,(n//2,1))

Suppose we need a patter with length and breadth (even number) as l, b.
base_matrix = np.array([[0,1],[1,0]])
As this base matrix, which would be used as a tile already has length and breadth of 2 X 2, we would need to divide by 2.
print np.tile(base_matrix, (l / 2, b / 2))
print (np.tile(base,(4/2,6/2)))
[[0 1 0 1 0 1]
[1 0 1 0 1 0]
[0 1 0 1 0 1]
[1 0 1 0 1 0]]

n = int(input())
import numpy as np
a = np.array([0])
x = np.tile(a,(n,n))
x[1::2, ::2] = 1
x[::2, 1::2] = 1
print(x)
I guess this works perfectly well using numpy.tile( ) function.

Here is the solution using tile function in numpy.
import numpy as np
x = np.array([[0, 1], [1, 0]])
check = np.tile(x, (n//2, n//2))
# Print the created matrix
print(check)
for input 2, the Output is
[[0 1]
[1 0]]
for input 4, the Output is
[[0 1 0 1]
[1 0 1 0]
[0 1 0 1]
[1 0 1 0]]

Given odd or even 'n', below approach generates "arr" in the checkerboard pattern and does not use loops. If n is odd, this is extremely straightforward to use. If n is even, we generate the checkerboard for n-1 and then add an extra row and column.
rows = n-1 if n%2 == 0 else n
arr=(rows*rows)//2*[0,1]
arr.extend([0])
arr = np.reshape(arr, (rows,rows))
if n%2 == 0:
extra = (n//2*[1,0])
arr = np.concatenate((arr, np.reshape(extra[:-1], (1,n-1))))
arr = np.concatenate((arr, np.reshape(extra, (n,1))), 1)

Here is a generalisation to falko's answer
import numpy as np
def checkerboard(width,sq):
'''
width --> the checkerboard will be of size width x width
sq ---> each square inside the checkerboard will be of size sq x sq
'''
rep = int(width/(2*sq))
return np.kron([[1, 0] * rep, [0, 1] * rep] * rep, np.ones((sq, sq))).astype(np.uint8)
x = checkerboard(width=8,sq=4)
print(x)
print('checkerboard is of size ',x.shape)
which gives the following output
[[1 1 1 1 0 0 0 0]
[1 1 1 1 0 0 0 0]
[1 1 1 1 0 0 0 0]
[1 1 1 1 0 0 0 0]
[0 0 0 0 1 1 1 1]
[0 0 0 0 1 1 1 1]
[0 0 0 0 1 1 1 1]
[0 0 0 0 1 1 1 1]]
checkerboard is of size (8, 8)

Here's a numpy solution with some checking to make sure that the width and height are evenly divisible by the square size.
def make_checkerboard(w, h, sq, fore_color, back_color):
"""
Creates a checkerboard pattern image
:param w: The width of the image desired
:param h: The height of the image desired
:param sq: The size of the square for the checker pattern
:param fore_color: The foreground color
:param back_color: The background color
:return:
"""
w_rem = np.mod(w, sq)
h_rem = np.mod(w, sq)
if w_rem != 0 or h_rem != 0:
raise ValueError('Width or height is not evenly divisible by square '
'size.')
img = np.zeros((h, w, 3), dtype='uint8')
x_divs = w // sq
y_divs = h // sq
fore_tile = np.ones((sq, sq, 3), dtype='uint8')
fore_tile *= np.array([[fore_color]], dtype='uint8')
back_tile = np.ones((sq, sq, 3), dtype='uint8')
back_tile *= np.array([[back_color]], dtype='uint8')
for y in np.arange(y_divs):
if np.mod(y, 2):
b = back_tile
f = fore_tile
else:
b = fore_tile
f = back_tile
for x in np.arange(x_divs):
if np.mod(x, 2) == 0:
img[y * sq:y * sq + sq, x * sq:x * sq + sq] = f
else:
img[y * sq:y * sq + sq, x * sq:x * sq + sq] = b
return img

Related

Is there a way to simplify the creation of all possible (length x height) grids?

Here's my code for a 4x4 grid to better explain my problem:
#The "Duct-Tape" solution
for box0 in range(0,2):
for box1 in range(0,2):
for box2 in range(0,2):
for box3 in range(0,2):
for box4 in range(0,2):
for box5 in range(0,2):
for box6 in range(0,2):
for box7 in range(0,2): #0 = OutBag, 1 = InBag
for box8 in range(0,2):
for box9 in range(0,2):
for box10 in range(0,2):
for box11 in range(0,2):
for box12 in range(0,2):
for box13 in range(0,2):
for box14 in range(0,2):
for box15 in range(0,2):
totalGrids.append([[box0,box1,box2,box3],
[box4,box5,box6,box7],
[box8,box9,box10,box11],
[box12,box13,box14,box15]])
What's a way to make something like this for a length x height size grid?
This is another way to do it with fewer for loops by using binary arithmetic:
totalGrids = []
for i in range(0, 1 << 16):
totalGrids.append(
[
[(i >> j) & 1 for j in range(0, 4)],
[(i >> j) & 1 for j in range(4, 8)],
[(i >> j) & 1 for j in range(8, 12)],
[(i >> j) & 1 for j in range(12, 16)]
])
print(totalGrids[0])
print(totalGrids[1])
print(totalGrids[2])
print()
print(totalGrids[-3])
print(totalGrids[-2])
print(totalGrids[-1])
Output (first 3 and last 3 elements):
[[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]
[[1, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]
[[0, 1, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]
[[1, 0, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1]]
[[0, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1]]
[[1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1]]
To generalize this from 4 x 4 to height x width, something like this should work:
height = 3
width = 5
totalGrids = []
for i in range(0, 1 << (height * width)):
totalGrids.append(
[[(i >> j) & 1 for j in range(k * width, (k + 1) * width)] for k in range(0, height)]
)
Here is an explanation of the above.
The matrix, which has height x width elements, is to be filled with every possible combination of 0s and 1s across these elements. As an example, if height = 2 and width = 4, then there are 8 elements in total, and one ordering of the required combinations of 0s and 1s is:
0 0 0 0 0 0 0 0 (this is 0 in binary)
0 0 0 0 0 0 0 1 (this is 1 in binary)
0 0 0 0 0 0 1 0 (this is 2 in binary)
0 0 0 0 0 0 1 1 (this is 3 in binary)
...
0 0 0 0 1 1 1 1 (this is 15 in binary)
0 0 0 1 0 0 0 0 (this is 16 in binary)
0 0 0 1 0 0 0 1
0 0 0 1 0 0 1 0
0 0 0 1 0 0 1 1 (EXAMPLE VALUE USED BELOW)
...
0 0 1 0 0 0 0 0 (this is 32 in binary)
...
0 0 1 1 0 0 0 0 (this is 48 in binary)
...
1 1 1 1 1 1 1 1 (this is 255 = 2**8 - 1 in binary)
These are just the binary values from 0 to 2**8 - 1 which can be expressed as Python integers in range(0, 2**8). They are exactly what is needed, and now the only question is how to populate a Python list of lists of size height x width.
The answer is to use binary arithmetic. Let's look at 0 0 0 1 0 0 1 1 as an example. We can specify this in Python as an integer, namely i = 19.
For the 1st slot of 8, we want to use the rightmost binary bit in our example, which is 1. We can extract this using Python's bitwise & operation by taking value = i & 1. Applying & 1 to any integer effectively masks off all but the binary ones-place digit.
For the 2nd slot, we need to add an additional step:
First we slide the bits to the right by 1 position (allowing the rightmost bit to fall off the edge, which is fine since we have already processed it and won't need it again) using Python's right shift operation >> as follows: value = i >> 1. In binary, this yields 0 0 0 0 1 0 0 1, which is the integer 9. The right-shift operator has moved the bit that was in the binary twos-place rightward into the binary ones-place.
Next, we can use the same technique as we did for the 1st slot to mask off all but the ones-place bit: value = i & 1.
Rather than do the above as two separate statements, we can simply write: value = (i >> 1) & 1.
In general, for the j'th slot, we can extract the j'th bit from our example integer by writing: value = (i >> j) & 1.
Now let's look at the key logic within the loop:
[[(i >> j) & 1 for j in range(k * width, (k + 1) * width)] for k in range(0, height)]
This uses a nested list comprehension to loop first over k in range(0, height) and then over j in range(k * width, (k + 1) * width), and to put the result of the above bitwise expression (i >> j) & 1 into each successive element in our matrix (or list of lists).
Finally, let's look again at the very outer loop in the code:
for i in range(0, 1 << (height * width)):
This uses Python's bitwise left shift operation <<, which does the opposite of what right shift (>>) does, namely to shift the bits of 1 to the left by (height * width) binary positions. Because each shift to the left causes a number to double in value, our left shift expression gives the same result as 2 ** (height * width), which is exactly the number of 0/1 combinations that your question is seeking.
So, by iterating from 0 to 2 ** (height * width), then extracting and collating the bits of each value into the corresponding matrix elements for that iteration's matrix, and appending that matrix to the totalGrids variable, we ultimately construct a list of matrices with the required properties.

Random numbers on matrix diagonal (possible duplicate)

Is it possible to amend this code, so that matrix diagonal would consist of different random numbers?
For now they are the same:
rand_m = np.eye(4, 4, k = 0, dtype=int)
rand_m[rand_m == 1] = np.random.randint(1, 100)
print(rand_m)
[[44 0 0 0]
[ 0 44 0 0]
[ 0 0 44 0]
[ 0 0 0 44]]
randint can be used to generate a random vector at once. np.diag is used to construct a matrix with that vector being the diagonal.
np.diag(np.random.randint(1,100,4))
Try setting the size argument of randint() to 4, like this:
rand_m = np.eye(4, 4, k = 0, dtype=int)
rand_m[rand_m == 1] = np.random.randint(1, 100, size=4)

Counting matrices with specific condition

I have a list of 3x3 0,1 matrices and I want to count how many of them have 0 on diagonal and pseudo-antisymmetric, i.e A[i][j] != A[j][i] (if A[i][j] = 1 then A[j][i] should be 0).
How can I implement this? I was trying similar approach as for counting symmetric matrices(here Counting symmetric matrices), but it doesn't work here.
Assuming a is a 3x3 numpy array,
(a + np.eye(len(a)) == 1 - a.T).all()
Explanation:
a.T flips the matrix.
1 - a.T inverts flipped matrix
in the inverted matrix, diagonal elements are expected to be 1. Fix: add a diagonal matrix, a + np.eye(len(a))
test passes only if all elements satisfy the condition: (... == ...).all()
You could use your existing is_symmetric function and create a new is has_zero_main_diagonal one:
def pretty_print(matrix):
for row in matrix:
print(*row)
def has_zero_main_diagonal(matrix, n):
return all(matrix[i][i] == 0 for i in range(n))
def is_symmetric(matrix, n):
return all(matrix[i][j] == matrix[j][i] for i in range(n) for j in range(n))
Informal Testing:
symmetric_zero_diag = [[0, 1, 1],
[1, 0, 1],
[1, 1, 0]]
symmetric_non_zero_diag = [[0, 1, 1],
[1, 0, 1],
[1, 1, 1]]
asymmetric_zero_diag = [[0, 1, 1],
[1, 0, 1],
[0, 1, 0]]
asymmetric_non_zero_diag = [[1, 1, 1],
[1, 1, 1],
[0, 1, 1]]
testcases = {
'symmetric_zero_diag': symmetric_zero_diag,
'symmetric_non_zero_diag': symmetric_non_zero_diag,
'asymmetric_zero_diag': asymmetric_zero_diag,
'asymmetric_non_zero_diag': asymmetric_non_zero_diag
}
num_pseudo_antisymmetric_zero_diag = 0
for testcase_name, matrix in testcases.items():
print(f'{testcase_name}:')
pretty_print(matrix)
print(f'has_zero_main_diagonal={has_zero_main_diagonal(matrix, 3)}')
print(f'is_symmetric={is_symmetric(matrix, 3)}')
is_pseudo_antisymmetric_zero_diag = has_zero_main_diagonal(matrix, 3) and not is_symmetric(matrix, 3)
if is_pseudo_antisymmetric_zero_diag:
num_pseudo_antisymmetric_zero_diag += 1
print((f'has_zero_main_diagonal and not is_symmetric='
f'{is_pseudo_antisymmetric_zero_diag}'))
print()
print(f'Number of matrices that are pseudo-antisymmetric with a zero_diag: {num_pseudo_antisymmetric_zero_diag}')
Output:
symmetric_zero_diag:
0 1 1
1 0 1
1 1 0
has_zero_main_diagonal=True
is_symmetric=True
has_zero_main_diagonal and not is_symmetric=False
symmetric_non_zero_diag:
0 1 1
1 0 1
1 1 1
has_zero_main_diagonal=False
is_symmetric=True
has_zero_main_diagonal and not is_symmetric=False
asymmetric_zero_diag:
0 1 1
1 0 1
0 1 0
has_zero_main_diagonal=True
is_symmetric=False
has_zero_main_diagonal and not is_symmetric=True
asymmetric_non_zero_diag:
1 1 1
1 1 1
0 1 1
has_zero_main_diagonal=False
is_symmetric=False
has_zero_main_diagonal and not is_symmetric=False
Number of matrices that are pseudo-antisymmetric with a zero_diag: 1

Generate lexicographic series efficiently in Python

I want to generate a lexicographic series of numbers such that for each number the sum of digits is a given constant. It is somewhat similar to 'subset sum problem'. For example if I wish to generate 4-digit numbers with sum = 3 then I have a series like:
[3 0 0 0]
[2 1 0 0]
[2 0 1 0]
[2 0 0 1]
[1 2 0 0] ... and so on.
I was able to do it successfully in Python with the following code:
import numpy as np
M = 4 # No. of digits
N = 3 # Target sum
a = np.zeros((1,M), int)
b = np.zeros((1,M), int)
a[0][0] = N
jj = 0
while a[jj][M-1] != N:
ii = M-2
while a[jj][ii] == 0:
ii = ii-1
kk = ii
if kk > 0:
b[0][0:kk-1] = a[jj][0:kk-1]
b[0][kk] = a[jj][kk]-1
b[0][kk+1] = N - sum(b[0][0:kk+1])
b[0][kk+2:] = 0
a = np.concatenate((a,b), axis=0)
jj += 1
for ii in range(0,len(a)):
print a[ii]
print len(a)
I don't think it is a very efficient way (as I am a Python newbie). It works fine for small values of M and N (<10) but really slow beyond that. I wish to use it for M ~ 100 and N ~ 6. How can I make my code more efficient or is there a better way to code it?
Very effective algorithm adapted from Jorg Arndt book "Matters Computational"
(Chapter 7.2 Co-lexicographic order for compositions into exactly k parts)
n = 4
k = 3
x = [0] * n
x[0] = k
while True:
print(x)
v = x[-1]
if (k==v ):
break
x[-1] = 0
j = -2
while (0==x[j]):
j -= 1
x[j] -= 1
x[j+1] = 1 + v
[3, 0, 0, 0]
[2, 1, 0, 0]
[2, 0, 1, 0]
[2, 0, 0, 1]
[1, 2, 0, 0]
[1, 1, 1, 0]
[1, 1, 0, 1]
[1, 0, 2, 0]
[1, 0, 1, 1]
[1, 0, 0, 2]
[0, 3, 0, 0]
[0, 2, 1, 0]
[0, 2, 0, 1]
[0, 1, 2, 0]
[0, 1, 1, 1]
[0, 1, 0, 2]
[0, 0, 3, 0]
[0, 0, 2, 1]
[0, 0, 1, 2]
[0, 0, 0, 3]
Number of compositions and time on seconds for plain Python (perhaps numpy arrays are faster) for n=100, and k = 2,3,4,5 (2.8 ghz Cel-1840)
2 5050 0.040000200271606445
3 171700 0.9900014400482178
4 4421275 20.02204465866089
5 91962520 372.03577995300293
I expect time 2 hours for 100/6 generation
Same with numpy arrays (x = np.zeros((n,), dtype=int)) gives worse results - but perhaps because I don't know how to use them properly
2 5050 0.07999992370605469
3 171700 2.390003204345703
4 4421275 54.74532389640808
Native code (this is Delphi, C/C++ compilers might optimize better) generates 100/6 in 21 seconds
3 171700 0.012
4 4421275 0.125
5 91962520 1.544
6 1609344100 20.748
Cannot go sleep until all measurements aren't done :)
MSVS VC++: 18 seconds! (O2 optimization)
5 91962520 1.466
6 1609344100 18.283
So 100 millions variants per second.
A lot of time is wasted for checking of empty cells (because fill ratio is small). Speed described by Arndt is reached on higher k/n ratios and is about 300-500 millions variants per second:
n=25, k=15 25140840660 60.981 400 millions per second
My recommendations:
Rewrite it as a generator utilizing yield, rather than a loop that concatenates a global variable on each iteration.
Keep a running sum instead of calculating the sum of some subset of the array representation of the number.
Operate on a single instance of your working number representation instead of splicing a copy of it to a temporary variable on each iteration.
Note no particular order is implied.
I have a better solution using itertools as follows,
from itertools import product
n = 4 #number of elements
s = 3 #sum of elements
r = []
for x in range(n):
r.append(x)
result = [p for p in product(r, repeat=n) if sum(p) == s]
print(len(result))
print(result)
I am saying this is better because it took 0.1 secs on my system, while your code with numpy took 0.2 secs.
But as far as n=100 and s=6, this code takes time to go through all the combinations, I think it will take days to compute the results.
I found a solution using itertools as well (Source: https://bugs.python.org/msg144273). Code follows:
import itertools
import operator
def combinations_with_replacement(iterable, r):
# combinations_with_replacement('ABC', 2) --> AA AB AC BB BC CC
pool = tuple(iterable)
n = len(pool)
if not n and r:
return
indices = [0] * r
yield tuple(pool[i] for i in indices)
while True:
for i in reversed(range(r)):
if indices[i] != n - 1:
break
else:
return
indices[i:] = [indices[i] + 1] * (r - i)
yield tuple(pool[i] for i in indices)
int_part = lambda n, k: (tuple(map(c.count, range(k))) for c in combinations_with_replacement(range(k), n))
for item in int_part(3,4): print(item)

Numpy: how to convert observations to probabilities?

I have a feature matrix and a corresponding targets, which are ones or zeroes:
# raw observations
features = np.array([[1, 1, 0],
[1, 1, 0],
[0, 1, 0],
[0, 1, 0],
[0, 1, 0],
[0, 0, 1]])
targets = np.array([1, 0, 1, 1, 0, 0])
As you can see, each feature may correspond to both ones and zeros. I need to convert my raw observation matrix to probability matrix, where each feature will correspond to the probability of seeing one as a target:
[1 1 0] -> 0.5
[0 1 0] -> 0.67
[0 0 1] -> 0
I have constructed a quite straight-forward solution:
import numpy as np
# raw observations
features = np.array([[1, 1, 0],
[1, 1, 0],
[0, 1, 0],
[0, 1, 0],
[0, 1, 0],
[0, 0, 1]])
targets = np.array([1, 0, 1, 1, 0, 0])
from collections import Counter
def convert_obs_to_proba(features, targets):
features_ = []
targets_ = []
# compute unique rows (idx will point to some representative)
b = np.ascontiguousarray(features).view(np.dtype((np.void, features.dtype.itemsize * features.shape[1])))
_, idx = np.unique(b, return_index=True)
idx = idx[::-1]
zeros = Counter()
ones = Counter()
# collect row-wise number of one and zero targets
for i, row in enumerate(features[:]):
if targets[i] == 0:
zeros[tuple(row)] += 1
else:
ones[tuple(row)] += 1
# iterate over unique features and compute probabilities
for k in idx:
unique_row = features[k]
zero_count = zeros[tuple(unique_row)]
one_count = ones[tuple(unique_row)]
proba = float(one_count) / float(zero_count + one_count)
features_.append(unique_row)
targets_.append(proba)
return np.array(features_), np.array(targets_)
features_, targets_ = convert_obs_to_proba(features, targets)
print(features_)
print(targets_)
which:
extracts unique features;
counts number of zero and one observations targets for each unique feature;
computes probability and constructs the result.
Could it be solved in a prettier way using some advanced numpy magic?
Update. Previous code was pretty inefficient O(n^2). Converted it to more performance-friendly. Old code:
import numpy as np
# raw observations
features = np.array([[1, 1, 0],
[1, 1, 0],
[0, 1, 0],
[0, 1, 0],
[0, 1, 0],
[0, 0, 1]])
targets = np.array([1, 0, 1, 1, 0, 0])
def convert_obs_to_proba(features, targets):
features_ = []
targets_ = []
# compute unique rows (idx will point to some representative)
b = np.ascontiguousarray(features).view(np.dtype((np.void, features.dtype.itemsize * features.shape[1])))
_, idx = np.unique(b, return_index=True)
idx = idx[::-1]
# calculate ZERO class occurences and ONE class occurences
for k in idx:
unique_row = features[k]
zeros = 0
ones = 0
for i, row in enumerate(features[:]):
if np.array_equal(row, unique_row):
if targets[i] == 0:
zeros += 1
else:
ones += 1
proba = float(ones) / float(zeros + ones)
features_.append(unique_row)
targets_.append(proba)
return np.array(features_), np.array(targets_)
features_, targets_ = convert_obs_to_proba(features, targets)
print(features_)
print(targets_)
It's easy using Pandas:
df = pd.DataFrame(features)
df['targets'] = targets
Now you have:
0 1 2 targets
0 1 1 0 1
1 1 1 0 0
2 0 1 0 1
3 0 1 0 1
4 0 1 0 0
5 0 0 1 0
Now, the fancy part:
df.groupby([0,1,2]).targets.mean()
Gives you:
0 1 2
0 0 1 0.000000
1 0 0.666667
1 1 0 0.500000
Name: targets, dtype: float64
Pandas doesn't print the 0 at the leftmost part of the 0.666 row, but if you inspect the value there, it is indeed 0.
np.sum(np.reshape([targets[f] if tuple(features[f])==tuple(i) else 0 for i in np.vstack(set(map(tuple,features))) for f in range(features.shape[0])],features.shape[::-1]),axis=1)/np.sum(np.reshape([1 if tuple(features[f])==tuple(i) else 0 for i in np.vstack(set(map(tuple,features))) for f in range(features.shape[0])],features.shape[::-1]),axis=1)
Here you go, numpy magic! Although unnecceserily so, this could probably be cleaned up using some boring variables ;)
(And this is probably far from optimal)

Categories

Resources