Matrix multiplication using hdf5 - python

I'm trying to multiply two big matrices under a memory limit using HDF5 (PyTables),
but the np.dot function gives me the error:
ValueError: array is too big
Do I need to do the matrix multiplication myself, maybe blockwise, or is there another Python function similar to np.dot?
import numpy as np
import time
import tables
import cProfile
import numexpr as ne
# Build matrix A on disk as a compressed PyTables CArray, one row batch at a
# time. n_row, n_col and n_batch are placeholders that must be defined first.
rows = n_row
cols = n_col
batches = n_batch
# NOTE(review): the samples written below are floats in [0, 1); storing them
# through a UInt8Atom presumably truncates them to 0 -- confirm the atom type.
atom = tables.UInt8Atom()
filters = tables.Filters(complevel=9, complib='blosc')  # tune parameters
# Raw string: '\c' is not a valid escape sequence in a normal string literal.
fileName_a = r'C:\carray_a.h5'
shape_a = (rows * batches, cols)  # predefined size
h5f_a = tables.open_file(fileName_a, 'w')
ca_a = h5f_a.create_carray(h5f_a.root, 'carray', atom, shape_a, filters=filters)
for i in range(batches):
    # Fill rows [i*rows, (i+1)*rows) with one freshly generated batch.
    data = np.random.rand(rows, cols)
    ca_a[i * rows:(i + 1) * rows] = data[:]
# Build matrix B (n_col x n_row*n_batch) on disk, written in row batches.
rows = n_col
cols = n_row
batches = n_batch
# Raw string: '\c' is not a valid escape sequence in a normal string literal.
fileName_b = r'C:\carray_b.h5'
shape_b = (rows, cols * batches)  # predefined size
h5f_b = tables.open_file(fileName_b, 'w')
ca_b = h5f_b.create_carray(h5f_b.root, 'carray', atom, shape_b, filters=filters)
# need to batch by cols
# Integer division: the batch height is used as an array dimension and as a
# slice bound, both of which require an int. If rows is not a multiple of
# batches, the trailing rows % batches rows are never written by this loop.
sz = rows // batches
for i in range(batches):
    data = np.random.rand(sz, cols * batches)
    ca_b[i * sz:(i + 1) * sz] = data[:]
# Create the (initially unwritten) on-disk output matrix C = A x B.
rows = n_batch * n_row
cols = n_batch * n_row
# Raw string: '\c' is not a valid escape sequence in a normal string literal.
fileName_c = r'C:\carray_c.h5'
shape_c = (rows, cols)  # predefined size
h5f_c = tables.open_file(fileName_c, 'w')
ca_c = h5f_c.create_carray(h5f_c.root, 'carray', atom, shape_c, filters=filters)
# Time a direct np.dot on the on-disk arrays. This is the call that the
# question reports as failing with "ValueError: array is too big".
a = h5f_a.root.carray  # [:]
b = h5f_b.root.carray  # [:]
c = h5f_c.root.carray
t0 = time.time()
# Note: this rebinds c to the np.dot result; it does not write into the CArray.
c = np.dot(a, b)  # error if array is big
print(time.time() - t0)
Update: here is the code. Interestingly, it runs even faster when using HDF5.
import numpy as np
import tables
import time
# Blockwise matrix product C = A x B, with C stored on disk as a PyTables CArray,
# followed by a timing and correctness comparison against an in-memory np.dot.
sz = 100  # chunk size
n_row = 10000  # m
n_col = 1000  # n
# for arbitrary size
A = np.random.randint(5, size=(n_row, n_col))
B = np.random.randint(5, size=(n_col, n_row))
# using numpy array
# C = np.zeros((n_row, n_row))
# using hdf5
fileName_C = 'CArray_C.h5'
atom = tables.Float32Atom()
shape = (A.shape[0], B.shape[1])
Nchunk = 128  # ?
chunkshape = (Nchunk, Nchunk)
chunk_multiple = 1
block_size = chunk_multiple * Nchunk
h5f_C = tables.open_file(fileName_C, 'w')
C = h5f_C.create_carray(h5f_C.root, 'CArray', atom, shape, chunkshape=chunkshape)
sz = block_size  # the block edge actually used below (overrides the value above)
t0 = time.time()
for i in range(0, A.shape[0], sz):
    for j in range(0, B.shape[1], sz):
        for k in range(0, A.shape[1], sz):
            # Accumulate the k-th partial product into output block (i, j).
            # Slices past the array edge are clipped, so ragged blocks work.
            C[i:i + sz, j:j + sz] += np.dot(A[i:i + sz, k:k + sz], B[k:k + sz, j:j + sz])
print(time.time() - t0)
# Reference result computed fully in memory, timed for comparison.
t0 = time.time()
res = np.dot(A, B)
print(time.time() - t0)
# Read C back into memory and compare values explicitly.
print(np.allclose(C[:], res))
h5f_C.close()

I don't know of an np.dot that works without loading everything into memory. I think blocking would work pretty well. Create an output array (called "c" below) as a PyTables CArray and fill it in blocks. You should choose the chunkshape when you create it to match your blocking scheme. Something like:
# Blockwise product c = a x b, where c is an on-disk PyTables CArray.
# h5f is assumed to be an already opened, writable PyTables file.
atom = tables.Float32Atom()  # you have UInt8Atom() above. do you mean that?
shape = (a.shape[0], b.shape[1])
# you can vary block_size and chunkshape independently, but I would
# aim to have block_size an integer multiple of chunkshape
# your mileage may vary and depends on the array size and how you'll
# access it in the future.
Nchunk = 128  # ?
chunkshape = (Nchunk, Nchunk)
chunk_multiple = 1
block_size = chunk_multiple * Nchunk
c = h5f.create_carray(h5f.root, 'c', atom, shape, chunkshape=chunkshape)
for i_start in range(0, a.shape[0], block_size):
    for j_start in range(0, b.shape[1], block_size):
        for k_start in range(0, a.shape[1], block_size):
            # Add the partial product of one block row of a and one block
            # column of b into the (i, j) output block.
            c[i_start:i_start + block_size, j_start:j_start + block_size] += np.dot(
                a[i_start:i_start + block_size, k_start:k_start + block_size],
                b[k_start:k_start + block_size, j_start:j_start + block_size])


Using map() on a function with multiple inputs to get rid of for loops

I have a function to upsample multiple arrays that I want to write as efficiently as possible (because I have to run it 370000 times).
This function takes multiple inputs and is composed of 2 for loops. To upsample my arrays, I loop over this function with a parameter k, and I would like to get rid of this loop (which sits outside of the function). I tried using a mix of map() and list-comprehension to minimize my computing time but I can't make it work.
How to get my map() part of the code working (see last section of code) ? Is there a better way than map() to get rid of for loops ?
Function interpolate_and_get_results: 2 for loops. Takes 3D, 2D arrays and int as inputs
This function is run inside a for loop (parameter k) that I want to get rid of
I wrote some example code, the part with map() does not work because I can't think of a way to pass the k parameter as a list, but also an input.
Thank you !
ps: code to parallelize the interpolation function that I do not use for this example
import numpy as np
import time
#%% --- SETUP OF THE PROBLEM --- %%#
temperatures = np.random.rand(10,4,7)*100
precipitation = np.random.rand(10,4,7)
snow = np.random.rand(10,4,7)
# Flatten the arrays to make them iterable with map()
temperatures = temperatures.reshape(10,4*7)
precipitation = precipitation.reshape(10,4*7)
snow = snow.reshape(10,4*7)
# Array of altitudes to "adjust" the temperatures
alt = np.random.rand(4,7)*1000
# Flatten the array
alt = alt.reshape(4*7)
# Weight Matrix
w = np.random.rand(4*7, 1000, 1000)
#%% Function
def interpolate_and_get_results(temp, prec, Eprec, w, i, k):
    """Accumulate the weighted contribution of source cell ``k`` at step ``i``.

    Reads the module-level arrays ``temperatures``, ``precipitation``,
    ``snow`` (indexed ``[i, k]``) and ``alt`` (indexed ``[k]``), and adds
    ``factor * w[k]`` in place to ``temp``, ``prec`` and ``Eprec``.

    Args:
        temp: 2-D output array updated in place with the temperature term.
        prec: 2-D output array updated in place with the precipitation term.
        Eprec: 2-D output array updated in place with the snow term.
        w: 3-D weight array; ``w[k]`` must have the shape of the outputs.
        i: index along the first axis of the input arrays.
        k: index of the source cell (second axis of the inputs, first of w).

    Returns:
        None. The three output arrays are modified in place.
    """
    # Scalar factors for this (i, k) pair.
    factor1 = ((temperatures[i, k] - 272.15) + (-alt[k] * -6 / 1000))
    factor2 = precipitation[i, k]
    factor3 = snow[i, k]
    # One whole-array update per output replaces the per-cell double loop;
    # each element still receives exactly factor * w[k, row, col].
    weights = w[k]
    temp += factor1 * weights
    prec += factor2 * weights
    Eprec += factor3 * weights
#%% --- Function call without loop simplification --- ##%
# Prepare a template array
dummy = np.zeros((w.shape[1], w.shape[2]))
# Initialize the global arrays to be filled
tempYEAR2 = np.zeros((9, dummy.shape[0], dummy.shape[1]))
precYEAR2 = np.zeros((9, dummy.shape[0], dummy.shape[1]))
EprecYEAR2 = np.zeros((9, dummy.shape[0], dummy.shape[1]))
ts = time.time()
for i in range(temperatures.shape[0]):
    # Create empty host arrays
    temp = dummy.copy()
    prec = dummy.copy()
    Eprec = dummy.copy()
    # Accumulate the contribution of every weight layer into the host arrays.
    for k in range(w.shape[0]):
        interpolate_and_get_results(temp, prec, Eprec, w, i, k)
print('Time: ', (time.time()-ts))
#%% --- With Map (DOES NOT WORK) --- %%#
del k
dummy = np.zeros((w.shape[1], w.shape[2]))
# Initialize the global arrays to be filled
tempYEAR2 = np.zeros((9, dummy.shape[0], dummy.shape[1]))
precYEAR2 = np.zeros((9, dummy.shape[0], dummy.shape[1]))
EprecYEAR2 = np.zeros((9, dummy.shape[0], dummy.shape[1]))
# Create a list k to be iterated through with the map() function
k = [k for k in range(0, temperatures.shape[1])]
for i in range(temperatures.shape[0]):
# Create empty host arrays
temp = dummy.copy()
prec = dummy.copy()
Eprec = dummy.copy()
# Call the interpolate function with map() iterating through k
map(interpolate_and_get_results(temp, prec, Eprec, w, i, k), k)
Code from @Jérôme Richard using numba, added at the request of user @ken (takes 48.81 s to run on my PC):
import numpy as np
import multiprocessing as mp
import time
#%% ------ Create data ------ ###
temperatures = np.random.rand(10,4,7)*100
precipitation = np.random.rand(10,4,7)
snow = np.random.rand(10,4,7)
# Array of altitudes to "adjust" the temperatures
alt = np.random.rand(4,7)*1000
#%% ------ IDW Interpolation ------ ###
# We create a weight matrix that we use to upsample our temperatures, precipitations and snow matrices
# This part is not that important, it works well as it is
# Build one Gaussian weight layer per source cell (MX*MY layers) on an
# upsampled (N*MX+1) x (N*MY+1) grid, then normalize the layers so that
# they sum to 1 at every grid point.
MX, MY = np.shape(temperatures[0])
N = 300
T = np.zeros([N*MX+1, N*MY+1])
# create NxM inverse distance weight matrices based on Gaussian interpolation
x = np.arange(0, N*MX+1)
y = np.arange(0, N*MY+1)
X, Y = np.meshgrid(x, y)
k = 0
w = np.zeros([MX*MY, N*MX+1, N*MY+1])
for mx in range(MX):
    for my in range(MY):
        # Gaussian centred on source cell (mx, my), scaled to the fine grid
        add_point = np.exp(-((mx*N-X.T)**2+(my*N-Y.T)**2)/N**2)
        w[k, :, :] += add_point
        k += 1  # one layer per (mx, my) pair
sum_weights = np.sum(w, axis=0)
for k in range(MX*MY):
    w[k, :, :] /= sum_weights
#%% --- Function --- %%#
# Code from Jérôme Richard:
import numba as nb
# get_results + interpolator
@nb.njit('void(float64[:,::1], float64[:,::1], float64[:,::1], float64[:,:,::1], int_, int_, int_, int_)', parallel=True)
def interpolate_and_get_results(temp, prec, Eprec, w, i, k, mx, my):
    """Add the contribution of source cell (mx, my) at step i, weighted by w[k].

    Reads the module-level arrays ``temperatures``, ``precipitation``,
    ``snow`` (indexed ``[i, mx, my]``) and ``alt`` (indexed ``[mx, my]``) and
    accumulates ``factor * w[k, row, col]`` in place into ``temp``, ``prec``
    and ``Eprec``. Returns nothing.

    NOTE(review): the function is compiled when it is defined because an
    explicit signature is given, so the module-level arrays must already
    exist at that point -- confirm against the Numba documentation how
    later rebinding of those globals is treated.
    """
    factor1 = ((temperatures[i, mx, my] - 272.15) + (-alt[mx, my] * -6 / 1000))
    factor2 = precipitation[i, mx, my]
    factor3 = snow[i, mx, my]
    # Fill the output grids; each prange iteration writes to a distinct row.
    for row in nb.prange(w.shape[1]):
        for col in range(w.shape[2]):
            val = w[k, row, col]
            temp[row, col] += factor1 * val
            prec[row, col] += factor2 * val
            Eprec[row, col] += factor3 * val
#%% --- Main Loop --- %%#
ts = time.time()
if __name__ == '__main__':
    dummy = np.zeros((w.shape[1], w.shape[2]))
    # Initialize the permanent arrays to be filled
    tempYEAR = np.zeros((9, dummy.shape[0], dummy.shape[1]))
    precYEAR = np.zeros((9, dummy.shape[0], dummy.shape[1]))
    EprecYEAR = np.zeros((9, dummy.shape[0], dummy.shape[1]))
    smbYEAR = np.zeros((9, dummy.shape[0], dummy.shape[1]))
    # Initialize semi-permanent array
    smb = np.zeros((dummy.shape[0], dummy.shape[1]))
    # Loop over the "time" axis
    for i in range(0, temperatures.shape[0]):
        # Create empty semi-permanent arrays
        temp = dummy.copy()
        prec = dummy.copy()
        Eprec = dummy.copy()
        # Loop over the different weights
        for k in range(w.shape[0]):
            # Loops over the cells of the array to be upsampled
            # NOTE(review): every weight layer k is combined with every
            # (mx, my) cell here -- confirm this pairing is intended.
            for mx in range(MX):
                for my in range(MY):
                    interpolate_and_get_results(temp, prec, Eprec, w, i, k, mx, my)
        # At each timestep, update the semi-permanent array using the results
        # from the interpolate function (mask computed once, used twice)
        mask = np.logical_and(temp <= 0, prec > 0)
        smb[mask] += prec[mask]
        # Fill the permanent arrays (equivalent of storing the results at the
        # end of every year) and reinitialize the semi-permanent array every
        # 5th timestep
        if i % 5 == 0:
            year = i // 5
            # Permanent
            tempYEAR[year] = temp
            precYEAR[year] = prec
            EprecYEAR[year] = Eprec
            smbYEAR[year] = smb
            # Semi-permanent
            smb = np.zeros((dummy.shape[0], dummy.shape[1]))
    print("Time spent:", time.time()-ts)
Note: This answer is not about how to use map, it's about "a better way".
You are doing a lot of redundant calculations. Believe it or not, this code outputs the same result.
# No change in the initialization section above.
ts = time.time()
if __name__ == '__main__':
    dummy = np.zeros((w.shape[1], w.shape[2]))
    # Initialize the permanent arrays to be filled
    tempYEAR = np.zeros((9, dummy.shape[0], dummy.shape[1]))
    precYEAR = np.zeros((9, dummy.shape[0], dummy.shape[1]))
    EprecYEAR = np.zeros((9, dummy.shape[0], dummy.shape[1]))
    smbYEAR = np.zeros((9, dummy.shape[0], dummy.shape[1]))
    smb = np.zeros((dummy.shape[0], dummy.shape[1]))
    # Loop-invariant pieces, computed once: the shifted temperatures, the sum
    # of all weight layers, and the summed altitude correction.
    temperatures_inter = temperatures - 272.15
    w_inter = w.sum(axis=0)
    alt_inter = (alt * (-6 / 1000)).sum()
    for i in range(0, temperatures_inter.shape[0]):
        # Each field is a scalar (sum over all source cells) times w_inter.
        temp_i = (temperatures_inter[i].sum() - alt_inter) * w_inter
        prec_i = precipitation[i].sum() * w_inter
        Eprec_i = snow[i].sum() * w_inter
        condition = np.logical_and(temp_i <= 0, prec_i > 0)
        smb[condition] += prec_i[condition]
        # Store the results and reset smb on every 5th timestep.
        if i % 5 == 0:
            tempYEAR[i // 5] = temp_i
            precYEAR[i // 5] = prec_i
            EprecYEAR[i // 5] = Eprec_i
            smbYEAR[i // 5] = smb
            smb = np.zeros((dummy.shape[0], dummy.shape[1]))
    print("Time spent:", time.time() - ts)
I verified the results by comparing them to the output of the code that uses numba. The difference is about 0.0000001, which is probably caused by rounding error.
print((tempYEAR_from_yours - tempYEAR_from_mine).sum()) # -8.429287845501676e-08
print((precYEAR_from_yours - precYEAR_from_mine).sum()) # 2.595697878859937e-09
print((EprecYEAR_from_yours - EprecYEAR_from_mine).sum()) # -7.430216442116944e-09
print((smbYEAR_from_yours - smbYEAR_from_mine).sum()) # -6.875431779462815e-09
On my PC, this code took 0.36 seconds. It does not use numba and is not even parallelized. It just eliminated redundancy.

Generate equal size batches from N numpy arrays

I have N NumPy arrays of shape data[n,m,3]. I want to fit/squeeze/split/slice/reshape them into N' arrays of shape new_data_#[1000,m,3] where # is the indexing of new arrays. The problem is that n can be smaller, or bigger than 1000. When it is smaller somehow I should fill the rest of 1000 capacity of new_array with the next array, and when it is bigger than 1000 I should make a new_data_# and add the rest to that one. I don't know how to manage this. Here is a pseudo-code but it can't be done this way, for example, the while maybe is not necessary. The output can be written to the disk or returned in a new data format.
def array2blocks(array_files)
for each N in array_files:
N = data = np.random.rand(n, m, 3)
new_data = np.zeros((1000, m, 3), dtype=np.float32)
index = 0
while j <= new_data.shape[0]:
for i in range(data.shape[0]):
print("--->", data[i,:,:])
print (i)
if i <= new_data.shape[0]:
# here first we should check the left capacity of new_data and then insert data into it
# new_data[i, :, :] = data[i, :, :] #this overrides previous items so not correct
new_data_name = 'new_data' + '_' + str(index)
# here fill rest of the data in the new_data
index += 1
#when capacity is full write it to the disk
UPDATE with Aaron's old answer:
I replaced 1000 with batch_size = 5 to make it simple.
def numpyarrays2blocks(array_files):
    """Cut every input array into zero-padded batches of 5 rows.

    Each array is split independently along its first axis; the last batch
    of an array is padded with zeros when fewer than 5 rows remain. Rows of
    different input arrays are never merged into the same batch.

    Args:
        array_files: iterable of NumPy arrays sharing their trailing shape,
            e.g. ``(n, m, 3)``.

    Returns:
        List of float32 arrays of shape ``(5,) + trailing_shape``, in input
        order. Each batch is also printed, as before.
    """
    batch_size = 5
    new_arrays = []
    for data in array_files:
        n = data.shape[0]
        i = 0  # the current row index to insert
        while i < n:
            new_data = np.zeros((batch_size,) + data.shape[1:], dtype=np.float32)
            j = min(i + batch_size, n)  # the last row (exclusive) to copy to new_data
            # j - i is the number of rows to copy
            new_data[:j - i] = data[i:j]
            print('NEW DATA: ', new_data)
            new_arrays.append(new_data)
            i = j  # update the index
    return new_arrays
data is used to store the temporary result, and data_start is the index to insert rows to data.
Allocate data if it is None
yield data if it is fully filled.
merge_and_split is a generator so that the memory demand should be low.
import random
from typing import Iterator
import numpy as np
def merge_and_split(arrays, batch_size) -> Iterator:
    """Re-chunk a sequence of arrays into batches of ``batch_size`` rows.

    The inputs are treated as one long stream of rows (concatenated along
    axis 0). Full batches are yielded as soon as they are filled; a final,
    partially filled batch is yielded zero-padded to ``batch_size`` rows.

    Args:
        arrays: iterable of NumPy arrays sharing their trailing shape. The
            dtype of the first array is used for every batch.
        batch_size: number of rows per yielded batch; must be >= 1.

    Yields:
        Arrays of shape ``(batch_size,) + arrays[0].shape[1:]``.

    Raises:
        ValueError: if ``batch_size`` is smaller than 1 (raised when the
            generator is first advanced).
        AssertionError: if the trailing shapes of the inputs differ.
    """
    if batch_size < 1:
        # A non-positive batch size would never consume any rows.
        raise ValueError("batch_size must be a positive integer")
    arrays = tuple(arrays)
    if not arrays:
        # Nothing to re-chunk: produce no batches instead of failing on arrays[0].
        return
    dtype = arrays[0].dtype
    data_shape = (batch_size,) + arrays[0].shape[1:]
    assert all(a.shape[1:] == data_shape[1:] for a in arrays), "Shape mismatch"
    data = None  # batch currently being filled, or None if none is open
    data_start = 0  # next free row in data
    for src in arrays:
        src_index = 0
        src_avail = src.shape[0]
        while src_avail >= 1:
            if data is None:
                # allocate if None
                data = np.zeros(data_shape, dtype=dtype)
                data_start = 0
            # Copy as many rows as fit in the batch and remain in the source.
            num_moved = min(batch_size - data_start, src_avail)
            data[data_start:data_start + num_moved, ...] = src[src_index:src_index + num_moved, ...]
            data_start += num_moved
            src_index += num_moved
            src_avail -= num_moved
            if data_start >= batch_size:
                yield data
                data = None
    if data is not None:
        # Trailing, partially filled batch (remaining rows stay zero).
        yield data
def input_arrays():
    """Return 10 random integer test arrays of shape ``(rows, 4, 3)``.

    The row counts (1 to 5) come from a ``random.Random`` seeded with 13 and
    are therefore reproducible; the values (0 to 9) are drawn from NumPy's
    global generator and are not seeded here.
    """
    number = 10
    r = random.Random(13)
    return [np.random.randint(0, 10, size=(r.randint(1, 5), 4, 3)) for _ in range(number)]
def main():
    """Check merge_and_split against a plain concatenation of the inputs.

    Prints the concatenated shape with the total row count, then whether the
    first ``row_number`` rows of the stacked batches equal the stacked inputs.
    """
    # Testing input and output
    arrays = input_arrays()
    # for i, item in enumerate(arrays):
    #     print('input', i, item.shape)
    #     print(item)
    result = list(merge_and_split(arrays, 5))
    # for i, item in enumerate(result):
    #     print('result', i, item.shape)
    #     print(item)
    src_concat = np.vstack(arrays)
    row_number = sum(s.shape[0] for s in arrays)
    print('concatenated', src_concat.shape, row_number)
    out_concat = np.vstack(result)
    # Rows past row_number are zero padding from the last batch; ignore them.
    print((out_concat[0:row_number, ...] == src_concat).all())  # They are indeed the same
if __name__ == '__main__':
    # Run the self-check only when executed as a script.
    main()
You can concatenate all your original arrays and then split them:
ars = ... # list of N arrays
ars = np.concatenate(ars, axis=0)
ars = np.split(ars, np.arange(1000, ars.shape[0], 1000))
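Note that the last line cannot be shortened to ars = np.split(ars, 1000): an integer second argument is the number of equal sections, not the size of each section. The integer form that gives sections of 1000 rows is ars = np.split(ars, ars.shape[0] // 1000), and it works only if you're sure that the total number of rows is a multiple of 1000, since np.split raises a ValueError otherwise. Specifying explicit split points, as with np.arange, allows you to have a shorter final segment.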

Proper way to save data in hdf5 file to speed up numpy matrix slicing?

What I have:
numpy matrix, size = (n, m)
row names, size = n
What I want:
Create HDF5 file,
save numpy matrix with row names properly.
use this hdf5 file to calculate cosine similarity between two vectors / matrices.
The size of these vectors / matrices are always different.
What I have tried:
import h5py
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
vecs = np.random.rand(50000, 150)
names = np.random.choice(range(10000, 100000), size=50000)
# hdf5 way
with h5py.File('test.h5', mode='w', libver='latest') as f:
    # NOTE(review): this dataset is created but never written to below; the
    # vectors are stored as file-level attributes keyed by name instead.
    f.create_dataset('vectors', shape=(150,), dtype=np.float16, compression='gzip', compression_opts=9)
    for name, vec in zip(names, vecs):
        f.attrs[str(name)] = vec
# memory-map way
mmap = np.memmap(filename='test.mymemmap', shape=(50000, 150), dtype='float16', mode='w+', order='F')
# Maps each row name to its row index in the memmap; a name that occurs more
# than once keeps the index of its last occurrence.
name_ind = dict()
for i, (name, vec) in enumerate(zip(names, vecs)):
    mmap[i] = vec
    name_ind[name] = i
# test case
target_name = np.random.choice(names, size=1)
target_names = np.random.choice(names, size=10000)
# 1-2 sec on my pc
with h5py.File('test.h5', 'r') as f:
    a = f['vectors']
    # One attribute read per requested name.
    vec1 = f.attrs[str(target_name[0])]
    vecs2 = [f.attrs[str(name)] for name in target_names]
    cosine_similarity(vec1[np.newaxis], vecs2)[0]
# 0.04-0.05 sec on my pc
ind1 = name_ind[target_name[0]]
inds2 = [name_ind[name] for name in target_names]
vec1 = mmap[ind1]
vecs2 = mmap[inds2]
cosine_similarity(vec1[np.newaxis], vecs2)[0]

How to read images and labels from the infimnist / mnist8m dataset?

Using the program at this link, I generated some data.
As far as i can tell it is in some sort of binary format:
b"\x00\x00\x08\x01\x00\x00'\x10\x07\x02\x01\x00\x04\x01\x04\t\x05 ...
I need to extract labels and pictures from two datasets like this, generated with:
# Read the whole labels file as raw bytes.
with open("test10k-labels", "rb") as binary_file:
    data = binary_file.read()
>>> b"\x00\x00\x08\x01\x00\x00'\x10\x07\x02\x01\x00\x04\x01\x04\t\x05 ...
b"\x00\x00\x08\x01 ...".decode('ascii')
>>> "\x00\x00\x08\x01 ..."
I also tried the binascii package, but it did not work.
Thankful for any help!
Creating the Data
To create the dataset i am speaking download the package from the following link:
$ cd dir_of_folder
$ make
Then I took the path of the resulting infimnist executable that pops up and:
$ app_path lab 10000 69999 > mnist60k-labels-idx1-ubyte
This should place the file i used in the folder.
The command after app_path can be replaced by any other command he lists on the side.
Final update
It works!
Using some numpy functions the images can be returned to their normal orientation.
# for the labels
# (requires: import struct, import numpy as np, from array import array)
with open(path, "rb") as binary_file:
    # The file starts with two big-endian uint32 fields (magic number and
    # item count); consume them so they are not read as labels.
    magic, size = struct.unpack(">II", binary_file.read(8))
    if magic != 2049:
        raise ValueError("Magic number mismatch, expected 2049,got{}".format(magic))
    # The remaining bytes are the labels, one unsigned byte each.
    y_train = np.array(array("B", binary_file.read()))
# for the images
# (requires: import struct, import numpy as np, from array import array)
with open("images path", "rb") as binary_file:
    images = []
    emnistRotate = True
    # Header: four big-endian uint32 fields (magic, image count, rows, cols).
    magic, size, rows, cols = struct.unpack(">IIII", binary_file.read(16))
    if magic != 2051:
        raise ValueError('Magic number mismatch, expected 2051,''got {}'.format(magic))
    for i in range(size):
        images.append([0] * rows * cols)
    # The remaining bytes are the pixels, one unsigned byte each.
    image_data = array("B", binary_file.read())
    for i in range(size):
        images[i][:] = image_data[i * rows * cols:(i + 1) * rows * cols]
        # for some reason EMNIST is mirrored and rotated
        if emnistRotate:
            x = image_data[i * rows * cols:(i + 1) * rows * cols]
            # Collect the image rows from last to first ...
            subs = []
            for r in range(rows):
                subs.append(x[(rows - r) * cols - cols:(rows - r)*cols])
            # ... then restore their order and transpose rows with columns.
            l = list(zip(*reversed(subs)))
            fixed = [item for sublist in l for item in sublist]
            images[i][:] = fixed
# Bring every image back to its normal orientation as a 28x28 array.
x = []
for image in images:
    x.append(np.rot90(np.flip(np.array(image).reshape((28,28)), 1), 1))
x_train = np.array(x)
Crazy solution for such a simple thing :)
OK, so looking at the python-mnist source, it seems the correct way to unpack the binary format is as follows:
import struct
from array import array

with open("test10k-labels", "rb") as binary_file:
    # Header: two big-endian uint32 fields (magic number and item count).
    magic, size = struct.unpack(">II", binary_file.read(8))
    if magic != 2049:
        raise ValueError("Magic number mismatch, expected 2049,got{}".format(magic))
    # The remaining bytes are the labels, one unsigned byte each.
    labels = array("B", binary_file.read())
So I haven't tested this extensively, but the following code should work. It was taken and modified from the aforementioned python-mnist library (see its source):
from array import array
import struct

with open("mnist8m-patterns-idx3-ubyte", "rb") as binary_file:
    images = []
    emnistRotate = True
    # Header: four big-endian uint32 fields (magic, image count, rows, cols).
    magic, size, rows, cols = struct.unpack(">IIII", binary_file.read(16))
    if magic != 2051:
        raise ValueError('Magic number mismatch, expected 2051,''got {}'.format(magic))
    for i in range(size):
        images.append([0] * rows * cols)
    # The remaining bytes are the pixels, one unsigned byte each.
    image_data = array("B", binary_file.read())
    for i in range(size):
        images[i][:] = image_data[i * rows * cols:(i + 1) * rows * cols]
        # for some reason EMNIST is mirrored and rotated
        if emnistRotate:
            x = image_data[i * rows * cols:(i + 1) * rows * cols]
            # Collect the image rows from last to first ...
            subs = []
            for r in range(rows):
                subs.append(x[(rows - r) * cols - cols:(rows - r)*cols])
            # ... then restore their order and transpose rows with columns.
            l = list(zip(*reversed(subs)))
            fixed = [item for sublist in l for item in sublist]
            images[i][:] = fixed
previous answer:
You can use the python-mnist library:
from mnist import MNIST
mndata = MNIST('./data')
images, labels = mndata.load_training()

Fancy indexing with tuples

Say I have a 100x100 array in numpy, from this array I want to select 10 random blocks of (x*x)
pixels and change the values of these blocks simultaneously. What is the best way to index the slices for each block? An ideal solution would be something along the lines of the following, where the slices are taken between the pairs of tuples.
A = np.ones(100,100)
blockSize = 10
numBlocks = 15
blockCenter_Row = tuple(np.random.randint(blockSize,high=(100-blockSize),size=numBlocks))
blockCenter_Col = tuple(np.random.randint(blockSize,high=(100-blockSize),size=numBlocks))
rowLeft_Boundary = tuple((i-blockSize/2) for i in blockCenter_Row)
rowRight_Boundary = tuple((i+blockSize/2) for i in blockCenter_Row)
colLower_Boundary = tuple((i-blockSize/2) for i in blockCenter_Row)
colUpper_Boundary = tuple((i+blockSize/2) for i in blockCenter_Row)
for value in range(10):
A[rowLeft_Boundary:rowRight_Boundary,colLower_Boundary:colUpper_Boundary] = value
I think you can use as_strided() to do the trick, if the blocks are allowed to overlap.
import pylab as pl
from numpy.lib.stride_tricks import as_strided
blockSize = 10
numBlocks = 15
n = 100
a = np.zeros((n, n))
itemsize = a.dtype.itemsize
new_shape = n-blockSize+1, n-blockSize+1, blockSize, blockSize
new_stride = itemsize*n, itemsize, itemsize*n, itemsize
b = as_strided(a, shape=new_shape, strides=new_stride)
idx0 = np.random.randint(0, b.shape[0], numBlocks)
idx1 = np.random.randint(0, b.shape[1], numBlocks)
b[idx0, idx1, :, :] = np.random.rand(numBlocks, blockSize, blockSize)*3 + np.arange(numBlocks).reshape(-1, 1, 1)
pl.imshow(a, cmap="gray", interpolation="nearest")
here is the output:

