how to generate per-pixel histogram from many images in numpy? - python

I have tens of thousands of images. I want to generate a histogram for each pixel. I have come up with the following code using NumPy to do this that works:
import numpy as np
import matplotlib.pyplot as plt
nimages = 1000
im_shape = (64,64)
nbins = 100
#predefine the histogram bins
hist_bins = np.linspace(0,1,nbins)
#create an array to store histograms for each pixel
perpix_hist = np.zeros((64,64,nbins))
for ni in range(nimages):
#create a simple image with normally distributed pixel values
im = np.random.normal(loc=0.5,scale=0.05,size=im_shape)
#sort each pixel into the predefined histogram
bins_for_this_image = np.searchsorted(hist_bins, im.ravel())
bins_for_this_image = bins_for_this_image.reshape(im_shape)
#this next part adds one to each of those bins
#but this is slow as it loops through each pixel
#how to vectorize?
for i in range(im_shape[0]):
for j in range(im_shape[1]):
perpix_hist[i,j,bins_for_this_image[i,j]] += 1
#plot histogram for a single pixel
plt.plot(hist_bins,perpix_hist[0,0])
plt.xlabel('pixel values')
plt.ylabel('counts')
plt.title('histogram for a single pixel')
plt.show()
I would like to know if anyone can help me vectorize the for loops? I can't think of how to index into the perpix_hist array properly. I have tens/hundreds of thousands of images and each image is ~1500x1500 pixels, and this is too slow.

You can vectorize it using np.meshgrid and providing indices for first, second and third dimension (the last dimension you already have).
y_grid, x_grid = np.meshgrid(np.arange(64), np.arange(64))
for i in range(nimages):
#create a simple image with normally distributed pixel values
im = np.random.normal(loc=0.5,scale=0.05,size=im_shape)
#sort each pixel into the predefined histogram
bins_for_this_image = np.searchsorted(hist_bins, im.ravel())
bins_for_this_image = bins_for_this_image.reshape(im_shape)
perpix_hist[x_grid, y_grid, bins_for_this_image] += 1

Related

matplotlib create figure without frames, axes, plot a 2D array with a colormap, save plot to numpy array of same size as input

I wrote a function with this purpose:
to create a matplotlib figure, but not display it
with no frames, axes, etc.
to plot in the figure an input 2D array using a user-passed colormap
to save the colormapped 2D array from the canvas to a numpy array
that the output array should be the same size as the input
There are lots of questions with answers for tasks similar to either points 1-2 or point 4; for me it was also important to automate point 5. So I started by combining parts from both #joe-kington 's answer and from #matehat 's answer and comments to it, and with small modifications I got to this:
def mk_cmapped_data(data, mpl_cmap_name):
# This is to define figure & ouptput dimensions from input
r, c = data.shape
dpi = 72
w = round(c/dpi, 2)
h = round(r/dpi, 2)
# This part modified from #matehat's SO answer:
# https://stackoverflow.com/a/8218887/1034648
fig = plt.figure(frameon=False)
fig.set_size_inches((w, h))
ax = plt.Axes(fig, [0., 0., 1., 1.])
ax.set_axis_off()
fig.add_axes(ax)
plt.set_cmap(mpl_cmap_name)
ax.imshow(data, aspect='auto', cmap = mpl_cmap_name, interpolation = 'none')
fig.canvas.draw()
# This part is to save the canvas to numpy array
# Adapted rom Joe Kington's SO answer:
# https://stackoverflow.com/a/7821917/1034648
mat = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
mat = mat.reshape(fig.canvas.get_width_height()[::-1] + (3,))
mat = normalise(mat) # this is just using a helper function to normalize output range
plt.close(fig=None)
return mat
The function does what it is supposed to do and is fast enough.
My question is whether I can make it more efficient and or more pythonic in any way.
If you're wanting RGB output that exactly matches the shape of the input array, it's probably easiest to not create a figure, and instead use the colormap objects directly. For example:
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
# Random data with a non 0-1 range.
data = 500 * np.random.random((100, 100)) - 200
# We'll use `LinearSegementedColormap` and `Normalize` instances directly
cmap = plt.get_cmap('viridis')
norm = plt.Normalize(data.min(), data.max())
# The norm instance scales data to a 0-1 range, cmap makes it RGB
rgb = cmap(norm(data))
# MPL uses a 0-1 float RGB representation, so we'll scale to 0-255
rgb = (255 * rgb).astype(np.uint8)
Image.fromarray(rgb).save('test.png')
Note that you likely don't want the additional step of saving it as a PNG, but I wanted to be able to show the result visually. This is exactly a 100x100 image where each pixel corresponds to the original input data.
This is what matplotlib does behind-the-scenes when you call imshow. The data is first run through a Normalize instance to scale it from its original range to 0-1. Then any Colormap instance can be called directly with the 0-1 results to turn the scalar data into RGB data.
One letter variables are hard to understand.
Change:
r -> n_rows
c -> n_cols
w -> width
h -> height

How to find what points lie in each bin of a histogram?

I have a 2D dimensional histogram having bin size 10. I wish to know whether there is a numpy function (or any faster method) to obtain what points lie in each bin in the 2d grid. Is there a way to access the bin elements?
I hope this solve your problem. However, I believe other can improve my code because I am new in python.
Create Histogram with matplotlib
import matplotlib.pyplot as plt
rng = np.random.RandomState(10) # deterministic random data
a = np.hstack((rng.normal(size=100), rng.normal(loc=5, scale=2, size=1000)))
n ,bins ,patches = plt.hist(a, bins=10) # arguments are passed to np.histogram
plt.title("Histogram with '10' bins")
plt.show()
Reshape arrays and..
newbin = np.repeat(np.reshape(bins,(-1, len(bins))), a.shape, axis=0)
newa = np.repeat(np.reshape(a,(len(a),-1)),len(bins),axis=1)
#index_bin = (np.where(newbin[:,0] >np.reshape(a,(1,-1))[:,0] ) )[0][0]
index_bin = (newbin>newa).argmax(axis=1).T
test
print(a[0] , bins)
print(index_bin[0])
Output
1.331586504129518 [-2.13171211 -0.88255884 0.36659444 1.61574771 2.86490098 4.11405425
5.36320753 6.6123608 7.86151407 9.11066734 10.35982062]
3

Convert an Image array to be used for PCA in pandas library

I am trying to perform PCA on an image and then output an image with pixels coloured based on the cluster they fall in in the PCA. I am doing unsupervised PCA. Ultimate goal is seen at this link: Forward PC rotation
I am currently using the pandas library(if people have other more elegant solutions I am all ears) as well as open for image manipulation.
I am trying to load in the b,g,r bands as my column with the index being a pixel giving a table with rows of all pixels in image (each with a column for the color bands).
When populating the data I ultimately have 3 million + pixels in my image and I have it populating but it takes about 5 seconds to do so for each pixel so can't event tell if I am doing it correctly. Is there a better way? Also if people understand how to use PCA with images I would be greatly appreciative.
Code:
import pandas as pd
import numpy as np
import random as rd
from sklearn.decomposition import PCA
from sklearn import preprocessing
import matplotlib.pyplot as plt
import cv2
#read in image
img = cv2.imread('/Volumes/EXTERNAL/Stitched-Photos-for-Chris/p7_0015_20161005-949am-75m-pass-1.jpg.png',1)
row,col = img.shape[:2]
print(row , col)
#get a unique pixel ID for each pixel
pixel = ['pixel-' + str(i) for i in range(0,row*col)]
bBand = ['bBand']
gBand = ['gBand']
rBand = ['rBand']
data = pd.DataFrame(columns=[bBand,gBand,rBand],index = pixel)
#populate data for each band
b,g,r = cv2.split(img)
#each index value
indexCount = row*col
for index in range(indexCount):
i = int(index/row)
j = index%row
data.loc[pixel,'bBand'] = b[i,j]
data.loc[pixel,'gBand'] = g[i,j]
data.loc[pixel,'rBand'] = r[i,j]
print(data.head())
Yes that for loop that you have there can take a long time.
Use np.ravel (for a 1D view) or np.flatten (for a 1D copy) or np.flat (for an 1D iterator) to convert 2d arrays to a series.
Also, creating a string index with x y encoded can be expensive too. I would either use row number as index and calculate x,y as row_num/row, row_num%col or a multi index with x,y depending on how frequent x,y are used in your calculations.

matplotlib: plot hist2d piecewise

I would like to plot a large sample stored in the arrays a and b with matplotlib's hist2d feature. However, generating H, xedges, yedges, img does not work directly for this data, as it uses too much memory. It works for half the number of samples, though, so I would like to do something like
H_1, xedges_1, yedges_1, img_1 = plt.hist2d(a[:len(a)/2], b[:len(b)/2], bins = 10)
followed by
H_2, xedges_2, yedges_2, img_2 = plt.hist2d(a[len(a)/2:], b[len(b)/2:], bins = 10)
While perhaps deleting the first half of the arrays after calculating the first set of variables. Is there a way to merge these two sets of variables and generate a combined plot for the data?
If (and only if!) you specify the bin edges manually, then your histograms will be compatible. You can simply add the occurences of each bin for both subsets, and you'll recover the full histogram:
import numpy as np
import matplotlib.pyplot as plt
a=np.random.rand(200)*10
b=np.random.rand(200)*10
binmin=min(a.min(),b.min())
binmax=max(a.max(),b.max())
H_1, xedges_1, yedges_1, img_1 = plt.hist2d(a[:len(a)/2], b[:len(b)/2], bins = np.linspace(binmin,binmax,10+1))
H_2, xedges_2, yedges_2, img_2 = plt.hist2d(a[len(a)/2:], b[len(b)/2:], bins = np.linspace(binmin,binmax,10+1))
H_3, xedges_3, yedges_3, img_3 = plt.hist2d(a, b, bins = np.linspace(binmin,binmax,10+1))
Result:
In [150]: (H_1+H_2==H_3).all()
Out[150]: True
Which you can easily plot using plt.pcolor. That's what hist2d seems to use, albeit with an additional transpose of the data:
plt.figure()
plt.pcolor((H_1+H_2).T)
img_3 (left) vs (H_1+H_2).T (right):

Custom coloration for matrix in matplotlib

I have three matrices that I'd like to plot, but the only solution I've come up with is just plotting one after the other, and that leaves me with the last matrix plotted.
ax.imshow(mat1, cmap='Blues', interpolation='nearest')
ax.imshow(mat2, cmap='binary', interpolation='nearest')
ax.imshow(mat3, cmap='autumn', interpolation='nearest') # actual plot
What I want is to display all 0s in the three matrices in white, and higher values in different tones depending on the matrix, e.g.: blue, black and red. Also, in that example, red cells would have precedence over black and black over blue. The solution I'm imagining to this is a function that, given a triple (blue, black, red) with the different values for each component, returns the color the cell should be colored, and feed it to a ColorMap, but I really don't know how to do so or if it's even possible.
Every kind of help and even different solutions (that's the most likely to happen) are welcome and appreciated. Thanks in advance.
You want a fourth image, with the RGB value at each point a function of the single value of the first three matrixes at the corresponding point? If so, can you produce the algebra to get from three values to the RGB fourth?
Your question suggests confusion about how plotting turns data into colors. A colormap takes single-valued data, normalizes it, and maps it into some named array of colors. 0 values might be mapped to any color, depending on the colormap and the rest of the data.
A bitmap defines (red, green, blue) values at each pixel. Proper bitmaps have header sections, but the data is an (m,n,3) array. imshow plots just that array; it expects the RGB values to be in the [0,1] range.
If you have three data matrices, you have to choose how to map the values to RGB values. Here's an example with three kinds of mapping to RGB. The first two rows are dummy data with a range of values, shown either with a colormap or as the simplest RGB representation. The last row shows ways of combining all three dummy matrices into one image using the whole colorspace.
# -*- coding: utf-8 -*-
import matplotlib.pyplot as plt
import numpy as np
#dummy data
x = 8
y = 15
mat = []
mat.append(np.arange(x * y).reshape((x, y)) / float(x * y) )
mat.append(np.arange(x * y).reshape((y, x)).T / float(x* y))
mat.append(np.arange(y) * np.arange(x)[:,np.newaxis] / float(99))
# NOTE: this data is approximately in the RGB range. If yours isn't, normalize,
# here or in your makeRGB function.
# (The colormap normalizes single-valued data).
fig, axs = plt.subplots(figsize=(7,4), nrows=3, ncols=3,
gridspec_kw={'hspace':0.6})
axs[0,0].imshow(mat[0], cmap='Reds', interpolation='nearest')
axs[0,1].imshow(mat[1], cmap='Greens', interpolation='nearest')
axs[0,2].imshow(mat[2], cmap='Blues', interpolation='nearest')
axs[0,0].set_xlabel('Reds Colormap')
axs[0,1].set_xlabel('Greens Colormap')
axs[0,2].set_xlabel('Blues Colormap')
def asOneHue(mat, hue):
"""
Use a single-valued matrix to represent one hue in a RGB file.'
"""
RGBout = np.zeros((len(mat),len(mat[0]),3))
RGBout[:,:,i] = mat
return RGBout
for i in (0,1,2):
axs[1,i].imshow(asOneHue(mat[i],i))
axs[1,0].set_xlabel('Reds bitmap')
axs[1,1].set_xlabel('Greens bitmap')
axs[1,2].set_xlabel('Blues bitmap')
# different ways to combine 3 values
def makeRGB0(mats):
RGBout = np.zeros((len(mats[0]),len(mats[0][0]),3))
#RGBout = np.ones((len(mats[0]),len(mats[0][0]),3))
for i in (0,1,2):
RGBout[:,:,i] = mats[i]
return RGBout
axs[2,0].imshow(makeRGB0(mat))
axs[2,0].set_xlabel('Color layers')
def makeRGB1(mats):
RGBout = np.zeros((len(mats[0]),len(mats[0][0]),3))
i,j,k = RGBout.shape
for x in range(i):
for y in range(j):
RGBout[x,y] = (mats[0][x][y] / 2,
mats[1][x][y],
1 - mats[2][x][y])
return RGBout
axs[2,1].imshow(makeRGB1(mat))
axs[2,1].set_xlabel('Algebraic')
def makeRGB2(mats):
RGBout = np.zeros((len(mats[0]),len(mats[0][0]),3))
i,j,k = RGBout.shape
for x in range(i):
for y in range(j):
if mats[0][x][y] > .8:
RGBout[x,y] = (mats[0][x][y],
0,
0)
elif mats[1][x][y] > .8:
RGBout[x,y] = (0,
mats[1][x][y],
0)
else:
RGBout[x,y] = (mats[0][x][y],
mats[1][x][y],
mats[2][x][y])
return RGBout
axs[2,2].imshow(makeRGB2(mat))
axs[2,2].set_xlabel('If-else')
plt.show()

Categories

Resources