mapping over higher dimensional numpy arrays - python

If I have a 1d array a and want to map a function f over each element, I could do
>>> import numpy as np
>>> a = np.arange(5)
>>> def f(x):
... return 3*x + x**2 #whatever
>>> np.fromiter(map(f,a),float)
array([ 0., 4., 10., 18., 28.])
I'd like to do something analogous with more complex arrays. One example calculation is this: compose paired 3x3 arrays with matrix multiplication
>>> a = np.arange(5*2*3**2).reshape(5,2,3,3)
>>> def f(x):
... return np.matmul(x[0],x[1])
# is there a smarter way?
>>> np.array([f(x) for x in a])
array([[[ 42, 45, 48],
[ 150, 162, 174],
[ 258, 279, 300]],
[[ 1716, 1773, 1830],
[ 1986, 2052, 2118],
[ 2256, 2331, 2406]],
[[ 5334, 5445, 5556],
[ 5766, 5886, 6006],
[ 6198, 6327, 6456]],
[[10896, 11061, 11226],
[11490, 11664, 11838],
[12084, 12267, 12450]],
[[18402, 18621, 18840],
[19158, 19386, 19614],
[19914, 20151, 20388]]])
Another example calculation would be to transform every vector in an array of vectors by matrix multiplication:
>>> a = np.arange(3*5).reshape(5,3)
>>> def f(x):
... M = np.arange(3*3).reshape(3,3)
... return np.dot(M,x)
>>> np.array([f(x) for x in a])
array([[ 5, 14, 23],
[ 14, 50, 86],
[ 23, 86, 149],
[ 32, 122, 212],
[ 41, 158, 275]])
Is there a nice way to do such computations with an np.fromiter like approach? What is the most pythonic way to do these operations with numpy? Is there an approach which handles every example problem here as np.specialnumpything(map(f,a))?

This is just as easily implemented with broadcasting. Namely:
a = np.arange(5)
a*3 + a**2
array([ 0, 4, 10, 18, 28])
a = np.arange(5*2*3**2).reshape(5,2,3,3)
a[:, 0] @ a[:, 1]
array([[[ 42, 45, 48],
[ 150, 162, 174],
[ 258, 279, 300]],
[[ 1716, 1773, 1830],
[ 1986, 2052, 2118],
[ 2256, 2331, 2406]],
[[ 5334, 5445, 5556],
[ 5766, 5886, 6006],
[ 6198, 6327, 6456]],
[[10896, 11061, 11226],
[11490, 11664, 11838],
[12084, 12267, 12450]],
[[18402, 18621, 18840],
[19158, 19386, 19614],
[19914, 20151, 20388]]])
a = np.arange(3*5).reshape(5,3)
M = np.arange(3*3).reshape(3,3)
M.dot(a.T).T
array([[ 5, 14, 23],
[ 14, 50, 86],
[ 23, 86, 149],
[ 32, 122, 212],
[ 41, 158, 275]])
np.einsum('kj, ij -> ik', M, a)
array([[ 5, 14, 23],
[ 14, 50, 86],
[ 23, 86, 149],
[ 32, 122, 212],
[ 41, 158, 275]])

I would use the builtin numpy.nditer, which could be what you're looking for:
https://docs.scipy.org/doc/numpy/reference/arrays.nditer.html
from the examples:
>>> a = np.arange(6).reshape(2,3)
>>> a
array([[0, 1, 2],
[3, 4, 5]])
>>> with np.nditer(a, op_flags=['readwrite']) as it:
... for x in it:
... x[...] = 2 * x
...
>>> a
array([[ 0, 2, 4],
[ 6, 8, 10]])

Related

How do I convert a matrix of integers to a matrix of lists of integers in numpy?

I'm quite new to numpy. I tried with vstack, but it seems really wrong since it creates a copy in memory every time.
The initial structure is:
[[1,2,3],
[1,2,3],
[1,2,3]]
and it will be mapped to:
[[[3,2,1,2], [4,5,2,7], [7,4,1,3]],
[[3,2,1,2], [4,5,2,7], [7,4,1,3]],
[[3,2,1,2], [4,5,2,7], [7,4,1,3]]]
The numbers here don't have any meaning, it's just to show the structure, basically each integer is decoded to a list of integers N -> [N_1, N_2, N_3, N_4]
For context I have pixels encoded in 32bits and I have to decode them to argb
Create uint8 view:
>>> ar = np.random.randint(0, 2 ** 31, (3, 3))
>>> ar
array([[ 437217537, 524850213, 771706759],
[ 467263015, 219221544, 1712453711],
[1444860139, 625411292, 1224272631]])
>>> ar[1:] = ar[0] # make the layout the same as the example of OP
>>> ar
array([[437217537, 524850213, 771706759],
[437217537, 524850213, 771706759],
[437217537, 524850213, 771706759]])
>>> ar.view(np.uint8).reshape(3, 3, -1)
array([[[ 1, 105, 15, 26],
[ 37, 148, 72, 31],
[135, 79, 255, 45]],
[[ 1, 105, 15, 26],
[ 37, 148, 72, 31],
[135, 79, 255, 45]],
[[ 1, 105, 15, 26],
[ 37, 148, 72, 31],
[135, 79, 255, 45]]], dtype=uint8)
>>> int.from_bytes(bytes(_[0, 0]), 'little') == ar[0, 0]
True

Calculating sum without comprehension [duplicate]

This question already has answers here:
Vectorize large NumPy multiplication
(2 answers)
Closed 3 years ago.
Consider the two toy arrays below:
import numpy as np
k = np.random.randint(1, 25, (5, 2, 3))
l = np.random.randint(25, 50, (7, 3))
In [27]: k
Out[27]:
array([[[14, 15, 24],
[21, 24, 5]],
[[22, 19, 9],
[21, 1, 11]],
[[ 1, 23, 5],
[16, 14, 2]],
[[ 7, 3, 16],
[23, 2, 8]],
[[12, 24, 4],
[ 2, 15, 20]]])
In [28]: l
Out[28]:
array([[47, 31, 42],
[28, 27, 26],
[45, 32, 49],
[29, 34, 32],
[40, 36, 25],
[44, 27, 31],
[27, 35, 26]])
I can get the multiplicative sum that I am interested in as follows:
f = np.array([np.sum( k * x, axis = 2) for x in l])
In [29]: f
Out[29]:
array([[[2131, 1941],
[2001, 1480],
[ 970, 1270],
[1094, 1479],
[1476, 1399]],
[[1421, 1366],
[1363, 901],
[ 779, 878],
[ 693, 906],
[1088, 981]],
[[2286, 1958],
[2039, 1516],
[1026, 1266],
[1195, 1491],
[1504, 1550]],
[[1684, 1585],
[1572, 995],
[ 971, 1004],
[ 817, 991],
[1292, 1208]],
[[1700, 1829],
[1789, 1151],
[ 993, 1194],
[ 788, 1192],
[1444, 1120]],
[[1765, 1727],
[1760, 1292],
[ 820, 1144],
[ 885, 1314],
[1300, 1113]],
[[1527, 1537],
[1493, 888],
[ 962, 974],
[ 710, 899],
[1268, 1099]]])
How can I calculate this sum without resorting to comprehension?
This is a good use case for np.einsum:
np.einsum('ijk,lk->lij', k, l)
list_comp = np.array([np.sum( k * x, axis = 2) for x in l])
np.allclose(np.einsum('ijk,lk->lij', k, l), list_comp)
# True
Or using broadcasting:
(l[:,None,None]*k).sum(-1)
Although from a quick check on timings np.einsum runs about 3 times faster
You can also do that with np.tensordot:
import numpy as np
np.random.seed(0)
k = np.random.randint(1, 25, (5, 2, 3))
l = np.random.randint(25, 50, (7, 3))
f = np.tensordot(l, k, [-1, -1])
f_comp = np.array([np.sum(k * x, axis=2) for x in l])
print(np.allclose(f, f_comp))
# True

How to reshape numpy array keeping each nth element

I have a numpy array of shape, say, (1000, 80) and I want to convert it to (1000, 40, 2). You can think of it as having 1000 observations where the first 40 columns refer to a specific feature across 40 days and the next 40 columns to a 2nd feature across the same 40 days.
So, I want the 2nd dimension to be the 40 days, while the 3rd should be the values of these two features for each day.
Here's a simple example with what I tried:
import numpy as np
data = [[11, 22, 33, 44],
[55, 66, 77 ,88],
[99, 100, 101, 102]]
data = np.array(data)
# This works but I have to manually do it for every day
np.c_[data[:, ::2], data[:, 1::2]].reshape((3, 2, 2))
# This does not work
np.c_[data[:, i::2] for i in range(2)].reshape((3, 2, 2))
Desired output:
array([[[ 11, 33],
[ 22, 44]],
[[ 55, 77],
[ 66, 88]],
[[ 99, 101],
[100, 102]]])
You can reshape first and then transpose the second and third axis:
data.reshape(-1, 2, data.shape[1] / 2).transpose(0,2,1)
#array([[[ 11, 33],
# [ 22, 44]],
# [[ 55, 77],
# [ 66, 88]],
# [[ 99, 101],
# [100, 102]]])
Or swapaxes:
data.reshape(-1, 2, data.shape[1] / 2).swapaxes(1,2)
#array([[[ 11, 33],
# [ 22, 44]],
# [[ 55, 77],
# [ 66, 88]],
# [[ 99, 101],
# [100, 102]]])
Or as @wwii commented, reshape with column-major order:
data.reshape(-1, data.shape[1] / 2, 2, order='F')

Numpy Interweave oddly shaped arrays

Alright, here is the given data:
There are three numpy arrays of the shapes:
(i, 4, 2), (i, 4, 3), (i, 4, 2)
the i is shared among them but is variable.
The dtype is float32 for everything.
The goal is to interweave them in a particular order. Let's look at the data at index 0 for these arrays:
[[-208. -16.]
[-192. -16.]
[-192. 0.]
[-208. 0.]]
[[ 1. 1. 1.]
[ 1. 1. 1.]
[ 1. 1. 1.]
[ 1. 1. 1.]]
[[ 0.49609375 0.984375 ]
[ 0.25390625 0.984375 ]
[ 0.25390625 0.015625 ]
[ 0.49609375 0.015625 ]]
In this case, the concatenated target array would look something like this:
[-208, -16, 1, 1, 1, 0.496, 0.984, -192, -16, 1, 1, 1, ...]
And then continue on with index 1.
I don't know how to achieve this, as the concatenate function just keeps telling me that the shapes don't match. The shape of the target array does not matter much, just that the memoryview of it must be in the given order for upload to a gpu shader.
Edit: I could achieve this with a few python for loops, but the performance impact would be a problem in this program.
Use np.dstack and flatten with np.ravel() -
np.dstack((a,b,c)).ravel()
Now, np.dstack is basically stacking along the third axis. So, alternatively we can use np.concatenate too along that axis, like so -
np.concatenate((a,b,c),axis=2).ravel()
Sample run -
1) Setup Input arrays :
In [613]: np.random.seed(1234)
...: n = 3
...: m = 2
...: a = np.random.randint(0,9,(n,m,2))
...: b = np.random.randint(11,99,(n,m,2))
...: c = np.random.randint(101,999,(n,m,2))
...:
2) Check input values :
In [614]: a
Out[614]:
array([[[3, 6],
[5, 4]],
[[8, 1],
[7, 6]],
[[8, 0],
[5, 0]]])
In [615]: b
Out[615]:
array([[[84, 58],
[61, 87]],
[[48, 45],
[49, 78]],
[[22, 11],
[86, 91]]])
In [616]: c
Out[616]:
array([[[104, 359],
[376, 560]],
[[472, 720],
[566, 115]],
[[344, 556],
[929, 591]]])
3) Output :
In [617]: np.dstack((a,b,c)).ravel()
Out[617]:
array([ 3, 6, 84, 58, 104, 359, 5, 4, 61, 87, 376, 560, 8,
1, 48, 45, 472, 720, 7, 6, 49, 78, 566, 115, 8, 0,
22, 11, 344, 556, 5, 0, 86, 91, 929, 591])
What I would do is:
np.hstack([a, b, c]).flatten()
assuming a, b, c are the three arrays

Separating 3-d array into 2-d arrays along the respective columns

temp1 = tempObj[0][0]
temp2 = tempObj[0][1]
if len(tempObj) > 1:
for i in range(1, len(tempObj)):
temp1 = np.vstack((temp1, tempObj[i][0]))
temp2 = np.vstack((temp2, tempObj[i][1]))
The code is in Python. In the above code, I am trying to separate the numpy 3-d array (tempObj) into two 2-d arrays (temp1 & temp2) so that the respective columns are merged.
Is there a better or simpler way to do this?
Use tempObj[:, 0, :] and tempObj[:, 1, :]. Here is an example:
import numpy as np
tempObj = np.array([
[
[1,5], [2,7], [3,8], [4,4],
[6, 5], [4, 7], [13, 8], [9, 4]
],
[
[21,25], [22,72], [32,82], [34,43],
[64, 54], [44, 74], [1443, 48], [94, 44]
],
[
[211, 215], [212, 712], [312, 812], [314, 413],
[614, 514], [414, 714], [11443, 148], [194, 414]
]
])
# Your code:
############################################
temp1 = tempObj[0][0]
temp2 = tempObj[0][1]
if len(tempObj) > 1:
for i in range(1, len(tempObj)):
temp1 = np.vstack((temp1, tempObj[i][0]))
temp2 = np.vstack((temp2, tempObj[i][1]))
print(temp1)
print(temp2)
# my suggestion
############################################
print(tempObj[:, 0, :])
print(tempObj[:, 1, :])
You need to make sure that the type of tempObj (or any part of it) is numpy.ndarray and not list. You can check this by running print(type(tempObj)). The following example is what I suspect your code looks like; indexing it with tempObj[:, 0, :] raises TypeError: list indices must be integers, not tuple:
tempObj = [
np.array([
[1,5], [2,7], [3,8], [4,4],
[6, 5], [4, 7], [13, 8], [9, 4]
]),
np.array([
[21,25], [22,72], [32,82], [34,43],
[64, 54], [44, 74], [1443, 48], [94, 44]
]),
np.array([
[211, 215], [212, 712], [312, 812], [314, 413],
[614, 514], [414, 714], [11443, 148], [194, 414]
])
]

Categories

Resources