Turn Based Movement Algorithm - python

I am programming a turn-based strategy, like Fire Emblem, game in Python using pygame, but I am running into a problem with movement of players. The way that it works is that you select a player and it will show you all allowed moves highlighted in Blue, but my problem is that I am having a problem coming up with a list for all possible moves.
def showPerson(tilex, tiley, personAtTile):
global ALLOWEDMOVES
ALLOWEDMOVES = []
prepare = {k:v for v,k in PLAYERSPOSITION.items()}
z = PLAYERDISTANCE[personAtTile]
#get all coords for the possible moves
currentNewSpots = []
oldSpots = []
a = PLAYERSPOSITION[personAtTile][0]
b = PLAYERSPOSITION[personAtTile][1]
c = a + 1
test = findTileLetter(c, b)
if test:
currentNewSpots.append((c, b))
ALLOWEDMOVES.append((c, b))
c = a -1
test = findTileLetter(c, b)
if test:
currentNewSpots.append((c, b))
ALLOWEDMOVES.append((c, b))
c = b + 1
test = findTileLetter(a, c)
if test:
currentNewSpots.append((a, c))
ALLOWEDMOVES.append((a, c))
c = b - 1
test = findTileLetter(a, c)
if test:
currentNewSpots.append((a, c))
ALLOWEDMOVES.append((a, c))
for x in range(PLAYERDISTANCE[prepare[(tilex, tiley)]]):
for y in range(len(currentNewSpots) - 1):
a = currentNewSpots[y][0]
b = currentNewSpots[y][1]
c = a + 1
test = findTileLetter(c, b)
if test and ((c, b)) not in ALLOWEDMOVES:
currentNewSpots.append((c, b))
ALLOWEDMOVES.append((c, b))
c = a -1
test = findTileLetter(c, b)
if test and ((c, b)) not in ALLOWEDMOVES:
currentNewSpots.append((c, b))
ALLOWEDMOVES.append((c, b))
c = b + 1
test = findTileLetter(a, c)
if test and ((c, b)) not in ALLOWEDMOVES:
currentNewSpots.append((a, c))
ALLOWEDMOVES.append((a, c))
c = b - 1
test = findTileLetter(a, c)
if test and ((c, b)) not in ALLOWEDMOVES:
currentNewSpots.append((a, c))
ALLOWEDMOVES.append((a, c))

Because I don't know what you have in mind, I'll proceed under the following assumptions:
findTileLetter(x, y) tells me if the square at (x, y) is passable. That is, it tells me if a unit could pass through or end their turn on that square.
Units can step up to PLAYERDISTANCE[unit] + 1 times per turn. Each step can be taken up, down, left, or right. Diagonal steps are not allowed, but instead must be accomplished by stepping e.g. left and up.
With that in mind, we note that on the line
for y in range(len(currentNewSpots) - 1):
you iterate over one fewer element of currentNewSpots than you should, and thus every x loop you omit stepping from the element in currentNewSpots that was added last in the preceding x loop. Hence you leave out potential destination squares.
Changing the line to
for y in range(len(currentNewSpots))
fixes this issue.
Additionally, your delta-y tests in the y loop are not quite right:
c = b + 1
test = findTileLetter(a, c)
if test and ((c, b)) not in ALLOWEDMOVES: ### <--- should be (a, c)
currentNewSpots.append((a, c))
ALLOWEDMOVES.append((a, c))
Blob of working test code follows. The grid defines a world of tiles: 0 tiles are impassable, while 1 tiles are passable. The MY_X and MY_Y define where we're searching from. After every step, we output the map to stdout, illustrating which squares we've found so far.
import sys
MY_X = 3
MY_Y = 4
MY_RNG = 2
grid = [
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
[0, 0, 1, 1, 1, 1, 0, 0, 1, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1],
[0, 0, 1, 1, 0, 1, 1, 0, 0, 1],
[0, 0, 1, 1, 1, 1, 1, 0, 0, 0],
[0, 0, 0, 0, 1, 1, 1, 1, 1, 0],
[0, 0, 0, 1, 1, 1, 1, 0, 0, 0],
[0, 1, 1, 1, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
]
def findTileLetter(x, y):
return grid[y][x]
class Person:
pass
def showMap():
for y in range(len(grid)):
for x in range(len(grid[y])):
if grid[y][x] == 0:
sys.stdout.write(' ')
elif x == MY_X and y == MY_Y:
sys.stdout.write('x')
elif (x, y) in ALLOWEDMOVES:
sys.stdout.write('o')
else:
sys.stdout.write('-')
sys.stdout.write('\n')
me = Person()
ALLOWEDMOVES = []
PLAYERDISTANCE = {}
PLAYERDISTANCE[me] = MY_RNG
PLAYERSPOSITION = {}
PLAYERSPOSITION[me] = (MY_X, MY_Y)
def showPerson(tilex, tiley, personAtTile):
global ALLOWEDMOVES
ALLOWEDMOVES = []
prepare = {k:v for v,k in PLAYERSPOSITION.items()}
z = PLAYERDISTANCE[personAtTile]
#get all coords for the possible moves
currentNewSpots = []
oldSpots = []
a = PLAYERSPOSITION[personAtTile][0]
b = PLAYERSPOSITION[personAtTile][1]
c = a + 1
test = findTileLetter(c, b)
if test:
currentNewSpots.append((c, b))
ALLOWEDMOVES.append((c, b))
c = a -1
test = findTileLetter(c, b)
if test:
currentNewSpots.append((c, b))
ALLOWEDMOVES.append((c, b))
c = b + 1
test = findTileLetter(a, c)
if test:
currentNewSpots.append((a, c))
ALLOWEDMOVES.append((a, c))
c = b - 1
test = findTileLetter(a, c)
if test:
currentNewSpots.append((a, c))
ALLOWEDMOVES.append((a, c))
showMap()
for x in range(PLAYERDISTANCE[prepare[(tilex, tiley)]]):
for y in range(len(currentNewSpots)):
a = currentNewSpots[y][0]
b = currentNewSpots[y][1]
c = a + 1
test = findTileLetter(c, b)
if test and ((c, b)) not in ALLOWEDMOVES:
currentNewSpots.append((c, b))
ALLOWEDMOVES.append((c, b))
c = a - 1
test = findTileLetter(c, b)
if test and ((c, b)) not in ALLOWEDMOVES:
currentNewSpots.append((c, b))
ALLOWEDMOVES.append((c, b))
c = b + 1
test = findTileLetter(a, c)
if test and ((a, c)) not in ALLOWEDMOVES:
currentNewSpots.append((a, c))
ALLOWEDMOVES.append((a, c))
c = b - 1
test = findTileLetter(a, c)
if test and ((a, c)) not in ALLOWEDMOVES:
currentNewSpots.append((a, c))
ALLOWEDMOVES.append((a, c))
showMap()
showPerson(MY_X, MY_Y, me)
print ALLOWEDMOVES

Related

Can't integrate summation with sympy

I would like to compute the integral of a summation:
import sympy as sp
t = sp.Symbol("t")
n = sp.Symbol("n", integer=True, positive=True)
sum_term = sp.Sum(sp.exp(-(n*sp.pi)**2 * t), (n, 1, sp.oo))
sp.integrate(sum_term, (t, 0, t)).doit()
However, this doesn't calculate the integral:
Integral(Sum(exp(-pi**2*n**2*t), (n, 1, oo)), (t, 0, t))
I'm not sure why this doesn't work but if you interchange the order of the sum and the integral it works:
In [5]: import sympy as sym
In [6]: t, n = sym.symbols('t, n')
In [7]: f = sym.exp(-(n*sym.pi)**2 * t)
In [8]: f
Out[8]: exp(-pi**2*n**2*t)
In [11]: e1 = sym.Integral(sym.Sum(f, (n, 1, sym.oo)), (t, 0, sym.oo))
In [12]: e2 = sym.Sum(sym.Integral(f, (t, 0, sym.oo)), (n, 1, sym.oo))
In [13]: e1
Out[13]: Integral(Sum(exp(-pi**2*n**2*t), (n, 1, oo)), (t, 0, oo))
In [14]: e2
Out[14]: Sum(Integral(exp(-pi**2*n**2*t), (t, 0, oo)), (n, 1, oo))
In [15]: e1.doit()
Out[15]: Integral(Sum(exp(-pi**2*n**2*t), (n, 1, oo)), (t, 0, oo))
In [17]: e2.doit()
Out[17]: 1/6

Division operator overloading parameters have same values

I'm having some problems with operator overloading. If you could check please.
The code:
import math
class Mat4:
cells = [ [0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0] ]
#staticmethod
def DET2(a, b, c, d):
return a * d - b * c
#staticmethod
def DET3(a,b,c, d,e,f, g,h,i):
return a * Mat4.DET2(e,f,h,i) - b * Mat4.DET2(d,f,g,i) + c * Mat4.DET2(d,e,g,h)
#staticmethod
def DET4(a,b,c,d, e,f,g,h, i,j,k,l, m,n,o,p):
return ( a * Mat4.DET3(f,g,h,j,k,l,n,o,p) - b * Mat4.DET3(e,g,h,i,k,l,m,o,p) + c * Mat4.DET3(e,f,h,i,j,l,m,n,p) - d * Mat4.DET3(e,f,g,i,j,k,m,n,o))
#staticmethod
def Scale(factor):
res = Mat4()
res.cells = [ [factor,0, 0, 0],
[0, factor, 0, 0],
[0, 0, factor, 0],
[0, 0, 0, 1] ]
return res
def __mul__(self, other):
if isinstance(other, Mat4):
print(self.cells)
print(other.cells)
print("h\n")
out = Mat4()
for row_left in range(4):
for col_right in range(4):
out.cells[row_left][col_right] = 0
for k in range(4):
out.cells[row_left][col_right] += self.cells[row_left][k] * other.cells[k][col_right]
return out
if isinstance(other, int) or isinstance(other, float):
for i in range(4):
for j in range(4):
self.cells[i][j] *= other
return self
def __truediv__(self, other):
print(self.cells)
print(other.GetInversed().cells)
print("pl\n")
return self * other.GetInversed()
#staticmethod
def Identity():
return Mat4.Scale(1)
#staticmethod
def RotationX(angle):
sinTheta = math.sin(angle)
cosTheta = math.cos(angle)
res = Mat4()
res.cells = [ [1, 0, 0, 0],
[0, cosTheta, -sinTheta, 0],
[0, sinTheta, cosTheta, 0],
[0, 0, 0, 1] ]
return res
def GetInversed(self):
a = self.cells[0][0]
b = self.cells[0][1]
c = self.cells[0][2]
d = self.cells[0][3]
e = self.cells[1][0]
f = self.cells[1][1]
g = self.cells[1][2]
h = self.cells[1][3]
i = self.cells[2][0]
j = self.cells[2][1]
k = self.cells[2][2]
l = self.cells[2][3]
m = self.cells[3][0]
n = self.cells[3][1]
o = self.cells[3][2]
p = self.cells[3][3]
min_a = Mat4.DET3(f,g,h,j,k,l,n,o,p)
min_b = Mat4.DET3(e,g,h,i,k,l,m,o,p)
min_c = Mat4.DET3(e,f,h,i,j,l,m,n,p)
min_d = Mat4.DET3(e,f,g,i,j,k,m,n,o)
det_m = a * min_a - b * min_b + c * min_c - d * min_d
res = Mat4()
res.cells[0][0] = min_a
res.cells[1][0] = -min_b
res.cells[2][0] = min_c
res.cells[3][0] = -min_d
res.cells[0][1] = -Mat4.DET3(b,c,d,j,k,l,n,o,p)
res.cells[1][1] = Mat4.DET3(a,c,d,i,k,l,m,o,p)
res.cells[2][1] = -Mat4.DET3(a,b,d,i,j,l,m,n,p)
res.cells[3][1] = Mat4.DET3(a,b,c,i,j,k,m,n,o)
res.cells[0][2] = Mat4.DET3(b,c,d,f,g,h,n,o,p)
res.cells[1][2] = -Mat4.DET3(a,c,d,e,g,h,m,o,p)
res.cells[2][2] = Mat4.DET3(a,b,d,e,f,h,m,n,p)
res.cells[3][2] = -Mat4.DET3(a,b,c,e,f,g,m,n,o)
res.cells[0][3] = -Mat4.DET3(b,c,d,f,g,h,j,k,l)
res.cells[1][3] = Mat4.DET3(a,c,d,e,g,h,i,k,l)
res.cells[2][3] = -Mat4.DET3(a,b,d,e,f,h,i,j,l)
res.cells[3][3] = Mat4.DET3(a,b,c,e,f,g,i,j,k)
return res * ( 1.0 / det_m)
def main():
rotMat = Mat4().RotationX(50)
scaMat = Mat4().Scale(3)
mRes = (rotMat * scaMat) / rotMat
print(mRes.cells)
if __name__ == "__main__":
main()
The result I'm getting:
mRes = [[0.0, 0.0, 0.0, 0.0], [0.0, -0.068840563856158, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0]]
The result I should get, because rotation gets subtracted and only the scaling has left:
mRes = [[3, 0, 0, 0], [0, 3, 0, 0], [0, 0, 3, 0], [0, 0, 0, 1]]
The problem is with __mul__ operator because it's having other and self parameters to store same values, but address is not the same:
For some reasons the first time multiplication happens mRes = (rotMat * scaMat) is okay, but the problems comes when the resulted matrix from the division calls division operator, which also calls multiplication operator.

From 4 given arrays(not sorted), find the elements from each array whose sum is equal to some number X

Suppose there are 4 unsorted arrays as given below:
A = [0, 100, -100, 50, 200]
B = [30, 100, 20, 0]
C = [0, 20, -1, 80]
D = [50, 0, -200, 1]
Suppose X is 0, so the few of the possible O/P should be (pick 1 element from each array which satisfy condition):
0,0,0,0
-100, 100, 0, 0
-100, 30, 20,50 .. etc.
I was able to devise the algorithm which can do this in O(n^3LogN), is there any better way to achieve the same?
My Solution:
1- Sort each array.
2- Fixed the element from array A.
3- run three loops for the rest of the arrays and take the sum of each element:
if sum > 0 (return -1, no such elements exit)
if sum == 0 (return current elements)
if sum < 0 (then advance the pointer from the array for which the current element is minimum.)
Any suggestion over this?
a kind of dynamic programming approach.
initialize sums (a dict of the form {possible_sum0: [way_to_get_sum0, ...]}) with the first list A. this results in
sums = {0: [[0]], 100: [[100]], -100: [[-100]], 50: [[50]], 200: [[200]]}
the update that dictionary with the lists B and C. sums will now contain entries like
sums = {...,
30: [[0, 30, 0]],
50: [[0, 30, 20], [50, 0, 0]],
29: [[0, 30, -1]], ...}
then in find_sum i sort the last list D and the sums for some speedup and break if a give sum X is no longer accessible.
here is the code:
from collections import defaultdict
A = [0, 100, -100, 50, 200]
B = [30, 100, 20, 0]
C = [0, 20, -1, 80]
D = [50, 0, -200, 1]
def initialize_sums(lst):
return {item: [[item]] for item in lst}
def update_sums(sums, lst):
new_sums = defaultdict(list)
for sm, ways in sums.items():
for item in lst:
new_sum = sm + item
for way in ways:
new_sums[new_sum].append(way + [item])
return new_sums
def find_sum(sums, last_lst, X):
last_lst = sorted(last_lst)
ret = []
for sm, ways in sorted(sums.items()):
for item in last_lst:
x = sm + item
if x > X:
break
if x == X:
for way in ways:
ret.append(way + [item])
break
return ret
sums = initialize_sums(lst=A)
sums = update_sums(sums, lst=B)
sums = update_sums(sums, lst=C)
ret = find_sum(sums, last_lst=D, X=0)
print(ret)
# [[-100, 30, 20, 50], [0, 0, -1, 1], [-100, 100, -1, 1], ...]
...did not analyze the overall complexity though.
We can have O(n^2) by hashing pair sums for A and B and checking if for any one of them, sum_AB[i] there might be an X - sum_AB[i] hashed in the pair sums of C and D.
In some circumstances it could be more efficient to enumerate those sums by multiplying each pair of lists as counts of coefficients in polynomials, using a FFT for O(m log m) complexity, where m is the range.
Assuming your arrays all have the same length n (+/- some constant value) you can get O(n^3) by using a set for the fourth array:
from itertools import product
ds = set(D)
for a, b, c in product(A, B, C):
d = X - a - b - c
if d in ds:
print(a, b, c, d)
If one or multiple arrays contain (many) extreme values you can also take shortcuts by checking the running sum against the min and max of subsequent arrays to see if X can still be reached. For example:
ds = set(D)
c_min, c_max = min(C), max(C)
d_min, d_max = min(ds), max(ds)
for a in A:
for b in B:
s = a + b
if s + c_min + d_min > X or s + c_max + d_max < X:
continue # Shortcut here.
for c in C:
d = X - a - b - c
if d in ds:
print(a, b, c, d)
You can further extend this by storing solutions that have already been found for a running sum (of the first two arrays for example) and hence taking a shortcut whenever such a sum is encountered again (by reordering with the min/max check one can avoid repeated computation of s + min/max values):
ds = set(D)
c_min, c_max = min(C), max(C)
d_min, d_max = min(ds), max(ds)
shortcuts = {}
for a in A:
for b in B:
s = a + b
if s in shortcuts:
for c, d in shortcuts[s]:
print(a, b, c, d)
continue
shortcuts[s] = []
if s + c_min + d_min > X or s + c_max + d_max < X:
continue
for c in C:
d = X - a - b - c
if d in ds:
print(a, b, c, d)
shortcuts[s].append((c, d))
A = [0, 100, -100, 50, 200]
B = [30, 100, 20, 0]
C = [0, 20, -1, 80]
D = [50, 0, -200, 1]
solutions = [(x1,x2,x3,x4) for x1 in A for x2 in B for x3 in C for x4 in D if sum([x1,x2,x3,x4]) == 0]
print(solutions)
Output:
>>>[(0, 0, 0, 0), (0, 0, -1, 1), (100, 100, 0, -200), (100, 20, 80, -200), (-100, 30, 20, 50), (-100, 100, 0, 0), (-100, 100, -1, 1), (-100, 20, 80, 0), (200, 0, 0, -200)]
This does exactly what you listed in your steps and works for any size, I don't know if it can get any easier finding all solutions for different list sizes.
find all combinations for an array
def dOfSums(li):
return {sum(x):x for x in sum([list(itertools.combinations(li, i)) for i in range(2,len(li))],[])}
find sums for a number in an array
def findSums(li, num):
return [((namestr(l), dOfSums(l)[num]) for l in li if num in dOfSums(l).keys() ]
name the array
def namestr(obj):
return [name for name in globals() if globals()[name] is obj].pop()
test
for el in findSums([A,B,C,D],50):
print(el)
('A', (0, 100, -100, 50))
('B', (30, 20, 0))
('D', (50, 0))
for el in findSums([A,B,C,D],100):
print(el)
('A', (0, -100, 200))
('B', (100, 0))
('C', (0, 20, 80))
for el in findSums([A,B,C,D],0):
print(el)
('A', (0, 100, -100))

How to vectorize multiple levels of recursion?

I am a noobie to python and numpy (and programming in general). I am trying to speed up my code as much as possible. The math involves several summations over multiple axes of a few arrays. I've attained one level of vectorization, but I can't seem to get any deeper than that and have to resort to for loops (I believe there's three levels of recursion, M, N, and I, one of which I've eliminated, I). Here's my code for the relevant section (this code works, but I'd like to speed it up):
def B1(n, i):
return np.pi * n * dmaxi * (-1)**(n+1) * np.sin(qi[i]*dmaxi) * ((np.pi*n)**2 - (qi[i]*dmaxi)**2)**(-1)
for n in N:
B[n, :] = B1(n, I)
for m in M:
for n in N:
C[m, n] = np.dot((1/np.square(qi*Iq[0, :, 2]))*B[m, :], B[n, :])
Y[m] = np.dot((1/np.square(qi*Iq[0, :, 2]))*U[0, :, 1], B[m, :])
A = np.linalg.solve(C[1:, 1:], (0.25)*Y[1:])
dmaxi is just a float and m, n and i are integers. The arrays have the following shapes:
>>> qi.shape
(551,)
>>> N.shape
(18,)
>>> M.shape
(18,)
>>> I.shape
(551,)
>>> Iq.shape
(1, 551, 3)
>>> U.shape
(1, 551, 3)
As you can see I've vectorized the calculation of the 2nd axis of B, but I can't seem to do it for the 1st axis, C, and Y, which still require the for loops. It seems that when I try to do the same form of vectorization that I did for the 1st axis of B (define a function, then give the array as the argument), I get a broadcasting error since it appears to be trying to calculate both axes simultaneously, rather than the 1st, then the 2nd, which is why I had to force it into a for loop instead. The same problem occurs for both C and Y which is why they're both in for loops also. In case that's confusing, essentially what I tried was:
>>> B[:, :] = B1(N, I)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "sasrec_v6.py", line 155, in B1
return np.pi * n * dmaxi * (-1)**(n+1) * np.sin(qi[i]*dmaxi) * ((np.pi*n)**2 - (qi[i]*dmaxi)**2)**(-1)
ValueError: operands could not be broadcast together with shapes (18) (551)
Vectorizing the 2nd axis of B made a substantial improvement to the speed of my code, so I'm assuming that the same will apply for further vectorization (I hope I'm using that term correctly by the way).
You can use broadcasting to make 2d arrays from your 1d index vectors. I haven't tested these yet, but they should work:
If you reshape the N to be a column vector, then B1 will return a 2d array:
B[N] = B1(N[:, None], I)
For Y and C, I'd use np.einsum to have better control over which axes are mulitplied (probably this could be done with np.dot as well but I'm not sure how.
C[M[:, None], N] = np.einsum('ij,kj->ik',
B[M]/np.square(qi*Iq[0, :, 2]),
B[N])
Y[M] = np.einsum('i, ki->k',
U[0, :, 1]/np.square(qi*Iq[0, :, 2]),
B[M])
To see what that indexing trick does:
In [1]: a = np.arange(3)
In [2]: a
Out[2]: array([0, 1, 2])
In [3]: a[:, None]
Out[3]:
array([[0],
[1],
[2]])
In [4]: b = np.arange(4,1,-1)
In [5]: b
Out[5]: array([4, 3, 2])
In [6]: a[:, None] * b
Out[6]:
array([[0, 0, 0],
[4, 3, 2],
[8, 6, 4]])
It saves two orders of magnitude in time:
In [92]: %%timeit
....: B = np.zeros((18, 551))
....: C = np.zeros((18, 18))
....: Y = np.zeros((18))
....: for n in N:
....: B[n, :] = B1(n, I)
....: for m in M:
....: for n in N:
....: C[m, n] = np.dot((1/np.square(qi*Iq[0, :, 2]))*B[m, :], B[n, :])
....: Y[m] = np.dot((1/np.square(qi*Iq[0, :, 2]))*U[0, :, 1], B[m, :])
....:
100 loops, best of 3: 15.8 ms per loop
In [93]: %%timeit
....: Bv = np.zeros((18, 551))
....: Cv = np.zeros((18, 18))
....: Yv = np.zeros((18))
....: Bv[N] = B1(N[:, None], I)
....: Cv[M[:, None], N] = np.einsum('ij,kj->ik', B[M]/np.square(qi*Iq[0, :, 2]), B[N])
....: Yv[M] = np.einsum('i, ki->k', U[0, :, 1]/np.square(qi*Iq[0, :, 2]), B[M])
....:
1000 loops, best of 3: 1.34 ms per loop
Here's my test:
import numpy as np
# make fake data:
np.random.seed(5)
qi = np.random.rand(551)
N = np.random.randint(0,18,18)#np.arange(18)
M = np.random.randint(0,18,18)#np.arange(18)
I = np.arange(551)
Iq = np.random.rand(1, 551, 3)
U = np.random.rand(1, 551, 3)
B = np.zeros((18, 551))
C = np.zeros((18, 18))
Y = np.zeros((18))
Bv = np.zeros((18, 551))
Cv = np.zeros((18, 18))
Yv = np.zeros((18))
dmaxi = 1.
def B1(n, i):
return np.pi * n * dmaxi * (-1)**(n+1) * np.sin(qi[i]*dmaxi) * ((np.pi*n)**2 - (qi[i]*dmaxi)**2)**(-1)
for n in N:
B[n, :] = B1(n, I)
for m in M:
for n in N:
C[m, n] = np.dot((1/np.square(qi*Iq[0, :, 2]))*B[m, :], B[n, :])
Y[m] = np.dot((1/np.square(qi*Iq[0, :, 2]))*U[0, :, 1], B[m, :])
Bv[N] = B1(N[:, None], I)
print "B correct?", np.allclose(Bv, B)
# np.einsum test case:
n, m = 2, 3
a = np.arange(n*m).reshape(n,m)*8 + 2
b = np.arange(n*m)[::-1].reshape(n,m)
c = np.empty((n,n))
for i in range(n):
for j in range(n):
c[i,j] = np.dot(a[i],b[j])
cv = np.einsum('ij,kj->ik', a, b)
print "einsum test successful?", np.allclose(c,cv)
Cv[M[:, None], N] = np.einsum('ij,kj->ik',
B[M]/np.square(qi*Iq[0, :, 2]),
B[N])
print "C correct?", np.allclose(Cv, C)
Yv[M] = np.einsum('i, ki->k',
U[0, :, 1]/np.square(qi*Iq[0, :, 2]),
B[M])
print "Y correct?", np.allclose(Yv, Y)
output :D
B correct? True
einsum test successful? True
C correct? True
Y correct? True

How to repeat elements of an array along two axes?

I want to repeat elements of an array along axis 0 and axis 1 for M and N times respectively:
import numpy as np
a = np.arange(12).reshape(3, 4)
b = a.repeat(2, 0).repeat(2, 1)
print(b)
[[ 0 0 1 1 2 2 3 3]
[ 0 0 1 1 2 2 3 3]
[ 4 4 5 5 6 6 7 7]
[ 4 4 5 5 6 6 7 7]
[ 8 8 9 9 10 10 11 11]
[ 8 8 9 9 10 10 11 11]]
This works, but I want to know are there better methods without create a temporary array.
You could use the Kronecker product, see numpy.kron:
>>> a = np.arange(12).reshape(3,4)
>>> print(np.kron(a, np.ones((2,2), dtype=a.dtype)))
[[ 0 0 1 1 2 2 3 3]
[ 0 0 1 1 2 2 3 3]
[ 4 4 5 5 6 6 7 7]
[ 4 4 5 5 6 6 7 7]
[ 8 8 9 9 10 10 11 11]
[ 8 8 9 9 10 10 11 11]]
Your original method is OK too, though!
You can make use of np.broadcast_to here:
def broadcast_tile(a, h, w):
x, y = a.shape
m, n = x * h, y * w
return np.broadcast_to(
a.reshape(x, 1, y, 1), (x, h, y, w)
).reshape(m, n)
broadcast_tile(a, 2, 2)
array([[ 0, 0, 1, 1, 2, 2, 3, 3],
[ 0, 0, 1, 1, 2, 2, 3, 3],
[ 4, 4, 5, 5, 6, 6, 7, 7],
[ 4, 4, 5, 5, 6, 6, 7, 7],
[ 8, 8, 9, 9, 10, 10, 11, 11],
[ 8, 8, 9, 9, 10, 10, 11, 11]])
Performance
Functions
def chris(a, h, w):
x, y = a.shape
m, n = x * h, y * w
return np.broadcast_to(
a.reshape(x, 1, y, 1), (x, h, y, w)
).reshape(m, n)
def alex_riley(a, b0, b1):
r, c = a.shape
rs, cs = a.strides
x = np.lib.stride_tricks.as_strided(a, (r, b0, c, b1), (rs, 0, cs, 0))
return x.reshape(r*b0, c*b1)
def paul_panzer(a, b0, b1):
r, c = a.shape
out = np.empty((r, b0, c, b1), a.dtype)
out[...] = a[:, None, :, None]
return out.reshape(r*b0, c*b1)
def wim(a, h, w):
return np.kron(a, np.ones((h,w), dtype=a.dtype))
Setup
import numpy as np
import pandas as pd
from timeit import timeit
res = pd.DataFrame(
index=['chris', 'alex_riley', 'paul_panzer', 'wim'],
columns=[5, 10, 20, 50, 100, 500, 1000],
dtype=float
)
a = np.arange(100).reshape((10,10))
for f in res.index:
for c in res.columns:
h = w = c
stmt = '{}(a, h, w)'.format(f)
setp = 'from __main__ import h, w, a, {}'.format(f)
res.at[f, c] = timeit(stmt, setp, number=50)
Output
Since the result cannot be implemented as a view, as_strided offers no benefits over simple preallocation and broadcasting. Because of its overhead as_strided seems in fact a bit slower (I did no proper benchmarking, though).
The as_strided code is taken from #AlexRiley's post.
from numpy.lib.stride_tricks import as_strided
import numpy as np
def tile_array(a, b0, b1):
r, c = a.shape # number of rows/columns
rs, cs = a.strides # row/column strides
x = as_strided(a, (r, b0, c, b1), (rs, 0, cs, 0)) # view a as larger 4D array
return x.reshape(r*b0, c*b1) # create new 2D array
def tile_array_pp(a, b0, b1):
r, c = a.shape
out = np.empty((r, b0, c, b1), a.dtype)
out[...] = a[:, None, :, None]
return out.reshape(r*b0, c*b1)
a = np.arange(9).reshape(3, 3)
kwds = {'globals': {'f_ar': tile_array, 'f_pp': tile_array_pp, 'a': a},
'number': 1000}
from timeit import timeit
print('as_strided', timeit('f_ar(a, 100, 100)', **kwds))
print('broadcast ', timeit('f_pp(a, 100, 100)', **kwds))
Sample run:
as_strided 0.048387714981799945
broadcast 0.04324757700669579
Another solution is to use as_strided. kron is much slower then using repeat twice. I have found that as_strided is much faster than a double repeat in many cases (small arrays [<250x250] with only a doubling in each dimension as_strided was slower). The as_strided trick is as follows:
a = arange(1000000).reshape((1000, 1000)) # dummy data
from numpy.lib.stride_tricks import as_strided
N, M = 4,3 # number of time to replicate each point in each dimension
H, W = a.shape
b = as_strided(a, (H, N, W, M), (a.strides[0], 0, a.strides[1], 0)).reshape((H*N, W*M))
This works by using 0-length strides which causes numpy to read the same value multiple times (until it gets to the next dimension). The final reshape does copy the data, but only once unlike using a double repeat which will copy the data twice.
Errata: I'm only taking 2x upsampling into account.
TL;DR It turns out that after the OpenCV version,
np.repeat(np.repeat(a, 2, axis=1), 2, axis=0)
is the fastest. So the answer is - there's no faster ways in numpy today,
but you can get a slight improvement by changing the order of axes.
And if you don't mind OpenCV -
cv.resize(a, None, fx=2, fy=2, interpolation=cv.INTER_NEAREST)
Here is the test.
import timeit
import numpy as np
import cv2 as cv
test = np.zeros((16, 16, 3), dtype=np.float32)
def measure(f):
t = timeit.timeit("f(test)", number=1000, globals={"test": test, "f": f})
print("%s - %f"%(f.__name__, t))
return f, t
def fastest(c):
print(c.__name__)
winner, t = min((measure(getattr(c, ve)) for ve in dir(c) if ve.startswith("alg_")), key=lambda x: x[1])
print("%s winner: %s - %f"%(c.__name__, winner.__name__, t))
return winner
#fastest
class nn:
def alg_01(a):
return np.repeat(np.repeat(a, 2, axis=0), 2, axis=1)
def alg_02(a):
return np.repeat(np.repeat(a, 2, axis=1), 2, axis=0)
def alg_03(a):
b = a[:, None, :, None]
b = np.concatenate((b, b), axis=1)
b = np.concatenate((b, b), axis=3)
return b.reshape(a.shape[0]<<1, a.shape[1]<<1, *a.shape[2:])
def alg_04(a):
b = a[:, None, :, None]
b = np.concatenate((b, b), axis=3)
b = np.concatenate((b, b), axis=1)
return b.reshape(a.shape[0]<<1, a.shape[1]<<1, *a.shape[2:])
def alg_05(a):
return (a[:, None, :, None]*np.ones((1, 2, 1, 2)+((1,)*len(a.shape[2:])), dtype=np.float32)).reshape(a.shape[0]<<1, a.shape[1]<<1, *a.shape[2:])
def alg_06(a):
return cv.resize(a, None, fx=2, fy=2, interpolation=cv.INTER_NEAREST)
def alg_07(a):
return a[:, None, :, None][:, (0, 0)][:, :, :, (0, 0)].reshape(a.shape[0]<<1, a.shape[1]<<1, *a.shape[2:])
def alg_08(a):
return a[:, None, :, None][:, :, :, (0, 0)][:, (0, 0)].reshape(a.shape[0]<<1, a.shape[1]<<1, *a.shape[2:])
def alg_09(a):
return np.kron(a, np.ones((2, 2), dtype=np.float32))
def alg_10(a):
return np.broadcast_to(a[:, None, :, None], (a.shape[0], 2, a.shape[1], 2)+a.shape[2:]).reshape(a.shape[0]<<1, a.shape[1]<<1, *a.shape[2:])
def alg_11(a):
ret = np.empty((a.shape[0], 2, a.shape[1], 2, *a.shape[2:]), dtype=np.float32)
ret[...] = a[:, None, :, None]
ret.resize((a.shape[0]<<1, a.shape[1]<<1, *a.shape[2:]), refcheck=False)
return ret
The result is:
nn
alg_01 - 0.040967
alg_02 - 0.033744
alg_03 - 0.057969
alg_04 - 0.048739
alg_05 - 0.076595
alg_06 - 0.078638
alg_07 - 0.084692
alg_08 - 0.084539
alg_09 - 0.344339
alg_10 - 0.078707
alg_11 - 0.049424
nn winner: alg_02 - 0.033744

Categories

Resources