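I have the following recursive equations I want to implement in Python
(the background is financial markets):
$$a_{i+1} = -\alpha_i x_i + \beta_i y_i \gamma_i, \qquad b_{i+1} = -\beta_i y_i + \alpha_i x_i \chi_i,$$
$$\alpha_{i+1} = \alpha_i + a_{i+1}, \qquad \beta_{i+1} = \beta_i + b_{i+1},$$
with $\alpha_0 = a_0$ and $\beta_0 = b_0$.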
This is easily done sequentially, using a loop:
def compute(x, y, χ, γ, a0, b0):
    """Run the coupled recursion sequentially and return the running totals.

    For every step ``i`` the increments are

        a = -α[i] * x[i] + β[i] * y[i] * γ[i]
        b = -β[i] * y[i] + α[i] * x[i] * χ[i]

    and the totals are updated as ``α[i+1] = α[i] + a`` and
    ``β[i+1] = β[i] + b``.

    Args:
        x, y, χ, γ: Equal-length sequences of per-step coefficients.
        a0: Initial value of α.
        b0: Initial value of β.

    Returns:
        A pair of lists ``(α, β)``, each one element longer than ``x``.
    """
    α = [a0]
    β = [b0]
    # Only the latest increment is ever needed, so the increments are kept
    # as scalars instead of being accumulated in lists of their own.
    for x_i, y_i, χ_i, γ_i in zip(x, y, χ, γ):
        α_i, β_i = α[-1], β[-1]
        a_next = -α_i * x_i + β_i * y_i * γ_i
        b_next = -β_i * y_i + α_i * x_i * χ_i
        α.append(α_i + a_next)
        β.append(β_i + b_next)
    return α, β
A small-arrays example:
x = np.array([0.6, 0.4, 0., 0., 0.9])
y = np.array([0., 0., 0.3, 0.9, 0.])
χ = np.arange(100., 105., 1.)
γ = 1. / (χ - 1.)
print(np.array(compute(x, y, χ, γ, 1., 0.)))
would produce
>>> [[ 1. 0.4 0.24 0.46621782 0.93661782 0.09366178]
[ 0. 60. 76.16 53.312 5.3312 92.99862812]]
is there a way to do it in NumPy (which I expect to significantly speed up the computation)?
In other words: to compute the whole a and b vectors without using a loop, just NumPy functions?

To be clear first: I have no idea how to make this faster using numpy. So this is not an answer to your question.
But: you can achieve some speedup using numba:
from numba import jit
import numpy as np
N = 1000000
x = np.random.rand(N)
y = np.random.rand(N)
gamma = np.random.rand(N)
chi = np.random.rand(N)
a = [.2]
b = [.3]
alpha = [a[0]]
beta = [b[0]]
def compute_in_python():
    """Pure-Python baseline: run the recursion by appending to module-level lists.

    Reads the module-level arrays ``x``, ``y``, ``gamma`` and ``chi`` and
    extends the module-level lists ``a``, ``b``, ``alpha`` and ``beta`` in
    place. Returns nothing.
    """
    for i in range(len(x)):
        a.append(-alpha[i] * x[i] + beta[i] * y[i] * gamma[i])
        b.append(-beta[i] * y[i] + alpha[i] * x[i] * chi[i])
        # The running totals must grow with every step; without these two
        # appends ``alpha[i]`` / ``beta[i]`` raise IndexError at i == 1.
        alpha.append(alpha[-1] + a[-1])
        beta.append(beta[-1] + b[-1])
def compute_with_numba(x, y, gamma, chi, a0, b0, alpha0, beta0):
    """Run the recursion over preallocated arrays using plain indexed loops.

    NOTE(review): the surrounding text says this function is JIT-compiled
    with numba, but no decorator is visible on it here -- confirm that
    ``@jit`` is applied where the function is defined.

    Args:
        x, y, gamma, chi: Equal-length 1-D arrays of per-step coefficients.
        a0, b0: Values stored in ``a[0]`` and ``b[0]``.
        alpha0, beta0: Starting values of the running totals.

    Returns:
        Tuple ``(a, b, alpha, beta)`` of arrays of length ``len(x) + 1``.
    """
    N = len(x)
    a = np.empty(N + 1)
    b = np.empty(N + 1)
    alpha = np.empty(N + 1)
    beta = np.empty(N + 1)
    a[0] = a0
    b[0] = b0
    alpha[0] = alpha0
    beta[0] = beta0
    for i in range(N):
        a[i + 1] = -alpha[i] * x[i] + beta[i] * y[i] * gamma[i]
        b[i + 1] = -beta[i] * y[i] + alpha[i] * x[i] * chi[i]
        alpha[i + 1] = alpha[i] + a[i + 1]
        beta[i + 1] = beta[i] + b[i + 1]
    return a, b, alpha, beta
Vanilla python loops:
In [23]: %time compute_in_python()
CPU times: user 1.6 s, sys: 24.8 ms, total: 1.63 s
Wall time: 1.63 s
In [42]: %time res = compute_with_numba(x,y,gamma,chi,a[0], b[0], alpha[0], beta[0])
CPU times: user 13 ms, sys: 3.36 ms, total: 16.4 ms
Wall time: 16.4 ms
Note that the first call to compute_with_numba will trigger the jit, so you should measure the runtime of the second call.
Again, this is not an answer to your question, but it still is approximately 100 times faster.

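You can do it with matrix multiplication if you rearrange the elements to get the following form:
$$\begin{pmatrix}\alpha_{i+1}\\ \beta_{i+1}\end{pmatrix} = \begin{pmatrix}k_i & l_i\\ m_i & n_i\end{pmatrix}\begin{pmatrix}\alpha_i\\ \beta_i\end{pmatrix}$$
After that you can compute everything by simple matrix multiplications.
Note that the entries k, l, m, n of each 2x2 matrix are all available up front, so the matrices they form can be precomputed.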
In this case they will be:
k = 1-x
l = y*γ
m = x*χ
n = 1-y
Also I do recommend pre-allocating any array that might be used as the size is always available (appending a list is very costly).
In any case though a for loop is inevitable. But I guess the following would make it neat.
from functools import reduce
import numpy as np
def compute(mat, inp):
This seems more of a direct question. I will generalize it a bit at the end.
I am trying to implement this function in numpy. I have been successful using nested for loops but I can't think of a numpy way to do it.
My way of implementation:
bs = 10 # batch_size
nb = 8 # number of bounding boxes
nc = 15 # number of classes
bbox = np.random.random(size=(bs, nb, 4)) # model output bounding boxes
p = np.random.random(size=(bs, nb, nc)) # model output probability
p = softmax(p, axis=-1)
s_rand = np.random.random(size=(nc, nc))
s = (s_rand + s_rand.T)/2 # similarity matrix
pp = np.random.random(size=(bs, nb, nc)) # proposed probability
pp = softmax(pp, axis=-1)
first_term = 0
for b in range(nb):
for b_1 in range(nb):
if b_1 == b:
for l in range(nc):
for l_1 in range(nc):
first_term += (s[l, l_1] * (pp[:, b, l] - pp[:, b_1, l_1])**2)
second_term = 0
for b in range(nb):
for l in range(nc):
second_term += (np.linalg.norm(s[l, :], ord=1) * (pp[:, b, l] - p[:, b, l])**2)
second_term *= nb
epsilon = 0.5
output = ((1 - epsilon) * first_term) + (epsilon * second_term)
I have tried hard to remove the loops and use np.tile and np.repeat instead, in order to achieve the task. But can't think of a possible way.
I have tried searching google for finding exercises like such which can help me learn such conversions in numpy but wasn't successful.
P_hat.shape is (B,L), S.shape is (L,L), P.shape is (B,L).
# Weighted squared differences between every pair of (box, label) entries:
# resulting shape is (B, L, B, L).
array_before_sum = S[None, :, None, :] * (P_hat[:, :, None, None] - P_hat[None, None, :, :]) ** 2
# Sum out both label axes, leaving a (B, B) box-by-box matrix.
array_after_sum = array_before_sum.sum(axis=(1, 3))
# Zero out the diagonal (same box paired with itself). The mask has to be
# 1 - identity: 1 - np.ones((B, B)) is all zeros and wipes out every term.
array_sum_again = (array_after_sum * (1 - np.eye(B))).sum()
first_term = (1 - epsilon) * array_sum_again
second_term = epsilon * (B * np.abs(S).sum(axis=1)[None, :] * (P_hat - P) ** 2).sum()
I think you can do both with einsum
# Pairwise term, one value per batch item i:
# sum over j, k, l, m of s[k, m] * (pp[i, j, k] - pp[i, l, m])**2.
first_term = np.einsum('km, ijklm -> i', s, (pp[..., None, None] - pp[:, None, None, ...])**2)
# The loop version weights by the row-wise L1 norm of s and scales by nb;
# np.linalg.norm defaults to the 2-norm, so ord=1 has to be passed explicitly.
second_term = nb * np.einsum('k, ijk -> i', np.linalg.norm(s, ord=1, axis=1), (pp - p)**2)
Now there's a problem: that ijklm tensor in first_term is going to get huge if nb and nc get large. You should probably distribute it so that you get 3 smaller tensors:
# Expanding (u - v)**2 = u**2 + v**2 - 2*u*v over the index set (j, k, l, m):
# each squared term does not depend on the other box index, so summing over
# that index contributes a factor equal to the number of boxes.
n_boxes = pp.shape[1]
first_term = (
    n_boxes * np.einsum('km, ijk, ijk -> i', s, pp, pp)
    + n_boxes * np.einsum('km, ilm, ilm -> i', s, pp, pp)
    - 2 * np.einsum('km, ijk, ilm -> i', s, pp, pp)
)
This takes advantage of the fact that (a-b)**2 = a**2 + b**2 - 2ab to allow you to break the problem into three parts that can each be done in one step with the dot product
Maximally optimized code: (removal of first two loops is inspired from L.Iridium's answer)
# Squared difference for every (box, box, label, label) combination:
# resulting shape is (bs, nb, nb, nc, nc).
squared_diff = (pp[:, :, None, :, None] - pp[:, None, :, None, :]) ** 2
weighted_diff = s * squared_diff
# Sum over both label axes, then mask out the b == b_1 diagonal.
# (The sum must be taken over weighted_diff; `b` is not defined here.)
b_eq_b_1_removed = weighted_diff.sum(axis=(3, 4)) * (1 - np.eye(nb))
first_term = b_eq_b_1_removed.sum(axis=(1, 2))
normalized_s = np.linalg.norm(s, ord=1, axis=1)
squared_diff = (pp - p)**2
second_term = nb * (normalized_s * squared_diff).sum(axis=(1, 2))
loss = ((1 - epsilon) * first_term) + (epsilon * second_term)
Timeit track:
512 µs ± 13 µs per loop
Timeit track of code posted in question:
62.5 ms ± 197 µs per loop
That's a huge improvement.

Python: Gradient of matrix function

I want to calculate the gradient of the following function h(x) = 0.5 x.T * A * x + b.T * x.
For now I set A to be just a (2,2) Matrix.
def function(x):
    """Evaluate h(x) = 0.5 * x^T A x + b^T x using the module-level A and b."""
    return 0.5 * np.dot(np.dot(np.transpose(x), A), x) + np.dot(np.transpose(b), x)
A = A = np.zeros((2, 2))
n = A.shape[0]
A[range(n), range(n)] = 1
a (2,2) Matrix with main diagonal of 1 and
b = np.ones(2)
For a given Point x = (1,1) numpy.gradient returns an empty list.
x = np.ones(2)
result = np.gradient(function(x))
However shouldn't I get something like that: grad(f((1,1)) = (x1 + 1, x2 + 1) = (2, 2).
Appreciate any help.
It seems like you want to perform symbolic differentiation or automatic differentiation which np.gradient does not do. sympy is a package for symbolic math and autograd is a package for automatic differentiation for numpy. For example, to do this with autograd:
import autograd.numpy as np
from autograd import grad
def function(x):
    """Evaluate h(x) = 0.5 * x^T A x + b^T x using the module-level A and b."""
    return 0.5 * np.dot(np.dot(np.transpose(x), A), x) + np.dot(np.transpose(b), x)
A = np.zeros((2, 2))
n = A.shape[0]
A[range(n), range(n)] = 1  # set the main diagonal to 1
b = np.ones(2)
x = np.ones(2)
# grad(function) builds the gradient function; evaluate it at x.
grad(function)(x)
# -> array([2., 2.])

Close form solution for finding a root

Suppose I have a Pandas Series s whose values sum to 1 and whose values are also all greater than or equal to 0. I need to subtract a constant from all values such that the sum of the new Series is equal to 0.6. The catch is, when I subtract this constant, the values never end up less than zero.
In math formula, assume I have a series of x's and I want to find k such that $\sum_i \max(x_i - k, 0) = 0.6$.
import pandas as pd
import numpy as np
from string import ascii_uppercase
np.random.seed([3, 141592653])
s = np.power(
1000, pd.Series(
).pipe(lambda s: s / s.sum())
A 0.001352
B 0.163135
C 0.088365
D 0.010904
E 0.007615
F 0.407947
G 0.005856
H 0.198381
I 0.027455
J 0.088989
dtype: float64
The sum is 1
What I've tried
I can use Newton's method (among others) found in Scipy's optimize module
from scipy.optimize import newton
def f(k):
    """Return the clipped sum of ``s - k`` minus the 0.6 target (zero at the sought k)."""
    return s.sub(k).clip(lower=0).sum() - .6
Finding the root of this function will give me the k I need
initial_guess = .1
k = newton(f, x0=initial_guess)
Then subtract this from s
new_s = s.sub(k).clip(0)
A 0.000000
B 0.093772
C 0.019002
D 0.000000
E 0.000000
F 0.338583
G 0.000000
H 0.129017
I 0.000000
J 0.019626
dtype: float64
And the new sum is
Can we find k without resorting to using a solver?
I was not expecting newton to carry the day. But on large arrays, it does.
Inspired by Thierry's Answer
Using a loop on a sorted array with numba's jit
import numpy as np
from numba import njit
def find_k_numba(a, t):
    """Find k such that ``np.clip(a - k, 0, None).sum() == t`` by one sorted pass.

    NOTE(review): the surrounding text says this is compiled with numba's
    jit, but no decorator is visible on it here -- confirm ``@njit`` is applied.

    Args:
        a: 1-D array of non-negative values.
        t: Target sum after subtracting k and clipping at zero.

    Returns:
        The constant k; 0 when ``a`` already sums to ``t`` or less.
    """
    a = np.sort(a)
    m = len(a)
    to_remove = a.sum() - t
    k = 0.0
    if to_remove > 0:
        for i, x in enumerate(a):
            # Share what is left to remove equally among the m - i values
            # that have not been driven to zero yet.
            k = to_remove / (m - i)
            if k < x:
                # Every remaining value can absorb k without going negative.
                break
            # x is clipped to zero: it gives up all of x, not k.
            to_remove -= x
    return k
Inspired by Paul's Answer
Numpy carrying the heavy lifting.
import numpy as np
def find_k_numpy(a, t):
    """Find k such that ``np.clip(a - k, 0, None).sum() == t`` using NumPy only.

    Args:
        a: 1-D array of non-negative values.
        t: Target sum after subtracting k and clipping at zero.

    Returns:
        The constant k; 0 when ``a`` already sums to ``t`` or less.
    """
    a = np.sort(a)
    m = len(a)
    to_remove = a.sum() - t
    if to_remove <= 0:
        return 0
    c = a.cumsum()
    n = np.arange(m)[::-1]
    # b[i] is the total removed when k == a[i]: the i + 1 smallest values
    # vanish entirely and each of the m - 1 - i larger ones loses a[i].
    b = n * a + c
    # Clamp the index: rounding differences between sum() and cumsum() can
    # leave to_remove marginally above b[-1] when t is (close to) zero.
    i = min(np.searchsorted(b, to_remove), m - 1)
    # Between a[i-1] and a[i] the removed amount is linear in k with slope m - i.
    return a[i] + (to_remove - b[i]) / (m - i)
My method via Newton
import numpy as np
from scipy.optimize import newton
def find_k_newton(a, t):
    """Find k such that the clipped sum of ``a - k`` equals ``t``, via a root finder.

    Args:
        a: 1-D array of non-negative values.
        t: Target sum after subtracting k and clipping.

    Returns:
        The constant k; 0 when ``a`` already sums to ``t`` or less.
    """
    s = a.sum()
    if s <= t:
        return 0
    upper = a.max()  # loop-invariant; computed once rather than per evaluation

    def f(k_):
        return np.clip(a - k_, 0, upper).sum() - t

    # Initial guess: spread the excess evenly over all elements.
    return newton(f, (s - t) / len(a))
Time Trials
import pandas as pd
from timeit import timeit
# One row per array size, one column per implementation.
res = pd.DataFrame(
    np.nan, [10, 30, 100, 300, 1000, 3000, 10000, 30000],
    'find_k_newton find_k_numpy find_k_numba'.split()
)
for i in res.index:
    a = np.random.rand(i)
    t = a.sum() * .6
    for j in res.columns:
        stmt = f'{j}(a, t)'
        setp = f'from __main__ import {j}, a, t'
        res.at[i, j] = timeit(stmt, setp, number=200)
# Normalize every row by its fastest entry, so 1.0 marks the winner.
res.div(res.min(axis=1), axis=0)
find_k_newton find_k_numpy find_k_numba
10 57.265421 17.552150 1.000000
30 29.221947 9.420263 1.000000
100 16.920463 5.294890 1.000000
300 10.761341 3.037060 1.000000
1000 1.455159 1.033066 1.000000
3000 1.000000 2.076484 2.550152
10000 1.000000 3.798906 4.421955
30000 1.000000 5.551422 6.784594
Updated: Three different implementations - interestingly, the least sophisticated scales best.
import numpy as np
def f_sort(A, target=0.6):
    """Return k such that ``np.clip(A - k, 0, None).sum() == target``, via a full sort.

    The amount to remove is taken as ``1 - target``, i.e. ``A`` is expected
    to sum to 1.
    """
    n = len(A)  # use the input's own length, not a module-level N
    B = np.sort(A)
    # C[i] is the total removed when k == B[i]: raising k from B[i-1] to
    # B[i] takes that increment from each of the n - i values still positive.
    C = np.cumsum(np.r_[B[0], np.diff(B)] * np.arange(n, 0, -1))
    idx = np.searchsorted(C, 1 - target)
    return B[idx] + (1 - target - C[idx]) / (n - idx)
def f_partition(A, target=0.6):
    """Return k such that ``np.clip(A - k, 0, None).sum() == target``, by bisection.

    Uses repeated ``np.partition`` calls instead of a full sort. The amount
    to remove is taken as ``1 - target``, i.e. ``A`` is expected to sum to 1.
    """
    # `l` counts the values not yet known to be clipped to zero;
    # `target` becomes the amount still to be removed from those values.
    target, l = 1 - target, len(A)
    while len(A) > 1:
        m = len(A) // 2
        A = np.partition(A, m - 1)
        ls = A[:m].sum()
        if ls + A[m - 1] * (l - m) > target:
            # k == A[m-1] would remove too much: k lies in the lower half.
            A = A[:m]
        else:
            # The m smallest candidates are clipped entirely; account for
            # them and continue in the upper half.
            l -= m
            target -= ls
            A = A[m:]
    return target / l
def f_direct(A, target=0.6):
    """Return k such that ``np.clip(A - k, 0, None).sum() == target``, by elimination.

    Repeatedly drops the values that cannot absorb an equal share of the
    remainder. The amount to remove is taken as ``1 - target``, i.e. ``A``
    is expected to sum to 1.
    """
    target = 1 - target
    while True:
        share = target / len(A)
        gt = A > share
        if np.all(gt):
            return share
        # Values at or below the equal share are clipped to zero; they give
        # up their whole value and leave the pool.
        target -= A[~gt].sum()
        A = A[gt]
N = 10
A = np.random.random(N)
A /= A.sum()
print(f_sort(A), np.clip(A-f_sort(A), 0, None).sum())
print(f_partition(A), np.clip(A-f_partition(A), 0, None).sum())
print(f_direct(A), np.clip(A-f_direct(A), 0, None).sum())
from timeit import timeit
kwds = dict(globals=globals(), number=1000)
N = 100000
A = np.random.random(N)
A /= A.sum()
print(timeit('f_sort(A)', **kwds))
print(timeit('f_partition(A)', **kwds))
print(timeit('f_direct(A)', **kwds))
Sample run:
0.04813686999999732 0.5999999999999999
0.048136869999997306 0.6000000000000001
0.048136869999997306 0.6000000000000001
An exact solution, requesting only a sort, then in O(n) (well, less: we only need as many loops as the number of values that will turn to zero):
we turn the smallest values to zero while possible, then share the remaining excess between the remaining ones:
l = [0.001352, 0.163135, 0.088365, 0.010904, 0.007615, 0.407947,
     0.005856, 0.198381, 0.027455, 0.088989]
initial_sum = sum(l)
target_sum = 0.6
# number of values not yet turned to zero
non_zero = len(l)
# total already subtracted by raising the constant where possible
substracted = 0
# the amount we want to subtract from each value
constant = 0
for v in sorted(l):
    # Cost of raising the constant up to v for all still-positive values.
    step = non_zero * (v - constant)
    if initial_sum - substracted - step < target_sum:
        # Zeroing v would overshoot the target: stop here.
        break
    substracted += step
    constant = v
    non_zero -= 1
if non_zero:
    # Share the remaining excess equally between the surviving values.
    constant += (initial_sum - substracted - target_sum) / non_zero
l = [v - constant if v > constant else 0 for v in l]
# [0, 0.09377160000000001, 0.019001600000000007, 0, 0, 0.3385836, 0, 0.1290176, 0, 0.019625600000000007]
# 0.6
Just wanted to add an option to @piRSquared's answer: find_k_hybrd
find_k_hybrd is a mixture of the "numba" and "newton" solutions. I use the hybrd root finding algorithm in NumbaMinpack. NumbaMinpack is faster than scipy for problems like this, because its root finding methods can be called within jit-ed functions.
import numpy as np
from NumbaMinpack import hybrd, minpack_sig
from numba import njit, cfunc
n = 10000
a = np.random.rand(n)
t = a.sum()*.6
# Compiled as a C callback: `func.address` is taken right after this definition.
@cfunc(minpack_sig)
def func(k_, fvec, args):
    """Residual callback: clipped sum of ``args[:n] - k_[0]`` minus the target.

    ``args`` holds the ``n`` data values followed by the target ``t`` at
    index ``n`` (``n`` is the module-level length). The residual is written
    to ``fvec[0]``.
    """
    t = args[n]
    # Largest data value, used as the upper clipping bound.
    amax = -np.inf
    for i in range(n):
        if args[i] > amax:
            amax = args[i]
    argsum = 0.0
    for i in range(n):
        shifted = args[i] - k_[0]
        if shifted < 0.0:
            shifted = 0.0
        elif shifted > amax:
            shifted = amax
        argsum += shifted
    fvec[0] = argsum - t
funcptr = func.address
def find_k_hybrd(a, t):
    """Find k such that the clipped sum of ``a - k`` equals ``t``, using ``hybrd``.

    Args:
        a: 1-D array of values. NOTE(review): the callback reads the
            module-level ``n`` as the data length, so ``len(a)`` presumably
            has to equal ``n`` -- confirm.
        t: Target sum.

    Returns:
        The constant k; 0.0 when ``a`` already sums to ``t`` or less.
    """
    s = a.sum()
    if s <= t:
        return 0.0
    # Initial guess: spread the excess evenly over all elements.
    k_init = np.array([(s - t) / len(a)])
    # The callback expects the data followed by the target in a single array.
    args = np.append(a, np.array([t]))
    sol = hybrd(funcptr, k_init, args)
    return sol[0][0]
print(find_k_hybrd(a, t))

Vectorization/optimising for loop with numpy in Python

I'm writing a script to handle some data from a sensor, represented by the signal_gen function. As you can see, the testing function is quite loop-centered. Since this function is called many times, it is a bit slow, and I would love a push in the right direction for optimising it.
I have read that it is possible to replace the for loop with a vectorized array operation, but I can't get my head around how the I_avg[i] line should be written, since we have a single element y[i] multiplied with the whole array x inside a np.cos, and all this is again just one iteration of I_avg.
def testing(signal):
y = np.arange(0.0108, 0.0135, 0.001) # this one changes over time, set
#to constant for easier reading
x = np.arange(0, (len(signal)))
I_avg = np.zeros(len(y))
Q_avg = np.zeros_like(I_avg)
for i in range(0, len(y)):
I_avg[i] = np.array(signal * (np.cos(2 * np.pi * y[i] * x))).sum()
Q_avg[i] = np.array(signal * (np.sin(2 * np.pi * y[i] * x))).sum()
D = np.power(I_avg, 2) + np.power(Q_avg, 2)
max_index = np.argmax(D)
phaseOut = np.arctan2(Q_avg[max_index], I_avg[max_index])
#just a test signal
def signal_gen(size=251):
    """Return a test signal of ``size`` uniform random samples in [0, 1)."""
    return np.random.random(size=size)
One vectorized approach is to use matrix multiplication to replace the loop that computes I_avg and Q_avg, combined with NumPy broadcasting, which gives a more efficient solution like so -
mult = 2*np.pi*y[:,None]*x
I_avg, Q_avg = np.cos(mult).dot(signal), np.sin(mult).dot(signal)
Please note that for the given sample, we are competing against a loopy version that only has to iterate for 3 iterations (y being of length 3). As such, we won't be seeing a huge speedup here.
Runtime test -
In [9]: #just a test signal
...: signal = np.random.random(size=251)
...: y = np.arange(0.0108, 0.0135, 0.001)
...: x = np.arange(0, (len(signal)))
# Original approach
In [10]: %%timeit I_avg = np.zeros(len(y))
...: Q_avg = np.zeros_like(I_avg)
...: for i in range(0, len(y)):
...: I_avg[i] = np.array(signal * (np.cos(2 * np.pi * y[i] * x))).sum()
...: Q_avg[i] = np.array(signal * (np.sin(2 * np.pi * y[i] * x))).sum()
10000 loops, best of 3: 68 µs per loop
# Proposed approach
In [11]: %%timeit mult = 2*np.pi*y[:,None]*x
...: I_avg, Q_avg = np.cos(mult).dot(signal), np.sin(mult).dot(signal)
10000 loops, best of 3: 34.8 µs per loop
You can use np.einsum for broadcasting:
# Outer product of frequencies and sample indices: yx[i, j] = 2*pi*y[i]*x[j].
yx = 2 * np.pi * np.einsum("i,j->ij", y, x)
# As in the question: I uses the cosine, Q the sine; `@` sums over the samples.
I_avg = np.cos(yx) @ signal
Q_avg = np.sin(yx) @ signal

Numpy/Scipy Solve simultaneous equations with integrals in them

I am trying to use numpy and scipy to solve the following two equations:
P(z) = sgn(-cos(np.pi*D1) + cos(5*z)) * sgn(-cos(np.pi*D2) + cos(6*z))
1. 0 = 2/2pi ∫ P(z,D1,D2) * cos(5z) dz + z/L
2. 0 = 2/2pi ∫ P(z,D1,D2) * cos(6z) dz - z/L
for D1 and D2 (integral limits are 0 -> 2pi).
My code is:
def equations(p, z):
D1, D2 = p
period = 2*np.pi
P1 = lambda zz, D1, D2: \
np.sign(-np.cos(np.pi*D1) + np.cos(6.*zz)) * \
np.sign(-np.cos(np.pi*D2) + np.cos(5.*zz)) * \
P2 = lambda zz, D1, D2: \
np.sign(-np.cos(np.pi*D1) + np.cos(6.*zz)) * \
np.sign(-np.cos(np.pi*D2) + np.cos(5.*zz)) * \
eq1 = 2./period * integrate.quad(P1, 0., period, args=(D1,D2), epsabs=0.01)[0] + z
eq2 = 2./period * integrate.quad(P2, 0., period, args=(D1,D2), epsabs=0.01)[0] - z
return (eq1, eq2)
z = np.arange(0., 1000., 0.01)
N = int(len(z))
D1 = np.empty([N])
D2 = np.empty([N])
for i in range(N):
D1[i], D2[i] = fsolve(equations, x0=(0.5, 0.5), args=z[i])
print D1, D2
Unfortunately, it does not seem to converge. I don't know much about numerical methods and was hoping someone could give me a hand.
Thank you.
P.S. I'm also trying the following which should be equivalent:
import numpy as np
from scipy.optimize import fsolve
from scipy import integrate
from scipy import signal
def equations(p, z):
D1, D2 = p
period = 2.*np.pi
K12 = 1./L * z
K32 = -1./L * z + 1.
P1 = lambda zz, D1, D2: \
signal.square(6.*zz, duty=D1) * \
signal.square(5.*zz, duty=D2) * \
P2 = lambda zz, D1, D2: \
signal.square(6.*zz, duty=D1) * \
signal.square(5.*zz, duty=D2) * \
eq1 = 2./period * integrate.quad(P1, 0., period, args=(D1,D2))[0] + K12
eq2 = 2./period * integrate.quad(P2, 0., period, args=(D1,D2))[0] - K32
return (eq1, eq2)
h = 0.01
L = 10.
z = np.arange(0., L, h)
N = int(len(z))
D1 = np.empty([N])
D2 = np.empty([N])
for i in range(N):
D1[i], D2[i] = fsolve(equations, x0=(0.5, 0.5), args=z[i])
print z[i]
print ("%0.8f,%0.8f" % (D1[i], D2[i]))
I implemented what you wrote (I think I understand it!), very nicely done. Thank you. Unfortunately, I really don't have much skill in this field and don't really know how to make a suitable guess, so I just guess 0.5 (I also added a small amount of noise to the initial guess to try and improve it). The result I'm getting have numerical errors it seems, and I'm not sure why, I was hoping you could point me in the right direction. So essentially, I did an FFT sweep (did an FFT for each dutycycle variation and looked at the frequency component at 5, which is shown below in the graph) and found that the linear part (z/L) is slightly jagged.
Thank you for that, I've noted some of the techniques you've suggested. I tried replicated your second graph as it seems very useful. To do this, I kept D1 (D2) fixed and swept D2 (D1), and I did this for various z values. fmin did not always find the correct minimum (it was dependent on the initial guess) so I swept the initial guess of fmin until I found the correct answer. I get a similar answer to you. (I think it's correct?)
Also, I would just like to say that you might like to give me your contact details, as this solution as a step in finding the solution to a problem I have (I'm a student doing research), and I will most certainly acknowledge you in any papers in which this code is used.
#!/usr/bin/env python
import numpy as np
from scipy.optimize import fsolve
from scipy import integrate
from scipy import optimize
from scipy import signal
altsigns = np.ones(50)
altsigns[1::2] = -1
def get_breaks(x, y, a, b):
    """Return the sorted break points in [0, 2*pi] used to split the integrals.

    The points are ``(+-x + 2*k) * pi / a`` for ``k = 0 .. a-1`` and
    ``(+-y + 2*k) * pi / b`` for ``k = 0 .. b-1``, each wrapped into
    ``[0, 2*pi)``, followed by ``2*pi``. A leading 0 is added unless the
    smallest point is already 0.
    """
    sa = np.arange(0, 2 * a, 2)
    sb = np.arange(0, 2 * b, 2)
    zx = ((x + sa) % (2 * a)) * np.pi / a
    zx2 = ((-x + sa) % (2 * a)) * np.pi / a
    zy = ((y + sb) % (2 * b)) * np.pi / b
    zy2 = ((-y + sb) % (2 * b)) * np.pi / b
    zi = np.r_[np.sort(np.hstack((zx, zx2, zy, zy2))), 2 * np.pi]
    if zi[0] != 0:
        # Make sure the first interval starts at the lower limit.
        zi = np.r_[0, zi]
    return zi
def integrals(x, y, a, b):
    """Evaluate both integrals piecewise between the break points.

    On each interval the antiderivatives ``sin(b*z)/b`` and ``sin(a*z)/a``
    are differenced, and consecutive intervals are summed with alternating
    sign (taken from the module-level ``altsigns``).

    Returns:
        Array of two values: the ``cos(b*z)`` integral, then the ``cos(a*z)`` one.
    """
    zi = get_breaks(x % 1., y % 1., a, b)
    sins = np.vstack((np.sin(b * zi), np.sin(a * zi)))
    pieces = sins[:, 1:] - sins[:, :-1]
    return (altsigns[:zi.size - 1] * pieces).sum(1) / np.array((b, a))
def equation1(p, z, d2):
    """Return |I1/pi + z| with D1 = ``p`` varied and D2 = ``d2`` held fixed.

    Uses the module-level ``deltaK1`` and ``deltaK2``.
    """
    I1, _ = integrals(p, d2, deltaK1, deltaK2)
    eq1 = 1. / np.pi * I1 + z
    return abs(eq1)
def equation2(p, z, d1):
    """Return |I2/pi - z + 1| with D2 = ``p`` varied and D1 = ``d1`` held fixed.

    Uses the module-level ``deltaK1`` and ``deltaK2``.
    """
    _, I2 = integrals(d1, p, deltaK1, deltaK2)
    eq2 = 1. / np.pi * I2 - z + 1
    return abs(eq2)
z = [0.2, 0.4, 0.6, 0.8, 1.0]#np.arange(0., 1., 0.1)
step = 0.05
deltaK1 = 5.
deltaK2 = 6.
f = open('data.dat', 'w')
D = np.arange(0.0, 1.0, step)
D1eq1 = np.empty([len(D)])
D2eq2 = np.empty([len(D)])
D1eq1Err = np.empty([len(D)])
D2eq2Err = np.empty([len(D)])
for n in z:
for i in range(len(D)):
# Fix D2 and solve for D1.
for guessD1 in np.arange(0.,1.,0.1):
D2 = D
tempD1 = optimize.fmin(equation1, guessD1, args=(n, D2[i]), disp=False, xtol=1e-8, ftol=1e-8, full_output=True)
if tempD1[1] < 1.e-6:
D1eq1Err[i] = tempD1[1]
D1eq1[i] = tempD1[0][0]
D1eq1Err[i] = -1.
D1eq1[i] = -1.
# Fix D1 and solve for D2.
for guessD2 in np.arange(0.,1.,0.1):
D1 = D
tempD2 = optimize.fmin(equation2, guessD2, args=(n, D1[i]), disp=False, xtol=1e-8, ftol=1e-8, full_output=True)
if tempD2[1] < 1.e-6:
D2eq2Err[i] = tempD2[1]
D2eq2[i] = tempD2[0][0]
D2eq2Err[i] = -2.
D2eq2[i] = -2.
for i in range(len(D)):
f.write('%0.8f,%0.8f,%0.8f,%0.8f,%0.8f\n' %(D[i], D1eq1[i], D2eq2[i], D1eq1Err[i], D2eq2Err[i]))
This is a very ill-posed problem. Let's recap what you are trying to do:
You want to solve 100000 optimization problems
Each optimization problem is 2 dimensional, so you need O(10000) function evaluations (estimating O(100) function evaluations for a 1D optimization problem)
Each function evaluation depends on the evaluation of two numerical integrals
The integrands contain jumps, i.e. they are not even continuous, let alone continuously differentiable
The integrands are composed of periodic functions, so they have multiple minima and maxima
So you are off to a very hard time. In addition, even in the most optimistic estimate in which all factors in the integrand that are < 1 are replaced by 1, the integrals can only take values between -2*pi and 2*pi. Much less than that in reality. So you can already see that you only have a chance of a solution for
I1 - z = 0
I2 + z = 0
for very small numbers of z. So there is no point in trying up to z = 1000.
I am almost certain that this is not the problem you need to solve. (I cannot imagine a context in which such a problem would appear. It seems like a weird twist on Fourier coefficient computation...) But in case you insist, your best bet is to work on the inner loop first.
As you noted, the numerical evaluation of the integrals is subject to large errors. This is due to the jumps introduced by the sgn() function. Functions such as scipy.integrate.quad() tend to use higher order algorithms which assume that the integrands are smooth. If they are not, they perform very badly. You either need to hand-pick an algorithm that can deal with jumps or, much better in this case, do the integrals by hand:
The following algorithm calculates the jump points of the sgn() function and then evaluates the analytic integrals on all pieces:
altsigns = np.ones(50)
altsigns[1::2] = -1
def get_breaks(x, y, a, b):
    """Return the sorted break points in [0, 2*pi] used to split the integrals.

    The points are ``(+-x + 2*k) * pi / a`` for ``k = 0 .. a-1`` and
    ``(+-y + 2*k) * pi / b`` for ``k = 0 .. b-1``, each wrapped into
    ``[0, 2*pi)``, followed by ``2*pi``. A leading 0 is added unless the
    smallest point is already 0.
    """
    sa = np.arange(0, 2 * a, 2)
    sb = np.arange(0, 2 * b, 2)
    zx = ((x + sa) % (2 * a)) * np.pi / a
    zx2 = ((-x + sa) % (2 * a)) * np.pi / a
    zy = ((y + sb) % (2 * b)) * np.pi / b
    zy2 = ((-y + sb) % (2 * b)) * np.pi / b
    # Use np.pi throughout: a bare `pi` is not defined in this snippet.
    zi = np.r_[np.sort(np.hstack((zx, zx2, zy, zy2))), 2 * np.pi]
    if zi[0] != 0:
        # Make sure the first interval starts at the lower limit.
        zi = np.r_[0, zi]
    return zi
def integrals(x, y, a, b):
    """Evaluate both integrals piecewise between the break points.

    On each interval the antiderivatives ``sin(b*z)/b`` and ``sin(a*z)/a``
    are differenced, and consecutive intervals are summed with alternating
    sign (taken from the module-level ``altsigns``).

    Returns:
        Array of two values: the ``cos(b*z)`` integral, then the ``cos(a*z)`` one.
    """
    zi = get_breaks(x % 1., y % 1., a, b)
    sins = np.vstack((np.sin(b * zi), np.sin(a * zi)))
    pieces = sins[:, 1:] - sins[:, :-1]
    return (altsigns[:zi.size - 1] * pieces).sum(1) / np.array((b, a))
This gets rid of the problem of the numerical integration. It is very accurate and fast. However, even the integrals will not be perfectly continuous for all parameters, so in order to solve your optimization problem, you are better off using an algorithm that doesn't rely on the existence of any derivatives. The only choice in scipy is scipy.optimize.fmin(), which you can use like:
def equations2(p, z):
    """Return the residuals ``(I1/pi + z, I2/pi - z)`` at ``p = (x, y)``."""
    x, y = p
    I1, I2 = integrals(x, y, 6., 5.)
    fact = 1. / np.pi  # np.pi: a bare `pi` is not defined in this snippet
    eq1 = fact * I1 + z
    eq2 = fact * I2 - z
    return eq1, eq2
def norm2(p, z):
    """Return the sum of squared residuals; it is zero exactly when both equations hold."""
    eq1, eq2 = equations2(p, z)
    return eq1**2 + eq2**2  # this has the minimum when eq1 == eq2 == 0
z = 0.25
res = fmin(norm2, (0.25, 0.25), args=(z,), xtol=1e-8, ftol=1e-8)
print(res)
# -> [ 0.3972 0.5988]
# Plug the result back in to check that both residuals are (close to) zero.
print(equations2(res, z))
# -> (-2.7285737558280232e-09, -2.4748670890417657e-09)
You are still left with the problem of finding suitable starting values for all z, which is still a tricky business. Good Luck!
To check if you still have numerical errors, plug the result of the optimization back in the equations and see if they are satisfied to the required accuracy, which is what I did above. Note that I used (0.25, 0.25) as a starting value, since starting at (0.5, 0.5) didn't lead to convergence. This is normal for optimizations problems with local minima (such as yours). There is no better way to deal with this other than trying multiple starting values, rejecting non-converged results. In the case above, if equations2(res, z) returns anything higher than, say, (1e-6, 1e-6), I would reject the result and try again with a different starting value. A very useful technique for successive optimization problems is to use the result of the previous problem as the starting value for the next problem.
Note however that you have no guarantee of a smooth solution for D1(z) and D2(z). Just a tiny change in D1 could push one break point off the integration interval, resulting in a big change of the value of the integral. The algorithm may well adjust by using D2, leading to jumps in D1(z) and D2(z). Note also that you can take any result modulo 1, due to the symmetries of cos(pi*D1).
The bottom line: There shouldn't be any remaining numerical inaccuracies if you use the analytical formula for the integrals. If the residuals are less than the accuracy you specified, this is your solution. If they are not, you need to find better starting values. If you can't, a solution may not exist. If the solutions are not continuous as a function of z, that is also expected, since your integrals are not continuous. Good luck!
Edit 2
It appears your equations have two solutions in the interval z in [0, ~0.46], and no solutions for z > 0.46, see the first figure below. To prove this, see the good old graphical solution in the second figure below. The contours represent solutions of Eq. 1 (vertical) and Eq. 2 (horizontal), for different z. You can see that the contours cross twice for z < 0.46 (two solutions) and not at all for z > 0.46 (no solution that simultaneously satisfies both equations). If this is not what you expected, you need to write down different equations (which was my suspicion in the first place...)
Here is the final code I was using:
import numpy as np
from numpy import sin, cos, sign, pi, arange, sort, concatenate
from scipy.optimize import fmin
a = 6.0
b = 5.0
def P(z, x, y):
    """Return the sign of ``(cos(a*z) - cos(pi*x)) * (cos(b*z) - cos(pi*y))``.

    ``a`` and ``b`` are the module-level constants.
    """
    return sign((cos(a*z) - cos(pi*x)) * (cos(b*z) - cos(pi*y)))
def P1(z, x, y):
    """Return the first integrand, ``P(z, x, y) * cos(b*z)`` (module-level ``b``)."""
    return P(z, x, y) * cos(b*z)
def P2(z, x, y):
    """Return the second integrand, ``P(z, x, y) * cos(a*z)`` (module-level ``a``)."""
    return P(z, x, y) * cos(a*z)
altsigns = np.ones(50)
altsigns[1::2] = -1
twopi = 2*pi
pi_a = pi/a
da = 2*pi_a
pi_b = pi/b
db = 2*pi_b
lim = np.array([0., twopi])
def get_breaks(x, y):
    """Return the sorted break points in [0, 2*pi], including both limits.

    Each family of points is an arithmetic progression starting at
    ``x*pi/a``, ``(2-x)*pi/a``, ``y*pi/b`` or ``(2-y)*pi/b`` with step
    ``2*pi/a`` or ``2*pi/b`` respectively, stopping before ``2*pi``. The
    step sizes and limits come from module-level constants.
    """
    zx = arange(x*pi_a, twopi, da)
    zx2 = arange((2-x)*pi_a, twopi, da)
    zy = arange(y*pi_b, twopi, db)
    zy2 = arange((2-y)*pi_b, twopi, db)
    return sort(concatenate((lim, zx, zx2, zy, zy2)))
ba = np.array((b, a))[:,None]
fact = np.array((1. / b, 1. / a))
def integrals(x, y):
    """Evaluate both integrals piecewise between the break points.

    ``sin(b*z)`` and ``sin(a*z)`` are differenced across each interval,
    summed with alternating sign (module-level ``altsigns``) and scaled by
    the module-level ``fact`` = ``(1/b, 1/a)``.

    Returns:
        Array of two values: the ``cos(b*z)`` integral, then the ``cos(a*z)`` one.
    """
    zi = get_breaks(x % 1., y % 1.)
    sins = sin(ba*zi)
    pieces = sins[:, 1:] - sins[:, :-1]
    return fact * (altsigns[:zi.size-1] * pieces).sum(1)
def equations2(p, z):
    """Return the residuals ``(I1/pi + z, I2/pi - z)`` at ``p = (x, y)``."""
    x, y = p
    I1, I2 = integrals(x, y)
    # Local scale factor; it shadows the module-level `fact` array on purpose.
    fact = 1. / pi
    eq1 = fact * I1 + z
    eq2 = fact * I2 - z
    return eq1, eq2
def norm2(p, z):
    """Return the sum of squared residuals; it is zero exactly when both equations hold."""
    eq1, eq2 = equations2(p, z)
    return eq1**2 + eq2**2
def eval_integrals(Nx=100, Ny=101):
    """Tabulate both integrals on a regular grid over [0, 1) x [0, 1).

    Args:
        Nx: Number of grid points along x.
        Ny: Number of grid points along y.

    Returns:
        Tuple ``(x, y, I)`` where ``I`` has shape ``(Nx, Ny, 2)`` and
        ``I[i, j]`` holds ``integrals(x[i], y[j])``.
    """
    x = np.arange(Nx) / float(Nx)
    y = np.arange(Ny) / float(Ny)
    I = np.zeros((Nx, Ny, 2))
    # `range` replaces the Python 2-only `xrange`.
    for i in range(Nx):
        xi = x[i]
        Ii = I[i]  # a view: writing to Ii fills row i of I
        for j in range(Ny):
            Ii[j] = integrals(xi, y[j])
    return x, y, I
def solve(z, start=(0.25, 0.25)):
    """Minimize ``norm2`` for every entry of ``z`` and collect converged results.

    Args:
        z: Sequence of z values.
        start: Starting point used for each of the first 100 entries; later
            entries start from the previous minimizer.

    Returns:
        Array of shape ``(len(z), 2)``. Rows whose final ``norm2`` is not
        below 1e-7 are left as zeros.
    """
    N = len(z)
    res = np.zeros((N, 2))
    # `range` replaces the Python 2-only `xrange`.
    for i in range(N):
        if i < 100:
            prev = start
        prev = fmin(norm2, prev, args=(z[i],), xtol=1e-8, ftol=1e-8)
        if norm2(prev, z[i]) < 1e-7:
            res[i] = prev
    return res
#x, y, I = eval_integrals(Nx=1000, Ny=1001)
#zlvl = np.arange(0.2, 1.2, 0.2)
#contour(x, y, -I[:,:,0].T/pi, zlvl)
#contour(x, y, I[:,:,1].T/pi, zlvl)
N = 1000
z = np.linspace(0., 1., N)
res = np.zeros((N, 2, 2))
res[:,0,:] = solve(z, (0.25, 0.25))
res[:,1,:] = solve(z, (0.05, 0.95))

