I am implementing parallel tempering Gibbs sampling using Theano. I am trying to create a Theano function that takes a matrix X and swaps some of its rows. I have a symbolic binary vector named swaps that denotes which rows should be swapped (i.e., if swaps[i] == 1, then X[i] and X[i+1] should be swapped). The order of swapping is not important for me.
I tried to write a theano.scan that goes through the swaps vector and swaps the rows of X one pair at a time. The problem is that Theano doesn't allow something like X[pos], X[pos + 1] = X[pos + 1], X[pos] with symbolic variables. Here is a simple snippet of what I am trying to do.
import numpy as np
import theano
import theano.tensor as T

def swap(swp, pos, idx):
    if swp:
        idx[pos], idx[pos + 1] = idx[pos + 1], idx[pos]
    return idx

max_length = 10
swaps = T.ivector('swaps')
idx = T.ivector('idx')
pos = T.iscalar('pos')

new_idx, updates = theano.scan(swap,
                               sequences=[swaps, T.arange(max_length)],
                               outputs_info=idx)

do_swaps = theano.function([swaps, idx], new_idx[-1], updates=updates)
idx_swapped = do_swaps(np.array([1, 1, 0, 1]), np.arange(5))
print idx_swapped
Any ideas on how I can do this the right way?
Okay, here is a really simple solution I found.
import numpy as np
import theano
import theano.tensor as T

def swap(swp, pos, X):
    return T.concatenate([X[:pos], X[[pos + swp]], X[[pos + 1 - swp]], X[pos + 2:]])

max_length = 10
swaps = T.ivector('swaps')
pos = T.iscalar('pos')
X = T.vector('X')

new_X, _ = theano.scan(swap,
                       sequences=[swaps, T.arange(max_length)],
                       outputs_info=X)

do_swaps = theano.function([swaps, X], new_X[-1])
X_swapped = do_swaps(np.array([1, 1, 0, 1], dtype='int32'),
                     np.arange(5, dtype=theano.config.floatX))
print X_swapped
However, I am not sure how optimal this is when executed on a GPU.
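If the scan itself turns out to be a bottleneck, one alternative (an untested sketch, reusing the same concatenate trick) is to run the scan over an integer index vector and then reorder the matrix with a single advanced-indexing gather, so the per-step work touches only small index vectors rather than whole rows:

import numpy as np
import theano
import theano.tensor as T

# Sketch: build a row permutation with the concatenate trick,
# then gather the rows of X in one advanced-indexing operation.
def swap(swp, pos, idx):
    return T.concatenate([idx[:pos], idx[[pos + swp]], idx[[pos + 1 - swp]], idx[pos + 2:]])

max_length = 10
swaps = T.ivector('swaps')
X = T.matrix('X')

perm, _ = theano.scan(swap,
                      sequences=[swaps, T.arange(max_length)],
                      outputs_info=T.arange(X.shape[0]))

# A single row gather at the end instead of rebuilding X every step.
do_swaps = theano.function([swaps, X], X[perm[-1]])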
I am trying to use scipy.optimize.minimize to fit the parameters of a multivariate function. However, no matter how many noise-free data points I provide to the optimizer, it cannot converge to a correct (or even close) answer.
I suspect there is a mistake in the way I am using the optimizer, but I have been scratching my head trying to find it. I would appreciate any advice or guesses, thanks!
import numpy as np
from scipy.optimize import minimize
import math

def get_transform(ai, aj, ak, x, y, z):
    i, j, k = 0, 1, 2
    si, sj, sk = math.sin(ai), math.sin(aj), math.sin(ak)
    ci, cj, ck = math.cos(ai), math.cos(aj), math.cos(ak)
    cc, cs = ci*ck, ci*sk
    sc, ss = si*ck, si*sk
    M = np.identity(4)
    M[i, i] = cj*ck
    M[i, j] = sj*sc - cs
    M[i, k] = sj*cc + ss
    M[j, i] = cj*sk
    M[j, j] = sj*ss + cc
    M[j, k] = sj*cs - sc
    M[k, i] = -sj
    M[k, j] = cj*si
    M[k, k] = cj*ci
    M[0, 3] = x
    M[1, 3] = y
    M[2, 3] = z
    return M

def camera_intrinsic(fx, ppx, fy, ppy):
    K = np.zeros((3, 3), dtype='float64')
    K[0, 0], K[0, 2] = fx, ppx
    K[1, 1], K[1, 2] = fy, ppy
    K[2, 2] = 1
    return K

def apply_transform(p, matrix):
    rotation = matrix[0:3, 0:3]
    T = np.array([matrix[0][3], matrix[1][3], matrix[2][3]])
    transformed = (np.dot(rotation, p.T).T) + T
    return transformed

def project(points_3D, internal_calibration):
    points_3D = points_3D.T
    projections_2d = np.zeros((2, points_3D.shape[1]), dtype='float32')
    camera_projection = (internal_calibration).dot(points_3D)
    projections_2d[0, :] = camera_projection[0, :]/camera_projection[2, :]
    projections_2d[1, :] = camera_projection[1, :]/camera_projection[2, :]
    return projections_2d.T

def error(x):
    global points, pixels
    transform = get_transform(x[0], x[1], x[2], x[3], x[4], x[5])
    points_transfered = apply_transform(points, transform)
    internal_calibration = camera_intrinsic(x[6], x[7], x[8], x[9])
    projected = project(points_transfered, internal_calibration)
    # print(((projected-pixels)**2).mean())
    return ((projected - pixels)**2).mean()

def generate(points, x):
    transform = get_transform(x[0], x[1], x[2], x[3], x[4], x[5])
    points_transfered = apply_transform(points, transform)
    internal_calibration = camera_intrinsic(x[6], x[7], x[8], x[9])
    projected = project(points_transfered, internal_calibration)
    return projected

points = np.random.rand(100, 3)
x_initial = np.random.rand(10)
pixels = generate(points, x_initial)
x_guess = np.random.rand(10)
results = minimize(error, x_guess, method='nelder-mead', tol=1e-15)
x = results.x
print(x_initial)
print(x)
You are solving a least-squares problem, but trying to optimize it with a solver that minimizes a scalar function. While it can possibly solve the problem, it does so very inefficiently: it can require many more iterations, or can fail to converge at all.
The better way is to use least_squares instead of minimize.
For it to work properly, you should modify the error function so that it returns a 1D numpy array instead of a scalar:
def error(x):
    ...
    return (projected - pixels).flatten()
Then call least_squares:

from scipy.optimize import least_squares

results = least_squares(error, x_guess)
x = results.x
print(x_initial)
print(x)
print('error:', np.linalg.norm(error(x)))
Also, error(x) currently returns an array of float32, because an array of float32 is created in project. It should be float64; otherwise the minimization fails to converge, because most of the gradients become zero when 32-bit precision is used.
def project(points_3D, internal_calibration):
    ...
    projections_2d = np.zeros((2, points_3D.shape[1]), dtype='float64')
With these modifications the solver converges to the solution most of the time, but it can still occasionally fail. This happens because you generate the problem randomly, so in some cases it may be degenerate or make no physical sense; such cases should be investigated on their own.
It can also help to use a robust loss, such as 'arctan', instead of the default linear loss:
results = least_squares(error, x_guess, loss='arctan')
Result:
[0.68589904 0.68782115 0.83299068 0.02360941 0.19367124 0.54715374
0.37609235 0.62190714 0.98824796 0.88385802]
[0.68589904 0.68782115 0.83299068 0.02360941 0.19367124 0.54715374
0.37609235 0.62190714 0.98824796 0.88385802]
error: 1.2269443642313758e-12
I'm trying to make a custom gradient descent estimator; however, I am running into an issue with storing the parameter values at every step of the gradient descent algorithm. Here is the code skeleton:
from numpy import *
import pandas as pd
from joblib import Parallel, delayed
from multiprocessing import cpu_count

ftemp = zeros((2, ))
stemp = empty([1, ], dtype='<U10')
la = 10
vals = pd.DataFrame(index=range(la), columns=['a', 'b', 'string'])

def sfun(k1, k2, k3, string):
    a = k1*k2
    b = k2*k3
    s = string
    nums = [a, b]
    strs = [s]
    return(nums, strs)

def store(inp):
    r = sfun(inp[0], inp[1], inp[2], inp[3])
    ftemp = append(ftemp, asarray(r[0]), axis=0)
    stemp = append(stemp, asarray(r[1]), axis=0)
    return(ftemp, stemp)

for l in range(la):
    inputs = [(2, 3, 4, 'he'),
              (4, 6, 2, 'je'),
              (2, 7, 5, 'ke')]
    Parallel(n_jobs=cpu_count())(delayed(store)(i) for i in inputs)
    vals.iloc[l, 0:2] = ftemp[0, 0], ftemp[0, 1]
    vals.iloc[l, 2] = stemp[0]
    d = ftemp[2, 0] - ftemp[0, 0]
Note: most of the gradient descent code is removed because I have no issues with it; the main issue is storing the values at each step.
sfun() stands in for the loss function (I know it doesn't look like one here), and store() is just an attempt to store the parameter values at each step.
The important aspect is that I want to parallelize the process, as sfun() is computationally expensive, and I want to save the values from all parallel runs.
I have tried solving this in many different ways, but I always get a different error.
There is no need for a temporary storage array; you can store the results of the Parallel() call directly:

a = Parallel(n_jobs=cpu_count())(delayed(store)(i) for i in inputs)

Most importantly, a is populated in the order in which the inputs are given.
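For example, here is a minimal sketch (reusing the sfun() from the question) that collects everything without any shared temporaries:

from joblib import Parallel, delayed
from multiprocessing import cpu_count

def sfun(k1, k2, k3, string):
    return ([k1*k2, k2*k3], [string])

inputs = [(2, 3, 4, 'he'),
          (4, 6, 2, 'je'),
          (2, 7, 5, 'ke')]

# One result per input, in input order -- no shared temporary arrays needed.
results = Parallel(n_jobs=cpu_count())(delayed(sfun)(*i) for i in inputs)
nums = [r[0] for r in results]     # [[6, 12], [24, 12], [14, 35]]
strs = [r[1][0] for r in results]  # ['he', 'je', 'ke']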
The task by example:
data = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9])
idx = np.array([2, 0, 1, 1, 2, 0, 1, 1, 2])
Expected result:
binned = np.array([2, 6, 3, 4, 7, 8, 1, 5, 9])
Constraints:
Should be fast.
Should be O(n+k) where n is the length of data and k is the number of bins.
Should be stable, i.e. order within bins is preserved.
Obvious solution
data[np.argsort(idx, kind='stable')]
is O(n log n).
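For concreteness, the obvious solution applied to the example above does reproduce the expected result:

import numpy as np

data = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9])
idx = np.array([2, 0, 1, 1, 2, 0, 1, 1, 2])

# kind='stable' preserves the original order within each bin
print(data[np.argsort(idx, kind='stable')])  # [2 6 3 4 7 8 1 5 9]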
O(n+k) solution
def sort_to_bins(idx, data, mx=-1):
    if mx == -1:
        mx = idx.max() + 1
    cnts = np.zeros(mx + 1, int)
    for i in range(idx.size):
        cnts[idx[i] + 1] += 1
    for i in range(1, cnts.size):
        cnts[i] += cnts[i-1]
    res = np.empty_like(data)
    for i in range(data.size):
        res[cnts[idx[i]]] = data[i]
        cnts[idx[i]] += 1
    return res
is loopy and slow.
Is there a better method in pure numpy < scipy < pandas < numba/pythran?
Here are a few solutions:

1. Use np.argsort anyway; after all, it is fast compiled code.

2. Use np.bincount to get the bin sizes and np.argpartition, which is O(n) for a fixed number of bins. Downside: currently no stable algorithm is available, so we have to sort each bin.

3. Use scipy.ndimage.measurements.labeled_comprehension. This does roughly what is required, but no idea how it is implemented.

4. Use pandas. I'm a complete pandas noob, so what I cobbled together here using groupby may be suboptimal.

5. Use scipy.sparse: switching between compressed sparse row (CSR) and compressed sparse column (CSC) formats happens to implement the exact operation we are looking for.

6. Use pythran (I'm sure numba works as well) on the loopy code in the question. All that is required is to insert the following at the top after the numpy import

#pythran export sort_to_bins(int[:], float[:], int)

and then compile

pythran stb_pthr.py
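After compiling, the generated module imports like any other Python module (a sketch; it assumes the file is named stb_pthr.py as in the benchmark code below, and that idx uses the platform's default integer dtype to match the int[:] signature):

import numpy as np
from stb_pthr import sort_to_bins

data = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=float)
idx = np.array([2, 0, 1, 1, 2, 0, 1, 1, 2])

print(sort_to_bins(idx, data, -1))  # [2. 6. 3. 4. 7. 8. 1. 5. 9.]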
Benchmarks (100 bins, variable number of items):
Take home:
If you are OK with numba/pythran, that is the way to go; if not, scipy.sparse scales rather well.
Code:
import numpy as np
from scipy import sparse
from scipy.ndimage.measurements import labeled_comprehension
from stb_pthr import sort_to_bins as sort_to_bins_pythran
import pandas as pd
def sort_to_bins_pandas(idx, data, mx=-1):
df = pd.DataFrame.from_dict(data=data)
out = np.empty_like(data)
j = 0
for grp in df.groupby(idx).groups.values():
out[j:j+len(grp)] = data[np.sort(grp)]
j += len(grp)
return out
def sort_to_bins_ndimage(idx, data, mx=-1):
if mx==-1:
mx = idx.max() + 1
out = np.empty_like(data)
j = 0
def collect(bin):
nonlocal j
out[j:j+len(bin)] = np.sort(bin)
j += len(bin)
return 0
labeled_comprehension(data, idx, np.arange(mx), collect, data.dtype, None)
return out
def sort_to_bins_partition(idx, data, mx=-1):
if mx==-1:
mx = idx.max() + 1
return data[np.argpartition(idx, np.bincount(idx, None, mx)[:-1].cumsum())]
def sort_to_bins_partition_stable(idx, data, mx=-1):
if mx==-1:
mx = idx.max() + 1
split = np.bincount(idx, None, mx)[:-1].cumsum()
srt = np.argpartition(idx, split)
for bin in np.split(srt, split):
bin.sort()
return data[srt]
def sort_to_bins_sparse(idx, data, mx=-1):
if mx==-1:
mx = idx.max() + 1
return sparse.csr_matrix((data, idx, np.arange(len(idx)+1)), (len(idx), mx)).tocsc().data
def sort_to_bins_argsort(idx, data, mx=-1):
return data[idx.argsort(kind='stable')]
from timeit import timeit
exmpls = [np.random.randint(0, K, (N,)) for K, N in np.c_[np.full(16, 100), 1<<np.arange(5, 21)]]
timings = {}
for idx in exmpls:
data = np.arange(len(idx), dtype=float)
ref = None
for x, f in (*globals().items(),):
if x.startswith('sort_to_bins_'):
timings.setdefault(x.replace('sort_to_bins_', '').replace('_', ' '), []).append(timeit('f(idx, data, -1)', globals={'f':f, 'idx':idx, 'data':data}, number=10)*100)
if x=='sort_to_bins_partition':
continue
if ref is None:
ref = f(idx, data, -1)
else:
assert np.all(f(idx, data, -1)==ref)
import pylab
for k, v in timings.items():
pylab.loglog(1<<np.arange(5, 21), v, label=k)
pylab.xlabel('#items')
pylab.ylabel('time [ms]')
pylab.legend()
pylab.show()
I would like to perform a multidimensional ODR with scipy.odr. I read the API documentation; it says that multidimensionality is possible, but I cannot make it work. I cannot find a working example on the internet, and the API is rather bare and gives no hints on how to proceed.
Here is my MWE:
import numpy as np
import scipy.odr

def linfit(beta, x):
    return beta[0]*x[:,0] + beta[1]*x[:,1] + beta[2]

n = 1000
t = np.linspace(0, 1, n)
x = np.full((n, 2), float('nan'))
x[:,0] = 2.5*np.sin(2*np.pi*6*t) + 4
x[:,1] = 0.5*np.sin(2*np.pi*7*t + np.pi/3) + 2
e = 0.25*np.random.randn(n)
y = 3*x[:,0] + 4*x[:,1] + 5 + e
print(x.shape)
print(y.shape)

linmod = scipy.odr.Model(linfit)
data = scipy.odr.Data(x, y)
odrfit = scipy.odr.ODR(data, linmod, beta0=[1., 1., 1.])
odrres = odrfit.run()
odrres.pprint()
It raises the following exception:

scipy.odr.odrpack.odr_error: number of observations do not match

which seems to be related to my array shapes, but I do not know how to shape them properly. Does anyone know?
Firstly, in my experience scipy.odr works mostly with arrays, not matrices. The library performs a large number of size checks along the way, and getting it to work with multiple variables can be quite troublesome.
This is the workflow I usually use to get it to work (tested at least on Python 2.7):
import numpy as np
import scipy.odr

n = 1000
t = np.linspace(0, 1, n)

def linfit(beta, x):
    return beta[0]*x[0] + beta[1]*x[1] + beta[2]  # notice the changed indices for x

x1 = 2.5*np.sin(2*np.pi*6*t) + 4
x2 = 0.5*np.sin(2*np.pi*7*t + np.pi/3) + 2
x = np.row_stack((x1, x2))  # odr doesn't seem to work with column_stack

e = 0.25*np.random.randn(n)
y = 3*x[0] + 4*x[1] + 5 + e  # indices changed

linmod = scipy.odr.Model(linfit)
data = scipy.odr.Data(x, y)
odrfit = scipy.odr.ODR(data, linmod, beta0=[1., 1., 1.])
odrres = odrfit.run()
odrres.pprint()
So using plain 1D arrays, stacking them with row_stack, and addressing each variable by a single index seems to work.
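As a side note, scipy.odr also ships a built-in multilinear model, which avoids writing linfit by hand. A sketch (an assumption worth verifying against your scipy version: multilinear fits y = beta[0] + beta[1]*x[0] + beta[2]*x[1], i.e. the intercept comes first):

import numpy as np
import scipy.odr

n = 1000
t = np.linspace(0, 1, n)
x = np.row_stack((2.5*np.sin(2*np.pi*6*t) + 4,
                  0.5*np.sin(2*np.pi*7*t + np.pi/3) + 2))
y = 3*x[0] + 4*x[1] + 5 + 0.25*np.random.randn(n)

data = scipy.odr.Data(x, y)
# built-in arbitrary-dimensional linear model, intercept first
odrfit = scipy.odr.ODR(data, scipy.odr.multilinear, beta0=[1., 1., 1.])
print(odrfit.run().beta)  # roughly [5, 3, 4]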
Original Question
So I've been reading up a lot on how to do a perspective transform on an image in pure Python, but I can't get it to work.
The transform itself is pretty simple (it's just a small equation), but I'm having trouble with the function that creates the coefficients from certain anchor points in the image. I have working code that completes without errors, with the help of the excellent numpy-based answer from another post, but my issue is that I'm trying to do this in pure Python. The coefficients turn out ridiculously low, and nothing shows up on my resulting image.
One issue could be in the porting of numpy operations like the dot product, matrix multiplication, and matrix inversion to pure Python functions. (I do get a perspective-transformed image when I manually input each of the coefficients, so I know the coefficient-to-rendering part works.) So any help with figuring out what might be wrong with how I calculate the coefficients is greatly appreciated :)
Update
Alright, quick update: it turns out that the numpy code suddenly works now and produces what looks like a perspective-transformed image (originally from the post I linked to earlier), so that's at least a good sign. Here is the exact numpy code I used, and thus the template of steps to be reproduced in pure Python.
import numpy

matrix = []
print pa, pb
for p1, p2 in zip(pa, pb):
    matrix.append([p1[0], p1[1], 1, 0, 0, 0, -p2[0]*p1[0], -p2[0]*p1[1]])
    matrix.append([0, 0, 0, p1[0], p1[1], 1, -p2[1]*p1[0], -p2[1]*p1[1]])

A = numpy.matrix(matrix, dtype=numpy.float)
B = numpy.array(pb).reshape(8)

print "ab", A, B
res = numpy.dot(numpy.linalg.inv(A.T * A) * A.T, B)
print "res", numpy.array(res).reshape(8)
And here is the pure Python version that currently attempts to reproduce the steps but fails:
def tilt(self, oldplane, newplane):
    """
    perspective transform.
    oldplane is a list of four old xy coordinate pairs
    that will move to the four points in the newplane
    """
    # first find the transform coefficients, thanks to https://stackoverflow.com/questions/14177744/how-does-perspective-transformation-work-in-pil
    pb, pa = oldplane, newplane
    grid = []
    for p1, p2 in zip(pa, pb):
        grid.append([p1[0], p1[1], 1, 0, 0, 0, -p2[0]*p1[0], -p2[0]*p1[1]])
        grid.append([0, 0, 0, p1[0], p1[1], 1, -p2[1]*p1[0], -p2[1]*p1[1]])

    def transpose(listoflists):
        return [list(each) for each in zip(*listoflists)]

    def flatten(listoflists):
        return [xory for xy in listoflists for xory in xy]

    def sumproduct(listA, multis):
        "aka, dot multiplication"
        outgrid = []
        for y, (row, multi) in enumerate(zip(listA, multis)):
            rowsum = 0
            for x, nr in enumerate(row):
                rowsum += nr*multi
            outgrid.append(rowsum)
        return outgrid

    def gridmultiply(grid1, grid2):
        "aka, matrix*matrix"
        outgrid = []
        for y in xrange(len(grid1)):
            newrow = []
            for x in xrange(len(grid2)):
                value = grid1[y][x] * grid2[y][x]
                newrow.append(value)
            outgrid.append(newrow)
        return outgrid

    def gridinverse(grid):
        outgrid = []
        pos_deriv = 1
        neg_deriv = 1
        for y in xrange(len(grid)):
            horizline = []
            for x in xrange(len(grid[0])):
                invx = len(grid[0])-1-x
                invy = len(grid)-1-y
                nr = grid[y][x]
                pos_deriv += pos_deriv*nr
                invnr = grid[invy][invx]*-1
                horizline.append(invnr)
                neg_deriv += neg_deriv*invnr
            outgrid.append(horizline)
        derivative = 1/float(pos_deriv-neg_deriv)
        print "deriv", derivative
        outgrid = gridmultiply(outgrid, [[derivative for _ in xrange(len(outgrid[0]))] for _ in xrange(len(outgrid))])
        return outgrid

    A = grid
    B = flatten(pb)
    res = sumproduct(gridinverse(gridmultiply(gridmultiply(transpose(A), A), transpose(A))), B)
    transcoeff = res
    print transcoeff

    # then calculate new coords, thanks to http://math.stackexchange.com/questions/413860/is-perspective-transform-affine-if-it-is-why-its-impossible-to-perspective-a
    k = 1
    a, b, c, d, e, f, g, h = transcoeff
    outimg = Image().new(self.width, self.height)
    for y in xrange(len(self.imagegrid)):
        for x in xrange(len(self.imagegrid[0])):
            color = self.get(x, y)
            newx = int(round((a*x+b*y+c)/float(g*x+h*y+k)))
            newy = int(round((d*x+e*y+f)/float(g*x+h*y+k)))
            try:
                outimg.put(newx, newy, color)
                # print x, y, newx, newy
            except IndexError:
                # out of bounds
                pass
    return outimg
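One concrete hint for debugging the above: gridmultiply as written multiplies entries elementwise (value = grid1[y][x] * grid2[y][x]), which is not a matrix product. A standard pure-Python matrix product, shown as a minimal sketch, pairs rows of the first matrix with columns of the second:

def matmul(A, B):
    "true matrix product: row i of A dotted with column j of B"
    return [[sum(A[i][k] * B[k][j] for k in xrange(len(B)))
             for j in xrange(len(B[0]))]
            for i in xrange(len(A))]

print matmul([[1, 2], [3, 4]], [[5, 6], [7, 8]])  # [[19, 22], [43, 50]]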