Python: convert numpy array of signs to int and back - python

I'm trying to convert from a numpy array of signs (i.e., a numpy array whose entries are either 1. or -1.) to an integer and back through a binary representation. I have something that works, but it's not Pythonic, and I expect it'll be slow.
def sign2int(s):
s[s==-1.] = 0.
bstr = ''
for i in range(len(s)):
bstr = bstr + str(int(s[i]))
return int(bstr, 2)
def int2sign(i, m):
bstr = bin(i)[2:].zfill(m)
s = []
for d in bstr:
s.append(float(d))
s = np.array(s)
s[s==0.] = -1.
return s
Then
>>> m = 4
>>> s0 = np.array([1., -1., 1., 1.])
>>> i = sign2int(s0)
>>> print i
11
>>> s = int2sign(i, m)
>>> print s
[ 1. -1. 1. 1.]
I'm concerned about (1) the for loops in each and (2) having to build an intermediate representation as a string.
Ultimately, I will want something that works with a 2-d numpy array, too---e.g.,
>>> s = np.array([[1., -1., 1.], [1., 1., 1.]])
>>> print sign2int(s)
[5, 7]

For 1d arrays you can use this one linear Numpythonic approach, using np.packbits:
>>> np.packbits(np.pad((s0+1).astype(bool).astype(int), (8-s0.size, 0), 'constant'))
array([11], dtype=uint8)
And for reversing:
>>> unpack = (np.unpackbits(np.array([11], dtype=np.uint8))[-4:]).astype(float)
>>> unpack[unpack==0] = -1
>>> unpack
array([ 1., -1., 1., 1.])
And for 2d array:
>>> x, y = s.shape
>>> np.packbits(np.pad((s+1).astype(bool).astype(int), (8-y, 0), 'constant')[-2:])
array([5, 7], dtype=uint8)
And for reversing:
>>> unpack = (np.unpackbits(np.array([5, 7], dtype='uint8'))).astype(float).reshape(x, 8)[:,-y:]
>>> unpack[unpack==0] = -1
>>> unpack
array([[ 1., -1., 1.],
[ 1., 1., 1.]])

I'll start with sig2int.. Convert from a sign representation to binary
>>> a
array([ 1., -1., 1., -1.])
>>> (a + 1) / 2
array([ 1., 0., 1., 0.])
>>>
Then you can simply create an array of powers of two, multiply it by the binary and sum.
>>> powers = np.arange(a.shape[-1])[::-1]
>>> np.power(2, powers)
array([8, 4, 2, 1])
>>> a = (a + 1) / 2
>>> powers = np.power(2, powers)
>>> a * powers
array([ 8., 0., 2., 0.])
>>> np.sum(a * powers)
10.0
>>>
Then make it operate on rows by adding axis information and rely on broadcasting.
def sign2int(a):
# powers of two
powers = np.arange(a.shape[-1])[::-1]
np.power(2, powers, powers)
# sign to "binary" - add one and divide by two
np.add(a, 1, a)
np.divide(a, 2, a)
# scale by powers of two and sum
np.multiply(a, powers, a)
return np.sum(a, axis = -1)
>>> b = np.array([a, a, a, a, a])
>>> sign2int(b)
array([ 11., 11., 11., 11., 11.])
>>>
I tried it on a 4 by 100 bit array and it seemed fast
>>> a = a.repeat(100)
>>> b = np.array([a, a, a, a, a])
>>> b
array([[ 1., 1., 1., ..., 1., 1., 1.],
[ 1., 1., 1., ..., 1., 1., 1.],
[ 1., 1., 1., ..., 1., 1., 1.],
[ 1., 1., 1., ..., 1., 1., 1.],
[ 1., 1., 1., ..., 1., 1., 1.]])
>>> sign2int(b)
array([ 2.58224988e+120, 2.58224988e+120, 2.58224988e+120,
2.58224988e+120, 2.58224988e+120])
>>>
I'll add the reverse if i can figure it. - the best I could do relies on some plain Python without any numpy vectoriztion magic and I haven't figured how to make it work with a sequence of ints other than to iterate over them and convert them one at a time - but the time still seems acceptable.
def foo(n):
'''yields bits in increasing powers of two
bit sequence from lsb --> msb
'''
while n > 0:
n, r = divmod(n, 2)
yield r
def int2sign(n):
n = int(n)
a = np.fromiter(foo(n), dtype = np.int8, count = n.bit_length())
np.multiply(a, 2, a)
np.subtract(a, 1, a)
return a[::-1]
Works on 1324:
>>> bin(1324)
'0b10100101100'
>>> a = int2sign(1324)
>>> a
array([ 1, -1, 1, -1, -1, 1, -1, 1, 1, -1, -1], dtype=int8)
Seems to work with 1.2e305:
>>> n = int(1.2e305)
>>> n.bit_length()
1014
>>> a = int2sign(n)
>>> a.shape
(1014,)
>>> s = bin(n)
>>> s = s[2:]
>>> all(2 * int(x) -1 == y for x, y in zip(s, a))
True
>>>

Here are some vectorized versions of your functions:
def sign2int(s):
return int(''.join(np.where(s == -1., 0, s).astype(int).astype(str)), 2)
def int2sign(i, m):
tmp = np.array(list(bin(i)[2:].zfill(m)))
return np.where(tmp == "0", "-1", tmp).astype(int)
s0 = np.array([1., -1., 1., 1.])
sign2int(s0)
# 11
int2sign(11, 5)
# array([-1, 1, -1, 1, 1])
To use your functions on 2-d arrays, you can use map function:
s = np.array([[1., -1., 1.], [1., 1., 1.]])
map(sign2int, s)
# [5, 7]
map(lambda x: int2sign(x, 4), [5, 7])
# [array([-1, 1, -1, 1]), array([-1, 1, 1, 1])]

After a bit of testing, the Numpythonic approach of #wwii that doesn't use strings seems to fit what I need best. For the int2sign, I used a for-loop over the exponents with a standard algorithm for the conversion---which will have at most 64 iterations for 64-bit integers. Numpy's broadcasting happens across each integer very efficiently.
packbits and unpackbits are restricted to 8-bit integers; otherwise, I suspect that would've been the best (though I didn't try).
Here are the specific implementations I tested that follow the suggestions in the other answers (thanks to everyone!):
def _sign2int_str(s):
return int(''.join(np.where(s == -1., 0, s).astype(int).astype(str)), 2)
def sign2int_str(s):
return np.array(map(_sign2int_str, s))
def _int2sign_str(i, m):
tmp = np.array(list(bin(i)[2:])).astype(int)
return np.pad(np.where(tmp == 0, -1, tmp), (m - len(tmp), 0), "constant", constant_values = -1)
def int2sign_str(i,m):
return np.array(map(lambda x: _int2sign_str(x, m), i.astype(int).tolist())).transpose()
def sign2int_np(s):
p = np.arange(s.shape[-1])[::-1]
s = s + 1
return np.sum(np.power(s, p), axis = -1).astype(int)
def int2sign_np(i,m):
N = i.shape[-1]
S = np.zeros((m, N))
for k in range(m):
b = np.power(2, m - 1 - k).astype(int)
S[k,:] = np.divide(i.astype(int), b).astype(float)
i = np.mod(i, b)
S[S==0.] = -1.
return S
And here is my test:
X = np.sign(np.random.normal(size=(5000, 20)))
N = 100
t = time.time()
for i in range(N):
S = sign2int_np(X)
print 'sign2int_np: \t{:10.8f} sec'.format((time.time() - t)/N)
t = time.time()
for i in range(N):
S = sign2int_str(X)
print 'sign2int_str: \t{:10.8f} sec'.format((time.time() - t)/N)
m = 20
S = np.random.randint(0, high=np.power(2,m), size=(5000,))
t = time.time()
for i in range(N):
X = int2sign_np(S, m)
print 'int2sign_np: \t{:10.8f} sec'.format((time.time() - t)/N)
t = time.time()
for i in range(N):
X = int2sign_str(S, m)
print 'int2sign_str: \t{:10.8f} sec'.format((time.time() - t)/N)
This produced the following results:
sign2int_np: 0.00165325 sec
sign2int_str: 0.04121902 sec
int2sign_np: 0.00318024 sec
int2sign_str: 0.24846984 sec

I think numpy.packbits is worth another look. Given a real-valued sign array a, you can use numpy.packbits(a > 0). Decompression is done by numpy.unpackbits. This implicitly flattens multi-dimensional arrays so you'll need to reshape after unpackbits if you have a multi-dimensional array.
Note that you can combine bit packing with conventional compression (e.g., zlib or lzma). If there is a pattern or bias to your data, you may get a useful compression factor, but for unbiased random data, you'll typically see a moderate size increase.

Related

Finding determinant with torch.det doesn't return 0?

I'm trying to find the determinant of a matrix using torch.det. However, it seems like I'm either not doing it right or the function is not working properly (the results should be 0 rather than a small number).
a = torch.tensor([1.0, 1.0])
b = torch.tensor([3.0, 3.0])
c = torch.stack([a,b], dim = 1)
print(c)
torch.det(d)
>>>tensor([[1., 3.],
[1., 3.]])
tensor(1.2517e-06)
Another example:
a = torch.tensor([2, -1, 1]).float()
b = torch.tensor([3, -4, -2]).float()
c = torch.tensor([5, -10, -8]).float()
d = torch.stack([a,b,c], dim = 1)
print(d)
print(torch.det(d))
>>>
tensor([[ 2., 3., 5.],
[ -1., -4., -10.],
[ 1., -2., -8.]])
tensor(1.2517e-06)
Update 1:
I think I had a typo in the first example (I restarted everything and reran it):
import torch
a = torch.tensor([1.0, 1.0])
b = torch.tensor([3.0, 3.0])
c = torch.stack([a,b], dim = 1)
print(c)
torch.det(c)
>>> tensor([[1., 3.],
[1., 3.]])
tensor(0.)
Though, I believe the second example should also be 0

Assigning values to overwrite python array

I am trying to assign new values to an array based on whether or not the stored value is <3. Coming from an R background this is how I would do it, but this gives me a syntax error in Python. What am I doing wrong, and what is the Python approach?
eurx=[1,2,3,4,5,6,7,'a',8]
sma50=3
tw=eurx
tw[eurx<sma50]=-1
tw[eurx>=sma50]=1
tw[(tw!=1)||(tw!=-1)]=0
print(tw)
GOAL:
-1
-1
1
1
1
1
1
0
1
This is "too much R". A pythonic way would be to use functional filtering:
>>> map(lambda i: -2*int(i<sma50)+1 if type(i) == int else 0, eurx)
[-1, -1, 1, 1, 1, 1, 1, 0, 1]
Or just a simple for-loop with a few ifs:
>>> for i in eurx:
... if type(i) != int:
... print 0
... else:
... print -2*int(i<sma50)+1
...
-1
-1
1
1
1
1
1
0
1
In general: don't try to guess the syntax. It's very simple, just read through some tutorials (e.g. https://docs.python.org/3/tutorial/introduction.html#first-steps-towards-programming)
Edit: the int conversion hack works as follows: you know you can convert bool to int, right?
>>> int(True)
1
>>> int(False)
0
If i<sma50 evaluates to True, int(i<sma50) will be 1. So yor numbers now are converted to ones if i is smaller than sma50 and to zeros otherwise. But apparently you want the values (-1, 1) instead of (1, 0). Just apply the transform -2x+1 and you're done!
Your desired syntax is pretty close to what you'd write in numpy.
The heterogeneous list doesn't make it easy, but here's an example:
>>> import numpy as np
>>> eurx=[1,2,3,4,5,6,7,'a',8]
>>> sma50 = 3
>>> tw = np.array([i if isinstance(i, int) else np.nan for i in eurx])
>>> tw
array([ 1., 2., 3., 4., 5., 6., 7., nan, 8.])
>>> tw[tw < sma50] = -1
__main__:1: RuntimeWarning: invalid value encountered in less
>>> tw[tw >= sma50] = 1
__main__:1: RuntimeWarning: invalid value encountered in greater_equal
>>> tw
array([ -1., -1., 1., 1., 1., 1., 1., nan, 1.])
>>> tw[np.isnan(tw)] = 0
>>> tw
array([-1., -1., 1., 1., 1., 1., 1., 0., 1.])

Efficient way to compare the values of 3 lists in Python?

I have 3 lists with similar float values in a1, a2, a3 (whose lengths are equal).
for i in length(a1,a2,a3):
Find the increasing / decreasing order of a1[i], a2[i], a3[i]
Rank the values based on the order
Is there a simple/efficient way to do this? Rather than writing blocks of if-else statements?
I am trying to calculate the Friedman test ranks in Python. Though there is a scipy.stats.friedmanchisquare function, it doesn't return the ranks The Friedman test
EDIT
I have data like this in the Image 1.
a1 has week 1
a2 has week 2 and
a3 has week 3
I want to rank the values like in this Image 2
I tried comparing the values by using if else loops like this
for i in range(0,10):
if(acc1[i]>acc2[i]):
if(acc1[i]>acc3[i]):
rank1[i] = 1
if(acc2[i]>acc3[i]):
rank2[i] = 2
rank3[i] = 3
friedmanchisquare uses scipy.stats.rankdata. Here's one way you could use rankdata with your three lists. It creates a list called ranks, where ranks[i] is an array containing the ranking of [a1[i], a2[i], a3[i]].
In [41]: a1
Out[41]: [1.0, 2.4, 5.0, 6]
In [42]: a2
Out[42]: [9.0, 5.0, 4, 5.0]
In [43]: a3
Out[43]: [5.0, 6.0, 7.0, 2.0]
In [44]: from scipy.stats import rankdata
In [45]: ranks = [rankdata(row) for row in zip(a1, a2, a3)]
In [46]: ranks
Out[46]:
[array([ 1., 3., 2.]),
array([ 1., 2., 3.]),
array([ 2., 1., 3.]),
array([ 3., 2., 1.])]
If you convert that to a single numpy array, you can then easily work with either the rows or columns of ranks:
In [47]: ranks = np.array(ranks)
In [48]: ranks
Out[48]:
array([[ 1., 3., 2.],
[ 1., 2., 3.],
[ 2., 1., 3.],
[ 3., 2., 1.]])
In [49]: ranks.sum(axis=0)
Out[49]: array([ 7., 8., 9.])
You could define a simple function that returns the order of the sorts:
def sort3(a,b,c):
if (a >= b):
if (b >= c):
return (1, 2, 3)
elif (a >= c):
return (1, 3, 2)
else:
return (3, 1, 2)
elif (b >= c):
if (c >= a):
return (2, 3, 1)
else:
return (2, 1, 3)
else:
return (3, 2, 1)
Or consider using this https://stackoverflow.com/a/3382369/3224664
def argsort(seq):
# http://stackoverflow.com/questions/3071415/efficient-method-to-calculate-the-rank-vector-of-a-list-in-python
return sorted(range(len(seq)), key=seq.__getitem__)
a = [1,3,5,7]
b = [2,2,2,6]
c = [3,1,4,8]
for i in range(len(a)):
print(argsort([a[i],b[i],c[i]]))

Error about inequality constraint in cvxopt (using GLPK)

I'm using cvxopt to calculate the Nash equilibrium of a the following two-person zero-sum game.
[-5, 3, 1, 8]
[ 5, 5, 4, 6]
[-4, 6, 0, 5]
Here's the code (with doctest) I'm using.
from cvxopt import matrix, solvers
from cvxopt.modeling import op, dot, variable
import numpy as np
def solve_lp(a, b, c):
"""
>>> a = matrix([[-5., 3., 1., 8., 1.],
... [ 5., 5., 4., 6., 1.],
... [-4., 6., 0., 5., 1.],
... [-1.,-1.,-1.,-1., 0.],
... [ 1., 1., 1., 1., 0.],
... [-1., 0., 0., 0., 0.],
... [ 0.,-1., 0., 0., 0.],
... [ 0., 0.,-1., 0., 0.],
... [ 0., 0., 0.,-1., 0.]])
>>> b = matrix([0.,0.,0.,0.,1.])
>>> c = matrix([0.,0.,0., 1.,-1.,0.,0.,0.,0.])
>>> solve_lp(a, b, c)
"""
variables = c.size[0]
x = variable(variables, 'x')
eq = (a*x == b)
ineq = (x >= 0)
lp = op(dot(c, x), [eq, ineq])
lp.solve(solver='glpk')
return (lp.objective.value(), x.value)
Running it generates the following error:
Traceback (most recent call last):
...
TypeError: 'G' must be a dense or sparse 'd' matrix with 9 columns
It seems that cvxopt is throwing an exception regarding the ineq constraint, even though I seem to be following the syntax for constraints from the modeling examples.
What I've tried so far
Changing the code by multiplying x by a vector of 1s:
def solve_lp(a, b, c):
variables = c.size[0]
x = variable(variables, 'x')
e = matrix(1.0, (1, variables))
eq = (a*x == b)
ineq = (e*x >= 0)
lp = op(dot(c, x), [eq, ineq])
lp.solve(solver='glpk')
return (lp.objective.value(), x.value)
at least it gets to GLPK, which in turn produces this error:
Scaling...
A: min|aij| = 1.000e+00 max|aij| = 8.000e+00 ratio = 8.000e+00
Problem data seem to be well scaled
Constructing initial basis...
Size of triangular part = 6
* 0: obj = 0.000000000e+00 infeas = 0.000e+00 (0)
PROBLEM HAS UNBOUNDED SOLUTION
glp_simplex: unable to recover undefined or non-optimal solution
How do I fix this?
I think you should follow the usage of glpk solver in this webpage:
https://github.com/benmoran/L1-Sudoku/blob/master/sudoku.py
Follow this exactly you will fix and use this glpk solver correctly...
def solve_plain_l1(A, b, solver='glpk'):
'''Find x with min l1 such that Ax=b,
using plain L1 minimization'''
n = A.size[1]
c0 = ones_v(2*n)
G1 = concathoriz(A,-A) # concatenate horizontally
G2 = concathoriz(-A,A)
G3 = -eye(2*n)
G = reduce(concatvert, [G1,G2,G3]) # concatenate vertically
hh = reduce(concatvert, [b, -b, zeros_v(2*n)])
u = cvxopt.solvers.lp(c0, G, hh, solver=solver)
v = u['x'][:n]
return v

Flip (reverse) image vertically given its string?

So I have a string of RGBA image data, each pixel is a byte long. I know the image's x and y resolution too. Now I want to edit the string in a way which would cause the image to be flipped or reversed vertically, which means have the first "row" of pixels become the last row and the opposite, and like this for all other "rows". Is there a fast way to do it?
To do what you want to the letter this is one way to proceed:
>>> img = 'ABCDEFGHIJKL'
>>> x, y = 4, 3
>>> def chunks(l, n):
... for i in xrange(0, len(l), n):
... yield l[i:i+n]
...
>>> [row for row in chunks(img, x)]
['ABCD', 'EFGH', 'IJKL']
>>> ''.join(reversed([row for row in chunks(img, x)]))
'IJKLEFGHABCD'
HOWEVER, unless you have very small images, you would be better off passing through numpy, as this is at the very minimum an order of magnitude faster than Cpython datatypes. You should look at at the flipup function. Example:
>>> A
array([[ 1., 0., 0.],
[ 0., 2., 0.],
[ 0., 0., 3.]])
>>> np.flipud(A)
array([[ 0., 0., 3.],
[ 0., 2., 0.],
[ 1., 0., 0.]])
EDIT: thought to add a complete example in case you have never worked with NumPy before. Of course the conversion is worth only for images that are not 2x2, as instantiating the array has an overhead....
>>> import numpy as np
>>> img = [0x00, 0x01, 0x02, 0x03]
>>> img
[0, 1, 2, 3]
>>> x = y = 2
>>> aimg = np.array(img).reshape(x, y)
>>> aimg
array([[0, 1],
[2, 3]])
>>> np.flipud(aimg)
array([[2, 3],
[0, 1]])
say you have the image in array img, then do
img.reverse();
#also need to flip each row
for row in img:
row.reverse();

Categories

Resources