How do I get the maximum signed short integer in Python (i.e. SHRT_MAX in C's limits.h)?
I want to normalize samples from a single channel of a *.wav file, so instead of a bunch of 16-bit signed integers, I want a bunch of floats between -1 and 1. Here's what I've got (the pertinent code is in the normalized_samples() function):
def samples(clip, chan_no = 0):
    # *.wav files generally come in 8-bit unsigned ints or 16-bit signed ints
    # python's wave module gives sample width in bytes, so STRUCT_FMT
    # basically converts the wave.samplewidth into a struct fmt string
    STRUCT_FMT = { 1 : 'B',
                   2 : 'h' }
    for i in range(clip.getnframes()):
        yield struct.unpack(STRUCT_FMT[clip.getsampwidth()] * clip.getnchannels(),
                            clip.readframes(1))[chan_no]

def normalized_samples(clip, chan_no = 0):
    for sample in samples(clip, chan_no):
        yield float(sample) / float(32767) ### THIS IS WHERE I NEED HELP
GregS is right: this is not the right way to solve the problem. If your samples are known to be 8 or 16 bit, you don't want to be dividing them by a number that varies by platform.
You may be running into trouble because a signed 16-bit int actually ranges from -32768 to 32767. Dividing by 32767 is going to give you < -1 in the extreme negative case.
Try this:
yield float(sample + 2**15) / 2**15 - 1.0
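As a quick sanity check (hypothetical, not part of the original answer), the endpoints of the 16-bit range map where you'd expect:
# spot-check the suggested mapping at the extremes
for s in (-32768, 0, 32767):
    print(s, float(s + 2**15) / 2**15 - 1.0)
# -32768 -> -1.0, 0 -> 0.0, 32767 -> ~0.99997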
Here is a way using Cython:
getlimit.py
import pyximport; pyximport.install()
import limits
print limits.shrt_max
limits.pyx
import cython
cdef extern from "limits.h":
    cdef int SHRT_MAX

shrt_max = SHRT_MAX
In module sys, there's sys.maxint. Though I'm not sure that is the correct way to solve your problem.
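Note that sys.maxint (Python 2) is the maximum of a plain int, which corresponds to a C long, not a short. If you want a SHRT_MAX-like value without Cython, here is a minimal sketch that derives it from the native size of 'h' (assuming a two's-complement short):
import struct

# 'h' is a native C short; struct.calcsize gives its size in bytes
SHRT_MAX = 2 ** (8 * struct.calcsize('h') - 1) - 1
SHRT_MIN = -2 ** (8 * struct.calcsize('h') - 1)
print(SHRT_MAX, SHRT_MIN)  # typically: 32767 -32768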
I can't imagine circumstances on a modern computer (i.e. one that uses 2's complement integers) where this would fail:
assert -32768 <= signed_16_bit_integer <= 32767
To do exactly what you asked for:
if signed_16_bit_integer >= 0:
    afloat = signed_16_bit_integer / 32767.0
else:
    afloat = signed_16_bit_integer / -32768.0
Having read your code a bit more closely: you already have the sample width in bytes, so just divide by 255 (or 256) if it's 'B' and by 32768 if it's 'h'.
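Putting that together, here is a minimal sketch of normalized_samples (an assumption on my part, not the asker's final code: 8-bit wave samples are unsigned, so the 'B' branch re-centers them around zero first):
def normalized_samples(clip, chan_no=0):
    width = clip.getsampwidth()
    for sample in samples(clip, chan_no):
        if width == 1:                     # 'B': unsigned, 0..255
            yield (sample - 128) / 128.0   # roughly -1.0 .. 0.99
        else:                              # 'h': signed, -32768..32767
            yield sample / 32768.0         # -1.0 .. ~0.99997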
#!/usr/bin/env python2
# maximums.py
#################################################################
B16_MAX = (1 << 16) - 1
B15_MAX = (1 << 15) - 1
B08_MAX = (1 << 8) - 1
B07_MAX = (1 << 7) - 1
print
print "hex(B16_MAX) =",hex(B16_MAX) # 0xffff
print "hex(B15_MAX) =",hex(B15_MAX) # 0x7fff
print "hex(B08_MAX) =",hex(B08_MAX) # 0xff
print "hex(B07_MAX) =",hex(B07_MAX) # 0x7f
print
#################################################################
UBYTE2_MAX = B16_MAX
SBYTE2_MAX = B15_MAX
UBYTE1_MAX = B08_MAX
SBYTE1_MAX = B07_MAX
print
print "UBYTE2_MAX =",UBYTE2_MAX # 65535
print "SBYTE2_MAX =",SBYTE2_MAX # 32767
print "UBYTE1_MAX =",UBYTE1_MAX # 255
print "SBYTE1_MAX =",SBYTE1_MAX # 127
print
#################################################################
USHRT_MAX = UBYTE2_MAX
SHRT_MAX = SBYTE2_MAX
CHAR_MAX = UBYTE1_MAX
BYTE_MAX = SBYTE1_MAX
print
print "USHRT_MAX =",USHRT_MAX # 65535
print " SHRT_MAX =", SHRT_MAX # 32767
print " CHAR_MAX =", CHAR_MAX # 255
print " BYTE_MAX =", BYTE_MAX # 127
print
#################################################################
I am receiving a longitude and accuracy as a 4-byte hexadecimal string: 99054840
I'm trying to extract a longitude from this value.
The specs tell me the following:
Bits [28:0]: signed value λ, little-endian format, longitude in ° = λ ÷ 1,000,000
Bits [31:29]: unsigned value α, range 0-7, a measure for accuracy
My device is physically located at a longitude of 4.7199, so I know what the result of the conversion should be.
To read the value of the longitude I currently do (with incorrect result):
def get_longitude(reading):
    # split in different bytes
    n = 2
    all_bytes = [reading[i:i+n] for i in range(0, len(reading), n)]
    # convert to binary
    long_bytes_binary = list(map(hex_to_binary, all_bytes))
    # drop the accuracy bits
    long_bytes_binary[3] = long_bytes_binary[3][0:5]
    # little endian & concatenate bytes
    longitude_binary = ''.join(list(reversed(long_bytes_binary)))
    # get longitude
    lon = binary_to_decimal(int(longitude_binary))/1_000_000
This comes to 138.93, totally different from the expected 4.7199.
Here are the helper methods:
def hex_to_binary(payload):
    scale = 16
    num_of_bits = 8
    binary_payload = bin(int(payload, scale))[2:].zfill(num_of_bits)
    return binary_payload

def binary_to_decimal(binary):
    decimal, i = 0, 0
    while binary != 0:
        dec = binary % 10
        decimal = decimal + dec * pow(2, i)
        binary = binary // 10
        i += 1
    return decimal
What am I doing wrong? How can I correctly read the value?
Or is my device broken :)
I'm cheating a little bit here by using struct to do the endian swap, but you get the idea.
import struct
val = 0x99054840
val = struct.unpack('<I',struct.pack('>I',val))[0]
print(hex(val))
accuracy = (val >> 29) & 7
longitude = (val & 0x1fffffff) / 1000000  # mask bits [28:0]
print(accuracy,longitude)
Output:
C:\tmp>x.py
0x40480599
2 4.720025
C:\tmp>
The OP's code dropped the last 3 bits instead of the first 3 bits for accuracy. This change fixes it:
# drop the accuracy bits
long_bytes_binary[3] = long_bytes_binary[3][3:]
But the calculation can be much simpler:
def hex_to_longitude(x):
    b = bytes.fromhex(x)              # convert hex string to bytes
    i = int.from_bytes(b, 'little')   # treat bytes as a little-endian integer
    return (i & 0x1FFFFFFF) / 1e6     # mask the low 29 bits, divide by one million
x = '99054840'
print(hex_to_longitude(x))
4.720025
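One caveat: the spec calls λ a signed value, and hex_to_longitude above treats it as unsigned. If western (negative) longitudes can occur, a hedged extension would sign-extend the 29-bit field (hex_to_longitude_signed is a hypothetical name, untested against the device):
def hex_to_longitude_signed(x):
    b = bytes.fromhex(x)
    i = int.from_bytes(b, 'little')
    lam = i & 0x1FFFFFFF        # keep bits [28:0]
    if lam & (1 << 28):         # sign bit of the 29-bit field set?
        lam -= 1 << 29          # two's-complement sign extension
    return lam / 1e6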
I want to retrieve base64 encoded objectSid from an LDAP query to an Active Directory database and convert them to the standard SID representation. Can you please give me a Python snippet that does that?
This should do the trick:
import struct

def convert(binary):
    version = struct.unpack('B', binary[0])[0]
    # I do not know how to treat version != 1 (it does not exist yet)
    assert version == 1, version
    length = struct.unpack('B', binary[1])[0]
    authority = struct.unpack('>Q', '\x00\x00' + binary[2:8])[0]
    string = 'S-%d-%d' % (version, authority)
    binary = binary[8:]
    assert len(binary) == 4 * length
    for i in xrange(length):
        value = struct.unpack('<L', binary[4*i:4*(i+1)])[0]
        string += '-%d' % value
    return string
References: http://blogs.msdn.com/b/oldnewthing/archive/2004/03/15/89753.aspx and http://codeimpossible.com/2008/04/07/Converting-a-Security-Identifier-from-binary-to-string/.
This is @Giovanni Mascellani's answer, adapted for Python 3.x:
import struct

def convert(binary):
    version = struct.unpack('B', binary[0:1])[0]
    # I do not know how to treat version != 1 (it does not exist yet)
    assert version == 1, version
    length = struct.unpack('B', binary[1:2])[0]
    authority = struct.unpack(b'>Q', b'\x00\x00' + binary[2:8])[0]
    string = 'S-%d-%d' % (version, authority)
    binary = binary[8:]
    assert len(binary) == 4 * length
    for i in range(length):
        value = struct.unpack('<L', binary[4*i:4*(i+1)])[0]
        string += '-%d' % value
    return string
Use, or see the implementation of, the SID formatter in the ldap3 package (see the ldap3 docs and source):
ldap3.protocol.formatters.formatters.format_sid
A SID in the format S-1-5-21-2562418665-3218585558-1813906818-1576 has the following raw hex form: 010500000000000515000000e967bb98d6b7d7bf82051e6c28060000, which can be broken down as follows:
S : it means simply this is an objectSid
01 : (1) is the revision and is always 1 as far as I know
05 : count of sub-authorities, or you can simply say the dash count minus two
000000000005 : (5) big-endian
15000000 : (21) little-endian
e967bb98 : (2562418665) little-endian
d6b7d7bf : (3218585558) little-endian
82051e6c : (1813906818) little-endian
28060000 : (1576) little-endian
Little-endian values should be read in reverse order; that is how the binary data is represented, with the least significant byte stored first.
Accordingly, big-endian is the order where the most significant byte is stored first. Here is a good article explaining byte order.
This is a brief introduction to the structure of the objectSid, and this is a nice blog post.
Based on this information, let's try to read a SID returned as binary from the LDAP query. The binascii library can be used for Python 2 as well as Python 3:
from binascii import b2a_hex

def sid_to_str(sid):
    try:
        # Python 3
        if str is not bytes:
            # revision
            revision = int(sid[0])
            # count of sub-authorities
            sub_authorities = int(sid[1])
            # big-endian identifier authority
            identifier_authority = int.from_bytes(sid[2:8], byteorder='big')
            # if it does not fit in 32 bits, it is conventionally shown in hex
            if identifier_authority >= 2 ** 32:
                identifier_authority = hex(identifier_authority)
            # loop over the little-endian sub-authorities
            sub_authority = '-' + '-'.join([str(int.from_bytes(sid[8 + (i * 4): 12 + (i * 4)], byteorder='little')) for i in range(sub_authorities)])
        # Python 2
        else:
            revision = int(b2a_hex(sid[0]))
            sub_authorities = int(b2a_hex(sid[1]))
            identifier_authority = int(b2a_hex(sid[2:8]), 16)
            if identifier_authority >= 2 ** 32:
                identifier_authority = hex(identifier_authority)
            sub_authority = '-' + '-'.join([str(int(b2a_hex(sid[11 + (i * 4): 7 + (i * 4): -1]), 16)) for i in range(sub_authorities)])
        objectSid = 'S-' + str(revision) + '-' + str(identifier_authority) + sub_authority
        return objectSid
    except Exception:
        pass
    return sid
sid = b'\x01\x05\x00\x00\x00\x00\x00\x05\x15\x00\x00\x00\xe9\x67\xbb\x98\xd6\xb7\xd7\xbf\x82\x05\x1e\x6c\x28\x06\x00\x00'
print(sid_to_str(sid)) # S-1-5-21-2562418665-3218585558-1813906818-1576
If you're using Linux and have Samba installed:
from samba.dcerpc import security
from samba.ndr import ndr_unpack

def convert(binary_sid):
    return str(ndr_unpack(security.dom_sid, binary_sid))
Where binary_sid is the binary representation of the sid.
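Since the question's objectSid arrives base64-encoded, remember to decode it before handing it to any of the converters above (a minimal sketch; encoded_sid is a hypothetical variable holding the base64 string):
import base64

binary_sid = base64.b64decode(encoded_sid)  # base64 text -> raw bytes
print(convert(binary_sid))                  # e.g. S-1-5-21-...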
I am trying to convert this C function into Python:
typedef unsigned long var;

/* Bit rotate rightwards */
var ror(var v, unsigned int bits) {
    return (v >> bits) | (v << (8 * sizeof(var) - bits));
}
I have tried Googling for some solutions, but I can't seem to get any of them to give the same results as the one here.
This is one solution I have found in another program:
def mask1(n):
    """Return a bitmask of length n (suitable for masking against an
    int to coerce the size to a given length)
    """
    if n >= 0:
        return 2**n - 1
    else:
        return 0

def ror(n, rotations=1, width=8):
    """Return a given number of bitwise right rotations of an integer n,
    for a given bit field width.
    """
    rotations %= width
    if rotations < 1:
        return n
    n &= mask1(width)
    return (n >> rotations) | ((n << (8 * width - rotations)))
I am trying to bitshift key = 0xf0f0f0f0f123456. The C code gives 000000000f0f0f12 when it is called with ror(key, 8 << 1), and Python gives 0x0f0f0f0f0f123456 (the original input!).
Your C output doesn't match the function that you provided. That is presumably because you are not printing it correctly. This program:
#include <stdio.h>
#include <stdint.h>
uint64_t ror(uint64_t v, unsigned int bits)
{
    return (v >> bits) | (v << (8 * sizeof(uint64_t) - bits));
}

int main(void)
{
    printf("%llx\n", ror(0x0123456789abcdef, 4));
    printf("%llx\n", ror(0x0123456789abcdef, 8));
    printf("%llx\n", ror(0x0123456789abcdef, 12));
    printf("%llx\n", ror(0x0123456789abcdef, 16));
    return 0;
}
produces the following output:
f0123456789abcde
ef0123456789abcd
def0123456789abc
cdef0123456789ab
To produce an ror function in Python I refer you to this excellent article: http://www.falatic.com/index.php/108/python-and-bitwise-rotation
This Python 2 code produces the same output as the C program above:
ror = lambda val, r_bits, max_bits: \
    ((val & (2**max_bits-1)) >> r_bits%max_bits) | \
    (val << (max_bits-(r_bits%max_bits)) & (2**max_bits-1))
print "%x" % ror(0x0123456789abcdef, 4, 64)
print "%x" % ror(0x0123456789abcdef, 8, 64)
print "%x" % ror(0x0123456789abcdef, 12, 64)
print "%x" % ror(0x0123456789abcdef, 16, 64)
The shortest way I've found in Python:
(note this works only with integers as inputs)
def ror(n, rotations, width):
    return (2**width - 1) & (n >> rotations | n << (width - rotations))
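As a quick check (not part of the original answer), this reproduces the C program's output above for a 64-bit width:
print(hex(ror(0x0123456789abcdef, 8, 64)))   # 0xef0123456789abcd
print(hex(ror(0x0123456789abcdef, 16, 64)))  # 0xcdef0123456789ab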
There are different problems in your question.
C part :
You use a key that is a 64-bit value (0x0f0f0f0f0f123456), but the output shows that for your compiler unsigned long is only 32 bits wide. So what the C code does is rotate the 32-bit value 0x0f123456 by 16 bits, giving 0x34560f12.
If you had used unsigned long long (assuming it is 64 bits on your architecture, as it is on mine), you would have got 0x34560f0f0f0f0f12 (a 64-bit value rotated by 16 bits).
Python part :
The definition of width is not consistent between mask1 and ror: mask1 takes a width in bits, whereas ror takes a width in bytes, and one byte = 8 bits.
The ror function should be :
def ror(n, rotations=1, width=8):
    """Return a given number of bitwise right rotations of an integer n,
    for a given bit field width.
    """
    rotations %= width * 8   # width bytes give 8*width bits
    if rotations < 1:
        return n
    mask = mask1(8 * width)  # store the mask
    n &= mask
    return (n >> rotations) | ((n << (8 * width - rotations)) & mask)  # apply the mask to the result
That way, with key = 0x0f0f0f0f0f123456, you get:
>>> hex(ror(key, 16))
'0x34560f0f0f0f0f12L'
>>> hex(ror(key, 16, 4))
'0x34560f12L'
exactly the same as the C output.
I know it's nearly 6 years old, but I always find it easier to use string slices than bitwise operations.
def rotate_left(x, n):
    return int(f"{x:032b}"[n:] + f"{x:032b}"[:n], 2)

def rotate_right(x, n):
    return int(f"{x:032b}"[-n:] + f"{x:032b}"[:-n], 2)
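A quick sanity check of these 32-bit helpers (hypothetical values, not from the original answer):
print(hex(rotate_right(0x12345678, 8)))  # 0x78123456
print(hex(rotate_left(0x12345678, 8)))   # 0x34567812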
def rotation_value(value, rotations, width=32):
    """Return a given number of bitwise left or right rotations of an integer
    value, for a given bit field width.
    Negative rotations rotate left; positive rotations rotate right.
    """
    if int(rotations) != abs(int(rotations)):
        rotations = width + int(rotations)
    return (int(value) << (width - (rotations % width)) | (int(value) >> (rotations % width))) & ((1 << width) - 1)
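For example (a hypothetical check; negative rotations rotate left, positive rotate right):
print(hex(rotation_value(0x12345678, 8)))   # right by 8 -> 0x78123456
print(hex(rotation_value(0x12345678, -8)))  # left by 8  -> 0x34567812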
I have a string of booleans and I want to create a binary file using these booleans as bits. This is what I am doing:
# first append the string with 0s to make its length a multiple of 8
while len(boolString) % 8 != 0:
    boolString += '0'

# write the string to the file byte by byte
i = 0
while i < len(boolString) / 8:
    byte = int(boolString[i*8 : (i+1)*8], 2)
    outputFile.write('%c' % byte)
    i += 1
But this generates the output 1 byte at a time and is slow. What would be a more efficient way to do it?
It should be quicker if you calculate all your bytes first and then write them all together. For example:
b = bytearray([int(boolString[x:x+8], 2) for x in range(0, len(boolString), 8)])
outputFile.write(b)
I'm using a bytearray here, which is a natural container for this kind of data and can be written directly to your file. You can of course use libraries if that's appropriate, such as bitarray and bitstring. Using the latter, you could just say:
bitstring.Bits(bin=boolString).tofile(outputFile)
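For completeness, a hypothetical equivalent with the bitarray package (assuming outputFile is opened in binary mode; tofile zero-pads the final partial byte):
from bitarray import bitarray

bitarray(boolString).tofile(outputFile)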
Here's another answer, this time using an industrial-strength utility function from PyCrypto - The Python Cryptography Toolkit, where, in version 2.6 (the current latest stable release), it's defined in pycrypto-2.6/lib/Crypto/Util/number.py. (The b() calls below are PyCrypto's own py3compat byte-string helper.)
The comments preceding it say:
Improved conversion functions contributed by Barry Warsaw, after careful benchmarking
import struct

def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string
    Convert a long integer to a byte string.

    If optional blocksize is given and greater than zero, pad the front of the
    byte string with binary zeros so that the length is a multiple of
    blocksize.
    """
    # after much testing, this algorithm was deemed to be the fastest
    s = b('')
    n = long(n)
    pack = struct.pack
    while n > 0:
        s = pack('>I', n & 0xffffffffL) + s
        n = n >> 32
    # strip off leading zeros
    for i in range(len(s)):
        if s[i] != b('\000')[0]:
            break
    else:
        # only happens when n == 0
        s = b('\000')
        i = 0
    s = s[i:]
    # add back some pad bytes. this could be done more efficiently w.r.t. the
    # de-padding being done above, but sigh...
    if blocksize > 0 and len(s) % blocksize:
        s = (blocksize - len(s) % blocksize) * b('\000') + s
    return s
You can convert a boolean string to a long using data = long(boolString,2). Then to write this long to disk you can use:
while data > 0:
    data, byte = divmod(data, 256)  # divide by 256 (not 0xff) to peel off whole bytes
    file.write('%c' % byte)
However, there is no need to make a boolean string. It is much easier to use a long. The long type can contain an infinite number of bits. Using bit manipulation you can set or clear the bits as needed. You can then write the long to disk as a whole in a single write operation.
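On Python 3 specifically, a minimal sketch of that single-write approach using int.to_bytes (assuming boolString has already been right-padded to a multiple of 8 bits, as in the question):
data = int(boolString, 2)
n_bytes = len(boolString) // 8
outputFile.write(data.to_bytes(n_bytes, 'big'))  # big-endian preserves bit order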
You can try this code using the array class:
import array

buffer = array.array('B')

i = 0
while i < len(boolString) / 8:
    byte = int(boolString[i*8 : (i+1)*8], 2)
    buffer.append(byte)
    i += 1

f = file(filename, 'wb')
buffer.tofile(f)
f.close()
A helper class (shown below) makes it easy:
class BitWriter:
    def __init__(self, f):
        self.acc = 0
        self.bcount = 0
        self.out = f

    def __del__(self):
        self.flush()

    def writebit(self, bit):
        if self.bcount == 8:
            self.flush()
        if bit > 0:
            self.acc |= (1 << (7 - self.bcount))
        self.bcount += 1

    def writebits(self, bits, n):
        while n > 0:
            self.writebit(bits & (1 << (n - 1)))
            n -= 1

    def flush(self):
        if self.bcount > 0:  # guard so a double flush can't emit a stray byte
            self.out.write(chr(self.acc))
        self.acc = 0
        self.bcount = 0

with open('outputFile', 'wb') as f:
    bw = BitWriter(f)
    bw.writebits(int(boolString, 2), len(boolString))
    bw.flush()
Use the struct package.
This can be used in handling binary data stored in files or from network connections, among other sources.
Edit:
An example using ? as the format character for a bool.
import struct
p = struct.pack('????', True, False, True, False)
assert p == b'\x01\x00\x01\x00'
with open("out", "wb") as o:
o.write(p)
Let's take a look at the file:
$ ls -l out
-rw-r--r-- 1 lutz lutz 4 Okt 1 13:26 out
$ od out
0000000 000001 000001
000000
Read it in again:
with open("out", "rb") as i:
q = struct.unpack('????', i.read())
assert q == (True, False, True, False)
I'm trying to interop with C in Python, and I have the following function prototype:
int testout(unsigned char *test, int *size)
The size of the unsigned char buffer is determined by the size parameter, so I need a way to resize the buffer in Python dynamically to adjust to the size returned from the function call. This has been very difficult for me to figure out.
From the Python ctypes manual:
"This is nice and fine, but how would one access the additional elements contained in this array? Since the type still only knows about 4 elements, we get errors accessing other elements:
>>> short_array[:]
[0, 0, 0, 0]
>>> short_array[7]
Traceback (most recent call last):
...
IndexError: invalid index
>>>
Another way to use variable-sized data types with ctypes is to use the dynamic nature of Python, and (re-)define the data type after the required size is already known, on a case by case basis."
Great! I'm getting IndexError: invalid index, just as the example shows. And they decided not to show how to do this properly! :)
Does anyone know how to resize a ctype properly?
Here is my example code that works, except for the resizing:
from ctypes import *
lib = "test.so"
dll = cdll.LoadLibrary(lib)
print "Testing pointer output"
dll.testout.argtypes = [POINTER(c_ubyte), POINTER(c_int)]
sizeout = c_int(0)
mem = (c_ubyte * 20)()
dll.testout(mem, byref(sizeout))
print "Sizeout = " + str(sizeout.value)
for i in range(0, sizeout.value):
    print "Item " + str(i) + " = " + str(mem[i])
Here's an example of what we were discussing. This code expects the caller to allocate the buffer.
Test code (Windows)
// IN: test=pre-allocated buffer.
// IN: size=size of pre-allocated buffer.
// RETURNS: 0 and size set to required size.
//          1 and size set to size used.
__declspec(dllexport) int testout(void *pdata, int *psize)
{
    unsigned char *p = (unsigned char *)pdata;
    if (*psize < 5)
    {
        *psize = 5;  // set size required
        return 0;    // fail
    }
    p[0] = 123;
    p[1] = 255;
    p[2] = 237;
    p[3] = 12;
    p[4] = 222;
    *psize = 5;  // indicate size used
    return 1;    // pass
}
Python
from ctypes import *
test = CDLL('test')
size = c_int(0)
test.testout.argtypes=[c_void_p,POINTER(c_int)]
print "Result of NULL pointer and zero size:",test.testout(None,byref(size))
print "Returned size:",size.value
mem = (c_ubyte * 2)()
size = c_int(sizeof(mem))
print "sizeof(mem):",size.value
print "Result of small buffer:",test.testout(byref(mem),byref(size))
print "Returned size:",size.value
mem = (c_ubyte * size.value)()
print "Result of exact size buffer:",test.testout(byref(mem),byref(size))
print "Returned size:",size.value
for i in range(size.value):
    print mem[i]
mem = (c_ubyte * 20)()
size = c_int(20)
print "Result of bigger buffer:",test.testout(byref(mem),byref(size))
print "Returned size:",size.value
for i in range(size.value):
    print mem[i]
Output
Result of NULL pointer and zero size: 0
Returned size: 5
sizeof(mem): 2
Result of small buffer: 0
Returned size: 5
Result of exact size buffer: 1
Returned size: 5
123
255
237
12
222
Result of bigger buffer: 1
Returned size: 5
123
255
237
12
222
If you receive a pointer from C, you can index the corresponding ctypes pointer just like a C pointer.
If you need to allocate the array in Python, you can create the ctypes array type of appropriate size dynamically.
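A minimal sketch of both ideas (hypothetical code, reusing the dll.testout names from the first snippet and assuming the C function follows the size-query protocol shown above):
from ctypes import c_ubyte, c_int, byref, resize

size = c_int(0)
dll.testout(None, byref(size))            # first call reports the required size
mem = (c_ubyte * size.value)()            # define the array type dynamically
dll.testout(mem, byref(size))             # second call fills the buffer

# alternatively, grow an existing ctypes buffer in place and re-type it
buf = (c_ubyte * 4)()
resize(buf, 16)                           # enlarge the underlying memory block
view = (c_ubyte * 16).from_buffer(buf)    # new type that sees all 16 bytes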