Python complex numbers from C++: strange output

I'm working on translating some code from C++ to Python, and some values in the Python output are wrong. The expected output is pairs of numbers stored in the array. In Python, I get a lot of 1-0j values first and then good ones. In C++ the greatest value is around 1.3, while in Python it is over 9. How do I have to modify my Python code to get the same output as the C++?
C++ main function (I know that S does nothing yet, but I'll use it later):
int X = 1000;
int N = X;
complex<double> S;
for (int n = 0; n < X; n++)
{
    S = complex<double>(0, 0);
    for (int x = 0; x < X; x++)
    {
        double r = cos(((2*M_PI)/X)*n*x);
        double i = sin(((2*M_PI)/X)*n*x);
        complex<double> t (r, -i);
        cout << t << endl;
    }
}
Python:
import numpy as np
from math import pi
import sys

np.set_printoptions(threshold=sys.maxsize)

X = 1000
N = X
S = np.zeros(0, dtype=complex)
T = np.zeros(0, dtype=complex)
n = 0
x = 0
for n in range(0, 1000, 1):
    # S = np.append(S, np.complex(0, 0))
    for x in range(0, 1000, 1):
        r = np.cos(((2*pi)/X)*n*x)
        i = np.sin(((2*pi)/X)*n*x)
        T = np.append(T, np.complex(r, -i))
    print(T)
    print('\n')

It's not clear why you think your Python code is equivalent to your C++ code. T = np.append(T, np.complex(r, -i)) is not equivalent to complex<double> t (r, -i);. The actual equivalent Python code to your C++, which produces the same output, is:
from numpy import cos, sin, pi  # could also import from math instead of numpy, might affect speed though

X = 1000
for n in range(X):
    for x in range(X):
        r = cos(((2*pi)/X)*n*x)
        i = sin(((2*pi)/X)*n*x)
        t = complex(r, -i)
        print(t)
The way I tested this was by setting X to 5 in both sets of code and comparing the output. They were the same (with Python just showing more digits of precision).

This is my conversion of the C++ code. I kept the array creation, but pre-allocated the arrays rather than using np.append (which is unnecessarily slow when you already know the array length). I also added formatting to the print statement. Note that printing a million values takes a terribly long time, so I suggest commenting out the print line or reducing the value of X to 50 or less.
import numpy as np
import sys

np.set_printoptions(threshold=sys.maxsize)

X = 1000
N = X
S = np.zeros((N*X,), dtype=complex)
T = np.zeros((N*X,), dtype=complex)

k = 0
for n in range(0, N):
    # Not sure what you were trying to do with "S"
    for x in range(0, X):
        r = np.cos((2*np.pi/X)*n*x)
        i = np.sin((2*np.pi/X)*n*x)
        T[k] = complex(r, -i)  # np.complex is deprecated in newer NumPy; the builtin complex does the same
        print('{val:14.3f}'.format(val=T[k]))
        k += 1
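As a side note, the whole table can also be built without any explicit Python loops; a minimal NumPy sketch with the same X and N (this is just the DFT twiddle-factor matrix):
import numpy as np

X = 1000
N = X

# Outer product of the row index n and the column index x gives every n*x at once;
# exp(-2j*pi*n*x/X) equals cos(...) - 1j*sin(...), i.e. the same values as T above.
n = np.arange(N).reshape(-1, 1)
x = np.arange(X).reshape(1, -1)
T = np.exp(-2j * np.pi * n * x / X)   # shape (N, X); T.ravel() matches the loop order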

Related

Cython problem: cimport libcpp.vector not compiled

I'm trying to use Cython to speed up my code. Since I'm working with an array of strings, I want to use string and vector from C++. But I have problems compiling if I cimport C++ libraries. For example, I tried to implement an example from here: https://cython.readthedocs.io/en/latest/src/tutorial/cython_tutorial.html.
So, my code is
from libcpp.vector cimport vector

def primes(unsigned int nb_primes):
    cdef int n, i
    cdef vector[int] p
    p.reserve(nb_primes)  # allocate memory for 'nb_primes' elements.

    n = 2
    while p.size() < nb_primes:  # size() for vectors is similar to len()
        for i in p:
            if n % i == 0:
                break
        else:
            p.push_back(n)  # push_back is similar to append()
        n += 1

    # Vectors are automatically converted to Python
    # lists when converted to Python objects.
    return p
I save this code as 'test_char.pyx'. For compilation I use this:
from distutils.core import setup
from Cython.Build import cythonize

setup(name='test_char',
      ext_modules=cythonize('test_char.pyx')
      )
After that I get test_char.c, but I don't get test_char.py.
If I use this code (without cimport):
def primes(int nb_primes):
    cdef int n, i, len_p
    cdef int p[1000]
    if nb_primes > 1000:
        nb_primes = 1000

    len_p = 0  # The current number of elements in p.
    n = 2
    while len_p < nb_primes:
        # Is n prime?
        for i in p[:len_p]:
            if n % i == 0:
                break

        # If no break occurred in the loop, we have a prime.
        else:
            p[len_p] = n
            len_p += 1
        n += 1

    # Let's return the result in a python list:
    result_as_list = [prime for prime in p[:len_p]]
    return result_as_list
everything works fine. So, please, any ideas?
from distutils.core import setup
from distutils.extension import Extension
from Cython.Build import cythonize

extensions = [
    Extension("test_char", ["test_char.pyx"],
              language="c++"
              )
]

setup(
    name="test_char",
    ext_modules=cythonize(extensions),
)
It can solve this problem: with language="c++", Cython generates C++ instead of C, so the libcpp cimport compiles.
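To build and use the module (assuming the script above is saved as setup.py):
# python setup.py build_ext --inplace
# The build produces a compiled extension (test_char.so / test_char.pyd), not a test_char.py.
import test_char
print(test_char.primes(10))   # [2, 3, 5, 7, 11, 13, 17, 19, 23, 29]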

Getting an Array/Vector from PARI/GP in Python using Ctypes

I have written code to compare the solutions of SymPy and PARI/GP; however, I am facing a problem getting an array/vector back from PARI/GP.
When I try to return the vector res from the PARI/GP function nfroots, I get an address like this (see the last line):
[3, 4]
elements as long (only if of type t_INT):
3
4
<__main__.LP_LP_c_long object at 0x00000000056166C8>
How can I get res as a vector/array from nfroots so I can use it like a normal Python list/array?
The code is given below. To download the libpari.dll file, click here.
from ctypes import *
from sympy.solvers import solve
from sympy import Symbol

pari = cdll.LoadLibrary("libpari.dll")

pari.stoi.restype = POINTER(c_long)
pari.cgetg.restype = POINTER(POINTER(c_long))
pari.gtopoly.restype = POINTER(c_long)
pari.nfroots.restype = POINTER(POINTER(c_long))

(t_VEC, t_COL, t_MAT) = (17, 18, 19)  # incomplete
pari.pari_init(2 ** 19, 0)

def t_vec(numbers):
    l = len(numbers) + 1
    p1 = pari.cgetg(c_long(l), c_long(t_VEC))
    for i in range(1, l):
        # Changed c_long to c_float, but got no output
        p1[i] = pari.stoi(c_long(numbers[i - 1]))
    return p1

def Quartic_Comparison():
    x = Symbol('x')
    # a=0; A=0; B=1; C=-7; D=13/12   # PROBLEM 1
    a = 0; A = 0; B = 1; C = -7; D = 12
    # a=0; A=0; B=-1; C=-2; D=1
    solution = solve(a*x**4 + A*x**3 + B*x**2 + C*x + D, x)
    print(solution)
    V = (A, B, C, D)
    P = pari.gtopoly(t_vec(V), c_long(-1))
    res = pari.nfroots(None, P)
    print("elements as long (only if of type t_INT): ")
    for i in range(1, pari.glength(res) + 1):
        print(pari.itos(res[i]))
    return res  # PROBLEM 2

f = Quartic_Comparison()
print(f)
res is an element from the PARI/C world. It is a PARI vector of PARI integers (t_VEC of t_INTs). Python does not know it.
If it is to be processed further on the Python side, it must be converted. This is generally necessary if data needs to be exchanged between Python and the PARI/C world.
So if you have a t_VEC with t_INTs on the PARI/C side, as in this case, you most likely want to convert it to a Python list.
One possible approach might look like this:
...
roots = pari.nfroots(None, P)

result = []
for i in range(1, pari.glength(roots) + 1):
    result.append(pari.itos(roots[i]))
return result
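If you need this in more than one place, the same conversion can be factored into a small helper (it only uses the pari.glength and pari.itos calls already shown above):
def gvec_to_list(gvec):
    """Convert a PARI t_VEC of t_INTs into a plain Python list of ints."""
    # PARI vectors are 1-indexed; glength() returns the number of entries.
    return [pari.itos(gvec[i]) for i in range(1, pari.glength(gvec) + 1)]

# e.g. inside Quartic_Comparison():
#   return gvec_to_list(pari.nfroots(None, P))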

accelerated FFT to be invoked from Python Numba CUDA kernel

I need to calculate the Fourier transform of a 256-element float64 signal. The requirement is that I need to invoke these FFTs from inside a cuda.jit-compiled section, and each must complete within 25 µs. Alas, cuda.jit-compiled functions do not allow invoking external libraries, so I wrote my own. Alas, my single-core code is still way too slow (~250 µs on a Quadro P4000). Is there a better way?
I created a single-core FFT function that gives correct results, but it is, alas, 10x too slow. I don't understand how to make good use of multiple cores.
---fft.py
from numba import cuda, boolean, void, int32, float32, float64, complex128
import math, sys, cmath

def _transform_radix2(vector, inverse, out):
    n = len(vector)
    levels = int32(math.log(float32(n))/math.log(float32(2)))
    assert 2**levels == n  # error: Length is not a power of 2

    # uncomment either Numba.Cuda or Numpy memory allocation (intelligent conditional compilation??)
    exptable = cuda.local.array(1024, dtype=complex128)
    # exptable = np.zeros(1024, np.complex128)

    assert (n // 2) <= len(exptable)  # error: FFT length > MAXFFTSIZE

    coef = complex128((2j if inverse else -2j) * math.pi / n)
    for i in range(n // 2):
        exptable[i] = cmath.exp(i * coef)

    for i in range(n):
        x = i
        y = 0
        for j in range(levels):
            y = (y << 1) | (x & 1)
            x >>= 1
        out[i] = vector[y]

    size = 2
    while size <= n:
        halfsize = size // 2
        tablestep = n // size
        for i in range(0, n, size):
            k = 0
            for j in range(i, i + halfsize):
                temp = out[j + halfsize] * exptable[k]
                out[j + halfsize] = out[j] - temp
                out[j] += temp
                k += tablestep
        size *= 2

    scale = float64(n if inverse else 1)
    for i in range(n):
        out[i] = out[i]/scale  # the inverse requires a scaling

# now create the Numba.cuda version to be called by a GPU
gtransform_radix2 = cuda.jit(device=True)(_transform_radix2)
---test.py
from numba import cuda, void, float64, complex128, boolean
import cupy as cp
import numpy as np
import timeit
import fft

@cuda.jit(void(float64[:], boolean, complex128[:]))
def fftbench(y, inverse, FT):
    Y = cuda.local.array(256, dtype=complex128)
    for i in range(len(y)):
        Y[i] = complex128(y[i])
    fft.gtransform_radix2(Y, False, FT)

str = '\nbest [%2d/%2d] iterations, min:[%9.3f], max:[%9.3f], mean:[%9.3f], std:[%9.3f] usec'
a=[127.734375 ,130.87890625 ,132.1953125 ,129.62109375 ,118.6015625
,110.2890625 ,106.55078125 ,104.8203125 ,106.1875 ,109.328125
,113.5 ,118.6640625 ,125.71875 ,127.625 ,120.890625
,114.04296875 ,112.0078125 ,112.71484375 ,110.18359375 ,104.8828125
,104.47265625 ,106.65625 ,109.53515625 ,110.73828125 ,111.2421875
,112.28125 ,112.38671875 ,112.7734375 ,112.7421875 ,113.1328125
,113.24609375 ,113.15625 ,113.66015625 ,114.19921875 ,114.5
,114.5546875 ,115.09765625 ,115.2890625 ,115.7265625 ,115.41796875
,115.73828125 ,116. ,116.55078125 ,116.5625 ,116.33984375
,116.63671875 ,117.015625 ,117.25 ,117.41015625 ,117.6640625
,117.859375 ,117.91015625 ,118.38671875 ,118.51171875 ,118.69921875
,118.80859375 ,118.67578125 ,118.78125 ,118.49609375 ,119.0078125
,119.09375 ,119.15234375 ,119.33984375 ,119.31640625 ,119.6640625
,119.890625 ,119.80078125 ,119.69140625 ,119.65625 ,119.83984375
,119.9609375 ,120.15625 ,120.2734375 ,120.47265625 ,120.671875
,120.796875 ,120.4609375 ,121.1171875 ,121.35546875 ,120.94921875
,120.984375 ,121.35546875 ,120.87109375 ,120.8359375 ,121.2265625
,121.2109375 ,120.859375 ,121.17578125 ,121.60546875 ,121.84375
,121.5859375 ,121.6796875 ,121.671875 ,121.78125 ,121.796875
,121.8828125 ,121.9921875 ,121.8984375 ,122.1640625 ,121.9375
,122. ,122.3515625 ,122.359375 ,122.1875 ,122.01171875
,121.91015625 ,122.11328125 ,122.1171875 ,122.6484375 ,122.81640625
,122.33984375 ,122.265625 ,122.78125 ,122.44921875 ,122.34765625
,122.59765625 ,122.63671875 ,122.6796875 ,122.6171875 ,122.34375
,122.359375 ,122.7109375 ,122.83984375 ,122.546875 ,122.25390625
,122.06640625 ,122.578125 ,122.7109375 ,122.83203125 ,122.5390625
,122.2421875 ,122.06640625 ,122.265625 ,122.13671875 ,121.8046875
,121.87890625 ,121.88671875 ,122.2265625 ,121.63671875 ,121.14453125
,120.84375 ,120.390625 ,119.875 ,119.34765625 ,119.0390625
,118.4609375 ,117.828125 ,117.1953125 ,116.9921875 ,116.046875
,115.16015625 ,114.359375 ,113.1875 ,110.390625 ,108.41796875
,111.90234375 ,117.296875 ,127.0234375 ,147.58984375 ,158.625
,129.8515625 ,120.96484375 ,124.90234375 ,130.17578125 ,136.47265625
,143.9296875 ,150.24609375 ,141. ,117.71484375 ,109.80859375
,115.24609375 ,118.44140625 ,120.640625 ,120.9921875 ,111.828125
,101.6953125 ,111.21484375 ,114.91015625 ,115.2265625 ,118.21875
,125.3359375 ,139.44140625 ,139.76953125 ,135.84765625 ,137.3671875
,141.67578125 ,139.53125 ,136.44921875 ,135.08203125 ,135.7890625
,137.58203125 ,138.7265625 ,154.33203125 ,172.01171875 ,152.24609375
,129.8046875 ,125.59375 ,125.234375 ,127.32421875 ,132.8984375
,147.98828125 ,152.328125 ,153.7734375 ,155.09765625 ,156.66796875
,159.0546875 ,151.83203125 ,138.91796875 ,138.0546875 ,140.671875
,143.48046875 ,143.99609375 ,146.875 ,146.7578125 ,141.15234375
,141.5 ,140.76953125 ,140.8828125 ,145.5625 ,150.78125
,148.89453125 ,150.02734375 ,150.70703125 ,152.24609375 ,148.47265625
,131.95703125 ,125.40625 ,123.265625 ,123.57421875 ,129.859375
,135.6484375 ,144.51171875 ,155.05078125 ,158.4453125 ,140.8125
,100.08984375 ,104.29296875 ,128.55078125 ,139.9921875 ,143.38671875
,143.69921875 ,137.734375 ,124.48046875 ,116.73828125 ,114.84765625
,113.85546875 ,117.45703125 ,122.859375 ,125.8515625 ,133.22265625
,139.484375 ,135.75 ,122.69921875 ,115.7734375 ,116.9375
,127.57421875]
y1 = cp.zeros(len(a), cp.complex128)
FT1 = cp.zeros(len(a), cp.complex128)
for i in range(len(a)):
    y1[i] = a[i]  # convert to complex to feed the FFT

r = 1000
series = sorted(timeit.repeat("fftbench(y1, False, FT1)", number=1, repeat=r, globals=globals()))
series = series[0:r-5]
print(str % (len(series), r, 1e6*np.min(series), 1e6*np.max(series), 1e6*np.mean(series), 1e6*np.std(series)))
A faster implementation (t << 25 µs)
The drawback of your algorithm is that, even on the GPU, it runs on a single thread.
To understand how to design algorithms for Nvidia GPGPUs, I recommend looking at the CUDA C Programming Guide and at the Numba documentation to apply the ideas in Python.
Moreover, to understand what's wrong with your code, I recommend using the Nvidia profiler.
The following parts of the answer explain how to apply the basics to your example.
Run multiple threads
To improve performance, you will first need to launch multiple threads that can run in parallel. CUDA handles threads as follows:
Threads are grouped into blocks of n threads (n ≤ 1024).
Threads within the same block can be synchronized and have access to a (fast) common memory space called "shared memory".
You can run multiple blocks in parallel in a "grid", but you lose the synchronization mechanism across blocks.
The syntax to run multiple threads is the following:
fftbench[griddim, blockdim](y1, False, FT1)
To simplify, I will use only one block of size 256:
fftbench[1, 256](y1, False, FT1)
Memory
To improve GPU performance it's important to look at where the data will be stored; there are three main spaces:
global memory: the "RAM" of your GPU; it is slow and has high latency; this is where all your arrays are placed when you send them to the GPU.
shared memory: a small, fast-access memory; all the threads of a block have access to the same shared memory.
local memory: physically the same as global memory, but each thread accesses its own local memory.
Typically, if you use the same data multiple times, you should try to store it in shared memory to avoid the latency of global memory.
In your code, you can store exptable in shared memory:
exptable = cuda.shared.array(1024, dtype=complex128)
and if n is not too big, you may want to use a shared working array instead of writing through out:
working = cuda.shared.array(256, dtype=complex128)
Assign tasks to each thread
Of course, if you don't change your function, all the threads will do the same job and it will just slow down your program. In this example we will assign each thread to one cell of the array. To do so, we have to get the unique id of the thread within its block:
idx = cuda.threadIdx.x
Now we will be able to speed up the for loops; let's handle them one by one:
exptable = cuda.shared.array(1024, dtype=complex128)
...
for i in range(n // 2):
    exptable[i] = cmath.exp(i * coef)
Here is the goal: we want the first n/2 threads to fill this array; then all the threads will be able to use it. So in this case, just replace the for loop with a condition on the thread idx:
if idx < n // 2:
    exptable[idx] = cmath.exp(idx * coef)
For the last two loops it's easier: each thread will deal with one cell of the array:
for i in range(n):
    x = i
    y = 0
    for j in range(levels):
        y = (y << 1) | (x & 1)
        x >>= 1
    out[i] = vector[y]
becomes
x = idx
y = 0
for j in range(levels):
    y = (y << 1) | (x & 1)
    x >>= 1
working[idx] = vector[y]
and
for i in range(n):
    out[i] = out[i]/scale  # the inverse requires a scaling
becomes
out[idx] = working[idx]/scale  # the inverse requires a scaling
I use the shared array working, but you can replace it with out if you want to use global memory.
Now, let's look at the while loop. We said that we want each thread to deal with only one cell of the array, so we can try to parallelize the two for loops inside it.
...
for i in range(0, n, size):
    k = 0
    for j in range(i, i + halfsize):
        temp = out[j + halfsize] * exptable[k]
        out[j + halfsize] = out[j] - temp
        out[j] += temp
        k += tablestep
...
To simplify, I will only use half of the threads: we take the first 128 threads and determine j as follows:
...
if idx < 128:
    j = (idx % halfsize) + size*(idx // halfsize)
...
k is:
k = tablestep*(idx % halfsize)
so we get the loop:
size = 2
while size <= n:
    halfsize = size // 2
    tablestep = n // size
    if idx < 128:
        j = (idx % halfsize) + size*(idx // halfsize)
        k = tablestep*(idx % halfsize)
        temp = working[j + halfsize] * exptable[k]
        working[j + halfsize] = working[j] - temp
        working[j] += temp
    size *= 2
Synchronization
Last but not least, we need to synchronize all these threads; in fact the program will not work if we do not. On the GPU, threads may not run at the same time, so you can get issues when data produced by one thread are used by another, for example:
exptable[0] is used by thread_2 before thread_0 has stored its value
working[j + halfsize] is modified by another thread before you store it in temp
To prevent this we can use the function:
cuda.syncthreads()
All the threads in the same block will finish this line before executing the rest of the code.
In this example, you need to synchronize at two points: after the working initialization and after each iteration of the while loop.
Then your code looks like this:
def _transform_radix2(vector, inverse, out):
    n = len(vector)
    levels = int32(math.log(float32(n))/math.log(float32(2)))
    assert 2**levels == n  # error: Length is not a power of 2

    idx = cuda.threadIdx.x  # unique thread id within the block

    exptable = cuda.shared.array(1024, dtype=complex128)
    working = cuda.shared.array(256, dtype=complex128)
    assert (n // 2) <= len(exptable)  # error: FFT length > MAXFFTSIZE

    coef = complex128((2j if inverse else -2j) * math.pi / n)
    if idx < n // 2:
        exptable[idx] = cmath.exp(idx * coef)

    x = idx
    y = 0
    for j in range(levels):
        y = (y << 1) | (x & 1)
        x >>= 1
    working[idx] = vector[y]
    cuda.syncthreads()

    size = 2
    while size <= n:
        halfsize = size // 2
        tablestep = n // size
        if idx < 128:
            j = (idx % halfsize) + size*(idx // halfsize)
            k = tablestep*(idx % halfsize)
            temp = working[j + halfsize] * exptable[k]
            working[j + halfsize] = working[j] - temp
            working[j] += temp
        size *= 2
        cuda.syncthreads()

    scale = float64(n if inverse else 1)
    out[idx] = working[idx]/scale  # the inverse requires a scaling
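Putting it together: the fftbench wrapper from test.py can stay as it is, and only the launch configuration changes (one block of 256 threads, as shown earlier). A minimal sketch:
# sketch: unchanged fftbench wrapper from test.py, launched with one block of 256 threads
@cuda.jit(void(float64[:], boolean, complex128[:]))
def fftbench(y, inverse, FT):
    Y = cuda.local.array(256, dtype=complex128)
    for i in range(len(y)):
        Y[i] = complex128(y[i])
    fft.gtransform_radix2(Y, False, FT)

fftbench[1, 256](y1, False, FT1)
Each thread still makes its own full local copy of the input here; moving that copy into shared memory as well would be the next obvious optimization.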
I feel your question is a good way to introduce some basics of GPGPU computing, and I have tried to answer it in a didactic way. The final code is far from perfect and can be optimized a lot; I highly recommend reading the Programming Guide if you want to learn more about GPU optimizations.

Python: how to get a 4-byte sized integer from a 4-byte byte array?

Here is some simple Python (version 3.4) code I've written to get a 32-bit integer (of int type, I would assume) from an array of 4 bytes:
import binascii
import socket
import struct
import array
import pickle
import ctypes
import numpy
import sys
float_val = 1.0 + 0.005
print(float_val)
packed = struct.pack('f', float_val)
print(len(packed))
tempint2 = struct.unpack(">I", packed)[0]
tempint3 = struct.unpack_from(">I", packed)[0]
tempint4 = int.from_bytes(packed, byteorder='big', signed=False)
print(sys.getsizeof(tempint2))
print(tempint2)
print(sys.getsizeof(tempint3))
print(tempint3)
print(sys.getsizeof(tempint4))
print(tempint4)
However, none of the attempts (tempint2/tempint3/tempint4) gives the value I expected (a 4-byte integer). Somehow, the size is 18 bytes in every case (the sys.getsizeof() result). Can you tell me how to get the expected answer (a 4-byte, i.e. 32-bit, integer)?
First of all, due to Python's... ahem... "magic", sys.getsizeof() won't give you the byte width of the value; it returns the size of the whole object as represented internally by the Python interpreter.
Now, the answer (for integers) is simply... (for all combinations of Python 2.x/Python 3.x and 32-bit/64-bit):
from math import ceil, floor, log

def minimumAmountOfBytesToHoldTheStuff(x):
    # Avoid math domain errors
    if x < 0:
        x = ~x
    # Avoid more math domain errors
    if x == 0:
        x = 1
    return int(ceil((floor(log(x, 2)) + 1) / 8))

def powersOfTwo():
    x = 1
    while True:
        yield x
        x *= 2

def minimumAmountOfBytesToHoldTheStuffOnRealMachines(x):
    bytes = minimumAmountOfBytesToHoldTheStuff(x)
    for power in powersOfTwo():
        if bytes <= power:
            return power

print(minimumAmountOfBytesToHoldTheStuffOnRealMachines(tempint2))
Note: It appears that log(x, 2) breaks for x >= pow(2, 48) - 1, and so does the whole algorithm. This is probably an issue in the C library or plain floating-point accuracy errors, because log(n, x) in Python is translated into log(n) / log(x) in C.
Edit: This is an optimized version for Python 3.x that is independent of both floating-point and logarithmic operations, and thus is accurate in all situations...
from math import ceil

def minimumAmountOfBytesToHoldTheStuff(x):
    # Avoid math domain errors
    if x < 0:
        x = ~x
    # Avoid more math domain errors
    if x == 0:
        x = 1
    return int(ceil(x.bit_length() / 8))
The other functions are the same.
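A quick usage sketch of these helpers (example values of my own, not from the question):
print(minimumAmountOfBytesToHoldTheStuff(255))                        # 1
print(minimumAmountOfBytesToHoldTheStuff(256))                        # 2
print(minimumAmountOfBytesToHoldTheStuff(2 ** 32 - 1))                # 4
print(minimumAmountOfBytesToHoldTheStuffOnRealMachines(2 ** 32 - 1))  # 4
print(minimumAmountOfBytesToHoldTheStuffOnRealMachines(2 ** 40))      # 8 (rounded up to a power of two)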
I hope this has shed some light on things!

(Pseudo) Random number generation in Python without using modules and clock

I'm using Python for a competition in which I am creating a bot to play a game. The problem is, the environment does not have any C support installed, so I do not have access to the random, numpy, and scipy modules.
I will have roughly 400mb ram available, and I am looking for a way to produce uniform random numbers between 0 and 1 for simulation purposes during the game.
Note that I have used the clock time before to generate a single number, but the issue is that I will need loads of numbers without the clock changing much, which would result in constantly the same number. In fact, I am limited to a maximum of 1 second for, say, 100k numbers.
I'm considering loading in data, but the problem would then be that the bot would always use the same numbers. Then again, the circumstances for which I need to use the numbers vary slightly.
Using Python 2.7, hoping people have some suggestions.
FWIW, the random module contains the Wichmann-Hill generator class written in pure Python (no C required):
>>> import random
>>> rng = random.WichmannHill(8675309)
>>> rng.random()
0.06246664612856567
>>> rng.random()
0.3049888099198217
Here's the cleaned-up source code:
class WichmannHill(Random):

    def seed(self, a=None):
        a, x = divmod(a, 30268)
        a, y = divmod(a, 30306)
        a, z = divmod(a, 30322)
        self._seed = int(x)+1, int(y)+1, int(z)+1

    def random(self):
        """Get the next random number in the range [0.0, 1.0)."""
        x, y, z = self._seed
        x = (171 * x) % 30269
        y = (172 * y) % 30307
        z = (170 * z) % 30323
        self._seed = x, y, z
        return (x/30269.0 + y/30307.0 + z/30323.0) % 1.0
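If importing the random module is not possible at all in your sandbox, the same recurrence works as a small standalone class; a minimal sketch (my own wrapper around the code above, same constants):
class PureWichmannHill(object):
    """Wichmann-Hill PRNG with no dependency on the random module."""

    def __init__(self, seed):
        # same seed-splitting scheme as WichmannHill.seed() above
        a, x = divmod(seed, 30268)
        a, y = divmod(a, 30306)
        a, z = divmod(a, 30322)
        self._seed = int(x) + 1, int(y) + 1, int(z) + 1

    def random(self):
        """Next float in the range [0.0, 1.0)."""
        x, y, z = self._seed
        x = (171 * x) % 30269
        y = (172 * y) % 30307
        z = (170 * z) % 30323
        self._seed = x, y, z
        return (x / 30269.0 + y / 30307.0 + z / 30323.0) % 1.0

rng = PureWichmannHill(8675309)
print(rng.random())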
You can use a Mersenne Twister implementation. I found this one, which is modeled after the pseudocode on Wikipedia.
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Based on the pseudocode in https://en.wikipedia.org/wiki/Mersenne_Twister.
Generates uniformly distributed 32-bit integers in the range [0, 2**32 - 1]
with the MT19937 algorithm.
Yaşar Arabacı <yasar11732 et gmail nokta com>
"""

# Create a length 624 list to store the state of the generator
MT = [0 for i in xrange(624)]
index = 0

# To get last 32 bits
bitmask_1 = (2 ** 32) - 1

# To get the 32nd bit
bitmask_2 = 2 ** 31

# To get last 31 bits
bitmask_3 = (2 ** 31) - 1

def initialize_generator(seed):
    "Initialize the generator from a seed"
    global MT
    global bitmask_1
    MT[0] = seed
    for i in xrange(1, 624):
        MT[i] = ((1812433253 * MT[i-1]) ^ ((MT[i-1] >> 30) + i)) & bitmask_1

def extract_number():
    """
    Extract a tempered pseudorandom number based on the index-th value,
    calling generate_numbers() every 624 numbers
    """
    global index
    global MT
    if index == 0:
        generate_numbers()
    y = MT[index]
    y ^= y >> 11
    y ^= (y << 7) & 2636928640
    y ^= (y << 15) & 4022730752
    y ^= y >> 18

    index = (index + 1) % 624
    return y

def generate_numbers():
    "Generate an array of 624 untempered numbers"
    global MT
    for i in xrange(624):
        y = (MT[i] & bitmask_2) + (MT[(i + 1) % 624] & bitmask_3)
        MT[i] = MT[(i + 397) % 624] ^ (y >> 1)
        if y % 2 != 0:
            MT[i] ^= 2567483615

if __name__ == "__main__":
    from datetime import datetime
    now = datetime.now()
    initialize_generator(now.microsecond)
    for i in xrange(100):
        "Print 100 random numbers as an example"
        print extract_number()
If the script is run on Linux, try using /dev/urandom:
with open('/dev/urandom', 'rb') as f:
    random_int = reduce(lambda acc, x: (acc << 8) | x, map(ord, f.read(4)), 0)
f.read(4) - reads 4 bytes of entropy
map(ord, f.read(4)) - converts the byte string into a list of numbers
reduce(lambda ..., map(...), 0) - folds that list of numbers into a single integer
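To turn those 4 bytes into a uniform float in [0, 1), just divide by 2**32; a small sketch (Python 2, as in the question):
def urandom_float(f):
    """One uniform float in [0.0, 1.0) from 4 bytes of /dev/urandom."""
    n = reduce(lambda acc, ch: (acc << 8) | ord(ch), f.read(4), 0)
    return n / float(2 ** 32)

with open('/dev/urandom', 'rb') as f:
    print([urandom_float(f) for _ in xrange(5)])
For 100k values per second you would keep the file handle open and read larger chunks at once rather than four bytes at a time.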
Maths is your best answer: a linear congruential generator, http://en.m.wikipedia.org/wiki/Linear_congruential_generator
X(n+1) = (a*X(n) + c) mod m
In Python: x2 = (a*x1 + c) % m
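A small sketch of that recurrence as a generator of floats in [0, 1), using constants common in C rand() implementations (my choice; any full-period parameters from the linked page will do):
def lcg(seed, a=1103515245, c=12345, m=2 ** 31):
    """Yield an endless stream of pseudorandom floats in [0.0, 1.0)."""
    x = seed
    while True:
        x = (a * x + c) % m          # X(n+1) = (a*X(n) + c) mod m
        yield x / float(m)

gen = lcg(42)
print([next(gen) for _ in range(3)])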
