(Pseudo) Random number generation in Python without using modules and clock - python

I'm using Python for a competition in which I am creating a bot to play a game. The problem is, it does not have anything of c support installed, so I do not have access to the random, numpy, and scipy modules.
I will have roughly 400mb ram available, and I am looking for a way to produce uniform random numbers between 0 and 1 for simulation purposes during the game.
Note that I have used the clock time before to generate a single number, but the issue is that I will need loads of numbers without the clock changing much, which would result in constantly the same number. In fact, I am limited to a maximum of 1 second for, say, 100k numbers.
I'm considering loading in data, but the problem would then be that the bot would always use the same numbers. Then again, the circumstances for which I need to use the numbers vary slightly.
Using Python 2.7, hoping people have some suggestions.

FWIW, the random module contains the class Wichman-Hill generator written in pure python (no C required):
>>> import random
>>> rng = random.WichmannHill(8675309)
>>> rng.random()
0.06246664612856567
>>> rng.random()
0.3049888099198217
Here's the cleaned-up source code:
class WichmannHill(Random):
def seed(self, a=None):
a, x = divmod(a, 30268)
a, y = divmod(a, 30306)
a, z = divmod(a, 30322)
self._seed = int(x)+1, int(y)+1, int(z)+1
def random(self):
"""Get the next random number in the range [0.0, 1.0)."""
x, y, z = self._seed
x = (171 * x) % 30269
y = (172 * y) % 30307
z = (170 * z) % 30323
self._seed = x, y, z
return (x/30269.0 + y/30307.0 + z/30323.0) % 1.0

You can use a Mersenne Twister implementation. I found this one, which is modeled after the pseudocode on Wikipedia.
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Based on the pseudocode in https://en.wikipedia.org/wiki/Mersenne_Twister. Generates uniformly distributed 32-bit integers in the range [0, 232 − 1] with the MT19937 algorithm
Yaşar Arabacı <yasar11732 et gmail nokta com>
"""
# Create a length 624 list to store the state of the generator
MT = [0 for i in xrange(624)]
index = 0
# To get last 32 bits
bitmask_1 = (2 ** 32) - 1
# To get 32. bit
bitmask_2 = 2 ** 31
# To get last 31 bits
bitmask_3 = (2 ** 31) - 1
def initialize_generator(seed):
"Initialize the generator from a seed"
global MT
global bitmask_1
MT[0] = seed
for i in xrange(1,624):
MT[i] = ((1812433253 * MT[i-1]) ^ ((MT[i-1] >> 30) + i)) & bitmask_1
def extract_number():
"""
Extract a tempered pseudorandom number based on the index-th value,
calling generate_numbers() every 624 numbers
"""
global index
global MT
if index == 0:
generate_numbers()
y = MT[index]
y ^= y >> 11
y ^= (y << 7) & 2636928640
y ^= (y << 15) & 4022730752
y ^= y >> 18
index = (index + 1) % 624
return y
def generate_numbers():
"Generate an array of 624 untempered numbers"
global MT
for i in xrange(624):
y = (MT[i] & bitmask_2) + (MT[(i + 1 ) % 624] & bitmask_3)
MT[i] = MT[(i + 397) % 624] ^ (y >> 1)
if y % 2 != 0:
MT[i] ^= 2567483615
if __name__ == "__main__":
from datetime import datetime
now = datetime.now()
initialize_generator(now.microsecond)
for i in xrange(100):
"Print 100 random numbers as an example"
print extract_number()

If the script is ran on linux, try using /dev/urandom:
with open('/dev/urandom', 'rb') as f:
random_int = reduce(lambda acc, x: (acc << 8) | x, map(ord, f.read(4)), 0)
f.read(4) reads 4 bytes of entrophy
map(ord, f.read(4)) - converts byte-strings into numbers
reduce(lambda ..., map(...), 0) - converts the list of numbers into an integer

Maths is your best answer: http://en.m.wikipedia.org/wiki/Linear_congruential_generator
X(n+1) = (aX(n)+c) mod m
x2 = (a*x1+c)%m

Related

How to perform a ranking selection in this Genetic Algorithm

I'm building a Genetic Algorithm to maximize this function: x^5 - 10x^3 + 30x - y^2 + 21y.
The code must be in binary and the bounds for x and y are [-2.5, 2.5]. To generate the initial population I made a 16 bit string for both x and y where:
The first bit represents the signal [0 or 1]
The the second and third bit represents the integer part [00, 01 or 10]
The rest represents the float part
This is the function that generates the initial population:
def generate_population(n_pop):
population = list()
for _ in range(n_pop):
aux = list()
for _ in range(2):
signal = bin(randint(0, 1))[2:]
int_part = bin(randint(0, 2))[2:].zfill(2)
float_part = bin(randint(0, 5000))[2:].zfill(13)
aux.append((signal+int_part+float_part))
population.append(aux)
return population
I also made a function that returns the binary number into float:
def convert_float(individual):
float_num = list()
for i in range(2):
signal = int(individual[i][0])
int_part = int(individual[i][1:3], 2)
float_part = int(individual[i][3:], 2) * (10 ** -4)
value = round(int_part + float_part, 4)
if value > 2.5:
value = 2.5
if signal == 1:
value = value * (-1)
float_num.append(value)
return float_num
And lastly this function that calculate the fitness of each individual:
def get_fitness(individual):
x = individual[0]
y = individual[1]
return x ** 5 - 10 * x ** 3 + 30 * x - y ** 2 + 21 * y
This is my main function:
def ga(n_pop=10, n_iter=10):
population = generate_population(n_pop)
best_fitness_id, best_fitness = 0, get_fitness(convert_float(population[0]))
for i in range(n_iter):
float_population = [convert_float(x) for x in population]
fitness_population = [get_fitness(x) for x in float_population]
for j in range(n_pop):
if fitness_population[j] > best_fitness:
best_fitness_id, best_fitness = j, fitness_population[j]
print(f'--> NEW BEST FOUND AT GENERATION {i}:')
print(f'{float_population[j]} = {fitness_population[j]}')
selected_parents = rank_selection()
# childrens = list()
# childrens = childrens + population[best_fitness_id] # ELITE
After running the program I have something like this:
The population looks like: [['0000001100110111', '0000110111110101'], ['0010011111101110', '1000100101001001'], ...
The float population: [[0.0823, 0.3573], [1.203, -0.2377], ...
And the fitness values: [9.839066068044746, 16.15145434928624, ...
I need help to build the rank_selection() function, I've been stuck in this selection for 2 days. I know is something 1/N, 2/N etc and I've seen tons of examples in multiple languages but I could not apply any of them to this particular algorithm and it MUST be rank selecion.
I already know how to perform crossover and mutation.

Turtle Graphics window not responding

I am attempting to translate a Julia set generator that I made previously to Python code. However, when the code is run, the turtle graphics window stops responding immediately and draws nothing. Have I done something horribly wrong or is there something I'm missing? Perhaps I'm asking too much of python to do in 1 frame. Please explain what is causing this to happen and how I can fix it. Thanks!
import turtle
import time
y_set = []
map_output = 0
iterations = 0
#turtle.hideturtle()
#turtle.speed(1)
generate a list of y-values
def y_set (r):
global y_set
y_set = []
for n in range ((360*2)+1):
y_set.append(n)
create a color value
def color (i, n):
output = map(i, 2, 10000, 0, 2500)
if output < 0:
output = 0
if output > 0:
output = 255
iterate on the x's
def repeat (n, r, i):
global iterations
global x
global y
aa = 0
ba = 0
ab = 0
a = 0
b = 0
for j in range (n):
iterations += 1
aa = a * a
bb = b * b
ab = 2 * a * b
a = ((aa - bb) + float(r))
b = (ab + float(i))
if (ab + bb) > 4:
break
turtle.setx(100 * x)
turtle.sety(100 * y)
color(iterations, n)
turtle.pendown()
turtle.penup()
Iterate on the y's
def Julia (s, r, i, d):
global iterations
global y_set
global x
global y
global a
global b
y_set(s)
while len(y_set) > 0:
y = y_set[0]/360
del y_set[0]
x = -1.5
for n in range (round((700/(float(r)+1))+1)):
a = x
b = y
iterations = 0
repeat(10**d, r, i)
x += ((1/240)*s)
user input
real = input('Real: ')
imag = input('Imaginary: ')
Julia (1, real, imag, 100)
turtle.done()
There are too many problems with this code to focus on an algorithm error. When I try to run it, I get, TypeError: 'int' object is not iterable. Specific issues:
The i argument here is being passed a number:
iterations += 1
...
color(iterations, n)
...
def color(i, n):
output = map(i, 2, 10000, 0, 2500)
but Python's map function (and Julia's) expects a function as its first argument:
map(func, *iterables)
and it returns a list of the results of applying func to iterables but you treat the result as a scalar value:
output = map(i, 2, 10000, 0, 2500)
if output < 0:
output = 0
if output > 0:
output = 255
The color() function never uses its second argument, and never returns anything!
The variables a & b here are being treated as globals, set but not used, as if prepared for use by repeat():
global a
global b
...
a = x
b = y
iterations = 0
repeat(10 ** d, r, i)
but the a & b used by repeat() are locals initialized to zero:
a = 0
b = 0
You have a function and global variable with the same name y_set!
And your globals are out of control.

How to pull a random value from two numbers based on a Mersenne Twister returned value

I have implemented a Mersenne Twister in Python using the following example code and while it does work as intended, I am not clear on how to limit results returned to a range of integers. For example, if I wanted to use this mechanism to determine the value of a dice roll, I would (right now, in an incredibly inefficient manner) iterate through potential results from the MT until something falls within the set. There has to be a much more memory-efficient manner to get a value to fall within a set range, like 1-20.
Currently, the algorithm returns a random set of numbers that don't seem to peak anywhere close to the set range:
2840889030
2341262508
2626522481
893458501
1134227444
3424236607
4171927007
1414775506
318984778
811882651
1509520423
1796453323
571461449
2606098999
2100002233
202969379
2318195635
1583585513
863717092
1218132929
1044954980
2997947229
867650808
177016714
2532350044
2917724494
2789913671
2793703767
1477382755
2552234519
2230774266
956596469
1165204853
1261233074
1856099289
21274564
1867584221
200970721
2112891842
139474834
93227265
1919721548
1026587194
30693196
3114464709
2194502660
2235520335
1877205724
1093736467
3136329929
1838505684
1358237877
2394536120
1268347552
1222927042
2982839076
1155599683
1943346953
3778719619
1483759762
3227630028
2775862513
2991889829
4252811853
995611629
626323532
3895812866
4027023347
3778533921
3840271846
4289281429
2263887842
402963991
2957069652
238880521
3643974307
472466724
3309455978
3588191581
1390613042
290666747
1375502175
1172854301
2159248842
3279978887
2206149102
804187781
3811948116
4134597627
1556281173
2590972812
3291094915
1836658937
3721612785
365099684
3884686172
2966532828
3609464378
1672431128
3959413372
For testing purposes I implemented the current test logic (part of __main__) and so far it just runs infinitely.
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Based on the pseudocode in https://en.wikipedia.org/wiki/Mersenne_Twister. Generates uniformly distributed 32-bit integers in the range [0, 232 − 1] with the MT19937 algorithm
Yaşar Arabacı <yasar11732 et gmail nokta com>
"""
# Create a length 624 list to store the state of the generator
MT = [0 for i in xrange(624)]
index = 0
# To get last 32 bits
bitmask_1 = (2 ** 32) - 1
# To get 32. bit
bitmask_2 = 2 ** 31
# To get last 31 bits
bitmask_3 = (2 ** 31) - 1
def initialize_generator(seed):
"Initialize the generator from a seed"
global MT
global bitmask_1
MT[0] = seed
for i in xrange(1,624):
MT[i] = ((1812433253 * MT[i-1]) ^ ((MT[i-1] >> 30) + i)) & bitmask_1
def extract_number():
"""
Extract a tempered pseudorandom number based on the index-th value,
calling generate_numbers() every 624 numbers
"""
global index
global MT
if index == 0:
generate_numbers()
y = MT[index]
y ^= y >> 11
y ^= (y << 7) & 2636928640
y ^= (y << 15) & 4022730752
y ^= y >> 18
index = (index + 1) % 624
return y
def generate_numbers():
"Generate an array of 624 untempered numbers"
global MT
for i in xrange(624):
y = (MT[i] & bitmask_2) + (MT[(i + 1 ) % 624] & bitmask_3)
MT[i] = MT[(i + 397) % 624] ^ (y >> 1)
if y % 2 != 0:
MT[i] ^= 2567483615
if __name__ == "__main__":
from datetime import datetime
now = datetime.now()
solved = False
initialize_generator(now.microsecond)
#for i in xrange(10):
# "Print 10 random numbers as an example"
while(solved != True):
generated_number = extract_number()
while(generated_number <= 20 and generated_number >= 1):
print generated_number
solved = True
Any advice on how to implement? It appears that it may not even get a chance to drop down to a number within the predefined set.

Python recursive program

I'm relatively newcomer on programming as I'm educated a mathematician and have no experience on Python. I would like to know how to solve this problem in Python which appeared as I was studying one maths problem on my own:
Program asks a positive integer m. If m is of the form 2^n-1 it returns T(m)=n*2^{n-1}. Otherwise it writes m to the form 2^n+x, where -1 < x < 2^n, and returns T(m)=T(2^n-1)+x+1+T(x). Finally it outputs the answer.
I thought this was a neat problem so I attempted a solution. As far as I can tell, this satisfies the parameters in the original question.
#!/usr/bin/python
import math
def calculate(m: int) -> int:
"""
>>> calculate(10)
20
>>> calculate(100)
329
>>> calculate(1.2)
>>> calculate(-1)
"""
if (m <= 0 or math.modf(m)[0] != 0):
return None
n, x = decompose(m + 1)
if (x == 0):
return n * 2**(n - 1)
else:
return calculate(2**n - 1) + x + 1 + calculate(x)
def decompose(m: int) -> (int, int):
"""
Returns two numbers (n, x), where
m = 2**n + x and -1 < x < 2^n
"""
n = int(math.log(m, 2))
return (n, m - 2**n)
if __name__ == "__main__":
import doctest
doctest.testmod(verbose = True)
Assuming the numbers included in the calculate function's unit tests are the correct results for the problem, this solution should be accurate. Feedback is most welcome, of course.

hash functions family generator in python

I am looking for a hash functions family generator that could generate a family of hash functions given a set of parameters. I haven't found any such generator so far.
Is there a way to do that with the hashlib package ?
For example I'd like to do something like :
h1 = hash_function(1)
h2 = hash_function(2)
...
and h1 and h2 would be different hash functions.
For those of you who might know about it, I am trying to implement a min-hashing algorithm on a very large dataset.
Basically, I have a very large set of features (100 millions to 1 billion) for a given document, and I need to create 1000 to 10000 different random permutations for this set of features.
I do NOT want to build the random permutations explicitly so the technique I would like to use in the following :
generate a hash function h and consider that for two indices r and s
r appears before s in the permutation if h(r) < h(s) and do that for 100 to 1000 different hash functions.
Are there any known libraries that I might have missed ? Or any standard way of generating families of hash functions with python that you might be aware of ?
I'd just do something like (if you don't need thread-safety -- not hard to alter if you DO need thread safety -- and assuming a 32-bit Python version):
import random
_memomask = {}
def hash_function(n):
mask = _memomask.get(n)
if mask is None:
random.seed(n)
mask = _memomask[n] = random.getrandbits(32)
def myhash(x):
return hash(x) ^ mask
return myhash
As mentioned above, you can use universal hashing for minhash.
For example:
import random
def minhash():
d1 = set(random.randint(0, 2000) for _ in range(1000))
d2 = set(random.randint(0, 2000) for _ in range(1000))
jacc_sim = len(d1.intersection(d2)) / len(d1.union(d2))
print("jaccard similarity: {}".format(jacc_sim))
N_HASHES = 200
hash_funcs = []
for i in range(N_HASHES):
hash_funcs.append(universal_hashing())
m1 = [min([h(e) for e in d1]) for h in hash_funcs]
m2 = [min([h(e) for e in d2]) for h in hash_funcs]
minhash_sim = sum(int(m1[i] == m2[i]) for i in range(N_HASHES)) / N_HASHES
print("min-hash similarity: {}".format(minhash_sim))
def universal_hashing():
def rand_prime():
while True:
p = random.randrange(2 ** 32, 2 ** 34, 2)
if all(p % n != 0 for n in range(3, int((p ** 0.5) + 1), 2)):
return p
m = 2 ** 32 - 1
p = rand_prime()
a = random.randint(0, p)
if a % 2 == 0:
a += 1
b = random.randint(0, p)
def h(x):
return ((a * x + b) % p) % m
return h
Reference
#alex's answer is great and concise, but the hash functions it generates are not "very different from each other".
Let's look at the Pearson correlation between 10000 samples of 10000 hashes that put the results in 100 bins
%%time # 1min 14s
n=10000
hashes = [hash_function(i) for i in range(n)]
median_pvalue(hashes, n=n)
# 1.1614081043690444e-06
I.e. the median p_value is 1e-06 which is far from random. Here's an example if it were truly random :
%%time # 4min 15s
hashes = [lambda _ : random.randint(0,100) for _ in range(n)]
median_pvalue(hashes, n=n)
# 0.4979718236429698
Using Carter and Wegman method you could get:
%%time # 1min 43s
hashes = HashFamily(100).draw_hashes(n)
median_pvalue(hashes, n=n)
# 0.841929288037321
Code to reproduce :
from scipy.stats.stats import pearsonr
import numpy as np
import random
_memomask = {}
def hash_function(n):
mask = _memomask.get(n)
if mask is None:
random.seed(n)
mask = _memomask[n] = random.getrandbits(32)
def myhash(x):
return hash(x) ^ mask
return myhash
class HashFamily():
r"""Universal hash family as proposed by Carter and Wegman.
.. math::
\begin{array}{ll}
h_{{a,b}}(x)=((ax+b)~{\bmod ~}p)~{\bmod ~}m \ \mid p > m\\
\end{array}
Args:
bins (int): Number of bins to hash to. Better if a prime number.
moduler (int,optional): Temporary hashing. Has to be a prime number.
"""
def __init__(self, bins, moduler=None):
if moduler and moduler <= bins:
raise ValueError("p (moduler) should be >> m (buckets)")
self.bins = bins
self.moduler = moduler if moduler else self._next_prime(np.random.randint(self.bins + 1, 2**32))
# do not allow same a and b, as it could mean shifted hashes
self.sampled_a = set()
self.sampled_b = set()
def _is_prime(self, x):
"""Naive is prime test."""
for i in range(2, int(np.sqrt(x))):
if x % i == 0:
return False
return True
def _next_prime(self, n):
"""Naively gets the next prime larger than n."""
while not self._is_prime(n):
n += 1
return n
def draw_hash(self, a=None, b=None):
"""Draws a single hash function from the family."""
if a is None:
while a is None or a in self.sampled_a:
a = np.random.randint(1, self.moduler - 1)
assert len(self.sampled_a) < self.moduler - 2, "please give a bigger moduler"
self.sampled_a.add(a)
if b is None:
while b is None or b in self.sampled_b:
b = np.random.randint(0, self.moduler - 1)
assert len(self.sampled_b) < self.moduler - 1, "please give a bigger moduler"
self.sampled_b.add(b)
return lambda x: ((a * x + b) % self.moduler) % self.bins
def draw_hashes(self, n, **kwargs):
"""Draws n hash function from the family."""
return [self.draw_hash() for i in range(n)]
def median_pvalue(hashes, buckets=100, n=1000):
p_values = []
for j in range(n-1):
a = [hashes[j](i) % buckets for i in range(n)]
b = [hashes[j+1](i) % buckets for i in range(n)]
p_values.append(pearsonr(a,b)[1])
return np.median(p_values)
Note that my implementation is of Carter and Wegman is very naive (e.g. generation of prime numbers). It could be made shorter and quicker.
You should consider using universal hashing. My answer and code can be found here: https://stackoverflow.com/a/25104050/207661
The universal hash family is a set of hash functions H of size m, such that any two (district) inputs collide with probability at most 1/m when the hash function h is drawn randomly from set H.
Based on the formulation in Wikipedia, use can use the following code:
import random
def is_prime(n):
if n==2 or n==3: return True
if n%2==0 or n<2: return False
for i in range(3, int(n**0.5)+1, 2):
if n%i==0:
return False
return True
# universal hash functions
class UniversalHashFamily:
def __init__(self, number_of_hash_functions, number_of_buckets, min_value_for_prime_number=2, bucket_value_offset=0):
self.number_of_buckets = number_of_buckets
self.bucket_value_offset = bucket_value_offset
primes = []
number_to_check = min_value_for_prime_number
while len(primes) < number_of_hash_functions:
if is_prime(number_to_check):
primes.append(number_to_check)
number_to_check += random.randint(1, 1000)
self.hash_function_attrs = []
for i in range(number_of_hash_functions):
p = primes[i]
a = random.randint(1, p)
b = random.randint(0, p)
self.hash_function_attrs.append((a, b, p))
def __call__(self, function_index, input_integer):
a, b, p = self.hash_function_attrs[function_index]
return (((a*input_integer + b)%p)%self.number_of_buckets) + self.bucket_value_offset
Example usage:
We can create a hash family consists of 20 hash functions, each one map the input to 100 buckets.
hash_family = UniversalHashFamily(20, 100)
And get the hashed values like:
input_integer = 1234567890 # sample input
hash_family(0, input_integer) # the output of the first hash function, i.e. h0(input_integer)
hash_family(1, input_integer) # the output of the second hash function, i.e. h1(input_integer)
# ...
hash_family(19, input_integer) # the output of the last hash function, i.e. h19(input_integer)
If you are interested in the universal hash family for string inputs, you can use the following code. But please note that this code may not be the optimized solution for string hashing.
class UniversalStringHashFamily:
def __init__(self, number_of_hash_functions, number_of_buckets, min_value_for_prime_number=2, bucket_value_offset=0):
self.number_of_buckets = number_of_buckets
self.bucket_value_offset = bucket_value_offset
primes = []
number_to_check = max(min_value_for_prime_number, number_of_buckets)
while len(primes) < number_of_hash_functions:
if is_prime(number_to_check):
primes.append(number_to_check)
number_to_check += random.randint(1, 1000)
self.hash_function_attrs = []
for i in range(number_of_hash_functions):
p = primes[i]
a = random.randint(1, p)
a2 = random.randint(1, p)
b = random.randint(0, p)
self.hash_function_attrs.append((a, b, p, a2))
def hash_int(self, int_to_hash, a, b, p):
return (((a*int_to_hash + b)%p)%self.number_of_buckets) + self.bucket_value_offset
def hash_str(self, str_to_hash, a, b, p, a2):
str_to_hash = "1" + str_to_hash # this will ensure that universality is not affected, see wikipedia for more detail
l = len(str_to_hash)-1
int_to_hash = 0
for i in range(l+1):
int_to_hash += ord(str_to_hash[i]) * (a2 ** (l-i))
int_to_hash = int_to_hash % p
return self.hash_int(int_to_hash, a, b, p)
def __call__(self, function_index, str_to_hash):
a, b, p, a2 = self.hash_function_attrs[function_index]
return self.hash_str(str_to_hash, a, b, p, a2)

Categories

Resources