Parallel random distribution - python

I have two iterators in python and both should follow the same "random" distribution (both should run in parallel). For instance:
class Iter1(object):
    """First of two iterables that are meant to yield the same random stream."""

    def __iter__(self):
        # random_generator() is the function this question asks how to write.
        for i in random_generator():
            yield i


class Iter2(object):
    """Second iterable; expected to yield the same values as Iter1."""

    def __iter__(self):
        for i in random_generator():
            yield i


# Walk both iterables in lockstep and show each pair of values.
for el1, el2 in zip(Iter1(), Iter2()):
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print('{} {}'.format(el1, el2))
The output should be something like:
0.53534 0.53534
0.12312 0.12312
0.19238 0.19238
How can I define random_generator() so that it creates the same random sequence, in parallel, for both iterators?
Note:
They should run in parallel
I can't generate the sequence in advance (it is a stream, so I don't know the size of the sequence)
Thanks.

Specify the same seed to each call of random_generator:
import random


def random_generator(l, seed=None):
    """Yield pseudo-random floats in [0.0, 1.0) from a private generator.

    Every call builds its own ``random.Random`` instance, so two generators
    created with the same ``seed`` produce the same sequence regardless of
    how their consumption is interleaved.

    Args:
        l: Number of values to yield, or ``None`` for an endless stream
            (for callers that do not know the length in advance).
        seed: Seed handed to ``random.Random``. ``None`` seeds from the
            system time / OS entropy and is therefore not reproducible.
    """
    r = random.Random(seed)
    if l is None:
        while True:
            yield r.random()
    else:
        for _ in range(l):
            yield r.random()


class Iter1(object):
    """Iterable over ten random values fully determined by ``seed``."""

    def __init__(self, seed):
        self.seed = seed

    def __iter__(self):
        # A fresh generator per iteration, so every pass replays the same values.
        return random_generator(10, self.seed)


class Iter2(object):
    """Same contract as Iter1; with an equal seed it yields equal values."""

    def __init__(self, seed):
        self.seed = seed

    def __iter__(self):
        return random_generator(10, self.seed)


# Use an int (str and bytes work too): these are seed types that
# random.seed() documents, and they give the same sequence on every run.
# Don't use None; that makes each generator seed itself independently.
common_seed = 12345
for el1, el2 in zip(Iter1(common_seed), Iter2(common_seed)):
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print('{} {}'.format(el1, el2))

There is no way to control the random number generation in this way. If you want to do that, you should create your own random function. As a simpler and more Pythonic alternative, you can create just one object and use itertools.tee to copy its iterator, so that both copies give the same random sequence:
In [28]: class Iter1(object):
def __init__(self, number):
self.number = number
def __iter__(self):
for _ in range(self.number):
yield random.random()
....:
In [29]:
In [29]: num = Iter1(5)
In [30]: from itertools import tee
In [31]: num, num2 = tee(num)
In [32]: list(zip(num, num2))
Out[32]:
[(0.485400998727448, 0.485400998727448),
(0.8801649381536764, 0.8801649381536764),
(0.9684025615967844, 0.9684025615967844),
(0.9980073706742334, 0.9980073706742334),
(0.1963579685642387, 0.1963579685642387)]

Related

How can I modify my __repr__ to represent correctly?

My __repr__ method works fine for objects created in its class, but for objects that were created by importing a library and using methods from it, it only shows the memory address...
from roster import student_roster #I only got the list if students from here
import itertools as it
class ClassroomOrganizer:
    """Alphabetically sorted student names plus a few roster queries."""

    def __init__(self):
        # student_roster is the module-level list imported from roster.
        self.sorted_names = self._sort_alphabetically(student_roster)

    def __repr__(self):
        # NOTE: get_combinations() returns a lazy itertools.combinations
        # object, so this shows that object's default repr rather than the
        # pairs themselves.
        return f'{self.get_combinations(2)}'

    def __iter__(self):
        # Reset the cursor so every new loop starts at the first name.
        self.c = 0
        return self

    def __next__(self):
        if self.c < len(self.sorted_names):
            x = self.sorted_names[self.c]
            self.c += 1
            return x
        else:
            raise StopIteration

    def _sort_alphabetically(self, students):
        """Return the 'name' of every record in ``students``, sorted ascending."""
        return sorted(student_info['name'] for student_info in students)

    def get_students_with_subject(self, subject):
        """Return (name, subject) tuples for roster entries whose
        'favorite_subject' equals ``subject``."""
        return [
            (student['name'], subject)
            for student in student_roster
            if student['favorite_subject'] == subject
        ]

    def get_combinations(self, r):
        """Return a lazy iterator over all r-length combinations of the sorted names."""
        return it.combinations(self.sorted_names, r)
# Build the organizer from the imported roster.
a = ClassroomOrganizer()
# for i in a:
# print(i)
# Prints whatever ClassroomOrganizer.__repr__ returns.
print(repr(a))
I tried displaying objects that don't rely on another library, and they displayed properly.
The issue I was facing was linked to me not understanding the nature of the object. itertools.combinations is an iterable, and in order to represent the values stored I needed to either:
unpack it inside a variable like:
def get_combinations(self, r):
    """Return every r-length combination of the sorted names as a list."""
    return list(it.combinations(self.sorted_names, r))
Iter through it inside a loop and leave the original code intact like
for i in a.get_combinations(2):
print(i)
I prefer the second solution

python - getting specific generator data from the generator

let's start with the code
class MyClass:
    """Holds a generator of (elem + 1, elem + 2) pairs for elem in 0..9."""

    def __init__(self):
        # Placeholders; nothing inside this class assigns them afterwards.
        self.elemplusone = self.elemplustwo = None
        self.data = self.generate_data()

    def generate_data(self):
        """Yield ten consecutive pairs, starting with (1, 2)."""
        elem = 0
        while elem < 10:
            yield elem + 1, elem + 2
            elem += 1
I need to get the first and the second element of generator. Right now, I'm calling it outside the class after creating an object:
# Create the object, then pull the first pair out of its generator.
a_generator = MyClass()
c = next(a_generator.data)
# Split the pair into its two components.
elemplusone = c[0]
elemplustwo = c[1]
but I need them to be specified (as separate generators) in the class and I can't create two generator methods.
Thanks
I also don't quite understand what you mean exactly. But does this help you?
class MyClass:
    """Wraps a pair generator and exposes the current pair as attributes."""

    def __init__(self):
        self.data = self.generate_data()
        # Load the first pair so both attributes exist right away.
        self.update_elements()

    def update_elements(self):
        """Advance the generator and store the new pair on the instance.

        Raises:
            StopIteration: once all ten pairs have been consumed.
        """
        # Unpack the yielded tuple directly; no intermediate list is needed.
        self.elemplusone, self.elemplustwo = next(self.data)

    def generate_data(self):
        """Yield (elem + 1, elem + 2) for elem in 0..9, printing before each yield."""
        for elem in range(10):
            print("Yielded")
            yield elem + 1, elem + 2


a_generator = MyClass()
a_generator.elemplusone is 1 and a_generator.elemplustwo is 2.
Now you could call a_generator.update_elements() to yield your elements again and continue in your generator. Please let me know if this helps you. Good luck!

Python Primes iterator using count()

I am trying to understand how the primes iterator works, the code was taken from a lecture.
I searched for count() but found only methods for lists, objects, I just do not understand how the line self._candidates = count(1) works and what it means.
where is the object we are trying to count 1 in? and the further use of it self._candidates.next() is also very confusing.
I mainly code in java albeit know basic python.
here is the code:
from itertools import count


class Primes(object):
    """Endless iterator built as an incremental sieve.

    The first value produced is 1 (it passes through unfiltered); every
    later value is a prime, in ascending order.
    """

    def __init__(self):
        # Candidate stream before any filtering: 1, 2, 3, ...
        self._candidates = count(1)

    def __iter__(self):
        return self

    def __next__(self):
        item = next(self._candidates)
        if item > 1:
            # Wrap the stream so multiples of this value never surface again.
            self._candidates = FilterMultiplies(self._candidates, item)
        return item

    next = __next__  # Python 2 spelling of the iterator protocol


class FilterMultiplies(object):
    """Iterator over ``seq`` that skips every multiple of ``n``."""

    def __init__(self, seq, n):
        self._seq = iter(seq)
        self._n = n

    def __iter__(self):
        return self

    def __next__(self):
        item = next(self._seq)
        # Keep pulling until a value that is not divisible by n shows up.
        while item % self._n == 0:
            item = next(self._seq)
        return item

    next = __next__  # Python 2 spelling of the iterator protocol
Probably this is itertools.count, and a line
from itertools import count
is missing from the listing.
Generators in Python are comparable to Iterators in Java. The call count(1) returns a generator that counts upwards from 1:
>>> from itertools import count
>>> counter = count(1)
>>> counter.next()
1
>>> counter.next()
2
>>> counter.next()
3
Note that counter.next() is Python 2 only. For compatibility with both Python 2 and 3, use next(counter) instead.

subclass str, and make new method with same effect as +=

I'm trying to subclass str - not for anything important, just an experiment to learn more about Python built-in types. I've subclassed str this way (using __new__ because str is immutable):
class MyString(str):
    """str subclass from the question; write() leaves the instance unchanged."""

    def __new__(cls, value=''):
        # str is immutable, so the value has to be supplied in __new__.
        return super(MyString, cls).__new__(cls, value)

    def __radd__(self, value):  # what method should I use??
        combined = self + value
        return MyString(combined)  # what goes here??

    def write(self, data):
        # The MyString built here is discarded; self keeps its old value.
        self.__radd__(data)
It initializes correctly, as far as I can tell, but I can't get it to modify itself in-place using the += operator. I've tried overriding __add__, __radd__, __iadd__ and a variety of other configurations. Using a return statement, I've managed to get it to return a new instance of the correctly appended MyString, but not to modify it in place. Success would look like:
b = MyString('g')
b.write('h') # b should now be 'gh'
Any thoughts?
UPDATE
To possibly add a reason why someone might want to do this, I followed the suggestion of creating the following mutable class that uses a plain string internally:
class StringInside(object):
    """Mutable text buffer that keeps its contents in one plain string."""

    def __init__(self, data=''):
        self.data = data

    def read(self):
        """Return everything written so far."""
        return self.data

    def write(self, data):
        """Append ``data`` to the stored string."""
        self.data += data
and tested with timeit:
timeit.timeit("arr+='1234567890'", setup="arr = ''", number=10000)
0.004415035247802734
timeit.timeit("arr.write('1234567890')", setup="from hard import StringInside; arr = StringInside()", number=10000)
0.0331270694732666
The difference increases rapidly as the number goes up - at 1 million iterations, StringInside took longer than I was willing to wait to return, while the pure str version returned in ~100ms.
UPDATE 2
For posterity, I decided to write a cython class wrapping a C++ string to see if performance could be improved compared to one loosely based on Mike Müller's updated version below, and I managed to succeed. I realize cython is "cheating" but I provide this just for fun.
python version:
class Mike(object):
    """Character buffer kept as a list and joined into a str on demand."""

    def __init__(self, data=''):
        # list() splits the initial text into its individual items.
        self._data = list(data)

    def write(self, data):
        """Append the items of ``data`` to the buffer."""
        self._data.extend(data)

    def read(self, stop=None):
        """Return the first ``stop`` items joined together (all if None)."""
        head = self._data[:stop]
        return ''.join(head)

    def pop(self, stop=None):
        """Remove the first ``stop`` items and return them joined together.

        A falsy ``stop`` (None or 0) removes and returns the whole buffer.
        """
        if not stop:
            stop = len(self._data)
        # Split first, then join, so the buffer is trimmed even if join fails.
        head, self._data = self._data[:stop], self._data[stop:]
        return ''.join(head)

    def __getitem__(self, key):
        selected = self._data[key]
        return ''.join(selected)
cython version:
from libcpp.string cimport string
# Extension type that keeps its contents in a C++ std::string (buff).
cdef class CyString:
cdef string buff
# Assigned in __cinit__ and increased in write().
# NOTE(review): pop() erases from buff without adjusting length - confirm this is intended.
cdef public int length
def __cinit__(self, string data=''):
self.length = len(data)
self.buff = data
# Append new_data to buff and add its size to length.
def write(self, string new_data):
self.length += len(new_data)
self.buff += new_data
# Return buff.substr(0, length); a length of 0 (the default) means self.length.
def read(self, int length=0):
if not length:
length = self.length
return self.buff.substr(0, length)
# Return buff.substr(0, length) and erase that range from buff;
# a length of 0 (the default) means self.length.
def pop(self, int length=0):
if not length:
length = self.length
ans = self.buff.substr(0, length)
self.buff.erase(0, length)
return ans
performance:
writing
>>> timeit.timeit("arr.write('1234567890')", setup="from pyversion import Mike; arr = Mike()", number=1000000)
0.5992741584777832
>>> timeit.timeit("arr.write('1234567890')", setup="from cyversion import CyBuff; arr = CyBuff()", number=1000000)
0.17381906509399414
reading
>>> timeit.timeit("arr.write('1234567890'); arr.read(5)", setup="from pyversion import Mike; arr = Mike()", number=1000000)
1.1499049663543701
>>> timeit.timeit("arr.write('1234567890'); arr.read(5)", setup="from cyversion import CyBuff; arr = CyBuff()", number=1000000)
0.2894480228424072
popping
>>> # note I'm using 10e3 iterations - the python version wouldn't return otherwise
>>> timeit.timeit("arr.write('1234567890'); arr.pop(5)", setup="from pyversion import Mike; arr = Mike()", number=10000)
0.7390561103820801
>>> timeit.timeit("arr.write('1234567890'); arr.pop(5)", setup="from cyversion import CyBuff; arr = CyBuff()", number=10000)
0.01501607894897461
Solution
This is an answer to the updated question.
You can use a list to hold data and only construct the string when reading it:
class StringInside(object):
    """Text buffer that stores written pieces and joins them only on read."""

    def __init__(self, data=''):
        # The initial text is simply the first stored piece.
        self._data = [data]

    def write(self, data):
        """Store ``data`` as one more piece."""
        pieces = self._data
        pieces.append(data)

    def read(self):
        """Return all stored pieces concatenated in write order."""
        joined = ''.join(self._data)
        return joined
Performance
The performance of this class:
%%timeit arr = StringInside()
arr.write('1234567890')
1000000 loops, best of 3: 352 ns per loop
is much closer to that of the native str:
%%timeit str_arr = ''
str_arr+='1234567890'
1000000 loops, best of 3: 222 ns per loop
Compare with your version:
%%timeit arr = StringInsidePlusEqual()
arr.write('1234567890')
100000 loops, best of 3: 87 µs per loop
Reason
The my_string += another_string way of building a string has been a performance anti-pattern for a long time. CPython has some optimizations for this case, but it seems that CPython cannot detect that this pattern is used here. This is likely because it is a bit hidden inside a class.
Not all implementations have this optimization, for various reasons. For example, PyPy, which in general is much faster than CPython, is considerably slower for this use case:
PyPy 2.6.0 (Python 2.7.9)
>>>> import timeit
>>>> timeit.timeit("arr+='1234567890'", setup="arr = ''", number=10000)
0.08312582969665527
CPython 2.7.11
>>> import timeit
>>> timeit.timeit("arr+='1234567890'", setup="arr = ''", number=10000)
0.002151966094970703
Slice-able version
This version supports slicing:
class StringInside(object):
    """Text buffer stored item by item so that it can be indexed and sliced."""

    def __init__(self, data=''):
        # list() splits the initial text into its individual items.
        self._data = list(data)

    def write(self, data):
        """Append the items of ``data`` to the buffer."""
        self._data.extend(data)

    def read(self, start=None, stop=None):
        """Return the items in [start:stop] joined; both bounds are optional."""
        window = self._data[start:stop]
        return ''.join(window)

    def __getitem__(self, key):
        # Works for an index as well as a slice: both results can be joined.
        picked = self._data[key]
        return ''.join(picked)
You can slice the normal way:
>>> arr = StringInside('abcdefg')
>>> arr[2]
'c'
>>> arr[1:3]
'bc'
Now, read() also supports optional start and stop indices:
>>> arr.read()
'abcdefg'
>>> arr.read(1, 3)
'bc'
>>> arr.read(1)
'bcdefg'

Robust weighted random string generator

I have implemented the following class to generate either 'p' or 'q' based on an input frequency of 'p'. However, this implementation breaks if the frequency gets smaller than the size of the list used to store the options. Is there a way in which I can implement this to work for any value of p?
from random import random


class AlleleGenerator(object):
    """
    allele generator - frequencies are resolved in steps of 1/1000, so any
    p < 0.001 behaves like p == 0
    """

    def __init__(self, p):
        """Build the lookup table: int(1000 * p) 'p' slots, padded to 1000 with 'q'."""
        p_slots = int(1000 * p)
        # A negative repeat count gives an empty list, so p >= 1 simply
        # means that no 'q' slots are added.
        self.values = ['p'] * p_slots + ['q'] * (1000 - p_slots)

    def next(self):
        """Returns p or q based on allele frequency"""
        # random() is in [0.0, 1.0), so the index is always in 0..999.
        rnd = int(random() * 1000)
        return self.values[rnd]

    def __call__(self):
        return self.next()
Don't use self.values. In next, just generate a random number between 0 and 1, and return 'p' if the random number is less than p:
from random import random


class AlleleGenerator(object):
    """Draws 'p' when a fresh random() value is below p, otherwise 'q'."""

    def __init__(self, p):
        """Remember the frequency of allele 'p'."""
        self.p = p

    def next(self):
        """Return 'p' or 'q' according to the stored frequency."""
        if random() < self.p:
            return 'p'
        return 'q'

    def __call__(self):
        return self.next()
Also, be careful not to use classes when a function suffices.
For example, you might consider using a generator function:
from random import random


def allele_generator(p):
    """Endlessly yield 'p' when random() is below p, otherwise 'q'."""
    while True:
        if random() < p:
            yield 'p'
        else:
            yield 'q'


# Demo: draw three alleles at a frequency of 0.001.
agen = allele_generator(0.001)
for i in range(3):
    print(next(agen))

Categories

Resources