Multiprocessed producer-consumer audio player with buffering - python

I need to implement an audio player that can deal with jitter. So I need buffering, and hence a minimum buffer size, and I need to know how many elements are in the buffer at any time.
But in Python the Queue qsize() method is not implemented. What can I do about it?
from datetime import datetime
from multiprocessing import Condition, Process, Queue
from pyaudio import PyAudio

class MultiprocessedAudioPlayer(object):
    def __init__(self, sampling_frequency, min_buffer_size=1, max_buffer_size=10, sample_width=2):
        self.p = PyAudio()
        self.stream = self.p.open(format=self.p.get_format_from_width(width=sample_width),
                                  rate=sampling_frequency, output=True, channels=1)
        self.max_buffer_size = max_buffer_size
        self.min_buffer_size = min_buffer_size
        self.buffer = Queue(maxsize=max_buffer_size)
        self.condition = Condition()  # must exist before the child process starts
        self.process = Process(target=self.playing)
        self.process.start()

    def schedule_to_play(self, frame):
        self.condition.acquire()
        if self.buffer.full():
            print('Buffer is overflown')
            self.condition.wait()
        self.buffer.put(frame)
        if self.buffer.qsize() > self.min_buffer_size:
            print('Buffer length is', self.buffer.qsize())
            self.condition.notify()
            print('It is sufficient to play')
        self.condition.release()
        # print('frame appended buffer length is {} now'.format(self.buffer.qsize()))

    def play(self, frame):
        print('started playing frame at {}'.format(datetime.now()))
        self.stream.write(frame, num_frames=len(frame))
        print('stopped playing frame at {}'.format(datetime.now()))

    def close(self):
        self.stream.stop_stream()
        self.stream.close()

    def playing(self):
        while True:
            self.condition.acquire()
            if self.buffer.qsize() < self.min_buffer_size:
                self.condition.wait()
            frame = self.buffer.get()  # Queue has no popleft(); get() dequeues
            print('popping frame from buffer')
            print('Buffer length is {} now'.format(self.buffer.qsize()))
            self.condition.notify()
            self.condition.release()
            self.play(frame)

Two suggestions:

- Use threading -- the qsize() method is reliable there. (It isn't reliable with multiprocessing, because of the latency of sending messages back and forth.) See the sketch after this list.
- Use multiprocessing with a Manager instance that holds your shared state. Each process can set and get data, and the Manager handles sending the updates back and forth.
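For the first suggestion, here is a minimal sketch of a buffered player built on queue.Queue; the buffer sizes, frame strings, and the print() stand-in for actual playback are illustrative:

import queue
import random
import threading
import time

buf = queue.Queue(maxsize=10)        # bounded buffer; put() blocks when full
MIN_BUFFER = 3                       # low-water mark before playback starts

def consumer():
    # qsize() is dependable here because both threads share one process.
    while buf.qsize() < MIN_BUFFER:  # let the buffer fill up first
        time.sleep(0.01)
    while True:
        frame = buf.get()            # blocks until a frame is available
        print('playing', frame)      # stand-in for stream.write(frame)

threading.Thread(target=consumer, daemon=True).start()

for i in range(20):                  # the producer: feed frames with jitter
    buf.put('frame-%d' % i)
    time.sleep(random.random() * 0.1)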
The following example illustrates the second suggestion: it adds data to a shared list every second, and every now and then the data is scanned by a second process. Also note the extensive logging, which is extremely helpful with multiprocess programs.
#!/usr/bin/env python
'''
mptest_proxy.py -- producer adds to fixed-sized list; scanner uses them

OPTIONS:
-v  verbose multiprocessing output
'''

import logging, multiprocessing, sys, time

def producer(objlist):
    '''
    add an item to list every sec; ensure fixed size list
    '''
    logger = multiprocessing.get_logger()
    logger.info('start')
    while True:
        try:
            time.sleep(1)
        except KeyboardInterrupt:
            return
        msg = 'ding: {:04d}'.format(int(time.time()) % 10000)
        logger.info('put: %s', msg)
        del objlist[0]
        objlist.append(msg)

def scanner(objlist):
    '''
    every now and then, run calculation on objlist
    '''
    logger = multiprocessing.get_logger()
    logger.info('start')
    while True:
        try:
            time.sleep(5)
        except KeyboardInterrupt:
            return
        logger.info('items: %s', list(objlist))

def main():
    opt_verbose = '-v' in sys.argv[1:]
    logger = multiprocessing.log_to_stderr(
        level=logging.DEBUG if opt_verbose else logging.INFO,
    )
    logger.info('setup')

    # create fixed-length list, shared between producer & consumer
    manager = multiprocessing.Manager()
    my_objlist = manager.list(  # pylint: disable=E1101
        [None] * 10
    )

    multiprocessing.Process(
        target=producer,
        args=(my_objlist,),
        name='producer',
    ).start()

    multiprocessing.Process(
        target=scanner,
        args=(my_objlist,),
        name='scanner',
    ).start()

    logger.info('running forever')
    try:
        manager.join()  # wait until both workers die
    except KeyboardInterrupt:
        pass
    logger.info('done')

if __name__ == '__main__':
    main()

Related

Threaded Buffer crashes

I tried to implement a 5-element buffer through threading and a list.

import threading

class Buffer1:
    def __init__(self, size):
        self.empty = True
        self.content = None
        self.lock = threading.Condition()
        self.list = []

    def take(self):
        with self.lock:
            while not self.list:
                self.lock.wait()
            help = self.list[len(self.list) - 1]
            del self.list[len(self.list) - 1]
            self.lock.notify_all()
            return help

    def put(self, v):
        with self.lock:
            while len(self.list) > 4:
                self.lock.wait()
            # self.content = v
            self.list.append(v)
            self.empty = False
            self.lock.notify_all()

    def show_list(self):
        return self.list

a = Buffer1(5)
a.put(7)
In principle it works, but when you exceed the limits of the buffer, either by buffering a sixth value or by calling take() when nothing is buffered, the IDE becomes unresponsive. How could I go about fixing this?
You are using only one thread to add elements to the buffer, so once the list contains 5 items your main thread waits indefinitely on self.lock.wait(). You need another thread that takes elements in parallel and notifies the producer thread.
For example, create a consumer thread that takes 5 items:

def consume(buffer):
    import time
    for i in range(5):
        print(threading.current_thread(), "consume", buffer.take())
        time.sleep(2)
    print(threading.current_thread(), "Bye")

buffer = Buffer1(5)
t = threading.Thread(target=consume, args=(buffer,), name="consumer")
t.start()

buffer.put(1)
buffer.put(2)
buffer.put(3)
buffer.put(4)
buffer.put(5)
buffer.put(6)
print(buffer.show_list())
... the IDE becomes unresponsive. How could I go about fixing this?
You only showed adding to the buffer from the main thread; nothing ever takes anything out.
If the buffer fills up or becomes empty, the next put/take will cause its Condition (lock) to wait until something notifies it to continue. I didn't see any of that signaling in your example.
The buffer is a shared resource. The buffer and the threads that use it need enough coordination that each stays out of the others' way, and enough logic to keep from getting stuck somewhere.
Presumably you need a thread that puts stuff into the buffer and a thread that takes stuff out of it, both with ample signaling to notify everyone when they are done messing with the buffer.
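The core of that signaling is a wait-in-a-loop/notify handshake on a shared Condition; a minimal sketch, with shutdown handling omitted:

import collections
import threading

lock = threading.Condition()
items = collections.deque()
SIZE = 5

def put(v):
    with lock:
        while len(items) >= SIZE:   # full: wait until take() frees a slot
            lock.wait()
        items.append(v)
        lock.notify_all()           # wake any thread blocked in take()

def take():
    with lock:
        while not items:            # empty: wait until put() adds an item
            lock.wait()
        v = items.pop()
        lock.notify_all()           # wake any thread blocked in put()
        return v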
Set up logging so that program execution can be traced with log messages.
Buffer1 changes:

- Changed the list to a collections.deque to simplify things a bit.
- Added properties for empty and full.
- Added an Event attribute to stop putting/taking when the process gets shut down.
- Added a timeout while waiting to put/take to forestall any timing issues when the threads are shut down.
- Added notifications for the empty and full conditions.

Made two threads: one to add to the buffer and one to take from the buffer.

- Each will add/take while its Event is not set.
- In each iteration a random number of items is taken or added.
- When taking/putting, the buffer's Condition is acquired and all waiters are notified when complete.

In the main thread:

- An Event is created for signaling the threads to quit.
- A Timer is created to limit thread execution time; when it times out, its callback sets the Event and uses the buffer's Condition (lock) to notify anyone who is waiting and free them up.
- The threads are created, started, and joined.
import threading
import collections
import random
import string
import time
import logging
import sys

# logging setup
root = logging.getLogger()
root.setLevel(logging.INFO)
formatter = logging.Formatter(fmt='%(asctime)s.%(msecs)03d %(message)s',
                              datefmt='%S')

class WarningFilter(logging.Filter):
    def filter(self, record):
        return record.levelno == logging.WARNING

class InfoFilter(logging.Filter):
    def filter(self, record):
        return record.levelno == logging.INFO

handler = logging.StreamHandler(sys.stdout)
handler.setLevel(logging.INFO)
handler.setFormatter(formatter)
handler.addFilter(InfoFilter())
root.addHandler(handler)

handler = logging.StreamHandler(sys.stderr)
handler.setLevel(logging.WARNING)
handler.setFormatter(formatter)
handler.addFilter(WarningFilter())
root.addHandler(handler)
# logging setup end

class Buffer1:
    '''FILO buffer.
    '''
    def __init__(self, size, evt):
        self.content = None
        self.size = size
        self.evt = evt
        self.lock = threading.Condition()
        self.list = collections.deque()

    @property
    def full(self):
        return len(self.list) >= self.size

    @property
    def empty(self):
        return not bool(self.list)

    def take(self):
        with self.lock:
            while self.empty:
                root.warning('buffer empty waiting to take')
                self.lock.wait(timeout=5)
                if self.evt.is_set():
                    help = None
                    break
            else:
                help = self.list.pop()
            self.lock.notify_all()
            return help

    def put(self, v):
        success = False
        with self.lock:
            while self.full:
                root.warning('buffer full waiting to put')
                self.lock.wait(timeout=5)
                if self.evt.is_set():
                    break
            else:
                self.list.append(v)
                success = True
            self.lock.notify_all()
            return success

    def show_list(self):
        return self.list

class Prod(threading.Thread):
    '''Puts stuff onto buffer, quits on Event.
    Contrived toy - periodically puts random n items in buffer.
    '''
    def __init__(self, buffer, evt):
        super().__init__(name='producer')
        self.buffer = buffer
        self.evt = evt

    def run(self):
        n = 0
        while not self.evt.is_set():
            howmany = random.randint(1, 9)
            payload = random.sample(string.ascii_letters, howmany)
            payload = collections.deque(payload)
            root.info(f'{self.name} putting {howmany}')
            with self.buffer.lock:
                while payload and (not self.evt.is_set()):
                    c = payload.popleft()
                    root.info(f'{self.name} -----> {c}')
                    if not self.buffer.put(c):
                        root.warning(f'{self.name} last put failed')
                self.buffer.lock.notify_all()
            time.sleep(.04)
            n += 1
        root.info(f'{self.name} dying n={n}')
        with self.buffer.lock:
            self.buffer.lock.notify_all()
        root.info(f'{self.name} is done')

class Cons(threading.Thread):
    '''Takes stuff off of buffer, quits on Event set.
    Contrived toy - periodically takes random n items from buffer.
    '''
    def __init__(self, buffer, evt):
        super().__init__(name='consumer')
        self.buffer = buffer
        self.evt = evt

    def run(self):
        n = 0
        while not self.evt.is_set():
            howmany = random.randint(1, 9)
            root.info(f'{self.name} taking {howmany}')
            with self.buffer.lock:
                while (howmany > 0) and (not self.evt.is_set()):
                    c = self.buffer.take()
                    root.info(f'{self.name} <----- {c}')
                    howmany -= 1
                self.buffer.lock.notify_all()
            time.sleep(.04)
            n += 1
        root.info(f'{self.name} dying n={n}')
        with self.buffer.lock:
            self.buffer.lock.notify_all()
        root.info(f'{self.name} is done')

if __name__ == '__main__':
    # use an Event to shut down the whole process
    evt = threading.Event()
    buffer = Buffer1(5, evt)

    def kill(evt=evt, buffer=buffer):
        root.warning('killing everything')
        evt.set()
        with buffer.lock:
            buffer.lock.notify_all()

    # don't let this toy example run forever
    t = threading.Timer(5, kill)
    t.start()

    p1 = Prod(buffer, evt)
    c1 = Cons(buffer, evt)
    c1.start()
    p1.start()
    p1.join()
    c1.join()
    print('exit')
Here is another take using asyncio instead of threads to exercise your buffer.
import asyncio
import collections
import random
import string
import time
import logging
import sys

# logging setup
root = logging.getLogger()
root.setLevel(logging.INFO)
formatter = logging.Formatter(fmt='%(asctime)s.%(msecs)03d %(message)s',
                              datefmt='%S')

class WarningFilter(logging.Filter):
    def filter(self, record):
        return record.levelno == logging.WARNING

class InfoFilter(logging.Filter):
    def filter(self, record):
        return record.levelno == logging.INFO

handler = logging.StreamHandler(sys.stdout)
handler.setLevel(logging.INFO)
handler.setFormatter(formatter)
handler.addFilter(InfoFilter())
root.addHandler(handler)

handler = logging.StreamHandler(sys.stderr)
handler.setLevel(logging.WARNING)
handler.setFormatter(formatter)
handler.addFilter(WarningFilter())
root.addHandler(handler)

class Buffer:
    '''FILO buffer.
    '''
    def __init__(self, size, evt):
        self.content = None
        self.size = size
        self.stop_evt = evt
        self.lock = asyncio.Condition()
        self.list = collections.deque()

    def full(self):
        return len(self.list) >= self.size

    def not_full(self):
        return len(self.list) < self.size

    def empty(self):
        return not bool(self.list)

    def not_empty(self):
        return bool(self.list)

    async def take(self):
        async with self.lock:
            # root.info(f'take:lock acquired - wait for not empty')
            while self.empty():
                waiters = [thing for thing in self.lock._waiters]
                # root.warning(f'take:{waiters} waiting')
                await self.lock.wait()
            if self.stop_evt.is_set():  # shutting down
                val = None
            else:
                # root.info('take: not empty')
                val = self.list.pop()
            self.lock.notify_all()
            return val

    async def put(self, v):
        success = False
        async with self.lock:
            # root.info(f'put:lock acquired - wait for not full')
            while self.full():
                waiters = [thing for thing in self.lock._waiters]
                # root.warning(f'put:{waiters} waiting')
                await self.lock.wait()
                if self.stop_evt.is_set():  # shutting down
                    break
            else:
                # root.info('put: not full')
                self.list.append(v)
                success = True
            self.lock.notify_all()
            return success

    def show_list(self):
        return self.list

async def random_stuff():
    howmany = random.randint(1, 9)
    payload = random.sample(string.ascii_letters, howmany)
    return collections.deque(payload)

async def produce(buffer, stop_evt, name):
    puts = []
    try:
        while True:
            payload = await random_stuff()
            root.warning(f'producer{name} putting {len(payload)}')
            while payload:
                c = payload.popleft()
                root.info(f'producer{name} -----> {c}')
                success = await buffer.put(c)
                if not success:
                    root.warning(f'producer{name} failed to put {c}')
                else:
                    puts.append(c)
            await asyncio.sleep(.03)
    except asyncio.CancelledError as e:
        root.warning('producer canceled')
    root.info(f'producer{name} dying n={len(puts)}')
    root.info(f'producer{name} is done')
    return puts

async def consume(buffer, stop_evt, name):
    '''Takes stuff off of buffer, quits on Event set.
    Contrived toy - periodically takes random n items from buffer.
    '''
    takes = []
    try:
        while True:
            howmany = random.randint(1, 9)
            msg = f'consumer{name} taking {howmany}'
            root.warning(f'{msg:>38}')
            while howmany > 0:
                c = await buffer.take()
                takes.append(c)
                msg = f'consumer{name} <----- {c}'
                root.info(f'{msg:>38}')
                howmany -= 1
            await asyncio.sleep(.02)
    except asyncio.CancelledError as e:
        root.warning('consumer canceled')
    root.info(f'consumer{name} dying n={len(takes)}')
    root.info(f'consumer{name} is done')
    return takes

async def timer(n, buffer, evt, tasks):
    root.warning('timer started')
    await asyncio.sleep(n)
    evt.set()
    root.warning('timed out - event set')
    root.warning('canceling tasks')
    for task in tasks:
        task.cancel()

async def main():
    loop = asyncio.get_running_loop()
    loop.set_debug(True)
    # use an Event to shut down the whole process
    evt = asyncio.Event()
    buffer = Buffer(5, evt)
    put_task = asyncio.create_task(produce(buffer, evt, 1))
    take_task = asyncio.create_task(consume(buffer, evt, 1))
    timer_task = asyncio.create_task(timer(5, buffer, evt, [put_task, take_task]))
    root.info('tasks created')
    await timer_task
    puts = await put_task
    takes = await take_task
    print('exit')
    return puts, takes, buffer.list

if __name__ == '__main__':
    puts, takes, remains = asyncio.run(main())
    puts = collections.Counter(puts)
    takes = collections.Counter(takes)
    remains = collections.Counter(remains)
    # print(remains == (puts-takes))

Why does the value of Semaphore not change globally

This is my current code. The main issue is that I use a Semaphore to coordinate the output of two processes, but the Semaphore does not seem to change globally, i.e. when the "producer" process changes the Semaphore to 2, the "consumer" process still thinks it is zero, which causes it to wait forever.
from multiprocessing import Process, Semaphore, Queue
import time
from random import random

buffer = Queue(10)
empty = Semaphore(2)
full = Semaphore(0)

class Consumer(Process):
    def run(self):
        global buffer, empty, full
        while True:
            time.sleep(4)
            print(full)
            full.acquire()
            buffer.get()
            print('Consumer get')
            time.sleep(1)
            empty.release()

class Producer(Process):
    def run(self):
        global buffer, empty, full
        while True:
            empty.acquire()
            print('Producer put ')
            time.sleep(1)
            full.release()
            buffer.put(1)
            print(full)

if __name__ == '__main__':
    p = Producer()
    c = Consumer()
    p.daemon = c.daemon = True
    p.start()
    c.start()
    p.join()
    c.join()
    print('Ended!')
and the output is
Producer put
<Semaphore(value=1)>
Producer put
<Semaphore(value=2)>
<Semaphore(value=0)>
I don't know what I should do to let the "consumer" process detect the change.
Your two processes each have their own copy of both semaphores, because each process runs the whole script when it is instantiated.
You must move the semaphore and queue definitions inside the if __name__ == '__main__': block and pass the instances to the Producer and Consumer constructors, so that both processes use the same instances of the three objects.
from multiprocessing import Process, Semaphore, Lock, Queue
import time
from random import random

class Consumer(Process):
    def __init__(self, empty, full, buffer):
        super(Consumer, self).__init__()
        self.empty = empty
        self.full = full
        self.buffer = buffer

    def run(self):
        while True:
            time.sleep(4)
            print("Consumer: {}".format(self.full), flush=True)
            print("Consumer: buf {}".format(self.buffer.qsize()), flush=True)
            self.full.acquire()
            self.buffer.get()
            print('Consumer get', flush=True)
            time.sleep(1)
            self.empty.release()

class Producer(Process):
    def __init__(self, empty, full, buffer):
        super(Producer, self).__init__()
        self.empty = empty
        self.full = full
        self.buffer = buffer

    def run(self):
        while True:
            self.empty.acquire()
            print('Producer put ', flush=True)
            self.buffer.put('a')  # <<< you forgot this in your code. If the queue is empty, get() will block the consumer
            time.sleep(1)
            self.full.release()
            print(self.full, flush=True)

if __name__ == '__main__':
    buffer = Queue(10)
    empty = Semaphore(2)
    full = Semaphore(0)
    p = Producer(empty, full, buffer)
    c = Consumer(empty, full, buffer)
    p.daemon = c.daemon = True
    p.start()
    c.start()
    p.join()
    c.join()
    print('Ended!')

Start python Process with output and timeout

I'm trying to find a way to start a new Process and get its output if it takes less than X seconds. If the process takes more time, I would like to ignore the Process result, kill the Process, and carry on.
I basically need to add a timer to the code below. Not sure if there's a better way to do it; I'm open to a different and better solution.
from multiprocessing import Process, Queue

def f(q):
    # Ugly work
    q.put(['hello', 'world'])

if __name__ == '__main__':
    q = Queue()
    p = Process(target=f, args=(q,))
    p.start()
    print q.get()
    p.join()
Thanks!
You may find the following module useful in your case:
Module
#! /usr/bin/env python3
"""Allow functions to be wrapped in a timeout API.

Since code can take a long time to run and may need to terminate before
finishing, this module provides a set_timeout decorator to wrap functions."""
__author__ = 'Stephen "Zero" Chappell ' \
             '<stephen.paul.chappell@atlantis-zero.net>'
__date__ = '18 December 2017'
__version__ = 1, 0, 1
__all__ = [
    'set_timeout',
    'run_with_timeout'
]

import multiprocessing
import sys
import time

DEFAULT_TIMEOUT = 60

def set_timeout(limit=None):
    """Return a wrapper that provides a timeout API for callers."""
    if limit is None:
        limit = DEFAULT_TIMEOUT
    _Timeout.validate_limit(limit)

    def wrapper(entry_point):
        return _Timeout(entry_point, limit)

    return wrapper

def run_with_timeout(limit, polling_interval, entry_point, *args, **kwargs):
    """Execute a callable object and automatically poll for results."""
    engine = set_timeout(limit)(entry_point)
    engine(*args, **kwargs)
    while engine.ready is False:
        time.sleep(polling_interval)
    return engine.value

def _target(queue, entry_point, *args, **kwargs):
    """Help with multiprocessing calls by being a top-level module function."""
    # noinspection PyPep8,PyBroadException
    try:
        queue.put((True, entry_point(*args, **kwargs)))
    except:
        queue.put((False, sys.exc_info()[1]))

class _Timeout:
    """_Timeout(entry_point, limit) -> _Timeout instance"""

    def __init__(self, entry_point, limit):
        """Initialize the _Timeout instance with all needed attributes."""
        self.__entry_point = entry_point
        self.__limit = limit
        self.__queue = multiprocessing.Queue()
        self.__process = multiprocessing.Process()
        self.__timeout = time.monotonic()

    def __call__(self, *args, **kwargs):
        """Begin execution of the entry point in a separate process."""
        self.cancel()
        self.__queue = multiprocessing.Queue(1)
        self.__process = multiprocessing.Process(
            target=_target,
            args=(self.__queue, self.__entry_point) + args,
            kwargs=kwargs
        )
        self.__process.daemon = True
        self.__process.start()
        self.__timeout = time.monotonic() + self.__limit

    def cancel(self):
        """Terminate execution if possible."""
        if self.__process.is_alive():
            self.__process.terminate()

    @property
    def ready(self):
        """Property letting callers know if a returned value is available."""
        if self.__queue.full():
            return True
        elif not self.__queue.empty():
            return True
        elif self.__timeout < time.monotonic():
            self.cancel()
        else:
            return False

    @property
    def value(self):
        """Property that retrieves a returned value if available."""
        if self.ready is True:
            valid, value = self.__queue.get()
            if valid:
                return value
            raise value
        raise TimeoutError('execution timed out before terminating')

    @property
    def limit(self):
        """Property controlling what the timeout period is in seconds."""
        return self.__limit

    @limit.setter
    def limit(self, value):
        self.validate_limit(value)
        self.__limit = value

    @staticmethod
    def validate_limit(value):
        """Verify that the limit's value is not too low."""
        if value <= 0:
            raise ValueError('limit must be greater than zero')
To use it, see the following example, which demonstrates its usage:
Example
from time import sleep

def main():
    timeout_after_four_seconds = set_timeout(4)
    # create copies of a function that have a timeout
    a = timeout_after_four_seconds(do_something)
    b = timeout_after_four_seconds(do_something)
    c = timeout_after_four_seconds(do_something)
    # execute the functions in separate processes
    a('Hello', 1)
    b('World', 5)
    c('Jacob', 3)
    # poll the functions to find out what they returned
    results = [a, b, c]
    polling = set(results)
    while polling:
        for process, name in zip(results, 'abc'):
            if process in polling:
                ready = process.ready
                if ready is True:  # if the function returned
                    print(name, 'returned', process.value)
                    polling.remove(process)
                elif ready is None:  # if the function took too long
                    print(name, 'reached timeout')
                    polling.remove(process)
                else:  # if the function is running
                    assert ready is False, 'ready must be True, False, or None'
        sleep(0.1)
    print('Done.')

def do_something(data, work):
    sleep(work)
    print(data)
    return work

if __name__ == '__main__':
    main()
Does the process you are running involve a loop?
If so, you can record a timestamp before entering the loop and put an if statement inside the loop that calls sys.exit() to terminate the script once the current timestamp differs from the recorded start timestamp by more than x seconds, as sketched below.
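A minimal sketch of that idea (the 10-second limit and the sleep standing in for real work are illustrative):

import sys
import time

start = time.time()
LIMIT = 10                        # illustrative timeout in seconds

while True:
    time.sleep(0.1)               # stand-in for one iteration of real work
    if time.time() - start > LIMIT:
        sys.exit('timed out')     # terminate the script from inside the loop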
All you need to adapt the queue example from the docs to your case is to pass the timeout to the q.get() call and terminate the process on timeout:
from Queue import Empty
...
try:
    print q.get(timeout=timeout)
except Empty:  # no value, timeout occurred
    p.terminate()
    q = None  # the queue might be corrupted after the `terminate()` call
p.join()
Using a Pipe might be more lightweight; otherwise the code is the same (you could use .poll(timeout) to find out whether there is data to receive).
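For completeness, a sketch of the Pipe variant, adapting the f() from the question (the 5-second timeout is illustrative):

from multiprocessing import Process, Pipe

def f(conn):
    # Ugly work
    conn.send(['hello', 'world'])

if __name__ == '__main__':
    parent_conn, child_conn = Pipe()
    p = Process(target=f, args=(child_conn,))
    p.start()
    if parent_conn.poll(5):    # wait up to 5 seconds for data to arrive
        print(parent_conn.recv())
    else:                      # timed out: ignore the result and kill it
        p.terminate()
    p.join()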

Is there a way to add to a variable across threads in python

Is there a way that I can have a single variable shared across active threads, like below,

count = 0
threadA(count)
threadB(count)

threadA(count):
    # do stuff
    count += 1

threadB(count):
    # do stuff
    print count
so that count will print out 1? That is, does the change made in thread A get reflected in the other thread?
Your variable count is already available to all your threads. But you need to synchronize access to it, or you will lose updates. Look into using a lock to protect access to the count.
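A minimal sketch of a lock-protected counter (the thread count and iteration count are illustrative):

import threading

count = 0
count_lock = threading.Lock()

def worker():
    global count
    for _ in range(100000):
        with count_lock:      # serialize the read-modify-write on count
            count += 1

threads = [threading.Thread(target=worker) for _ in range(2)]
for t in threads:
    t.start()
for t in threads:
    t.join()
print(count)                  # reliably 200000 with the lock in place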
If you want to use processes instead of threads, use multiprocessing. It has more features, including Manager objects, which handle shared objects for you. As a perk, you can share objects across machines!
import multiprocessing, signal, time

def producer(objlist):
    '''
    add an item to list every sec
    '''
    while True:
        try:
            time.sleep(1)
        except KeyboardInterrupt:
            return
        msg = 'ding: {:04d}'.format(int(time.time()) % 10000)
        objlist.append(msg)
        print msg

def scanner(objlist):
    '''
    every now and then, consume objlist & run calculation
    '''
    while True:
        try:
            time.sleep(3)
        except KeyboardInterrupt:
            return
        print 'items: {}'.format(list(objlist))
        objlist[:] = []

def main():
    # create obj sharable between all processes
    manager = multiprocessing.Manager()
    my_objlist = manager.list()  # pylint: disable=E1101

    multiprocessing.Process(
        target=producer, args=(my_objlist,),
    ).start()

    multiprocessing.Process(
        target=scanner, args=(my_objlist,),
    ).start()

    # kill everything after a few seconds
    signal.signal(
        signal.SIGALRM,
        lambda _sig, _frame: manager.shutdown(),
    )
    signal.alarm(12)

    try:
        manager.join()  # wait until both workers die
    except KeyboardInterrupt:
        pass

if __name__ == '__main__':
    main()

How to do dynamic creation of per-process queues in Python multiprocessing

I want to dynamically create multiple Processes, where each instance has a queue for incoming messages from other instances, and each instance can also create new instances. So we end up with a network of processes all sending to each other. Every instance is allowed to send to every other.
The code below would do what I want: it uses a Manager.dict() to store the queues, making sure updates are propagated, and a Lock() to protect write-access to the queues. However when adding a new queue it throws "RuntimeError: Queue objects should only be shared between processes through inheritance".
The problem is that when starting-up, we don't know how many queues will eventually be needed, so we have to create them dynamically. But since we can't share queues except at construction time, I don't know how to do that.
I know that one possibility would be to make queues a global variable instead of a managed one passed-in to __init__: the problem then, as I understand it, is that additions to the queues variable wouldn't be propagated to other processes.
EDIT I'm working on evolutionary algorithms. EAs are a type of machine learning technique. An EA simulates a "population", which evolves by survival of the fittest, crossover, and mutation. In parallel EAs, as here, we also have migration between populations, corresponding to interprocess communication. Islands can also spawn new islands, and so we need a way to send messages between dynamically-created processes.
import random, time
from multiprocessing import Process, Queue, Lock, Manager, current_process
try:
    from queue import Empty as EmptyQueueException
except ImportError:
    from Queue import Empty as EmptyQueueException

class MyProcess(Process):
    def __init__(self, queues, lock):
        super(MyProcess, self).__init__(target=lambda x: self.run(x),
                                        args=tuple())
        self.queues = queues
        self.lock = lock
        # acquire lock and add a new queue for this process
        with self.lock:
            self.id = len(list(self.queues.keys()))
            self.queues[self.id] = Queue()

    def run(self):
        while len(list(self.queues.keys())) < 10:
            # make a new process
            new = MyProcess(self.queues, self.lock)
            new.start()
            # send a message to a random process
            dest_key = random.choice(list(self.queues.keys()))
            dest = self.queues[dest_key]
            dest.put("hello to %s from %s" % (dest_key, self.id))
            # receive messages
            message = True
            while message:
                try:
                    message = self.queues[self.id].get(False)  # don't block
                    print("%s received: %s" % (self.id, message))
                except EmptyQueueException:
                    break
            # what queues does this process know about?
            print("%d: I know of %s" %
                  (self.id, " ".join([str(id) for id in self.queues.keys()])))
            time.sleep(1)

if __name__ == "__main__":
    # Construct MyProcess with a Manager.dict for storing the queues
    # and a lock to protect write access. Start.
    MyProcess(Manager().dict(), Lock()).start()
I'm not entirely sure what your use case actually is here. Perhaps if you elaborate a bit more on why you want each process to dynamically spawn a child with a connected queue, it will be clearer what the right solution would be in this situation.
Anyway, as the question stands, it seems that there is not really a good way to dynamically create pipes or queues with multiprocessing right now.
I think that if you're willing to spawn threads within each of your processes, you may be able to use multiprocessing.connection.Listener/Client to communicate back and forth (see the sketch below). Rather than spawning threads, though, I took an approach using network sockets and select to communicate between processes.
Dynamic process spawning and network sockets may still be flaky, depending on how multiprocessing cleans up your file descriptors when spawning/forking a new process, and your solution will most likely work more easily on *nix derivatives. If you're concerned about socket overhead, you could use unix domain sockets to be a little more lightweight, at the cost of added complexity when running nodes on multiple worker machines.
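To illustrate the Listener/Client idea: a minimal sketch in a single process, with a thread standing in for the second process (the address and authkey are illustrative):

from multiprocessing.connection import Client, Listener
import threading

ADDRESS = ('127.0.0.1', 7000)    # illustrative address and authkey
AUTHKEY = b'secret'

def listen():
    with Listener(ADDRESS, authkey=AUTHKEY) as listener:
        with listener.accept() as conn:      # blocks until a peer connects
            print('received:', conn.recv())  # any picklable object works

t = threading.Thread(target=listen)
t.start()

conn = Client(ADDRESS, authkey=AUTHKEY)      # the "other process" side
conn.send('hello from a peer')
conn.close()
t.join()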
Anyway, here's an example using network sockets and a global process list to accomplish this since I was unable to find a good way to make multiprocessing do it.
import collections
import multiprocessing
import random
import select
import socket
import time

class MessagePassingProcess(multiprocessing.Process):
    def __init__(self, id_, processes):
        self.id = id_
        self.processes = processes
        self.queue = collections.deque()
        super(MessagePassingProcess, self).__init__()

    def run(self):
        print "Running"
        inputs = []
        outputs = []
        server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        address = self.processes[self.id]["address"]
        print "Process %s binding to %s" % (self.id, address)
        server.bind(address)
        server.listen(5)
        inputs.append(server)
        process = self.processes[self.id]
        process["listening"] = True
        self.processes[self.id] = process
        print "Process %s now listening!(%s)" % (self.id, process)
        while inputs:
            readable, writable, exceptional = select.select(inputs,
                                                            outputs,
                                                            inputs,
                                                            0.1)
            for sock in readable:
                print "Process %s has a readable socket: %s" % (self.id,
                                                                sock)
                if sock is server:
                    print "Process %s has a readable server socket: %s" % (self.id,
                                                                           sock)
                    conn, addr = sock.accept()
                    conn.setblocking(0)
                    inputs.append(conn)
                else:
                    data = sock.recv(1024)
                    if data:
                        self.queue.append(data)
                        print "non server readable socket with data"
                    else:
                        inputs.remove(sock)
                        sock.close()
                        print "non server readable socket with no data"
            for sock in exceptional:
                print "exception occurred on socket %s" % (sock)
                inputs.remove(sock)
                sock.close()
            while len(self.queue) >= 1:
                print "Received:", self.queue.pop()
            # send a message to a random process:
            random_id = random.choice(list(self.processes.keys()))
            print "%s Attempting to send message to %s" % (self.id, random_id)
            random_process = self.processes[random_id]
            print "random_process:", random_process
            if random_process["listening"]:
                random_address = random_process["address"]
                s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
                try:
                    s.connect(random_address)
                except socket.error:
                    print "%s failed to send to %s" % (self.id, random_id)
                else:
                    s.send("Hello World!")
                finally:
                    s.close()
            time.sleep(1)

if __name__ == "__main__":
    print "hostname:", socket.getfqdn()
    print dir(multiprocessing)
    manager = multiprocessing.Manager()
    processes = manager.dict()
    joinable = []
    for n in xrange(multiprocessing.cpu_count()):
        mpp = MessagePassingProcess(n, processes)
        processes[n] = {"id": n,
                        "address": ("127.0.0.1", 7000 + n),
                        "listening": False,
                        }
        print "processes[%s] = %s" % (n, processes[n])
        mpp.start()
        joinable.append(mpp)
    for process in joinable:
        process.join()
if __name__=="__main__":
print "hostname:", socket.getfqdn()
print dir(multiprocessing)
manager = multiprocessing.Manager()
processes = manager.dict()
joinable = []
for n in xrange(multiprocessing.cpu_count()):
mpp = MessagePassingProcess(n, processes)
processes[n] = {"id":n,
"address":("127.0.0.1",7000+n),
"listening":False,
}
print "processes[%s] = %s"%(n, processes[n])
mpp.start()
joinable.append(mpp)
for process in joinable:
process.join()
With a lot of polish and testing love this might be a logical extension to multiprocessing.Process and/or multiprocessing.Pool as this does seem like something people would use if it were available in the standard lib. It may also be reasonable to create a DynamicQueue class that uses sockets to be discoverable to other queues.
Anyway, hope it helps. Please update if you figure out a better way to make this work.
This code is based on the accepted answer. It's in Python 3, since OS X Snow Leopard segfaults on some uses of multiprocessing.
#!/usr/bin/env python3

import collections
from multiprocessing import Process, Manager, Lock, cpu_count
import random
import select
import socket
import time
import pickle

class Message:
    def __init__(self, origin):
        self.type = "long_msg"
        self.data = "X" * 3000
        self.origin = origin

    def __str__(self):
        return "%s %d" % (self.type, self.origin)

class MessagePassingProcess(Process):
    def __init__(self, processes, lock):
        self.lock = lock
        self.processes = processes
        with self.lock:
            self.id = len(list(processes.keys()))
            process_dict = {"id": self.id,
                            "address": ("127.0.0.1", 7000 + self.id),
                            "listening": False
                            }
            self.processes[self.id] = process_dict
            print("new process: processes[%s] = %s" % (self.id, processes[self.id]))
        self.queue = collections.deque()
        super(MessagePassingProcess, self).__init__()

    def run(self):
        print("Running")
        self.processes[self.id]["joinable"] = True
        inputs = []
        outputs = []
        server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        address = self.processes[self.id]["address"]
        print("Process %s binding to %s" % (self.id, address))
        server.bind(address)
        server.listen(5)
        inputs.append(server)
        process = self.processes[self.id]
        process["listening"] = True
        self.processes[self.id] = process
        print("Process %s now listening!(%s)" % (self.id, process))
        while inputs and len(list(self.processes.keys())) < 10:
            readable, writable, exceptional = select.select(inputs,
                                                            outputs,
                                                            inputs,
                                                            0.1)
            # read incoming messages
            for sock in readable:
                print("Process %s has a readable socket: %s" % (self.id, sock))
                if sock is server:
                    print("Process %s has a readable server socket: %s" %
                          (self.id, sock))
                    conn, addr = sock.accept()
                    conn.setblocking(0)
                    inputs.append(conn)
                else:
                    data = True
                    item = bytes()  # empty bytes object, to be added to
                    recvs = 0
                    while data:
                        data = sock.recv(1024)
                        item += data
                        recvs += 1
                    if len(item):
                        self.queue.append(item)
                        print("non server readable socket: recvd %d bytes in %d parts"
                              % (len(item), recvs))
                    else:
                        inputs.remove(sock)
                        sock.close()
                        print("non server readable socket: nothing to read")
            for sock in exceptional:
                print("exception occurred on socket %s" % (sock))
                inputs.remove(sock)
                sock.close()
            while len(self.queue):
                msg = pickle.loads(self.queue.pop())
                print("received:" + str(msg))
            # send a message to a random process:
            random_id = random.choice(list(self.processes.keys()))
            print("%s attempting to send message to %s" % (self.id, random_id))
            random_process = self.processes[random_id]
            if random_process["listening"]:
                random_address = random_process["address"]
                s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
                try:
                    s.connect(random_address)
                except socket.error:
                    print("%s failed to send to %s" % (self.id, random_id))
                else:
                    item = pickle.dumps(Message(self.id))
                    print("sending a total of %d bytes" % len(item))
                    s.sendall(item)
                finally:
                    s.close()
            # make a new process
            if random.random() < 0.1:
                mpp = MessagePassingProcess(self.processes, self.lock)
                mpp.start()
            else:
                time.sleep(1.0)
        print("process %d finished looping" % self.id)

if __name__ == "__main__":
    manager = Manager()
    processes = manager.dict()
    lock = Lock()
    # make just one process: it will make more
    mpp = MessagePassingProcess(processes, lock)
    mpp.start()
    # this doesn't join on all the other processes created
    # subsequently
    mpp.join()
The standard library socketserver is provided to help avoid programming select() manually. In this version, we start a socketserver in a separate thread so that each Process can do (well, pretend to do) computation in its main loop.
#!/usr/bin/env python3

# Each Node is an mp.Process. It opens a client-side socket to send a
# message to another Node. Each Node listens using a separate thread
# running a socketserver (so avoiding manual programming of select()),
# which itself starts a new thread to handle each incoming connection.
# The socketserver puts received messages on an mp.Queue, where they
# are picked up by the Node for processing once per loop. This setup
# allows the Node to do computation in its main loop.

import multiprocessing as mp
import threading, random, socket, socketserver, time, pickle, queue

class Message:
    def __init__(self, origin):
        self.type = "long_message"
        self.data = "X" * random.randint(0, 2000)
        self.origin = origin

    def __str__(self):
        return "Message of type %s, length %d from %d" % (
            self.type, len(self.data), self.origin)

class Node(mp.Process):
    def __init__(self, nodes, lock):
        super().__init__()

        # Add this node to the Manager.dict of node descriptors.
        # Write-access is protected by a Lock.
        self.nodes = nodes
        self.lock = lock
        with self.lock:
            self.id = len(list(nodes.keys()))
            host = "127.0.0.1"
            port = 7022 + self.id
            node = {"id": self.id, "address": (host, port), "listening": False}
            self.nodes[self.id] = node
            print("new node: nodes[%s] = %s" % (self.id, nodes[self.id]))

        # Set up socketserver.
        # don't know why collections.deque or queue.Queue don't work here.
        self.queue = mp.Queue()

        # This MixIn usage is directly from the python.org
        # socketserver docs
        class ThreadedTCPServer(socketserver.ThreadingMixIn,
                                socketserver.TCPServer):
            pass

        class HandlerWithQueue(socketserver.BaseRequestHandler):
            # Something of a hack: using class variables to give the
            # Handler access to this Node-specific data
            handler_queue = self.queue
            handler_id = self.id

            def handle(self):
                # could receive data in multiple chunks, so loop and
                # concatenate
                item = bytes()
                recvs = 0
                data = True
                while data:
                    data = self.request.recv(4096)
                    item += data
                    recvs += 1
                if len(item):
                    # Receive a pickle here and put it straight on
                    # queue. Will be unpickled when taken off queue.
                    print("%d: socketserver received %d bytes in %d recv()s"
                          % (self.handler_id, len(item), recvs))
                    self.handler_queue.put(item)

        self.server = ThreadedTCPServer((host, port), HandlerWithQueue)
        self.server_thread = threading.Thread(target=self.server.serve_forever)
        self.server_thread.setDaemon(True)  # Tell it to exit when Node exits.
        self.server_thread.start()
        print("%d: server loop running in thread %s" %
              (self.id, self.server_thread.getName()))

        # Now ready to receive
        with self.lock:
            # Careful: if we assign directly to
            # self.nodes[self.id]["listening"], the new value *won't*
            # be propagated to other Nodes by the Manager.dict. Have
            # to use this hack to re-assign the Manager.dict key.
            node = self.nodes[self.id]
            node["listening"] = True
            self.nodes[self.id] = node

    def send(self):
        # Find a destination. All listening nodes are eligible except self.
        dests = [node for node in self.nodes.values()
                 if node["id"] != self.id and node["listening"]]
        if len(dests) < 1:
            print("%d: no node to send to" % self.id)
            return
        dest = random.choice(dests)
        print("%d: sending to %s" % (self.id, dest["id"]))

        # send
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            s.connect(dest["address"])
        except socket.error:
            print("%s: failed to send to %s" % (self.id, dest["id"]))
        else:
            item = pickle.dumps(Message(self.id))
            s.sendall(item)
        finally:
            s.close()

    # Check our queue for incoming messages.
    def receive(self):
        while True:
            try:
                message = pickle.loads(self.queue.get(False))
                print("%d: received %s" % (self.id, str(message)))
            except queue.Empty:
                break

    def run(self):
        print("%d: in run()" % self.id)
        # Main loop. Loop until at least 10 Nodes exist. Because of
        # parallel processing we might get a few more
        while len(list(self.nodes.keys())) < 10:
            time.sleep(random.random() * 0.5)  # simulate heavy computation
            self.send()
            time.sleep(random.random() * 0.5)  # simulate heavy computation
            self.receive()
            # maybe make a new node
            if random.random() < 0.1:
                new = Node(self.nodes, self.lock)
                new.start()
        # Seems natural to call server_thread.shutdown() here, but it
        # hangs. But since we've set the thread to be a daemon, it
        # will exit when this process does.
        print("%d: finished" % self.id)

if __name__ == "__main__":
    manager = mp.Manager()
    nodes = manager.dict()
    lock = mp.Lock()
    # make just one node: it will make more
    node0 = Node(nodes, lock)
    node0.start()
    # This doesn't join on all the other nodes created subsequently.
    # But everything seems to work out ok.
    node0.join()
