I have a simple set of objects for managing a background process using the Actor model. In this case I'm concerned with only a single actor. However, it is important that the actor maintains a persistent state between receiving messages.
The objects work by appending messages to a queue in the main thread. Then the main thread can execute as it pleases. Every once in a while it checks to see if anything new is on the results queue. When this happens it knows the actor has completed the task.
I want to know if this can be implemented in a cleaner way using Future objects. My current implementation is as follows:
import multiprocessing
import time
import collections
class Client(object):
    """
    Object used in the main thread to communicate with background actors.

    Messages are put on ``task_queue``; responses are read back from
    ``result_queue`` and also remembered in ``result_history`` (the most
    recent 1000 responses).
    """

    def __init__(client):
        client.manager = None
        client.start()

    def __del__(client):
        # Best-effort shutdown: ask a still-running manager to stop.
        if client.manager and client.manager.is_alive():
            client.get(StopIteration)

    def start(client):
        """Create the queues and start the manager process."""
        client.task_queue = multiprocessing.JoinableQueue()
        client.result_queue = multiprocessing.Queue()
        client.result_history = collections.deque(maxlen=1000)
        client.manager = Manager(client.task_queue, client.result_queue)
        client.manager.start()

    def post(client, payload):
        """Queue ``payload`` for the actor without waiting for a response."""
        client.task_queue.put(payload)

    def get(client, payload):
        """Send ``payload`` and block until the next response arrives."""
        # Exhaust any existing results so that the next result read is the
        # one produced for this payload.
        for _ in client.results():
            pass
        # Post the command
        client.post(payload)
        # Wait for a response
        return client.wait_for_result()

    def wait_for_result(client):
        """Poll the result queue until one response is available."""
        wait = 0
        while True:
            for result in client.results():
                return result
            time.sleep(wait)
            # Back off gradually, but never sleep more than one second per
            # poll.  (The original used max(), which jumped straight to a
            # one second sleep and then kept growing without bound.)
            wait = min(1, wait + .01)

    def results(client):
        """ Look at results put on the result_queue """
        while not client.result_queue.empty():
            item = client.result_queue.get()
            client.result_history.append(item)
            yield item
class Manager(multiprocessing.Process):
    """
    Manager manages a single actor.

    A manager sends messages to an actor and appends a response to the
    result queue when the actor is done.
    """

    def __init__(self, task_queue, result_queue):
        super(Manager, self).__init__()
        self.task_queue = task_queue
        self.result_queue = result_queue

    def run(self):
        """ main loop """
        # The actor lives in this (separate) process and keeps its state
        # between messages.
        worker = Actor()
        shutting_down = False
        while not shutting_down:
            message = self.task_queue.get()
            print('Sending message={} to actor'.format(message))
            try:
                if message is StopIteration:
                    # StopIteration is the shutdown sentinel.
                    content, shutting_down = 'shutdown', True
                else:
                    content = worker.handle(message)
            except Exception as ex:
                print('Error handling message')
                status, content = 'error', repr(ex)
            else:
                status = 'success'
                print('Actor finished handling message={}'.format(message))
            # Mark the task finished, then send back the result.
            self.task_queue.task_done()
            self.result_queue.put({'status': status, 'content': content})
        print('Manager is shutting down')
class Actor(object):
    """
    An actor is given messages from its manager and performs actions in a
    single thread. Its state is private and threadsafe.
    """

    def __init__(actor):
        actor.state = {}

    def handle(actor, message):
        """Perform the action named by ``message['action']`` and return its result.

        Raises ValueError for non-dict messages, a missing action, or an
        unknown action.
        """
        if not isinstance(message, dict):
            raise ValueError('Commands must be passed in a message dict')
        # Work on a copy so the caller's dict is left untouched.
        remaining = message.copy()
        action = remaining.pop('action', None)
        if action is None:
            raise ValueError('message must have an action item')

        if action == 'hello world':
            return 'hello world'
        if action == 'debug':
            return actor
        if action == 'start':
            actor.state['a'] = 3
            return 'started'
        if action == 'add':
            # Deliberately long-running: ten million single increments.
            for _ in range(10000000):
                actor.state['a'] += 1
            return 'added', actor.state['a']
        raise ValueError('Unknown action=%r' % (action,))
def test():
    """Send three messages to the actor, printing each response in turn."""
    print('Starting Test')
    client = Client()
    print('About to send messages')
    # Get sends a message and then blocks until the response is returned.
    for action in ('hello world', 'start', 'add'):
        print(client.get({'action': action}))
    print('Test completed')


if __name__ == '__main__':
    test()
I would like to modify this code to use Future objects. Whenever the client is about to send a message, is it possible to create a Future object, then send that over the multiprocessing queue? Then the manager could execute the actors function and then modify the state of the Future object instead of appending a result to the result_queue.
This seems like it would offer a cleaner way to associate results with messages sent to the actor. It would also remove the need for the get and results methods I have in the first example.
Intuitively, I want it to look something like this:
from concurrent import futures
import multiprocessing
class Client(object):
    """
    Object used in the main thread to communicate with background actors

    NOTE(review): the text following this listing reports that this sketch
    does not execute correctly. `post` places the Future object itself on a
    multiprocessing queue, so it would have to be pickled to reach the
    manager process -- confirm whether that can work before relying on it.
    """
    def __init__(client):
        client.manager = None
        # Start the manager process immediately.
        client.start()
    def __del__(client):
        # Ask a still-running manager to shut down; the returned future is
        # not waited on.
        if client.manager and client.manager.is_alive():
            f = client.post(StopIteration)
    def start(client):
        # Only a task queue is created here; no result queue exists in this
        # version.
        client.task_queue = multiprocessing.JoinableQueue()
        client.manager = Manager(client.task_queue)
        client.manager.start()
    def post(client, payload):
        # Pair a fresh Future with the payload and queue both for the manager.
        f = futures.Future()
        client.task_queue.put((f, payload))
        return f
class Manager(multiprocessing.Process):
    """
    Manager manages a single actor.

    NOTE(review): the author states after this listing that the sketch does
    not run and that the Future methods called in `run` are documented as
    being for the pool/executor to call. Presumably the Future taken off the
    queue is not the object the client holds -- TODO confirm.
    """
    def __init__(self, task_queue):
        super(Manager, self).__init__()
        self.task_queue = task_queue
    def run(self):
        """ main loop """
        terminate = False
        # Create Actor in separate process and send messages to it
        actor = Actor()
        while not terminate:
            # Each queue item is a (future, message) pair.
            f, message = self.task_queue.get()
            f.set_running_or_notify_cancel()
            print('Sending message={} to actor'.format(message))
            try:
                # StopIteration is used as the shutdown sentinel.
                if message is StopIteration:
                    content = 'shutdown'
                    terminate = True
                else:
                    content = actor.handle(message)
            except Exception as ex:
                print('Error handling message')
                status = 'error'
                content = repr(ex)
            else:
                status = 'success'
                print('Actor finished handling message={}'.format(message))
            # Send back result
            response = {
                'status': status,
                'content': content
            }
            self.task_queue.task_done()
            # Errors are reported through the response dict, not through
            # Future.set_exception.
            f.set_result(response)
        print('Manager is shutting down')
class Actor(object):
    """
    An actor is given messages from its manager and performs actions in a
    single thread. Its state is private and threadsafe.
    """

    def __init__(actor):
        actor.state = {}

    def handle(actor, message):
        """Dispatch on ``message['action']`` and return the action's result.

        A ValueError is raised when the message is not a dict, has no
        action, or names an action that is not known.
        """
        if not isinstance(message, dict):
            raise ValueError('Commands must be passed in a message dict')
        params = message.copy()  # never mutate the caller's message
        action = params.pop('action', None)
        if action is None:
            raise ValueError('message must have an action item')

        known = ('hello world', 'debug', 'start', 'add')
        if action not in known:
            raise ValueError('Unknown action=%r' % (action,))

        if action == 'hello world':
            result = 'hello world'
        elif action == 'debug':
            result = actor
        elif action == 'start':
            actor.state['a'] = 3
            result = 'started'
        else:
            # 'add': deliberately slow, one increment at a time.
            for _ in range(10000000):
                actor.state['a'] += 1
            result = ('added', actor.state['a'])
        return result
def test():
    """Post three messages and print each future's result in order."""
    print('Starting Test')
    client = Client()
    print('About to send messages')
    for action in ('hello world', 'start', 'add'):
        # Each message is posted only after the previous result was printed.
        future = client.post({'action': action})
        print(future.result())
    print('Test completed')


if __name__ == '__main__':
    test()
However, this obviously doesn't execute correctly. I believe I need some sort of process pool manager to create the futures for me (because I'm calling methods that are documented saying that only the pool manager should call them). But I'm not quite sure how to go about doing that. I've used futures before to map singleton worker functions, but I've never managed an external process with state before.
Can someone help me out with this? Perhaps there is an even easier way to go about implementing this with Futures?
So, I went ahead and just made a library to do this:
https://github.com/Erotemic/futures_actors
Related
I am consuming low latency market data and I'm trying to measure how many streams I can consume without my code slowing down due to the websocket message queue building up. My understanding is that messages are received by the websocket and queued until ws.recv() is called, which processes them one at a time in the order they were received. Under normal circumstances, my code is definitely fast enough to handle the messages, but when a burst of messages comes all at once I would imagine that the queue fills up. I would expect that the queue would only be filled up for 5 or 10 milliseconds, but it is very important that I know this. Is there a way to measure how many messages are waiting in the queue?
I'm attaching a snippet of the code I'm using for context, but the relevant part is just looping over
data = self.ws.recv()
class WebsocketClient(object):
    """Websocket feed client that listens on a background thread.

    Subclasses customise behaviour by overriding the ``on_open``,
    ``on_message``, ``on_error`` and ``on_close`` hooks.
    """

    def __init__(
            self,
            url="",
            products=None,
            message_type="subscribe",
            should_print=True,
            # NOTE(review): the signature was cut off in the original
            # snippet. The parameters below are reconstructed from the
            # attributes the body assigns; their order and defaults are
            # assumptions -- confirm against the real class.
            auth=False,
            api_key="",
            api_secret="",
            api_passphrase="",
            channels=None):
        self.url = url
        self.products = products
        self.channels = channels
        self.type = message_type
        self.stop = True
        self.error = None
        self.ws = None
        self.thread = None
        self.auth = auth
        self.api_key = api_key
        self.api_secret = api_secret
        self.api_passphrase = api_passphrase
        self.should_print = should_print

    def start(self):
        """Connect and start listening on a background thread."""
        def _go():
            self._connect()
            self._listen()
            self._disconnect()

        self.stop = False
        self.on_open()
        self.thread = Thread(target=_go)
        # The keepalive thread is started later, from _listen().
        self.keepalive = Thread(target=self._keepalive)
        self.thread.start()

    def _connect(self):
        """Normalise the settings, open the socket and send the subscription."""
        if self.products is None:
            self.products = []
        elif not isinstance(self.products, list):
            self.products = [self.products]

        # endswith() also copes with an empty url, where url[-1] would raise.
        if self.url.endswith("/"):
            self.url = self.url[:-1]

        if self.channels is None:
            self.channels = [{"name": "ticker", "product_ids": list(self.products)}]
        # Both branches of the original built exactly this dict.
        sub_params = {'type': 'subscribe', 'product_ids': self.products, 'channels': self.channels}

        if self.auth:
            auth_headers = get_auth_headers('/users/self/verify', 'GET', '')
            sub_params['signature'] = auth_headers['CB-ACCESS-SIGN']
            sub_params['key'] = auth_headers['CB-ACCESS-KEY']
            sub_params['passphrase'] = auth_headers['CB-ACCESS-PASSPHRASE']
            sub_params['timestamp'] = auth_headers['CB-ACCESS-TIMESTAMP']

        try:
            self.ws = create_connection(self.url)
            self.ws.send(json.dumps(sub_params))
        except Exception:
            # A failed connection stops the client but does not crash the
            # thread. KeyboardInterrupt/SystemExit are no longer swallowed.
            traceback.print_exc()
            self.stop = True

    def _keepalive(self, interval=10):
        """Ping the server every ``interval`` seconds while connected."""
        # self.ws stays None when _connect() failed.
        while self.ws is not None and self.ws.connected:
            self.ws.ping("keepalive")
            time.sleep(interval)

    def _listen(self):
        """Receive messages one at a time until ``self.stop`` is set."""
        self.keepalive.start()
        while not self.stop:
            try:
                data = self.ws.recv()
                msg = json.loads(data)
            except Exception as e:
                # Covers both malformed JSON (ValueError) and socket errors.
                self.on_error(e)
            else:
                self.on_message(msg)

    def _disconnect(self):
        """Close the socket, wait for the keepalive thread, fire on_close."""
        try:
            if self.ws:
                self.ws.close()
        except WebSocketConnectionClosedException:
            pass
        finally:
            self.keepalive.join()
        self.on_close()

    def close(self):
        """Stop listening and wait for the background thread to finish."""
        self.stop = True   # will only disconnect after next msg recv
        self._disconnect()  # force disconnect so threads can join
        self.thread.join()

    def on_open(self):
        if self.should_print:
            print("-- Subscribed! --\n")

    def on_close(self):
        if self.should_print:
            print("\n-- Socket Closed --")

    def on_message(self, msg):
        # *** my logic ***  (placeholder kept from the original snippet)
        pass

    def on_error(self, e, data=None):
        """Record the error, stop the listen loop and report it."""
        self.error = e
        self.stop = True
        print('{} - data: {}'.format(e, data))
You can measure the length of the incoming message buffer by calling
len(self.ws.messages)
There is a background asyncio task that reads the StreamReader bytes buffer and puts messages into the ws.messages deque.
Messages deque is limited by max_queue parameter of client.connect method:
https://websockets.readthedocs.io/en/stable/reference/client.html#websockets.client.connect
Here are the details:
https://websockets.readthedocs.io/en/stable/topics/design.html#backpressure
I tried to implement a 5 element buffer through threading and a list.
import threading
class Buffer1:
    """Bounded buffer guarded by a condition variable.

    ``put`` blocks while the buffer holds ``size`` items and ``take``
    blocks while it is empty, so a second thread is needed to unblock a
    waiting caller. ``take`` returns the most recently added item.
    """

    def __init__(self, size):
        # The original ignored ``size`` and hard-coded a capacity of 5.
        self.size = size
        self.empty = True
        self.content = None
        self.lock = threading.Condition()
        self.list = []

    def take(self):
        """Remove and return the newest item, waiting until one exists."""
        with self.lock:
            while not self.list:
                self.lock.wait()
            item = self.list.pop()
            # Keep the flag in step with the contents (it was never reset).
            self.empty = not self.list
            self.lock.notify_all()
            return item

    def put(self, v):
        """Append ``v``, waiting while the buffer is full."""
        with self.lock:
            while len(self.list) >= self.size:
                self.lock.wait()
            self.list.append(v)
            self.empty = False
            self.lock.notify_all()

    def show_list(self):
        """Return the underlying list (not a copy)."""
        return self.list


a = Buffer1(5)
a.put(7)
Theoretically it works, but when you exceed the limitations of the buffer, either by buffering 6 values or by trying to „take()“ when there is no value buffered, the IDE becomes unresponsive. How could I go about fixing this?
You are using only one thread to add elements in the buffer, so the list contains 5 items, and your main thread is waiting indefinitely on self.lock.wait(). You can use another thread that would take some elements in parallel and then it will notify the producer thread.
For example, creation of a consumer thread that takes 5 items:
def consume(buffer):
    """Take five items from ``buffer``, pausing two seconds after each."""
    import time
    worker = threading.current_thread()
    for _ in range(5):
        item = buffer.take()
        print(worker, "consume", item)
        time.sleep(2)
    print(worker, "Bye")
# One consumer thread drains the buffer while the main thread produces.
buffer = Buffer1(5)
t = threading.Thread(name="consumer", target=consume, args=(buffer,))
t.start()
# The sixth put blocks until the consumer has taken an item.
for value in range(1, 7):
    buffer.put(value)
print(buffer.show_list())
... the IDE becomes unresponsive. How could I go about fixing this?
You only showed adding to the buffer from the main thread and nothing ever takes anything out.
If the buffer gets filled up or becomes empty the next put/take will cause its Condition (lock) to wait until something notifies it to continue. I didn't see any of that signaling in your example.
The buffer is a shared resource. The buffer and the threads that use it need to have good control so that everyone can stay out of everyone else's way and enough logic to keep from getting stuck somewhere.
Presumably you need a thread that puts stuff into the buffer and a thread that takes stuff out of the buffer - both having ample signaling to notify everyone when they are done messing with the buffer.
Set up logging so that the program execution could be traced with log messages.
Buffer1 changes:
Changed the list to a collections.deque to simplify things a bit.
Added properties for empty and full
Added an Event attribute to stop putting/taking when the process gets shut down.
Added a timeout while waiting to put/take to forestall any timing issues when the threads are shut down
Added notifications for empty and full conditions.
Made two threads: one to add to the buffer and one to take from the buffer.
Each will add/take while its Event is not set.
In each iteration a random number of items are taken or added.
When taking/putting the buffer's Condition is acquired and all Waiter's are notified when complete.
In the main thread:
An Event is created - for signaling the thread to quit
A Timer is created to limit thread execution time - when it times out its callback function sets the Event and uses the buffer's Condition (lock) to notify anyone that is waiting and free them up.
The threads are created, started, and joined.
import threading
import collections
import random
import string
import time
import logging
import sys
# logging setup
root = logging.getLogger()
root.setLevel(logging.INFO)
formatter = logging.Formatter(
    fmt='%(asctime)s.%(msecs)03d %(message)s',
    datefmt='%S',
)


class WarningFilter(logging.Filter):
    """Let through only records logged at exactly WARNING level."""

    def filter(self, record):
        return logging.WARNING == record.levelno


class InfoFilter(logging.Filter):
    """Let through only records logged at exactly INFO level."""

    def filter(self, record):
        return logging.INFO == record.levelno


# INFO records go to stdout, WARNING records go to stderr.
for _stream, _level, _filter in (
        (sys.stdout, logging.INFO, InfoFilter()),
        (sys.stderr, logging.WARNING, WarningFilter())):
    handler = logging.StreamHandler(_stream)
    handler.setLevel(_level)
    handler.setFormatter(formatter)
    handler.addFilter(_filter)
    root.addHandler(handler)
# logging setup end
class Buffer1:
    '''FILO buffer.

    Bounded, condition-guarded buffer. ``put`` and ``take`` wait (in five
    second slices) while the buffer is full or empty and give up as soon
    as the shutdown event ``evt`` is set.
    '''

    def __init__(self, size, evt):
        self.content = None
        self.size = size
        self.evt = evt
        self.lock = threading.Condition()
        self.list = collections.deque()

    # The decorators below were garbled to "#property" in the listing. As
    # plain methods, ``self.full`` was always truthy and put() never left
    # its wait loop.
    @property
    def full(self):
        """True when the buffer holds ``size`` items."""
        return len(self.list) >= self.size

    @property
    def empty(self):
        """True when the buffer holds no items."""
        # The original returned bool(self.list), i.e. the opposite of the
        # property's name.
        return not self.list

    def take(self):
        """Return the newest item, or None if shut down while waiting."""
        with self.lock:
            while self.empty:
                root.warning('buffer empty waiting to take')
                self.lock.wait(timeout=5)
                if self.evt.is_set():
                    item = None
                    break
            else:
                item = self.list.pop()
            self.lock.notify_all()
        return item

    def put(self, v):
        """Append ``v``; return False if shut down while waiting for room."""
        success = False
        with self.lock:
            while self.full:
                root.warning('buffer full waiting to put')
                self.lock.wait(timeout=5)
                if self.evt.is_set():
                    break
            else:
                self.list.append(v)
                success = True
            self.lock.notify_all()
        return success

    def show_list(self):
        """Return the underlying deque (not a copy)."""
        return self.list
class Prod(threading.Thread):
    '''Puts stuff onto buffer, quits on Event.

    Contrived toy - periodically puts random n items in buffer.
    '''

    def __init__(self, buffer, evt):
        super().__init__(name='producer')
        self.buffer = buffer
        self.evt = evt

    def run(self):
        rounds = 0
        stopped = self.evt.is_set
        cond = self.buffer.lock
        while not stopped():
            count = random.randint(1, 9)
            letters = collections.deque(
                random.sample(string.ascii_letters, count))
            root.info(f'{self.name} putting {count}')
            with cond:
                # Push the whole batch, giving up early on shutdown.
                while letters and not stopped():
                    letter = letters.popleft()
                    root.info(f'{self.name} -----> {letter}')
                    if not self.buffer.put(letter):
                        root.warning(f'{self.name} last put failed')
                cond.notify_all()
            time.sleep(.04)
            rounds += 1
        root.info(f'{self.name} dying n={rounds}')
        # Wake anyone still waiting on the buffer before exiting.
        with cond:
            cond.notify_all()
        root.info(f'{self.name} is done')
class Cons(threading.Thread):
    '''Takes stuff off of buffer, quits on Event set.

    Contrived toy - periodically takes random n items from buffer.
    '''

    def __init__(self, buffer, evt):
        super().__init__(name='consumer')
        self.buffer = buffer
        self.evt = evt

    def run(self):
        rounds = 0
        stopped = self.evt.is_set
        cond = self.buffer.lock
        while not stopped():
            wanted = random.randint(1, 9)
            root.info(f'{self.name} taking {wanted}')
            with cond:
                # Take the whole batch, giving up early on shutdown.
                while wanted > 0 and not stopped():
                    item = self.buffer.take()
                    root.info(f'{self.name} <----- {item}')
                    wanted -= 1
                cond.notify_all()
            time.sleep(.04)
            rounds += 1
        root.info(f'{self.name} dying n={rounds}')
        # Wake anyone still waiting on the buffer before exiting.
        with cond:
            cond.notify_all()
        root.info(f'{self.name} is done')
if __name__ == '__main__':
    # A single Event shuts down the whole process.
    evt = threading.Event()
    buffer = Buffer1(5, evt)

    def kill(evt=evt, buffer=buffer):
        """Set the shutdown event and wake every thread waiting on the buffer."""
        root.warning('killing everything')
        evt.set()
        with buffer.lock:
            buffer.lock.notify_all()

    # Stop the toy example after five seconds.
    t = threading.Timer(5, kill)
    t.start()

    p1 = Prod(buffer, evt)
    c1 = Cons(buffer, evt)
    # Consumer starts first; producer is joined first.
    for worker in (c1, p1):
        worker.start()
    for worker in (p1, c1):
        worker.join()
    print('exit')
Here is another take using asyncio instead of threads to exercise your buffer.
import asyncio
import collections
import random
import string
import time
import logging
import sys
# logging setup
root = logging.getLogger()
root.setLevel(logging.INFO)
formatter = logging.Formatter(datefmt='%S',
                              fmt='%(asctime)s.%(msecs)03d %(message)s')


class WarningFilter(logging.Filter):
    """Accept a record only when its level is exactly WARNING."""

    def filter(self, record):
        level = record.levelno
        return level == logging.WARNING


class InfoFilter(logging.Filter):
    """Accept a record only when its level is exactly INFO."""

    def filter(self, record):
        level = record.levelno
        return level == logging.INFO


def _attach(stream, level, record_filter):
    """Build a filtered stream handler, add it to the root logger, return it."""
    new_handler = logging.StreamHandler(stream)
    new_handler.setLevel(level)
    new_handler.setFormatter(formatter)
    new_handler.addFilter(record_filter)
    root.addHandler(new_handler)
    return new_handler


# INFO goes to stdout, WARNING goes to stderr.
handler = _attach(sys.stdout, logging.INFO, InfoFilter())
handler = _attach(sys.stderr, logging.WARNING, WarningFilter())
class Buffer:
    '''FILO buffer.

    Bounded buffer for asyncio tasks, guarded by an ``asyncio.Condition``.
    ``put`` waits while the buffer is full and ``take`` waits while it is
    empty; both give up once the shutdown event is set.
    '''

    def __init__(self, size, evt):
        self.content = None
        self.size = size
        self.stop_evt = evt
        self.lock = asyncio.Condition()
        self.list = collections.deque()

    def full(self):
        return len(self.list) >= self.size

    def not_full(self):
        return len(self.list) < self.size

    def empty(self):
        return not self.list

    def not_empty(self):
        return bool(self.list)

    async def take(self):
        """Return the newest item, or None if shut down while waiting."""
        async with self.lock:
            while self.empty():
                await self.lock.wait()
                if self.stop_evt.is_set():  # shutting down
                    val = None
                    # Without this break the loop waited again and the
                    # None result could never be returned.
                    break
            else:
                val = self.list.pop()
            self.lock.notify_all()
        return val

    async def put(self, v):
        """Append ``v``; return False if shut down while waiting for room."""
        success = False
        async with self.lock:
            while self.full():
                await self.lock.wait()
                if self.stop_evt.is_set():  # shutting down
                    break
            else:
                self.list.append(v)
                success = True
            self.lock.notify_all()
        return success

    def show_list(self):
        """Return the underlying deque (not a copy)."""
        return self.list
async def random_stuff():
    """Return a deque of one to nine distinct random ASCII letters."""
    count = random.randint(1, 9)
    return collections.deque(random.sample(string.ascii_letters, count))
async def produce(buffer,stop_evt,name):
    '''Puts random batches onto buffer until cancelled.

    Returns the list of items that were successfully put.
    '''
    delivered = []
    try:
        while True:
            batch = await random_stuff()
            root.warning(f'producer{name} putting {len(batch)}')
            while batch:
                letter = batch.popleft()
                root.info(f'producer{name} -----> {letter}')
                if await buffer.put(letter):
                    delivered.append(letter)
                else:
                    root.warning(f'producer{name} failed to put {letter}')
            await asyncio.sleep(.03)
    except asyncio.CancelledError:
        # Cancellation is the normal way to stop; fall through and report.
        root.warning('producer canceled')
    root.info(f'producer{name} dying n={len(delivered)}')
    root.info(f'producer{name} is done')
    return delivered
async def consume(buffer, stop_evt, name):
    '''Takes stuff off of buffer until cancelled.

    Contrived toy - periodically takes random n items from buffer.
    Returns the list of items taken.
    '''
    taken = []
    try:
        while True:
            wanted = random.randint(1,9)
            banner = f'consumer{name} taking {wanted}'
            root.warning(f'{banner:>38}')
            for _ in range(wanted):
                item = await buffer.take()
                taken.append(item)
                line = f'consumer{name} <----- {item}'
                root.info(f'{line:>38}')
            await asyncio.sleep(.02)
    except asyncio.CancelledError:
        # Cancellation is the normal way to stop; fall through and report.
        root.warning('consumer canceled')
    root.info(f'consumer{name} dying n={len(taken)}')
    root.info(f'consumer{name} is done')
    return taken
async def timer(n,buffer,evt, tasks):
    '''Sleep n seconds, set the shutdown event, then cancel every task.'''
    root.warning('timer started')
    await asyncio.sleep(n)
    evt.set()
    for notice in ('timed out - event set', 'canceling tasks'):
        root.warning(notice)
    for running in tasks:
        running.cancel()
async def main():
    '''Run one producer and one consumer for five seconds.

    Returns (items put, items taken, items left in the buffer).
    '''
    asyncio.get_running_loop().set_debug(True)
    # use an Event to shut down the whole process
    evt = asyncio.Event()
    buffer = Buffer(5,evt)
    put_task = asyncio.create_task(produce(buffer,evt,1))
    take_task = asyncio.create_task(consume(buffer,evt,1))
    workers = [put_task, take_task]
    timer_task = asyncio.create_task(timer(5,buffer,evt,workers))
    root.info('tasks created')
    # The timer cancels both workers; each then returns what it handled.
    await timer_task
    puts = await put_task
    takes = await take_task
    print('exit')
    return puts,takes,buffer.list


if __name__ == '__main__':
    puts, takes, remains = (
        collections.Counter(items) for items in asyncio.run(main()))
    #print(remains == (puts-takes))
I'm beginning with asyncio and I'm struggling to await multiple return values from different functions.
I want to do something like threads but using asyncio, since I'm using async libraries from Azure.
I need to launch some functions from my Thread class; I want to run them concurrently, await their results and check whether the results are good (I'm in a Django test).
Here my current code:
class DeviceThread(threading.Thread):
    """Thread that runs a set of device checks concurrently in its own event loop.

    Each check coroutine returns True on success and False on failure
    (failures are logged through ``info``); ``perform`` asserts on every
    result through ``test_case``.
    """

    def __init__(self, test_case, device_id, module_id, cert_prim, cert_sec):
        threading.Thread.__init__(self)
        self.test_case = test_case
        self.device_id = device_id
        self.module_id = module_id
        self.cert_prim = cert_prim
        self.cert_sec = cert_sec

    async def get_twin(self, device):
        try:
            # 0 - Get the twin for the device
            twin = await device.get_twin()
            info(twin)
            return twin['desired']['test'] == "test1device"
        except Exception:
            info(format_exc())
            return False

    async def receive_message_cloud(self, device):
        try:
            # 1 - Waiting for the first message from the server (blocking call)
            message = await device.receive_message_cloud()
            return message.data.decode() == "testmessage1"
        except Exception:
            info(format_exc())
            return False

    async def send_message_cloud(self, device):
        try:
            # 2 - Sending a message to the backend
            await device.send_message_cloud("testmessage1")
            return True
        except Exception:
            info(format_exc())
            return False

    async def receive_direct_call_and_respond(self, device):
        try:
            # 3 - Receiving a direct call method from the backend
            method_request = await device.receive_method_request()
            # 4 - Sending result of direct call
            payload = {"test": "testok"}
            status = 200
            await device.send_method_response(method_request, status, payload)
            return (method_request.name == "testmethod1") and (method_request.payload == "testpayload1")
        except Exception:
            info(format_exc())
            return False

    async def update_twin_properties(self, device):
        try:
            # 5 - Updating twin properties
            # The original built a set ({'test', 'test2device'}); a
            # key/value mapping is presumably what was meant -- confirm.
            new_properties = {'test': 'test2device'}
            outcome = device.patch_twin_reported_properties(new_properties)
            # NOTE(review): the original never awaited this call. Await it
            # when it turns out to be asynchronous, so the patch is sent.
            if asyncio.iscoroutine(outcome) or asyncio.isfuture(outcome):
                await outcome
            return True
        except Exception:
            info(format_exc())
            return False

    async def perform(self, device):
        """Run every check concurrently, then assert on each result."""
        checks = {
            'get_twin': self.get_twin(device),
            'receive_message_cloud': self.receive_message_cloud(device),
            'send_message_cloud': self.send_message_cloud(device),
            'receive_direct_call_and_respond': self.receive_direct_call_and_respond(device),
            'update_twin_properties': self.update_twin_properties(device),
        }
        # gather() schedules all coroutines on the running loop and returns
        # their results in the same order.
        results = await asyncio.gather(*checks.values())
        for check_name, passed in zip(checks, results):
            self.test_case.assertTrue(passed, msg=check_name)

    def run(self):
        # One loop per thread. The original installed this loop and then
        # called asyncio.run(), which creates a second loop; anything bound
        # to the first one then fails with "attached to a different loop".
        loop = asyncio.new_event_loop()
        try:
            asyncio.set_event_loop(loop)
            # Write the cert content to disk just long enough to authenticate.
            # NOTE(review): the fixed file name is shared by all threads.
            with open("cert.pem", "w") as cert_file:
                cert_file.write(self.cert_prim)
            try:
                # Creating a device
                device = IOTHubDevice()
                device.authenticate_device(self.device_id, "cert.pem")
            finally:
                # Remove the cert even when authentication fails.
                remove("cert.pem")
            loop.run_until_complete(self.perform(device))
        except Exception:
            info(format_exc())
            self.test_case.assertFalse(True)
        finally:
            loop.close()
As you've seen I'm in a thread and I want to verify some function of Azure IOTHub.
But I get this error :
RuntimeError: Task got Future attached to a different loop
So my question is : how can I get all these tasks running in the loop and get their individual results ?
Thanks for your answer !
I am dealing with the following problem:
I've implemented a dummy 'Thing' class that sleeps for 10 seconds and logs a message ('foo'). This class is instantiated in a worker function for a Processes Pool and the 'foo' method that implements the above mentioned logic is called.
What I want to achieve is a custom signal handling: as long as the processes haven't terminated, if CTRL+C (SIGINT) is sent, each process will log the signal and they will immediately terminate.
Half of the logic is working: while each process is sleeping, on SIGINT, they'll be interrupted and the Pool will be closed.
Problem: if ALL the processes end successfully and SIGINT is sent, the message will be logged but the Pool won't be closed.
Code:
import logging
import signal
import os
import time
from multiprocessing import Pool, current_process
logger = logging.getLogger('test')
# Maps each signal value to its name (e.g. SIGINT), skipping the SIG_*
# handler constants. Names are walked in reverse alphabetical order, so for
# aliases sharing a value the alphabetically first name wins.
SIGNAL_NAMES = {
    value: name
    for name, value in sorted(signal.__dict__.items(), reverse=True)
    if name.startswith('SIG') and not name.startswith('SIG_')
}
class Thing(object):
    """Dummy unit of work: sleeps ten seconds, then logs a message."""

    def __init__(self, my_id):
        self.my_id, self.logger = my_id, logging.getLogger(str(my_id))

    def foo(self):
        """Sleep for ten seconds and log that the wait is over."""
        time.sleep(10)
        self.logger.info('[%s] Foo after 10 secs!', self.my_id)
class Daemon(object):
    """Feeds generated IDs to a process pool until told to stop by SIGINT."""

    def __init__(self, no_processes, max_count):
        signal.signal(signal.SIGINT, self.stop)
        self.done = False
        self.count = 0
        self.max_count = max_count
        self.pool = Pool(no_processes, initializer=self.pool_initializer)

    def stop(self, signum, _):
        """ Stop function for Daemon """
        sig = SIGNAL_NAMES.get(signum) or signum
        logger.info('[Daemon] Stopping (received signal %s', sig)
        self.done = True

    def _generate_ids(self):
        """ Generator function of the IDs for the Processes Pool """
        # Yields at most max_count IDs, then idles (one second per turn)
        # until self.done is set.
        while not self.done:
            if self.count < self.max_count:
                my_id = "ID-{}".format(self.count)
                logger.info('[Daemon] Generated ID %s', my_id)
                time.sleep(3)
                yield my_id
                self.count += 1
            time.sleep(1)

    def run(self):
        """ Main daemon run function """
        pid = os.getpid()
        logger.info('[Daemon] Started running on PID %s', str(pid))
        my_ids = self._generate_ids()
        for res in self.pool.imap_unordered(run_thing, my_ids):
            logger.info("[Daemon] Finished %s", res or '')
        logger.info('[Daemon] Closing & waiting processes to terminate')
        self.pool.close()
        self.pool.join()

    def pool_initializer(self):
        """ Pool initializer function """
        signal.signal(signal.SIGINT, self.worker_signal_handler)

    # The decorator was garbled to "#staticmethod" in the listing. Without
    # it the bound method receives (self, signum, frame) and the handler
    # fails with a TypeError.
    @staticmethod
    def worker_signal_handler(signum, _):
        """ Signal handler for the Process worker """
        global WORKER_EXITING
        sig = SIGNAL_NAMES.get(signum) or signum
        cp = current_process()
        logger.info("[%s] Received in worker %s signal %s", WORKER_THING_ID or '', str(cp), sig)
        WORKER_EXITING = True
# Per-worker state: set by the worker's SIGINT handler / the running task.
WORKER_EXITING = False
WORKER_THING_ID = None


def run_thing(arg):
    """ Worker function for processes """
    global WORKER_THING_ID
    # A worker that has already seen SIGINT refuses new work.
    if WORKER_EXITING:
        return
    WORKER_THING_ID = arg
    failure = None
    logger.info('[%s] START Thing foo-ing', arg)
    logging.getLogger('Thing-{}'.format(arg)).setLevel(logging.INFO)
    try:
        Thing(arg).foo()
    except Exception as exc:
        failure = exc
    finally:
        WORKER_THING_ID = None
    logger.info('[%s] STOP Thing foo-ing', arg)
    if failure:
        logger.error('[%s] EXCEPTION on Thing foo-ing: %s', arg, failure)
    return arg
if __name__ == '__main__':
    # Four worker processes, three IDs to hand out.
    logging.basicConfig()
    logger.setLevel(logging.INFO)
    daemon = Daemon(no_processes=4, max_count=3)
    daemon.run()
Your problem is logic in function _generate_ids(). The function never ends so pool.imap_unordered() never finishes by itself, only needs to be interrupted by CTRL-C.
Change it for something like this:
def _generate_ids(self):
    """ Generator function of the IDs for the Processes Pool """
    # Finite: yields at most max_count IDs and stops early once done is set.
    remaining = self.max_count
    while remaining > 0:
        remaining -= 1
        time.sleep(3)
        my_id = "ID-{}".format(self.count)
        logger.info('[Daemon] Generated ID %s', my_id)
        if self.done:
            return
        self.count += 1
        yield my_id
And the processes end by themselves normally.
I'm trying to find a way to start a new Process and get its output if it takes less than X seconds. If the process takes more time, I would like to ignore the Process result, kill the Process and carry on.
I basically need to add a timer to the code below. Not sure if there's a better way to do it; I'm open to a different and better solution.
from multiprocessing import Process, Queue
def f(q):
    """Put the worker's result on queue ``q``."""
    # Ugly work
    q.put(['hello', 'world'])


if __name__ == '__main__':
    q = Queue()
    p = Process(target=f, args=(q,))
    p.start()
    # print() call instead of the Python 2 print statement, which is a
    # syntax error on Python 3.
    print(q.get())
    p.join()
Thanks!
You may find the following module useful in your case:
Module
#! /usr/bin/env python3
"""Allow functions to be wrapped in a timeout API.
Since code can take a long time to run and may need to terminate before
finishing, this module provides a set_timeout decorator to wrap functions."""
__author__ = 'Stephen "Zero" Chappell ' \
'<stephen.paul.chappell#atlantis-zero.net>'
__date__ = '18 December 2017'
__version__ = 1, 0, 1
__all__ = [
'set_timeout',
'run_with_timeout'
]
import multiprocessing
import sys
import time
DEFAULT_TIMEOUT = 60
def set_timeout(limit=None):
    """Return a wrapper that provides a timeout API for callers."""
    # Fall back to the module default when no limit is given.
    effective = DEFAULT_TIMEOUT if limit is None else limit
    _Timeout.validate_limit(effective)

    def wrapper(entry_point):
        return _Timeout(entry_point, effective)

    return wrapper
def run_with_timeout(limit, polling_interval, entry_point, *args, **kwargs):
    """Execute a callable object and automatically poll for results."""
    engine = set_timeout(limit)(entry_point)
    engine(*args, **kwargs)
    # Poll until ``ready`` stops being False, then fetch the value.
    while True:
        if engine.ready is not False:
            break
        time.sleep(polling_interval)
    return engine.value
def _target(queue, entry_point, *args, **kwargs):
    """Help with multiprocessing calls by being a top-level module function."""
    # Puts (True, result) on success or (False, exception) on failure. The
    # bare except is kept so that every exception is reported to the caller.
    # noinspection PyPep8,PyBroadException
    try:
        result = entry_point(*args, **kwargs)
        queue.put((True, result))
    except:
        error = sys.exc_info()[1]
        queue.put((False, error))
class _Timeout:
    """_Timeout(entry_point, limit) -> _Timeout instance

    Calling the instance runs ``entry_point`` in a separate daemon process.
    ``ready`` reports True (a result is available), False (still running)
    or None (the limit expired and the process was cancelled); ``value``
    returns the result, re-raises the worker's exception, or raises
    TimeoutError.
    """

    # The decorators in this class were garbled to "#property",
    # "#limit.setter" and "#staticmethod" in the listing. As plain methods,
    # ``engine.ready is False`` was never true and ``engine.value``
    # returned a bound method.

    def __init__(self, entry_point, limit):
        """Initialize the _Timeout instance with all needed attributes."""
        self.__entry_point = entry_point
        self.__limit = limit
        self.__queue = multiprocessing.Queue()
        self.__process = multiprocessing.Process()
        self.__timeout = time.monotonic()

    def __call__(self, *args, **kwargs):
        """Begin execution of the entry point in a separate process."""
        self.cancel()
        # Capacity 1: the worker puts exactly one (valid, value) pair.
        self.__queue = multiprocessing.Queue(1)
        self.__process = multiprocessing.Process(
            target=_target,
            args=(self.__queue, self.__entry_point) + args,
            kwargs=kwargs
        )
        self.__process.daemon = True
        self.__process.start()
        self.__timeout = time.monotonic() + self.__limit

    def cancel(self):
        """Terminate execution if possible."""
        if self.__process.is_alive():
            self.__process.terminate()

    @property
    def ready(self):
        """Property letting callers know if a returned value is available."""
        if self.__queue.full():
            return True
        elif not self.__queue.empty():
            return True
        elif self.__timeout < time.monotonic():
            self.cancel()
            # None (not False) tells callers the limit was reached.
            return None
        else:
            return False

    @property
    def value(self):
        """Property that retrieves a returned value if available."""
        if self.ready is True:
            valid, value = self.__queue.get()
            if valid:
                return value
            raise value
        raise TimeoutError('execution timed out before terminating')

    @property
    def limit(self):
        """Property controlling what the timeout period is in seconds."""
        return self.__limit

    @limit.setter
    def limit(self, value):
        self.validate_limit(value)
        self.__limit = value

    @staticmethod
    def validate_limit(value):
        """Verify that the limit's value is not too low."""
        if value <= 0:
            raise ValueError('limit must be greater than zero')
To use, see the following example that demonstrates its usage:
Example
from time import sleep
def main():
    """Run three timed calls in parallel and report how each one ended."""
    # The module exports ``set_timeout``; the original called an undefined
    # name ``timeout``. NOTE(review): import set_timeout from wherever the
    # module is saved.
    timeout_after_four_seconds = set_timeout(4)
    # create copies of a function that have a timeout
    a = timeout_after_four_seconds(do_something)
    b = timeout_after_four_seconds(do_something)
    c = timeout_after_four_seconds(do_something)
    # execute the functions in separate processes
    a('Hello', 1)
    b('World', 5)
    c('Jacob', 3)
    # poll the functions to find out what they returned
    results = [a, b, c]
    polling = set(results)
    while polling:
        for process, name in zip(results, 'abc'):
            if process in polling:
                ready = process.ready
                if ready is True:  # if the function returned
                    print(name, 'returned', process.value)
                    polling.remove(process)
                elif ready is None:  # if the function took too long
                    print(name, 'reached timeout')
                    polling.remove(process)
                else:  # if the function is running
                    assert ready is False, 'ready must be True, False, or None'
        sleep(0.1)
    print('Done.')
def do_something(data, work):
    """Sleep for `work` seconds, print `data`, and return `work`."""
    sleep(work)
    print(data)
    return work
# Run the demonstration only when executed as a script.
if __name__ == '__main__':
    main()
Does the process you are running involve a loop?
If so, you can record a timestamp before starting the loop and include an if statement within the loop that calls sys.exit() to terminate the script once the current timestamp differs from the recorded start timestamp by more than x seconds.
All you need to adapt the queue example from the docs to your case is to pass the timeout to the q.get() call and terminate the process on timeout:
# The module is named ``queue`` on Python 3 and ``Queue`` on Python 2.
try:
    from queue import Empty
except ImportError:
    from Queue import Empty
...
try:
    # Wait at most ``timeout`` seconds for the child's result.
    print(q.get(timeout=timeout))
except Empty:  # no value, timeout occurred
    p.terminate()
    q = None  # the queue might be corrupted after the `terminate()` call
p.join()
Using a Pipe might be more lightweight otherwise the code is the same (you could use .poll(timeout), to find out whether there is a data to receive).