Threaded buffer crashes - Python

I tried to implement a 5-element buffer using threading and a list.
import threading

class Buffer1:
    def __init__(self, size):
        self.empty = True
        self.content = None
        self.lock = threading.Condition()
        self.list = []

    def take(self):
        with self.lock:
            while not self.list:
                self.lock.wait()
            help = self.list[len(self.list) - 1]
            del self.list[len(self.list) - 1]
            self.lock.notify_all()
            return help

    def put(self, v):
        with self.lock:
            while len(self.list) > 4:
                self.lock.wait()
            #self.content = v
            self.list.append(v)
            self.empty = False
            self.lock.notify_all()

    def show_list(self):
        return self.list

a = Buffer1(5)
a.put(7)
Theoretically it works, but when you exceed the limits of the buffer, either by buffering a sixth value or by calling take() when there is no value buffered, the IDE becomes unresponsive. How could I go about fixing this?

You are using only one thread to add elements to the buffer, so once the list holds 5 items the main thread blocks forever on self.lock.wait(): nothing else will ever call notify, which is why your IDE appears to hang. Use a second thread that takes elements in parallel; each take() then notifies the waiting producer thread.
For example, here is a consumer thread that takes 5 items:
import threading
import time

def consume(buffer):
    for i in range(5):
        print(threading.current_thread(), "consume", buffer.take())
        time.sleep(2)
    print(threading.current_thread(), "Bye")

buffer = Buffer1(5)
t = threading.Thread(target=consume, args=(buffer,), name="consumer")
t.start()
buffer.put(1)
buffer.put(2)
buffer.put(3)
buffer.put(4)
buffer.put(5)
buffer.put(6)
print(buffer.show_list())

... the IDE becomes unresponsive. How could I go about fixing this?
You only showed adding to the buffer from the main thread; nothing ever takes anything out.
If the buffer fills up or becomes empty, the next put/take will cause its Condition (lock) to wait until something notifies it to continue. I didn't see any of that signaling in your example.
The buffer is a shared resource. The buffer and the threads that use it need enough coordination to stay out of each other's way and enough logic to keep from getting stuck somewhere.
Presumably you need a thread that puts stuff into the buffer and a thread that takes stuff out of the buffer, both with ample signaling to notify everyone when they are done messing with the buffer.
I set up logging so that program execution can be traced with log messages.
Buffer1 changes:
Changed the list to a collections.deque to simplify things a bit.
Added properties for empty and full
Added an Event attribute to stop putting/taking when the process gets shut down.
Added a timeout while waiting to put/take to forestall any timing issues when the threads are shut down
Added notifications for empty and full conditions.
Made two threads: one to add to the buffer and one to take from the buffer.
Each will add/take while its Event is not set.
In each iteration a random number of items are taken or added.
When taking/putting, the buffer's Condition is acquired and all waiters are notified when complete.
In the main thread:
An Event is created - for signaling the thread to quit
A Timer is created to limit thread execution time - when it times out its callback function sets the Event and uses the buffer's Condition (lock) to notify anyone that is waiting and free them up.
The threads are created, started, and joined.
import threading
import collections
import random
import string
import time
import logging
import sys

# logging setup
root = logging.getLogger()
root.setLevel(logging.INFO)
formatter = logging.Formatter(fmt='%(asctime)s.%(msecs)03d %(message)s',
                              datefmt='%S')

class WarningFilter(logging.Filter):
    def filter(self, record):
        return record.levelno == logging.WARNING

class InfoFilter(logging.Filter):
    def filter(self, record):
        return record.levelno == logging.INFO

handler = logging.StreamHandler(sys.stdout)
handler.setLevel(logging.INFO)
handler.setFormatter(formatter)
handler.addFilter(InfoFilter())
root.addHandler(handler)

handler = logging.StreamHandler(sys.stderr)
handler.setLevel(logging.WARNING)
handler.setFormatter(formatter)
handler.addFilter(WarningFilter())
root.addHandler(handler)
# logging setup end

class Buffer1:
    '''FILO buffer.
    '''
    def __init__(self, size, evt):
        self.content = None
        self.size = size
        self.evt = evt
        self.lock = threading.Condition()
        self.list = collections.deque()

    @property
    def full(self):
        return len(self.list) >= self.size

    @property
    def empty(self):
        return not self.list

    def take(self):
        with self.lock:
            while self.empty:
                root.warning('buffer empty waiting to take')
                self.lock.wait(timeout=5)
                if self.evt.is_set():
                    help = None
                    break
            else:
                help = self.list.pop()
            self.lock.notify_all()
        return help

    def put(self, v):
        success = False
        with self.lock:
            while self.full:
                root.warning('buffer full waiting to put')
                self.lock.wait(timeout=5)
                if self.evt.is_set():
                    break
            else:
                self.list.append(v)
                success = True
            self.lock.notify_all()
        return success

    def show_list(self):
        return self.list

class Prod(threading.Thread):
    '''Puts stuff onto buffer, quits on Event.
    Contrived toy - periodically puts random n items in buffer.
    '''
    def __init__(self, buffer, evt):
        super().__init__(name='producer')
        self.buffer = buffer
        self.evt = evt

    def run(self):
        n = 0
        while not self.evt.is_set():
            howmany = random.randint(1, 9)
            payload = random.sample(string.ascii_letters, howmany)
            payload = collections.deque(payload)
            root.info(f'{self.name} putting {howmany}')
            with self.buffer.lock:
                while payload and (not self.evt.is_set()):
                    c = payload.popleft()
                    root.info(f'{self.name} -----> {c}')
                    if not self.buffer.put(c):
                        root.warning(f'{self.name} last put failed')
                self.buffer.lock.notify_all()
            time.sleep(.04)
            n += 1
        root.info(f'{self.name} dying n={n}')
        with self.buffer.lock:
            self.buffer.lock.notify_all()
        root.info(f'{self.name} is done')

class Cons(threading.Thread):
    '''Takes stuff off of buffer, quits on Event set.
    Contrived toy - periodically takes random n items from buffer.
    '''
    def __init__(self, buffer, evt):
        super().__init__(name='consumer')
        self.buffer = buffer
        self.evt = evt

    def run(self):
        n = 0
        while not self.evt.is_set():
            howmany = random.randint(1, 9)
            root.info(f'{self.name} taking {howmany}')
            with self.buffer.lock:
                while (howmany > 0) and (not self.evt.is_set()):
                    c = self.buffer.take()
                    root.info(f'{self.name} <----- {c}')
                    howmany -= 1
                self.buffer.lock.notify_all()
            time.sleep(.04)
            n += 1
        root.info(f'{self.name} dying n={n}')
        with self.buffer.lock:
            self.buffer.lock.notify_all()
        root.info(f'{self.name} is done')

if __name__ == '__main__':
    # use an Event to shut down the whole process
    evt = threading.Event()
    buffer = Buffer1(5, evt)

    def kill(evt=evt, buffer=buffer):
        root.warning('killing everything')
        evt.set()
        with buffer.lock:
            buffer.lock.notify_all()

    # don't let this toy example run forever
    t = threading.Timer(5, kill)
    t.start()

    p1 = Prod(buffer, evt)
    c1 = Cons(buffer, evt)
    c1.start()
    p1.start()
    p1.join()
    c1.join()
    print('exit')

Here is another take using asyncio instead of threads to exercise your buffer.
import asyncio
import collections
import random
import string
import time
import logging
import sys

# logging setup
root = logging.getLogger()
root.setLevel(logging.INFO)
formatter = logging.Formatter(fmt='%(asctime)s.%(msecs)03d %(message)s',
                              datefmt='%S')

class WarningFilter(logging.Filter):
    def filter(self, record):
        return record.levelno == logging.WARNING

class InfoFilter(logging.Filter):
    def filter(self, record):
        return record.levelno == logging.INFO

handler = logging.StreamHandler(sys.stdout)
handler.setLevel(logging.INFO)
handler.setFormatter(formatter)
handler.addFilter(InfoFilter())
root.addHandler(handler)

handler = logging.StreamHandler(sys.stderr)
handler.setLevel(logging.WARNING)
handler.setFormatter(formatter)
handler.addFilter(WarningFilter())
root.addHandler(handler)

class Buffer:
    '''FILO buffer.
    '''
    def __init__(self, size, evt):
        self.content = None
        self.size = size
        self.stop_evt = evt
        self.lock = asyncio.Condition()
        self.list = collections.deque()

    def full(self):
        return len(self.list) >= self.size

    def not_full(self):
        return len(self.list) < self.size

    def empty(self):
        return not bool(self.list)

    def not_empty(self):
        return bool(self.list)

    async def take(self):
        async with self.lock:
            #root.info(f'take:lock acquired - wait for not empty')
            while self.empty():
                waiters = [thing for thing in self.lock._waiters]
                #root.warning(f'take:{waiters} waiting')
                await self.lock.wait()
            if self.stop_evt.is_set():  # shutting down
                val = None
            else:
                #root.info('take: not empty')
                val = self.list.pop()
            self.lock.notify_all()
        return val

    async def put(self, v):
        success = False
        async with self.lock:
            #root.info(f'put:lock acquired - wait for not full')
            while self.full():
                waiters = [thing for thing in self.lock._waiters]
                #root.warning(f'put:{waiters} waiting')
                await self.lock.wait()
                if self.stop_evt.is_set():  # shutting down
                    break
            else:
                #root.info('put: not full')
                self.list.append(v)
                success = True
            self.lock.notify_all()
        return success

    def show_list(self):
        return self.list

async def random_stuff():
    howmany = random.randint(1, 9)
    payload = random.sample(string.ascii_letters, howmany)
    return collections.deque(payload)

async def produce(buffer, stop_evt, name):
    puts = []
    try:
        while True:
            payload = await random_stuff()
            root.warning(f'producer{name} putting {len(payload)}')
            while payload:
                c = payload.popleft()
                root.info(f'producer{name} -----> {c}')
                success = await buffer.put(c)
                if not success:
                    root.warning(f'producer{name} failed to put {c}')
                else:
                    puts.append(c)
            await asyncio.sleep(.03)
    except asyncio.CancelledError as e:
        root.warning('producer canceled')
    root.info(f'producer{name} dying n={len(puts)}')
    root.info(f'producer{name} is done')
    return puts

async def consume(buffer, stop_evt, name):
    '''Takes stuff off of buffer, quits on Event set.
    Contrived toy - periodically takes random n items from buffer.
    '''
    takes = []
    try:
        while True:
            howmany = random.randint(1, 9)
            msg = f'consumer{name} taking {howmany}'
            root.warning(f'{msg:>38}')
            while howmany > 0:
                c = await buffer.take()
                takes.append(c)
                msg = f'consumer{name} <----- {c}'
                root.info(f'{msg:>38}')
                howmany -= 1
            await asyncio.sleep(.02)
    except asyncio.CancelledError as e:
        root.warning('consumer canceled')
    root.info(f'consumer{name} dying n={len(takes)}')
    root.info(f'consumer{name} is done')
    return takes

async def timer(n, buffer, evt, tasks):
    root.warning('timer started')
    await asyncio.sleep(n)
    evt.set()
    root.warning('timed out - event set')
    root.warning('canceling tasks')
    for task in tasks:
        task.cancel()

async def main():
    loop = asyncio.get_running_loop()
    loop.set_debug(True)
    # use an Event to shut down the whole process
    evt = asyncio.Event()
    buffer = Buffer(5, evt)
    put_task = asyncio.create_task(produce(buffer, evt, 1))
    take_task = asyncio.create_task(consume(buffer, evt, 1))
    timer_task = asyncio.create_task(timer(5, buffer, evt, [put_task, take_task]))
    root.info('tasks created')
    await timer_task
    puts = await put_task
    takes = await take_task
    print('exit')
    return puts, takes, buffer.list

if __name__ == '__main__':
    puts, takes, remains = asyncio.run(main())
    puts = collections.Counter(puts)
    takes = collections.Counter(takes)
    remains = collections.Counter(remains)
    #print(remains == (puts-takes))

Related

What is the best practice of 'restarting' a thread? [duplicate]

How can I start and stop a thread with my poor thread class?
It is in a loop, and I want to restart it again from the beginning of the code. How can I do start-stop-restart-stop-restart?
My class:
import threading
import time

class Concur(threading.Thread):
    def __init__(self):
        self.stopped = False
        threading.Thread.__init__(self)

    def run(self):
        i = 0
        while not self.stopped:
            time.sleep(1)
            i = i + 1
In the main code, I want:
inst = Concur()
while condition:
    inst.start()
    # After some operation
    inst.stop()
    # Some other operation
You can't actually stop and then restart a thread since you can't call its start() method again after its run() method has terminated. However you can make one pause and then later resume its execution by using a threading.Condition variable to avoid concurrency problems when checking or changing its running state.
threading.Condition objects have an associated threading.Lock object and methods that let one thread wait and be woken when another thread notifies it. Here's an example derived from the code in your question which shows this being done. In the example code I've made the Condition variable a part of Thread subclass instances to better encapsulate the implementation and avoid needing to introduce additional global variables:
from __future__ import print_function
import threading
import time

class Concur(threading.Thread):
    def __init__(self):
        super(Concur, self).__init__()
        self.iterations = 0
        self.daemon = True  # Allow main to exit even if still running.
        self.paused = True  # Start out paused.
        self.state = threading.Condition()

    def run(self):
        self.resume()
        while True:
            with self.state:
                if self.paused:
                    self.state.wait()  # Block execution until notified.
            # Do stuff...
            time.sleep(.1)
            self.iterations += 1

    def pause(self):
        with self.state:
            self.paused = True  # Block self.

    def resume(self):
        with self.state:
            self.paused = False
            self.state.notify()  # Unblock self if waiting.

class Stopwatch(object):
    """ Simple class to measure elapsed times. """
    def start(self):
        """ Establish reference point for elapsed time measurements. """
        self.start_time = time.time()
        return self

    @property
    def elapsed_time(self):
        """ Seconds since started. """
        try:
            return time.time() - self.start_time
        except AttributeError:  # Wasn't explicitly started.
            self.start_time = time.time()
            return 0

MAX_RUN_TIME = 5  # Seconds.
concur = Concur()
stopwatch = Stopwatch()
print('Running for {} seconds...'.format(MAX_RUN_TIME))
concur.start()
while stopwatch.elapsed_time < MAX_RUN_TIME:
    concur.resume()
    # Can also do other concurrent operations here...
    concur.pause()
    # Do some other stuff...

# Show Concur thread executed.
print('concur.iterations: {}'.format(concur.iterations))
This is David Heffernan's idea fleshed out. The example below runs for 1 second, then stops for 1 second, then runs for 1 second, and so on.
import time
import threading
import datetime as DT
import logging

logger = logging.getLogger(__name__)

def worker(cond):
    i = 0
    while True:
        with cond:
            cond.wait()
            logger.info(i)
            time.sleep(0.01)
            i += 1

logging.basicConfig(level=logging.DEBUG,
                    format='[%(asctime)s %(threadName)s] %(message)s',
                    datefmt='%H:%M:%S')

cond = threading.Condition()
t = threading.Thread(target=worker, args=(cond,))
t.daemon = True
t.start()

start = DT.datetime.now()
while True:
    now = DT.datetime.now()
    if (now - start).total_seconds() > 60:
        break
    if now.second % 2:
        with cond:
            cond.notify()
The implementation of stop() would look like this:
def stop(self):
    self.stopped = True
If you want to restart, then you can just create a new instance and start that.
while condition:
    inst = Concur()
    inst.start()
    # after some operation
    inst.stop()
    # some other operation
The documentation for Thread makes it clear that the start() method can only be called once for each instance of the class.
If you want to pause and resume a thread, then you'll need to use a condition variable.
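To make the create-a-new-instance approach concrete, here is a minimal sketch of my own (the names are illustrative, not from the question) that uses a threading.Event as the stop flag and simply replaces the finished thread with a fresh one for each "restart":
import threading
import time

class Worker(threading.Thread):
    """One-shot worker; create a fresh instance for each restart."""
    def __init__(self):
        super(Worker, self).__init__()
        self.stop_event = threading.Event()
        self.iterations = 0

    def run(self):
        while not self.stop_event.is_set():
            time.sleep(0.1)
            self.iterations += 1

    def stop(self):
        self.stop_event.set()
        self.join()  # Wait for run() to notice the flag and return.

# "Restart" by constructing a new instance each time around.
for _ in range(3):
    inst = Worker()
    inst.start()
    time.sleep(0.5)  # ... do some other operation ...
    inst.stop()
    print(inst.iterations)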

Python custom signal handling in a process pool

I am dealing with the following problem:
I've implemented a dummy 'Thing' class that sleeps for 10 seconds and then logs a message ('foo'). This class is instantiated in a worker function for a process Pool, and its 'foo' method, which implements the logic just described, is called.
What I want to achieve is custom signal handling: as long as the processes haven't terminated, if CTRL+C (SIGINT) is sent, each process logs the signal and terminates immediately.
Half of the logic works: if SIGINT arrives while each process is sleeping, the processes are interrupted and the Pool is closed.
Problem: if ALL the processes end successfully and then SIGINT is sent, the message is logged but the Pool won't be closed.
Code:
import logging
import signal
import os
import time
from multiprocessing import Pool, current_process

logger = logging.getLogger('test')

SIGNAL_NAMES = dict((k, v) for v, k in reversed(sorted(signal.__dict__.items()))
                    if v.startswith('SIG') and not v.startswith('SIG_'))

class Thing(object):
    def __init__(self, my_id):
        self.my_id = my_id
        self.logger = logging.getLogger(str(my_id))

    def foo(self):
        time.sleep(10)
        self.logger.info('[%s] Foo after 10 secs!', self.my_id)

class Daemon(object):
    def __init__(self, no_processes, max_count):
        signal.signal(signal.SIGINT, self.stop)
        self.done = False
        self.count = 0
        self.max_count = max_count
        self.pool = Pool(no_processes, initializer=self.pool_initializer)

    def stop(self, signum, _):
        """ Stop function for Daemon """
        sig = SIGNAL_NAMES.get(signum) or signum
        logger.info('[Daemon] Stopping (received signal %s)', sig)
        self.done = True

    def _generate_ids(self):
        """ Generator function of the IDs for the Processes Pool """
        while not self.done:
            if self.count < self.max_count:
                my_id = "ID-{}".format(self.count)
                logger.info('[Daemon] Generated ID %s', my_id)
                time.sleep(3)
                yield my_id
                self.count += 1
            time.sleep(1)

    def run(self):
        """ Main daemon run function """
        pid = os.getpid()
        logger.info('[Daemon] Started running on PID %s', str(pid))
        my_ids = self._generate_ids()
        for res in self.pool.imap_unordered(run_thing, my_ids):
            logger.info("[Daemon] Finished %s", res or '')
        logger.info('[Daemon] Closing & waiting processes to terminate')
        self.pool.close()
        self.pool.join()

    def pool_initializer(self):
        """ Pool initializer function """
        signal.signal(signal.SIGINT, self.worker_signal_handler)

    @staticmethod
    def worker_signal_handler(signum, _):
        """ Signal handler for the Process worker """
        sig = SIGNAL_NAMES.get(signum) or signum
        cp = current_process()
        logger.info("[%s] Received in worker %s signal %s", WORKER_THING_ID or '', str(cp), sig)
        global WORKER_EXITING
        WORKER_EXITING = True

WORKER_EXITING = False
WORKER_THING_ID = None

def run_thing(arg):
    """ Worker function for processes """
    if WORKER_EXITING:
        return
    global WORKER_THING_ID
    WORKER_THING_ID = arg
    run_exception = None
    logger.info('[%s] START Thing foo-ing', arg)
    logging.getLogger('Thing-{}'.format(arg)).setLevel(logging.INFO)
    try:
        thing = Thing(arg)
        thing.foo()
    except Exception as e:
        run_exception = e
    finally:
        WORKER_THING_ID = None
        logger.info('[%s] STOP Thing foo-ing', arg)
        if run_exception:
            logger.error('[%s] EXCEPTION on Thing foo-ing: %s', arg, run_exception)
    return arg

if __name__ == '__main__':
    logging.basicConfig()
    logger.setLevel(logging.INFO)
    daemon = Daemon(4, 3)
    daemon.run()
Your problem is the logic in _generate_ids(). The generator never ends, so pool.imap_unordered() never finishes by itself; it can only be interrupted by CTRL-C.
Change it to something like this:
def _generate_ids(self):
    """ Generator function of the IDs for the Processes Pool """
    for i in range(self.max_count):
        time.sleep(3)
        my_id = "ID-{}".format(self.count)
        logger.info('[Daemon] Generated ID %s', my_id)
        if self.done:
            break
        self.count += 1
        yield my_id
And the processes end by themselves normally.

Creating custom Futures objects in Python

I have a simple set of objects for managing a background process using the Actor model. In this case I'm concerned with only a single actor. However, it is important that the actor maintains a persistent state between receiving messages.
The objects work by appending messages to a queue in the main thread. Then the main thread can execute as it pleases. Every once in a while it checks to see if anything new is on the results queue. When this happens it knows the actor has completed the task.
I want to know if this can be implemented in a cleaner way using Future objects. My current implementation is as follows:
import multiprocessing
import time
import collections

class Client(object):
    """
    Object used in the main thread to communicate with background actors
    """
    def __init__(client):
        client.manager = None
        client.start()

    def __del__(client):
        if client.manager and client.manager.is_alive():
            client.get(StopIteration)

    def start(client):
        client.task_queue = multiprocessing.JoinableQueue()
        client.result_queue = multiprocessing.Queue()
        client.result_history = collections.deque(maxlen=1000)
        client.manager = Manager(client.task_queue, client.result_queue)
        client.manager.start()

    def post(client, payload):
        client.task_queue.put(payload)

    def get(client, payload):
        # Exhaust any existing results
        list(client.results())
        # Post the command
        client.post(payload)
        # Wait for a response
        result = client.wait_for_result()
        return result

    def wait_for_result(client):
        wait = 0
        while True:
            for result in client.results():
                return result
            time.sleep(wait)
            wait = max(1, wait + .01)

    def results(client):
        """ Look at results put on the result_queue """
        while not client.result_queue.empty():
            item = client.result_queue.get()
            client.result_history.append(item)
            yield item

class Manager(multiprocessing.Process):
    """
    Manager manages a single actor.
    A manager sends messages to an actor and appends a response when it is done.
    """
    def __init__(self, task_queue, result_queue):
        super(Manager, self).__init__()
        self.task_queue = task_queue
        self.result_queue = result_queue

    def run(self):
        """ main loop """
        terminate = False
        # Create Actor in separate process and send messages to it
        actor = Actor()
        while not terminate:
            message = self.task_queue.get()
            print('Sending message={} to actor'.format(message))
            try:
                if message is StopIteration:
                    content = 'shutdown'
                    terminate = True
                else:
                    content = actor.handle(message)
            except Exception as ex:
                print('Error handling message')
                status = 'error'
                content = repr(ex)
            else:
                status = 'success'
                print('Actor finished handling message={}'.format(message))
            # Send back result
            response = {
                'status': status,
                'content': content
            }
            self.task_queue.task_done()
            self.result_queue.put(response)
        print('Manager is shutting down')

class Actor(object):
    """
    An actor is given messages from its manager and performs actions in a
    single thread. Its state is private and threadsafe.
    """
    def __init__(actor):
        actor.state = {}

    def handle(actor, message):
        if not isinstance(message, dict):
            raise ValueError('Commands must be passed in a message dict')
        message = message.copy()
        action = message.pop('action', None)
        if action is None:
            raise ValueError('message must have an action item')
        if action == 'hello world':
            content = 'hello world'
            return content
        elif action == 'debug':
            return actor
        elif action == 'start':
            actor.state['a'] = 3
            return 'started'
        elif action == 'add':
            for i in range(10000000):
                actor.state['a'] += 1
            return 'added', actor.state['a']
        else:
            raise ValueError('Unknown action=%r' % (action,))

def test():
    print('Starting Test')
    client = Client()
    print('About to send messages')
    # Get sends a message and then blocks until the response is returned.
    print(client.get({'action': 'hello world'}))
    print(client.get({'action': 'start'}))
    print(client.get({'action': 'add'}))
    print('Test completed')

if __name__ == '__main__':
    test()
I would like to modify this code to use Future objects. Whenever the client is about to send a message, is it possible to create a Future object and send that over the multiprocessing queue? Then the manager could execute the actor's function and modify the state of the Future object instead of appending a result to the result_queue.
This seems like it would offer a cleaner way to associate results with messages sent to the actor. It would also remove the need for the get and results methods I have in the first example.
Intuitively, I want it to look something like this:
from concurrent import futures
import multiprocessing

class Client(object):
    """
    Object used in the main thread to communicate with background actors
    """
    def __init__(client):
        client.manager = None
        client.start()

    def __del__(client):
        if client.manager and client.manager.is_alive():
            f = client.post(StopIteration)

    def start(client):
        client.task_queue = multiprocessing.JoinableQueue()
        client.manager = Manager(client.task_queue)
        client.manager.start()

    def post(client, payload):
        f = futures.Future()
        client.task_queue.put((f, payload))
        return f

class Manager(multiprocessing.Process):
    """
    Manager manages a single actor.
    """
    def __init__(self, task_queue):
        super(Manager, self).__init__()
        self.task_queue = task_queue

    def run(self):
        """ main loop """
        terminate = False
        # Create Actor in separate process and send messages to it
        actor = Actor()
        while not terminate:
            f, message = self.task_queue.get()
            f.set_running_or_notify_cancel()
            print('Sending message={} to actor'.format(message))
            try:
                if message is StopIteration:
                    content = 'shutdown'
                    terminate = True
                else:
                    content = actor.handle(message)
            except Exception as ex:
                print('Error handling message')
                status = 'error'
                content = repr(ex)
            else:
                status = 'success'
                print('Actor finished handling message={}'.format(message))
            # Send back result
            response = {
                'status': status,
                'content': content
            }
            self.task_queue.task_done()
            f.set_result(response)
        print('Manager is shutting down')

class Actor(object):
    """
    An actor is given messages from its manager and performs actions in a
    single thread. Its state is private and threadsafe.
    """
    def __init__(actor):
        actor.state = {}

    def handle(actor, message):
        if not isinstance(message, dict):
            raise ValueError('Commands must be passed in a message dict')
        message = message.copy()
        action = message.pop('action', None)
        if action is None:
            raise ValueError('message must have an action item')
        if action == 'hello world':
            content = 'hello world'
            return content
        elif action == 'debug':
            return actor
        elif action == 'start':
            actor.state['a'] = 3
            return 'started'
        elif action == 'add':
            for i in range(10000000):
                actor.state['a'] += 1
            return 'added', actor.state['a']
        else:
            raise ValueError('Unknown action=%r' % (action,))

def test():
    print('Starting Test')
    client = Client()
    print('About to send messages')
    f1 = client.post({'action': 'hello world'})
    print(f1.result())
    f2 = client.post({'action': 'start'})
    print(f2.result())
    f3 = client.post({'action': 'add'})
    print(f3.result())
    print('Test completed')

if __name__ == '__main__':
    test()
However, this obviously doesn't execute correctly. I believe I need some sort of process pool manager to create the futures for me (the Future methods I'm calling are documented as ones only the pool manager should call). But I'm not quite sure how to go about doing that. I've used futures before to map singleton worker functions, but I've never managed an external process with state before.
Can someone help me out with this? Perhaps there is an even easier way to go about implementing this with Futures?
So, I went ahead and just made a library to do this:
https://github.com/Erotemic/futures_actors
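For readers who just want the core trick without the dependency: a concurrent.futures.Future can be created and completed by hand, but it cannot be pickled across the process boundary, so the Future has to stay in the parent process. One workable pattern (a sketch of my own, not the library's actual API) is to tag each message with an id, keep a pending dict of id -> Future on the client side, and let a small listener thread resolve the matching Future when the actor's response arrives; only picklable (id, payload) pairs ever cross the boundary:
import itertools
import multiprocessing
import threading
from concurrent.futures import Future

class FutureClient(object):
    """Sketch: Futures stay in the main process; a listener thread
    resolves them as responses arrive on the result queue."""
    def __init__(self, worker):
        self._ids = itertools.count()
        self._pending = {}                  # message id -> Future
        self._tasks = multiprocessing.Queue()
        self._results = multiprocessing.Queue()
        self._proc = multiprocessing.Process(
            target=worker, args=(self._tasks, self._results))
        self._proc.daemon = True            # die with the main process
        self._proc.start()
        self._listener = threading.Thread(target=self._listen)
        self._listener.daemon = True
        self._listener.start()

    def post(self, payload):
        f = Future()
        msg_id = next(self._ids)
        self._pending[msg_id] = f
        self._tasks.put((msg_id, payload))  # only picklable data crosses
        return f

    def _listen(self):
        while True:
            msg_id, response = self._results.get()
            self._pending.pop(msg_id).set_result(response)

def worker(tasks, results):
    """Stateful actor loop running in the child process."""
    state = {'a': 0}
    while True:
        msg_id, payload = tasks.get()
        if payload == 'add':
            state['a'] += 1
        results.put((msg_id, state['a']))

if __name__ == '__main__':
    client = FutureClient(worker)
    print(client.post('add').result())  # blocks until the actor answers -> 1
    print(client.post('add').result())  # state persists in the actor -> 2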

Why doesn't SIGVTALRM trigger inside time.sleep()?

I'm trying to use SIGVTALRM to snapshot profile my Python code, but it doesn't seem to be firing inside blocking operations like time.sleep() and socket operations.
Why is that? And is there any way to address that, so I can collect samples while I'm inside blocking operations?
I've also tried using ITIMER_PROF/SIGPROF and ITIMER_REAL/SIGALRM and both seem to produce similar results.
The code I'm testing with follows, and the output is something like:
$ python profiler-test.py
<module>(__main__:1);test_sampling_profiler(__main__:53): 1
<module>(__main__:1);test_sampling_profiler(__main__:53);busyloop(__main__:48): 1509
Note that the timesleep function isn't shown at all.
Test code:
import time
import signal
import collections

class SamplingProfiler(object):
    def __init__(self, interval=0.001, logger=None):
        self.interval = interval
        self.running = False
        self.counter = collections.Counter()

    def _sample(self, signum, frame):
        if not self.running:
            return
        stack = []
        while frame is not None:
            formatted_frame = "%s(%s:%s)" % (
                frame.f_code.co_name,
                frame.f_globals.get('__name__'),
                frame.f_code.co_firstlineno,
            )
            stack.append(formatted_frame)
            frame = frame.f_back
        formatted_stack = ';'.join(reversed(stack))
        self.counter[formatted_stack] += 1
        signal.setitimer(signal.ITIMER_VIRTUAL, self.interval, 0)

    def start(self):
        if self.running:
            return
        signal.signal(signal.SIGVTALRM, self._sample)
        signal.setitimer(signal.ITIMER_VIRTUAL, self.interval, 0)
        self.running = True

    def stop(self):
        if not self.running:
            return
        self.running = False
        signal.signal(signal.SIGVTALRM, signal.SIG_IGN)

    def flush(self):
        res = self.counter
        self.counter = collections.Counter()
        return res

def busyloop():
    start = time.time()
    while time.time() - start < 5:
        pass

def timesleep():
    time.sleep(5)

def test_sampling_profiler():
    p = SamplingProfiler()
    p.start()
    busyloop()
    timesleep()
    p.stop()
    print "\n".join("%s: %s" % x for x in sorted(p.flush().items()))

if __name__ == "__main__":
    test_sampling_profiler()
Not sure why time.sleep() works that way (could it be using SIGALRM itself to know when to resume?), but Popen.wait() does not block signals, so in the worst case you can call out to the OS sleep.
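For instance, a minimal sketch of that worst-case fallback (assuming a POSIX sleep binary on PATH): the child process does the sleeping, and the parent's timer signals are still delivered and handled while it waits.
import subprocess

def os_sleep(seconds):
    # The child sleeps; the parent's signal handlers (e.g. SIGALRM
    # from setitimer(ITIMER_REAL, ...)) keep firing while Popen.wait()
    # blocks, so samples can still be collected.
    subprocess.Popen(['sleep', str(seconds)]).wait()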
Another approach is to use a separate thread to trigger the sampling:
import sys
import threading
import time
import collections

class SamplingProfiler(object):
    def __init__(self, interval=0.001):
        self.interval = interval
        self.running = False
        self.counter = collections.Counter()
        self.thread = threading.Thread(target=self._sample)

    def _sample(self):
        while self.running:
            next_wakeup_time = time.time() + self.interval
            for thread_id, frame in sys._current_frames().items():
                if thread_id == self.thread.ident:
                    continue
                stack = []
                while frame is not None:
                    formatted_frame = "%s(%s:%s)" % (
                        frame.f_code.co_name,
                        frame.f_globals.get('__name__'),
                        frame.f_code.co_firstlineno,
                    )
                    stack.append(formatted_frame)
                    frame = frame.f_back
                formatted_stack = ';'.join(reversed(stack))
                self.counter[formatted_stack] += 1
            sleep_time = next_wakeup_time - time.time()
            if sleep_time > 0:
                time.sleep(sleep_time)

    def start(self):
        if self.running:
            return
        self.running = True
        self.thread.start()

    def stop(self):
        if not self.running:
            return
        self.running = False

    def flush(self):
        res = self.counter
        self.counter = collections.Counter()
        return res

def busyloop():
    start = time.time()
    while time.time() - start < 5:
        pass

def timesleep():
    time.sleep(5)

def test_sampling_profiler():
    p = SamplingProfiler()
    p.start()
    busyloop()
    timesleep()
    p.stop()
    print "\n".join("%s: %s" % x for x in sorted(p.flush().items()))

if __name__ == "__main__":
    test_sampling_profiler()
When doing it this way the result is:
$ python profiler-test.py
<module>(__main__:1);test_sampling_profiler(__main__:62);busyloop(__main__:54): 2875
<module>(__main__:1);test_sampling_profiler(__main__:62);start(__main__:37);start(threading:717);wait(threading:597);wait(threading:309): 1
<module>(__main__:1);test_sampling_profiler(__main__:62);timesleep(__main__:59): 4280
Still not totally fair, but better than no samples at all during sleep.
The absence of SIGVTALRM during a sleep() doesn't surprise me, since ITIMER_VIRTUAL "runs only when the process is executing."
(As an aside, CPython on non-Windows platforms implements time.sleep() in terms of select().)
With a plain SIGALRM, however, I expect a signal interruption and indeed I observe one:
<module>(__main__:1);test_sampling_profiler(__main__:62);busyloop(__main__:54): 4914
<module>(__main__:1);test_sampling_profiler(__main__:62);timesleep(__main__:59): 1
I changed the code somewhat, but you get the idea:
class SamplingProfiler(object):
    TimerSigs = {
        signal.ITIMER_PROF   : signal.SIGPROF,
        signal.ITIMER_REAL   : signal.SIGALRM,
        signal.ITIMER_VIRTUAL: signal.SIGVTALRM,
    }

    def __init__(self, interval=0.001, timer=signal.ITIMER_REAL):  # CHANGE
        self.interval = interval
        self.running = False
        self.counter = collections.Counter()
        self.timer = timer                    # CHANGE
        self.signal = self.TimerSigs[timer]   # CHANGE
    ....
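Presumably the elided rest of the class swaps the hard-coded ITIMER_VIRTUAL/SIGVTALRM pair for the configured attributes. My guess at the remaining changes (not the answerer's exact code, and _sample() would re-arm the timer with signal.setitimer(self.timer, self.interval, 0) the same way):
    def start(self):
        if self.running:
            return
        signal.signal(self.signal, self._sample)        # CHANGE
        signal.setitimer(self.timer, self.interval, 0)  # CHANGE
        self.running = True

    def stop(self):
        if not self.running:
            return
        self.running = False
        signal.signal(self.signal, signal.SIG_IGN)      # CHANGE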

