Python threading: monitoring threads and executing additional code after threads complete

I have the following code:
class MyThread(Thread):
    def __init__(self, command, template, env, build_flavor, logger):
        Thread.__init__(self)
        self.command = command
        self.template = template
        self.env = env
        self.build_flavor = build_flavor
        self.logger = logger

    def run(self):
        self.logger.info('Running (%s)...this may take several minutes. Please be patient' % self.build_flavor)
        run_command(self.command, self.template, self.env)
        self.logger.info('Complete (%s)' % self.build_flavor)
        return
And then in another class, when I create the actual threads:
if self.build_type == 'default':
    threads = []
    for t in self.template:
        modify_template(t)
        build_flavor = self.getmatch(t)
        thread = MyThread(packer, t, self.new_env, build_flavor, self.logger)
        thread.setName(build_flavor)
        thread.start()
        threads.append(thread)
    for thread in threads:
        thread.join()
    vmware_create()
    openstack_create()
Unfortunately, after the threads are .join()'d, I'm calling vmware_create() and openstack_create() serially. I'd like to execute each of those as soon as its respective thread completes, so that I'm not waiting for both threads to finish before starting one of the *_create() functions, and then waiting for the first to complete before executing the second.
i.e. right now vmware_create() executes only after BOTH threads are finished, and only once vmware_create() is done will openstack_create() begin. I'd like to wait for the respective threads to complete, then execute the _create() function for whichever thread completed first, all while waiting for the second thread to finish, and once that's done, immediately execute its _create() function, for true parallelization.
I haven't been able to figure out how to do this and need a little help.

Functions are objects. Just hand them to the thread:
class MyThread(Thread):
    def __init__(self, command, template, env, build_flavor, logger, func=None):
        Thread.__init__(self)
        self.command = command
        self.template = template
        self.env = env
        self.build_flavor = build_flavor
        self.logger = logger
        self.func = func

    def run(self):
        self.logger.info('Running (%s)...this may take several minutes. Please be patient' % self.build_flavor)
        run_command(self.command, self.template, self.env)
        self.logger.info('Complete (%s)' % self.build_flavor)
        # call func if it is there
        if self.func:
            self.func()
        return
Now, I supply the first two threads with a function to call:
if self.build_type == 'default':
    threads = []
    funcs = {0: vmware_create, 1: openstack_create}
    for i, t in enumerate(self.template):
        modify_template(t)
        build_flavor = self.getmatch(t)
        func = funcs.get(i, None)
        thread = MyThread(packer, t, self.new_env, build_flavor,
                          self.logger, func=func)
        thread.setName(build_flavor)
        thread.start()
        threads.append(thread)
    for thread in threads:
        thread.join()
Of course, you can attach functions to any other threads in the same way.
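
If restructuring is an option, the concurrent.futures module provides per-task completion callbacks out of the box. A minimal sketch of that approach, assuming vmware_create() and openstack_create() take no arguments (the build() helper and the flavor names here are only illustrative):

from concurrent.futures import ThreadPoolExecutor

def build(flavor):
    # stand-in for run_command(...) on one template
    return flavor

def on_done(future):
    # runs as soon as its own task finishes, independently of the other task
    print('completed:', future.result())

with ThreadPoolExecutor(max_workers=2) as pool:
    for flavor in ('vmware', 'openstack'):
        pool.submit(build, flavor).add_done_callback(on_done)

Each callback fires in the worker thread as soon as that particular task finishes, so neither *_create() call has to wait for the other thread.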

MultiThreading with Python

This is a producer-consumer problem. I need a single producer and multiple consumers to access the shared data cell, and each consumer needs to access the produced data before the producer makes additional data. The code works fine when there is a single consumer. I have attempted to make a list of the Producer and Consumers in order to .start() and .join() them. The program works as far as the first consumer, but hangs when it gets to the second consumer. I have tried changing the locking mechanisms from notify to notifyAll in getData and setData. I am a beginner in Python and this stuff is pretty foreign to me, but I have been trying things for 10 hours and would really appreciate some help.
import time, random
from threading import Thread, currentThread, Condition

class SharedCell(object):
    def __init__(self):
        self.data = -1
        self.writeable = True
        self.condition = Condition()

    def setData(self, data):
        self.condition.acquire()
        while not self.writeable:
            self.condition.wait()
        print("%s setting data to %d" %
              (currentThread().getName(), data))
        self.data = data
        self.writeable = False
        self.condition.notifyAll()
        self.condition.release()

    def getData(self):
        self.condition.acquire()
        while self.writeable:
            self.condition.wait()
        print(f'accessing data {currentThread().getName()} {self.data}')
        self.writeable = True
        self.condition.notifyAll()
        self.condition.release()
        return self.data
class Producer(Thread):
    def __init__(self, cell, accessCount, sleepMax):
        Thread.__init__(self, name="Producer")
        self.accessCount = accessCount
        self.cell = cell
        self.sleepMax = sleepMax

    def run(self):
        print("%s starting up" % self.getName())
        for count in range(self.accessCount):
            time.sleep(random.randint(1, self.sleepMax))
            self.cell.setData(count + 1)
        print("%s is done producing\n" % self.getName())

class Consumer(Thread):
    def __init__(self, cell, accessCount, sleepMax):
        Thread.__init__(self)
        self.accessCount = accessCount
        self.cell = cell
        self.sleepMax = sleepMax

    def run(self):
        print("%s starting up" % self.getName())
        for count in range(self.accessCount):
            time.sleep(random.randint(1, self.sleepMax))
            value = self.cell.getData()
        print("%s is done consuming\n" % self.getName())
def main():
    accessCount = int(input("Enter the number of accesses: "))
    sleepMax = 4
    cell = SharedCell()
    producer = Producer(cell, accessCount, sleepMax)
    consumer = Consumer(cell, accessCount, sleepMax)
    consumerTwo = Consumer(cell, accessCount, sleepMax)
    threads = []
    threads.append(producer)
    threads.append(consumer)
    threads.append(consumerTwo)
    print("Starting the threads")
    for thread in threads:
        thread.start()
        thread.join()

main()
The join method blocks the calling thread and waits until the indicated thread terminates. In the loop at the end of your main function, why do you join each thread immediately after starting it? That results in starting thread 1, then waiting for it to terminate before starting thread 2, then waiting for that one to terminate before starting thread 3, and so on.
Perhaps you meant something like this:
for thread in threads:
    thread.start()

for thread in threads:
    thread.join()
so that every thread is started before you wait for them to terminate.
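
As an aside, a queue.Queue can often replace the hand-rolled Condition logic for this kind of producer/consumer hand-off. A minimal sketch with illustrative names and counts (note that, unlike the SharedCell above, each item goes to exactly one consumer):

import queue, threading

q = queue.Queue(maxsize=1)   # capacity 1: the producer waits until the item is taken
SENTINEL = object()          # tells a consumer to shut down

def producer(n):
    for i in range(1, n + 1):
        q.put(i)             # blocks while the previous item is still unconsumed
    for _ in range(2):       # one sentinel per consumer
        q.put(SENTINEL)

def consumer(name):
    while True:
        item = q.get()
        if item is SENTINEL:
            break
        print(name, 'got', item)

threads = [threading.Thread(target=producer, args=(5,)),
           threading.Thread(target=consumer, args=('c1',)),
           threading.Thread(target=consumer, args=('c2',))]
for t in threads:
    t.start()
for t in threads:
    t.join()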

How do you run a process that spawns multiple threads? How do you serialize a thread object?

I'm trying to create multiple processes in which each process takes in a group of threads that it will start. I keep getting this error:
TypeError: cannot pickle '_thread.lock' object
Here is a basic example of what I am trying to achieve:
import time
import threading
import multiprocessing

def threading_func(i):
    print(f'Starting Function {i}')
    time.sleep(1)
    print(f'Ending Function {i}')

def process_func(threads):
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()

thread_list = [threading.Thread(target=threading_func, args=(i,)) for i in range(1, 9)]
processes = [multiprocessing.Process(target=process_func, args=([thread],)) for thread in thread_list]

for process in processes:
    process.start()
for process in processes:
    process.join()
I am aware that the arguments passed to a Process instance must be serializable. The real question, then, is: how can I make a Thread object serializable?
There is no simple way to make a Thread instance serializable. An alternative would be to pass each process the arguments it needs to create the threads in its own address space. This can be done rather painlessly by creating a class called MyThread that behaves somewhat like the Thread class, although it is meant to be used with the target argument since it does not have a run method:
import time
import threading
import multiprocessing

class MyThread:
    def __init__(self, *args, **kwargs):
        self.args = args
        self.kwargs = kwargs
        self.thread = None

    def start(self):
        self.thread = threading.Thread(*self.args, **self.kwargs)
        self.thread.start()

    def join(self):
        self.thread.join()

def threading_func(i):
    print(f'Starting Function {i}')
    time.sleep(1)
    print(f'Ending Function {i}')

def process_func(threads):
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()

# Required for Windows:
if __name__ == '__main__':
    thread_list = [MyThread(target=threading_func, args=(i,)) for i in range(1, 9)]
    processes = [multiprocessing.Process(target=process_func, args=([thread],)) for thread in thread_list]
    for process in processes:
        process.start()
    for process in processes:
        process.join()
A version using a more generic MyThread class:
import time
import threading
import multiprocessing

class MyThread:
    def __init__(self, *args, **kwargs):
        self.args = args
        self.kwargs = kwargs
        self.thread = None

    def start(self):
        self.thread = threading.Thread(*self.args, **self.kwargs) if 'target' in self.kwargs \
            else threading.Thread(*self.args, **self.kwargs, target=self.run)
        self.thread.start()

    def join(self):
        self.thread.join()

    def run(self, *args, **kwargs):
        """
        This method would need to be overridden if this class is not initialized
        with the `target` keyword and you wanted to perform something useful
        """
        pass

class T(MyThread):
    def run(self, i):
        print(f'Starting Function {i}')
        time.sleep(1)
        print(f'Ending Function {i}')

def process_func(threads):
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()

# Required for Windows:
if __name__ == '__main__':
    thread_list = [T(args=(i,)) for i in range(1, 9)]
    processes = [multiprocessing.Process(target=process_func, args=([thread],)) for thread in thread_list]
    for process in processes:
        process.start()
    for process in processes:
        process.join()
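
If the thread targets and their arguments are themselves picklable, an even simpler option (a sketch, not part of the answer above) is to ship only the argument values to each process and build the Thread objects there:

import time
import threading
import multiprocessing

def threading_func(i):
    print(f'Starting Function {i}')
    time.sleep(1)
    print(f'Ending Function {i}')

def process_func(arg_values):
    # build the Thread objects inside the child process, so nothing
    # unpicklable ever crosses the process boundary
    threads = [threading.Thread(target=threading_func, args=(i,)) for i in arg_values]
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()

if __name__ == '__main__':
    processes = [multiprocessing.Process(target=process_func, args=([i],)) for i in range(1, 9)]
    for process in processes:
        process.start()
    for process in processes:
        process.join()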

Send data to Python thread, then read a response using Queue

It's quite easy to send or receive data through threads using the queue module when doing one thing at a time, but I haven't figured out how to send something to a thread and then properly wait for a response.
In the example below, I was expecting to send something to the thread to be processed and then harvest the result, but the t.queue.get() in the main function receives what was just sent above instead of waiting for the thread to respond. How can I get around this?
#!/usr/bin/env python3
from threading import Thread
from queue import Queue

class MyThread(Thread):
    queue: Queue

    def __init__(self, *args, **kwargs):
        super().__init__()
        self.queue = Queue()
        self.daemon = True

    # receives a name, then prints "Hello, name!"
    def run(self):
        while True:
            val = self.queue.get()
            if not val:
                break
            self.queue.put(f'Hello, {val}!')

def main():
    t = MyThread()
    t.start()

    # sends string to thread
    t.queue.put('Jurandir')

    # expects to receive "Hello, Jurandir!",
    # but "Jurandir" is immediately returned
    ret = t.queue.get()
    print(ret)

if __name__ == '__main__':
    main()
The thing is that you are getting the alleged result from the queue immediately, before the worker has added the real result. You can split this into an "input queue" and a "results queue", and then wait in the main thread until something shows up in the results queue.
#!/usr/bin/env python3
from threading import Thread
from queue import Queue

class MyThread(Thread):
    def __init__(self, *args, **kwargs):
        super().__init__()
        self.input_queue = Queue()
        self.results_queue = Queue()
        self.daemon = True

    # receives a name, then prints "Hello, name!"
    def run(self):
        while True:
            val = self.input_queue.get()
            if not val:
                break
            self.results_queue.put(f'Hello, {val}!')

def main():
    t = MyThread()
    t.start()

    # sends string to thread
    t.input_queue.put('Jurandir')

    ret = t.results_queue.get()
    while ret is None:
        ret = t.results_queue.get()
    print(ret)

if __name__ == '__main__':
    main()
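
As a variation (a sketch, not part of the answer above; the ask() helper is hypothetical), the two queues can be wrapped in a small request/response method so the caller never touches them directly. Note that Queue.get() already blocks until an item is available, so no polling loop is needed:

from threading import Thread
from queue import Queue

class EchoThread(Thread):
    def __init__(self):
        super().__init__(daemon=True)
        self.input_queue = Queue()
        self.results_queue = Queue()

    def run(self):
        while True:
            val = self.input_queue.get()
            if val is None:          # sentinel: shut the worker down
                break
            self.results_queue.put(f'Hello, {val}!')

    def ask(self, val):
        # send a request and block until the matching reply arrives
        self.input_queue.put(val)
        return self.results_queue.get()

t = EchoThread()
t.start()
print(t.ask('Jurandir'))   # -> Hello, Jurandir!
t.input_queue.put(None)    # stop the worker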

Python multiprocessing: Event.wait() blocking other processes

I'm working with a toy multiprocessing problem, and event signalling is not working as expected. The multiprocessing documentation defers the detailed description of Event() to the threading documentation, and the methods described there are precisely what I'm trying to use. I want worker processes (subclassed from multiprocessing.Process) spawned by a parent class to wait for a start signal from the parent class, do their thing, then terminate. What seems to be happening, however, is that the first process, once running, blocks any others. What's going on here, and how do I fix it?
from multiprocessing import Process, Event, cpu_count
from time import time, sleep

class Worker(Process):
    def __init__(self, my_id, caller):
        Process.__init__(self)
        self.caller = caller
        self.my_id = my_id

    def run(self):
        print("%i started" % self.my_id)
        self.caller.start_flag.wait()
        print("%i sleeping" % self.my_id)
        sleep(2000)

class ParentProcess(object):
    def __init__(self, num_procs):
        self.procs = []
        self.start_flag = Event()
        for i in range(num_procs):
            self.procs.append(Worker(i, self))

    def run(self):
        for proc in self.procs:
            proc.run()
        self.start_flag.set()
        for proc in self.procs:
            proc.join()
            print("%i done" % proc.my_id)

if __name__ == '__main__':
    cpus = cpu_count()
    world = ParentProcess(cpus)
    start = time()
    world.run()
    end = time()
    runtime = end - start
    print("Runtime: %3.6f" % runtime)
This is only outputting "0 started", then hanging. It seems the Event.wait() is blocking all other threads, even the caller. The documentation implies this should not happen.
Here is a working version of the code. When you subclass Process, you implement the run method to define what should run in that process. When you actually want the process to start, you should call its start method (proc.start()).
from multiprocessing import Process, Event
from time import time, sleep

class Worker(Process):
    def __init__(self, my_id, caller):
        Process.__init__(self)
        self.caller = caller
        self.my_id = my_id

    def run(self):
        print("%i started" % self.my_id)
        self.caller.start_flag.wait()
        print("%i sleeping" % self.my_id)
        sleep(5)

class ParentProcess(object):
    def __init__(self, num_procs):
        self.procs = []
        self.start_flag = Event()
        for i in range(num_procs):
            self.procs.append(Worker(i, self))

    def run(self):
        for proc in self.procs:
            proc.start()
        self.start_flag.set()
        for proc in self.procs:
            proc.join()
            print("%i done" % proc.my_id)

if __name__ == '__main__':
    cpus = 4
    world = ParentProcess(cpus)
    start = time()
    world.run()
    end = time()
    runtime = end - start
    print(runtime)
Outputs:
0 started
1 started
2 started
2 sleeping
0 sleeping
1 sleeping
3 started
3 sleeping
0 done
1 done
2 done
3 done
5.01037812233
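
A quick way to see why the original version hangs (a sketch, not from the answer above): run() executes in the calling process, while start() spawns a new one.

import os
from multiprocessing import Process

class P(Process):
    def run(self):
        print('pid inside run:', os.getpid())

if __name__ == '__main__':
    print('parent pid:', os.getpid())
    p = P()
    p.run()      # same pid as the parent: no new process is created
    p2 = P()
    p2.start()   # different pid: a real child process
    p2.join()

Because the original code calls proc.run(), every Worker body executes sequentially in the parent process, and the first start_flag.wait() blocks before set() is ever reached.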

python can't start a new thread

I am building a multi-threading application.
I have set up a thread pool
[a Queue of size N and N Workers that get data from the queue].
When all tasks are done I use
tasks.join()
where tasks is the queue.
The application seems to run smoothly until suddenly at some point (after 20 minutes, for example) it terminates with the error
thread.error: can't start new thread
Any ideas?
Edit: The threads are daemon Threads and the code is like:
while True:
    t0 = time.time()
    keyword_statuses = DBSession.query(KeywordStatus).filter(KeywordStatus.status==0).options(joinedload(KeywordStatus.keyword)).with_lockmode("update").limit(100)
    if keyword_statuses.count() == 0:
        DBSession.commit()
        break
    for kw_status in keyword_statuses:
        kw_status.status = 1
    DBSession.commit()
    t0 = time.time()
    w = SWorker(threads_no=32, network_server='http://192.168.1.242:8180/', keywords=keyword_statuses, cities=cities, saver=MySqlRawSave(DBSession), loglevel='debug')
    w.work()
print 'finished'
When are the daemon threads killed?
When the application finishes, or when work() finishes?
Look at the thread pool and the worker (it's from a recipe):
from Queue import Queue
from threading import Thread, Event, current_thread
import time

event = Event()

class Worker(Thread):
    """Thread executing tasks from a given tasks queue"""
    def __init__(self, tasks):
        Thread.__init__(self)
        self.tasks = tasks
        self.daemon = True
        self.start()

    def run(self):
        '''Start processing tasks from the queue'''
        while True:
            event.wait()
            #time.sleep(0.1)
            try:
                func, args, callback = self.tasks.get()
            except Exception, e:
                print str(e)
                return
            else:
                if callback is None:
                    func(args)
                else:
                    callback(func(args))
                self.tasks.task_done()

class ThreadPool:
    """Pool of threads consuming tasks from a queue"""
    def __init__(self, num_threads):
        self.tasks = Queue(num_threads)
        for _ in range(num_threads):
            Worker(self.tasks)

    def add_task(self, func, args=None, callback=None):
        '''Add a task to the queue'''
        self.tasks.put((func, args, callback))

    def wait_completion(self):
        '''Wait for completion of all the tasks in the queue'''
        self.tasks.join()

    def broadcast_block_event(self):
        '''blocks running threads'''
        event.clear()

    def broadcast_unblock_event(self):
        '''unblocks running threads'''
        event.set()

    def get_event(self):
        '''returns the event object'''
        return event
Also, maybe the problem is that I create SWorker objects in a loop?
What happens to the old SWorker (garbage collection?)?
There is still not enough code to localize the problem, but I'm sure it's because you don't reuse your threads and start too many of them. Did you see the canonical example from the Queue documentation, http://docs.python.org/library/queue.html (bottom of the page)?
I can reproduce your problem with the following code:
import threading
import Queue

q = Queue.Queue()

def worker():
    item = q.get(block=True)  # sleeps forever for now
    do_work(item)
    q.task_done()

# create an infinite number of worker threads; fails
# after some time with "error: can't start new thread"
while True:
    t = threading.Thread(target=worker)
    t.start()

q.join()  # never reached
Instead you must create a pool with a known number of threads and put your data into the queue, like this:
q = Queue()

def worker():
    while True:
        item = q.get()
        do_work(item)
        q.task_done()

for i in range(num_worker_threads):
    t = Thread(target=worker)
    t.daemon = True
    t.start()

for item in source():
    q.put(item)

q.join()  # block until all tasks are done
UPD: In case you need to stop a thread, you can add a flag to it or send it a special marker meaning "stop" to break out of the while loop:
class Worker(Thread):
    break_msg = object()  # just a unique marker object

    def __init__(self):
        Thread.__init__(self)
        self.keep_running = True  # 'continue' is a reserved word, so the flag needs another name

    def run(self):
        while self.keep_running:  # can stop and destroy the thread (variant 1)
            msg = queue.get(block=True)
            if msg is self.break_msg:
                return  # will stop and destroy the thread (variant 2)
            do_work()
            queue.task_done()

workers = [Worker() for _ in xrange(num_workers)]
for w in workers:
    w.start()

for task in tasks:
    queue.put(task)

for _ in xrange(num_workers):
    queue.put(Worker.break_msg)  # stop a thread after all tasks are done; need as many messages as you have threads
OR
queue.join()  # wait until all tasks are done
for w in workers:
    w.keep_running = False
    queue.put(None)  # wake each blocked thread so it can re-check the flag
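
On newer Python versions, concurrent.futures.ThreadPoolExecutor gives the same bounded-pool behaviour without writing a Worker class (a sketch with illustrative names, not part of the original answer):

from concurrent.futures import ThreadPoolExecutor

def do_work(item):
    return item * 2   # placeholder task

items = range(100)
with ThreadPoolExecutor(max_workers=8) as pool:   # never more than 8 worker threads exist
    results = list(pool.map(do_work, items))
print(results[:5])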
