I am using the multiprocessing module in Python and triggering many processes. Is there a way I can find out the START TIME of each process that I triggered?
Since the processes are triggered quickly, I am looking to get the time in milliseconds or even nanoseconds to differentiate each process.
This is the piece of code that I have:
import multiprocessing
import time
def myProcess(processName):
print "This is a method that will run in parallel: processName"
time.sleep(120)
num = 100
for n in range(1,num):
processName = "Process %s" % n
    proc = multiprocessing.Process(target=myProcess, args=(processName,))  # args must be a tuple
proc.start()
proc.join()
print "process startTime: %s " ?????
You can write your own wrapper class to record the start time:
import datetime
import multiprocessing

class Proc(object):
    def __init__(self, cmd):
        self.cmd = cmd

    def run(self):
        proc = multiprocessing.Process(target=myProcess, args=(self.cmd,))
        self.starttime = datetime.datetime.now()
        proc.start()
        proc.join()  # Don't use this if you don't want to wait.

procobj = Proc(cmd)
procobj.run()
procobj.starttime  # gives you the start time
Don't do that. multiprocessing.Process objects have a pid attribute -- the process ID, which is the process's unique identifier at the OS level. Use that instead.
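If what you actually want is the OS-level start time of each child, one way (a sketch assuming the third-party psutil package, which is not part of the original answer) is to look it up from that pid:

import multiprocessing
import time
import psutil  # third-party package, assumed to be installed

def myProcess(processName):
    print("This is a method that will run in parallel: %s" % processName)
    time.sleep(120)

if __name__ == "__main__":
    proc = multiprocessing.Process(target=myProcess, args=("Process 1",))
    proc.start()
    # psutil reports the creation time as seconds since the epoch, with sub-second precision
    print("pid %d started at %f" % (proc.pid, psutil.Process(proc.pid).create_time()))
    proc.join()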
You can pass a Queue to the process and save the start times there:
def myProcess(processName,times):
print "This is a method that will run in parallel: processName"
times.put((processName,time.time()))
time.sleep(120)
q = multiprocessing.Queue()
for n in range(num):
processName = "Process %s" % n
proc = multiprocessing.Process(target=myProcess,args=(processName,q))
proc.start()
proc.join()
while not q.empty():
a = q.get()
    print "%s startTime: %f" % a
I'm experimenting with the multiprocessing module and I'm getting some strange behavior. I have a list of tasks, which I first enqueue to a task_queue. Then I start all processes, which have access to both the task queue and the result queue. After the processes have started, I actively check for new content in the result queue while there are active processes.
The weird behavior is that when I start the script below, 4 of the 5 processes exit immediately, and all the work is done by a single process. No except Exception clause is entered.
from multiprocessing import Process, Queue
from time import sleep, time
from queue import Empty
def function_doing_heavy_computation(task):
return task + 1
def service_function(func, tasks_q, result_q):
"""
:param func: user passed function that takes one argument - the task and returns the result of processing the task
:param tasks_q:
:param result_q:
"""
while True:
try:
task = tasks_q.get_nowait()
except Empty:
result_q.close() # https://docs.python.org/2/library/multiprocessing.html#multiprocessing.Queue.close
result_q.join_thread()
print("Worker finished ")
break
except Exception as ex:
print(str(ex))
try:
res = func(task)
except Exception as ex:
print(str(ex))
result_q.put_nowait(res)
def multiproc(func, all_tasks, num_procs):
result = []
task_queue, result_queue = Queue(), Queue()
# add the tasks to the task queue
start_put_tasks = time()
for x in all_tasks:
task_queue.put_nowait(x)
print("Finished adding tasks in %.2f" % (time() - start_put_tasks))
# create the processes and pass them the task and result queue
start_create_procs = time()
procs = []
for _ in range(num_procs):
p = Process(target=service_function, args=(func, task_queue, result_queue))
procs.append(p)
for p in procs:
p.start()
print("Started %i workers in %.2f" % (len(procs), time() - start_create_procs))
# collect the results in a list, and return it
start_drain_queue = time()
liveprocs = list(procs)
while liveprocs:
# drain the current contents of the result_queue
while True:
try:
result.append(result_queue.get_nowait())
except Empty:
break
# set the currently active procs. while loop will exit if all procs have terminated
liveprocs = [p for p in procs if p.is_alive()]
# no process has put a result/all ready results have been drained. wait for new results to arrive
sleep(.1)
print("Finished draining result queue in %.2f" % (time() - start_drain_queue))
if len(result) != len(all_tasks):
raise RuntimeError("Only %i/%i tasks processed" % (len(result), len(all_tasks)))
return result
if __name__ == '__main__':
start = time()
# a task is just a number here.
    # normally there are more tasks than workers
tasks = range(100000)
result = multiproc(func=function_doing_heavy_computation, all_tasks=tasks, num_procs=5)
print("Done in %s seconds" % str(time() - start))
assert len(tasks) == len(result)
print("Processed %i tasks" % len(result))
My multi-threading script is raising this error:
thread.error : can't start new thread
when it reached 460 threads:
threading.active_count() = 460
I assume the old threads keep stacking up, since the script didn't kill them. This is my code:
import threading
import Queue
import time
import os
import csv
def main(worker):
#Do Work
print worker
return
def threader():
while True:
worker = q.get()
main(worker)
q.task_done()
def main_threader(workers):
global q
global city
q = Queue.Queue()
for x in range(20):
t = threading.Thread(target=threader)
t.daemon = True
print "\n\nthreading.active_count() = " + str(threading.active_count()) + "\n\n"
t.start()
for worker in workers:
q.put(worker)
q.join()
How do I kill the old threads when their job is done? (Is the function returning not enough?)
The Python threading API doesn't have any function to kill a thread (there is nothing like threading.kill(PID)).
That said, you will have to code some thread-stopping mechanism yourself. For example, your thread should somehow decide that it should terminate (e.g. check some global variable, or check whether some signal has been sent) and simply return.
For example:
import threading
nthreads = 7
you_should_stop = [0 for _ in range(nthreads)]
def Athread(number):
while True:
if you_should_stop[number]:
print "Thread {} stopping...".format(number)
return
print "Running..."
for x in range(nthreads):
threading.Thread(target = Athread, args = (x, )).start()
for x in range(nthreads):
you_should_stop[x] = 1
print "\nStopped all threads!"
I am trying to get to grips with threading, and started with the Python Module of the Week examples.
According to the code below:
import threading
def worker(arg=None):
"""thread worker function"""
print 'Worker thread: %s\n' % arg
return
threads = []
for i in range(5):
t = threading.Thread(target=worker, args=str(i), name="threadingPrac")
threads.append(t)
t.start()
does this mean that I am starting 5 threads ?
I have just started with threading so want to understand it better.
Yes.
Add import time and time.sleep(5) after the print statement to better see it.
import threading
import time
def worker(arg=None):
"""thread worker function"""
print 'Worker thread: %s\n' % arg
time.sleep(5)
return
threads = []
for i in range(5):
t = threading.Thread(target=worker, args=str(i), name="threadingPrac")
threads.append(t)
t.start()
Yes. You can check the length of the threads list by adding this line at the bottom of your code:
print len(threads)
Output:
5 #Number of threads
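If you want to confirm it at runtime rather than by the length of your own list, the threading module itself can report the live threads (a small sketch I am adding here; with the time.sleep(5) version above, the workers stay alive long enough to show up):

import threading
print(threading.active_count())                  # main thread plus any workers still alive
print([t.name for t in threading.enumerate()])   # names of all currently live threads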
I have to run a program on 200 files in a round robin.
Right now I have them running like this:
for combo in it.combinations(files, 2):
cmd = ["command", combo[0], combo[1]]
subprocess.Popen(cmd)
I would like to run only, say, 60 at a time so as not to overwhelm the computer, since the command is pretty processor-intensive. What's the best way to pause the loop once 60 processes are running, and then start again once one has finished, so that there are always 60 processes running?
#!/usr/bin/env python
import itertools
import subprocess
from multiprocessing.dummy import Pool # use threads
def run(combo):
cmd = ["command", combo[0], combo[1]]
return combo, subprocess.call(cmd)
def main():
p = Pool(60) # 60 subprocesses at a time
for combo, rc in p.imap_unordered(run, itertools.combinations(files, 2)):
print("%s exited with %s" % (combo, rc))
p.close()
p.join()
if __name__ == "__main__":
main()
This answer demonstrates various techniques to limit the number of concurrent subprocesses: it shows multiprocessing.Pool, concurrent.futures, and threading + Queue based solutions.
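For instance, a concurrent.futures variant of the same idea could look roughly like this (a sketch, assuming Python 3; the files list below is a placeholder for the question's 200 files, and "command" is the question's command):

import itertools
import subprocess
from concurrent.futures import ThreadPoolExecutor

files = ["a.txt", "b.txt", "c.txt"]  # placeholder for the real file list

def run(combo):
    return combo, subprocess.call(["command", combo[0], combo[1]])

# at most 60 subprocesses run at any one time
with ThreadPoolExecutor(max_workers=60) as pool:
    for combo, rc in pool.map(run, itertools.combinations(files, 2)):
        print("%s exited with %s" % (combo, rc))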
This might help:
import itertools as it
import time
import subprocess
files = range(5)
max_load = 3
sleep_interval = 0.5
pid_list = []
for combo in it.combinations(files, 2):
# Random command that takes time
cmd = ['sleep', str(combo[0]+combo[1])]
# Launch and record this command
print "Launching: ", cmd
pid = subprocess.Popen(cmd)
pid_list.append(pid)
    # Deal with the condition of exceeding the maximum load
while len(filter(lambda x: x.poll() is None, pid_list)) >= max_load:
time.sleep(sleep_interval)
You could do something really simple like:
from time import sleep

count = 0
for combo in it.combinations(files, 2):
    cmd = ["command", combo[0], combo[1]]
    subprocess.Popen(cmd)
    count = count + 1
    # once 60 are in flight, wait for one to finish before launching the next
    while count >= 60:
        if subprocess_is_done:
            count = count - 1
        sleep(5)
Obviously you'd need to figure out how to get subprocess_is_done from your command.
This works for trivial cases as far as I can tell, but I have no clue what you're trying to run...
You want something like this:
import socket
import threading
import Queue
import subprocess
class IPThread(threading.Thread):
def __init__(self, queue, num):
super(IPThread, self).__init__()
self.queue = queue
self.num = num
def run(self):
while True:
try:
args = self.queue.get_nowait()
cmd = ["echo"] + [str(i) for i in args]
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
out, err = p.communicate()
print out
except Queue.Empty:
# Nothing left in the Queue -- we are done
print "Queue %d done" % self.num
break
except Exception as err:
# Handle exception
print err
self.queue.task_done()
def create_threads(q, size):
for i in range(size):
thread = IPThread(q, i)
thread.setDaemon(True)
thread.start()
q.join()
def fill_queue(q):
# Call q.put(args) in a loop to populate Queue with arguments
from itertools import permutations
x = list(range(20))
for arg1, arg2 in permutations(x, 2):
q.put([arg1, arg2])
print q.qsize()
def main():
q = Queue.Queue()
fill_queue(q)
create_threads(q, 60)
print "Done"
if __name__ == '__main__':
main()
Create a queue of things to work on. Specialize your Thread-derived class. Spin up your threads. Wait for them to be done.
You can tell that the tasks are running concurrently because their output interferes with each other. It's a feature!
I'm trying to use a queue with the multiprocessing library in Python. After executing the code below, the print statements work, but the processes do not quit after I call join on the Queue and are still alive. How can I terminate the remaining processes?
Thanks!
def MultiprocessTest(self):
print "Starting multiprocess."
print "Number of CPUs",multiprocessing.cpu_count()
num_procs = 4
def do_work(message):
print "work",message ,"completed"
def worker():
while True:
item = q.get()
do_work(item)
q.task_done()
q = multiprocessing.JoinableQueue()
for i in range(num_procs):
p = multiprocessing.Process(target=worker)
p.daemon = True
p.start()
source = ['hi','there','how','are','you','doing']
for item in source:
q.put(item)
print "q close"
q.join()
#q.close()
print "Finished everything...."
print "num active children:",multiprocessing.active_children()
try this:
import multiprocessing
num_procs = 4
def do_work(message):
print "work",message ,"completed"
def worker():
for item in iter( q.get, None ):
do_work(item)
q.task_done()
q.task_done()
q = multiprocessing.JoinableQueue()
procs = []
for i in range(num_procs):
procs.append( multiprocessing.Process(target=worker) )
procs[-1].daemon = True
procs[-1].start()
source = ['hi','there','how','are','you','doing']
for item in source:
q.put(item)
q.join()
for p in procs:
q.put( None )
q.join()
for p in procs:
p.join()
print "Finished everything...."
print "num active children:", multiprocessing.active_children()
Your workers need a sentinel to terminate, or they will just sit blocked on their reads. Note that sleeping while polling the queue, instead of joining the processes, lets you display status information, etc.
My preferred template is:
import multiprocessing
from time import sleep

num_procs = 4  # as in the question

def worker(q,nameStr):
print 'Worker %s started' %nameStr
while True:
item = q.get()
if item is None: # detect sentinel
break
print '%s processed %s' % (nameStr,item) # do something useful
q.task_done()
print 'Worker %s Finished' % nameStr
q.task_done()
q = multiprocessing.JoinableQueue()
procs = []
for i in range(num_procs):
nameStr = 'Worker_'+str(i)
p = multiprocessing.Process(target=worker, args=(q,nameStr))
p.daemon = True
p.start()
procs.append(p)
source = ['hi','there','how','are','you','doing']
for item in source:
q.put(item)
for i in range(num_procs):
q.put(None) # send termination sentinel, one for each process
while not q.empty(): # wait for processing to finish
sleep(1) # manage timeouts and status updates etc.
Here is a sentinel-free method for the relatively simple case where you put a number of tasks on a JoinableQueue, then launch worker processes that consume the tasks and exit once they read the queue "dry". The trick is to use JoinableQueue.get_nowait() instead of get(). get_nowait(), as the name implies, tries to get a value from the queue in a non-blocking manner and if there's nothing to be gotten then a queue.Empty exception is raised. The worker handles this exception by exiting.
Rudimentary code to illustrate the principle:
import multiprocessing as mp
from queue import Empty
def worker(q):
while True:
try:
work = q.get_nowait()
# ... do something with `work`
q.task_done()
except Empty:
break # completely done
# main
worknum = 4
jq = mp.JoinableQueue()
# fill up the task queue
# let's assume `tasks` contains some sort of data
# that your workers know how to process
for task in tasks:
jq.put(task)
procs = [ mp.Process(target=worker, args=(jq,)) for _ in range(worknum) ]
for p in procs:
p.start()
for p in procs:
p.join()
The advantage is that you do not need to put the "poison pills" on the queue so the code is a bit shorter.
IMPORTANT: in more complex situations where producers and consumers use the same queue in an "interleaved" manner and the workers may have to wait for new tasks to come along, the "poison pill" approach should be used. My suggestion above is for simple cases where the workers "know" that if the task queue is empty, then there's no point hanging around any more.
You have to clear the queue before joining the process, but q.empty() is unreliable.
The best way to clear the queue is to count the number of successful gets or loop until you receive a sentinel value, just like a socket with a reliable network.
The code below may not be very relevant, but I post it for your comments/feedback so we can learn together. Thank you!
import multiprocessing
def boss(q,nameStr):
source = range(1024)
for item in source:
q.put(nameStr+' '+str(item))
q.put(None) # send termination sentinel, one for each process
def worker(q,nameStr):
while True:
item = q.get()
if item is None: # detect sentinel
break
print '%s processed %s' % (nameStr,item) # do something useful
q = multiprocessing.Queue()
procs = []
num_procs = 4
for i in range(num_procs):
nameStr = 'ID_'+str(i)
p = multiprocessing.Process(target=worker, args=(q,nameStr))
procs.append(p)
p = multiprocessing.Process(target=boss, args=(q,nameStr))
procs.append(p)
for j in procs:
j.start()
for j in procs:
j.join()
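For reference, here is a small sketch of the counting approach mentioned above (my own illustration, not part of the original answer): the parent performs exactly one get per task it put, so the result queue is fully drained before the workers are joined and q.empty() never has to be trusted.

import multiprocessing

def worker(task_q, result_q):
    # pull tasks until the sentinel arrives, pushing exactly one result per task
    for item in iter(task_q.get, None):
        result_q.put(item * 2)

if __name__ == '__main__':
    task_q, result_q = multiprocessing.Queue(), multiprocessing.Queue()
    tasks = range(100)
    procs = [multiprocessing.Process(target=worker, args=(task_q, result_q))
             for _ in range(4)]
    for p in procs:
        p.start()
    for t in tasks:
        task_q.put(t)
    for _ in procs:
        task_q.put(None)                          # one sentinel per worker
    results = [result_q.get() for _ in tasks]     # count the gets: exactly len(tasks)
    for p in procs:
        p.join()                                  # safe: the result queue has been drained
    print("collected %d results" % len(results))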