Run x number of commands at a time - python

I have to run a program on 200 files in a round robin.
Right now I have them running like this:
for combo in it.combinations(files, 2):
    cmd = ["command", combo[0], combo[1]]
    subprocess.Popen(cmd)
I would like to run only, say, 60 at a time so as not to overwhelm the computer, since the command is pretty processor-intensive. What's the best way to pause the loop once 60 processes are running, and then start again once one has finished, so that there are always 60 processes running?

#!/usr/bin/env python
import itertools
import subprocess
from multiprocessing.dummy import Pool  # use threads

def run(combo):
    cmd = ["command", combo[0], combo[1]]
    return combo, subprocess.call(cmd)

def main():
    p = Pool(60)  # 60 subprocesses at a time
    for combo, rc in p.imap_unordered(run, itertools.combinations(files, 2)):
        print("%s exited with %s" % (combo, rc))
    p.close()
    p.join()

if __name__ == "__main__":
    main()
This answer demonstrates various techniques for limiting the number of concurrent subprocesses: it shows multiprocessing.Pool-, concurrent.futures-, and threading + Queue-based solutions.
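For instance, a minimal concurrent.futures sketch of the same idea (assuming the same files list and a real command in place of "command", as in the question) might look like:

import itertools
import subprocess
from concurrent.futures import ThreadPoolExecutor

def run(combo):
    return combo, subprocess.call(["command", combo[0], combo[1]])

with ThreadPoolExecutor(max_workers=60) as pool:  # at most 60 commands at once
    for combo, rc in pool.map(run, itertools.combinations(files, 2)):
        print("%s exited with %s" % (combo, rc))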

This might help:
import itertools as it
import time
import subprocess

files = range(5)
max_load = 3
sleep_interval = 0.5
pid_list = []

for combo in it.combinations(files, 2):
    # Random command that takes time
    cmd = ['sleep', str(combo[0] + combo[1])]
    # Launch and record this command
    print "Launching: ", cmd
    pid = subprocess.Popen(cmd)
    pid_list.append(pid)
    # Deal with condition of exceeding maximum load
    while len(filter(lambda x: x.poll() is None, pid_list)) >= max_load:
        time.sleep(sleep_interval)

You could do something really simple like:
from time import sleep

count = 0
for combo in it.combinations(files, 2):
    # wait here while 60 commands are already running
    while count >= 60:
        if subprocess_is_done:
            count = count - 1
        sleep(5)
    cmd = ["command", combo[0], combo[1]]
    subprocess.Popen(cmd)
    count = count + 1
Obviously you'd need to figure out how to get subprocess_is_done from your command.
This works for trivial cases as far as I can tell, but I have no clue what you're trying to run...
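A minimal sketch of one way to get that check, by keeping the Popen handles and using poll() (much like the answer above; files and "command" are assumed from the question):

import itertools as it
import subprocess
from time import sleep

running = []
for combo in it.combinations(files, 2):
    # block while 60 commands are still running, pruning finished ones
    while len(running) >= 60:
        running = [p for p in running if p.poll() is None]
        sleep(5)
    running.append(subprocess.Popen(["command", combo[0], combo[1]]))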

You want something like this:
import threading
import Queue
import subprocess

class IPThread(threading.Thread):
    def __init__(self, queue, num):
        super(IPThread, self).__init__()
        self.queue = queue
        self.num = num

    def run(self):
        while True:
            try:
                args = self.queue.get_nowait()
                cmd = ["echo"] + [str(i) for i in args]
                p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                out, err = p.communicate()
                print out
            except Queue.Empty:
                # Nothing left in the Queue -- we are done
                print "Queue %d done" % self.num
                break
            except Exception as err:
                # Handle exception
                print err
            self.queue.task_done()

def create_threads(q, size):
    for i in range(size):
        thread = IPThread(q, i)
        thread.setDaemon(True)
        thread.start()
    q.join()

def fill_queue(q):
    # Call q.put(args) in a loop to populate Queue with arguments
    from itertools import permutations
    x = list(range(20))
    for arg1, arg2 in permutations(x, 2):
        q.put([arg1, arg2])
    print q.qsize()

def main():
    q = Queue.Queue()
    fill_queue(q)
    create_threads(q, 60)
    print "Done"

if __name__ == '__main__':
    main()
Create a queue of things to work on. Specialize your Thread-derived class. Spin up your threads. Wait for them to be done.
You can tell that the tasks are running concurrently because their output interferes with each other. It's a feature!

Related

Getting information back from a process with a multiprocessing Queue

I am trying to play around with multiprocessing and I would like to communicate between Python's main thread and a subprocess with a Queue. Here is a quick test code I wrote that should get periodically some results generated by the subprocess:
from multiprocessing import Process, Queue
import time

def calculate(queue):
    n = 0
    while n < 10:
        n += 1
        queue.put(n)
        time.sleep(1)
    queue.put(0)

def queue_getter(queue):
    executing = True
    while executing:
        while queue.qsize():
            n = queue.get()
            print(n)
            if n == 0:
                executing = False
        time.sleep(0.1)
    print('done')

queue = Queue()
p = Process(target=calculate, args=(queue,))
p.start()
queue_getter(queue)
p.join()
print('DONE')
This program just hangs forever, while replacing Process with threading.Thread gives the expected result:
1
2
3
4
5
6
7
8
9
10
0
done
DONE
How to make Process behave the same way as Thread in this situation?
Your program works fine on POSIX (UNIX-like) systems.
However, for it to work properly on Windows and macOS, you will need to put the program itself inside a main block, so the file can be imported without side effects.
This is due to the way multiprocessing has to work on Windows and macOS: the child process re-imports your file, so any top-level code would run again. Read the programming guidelines for multiprocessing.
Modify your code like this:
from multiprocessing import Process, Queue
import time

def calculate(queue):
    n = 0
    while n < 10:
        n += 1
        queue.put(n)
        time.sleep(1)
    queue.put(0)

def queue_getter(queue):
    executing = True
    while executing:
        while queue.qsize():
            n = queue.get()
            print(n)
            if n == 0:
                executing = False
        time.sleep(0.1)
    print("done")

if __name__ == "__main__":
    queue = Queue()
    p = Process(target=calculate, args=(queue,))
    p.start()
    queue_getter(queue)
    p.join()
    print("DONE")
Here's a simplified and more robust approach which is (almost) functionally identical to the OP's original, except that it does not print the zero. It uses a managed queue because a plain multiprocessing.Queue cannot be passed as an argument to a ProcessPoolExecutor worker:
from multiprocessing import Manager
from concurrent.futures import ProcessPoolExecutor
import time

def calculate(q):
    for n in range(1, 11):
        q.put(n)
        time.sleep(1)
    q.put(0)

def queue_getter(q):
    while (n := q.get()):
        print(n)

def main():
    with Manager() as manager:
        q = manager.Queue()
        with ProcessPoolExecutor() as executor:
            executor.submit(calculate, q)
            queue_getter(q)

if __name__ == '__main__':
    main()

python interactive subprocess communicate

I am trying to learn how to write interactive subprocess communication.
I need to continually read from stdout and write to stdin. Below is my code; it sort of "works", but I am not sure I am doing it right (it's very hacked-together code).
Assuming I have a script called app.py as follows:
import logging
import random

def app():
    number1 = random.randint(1, 100)
    number2 = random.randint(200, 500)
    logging.info("number1: %s, number2: %s", number1, number2)
    ans = input("enter sum of {} and {}: ".format(number1, number2))
    logging.info("received answer: %s", ans)
    try:
        if int(ans) != number1 + number2:
            raise ValueError
        logging.info("{} is the correct answer".format(ans))
    except (ValueError, TypeError):
        logging.info("{} is incorrect answer".format(ans))

def main():
    logging.basicConfig(level=logging.DEBUG, filename='log.log')
    for x in range(10):
        app()

if __name__ == '__main__':
    main()
To interact with the above script (app.py) I have some very ugly code:
import queue
import time
import threading
import subprocess
import os
import pty
import re

class ReadStdout(object):
    def __init__(self):
        self.queue = queue.Queue()
        self._buffer_ = []

    def timer(self, timeout=0.1):
        buffer_size = 0
        while True:
            if len(self._buffer_) > buffer_size:
                buffer_size = len(self._buffer_)
            time.sleep(timeout)
            if len(self._buffer_) == buffer_size and buffer_size != 0:
                self.queue.put(''.join(self._buffer_))
                self._buffer_ = []
                buffer_size = 0

    def read(self, fd):
        while True:
            self._buffer_.append(fd.read(1))

    def run(self):
        timer = threading.Thread(target=self.timer)
        timer.start()
        master, slave = pty.openpty()
        p = subprocess.Popen(['python', 'app.py'], stdout=slave, stderr=slave,
                             stdin=subprocess.PIPE, close_fds=True)
        stdout = os.fdopen(master)
        read_thread = threading.Thread(target=self.read, args=(stdout,))
        read_thread.start()
        while True:
            if self.queue.empty():
                time.sleep(0.1)
                continue
            msg = self.queue.get()
            digits = re.findall(r'(\d+)', msg)
            ans = int(digits[0]) + int(digits[1])
            print("got message: {} result: {}".format(msg, ans))
            p.stdin.write(b"%d\n" % ans)
            p.stdin.flush()

if __name__ == '__main__':
    x = ReadStdout()
    x.run()
I don't feel I am doing it the right way. What's the correct way to interact with another script? (I need stdout, not just blind writes to stdin.)
Thanks
This code will work with your app.py, so you can get the basic logic of interaction from it. Also, I would suggest you look into the pexpect module; a sketch of it follows after the code below. In any case, you MUST know what to expect from the running program and what its input lines end with. Alternatively, you could implement a timeout while reading a line, so an exception can be raised if something does not go as expected.
import subprocess
from functools import partial

child = subprocess.Popen(['python', 'app.py'], stdin=subprocess.PIPE,
                         stdout=subprocess.PIPE, universal_newlines=True)
# iterate while child is not terminated
while child.poll() is None:
    line = ''
    # read stdout character by character until a colon appears
    for c in iter(partial(child.stdout.read, 1), ''):
        if c == ':':
            break
        line += c
    if "enter sum" in line:
        numbers = filter(str.isdigit, line.split())
        numbers = list(map(int, numbers))
        child.stdin.write("{0}\n".format(sum(numbers)))
        child.stdin.flush()  # make sure the answer actually reaches the child
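For comparison, a minimal pexpect sketch of the same loop, assuming the prompt format from app.py above (pexpect runs the child under a pty, so this is POSIX-only):

import pexpect

child = pexpect.spawn("python app.py", encoding="utf-8")
while True:
    # wait for either the next question or the end of the program
    index = child.expect([r"enter sum of (\d+) and (\d+): ", pexpect.EOF])
    if index == 1:  # EOF: app.py has finished its 10 rounds
        break
    a, b = int(child.match.group(1)), int(child.match.group(2))
    child.sendline(str(a + b))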

How to kill old threads in python

My multi-threading script raises this error:
thread.error : can't start new thread
when it reaches 460 threads:
threading.active_count() = 460
I assume the old threads keep stacking up, since the script didn't kill them. This is my code:
import threading
import Queue
import time
import os
import csv

def main(worker):
    # Do Work
    print worker
    return

def threader():
    while True:
        worker = q.get()
        main(worker)
        q.task_done()

def main_threader(workers):
    global q
    global city
    q = Queue.Queue()
    for x in range(20):
        t = threading.Thread(target=threader)
        t.daemon = True
        print "\n\nthreading.active_count() = " + str(threading.active_count()) + "\n\n"
        t.start()
    for worker in workers:
        q.put(worker)
    q.join()
How do I kill the old threads when their job is done? (Is the function returning not enough?)
The Python threading API doesn't have any function to kill a thread (nothing like threading.kill(PID)).
That said, you should code some thread-stopping logic yourself. For example, your thread should somehow decide that it should terminate (e.g. check some global variable, or check whether some signal has been sent) and simply return.
For example:
import threading

nthreads = 7
you_should_stop = [0 for _ in range(nthreads)]

def Athread(number):
    while True:
        if you_should_stop[number]:
            print "Thread {} stopping...".format(number)
            return
        print "Running..."

for x in range(nthreads):
    threading.Thread(target=Athread, args=(x,)).start()

for x in range(nthreads):
    you_should_stop[x] = 1

print "\nStopped all threads!"

Getting the start time of a new process which I triggered

I am using the multiprocessing module in Python and triggering many processes. Is there a way to find out the START TIME of each process that I triggered?
Since the processes are triggered quickly, I am looking to get the time in milliseconds or even in nanoseconds to differentiate each process.
This is the piece of code that I have:
import multiprocessing
import time

def myProcess(processName):
    print "This is a method that will run in parallel: %s" % processName
    time.sleep(120)

num = 100
for n in range(1, num):
    processName = "Process %s" % n
    proc = multiprocessing.Process(target=myProcess, args=(processName,))
    proc.start()
    proc.join()
    print "process startTime: %s " ?????
You can write your own wrapper class to record the start time:
import datetime
import multiprocessing

class Proc(object):
    def __init__(self, target, args):
        self.target = target
        self.args = args

    def run(self):
        proc = multiprocessing.Process(target=self.target, args=self.args)
        self.starttime = datetime.datetime.now()
        proc.start()
        proc.join()  # don't use this if you don't want to wait

procobj = Proc(myProcess, ("Process 1",))
procobj.run()
procobj.starttime  # gives you the start time
Don't do that. multiprocessing.Process objects have a pid attribute -- the process ID, which is the process's unique identifier at the OS level. Use that instead.
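For instance, if you go the pid route, the third-party psutil package (an assumption here, not something the question uses; pip install psutil) can map a pid to the start time the OS recorded:

import multiprocessing
import time

import psutil  # third-party; assumed available

def my_process():
    time.sleep(5)

if __name__ == "__main__":
    proc = multiprocessing.Process(target=my_process)
    proc.start()
    # create_time() returns the OS start time as seconds since the epoch,
    # with sub-second resolution -- enough to tell the processes apart
    print(psutil.Process(proc.pid).create_time())
    proc.join()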
You can pass a Queue to the process and save the start times there:
import multiprocessing
import time

def myProcess(processName, times):
    print "This is a method that will run in parallel: %s" % processName
    times.put((processName, time.time()))
    time.sleep(120)

num = 100
q = multiprocessing.Queue()
for n in range(num):
    processName = "Process %s" % n
    proc = multiprocessing.Process(target=myProcess, args=(processName, q))
    proc.start()
    proc.join()

while not q.empty():
    a = q.get()
    print "%s startTime: %f" % a

Python Multithreading - Schedule Queue

I don't know why I'm having such a problem with this. Basically, I want a "Worker" that runs constantly against a Queue for the duration of the program. Then, every 10 seconds or so, another method called "Process" comes in and processes the data. Let's assume data is captured every 10 seconds (0, 1, 2, 3, ..... n); the "Process" function receives it, processes the data, and ends, and then the "Worker" goes back to work and does its job until the program has ended.
I have the following code:
import multiprocessing as mp
import time

DELAY_SIZE = 10

def Worker(q):
    print "I'm working..."

def Process(q):
    print "I'm processing.."

queue = mp.Queue(maxsize=DELAY_SIZE)
p = mp.Process(target=Worker, args=(queue,))
p.start()

while True:
    d = queue.get()
    time.sleep(10)
    Process(d)
In this example, it would look like the following:
I'm working...
I'm working...
I'm working...
...
...
...
I'm working...
I'm processing...
I'm processing...
I'm processing...
...
...
I'm working..
I'm working..
Any ideas?
Here is an alternative way using threads:
import threading
import Queue
import time

class Worker(threading.Thread):
    def __init__(self, q):
        threading.Thread.__init__(self)
        self._q = q

    def run(self):
        # here, worker does its job
        # results are pushed to the shared queue
        while True:
            print 'I am working'
            time.sleep(1)
            result = time.time()  # just an example
            self._q.put(result)

def process(q):
    while True:
        if q.empty():
            time.sleep(10)
        print 'I am processing'
        worker_result = q.get()
        # do whatever you want with the result...
        print " ", worker_result

if __name__ == '__main__':
    shared_queue = Queue.Queue()
    worker = Worker(shared_queue)
    worker.start()
    process(shared_queue)
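For completeness, the same producer/consumer rhythm can also stay in multiprocessing, closer to the question's original code. A minimal Python 3 sketch (the worker pushing a timestamp each second is just a stand-in for real data capture):

import multiprocessing as mp
import time

def worker(q):
    # worker does its job and pushes results to the shared queue
    while True:
        print("I'm working...")
        q.put(time.time())  # stand-in for captured data
        time.sleep(1)

def main():
    queue = mp.Queue()
    p = mp.Process(target=worker, args=(queue,), daemon=True)
    p.start()
    while True:
        time.sleep(10)  # let ~10 seconds of results accumulate
        # drain whatever has accumulated; empty() is approximate but fine here
        while not queue.empty():
            print("I'm processing...", queue.get())

if __name__ == "__main__":
    main()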
