How to pass Popen objects to concurrent.futures.ProcessPoolExecutor - python

As you can see below, I have two Popen objects running in parallel. On completion of each process I want to perform a post-processing task on the data received from that process, and I want those post-processing tasks to run in parallel as well. But I get stuck the moment I call the executor.map function: CPU utilisation touches 100% for some time, then drops, but no result is ever produced.
The script hangs indefinitely and the process keeps running. It doesn't even print "Inside letsee".
from subprocess import Popen, PIPE
import concurrent.futures

# cmd1 and cmd2 are the shell commands to run (defined elsewhere)
subProcess1 = Popen(cmd1, stdout=PIPE, stderr=PIPE, shell=True)
subProcess2 = Popen(cmd2, stdout=PIPE, stderr=PIPE, shell=True)

def letsSee(subProcessId):
    print("Inside letsee")
    while True:
        stdoutVar = subProcessId.stdout.readline()
        if stdoutVar == b'' and subProcessId.poll() is not None:
            break
        if stdoutVar:
            print(subProcessId, type(stdoutVar), stdoutVar)
    rc = subProcessId.poll()
    return "Yes"

if __name__ == '__main__':
    with concurrent.futures.ProcessPoolExecutor() as executor:
        print("here")
        xList = [subProcess1, subProcess2]
        print(xList)  # output is [<subprocess.Popen object at 0x01365290>, <subprocess.Popen object at 0x01365292>]
        results = executor.map(letsSee, xList)

Is this acceptable for you? A Popen object wraps OS-level pipe and process handles, so it cannot be pickled, and ProcessPoolExecutor can only pass picklable arguments to its workers. Pass the command strings instead and create the Popen inside the worker:
from subprocess import Popen, PIPE
from concurrent.futures import ProcessPoolExecutor

def letsSee(command):
    print("Inside letsee")
    subProcessId = Popen(command, stdout=PIPE, stderr=PIPE, shell=True)
    while True:
        stdoutVar = subProcessId.stdout.readline()
        if stdoutVar == b'' and subProcessId.poll() is not None:
            break
        if stdoutVar:
            print(subProcessId, type(stdoutVar), stdoutVar)
    rc = subProcessId.poll()
    return "Yes"

if __name__ == '__main__':
    with ProcessPoolExecutor() as executor:
        print("here")
        xList = [cmd1, cmd2]  # pass the command strings, not Popen objects
        print(xList)
        results = executor.map(letsSee, xList)
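If the post-processing should begin the moment each command finishes, a variation with executor.submit and concurrent.futures.as_completed may be closer to what you want. This is a minimal sketch, not the answer's exact code: cmd1 and cmd2 are placeholder commands, and the post-processing step is only indicated by a comment.

import concurrent.futures
from subprocess import Popen, PIPE

cmd1 = "echo one"  # placeholder commands; substitute your real ones
cmd2 = "echo two"

def runAndPostprocess(command):
    # Create the Popen inside the worker: Popen objects cannot be pickled.
    proc = Popen(command, stdout=PIPE, stderr=PIPE, shell=True)
    stdout, stderr = proc.communicate()  # wait for the command and collect output
    # Post-process this command's data here, still inside the worker,
    # so the post-processing of the two commands also runs in parallel.
    return command, proc.returncode, stdout

if __name__ == '__main__':
    with concurrent.futures.ProcessPoolExecutor() as executor:
        futures = [executor.submit(runAndPostprocess, c) for c in (cmd1, cmd2)]
        for future in concurrent.futures.as_completed(futures):
            print(future.result())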

Related

How to transfer the data after the completion of the second process to the first one?

I have two applications, X.exe and Y.exe. First I start server X, after which I have to start Y 20 times and collect 20 different PIDs. Due to certain circumstances these have to run at the same time, so I run X and the PID-collecting script as separate processes. I don't understand how to pass the data generated by the second process to the first one so that I can work with it there. Here is my code:
from subprocess import Popen, PIPE
from multiprocessing import Process

def firstproc():
    process = Popen(
        'X.exe',
        stdout=PIPE)
    while True:
        chunk = process.stdout.readline()
        print(chunk)

def secondproc():
    def getPID():
        return Popen('Y.exe').pid
    return [getPID() for _ in range(20)]

def main():
    p1 = Process(target=firstproc, args=())
    p2 = Process(target=secondproc, args=())
    p1.start()
    p2.start()

if __name__ == "__main__":
    main()
The first process runs indefinitely
To write each line of process 1's stdout into the stdin of the currently running process 2, you can use this code:
from subprocess import Popen, PIPE
from threading import Thread, Lock

class Processes:
    def __init__(self):
        self.p1: Popen = None
        self.p2: Popen = None
        # Both locks start out acquired; they act as handshakes between
        # the two threads rather than as mutual-exclusion locks.
        self.lock = Lock()
        self.lock2 = Lock()
        self.lock.acquire()
        self.lock2.acquire()
        self.p2_run = True

    def firstproc(self):
        self.p1 = Popen(
            'X.exe',
            stdout=PIPE)
        while self.p2_run:
            line = self.p1.stdout.readline()
            self.lock2.acquire()       # wait until a Y.exe instance exists
            if self.p2_run:
                self.p2.communicate(line)
            self.lock.release()        # let secondproc start the next Y.exe

    def secondproc(self):
        def getPID():
            self.p2 = Popen('Y.exe', stdin=PIPE)
            self.lock2.release()       # tell firstproc that p2 is ready
            self.lock.acquire()        # wait until p2 has been fed a line
            return self.p2.pid

        pids = []
        for _ in range(8):
            pids.append(getPID())
        self.p2_run = False
        self.lock2.release()
        return pids

def main():
    t = Processes()
    p1 = Thread(target=t.firstproc, args=())
    p1.start()
    p2 = Thread(target=t.secondproc, args=())
    p2.start()
    p1.join()

if __name__ == "__main__":
    main()
I use threads to synchronize the two processes, so that writing to stdin and reading from stdout happen in lockstep.
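As an alternative to the two Locks, a queue.Queue can carry lines from a reader thread to the code that launches each Y.exe. This is a minimal sketch under the same assumptions as above: X.exe emits at least one stdout line per Y.exe instance, and each Y.exe reads one line on stdin and exits.

from queue import Queue
from subprocess import Popen, PIPE
from threading import Thread

def reader(proc, q):
    # Forward every stdout line from X.exe into the queue.
    for line in iter(proc.stdout.readline, b''):
        q.put(line)

def main():
    q = Queue()
    p1 = Popen('X.exe', stdout=PIPE)
    Thread(target=reader, args=(p1, q), daemon=True).start()
    pids = []
    for _ in range(20):
        line = q.get()        # block until X.exe has produced a line
        p2 = Popen('Y.exe', stdin=PIPE)
        p2.communicate(line)  # feed the line to Y.exe and wait for it to finish
        pids.append(p2.pid)
    print(pids)

if __name__ == "__main__":
    main()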

Simple parallelization of subprocess.run() calls?

Consider the following snippet that runs three different subprocesses one after the other with subprocess.run (and notably all with defaulted kwargs):
import subprocess

p1 = subprocess.run(args1)
if p1.returncode != 0:
    error()

p2 = subprocess.run(args2)
if p2.returncode != 0:
    error()

p3 = subprocess.run(args3)
if p3.returncode != 0:
    error()
How can we rewrite this so that the subprocesses run in parallel with each other? With Popen, right? What exactly does that look like?
For reference, the implementation of subprocess.run is essentially:
with Popen(*popenargs, **kwargs) as process:
    try:
        stdout, stderr = process.communicate(input, timeout=timeout)
    except TimeoutExpired as exc:
        process.kill()
        if _mswindows:
            exc.stdout, exc.stderr = process.communicate()
        else:
            process.wait()
        raise
    except:
        process.kill()
        raise
    retcode = process.poll()
    return CompletedProcess(process.args, retcode, stdout, stderr)
So something like...
with Popen(args1) as p1:
    with Popen(args2) as p2:
        with Popen(args3) as p3:
            try:
                p1.communicate(None, timeout=None)
                p2.communicate(None, timeout=None)
                p3.communicate(None, timeout=None)
            except:
                p1.kill()
                p2.kill()
                p3.kill()
                raise

if p1.poll() != 0 or p2.poll() != 0 or p3.poll() != 0:
    error()
Is that along the right lines?
I would just use multiprocessing to accomplish your mission, making sure that your invocation of subprocess.run uses capture_output=True so that the output from the three commands running in parallel is not interleaved:
import multiprocessing
import subprocess

def runner(args):
    p = subprocess.run(args, capture_output=True, text=True)
    if p.returncode != 0:
        raise Exception(f'Return code was {p.returncode}.')
    return p.stdout, p.stderr

def main():
    args1 = ['git', 'status']
    args2 = ['git', 'log', '-3']
    args3 = ['git', 'branch']
    args = [args1, args2, args3]
    with multiprocessing.Pool(3) as pool:
        results = [pool.apply_async(runner, args=(arg,)) for arg in args]
        for result in results:
            try:
                out, err = result.get()
                print(out, end='')
            except Exception as e:  # runner completed with an Exception
                print(e)

if __name__ == '__main__':  # required for Windows
    main()
Update
With just subprocess we have something like:
import subprocess
args1 = ['git', 'status']
args2 = ['git', 'log', '-3']
args3 = ['git', 'branch']
p1 = subprocess.Popen(args1)
p2 = subprocess.Popen(args2)
p3 = subprocess.Popen(args3)
p1.communicate()
rc1 = p1.returncode
p2.communicate()
rc2 = p2.returncode
p3.communicate()
rc3 = p3.returncode
But, for whatever reason, on my Windows platform I never saw the output from the third subprocess command ('git branch'), so there must be some limitation there. Also, if a command required input from stdin before proceeding, that input would have to be supplied to the communicate method, and since communicate does not return until the entire subprocess has completed, you would get no parallelism. As a general solution, then, this is not very good. In the multiprocessing code, there is no problem with supplying stdin input to communicate.
Update 2
When I recode it as follows, I now get all the expected output. I am not sure why it makes a difference, however. According to the documentation, Popen.communicate:
Interact with process: Send data to stdin. Read data from stdout and stderr, until end-of-file is reached. Wait for process to terminate and set the returncode attribute. The optional input argument should be data to be sent to the child process, or None, if no data should be sent to the child. If streams were opened in text mode, input must be a string. Otherwise, it must be bytes.
So the call should wait for the process to terminate. Nevertheless, my preceding comment still applies: a command that requires stdin input (via a pipe) would not run in parallel without using multiprocessing.
import subprocess

args1 = ['git', 'status']
args2 = ['git', 'log', '-3']
args3 = ['git', 'branch']

with subprocess.Popen(args1) as p1:
    with subprocess.Popen(args2) as p2:
        with subprocess.Popen(args3) as p3:
            p1.communicate()
            rc1 = p1.returncode
            p2.communicate()
            rc2 = p2.returncode
            p3.communicate()
            rc3 = p3.returncode
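If the goal is simply to run the three commands in parallel while keeping subprocess.run's semantics (including stdin input and timeouts), running subprocess.run in worker threads sidesteps the communicate-ordering issue entirely; the real work happens in the subprocesses, so threads are sufficient. A minimal sketch, reusing the same git commands as above:

import subprocess
from concurrent.futures import ThreadPoolExecutor

def run(args):
    # Each thread blocks in its own subprocess.run call, so the three
    # commands execute concurrently.
    return subprocess.run(args, capture_output=True, text=True)

if __name__ == '__main__':
    args_list = [['git', 'status'], ['git', 'log', '-3'], ['git', 'branch']]
    with ThreadPoolExecutor(max_workers=3) as executor:
        completed = list(executor.map(run, args_list))
    for p in completed:
        if p.returncode != 0:
            print('command failed with return code', p.returncode)
        else:
            print(p.stdout, end='')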

Subprocess only allows one input

I'm using subprocess to communicate with an interactive command-line application, but after I send the first command to the application, no further input seems to reach the subprocess. Can anyone show me where my mistake is?
Here's the code:
from subprocess import Popen, PIPE, STDOUT
from threading import Thread
from queue import Queue, Empty
import time

class Prolog(object):
    def __init__(self):
        """
        Opens a subprocess running swi-prolog and reads all the header stuff that it writes
        """
        self.prolog = Popen(r"C:\Program Files\swipl\bin\swipl.exe", stdin=PIPE, stdout=PIPE, stderr=STDOUT, bufsize=1)

        def enqueue_output(out, queue):
            for line in iter(out.readline, b''):
                queue.put(line)
            out.close()

        # This thread runs in the background as long as the program is running;
        # it enqueues all the output from Prolog
        self.q = Queue()
        t = Thread(target=enqueue_output, args=(self.prolog.stdout, self.q))
        t.daemon = True  # thread dies with the program
        t.start()

        out = True
        while out:
            out = self.get_line()

    def get_line(self):
        """
        read line without blocking
        :return: the next line in the output, else False if no more output
        """
        try:
            line = self.q.get(timeout=.1)  # or q.get(timeout=.1)
        except Empty:
            return False
        else:  # got line
            return line

    def send_query(self, query):
        """
        Sends a query to the Prolog shell
        :param query: string containing the query to be sent to the prolog shell
        :return: None
        """
        query = query + "\n"
        query = bytes(query, encoding="utf-8")
        self.prolog.stdin.write(query)
        self.prolog.stdin.flush()

    def get_output(self):
        output = self.get_line()
        if not output:
            return False
        else:
            return output[:-2]

    def query(self, query):
        output = []
        self.send_query(query)
        temp = self.get_output()
        print(temp)
        while not temp:
            time.sleep(.1)
            temp = self.get_output()
        output.append(temp)
        while not temp == b'true.' and not temp == b'false.':
            self.send_query(";")
            temp = self.get_output()
            print(temp)
            while not temp:
                time.sleep(.1)
                temp = self.get_output()
            output.append(temp)
        print(output)

if __name__ == "__main__":
    p = Prolog()
    p.query('[\"GCD.pl\"].')
    p.get_output()
    p.query("permut([a, b, c], X).")
The problem comes during the second call to p.query. The command doesn't seem to be passed to the shell at all, so there is never any output, and the program just gets stuck in the "while not temp" loop in the query method.
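As a sanity check it can help to shrink the problem down to the smallest possible interactive loop. The sketch below is not a diagnosis of the swipl issue; it is just the same write/flush/readline cycle run against a trivial echo child (Python 3.7+ for text=True), to confirm that a second write reaches a subprocess exactly like the first as long as every write is flushed:

import sys
from subprocess import Popen, PIPE

# A tiny echo child stands in for the interactive program.
child_code = ("import sys\n"
              "for line in sys.stdin:\n"
              "    print(line.strip().upper(), flush=True)\n")
proc = Popen([sys.executable, '-c', child_code],
             stdin=PIPE, stdout=PIPE, text=True, bufsize=1)

def ask(line):
    proc.stdin.write(line + '\n')
    proc.stdin.flush()  # without the flush, the child may never see the line
    return proc.stdout.readline().rstrip('\n')

print(ask('hello'))  # HELLO
print(ask('again'))  # AGAIN: the second write behaves exactly like the first
proc.stdin.close()
proc.wait()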

From subprocess.Popen to multiprocessing

I got a function that invokes a process using subprocess.Popen in the following way:
def func():
    ...
    process = subprocess.Popen(substr, shell=True, stdout=subprocess.PIPE)
    timeout = {"value": False}
    timer = Timer(timeout_sec, kill_proc, [process, timeout])
    timer.start()
    for line in process.stdout:
        lines.append(line)
    timer.cancel()
    if timeout["value"] == True:
        return 0
    ...
I call this function from another function in a loop (e.g. over range(1, 100)). How can I make multiple calls to the function with multiprocessing, so that several processes run in parallel each time?
The processes don't depend on each other; the only constraint is that each process works on exactly one index (e.g. no two processes will work on index 1).
Thanks for your help.
Just add the index to your Popen call and create a worker pool with as many workers as you have CPU cores available.
import multiprocessing
import subprocess

def func(index):
    ...
    process = subprocess.Popen(substr + " --index {}".format(index), shell=True, stdout=subprocess.PIPE)
    ...

if __name__ == '__main__':
    p = multiprocessing.Pool(multiprocessing.cpu_count())
    p.map(func, range(1, 100))
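Since the heavy lifting happens in the external subprocesses rather than in Python, worker threads are usually sufficient here and avoid the cost of spawning extra interpreter processes. A minimal sketch with concurrent.futures, keeping the hypothetical --index flag from the answer above; substr is a placeholder for the real command:

import subprocess
from concurrent.futures import ThreadPoolExecutor

substr = "some_command"  # placeholder for the real command

def func(index):
    # Each call works on exactly one index; executor.map never hands the
    # same index to two workers, which satisfies the stated constraint.
    process = subprocess.Popen(substr + " --index {}".format(index),
                               shell=True, stdout=subprocess.PIPE)
    out, _ = process.communicate()
    return index, out

if __name__ == '__main__':
    with ThreadPoolExecutor(max_workers=8) as executor:
        results = list(executor.map(func, range(1, 100)))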

AttributeError: 'module' object has no attribute 'kill'

Here is my code:
def cmdoutput(cmd1, flag):
    finish = time.time() + 50
    p = subprocess.Popen(cmd1, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True)
    while p.poll() is None:
        time.sleep(1)
        if finish < time.time():
            os.kill(p.pid, signal.SIGTERM)
            print "timed out and killed child, collecting what output exists so far"
    if flag == "1":  # to enable container
        out, err = p.communicate(input='container\nzone1')
    else:
        out, err = p.communicate()
    print(out)
    return out
When I run this script, I get
AttributeError: 'module' object has no attribute 'kill'.
What's wrong with my code?
I think you have your own os.py shadowing the standard library module.
Put print os.__file__ before the os.kill(...) line, and you will see what's going on.
UPDATE
os.kill is only available on Unix in Jython.
Instead of os.kill(...), use p.kill().
UPDATE
p.kill() does not work either (at least on Windows with Jython 2.5.2 and 2.5.3); p.pid is None.
http://bugs.jython.org/issue1898
Change your code as follows, adjusting CPYTHON_EXECUTABLE_PATH and CMDOUTPUT_SCRIPT_PATH to match your system:
CPYTHON_EXECUTABLE_PATH = r'c:\python27\python.exe'  # Change path to python.exe
CMDOUTPUT_SCRIPT_PATH = r'c:\users\falsetru\cmdoutput.py'  # Change path to the script

def cmdoutput(cmd1, flag):
    # cmdoutput.py expects the command and the flag as its two arguments
    return subprocess.check_output([CPYTHON_EXECUTABLE_PATH, CMDOUTPUT_SCRIPT_PATH, cmd1, flag])
Save the following code as cmdoutput.py:
import subprocess
import sys
import time

def cmdoutput(cmd1, flag):
    finish = time.time() + 50
    p = subprocess.Popen(cmd1, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True)
    while p.poll() is None:
        time.sleep(1)
        if finish < time.time():
            p.kill()
            return '<<timeout>>'
    if flag == "1":
        out, err = p.communicate('container\nzone1')
    else:
        out, err = p.communicate()
    return out

if __name__ == '__main__':
    cmd, flag = sys.argv[1:3]
    print(cmdoutput(cmd, flag))
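For reference, on CPython 3.5+ (where the Jython limitation does not apply) the whole poll/sleep/kill loop can be replaced by subprocess.run's built-in timeout, which kills the child itself before re-raising TimeoutExpired. A minimal sketch of the same cmdoutput behaviour (text= requires Python 3.7+):

import subprocess

def cmdoutput(cmd1, flag):
    # stdin data mirrors the original 'container\nzone1' case
    stdin_data = 'container\nzone1' if flag == "1" else None
    try:
        p = subprocess.run(cmd1, input=stdin_data, shell=True, text=True,
                           stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
                           timeout=50)
    except subprocess.TimeoutExpired:
        return '<<timeout>>'  # run() has already killed the child
    return p.stdout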
