I need a simple way to pass the stdout of a subprocess as a list to another function using multiprocessing:
The first function, which invokes the subprocess:
def beginRecvTest():
    command = ["receivetest", "-f=/dev/pcan33"]
    incoming = Popen(command, stdout=PIPE)
    processing = iter(incoming.stdout.readline, "")
    lines = list(processing)
    return lines
The function that should receive lines:
def readByLine(lines):
    i = 0
    while i < len(lines):
        system("clear")
        if lines[i][0].isdigit():
            line = lines[i].split()
            dictAdd(line)
        else:
            next
        print ; print "-" * 80
        for _i in mydict.keys():
            printMsg(mydict, _i)
        print "Keys: ", ; print mydict.keys()
        print ; print "-" * 80
        sleep(0.3)
        i += 1
and the main from my program:
if __name__ == "__main__":
    dataStream = beginRecvTest()
    p = Process(target=dataStream)
    reader = Process(target=readByLine, args=(dataStream,))
    p.start()
    reader.start()
I've read up on using queues, but I don't think that's exactly what I need.
The subprocess I'm calling produces an endless stream of data, so some people have suggested using a tempfile, but I am totally confused about how to do that.
At the moment the script only returns the first line read, and all attempts at looping beginRecvTest() have ended in errors.
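For reference, a queue is the usual way to stream an endless subprocess output to another process instead of collecting it into a list first. Below is a minimal sketch of that idea; the stream_lines/consume names and the plain print are illustrative only, not part of the original code:
from multiprocessing import Process, Queue
from subprocess import Popen, PIPE

def stream_lines(q):
    # hypothetical producer: run the subprocess and push each line of its stdout onto the queue
    proc = Popen(["receivetest", "-f=/dev/pcan33"], stdout=PIPE)
    for line in iter(proc.stdout.readline, b""):
        q.put(line)
    q.put(None)  # sentinel so the consumer knows the stream has ended

def consume(q):
    # hypothetical consumer: handle lines as they arrive instead of waiting for a complete list
    while True:
        line = q.get()
        if line is None:
            break
        print(line)

if __name__ == "__main__":
    q = Queue()
    producer = Process(target=stream_lines, args=(q,))
    consumer = Process(target=consume, args=(q,))
    producer.start()
    consumer.start()
    producer.join()
    consumer.join()
The None sentinel lets the consumer exit cleanly if the subprocess ever terminates.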
I'm very new to parallel processing with concurrent.futures. The code seems to work, but I am not sure how to store the result of each process so that I can fail the build at the end if any process's return value is non-zero.
I tried creating a list (exit_status) and appending the results to it, but that raises an IndexError. What am I doing wrong?
#!/usr/bin/env python3
import concurrent.futures
import sys
import shutil
import os
import glob
import multiprocessing as mp
import json
from os import path

def slave(path1, path2, target):
    os.makedirs(target)
    shutil.copy(path1, target)
    shutil.copy(path2, target)
    os.system(<Login command>)
    os.system(<Image creation command>)
    os.system(<Copy to Other slaves or NFS>)
    # If any one of the above operations or commands fails for any of the processes,
    # the script should return 1 at the end of the execution, i.e. fail the build.
def main():
    processed = {}
    exit_status = []
    with open('example.json', 'r') as f:
        data = json.load(f)
    for value in data.items():
        for line in value[1]:
            if line.endswith('.zip'):
                targz = line
            elif line.endswith('.yaml'):
                yaml = line
        processed[targz] = yaml
    with concurrent.futures.ProcessPoolExecutor() as executor:
        for id, (path2, path1) in enumerate(processed.items(), 1):
            target = path.join("/tmp", "dir" + str(id))
            ret = executor.submit(slave, path1, path2, target)
            exit_status.append(ret.result())
    for i in exit_status:
        print("##########Result status: ", i)

if __name__ == "__main__":
    mp.set_start_method('spawn')
    main()
Output of the exit_status list:
##########Result status: None
##########Result status: None
Re: comments
If you want to get the result of a system call in order to act on the results of it, using subprocess.run is much more flexible and powerful than os.system. Additionally, if you actually want to perform the operations in parallel, you can't wait on result() after each task. Otherwise you're only ever doing one thing at a time. Better to submit all the tasks, and collect the Future objects. Then you can iterate over those and wait on each result() now that you've submitted all the work you want the executor to do.
def target_func(path1, path2, target):
    #...
    # instead of os.system, use subprocess.run
    # you can inspect the stdout from the process
    complete_process = subprocess.run(<Login command>, text=True, capture_output=True)
    if "success" not in complete_process.stdout:
        return "uh-oh"
    # you can also just check the return value (0 typically means a clean exit)
    if subprocess.run(<Image creation command>).returncode != 0:
        return "uh-oh"
    # or you can tell `run` to raise an error if the return code is non-zero
    try:
        subprocess.run(<Copy to Other slaves or NFS>, check=True)
    except subprocess.CalledProcessError:
        return "uh-oh"
    return "we did it!"
def main():
    #...
    #...
    with concurrent.futures.ProcessPoolExecutor() as executor:
        for id, (path2, path1) in enumerate(processed.items(), 1):
            target = path.join("/tmp", "dir" + str(id))
            ret = executor.submit(slave, path1, path2, target)
            exit_status.append(ret)
    for i in exit_status:
        print("##########Result status: ", i.result())
I am trying to use this question for my file processing:
Python multiprocessing safely writing to a file
This is my modification of the code:
def listener(q):
    '''listens for messages on the q, writes to file. '''
    while 1:
        reads = q.get()
        if reads == 'kill':
            #f.write('killed')
            break
        for read in reads:
            out_bam.write(read)
        out_bam.flush()
    out_bam.close()
def fetch_reads(line, q):
    parts = line[:-1].split('\t')
    print(parts)
    start, end = int(parts[1]) - 1, int(parts[2]) - 1
    in_bam = pysam.AlignmentFile(args.bam, mode='rb')
    fetched = in_bam.fetch(parts[0], start, end)
    reads = [read for read in fetched if (read.cigarstring and read.pos >= start and read.pos < end and 'S' not in read.cigarstring)]
    in_bam.close()
    q.put(reads)
    return reads
# must use Manager queue here, or will not work
manager = mp.Manager()
q = manager.Queue()

if not args.threads:
    threads = 1
else:
    threads = int(args.threads)

pool = mp.Pool(threads + 1)

# put listener to work first
watcher = pool.apply_async(listener, (q,))

with open(args.bed, 'r') as bed:
    jobs = []
    cnt = 0
    for line in bed:
        # Fire off the read fetchings
        job = pool.apply_async(fetch_reads, (line, q))
        jobs.append(job)
        cnt += 1
        if cnt > 10000:
            break

# collect results from the workers through the pool result queue
for job in jobs:
    job.get()
    print('get')

# now we are done, kill the listener
q.put('kill')
pool.close()
The difference is that I am opening and closing the file inside the function, since otherwise I get unusual errors from bgzip.
At first, print(parts) and print('get') are printed more or less alternately; then 'get' is printed less and less often. Ultimately the code hangs and nothing more is printed (all the parts are printed, but 'get' simply stops appearing). The output file remains zero bytes.
Can anyone lend a hand? Cheers!
I am trying to capture the return value of check_output instead of having it automatically print to the command line. Unfortunately, my solution is not working and I'm not sure why. I've included my code and its output:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import sys
from multiprocessing import Pool
from subprocess import check_output, CalledProcessError

def job(cmd):
    result = ""
    try:
        result = check_output(cmd.split())  # Split string into list.
        print("job result length = {0}".format(len(result)), file=sys.stdout)
    except CalledProcessError as error:
        raise Exception("Exit status of the child process: {0}\
            Command used to spawn child process: {1}\
            Output of the child process: {2}".format(error.returncode, error.cmd, error.output))
def main():
    # Sets up a process pool. Defaults to number of cores.
    # Each input gets passed to job and processed in a separate process.
    p = Pool()
    result = []
    try:
        # cmd_list is just a list of system commands which have been verified to work.
        result = list(p.imap_unordered(job, cmd_list))
        print("main result length = {0}".format(len(result)), file=sys.stdout)
        print("{0}".format(result), file=sys.stdout)
    except Exception as error:
        print("Error: {0}. Aborting...".format(error), file=sys.stderr)
        p.close()
        p.terminate()
    else:
        p.close()
        p.join()

if __name__ == '__main__':
    main()
Output
In addition to the output of each command executed by check_output, my print statements reveal some unexpected results:
job result length = 0
job result length = 0
main result length = 2
[None, None]
I would expect job result length to equal 2 and result to contain the return values of the child processes.
result is a local variable. Either return it:
def job(cmd):
    # something goes here
    return result
Or make it global:
result = ""

def job(cmd):
    global result
    # something goes here
    result = whatever it shall be.
Or parameterize it:
def job(cmd, result):
    result = whatever it shall be.
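In the context of the Pool example above, returning the value is probably the cleanest option, since imap_unordered collects whatever job returns in the parent process. A minimal sketch of that idea (the placeholder commands are illustrative; substitute the verified cmd_list from the question):
from multiprocessing import Pool
from subprocess import check_output, CalledProcessError

def job(cmd):
    # return the captured output so that imap_unordered can collect it in the parent
    try:
        return check_output(cmd.split())
    except CalledProcessError as error:
        return error.output

def main(cmd_list):
    p = Pool()
    try:
        results = list(p.imap_unordered(job, cmd_list))
        print(results)  # now holds each child's output instead of [None, None]
    finally:
        p.close()
        p.join()

if __name__ == '__main__':
    main(["echo hello", "echo world"])  # placeholder commands; use the verified cmd_list instead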
I have a python script that calls a system program and reads the output from a file out.txt, acts on that output, and loops. However, it doesn't work, and a close investigation showed that the python script just opens out.txt once and then keeps on reading from that old copy. How can I make the python script reread the file on each iteration? I saw a similar question here on SO but it was about a python script running alongside a program, not calling it, and the solution doesn't work. I tried closing the file before looping back but it didn't do anything.
EDIT:
I already tried closing and opening, it didn't work. Here's the code:
import subprocess, os, sys

filename = sys.argv[1]
file = open(filename, 'r')
foo = open('foo', 'w')
foo.write(file.read().rstrip())
foo = open('foo', 'a')
crap = open(os.devnull, 'wb')
numSolutions = 0

while True:
    subprocess.call(["minisat", "foo", "out"], stdout=crap, stderr=crap)
    out = open('out', 'r')
    if out.readline().rstrip() == "SAT":
        numSolutions += 1
        clause = out.readline().rstrip()
        clause = clause.split(" ")
        print clause
        clause = map(int, clause)
        clause = map(lambda x: -x, clause)
        output = ' '.join(map(lambda x: str(x), clause))
        print output
        foo.write('\n' + output)
        out.close()
    else:
        break

print "There are ", numSolutions, " solutions."
You need to flush foo so that the external program can see its latest changes. When you write to a file, the data is buffered in the local process and sent to the system in larger blocks. This is done because updating the system file is relatively expensive. In your case, you need to force a flush of the data so that minisat can see it.
foo.write('\n'+output)
foo.flush()
I rewrote it to hopefully be a bit easier to understand:
import os
from shutil import copyfile
import subprocess
import sys

TEMP_CNF = "tmp.in"
TEMP_SOL = "tmp.out"
NULL = open(os.devnull, "wb")

def all_solutions(cnf_fname):
    """
    Given a file containing a set of constraints,
    generate all possible solutions.
    """
    # make a copy of original input file
    copyfile(cnf_fname, TEMP_CNF)
    while True:
        # run minisat to solve the constraint problem
        subprocess.call(["minisat", TEMP_CNF, TEMP_SOL], stdout=NULL, stderr=NULL)
        # look at the result
        with open(TEMP_SOL) as result:
            line = next(result)
            if line.startswith("SAT"):
                # Success - return solution
                line = next(result)
                solution = [int(i) for i in line.split()]
                yield solution
            else:
                # Failure - no more solutions possible
                break
        # disqualify found solution
        with open(TEMP_CNF, "a") as constraints:
            new_constraint = " ".join(str(-i) for i in solution)
            constraints.write("\n")
            constraints.write(new_constraint)

def main(cnf_fname):
    """
    Given a file containing a set of constraints,
    count the possible solutions.
    """
    count = sum(1 for i in all_solutions(cnf_fname))
    print("There are {} solutions.".format(count))

if __name__ == "__main__":
    if len(sys.argv) == 2:
        main(sys.argv[1])
    else:
        print("Usage: {} cnf.in".format(sys.argv[0]))
Open the file inside the loop and close it at the end of each iteration with file_var.close():
for ... :
    ga_file = open('out.txt', 'r')
    ... do stuff
    ga_file.close()
Demo of an implementation below (as simple as possible, this is all of the Jython code needed)...
__author__ = ''
import time

var = 'false'
while var == 'false':
    out = open('out.txt', 'r')
    content = out.read()
    time.sleep(3)
    print content
    out.close()
generates this output:
2015-01-09, 'stuff added'
2015-01-09, 'stuff added' # <-- this is when i just saved my update
2015-01-10, 'stuff added again :)' # <-- my new output from file reads
I strongly recommend reading the error messages; they hold quite a lot of information.
I also think the full file name should be written out for debugging purposes.
I'm trying to capture a string from the output of a subprocess and when the subprocess asks for user input, include the user input in the string, but I can't get stdout to work.
I got the string output from stdout using a while loop, but I don't know how to terminate it after reading the string.
I tried using subprocess.check_output, but then I can't see the prompts for user input.
import subprocess
import sys

child = subprocess.Popen(["java", "findTheAverage"], stdout=subprocess.PIPE, stdin=subprocess.PIPE)
string = u""

while True:
    line = str(child.stdout.read(1))
    if line != '':
        string += line[2]
        print(string)
    else:
        break

print(string)

for line in sys.stdin:
    print(line)
    child.stdin.write(bytes(line, 'utf-8'))
EDIT:
With help and code from Alfe's post I now have a string being built from the subprocess program's output and the user's input to that program, but it's jumbled.
The string appears to first get the first letter of the output, then the user input, then the rest of the output.
Example of string muddling:
U2
3ser! please enter a double:U
4ser! please enter another double: U
5ser! please enter one final double: Your numbers were:
a = 2.0
b = 3.0
c = 4.0
average = 3.0
Is meant to be:
User! please enter a double:2
User! please enter another double: 3
User! please enter one final double: 4
Your numbers were:
a = 2.0
b = 3.0
c = 4.0
average = 3.0
Using the code:
import subprocess
import sys
import signal
import select

def signal_handler(signum, frame):
    raise Exception("Timed out!")

child = subprocess.Popen(["java", "findTheAverage"], universal_newlines=True, stdout=subprocess.PIPE, stdin=subprocess.PIPE)
string = u""
stringbuf = ""

while True:
    print(child.poll())
    if child.poll() != None and not stringbuf:
        break
    signal.signal(signal.SIGALRM, signal_handler)
    signal.alarm(1)
    try:
        r, w, e = select.select([child.stdout, sys.stdin], [], [])
        if child.stdout in r:
            stringbuf = child.stdout.read(1)
            string += stringbuf
            print(stringbuf)
    except:
        print(string)
        print(stringbuf)
    if sys.stdin in r:
        typed = sys.stdin.read(1)
        child.stdin.write(typed)
        string += typed
FINAL EDIT:
Alright, I played around with it and got it working with this code:
import subprocess
import sys
import select
import fcntl
import os

# the string that we will return, filled with tasty program output and user input #
string = ""

# the subprocess running the program #
child = subprocess.Popen(["java", "findTheAverage"], bufsize=0, universal_newlines=True, stdout=subprocess.PIPE, stdin=subprocess.PIPE)

# stuff to stop IO blocks in child.stdout and sys.stdin #
# (I stole it from http://stackoverflow.com/a/8980466/2674170)
fcntl.fcntl(child.stdout.fileno(), fcntl.F_SETFL, os.O_NONBLOCK)
fcntl.fcntl(sys.stdin.fileno(), fcntl.F_SETFL, os.O_NONBLOCK)

# this is here in the unlikely event that the program has #
# finished by the time the main loop is first running,    #
# because if that happened the loop would end without     #
# having added the program's output to the string!        #
progout = ""
typedbuf = "#"

### here we have the main loop; this friendly fellah is
### going to read from the program and the user, and tell
### each other what needs to be known
while True:
    ## stop when the program finishes and there is no more output
    if child.poll() != None and not progout:
        break

    # read from the user
    typed = ""
    while typedbuf:
        try:
            typedbuf = sys.stdin.read(1)
        except:
            break
        typed += typedbuf
    stringbuf = "#"
    string += typed
    child.stdin.write(typed)

    # read from the program
    progout = ""
    progoutbuf = "#"
    while progoutbuf:
        try:
            progoutbuf = child.stdout.read(1)
        except:
            typedbuf = "#"
            break
        progout += progoutbuf
    if progout:
        print(progout)
        string += progout

# the final output string #
print(string)
You need select to read from more than one source at the same time (in your case stdin and the output of the child process).
import select

string = ''
while True:
    r, w, e = select.select([child.stdout, sys.stdin], [], [])
    if child.stdout in r:
        string += child.stdout.read()
    if sys.stdin in r:
        typed = sys.stdin.read()
        child.stdin.write(typed)
        string += typed
You will still need to find a proper breaking condition to leave that loop. But you probably get the idea already.
I want to give a warning at this point: Processes writing into pipes typically buffer until the latest possible moment; you might not expect this because when testing the same program from the command line (in a terminal) typically only lines get buffered. This is due to performance considerations. When writing to a terminal, typically a user expects to see the output as soon as possible. When writing to a pipe, typically a reading process is happy to be given larger chunks in order to sleep longer before they arrive.
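As a rough sketch of one possible breaking condition (an illustration only, assuming the same child object as above and that it was created in the default binary mode, so reads and writes deal in bytes): treat an empty read on child.stdout, after select reports it readable, as the child having closed its end of the pipe.
import os
import select
import sys

string = ''
while True:
    r, w, e = select.select([child.stdout, sys.stdin], [], [])
    if child.stdout in r:
        # os.read returns whatever is currently available; b'' means the child closed the pipe
        chunk = os.read(child.stdout.fileno(), 4096)
        if not chunk:
            break
        string += chunk.decode()
    if sys.stdin in r:
        typed = sys.stdin.readline()
        child.stdin.write(typed.encode())
        child.stdin.flush()
        string += typed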