Work with the stdout of some other process (created with Popen) - python

I would like to count the number of lines written to stdout by a process (here unrar.exe) created with Popen.
import time
from subprocess import Popen, PIPE, STDOUT
p = Popen('unrar.exe x -y myfile.rar', stdout=PIPE)
while (p is not finished): # pseudo code here and next lines...
time.sleep(0.100)
print 'Number of lines written to STDOUT by unrar' + len(PIPE.split('\n'))
How to do this properly ?
Remark : I already looked at p.communicate() (https://python.readthedocs.org/en/v2.7.2/library/subprocess.html#subprocess.Popen.communicate) but this has the effect of blocking the execution of the Python until p has terminated, which is not what I want : I want to be able to print the number of lines written by p when it's running.

I'm looking for an easier solution without using another thread, etc. I don't mind if the process is blocked every 100 ms
If it is a hard requirement that the process must not block then you need threads (or other asynchronous techniques). To emulate non-blocking wc --lines <(cmd):
#!/usr/bin/env python
import io
import shlex
from functools import partial
from subprocess import Popen, PIPE
from threading import Thread
from Queue import Queue
def count_lines(pipe, queue, chunksize=io.DEFAULT_BUFFER_SIZE):
#NOTE: you could write intermediate results here (just drop `sum()`)
queue.put(sum(chunk.count(b'\n')
for chunk in iter(partial(pipe.read, chunksize), b'')))
pipe.close()
p = Popen(shlex.split('unrar.exe x -y myfile.rar'), stdout=PIPE, bufsize=-1)
result = Queue()
Thread(target=count_lines, args=[p.stdout, result]).start()
p.wait() # you can omit it if you want to do something else and call it later
number_of_lines = result.get() # this blocks (you could pass `timeout`)
On the other hand if all you need is "to print the number of lines written by p when it's
running." then you could count lines in the main thread:
#!/usr/bin/env python
import shlex
from subprocess import Popen, PIPE
p = Popen(shlex.split('unrar.exe x -y myfile.rar'), stdout=PIPE, bufsize=1)
count = 0
for line in iter(p.stdout.readline, b''):
count += 1
print count
p.stdout.close()
p.wait()

I don't know if this is the cleanest way to do it, but it works:
from subprocess import Popen, PIPE
import io, time
p = Popen('unrar.exe x -y myfile.rar', stdout = PIPE)
b = ' '
i = 0
while b:
b = p.stdout.readline()
i += 1
print i
print 'FINISHED'

Related

real time logging to file with python subprocess

I expect this is really simple but I can't work this out.
I am trying to write to a log file in real time the output from a DD imaging subprocess - I'm using DD v 8.25 from which you can get regular progress updates using the 'status=progress' option which writes to stderr.
I can get it to log the full output real time by passing the file object to the stderr i.e
log_file = open('mylog.log', 'a')
p = subprocess.Popen['dd command...'], stdout=None, stderr=log_file)
...but I would prefer to intercept the string from stderr first so I can parse it before writing to file.
I have tried threading but I can't seem to get it to write, or if it does, it only does it at the end of the process and not during.
I'm a python noob so example code would be appreciated. Thanks!
UPDATE - NOW WORKING (ISH)
I had a look at the link J.F. Sebastian suggested and found posts about using threads, so after that I used the "kill -USR1" trick to get DD to post progress to stderr which I could then pick up:
#! /usr/bin/env python
from subprocess import PIPE, Popen
from threading import Thread
from queue import Queue, Empty
import time
q = Queue()
def parsestring(mystr):
newstring = mystr[0:mystr.find('bytes')]
return newstring
def enqueue(out, q):
for line in proc1.stderr:
q.put(line)
out.close()
def getstatus():
while proc1.poll() == None:
proc2 = Popen(["kill -USR1 $(pgrep ^dd)"], bufsize=1, shell=True)
time.sleep(2)
with open("log_file.log", mode="a") as log_fh:
start_time = time.time()
#start the imaging
proc1 = Popen(["dd if=/dev/sda1 of=image.dd bs=524288 count=3000"], bufsize=1, stderr=PIPE, shell=True)
#define and start the queue function thread
t = Thread(target=enqueue, args=(proc1.stderr, q))
t.daemon = True
t.start()
#define and start the getstatus function thread
t_getstatus = Thread(target=getstatus, args=())
t_getstatus.daemon
t_getstatus.start()
#get the string from the queue
while proc1.poll() == None:
try: nline = q.get_nowait()
except Empty:
continue
else:
mystr = nline.decode('utf-8')
if mystr.find('bytes') > 0:
log_fh.write(str(time.time()) + ' - ' + parsestring(mystr))
log_fh.flush()
#put in a delay
#time.sleep(2)
#print duration
end_time=time.time()
duration=end_time-start_time
print('Took ' + str(duration) + ' seconds')
The only issue is I can't work out how to improve performance. I only need it to report status every 2 seconds or so but increasing the time delay increases the time of the imaging, which I don't want. That's a question for another post though...
Thanks to both J.F. Sebastian and Ali.
With this example it's possible (with python 3) to stream from stderr to console:
#! /usr/bin/env python
from subprocess import Popen, PIPE
# emulate a program that write on stderr
proc = Popen(["/usr/bin/yes 1>&2 "], bufsize=512, stdout=PIPE, stderr=PIPE, shell=True)
r = b""
for line in proc.stderr:
r += line
print("current line", line, flush=True)
To stream to a file:
#! /usr/bin/env python
from subprocess import Popen, PIPE
with open("log_file.log", mode="b", encoding="utf8") as log_fh:
proc = Popen(["/usr/bin/yes 1>&2 "], bufsize=512, stdout=PIPE, stderr=PIPE, shell=True)
r = b""
# proc.stderr is an io.TextIOWrapper file-like obj
# iter over line
for line in proc.stderr:
r += line
# print("current line", line, flush=True)
log_fh.write(line) # file open in binary mode
# log_fh.write(line.decode("utf8")) # for text mode
log_fh.flush() # flush the content
To display dd's progress report in a terminal and to save (parsed) output to a log file:
#!/usr/bin/env python3
import io
from subprocess import PIPE, Popen
from time import monotonic as timer
cmd = "dd if=/dev/sda1 of=image.dd bs=524288 count=3000 status=progress".split()
with Popen(cmd, stderr=PIPE) as process, \
open("log_file.log", "a") as log_file:
start_time = timer()
for line in io.TextIOWrapper(process.stderr, newline=''):
print(line, flush=True, end='') # no newline ('\n')
if 'bytes' in line:
# XXX parse line here, add flush=True if necessary
print(line, file=log_file)
# print duration
print('Took {duration} seconds'.format(duration=timer() - start_time))
Note
no shell=True: you don't need the shell here. Popen() can run dd directly
no threads, queues: you don't need them here
please, please DO NOT USE while proc1.poll() == None You don't need it here (you'll see EOF on proc1.stderr if proc1.poll() is not None). You may lose data (there could be a buffered content even if the process has exited already). Unrelated: if you need to compare with None; use is None instead of == None
io.TextIOWrapper(newline='') enables text mode
(it uses locale.getpreferredencoding(False)) and it
treats '\r' as a newline too
use the default bufsize=-1 (see io.DEFAULT_BUFFER_SIZE)

How to reuse intermediate results of Popen in Python?

The codes are like this:
from subprocess import Popen, PIPE
p1 = Popen("command1", stdout = PIPE)
p2 = Popen("command2", stdin = p1.stdout, stdout = PIPE)
result_a = p2.communicate()[0]
p1_again = Popen("command1", stdout = PIPE)
p3 = Popen("command3", stdin = p1_again.stdout, stdout = PIPE)
result_b = p3.communicate()[0]
with open("test") as tf:
p1_again_again = Popen("command1", stdout = tf)
p1_again_again.communicate()
The bad part is:
The command1 was executed three times because when I use commnnicate once, the stdout of that Popen object can't be used again. I was just wondering whether there's a method to reuse the intermediate results of PIPE.
Does anyone have ideas about how to make these codes better (better performance as well as less lines of codes)? Thanks!
here is a working solution. I have put example commands for cmd1, cmd2, cmd3 so that you can run it. It just takes the output from the first command and uppercases it in one command and lowercases it in the other.
code
from subprocess import Popen, PIPE, check_output
from tempfile import TemporaryFile
cmd1 = ['echo', 'Hi']
cmd2 = ['tr', '[:lower:]', '[:upper:]']
cmd3 = ['tr', '[:upper:]', '[:lower:]']
with TemporaryFile() as f:
p = Popen(cmd1, stdout=f)
ret_code = p.wait()
f.flush()
f.seek(0)
out2 = Popen(cmd2, stdin=f, stdout=PIPE).stdout.read()
f.seek(0)
out3 = Popen(cmd3, stdin=f, stdout=PIPE).stdout.read()
print out2, out3
output
HI
hi
some of the things to make note of in the solution. the tempfile module is always a great way to go when needing to work with temp files, it will automatically delete the temporary file as a cleanup once the with statement exits, even if there was some io exception thrown through out the with block. cmd1 is run once and output to the temp file, one calls the wait() method to make sure all execution has completed, then we do seek(0) each time so that when we call the read() method on f it is back at the start of the file. As a reference the question Saving stdout from subprocess.Popen to file, helped me in getting the first part of the solution.
If you can read all output of command1 in memory and then run command2, command3 one after another:
#!/usr/bin/env python
from subprocess import Popen, PIPE, check_output as qx
cmd1_output = qx(['ls']) # get all output
# run commands in sequence
results = [Popen(cmd, stdin=PIPE, stdout=PIPE).communicate(cmd1_output)[0]
for cmd in [['cat'], ['tr', 'a-z', 'A-Z']]]
Or you can write to a temporary file first if command1 generates a gigantic output that can't fit in memory as #Marwan Alsabbagh suggested:
#!/usr/bin/env python
import tempfile
from subprocess import check_call, check_output as qx
with tempfile.TemporaryFile() as file: # deleted automatically on closing
# run command1, wait for completion
check_call(['ls'], stdout=file)
# run commands in sequence
results = []
for cmd in [['cat'], ['tr', 'a-z', 'A-Z']]:
file.seek(0)
results.append(qx(cmd, stdin=file))
To handle input/output to/from subprocesses in parallel you could use threading:
#!/usr/bin/env python3
from contextlib import ExitStack # pip install contextlib2 (stdlib since 3.3)
from subprocess import Popen, PIPE
from threading import Thread
def tee(fin, *files):
try:
for chunk in iter(lambda: fin.read(1 << 10), b''):
for f in files: # fan out
f.write(chunk)
finally:
for f in (fin,) + files:
try:
f.close()
except OSError:
pass
with ExitStack() as stack:
# run commands asynchronously
source_proc = Popen(["command1", "arg1"], stdout=PIPE)
stack.callback(source_proc.wait)
stack.callback(source_proc.stdout.close)
processes = []
for command in [["tr", "a-z", "A-Z"], ["cat"]]:
processes.append(Popen(command, stdin=PIPE, stdout=PIPE))
stack.callback(processes[-1].wait)
stack.callback(processes[-1].stdout.close) # use .terminate()
stack.callback(processes[-1].stdin.close) # if it doesn't kill it
fout = open("test.txt", "wb")
stack.callback(fout.close)
# fan out source_proc's output
Thread(target=tee, args=([source_proc.stdout, fout] +
[p.stdin for p in processes])).start()
# collect results in parallel
results = [[] for _ in range(len(processes))]
threads = [Thread(target=r.extend, args=[iter(p.stdout.readline, b'')])
for p, r in zip(processes, results)]
for t in threads: t.start()
for t in threads: t.join() # wait for completion
I've used ExitStack here for a proper clean up in case of exceptions.

Run a subprocess and show the output in the application [duplicate]

My python script uses subprocess to call a linux utility that is very noisy. I want to store all of the output to a log file and show some of it to the user. I thought the following would work, but the output doesn't show up in my application until the utility has produced a significant amount of output.
#fake_utility.py, just generates lots of output over time
import time
i = 0
while True:
print hex(i)*512
i += 1
time.sleep(0.5)
#filters output
import subprocess
proc = subprocess.Popen(['python','fake_utility.py'],stdout=subprocess.PIPE)
for line in proc.stdout:
#the real code does filtering here
print "test:", line.rstrip()
The behavior I really want is for the filter script to print each line as it is received from the subprocess. Sorta like what tee does but with python code.
What am I missing? Is this even possible?
Update:
If a sys.stdout.flush() is added to fake_utility.py, the code has the desired behavior in python 3.1. I'm using python 2.6. You would think that using proc.stdout.xreadlines() would work the same as py3k, but it doesn't.
Update 2:
Here is the minimal working code.
#fake_utility.py, just generates lots of output over time
import sys, time
for i in range(10):
print i
sys.stdout.flush()
time.sleep(0.5)
#display out put line by line
import subprocess
proc = subprocess.Popen(['python','fake_utility.py'],stdout=subprocess.PIPE)
#works in python 3.0+
#for line in proc.stdout:
for line in iter(proc.stdout.readline,''):
print line.rstrip()
I think the problem is with the statement for line in proc.stdout, which reads the entire input before iterating over it. The solution is to use readline() instead:
#filters output
import subprocess
proc = subprocess.Popen(['python','fake_utility.py'],stdout=subprocess.PIPE)
while True:
line = proc.stdout.readline()
if not line:
break
#the real code does filtering here
print "test:", line.rstrip()
Of course you still have to deal with the subprocess' buffering.
Note: according to the documentation the solution with an iterator should be equivalent to using readline(), except for the read-ahead buffer, but (or exactly because of this) the proposed change did produce different results for me (Python 2.5 on Windows XP).
Bit late to the party, but was surprised not to see what I think is the simplest solution here:
import io
import subprocess
proc = subprocess.Popen(["prog", "arg"], stdout=subprocess.PIPE)
for line in io.TextIOWrapper(proc.stdout, encoding="utf-8"): # or another encoding
# do something with line
(This requires Python 3.)
Indeed, if you sorted out the iterator then buffering could now be your problem. You could tell the python in the sub-process not to buffer its output.
proc = subprocess.Popen(['python','fake_utility.py'],stdout=subprocess.PIPE)
becomes
proc = subprocess.Popen(['python','-u', 'fake_utility.py'],stdout=subprocess.PIPE)
I have needed this when calling python from within python.
You want to pass these extra parameters to subprocess.Popen:
bufsize=1, universal_newlines=True
Then you can iterate as in your example. (Tested with Python 3.5)
A function that allows iterating over both stdout and stderr concurrently, in realtime, line by line
In case you need to get the output stream for both stdout and stderr at the same time, you can use the following function.
The function uses Queues to merge both Popen pipes into a single iterator.
Here we create the function read_popen_pipes():
from queue import Queue, Empty
from concurrent.futures import ThreadPoolExecutor
def enqueue_output(file, queue):
for line in iter(file.readline, ''):
queue.put(line)
file.close()
def read_popen_pipes(p):
with ThreadPoolExecutor(2) as pool:
q_stdout, q_stderr = Queue(), Queue()
pool.submit(enqueue_output, p.stdout, q_stdout)
pool.submit(enqueue_output, p.stderr, q_stderr)
while True:
if p.poll() is not None and q_stdout.empty() and q_stderr.empty():
break
out_line = err_line = ''
try:
out_line = q_stdout.get_nowait()
except Empty:
pass
try:
err_line = q_stderr.get_nowait()
except Empty:
pass
yield (out_line, err_line)
read_popen_pipes() in use:
import subprocess as sp
with sp.Popen(my_cmd, stdout=sp.PIPE, stderr=sp.PIPE, text=True) as p:
for out_line, err_line in read_popen_pipes(p):
# Do stuff with each line, e.g.:
print(out_line, end='')
print(err_line, end='')
return p.poll() # return status-code
You can also read lines w/o loop. Works in python3.6.
import os
import subprocess
process = subprocess.Popen(command, stdout=subprocess.PIPE)
list_of_byte_strings = process.stdout.readlines()
Pythont 3.5 added the methods run() and call() to the subprocess module, both returning a CompletedProcess object. With this you are fine using proc.stdout.splitlines():
proc = subprocess.run( comman, shell=True, capture_output=True, text=True, check=True )
for line in proc.stdout.splitlines():
print "stdout:", line
See also How to Execute Shell Commands in Python Using the Subprocess Run Method
I tried this with python3 and it worked, source
When you use popen to spawn the new thread, you tell the operating system to PIPE the stdout of the child processes so the parent process can read it and here, stderr is copied to the stderr of the parent process.
in output_reader we read each line of stdout of the child process by wrapping it in an iterator that populates line by line output from the child process whenever a new line is ready.
def output_reader(proc):
for line in iter(proc.stdout.readline, b''):
print('got line: {0}'.format(line.decode('utf-8')), end='')
def main():
proc = subprocess.Popen(['python', 'fake_utility.py'],
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT)
t = threading.Thread(target=output_reader, args=(proc,))
t.start()
try:
time.sleep(0.2)
import time
i = 0
while True:
print (hex(i)*512)
i += 1
time.sleep(0.5)
finally:
proc.terminate()
try:
proc.wait(timeout=0.2)
print('== subprocess exited with rc =', proc.returncode)
except subprocess.TimeoutExpired:
print('subprocess did not terminate in time')
t.join()
The following modification of RĂ´mulo's answer works for me on Python 2 and 3 (2.7.12 and 3.6.1):
import os
import subprocess
process = subprocess.Popen(command, stdout=subprocess.PIPE)
while True:
line = process.stdout.readline()
if line != '':
os.write(1, line)
else:
break
I was having a problem with the arg list of Popen to update servers, the following code resolves this a bit.
import getpass
from subprocess import Popen, PIPE
username = 'user1'
ip = '127.0.0.1'
print ('What is the password?')
password = getpass.getpass()
cmd1 = f"""sshpass -p {password} ssh {username}#{ip}"""
cmd2 = f"""echo {password} | sudo -S apt update"""
cmd3 = " && "
cmd4 = f"""echo {password} | sudo -S apt upgrade -y"""
cmd5 = " && "
cmd6 = "exit"
commands = [cmd1, cmd2, cmd3, cmd4, cmd5, cmd6]
command = " ".join(commands)
cmd = command.split()
with Popen(cmd, stdout=PIPE, bufsize=1, universal_newlines=True) as p:
for line in p.stdout:
print(line, end='')
And to run the update on a local computer, the following code example does this.
import getpass
from subprocess import Popen, PIPE
print ('What is the password?')
password = getpass.getpass()
cmd1_local = f"""apt update"""
cmd2_local = f"""apt upgrade -y"""
commands = [cmd1_local, cmd2_local]
with Popen(['echo', password], stdout=PIPE) as auth:
for cmd in commands:
cmd = cmd.split()
with Popen(['sudo','-S'] + cmd, stdin=auth.stdout, stdout=PIPE, bufsize=1, universal_newlines=True) as p:
for line in p.stdout:
print(line, end='')

Constantly print Subprocess output while process is running

To launch programs from my Python-scripts, I'm using the following method:
def execute(command):
process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
output = process.communicate()[0]
exitCode = process.returncode
if (exitCode == 0):
return output
else:
raise ProcessException(command, exitCode, output)
So when i launch a process like Process.execute("mvn clean install"), my program waits until the process is finished, and only then i get the complete output of my program. This is annoying if i'm running a process that takes a while to finish.
Can I let my program write the process output line by line, by polling the process output before it finishes in a loop or something?
I found this article which might be related.
You can use iter to process lines as soon as the command outputs them: lines = iter(fd.readline, ""). Here's a full example showing a typical use case (thanks to #jfs for helping out):
from __future__ import print_function # Only Python 2.x
import subprocess
def execute(cmd):
popen = subprocess.Popen(cmd, stdout=subprocess.PIPE, universal_newlines=True)
for stdout_line in iter(popen.stdout.readline, ""):
yield stdout_line
popen.stdout.close()
return_code = popen.wait()
if return_code:
raise subprocess.CalledProcessError(return_code, cmd)
# Example
for path in execute(["locate", "a"]):
print(path, end="")
To print subprocess' output line-by-line as soon as its stdout buffer is flushed in Python 3:
from subprocess import Popen, PIPE, CalledProcessError
with Popen(cmd, stdout=PIPE, bufsize=1, universal_newlines=True) as p:
for line in p.stdout:
print(line, end='') # process line here
if p.returncode != 0:
raise CalledProcessError(p.returncode, p.args)
Notice: you do not need p.poll() -- the loop ends when eof is reached. And you do not need iter(p.stdout.readline, '') -- the read-ahead bug is fixed in Python 3.
See also, Python: read streaming input from subprocess.communicate().
Ok i managed to solve it without threads (any suggestions why using threads would be better are appreciated) by using a snippet from this question Intercepting stdout of a subprocess while it is running
def execute(command):
process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
# Poll process for new output until finished
while True:
nextline = process.stdout.readline()
if nextline == '' and process.poll() is not None:
break
sys.stdout.write(nextline)
sys.stdout.flush()
output = process.communicate()[0]
exitCode = process.returncode
if (exitCode == 0):
return output
else:
raise ProcessException(command, exitCode, output)
There is actually a really simple way to do this when you just want to print the output:
import subprocess
import sys
def execute(command):
subprocess.check_call(command, shell=True, stdout=sys.stdout, stderr=subprocess.STDOUT)
Here we're simply pointing the subprocess to our own stdout, and using existing succeed or exception api.
#tokland
tried your code and corrected it for 3.4 and windows
dir.cmd is a simple dir command, saved as cmd-file
import subprocess
c = "dir.cmd"
def execute(command):
popen = subprocess.Popen(command, stdout=subprocess.PIPE,bufsize=1)
lines_iterator = iter(popen.stdout.readline, b"")
while popen.poll() is None:
for line in lines_iterator:
nline = line.rstrip()
print(nline.decode("latin"), end = "\r\n",flush =True) # yield line
execute(c)
In Python >= 3.5 using subprocess.run works for me:
import subprocess
cmd = 'echo foo; sleep 1; echo foo; sleep 2; echo foo'
subprocess.run(cmd, shell=True)
(getting the output during execution also works without shell=True)
https://docs.python.org/3/library/subprocess.html#subprocess.run
For anyone trying the answers to this question to get the stdout from a Python script note that Python buffers its stdout, and therefore it may take a while to see the stdout.
This can be rectified by adding the following after each stdout write in the target script:
sys.stdout.flush()
To answer the original question, the best way IMO is just redirecting subprocess stdout directly to your program's stdout (optionally, the same can be done for stderr, as in example below)
p = Popen(cmd, stdout=sys.stdout, stderr=sys.stderr)
p.communicate()
In case someone wants to read from both stdout and stderr at the same time using threads, this is what I came up with:
import threading
import subprocess
import Queue
class AsyncLineReader(threading.Thread):
def __init__(self, fd, outputQueue):
threading.Thread.__init__(self)
assert isinstance(outputQueue, Queue.Queue)
assert callable(fd.readline)
self.fd = fd
self.outputQueue = outputQueue
def run(self):
map(self.outputQueue.put, iter(self.fd.readline, ''))
def eof(self):
return not self.is_alive() and self.outputQueue.empty()
#classmethod
def getForFd(cls, fd, start=True):
queue = Queue.Queue()
reader = cls(fd, queue)
if start:
reader.start()
return reader, queue
process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
(stdoutReader, stdoutQueue) = AsyncLineReader.getForFd(process.stdout)
(stderrReader, stderrQueue) = AsyncLineReader.getForFd(process.stderr)
# Keep checking queues until there is no more output.
while not stdoutReader.eof() or not stderrReader.eof():
# Process all available lines from the stdout Queue.
while not stdoutQueue.empty():
line = stdoutQueue.get()
print 'Received stdout: ' + repr(line)
# Do stuff with stdout line.
# Process all available lines from the stderr Queue.
while not stderrQueue.empty():
line = stderrQueue.get()
print 'Received stderr: ' + repr(line)
# Do stuff with stderr line.
# Sleep for a short time to avoid excessive CPU use while waiting for data.
sleep(0.05)
print "Waiting for async readers to finish..."
stdoutReader.join()
stderrReader.join()
# Close subprocess' file descriptors.
process.stdout.close()
process.stderr.close()
print "Waiting for process to exit..."
returnCode = process.wait()
if returnCode != 0:
raise subprocess.CalledProcessError(returnCode, command)
I just wanted to share this, as I ended up on this question trying to do something similar, but none of the answers solved my problem. Hopefully it helps someone!
Note that in my use case, an external process kills the process that we Popen().
This PoC constantly reads the output from a process and can be accessed when needed. Only the last result is kept, all other output is discarded, hence prevents the PIPE from growing out of memory:
import subprocess
import time
import threading
import Queue
class FlushPipe(object):
def __init__(self):
self.command = ['python', './print_date.py']
self.process = None
self.process_output = Queue.LifoQueue(0)
self.capture_output = threading.Thread(target=self.output_reader)
def output_reader(self):
for line in iter(self.process.stdout.readline, b''):
self.process_output.put_nowait(line)
def start_process(self):
self.process = subprocess.Popen(self.command,
stdout=subprocess.PIPE)
self.capture_output.start()
def get_output_for_processing(self):
line = self.process_output.get()
print ">>>" + line
if __name__ == "__main__":
flush_pipe = FlushPipe()
flush_pipe.start_process()
now = time.time()
while time.time() - now < 10:
flush_pipe.get_output_for_processing()
time.sleep(2.5)
flush_pipe.capture_output.join(timeout=0.001)
flush_pipe.process.kill()
print_date.py
#!/usr/bin/env python
import time
if __name__ == "__main__":
while True:
print str(time.time())
time.sleep(0.01)
output: You can clearly see that there is only output from ~2.5s interval nothing in between.
>>>1520535158.51
>>>1520535161.01
>>>1520535163.51
>>>1520535166.01
This works at least in Python3.4
import subprocess
process = subprocess.Popen(cmd_list, stdout=subprocess.PIPE)
for line in process.stdout:
print(line.decode().strip())
Building on #jfs's excellent answer, here is a complete working example for you to play with. Requires Python 3.7 or newer.
sub.py
import time
for i in range(10):
print(i, flush=True)
time.sleep(1)
main.py
from subprocess import PIPE, Popen
import sys
with Popen([sys.executable, 'sub.py'], bufsize=1, stdout=PIPE, text=True) as sub:
for line in sub.stdout:
print(line, end='')
Notice the flush argument used in the child script.
None of the answers here addressed all of my needs.
No threads for stdout (no Queues, etc, either)
Non-blocking as I need to check for other things going on
Use PIPE as I needed to do multiple things, e.g. stream output, write to a log file and return a string copy of the output.
A little background: I am using a ThreadPoolExecutor to manage a pool of threads, each launching a subprocess and running them concurrency. (In Python2.7, but this should work in newer 3.x as well). I don't want to use threads just for output gathering as I want as many available as possible for other things (a pool of 20 processes would be using 40 threads just to run; 1 for the process thread and 1 for stdout...and more if you want stderr I guess)
I'm stripping back a lot of exception and such here so this is based on code that works in production. Hopefully I didn't ruin it in the copy and paste. Also, feedback very much welcome!
import time
import fcntl
import subprocess
import time
proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
# Make stdout non-blocking when using read/readline
proc_stdout = proc.stdout
fl = fcntl.fcntl(proc_stdout, fcntl.F_GETFL)
fcntl.fcntl(proc_stdout, fcntl.F_SETFL, fl | os.O_NONBLOCK)
def handle_stdout(proc_stream, my_buffer, echo_streams=True, log_file=None):
"""A little inline function to handle the stdout business. """
# fcntl makes readline non-blocking so it raises an IOError when empty
try:
for s in iter(proc_stream.readline, ''): # replace '' with b'' for Python 3
my_buffer.append(s)
if echo_streams:
sys.stdout.write(s)
if log_file:
log_file.write(s)
except IOError:
pass
# The main loop while subprocess is running
stdout_parts = []
while proc.poll() is None:
handle_stdout(proc_stdout, stdout_parts)
# ...Check for other things here...
# For example, check a multiprocessor.Value('b') to proc.kill()
time.sleep(0.01)
# Not sure if this is needed, but run it again just to be sure we got it all?
handle_stdout(proc_stdout, stdout_parts)
stdout_str = "".join(stdout_parts) # Just to demo
I'm sure there is overhead being added here but it is not a concern in my case. Functionally it does what I need. The only thing I haven't solved is why this works perfectly for log messages but I see some print messages show up later and all at once.
import time
import sys
import subprocess
import threading
import queue
cmd='esptool.py --chip esp8266 write_flash -z 0x1000 /home/pi/zero2/fw/base/boot_40m.bin'
cmd2='esptool.py --chip esp32 -b 115200 write_flash -z 0x1000 /home/pi/zero2/fw/test.bin'
cmd3='esptool.py --chip esp32 -b 115200 erase_flash'
class ExecutorFlushSTDOUT(object):
def __init__(self,timeout=15):
self.process = None
self.process_output = queue.Queue(0)
self.capture_output = threading.Thread(target=self.output_reader)
self.timeout=timeout
self.result=False
self.validator=None
def output_reader(self):
start=time.time()
while self.process.poll() is None and (time.time() - start) < self.timeout:
try:
if not self.process_output.full():
line=self.process.stdout.readline()
if line:
line=line.decode().rstrip("\n")
start=time.time()
self.process_output.put(line)
if self.validator:
if self.validator in line: print("Valid");self.result=True
except:pass
self.process.kill()
return
def start_process(self,cmd_list,callback=None,validator=None,timeout=None):
if timeout: self.timeout=timeout
self.validator=validator
self.process = subprocess.Popen(cmd_list,stdout=subprocess.PIPE,stderr=subprocess.PIPE,shell=True)
self.capture_output.start()
line=None
self.result=False
while self.process.poll() is None:
try:
if not self.process_output.empty():
line = self.process_output.get()
if line:
if callback:callback(line)
#print(line)
line=None
except:pass
error = self.process.returncode
if error:
print("Error Found",str(error))
raise RuntimeError(error)
return self.result
execute = ExecutorFlushSTDOUT()
def liveOUTPUT(line):
print("liveOUTPUT",line)
try:
if "Writing" in line:
line=''.join([n for n in line.split(' ')[3] if n.isdigit()])
print("percent={}".format(line))
except Exception as e:
pass
result=execute.start_process(cmd2,callback=liveOUTPUT,validator="Hash of data verified.")
print("Finish",result)
Use the -u Python option with subprocess.Popen() if you want to print from stdout while the process is running. (shell=True is necessary, despite the risks...)
Simple better than complex.
os library has built-in module system. You should execute your code and see the output.
import os
os.system("python --version")
# Output
"""
Python 3.8.6
0
"""
After version it is also printed return value as 0.
In Python 3.6 I used this:
import subprocess
cmd = "command"
output = subprocess.call(cmd, shell=True)
print(process)

python, subprocess: reading output from subprocess

I have following script:
#!/usr/bin/python
while True:
x = raw_input()
print x[::-1]
I am calling it from ipython:
In [5]: p = Popen('./script.py', stdin=PIPE)
In [6]: p.stdin.write('abc\n')
cba
and it works fine.
However, when I do this:
In [7]: p = Popen('./script.py', stdin=PIPE, stdout=PIPE)
In [8]: p.stdin.write('abc\n')
In [9]: p.stdout.read()
the interpreter hangs. What am I doing wrong? I would like to be able to both write and read from another process multiple times, to pass some tasks to this process. What do I need to do differently?
EDIT 1
If I use communicate, I get this:
In [7]: p = Popen('./script.py', stdin=PIPE, stdout=PIPE)
In [8]: p.communicate('abc\n')
Traceback (most recent call last):
File "./script.py", line 4, in <module>
x = raw_input()
EOFError: EOF when reading a line
Out[8]: ('cba\n', None)
EDIT 2
I tried flushing:
#!/usr/bin/python
import sys
while True:
x = raw_input()
print x[::-1]
sys.stdout.flush()
and here:
In [5]: from subprocess import PIPE, Popen
In [6]: p = Popen('./script.py', stdin=PIPE, stdout=PIPE)
In [7]: p.stdin.write('abc')
In [8]: p.stdin.flush()
In [9]: p.stdout.read()
but it hangs again.
I believe there are two problems at work here:
1) Your parent script calls p.stdout.read(), which will read all data until end-of-file. However, your child script runs in an infinite loop so end-of-file will never happen. Probably you want p.stdout.readline()?
2) In interactive mode, most programs do buffer only one line at a time. When run from another program, they buffer much more. The buffering improves efficiency in many cases, but causes problems when two programs need to communicate interactively.
After p.stdin.write('abc\n') add:
p.stdin.flush()
In your subprocess script, after print x[::-1] add the following within the loop:
sys.stdout.flush()
(and import sys at the top)
The subprocess method check_output can be useful for this:
output = subprocess.check_output('./script.py')
And output will be the stdout from the process. If you need stderr, too:
output = subprocess.check_output('./script.py', stderr=subprocess.STDOUT)
Because you avoid managing pipes directly, it may circumvent your issue.
If you'd like to pass several lines to script.py then you need to read/write simultaneously:
#!/usr/bin/env python
import sys
from subprocess import PIPE, Popen
from threading import Thread
def print_output(out, ntrim=80):
for line in out:
print len(line)
if len(line) > ntrim: # truncate long output
line = line[:ntrim-2]+'..'
print line.rstrip()
if __name__=="__main__":
p = Popen(['python', 'script.py'], stdin=PIPE, stdout=PIPE)
Thread(target=print_output, args=(p.stdout,)).start()
for s in ['abc', 'def', 'ab'*10**7, 'ghi']:
print >>p.stdin, s
p.stdin.close()
sys.exit(p.wait()) #NOTE: read http://docs.python.org/library/subprocess.html#subprocess.Popen.wait
Output:
4
cba
4
fed
20000001
bababababababababababababababababababababababababababababababababababababababa..
4
ihg
Where script.py:
#!/usr/bin/env python
"""Print reverse lines."""
while True:
try: x = raw_input()
except EOFError:
break # no more input
else:
print x[::-1]
Or
#!/usr/bin/env python
"""Print reverse lines."""
import sys
for line in sys.stdin:
print line.rstrip()[::-1]
Or
#!/usr/bin/env python
"""Print reverse lines."""
import fileinput
for line in fileinput.input(): # accept files specified as command line arguments
print line.rstrip()[::-1]
You're probably tripping over Python's output buffering. Here's what python --help has to say about it.
-u : unbuffered binary stdout and stderr; also PYTHONUNBUFFERED=x
see man page for details on internal buffering relating to '-u'
When you are through writing to p.stdin, close it: p.stdin.close()
Use communicate() instead of .stdout.read().
Example:
from subprocess import Popen, PIPE
p = Popen('./script.py', stdin=PIPE, stdout=PIPE, stderr=PIPE)
input = 'abc\n'
stdout, stderr = p.communicate(input)
This recommendation comes from the Popen objects section in the subprocess documentation:
Warning: Use communicate() rather than .stdin.write, .stdout.read or .stderr.read
to avoid deadlocks due to any of the other OS pipe buffers filling up and blocking the
child process.

Categories

Resources