Mimicking glib.spawn_async with Popen… - python

The function glib.spawn_async allows you to hook three callbacks, which are called on events on stdout and stderr and on process completion.
How can I mimic the same functionality with subprocess with either threads or asyncio?
I am more interested in the functionality than in threading/asyncio specifically, but an answer that covers both will earn a bounty.
Here is a toy program that shows what I want to do:
import glib
import logging
import os
import gtk


class MySpawn(object):
    def __init__(self):
        self._logger = logging.getLogger(self.__class__.__name__)

    def execute(self, cmd, on_done, on_stdout, on_stderr):
        self.pid, self.idin, self.idout, self.iderr = \
            glib.spawn_async(cmd,
                             flags=glib.SPAWN_DO_NOT_REAP_CHILD,
                             standard_output=True,
                             standard_error=True)
        fout = os.fdopen(self.idout, "r")
        ferr = os.fdopen(self.iderr, "r")
        glib.child_watch_add(self.pid, on_done)
        glib.io_add_watch(fout, glib.IO_IN, on_stdout)
        glib.io_add_watch(ferr, glib.IO_IN, on_stderr)
        return self.pid


if __name__ == '__main__':
    logging.basicConfig(format='%(thread)d %(levelname)s: %(message)s',
                        level=logging.DEBUG)
    cmd = '/usr/bin/git ls-remote https://github.com/DiffSK/configobj'.split()

    def on_done(pid, retval, *args):
        logging.info("That's all folks!…")

    def on_stdout(fobj, cond):
        """This blocks which is fine for this toy example…"""
        for line in fobj.readlines():
            logging.info(line.strip())
        return True

    def on_stderr(fobj, cond):
        """This blocks which is fine for this toy example…"""
        for line in fobj.readlines():
            logging.error(line.strip())
        return True

    runner = MySpawn()
    runner.execute(cmd, on_done, on_stdout, on_stderr)
    try:
        gtk.main()
    except KeyboardInterrupt:
        print('')
I should add that since readlines() blocks, the example above buffers all the output and delivers it at once. If that is not what you want, use readline() instead and make sure that, when the command finishes, you drain any lines you have not read yet.
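A rough sketch of what such a readline()-based callback could look like (my own illustration, not part of the toy program; the drain_remaining helper is an assumption and would be called from on_done):

def on_stdout(fobj, cond):
    """Read a single line per callback so output arrives as it is produced."""
    line = fobj.readline()
    if line:
        logging.info(line.strip())
    return True  # keep the glib watch installed


def drain_remaining(fobj):
    """Call this when the command ends to pick up lines still buffered in the pipe."""
    for line in fobj.readlines():
        logging.info(line.strip())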

asyncio has subprocess_exec, there is no need to use the subprocess module at all:
import asyncio


class Handler(asyncio.SubprocessProtocol):
    def pipe_data_received(self, fd, data):
        # fd == 1 for stdout, and 2 for stderr
        print("Data from /bin/ls on fd %d: %s" % (fd, data.decode()))

    def pipe_connection_lost(self, fd, exc):
        print("Connection lost to /bin/ls")

    def process_exited(self):
        print("/bin/ls is finished.")


loop = asyncio.get_event_loop()
coro = loop.subprocess_exec(Handler, "/bin/ls", "/")
loop.run_until_complete(coro)
loop.close()
With subprocess and threading, it's simple as well. You can just spawn a thread per pipe, and one to wait() for the process:
import subprocess
import threading


class PopenWrapper(object):
    def __init__(self, args):
        self.process = subprocess.Popen(args, stdout=subprocess.PIPE,
                                        stderr=subprocess.PIPE,
                                        stdin=subprocess.DEVNULL)
        self.stdout_reader_thread = threading.Thread(target=self._reader,
                                                     args=(self.process.stdout,))
        self.stderr_reader_thread = threading.Thread(target=self._reader,
                                                     args=(self.process.stderr,))
        self.exit_watcher = threading.Thread(target=self._exit_watcher)
        self.stdout_reader_thread.start()
        self.stderr_reader_thread.start()
        self.exit_watcher.start()

    def _reader(self, fileobj):
        for line in fileobj:
            self.on_data(fileobj, line)

    def _exit_watcher(self):
        self.process.wait()
        self.stdout_reader_thread.join()
        self.stderr_reader_thread.join()
        self.on_exit()

    def on_data(self, fd, data):
        raise NotImplementedError

    def on_exit(self):
        raise NotImplementedError

    def join(self):
        self.process.wait()


class LsWrapper(PopenWrapper):
    def on_data(self, fd, data):
        print("Received on fd %r: %s" % (fd, data))

    def on_exit(self):
        print("Process exited.")


LsWrapper(["/bin/ls", "/"]).join()
However, mind that glib does not use threads to asynchronously execute your callbacks. It uses an event loop, just as asyncio does. The idea is that at the core of your program is a loop that waits until something happens and then synchronously executes an associated callback. In your case, that's "data becomes available for reading on one of the pipes" and "the subprocess has exited". In general, it's also things like "the X11 server reported mouse movement", "there's incoming network traffic", etc. You can emulate glib's behaviour by writing your own event loop: use the select module on the two pipes. If select reports that the pipes are readable but read returns no data, the process likely exited; call the poll() method on the subprocess object in this case to check whether it has completed, and call your exit callback if it has, or an error callback otherwise.
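A minimal sketch of such a hand-rolled select loop (my own illustration of the idea above; the on_stdout/on_stderr/on_done callbacks are assumed to be supplied by the caller):

import select
import subprocess


def run_with_callbacks(cmd, on_stdout, on_stderr, on_done):
    p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    pipes = {p.stdout: on_stdout, p.stderr: on_stderr}
    while pipes:
        # block until at least one pipe is readable (data available or EOF)
        readable, _, _ = select.select(list(pipes), [], [])
        for f in readable:
            data = f.read1(4096)  # returns whatever is available right now
            if data:
                pipes[f](data)
            else:
                del pipes[f]  # EOF on this pipe, stop watching it
    on_done(p.wait())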

Related

How to detect if a pipe is empty [duplicate]

I'm using the subprocess module to start a subprocess and connect to its output stream (standard output). I want to be able to execute non-blocking reads on its standard output. Is there a way to make .readline non-blocking or to check if there is data on the stream before I invoke .readline? I'd like this to be portable or at least work under Windows and Linux.
Here is how I do it for now (it's blocking on the .readline if no data is available):
p = subprocess.Popen('myprogram.exe', stdout = subprocess.PIPE)
output_str = p.stdout.readline()
fcntl, select, asyncproc won't help in this case.
A reliable way to read a stream without blocking regardless of operating system is to use Queue.get_nowait():
import sys
from subprocess import PIPE, Popen
from threading import Thread

try:
    from queue import Queue, Empty
except ImportError:
    from Queue import Queue, Empty  # python 2.x

ON_POSIX = 'posix' in sys.builtin_module_names

def enqueue_output(out, queue):
    for line in iter(out.readline, b''):
        queue.put(line)
    out.close()

p = Popen(['myprogram.exe'], stdout=PIPE, bufsize=1, close_fds=ON_POSIX)
q = Queue()
t = Thread(target=enqueue_output, args=(p.stdout, q))
t.daemon = True  # thread dies with the program
t.start()

# ... do other things here

# read line without blocking
try:
    line = q.get_nowait()  # or q.get(timeout=.1)
except Empty:
    print('no output yet')
else:
    pass  # got line: ... do something with line
I have often had a similar problem; Python programs I write frequently need to be able to execute some primary functionality while simultaneously accepting user input from the command line (stdin). Simply putting the user-input handling in another thread doesn't solve the problem, because readline() blocks and has no timeout. If the primary functionality is complete and there is no longer any need to wait for further user input, I typically want my program to exit, but it can't, because readline() is still blocking in the other thread waiting for a line. A solution I have found to this problem is to make stdin a non-blocking file using the fcntl module:
import fcntl
import os
import sys

# make stdin a non-blocking file
fd = sys.stdin.fileno()
fl = fcntl.fcntl(fd, fcntl.F_GETFL)
fcntl.fcntl(fd, fcntl.F_SETFL, fl | os.O_NONBLOCK)

# user input handling thread
while mainThreadIsRunning:
    try:
        input = sys.stdin.readline()
    except:
        continue
    handleInput(input)
In my opinion this is a bit cleaner than using the select or signal modules to solve this problem but then again it only works on UNIX...
Python 3.4 introduces a new provisional API for asynchronous IO -- the asyncio module.
The approach is similar to the Twisted-based answer by @Bryan Ward -- define a protocol, and its methods are called as soon as data is ready:
#!/usr/bin/env python3
import asyncio
import os


class SubprocessProtocol(asyncio.SubprocessProtocol):
    def pipe_data_received(self, fd, data):
        if fd == 1:  # got stdout data (bytes)
            print(data)

    def connection_lost(self, exc):
        loop.stop()  # end loop.run_forever()


if os.name == 'nt':
    loop = asyncio.ProactorEventLoop()  # for subprocess' pipes on Windows
    asyncio.set_event_loop(loop)
else:
    loop = asyncio.get_event_loop()

try:
    loop.run_until_complete(loop.subprocess_exec(SubprocessProtocol,
                                                 "myprogram.exe", "arg1", "arg2"))
    loop.run_forever()
finally:
    loop.close()
See "Subprocess" in the docs.
There is a high-level interface, asyncio.create_subprocess_exec(), that returns Process objects and allows you to read a line asynchronously using the StreamReader.readline() coroutine
(with async/await Python 3.5+ syntax):
#!/usr/bin/env python3.5
import asyncio
import locale
import sys
from asyncio.subprocess import PIPE
from contextlib import closing


async def readline_and_kill(*args):
    # start child process
    process = await asyncio.create_subprocess_exec(*args, stdout=PIPE)

    # read line (sequence of bytes ending with b'\n') asynchronously
    async for line in process.stdout:
        print("got line:", line.decode(locale.getpreferredencoding(False)))
        break
    process.kill()
    return await process.wait()  # wait for the child process to exit


if sys.platform == "win32":
    loop = asyncio.ProactorEventLoop()
    asyncio.set_event_loop(loop)
else:
    loop = asyncio.get_event_loop()

with closing(loop):
    sys.exit(loop.run_until_complete(readline_and_kill(
        "myprogram.exe", "arg1", "arg2")))
readline_and_kill() performs the following tasks:
start subprocess, redirect its stdout to a pipe
read a line from subprocess' stdout asynchronously
kill subprocess
wait for it to exit
Each step could be limited by timeout seconds if necessary.
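For instance, the read step could be wrapped in asyncio.wait_for() to bound it with a timeout. A small sketch of that idea (my own addition, not part of the original answer):

async def readline_with_timeout(process, timeout=5.0):
    try:
        # process.stdout.readline() is a coroutine here, so it can be time-limited
        return await asyncio.wait_for(process.stdout.readline(), timeout)
    except asyncio.TimeoutError:
        process.kill()
        raise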
On Unix-like systems and Python 3.5+ there's os.set_blocking which does exactly what it says.
import os
import time
import subprocess

cmd = 'python3', '-c', 'import time; [(print(i), time.sleep(1)) for i in range(5)]'
p = subprocess.Popen(cmd, stdout=subprocess.PIPE)
os.set_blocking(p.stdout.fileno(), False)
start = time.time()
while True:
    # first iteration always produces empty byte string in non-blocking mode
    for i in range(2):
        line = p.stdout.readline()
        print(i, line)
        time.sleep(0.5)
    if time.time() > start + 5:
        break
p.terminate()
This outputs:
1 b''
2 b'0\n'
1 b''
2 b'1\n'
1 b''
2 b'2\n'
1 b''
2 b'3\n'
1 b''
2 b'4\n'
With os.set_blocking commented it's:
0 b'0\n'
1 b'1\n'
0 b'2\n'
1 b'3\n'
0 b'4\n'
1 b''
Try the asyncproc module. For example:
import os
from asyncproc import Process

myProc = Process("myprogram.app")

while True:
    # check to see if process has ended
    poll = myProc.wait(os.WNOHANG)
    if poll != None:
        break
    # print any new output
    out = myProc.read()
    if out != "":
        print out
The module takes care of all the threading as suggested by S.Lott.
You can do this really easily in Twisted. Depending upon your existing code base, this might not be that easy to use, but if you are building a twisted application, then things like this become almost trivial. You create a ProcessProtocol class, and override the outReceived() method. Twisted (depending upon the reactor used) is usually just a big select() loop with callbacks installed to handle data from different file descriptors (often network sockets). So the outReceived() method is simply installing a callback for handling data coming from STDOUT. A simple example demonstrating this behavior is as follows:
from twisted.internet import protocol, reactor


class MyProcessProtocol(protocol.ProcessProtocol):
    def outReceived(self, data):
        print data


proc = MyProcessProtocol()
reactor.spawnProcess(proc, './myprogram', ['./myprogram', 'arg1', 'arg2', 'arg3'])
reactor.run()
The Twisted documentation has some good information on this.
If you build your entire application around Twisted, it makes asynchronous communication with other processes, local or remote, really elegant like this. On the other hand, if your program isn't built on top of Twisted, this isn't really going to be that helpful. Hopefully this can be helpful to other readers, even if it isn't applicable for your particular application.
Use select & read(1).
import select
import subprocess  # no new requirements

def readAllSoFar(proc, retVal=''):
    while select.select([proc.stdout], [], [], 0)[0] != []:
        retVal += proc.stdout.read(1)
    return retVal

p = subprocess.Popen(['/bin/ls'], stdout=subprocess.PIPE)
while not p.poll():
    print(readAllSoFar(p))
For readline()-like:
lines = ['']
while not p.poll():
    lines = readAllSoFar(p, lines[-1]).split('\n')
    for a in range(len(lines) - 1):
        print a
lines = readAllSoFar(p, lines[-1]).split('\n')
for a in range(len(lines) - 1):
    print a
Things are a lot better in modern Python.
Here's a simple child program, "hello.py":
#!/usr/bin/env python3

while True:
    i = input()
    if i == "quit":
        break
    print(f"hello {i}")
And a program to interact with it:
import asyncio


async def main():
    proc = await asyncio.subprocess.create_subprocess_exec(
        "./hello.py", stdin=asyncio.subprocess.PIPE, stdout=asyncio.subprocess.PIPE
    )
    proc.stdin.write(b"bob\n")
    print(await proc.stdout.read(1024))
    proc.stdin.write(b"alice\n")
    print(await proc.stdout.read(1024))
    proc.stdin.write(b"quit\n")
    await proc.wait()


asyncio.run(main())
That prints out:
b'hello bob\n'
b'hello alice\n'
Note that the actual pattern, which is also used by almost all of the previous answers, both here and in related questions, is to set the child's stdout file descriptor to non-blocking and then poll it in some sort of select loop. These days, of course, that loop is provided by asyncio.
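If you want to see that underlying pattern without asyncio, a minimal sketch might look like this (my own illustration; myprogram.exe is the hypothetical child from the question):

import os
import select
import subprocess

p = subprocess.Popen(['myprogram.exe'], stdout=subprocess.PIPE)
os.set_blocking(p.stdout.fileno(), False)

while p.poll() is None:
    # wait up to 0.1s for the pipe to become readable, then take what is there
    ready, _, _ = select.select([p.stdout], [], [], 0.1)
    if ready:
        chunk = os.read(p.stdout.fileno(), 4096)  # never blocks now
        if chunk:
            print(chunk.decode(), end='')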
One solution is to have another process perform your read of the process's output, or to do the read in a thread with a timeout.
Here's the threaded version of a timeout function:
http://code.activestate.com/recipes/473878/
However, do you need to read the stdout as it's coming in?
Another solution may be to dump the output to a file and wait for the process to finish using p.wait().
f = open('myprogram_output.txt','w')
p = subprocess.Popen('myprogram.exe', stdout=f)
p.wait()
f.close()
str = open('myprogram_output.txt','r').read()
Here is my code, used to catch all output from the subprocess as soon as possible, including partial lines. It pumps stdout and stderr at the same time, in almost the correct order.
Tested and working correctly on Python 2.7, on Linux and Windows.
#!/usr/bin/python
#
# Runner with stdout/stderr catcher
#
from sys import argv
from subprocess import Popen, PIPE
import os, io
from threading import Thread
import Queue


def __main__():
    if (len(argv) > 1) and (argv[-1] == "-sub-"):
        import time, sys
        print "Application runned!"
        time.sleep(2)
        print "Slept 2 second"
        time.sleep(1)
        print "Slept 1 additional second",
        time.sleep(2)
        sys.stderr.write("Stderr output after 5 seconds")
        print "Eol on stdin"
        sys.stderr.write("Eol on stderr\n")
        time.sleep(1)
        print "Wow, we have end of work!",
    else:
        os.environ["PYTHONUNBUFFERED"] = "1"
        try:
            p = Popen(argv + ["-sub-"],
                      bufsize=0,  # line-buffered
                      stdin=PIPE, stdout=PIPE, stderr=PIPE)
        except WindowsError, W:
            if W.winerror == 193:
                p = Popen(argv + ["-sub-"],
                          shell=True,  # Try to run via shell
                          bufsize=0,  # line-buffered
                          stdin=PIPE, stdout=PIPE, stderr=PIPE)
            else:
                raise

        inp = Queue.Queue()
        sout = io.open(p.stdout.fileno(), 'rb', closefd=False)
        serr = io.open(p.stderr.fileno(), 'rb', closefd=False)

        def Pump(stream, category):
            queue = Queue.Queue()

            def rdr():
                while True:
                    buf = stream.read1(8192)
                    if len(buf) > 0:
                        queue.put(buf)
                    else:
                        queue.put(None)
                        return

            def clct():
                active = True
                while active:
                    r = queue.get()
                    try:
                        while True:
                            r1 = queue.get(timeout=0.005)
                            if r1 is None:
                                active = False
                                break
                            else:
                                r += r1
                    except Queue.Empty:
                        pass
                    inp.put((category, r))

            for tgt in [rdr, clct]:
                th = Thread(target=tgt)
                th.setDaemon(True)
                th.start()

        Pump(sout, 'stdout')
        Pump(serr, 'stderr')

        while p.poll() is None:
            # App still working
            try:
                chan, line = inp.get(timeout=1.0)
                if chan == 'stdout':
                    print "STDOUT>>", line, "<?<"
                elif chan == 'stderr':
                    print " ERROR==", line, "=?="
            except Queue.Empty:
                pass
        print "Finish"


if __name__ == '__main__':
    __main__()
Disclaimer: this works only for tornado
You can do this by setting the fd to be nonblocking and then use ioloop to register callbacks. I have packaged this in an egg called tornado_subprocess and you can install it via PyPI:
easy_install tornado_subprocess
now you can do something like this:
import tornado_subprocess
import tornado.ioloop

def print_res(status, stdout, stderr):
    print status, stdout, stderr
    if status == 0:
        print "OK:"
        print stdout
    else:
        print "ERROR:"
        print stderr

t = tornado_subprocess.Subprocess(print_res, timeout=30, args=["cat", "/etc/passwd"])
t.start()
tornado.ioloop.IOLoop.instance().start()
you can also use it with a RequestHandler
class MyHandler(tornado.web.RequestHandler):
    def on_done(self, status, stdout, stderr):
        self.write(stdout)
        self.finish()

    @tornado.web.asynchronous
    def get(self):
        t = tornado_subprocess.Subprocess(self.on_done, timeout=30, args=["cat", "/etc/passwd"])
        t.start()
Existing solutions did not work for me (details below). What finally worked was to implement readline using read(1) (based on this answer). The latter does not block:
from subprocess import Popen, PIPE
from threading import Thread

def process_output(myprocess):  # output-consuming thread
    nextline = None
    buf = ''
    while True:
        # --- extract line using read(1)
        out = myprocess.stdout.read(1)
        if out == '' and myprocess.poll() != None:
            break
        if out != '':
            buf += out
            if out == '\n':
                nextline = buf
                buf = ''
        if not nextline:
            continue
        line = nextline
        nextline = None

        # --- do whatever you want with line here
        print 'Line is:', line
    myprocess.stdout.close()

myprocess = Popen('myprogram.exe', stdout=PIPE)  # output-producing process
p1 = Thread(target=process_output, args=(myprocess,))  # output-consuming thread
p1.daemon = True
p1.start()

# --- do whatever here and then kill process and thread if needed
if myprocess.poll() == None:  # kill process; will automatically stop thread
    myprocess.kill()
    myprocess.wait()
if p1 and p1.is_alive():  # wait for thread to finish
    p1.join()
Why existing solutions did not work:
Solutions that require readline (including the Queue based ones) always block. It is difficult (impossible?) to kill the thread that executes readline. It only gets killed when the process that created it finishes, but not when the output-producing process is killed.
Mixing low-level fcntl with high-level readline calls may not work properly as anonnn has pointed out.
Using select.poll() is neat, but doesn't work on Windows according to python docs.
Using third-party libraries seems overkill for this task and adds additional dependencies.
I ran into this problem when trying to read some subprocess.Popen stdout.
Here is my non-blocking read solution:
import fcntl
import os

def non_block_read(output):
    fd = output.fileno()
    fl = fcntl.fcntl(fd, fcntl.F_GETFL)
    fcntl.fcntl(fd, fcntl.F_SETFL, fl | os.O_NONBLOCK)
    try:
        return output.read()
    except:
        return ""
# Use example
from subprocess import *
sb = Popen("echo test && sleep 1000", shell=True, stdout=PIPE)
sb.kill()
# sb.stdout.read() # <-- This will block
non_block_read(sb.stdout)
'test\n'
Here is a simple solution based on threads which:
works on both Linux and Windows (not relying on select).
reads both stdout and stderr asynchronously.
doesn't rely on active polling with arbitrary waiting time (CPU friendly).
doesn't use asyncio (which may conflict with other libraries).
runs until the child process terminates.
printer.py
import time
import sys
sys.stdout.write("Hello\n")
sys.stdout.flush()
time.sleep(1)
sys.stdout.write("World!\n")
sys.stdout.flush()
time.sleep(1)
sys.stderr.write("That's an error\n")
sys.stderr.flush()
time.sleep(2)
sys.stdout.write("Actually, I'm fine\n")
sys.stdout.flush()
time.sleep(1)
reader.py
import queue
import subprocess
import sys
import threading

def enqueue_stream(stream, queue, type):
    for line in iter(stream.readline, b''):
        queue.put(str(type) + line.decode('utf-8'))
    stream.close()

def enqueue_process(process, queue):
    process.wait()
    queue.put('x')

p = subprocess.Popen('python printer.py', stdout=subprocess.PIPE, stderr=subprocess.PIPE)
q = queue.Queue()
to = threading.Thread(target=enqueue_stream, args=(p.stdout, q, 1))
te = threading.Thread(target=enqueue_stream, args=(p.stderr, q, 2))
tp = threading.Thread(target=enqueue_process, args=(p, q))
te.start()
to.start()
tp.start()

while True:
    line = q.get()
    if line[0] == 'x':
        break
    if line[0] == '2':  # stderr
        sys.stdout.write("\033[0;31m")  # ANSI red color
    sys.stdout.write(line[1:])
    if line[0] == '2':
        sys.stdout.write("\033[0m")  # reset ANSI code
    sys.stdout.flush()

tp.join()
to.join()
te.join()
This version of non-blocking read doesn't require special modules and will work out-of-the-box on majority of Linux distros.
import os
import sys
import time
import fcntl
import subprocess

def async_read(fd):
    # set non-blocking flag while preserving old flags
    fl = fcntl.fcntl(fd, fcntl.F_GETFL)
    fcntl.fcntl(fd, fcntl.F_SETFL, fl | os.O_NONBLOCK)
    # read char until EOF hit
    while True:
        try:
            ch = os.read(fd.fileno(), 1)
            # EOF
            if not ch:
                break
            sys.stdout.write(ch)
        except OSError:
            # waiting for data be available on fd
            pass

def shell(args, async=True):
    # merge stderr and stdout
    proc = subprocess.Popen(args, shell=False, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    if async:
        async_read(proc.stdout)
    sout, serr = proc.communicate()
    return (sout, serr)

if __name__ == '__main__':
    cmd = 'ping 8.8.8.8'
    sout, serr = shell(cmd.split())
I have the original questioner's problem, but did not wish to invoke threads. I mixed Jesse's solution with a direct read() from the pipe, and my own buffer-handler for line reads (however, my sub-process - ping - always wrote full lines < a system page size). I avoid busy-waiting by only reading in a gobject-registered io watch. These days I usually run code within a gobject MainLoop to avoid threads.
def set_up_ping(ip, w):
    # run the sub-process
    # watch the resultant pipe
    p = subprocess.Popen(['/bin/ping', ip], stdout=subprocess.PIPE)
    # make stdout a non-blocking file
    fl = fcntl.fcntl(p.stdout, fcntl.F_GETFL)
    fcntl.fcntl(p.stdout, fcntl.F_SETFL, fl | os.O_NONBLOCK)
    stdout_gid = gobject.io_add_watch(p.stdout, gobject.IO_IN, w)
    return stdout_gid  # for shutting down
The watcher is
def watch(f, *other):
    print 'reading', f.read()
    return True
And the main program sets up a ping and then calls the gobject main loop.
def main():
    set_up_ping('192.168.1.8', watch)
    # discard gid as unused here
    gobject.MainLoop().run()
Any other work is attached to callbacks in gobject.
Adding this answer here since it provides ability to set non-blocking pipes on Windows and Unix.
All the ctypes details are thanks to #techtonik's answer.
There is a slightly modified version to be used both on Unix and Windows systems.
Python3 compatible (only minor change needed).
Includes posix version, and defines exception to use for either.
This way you can use the same function and exception for Unix and Windows code.
# pipe_non_blocking.py (module)
"""
Example use:

    p = subprocess.Popen(
            command,
            stdout=subprocess.PIPE,
            )
    pipe_non_blocking_set(p.stdout.fileno())

    try:
        data = os.read(p.stdout.fileno(), 1)
    except PortableBlockingIOError as ex:
        if not pipe_non_blocking_is_error_blocking(ex):
            raise ex
"""

__all__ = (
    "pipe_non_blocking_set",
    "pipe_non_blocking_is_error_blocking",
    "PortableBlockingIOError",
)

import os


if os.name == "nt":
    def pipe_non_blocking_set(fd):
        # Constant could define globally but avoid polluting the name-space
        # thanks to: https://stackoverflow.com/questions/34504970
        import msvcrt
        from ctypes import windll, byref, wintypes, WinError, POINTER
        from ctypes.wintypes import HANDLE, DWORD, BOOL

        LPDWORD = POINTER(DWORD)
        PIPE_NOWAIT = wintypes.DWORD(0x00000001)

        def pipe_no_wait(pipefd):
            SetNamedPipeHandleState = windll.kernel32.SetNamedPipeHandleState
            SetNamedPipeHandleState.argtypes = [HANDLE, LPDWORD, LPDWORD, LPDWORD]
            SetNamedPipeHandleState.restype = BOOL

            h = msvcrt.get_osfhandle(pipefd)

            res = windll.kernel32.SetNamedPipeHandleState(h, byref(PIPE_NOWAIT), None, None)
            if res == 0:
                print(WinError())
                return False
            return True

        return pipe_no_wait(fd)

    def pipe_non_blocking_is_error_blocking(ex):
        if not isinstance(ex, PortableBlockingIOError):
            return False
        from ctypes import GetLastError
        ERROR_NO_DATA = 232
        return (GetLastError() == ERROR_NO_DATA)

    PortableBlockingIOError = OSError
else:
    def pipe_non_blocking_set(fd):
        import fcntl
        fl = fcntl.fcntl(fd, fcntl.F_GETFL)
        fcntl.fcntl(fd, fcntl.F_SETFL, fl | os.O_NONBLOCK)
        return True

    def pipe_non_blocking_is_error_blocking(ex):
        if not isinstance(ex, PortableBlockingIOError):
            return False
        return True

    PortableBlockingIOError = BlockingIOError
To avoid reading incomplete data, I ended up writing my own readline generator (which returns the byte string for each line).
It's a generator, so you can, for example...
def non_blocking_readlines(f, chunk=1024):
    """
    Iterate over lines, yielding b'' when nothing is left
    or when new data is not yet available.

    stdout_iter = iter(non_blocking_readlines(process.stdout))

    line = next(stdout_iter)  # will be a line or b''.
    """
    import os
    from .pipe_non_blocking import (
        pipe_non_blocking_set,
        pipe_non_blocking_is_error_blocking,
        PortableBlockingIOError,
    )

    fd = f.fileno()
    pipe_non_blocking_set(fd)

    blocks = []

    while True:
        try:
            data = os.read(fd, chunk)
            if not data:
                # case where reading finishes with no trailing newline
                yield b''.join(blocks)
                blocks.clear()
        except PortableBlockingIOError as ex:
            if not pipe_non_blocking_is_error_blocking(ex):
                raise ex
            yield b''
            continue

        while True:
            n = data.find(b'\n')
            if n == -1:
                break
            yield b''.join(blocks) + data[:n + 1]
            data = data[n + 1:]
            blocks.clear()
        blocks.append(data)
Not the first and probably not the last, I have built a package that does non blocking stdout PIPE reads with two different methods, one being based on the work of J.F. Sebastian (#jfs)'s answer, the other being a simple communicate() loop with a thread to check for timeouts.
Both stdout capture methods are tested to work both under Linux and Windows, with Python versions from 2.7 to 3.9 as of the time of writing
Being non blocking, it guarantees timeout enforcement, even with multiple child and grandchild processes, and even under Python 2.7.
The package also handles both bytes and text stdout encodings, being a nightmare when trying to catch EOF.
You'll find the package at https://github.com/netinvent/command_runner
If you need some well tested non blocking read implementations, try it out (or hack the code):
pip install command_runner
from command_runner import command_runner
exit_code, output = command_runner('ping 127.0.0.1', timeout=3)
exit_code, output = command_runner('echo hello world', shell=True)
exit_code, output = command_runner('some command', stdout='some_file')
You can find the core non blocking read code in _poll_process() or _monitor_process() depending on the capture method employed.
From there, you can hack your way to what you want, or simply use the whole package to execute your commands as a subprocess replacement.
The select module helps you determine where the next useful input is.
However, you're almost always happier with separate threads. One does a blocking read of the stream; another does whatever it is you don't want blocked.
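A minimal sketch of that two-thread arrangement (my own illustration; the command and the handle_line callback are placeholders):

import subprocess
import threading


def pump_stdout(proc, handle_line):
    # blocking reads are fine here, because they happen off the main thread
    for line in proc.stdout:
        handle_line(line)


proc = subprocess.Popen(['some_command'], stdout=subprocess.PIPE)
reader = threading.Thread(target=pump_stdout, args=(proc, print), daemon=True)
reader.start()
# ... the main thread does whatever must not be blocked ...
proc.wait()
reader.join()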
Why bother with threads and queues?
Unlike readline(), BufferedReader.read1() won't block waiting for \r\n; it returns as soon as there is any output coming in.
#!/usr/bin/python
from subprocess import Popen, PIPE, STDOUT
import io

def __main__():
    try:
        p = Popen(["ping", "-n", "3", "127.0.0.1"], stdin=PIPE, stdout=PIPE, stderr=STDOUT)
    except:
        print("Popen failed")
        quit()
    sout = io.open(p.stdout.fileno(), 'rb', closefd=False)
    while True:
        buf = sout.read1(1024)
        if len(buf) == 0:
            break
        print buf,

if __name__ == '__main__':
    __main__()
In my case I needed a logging module that catches the output from background applications and augments it (adding timestamps, colors, etc.).
I ended up with a background thread that does the actual I/O. The following code is for POSIX platforms only. I stripped non-essential parts.
If someone is going to use this beast for long runs consider managing open descriptors. In my case it was not a big problem.
# -*- python -*-
import fcntl
import threading
import sys, os, errno
import subprocess

class Logger(threading.Thread):
    def __init__(self, *modules):
        threading.Thread.__init__(self)
        try:
            from select import epoll, EPOLLIN
            self.__poll = epoll()
            self.__evt = EPOLLIN
            self.__to = -1
        except:
            from select import poll, POLLIN
            print 'epoll is not available'
            self.__poll = poll()
            self.__evt = POLLIN
            self.__to = 100
        self.__fds = {}
        self.daemon = True
        self.start()

    def run(self):
        while True:
            events = self.__poll.poll(self.__to)
            for fd, ev in events:
                if (ev & self.__evt) != self.__evt:
                    continue
                try:
                    self.__fds[fd].run()
                except Exception, e:
                    print e

    def add(self, fd, log):
        assert not self.__fds.has_key(fd)
        self.__fds[fd] = log
        self.__poll.register(fd, self.__evt)

class log:
    logger = Logger()

    def __init__(self, name):
        self.__name = name
        self.__piped = False

    def fileno(self):
        if self.__piped:
            return self.write
        self.read, self.write = os.pipe()
        fl = fcntl.fcntl(self.read, fcntl.F_GETFL)
        fcntl.fcntl(self.read, fcntl.F_SETFL, fl | os.O_NONBLOCK)
        self.fdRead = os.fdopen(self.read)
        self.logger.add(self.read, self)
        self.__piped = True
        return self.write

    def __run(self, line):
        self.chat(line, nl=False)

    def run(self):
        while True:
            try:
                line = self.fdRead.readline()
            except IOError, exc:
                if exc.errno == errno.EAGAIN:
                    return
                raise
            self.__run(line)

    def chat(self, line, nl=True):
        if nl:
            nl = '\n'
        else:
            nl = ''
        sys.stdout.write('[%s] %s%s' % (self.__name, line, nl))

def system(command, param=[], cwd=None, env=None, input=None, output=None):
    args = [command] + param
    p = subprocess.Popen(args, cwd=cwd, stdout=output, stderr=output, stdin=input, env=env, bufsize=0)
    p.wait()

ls = log('ls')
ls.chat('go')
system("ls", ['-l', '/'], output=ls)

date = log('date')
date.chat('go')
system("date", output=date)
This is an example of running an interactive command in a subprocess, with stdout made interactive by using a pseudo-terminal. You can refer to: https://stackoverflow.com/a/43012138/3555925
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import sys
import select
import termios
import tty
import pty
from subprocess import Popen

command = 'bash'
# command = 'docker run -it --rm centos /bin/bash'.split()

# save original tty setting then set it to raw mode
old_tty = termios.tcgetattr(sys.stdin)
tty.setraw(sys.stdin.fileno())

# open pseudo-terminal to interact with subprocess
master_fd, slave_fd = pty.openpty()

# use os.setsid() make it run in a new process group, or bash job control will not be enabled
p = Popen(command,
          preexec_fn=os.setsid,
          stdin=slave_fd,
          stdout=slave_fd,
          stderr=slave_fd,
          universal_newlines=True)

while p.poll() is None:
    r, w, e = select.select([sys.stdin, master_fd], [], [])
    if sys.stdin in r:
        d = os.read(sys.stdin.fileno(), 10240)
        os.write(master_fd, d)
    elif master_fd in r:
        o = os.read(master_fd, 10240)
        if o:
            os.write(sys.stdout.fileno(), o)

# restore tty settings back
termios.tcsetattr(sys.stdin, termios.TCSADRAIN, old_tty)
My problem is a bit different, as I wanted to collect both stdout and stderr from a running process, but ultimately the same, since I wanted to render the output in a widget as it's generated.
I did not want to resort to many of the proposed workarounds using Queues or additional Threads as they should not be necessary to perform such a common task as running another script and collecting its output.
After reading the proposed solutions and python docs I resolved my issue with the implementation below. Yes it only works for POSIX as I'm using the select function call.
I agree that the docs are confusing and the implementation is awkward for such a common scripting task. I believe that older versions of python have different defaults for Popen and different explanations so that created a lot of confusion. This seems to work well for both Python 2.7.12 and 3.5.2.
The key was to set bufsize=1 for line buffering and then universal_newlines=True to process as a text file instead of a binary which seems to become the default when setting bufsize=1.
class workerThread(QThread):
    def __init__(self, cmd):
        QThread.__init__(self)
        self.cmd = cmd
        self.result = None        ## return code
        self.error = None         ## flag indicates an error
        self.errorstr = ""        ## info message about the error

    def __del__(self):
        self.wait()
        DEBUG("Thread removed")

    def run(self):
        cmd_list = self.cmd.split(" ")
        try:
            cmd = subprocess.Popen(cmd_list, bufsize=1, stdin=None
                                   , universal_newlines=True
                                   , stderr=subprocess.PIPE
                                   , stdout=subprocess.PIPE)
        except OSError:
            self.error = 1
            self.errorstr = "Failed to execute " + self.cmd
            ERROR(self.errorstr)
        finally:
            VERBOSE("task started...")
        import select
        while True:
            try:
                r, w, x = select.select([cmd.stdout, cmd.stderr], [], [])
                if cmd.stderr in r:
                    line = cmd.stderr.readline()
                    if line != "":
                        line = line.strip()
                        self.emit(SIGNAL("update_error(QString)"), line)
                if cmd.stdout in r:
                    line = cmd.stdout.readline()
                    if line == "":
                        break
                    line = line.strip()
                    self.emit(SIGNAL("update_output(QString)"), line)
            except IOError:
                pass
        cmd.wait()
        self.result = cmd.returncode
        if self.result < 0:
            self.error = 1
            self.errorstr = "Task terminated by signal " + str(self.result)
            ERROR(self.errorstr)
            return
        if self.result:
            self.error = 1
            self.errorstr = "exit code " + str(self.result)
            ERROR(self.errorstr)
            return
        return
ERROR, DEBUG and VERBOSE are simply macros that print output to the terminal.
This solution is IMHO 99.99% effective as it still uses the blocking readline function, so we assume the sub process is nice and outputs complete lines.
I welcome feedback to improve the solution as I am still new to Python.
I have created a library based on J. F. Sebastian's solution. You can use it.
https://github.com/cenkalti/what
Working from J.F. Sebastian's answer, and several other sources, I've put together a simple subprocess manager. It provides the requested non-blocking reading, as well as running several processes in parallel. It doesn't use any OS-specific call (that I'm aware of) and thus should work anywhere.
It's available from pypi, so just pip install shelljob. Refer to the project page for examples and full docs.
EDIT: This implementation still blocks. Use J.F.Sebastian's answer instead.
I tried the top answer, but the additional risk and maintenance of thread code was worrisome.
Looking through the io module (and being limited to 2.6), I found BufferedReader. This is my threadless, non-blocking solution.
import io
import time
from subprocess import PIPE, Popen

p = Popen(['myprogram.exe'], stdout=PIPE)

SLEEP_DELAY = 0.001

# Create an io.BufferedReader on the file descriptor for stdout
with io.open(p.stdout.fileno(), 'rb', closefd=False) as bufferedStdout:
    while p.poll() == None:
        time.sleep(SLEEP_DELAY)
        while '\n' in bufferedStdout.peek(bufferedStdout.buffer_size):
            line = bufferedStdout.readline()
            # do stuff with the line

    # Handle any remaining output after the process has ended
    while bufferedStdout.peek():
        line = bufferedStdout.readline()
        # do stuff with the line
This solution uses the select module to "read any available data" from an IO stream. This function blocks initially until data is available, but then reads only the data that is available and doesn't block further.
Given the fact that it uses the select module, this only works on Unix.
The code is fully PEP8-compliant.
import select


def read_available(input_stream, max_bytes=None):
    """
    Blocks until any data is available, then all available data is then read and returned.
    This function returns an empty string when end of stream is reached.

    Args:
        input_stream: The stream to read from.
        max_bytes (int|None): The maximum number of bytes to read. This function may return fewer bytes than this.

    Returns:
        str
    """
    # Prepare local variables
    input_streams = [input_stream]
    empty_list = []
    read_buffer = ""

    # Initially block for input using 'select'
    if len(select.select(input_streams, empty_list, empty_list)[0]) > 0:

        # Poll read-readiness using 'select'
        def select_func():
            return len(select.select(input_streams, empty_list, empty_list, 0)[0]) > 0

        # Create while function based on parameters
        if max_bytes is not None:
            def while_func():
                return (len(read_buffer) < max_bytes) and select_func()
        else:
            while_func = select_func

        while True:
            # Read single byte at a time
            read_data = input_stream.read(1)
            if len(read_data) == 0:
                # End of stream
                break
            # Append byte to string buffer
            read_buffer += read_data
            # Check if more data is available
            if not while_func():
                break

    # Return read buffer
    return read_buffer
I also faced the problem described by Jesse and solved it by using "select" as Bradley, Andy and others did but in a blocking mode to avoid a busy loop. It uses a dummy Pipe as a fake stdin. The select blocks and wait for either stdin or the pipe to be ready. When a key is pressed stdin unblocks the select and the key value can be retrieved with read(1). When a different thread writes to the pipe then the pipe unblocks the select and it can be taken as an indication that the need for stdin is over. Here is some reference code:
import sys
import os
from select import select

# -------------------------------------------------------------------------
# Set the pipe (fake stdin) to simulate a final key stroke
# which will unblock the select statement
readEnd, writeEnd = os.pipe()
readFile = os.fdopen(readEnd)
writeFile = os.fdopen(writeEnd, "w")


# -------------------------------------------------------------------------
def getKey():
    # Wait for stdin or pipe (fake stdin) to be ready
    dr, dw, de = select([sys.__stdin__, readFile], [], [])
    # If stdin is the one ready then read it and return value
    if sys.__stdin__ in dr:
        return sys.__stdin__.read(1)  # For Windows use ----> getch() from module msvcrt
    # Must finish
    else:
        return None


# -------------------------------------------------------------------------
def breakStdinRead():
    writeFile.write(' ')
    writeFile.flush()


# -------------------------------------------------------------------------
# MAIN CODE

# Get key stroke
key = getKey()

# Keyboard input
if key:
    pass  # ... do your stuff with the key value
# Faked keystroke
else:
    pass  # ... use of stdin finished

# -------------------------------------------------------------------------
# OTHER THREAD CODE

breakStdinRead()
Try wexpect, which is the windows alternative of pexpect.
import wexpect

p = wexpect.spawn('myprogram.exe')
p.stdout.readline('.')  # regex pattern of any character
output_str = p.after()
Here is a module that supports non-blocking reads and background writes in python:
https://pypi.python.org/pypi/python-nonblock
Provides a function,
nonblock_read which will read data from the stream, if available, otherwise return an empty string (or None if the stream is closed on the other side and all possible data has been read)
You may also consider the python-subprocess2 module,
https://pypi.python.org/pypi/python-subprocess2
which adds to the subprocess module. So on the object returned from "subprocess.Popen" is added an additional method, runInBackground. This starts a thread and returns an object which will automatically be populated as stuff is written to stdout/stderr, without blocking your main thread.
Enjoy!

asynchronous subprocess Popen python 3.5

I am trying to asynchronously run the Popen command from subprocess, so that I can run other stuff in the background.
import subprocess
import requests
import asyncio
import asyncio.subprocess

async def x(message):
    if len(message.content.split()) > 1:
        #output = asyncio.create_subprocess_shell(message.content[3:], shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        output = subprocess.Popen(message.content[3:], shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        return output.communicate()[0].decode('utf-8')
I have tried to understand https://docs.python.org/3/library/asyncio-subprocess.html but I am not sure what a protocol factory is.
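(For reference: a protocol factory is simply a zero-argument callable that returns a new protocol instance; passing the protocol class itself is the usual shortcut. A minimal sketch of my own, not taken from the linked docs:

class MyProtocol(asyncio.SubprocessProtocol):
    def pipe_data_received(self, fd, data):
        print(fd, data)

# either pass the class, which is called with no arguments...
#     loop.subprocess_exec(MyProtocol, 'ls')
# ...or any callable that constructs the instance yourself:
#     loop.subprocess_exec(lambda: MyProtocol(), 'ls')
)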
When I came to this question, I expected the answer to really use asyncio for interprocess communication.
I have found the following resource useful:
https://github.com/python/asyncio/blob/master/examples/child_process.py
and below is my simplified example (using 3.5+ async/await syntax), which reads lines and outputs them sorted:
import asyncio
from subprocess import Popen, PIPE


async def connect_write_pipe(file):
    """Return a write-only transport wrapping a writable pipe"""
    loop = asyncio.get_event_loop()
    transport, _ = await loop.connect_write_pipe(asyncio.Protocol, file)
    return transport


async def connect_read_pipe(file):
    """Wrap a readable pipe in a stream"""
    loop = asyncio.get_event_loop()
    stream_reader = asyncio.StreamReader(loop=loop)

    def factory():
        return asyncio.StreamReaderProtocol(stream_reader)

    transport, _ = await loop.connect_read_pipe(factory, file)
    return stream_reader, transport


async def main(loop):
    # start subprocess and wrap stdin, stdout, stderr
    p = Popen(['/usr/bin/sort'], stdin=PIPE, stdout=PIPE, stderr=PIPE)
    stdin = await connect_write_pipe(p.stdin)
    stdout, stdout_transport = await connect_read_pipe(p.stdout)
    stderr, stderr_transport = await connect_read_pipe(p.stderr)

    # interact with subprocess
    name = {stdout: 'OUT', stderr: 'ERR'}
    registered = {
        asyncio.Task(stderr.read()): stderr,
        asyncio.Task(stdout.read()): stdout
    }

    to_sort = b"one\ntwo\nthree\n"
    stdin.write(to_sort)
    stdin.close()  # this way we tell we do not have anything else

    # get and print lines from stdout, stderr
    timeout = None
    while registered:
        done, pending = await asyncio.wait(
            registered, timeout=timeout,
            return_when=asyncio.FIRST_COMPLETED)
        if not done:
            break
        for f in done:
            stream = registered.pop(f)
            res = f.result()
            if res != b'':
                print(name[stream], res.decode('ascii').rstrip())
                registered[asyncio.Task(stream.read())] = stream
        timeout = 0.0

    stdout_transport.close()
    stderr_transport.close()


if __name__ == '__main__':
    loop = asyncio.get_event_loop()
    try:
        loop.run_until_complete(main(loop))
    finally:
        loop.close()
NB: without taking special measures, the amount of data to be written into the pipe is limited. In my system it was possible to write just over 700000 bytes before using up pipe buffers.
There are also other examples there, using create_subprocess_shell.
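For comparison, the higher-level create_subprocess_shell() route is much shorter. A rough sketch of the same sort-the-input idea (my own, using the newer asyncio.run() entry point):

import asyncio


async def sort_lines(data: bytes) -> bytes:
    proc = await asyncio.create_subprocess_shell(
        'sort',
        stdin=asyncio.subprocess.PIPE,
        stdout=asyncio.subprocess.PIPE)
    # communicate() feeds stdin, reads stdout, and waits for the child to exit
    stdout, _ = await proc.communicate(input=data)
    return stdout


print(asyncio.run(sort_lines(b"one\ntwo\nthree\n")))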
I have not yet used asyncio in real projects, so improvements' suggestions in the comments are welcome.
It's the right way to go...! Use async/await.
Tested on Python 3.x [Windows, macOS].
import asyncio
from asyncio.subprocess import PIPE, STDOUT
import subprocess
import signal
import sys


def signal_handler(signal, frame):
    loop.stop()
    sys.exit(0)


async def run_async(loop=''):
    cmd = 'sudo long_running_cmd --opt1=AAAA --opt2=BBBB'

    print("[INFO] Starting script...")
    await asyncio.create_subprocess_shell(cmd, stdin=PIPE, stdout=PIPE, stderr=STDOUT)
    print("[INFO] Script is complete.")


loop = asyncio.get_event_loop()
signal.signal(signal.SIGINT, signal_handler)
tasks = [loop.create_task(run_async())]
wait_tasks = asyncio.wait(tasks)
loop.run_until_complete(wait_tasks)
loop.close()
Core logic:

process = await asyncio.create_subprocess_shell(cmd, stdin=PIPE, stdout=PIPE, stderr=STDOUT)
await process.wait()
I eventually found the answer to my question, which utilizes async.
http://pastebin.com/Zj8SK1CG
Tested with python 3.5. Just ask if you have questions.
import threading
import time
import subprocess
import shlex
from sys import stdout


# Only data within a class are actually shared by the threads.
# Let's use a class as communicator (there could be problems if you have more than
# a single thread)
class Communicator(object):
    counter = 0
    stop = False
    arg = None
    result = None


# Here we can define what you want to do. There are other methods to do that
# but this is the one I prefer.
class ThreadedFunction(threading.Thread):
    def run(self, *args, **kwargs):
        super().run()
        command = c.arg
        # Here what you want to do...
        command = shlex.split(command)
        print(time.time())  # this is just to check that the command (sleep 5) is executed
        output = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
        print('\n', time.time())
        c.result = output
        if c.stop:
            return None  # This is useful only within loops within threads


# Create a class instance
c = Communicator()
c.arg = 'time sleep 5'  # Here I used the 'time' only to have some output

# Create the thread and start it
t = ThreadedFunction()
t.start()  # Start the thread and do something else...

# ...for example count the seconds in the mean time..
try:
    for j in range(100):
        c.counter += 1
        stdout.write('\r{:}'.format(c.counter))
        stdout.flush()
        time.sleep(1)
        if c.result != None:
            print(c.result)
            break
except:
    c.stop = True
This one is much simpler, I found it after the other reply that could, anyway, be interesting... so I left it.
import time
import subprocess
import shlex
from sys import stdout

command = 'time sleep 5'  # Here I used the 'time' only to have some output


def x(command):
    cmd = shlex.split(command)
    p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    return p


# Start the subprocess and do something else...
p = x(command)

# ...for example count the seconds in the mean time..
try:  # This takes care of killing the subprocess if problems occur
    for j in range(100):
        stdout.write('\r{:}'.format(j))
        stdout.flush()
        time.sleep(1)
        if p.poll() != None:
            print(p.communicate())
            break
except:
    p.terminate()  # or p.kill()
The asynchronism is evident from the fact that the Python script prints the counter value on stdout while the background process runs the sleep command. The fact that the Python script exits after roughly five seconds, printing the output of the bash time command while printing the counter in the meantime, is evidence that the script works.

How to kill hung threads in Python

I've created a function that uses PyQt5 to "render" HTML and return the result. It's as follows:
def render(source_html):
    """Fully render HTML, JavaScript and all."""
    import sys
    from PyQt5.QtWidgets import QApplication
    from PyQt5.QtWebKitWidgets import QWebPage

    class Render(QWebPage):
        def __init__(self, html):
            self.html = None
            self.app = QApplication(sys.argv)
            QWebPage.__init__(self)
            self.loadFinished.connect(self._loadFinished)
            self.mainFrame().setHtml(html)
            self.app.exec_()

        def _loadFinished(self, result):
            self.html = self.mainFrame().toHtml()
            self.app.quit()

    return Render(source_html).html
Occasionally its threads will hang indefinitely and I'll have to kill the whole program. Unfortunately PyQt5 may as well be a black box, as I'm not sure how to kill it when it misbehaves.
Ideally I'd be able to implement a timeout of n seconds. As a workaround, I've put the function in its own script render.py and am calling it via subprocess with this monstrosity:
def render(html):
    """Return fully rendered HTML, JavaScript and all."""
    args = ['render.py', '-']
    timeout = 20
    try:
        return subprocess.check_output(args,
                                       input=html,
                                       timeout=timeout,
                                       universal_newlines=True)
    # Python 2's subprocess.check_output doesn't support input or timeout
    except TypeError:
        class SubprocessError(Exception):
            """Base exception from subprocess module."""
            pass

        class TimeoutExpired(SubprocessError):
            """
            This exception is raised when the timeout expires while
            waiting for a child process.
            """
            def __init__(self, cmd, timeout, output=None):
                super(TimeoutExpired, self).__init__()
                self.cmd = cmd
                self.timeout = timeout
                self.output = output

            def __str__(self):
                return ('Command %r timed out after %s seconds' %
                        (self.cmd, self.timeout))

        process = subprocess.Popen(['timeout', str(timeout)] + args,
                                   stderr=subprocess.PIPE,
                                   stdin=subprocess.PIPE,
                                   stdout=subprocess.PIPE)
        # pipe html into render.py's stdin
        output = process.communicate(
            html.encode('utf8'))[0].decode('latin1')
        retcode = process.poll()
        if retcode == 124:
            raise TimeoutExpired(args, timeout)
        return output
The multiprocessing module appears to greatly simplify things:
from multiprocessing import Pool
pool = Pool(1)
rendered_html = pool.apply_async(render, args=(html,)).get(timeout=20)
pool.terminate()
Is there a way to implement a timeout that doesn't necessitate these sort of shenanigans?
I was looking for a solution too; there apparently isn't one, on purpose.
If you're using Linux and all you want is Python to attempt something for N seconds and then time out and handle an error condition after those N seconds, you can do this:
import time
import signal

# This stuff is so when we get SIGALRM from the timeout functionality we can handle it instead of
# crashing to the ground
class TimeOutError(Exception):
    pass

def raise_timeout(var1, var2):
    raise TimeOutError

signal.signal(signal.SIGALRM, raise_timeout)

# Turn the alarm on
signal.alarm(1)

# Try your thing
try:
    time.sleep(2)
except TimeOutError as e:
    print(" We hit our timeout value and we bailed out of whatever that BS was.")

# Remember to turn the alarm back off if your attempt succeeds!
signal.alarm(0)
The one drawback is that you can't nest signal.alarm() hooks; if, in your try statement, you're calling something else that also then sets a signal.alarm(), it will override the first one and screw your stuff up.
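One way to soften that limitation, sketched below (my own workaround, not part of the answer above), is a context manager that saves the previous handler and any remaining alarm time, then restores them on exit; the re-armed outer alarm is only approximate because elapsed time is not subtracted:

import signal
from contextlib import contextmanager


class TimeOutError(Exception):
    pass


@contextmanager
def alarm_timeout(seconds):
    def _raise(signum, frame):
        raise TimeOutError

    old_handler = signal.signal(signal.SIGALRM, _raise)
    remaining = signal.alarm(seconds)  # seconds left on any previously set alarm
    try:
        yield
    finally:
        signal.alarm(0)
        signal.signal(signal.SIGALRM, old_handler)
        if remaining:
            signal.alarm(remaining)  # re-arm the outer alarm (approximately)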

Python readline blocking the while loop [duplicate]

I'm using the subprocess module to start a subprocess and connect to its output stream (standard output). I want to be able to execute non-blocking reads on its standard output. Is there a way to make .readline non-blocking or to check if there is data on the stream before I invoke .readline? I'd like this to be portable or at least work under Windows and Linux.
Here is how I do it for now (it's blocking on the .readline if no data is available):
p = subprocess.Popen('myprogram.exe', stdout = subprocess.PIPE)
output_str = p.stdout.readline()
fcntl, select, asyncproc won't help in this case.
A reliable way to read a stream without blocking regardless of operating system is to use Queue.get_nowait():
import sys
from subprocess import PIPE, Popen
from threading import Thread
try:
from queue import Queue, Empty
except ImportError:
from Queue import Queue, Empty # python 2.x
ON_POSIX = 'posix' in sys.builtin_module_names
def enqueue_output(out, queue):
for line in iter(out.readline, b''):
queue.put(line)
out.close()
p = Popen(['myprogram.exe'], stdout=PIPE, bufsize=1, close_fds=ON_POSIX)
q = Queue()
t = Thread(target=enqueue_output, args=(p.stdout, q))
t.daemon = True # thread dies with the program
t.start()
# ... do other things here
# read line without blocking
try: line = q.get_nowait() # or q.get(timeout=.1)
except Empty:
print('no output yet')
else: # got line
# ... do something with line
I have often had a similar problem; Python programs I write frequently need to have the ability to execute some primary functionality while simultaneously accepting user input from the command line (stdin). Simply putting the user input handling functionality in another thread doesn't solve the problem because readline() blocks and has no timeout. If the primary functionality is complete and there is no longer any need to wait for further user input I typically want my program to exit, but it can't because readline() is still blocking in the other thread waiting for a line. A solution I have found to this problem is to make stdin a non-blocking file using the fcntl module:
import fcntl
import os
import sys
# make stdin a non-blocking file
fd = sys.stdin.fileno()
fl = fcntl.fcntl(fd, fcntl.F_GETFL)
fcntl.fcntl(fd, fcntl.F_SETFL, fl | os.O_NONBLOCK)
# user input handling thread
while mainThreadIsRunning:
try: input = sys.stdin.readline()
except: continue
handleInput(input)
In my opinion this is a bit cleaner than using the select or signal modules to solve this problem but then again it only works on UNIX...
Python 3.4 introduces new provisional API for asynchronous IO -- asyncio module.
The approach is similar to twisted-based answer by #Bryan Ward -- define a protocol and its methods are called as soon as data is ready:
#!/usr/bin/env python3
import asyncio
import os
class SubprocessProtocol(asyncio.SubprocessProtocol):
def pipe_data_received(self, fd, data):
if fd == 1: # got stdout data (bytes)
print(data)
def connection_lost(self, exc):
loop.stop() # end loop.run_forever()
if os.name == 'nt':
loop = asyncio.ProactorEventLoop() # for subprocess' pipes on Windows
asyncio.set_event_loop(loop)
else:
loop = asyncio.get_event_loop()
try:
loop.run_until_complete(loop.subprocess_exec(SubprocessProtocol,
"myprogram.exe", "arg1", "arg2"))
loop.run_forever()
finally:
loop.close()
See "Subprocess" in the docs.
There is a high-level interface asyncio.create_subprocess_exec() that returns Process objects that allows to read a line asynchroniosly using StreamReader.readline() coroutine
(with async/await Python 3.5+ syntax):
#!/usr/bin/env python3.5
import asyncio
import locale
import sys
from asyncio.subprocess import PIPE
from contextlib import closing
async def readline_and_kill(*args):
# start child process
process = await asyncio.create_subprocess_exec(*args, stdout=PIPE)
# read line (sequence of bytes ending with b'\n') asynchronously
async for line in process.stdout:
print("got line:", line.decode(locale.getpreferredencoding(False)))
break
process.kill()
return await process.wait() # wait for the child process to exit
if sys.platform == "win32":
loop = asyncio.ProactorEventLoop()
asyncio.set_event_loop(loop)
else:
loop = asyncio.get_event_loop()
with closing(loop):
sys.exit(loop.run_until_complete(readline_and_kill(
"myprogram.exe", "arg1", "arg2")))
readline_and_kill() performs the following tasks:
start subprocess, redirect its stdout to a pipe
read a line from subprocess' stdout asynchronously
kill subprocess
wait for it to exit
Each step could be limited by timeout seconds if necessary.
On Unix-like systems and Python 3.5+ there's os.set_blocking which does exactly what it says.
import os
import time
import subprocess
cmd = 'python3', '-c', 'import time; [(print(i), time.sleep(1)) for i in range(5)]'
p = subprocess.Popen(cmd, stdout=subprocess.PIPE)
os.set_blocking(p.stdout.fileno(), False)
start = time.time()
while True:
# first iteration always produces empty byte string in non-blocking mode
for i in range(2):
line = p.stdout.readline()
print(i, line)
time.sleep(0.5)
if time.time() > start + 5:
break
p.terminate()
This outputs:
1 b''
2 b'0\n'
1 b''
2 b'1\n'
1 b''
2 b'2\n'
1 b''
2 b'3\n'
1 b''
2 b'4\n'
With os.set_blocking commented it's:
0 b'0\n'
1 b'1\n'
0 b'2\n'
1 b'3\n'
0 b'4\n'
1 b''
Try the asyncproc module. For example:
import os
from asyncproc import Process
myProc = Process("myprogram.app")
while True:
# check to see if process has ended
poll = myProc.wait(os.WNOHANG)
if poll != None:
break
# print any new output
out = myProc.read()
if out != "":
print out
The module takes care of all the threading as suggested by S.Lott.
You can do this really easily in Twisted. Depending upon your existing code base, this might not be that easy to use, but if you are building a twisted application, then things like this become almost trivial. You create a ProcessProtocol class, and override the outReceived() method. Twisted (depending upon the reactor used) is usually just a big select() loop with callbacks installed to handle data from different file descriptors (often network sockets). So the outReceived() method is simply installing a callback for handling data coming from STDOUT. A simple example demonstrating this behavior is as follows:
from twisted.internet import protocol, reactor
class MyProcessProtocol(protocol.ProcessProtocol):
def outReceived(self, data):
print data
proc = MyProcessProtocol()
reactor.spawnProcess(proc, './myprogram', ['./myprogram', 'arg1', 'arg2', 'arg3'])
reactor.run()
The Twisted documentation has some good information on this.
If you build your entire application around Twisted, it makes asynchronous communication with other processes, local or remote, really elegant like this. On the other hand, if your program isn't built on top of Twisted, this isn't really going to be that helpful. Hopefully this can be helpful to other readers, even if it isn't applicable for your particular application.
Use select & read(1).
import subprocess #no new requirements
def readAllSoFar(proc, retVal=''):
while (select.select([proc.stdout],[],[],0)[0]!=[]):
retVal+=proc.stdout.read(1)
return retVal
p = subprocess.Popen(['/bin/ls'], stdout=subprocess.PIPE)
while not p.poll():
print (readAllSoFar(p))
For readline()-like:
lines = ['']
while not p.poll():
lines = readAllSoFar(p, lines[-1]).split('\n')
for a in range(len(lines)-1):
print a
lines = readAllSoFar(p, lines[-1]).split('\n')
for a in range(len(lines)-1):
print a
Things are a lot better in modern Python.
Here's a simple child program, "hello.py":
#!/usr/bin/env python3
while True:
i = input()
if i == "quit":
break
print(f"hello {i}")
And a program to interact with it:
import asyncio
async def main():
proc = await asyncio.subprocess.create_subprocess_exec(
"./hello.py", stdin=asyncio.subprocess.PIPE, stdout=asyncio.subprocess.PIPE
)
proc.stdin.write(b"bob\n")
print(await proc.stdout.read(1024))
proc.stdin.write(b"alice\n")
print(await proc.stdout.read(1024))
proc.stdin.write(b"quit\n")
await proc.wait()
asyncio.run(main())
That prints out:
b'hello bob\n'
b'hello alice\n'
Note that the actual pattern, which is also by almost all of the previous answers, both here and in related questions, is to set the child's stdout file descriptor to non-blocking and then poll it in some sort of select loop. These days, of course, that loop is provided by asyncio.
One solution is to have another process perform the read of your process's output for you, or to do the read in a thread with a timeout.
Here's the threaded version of a timeout function:
http://code.activestate.com/recipes/473878/
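In spirit, the thread-based variant looks something like this (a rough sketch, not the recipe from the link; 'myprogram' is a placeholder):
import queue
import subprocess
import threading

proc = subprocess.Popen(["myprogram"], stdout=subprocess.PIPE)
lines = queue.Queue()

def pump():
    for line in proc.stdout:       # blocking readline, but in its own thread
        lines.put(line)

threading.Thread(target=pump, daemon=True).start()

try:
    line = lines.get(timeout=2.0)  # give up after 2 seconds instead of blocking forever
    print(line)
except queue.Empty:
    print("no output within the timeout")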
However, do you need to read the stdout as it's coming in?
Another solution may be to dump the output to a file and wait for the process to finish using p.wait().
f = open('myprogram_output.txt','w')
p = subprocess.Popen('myprogram.exe', stdout=f)
p.wait()
f.close()
str = open('myprogram_output.txt','r').read()
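If you later decide you do want to see the output while the process runs, a variation on the same idea is to tail the file from a second handle (a sketch; 'myprogram.exe' as above):
import subprocess
import time

with open('myprogram_output.txt', 'w') as f:
    p = subprocess.Popen('myprogram.exe', stdout=f)
    with open('myprogram_output.txt', 'r') as tail:
        while p.poll() is None:
            chunk = tail.read()          # '' simply means nothing new has been written yet
            if chunk:
                print(chunk, end='')
            time.sleep(0.5)
        print(tail.read(), end='')       # pick up whatever arrived after the last check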
Here is my code, used to catch every output from the subprocess ASAP, including partial lines. It pumps stdout and stderr at the same time, in almost correct order.
Tested and working correctly on Python 2.7, on both Linux and Windows.
#!/usr/bin/python
#
# Runner with stdout/stderr catcher
#
from sys import argv
from subprocess import Popen, PIPE
import os, io
from threading import Thread
import Queue
def __main__():
if (len(argv) > 1) and (argv[-1] == "-sub-"):
import time, sys
print "Application runned!"
time.sleep(2)
print "Slept 2 second"
time.sleep(1)
print "Slept 1 additional second",
time.sleep(2)
sys.stderr.write("Stderr output after 5 seconds")
print "Eol on stdin"
sys.stderr.write("Eol on stderr\n")
time.sleep(1)
print "Wow, we have end of work!",
else:
os.environ["PYTHONUNBUFFERED"]="1"
try:
p = Popen( argv + ["-sub-"],
bufsize=0, # line-buffered
stdin=PIPE, stdout=PIPE, stderr=PIPE )
except WindowsError, W:
if W.winerror==193:
p = Popen( argv + ["-sub-"],
shell=True, # Try to run via shell
bufsize=0, # line-buffered
stdin=PIPE, stdout=PIPE, stderr=PIPE )
else:
raise
inp = Queue.Queue()
sout = io.open(p.stdout.fileno(), 'rb', closefd=False)
serr = io.open(p.stderr.fileno(), 'rb', closefd=False)
def Pump(stream, category):
queue = Queue.Queue()
def rdr():
while True:
buf = stream.read1(8192)
if len(buf)>0:
queue.put( buf )
else:
queue.put( None )
return
def clct():
active = True
while active:
r = queue.get()
try:
while True:
r1 = queue.get(timeout=0.005)
if r1 is None:
active = False
break
else:
r += r1
except Queue.Empty:
pass
inp.put( (category, r) )
for tgt in [rdr, clct]:
th = Thread(target=tgt)
th.setDaemon(True)
th.start()
Pump(sout, 'stdout')
Pump(serr, 'stderr')
while p.poll() is None:
# App still working
try:
chan,line = inp.get(timeout = 1.0)
if chan=='stdout':
print "STDOUT>>", line, "<?<"
elif chan=='stderr':
print " ERROR==", line, "=?="
except Queue.Empty:
pass
print "Finish"
if __name__ == '__main__':
__main__()
Disclaimer: this works only for tornado
You can do this by setting the fd to be nonblocking and then use ioloop to register callbacks. I have packaged this in an egg called tornado_subprocess and you can install it via PyPI:
easy_install tornado_subprocess
now you can do something like this:
import tornado_subprocess
import tornado.ioloop
def print_res( status, stdout, stderr ) :
print status, stdout, stderr
if status == 0:
print "OK:"
print stdout
else:
print "ERROR:"
print stderr
t = tornado_subprocess.Subprocess( print_res, timeout=30, args=[ "cat", "/etc/passwd" ] )
t.start()
tornado.ioloop.IOLoop.instance().start()
you can also use it with a RequestHandler
class MyHandler(tornado.web.RequestHandler):
def on_done(self, status, stdout, stderr):
self.write( stdout )
self.finish()
@tornado.web.asynchronous
def get(self):
t = tornado_subprocess.Subprocess( self.on_done, timeout=30, args=[ "cat", "/etc/passwd" ] )
t.start()
Existing solutions did not work for me (details below). What finally worked was to implement readline using read(1) (based on this answer). The latter does not block:
from subprocess import Popen, PIPE
from threading import Thread
def process_output(myprocess): #output-consuming thread
nextline = None
buf = ''
while True:
#--- extract line using read(1)
out = myprocess.stdout.read(1)
if out == '' and myprocess.poll() != None: break
if out != '':
buf += out
if out == '\n':
nextline = buf
buf = ''
if not nextline: continue
line = nextline
nextline = None
#--- do whatever you want with line here
print 'Line is:', line
myprocess.stdout.close()
myprocess = Popen('myprogram.exe', stdout=PIPE) #output-producing process
p1 = Thread(target=process_output, args=(myprocess,)) #output-consuming thread
p1.daemon = True
p1.start()
#--- do whatever here and then kill process and thread if needed
if myprocess.poll() == None: #kill process; will automatically stop thread
myprocess.kill()
myprocess.wait()
if p1 and p1.is_alive(): #wait for thread to finish
p1.join()
Why existing solutions did not work:
Solutions that require readline (including the Queue based ones) always block. It is difficult (impossible?) to kill the thread that executes readline. It only gets killed when the process that created it finishes, but not when the output-producing process is killed.
Mixing low-level fcntl with high-level readline calls may not work properly as anonnn has pointed out.
Using select.poll() is neat, but doesn't work on Windows according to python docs.
Using third-party libraries seems overkill for this task and adds additional dependencies.
I ran into this problem when trying to read some subprocess.Popen stdout.
Here is my non-blocking read solution:
import fcntl
import os

def non_block_read(output):
    fd = output.fileno()
    fl = fcntl.fcntl(fd, fcntl.F_GETFL)
    fcntl.fcntl(fd, fcntl.F_SETFL, fl | os.O_NONBLOCK)
    try:
        return output.read()
    except Exception:
        return ""
# Use example
from subprocess import *
sb = Popen("echo test && sleep 1000", shell=True, stdout=PIPE)
sb.kill()
# sb.stdout.read() # <-- This will block
non_block_read(sb.stdout)
'test\n'
Here is a simple solution based on threads which:
works on both Linux and Windows (not relying on select).
reads both stdout and stderr asynchronously.
doesn't rely on active polling with arbitrary waiting time (CPU friendly).
doesn't use asyncio (which may conflict with other libraries).
runs until the child process terminates.
printer.py
import time
import sys
sys.stdout.write("Hello\n")
sys.stdout.flush()
time.sleep(1)
sys.stdout.write("World!\n")
sys.stdout.flush()
time.sleep(1)
sys.stderr.write("That's an error\n")
sys.stderr.flush()
time.sleep(2)
sys.stdout.write("Actually, I'm fine\n")
sys.stdout.flush()
time.sleep(1)
reader.py
import queue
import subprocess
import sys
import threading
def enqueue_stream(stream, queue, type):
for line in iter(stream.readline, b''):
queue.put(str(type) + line.decode('utf-8'))
stream.close()
def enqueue_process(process, queue):
process.wait()
queue.put('x')
p = subprocess.Popen(['python', 'printer.py'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)  # argument list (not a string) so it also works on POSIX
q = queue.Queue()
to = threading.Thread(target=enqueue_stream, args=(p.stdout, q, 1))
te = threading.Thread(target=enqueue_stream, args=(p.stderr, q, 2))
tp = threading.Thread(target=enqueue_process, args=(p, q))
te.start()
to.start()
tp.start()
while True:
line = q.get()
if line[0] == 'x':
break
if line[0] == '2': # stderr
sys.stdout.write("\033[0;31m") # ANSI red color
sys.stdout.write(line[1:])
if line[0] == '2':
sys.stdout.write("\033[0m") # reset ANSI code
sys.stdout.flush()
tp.join()
to.join()
te.join()
This version of non-blocking read doesn't require special modules and will work out-of-the-box on the majority of Linux distros.
import os
import sys
import time
import fcntl
import subprocess
def async_read(fd):
    # set non-blocking flag while preserving old flags
    fl = fcntl.fcntl(fd, fcntl.F_GETFL)
    fcntl.fcntl(fd, fcntl.F_SETFL, fl | os.O_NONBLOCK)
    # read char by char until EOF is hit
    while True:
        try:
            ch = os.read(fd.fileno(), 1)
            # EOF
            if not ch:
                break
            sys.stdout.write(ch.decode('utf-8', 'replace'))
        except OSError:
            # waiting for data to be available on fd
            pass

def shell(args, run_async=True):  # 'async' is a reserved word in Python 3.7+, so the flag is renamed
    # merge stderr and stdout
    proc = subprocess.Popen(args, shell=False, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    if run_async:
        async_read(proc.stdout)
    sout, serr = proc.communicate()
    return (sout, serr)
if __name__ == '__main__':
cmd = 'ping 8.8.8.8'
sout, serr = shell(cmd.split())
I have the original questioner's problem, but did not wish to invoke threads. I mixed Jesse's solution with a direct read() from the pipe, and my own buffer-handler for line reads (however, my sub-process - ping - always wrote full lines < a system page size). I avoid busy-waiting by only reading in a gobject-registered io watch. These days I usually run code within a gobject MainLoop to avoid threads.
import fcntl
import os
import subprocess
import gobject

def set_up_ping(ip, w):
    # run the sub-process
    # watch the resultant pipe
    p = subprocess.Popen(['/bin/ping', ip], stdout=subprocess.PIPE)
    # make stdout a non-blocking file
    fl = fcntl.fcntl(p.stdout, fcntl.F_GETFL)
    fcntl.fcntl(p.stdout, fcntl.F_SETFL, fl | os.O_NONBLOCK)
    stdout_gid = gobject.io_add_watch(p.stdout, gobject.IO_IN, w)
    return stdout_gid  # for shutting down
The watcher is
def watch(f, *other):
    print 'reading', f.read()
    return True
And the main program sets up a ping and then calls the gobject main loop.
def main():
    set_up_ping('192.168.1.8', watch)
    # discard gid as unused here
    gobject.MainLoop().run()
Any other work is attached to callbacks in gobject.
Adding this answer here since it provides the ability to set non-blocking pipes on Windows and Unix.
All the ctypes details are thanks to @techtonik's answer.
This is a slightly modified version to be used on both Unix and Windows systems.
Python3 compatible (only minor change needed).
Includes posix version, and defines exception to use for either.
This way you can use the same function and exception for Unix and Windows code.
# pipe_non_blocking.py (module)
"""
Example use:
p = subprocess.Popen(
command,
stdout=subprocess.PIPE,
)
pipe_non_blocking_set(p.stdout.fileno())
try:
data = os.read(p.stdout.fileno(), 1)
except PortableBlockingIOError as ex:
if not pipe_non_blocking_is_error_blocking(ex):
raise ex
"""
__all__ = (
"pipe_non_blocking_set",
"pipe_non_blocking_is_error_blocking",
"PortableBlockingIOError",
)
import os
if os.name == "nt":
def pipe_non_blocking_set(fd):
# Constant could be defined globally, but avoid polluting the name-space
# thanks to: https://stackoverflow.com/questions/34504970
import msvcrt
from ctypes import windll, byref, wintypes, WinError, POINTER
from ctypes.wintypes import HANDLE, DWORD, BOOL
LPDWORD = POINTER(DWORD)
PIPE_NOWAIT = wintypes.DWORD(0x00000001)
def pipe_no_wait(pipefd):
SetNamedPipeHandleState = windll.kernel32.SetNamedPipeHandleState
SetNamedPipeHandleState.argtypes = [HANDLE, LPDWORD, LPDWORD, LPDWORD]
SetNamedPipeHandleState.restype = BOOL
h = msvcrt.get_osfhandle(pipefd)
res = windll.kernel32.SetNamedPipeHandleState(h, byref(PIPE_NOWAIT), None, None)
if res == 0:
print(WinError())
return False
return True
return pipe_no_wait(fd)
def pipe_non_blocking_is_error_blocking(ex):
if not isinstance(ex, PortableBlockingIOError):
return False
from ctypes import GetLastError
ERROR_NO_DATA = 232
return (GetLastError() == ERROR_NO_DATA)
PortableBlockingIOError = OSError
else:
def pipe_non_blocking_set(fd):
import fcntl
fl = fcntl.fcntl(fd, fcntl.F_GETFL)
fcntl.fcntl(fd, fcntl.F_SETFL, fl | os.O_NONBLOCK)
return True
def pipe_non_blocking_is_error_blocking(ex):
if not isinstance(ex, PortableBlockingIOError):
return False
return True
PortableBlockingIOError = BlockingIOError
To avoid reading incomplete data, I ended up writing my own readline generator (which returns the byte string for each line).
It's a generator, so you can, for example, drive it in a loop; a rough usage sketch follows the code below.
def non_blocking_readlines(f, chunk=1024):
    """
    Iterate over lines, yielding b'' when nothing's left
    or when new data is not yet available.

    stdout_iter = iter(non_blocking_readlines(process.stdout))
    line = next(stdout_iter)  # will be a line or b''.
    """
    import os
    from .pipe_non_blocking import (
        pipe_non_blocking_set,
        pipe_non_blocking_is_error_blocking,
        PortableBlockingIOError,
    )

    fd = f.fileno()
    pipe_non_blocking_set(fd)

    blocks = []

    while True:
        try:
            data = os.read(fd, chunk)
            if not data:
                # case where reading finishes with no trailing newline
                yield b''.join(blocks)
                blocks.clear()
        except PortableBlockingIOError as ex:
            if not pipe_non_blocking_is_error_blocking(ex):
                raise ex
            yield b''
            continue

        while True:
            n = data.find(b'\n')
            if n == -1:
                break
            yield b''.join(blocks) + data[:n + 1]
            data = data[n + 1:]
            blocks.clear()

        blocks.append(data)
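A rough usage sketch (assuming non_blocking_readlines() above is importable in your module and 'some_command' is a placeholder):
import subprocess
import time

p = subprocess.Popen(["some_command"], stdout=subprocess.PIPE)
for line in non_blocking_readlines(p.stdout):
    if line:
        print(line)                  # a complete line (or the final unterminated chunk)
    elif p.poll() is not None:
        break                        # no data and the child has exited
    else:
        time.sleep(0.05)             # no data yet; avoid a busy loop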
Not the first and probably not the last, I have built a package that does non-blocking stdout PIPE reads with two different methods: one based on the work of J.F. Sebastian's (@jfs) answer, the other a simple communicate() loop with a thread that checks for timeouts.
Both stdout capture methods are tested to work under both Linux and Windows, with Python versions from 2.7 to 3.9 as of the time of writing.
Being non-blocking, it guarantees timeout enforcement, even with multiple child and grandchild processes, and even under Python 2.7.
The package also handles both bytes and text stdout encodings, which is a nightmare when trying to catch EOF.
You'll find the package at https://github.com/netinvent/command_runner
If you need some well-tested non-blocking read implementations, try it out (or hack the code):
pip install command_runner
from command_runner import command_runner
exit_code, output = command_runner('ping 127.0.0.1', timeout=3)
exit_code, output = command_runner('echo hello world', shell=True)
exit_code, output = command_runner('some command', stdout='some_file')
You can find the core non-blocking read code in _poll_process() or _monitor_process(), depending on the capture method employed.
From there, you can hack your way to what you want, or simply use the whole package to execute your commands as a subprocess replacement.
The select module helps you determine where the next useful input is.
However, you're almost always happier with separate threads: one does a blocking read on stdin, another does whatever it is you don't want blocked.
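A minimal sketch of the select-based variant for a child's pipes (POSIX only; 'mycmd' is a placeholder):
import os
import select
import subprocess

proc = subprocess.Popen(["mycmd"], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
streams = [proc.stdout, proc.stderr]

while streams:
    readable, _, _ = select.select(streams, [], [])
    for stream in readable:
        data = os.read(stream.fileno(), 4096)   # won't block: select reported readiness
        if not data:                            # EOF on this stream
            streams.remove(stream)
        else:
            prefix = "ERR" if stream is proc.stderr else "OUT"
            print(prefix, data)
proc.wait()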
Why bother with threads and queues?
Unlike readline(), BufferedReader.read1() won't block waiting for \r\n; it returns ASAP if there is any output coming in.
#!/usr/bin/python
from subprocess import Popen, PIPE, STDOUT
import io
def __main__():
try:
p = Popen( ["ping", "-n", "3", "127.0.0.1"], stdin=PIPE, stdout=PIPE, stderr=STDOUT )
except: print("Popen failed"); quit()
sout = io.open(p.stdout.fileno(), 'rb', closefd=False)
while True:
buf = sout.read1(1024)
if len(buf) == 0: break
print buf,
if __name__ == '__main__':
__main__()
In my case I needed a logging module that catches the output from background applications and augments it (adding timestamps, colors, etc.).
I ended up with a background thread that does the actual I/O. The following code is only for POSIX platforms. I stripped the non-essential parts.
If someone is going to use this beast for long runs, consider managing the open descriptors. In my case it was not a big problem.
# -*- python -*-
import fcntl
import threading
import sys, os, errno
import subprocess
class Logger(threading.Thread):
def __init__(self, *modules):
threading.Thread.__init__(self)
try:
from select import epoll, EPOLLIN
self.__poll = epoll()
self.__evt = EPOLLIN
self.__to = -1
except:
from select import poll, POLLIN
print 'epoll is not available'
self.__poll = poll()
self.__evt = POLLIN
self.__to = 100
self.__fds = {}
self.daemon = True
self.start()
def run(self):
while True:
events = self.__poll.poll(self.__to)
for fd, ev in events:
if (ev&self.__evt) != self.__evt:
continue
try:
self.__fds[fd].run()
except Exception, e:
print e
def add(self, fd, log):
assert not self.__fds.has_key(fd)
self.__fds[fd] = log
self.__poll.register(fd, self.__evt)
class log:
logger = Logger()
def __init__(self, name):
self.__name = name
self.__piped = False
def fileno(self):
if self.__piped:
return self.write
self.read, self.write = os.pipe()
fl = fcntl.fcntl(self.read, fcntl.F_GETFL)
fcntl.fcntl(self.read, fcntl.F_SETFL, fl | os.O_NONBLOCK)
self.fdRead = os.fdopen(self.read)
self.logger.add(self.read, self)
self.__piped = True
return self.write
def __run(self, line):
self.chat(line, nl=False)
def run(self):
while True:
try: line = self.fdRead.readline()
except IOError, exc:
if exc.errno == errno.EAGAIN:
return
raise
self.__run(line)
def chat(self, line, nl=True):
if nl: nl = '\n'
else: nl = ''
sys.stdout.write('[%s] %s%s' % (self.__name, line, nl))
def system(command, param=[], cwd=None, env=None, input=None, output=None):
args = [command] + param
p = subprocess.Popen(args, cwd=cwd, stdout=output, stderr=output, stdin=input, env=env, bufsize=0)
p.wait()
ls = log('ls')
ls.chat('go')
system("ls", ['-l', '/'], output=ls)
date = log('date')
date.chat('go')
system("date", output=date)
This is an example of running an interactive command in a subprocess, with the stdout made interactive by using a pseudo-terminal. You can refer to: https://stackoverflow.com/a/43012138/3555925
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import sys
import select
import termios
import tty
import pty
from subprocess import Popen
command = 'bash'
# command = 'docker run -it --rm centos /bin/bash'.split()
# save original tty setting then set it to raw mode
old_tty = termios.tcgetattr(sys.stdin)
tty.setraw(sys.stdin.fileno())
# open pseudo-terminal to interact with subprocess
master_fd, slave_fd = pty.openpty()
# use os.setsid() make it run in a new process group, or bash job control will not be enabled
p = Popen(command,
preexec_fn=os.setsid,
stdin=slave_fd,
stdout=slave_fd,
stderr=slave_fd,
universal_newlines=True)
while p.poll() is None:
r, w, e = select.select([sys.stdin, master_fd], [], [])
if sys.stdin in r:
d = os.read(sys.stdin.fileno(), 10240)
os.write(master_fd, d)
elif master_fd in r:
o = os.read(master_fd, 10240)
if o:
os.write(sys.stdout.fileno(), o)
# restore tty settings back
termios.tcsetattr(sys.stdin, termios.TCSADRAIN, old_tty)
My problem is a bit different, as I wanted to collect both stdout and stderr from a running process, but ultimately the same, since I wanted to render the output in a widget as it's generated.
I did not want to resort to many of the proposed workarounds using Queues or additional Threads as they should not be necessary to perform such a common task as running another script and collecting its output.
After reading the proposed solutions and python docs I resolved my issue with the implementation below. Yes it only works for POSIX as I'm using the select function call.
I agree that the docs are confusing and the implementation is awkward for such a common scripting task. I believe that older versions of python have different defaults for Popen and different explanations so that created a lot of confusion. This seems to work well for both Python 2.7.12 and 3.5.2.
The key was to set bufsize=1 for line buffering and then universal_newlines=True to process as a text file instead of a binary which seems to become the default when setting bufsize=1.
class workerThread(QThread):
def __init__(self, cmd):
QThread.__init__(self)
self.cmd = cmd
self.result = None ## return code
self.error = None ## flag indicates an error
self.errorstr = "" ## info message about the error
def __del__(self):
self.wait()
DEBUG("Thread removed")
def run(self):
cmd_list = self.cmd.split(" ")
try:
cmd = subprocess.Popen(cmd_list, bufsize=1, stdin=None
, universal_newlines=True
, stderr=subprocess.PIPE
, stdout=subprocess.PIPE)
except OSError:
self.error = 1
self.errorstr = "Failed to execute " + self.cmd
ERROR(self.errorstr)
finally:
VERBOSE("task started...")
import select
while True:
try:
r,w,x = select.select([cmd.stdout, cmd.stderr],[],[])
if cmd.stderr in r:
line = cmd.stderr.readline()
if line != "":
line = line.strip()
self.emit(SIGNAL("update_error(QString)"), line)
if cmd.stdout in r:
line = cmd.stdout.readline()
if line == "":
break
line = line.strip()
self.emit(SIGNAL("update_output(QString)"), line)
except IOError:
pass
cmd.wait()
self.result = cmd.returncode
if self.result < 0:
self.error = 1
self.errorstr = "Task terminated by signal " + str(self.result)
ERROR(self.errorstr)
return
if self.result:
self.error = 1
self.errorstr = "exit code " + str(self.result)
ERROR(self.errorstr)
return
return
ERROR, DEBUG and VERBOSE are simply macros that print output to the terminal.
This solution is IMHO 99.99% effective as it still uses the blocking readline function, so we assume the sub process is nice and outputs complete lines.
I welcome feedback to improve the solution as I am still new to Python.
I have created a library based on J. F. Sebastian's solution. You can use it.
https://github.com/cenkalti/what
Working from J.F. Sebastian's answer, and several other sources, I've put together a simple subprocess manager. It provides the requested non-blocking reading, as well as running several processes in parallel. It doesn't use any OS-specific calls (that I'm aware of) and thus should work anywhere.
It's available from pypi, so just pip install shelljob. Refer to the project page for examples and full docs.
EDIT: This implementation still blocks. Use J.F.Sebastian's answer instead.
I tried the top answer, but the additional risk and maintenance of thread code was worrisome.
Looking through the io module (and being limited to 2.6), I found BufferedReader. This is my threadless, non-blocking solution.
import io
import time
from subprocess import PIPE, Popen

p = Popen(['myprogram.exe'], stdout=PIPE)

SLEEP_DELAY = 0.001

# Create an io.BufferedReader on the file descriptor for stdout
with io.open(p.stdout.fileno(), 'rb', closefd=False) as buffer:
    while p.poll() is None:
        time.sleep(SLEEP_DELAY)
        while b'\n' in buffer.peek(io.DEFAULT_BUFFER_SIZE):
            line = buffer.readline()
            # do stuff with the line
    # Handle any remaining output after the process has ended
    while buffer.peek():
        line = buffer.readline()
        # do stuff with the line
This solution uses the select module to "read any available data" from an IO stream. This function blocks initially until data is available, but then reads only the data that is available and doesn't block further.
Given the fact that it uses the select module, this only works on Unix.
The code is fully PEP8-compliant.
import select
def read_available(input_stream, max_bytes=None):
"""
Blocks until any data is available; then all available data is read and returned.
This function returns an empty string when end of stream is reached.
Args:
input_stream: The stream to read from.
max_bytes (int|None): The maximum number of bytes to read. This function may return fewer bytes than this.
Returns:
str
"""
# Prepare local variables
input_streams = [input_stream]
empty_list = []
read_buffer = ""
# Initially block for input using 'select'
if len(select.select(input_streams, empty_list, empty_list)[0]) > 0:
# Poll read-readiness using 'select'
def select_func():
return len(select.select(input_streams, empty_list, empty_list, 0)[0]) > 0
# Create while function based on parameters
if max_bytes is not None:
def while_func():
return (len(read_buffer) < max_bytes) and select_func()
else:
while_func = select_func
while True:
# Read single byte at a time
read_data = input_stream.read(1)
if len(read_data) == 0:
# End of stream
break
# Append byte to string buffer
read_buffer += read_data
# Check if more data is available
if not while_func():
break
# Return read buffer
return read_buffer
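A possible way to drive it against a subprocess pipe (a sketch; note the text-mode pipe, since the function concatenates str, and 'mycmd' is a placeholder):
import subprocess

proc = subprocess.Popen(["mycmd"], stdout=subprocess.PIPE, universal_newlines=True)
while True:
    chunk = read_available(proc.stdout, max_bytes=4096)
    if chunk == "":          # end of stream
        break
    print(chunk, end="")
proc.wait()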
I also faced the problem described by Jesse and solved it by using "select" as Bradley, Andy and others did but in a blocking mode to avoid a busy loop. It uses a dummy Pipe as a fake stdin. The select blocks and wait for either stdin or the pipe to be ready. When a key is pressed stdin unblocks the select and the key value can be retrieved with read(1). When a different thread writes to the pipe then the pipe unblocks the select and it can be taken as an indication that the need for stdin is over. Here is some reference code:
import sys
import os
from select import select
# -------------------------------------------------------------------------
# Set the pipe (fake stdin) to simulate a final key stroke
# which will unblock the select statement
readEnd, writeEnd = os.pipe()
readFile = os.fdopen(readEnd)
writeFile = os.fdopen(writeEnd, "w")
# -------------------------------------------------------------------------
def getKey():
# Wait for stdin or pipe (fake stdin) to be ready
dr,dw,de = select([sys.__stdin__, readFile], [], [])
# If stdin is the one ready then read it and return value
if sys.__stdin__ in dr:
return sys.__stdin__.read(1) # For Windows use ----> getch() from module msvcrt
# Must finish
else:
return None
# -------------------------------------------------------------------------
def breakStdinRead():
writeFile.write(' ')
writeFile.flush()
# -------------------------------------------------------------------------
# MAIN CODE
# Get key stroke
key = getKey()
# Keyboard input
if key:
# ... do your stuff with the key value
# Faked keystroke
else:
# ... use of stdin finished
# -------------------------------------------------------------------------
# OTHER THREAD CODE
breakStdinRead()
Try wexpect, which is the Windows alternative to pexpect.
import wexpect
p = wexpect.spawn('myprogram.exe')
p.stdout.readline('.')  # regex pattern of any character
output_str = p.after()
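On POSIX, the analogous sketch with pexpect itself looks roughly like this (assuming pexpect is installed and 'myprogram' is a placeholder):
import pexpect

p = pexpect.spawn('myprogram')
p.expect('.')        # wait (with pexpect's default timeout) until at least one character arrives
print(p.after)       # the text matched by the pattern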
Here is a module that supports non-blocking reads and background writes in python:
https://pypi.python.org/pypi/python-nonblock
Provides a function, nonblock_read, which will read data from the stream if available, and otherwise return an empty string (or None if the stream is closed on the other side and all possible data has been read).
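Used against a subprocess pipe, that looks roughly like this (a sketch; the import path is assumed from the package docs, and 'mycmd' is a placeholder):
import subprocess
import time
from nonblock import nonblock_read   # import path assumed for the python-nonblock package

proc = subprocess.Popen(["mycmd"], stdout=subprocess.PIPE)
while True:
    data = nonblock_read(proc.stdout)   # '' means no data yet, None means the stream closed
    if data is None:
        break
    elif data:
        print(data)
    else:
        time.sleep(0.1)
proc.wait()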
You may also consider the python-subprocess2 module,
https://pypi.python.org/pypi/python-subprocess2
which adds to the subprocess module. The object returned from subprocess.Popen gains an additional method, runInBackground. This starts a thread and returns an object which will automatically be populated as data is written to stdout/stderr, without blocking your main thread.
Enjoy!

pycurl subprocess in a separate Thread

I need help getting the output from pycurl, which I'm trying to run in a subprocess. I'm trying to put this output in a queue and then pull the queue out in a different class.
Unfortunately, right now I have no output =(
import threading
import random
import time
import Queue
import urllib2
import sys
import simplejson, pycurl
import sys, signal
queue = Queue.Queue()
keep_running = True
user = "username"
pswd = "pass"
class MyThread(threading.Thread):
def __init__(self, queue):
threading.Thread.__init__(self)
self.queue = queue
def run(self):
curl_path = '/usr/bin/curl'
curl_list = [curl_path]
args = ('curl', 'https://stream.twitter.com/1/statuses/filter.json?track=java', '-u', 'user:pass')
for arg in args:
curl_list.append(arg)
child = subprocess.Popen(
curl_list,
shell=False,
#stdout=subprocess.PIPE)
stderr=subprocess.PIPE)
try:
out += child.communicate()
c_out.write(out)
self.queue.put(c_out)
self.queue.task_done()
except KeyboardInterrupt:
child.kill()
class Starter():
def __init__(self):
self.queue = queue
t = MyThread(self.queue)
t.daemon=True
t.start()
self.next()
def next(self):
while True:
time.sleep(0.5)
if not self.queue.empty():
line = self.queue.get(timeout=0.2)
print '\n\nIM IN STARTER %s' % line
else:
print 'waiting for queue'
def main():
try:
Starter()
except KeyboardInterrupt, e:
print 'Stopping'
raise
main()
You seem to be confusing your arguments to subprocess quite a bit... the args list should be all of the different pieces of the command that you would be using for curl; you are currently putting them all together in a fashion that is not going to work with subprocess. Your curl_list should look more like this...
curl_path = '/usr/bin/curl'
curl_list = [curl_path, 'https://stream.twitter.com/1/statuses/filter.json?track=java', '-u', 'user:pass']
You are also using an unnecessary for loop at the moment... you don't want to loop over that list; you just want to pass it to subprocess, which will handle it appropriately. You are also going to want stdout to get the results from that, so you need to include that pipe as well.
I.e., the entire thing should be...
def run(self):
curl_path = '/usr/bin/curl'
curl_list = [curl_path, 'https://stream.twitter.com/1/statuses/filter.json?track=java', '-u', 'user:pass']
child = subprocess.Popen(curl_list,
shell=False,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
try:
out = child.communicate()[0]
c_out.write(out)
self.queue.put(c_out)
self.queue.task_done()
except KeyboardInterrupt:
child.kill()
Might want to take another look at the subprocess documentation to better understand the changes above. I haven't actually run this through an interpreter so it may not be perfect but it should get you going in the right direction... good luck!
