I want to execute a process, limit the execution-time by some timeout in seconds and grab the output produced by the process. And I want to do this on windows, linux and freebsd.
I have tried implementing this in three different ways:
cmd - Without timeout and subprocess.PIPE for output capture.
BEHAVIOUR: Operates as expected but does not support timeout, i need timeout...
cmd_to - With timeout and subprocess.PIPE for output capture.
BEHAVIOUR: Blocks subprocess execution when output >= 2^16 bytes.
cmd_totf - With timeout and tempfile.NamedTemporaryfile for output capture.
BEHAVIOUR: Operates as expected but uses temporary files on disk.
These are available below for closer inspection.
As can be seen in the output below, then the timeout-code blocks the execution of the sub-process when using subprocessing.PIPE and output from the subprocess is >= 2^16 bytes.
The subprocess documentation states that this is expected when calling process.wait() and using subprocessing.PIPE, however no warnings are given when using process.poll(), so what is going wrong here?
I have a solution in cmd_totf which use the tempfile module but the tradeoff is that it writes the output to disk, something I would REALLY like to avoid.
So my questions are:
What am I doing wrong in cmd_to?
Is there a way to do what I want and without using tempfiles / keeping the output in memory.
Script to generate a bunch of output ('exp_gen.py'):
#!/usr/bin/env python
import sys
output = "b"*int(sys.argv[1])
print output
Three different implementations (cmd, cmd_to, cmd_totf) of wrappers around subprocessing.Popen:
#!/usr/bin/env python
import subprocess, time, tempfile
bufsize = -1
def cmd(cmdline, timeout=60):
"""
Execute cmdline.
Uses subprocessing and subprocess.PIPE.
"""
p = subprocess.Popen(
cmdline,
bufsize = bufsize,
shell = False,
stdin = subprocess.PIPE,
stdout = subprocess.PIPE,
stderr = subprocess.PIPE
)
out, err = p.communicate()
returncode = p.returncode
return (returncode, err, out)
def cmd_to(cmdline, timeout=60):
"""
Execute cmdline, limit execution time to 'timeout' seconds.
Uses subprocessing and subprocess.PIPE.
"""
p = subprocess.Popen(
cmdline,
bufsize = bufsize,
shell = False,
stdin = subprocess.PIPE,
stdout = subprocess.PIPE,
stderr = subprocess.PIPE
)
t_begin = time.time() # Monitor execution time
seconds_passed = 0
while p.poll() is None and seconds_passed < timeout:
seconds_passed = time.time() - t_begin
time.sleep(0.1)
#if seconds_passed > timeout:
#
# try:
# p.stdout.close() # If they are not closed the fds will hang around until
# p.stderr.close() # os.fdlimit is exceeded and cause a nasty exception
# p.terminate() # Important to close the fds prior to terminating the process!
# # NOTE: Are there any other "non-freed" resources?
# except:
# pass
#
# raise TimeoutInterrupt
out, err = p.communicate()
returncode = p.returncode
return (returncode, err, out)
def cmd_totf(cmdline, timeout=60):
"""
Execute cmdline, limit execution time to 'timeout' seconds.
Uses subprocessing and tempfile instead of subprocessing.PIPE.
"""
output = tempfile.NamedTemporaryFile(delete=False)
error = tempfile.NamedTemporaryFile(delete=False)
p = subprocess.Popen(
cmdline,
bufsize = 0,
shell = False,
stdin = None,
stdout = output,
stderr = error
)
t_begin = time.time() # Monitor execution time
seconds_passed = 0
while p.poll() is None and seconds_passed < timeout:
seconds_passed = time.time() - t_begin
time.sleep(0.1)
#if seconds_passed > timeout:
#
# try:
# p.stdout.close() # If they are not closed the fds will hang around until
# p.stderr.close() # os.fdlimit is exceeded and cause a nasty exception
# p.terminate() # Important to close the fds prior to terminating the process!
# # NOTE: Are there any other "non-freed" resources?
# except:
# pass
#
# raise TimeoutInterrupt
p.wait()
returncode = p.returncode
fd = open(output.name)
out = fd.read()
fd.close()
fd = open(error.name)
err = fd.read()
fd.close()
error.close()
output.close()
return (returncode, err, out)
if __name__ == "__main__":
implementations = [cmd, cmd_to, cmd_totf]
bytes = ['65535', '65536', str(1024*1024)]
timeouts = [5]
for timeout in timeouts:
for size in bytes:
for i in implementations:
t_begin = time.time()
seconds_passed = 0
rc, err, output = i(['exp_gen.py', size], timeout)
seconds_passed = time.time() - t_begin
filler = ' '*(8-len(i.func_name))
print "[%s%s: timeout=%d, iosize=%s, seconds=%f]" % (repr(i.func_name), filler, timeout, size, seconds_passed)
Output from execution:
['cmd' : timeout=5, iosize=65535, seconds=0.016447]
['cmd_to' : timeout=5, iosize=65535, seconds=0.103022]
['cmd_totf': timeout=5, iosize=65535, seconds=0.107176]
['cmd' : timeout=5, iosize=65536, seconds=0.028105]
['cmd_to' : timeout=5, iosize=65536, seconds=5.116658]
['cmd_totf': timeout=5, iosize=65536, seconds=0.104905]
['cmd' : timeout=5, iosize=1048576, seconds=0.025964]
['cmd_to' : timeout=5, iosize=1048576, seconds=5.128062]
['cmd_totf': timeout=5, iosize=1048576, seconds=0.103183]
As opposed to all the warnings in the subprocess documentation then directly reading from process.stdout and process.stderr has provided a better solution.
By better I mean that I can read output from a process that exceeds 2^16 bytes without having to temporarily store the output on disk.
The code follows:
import fcntl
import os
import subprocess
import time
def nonBlockRead(output):
fd = output.fileno()
fl = fcntl.fcntl(fd, fcntl.F_GETFL)
fcntl.fcntl(fd, fcntl.F_SETFL, fl | os.O_NONBLOCK)
try:
return output.read()
except:
return ''
def cmd(cmdline, timeout=60):
"""
Execute cmdline, limit execution time to 'timeout' seconds.
Uses the subprocess module and subprocess.PIPE.
Raises TimeoutInterrupt
"""
p = subprocess.Popen(
cmdline,
bufsize = bufsize, # default value of 0 (unbuffered) is best
shell = False, # not really needed; it's disabled by default
stdout = subprocess.PIPE,
stderr = subprocess.PIPE
)
t_begin = time.time() # Monitor execution time
seconds_passed = 0
stdout = ''
stderr = ''
while p.poll() is None and seconds_passed < timeout: # Monitor process
time.sleep(0.1) # Wait a little
seconds_passed = time.time() - t_begin
# p.std* blocks on read(), which messes up the timeout timer.
# To fix this, we use a nonblocking read()
# Note: Not sure if this is Windows compatible
stdout += nonBlockRead(p.stdout)
stderr += nonBlockRead(p.stderr)
if seconds_passed >= timeout:
try:
p.stdout.close() # If they are not closed the fds will hang around until
p.stderr.close() # os.fdlimit is exceeded and cause a nasty exception
p.terminate() # Important to close the fds prior to terminating the process!
# NOTE: Are there any other "non-freed" resources?
except:
pass
raise TimeoutInterrupt
returncode = p.returncode
return (returncode, stdout, stderr)
Disclaimer: This answer is not tested on windows, nor freebsd. But the used modules should work on these systems. I believe this should be a working answer to your question - it works for me.
Here's code I just hacked to solve the problem on linux. It is a combination of several Stackoverflow threads and my own research in the Python 3 documents.
Main characteristics of this code:
Uses processes not threads for blocking I/O because they can more reliably be p.terminated()
Implements a retriggerable timeout watchdog that restarts counting whenever some output happens
Implements a long-term timeout watchdog to limit overall runtime
Can feed in stdin (although I only need to feed in one-time short strings)
Can capture stdout/stderr in the usual Popen means (Only stdout is coded, and stderr redirected to stdout; but can easily be separated)
It's almost realtime because it only checks every 0.2 seconds for output. But you could decrease this or remove the waiting interval easily
Lots of debugging printouts still enabled to see whats happening when.
The only code dependency is enum as implemented here, but the code could easily be changed to work without. It's only used to distinguish the two timeouts - use separate exceptions if you like.
Here's the code - as usual - feedback is highly appreciated:
(Edit 29-Jun-2012 - the code is now actually working)
# Python module runcmd
# Implements a class to launch shell commands which
# are killed after a timeout. Timeouts can be reset
# after each line of output
#
# Use inside other script with:
#
# import runcmd
# (return_code, out) = runcmd.RunCmd(['ls', '-l', '/etc'],
# timeout_runtime,
# timeout_no_output,
# stdin_string).go()
#
import multiprocessing
import queue
import subprocess
import time
import enum
def timestamp():
return time.strftime('%Y%m%d-%H%M%S')
class ErrorRunCmd(Exception): pass
class ErrorRunCmdTimeOut(ErrorRunCmd): pass
class Enqueue_output(multiprocessing.Process):
def __init__(self, out, queue):
multiprocessing.Process.__init__(self)
self.out = out
self.queue = queue
self.daemon = True
def run(self):
try:
for line in iter(self.out.readline, b''):
#print('worker read:', line)
self.queue.put(line)
except ValueError: pass # Readline of closed file
self.out.close()
class Enqueue_input(multiprocessing.Process):
def __init__(self, inp, iterable):
multiprocessing.Process.__init__(self)
self.inp = inp
self.iterable = iterable
self.daemon = True
def run(self):
#print("writing stdin")
for line in self.iterable:
self.inp.write(bytes(line,'utf-8'))
self.inp.close()
#print("writing stdin DONE")
class RunCmd():
"""RunCmd - class to launch shell commands
Captures and returns stdout. Kills child after a given
amount (timeout_runtime) wallclock seconds. Can also
kill after timeout_retriggerable wallclock seconds.
This second timer is reset whenever the child does some
output
(return_code, out) = RunCmd(['ls', '-l', '/etc'],
timeout_runtime,
timeout_no_output,
stdin_string).go()
"""
Timeout = enum.Enum('No','Retriggerable','Runtime')
def __init__(self, cmd, timeout_runtime, timeout_retriggerable, stdin=None):
self.dbg = False
self.cmd = cmd
self.timeout_retriggerable = timeout_retriggerable
self.timeout_runtime = timeout_runtime
self.timeout_hit = self.Timeout.No
self.stdout = '--Cmd did not yield any output--'
self.stdin = stdin
def read_queue(self, q):
time_last_output = None
try:
bstr = q.get(False) # non-blocking
if self.dbg: print('{} chars read'.format(len(bstr)))
time_last_output = time.time()
self.stdout += bstr
except queue.Empty:
#print('queue empty')
pass
return time_last_output
def go(self):
if self.stdin:
pstdin = subprocess.PIPE
else:
pstdin = None
p = subprocess.Popen(self.cmd, shell=False, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, stdin=pstdin)
pin = None
if (pstdin):
pin = Enqueue_input(p.stdin, [self.stdin + '\n'])
pin.start()
q = multiprocessing.Queue()
pout = Enqueue_output(p.stdout, q)
pout.start()
try:
if self.dbg: print('Beginning subprocess with timeout {}/{} s on {}'.format(self.timeout_retriggerable, self.timeout_runtime, time.asctime()))
time_begin = time.time()
time_last_output = time_begin
seconds_passed = 0
self.stdout = b''
once = True # ensure loop's executed at least once
# some child cmds may exit very fast, but still produce output
while once or p.poll() is None or not q.empty():
once = False
if self.dbg: print('a) {} of {}/{} secs passed and overall {} chars read'.format(seconds_passed, self.timeout_retriggerable, self.timeout_runtime, len(self.stdout)))
tlo = self.read_queue(q)
if tlo:
time_last_output = tlo
now = time.time()
if now - time_last_output >= self.timeout_retriggerable:
self.timeout_hit = self.Timeout.Retriggerable
raise ErrorRunCmdTimeOut(self)
if now - time_begin >= self.timeout_runtime:
self.timeout_hit = self.Timeout.Runtime
raise ErrorRunCmdTimeOut(self)
if q.empty():
time.sleep(0.1)
# Final try to get "last-millisecond" output
self.read_queue(q)
finally:
self._close(p, [pout, pin])
return (self.returncode, self.stdout)
def _close(self, p, procs):
if self.dbg:
if self.timeout_hit != self.Timeout.No:
print('{} A TIMEOUT occured: {}'.format(timestamp(), self.timeout_hit))
else:
print('{} No timeout occured'.format(timestamp()))
for process in [proc for proc in procs if proc]:
try:
process.terminate()
except:
print('{} Process termination raised trouble'.format(timestamp()))
raise
try:
p.stdin.close()
except: pass
if self.dbg: print('{} _closed stdin'.format(timestamp()))
try:
p.stdout.close() # If they are not closed the fds will hang around until
except: pass
if self.dbg: print('{} _closed stdout'.format(timestamp()))
#p.stderr.close() # os.fdlimit is exceeded and cause a nasty exception
try:
p.terminate() # Important to close the fds prior to terminating the process!
# NOTE: Are there any other "non-freed" resources?
except: pass
if self.dbg: print('{} _closed Popen'.format(timestamp()))
try:
self.stdout = self.stdout.decode('utf-8')
except: pass
self.returncode = p.returncode
if self.dbg: print('{} _closed all'.format(timestamp()))
Use with:
import runcmd
cmd = ['ls', '-l', '/etc']
worker = runcmd.RunCmd(cmd,
40, # limit runtime [wallclock seconds]
2, # limit runtime after last output [wallclk secs]
'' # stdin input string
)
(return_code, out) = worker.go()
if worker.timeout_hit != worker.Timeout.No:
print('A TIMEOUT occured: {}'.format(worker.timeout_hit))
else:
print('No timeout occured')
print("Running '{:s}' returned {:d} and {:d} chars of output".format(cmd, return_code, len(out)))
print('Output:')
print(out)
command - the first argument - should be a list of a command and its arguments. It is used for the Popen(shell=False) call and its timeouts are in seconds. There's currently no code to disable the timeouts. Set timeout_no_output to time_runtime to effectively disable the retriggerable timeout_no_output.
stdin_string can be any string which is to be sent to the command's standard input. Set to None if your command does not need any input. If a string is provided, a final '\n' is appended.
Related
I'm writing a subprocess based python program that acts as a proxy between the user input and the subprocess (trying to go beyond pexpect). I've taken this thread as reference, and some code chunk from pexpect (_read_incoming() method for popen_spawn) to read output (the fcntl method worked, but not satisfactorily).
The code runs but has a problem: There seems to be an additional carriage return being sent to the process. This is causing me issues when I try to do things like sending passwords to ssh etc.
Could you look into what might be the issue? Thanks!
The code is as follows:
from queue import Queue, Empty
from threading import Thread
import subprocess
import signal
import fcntl
import os
global terminating
terminating = False
def setNonBlocking(fd):
"""
Set the file description of the given file descriptor to non-blocking.
"""
print(fd)
flags = fcntl.fcntl(fd, fcntl.F_GETFL)
flags = flags | os.O_NONBLOCK
fcntl.fcntl(fd, fcntl.F_SETFL, flags)
def enqueue(out, q):
fileno = out.fileno()
while not terminating:
buf = b''
try:
buf = os.read(fileno, 1024)
if buf and len(buf)>0:
q.put(buf)
except OSError as e:
#print("OS error: {0}".format(e))
pass
if not buf:
q.put(buf)
# for line in iter(out.readline, b''):
# if len(line.strip()) > 0:
# print(line)
# q.put(line)
out.close()
print('Terminating')
return
def get_output(q):
out_str = bytes()
while True:
try:
incoming = q.get_nowait()
except Empty:
break
else:
if incoming is None:
break
else:
out_str += incoming
if out_str:
return out_str
else:
return b''
def explore(cmd="/bin/bash"):
global terminating
universal_newlines = False
p = subprocess.Popen([cmd], stdout=subprocess.PIPE, stdin=subprocess.PIPE, stderr=subprocess.STDOUT,
bufsize=0, shell=False, universal_newlines=universal_newlines)
#setNonBlocking(p.stdout)
outQueue = Queue()
outThread = Thread(target=enqueue, args=(p.stdout, outQueue))
outThread.daemon = True
outThread.start()
while True:
try:
someInput = input()
print('[In]:'+someInput)
someInput += '\n'
if not universal_newlines:
p.stdin.write(someInput.encode('utf-8'))
else:
p.stdin.write(someInput)
p.stdin.flush()
out = get_output(outQueue).decode('utf-8')
print('[Out]:'+out)
#p.communicate(someInput+'\n')
except KeyboardInterrupt:
print('Interrupting')
p.send_signal(signal.SIGINT)
terminating = True
outThread.join()
break
p.wait()
if __name__ == '__main__':
explore()
Example run:
ls
[In]:ls
[Out]:
[In]:
[Out]:explorer.py
__init__.py
^CInterrupting
Terminating
The second In was an enter from user.
Update:
Tested the alternate using pexpect's popen_spawn module. Same result:
from pexpect.popen_spawn import PopenSpawn as Spawn
import signal
def explore(cmd="/bin/bash"):
p = Spawn(cmd)
while True:
try:
someInput = input()
print('[In]:'+someInput)
p.sendline(someInput)
out = p.read_nonblocking(size=1024, timeout=-1).decode('utf-8')
print('[Out]:'+out)
#p.communicate(someInput+'\n')
except KeyboardInterrupt:
print('Interrupting')
p.sendeof()
p.kill(signal.SIGINT)
break
if __name__ == '__main__':
explore()
I have a python subprocess that I'm trying to read output and error streams from. Currently I have it working, but I'm only able to read from stderr after I've finished reading from stdout. Here's what it looks like:
process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
stdout_iterator = iter(process.stdout.readline, b"")
stderr_iterator = iter(process.stderr.readline, b"")
for line in stdout_iterator:
# Do stuff with line
print line
for line in stderr_iterator:
# Do stuff with line
print line
As you can see, the stderr for loop can't start until the stdout loop completes. How can I modify this to be able to read from both in the correct order the lines come in?
To clarify: I still need to be able to tell whether a line came from stdout or stderr because they will be treated differently in my code.
The code in your question may deadlock if the child process produces enough output on stderr (~100KB on my Linux machine).
There is a communicate() method that allows to read from both stdout and stderr separately:
from subprocess import Popen, PIPE
process = Popen(command, stdout=PIPE, stderr=PIPE)
output, err = process.communicate()
If you need to read the streams while the child process is still running then the portable solution is to use threads (not tested):
from subprocess import Popen, PIPE
from threading import Thread
from Queue import Queue # Python 2
def reader(pipe, queue):
try:
with pipe:
for line in iter(pipe.readline, b''):
queue.put((pipe, line))
finally:
queue.put(None)
process = Popen(command, stdout=PIPE, stderr=PIPE, bufsize=1)
q = Queue()
Thread(target=reader, args=[process.stdout, q]).start()
Thread(target=reader, args=[process.stderr, q]).start()
for _ in range(2):
for source, line in iter(q.get, None):
print "%s: %s" % (source, line),
See:
Python: read streaming input from subprocess.communicate()
Non-blocking read on a subprocess.PIPE in python
Python subprocess get children's output to file and terminal?
Here's a solution based on selectors, but one that preserves order, and streams variable-length characters (even single chars).
The trick is to use read1(), instead of read().
import selectors
import subprocess
import sys
p = subprocess.Popen(
["python", "random_out.py"], stdout=subprocess.PIPE, stderr=subprocess.PIPE
)
sel = selectors.DefaultSelector()
sel.register(p.stdout, selectors.EVENT_READ)
sel.register(p.stderr, selectors.EVENT_READ)
while True:
for key, _ in sel.select():
data = key.fileobj.read1().decode()
if not data:
exit()
if key.fileobj is p.stdout:
print(data, end="")
else:
print(data, end="", file=sys.stderr)
If you want a test program, use this.
import sys
from time import sleep
for i in range(10):
print(f" x{i} ", file=sys.stderr, end="")
sleep(0.1)
print(f" y{i} ", end="")
sleep(0.1)
The order in which a process writes data to different pipes is lost after write.
There is no way you can tell if stdout has been written before stderr.
You can try to read data simultaneously from multiple file descriptors in a non-blocking way
as soon as data is available, but this would only minimize the probability that the order is incorrect.
This program should demonstrate this:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import select
import subprocess
testapps={
'slow': '''
import os
import time
os.write(1, 'aaa')
time.sleep(0.01)
os.write(2, 'bbb')
time.sleep(0.01)
os.write(1, 'ccc')
''',
'fast': '''
import os
os.write(1, 'aaa')
os.write(2, 'bbb')
os.write(1, 'ccc')
''',
'fast2': '''
import os
os.write(1, 'aaa')
os.write(2, 'bbbbbbbbbbbbbbb')
os.write(1, 'ccc')
'''
}
def readfds(fds, maxread):
while True:
fdsin, _, _ = select.select(fds,[],[])
for fd in fdsin:
s = os.read(fd, maxread)
if len(s) == 0:
fds.remove(fd)
continue
yield fd, s
if fds == []:
break
def readfromapp(app, rounds=10, maxread=1024):
f=open('testapp.py', 'w')
f.write(testapps[app])
f.close()
results={}
for i in range(0, rounds):
p = subprocess.Popen(['python', 'testapp.py'], stdout=subprocess.PIPE
, stderr=subprocess.PIPE)
data=''
for (fd, s) in readfds([p.stdout.fileno(), p.stderr.fileno()], maxread):
data = data + s
results[data] = results[data] + 1 if data in results else 1
print 'running %i rounds %s with maxread=%i' % (rounds, app, maxread)
results = sorted(results.items(), key=lambda (k,v): k, reverse=False)
for data, count in results:
print '%03i x %s' % (count, data)
print
print "=> if output is produced slowly this should work as whished"
print " and should return: aaabbbccc"
readfromapp('slow', rounds=100, maxread=1024)
print
print "=> now mostly aaacccbbb is returnd, not as it should be"
readfromapp('fast', rounds=100, maxread=1024)
print
print "=> you could try to read data one by one, and return"
print " e.g. a whole line only when LF is read"
print " (b's should be finished before c's)"
readfromapp('fast', rounds=100, maxread=1)
print
print "=> but even this won't work ..."
readfromapp('fast2', rounds=100, maxread=1)
and outputs something like this:
=> if output is produced slowly this should work as whished
and should return: aaabbbccc
running 100 rounds slow with maxread=1024
100 x aaabbbccc
=> now mostly aaacccbbb is returnd, not as it should be
running 100 rounds fast with maxread=1024
006 x aaabbbccc
094 x aaacccbbb
=> you could try to read data one by one, and return
e.g. a whole line only when LF is read
(b's should be finished before c's)
running 100 rounds fast with maxread=1
003 x aaabbbccc
003 x aababcbcc
094 x abababccc
=> but even this won't work ...
running 100 rounds fast2 with maxread=1
003 x aaabbbbbbbbbbbbbbbccc
001 x aaacbcbcbbbbbbbbbbbbb
008 x aababcbcbcbbbbbbbbbbb
088 x abababcbcbcbbbbbbbbbb
This works for Python3 (3.6):
p = subprocess.Popen(cmd, stdout=subprocess.PIPE,
stderr=subprocess.PIPE, universal_newlines=True)
# Read both stdout and stderr simultaneously
sel = selectors.DefaultSelector()
sel.register(p.stdout, selectors.EVENT_READ)
sel.register(p.stderr, selectors.EVENT_READ)
ok = True
while ok:
for key, val1 in sel.select():
line = key.fileobj.readline()
if not line:
ok = False
break
if key.fileobj is p.stdout:
print(f"STDOUT: {line}", end="")
else:
print(f"STDERR: {line}", end="", file=sys.stderr)
from https://docs.python.org/3/library/subprocess.html#using-the-subprocess-module
If you wish to capture and combine both streams into one, use
stdout=PIPE and stderr=STDOUT instead of capture_output.
so the easiest solution would be:
process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
stdout_iterator = iter(process.stdout.readline, b"")
for line in stdout_iterator:
# Do stuff with line
print line
I know this question is very old, but this answer may help others who stumble upon this page in researching a solution for a similar situation, so I'm posting it anyway.
I've built a simple python snippet that will merge any number of pipes into a single one. Of course, as stated above, the order cannot be guaranteed, but this is as close as I think you can get in Python.
It spawns a thread for each of the pipes, reads them line by line and puts them into a Queue (which is FIFO). The main thread loops through the queue, yielding each line.
import threading, queue
def merge_pipes(**named_pipes):
r'''
Merges multiple pipes from subprocess.Popen (maybe other sources as well).
The keyword argument keys will be used in the output to identify the source
of the line.
Example:
p = subprocess.Popen(['some', 'call'],
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
outputs = {'out': log.info, 'err': log.warn}
for name, line in merge_pipes(out=p.stdout, err=p.stderr):
outputs[name](line)
This will output stdout to the info logger, and stderr to the warning logger
'''
# Constants. Could also be placed outside of the method. I just put them here
# so the method is fully self-contained
PIPE_OPENED=1
PIPE_OUTPUT=2
PIPE_CLOSED=3
# Create a queue where the pipes will be read into
output = queue.Queue()
# This method is the run body for the threads that are instatiated below
# This could be easily rewritten to be outside of the merge_pipes method,
# but to make it fully self-contained I put it here
def pipe_reader(name, pipe):
r"""
reads a single pipe into the queue
"""
output.put( ( PIPE_OPENED, name, ) )
try:
for line in iter(pipe.readline,''):
output.put( ( PIPE_OUTPUT, name, line.rstrip(), ) )
finally:
output.put( ( PIPE_CLOSED, name, ) )
# Start a reader for each pipe
for name, pipe in named_pipes.items():
t=threading.Thread(target=pipe_reader, args=(name, pipe, ))
t.daemon = True
t.start()
# Use a counter to determine how many pipes are left open.
# If all are closed, we can return
pipe_count = 0
# Read the queue in order, blocking if there's no data
for data in iter(output.get,''):
code=data[0]
if code == PIPE_OPENED:
pipe_count += 1
elif code == PIPE_CLOSED:
pipe_count -= 1
elif code == PIPE_OUTPUT:
yield data[1:]
if pipe_count == 0:
return
This works for me (on windows):
https://github.com/waszil/subpiper
from subpiper import subpiper
def my_stdout_callback(line: str):
print(f'STDOUT: {line}')
def my_stderr_callback(line: str):
print(f'STDERR: {line}')
my_additional_path_list = [r'c:\important_location']
retcode = subpiper(cmd='echo magic',
stdout_callback=my_stdout_callback,
stderr_callback=my_stderr_callback,
add_path_list=my_additional_path_list)
I need to run an interactive Bash instance in a separated process in Python with it's own dedicated TTY (I can't use pexpect).
I used this code snippet I commonly see used in similar programs:
master, slave = pty.openpty()
p = subprocess.Popen(["/bin/bash", "-i"], stdin=slave, stdout=slave, stderr=slave)
os.close(slave)
x = os.read(master, 1026)
print x
subprocess.Popen.kill(p)
os.close(master)
But when I run it I get the following output:
$ ./pty_try.py
bash: cannot set terminal process group (10790): Inappropriate ioctl for device
bash: no job control in this shell
Strace of the run shows some errors:
...
readlink("/usr/bin/python2.7", 0x7ffc8db02510, 4096) = -1 EINVAL (Invalid argument)
...
ioctl(3, SNDCTL_TMR_TIMEBASE or SNDRV_TIMER_IOCTL_NEXT_DEVICE or TCGETS, 0x7ffc8db03590) = -1 ENOTTY (Inappropriate ioctl for device)
...
readlink("./pty_try.py", 0x7ffc8db00610, 4096) = -1 EINVAL (Invalid argument)
The code snippet seems pretty straightforward, is Bash not getting something it needs? what could be the problem here?
This is a solution to run an interactive command in subprocess. It uses pseudo-terminal to make stdout non-blocking(also some command needs a tty device, eg. bash). it uses select to handle input and ouput to the subprocess.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import sys
import select
import termios
import tty
import pty
from subprocess import Popen
command = 'bash'
# command = 'docker run -it --rm centos /bin/bash'.split()
# save original tty setting then set it to raw mode
old_tty = termios.tcgetattr(sys.stdin)
tty.setraw(sys.stdin.fileno())
# open pseudo-terminal to interact with subprocess
master_fd, slave_fd = pty.openpty()
try:
# use os.setsid() make it run in a new process group, or bash job control will not be enabled
p = Popen(command,
preexec_fn=os.setsid,
stdin=slave_fd,
stdout=slave_fd,
stderr=slave_fd,
universal_newlines=True)
while p.poll() is None:
r, w, e = select.select([sys.stdin, master_fd], [], [])
if sys.stdin in r:
d = os.read(sys.stdin.fileno(), 10240)
os.write(master_fd, d)
elif master_fd in r:
o = os.read(master_fd, 10240)
if o:
os.write(sys.stdout.fileno(), o)
finally:
# restore tty settings back
termios.tcsetattr(sys.stdin, termios.TCSADRAIN, old_tty)
This is the solution that worked for me at the end (as suggested by qarma) :
libc = ctypes.CDLL('libc.so.6')
master, slave = pty.openpty()
p = subprocess.Popen(["/bin/bash", "-i"], preexec_fn=libc.setsid, stdin=slave, stdout=slave, stderr=slave)
os.close(slave)
... do stuff here ...
x = os.read(master, 1026)
print x
Here is a full object oriented solution to do interactive shell commands with TTYs using threads and queues for stdout and stderr IO handling. This took me a while to build from multiple locations but it works perfectly so far on Unix/Linux systems and also as part of a Juniper op script. Thought I would post this here to save others time in trying to build something like this.
import pty
import re
import select
import threading
from datetime import datetime, timedelta
import os
import logging
import subprocess
import time
from queue import Queue, Empty
lib_logger = logging.getLogger("lib")
# Handler function to be run as a thread for pulling pty channels from an interactive shell
def _pty_handler(pty_master, logger, queue, stop):
poller = select.poll()
poller.register(pty_master, select.POLLIN)
while True:
# Stop handler if flagged
if stop():
logger.debug("Disabling pty handler for interactive shell")
break
fd_event = poller.poll(100)
for descriptor, event in fd_event:
# Read data from pipe and send to queue if there is data to read
if event == select.POLLIN:
data = os.read(descriptor, 1).decode("utf-8")
if not data:
break
# logger.debug("Reading in to handler queue: " + data)
queue.put(data)
# Exit handler if stdout is closing
elif event == select.POLLHUP:
logger.debug("Disabling pty handler for interactive shell")
break
# Function for reading outputs from the given queue by draining it and returning the output
def _get_queue_output(queue: Queue) -> str:
value = ""
try:
while True:
value += queue.get_nowait()
except Empty:
return value
# Helper function to create the needed list for popen and print the command run to the logger
def popen_command(command, logger, *args):
popen_list = list()
popen_list.append(command)
command_output = command
for arg in args:
popen_list.append(arg)
command_output += " " + arg
lib_logger.debug("Making Popen call using: " + str(popen_list))
logger.debug("")
logger.debug(command_output)
logger.debug("")
return popen_list
# Class for create an interactive shell and sending commands to it along with logging output to loggers
class InteractiveShell(object):
def __init__(self, command, logger, *args):
self.logger = logger
self.command = command
self.process = None
self.popen_list = popen_command(command, logger, *args)
self.master_stdout = None
self.slave_stdout = None
self.master_stderr = None
self.slave_stderr = None
self.stdout_handler = None
self.stderr_handler = None
self.stdout_queue = None
self.stderr_queue = None
self.stop_handlers = False
# Open interactive shell and setup all threaded IO handlers
def open(self, shell_prompt, timeout=DEVICE_TIMEOUT):
# Create PTYs
self.master_stdout, self.slave_stdout = pty.openpty()
self.master_stderr, self.slave_stderr = pty.openpty()
# Create shell subprocess
self.process = subprocess.Popen(self.popen_list, stdin=self.slave_stdout, stdout=self.slave_stdout,
stderr=self.slave_stderr, bufsize=0, start_new_session=True)
lib_logger.debug("")
lib_logger.debug("Started interactive shell for command " + self.command)
lib_logger.debug("")
# Create thread and queues for handling pty output and start them
self.stdout_queue = Queue()
self.stderr_queue = Queue()
self.stdout_handler = threading.Thread(target=_pty_handler, args=(self.master_stdout,
lib_logger,
self.stdout_queue,
lambda: self.stop_handlers))
self.stderr_handler = threading.Thread(target=_pty_handler, args=(self.master_stderr,
lib_logger,
self.stderr_queue,
lambda: self.stop_handlers))
self.stdout_handler.daemon = True
self.stderr_handler.daemon = True
lib_logger.debug("Enabling stderr handler for interactive shell " + self.command)
self.stderr_handler.start()
lib_logger.debug("Enabling stdout handler for interactive shell " + self.command)
self.stdout_handler.start()
# Wait for shell prompt
lib_logger.debug("Waiting for shell prompt: " + shell_prompt)
return self.wait_for(shell_prompt, timeout)
# Close interactive shell which should also kill all threaded IO handlers
def close(self):
# Wait 5 seconds before closing to let shell handle all input and outputs
time.sleep(5)
# Stop IO handler threads and terminate the process then wait another 5 seconds for cleanup to happen
self.stop_handlers = True
self.process.terminate()
time.sleep(5)
# Check for any additional output from the stdout handler
output = ""
while True:
data = _get_queue_output(self.stdout_queue)
if data != "":
output += data
else:
break
for line in iter(output.splitlines()):
self.logger.debug(line)
# Check for any additional output from the stderr handler
output = ""
while True:
data = _get_queue_output(self.stderr_queue)
if data != "":
output += data
else:
break
for line in iter(output.splitlines()):
self.logger.error(line)
# Cleanup PTYs
os.close(self.master_stdout)
os.close(self.master_stderr)
os.close(self.slave_stdout)
os.close(self.slave_stderr)
lib_logger.debug("Interactive shell command " + self.command + " terminated")
# Run series of commands given as a list of a list of commands and wait_for strings. If no wait_for is needed then
# only provide the command. Return if all the commands completed successfully or not.
# Ex:
# [
# ["ssh jsas#" + vnf_ip, r"jsas#.*:"],
# ["juniper123", r"jsas#.*\$"],
# ["sudo su", r".*jsas:"],
# ["juniper123", r"root#.*#"],
# ["usermod -p 'blah' jsas"]
# ]
def run_commands(self, commands_list):
shell_status = True
for command in commands_list:
shell_status = self.run(command[0])
if shell_status and len(command) == 2:
shell_status = self.wait_for(command[1])
# Break out of running commands if a command failed
if not shell_status:
break
return shell_status
# Run given command and return False if error occurs otherwise return True
def run(self, command, sleep=0):
# Check process to make sure it is still running and if not grab the stderr output
if self.process.poll():
self.logger.error("Interactive shell command " + self.command + " closed with return code: " +
self.process.returncode)
data = _get_queue_output(self.stderr_queue)
if data != "":
self.logger.error("Interactive shell error messages:")
for line in iter(data.splitlines()):
self.logger.error(line)
return False
# Write command to process and check to make sure a newline is in command otherwise add it
if "\n" not in command:
command += "\n"
os.write(self.master_stdout, command.encode("utf-8"))
if sleep:
time.sleep(sleep)
return True
# Wait for specific regex expression in output before continuing return False if wait time expires otherwise return
# True
def wait_for(self, this, timeout=DEVICE_TIMEOUT):
timeout = datetime.now() + timedelta(seconds=timeout)
output = ""
# Keep searching for output until timeout occurs
while timeout > datetime.now():
data = _get_queue_output(self.stdout_queue)
if data != "":
# Add to output line and check for match to regex given and if match then break and send output to
# logger
output += data
lib_logger.debug("Checking for " + this + " in data: ")
for line in iter(output.splitlines()):
lib_logger.debug(line)
if re.search(r"{}\s?$".format(this), output):
break
time.sleep(1)
# Send output to logger
for line in iter(output.splitlines()):
self.logger.debug(line)
# If wait time expired print error message and return False
if timeout < datetime.now():
self.logger.error("Wait time expired when waiting for " + this)
return False
return True
I'm trying to run a lengthy command within Python that outputs to both stdout and stderr. I'd like to poll the subprocess and write the output to separate files.
I tried the following, based on this answer Non-blocking read on a subprocess.PIPE in python
import subprocess
from Queue import Queue, Empty
from threading import Thread
def send_cmd(cmd, shell=False):
"""
Send cmd to the shell
"""
if not isinstance(cmd, list): cmd = shlex.split(cmd)
params = {'args' : cmd,
'stdout' : subprocess.PIPE,
'stderr' : subprocess.PIPE,
'shell' : shell}
proc = subprocess.Popen(**params)
return proc
def monitor_command(process, stdout_log=os.devnull, stderr_log=os.devnull):
"""
Monitor the process that is running, and log it if desired
"""
def enqueue_output(out, queue):
for line in iter(out.readline, b''):
queue.put(line)
def setup_process(log_name, proc):
FID = open(log_name, 'w')
queue = Queue()
thread = Thread(target=enqueue_output, args=(proc, queue))
thread.daemon = True # Thread dies with program
thread.start()
return (queue, FID)
def check_queues(queue_list, errors):
for queue, FID in queue_list:
try:
line = queue.get_nowait()
if 'error' in line.lower() or 'failed' in line.lower():
errors.append(line)
except Empty:
pass
else:
FID.write(line)
errors = []
queue_list = []
for log, proc in [(stdout_log, process.stdout), (stderr_log, process.stderr)]:
queue_list.append(setup_process(log, proc)
while process.poll() is None:
check_queues(queue_list, errors)
while not queue_list[0][0].empty() or queue_list[1][0].empty():
check_queues(queue_list, errors)
for queue, FID in queue_list:
FID.close()
return errors
process = send_cmd('long_program.exe')
errors = monitor_command(process, stdout_log='stdout.log', stderr_log='stderr.log')
But it the output file for stdout is empty, and the output file for stderr is only a few lines long, whereas both should be quite large.
What am I missing?
I did that once.. here is some old code I wrote
class Process_Communicator():
def join(self):
self.te.join()
self.to.join()
self.running = False
self.aggregator.join()
self.ti.join()
def enqueue_in(self):
while self.running and self.p.stdin is not None:
while not self.stdin_queue.empty():
s = self.stdin_queue.get()
self.p.stdin.write(str(s) + '\n\r')
pass
def enqueue_output(self):
if not self.p.stdout or self.p.stdout.closed:
return
out = self.p.stdout
for line in iter(out.readline, b''):
self.qo.put(line)
# out.flush()
def enqueue_err(self):
if not self.p.stderr or self.p.stderr.closed:
return
err = self.p.stderr
for line in iter(err.readline, b''):
self.qe.put(line)
def aggregate(self):
while (self.running):
self.update()
self.update()
def update(self):
line = ""
try:
while self.qe.not_empty:
line = self.qe.get_nowait() # or q.get(timeout=.1)
self.unbblocked_err += line
except Queue.Empty:
pass
line = ""
try:
while self.qo.not_empty:
line = self.qo.get_nowait() # or q.get(timeout=.1)
self.unbblocked_out += line
except Queue.Empty:
pass
while not self.stdin_queue.empty():
s = self.stdin_queue.get()
self.p.stdin.write(str(s))
def get_stdout(self, clear=True):
ret = self.unbblocked_out
if clear:
self.unbblocked_out = ""
return ret
def has_stdout(self):
ret = self.get_stdout(False)
if ret == '':
return None
else:
return ret
def get_stderr(self, clear=True):
ret = self.unbblocked_out
if clear:
self.unbblocked_out = ""
return ret
def has_stderr(self):
ret = self.get_stdout(False)
if ret == '':
return None
else:
return ret
def __init__(self, subp):
'''This is a simple class that collects and aggregates the
output from a subprocess so that you can more reliably use
the class without having to block for subprocess.communicate.'''
self.p = subp
self.unbblocked_out = ""
self.unbblocked_err = ""
self.running = True
self.qo = Queue.Queue()
self.to = threading.Thread(name="out_read",
target=self.enqueue_output,
args=())
self.to.daemon = True # thread dies with the program
self.to.start()
self.qe = Queue.Queue()
self.te = threading.Thread(name="err_read",
target=self.enqueue_err,
args=())
self.te.daemon = True # thread dies with the program
self.te.start()
self.stdin_queue = Queue.Queue()
self.aggregator = threading.Thread(name="aggregate",
target=self.aggregate,
args=())
self.aggregator.daemon = True # thread dies with the program
self.aggregator.start()
pass
You may not need the whole example, but feel free to cut copy and paste what you need. It's also important to show how I did the threading.
The code looks more complicated than the task requires. I don't see why do you need to call process.poll() or queue.get_nowait() here. To deliver subprocess' stdout/stderr to several sinks; you could start with teed_call() that accepts arbitrary file-like objects: you could pass logfiles and special file-like objects that accumulates errors in theirs .write() methods.
To fix your code with minimal changes; you should call .join() on the reader threads (even if process.poll() is not None i.e., the subprocess exited; there could be some pending output. Joining reader's threads ensures that all output is read).
I'm starting pool of workers and submitting jobs to this pool. Each process creates subprocess with browser, waits page loading and then takes a screenshot. Sometimes Opera shows crash dialog for incorrect terminated session. For avoiding this I'm killing tab through xkill and waiting browser termination. Now I need to make correct handling for SIGTERM signal. After signal was set and handled in a sig_handler function, I prevent submitting new jobs with pool.close() and waiting pool termination with pool.join(). When pool not running any subproccesses main process terminates normally, but when pool has a subprocess all worker processes terminates without waiting browser termination. How can I normally terminate my main process?
#!/usr/bin/env python
#
# http://bugs.python.org/issue6766 in functions manager data packed by pickle
#
import redis
import pickle
import getopt
import time
import logging
import os
import sys
import pwd
import subprocess
import re
import urllib2
import signal
import multiprocessing
import httplib
# Define regexps
xvfb_reg = re.compile(r'Xvfb :(\d+)')
browser_reg = re.compile(r'0x(\d+) .* \("opera" "Opera"\) 1024x768')
running = True
def sig_handler(signum, frame):
"""
Set termination flag
"""
global running
running = False
return
def check_url_code(url):
"""
Try fetch url before processing.
Return True if returned request code is 200 OK else False
"""
try:
url = urllib2.urlopen(url)
code = url.getcode()
if code == 200:
return True
else:
return False
except (urllib2.URLError, httplib.InvalidURL, ValueError):
return False
def list_display():
"""
Get working virtual framebuffers
"""
proc = subprocess.Popen(['/bin/ps', 'ax'], stdout=subprocess.PIPE)
return xvfb_reg.findall(proc.communicate()[0])
def get_display(queue, lock):
"""
Get display for opera instance.
"""
while True:
lock.acquire()
_queue = pickle.loads(queue['q'])
free = list(set(_queue['displays']).difference(_queue['locked_displays']))
if len(free):
_queue['locked_displays'].append(free[0])
queue['q'] = pickle.dumps(_queue)
lock.release()
return free[0]
lock.release()
time.sleep(3)
def get_screenshot(data, display):
"""
Fork background opera process and then search window with url.
Wait for 30 seconds and take screenshot of the window.
xkill killing opera window, cuz without opened tabs opera will be terminated.
"""
try:
os.remove('.opera/{0}/sessions/autosave.win'.format(display))
except:
pass
proc = subprocess.Popen(['/usr/bin/opera', '-geometry', '1024x768+0+0', '-fullscreen', '-display', ':{0}'.format(display), '-pd', '.opera/{0}'.format(display), data['url']])
time.sleep(10)
if int(data['size']) == 120:
geometry = '120x90'
elif int(data['size']) == 240:
geometry = '240x151'
elif int(data['size']) == 400:
geometry = '400x300'
try:
os.makedirs(data['path'])
except OSError:
pass
xwin_proc = subprocess.Popen(['/usr/bin/xwininfo', '-display', ':{0}'.format(display), '-root', '-tree'], stdout=subprocess.PIPE)
xwin_info = xwin_proc.communicate()[0]
window = browser_reg.findall(xwin_info)[0]
time.sleep(5)
pimport = subprocess.Popen(['/usr/bin/import', '-display', ':{0}'.format(display), '-window', 'root', '-resize', geometry, data['file']], stdout=subprocess.PIPE)
pimport.wait()
logging.info('Screenshot {0} for {1}: display={2}, window=0x{3}, file={4}'.format(geometry, data['url'], display, window, data['file']))
pxkill = subprocess.Popen(['/usr/bin/xkill', '-display', ':{0}'.format(display), '-id', '0x{0}'.format(window)])
proc.wait()
def worker_process(data, display, lock, connection, queue):
"""
Return data for callback function for freeing display and url
"""
get_screenshot(data, display)
lock.acquire()
_queue = pickle.loads(queue['q'])
_queue['locked_displays'].remove(display)
queue['q'] = pickle.dumps(_queue)
lock.release()
connection.hdel('jobs', data['md5_url'])
connection.hincrby('stats', 'completed', 1)
return
def main(pool, queue, lock, connection, job):
"""
Checking for file has been created early in another queue, url and url locks
"""
data = pickle.loads(job)
if os.path.isfile(data['path']):
connection.hdel('jobs', data['md5_url'])
return
lock.acquire()
_queue = pickle.loads(queue['q'])
if not check_url_code(data['url']):
logging.error('Error fetching {0}'.format(data['url']))
lock.release()
connection.hdel('jobs', data['md5_url'])
return
lock.release()
display = get_display(queue, lock)
pool.apply_async(worker_process, args = (data, display, lock, connection, queue))
def create_daemon(home):
try:
pid = os.fork()
except OSError:
sys.exit('Can not demonize process')
if pid == 0:
os.setsid()
try:
pid = os.fork()
except OSError:
sys.exit('Can not demonize process')
if pid == 0:
os.chdir(home)
os.umask(0)
else:
os._exit(0)
else:
os._exit(0)
import resource
maxfd = resource.getrlimit(resource.RLIMIT_NOFILE)[1]
if (maxfd == resource.RLIM_INFINITY):
maxfd = 1024
for fd in range(0, maxfd):
try:
os.close(fd)
except OSError:
pass
if hasattr(os, 'devnull'):
console = os.devnull
else:
console = '/dev/null'
os.open(console, os.O_RDWR)
os.dup2(0, 1)
os.dup2(0, 2)
return (0)
def help():
print """
Usage: {0} --u screenshot -l /var/log/screenshot/server.log -p /var/run/screenshot.pid
--user Set unprivileged user for process. This user can't be nobody, because script
-u reads home directory from passwd and uses it for Chrome user data dirs.
--log Set log file.
-l
--pid Set pid file.
-p
--help This help.
-h
""".format(sys.argv[0])
if __name__ == '__main__':
log_file = '/var/log/screenshot/server.log'
pid_file = '/var/run/screenshot.pid'
user = None
try:
opts, args = getopt.getopt(sys.argv[1:], 'l:p:u:h', ['log', 'pid', 'user', 'help'])
except getopt.GetoptError:
help()
sys.exit(2)
for opt, arg in opts:
if opt in ('-h', '--help'):
help()
sys.exit()
elif opt in ('-l', '--log'):
log_file = arg
elif opt in ('-p', '--pid'):
pid_file = arg
elif opt in ('-u', '--user'):
user = arg
if user:
if not os.geteuid() == 0:
sys.exit('You need root privileges to set user')
try:
userl = pwd.getpwnam(user)
uid = userl.pw_uid
home = userl.pw_dir
except KeyError:
sys.exit('User {0} does not exist'.format(user))
os.setuid(uid)
os.chdir(home)
else:
sys.exit('You must set user')
# Fork child process for demonization
retval = create_daemon(home)
# Write pid to pidfile
pid = os.getpid()
open(pid_file, 'w').write(str(pid))
# Open logfile
logging.basicConfig(level=logging.INFO,
format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
datefmt='%m-%d %H:%M',
filename=log_file)
logging.info('Starting server with pid {0}'.format(os.getpid()))
#
# Get working displays and start subprocesses
displays = list_display()
logging.info('Found displays: {0}'.format(' '.join(displays)))
pool = multiprocessing.Pool(processes=len(displays))
queue = multiprocessing.Manager().dict()
queue['q'] = pickle.dumps({
'displays' : displays,
'termination' : False,
'locked_displays' : []})
lock = multiprocessing.Manager().Lock()
connection = redis.Redis('localhost')
# Handle termination signals
signal.signal(signal.SIGTERM, sig_handler)
while running:
job = connection.lpop('high_priority')
if job is None:
job = connection.rpop('low_priority')
if not job is None:
main(pool, queue, lock, connection, job)
else:
time.sleep(5)
logging.info('Server stopped')
pool.close()
pool.join()
os._exit(0)