How do you wrap a bash shell session in a Python script so that Python can store the stdout and stderr to a database, and occasionally write to stdin?
I tried using subprocess with a tee-like Python class to redirect the IO, but it seems to use fileno to bypass Python entirely.
shell.py:
import os
import sys
from StringIO import StringIO
from subprocess import Popen, PIPE

class TeeFile(StringIO):

    def __init__(self, file, auto_flush=False):
        #super(TeeFile, self).__init__()
        StringIO.__init__(self)
        self.file = file
        self.auto_flush = auto_flush
        self.length = 0

    def write(self, s):
        print 'writing' # This is never called!!!
        self.length += len(s)
        self.file.write(s)
        #super(TeeFile, self).write(s)
        StringIO.write(self, s)
        if self.auto_flush:
            self.file.flush()

    def flush(self):
        self.file.flush()
        StringIO.flush(self)

    def fileno(self):
        return self.file.fileno()

cmd = ' '.join(sys.argv[1:])
stderr = TeeFile(sys.stderr, True)
stdout = TeeFile(sys.stdout, True)
p = Popen(cmd, shell=True, stdin=PIPE, stdout=stdout, stderr=stderr, close_fds=True)
For example, running python shell.py ping google.com runs the correct command and shows its output, but Python itself never sees the stdout.
#!/usr/bin/env python
# Copyright (c) Twisted Matrix Laboratories.
# See LICENSE for details.
from twisted.internet import protocol
from twisted.internet import reactor
import re

class MyPP(protocol.ProcessProtocol):

    def __init__(self, verses):
        self.verses = verses
        self.data = ""

    def connectionMade(self):
        print "connectionMade!"
        for i in range(self.verses):
            self.transport.write("Aleph-null bottles of beer on the wall,\n" +
                                 "Aleph-null bottles of beer,\n" +
                                 "Take one down and pass it around,\n" +
                                 "Aleph-null bottles of beer on the wall.\n")
        self.transport.closeStdin() # tell them we're done

    def outReceived(self, data):
        print "outReceived! with %d bytes!" % len(data)
        self.data = self.data + data

    def errReceived(self, data):
        print "errReceived! with %d bytes!" % len(data)

    def inConnectionLost(self):
        print "inConnectionLost! stdin is closed! (we probably did it)"

    def outConnectionLost(self):
        print "outConnectionLost! The child closed their stdout!"
        # now is the time to examine what they wrote
        #print "I saw them write:", self.data
        (dummy, lines, words, chars, file) = re.split(r'\s+', self.data)
        print "I saw %s lines" % lines

    def errConnectionLost(self):
        print "errConnectionLost! The child closed their stderr."

    def processExited(self, reason):
        print "processExited, status %d" % (reason.value.exitCode,)

    def processEnded(self, reason):
        print "processEnded, status %d" % (reason.value.exitCode,)
        print "quitting"
        reactor.stop()

pp = MyPP(10)
reactor.spawnProcess(pp, "wc", ["wc"], {})
reactor.run()
That's the Twisted way to handle command I/O as a protocol. By the way, you overcomplicate your script with StringIO; look at the Popen.communicate() method instead. Note that stdout and stderr are file descriptors and need to be read in parallel, because their buffers will overflow on longer output. If you want to stream a lot of data through this, either use the Twisted approach, or, if you stay with Popen, start a separate thread that reads stdout (line by line, for example) and puts the lines into the database immediately.
Twisted howto on process protocol.
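For the plain-Popen route, a rough sketch of that separate-reader-thread idea might look like the following (a minimal sketch, not a drop-in solution; store_line is a hypothetical placeholder for whatever writes a line to your database):

import threading
from subprocess import Popen, PIPE

def store_line(line):
    # Hypothetical placeholder: replace with your actual database write.
    print 'would store: %r' % line

def reader(pipe, sink):
    # Read the child's output line by line and hand each line to the sink.
    # (Python 2 text mode; under Python 3 the sentinel would be b'' unless
    # the pipe is opened in text mode.)
    for line in iter(pipe.readline, ''):
        sink(line)
    pipe.close()

p = Popen('ping -c 3 google.com', shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE)
t_out = threading.Thread(target=reader, args=(p.stdout, store_line))
t_err = threading.Thread(target=reader, args=(p.stderr, store_line))
t_out.start()
t_err.start()
# p.stdin is still available here for the occasional write to the child's stdin.
t_out.join()
t_err.join()
p.wait()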
My goal: to read the latest "chunk" (N lines) of streaming stdout from a subprocess every M seconds.
Current code:
start the subprocess
read stdout
once I have a chunk of N lines, print it out (or save it as the current chunk)
wait M seconds
repeat
For now I have also added code to terminate the subprocess (its output is an endless stream until you hit Ctrl-C).
What I want to achieve is that, after waiting M seconds, it always reads the latest N lines rather than the next N lines queued up in stdout (those can be discarded, since I'm only interested in the latest).
My end goal is to spawn a thread that runs the process and keeps saving the latest lines, so that I can fetch the latest results of the stream from the main process whenever I need them.
Any help would be greatly appreciated!
#!/usr/bin/env python3
import signal
import time
from subprocess import Popen, PIPE

sig = signal.SIGTERM
N = 9
M = 5
countlines = 0

p = Popen(["myprogram"], stdout=PIPE, bufsize=1, universal_newlines=True)
chunk = []
for line in p.stdout:
    countlines += 1
    chunk.append(line)
    if len(chunk) == N:
        print(chunk)
        chunk = []
        time.sleep(M)
    if countlines > 100:
        p.send_signal(sig)
        break
print("done")
After much searching, I stumbled upon a solution here:
https://eli.thegreenplace.net/2017/interacting-with-a-long-running-child-process-in-python/
Eli's "Launch, interact, get output in real time, terminate" code section worked for me.
So far it's the most elegant solution I've found.
Adapted to my problem above, and written within a class (not shown here):
# These methods live inside a class (not shown); subprocess, threading and time
# are imported at module level, and N is defined elsewhere.
def output_reader(self, proc):
    chunk = []
    countlines = 0
    for line in iter(proc.stdout.readline, b''):
        countlines += 1
        chunk.append(line.decode("utf-8"))
        if countlines == N:
            self.current_chunk = chunk
            chunk = []
            countlines = 0

def main(self):
    proc = subprocess.Popen(['myprocess'],
                            stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT)
    t = threading.Thread(target=self.output_reader, args=(proc,))
    t.start()
    try:
        time.sleep(0.2)
        for i in range(10):
            time.sleep(1)  # wait a while before getting the latest lines
            print(self.current_chunk)
    finally:
        proc.terminate()
        try:
            proc.wait(timeout=0.2)
            print('== subprocess exited with rc =', proc.returncode)
        except subprocess.TimeoutExpired:
            print('subprocess did not terminate in time')
    t.join()
Here is another possible solution. It is a program that you would run as a separate process in the pipeline; it presents a REST API that, when queried, returns the last N lines it has read on stdin (N and the port number are supplied on the command line). It uses Flask's built-in development server (app.run), so it should not be used where the outside world has access to the local server port, though this could be adapted.
import sys
import time
import threading
import argparse
from flask import Flask, request
from flask_restful import Resource, Api

class Server:

    def __init__(self):
        self.data = {'at_eof': False,
                     'lines_read': 0,
                     'latest_lines': []}
        self.thread = None
        self.args = None
        self.stop = False

    def parse_args(self):
        parser = argparse.ArgumentParser()
        parser.add_argument("num_lines", type=int,
                            help="number of lines to cache")
        parser.add_argument("port", type=int,
                            help="port to serve on")
        self.args = parser.parse_args()

    def start_updater(self):
        def updater():
            lines = self.data['latest_lines']
            while True:
                if self.stop:
                    return
                line = sys.stdin.readline()
                if not line:
                    break
                self.data['lines_read'] += 1
                lines.append(line)
                while len(lines) > self.args.num_lines:
                    lines.pop(0)
            self.data['at_eof'] = True
        self.thread = threading.Thread(target=updater)
        self.thread.start()

    def get_data(self):
        return self.data

    def shutdown(self):
        self.stop = True
        func = request.environ.get('werkzeug.server.shutdown')
        if func:
            func()
            return 'Shutting down'
        else:
            return 'shutdown failed'

    def add_apis(self, app):
        class GetData(Resource):
            get = self.get_data
        class Shutdown(Resource):
            get = self.shutdown
        api = Api(app)
        api.add_resource(GetData, "/getdata")
        api.add_resource(Shutdown, "/shutdown")

    def run(self):
        self.parse_args()
        self.start_updater()
        app = Flask(__name__)
        self.add_apis(app)
        app.run(port=self.args.port)

server = Server()
server.run()
Example usage: here is a test program whose output we want to serve:
import sys
import time

for i in range(100):
    print("this is line {}".format(i))
    sys.stdout.flush()
    time.sleep(.1)
And a simple pipeline to launch it (here from the Linux shell prompt, but it could also be done via subprocess.Popen), serving the last 5 lines on port 8001:
python ./writer.py | python ./server.py 5 8001
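For reference, a rough equivalent of that pipeline using subprocess.Popen might look like this (a sketch only; writer.py and server.py are the two scripts above):

import subprocess

# Launch writer.py and feed its stdout into server.py, serving the last 5 lines on port 8001.
writer = subprocess.Popen(["python", "./writer.py"], stdout=subprocess.PIPE)
server = subprocess.Popen(["python", "./server.py", "5", "8001"], stdin=writer.stdout)
writer.stdout.close()  # allow the writer to receive SIGPIPE if the server exits
server.wait()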
An example query, here using curl as the client, though it could also be done with Python requests:
$ curl -s http://localhost:8001/getdata
{"at_eof": false, "lines_read": 30, "latest_lines": ["this is line 25\n", "this is line 26\n", "this is line 27\n", "this is line 28\n", "this is line 29\n"]}
The server also provides an http://localhost:<port>/shutdown URL to terminate it, though if you call it before you first see "at_eof": true, then expect the writer to die with a broken pipe.
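For completeness, the same /getdata query could be made from Python with the requests library, something along these lines (a small sketch, assuming the server is running on port 8001 as above):

import requests

# Query the /getdata endpoint and pull out the cached lines.
resp = requests.get("http://localhost:8001/getdata")
data = resp.json()
print(data["lines_read"], data["latest_lines"])

# When finished, the /shutdown endpoint stops the server.
# requests.get("http://localhost:8001/shutdown")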
I am using the multiprocessing package to spawn a second process from which I would like to redirect stdout and stderr into the first process. I am using a multiprocessing.Pipe object:
dup2(output_pipe.fileno(), 1)
Where output_pipe is an instance of multiprocessing.Pipe. However, when I try to read on the other end, it just hangs. I tried reading using Pipe.recv_bytes with a limit, but that raises an OSError. Is this possible at all or should I just switch to some lower level pipe functions?
After experimenting in Python 2.7 I got this working example. With os.dup2 the pipe's file descriptor is copied to the standard output file descriptor, so each print statement ends up writing to the pipe.
import os
import multiprocessing

def tester_method(w):
    os.dup2(w.fileno(), 1)
    for i in range(3):
        print 'This is a message!'

if __name__ == '__main__':
    r, w = multiprocessing.Pipe()
    reader = os.fdopen(r.fileno(), 'r')
    process = multiprocessing.Process(None, tester_method, 'TESTER', (w,))
    process.start()
    for i in range(3):
        print 'From pipe: %s' % reader.readline()
    reader.close()
    process.join()
Output:
From pipe: This is a message!
From pipe: This is a message!
From pipe: This is a message!
The existing answer works with raw file descriptors, but this may be useful if you want to use Pipe.send() and recv():
import sys

class PipeTee(object):

    def __init__(self, pipe):
        self.pipe = pipe
        self.stdout = sys.stdout
        sys.stdout = self

    def write(self, data):
        self.stdout.write(data)
        self.pipe.send(data)

    def flush(self):
        self.stdout.flush()

    def __del__(self):
        sys.stdout = self.stdout
To use this, create the object in your multiprocess function, pass it the write side of multiprocessing.Pipe, and then use the read side on the parent process with recv, using poll to check if data exists.
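A minimal usage sketch might look like this (worker is a hypothetical child function; PipeTee is the class above):

import multiprocessing
import sys

def worker(conn):
    # In the child: install the tee so everything printed is also sent over the pipe.
    # Keep a reference so __del__ does not restore stdout immediately.
    tee = PipeTee(conn)
    print("hello from the child")

if __name__ == '__main__':
    parent_conn, child_conn = multiprocessing.Pipe()
    proc = multiprocessing.Process(target=worker, args=(child_conn,))
    proc.start()
    proc.join()
    child_conn.close()  # close the parent's copy of the child end
    pieces = []
    while parent_conn.poll():
        try:
            pieces.append(parent_conn.recv())
        except EOFError:
            break
    sys.stdout.write("from child: " + "".join(pieces))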
The function glib.spawn_async allows you to hook up three callbacks, which are called on events on stdout and stderr and on process completion.
How can I mimic the same functionality with subprocess with either threads or asyncio?
I am more interested in the functionality than in threading vs. asyncio, but an answer that covers both will earn a bounty.
Here is a toy program that shows what I want to do:
import glib
import logging
import os
import gtk
class MySpawn(object):
def __init__(self):
self._logger = logging.getLogger(self.__class__.__name__)
def execute(self, cmd, on_done, on_stdout, on_stderr):
self.pid, self.idin, self.idout, self.iderr = \
glib.spawn_async(cmd,
flags=glib.SPAWN_DO_NOT_REAP_CHILD,
standard_output=True,
standard_error=True)
fout = os.fdopen(self.idout, "r")
ferr = os.fdopen(self.iderr, "r")
glib.child_watch_add(self.pid, on_done)
glib.io_add_watch(fout, glib.IO_IN, on_stdout)
glib.io_add_watch(ferr, glib.IO_IN, on_stderr)
return self.pid
if __name__ == '__main__':
logging.basicConfig(format='%(thread)d %(levelname)s: %(message)s',
level=logging.DEBUG)
cmd = '/usr/bin/git ls-remote https://github.com/DiffSK/configobj'.split()
def on_done(pid, retval, *args):
logging.info("That's all folks!…")
def on_stdout(fobj, cond):
"""This blocks which is fine for this toy example…"""
for line in fobj.readlines():
logging.info(line.strip())
return True
def on_stderr(fobj, cond):
"""This blocks which is fine for this toy example…"""
for line in fobj.readlines():
logging.error(line.strip())
return True
runner = MySpawn()
runner.execute(cmd, on_done, on_stdout, on_stderr)
try:
gtk.main()
except KeyboardInterrupt:
print('')
I should add that since readlines() blocks, the above will buffer all the output and deliver it at once. If that is not what you want, you have to use readline() instead and make sure that, when the command ends, you finish reading any lines you have not read before.
asyncio has subprocess_exec; there is no need to use the subprocess module at all:
import asyncio

class Handler(asyncio.SubprocessProtocol):

    def pipe_data_received(self, fd, data):
        # fd == 1 for stdout, and 2 for stderr
        print("Data from /bin/ls on fd %d: %s" % (fd, data.decode()))

    def pipe_connection_lost(self, fd, exc):
        print("Connection lost to /bin/ls")

    def process_exited(self):
        print("/bin/ls is finished.")

loop = asyncio.get_event_loop()
coro = loop.subprocess_exec(Handler, "/bin/ls", "/")
loop.run_until_complete(coro)
loop.close()
With subprocess and threading, it's simple as well. You can just spawn a thread per pipe, and one to wait() for the process:
import subprocess
import threading

class PopenWrapper(object):

    def __init__(self, args):
        self.process = subprocess.Popen(args, stdout=subprocess.PIPE,
                                        stderr=subprocess.PIPE,
                                        stdin=subprocess.DEVNULL)
        self.stdout_reader_thread = threading.Thread(target=self._reader,
                                                     args=(self.process.stdout,))
        self.stderr_reader_thread = threading.Thread(target=self._reader,
                                                     args=(self.process.stderr,))
        self.exit_watcher = threading.Thread(target=self._exit_watcher)
        self.stdout_reader_thread.start()
        self.stderr_reader_thread.start()
        self.exit_watcher.start()

    def _reader(self, fileobj):
        for line in fileobj:
            self.on_data(fileobj, line)

    def _exit_watcher(self):
        self.process.wait()
        self.stdout_reader_thread.join()
        self.stderr_reader_thread.join()
        self.on_exit()

    def on_data(self, fd, data):
        raise NotImplementedError

    def on_exit(self):
        raise NotImplementedError

    def join(self):
        self.process.wait()

class LsWrapper(PopenWrapper):

    def on_data(self, fd, data):
        print("Received on fd %r: %s" % (fd, data))

    def on_exit(self):
        print("Process exited.")

LsWrapper(["/bin/ls", "/"]).join()
However, mind that glib does not use threads to asynchronously execute your callbacks. It uses an event loop, just as asyncio does. The idea is that at the core of your program is a loop that waits until something happens, and then synchronously executes an associated callback. In your case, that's "data becomes available for reading on one of the pipes" and "the subprocess has exited". In general, it's also things like "the X11 server reported mouse movement", "there's incoming network traffic", etc. You can emulate glib's behaviour by writing your own event loop: use the select module on the two pipes. If select reports that a pipe is readable but read returns no data, the process likely exited; call the poll() method on the subprocess object in that case to check whether it has completed, and call your exit callback if it has, or an error callback otherwise.
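To make that concrete, here is a rough sketch of such a hand-rolled select loop (run_with_callbacks is a made-up helper; it only covers the "data available" and "process exited" events and leaves out a separate error callback):

import os
import select
import subprocess

def run_with_callbacks(args, on_stdout, on_stderr, on_exit):
    # A tiny select-based event loop over the child's stdout and stderr pipes.
    proc = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    callbacks = {proc.stdout.fileno(): on_stdout, proc.stderr.fileno(): on_stderr}
    while callbacks:
        readable, _, _ = select.select(list(callbacks), [], [])
        for fd in readable:
            data = os.read(fd, 4096)
            if data:
                callbacks[fd](data)
            else:
                # An empty read means EOF on this pipe; stop watching it.
                del callbacks[fd]
    on_exit(proc.wait())

run_with_callbacks(
    ["/bin/ls", "/"],
    lambda data: print("stdout:", data.decode(), end=""),
    lambda data: print("stderr:", data.decode(), end=""),
    lambda rc: print("exited with code", rc),
)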
I'm trying to build a Python script that opens a subprocess (a bash script) and reads its stdout into a variable for 10 seconds. After 10 seconds I need to transfer the data to a server via a POST request.
I know how to make the POST request, but how do I collect stdout for those 10 seconds?
I have found a lot of examples of using Popen to launch a bash script and read stderr instantly without buffering, but how do I collect the output over a period of time and release it in portions?
I think this solution, with two threads each having a simple responsibility, is clean and elegant.
import os
import subprocess
import threading
import functools
from time import sleep

class OutputMonitor(threading.Thread):
    """ Start the subprocess in a separate thread and append its output to a buffer. """

    def __init__(self, cmd):
        super(OutputMonitor, self).__init__()
        self.daemon = True
        self.cmd = cmd
        self.buffer = ''
        self.buflock = threading.Lock()

    def run(self):
        popen = subprocess.Popen(self.cmd, stdout=subprocess.PIPE,
                                 stderr=subprocess.STDOUT)
        while popen.poll() is None:
            data = popen.stdout.read(4)
            if data != "":
                with self.buflock:
                    self.buffer += data

    def get_current_output(self):
        with self.buflock:
            buf = self.buffer
            self.buffer = ""
        return buf

class OutputHandler(threading.Thread):
    """
    Start a thread responsible for tracking subprocess output, and periodically
    check if it has produced new output. If so, call handler to process this data.
    """

    def __init__(self, cmd, interval, filepath):
        super(OutputHandler, self).__init__()
        self.om = OutputMonitor(cmd)
        self.interval = interval
        # Replace it with your handler init...
        self.filepath = filepath
        if os.path.exists(self.filepath):
            os.unlink(self.filepath)

    def run(self):
        self.om.start()
        while self.om.is_alive():
            sleep(self.interval)
            data = self.om.get_current_output()
            self._handle_data_chunk(data)

    def _handle_data_chunk(self, data):
        # Replace it with your handling.
        with open(self.filepath, 'a') as f:
            f.write(data)

if __name__ == '__main__':
    logfile_path = "C:\\log.txt"
    interval = 5
    cmd = ['ping', '-n', '10', '127.0.0.1']
    oh = OutputHandler(cmd, interval, logfile_path)
    oh.start()
    oh.join()
You could do something similar to what is below:
redirect the subprocess's output to a pipe so it can be captured
catch the output in a variable that is shared by the posting and capturing functions
set up a thread to post the logs every 10 seconds
import threading, sys, subprocess

out = ""

def postLogs():
    print out
    #do your posting here
    threading.Timer(10.0, postLogs).start()  # execute this function every 10 seconds

proc = subprocess.Popen("ping google.com", shell=True, stdout=subprocess.PIPE,
                        stderr=subprocess.STDOUT)
while proc.poll() is None:
    out = proc.stdout.readline()
    sys.stdout.flush()
    if out != "":
        postLogs()
Okay, let's continue with mrad's script.
I edited it just a little: I added a write-to-file function and it works perfectly with
ping google.com
BUT it does not work with the command I need; I need to launch ffmpeg. The command I need is
ffmpeg -i "my rtsp link" -vcodec copy -loglevel verbose -an -f flv "my RTMP link"
When I put my command inside this code: 1) I see the output instantly; 2) nothing is saved to the file.
import subprocess
import threading
from datetime import datetime
from time import sleep
from Queue import Queue

class Monitor(threading.Thread):

    def __init__(self, queue, cmd):
        super(Monitor, self).__init__()
        self.queue = queue
        self.cmd = cmd

    def run(self):
        popen = subprocess.Popen(self.cmd, stdout=subprocess.PIPE, shell=True)
        while popen.poll() is None:
            line = popen.stdout.readline()
            self.queue.put(line)

def foo(cmd, interval):
    q = Queue()
    m = Monitor(q, cmd)
    m.start()
    while m.is_alive():
        sleep(interval)
        current_queue_length = q.qsize()
        chunk = ''
        for i in xrange(current_queue_length):
            chunk += q.get()
        print chunk
        f = open("/home/pi/raf/log.txt", "a")  # trying to write to log
        f.write(chunk)
        f.close()

if __name__ == '__main__':
    cmd = 'ping google.com'
    interval = 3
    foo(cmd, interval)
I am trying to create a stream object that triggers a callback function any time data is written to it.
class MonitoredStream():

    def __init__(self, outstream, callback):
        self.outstream = outstream
        self.callback = callback

    def write(self, s):
        self.callback(s)
        self.outstream.write(s)

    def __getattr__(self, attr):
        return getattr(self.outstream, attr)
This works fine when I call the write method directly, but I would love to have it work also when I have a subprocess' output hooked to the stream. For example:
def f(s):
    print("Write")

p = sub.Popen(["sh", "test.sh"], stdout=MonitoredStream(sys.stdout, f))
p.communicate()
This just sends output directly to sys.stdout, bypassing the write function completely. Is there a way that I can monitor this output also?
I believe the issue here is that subprocess.Popen doesn't use the Python interface to the pipe. Instead it calls fileno() to get the underlying file descriptor and passes that to the child, which writes to it directly; since your __getattr__ hands it the attributes of the real stdout stream, the child ends up writing straight to that descriptor, bypassing your code.
My best guess at solving this is to make a new pipe that sits in between and lets you deal with the stream yourself. I would implement this as a context manager:
import sys
import os
from subprocess import Popen
from contextlib import contextmanager

@contextmanager
def monitor(stream, callback):
    read, write = os.pipe()
    yield write
    os.close(write)
    with os.fdopen(read) as f:
        for line in f:
            callback(line)
            stream.write(line)

def f(s):
    print("Write")

with monitor(sys.stdout, f) as stream:
    p = Popen(["ls"], stdout=stream)
    p.communicate()
Although you could, of course, still use a class:
import sys
import os
from subprocess import Popen

class MonitoredStream():

    def __init__(self, stream, callback):
        self.stream = stream
        self.callback = callback
        self._read, self._write = os.pipe()

    def fileno(self):
        return self._write

    def process(self):
        os.close(self._write)
        with os.fdopen(self._read) as f:
            for line in f:
                self.callback(line)
                self.stream.write(line)

def f(s):
    print("Write")

stream = MonitoredStream(sys.stdout, f)

p = Popen(["ls"], stdout=stream)
p.communicate()

print(stream.process())
Although I feel this is less elegant.