I am using the multiprocessing package to spawn a second process from which I would like to redirect stdout and stderr into the first process. I am using a multiprocessing.Pipe object:
dup2(output_pipe.fileno(), 1)
where output_pipe is an instance of multiprocessing.Pipe. However, when I try to read on the other end, it just hangs. I tried reading using Pipe.recv_bytes with a limit, but that raises an OSError. Is this possible at all, or should I just switch to some lower-level pipe functions?
After experimenting in Python 2.7 I got this working example. With os.dup2, the pipe's file descriptor is copied to the standard output file descriptor, so each print statement ends up writing to the pipe.
import os
import multiprocessing

def tester_method(w):
    os.dup2(w.fileno(), 1)
    for i in range(3):
        print 'This is a message!'

if __name__ == '__main__':
    r, w = multiprocessing.Pipe()
    reader = os.fdopen(r.fileno(), 'r')
    process = multiprocessing.Process(None, tester_method, 'TESTER', (w,))
    process.start()

    for i in range(3):
        print 'From pipe: %s' % reader.readline()

    reader.close()
    process.join()
Output:
From pipe: This is a message!
From pipe: This is a message!
From pipe: This is a message!
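For reference, here is a hedged Python 3 sketch of the same os.dup2 technique. It assumes the default fork start method on Unix so the child inherits the write end of the pipe; the structure mirrors the example above.

import os
import multiprocessing

def tester_method(w):
    # Make fd 1 (stdout) point at the pipe's write end, then print as usual.
    os.dup2(w.fileno(), 1)
    for i in range(3):
        print('This is a message!', flush=True)

if __name__ == '__main__':
    r, w = multiprocessing.Pipe()
    # Duplicate the fd so closing the reader does not disturb the Connection object.
    reader = os.fdopen(os.dup(r.fileno()), 'r')
    process = multiprocessing.Process(None, tester_method, 'TESTER', (w,))
    process.start()
    for i in range(3):
        print('From pipe: %s' % reader.readline(), end='')
    reader.close()
    process.join()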
The existing answer works for raw file descriptors, but this may be useful when using Pipe.send() and recv():
import sys

class PipeTee(object):
    def __init__(self, pipe):
        self.pipe = pipe
        self.stdout = sys.stdout
        sys.stdout = self

    def write(self, data):
        self.stdout.write(data)
        self.pipe.send(data)

    def flush(self):
        self.stdout.flush()

    def __del__(self):
        sys.stdout = self.stdout
To use this, create the object in your multiprocessing worker function and pass it the write end of a multiprocessing.Pipe. In the parent process, read the other end with recv(), using poll() to check whether data is available.
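A short usage sketch (the worker function, connection names, and message below are illustrative, not part of the original answer):

import sys
import multiprocessing

def worker(child_conn):
    tee = PipeTee(child_conn)       # installs itself over sys.stdout
    print("hello from the child")   # written to the real stdout and sent down the pipe
    sys.stdout.flush()

if __name__ == '__main__':
    parent_conn, child_conn = multiprocessing.Pipe()
    proc = multiprocessing.Process(target=worker, args=(child_conn,))
    proc.start()
    proc.join()
    while parent_conn.poll():       # check for data before blocking on recv()
        print("from pipe: %r" % parent_conn.recv())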
Related
The function glib.spawn_async allows you to hook up three callbacks, which are called on events on stdout and stderr, and on process completion.
How can I mimic the same functionality with subprocess, using either threads or asyncio?
I am more interested in the functionality than in threading vs. asyncio, but an answer that contains both will earn a bounty.
Here is a toy program that shows what I want to do:
import glib
import logging
import os
import gtk


class MySpawn(object):
    def __init__(self):
        self._logger = logging.getLogger(self.__class__.__name__)

    def execute(self, cmd, on_done, on_stdout, on_stderr):
        self.pid, self.idin, self.idout, self.iderr = \
            glib.spawn_async(cmd,
                             flags=glib.SPAWN_DO_NOT_REAP_CHILD,
                             standard_output=True,
                             standard_error=True)
        fout = os.fdopen(self.idout, "r")
        ferr = os.fdopen(self.iderr, "r")
        glib.child_watch_add(self.pid, on_done)
        glib.io_add_watch(fout, glib.IO_IN, on_stdout)
        glib.io_add_watch(ferr, glib.IO_IN, on_stderr)
        return self.pid


if __name__ == '__main__':
    logging.basicConfig(format='%(thread)d %(levelname)s: %(message)s',
                        level=logging.DEBUG)
    cmd = '/usr/bin/git ls-remote https://github.com/DiffSK/configobj'.split()

    def on_done(pid, retval, *args):
        logging.info("That's all folks!…")

    def on_stdout(fobj, cond):
        """This blocks which is fine for this toy example…"""
        for line in fobj.readlines():
            logging.info(line.strip())
        return True

    def on_stderr(fobj, cond):
        """This blocks which is fine for this toy example…"""
        for line in fobj.readlines():
            logging.error(line.strip())
        return True

    runner = MySpawn()
    runner.execute(cmd, on_done, on_stdout, on_stderr)
    try:
        gtk.main()
    except KeyboardInterrupt:
        print('')
I should add that since readlines() is blocking, the above will buffer all the output and deliver it at once. If that is not what you want, you have to use readline() instead and make sure that, at the end of the command, you finish reading any lines you did not read before.
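For reference, a hedged sketch of that readline()-based variant; it assumes execute() is changed to keep the stdout file object on the runner (e.g. runner.fout), which the original code does not do:

def on_stdout(fobj, cond):
    line = fobj.readline()    # one line per IO_IN event instead of a blocking readlines()
    if line:
        logging.info(line.strip())
    return True               # keep the io watch installed

def on_done(pid, retval, *args):
    # Drain whatever is still sitting in the pipe before finishing up.
    for line in runner.fout.readlines():
        logging.info(line.strip())
    logging.info("That's all folks!…")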
asyncio has subprocess_exec; there is no need to use the subprocess module at all:
import asyncio


class Handler(asyncio.SubprocessProtocol):
    def pipe_data_received(self, fd, data):
        # fd == 1 for stdout, and 2 for stderr
        print("Data from /bin/ls on fd %d: %s" % (fd, data.decode()))

    def pipe_connection_lost(self, fd, exc):
        print("Connection lost to /bin/ls")

    def process_exited(self):
        print("/bin/ls is finished.")


loop = asyncio.get_event_loop()
coro = loop.subprocess_exec(Handler, "/bin/ls", "/")
loop.run_until_complete(coro)
loop.close()
With subprocess and threading, it's simple as well. You can just spawn a thread per pipe, and one to wait() for the process:
import subprocess
import threading


class PopenWrapper(object):
    def __init__(self, args):
        self.process = subprocess.Popen(args, stdout=subprocess.PIPE,
                                        stderr=subprocess.PIPE,
                                        stdin=subprocess.DEVNULL)
        self.stdout_reader_thread = threading.Thread(target=self._reader,
                                                     args=(self.process.stdout,))
        self.stderr_reader_thread = threading.Thread(target=self._reader,
                                                     args=(self.process.stderr,))
        self.exit_watcher = threading.Thread(target=self._exit_watcher)
        self.stdout_reader_thread.start()
        self.stderr_reader_thread.start()
        self.exit_watcher.start()

    def _reader(self, fileobj):
        for line in fileobj:
            self.on_data(fileobj, line)

    def _exit_watcher(self):
        self.process.wait()
        self.stdout_reader_thread.join()
        self.stderr_reader_thread.join()
        self.on_exit()

    def on_data(self, fd, data):
        raise NotImplementedError

    def on_exit(self):
        raise NotImplementedError

    def join(self):
        self.process.wait()


class LsWrapper(PopenWrapper):
    def on_data(self, fd, data):
        print("Received on fd %r: %s" % (fd, data))

    def on_exit(self):
        print("Process exited.")


LsWrapper(["/bin/ls", "/"]).join()
However, mind that glib does not use threads to asynchronously execute your callbacks. It uses an event loop, just as asyncio does. The idea is that at the core of your program is a loop that waits until something happens, and then synchronously executes an associated callback. In your case, that's "data becomes available for reading on one of the pipes" and "the subprocess has exited". In general, it's also things like "the X11 server reported mouse movement", "there's incoming network traffic", etc. You can emulate glib's behaviour by writing your own event loop: use the select module on the two pipes. If select reports that a pipe is readable but read returns no data, the process likely exited; in that case call the poll() method on the subprocess object to check whether it has completed, and call your exit callback if it has, or an error callback otherwise.
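A rough sketch of that select-based loop (this is not glib's implementation; the function and callback names are made up for illustration):

import select
import subprocess

def run_with_callbacks(args, on_stdout, on_stderr, on_exit):
    proc = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    pipes = {proc.stdout: on_stdout, proc.stderr: on_stderr}
    while pipes:
        # Wait until at least one pipe has data (or has reached EOF).
        readable, _, _ = select.select(list(pipes), [], [])
        for fileobj in readable:
            line = fileobj.readline()   # may block on a partial line; fine for a sketch
            if line:
                pipes[fileobj](line)
            else:
                del pipes[fileobj]      # EOF: stop watching this pipe
    proc.wait()
    on_exit(proc.returncode)

run_with_callbacks(["/bin/ls", "/"],
                   on_stdout=lambda line: print("out: %s" % line.decode(), end=""),
                   on_stderr=lambda line: print("err: %s" % line.decode(), end=""),
                   on_exit=lambda code: print("exited with %d" % code))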
How do you wrap a bash shell session in a Python script so that Python can store the stdout and stderr to a database, and occasionally write to stdin?
I tried using subprocess with a tee-like Python class to redirect the IO, but Popen seems to use fileno() to bypass Python entirely.
shell.py:
import os
import sys
from StringIO import StringIO
from subprocess import Popen, PIPE


class TeeFile(StringIO):
    def __init__(self, file, auto_flush=False):
        #super(TeeFile, self).__init__()
        StringIO.__init__(self)
        self.file = file
        self.auto_flush = auto_flush
        self.length = 0

    def write(self, s):
        print 'writing'  # This is never called!!!
        self.length += len(s)
        self.file.write(s)
        #super(TeeFile, self).write(s)
        StringIO.write(self, s)
        if self.auto_flush:
            self.file.flush()

    def flush(self):
        self.file.flush()
        StringIO.flush(self)

    def fileno(self):
        return self.file.fileno()


cmd = ' '.join(sys.argv[1:])
stderr = TeeFile(sys.stderr, True)
stdout = TeeFile(sys.stdout, True)
p = Popen(cmd, shell=True, stdin=PIPE, stdout=stdout, stderr=stderr, close_fds=True)
For example, running python shell.py ping google.com runs the correct command and shows the output, but Python never sees the stdout.
#!/usr/bin/env python
# Copyright (c) Twisted Matrix Laboratories.
# See LICENSE for details.

from twisted.internet import protocol
from twisted.internet import reactor
import re


class MyPP(protocol.ProcessProtocol):
    def __init__(self, verses):
        self.verses = verses
        self.data = ""

    def connectionMade(self):
        print "connectionMade!"
        for i in range(self.verses):
            self.transport.write("Aleph-null bottles of beer on the wall,\n" +
                                 "Aleph-null bottles of beer,\n" +
                                 "Take one down and pass it around,\n" +
                                 "Aleph-null bottles of beer on the wall.\n")
        self.transport.closeStdin()  # tell them we're done

    def outReceived(self, data):
        print "outReceived! with %d bytes!" % len(data)
        self.data = self.data + data

    def errReceived(self, data):
        print "errReceived! with %d bytes!" % len(data)

    def inConnectionLost(self):
        print "inConnectionLost! stdin is closed! (we probably did it)"

    def outConnectionLost(self):
        print "outConnectionLost! The child closed their stdout!"
        # now is the time to examine what they wrote
        #print "I saw them write:", self.data
        (dummy, lines, words, chars, file) = re.split(r'\s+', self.data)
        print "I saw %s lines" % lines

    def errConnectionLost(self):
        print "errConnectionLost! The child closed their stderr."

    def processExited(self, reason):
        print "processExited, status %d" % (reason.value.exitCode,)

    def processEnded(self, reason):
        print "processEnded, status %d" % (reason.value.exitCode,)
        print "quitting"
        reactor.stop()


pp = MyPP(10)
reactor.spawnProcess(pp, "wc", ["wc"], {})
reactor.run()
That's the Twisted way to handle command IO as a protocol. By the way, you are overcomplicating your script with StringIO; check the Popen.communicate() method instead. Note that stdout/stderr are file descriptors and need to be read in parallel, because their buffers will overflow on longer output. If you want to stream a lot of data through this, either use the Twisted approach or, with Popen, start a separate thread that reads stdout (line by line, say) and puts the lines into the database immediately.
Twisted howto on process protocol.
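And a minimal sketch of the Popen.communicate() suggestion (the command and input text are placeholders; communicate() reads both pipes in parallel for you, but buffers everything in memory):

from subprocess import Popen, PIPE

p = Popen(["wc"], stdin=PIPE, stdout=PIPE, stderr=PIPE, universal_newlines=True)
out, err = p.communicate("Aleph-null bottles of beer on the wall\n")
print("wc said: %s" % out.strip())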
I am trying to create a stream object that triggers a callback function any time data is written to it.
class MonitoredStream():
    def __init__(self, outstream, callback):
        self.outstream = outstream
        self.callback = callback

    def write(self, s):
        self.callback(s)
        self.outstream.write(s)

    def __getattr__(self, attr):
        return getattr(self.outstream, attr)
This works fine when I call the write method directly, but I would love it to also work when a subprocess's output is hooked up to the stream. For example:
import sys
import subprocess as sub


def f(s):
    print("Write")


p = sub.Popen(["sh", "test.sh"], stdout=MonitoredStream(sys.stdout, f))
p.communicate()
This just sends output directly to sys.stdout, bypassing the write function completely. Is there a way that I can monitor this output also?
I believe the issue here is that subprocess.Popen doesn't use the Python interface to the pipe; it instead gets the file descriptor and uses that to write to the pipe directly. Since your class passes through the attributes of the underlying stdout stream (including fileno()), Popen uses that descriptor and bypasses your code.
My best guess at solving this is to create a new pipe that sits in the middle and lets you deal with the stream yourself. I would implement this as a context manager:
import sys
import os
from subprocess import Popen
from contextlib import contextmanager


@contextmanager
def monitor(stream, callback):
    read, write = os.pipe()
    yield write
    os.close(write)
    with os.fdopen(read) as f:
        for line in f:
            callback(line)
            stream.write(line)


def f(s):
    print("Write")


with monitor(sys.stdout, f) as stream:
    p = Popen(["ls"], stdout=stream)
    p.communicate()
Although you could, of course, still use a class:
import sys
import os
from subprocess import Popen


class MonitoredStream():
    def __init__(self, stream, callback):
        self.stream = stream
        self.callback = callback
        self._read, self._write = os.pipe()

    def fileno(self):
        return self._write

    def process(self):
        os.close(self._write)
        with os.fdopen(self._read) as f:
            for line in f:
                self.callback(line)
                self.stream.write(line)


def f(s):
    print("Write")


stream = MonitoredStream(sys.stdout, f)
p = Popen(["ls"], stdout=stream)
p.communicate()
print(stream.process())
Although I feel this is less elegant.
I need to debug a child process spawned by multiprocessing.Process(). The pdb debugger seems to be unaware of forking and unable to attach to already running processes.
Are there any smarter Python debuggers which can be attached to a subprocess?
I've been searching for a simple solution to this problem and came up with this:
import sys
import pdb


class ForkedPdb(pdb.Pdb):
    """A Pdb subclass that may be used
    from a forked multiprocessing child
    """
    def interaction(self, *args, **kwargs):
        _stdin = sys.stdin
        try:
            sys.stdin = open('/dev/stdin')
            pdb.Pdb.interaction(self, *args, **kwargs)
        finally:
            sys.stdin = _stdin
Use it the same way you might use the classic Pdb:
ForkedPdb().set_trace()
Winpdb is pretty much the definition of a smarter Python debugger. It explicitly supports going down a fork; I'm not sure it works nicely with multiprocessing.Process(), but it's worth a try.
For a list of candidates to check for support of your use case, see the list of Python Debuggers in the wiki.
This is an elaboration of Romuald's answer that restores the original stdin using its file descriptor, which keeps readline working inside the debugger. In addition, pdb's special handling of KeyboardInterrupt is disabled so that it does not interfere with multiprocessing's SIGINT handler.
import os
import pdb
import sys


class ForkablePdb(pdb.Pdb):
    _original_stdin_fd = sys.stdin.fileno()
    _original_stdin = None

    def __init__(self):
        pdb.Pdb.__init__(self, nosigint=True)

    def _cmdloop(self):
        current_stdin = sys.stdin
        try:
            if not self._original_stdin:
                self._original_stdin = os.fdopen(self._original_stdin_fd)
            sys.stdin = self._original_stdin
            self.cmdloop()
        finally:
            sys.stdin = current_stdin
Building upon @memplex's idea, I had to modify it to get it to work with joblib, by setting sys.stdin in the constructor as well as passing the stdin fileno along directly via joblib.
import os
import pdb
import signal
import sys
import joblib

_original_stdin_fd = None


class ForkablePdb(pdb.Pdb):
    _original_stdin = None
    _original_pid = os.getpid()

    def __init__(self):
        pdb.Pdb.__init__(self)
        if self._original_pid != os.getpid():
            if _original_stdin_fd is None:
                raise Exception("Must set ForkablePdb._original_stdin_fd to stdin fileno")
            self.current_stdin = sys.stdin
            if not self._original_stdin:
                self._original_stdin = os.fdopen(_original_stdin_fd)
            sys.stdin = self._original_stdin

    def _cmdloop(self):
        try:
            self.cmdloop()
        finally:
            sys.stdin = self.current_stdin


def handle_pdb(sig, frame):
    ForkablePdb().set_trace(frame)


def test(i, fileno):
    global _original_stdin_fd
    _original_stdin_fd = fileno
    while True:
        pass


if __name__ == '__main__':
    print "PID: %d" % os.getpid()
    signal.signal(signal.SIGUSR2, handle_pdb)
    ForkablePdb().set_trace()
    fileno = sys.stdin.fileno()
    joblib.Parallel(n_jobs=2)(joblib.delayed(test)(i, fileno) for i in range(10))
remote-pdb can be used to debug sub-processes. After installation, put the following lines in the code you need to debug:
import remote_pdb
remote_pdb.set_trace()
remote-pdb will print a port number which will accept a telnet connection for debugging that specific process. There are some caveats around worker launch order, where stdout goes when using various frontends, etc. To ensure a specific port is used (must be free and accessible to the current user), use the following instead:
from remote_pdb import RemotePdb
RemotePdb('127.0.0.1', 4444).set_trace()
remote-pdb may also be launched via the breakpoint() command in Python 3.7.
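For example (an assumption based on the standard breakpoint() hook rather than anything remote-pdb-specific), the PYTHONBREAKPOINT environment variable can route breakpoint() calls to remote-pdb:

# Hedged sketch: run the script as
#   PYTHONBREAKPOINT=remote_pdb.set_trace python my_script.py
# (my_script.py is a hypothetical file name).
import multiprocessing

def worker():
    breakpoint()    # dispatched to remote_pdb.set_trace via PYTHONBREAKPOINT
    print("resumed after debugging")

if __name__ == '__main__':
    p = multiprocessing.Process(target=worker)
    p.start()
    p.join()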
Just use PuDB, which gives you an awesome TUI (a GUI in the terminal) and supports multiprocessing as follows:
from pudb import forked; forked.set_trace()
An idea I had was to create "dummy" classes to fake the implementation of the methods you are using from multiprocessing:
from multiprocessing import Pool


class DummyPool():
    @staticmethod
    def apply_async(func, args, kwds):
        return DummyApplyResult(func(*args, **kwds))

    def close(self): pass
    def join(self): pass


class DummyApplyResult():
    def __init__(self, result):
        self.result = result

    def get(self):
        return self.result


def foo(a, b, switch):
    # set trace when DummyPool is used
    # import ipdb; ipdb.set_trace()
    if switch:
        return b - a
    else:
        return a - b


if __name__ == '__main__':
    xml = etree.parse('C:/Users/anmendoza/Downloads/jim - 8.1/running-config.xml')
    pool = DummyPool()  # switch between Pool() and DummyPool() here
    results = []
    results.append(pool.apply_async(foo, args=(1, 100), kwds={'switch': True}))
    pool.close()
    pool.join()
    results[0].get()
Here is a version of ForkedPdb (Romuald's solution) which will work on both Windows and *nix-based systems.
import sys
import pdb
import win32console


class MyHandle():
    def __init__(self):
        self.screenBuffer = win32console.GetStdHandle(win32console.STD_INPUT_HANDLE)

    def readline(self):
        return self.screenBuffer.ReadConsole(1000)


class ForkedPdb(pdb.Pdb):
    def interaction(self, *args, **kwargs):
        _stdin = sys.stdin
        try:
            if sys.platform == "win32":
                sys.stdin = MyHandle()
            else:
                sys.stdin = open('/dev/stdin')
            pdb.Pdb.interaction(self, *args, **kwargs)
        finally:
            sys.stdin = _stdin
The problem here is that Python always connects sys.stdin in the child process to os.devnull to avoid contention for the stream. But this means that when the debugger (or a simple input()) tries to connect to stdin to get input from the user, it immediately reaches end-of-file and reports an error.
One solution, at least if you don't expect multiple debuggers to run at the same time, is to reopen stdin in the child process. That can be done by setting sys.stdin to open(0), which always opens the active terminal. This in fact is what the ForkedPdb solution does, but it can be done more simply and in an os-independent manner like this:
import multiprocessing, sys

def main():
    process = multiprocessing.Process(target=worker)
    process.start()
    process.join()

def worker():
    # Python automatically closes sys.stdin for the subprocess, so we reopen
    # stdin. This enables pdb to connect to the terminal and accept commands.
    # See https://stackoverflow.com/a/30149635/3830997.
    sys.stdin = open(0)  # or os.fdopen(0)
    print("Hello from the subprocess.")
    breakpoint()  # or import pdb; pdb.set_trace()
    print("Exited from breakpoint in the subprocess.")

if __name__ == '__main__':
    main()
If you are on a supported platform, try DTrace. Most of the BSD / Solaris / OS X family support DTrace.
Here is an intro by the author. You can use Dtrace to debug just about anything.
Here is a SO post on learning DTrace.
I have this Python based service daemon which is doing a lot of multiplexed IO (select).
From another script (also Python) I want to query this service daemon about status/information and/or control the processing (e.g. pause it, shut it down, change some parameters, etc).
What is the best way to send control messages ("from now on you process like this!") and query processed data ("what was the result of that?") using python?
I read somewhere that named pipes might work, but don't know that much about named pipes, especially in python - and whether there are any better alternatives.
Both the background service daemon AND the frontend will be programmed by me, so all options are open :)
I am using Linux.
Pipes and named pipes are good solutions for communicating between different processes.
A pipe works like a shared memory buffer, but with an interface that mimics a simple file on each of its two ends. One process writes data at one end of the pipe, and another reads that data at the other end.
Named pipes are similar, except that the pipe is associated with a real file on your computer.
More details at
http://www.softpanorama.org/Scripting/pipes.shtml
In Python, named pipe files are created with the os.mkfifo call
os.mkfifo(filename)
In the child and the parent, open this pipe as a file (note that in is a reserved word in Python, so descriptive names are used below):
fifo_out = os.open(filename, os.O_WRONLY)
fifo_in = open(filename, 'r')
To write:
os.write(fifo_out, 'xxxx')
To read:
line = fifo_in.readline()
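Putting those pieces together, a minimal sketch might look like this (the FIFO path and message text are made up; the parent writes one line and a child process reads it):

import os
import multiprocessing

FIFO_PATH = '/tmp/example_fifo'

def reader():
    with open(FIFO_PATH, 'r') as fifo_in:    # blocks until a writer opens the FIFO
        print('child read: %s' % fifo_in.readline().strip())

if __name__ == '__main__':
    if not os.path.exists(FIFO_PATH):
        os.mkfifo(FIFO_PATH)
    child = multiprocessing.Process(target=reader)
    child.start()
    with open(FIFO_PATH, 'w') as fifo_out:   # blocks until the reader opens its end
        fifo_out.write('hello over the named pipe\n')
    child.join()
    os.remove(FIFO_PATH)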
Edit: Adding links from SO
Create a temporary FIFO (named pipe) in Python?
https://stackoverflow.com/search?q=python+named+pipes
You may want to read more on "IPC and Python"
http://www.freenetpages.co.uk/hp/alan.gauld/tutipc.htm
The best way to do IPC is to use a message queue in Python, as below.
Server process, server.py (run this before running client.py and interact.py):
from multiprocessing.managers import BaseManager
import Queue

queue1 = Queue.Queue()
queue2 = Queue.Queue()

class QueueManager(BaseManager): pass

QueueManager.register('get_queue1', callable=lambda: queue1)
QueueManager.register('get_queue2', callable=lambda: queue2)

m = QueueManager(address=('', 50000), authkey='abracadabra')
s = m.get_server()
s.serve_forever()
The interactor, which handles the I/O, interact.py:
from multiprocessing.managers import BaseManager
import threading
import sys

class QueueManager(BaseManager): pass

QueueManager.register('get_queue1')
QueueManager.register('get_queue2')
m = QueueManager(address=('localhost', 50000), authkey='abracadabra')
m.connect()
queue1 = m.get_queue1()
queue2 = m.get_queue2()

def read():
    while True:
        sys.stdout.write(queue2.get())

def write():
    while True:
        queue1.put(sys.stdin.readline())

threads = []

threadr = threading.Thread(target=read)
threadr.start()
threads.append(threadr)

threadw = threading.Thread(target=write)
threadw.start()
threads.append(threadw)

for thread in threads:
    thread.join()
The client program, client.py:
from multiprocessing.managers import BaseManager
import sys
import string
import os

class QueueManager(BaseManager): pass

QueueManager.register('get_queue1')
QueueManager.register('get_queue2')
m = QueueManager(address=('localhost', 50000), authkey='abracadabra')
m.connect()
queue1 = m.get_queue1()
queue2 = m.get_queue2()

class RedirectOutput:
    def __init__(self, stdout):
        self.stdout = stdout

    def write(self, s):
        queue2.put(s)

class RedirectInput:
    def __init__(self, stdin):
        self.stdin = stdin

    def readline(self):
        return queue1.get()

# redirect standard output and input
sys.stdout = RedirectOutput(sys.stdout)
sys.stdin = RedirectInput(sys.stdin)

# The test program which will take input and produce output
Text = raw_input("Enter Text:")
print "you have entered:", Text

def x():
    while True:
        x = raw_input("Enter 'exit' to end or something else to continue")
        print x
        if 'exit' in x:
            break

x()
This can be used to communicate between two processes over a network or on the same machine.
Remember that the interactor and server processes will not terminate until you kill them manually.