I need help getting the output from pycurl, which I'm trying to run in a subprocess. I'm trying to put this output into a queue and then pull it out of the queue in a different class.
Unfortunately, right now I have no output =(
import threading
import random
import time
import Queue
import urllib2
import sys
import simplejson, pycurl
import sys, signal
import subprocess

queue = Queue.Queue()
keep_running = True
user = "username"
pswd = "pass"

class MyThread(threading.Thread):
    def __init__(self, queue):
        threading.Thread.__init__(self)
        self.queue = queue

    def run(self):
        curl_path = '/usr/bin/curl'
        curl_list = [curl_path]
        args = ('curl', 'https://stream.twitter.com/1/statuses/filter.json?track=java', '-u', 'user:pass')
        for arg in args:
            curl_list.append(arg)
        child = subprocess.Popen(
            curl_list,
            shell=False,
            #stdout=subprocess.PIPE)
            stderr=subprocess.PIPE)
        try:
            out += child.communicate()
            c_out.write(out)
            self.queue.put(c_out)
            self.queue.task_done()
        except KeyboardInterrupt:
            child.kill()

class Starter():
    def __init__(self):
        self.queue = queue
        t = MyThread(self.queue)
        t.daemon = True
        t.start()
        self.next()

    def next(self):
        while True:
            time.sleep(0.5)
            if not self.queue.empty():
                line = self.queue.get(timeout=0.2)
                print '\n\nIM IN STARTER %s' % line
            else:
                print 'waiting for queue'

def main():
    try:
        Starter()
    except KeyboardInterrupt, e:
        print 'Stopping'
        raise

main()
You seem to be confusing your arguments to subprocess quite a bit... the args list should contain all of the separate pieces of the command that you would be using for curl; you are currently putting them together in a way that is not going to work with subprocess. Your curl_list should look more like this...
curl_path = '/usr/bin/curl'
curl_list = [curl_path, 'https://stream.twitter.com/1/statuses/filter.json?track=java', '-u', 'user:pass']
You are also using an unnecessary for loop at the moment... you don't want to loop over that list, you just want to pass it to subprocess, which will handle it appropriately. And you are also going to want stdout in order to get the results from that, so you need to include that pipe as well.
I.e., the entire thing should be...
def run(self):
    curl_path = '/usr/bin/curl'
    curl_list = [curl_path, 'https://stream.twitter.com/1/statuses/filter.json?track=java', '-u', 'user:pass']
    child = subprocess.Popen(curl_list,
                             shell=False,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)
    try:
        # communicate() returns (stdout, stderr); take stdout and queue it
        out = child.communicate()[0]
        self.queue.put(out)
        self.queue.task_done()
    except KeyboardInterrupt:
        child.kill()
Might want to take another look at the subprocess documentation to better understand the changes above. I haven't actually run this through an interpreter so it may not be perfect but it should get you going in the right direction... good luck!
My goal: to read the latest "chunk" (N lines) of streaming stdout from a subprocess every M seconds.
Current code:
start the subprocess
reads stdout
once I have a chunk of N lines, print it out (or save as current chunk)
wait M seconds
repeat
I have also put code for the moment to terminate the subprocess (which is an endless stream until you hit Ctrl-C)
What I want to achieve is that, after I wait for M seconds, it always reads the latest N lines and not the subsequent N lines in stdout (those can be discarded, as I'm only interested in the latest).
My end goal would be to spawn a thread to run the process and keep saving the latest lines and then call from the main process whenever I need the latest results of the stream.
Any help would be greatly appreciated!
#!/usr/bin/env python3
import signal
import time
from subprocess import Popen, PIPE

sig = signal.SIGTERM
N = 9
M = 5
countlines = 0

p = Popen(["myprogram"], stdout=PIPE, bufsize=1, universal_newlines=True)
chunk = []
for line in p.stdout:
    countlines += 1
    chunk.append(line)
    if len(chunk) == N:
        print(chunk)
        chunk = []
        time.sleep(M)
    if countlines > 100:
        p.send_signal(sig)
        break
print("done")
After much searching, I stumbled upon a solution here:
https://eli.thegreenplace.net/2017/interacting-with-a-long-running-child-process-in-python/
Eli's "Launch, interact, get output in real time, terminate" code section worked for me.
So far it's the most elegant solution I've found.
Adapted to my problem above, and written within a class (not shown here):
def output_reader(self, proc):
    chunk = []
    countlines = 0
    for line in iter(proc.stdout.readline, b''):
        countlines += 1
        chunk.append(line.decode("utf-8"))
        if countlines == N:
            self.current_chunk = chunk
            chunk = []
            countlines = 0

def main():
    proc = subprocess.Popen(['myprocess'],
                            stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT)
    t = threading.Thread(target=output_reader, args=(proc,))
    t.start()
    try:
        time.sleep(0.2)
        for i in range(10):
            time.sleep(1)  # waits a while before getting latest lines
            print(self.current_chunk)
    finally:
        proc.terminate()
        try:
            proc.wait(timeout=0.2)
            print('== subprocess exited with rc =', proc.returncode)
        except subprocess.TimeoutExpired:
            print('subprocess did not terminate in time')
    t.join()
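For reference, here is a self-contained sketch of the kind of class wrapper alluded to above (the class and attribute names here are my own assumptions, not the original code):

import subprocess
import threading
import time

N = 9  # lines per chunk

class StreamWatcher:
    """Keep only the most recent chunk of N lines from a subprocess."""
    def __init__(self, cmd):
        self.current_chunk = []
        self.proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                                     stderr=subprocess.STDOUT)
        self.thread = threading.Thread(target=self._output_reader, daemon=True)
        self.thread.start()

    def _output_reader(self):
        chunk = []
        for line in iter(self.proc.stdout.readline, b''):
            chunk.append(line.decode("utf-8"))
            if len(chunk) == N:
                self.current_chunk = chunk  # replace, don't accumulate
                chunk = []

    def stop(self):
        self.proc.terminate()
        self.thread.join()

# usage: poll the latest chunk whenever the main process needs it
watcher = StreamWatcher(["myprogram"])
try:
    for _ in range(10):
        time.sleep(1)
        print(watcher.current_chunk)
finally:
    watcher.stop()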
Here is another possible solution. It is a program that you would run as a separate process in the pipeline; it presents a REST API that, when queried, returns the last N lines it has read on stdin (N and the port number are supplied on the command line). It uses Flask's built-in development server (run), so it should not be used in situations where the outside world has access to the local server port to make requests, though this could be adapted.
import sys
import time
import threading
import argparse
from flask import Flask, request
from flask_restful import Resource, Api

class Server:
    def __init__(self):
        self.data = {'at_eof': False,
                     'lines_read': 0,
                     'latest_lines': []}
        self.thread = None
        self.args = None
        self.stop = False

    def parse_args(self):
        parser = argparse.ArgumentParser()
        parser.add_argument("num_lines", type=int,
                            help="number of lines to cache")
        parser.add_argument("port", type=int,
                            help="port to serve on")
        self.args = parser.parse_args()

    def start_updater(self):
        def updater():
            lines = self.data['latest_lines']
            while True:
                if self.stop:
                    return
                line = sys.stdin.readline()
                if not line:
                    break
                self.data['lines_read'] += 1
                lines.append(line)
                while len(lines) > self.args.num_lines:
                    lines.pop(0)
            self.data['at_eof'] = True
        self.thread = threading.Thread(target=updater)
        self.thread.start()

    def get_data(self):
        return self.data

    def shutdown(self):
        self.stop = True
        func = request.environ.get('werkzeug.server.shutdown')
        if func:
            func()
            return 'Shutting down'
        else:
            return 'shutdown failed'

    def add_apis(self, app):
        class GetData(Resource):
            get = self.get_data
        class Shutdown(Resource):
            get = self.shutdown
        api = Api(app)
        api.add_resource(GetData, "/getdata")
        api.add_resource(Shutdown, "/shutdown")

    def run(self):
        self.parse_args()
        self.start_updater()
        app = Flask(__name__)
        self.add_apis(app)
        app.run(port=self.args.port)

server = Server()
server.run()
Example usage: here is a test program whose output we want to serve:
import sys
import time

for i in range(100):
    print("this is line {}".format(i))
    sys.stdout.flush()
    time.sleep(.1)
And a simple pipeline to launch it (here from the Linux shell prompt, but it could be done via subprocess.Popen, as sketched below), serving the last 5 lines, on port 8001:
python ./writer.py | python ./server.py 5 8001
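If you want to launch that same pipeline from Python rather than the shell, a rough equivalent with subprocess.Popen (using the same file names as above) could look like this:

import subprocess

# connect the writer's stdout directly to the server's stdin
writer = subprocess.Popen(["python", "./writer.py"], stdout=subprocess.PIPE)
server = subprocess.Popen(["python", "./server.py", "5", "8001"],
                          stdin=writer.stdout)
writer.stdout.close()  # so the writer gets SIGPIPE if the server exits
server.wait()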
An example query, here using curl as the client but it could be done via Python requests:
$ curl -s http://localhost:8001/getdata
{"at_eof": false, "lines_read": 30, "latest_lines": ["this is line 25\n", "this is line 26\n", "this is line 27\n", "this is line 28\n", "this is line 29\n"]}
The server also provides an http://localhost:<port>/shutdown URL to terminate it, though if you call it before you first see "at_eof": true, then expect the writer to die with a broken pipe.
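The same queries from Python, using the requests package rather than curl (a small sketch, assuming requests is installed):

import requests

resp = requests.get("http://localhost:8001/getdata")
data = resp.json()
print(data["lines_read"], data["latest_lines"])

# when finished, ask the server to stop
requests.get("http://localhost:8001/shutdown")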
This code:
import multiprocessing as mp
from threading import Thread
import subprocess
import time

class WorkerProcess(mp.Process):
    def run(self):
        # Simulate long running task
        self.subprocess = subprocess.Popen(['python', '-c', 'import time; time.sleep(1000)'])
        self.code = self.subprocess.wait()

class ControlThread(Thread):
    def run(self):
        jobs = []
        for _ in range(2):
            job = WorkerProcess()
            jobs.append(job)
            job.start()
        # wait for a while and then kill jobs
        time.sleep(2)
        for job in jobs:
            job.terminate()

if __name__ == "__main__":
    controller = ControlThread()
    controller.start()
When I terminate the spawned WorkerProcess instances, they die just fine; however, the subprocesses python -c 'import time; time.sleep(1000)' run until completion. This is well documented in the official docs, but how do I kill the child processes of a killed process?
A possible solution might be:
Wrap the WorkerProcess.run() method inside a try/except block catching SIGTERM, and terminate the subprocess call. But I am not sure how to catch SIGTERM in the WorkerProcess.
I also tried setting signal.signal(signal.SIGINT, handler) in the WorkerProcess, but I get a ValueError, because it is only allowed to be set in the main thread.
What do I do now?
EDIT: As #svalorzen pointed out in comments this doesn't really work since the reference to self.subprocess is lost.
Finally came to a clean, acceptable solution. Since mp.Process.terminate is a method, we can override it.
class WorkerProcess(mp.Process):
    def run(self):
        # Simulate long running task
        self.subprocess = subprocess.Popen(['python', '-c', 'import time; time.sleep(1000)'])
        self.code = self.subprocess.wait()

    # HERE
    def terminate(self):
        self.subprocess.terminate()
        super(WorkerProcess, self).terminate()
You can use queues to message to your subprocesses and ask them nicely to terminate their children before exiting themselves. You can't use signals in anywhere else but your main thread, so signals are not suitable for this.
Curiously, when I modify the code like this, the subprocesses die as well even if I interrupt it with Ctrl+C. This may be an OS-related thing, though.
import multiprocessing as mp
from threading import Thread
import subprocess
import time
from Queue import Empty

class WorkerProcess(mp.Process):
    def __init__(self, que):
        super(WorkerProcess, self).__init__()
        self.queue = que

    def run(self):
        # Simulate long running task
        self.subprocess = subprocess.Popen(['python', '-c', 'import time; time.sleep(1000)'])
        while True:
            a = self.subprocess.poll()
            if a is None:
                time.sleep(1)
                try:
                    if self.queue.get(0) == "exit":
                        print "kill"
                        self.subprocess.kill()
                        self.subprocess.wait()
                        break
                    else:
                        pass
                except Empty:
                    pass
                print "run"
            else:
                print "exiting"

class ControlThread(Thread):
    def run(self):
        jobs = []
        queues = []
        for _ in range(2):
            q = mp.Queue()
            job = WorkerProcess(q)
            queues.append(q)
            jobs.append(job)
            job.start()
        # wait for a while and then kill jobs
        time.sleep(5)
        for q in queues:
            q.put("exit")
        time.sleep(30)

if __name__ == "__main__":
    controller = ControlThread()
    controller.start()
Hope this helps.
Hannu
I am trying to asynchronously run the Popen command from subprocess, so that I can run other stuff in the background.
import subprocess
import requests
import asyncio
import asyncio.subprocess

async def x(message):
    if len(message.content.split()) > 1:
        #output = asyncio.create_subprocess_shell(message.content[3:], shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        output = subprocess.Popen(message.content[3:], shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        return output.communicate()[0].decode('utf-8')
I have tried to understand https://docs.python.org/3/library/asyncio-subprocess.html but I am not sure what a protocol factory is.
When I came to this question, I expected the answer to really use asyncio for interprocess communication.
I have found the following resource useful:
https://github.com/python/asyncio/blob/master/examples/child_process.py
and below is my simplified example (using 3.5+ async/await syntax), which reads lines and outputs them sorted:
import asyncio
from subprocess import Popen, PIPE

async def connect_write_pipe(file):
    """Return a write-only transport wrapping a writable pipe"""
    loop = asyncio.get_event_loop()
    transport, _ = await loop.connect_write_pipe(asyncio.Protocol, file)
    return transport

async def connect_read_pipe(file):
    """Wrap a readable pipe in a stream"""
    loop = asyncio.get_event_loop()
    stream_reader = asyncio.StreamReader(loop=loop)
    def factory():
        return asyncio.StreamReaderProtocol(stream_reader)
    transport, _ = await loop.connect_read_pipe(factory, file)
    return stream_reader, transport

async def main(loop):
    # start subprocess and wrap stdin, stdout, stderr
    p = Popen(['/usr/bin/sort'], stdin=PIPE, stdout=PIPE, stderr=PIPE)
    stdin = await connect_write_pipe(p.stdin)
    stdout, stdout_transport = await connect_read_pipe(p.stdout)
    stderr, stderr_transport = await connect_read_pipe(p.stderr)

    # interact with subprocess
    name = {stdout: 'OUT', stderr: 'ERR'}
    registered = {
        asyncio.Task(stderr.read()): stderr,
        asyncio.Task(stdout.read()): stdout
    }

    to_sort = b"one\ntwo\nthree\n"
    stdin.write(to_sort)
    stdin.close()  # this way we tell we do not have anything else

    # get and print lines from stdout, stderr
    timeout = None
    while registered:
        done, pending = await asyncio.wait(
            registered, timeout=timeout,
            return_when=asyncio.FIRST_COMPLETED)
        if not done:
            break
        for f in done:
            stream = registered.pop(f)
            res = f.result()
            if res != b'':
                print(name[stream], res.decode('ascii').rstrip())
                registered[asyncio.Task(stream.read())] = stream
        timeout = 0.0

    stdout_transport.close()
    stderr_transport.close()

if __name__ == '__main__':
    loop = asyncio.get_event_loop()
    try:
        loop.run_until_complete(main(loop))
    finally:
        loop.close()
NB: without taking special measures, the amount of data to be written into the pipe is limited. In my system it was possible to write just over 700000 bytes before using up pipe buffers.
There are also other examples there, using create_subprocess_shell.
I have not yet used asyncio in real projects, so suggestions for improvements in the comments are welcome.
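For completeness, here is a minimal sketch of the create_subprocess_shell variant mentioned above (this is my own assumption of how it would look, not code taken from those examples):

import asyncio

async def run_sort():
    # launch the same /usr/bin/sort command through the shell helper
    proc = await asyncio.create_subprocess_shell(
        "/usr/bin/sort",
        stdin=asyncio.subprocess.PIPE,
        stdout=asyncio.subprocess.PIPE)
    out, _ = await proc.communicate(b"one\ntwo\nthree\n")
    print(out.decode().rstrip())

asyncio.get_event_loop().run_until_complete(run_sort())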
It's the right way to go...! Use async/await.
Tested on Python 3.x [Windows, macOS].
import asyncio
from asyncio.subprocess import PIPE, STDOUT
import subprocess
import signal
import sys

def signal_handler(signal, frame):
    loop.stop()
    sys.exit(0)

async def run_async(loop=''):
    cmd = 'sudo long_running_cmd --opt1=AAAA --opt2=BBBB'
    print("[INFO] Starting script...")
    await asyncio.create_subprocess_shell(cmd, stdin=PIPE, stdout=PIPE, stderr=STDOUT)
    print("[INFO] Script is complete.")

loop = asyncio.get_event_loop()
signal.signal(signal.SIGINT, signal_handler)
tasks = [loop.create_task(run_async())]
wait_tasks = asyncio.wait(tasks)
loop.run_until_complete(wait_tasks)
loop.close()
Core logic:
process = await asyncio.create_subprocess_shell(cmd, stdin=PIPE, stdout=PIPE, stderr=STDOUT)
await process.wait()
I eventually found the answer to my question, which utilizes async.
http://pastebin.com/Zj8SK1CG
Tested with python 3.5. Just ask if you have questions.
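The pastebin isn't reproduced here; for readers who can't reach it, something along these lines (my own sketch, not necessarily identical to the linked code) runs the command without blocking the event loop:

import asyncio

async def x(message):
    if len(message.content.split()) > 1:
        # run the shell command asynchronously instead of via subprocess.Popen
        proc = await asyncio.create_subprocess_shell(
            message.content[3:],
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.STDOUT)
        stdout, _ = await proc.communicate()
        return stdout.decode('utf-8')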
import threading
import time
import subprocess
import shlex
from sys import stdout

# Only data within a class are actually shared by the threads.
# Let's use a class as communicator (there could be problems if you have more than
# a single thread)
class Communicator(object):
    counter = 0
    stop = False
    arg = None
    result = None

# Here we can define what you want to do. There are other methods to do that
# but this is the one I prefer.
class ThreadedFunction(threading.Thread):
    def run(self, *args, **kwargs):
        super().run()
        command = c.arg
        # Here what you want to do...
        command = shlex.split(command)
        print(time.time())  # this is just to check that the command (sleep 5) is executed
        output = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
        print('\n', time.time())
        c.result = output
        if c.stop: return None  # This is useful only within loops within threads

# Create a class instance
c = Communicator()
c.arg = 'time sleep 5'  # Here I used the 'time' only to have some output

# Create the thread and start it
t = ThreadedFunction()
t.start()  # Start the thread and do something else...

# ...for example count the seconds in the mean time..
try:
    for j in range(100):
        c.counter += 1
        stdout.write('\r{:}'.format(c.counter))
        stdout.flush()
        time.sleep(1)
        if c.result != None:
            print(c.result)
            break
except:
    c.stop = True
This one is much simpler; I found it after the other reply, which could anyway be interesting... so I left it.
import time
import subprocess
import shlex
from sys import stdout

command = 'time sleep 5'  # Here I used the 'time' only to have some output

def x(command):
    cmd = shlex.split(command)
    p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    return p

# Start the subprocess and do something else...
p = x(command)

# ...for example count the seconds in the mean time..
try:  # This takes care of killing the subprocess if problems occur
    for j in range(100):
        stdout.write('\r{:}'.format(j))
        stdout.flush()
        time.sleep(1)
        if p.poll() != None:
            print(p.communicate())
            break
except:
    p.terminate()  # or p.kill()
The asynchronism is evident from the fact that the Python script prints the counter value on stdout while the background process runs the sleep command. The script exits after ~5 seconds, printing the output of the bash time command after having printed the counter in the meantime, which is evidence that it works.
The function glib.spawn_async allows you to hook up three callbacks, which are called on events on stdout and stderr and on process completion.
How can I mimic the same functionality with subprocess with either threads or asyncio?
I am more interested in the functionality than in threading vs. asyncio, but an answer that contains both will earn a bounty.
Here is a toy program that shows what I want to do:
import glib
import logging
import os
import gtk

class MySpawn(object):
    def __init__(self):
        self._logger = logging.getLogger(self.__class__.__name__)

    def execute(self, cmd, on_done, on_stdout, on_stderr):
        self.pid, self.idin, self.idout, self.iderr = \
            glib.spawn_async(cmd,
                             flags=glib.SPAWN_DO_NOT_REAP_CHILD,
                             standard_output=True,
                             standard_error=True)
        fout = os.fdopen(self.idout, "r")
        ferr = os.fdopen(self.iderr, "r")
        glib.child_watch_add(self.pid, on_done)
        glib.io_add_watch(fout, glib.IO_IN, on_stdout)
        glib.io_add_watch(ferr, glib.IO_IN, on_stderr)
        return self.pid

if __name__ == '__main__':
    logging.basicConfig(format='%(thread)d %(levelname)s: %(message)s',
                        level=logging.DEBUG)
    cmd = '/usr/bin/git ls-remote https://github.com/DiffSK/configobj'.split()

    def on_done(pid, retval, *args):
        logging.info("That's all folks!…")

    def on_stdout(fobj, cond):
        """This blocks which is fine for this toy example…"""
        for line in fobj.readlines():
            logging.info(line.strip())
        return True

    def on_stderr(fobj, cond):
        """This blocks which is fine for this toy example…"""
        for line in fobj.readlines():
            logging.error(line.strip())
        return True

    runner = MySpawn()
    runner.execute(cmd, on_done, on_stdout, on_stderr)
    try:
        gtk.main()
    except KeyboardInterrupt:
        print('')
I should add that since readlines() is blocking, the above will buffer all the output and deliver it at once. If this is not what you want, you have to use readline() instead and make sure that, when the command ends, you finish reading any lines you did not read before.
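For that non-buffering variant, a rough sketch of what the callbacks could look like, handling one line per IO event (runner.fout is hypothetical here; MySpawn would have to keep the file object as an attribute for the final drain to work):

def on_stdout(fobj, cond):
    # handle only the single line that triggered this IO_IN event
    line = fobj.readline()
    if line:
        logging.info(line.strip())
    return True

def on_done(pid, retval, *args):
    # drain lines written between the last IO event and process exit;
    # runner.fout is an assumed attribute, not part of the toy example above
    for line in runner.fout.readlines():
        logging.info(line.strip())
    logging.info("That's all folks!…")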
asyncio has subprocess_exec, there is no need to use the subprocess module at all:
import asyncio

class Handler(asyncio.SubprocessProtocol):
    def pipe_data_received(self, fd, data):
        # fd == 1 for stdout, and 2 for stderr
        print("Data from /bin/ls on fd %d: %s" % (fd, data.decode()))

    def pipe_connection_lost(self, fd, exc):
        print("Connection lost to /bin/ls")

    def process_exited(self):
        print("/bin/ls is finished.")

loop = asyncio.get_event_loop()
coro = loop.subprocess_exec(Handler, "/bin/ls", "/")
loop.run_until_complete(coro)
loop.close()
With subprocess and threading, it's simple as well. You can just spawn a thread per pipe, and one to wait() for the process:
import subprocess
import threading

class PopenWrapper(object):
    def __init__(self, args):
        self.process = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.DEVNULL)
        self.stdout_reader_thread = threading.Thread(target=self._reader, args=(self.process.stdout,))
        self.stderr_reader_thread = threading.Thread(target=self._reader, args=(self.process.stderr,))
        self.exit_watcher = threading.Thread(target=self._exit_watcher)
        self.stdout_reader_thread.start()
        self.stderr_reader_thread.start()
        self.exit_watcher.start()

    def _reader(self, fileobj):
        for line in fileobj:
            self.on_data(fileobj, line)

    def _exit_watcher(self):
        self.process.wait()
        self.stdout_reader_thread.join()
        self.stderr_reader_thread.join()
        self.on_exit()

    def on_data(self, fd, data):
        raise NotImplementedError

    def on_exit(self):
        raise NotImplementedError

    def join(self):
        self.process.wait()

class LsWrapper(PopenWrapper):
    def on_data(self, fd, data):
        print("Received on fd %r: %s" % (fd, data))

    def on_exit(self):
        print("Process exited.")

LsWrapper(["/bin/ls", "/"]).join()
However, mind that glib does not use threads to asynchronously execute your callbacks. It uses an event loop, just as asyncio does. The idea is that at the core of your program is a loop that waits until something happens, and then synchronously executes an associated callback. In your case, that's "data becomes available for reading on one of the pipes" and "the subprocess has exited". In general, it's also stuff like "the X11 server reported mouse movement", "there's incoming network traffic", etc. You can emulate glib's behaviour by writing your own event loop. Use the select module on the two pipes. If select reports that the pipes are readable but read returns no data, the process likely exited; call the poll() method on the subprocess object in this case to check whether it is completed, and call your exit callback if it has, or an error callback otherwise.
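A bare-bones sketch of such a select-based loop, under the assumption that plain blocking pipes and line-sized reads are good enough (function and callback names are my own, not glib's):

import select
import subprocess

def run_with_callbacks(cmd, on_stdout, on_stderr, on_done):
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    open_pipes = {proc.stdout: on_stdout, proc.stderr: on_stderr}
    while open_pipes:
        # block until at least one pipe has data (or has hit EOF)
        readable, _, _ = select.select(list(open_pipes), [], [])
        for pipe in readable:
            data = pipe.readline()
            if data:
                open_pipes[pipe](pipe, data)
            else:
                # EOF on this pipe; stop watching it
                del open_pipes[pipe]
    proc.wait()
    on_done(proc.returncode)

# usage with callbacks similar to the question (signatures adapted)
run_with_callbacks(
    ['/usr/bin/git', 'ls-remote', 'https://github.com/DiffSK/configobj'],
    on_stdout=lambda pipe, line: print('OUT', line.decode().rstrip()),
    on_stderr=lambda pipe, line: print('ERR', line.decode().rstrip()),
    on_done=lambda rc: print('done, rc =', rc))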
I've created a function that uses PyQt5 to "render" HTML and return the result. It's as follows:
def render(source_html):
    """Fully render HTML, JavaScript and all."""
    import sys
    from PyQt5.QtWidgets import QApplication
    from PyQt5.QtWebKitWidgets import QWebPage

    class Render(QWebPage):
        def __init__(self, html):
            self.html = None
            self.app = QApplication(sys.argv)
            QWebPage.__init__(self)
            self.loadFinished.connect(self._loadFinished)
            self.mainFrame().setHtml(html)
            self.app.exec_()

        def _loadFinished(self, result):
            self.html = self.mainFrame().toHtml()
            self.app.quit()

    return Render(source_html).html
Occasionally its threads will hang indefinitely and I'll have to kill the whole program. Unfortunately, PyQt5 may as well be a black box, as I'm not sure how to kill it when it misbehaves.
Ideally I'd be able to implement a timeout of n seconds. As a workaround, I've put the function in its own script render.py and am calling it via subprocess with this monstrosity:
def render(html):
    """Return fully rendered HTML, JavaScript and all."""
    args = ['render.py', '-']
    timeout = 20
    try:
        return subprocess.check_output(args,
                                       input=html,
                                       timeout=timeout,
                                       universal_newlines=True)
    # Python 2's subprocess.check_output doesn't support input or timeout
    except TypeError:
        class SubprocessError(Exception):
            """Base exception from subprocess module."""
            pass

        class TimeoutExpired(SubprocessError):
            """
            This exception is raised when the timeout expires while
            waiting for a child process.
            """
            def __init__(self, cmd, timeout, output=None):
                super(TimeoutExpired, self).__init__()
                self.cmd = cmd
                self.timeout = timeout
                self.output = output

            def __str__(self):
                return ('Command %r timed out after %s seconds' %
                        (self.cmd, self.timeout))

        process = subprocess.Popen(['timeout', str(timeout)] + args,
                                   stderr=subprocess.PIPE,
                                   stdin=subprocess.PIPE,
                                   stdout=subprocess.PIPE)
        # pipe html into render.py's stdin
        output = process.communicate(
            html.encode('utf8'))[0].decode('latin1')
        retcode = process.poll()
        if retcode == 124:
            raise TimeoutExpired(args, timeout)
        return output
The multiprocessing module appears to greatly simplify things:
from multiprocessing import Pool
pool = Pool(1)
rendered_html = pool.apply_async(render, args=(html,)).get(timeout=20)
pool.terminate()
Is there a way to implement a timeout that doesn't necessitate these sort of shenanigans?
I was looking for a solution too; apparently there isn't one, on purpose.
If you're using Linux and all you want is Python to attempt something for N seconds and then time out and handle an error condition after those N seconds, you can do this:
import time
import signal

# This stuff is so when we get SIGALRM from the timeout functionality we can handle it instead of
# crashing to the ground
class TimeOutError(Exception):
    pass

def raise_timeout(var1, var2):
    raise TimeOutError

signal.signal(signal.SIGALRM, raise_timeout)

# Turn the alarm on
signal.alarm(1)

# Try your thing
try:
    time.sleep(2)
except TimeOutError as e:
    print(" We hit our timeout value and we bailed out of whatever that BS was.")

# Remember to turn the alarm back off if your attempt succeeds!
signal.alarm(0)
The one drawback is that you can't nest signal.alarm() hooks; if, in your try statement, you're calling something else that also then sets a signal.alarm(), it will override the first one and screw your stuff up.
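A tiny sketch of that pitfall, reusing the TimeOutError and handler from the block above: the inner alarm silently replaces the outer one, although signal.alarm() does return the seconds that were remaining, so you could save and crudely restore it by hand.

import signal
import time

signal.alarm(10)               # outer timeout: 10 seconds
remaining = signal.alarm(2)    # inner call cancels the outer alarm; returns ~10
try:
    time.sleep(5)              # only the 2-second alarm fires now
except TimeOutError:
    pass
signal.alarm(remaining)        # crude "restore" of the outer timeout (ignores elapsed time)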