I have a python subprocess that I'm trying to read output and error streams from. Currently I have it working, but I'm only able to read from stderr after I've finished reading from stdout. Here's what it looks like:
process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
stdout_iterator = iter(process.stdout.readline, b"")
stderr_iterator = iter(process.stderr.readline, b"")
for line in stdout_iterator:
# Do stuff with line
print line
for line in stderr_iterator:
# Do stuff with line
print line
As you can see, the stderr for loop can't start until the stdout loop completes. How can I modify this to be able to read from both in the correct order the lines come in?
To clarify: I still need to be able to tell whether a line came from stdout or stderr because they will be treated differently in my code.
The code in your question may deadlock if the child process produces enough output on stderr (~100KB on my Linux machine).
There is a communicate() method that allows to read from both stdout and stderr separately:
from subprocess import Popen, PIPE
process = Popen(command, stdout=PIPE, stderr=PIPE)
output, err = process.communicate()
If you need to read the streams while the child process is still running then the portable solution is to use threads (not tested):
from subprocess import Popen, PIPE
from threading import Thread
from Queue import Queue # Python 2
def reader(pipe, queue):
try:
with pipe:
for line in iter(pipe.readline, b''):
queue.put((pipe, line))
finally:
queue.put(None)
process = Popen(command, stdout=PIPE, stderr=PIPE, bufsize=1)
q = Queue()
Thread(target=reader, args=[process.stdout, q]).start()
Thread(target=reader, args=[process.stderr, q]).start()
for _ in range(2):
for source, line in iter(q.get, None):
print "%s: %s" % (source, line),
See:
Python: read streaming input from subprocess.communicate()
Non-blocking read on a subprocess.PIPE in python
Python subprocess get children's output to file and terminal?
Here's a solution based on selectors, but one that preserves order, and streams variable-length characters (even single chars).
The trick is to use read1(), instead of read().
import selectors
import subprocess
import sys
p = subprocess.Popen(
["python", "random_out.py"], stdout=subprocess.PIPE, stderr=subprocess.PIPE
)
sel = selectors.DefaultSelector()
sel.register(p.stdout, selectors.EVENT_READ)
sel.register(p.stderr, selectors.EVENT_READ)
while True:
for key, _ in sel.select():
data = key.fileobj.read1().decode()
if not data:
exit()
if key.fileobj is p.stdout:
print(data, end="")
else:
print(data, end="", file=sys.stderr)
If you want a test program, use this.
import sys
from time import sleep
for i in range(10):
print(f" x{i} ", file=sys.stderr, end="")
sleep(0.1)
print(f" y{i} ", end="")
sleep(0.1)
The order in which a process writes data to different pipes is lost after write.
There is no way you can tell if stdout has been written before stderr.
You can try to read data simultaneously from multiple file descriptors in a non-blocking way
as soon as data is available, but this would only minimize the probability that the order is incorrect.
This program should demonstrate this:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import select
import subprocess
testapps={
'slow': '''
import os
import time
os.write(1, 'aaa')
time.sleep(0.01)
os.write(2, 'bbb')
time.sleep(0.01)
os.write(1, 'ccc')
''',
'fast': '''
import os
os.write(1, 'aaa')
os.write(2, 'bbb')
os.write(1, 'ccc')
''',
'fast2': '''
import os
os.write(1, 'aaa')
os.write(2, 'bbbbbbbbbbbbbbb')
os.write(1, 'ccc')
'''
}
def readfds(fds, maxread):
while True:
fdsin, _, _ = select.select(fds,[],[])
for fd in fdsin:
s = os.read(fd, maxread)
if len(s) == 0:
fds.remove(fd)
continue
yield fd, s
if fds == []:
break
def readfromapp(app, rounds=10, maxread=1024):
f=open('testapp.py', 'w')
f.write(testapps[app])
f.close()
results={}
for i in range(0, rounds):
p = subprocess.Popen(['python', 'testapp.py'], stdout=subprocess.PIPE
, stderr=subprocess.PIPE)
data=''
for (fd, s) in readfds([p.stdout.fileno(), p.stderr.fileno()], maxread):
data = data + s
results[data] = results[data] + 1 if data in results else 1
print 'running %i rounds %s with maxread=%i' % (rounds, app, maxread)
results = sorted(results.items(), key=lambda (k,v): k, reverse=False)
for data, count in results:
print '%03i x %s' % (count, data)
print
print "=> if output is produced slowly this should work as whished"
print " and should return: aaabbbccc"
readfromapp('slow', rounds=100, maxread=1024)
print
print "=> now mostly aaacccbbb is returnd, not as it should be"
readfromapp('fast', rounds=100, maxread=1024)
print
print "=> you could try to read data one by one, and return"
print " e.g. a whole line only when LF is read"
print " (b's should be finished before c's)"
readfromapp('fast', rounds=100, maxread=1)
print
print "=> but even this won't work ..."
readfromapp('fast2', rounds=100, maxread=1)
and outputs something like this:
=> if output is produced slowly this should work as whished
and should return: aaabbbccc
running 100 rounds slow with maxread=1024
100 x aaabbbccc
=> now mostly aaacccbbb is returnd, not as it should be
running 100 rounds fast with maxread=1024
006 x aaabbbccc
094 x aaacccbbb
=> you could try to read data one by one, and return
e.g. a whole line only when LF is read
(b's should be finished before c's)
running 100 rounds fast with maxread=1
003 x aaabbbccc
003 x aababcbcc
094 x abababccc
=> but even this won't work ...
running 100 rounds fast2 with maxread=1
003 x aaabbbbbbbbbbbbbbbccc
001 x aaacbcbcbbbbbbbbbbbbb
008 x aababcbcbcbbbbbbbbbbb
088 x abababcbcbcbbbbbbbbbb
This works for Python3 (3.6):
p = subprocess.Popen(cmd, stdout=subprocess.PIPE,
stderr=subprocess.PIPE, universal_newlines=True)
# Read both stdout and stderr simultaneously
sel = selectors.DefaultSelector()
sel.register(p.stdout, selectors.EVENT_READ)
sel.register(p.stderr, selectors.EVENT_READ)
ok = True
while ok:
for key, val1 in sel.select():
line = key.fileobj.readline()
if not line:
ok = False
break
if key.fileobj is p.stdout:
print(f"STDOUT: {line}", end="")
else:
print(f"STDERR: {line}", end="", file=sys.stderr)
from https://docs.python.org/3/library/subprocess.html#using-the-subprocess-module
If you wish to capture and combine both streams into one, use
stdout=PIPE and stderr=STDOUT instead of capture_output.
so the easiest solution would be:
process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
stdout_iterator = iter(process.stdout.readline, b"")
for line in stdout_iterator:
# Do stuff with line
print line
I know this question is very old, but this answer may help others who stumble upon this page in researching a solution for a similar situation, so I'm posting it anyway.
I've built a simple python snippet that will merge any number of pipes into a single one. Of course, as stated above, the order cannot be guaranteed, but this is as close as I think you can get in Python.
It spawns a thread for each of the pipes, reads them line by line and puts them into a Queue (which is FIFO). The main thread loops through the queue, yielding each line.
import threading, queue
def merge_pipes(**named_pipes):
r'''
Merges multiple pipes from subprocess.Popen (maybe other sources as well).
The keyword argument keys will be used in the output to identify the source
of the line.
Example:
p = subprocess.Popen(['some', 'call'],
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
outputs = {'out': log.info, 'err': log.warn}
for name, line in merge_pipes(out=p.stdout, err=p.stderr):
outputs[name](line)
This will output stdout to the info logger, and stderr to the warning logger
'''
# Constants. Could also be placed outside of the method. I just put them here
# so the method is fully self-contained
PIPE_OPENED=1
PIPE_OUTPUT=2
PIPE_CLOSED=3
# Create a queue where the pipes will be read into
output = queue.Queue()
# This method is the run body for the threads that are instatiated below
# This could be easily rewritten to be outside of the merge_pipes method,
# but to make it fully self-contained I put it here
def pipe_reader(name, pipe):
r"""
reads a single pipe into the queue
"""
output.put( ( PIPE_OPENED, name, ) )
try:
for line in iter(pipe.readline,''):
output.put( ( PIPE_OUTPUT, name, line.rstrip(), ) )
finally:
output.put( ( PIPE_CLOSED, name, ) )
# Start a reader for each pipe
for name, pipe in named_pipes.items():
t=threading.Thread(target=pipe_reader, args=(name, pipe, ))
t.daemon = True
t.start()
# Use a counter to determine how many pipes are left open.
# If all are closed, we can return
pipe_count = 0
# Read the queue in order, blocking if there's no data
for data in iter(output.get,''):
code=data[0]
if code == PIPE_OPENED:
pipe_count += 1
elif code == PIPE_CLOSED:
pipe_count -= 1
elif code == PIPE_OUTPUT:
yield data[1:]
if pipe_count == 0:
return
This works for me (on windows):
https://github.com/waszil/subpiper
from subpiper import subpiper
def my_stdout_callback(line: str):
print(f'STDOUT: {line}')
def my_stderr_callback(line: str):
print(f'STDERR: {line}')
my_additional_path_list = [r'c:\important_location']
retcode = subpiper(cmd='echo magic',
stdout_callback=my_stdout_callback,
stderr_callback=my_stderr_callback,
add_path_list=my_additional_path_list)
Related
I've got a python 3 script i use to backup and encrypt mysqldump files and im having a particular issues with one database that is 67gb after encryption & compression.
The mysqldump is outputting errorcode 3, so i'd like to catch the actual error message, as this could mean a couple of things.
The random thing is the backup file is the right size, so not sure what the error means. it worked once on this database...
the code looks like the below and i'd really appreciate some help on how to add non-blocking capture of stderr when the return code is anything but 0 for both p1 and p2.
Also, if im doing anything glaringly obvious wrong, please do let me know, as i'd like to make sure this is a reliable process. it has been working fine on my databases under 15gb compressed.
def dbbackup():
while True:
item = q.get()
#build up folder structure, daily, weekly, monthy & project
genfile = config[item]['DBName'] + '-' + dateyymmdd + '-'
genfile += config[item]['PubKey'] + '.sql.gpg'
if os.path.isfile(genfile):
syslog.syslog(item + ' ' + genfile + ' exists, removing')
os.remove(genfile)
syslog.syslog(item + ' will be backed up as ' + genfile)
args = ['mysqldump', '-u', config[item]['UserNm'],
'-p' + config[item]['Passwd'], '-P', config[item]['Portnu'],
'-h', config[item]['Server']]
args.extend(config[item]['MyParm'].split())
args.append(config[item]['DBName'])
p1 = subprocess.Popen(args, stdout=subprocess.PIPE)
p2 = subprocess.Popen(['gpg', '-o', genfile, '-r',
config[item]['PubKey'], '-z', '9', '--encrypt'], stdin=p1.stdout)
p2.wait()
if p2.returncode == 0:
syslog.syslog(item + ' encryption successful')
else:
syslog.syslog(syslog.LOG_CRIT, item + ' encryption failed '+str(p2.returncode))
p1.terminate()
p1.wait()
if p1.returncode == 0:
#does some uploads of the file etc..
else:
syslog.syslog(syslog.LOG_CRIT, item + ' extract failed '+str(p1.returncode))
q.task_done()
def main():
db2backup = []
for settingtest in config:
db2backup.append(settingtest)
if len(db2backup) >= 1:
syslog.syslog('Backups started')
for database in db2backup:
q.put(database)
syslog.syslog(database + ' added to backup queue')
q.join()
syslog.syslog('Backups finished')
q = queue.Queue()
config = configparser.ConfigParser()
config.read('backup.cfg')
backuptype = 'daily'
dateyymmdd = datetime.datetime.now().strftime('%Y%m%d')
for i in range(2):
t = threading.Thread(target=dbbackup)
t.daemon = True
t.start()
if __name__ == '__main__':
main()
Simplify your code:
avoid unnecessary globals, pass parameters to the corresponding functions instead
avoid reimplementing a thread pool (it hurts readability and it misses convience features accumulated over the years).
The simplest way to capture stderr is to use stderr=PIPE and .communicate() (blocking call):
#!/usr/bin/env python3
from configparser import ConfigParser
from datetime import datetime
from multiprocessing.dummy import Pool
from subprocess import Popen, PIPE
def backup_db(item, conf): # config[item] == conf
"""Run `mysqldump ... | gpg ...` command."""
genfile = '{conf[DBName]}-{now:%Y%m%d}-{conf[PubKey]}.sql.gpg'.format(
conf=conf, now=datetime.now())
# ...
args = ['mysqldump', '-u', conf['UserNm'], ...]
with Popen(['gpg', ...], stdin=PIPE) as gpg, \
Popen(args, stdout=gpg.stdin, stderr=PIPE) as db_dump:
gpg.communicate()
error = db_dump.communicate()[1]
if gpg.returncode or db_dump.returncode:
error
def main():
config = ConfigParser()
with open('backup.cfg') as file: # raise exception if config is unavailable
config.read_file(file)
with Pool(2) as pool:
pool.starmap(backup_db, config.items())
if __name__ == "__main__":
main()
NOTE: no need to call db_dump.terminate() if gpg dies prematurely: mysqldump dies when it tries to write something to the closed gpg.stdin.
If there are huge number of items in the config then you could use pool.imap() instead of pool.starmap() (the call should be modified slightly).
For robustness, wrap backup_db() function to catch and log all exceptions.
I am starting a subprocess via python and display the stdout (progress) in a Progress bar:
def rv(args):
p = subprocess.Popen(["linkto.exe"]+[x for x in args], stdout=subprocess.PIPE)
while True:
line = p.stdout.readline()
if line != "":
progressStr=re.search(r"([0-9]+.[0-9]+%)", line.rstrip())
if progressStr == None:
print line.rstrip()
else:
progressInt=int(float(re.sub("[^0123456789\.]", "", progressStr.group())))
print progressInt
else:
break
As you see, progressInt is my cleaned up version of the stdout with integer values for the progress % - it works fine so far. However, depending on my input the stdout may vary because the subprocess may spawn another process after the primary one.
How could I drop all lines of my stdout after progressInt hits 100 for the first time?
I managed to find a solution via re.search. There was a small difference in the stdout of process1 (writes "Info:") and process2 (writes "Info [32]:").
def rv(args):
p = subprocess.Popen(["C:/Program Files/Tweak/RV-4.2.3-64/bin/rvio_hw.exe"]+[x for x in args], stdout=subprocess.PIPE)
for line in iter(p.stdout.readline,""):
noFFMpeg=re.search(r"INFO: (.*)", line.rstrip())
if noFFMpeg is not None:
progressStr=re.search(r"([0-9]+.[0-9]+%)", noFFMpeg.group())
if progressStr is not None:
progressInt=int(float(re.sub("[^0123456789\.]", "", progressStr.group())))
self.prog_QProgressBar.setValue(progressInt)
QtGui.QApplication.processEvents()
print progressStr.group()
When i run the python script ( BootScript.py ) on the shell it runs properly but when i try to run it through another script( automation.py ) it gets stuck
//automation.py
#!/usr/bin/env python
import sys
import optparse
import subprocess
global flag
failcount = 0
def incrfailcount():
global failcount
failcount += 1
def readfile():
fp = open('BootStrap.log','r')
print "press any key"
#_input()
for l in fp.readlines() :
if "BOOTSCRIPT SCORE IS: 3010" in l :
#import pdb
#pdb.set_trace()
global flag
flag = 0
fp.close()
parser = optparse.OptionParser()
parser.add_option('-c', '--count', dest='counter', help='no of time reboot Should Happen')
(options, args) = parser.parse_args()
#counter = 1
if options.counter is None:
counter = 1
else :
counter = options.counter
count = 0
output = ""
mylist = [ ' --cfgfile="BDXT0_PO_0.cfg"' , ' --cfgfile="BDXT0_PO_OVR_0.cfg"' ,' --scbypass' , ' --dmipy="C:\\sfd\\jg\\kdg\\dmi_pcie_po.py"', ' --fusestr="IA_CORE_DISABLE=0y111111111111111111111110"' , ' --fusestr="HT_DIS=1"' , ' --earbreakpy="C:\\dvfdfv\\dskf\\lsvcd\\config_restart.py"']
logfile = open('BootStrap.log', 'w')
#if out.__contains__('3010') :
#break
for i in range(int(counter)):
global flag
flag = 1
logfile = open('BootStrap.log', 'w')
proc = subprocess.Popen(['python' ,'bdxBootScript.py', mylist ], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
for line in proc.stdout:
sys.stdout.write(line)
logfile.write(line)
proc.wait()
count = count + 1
print "file closing "
logfile.close()
readfile()
#global flag
if flag :
incrfailcount()
continue
if flag :
print "Error Occured in %d th iteration" %count
else :
print "Every thing Went well"
if failcount >= 0 :
print "Script failed %d times of total run %d " %(failcount, count)
I am trying to automate BootScript.py
**What the Program Does ?**
Here it runs BootScript.py which arguments . the output of the bootscript.py is checked for specific line (BOOTSCRIPT SCORE IS: 3010)
If present it is asumed as to sucess else failure , this script is run for counter number of times
**What i want?**
This script gets stuck for a long time , i want it to execute with out beeing sstuck , as though i am running the bootscript manually
There are several issues e.g., Popen(['python' ,'bdxBootScript.py', mylist ]) should raise an exception because you should use Popen(['python' ,'bdxBootScript.py'] + mylist) instead. If you don't see the exception then either the code is not run e.g., counter==0 or (worse) you suppress exceptions up the stack (don't do it, at the very least you should log unexpected errors).
If bdxBootScript.py doesn't produce much output then for line in proc.stdout: may appear to do nothing for some time, to fix it pass -u flag to python to make its output unbuffered and use iter(p.stdout.readline, b'') to workaround the "hidden read-ahead buffer" bug for pipes in Python 2:
import os
import sys
from subprocess import Popen, PIPE, STDOUT
with open(os.devnull, 'rb', 0) as DEVNULL:
proc = Popen([sys.executable, '-u', 'bdxBootScript.py'] + mylist,
stdin=DEVNULL, stdout=PIPE, stderr=STDOUT, bufsize=1)
for line in iter(proc.stdout.readline, b''):
sys.stdout.write(line)
sys.stdout.flush()
logfile.write(line)
logfile.flush() # make the line available in the log immediately
proc.stdout.close()
rc = proc.wait()
I have three commands that would otherwise be easily chained together on the command-line like so:
$ echo foo | firstCommand - | secondCommand - | thirdCommand - > finalOutput
In other words, the firstCommand processes foo from standard input and pipes the result to secondCommand, which in turn processes that input and pipes its output to thirdCommand, which does processing and redirects its output to the file finalOutput.
I have been trying to recapitulate this in a Python script, using threading. I'd like to use Python in order to manipulate the output from firstCommand before passing it to secondCommand, and again between secondCommand and thirdCommand.
Here's an excerpt of code that does not seem to work:
first_process = subprocess.Popen(['firstCommand', '-'], stdin=subprocess.PIPE, stdout=subprocess.PIPE)
second_process = subprocess.Popen(['secondCommand', '-'], stdin=subprocess.PIPE, stdout=subprocess.PIPE)
third_process = subprocess.Popen(['thirdCommand', '-'], stdin=subprocess.PIPE, stdout=sys.stdout)
first_thread = threading.Thread(target=consumeOutputFromStdin, args=(sys.stdin, first_process.stdin))
second_thread = threading.Thread(target=consumeOutputFromFirstCommand, args=(first_process.stdout, second_process.stdin))
third_thread = threading.Thread(target=consumeOutputFromSecondCommand, args=(second_process.stdout, third_process.stdin))
first_thread.start()
second_thread.start()
third_thread.start()
first_thread.join()
second_thread.join()
third_thread.join()
first_process.communicate()
second_process.communicate()
third_process.communicate()
# read 1K chunks from standard input
def consumeOutputFromStdin(from_stream, to_stream):
chunk = from_stream.read(1024)
while chunk:
to_stream.write(chunk)
to_stream.flush()
chunk = from_stream.read(1024)
def consumeOutputFromFirstCommand(from_stream, to_stream):
while True:
unprocessed_line = from_stream.readline()
if not unprocessed_line:
break
processed_line = some_python_function_that_processes_line(unprocessed_line)
to_stream.write(processed_line)
to_stream.flush()
def consumeOutputFromSecondCommand(from_stream, to_stream):
while True:
unprocessed_line = from_stream.readline()
if not unprocessed_line:
break
processed_line = a_different_python_function_that_processes_line(unprocessed_line)
to_stream.write(processed_line)
to_stream.flush()
When I run this, the script hangs:
$ echo foo | ./myConversionScript.py
** hangs here... **
If I hit Ctrl-C to terminate the script, the code is stuck on the line third_thread.join():
C-c C-c
Traceback (most recent call last):
File "./myConversionScript.py", line 786, in <module>
sys.exit(main(*sys.argv))
File "./myConversionScript.py", line 556, in main
third_thread.join()
File "/home/foo/proj/tools/lib/python2.7/threading.py", line 949, in join
self.__block.wait()
File "/home/foo/proj/tools/lib/python2.7/threading.py", line 339, in wait
waiter.acquire()
KeyboardInterrupt
If I don't use a third_process and third_thread, instead only passing data from the output of the first thread to the input of the second thread, there is no hang.
Something about the third thread seems to cause things to break, but I don't know why.
I thought the point of communicate() is that it will handle I/O for the three processes, so I'm not sure why there is an I/O hang.
How do I get three or more commands/processes working together, where one thread consumes the output of another thread/process?
UPDATE
Okay, I made some changes that seem to help, based on some comments here and on other sites. The processes are made to wait() for completion, and within the thread methods, I close() the pipes once the thread has processed all the data that it can. My concern is that memory usage will be very high for large datasets, but at least things are working:
first_process = subprocess.Popen(['firstCommand', '-'], stdin=subprocess.PIPE, stdout=subprocess.PIPE)
second_process = subprocess.Popen(['secondCommand', '-'], stdin=subprocess.PIPE, stdout=subprocess.PIPE)
third_process = subprocess.Popen(['thirdCommand', '-'], stdin=subprocess.PIPE, stdout=sys.stdout)
first_thread = threading.Thread(target=consumeOutputFromStdin, args=(sys.stdin, first_process.stdin))
second_thread = threading.Thread(target=consumeOutputFromFirstCommand, args=(first_process.stdout, second_process.stdin))
third_thread = threading.Thread(target=consumeOutputFromSecondCommand, args=(second_process.stdout, third_process.stdin))
first_thread.start()
second_thread.start()
third_thread.start()
first_thread.join()
second_thread.join()
third_thread.join()
first_process.wait()
second_process.wait()
third_process.wait()
# read 1K chunks from standard input
def consumeOutputFromStdin(from_stream, to_stream):
chunk = from_stream.read(1024)
while chunk:
to_stream.write(chunk)
to_stream.flush()
chunk = from_stream.read(1024)
def consumeOutputFromFirstCommand(from_stream, to_stream):
while True:
unprocessed_line = from_stream.readline()
if not unprocessed_line:
from_stream.close()
to_stream.close()
break
processed_line = some_python_function_that_processes_line(unprocessed_line)
to_stream.write(processed_line)
to_stream.flush()
def consumeOutputFromSecondCommand(from_stream, to_stream):
while True:
unprocessed_line = from_stream.readline()
if not unprocessed_line:
from_stream.close()
to_stream.close()
break
processed_line = a_different_python_function_that_processes_line(unprocessed_line)
to_stream.write(processed_line)
to_stream.flush()
To emulate:
echo foo |
firstCommand - | somePythonRoutine - |
secondCommand - | anotherPythonRoutine - |
thirdCommand - > finalOutput
your current approach with threads works:
from subprocess import Popen, PIPE
first = Popen(["firstCommand", "-"], stdin=PIPE, stdout=PIPE, bufsize=1)
second = Popen(["secondCommand", "-"], stdin=PIPE, stdout=PIPE, bufsize=1)
bind(first.stdout, second.stdin, somePythonRoutine)
with open("finalOutput", "wb") as file:
third = Popen(["thirdCommand", "-"], stdin=PIPE, stdout=file, bufsize=1)
bind(second.stdout, third.stdin, anotherPythonRoutine)
# provide input for the pipeline
first.stdin.write(b"foo")
first.stdin.close()
# wait for it to complete
pipestatus = [p.wait() for p in [first, second, third]]
where each bind() starts a new thread:
from threading import Thread
def bind(input_pipe, output_pipe, line_filter):
def f():
try:
for line in iter(input_pipe.readline, b''):
line = line_filter(line)
if line:
output_pipe.write(line) # no flush unless newline present
finally:
try:
output_pipe.close()
finally:
input_pipe.close()
t = Thread(target=f)
t.daemon = True # die if the program exits
t.start()
and somePythonRoutine, anotherPythonRoutine accept a single line and return it (possibly modified).
The point of communicate() is that it returns the output of the process. This collides with your pipe setup.
You should only call it once on the third process; all the other ones are connected via pipes and know how to communicate with each other - no other / manual intervention is necessary.
This problem makes me confused
I just want to run 1 command on 18 different input file so I've wrote it like
while filenames or running:
while filenames and len(running) < N_CORES:
filename = filenames.pop(0)
print 'Submiting process for %s' % filename
cmd = COMMAND % dict(filename=filename, localdir=localdir)
p = subprocess.Popen(cmd, shell=True)
print 'running:', cmd
running.append((cmd, p))
i = 0
while i < len(running):
(cmd, p) = running[i]
ret = p.poll()
if ret is not None:
rep = open('Crux.report.%d' % (report_number), 'w')
rep.write('Command: %s' % cmd)
print localdir
print 'done!'
report_number += 1
running.remove((cmd, p))
else:
i += 1
time.sleep(1)
But when I've run it after 3 hours all of the process going to Sleep mode.
But if I call the command from terminal manually (for all of the different files), all of them have been Ok.
Any help would be appreciate.
I assume you want to run 18 processes (one process per file) with no more than N_CORES processes in parallel.
The simplest way could be to use multiprocessing.Pool here:
import multiprocessing as mp
import subprocess
def process_file(filename):
try:
return filename, subprocess.call([cmd, filename], cwd=localdir)
except OSError:
return filename, None # failed to start subprocess
if __name__ == "__main__":
pool = mp.Pool()
for result in pool.imap_unordered(process_file, filenames):
# report result here
Whithout knowing what your subprocesses are supposed to do and how long they are supposed to be running it's hard to give an accurate answer here.
Some problems I see with your program:
you check for i < len(running), while incrementing i and removing from running.
Either use a counter or check if the list still contains elements, but don't do both at the same time. This way you will break out of the loop halfway.
you increment i each time a process has not finished, you probably want to increment if a process has finished.