Frequently check a file while subprocess is writing to it - python

I have the following piece of code, where a C++ executable (run.out) prints out a bunch of info at runtime using std::cout. The code stores the output of run.out in storage.txt.
import subprocess

storage = open("storage.txt", "w")
shell_cmd = "run.out"
proc = subprocess.Popen([shell_cmd], stdout=storage, stderr=storage)
Once the subprocess starts, I need to frequently check the contents of storage.txt and make decisions based on what has just been stored there. How can I do that?

You could use proc.poll(), which returns immediately and indicates whether the subprocess is still running:
while proc.poll() is None:
    time.sleep(0.25)  # read the contents 4 times a second
    data = open("storage.txt").read()
    if 'error' in data:
        print("failed ...")
        # something ...
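If storage.txt grows large, re-reading the whole file four times a second gets expensive. A minimal sketch of an alternative, reusing proc from above: keep the file open and read only what was appended since the last check (a file object remembers its position, so read() called again after hitting EOF returns just the new data):

import time

with open("storage.txt") as f:
    while proc.poll() is None:
        time.sleep(0.25)
        new_data = f.read()  # only the bytes appended since the last read
        if 'error' in new_data:
            print("failed ...")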

python checking file changes without reading the full file

I have a web app where the backend uses pysondb (https://github.com/pysonDB/pysonDB) to upload tasks which will be executed by another program (sniffer).
The sniffer program (a completely separate program) checks the database for new unfinished uploaded tasks in an infinite loop, executes them and updates the database.
I don't want to read the database repeatedly; instead, I want to watch for file changes to the database file (db.json) and only then read the database. I have looked into watchdog, but I was looking for something lightweight and modern to suit my needs.
# infinite loop
import pysondb
import time
from datetime import datetime
# calling aligner with os.system
import os
import subprocess
from pathlib import Path

while True:
    # always alive
    time.sleep(2)
    try:
        # process files
        db = pysondb.getDb("../tasks_db.json")
        tasks = db.getBy({"task_status": "uploaded"})
        for task in tasks:
            try:
                task_path = task["task_path"]
                cost = task["cost"]
                corpus_folder = task_path
                get_output = subprocess.Popen(f"mfa validate {corpus_folder} english english",
                                              shell=True, stdout=subprocess.PIPE).stdout
                res = get_output.read().decode("utf-8")
                # print(type(res))
                if "ERROR - There was an error in the run, please see the log." in res:
                    # log errors
                    f = open("sniffer_log.error", "a+")
                    f.write(f"{datetime.now()} :: {str(res)}\n")
                    f.close()
                else:
                    align_folder = f"{corpus_folder}_aligned"
                    Path(align_folder).mkdir(parents=True, exist_ok=True)
                    o = subprocess.Popen(f"mfa align {corpus_folder} english english {align_folder}",
                                         shell=True, stdout=subprocess.PIPE).stdout.read().decode("utf-8")
                    # success
            except subprocess.CalledProcessError:
                # mfa align ~/mfa_data/my_corpus english english ~/mfa_data/my_corpus_aligned
                # log errors
                f = open("sniffer_log.error", "a+")
                f.write(f"{datetime.now()} :: Files not in right format\n")
                f.close()
    except Exception as e:
        # log errors
        f = open("sniffer_log.error", "a+")
        f.write(f"{datetime.now()} :: {e}\n")
        f.close()
Using python-rq would be a much more efficient way of doing this and wouldn't need a database. It has no requirements other than a Redis install. From there, you could just move all of that into a function:
def task(task_path, cost):
    corpus_folder = task_path
    get_output = subprocess.Popen(f"mfa validate {corpus_folder} english english",
                                  shell=True, stdout=subprocess.PIPE).stdout
    res = get_output.read().decode("utf-8")
    # print(type(res))
    if "ERROR - There was an error in the run, please see the log." in res:
        # log errors
        f = open("sniffer_log.error", "a+")
        f.write(f"{datetime.now()} :: {str(res)}\n")
    ...  # etc
Obviously you would rename that function and put the try-except statement back, but then you could just call that through RQ:
# ... where you want to call the function
from wherever.you.put.your.task.function import task

result = your_redis_queue.enqueue(task, "whatever", "arguments")
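For completeness, a minimal sketch of the Redis/RQ side of this, assuming a Redis server running on localhost (the queue variable name mirrors the snippet above; the module path is the placeholder from the answer):

from redis import Redis
from rq import Queue
from wherever.you.put.your.task.function import task  # placeholder path from above

# connect to a local Redis instance and create the queue used above
your_redis_queue = Queue(connection=Redis())

# enqueue the task; a worker process started separately with `rq worker`
# picks it up and runs it outside the web app
job = your_redis_queue.enqueue(task, "/some/task/path", 0)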

Display process output incrementally using Python subprocess

I'm trying to run "docker-compose pull" from inside a Python automation script and incrementally display the same output that the Docker command would print if it were run directly from the shell. This command prints a line for each Docker image found in the system, incrementally updates each line with the image's download progress (a percentage), and replaces this percentage with "done" when the download has completed. I first tried getting the command output with subprocess.poll() and (blocking) readline() calls:
import shlex
import subprocess

def run(command, shell=False):
    p = subprocess.Popen(shlex.split(command), stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE, shell=shell)
    while True:
        # print one output line
        output_line = p.stdout.readline().decode('utf8')
        error_output_line = p.stderr.readline().decode('utf8')
        if output_line:
            print(output_line.strip())
        if error_output_line:
            print(error_output_line.strip())
        # check if process finished
        return_code = p.poll()
        if return_code is not None and output_line == '' and error_output_line == '':
            break
    if return_code > 0:
        print("%s failed, error code %d" % (command, return_code))

run("docker-compose pull")
The code gets stuck in the first (blocking) readline() call. Then I tried to do the same without blocking:
import select
import shlex
import subprocess
import sys
import time

def run(command, shell=False):
    p = subprocess.Popen(shlex.split(command), stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE, shell=shell)
    io_poller = select.poll()
    io_poller.register(p.stdout.fileno(), select.POLLIN)
    io_poller.register(p.stderr.fileno(), select.POLLIN)
    while True:
        # poll IO for output
        io_events_list = []
        while not io_events_list:
            time.sleep(1)
            io_events_list = io_poller.poll(0)
        # print new output
        for event in io_events_list:
            # must be tested because non-registered events (e.g. POLLHUP) can also be returned
            if event[1] & select.POLLIN:
                if event[0] == p.stdout.fileno():
                    output_str = p.stdout.read(1).decode('utf8')
                    print(output_str, end="")
                if event[0] == p.stderr.fileno():
                    error_output_str = p.stderr.read(1).decode('utf8')
                    print(error_output_str, end="")
        # check if process finished
        # when the subprocess finishes, io_poller.poll(0) returns a list with 2 select.POLLHUP events
        # (one for stdout, one for stderr) and does not enter the inner loop
        return_code = p.poll()
        if return_code is not None:
            break
    if return_code > 0:
        print("%s failed, error code %d" % (command, return_code))

run("docker-compose pull")
This works, but only the final lines (with "done" at the end) are printed to the screen, once all the Docker image downloads have completed.
Both methods work fine with a command with simpler output, such as "ls". Maybe the problem is related to how this Docker command prints incrementally to the screen, overwriting already-written lines? Is there a safe way to incrementally show the exact output of a command in the command line when running it via a Python script?
EDIT: 2nd code block was corrected
Always open stdin as a pipe, and if you are not using it, close it immediately.
p.stdout.read() will block until the pipe is closed, so your polling code does nothing useful here. It needs modifications.
I suggest not using shell=True.
Instead of *.readline(), try *.read(1) and wait for "\n".
Of course you can do what you want in Python; the question is how. A child process might have different ideas about what its output should look like, and that's when trouble starts. E.g. the process might explicitly want a terminal at the other end, not your process. Or a lot of such simple nonsense. Buffering may also cause problems. You can try starting Python in unbuffered mode to check (/usr/bin/python -u).
If nothing works, then use the pexpect automation library instead of subprocess.
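For reference, a minimal pexpect sketch of that last suggestion: pexpect runs the child under a pseudo-terminal, so a program that tailors its progress output to a real terminal behaves as if one is attached (the command string is just the example from the question):

import sys
import pexpect

# spawn the command under a pseudo-terminal
child = pexpect.spawn("docker-compose pull", encoding="utf-8")
child.logfile_read = sys.stdout  # echo everything the child prints, as it arrives
child.expect(pexpect.EOF)        # block until the process closes its output
child.close()
print("exit status:", child.exitstatus)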
I have found a solution, based on the first code block of my question:
def run(command, shell=False):
    p = subprocess.Popen(shlex.split(command), stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE, shell=shell)
    while True:
        # read one char at a time
        output_line = p.stderr.read(1).decode("utf8")
        if output_line != "":
            print(output_line, end="")
        else:
            # check if process finished
            return_code = p.poll()
            if return_code is not None:
                if return_code > 0:
                    raise Exception("Command %s failed" % command)
                break
    return return_code
Notice that docker-compose uses stderr to print its progress instead of stdout. @Dalen explained that some applications do this when they want their results to be pipeable somewhere, for instance to a file, but still want to be able to show their progress.
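If you want both streams interleaved in the order the process writes them, you can also redirect stderr into stdout and read a single pipe; a small sketch based on the solution above:

import shlex
import subprocess

command = "docker-compose pull"
p = subprocess.Popen(shlex.split(command),
                     stdout=subprocess.PIPE,
                     stderr=subprocess.STDOUT)  # progress written to stderr shows up on stdout
while True:
    char = p.stdout.read(1).decode("utf8")
    if char:
        print(char, end="")
    elif p.poll() is not None:
        break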

Run Python script within Python by using `subprocess.Popen` in real time

I want to run a Python script (or any executable, for that matter) from a Python script and get the output in real time. I have followed many tutorials, and my current code looks like this:
import subprocess

with open("test2", "w") as f:
    f.write("""import time
print('start')
time.sleep(5)
print('done')""")

process = subprocess.Popen(['python3', "test2"], stdout=subprocess.PIPE)
while True:
    output = process.stdout.readline()
    if output == '' and process.poll() is not None:
        break
    if output:
        print(output.strip())
rc = process.poll()
The first bit just creates the file that will be run, for clarity's sake.
I have two problems with this code:
It does not give the output in real time. It waits until the process has finished.
It does not terminate the loop once the process has finished.
Any help would be very welcome.
EDIT: Thanks to @JohnAnderson for the fix to the first problem: replacing if output == '' and process.poll() is not None: with if output == b'' and process.poll() is not None:
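A minimal sketch (not from the original post) of an alternative fix: open the pipe in text mode, so readline() returns str and the original '' comparison works as written:

import subprocess

process = subprocess.Popen(['python3', 'test2'],
                           stdout=subprocess.PIPE,
                           universal_newlines=True)  # readline() now returns str
while True:
    output = process.stdout.readline()
    if output == '' and process.poll() is not None:
        break
    if output:
        print(output.strip())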
Last night I set out to do this using a pipe:
import os
import subprocess

with open("test2", "w") as f:
    f.write("""import time
print('start')
time.sleep(2)
print('done')""")

(readend, writeend) = os.pipe()
p = subprocess.Popen(['python3', '-u', 'test2'], stdout=writeend, bufsize=0)
still_open = True
output = ""
output_buf = os.read(readend, 1).decode()
while output_buf:
    print(output_buf, end="")
    output += output_buf
    if still_open and p.poll() is not None:
        os.close(writeend)
        still_open = False
    output_buf = os.read(readend, 1).decode()
This forces buffering out of the picture and reads one character at a time (to make sure we do not block writes from the process once it has filled a buffer), closing the write end when the process finishes to make sure read catches the EOF correctly. Having looked at subprocess, though, that turned out to be a bit of overkill. With PIPE you get most of that for free, and I ended up with the following, which seems to work fine (call read as many times as necessary to keep emptying the pipe). With just this, and assuming the process finishes, you do not have to worry about polling it and/or making sure the write end of the pipe is closed to correctly detect EOF and get out of the loop:
p = subprocess.Popen(['python3', '-u', 'test2'],
                     stdout=subprocess.PIPE, bufsize=1,
                     universal_newlines=True)
output = ""
output_buf = p.stdout.readline()
while output_buf:
    print(output_buf, end="")
    output += output_buf
    output_buf = p.stdout.readline()
This is a bit less "real-time", as it is basically line buffered.
Note: I've added -u to your Python call, as you also need to make sure your called process's buffering does not get in the way.
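A slightly more idiomatic variant of the same line-buffered loop, as a minimal sketch, is to iterate over the pipe directly and let a with block handle cleanup:

import subprocess

with subprocess.Popen(['python3', '-u', 'test2'],
                      stdout=subprocess.PIPE, bufsize=1,
                      universal_newlines=True) as p:
    for line in p.stdout:  # yields lines as the child flushes them
        print(line, end="")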

Running into a Multithreading issue connecting to multiple devices at the same time

I am defining the main function with def get_info(). This function doesn't take any arguments. The program uses ArgumentParser to parse arguments from the command line. The argument provided is a CSV file with the --csv option. It picks up the CSV file from the current directory and reads the lines, each containing an IP address, logs into the devices serially, runs a few commands, and appends the output to a text file. When the code runs, it removes the old text file from the directory and creates a new output text file.
Problem: I want to achieve this using the threading module so that it handles 5 devices in parallel and outputs to a file. The problem I am running into is lock issues, as the same object is being used by the same process at the same time. Here is the sample code I have written. The threading concept is very new to me, so please bear with me.
import getpass
import csv
import time
import os
import netmiko
import paramiko
from argparse import ArgumentParser
from multiprocessing import Process, Queue

def get_ip(device_ip, output_q):
    try:
        ssh_session = netmiko.ConnectHandler(device_type='cisco_ios', ip=device_row['device_ip'],
                                             username=ssh_username, password=ssh_password)
        time.sleep(2)
        ssh_session.clear_buffer()
    except (netmiko.ssh_exception.NetMikoTimeoutException,
            netmiko.ssh_exception.NetMikoAuthenticationException,
            paramiko.ssh_exception.SSHException) as s_error:
        print(s_error)

def main():
    show_vlanfile = "pool.txt"
    if os.path.isfile(show_vlanfile):
        try:
            os.remove(show_vlanfile)
        except OSError as e:
            print("Error: %s - %s." % (e.filename, e.strerror))
    parser = ArgumentParser(description='Arguments for running oneLiner.py')
    parser.add_argument('-c', '--csv', required=True, action='store', help='Location of CSV file')
    args = parser.parse_args()
    ssh_username = input("SSH username: ")
    ssh_password = getpass.getpass('SSH Password: ')
    with open(args.csv, "r") as file:
        reader = csv.DictReader(file)
        output_q = Queue(maxsize=5)
        procs = []
        for device_row in reader:
            # print("+++++ {0} +++++".format(device_row['device_ip']))
            my_proc = Process(target=show_version_queue, args=(device_row, output_q))
            my_proc.start()
            procs.append(my_proc)
        # Make sure all processes have finished
        for a_proc in procs:
            a_proc.join()
    commands = ["terminal length 0", "terminal width 511", "show run | inc hostname",
                "show ip int brief | ex una", "show vlan brief", "terminal length 70"]
    output = ''
    for cmd in commands:
        output += "\n"
        output += ssh_session.send_command(cmd)
        output += "\n"
    with open("pool.txt", 'a') as outputfile:
        while not output_q.empty():
            output_queue = output_q.get()
            for x in output_queue:
                outputfile.write(x)

if __name__ == "__main__":
    main()
Somewhat different take...
I effectively run a main task and then just fire up a (limited) number of worker threads; they communicate via two data queues, basically "requests" and "responses".
The main task:
- dumps the requests into the request queue;
- fires up a number (e.g. 10 or so) of worker tasks;
- sits on the "response" queue waiting for results. The results can be simple user info messages about status, error messages, or DATA responses to be written out to files.
When all the threads finish, the program shuts down.
Workers basically:
- get a request; if there is none, just shut down;
- connect to the device;
- send a log message to the response queue saying they've started;
- do what they have to do;
- put the result as DATA on the response queue;
- close the connection to the device;
- loop back to the start.
This way you don't inadvertently flood the processing host, as you have a limited number of concurrent threads going, all doing exactly the same thing in their own sweet time, until there's nothing left to do; a sketch of the pattern follows below.
Note that you DON'T do any screen/file IO in the threads, as it would get jumbled by the different tasks running at the same time. Each worker essentially only sees the input queue, the output queue, and the Netmiko sessions that get cycled through.
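A minimal sketch of that request/response pattern using only the standard library; the device IPs and the fabricated result string are placeholders for the CSV rows and the real Netmiko work:

import queue
import threading

NUM_WORKERS = 10  # cap on concurrent connections

def worker(request_q, response_q):
    while True:
        try:
            device_ip = request_q.get_nowait()  # get a request; if none, shut down
        except queue.Empty:
            return
        response_q.put(("LOG", f"started {device_ip}"))
        # connect to the device and run commands here (Netmiko etc.);
        # this placeholder just fabricates a result
        response_q.put(("DATA", f"output from {device_ip}"))

request_q, response_q = queue.Queue(), queue.Queue()
for ip in ["10.0.0.1", "10.0.0.2"]:  # in practice, read from the CSV file
    request_q.put(ip)

threads = [threading.Thread(target=worker, args=(request_q, response_q))
           for _ in range(NUM_WORKERS)]
for t in threads:
    t.start()
for t in threads:
    t.join()

# only the main thread writes to the file, so nothing gets jumbled
with open("pool.txt", "a") as outputfile:
    while not response_q.empty():
        kind, payload = response_q.get()
        if kind == "DATA":
            outputfile.write(payload + "\n")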
It looks like you have code that is from a Django example:
def main():
    '''
    Use threads and Netmiko to connect to each of the devices in the database.
    '''
    devices = NetworkDevice.objects.all()
You should move the argument parsing into the main thread. You should read the CSV file in the main thread. You should have each child thread be the Netmiko SSH connection.
I say this because your current solution has all of the SSH connections happen in one thread, which is not what you intended.
At a high level, main() should have argument parsing, delete the old output file, obtain the username/password (assuming these are the same for all the devices), and loop over the CSV file obtaining the IP address for each device.
Once you have the IP address, you create a thread; the thread uses Netmiko-SSH to connect to each device and retrieve your output. I would then use a Queue to pass the output from each device back to the main thread.
Then back in the main thread, you would write all of the output to a single file.
It would look a bit like this:
https://github.com/ktbyers/netmiko/blob/develop/examples/use_cases/case16_concurrency/threads_netmiko.py
Here is an example using a queue (with multiprocessing) though you can probably adapt this using a thread-Queue pretty easily.
https://github.com/ktbyers/netmiko/blob/develop/examples/use_cases/case16_concurrency/processes_netmiko_queue.py

Checking process status from Python

I am running on a Linux x86-64 system. From a Python (2.6) script, I wish to periodically check whether a given process (identified by pid) has become "defunct"/zombie (this means that its entry in the process table exists but the process is doing nothing). It would also be good to know how much CPU the process is consuming (similar to what the 'top' command shows).
Can somebody give me some pointers on how I can get these in Python?
I'd use the psutil library:
import psutil

proc = psutil.Process(pid)
if proc.status() == psutil.STATUS_ZOMBIE:
    print("Zombie process!")
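psutil also covers the second half of the question, the CPU usage that top shows; a small sketch, assuming pid is the process id you already have:

import psutil

proc = psutil.Process(pid)
# blocks for one second and returns the CPU utilisation over that window,
# comparable to the %CPU column in top
print(proc.cpu_percent(interval=1.0))
print(proc.memory_info().rss)  # resident memory in bytes, like top's RES column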
You can get the output of top in Python as below:
Linux:
import os

f = os.popen("top -p 1 -n 1", "r")
text = f.read()
print(text)
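os.popen works, but on a modern Python (3.7+) the equivalent with subprocess would be something like this sketch; -b runs top in batch mode so it does not expect a terminal:

import subprocess

result = subprocess.run(["top", "-b", "-n", "1", "-p", "1"],
                        capture_output=True, text=True)
print(result.stdout)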
Update
Windows:
from os import popen
from sys import stdin

ps = popen("C:/WINDOWS/system32/tasklist.exe", "r")
pp = ps.readlines()
ps.close()
# wow, look at the robust parser!
pp.pop(0)  # blank line
ph = pp.pop(0)  # header line
pp.pop(0)  # ===
print("%d processes reported." % len(pp))
print("First process in list:")
print(pp[0])
stdin.readline()
