subprocess or multithreading or threading pool - Python

I'm going to run a command-line utility multiple times in parallel using Python.
I know that multithreading is better suited to I/O operations, and multiprocessing to CPU-bound operations.
But what should I use for parallel subprocess.run?
I also know that I can create a pool from the subprocess module, but how is it different from the pools in the multiprocessing and threading modules? And why shouldn't I just put the subprocess.run function into a multiprocessing or threading pool?
Or maybe there are criteria for when it is better to put a utility's command line into a pool of threads or processes?
(In my case, I'm going to run the "ffmpeg" utility)

In a situation like this, I tend to run subprocesses from a ThreadPoolExecutor, basically because it's easy: the external utility does its work in its own process anyway, so the Python threads spend almost all of their time blocked, waiting for it to finish.
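For the ffmpeg case specifically, a minimal sketch might look like the following (the output naming, ffmpeg flags, and input list are assumptions; substitute your own conversion):

import concurrent.futures as cf
import os
import subprocess as sp

def transcode(infn):
    """Run one ffmpeg conversion; returns (input name, return code)."""
    outfn = infn + ".mp3"  # hypothetical output naming
    cp = sp.run(["ffmpeg", "-y", "-i", infn, outfn],
                stdout=sp.DEVNULL, stderr=sp.DEVNULL)
    return (infn, cp.returncode)

with cf.ThreadPoolExecutor(max_workers=os.cpu_count()) as tp:
    # map() yields each result as the corresponding conversion finishes
    for infn, rv in tp.map(transcode, ["a.wav", "b.wav"]):  # assumed inputs
        print(infn, "->", rv)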
A fuller example (from here):
from datetime import datetime
from functools import partial
import argparse
import concurrent.futures as cf
import logging
import os
import subprocess as sp
import sys

__version__ = "2021.09.19"


def main():
    """
    Entry point for dicom2jpg.
    """
    args = setup()
    if not args.fn:
        logging.error("no files to process")
        sys.exit(1)
    if args.quality != 80:
        logging.info(f"quality set to {args.quality}")
    if args.level:
        logging.info("applying level correction.")
    convert_partial = partial(convert, quality=args.quality, level=args.level)
    starttime = str(datetime.now())[:-7]
    logging.info(f"started at {starttime}.")
    with cf.ThreadPoolExecutor(max_workers=os.cpu_count()) as tp:
        for infn, outfn, rv in tp.map(convert_partial, args.fn):
            logging.info(f"finished conversion of {infn} to {outfn} (returned {rv})")
    endtime = str(datetime.now())[:-7]
    logging.info(f"completed at {endtime}.")


def setup():
    """Parse command-line arguments."""
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--log",
        default="warning",
        choices=["debug", "info", "warning", "error"],
        help="logging level (defaults to 'warning')",
    )
    parser.add_argument("-v", "--version", action="version", version=__version__)
    parser.add_argument(
        "-l",
        "--level",
        action="store_true",
        default=False,
        help="Correct color levels (default: no)",
    )
    parser.add_argument(
        "-q", "--quality", type=int, default=80, help="JPEG quality level (default: 80)"
    )
    parser.add_argument(
        "fn", nargs="*", metavar="filename", help="DICOM files to process"
    )
    args = parser.parse_args(sys.argv[1:])
    logging.basicConfig(
        level=getattr(logging, args.log.upper(), None),
        format="%(levelname)s: %(message)s",
    )
    logging.debug(f"command line arguments = {sys.argv}")
    logging.debug(f"parsed arguments = {args}")
    # Check for requisites
    try:
        sp.run(["convert"], stdout=sp.DEVNULL, stderr=sp.DEVNULL)
        logging.info("found “convert”")
    except FileNotFoundError:
        logging.error("the program “convert” cannot be found")
        sys.exit(1)
    return args


def convert(filename, quality, level):
    """
    Convert a DICOM file to a JPEG file.

    Removing the blank areas from the Philips detector.

    Arguments:
        filename: name of the file to convert.
        quality: JPEG quality to apply
        level: Boolean to indicate whether level adjustment should be done.

    Returns:
        Tuple of (input filename, output filename, convert return value)
    """
    outname = filename.strip() + ".jpg"
    size = "1574x2048"
    args = [
        "convert",
        filename,
        "-units",
        "PixelsPerInch",
        "-density",
        "300",
        "-depth",
        "8",
        "-crop",
        size + "+232+0",
        "-page",
        size + "+0+0",
        "-auto-gamma",
        "-quality",
        str(quality),
    ]
    if level:
        args += ["-level", "-35%,70%,0.5"]
    args.append(outname)
    cp = sp.run(args, stdout=sp.DEVNULL, stderr=sp.DEVNULL)
    return (filename, outname, cp.returncode)


if __name__ == "__main__":
    main()
Alternatively, you can manage a bunch of subprocesses (in the form of Popen objects) directly, as shown below.
(This was older code, now modified for Python 3)
import os
import sys
import subprocess
from multiprocessing import cpu_count
from time import sleep


def checkfor(args):
    """Make sure that a program necessary for using this script is
    available.

    Arguments:
        args -- string or list of strings of commands. A single string may
                not contain spaces.
    """
    if isinstance(args, str):
        if " " in args:
            raise ValueError("No spaces in single command allowed.")
        args = [args]
    try:
        with open("/dev/null", "w") as bb:
            subprocess.check_call(args, stdout=bb, stderr=bb)
    except Exception:
        print("Required program '{}' not found! exiting.".format(args[0]))
        sys.exit(1)


def startconvert(fname):
    """Use the convert(1) program from the ImageMagick suite to convert the
    image and crop it."""
    size = "1574x2048"
    args = [
        "convert",
        fname,
        "-units",
        "PixelsPerInch",
        "-density",
        "300",
        "-crop",
        size + "+232+0",
        "-page",
        size + "+0+0",
        fname + ".png",
    ]
    with open("/dev/null", "w") as bb:  # open for writing; it is used as stdout/stderr
        p = subprocess.Popen(args, stdout=bb, stderr=bb)
    print("Start processing", fname)
    return (fname, p)


def manageprocs(proclist):
    """Check a list of subprocesses for processes that have ended and
    remove them from the list.
    """
    for it in proclist[:]:  # iterate over a copy; removing items from the list being looped over would skip entries
        fn, pr = it
        result = pr.poll()
        if result is not None:
            proclist.remove(it)
            if result == 0:
                print("Finished processing", fn)
            else:
                s = "The conversion of {} exited with error code {}."
                print(s.format(fn, result))
    sleep(0.5)


def main(argv):
    """Main program.

    Keyword arguments:
        argv -- command line arguments
    """
    if len(argv) == 1:
        path, binary = os.path.split(argv[0])
        print("Usage: {} [file ...]".format(binary))
        sys.exit(0)
    del argv[0]  # delete the name of the script.
    checkfor("convert")
    procs = []
    maxprocs = cpu_count()
    for ifile in argv:
        while len(procs) == maxprocs:
            manageprocs(procs)
        procs.append(startconvert(ifile))
    while len(procs) > 0:
        manageprocs(procs)


# This is the main program ##
if __name__ == "__main__":
    main(sys.argv)

How to use a WAV file as input in VOSK speech recognition?

I have a project that needs to take a recorded file, process it in code, extract the text from the file, match the extracted text against other text, and verify it.
My problem is:
I can't use the recorded file in the code; it doesn't read the file.
The init function is the foundation of the code.
The verify function confirms the match between the speech and the text.
import argparse
import json
import os
import queue
import random
import sys
from difflib import SequenceMatcher

import numpy as np
import sounddevice as sd
import vosk

q = queue.Queue()


def int_or_str(text):
    """Helper function for argument parsing."""
    try:
        return int(text)
    except ValueError:
        return text


def callback(indata, frames, time, status):
    """This is called (from a separate thread) for each audio block."""
    if status:
        print(status, file=sys.stderr)
    q.put(bytes(indata))


def init():
    parser = argparse.ArgumentParser(add_help=False)
    parser.add_argument(
        '-l', '--list-devices', action='store_true',
        help='show list of audio devices and exit')
    args, remaining = parser.parse_known_args()
    if args.list_devices:
        print(sd.query_devices())
        parser.exit(0)
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        parents=[parser])
    parser.add_argument(
        '-f', '--filename', type=str, metavar='FILENAME',
        help='audio file to store recording to')
    parser.add_argument(
        '-m', '--model', type=str, metavar='MODEL_PATH',
        help='Path to the model')
    parser.add_argument(
        '-d', '--device', type=int_or_str,
        help='input device (numeric ID or substring)')
    parser.add_argument(
        '-r', '--samplerate', type=int, help='sampling rate')
    args = parser.parse_args(remaining)
    try:
        if args.model is None:
            args.model = "model"
        if not os.path.exists(args.model):
            print("Please download a model for your language from https://alphacephei.com/vosk/models")
            print("and unpack as 'model' in the current folder.")
            parser.exit(0)
        if args.samplerate is None:
            device_info = sd.query_devices(args.device, 'input')
            # soundfile expects an int, sounddevice provides a float:
            args.samplerate = int(device_info['default_samplerate'])
        model = vosk.Model(args.model)
        if args.filename:
            dump_fn = open(args.filename, "wb")
        else:
            dump_fn = None
    except KeyboardInterrupt:
        print('\nDone')
        parser.exit(0)
    except Exception as e:
        parser.exit(type(e).__name__ + ': ' + str(e))
    return model, args


def verify(random_sentence, model, args):
    num, T_num, F_num, num_word = 0, 0, 0, 1
    with sd.RawInputStream(samplerate=args.samplerate, blocksize=8000, device=args.device, dtype='int16',
                           channels=1, callback=callback):
        rec = vosk.KaldiRecognizer(model, args.samplerate)
        print("{}) ".format(num_word), random_sentence, end='\n')
        print('=' * 30, end='\n')
        run = True
        while run:
            data = q.get()
            if rec.AcceptWaveform(data):
                res = json.loads(rec.FinalResult())
                res['text'] = res['text'].replace('ي', 'ی')
                if SequenceMatcher(None, random_sentence, res['text']).ratio() > 0.65:
                    T_num, num, num_word = T_num + 1, num + 1, num_word + 1
                else:
                    F_num, num, num_word = F_num + 1, num + 1, num_word + 1
                run = False
        print('=' * 30)
        print('True Cases : {}\n False Cases : {}'.format(T_num, F_num))


if __name__ == "__main__":
    model, args = init()
    verify(random_sentences, model, args)
I have been working on a similar project. I modified the code from the VOSK Git repo and wrote the following function, which takes a file name/path as input and outputs the captured text. Sometimes, when there is a long pause (on the order of seconds) in the audio file, the returned text would be an empty string. To remedy this problem, I had to write additional code that picks out the longest string that was captured. I could make do with this fix.
import json
import os
import wave

from vosk import KaldiRecognizer, Model


def get_text_from_voice(filename):
    if not os.path.exists("model"):
        print("Please download the model from https://alphacephei.com/vosk/models and unpack as 'model' in the current folder.")
        exit(1)
    wf = wave.open(filename, "rb")
    if wf.getnchannels() != 1 or wf.getsampwidth() != 2 or wf.getcomptype() != "NONE":
        print("Audio file must be WAV format mono PCM.")
        exit(1)
    model = Model("model")
    rec = KaldiRecognizer(model, wf.getframerate())
    rec.SetWords(True)
    text_lst = []
    p_text_lst = []
    p_str = []
    len_p_str = []
    while True:
        data = wf.readframes(4000)
        if len(data) == 0:
            break
        if rec.AcceptWaveform(data):
            text_lst.append(rec.Result())
            print(rec.Result())
        else:
            p_text_lst.append(rec.PartialResult())
            print(rec.PartialResult())
    if len(text_lst) != 0:
        jd = json.loads(text_lst[0])
        txt_str = jd["text"]
    elif len(p_text_lst) != 0:
        for i in range(0, len(p_text_lst)):
            temp_txt_dict = json.loads(p_text_lst[i])
            p_str.append(temp_txt_dict['partial'])
        len_p_str = [len(p_str[j]) for j in range(0, len(p_str))]
        max_val = max(len_p_str)
        indx = len_p_str.index(max_val)
        txt_str = p_str[indx]
    else:
        txt_str = ''
    return txt_str
Make sure that the correct model is present in the same directory, or pass in the path to the model. Also note that VOSK accepts audio files only in WAV mono PCM format.
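If a recording is not already in that format, it can be converted first, for example with ffmpeg driven from subprocess (a sketch; the output name and 16 kHz rate are assumptions):

import subprocess

def to_mono_pcm_wav(src, dst="converted.wav", rate=16000):
    """Convert an audio file to mono 16-bit PCM WAV so VOSK can read it."""
    subprocess.run(["ffmpeg", "-y", "-i", src, "-ac", "1",
                    "-ar", str(rate), "-acodec", "pcm_s16le", dst],
                   check=True)
    return dst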

Run interactive Bash with Popen and a dedicated TTY in Python

I need to run an interactive Bash instance in a separate process in Python with its own dedicated TTY (I can't use pexpect).
I used this code snippet I commonly see used in similar programs:
import os
import pty
import subprocess

master, slave = pty.openpty()
p = subprocess.Popen(["/bin/bash", "-i"], stdin=slave, stdout=slave, stderr=slave)
os.close(slave)
x = os.read(master, 1026)
print x
subprocess.Popen.kill(p)
os.close(master)
But when I run it I get the following output:
$ ./pty_try.py
bash: cannot set terminal process group (10790): Inappropriate ioctl for device
bash: no job control in this shell
Strace of the run shows some errors:
...
readlink("/usr/bin/python2.7", 0x7ffc8db02510, 4096) = -1 EINVAL (Invalid argument)
...
ioctl(3, SNDCTL_TMR_TIMEBASE or SNDRV_TIMER_IOCTL_NEXT_DEVICE or TCGETS, 0x7ffc8db03590) = -1 ENOTTY (Inappropriate ioctl for device)
...
readlink("./pty_try.py", 0x7ffc8db00610, 4096) = -1 EINVAL (Invalid argument)
The code snippet seems pretty straightforward; is Bash not getting something it needs? What could be the problem here?
This is a solution for running an interactive command in a subprocess. It uses a pseudo-terminal to make stdout non-blocking (some commands also need a tty device, e.g. bash), and it uses select to handle input and output to the subprocess.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import sys
import select
import termios
import tty
import pty
from subprocess import Popen

command = 'bash'
# command = 'docker run -it --rm centos /bin/bash'.split()

# save original tty setting then set it to raw mode
old_tty = termios.tcgetattr(sys.stdin)
tty.setraw(sys.stdin.fileno())

# open pseudo-terminal to interact with subprocess
master_fd, slave_fd = pty.openpty()

try:
    # use os.setsid() to make it run in a new process group, or bash job control will not be enabled
    p = Popen(command,
              preexec_fn=os.setsid,
              stdin=slave_fd,
              stdout=slave_fd,
              stderr=slave_fd,
              universal_newlines=True)

    while p.poll() is None:
        r, w, e = select.select([sys.stdin, master_fd], [], [])
        if sys.stdin in r:
            d = os.read(sys.stdin.fileno(), 10240)
            os.write(master_fd, d)
        elif master_fd in r:
            o = os.read(master_fd, 10240)
            if o:
                os.write(sys.stdout.fileno(), o)
finally:
    # restore tty settings back
    termios.tcsetattr(sys.stdin, termios.TCSADRAIN, old_tty)
This is the solution that worked for me in the end (as suggested by qarma):
import ctypes
import os
import pty
import subprocess

libc = ctypes.CDLL('libc.so.6')
master, slave = pty.openpty()
p = subprocess.Popen(["/bin/bash", "-i"], preexec_fn=libc.setsid, stdin=slave, stdout=slave, stderr=slave)
os.close(slave)
# ... do stuff here ...
x = os.read(master, 1026)
print x
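As an aside, on Python 3.2+ the ctypes call can be avoided: Popen accepts start_new_session=True, which calls setsid() in the child for you. A minimal sketch of the same idea:

import os
import pty
import subprocess

master, slave = pty.openpty()
# start_new_session=True runs setsid() in the child, giving bash its own session
p = subprocess.Popen(["/bin/bash", "-i"], start_new_session=True,
                     stdin=slave, stdout=slave, stderr=slave)
os.close(slave)
x = os.read(master, 1026)
print(x)
p.kill()
os.close(master)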
Here is a full object-oriented solution for running interactive shell commands over TTYs, using threads and queues for stdout and stderr IO handling. This took me a while to build from multiple sources, but it works perfectly so far on Unix/Linux systems, and also as part of a Juniper op script. I thought I would post it here to save others the time of trying to build something like this.
import pty
import re
import select
import threading
from datetime import datetime, timedelta
import os
import logging
import subprocess
import time
from queue import Queue, Empty

lib_logger = logging.getLogger("lib")

# Default wait time in seconds (defined elsewhere in the original code; value assumed here)
DEVICE_TIMEOUT = 60


# Handler function to be run as a thread for pulling pty channels from an interactive shell
def _pty_handler(pty_master, logger, queue, stop):
    poller = select.poll()
    poller.register(pty_master, select.POLLIN)
    while True:
        # Stop handler if flagged
        if stop():
            logger.debug("Disabling pty handler for interactive shell")
            break
        fd_event = poller.poll(100)
        for descriptor, event in fd_event:
            # Read data from pipe and send to queue if there is data to read
            if event == select.POLLIN:
                data = os.read(descriptor, 1).decode("utf-8")
                if not data:
                    break
                # logger.debug("Reading in to handler queue: " + data)
                queue.put(data)
            # Exit handler if stdout is closing
            elif event == select.POLLHUP:
                logger.debug("Disabling pty handler for interactive shell")
                break


# Function for reading outputs from the given queue by draining it and returning the output
def _get_queue_output(queue: Queue) -> str:
    value = ""
    try:
        while True:
            value += queue.get_nowait()
    except Empty:
        return value


# Helper function to create the needed list for popen and print the command run to the logger
def popen_command(command, logger, *args):
    popen_list = list()
    popen_list.append(command)
    command_output = command
    for arg in args:
        popen_list.append(arg)
        command_output += " " + arg
    lib_logger.debug("Making Popen call using: " + str(popen_list))
    logger.debug("")
    logger.debug(command_output)
    logger.debug("")
    return popen_list


# Class for creating an interactive shell and sending commands to it, along with logging output to loggers
class InteractiveShell(object):

    def __init__(self, command, logger, *args):
        self.logger = logger
        self.command = command
        self.process = None
        self.popen_list = popen_command(command, logger, *args)
        self.master_stdout = None
        self.slave_stdout = None
        self.master_stderr = None
        self.slave_stderr = None
        self.stdout_handler = None
        self.stderr_handler = None
        self.stdout_queue = None
        self.stderr_queue = None
        self.stop_handlers = False

    # Open interactive shell and set up all threaded IO handlers
    def open(self, shell_prompt, timeout=DEVICE_TIMEOUT):
        # Create PTYs
        self.master_stdout, self.slave_stdout = pty.openpty()
        self.master_stderr, self.slave_stderr = pty.openpty()
        # Create shell subprocess
        self.process = subprocess.Popen(self.popen_list, stdin=self.slave_stdout, stdout=self.slave_stdout,
                                        stderr=self.slave_stderr, bufsize=0, start_new_session=True)
        lib_logger.debug("")
        lib_logger.debug("Started interactive shell for command " + self.command)
        lib_logger.debug("")
        # Create threads and queues for handling pty output and start them
        self.stdout_queue = Queue()
        self.stderr_queue = Queue()
        self.stdout_handler = threading.Thread(target=_pty_handler, args=(self.master_stdout,
                                                                          lib_logger,
                                                                          self.stdout_queue,
                                                                          lambda: self.stop_handlers))
        self.stderr_handler = threading.Thread(target=_pty_handler, args=(self.master_stderr,
                                                                          lib_logger,
                                                                          self.stderr_queue,
                                                                          lambda: self.stop_handlers))
        self.stdout_handler.daemon = True
        self.stderr_handler.daemon = True
        lib_logger.debug("Enabling stderr handler for interactive shell " + self.command)
        self.stderr_handler.start()
        lib_logger.debug("Enabling stdout handler for interactive shell " + self.command)
        self.stdout_handler.start()
        # Wait for shell prompt
        lib_logger.debug("Waiting for shell prompt: " + shell_prompt)
        return self.wait_for(shell_prompt, timeout)

    # Close interactive shell, which should also kill all threaded IO handlers
    def close(self):
        # Wait 5 seconds before closing to let the shell handle all input and output
        time.sleep(5)
        # Stop IO handler threads and terminate the process, then wait another 5 seconds for cleanup to happen
        self.stop_handlers = True
        self.process.terminate()
        time.sleep(5)
        # Check for any additional output from the stdout handler
        output = ""
        while True:
            data = _get_queue_output(self.stdout_queue)
            if data != "":
                output += data
            else:
                break
        for line in iter(output.splitlines()):
            self.logger.debug(line)
        # Check for any additional output from the stderr handler
        output = ""
        while True:
            data = _get_queue_output(self.stderr_queue)
            if data != "":
                output += data
            else:
                break
        for line in iter(output.splitlines()):
            self.logger.error(line)
        # Cleanup PTYs
        os.close(self.master_stdout)
        os.close(self.master_stderr)
        os.close(self.slave_stdout)
        os.close(self.slave_stderr)
        lib_logger.debug("Interactive shell command " + self.command + " terminated")

    # Run a series of commands given as a list of [command, wait_for] lists. If no wait_for is needed then
    # only provide the command. Return whether all the commands completed successfully or not.
    # Ex:
    # [
    #     ["ssh jsas#" + vnf_ip, r"jsas#.*:"],
    #     ["juniper123", r"jsas#.*\$"],
    #     ["sudo su", r".*jsas:"],
    #     ["juniper123", r"root#.*#"],
    #     ["usermod -p 'blah' jsas"]
    # ]
    def run_commands(self, commands_list):
        shell_status = True
        for command in commands_list:
            shell_status = self.run(command[0])
            if shell_status and len(command) == 2:
                shell_status = self.wait_for(command[1])
            # Break out of running commands if a command failed
            if not shell_status:
                break
        return shell_status

    # Run the given command and return False if an error occurs, otherwise return True
    def run(self, command, sleep=0):
        # Check the process to make sure it is still running and, if not, grab the stderr output
        if self.process.poll():
            self.logger.error("Interactive shell command " + self.command + " closed with return code: " +
                              str(self.process.returncode))
            data = _get_queue_output(self.stderr_queue)
            if data != "":
                self.logger.error("Interactive shell error messages:")
                for line in iter(data.splitlines()):
                    self.logger.error(line)
            return False
        # Write command to process; check to make sure a newline is in the command, otherwise add it
        if "\n" not in command:
            command += "\n"
        os.write(self.master_stdout, command.encode("utf-8"))
        if sleep:
            time.sleep(sleep)
        return True

    # Wait for a specific regex expression in the output before continuing; return False if the wait time expires,
    # otherwise return True
    def wait_for(self, this, timeout=DEVICE_TIMEOUT):
        timeout = datetime.now() + timedelta(seconds=timeout)
        output = ""
        # Keep searching for output until timeout occurs
        while timeout > datetime.now():
            data = _get_queue_output(self.stdout_queue)
            if data != "":
                # Add to the output and check for a match to the given regex; if it matches then break and send
                # output to the logger
                output += data
                lib_logger.debug("Checking for " + this + " in data: ")
                for line in iter(output.splitlines()):
                    lib_logger.debug(line)
                if re.search(r"{}\s?$".format(this), output):
                    break
            time.sleep(1)
        # Send output to logger
        for line in iter(output.splitlines()):
            self.logger.debug(line)
        # If wait time expired print error message and return False
        if timeout < datetime.now():
            self.logger.error("Wait time expired when waiting for " + this)
            return False
        return True
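A hypothetical usage sketch (the command, prompt regex, and logger are assumptions, not part of the original):

import logging

logging.basicConfig(level=logging.DEBUG, format="%(name)s: %(message)s")
demo_logger = logging.getLogger("demo")

shell = InteractiveShell("bash", demo_logger)
if shell.open(shell_prompt=r"\$"):      # wait for a bash prompt
    shell.run_commands([
        ["echo hello", r"hello"],       # command plus regex to wait for
        ["uptime"],                     # command with no wait_for
    ])
shell.close()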

Python list with vCenter VMs

I found a Python script that lists all vCenter VM attributes, but now I need to store some of the attributes in a Python list (or array, dict...).
But it doesn't work.
My getVminfos.py:
EDIT: the right file:
import argparse
import atexit
import itertools
import unicodedata

import pyVmomi
from pyVmomi import vmodl
from pyVmomi import vim
from pyVim.connect import SmartConnect, Disconnect


def GetArgs():
    parser = argparse.ArgumentParser(description='Process args for retrieving all the Virtual Machines')
    parser.add_argument('-s', '--host', required=True, action='store', help='Remote host to connect to')
    parser.add_argument('-o', '--port', type=int, default=443, action='store', help='Port to connect on')
    parser.add_argument('-u', '--user', required=True, action='store', help='User name to use when connecting to host')
    parser.add_argument('-p', '--password', required=False, action='store', help='Password to use when connecting to host')
    args = parser.parse_args()
    return args


def print_vm_info(virtual_machine):
    """
    Print information for a particular virtual machine or recurse into a
    folder with depth protection
    """
    Ansible_Hosts = []
    Ansible_Groups = []
    Ansible_Names = []
    summary = virtual_machine.summary
    print("Name : ", summary.config.name)
    print("Template : ", summary.config.template)
    # print("Path : ", summary.config.vmPathName)
    print("Guest : ", str(unicodedata.normalize('NFKD', summary.config.guestFullName)))
    # print("Instance UUID : ", summary.config.instanceUuid)
    # print("Bios UUID : ", summary.config.uuid)
    print("State : ", summary.runtime.powerState)
    if summary.guest is not None:
        ip_address = summary.guest.ipAddress
        if ip_address:
            Ansible_Hosts.append([ip_address])
    print("Ansible_Hosts[1:15]", Ansible_Hosts[1:15])


def main():
    args = GetArgs()
    try:
        si = SmartConnect(host=args.host, user=args.user, pwd=args.password, port=int(args.port))
        if not si:
            print("Could not connect to the specified host using specified "
                  "username and password")
            return -1
        atexit.register(Disconnect, si)
        content = si.RetrieveContent()  # get root folder
        container = content.rootFolder  # starting point to look into
        viewType = [vim.VirtualMachine]  # object types to look for
        recursive = True  # whether we should look into it recursively
        containerView = content.viewManager.CreateContainerView(
            container, viewType, recursive)
        children = containerView.view
        for child in children:
            print_vm_info(child)
    except vmodl.MethodFault as error:
        print("Caught vmodl fault : " + error.msg)
        return -1
    return 0


# Start program
if __name__ == "__main__":
    main()
The print statements work like a charm, but my lists (Ansible_Hosts, ...) are always empty...
The list initialization statements (Ansible_Hosts = [] etc.) should go in main(): as written, the lists are re-created empty on every call to print_vm_info(), so anything appended to them is discarded when the function returns.
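A minimal sketch of that fix, returning the value from the helper and collecting it in main() (the function split is illustrative, not from the original script):

def get_vm_ip(virtual_machine):
    """Return the guest IP of a VM, or None if it has none."""
    summary = virtual_machine.summary
    if summary.guest is not None:
        return summary.guest.ipAddress
    return None

def main():
    Ansible_Hosts = []
    # ... connect and build `children` exactly as above ...
    for child in children:
        ip_address = get_vm_ip(child)
        if ip_address:
            Ansible_Hosts.append(ip_address)
    print("Ansible_Hosts[1:15]", Ansible_Hosts[1:15])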

pysox PTT digital voice recorder

I need help. I spent at least a day experimenting and failing to produce this very simple-sounding app.
PTT (Push To Talk) means any key starts a recording and any key stops it.
I want to assemble a library of recorded spoken sentences.
Start with:
a choice of format (.wav, .ogg, .mp3, ...)
a directory into which files go
a list of sentences from which filenames are generated
a text prompt to speak a sentence.
Preferred behavior:
Text sentence appears in terminal window
user hits a PTT button
user speaks prompted sentence
user hits a PTT button
file appears in directory.
I anticipate the LLOC (logical lines of code) should be only a dozen or two.
Here is framing code with one failed experiment:
#!/usr/bin/env python
# This class is here just to support keyboard input.
from sys import (stdin, stdout)
from atexit import (register)
from select import (select)
from termios import (tcgetattr, tcsetattr, ICANON, ECHO, TCSAFLUSH)

class Raw(object):
    def __init__(self):
        self.fd = stdin.fileno()
        self.org = tcgetattr(self.fd)
        self.raw = tcgetattr(self.fd)
        self.raw[3] = (self.raw[3] & ~ICANON & ~ECHO)
        register(self.set_normal_term)
    def set_normal_term(self): tcsetattr(self.fd, TCSAFLUSH, self.org)
    def set_curses_term(self): tcsetattr(self.fd, TCSAFLUSH, self.raw)
    def putch(self, ch): stdout.write(ch)
    def getch(self): return stdin.read(1)
    def getche(self): ch = self.getch(); self.putch(ch); return ch
    def kbhit(self): dr, dw, de = select([stdin], [], [], 0); return dr <> []
    def __enter__(self): self.set_curses_term(); return self
    def __exit__(self, type, value, traceback): self.set_normal_term()

# This is the PTT recorder code.
from pysox import (CSoxStream, CSignalInfo)

def prompt(sentence, filename):
    print 'Hit any key to start recording. Hit any key to stop recording.'
    with Raw() as raw:
        print 'Say: "%s".' % (sentence)
        while not raw.kbhit(): pass
        raw.getch()
        print 'Start'
        outfile = CSoxStream(filename, 'w', CSignalInfo(48000, 2, 32))
        # Signal is to be collected until kbhit()
        while not raw.kbhit(): pass
        raw.getch()
        print 'Stop'
        outfile.close()

# This is the CLI to drive the PTT recorder code.
if __name__ == "__main__":
    prefix, ext = ['sentence', 'wav']
    sentences = ["hello world", "klatu barada nikto", "fubar"]
    for sentence in sentences:
        filename = prefix + '.' + sentence.replace(' ', '.') + "." + ext
        prompt(sentence, filename)
The trick is to run the recorder in a subprocess via a pipe and then kill that pid when finished.
#!/usr/bin/env python
# This class is here just to support keyboard input.
from atexit import (register)
from select import (select)
from termios import (tcgetattr, tcsetattr, ICANON, ECHO, TCSAFLUSH)

class Raw(object):
    def __init__(self):
        self.fd = stdin.fileno()
        self.org, self.raw = (tcgetattr(self.fd), tcgetattr(self.fd))
        self.raw[3] = (self.raw[3] & ~ICANON & ~ECHO)
        register(self.set_normal_term)
    def set_normal_term(self): tcsetattr(self.fd, TCSAFLUSH, self.org)
    def set_curses_term(self): tcsetattr(self.fd, TCSAFLUSH, self.raw)
    def getch(self): return stdin.read(1)
    def kbhit(self): dr, dw, de = select([stdin], [], [], 0); return dr <> []
    def __enter__(self): self.set_curses_term(); return self
    def __exit__(self, type, value, traceback): self.set_normal_term()

# This function handles prompts and keywaits
from sys import (stdin, stdout)

def prompt(pre, s, post):
    print '%s: "%s". Press any key to %s. \r' % (pre, s, post),
    stdout.flush()
    with Raw() as raw:
        while not raw.kbhit(): pass
        raw.getch()
    print ' ' * 79 + '\r',
    stdout.flush()

# This is the PTT recorder code.
from subprocess import (Popen, PIPE)
from signal import (SIGTERM)
from os import (kill)

def collect(sentence):
    filename = prefix + '.' + sentence.replace(' ', '.') + "." + ext
    command = 'rec -q -r 16000 -b 16 -c 1 %s trim 0 2:00' % (filename)
    try:
        prompt('Preparing', sentence, 'start')
        pid = Popen(command.split(), stderr=PIPE).pid
        prompt('Recording', sentence, 'stop')
        kill(pid, SIGTERM)
    except:
        pass

# This is the CLI to drive the PTT recorder code.
if __name__ == "__main__":
    prefix, ext = ['sentence', 'wav']
    sentences = ["hello world", "klatu barada nikto", "fubar"]
    print 'The following recordings must take less than 2 minutes of speech.'
    for sentence in sentences:
        collect(sentence)
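A small variation on the same trick, keeping the Popen object so its terminate() method can be used instead of os.kill (an alternative sketch, assuming the prompt() helper defined above):

from subprocess import Popen, PIPE

def collect(sentence, prefix='sentence', ext='wav'):
    filename = prefix + '.' + sentence.replace(' ', '.') + '.' + ext
    command = 'rec -q -r 16000 -b 16 -c 1 %s trim 0 2:00' % filename
    prompt('Preparing', sentence, 'start')
    p = Popen(command.split(), stderr=PIPE)   # recording starts here
    prompt('Recording', sentence, 'stop')
    p.terminate()                             # same effect as kill(pid, SIGTERM)
    p.wait()                                  # reap the recorder process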

How to add non-blocking stderr capture to threaded Popens

I've got a Python 3 script I use to back up and encrypt mysqldump files, and I'm having a particular issue with one database that is 67 GB after encryption & compression.
The mysqldump is exiting with error code 3, so I'd like to catch the actual error message, as this could mean a couple of things.
The odd thing is that the backup file is the right size, so I'm not sure what the error means. It worked once on this database...
The code looks like the below, and I'd really appreciate some help on how to add non-blocking capture of stderr when the return code is anything but 0, for both p1 and p2.
Also, if I'm doing anything glaringly obvious wrong, please do let me know, as I'd like to make sure this is a reliable process. It has been working fine on my databases under 15 GB compressed.
import configparser
import datetime
import os
import queue
import subprocess
import syslog
import threading


def dbbackup():
    while True:
        item = q.get()
        # build up folder structure, daily, weekly, monthly & project
        genfile = config[item]['DBName'] + '-' + dateyymmdd + '-'
        genfile += config[item]['PubKey'] + '.sql.gpg'
        if os.path.isfile(genfile):
            syslog.syslog(item + ' ' + genfile + ' exists, removing')
            os.remove(genfile)
        syslog.syslog(item + ' will be backed up as ' + genfile)
        args = ['mysqldump', '-u', config[item]['UserNm'],
                '-p' + config[item]['Passwd'], '-P', config[item]['Portnu'],
                '-h', config[item]['Server']]
        args.extend(config[item]['MyParm'].split())
        args.append(config[item]['DBName'])
        p1 = subprocess.Popen(args, stdout=subprocess.PIPE)
        p2 = subprocess.Popen(['gpg', '-o', genfile, '-r',
                               config[item]['PubKey'], '-z', '9', '--encrypt'], stdin=p1.stdout)
        p2.wait()
        if p2.returncode == 0:
            syslog.syslog(item + ' encryption successful')
        else:
            syslog.syslog(syslog.LOG_CRIT, item + ' encryption failed ' + str(p2.returncode))
        p1.terminate()
        p1.wait()
        if p1.returncode == 0:
            pass  # does some uploads of the file etc..
        else:
            syslog.syslog(syslog.LOG_CRIT, item + ' extract failed ' + str(p1.returncode))
        q.task_done()


def main():
    db2backup = []
    for settingtest in config:
        db2backup.append(settingtest)
    if len(db2backup) >= 1:
        syslog.syslog('Backups started')
        for database in db2backup:
            q.put(database)
            syslog.syslog(database + ' added to backup queue')
        q.join()
        syslog.syslog('Backups finished')


q = queue.Queue()
config = configparser.ConfigParser()
config.read('backup.cfg')
backuptype = 'daily'
dateyymmdd = datetime.datetime.now().strftime('%Y%m%d')
for i in range(2):
    t = threading.Thread(target=dbbackup)
    t.daemon = True
    t.start()
if __name__ == '__main__':
    main()
Simplify your code:
avoid unnecessary globals; pass parameters to the corresponding functions instead
avoid reimplementing a thread pool (it hurts readability and misses convenience features accumulated over the years)
The simplest way to capture stderr is to use stderr=PIPE and .communicate() (a blocking call):
#!/usr/bin/env python3
from configparser import ConfigParser
from datetime import datetime
from multiprocessing.dummy import Pool
from subprocess import Popen, PIPE


def backup_db(item, conf):  # config[item] == conf
    """Run `mysqldump ... | gpg ...` command."""
    genfile = '{conf[DBName]}-{now:%Y%m%d}-{conf[PubKey]}.sql.gpg'.format(
        conf=conf, now=datetime.now())
    # ...
    args = ['mysqldump', '-u', conf['UserNm'], ...]
    with Popen(['gpg', ...], stdin=PIPE) as gpg, \
            Popen(args, stdout=gpg.stdin, stderr=PIPE) as db_dump:
        gpg.communicate()
        error = db_dump.communicate()[1]
    if gpg.returncode or db_dump.returncode:
        pass  # handle failure here; `error` holds mysqldump's stderr output


def main():
    config = ConfigParser()
    with open('backup.cfg') as file:  # raise exception if config is unavailable
        config.read_file(file)
    with Pool(2) as pool:
        pool.starmap(backup_db, config.items())


if __name__ == "__main__":
    main()
NOTE: no need to call db_dump.terminate() if gpg dies prematurely: mysqldump dies when it tries to write something to the closed gpg.stdin.
If there are a huge number of items in the config, you could use pool.imap() instead of pool.starmap() (the call needs to be modified slightly).
For robustness, wrap the backup_db() function to catch and log all exceptions.
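A sketch combining both points (the wrapper name and logging choice are assumptions; backup_db, Pool, and config come from the answer above). pool.imap() passes a single argument per call, so the (item, conf) pair is unpacked inside the wrapper:

import logging

def safe_backup(item_conf):
    """Single-argument wrapper around backup_db() for pool.imap()."""
    item, conf = item_conf
    try:
        backup_db(item, conf)
    except Exception:
        logging.exception('backup of %r failed', item)

with Pool(2) as pool:
    for _ in pool.imap(safe_backup, config.items()):
        pass  # iterate only to drive the pool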
