The Vosk model that I'm using is vosk-model-en-us-aspire-0.2 (1.4 GB). Loading the model takes quite a long time, and it currently happens every time. Is it necessary to recreate the Vosk object every time? It would take much less time if we loaded the model only once; that could save at least half of the time.
No, it isn't required. In many of the examples the model is loaded first and then reused for every transcription. Your software is probably just not written correctly.
https://github.com/alphacep/vosk-server/blob/master/websocket-microphone/asr_server_microphone.py
#!/usr/bin/env python3
import json
import os
import sys
import asyncio
import websockets
import logging
import sounddevice as sd
import argparse
import queue
from vosk import Model, KaldiRecognizer
def int_or_str(text):
    """Return *text* as an ``int`` when it parses as one, else unchanged.

    Used as the ``type=`` converter of the ``--device`` option, so a device
    can be given either as a numeric ID or as a name substring.
    """
    try:
        number = int(text)
    except ValueError:
        # Not a number: keep the original string.
        return text
    return number
def callback(indata, frames, time, status):
    """Hand one captured audio block over to the asyncio event loop.

    This is called (from a separate thread) for each audio block, so the
    block is not put on the queue directly: the ``put_nowait`` call is
    scheduled on the event loop with ``call_soon_threadsafe``.

    Arguments:
        indata: buffer holding the captured samples; copied with ``bytes()``.
        frames: unused.
        time: unused.
        status: stream status flags; logged when truthy instead of being
            silently dropped.
    """
    if status:
        logging.warning("Audio input status: %s", status)
    loop.call_soon_threadsafe(audio_queue.put_nowait, bytes(indata))
async def serve_client(websocket, path=None):
    """Keep a connected client registered until its connection closes.

    Passed to ``websockets.serve`` as the connection handler. The client is
    added to the global ``clients`` set, which ``recognize_microphone``
    broadcasts results to, and is removed again when the connection ends.

    Arguments:
        websocket: the connection object for this client.
        path: request path; unused. Optional so the handler can be called
            either with ``(websocket, path)`` or with ``(websocket)`` only.
    """
    clients.add(websocket)
    print ("Client connected from", websocket)
    try:
        await websocket.wait_closed()
    finally:
        # Always unregister, even when the wait is cancelled or raises,
        # so no closed connection is left in the broadcast set.
        clients.discard(websocket)
async def recognize_microphone():
    """Recognise microphone audio forever and broadcast each full result.

    Loads the model named by ``args.model`` once, creates the global
    ``audio_queue`` that ``callback`` feeds, and opens a mono 16-bit raw
    input stream. Every block taken from the queue is passed to the
    recogniser; whenever it reports a completed utterance, the result is
    logged and sent to all registered ``clients``.
    """
    global audio_queue

    model = Model(args.model)
    audio_queue = asyncio.Queue()

    stream = sd.RawInputStream(
        samplerate=args.samplerate,
        blocksize=2000,
        device=args.device,
        dtype='int16',
        channels=1,
        callback=callback,
    )
    with stream as device:
        logging.info("Running recognition")
        rec = KaldiRecognizer(model, device.samplerate)
        while True:
            chunk = await audio_queue.get()
            if not rec.AcceptWaveform(chunk):
                # Utterance not finished yet; wait for more audio.
                continue
            utterance = rec.Result()
            logging.info(utterance)
            websockets.broadcast(clients, utterance)
async def main():
    """Parse the command line, then run the server and the recogniser.

    Parsing happens in two stages: a help-less parser that only knows
    ``--list-devices`` runs first, so the device list can be printed without
    the other options; the full parser then handles the remaining arguments.
    The parsed ``args``, the ``clients`` set and the running event ``loop``
    are stored as module globals for the other coroutines and the audio
    callback.
    """
    global args, clients, loop

    device_parser = argparse.ArgumentParser(add_help=False)
    device_parser.add_argument(
        '-l', '--list-devices', action='store_true',
        help='show list of audio devices and exit')
    args, leftover = device_parser.parse_known_args()
    if args.list_devices:
        print(sd.query_devices())
        device_parser.exit(0)

    full_parser = argparse.ArgumentParser(
        description="ASR Server",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        parents=[device_parser])
    full_parser.add_argument(
        '-m', '--model', type=str, metavar='MODEL_PATH',
        help='Path to the model', default='model')
    full_parser.add_argument(
        '-i', '--interface', type=str, metavar='INTERFACE',
        help='Bind interface', default='0.0.0.0')
    full_parser.add_argument(
        '-p', '--port', type=int, metavar='PORT',
        help='Port', default=2700)
    full_parser.add_argument(
        '-d', '--device', type=int_or_str,
        help='input device (numeric ID or substring)')
    full_parser.add_argument(
        '-r', '--samplerate', type=int, help='sampling rate', default=16000)
    args = full_parser.parse_args(leftover)

    logging.basicConfig(level=logging.INFO)
    loop = asyncio.get_running_loop()
    clients = set()

    logging.info("Listening on %s:%d", args.interface, args.port)
    server = websockets.serve(serve_client, args.interface, args.port)
    await asyncio.gather(server, recognize_microphone())


if __name__ == '__main__':
    asyncio.run(main())
Related
I'm going to run the command line utility multiple times in parallel using Python.
I know that multithreading is better to use for I/O operations, multiprocessing - for CPU oriented operations.
But what should I use for parallel subprocess.run?
I also know that I can create a pool from the subprocess module, but how is it different from pools from the multiprocessing and threading modules? And why shouldn't I just put subprocess.run function into multiprocessing or threading pools?
Or maybe there are some criteria when it is better to put a utility run cmd into a pool of threads or processes?
(In my case, I'm going to run the "ffmpeg" utility)
In a situation like this, I tend to run subprocesses from a ThreadPoolExecutor, basically because it's easy.
Example (from here):
from datetime import datetime
from functools import partial
import argparse
import concurrent.futures as cf
import logging
import os
import subprocess as sp
import sys
__version__ = "2021.09.19"
def main():
    """
    Entry point for dicom2jpg.

    Parses the command line via ``setup()``, then converts every given file
    on a thread pool with one worker per CPU, logging each finished
    conversion together with the start and completion times.
    """
    def timestamp():
        # Current time as text with the last seven characters
        # (normally the ".ffffff" fraction) cut off.
        return str(datetime.now())[:-7]

    args = setup()
    if not args.fn:
        logging.error("no files to process")
        sys.exit(1)
    if args.quality != 80:
        logging.info(f"quality set to {args.quality}")
    if args.level:
        logging.info("applying level correction.")

    def job(name):
        # Bind the per-run options; the pool supplies only the file name.
        return convert(name, quality=args.quality, level=args.level)

    logging.info(f"started at {timestamp()}.")
    with cf.ThreadPoolExecutor(max_workers=os.cpu_count()) as pool:
        for source, target, status in pool.map(job, args.fn):
            logging.info(f"finished conversion of {source} to {target} (returned {status})")
    logging.info(f"completed at {timestamp()}.")
def setup():
    """Parse the command line, configure logging and check for ``convert``.

    Returns:
        The parsed argparse namespace (``log``, ``level``, ``quality``, ``fn``).

    Exits with status 1 when the ``convert`` program cannot be started.
    """
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument(
        "--log",
        choices=["debug", "info", "warning", "error"],
        default="warning",
        help="logging level (defaults to 'warning')",
    )
    cli.add_argument("-v", "--version", action="version", version=__version__)
    cli.add_argument(
        "-l", "--level",
        action="store_true",
        default=False,
        help="Correct color levels (default: no)",
    )
    cli.add_argument(
        "-q", "--quality",
        type=int,
        default=80,
        help="JPEG quailty level (default: 80)",
    )
    cli.add_argument("fn", nargs="*", metavar="filename", help="DICOM files to process")
    args = cli.parse_args(sys.argv[1:])

    log_level = getattr(logging, args.log.upper(), None)
    logging.basicConfig(level=log_level, format="%(levelname)s: %(message)s")
    logging.debug(f"command line arguments = {sys.argv}")
    logging.debug(f"parsed arguments = {args}")

    # Check for requisites: starting the program at all is the test,
    # its output and exit status are ignored.
    try:
        sp.run(["convert"], stdout=sp.DEVNULL, stderr=sp.DEVNULL)
    except FileNotFoundError:
        logging.error("the program “convert” cannot be found")
        sys.exit(1)
    else:
        logging.info("found “convert”")
    return args
def convert(filename, quality, level):
    """
    Convert a DICOM file to a JPEG file by running ``convert``.

    The image is cropped to a fixed 1574x2048 region offset 232 pixels from
    the left (removing the blank areas from the Philips detector).

    Arguments:
        filename: name of the file to convert.
        quality: JPEG quality to apply.
        level: Boolean to indicate whether level adjustment should be done.

    Returns:
        Tuple of (input filename, output filename, convert return value)
    """
    geometry = "1574x2048"
    target = filename.strip() + ".jpg"
    level_options = ["-level", "-35%,70%,0.5"] if level else []
    command = [
        "convert", filename,
        "-units", "PixelsPerInch",
        "-density", "300",
        "-depth", "8",
        "-crop", geometry + "+232+0",
        "-page", geometry + "+0+0",
        "-auto-gamma",
        "-quality", str(quality),
        *level_options,
        target,
    ]
    finished = sp.run(command, stdout=sp.DEVNULL, stderr=sp.DEVNULL)
    return (filename, target, finished.returncode)


if __name__ == "__main__":
    main()
Alternatively, you can manage a bunch of subprocesses (in the form of Popen objects) directly, as shown below.
(This was older code, now modified for Python 3)
import os
import sys
import subprocess
from multiprocessing import cpu_count
from time import sleep
def checkfor(args):
    """Make sure that a program necessary for using this script is
    available.

    The program is started with its output discarded. If it cannot be
    started at all, a message is printed and the script exits with status 1.
    A program that starts but exits with a non-zero status (for example
    because it prints a usage message when run without arguments) counts as
    available.

    Arguments:
    args -- string or list of strings of commands. A single string may
            not contain spaces.

    Raises:
    ValueError -- when a single command string contains a space.
    """
    if isinstance(args, str):
        if " " in args:
            raise ValueError("No spaces in single command allowed.")
        args = [args]
    try:
        subprocess.check_call(
            args, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
        )
    except subprocess.CalledProcessError:
        # The program ran; only its exit status was non-zero.
        pass
    except OSError:
        # Covers FileNotFoundError / PermissionError: it could not be started.
        print("Required program '{}' not found! exiting.".format(args[0]))
        sys.exit(1)
def startconvert(fname):
    """Use the convert(1) program from the ImageMagick suite to convert the
    image and crop it.

    The conversion is started in the background and not waited for; the
    output is written next to the input as ``fname + ".png"``.

    Arguments:
    fname -- name of the file to convert.

    Returns:
    Tuple of (fname, Popen object of the started process).
    """
    size = "1574x2048"
    args = [
        "convert",
        fname,
        "-units",
        "PixelsPerInch",
        "-density",
        "300",
        "-crop",
        size + "+232+0",
        "-page",
        size + "+0+0",
        fname + ".png",
    ]
    # subprocess.DEVNULL gives the child a writable null device; a file
    # opened with the default read-only mode is not a valid output sink.
    p = subprocess.Popen(
        args, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
    )
    print("Start processing", fname)
    return (fname, p)
def manageprocs(proclist):
    """Check a list of subprocesses for processes that have ended and
    remove them from the list.

    Each finished process is reported on standard output, with its exit
    code when that is non-zero. The function then sleeps for half a second
    so callers that poll in a loop do not spin.

    Arguments:
    proclist -- list of (filename, Popen) tuples; modified in place.
    """
    # Iterate over a snapshot: removing from the list being iterated
    # would skip the entry that follows every removed one.
    for it in list(proclist):
        fn, pr = it
        result = pr.poll()
        if result is None:
            continue  # still running
        proclist.remove(it)
        if result == 0:
            print("Finished processing", fn)
        else:
            s = "The conversion of {} exited with error code {}."
            print(s.format(fn, result))
    sleep(0.5)
def main(argv):
    """Main program.

    Starts one conversion per file named in ``argv[1:]``, keeping at most
    ``cpu_count()`` of them running at the same time, and returns once all
    of them have finished. With no file arguments a usage message is
    printed and the script exits with status 0.

    Keyword arguments:
    argv -- command line arguments; argv[0] is the name of the script.
            The list is not modified.
    """
    if len(argv) == 1:
        binary = os.path.basename(argv[0])
        print("Usage: {} [file ...]".format(binary))
        sys.exit(0)
    checkfor("convert")
    procs = []
    maxprocs = cpu_count()
    # Slice instead of deleting argv[0], so the caller's list (normally
    # sys.argv itself) is left untouched.
    for ifile in argv[1:]:
        while len(procs) >= maxprocs:
            manageprocs(procs)
        procs.append(startconvert(ifile))
    while len(procs) > 0:
        manageprocs(procs)


# This is the main program ##
if __name__ == "__main__":
    main(sys.argv)
I am having a hard time passing the arguments as value for my script in python. Here's my code:
import request, json, sys
def main():
# Download the list of posts and print every entry whose 'userId' and 'id'
# equal the two command line arguments.
# NOTE(review): the call below uses the name `requests`, while the import
# line of this snippet imports `request`; confirm the intended module name.
url = 'https://jsonplaceholder.typicode.com/posts'
r = requests.get(url)
data = json.loads(r.text)
# Exactly two arguments are required (sys.argv[0] is the script itself).
if len(sys.argv) != 3:
print("Usage must equal [userId] [postId]")
exit()
for user in data:
# sys.argv entries are always str. NOTE(review): if the decoded JSON holds
# integers for 'userId' and 'id' (as the answer following this snippet
# states), this comparison is never true and nothing is printed.
if user['userId'] == sys.argv[1] and user['id'] == sys.argv[2]:
print('here i am')
print(user)
if __name__ == "__main__":
main()
When I run python -m test 1 1, nothing happens. But it does trigger when I don't have enough arguments or too many.
The problem is that command line arguments are strings and the data you seek are integers. You could convert sys.argv[1] and sys.argv[2] to integers with int(), or you could use the argparse module to build a more comprehensive command line parser.
import requests, json, sys, argparse
def main():
    """Print the posts that match a user id and a post id.

    Both ids are read from the command line as integers (argparse does the
    conversion), so they can be compared with the values of the 'userId'
    and 'id' keys of the downloaded entries.

    Raises:
        requests.HTTPError: when the server answers with an error status.
        requests.Timeout: when the server does not answer within 10 seconds.
    """
    parser = argparse.ArgumentParser(description='Do all the things')
    parser.add_argument('user_id', type=int,
                        help='the user id')
    parser.add_argument('id', type=int,
                        help='the other id')
    args = parser.parse_args()

    url = 'https://jsonplaceholder.typicode.com/posts'
    # Without a timeout the request can block forever on a stalled server.
    r = requests.get(url, timeout=10)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    r.raise_for_status()
    data = r.json()

    for user in data:
        if user['userId'] == args.user_id and user['id'] == args.id:
            print('here i am')
            print(user)


if __name__ == "__main__":
    main()
I'm attempting to parse IGMPv3 packets with the RawPcapReader in scapy. For some reason unbeknownst to me it's not detecting the IGMP layer:
#!/usr/bin/env python3
import argparse
import os.path
import sys
from scapy.contrib.igmpv3 import IGMPv3
from scapy.utils import RawPcapReader
from scapy.layers.l2 import CookedLinux
from scapy.layers.inet import IP
def pcap_parser(pcap_file):
# Walk over every record of `pcap_file` and look up the IGMPv3 layer of each
# IP packet whose protocol field is 2. Nothing is returned or printed; the
# result of the lookup is assigned to `igmp_pkt` and not used further.
for (pkt_data, pkt_metadata) in RawPcapReader(pcap_file):
# Each raw record is dissected as a CookedLinux frame.
ether_pkt = CookedLinux(pkt_data)
if ether_pkt.proto != 0x800:
# Ignore non-ip packets.
continue
ip_pkt = ether_pkt[IP]
ip_proto = ip_pkt.fields['proto']
# IP protocol number 2 is IGMP.
if ip_proto == 2:
# NOTE(review): per the text following this snippet, this lookup raises
# "IndexError: Layer [IGMPv3] not found". Presumably the payload was not
# dissected as IGMPv3; confirm how the contrib layer is bound to IP.
igmp_pkt = ip_pkt[IGMPv3]
def _command_line_args():
parser = argparse.ArgumentParser()
parser.add_argument('--pcap', metavar='<input pcap file>', help='pcap file to parse', required=True)
args = parser.parse_args()
return args
def main():
    """Validate the ``--pcap`` argument and parse the capture file.

    Exits with status -1, after a message on standard error, when the file
    does not exist.
    """
    options = _command_line_args()
    pcap_path = options.pcap
    if os.path.exists(pcap_path):
        pcap_parser(pcap_path)
        return
    print(f'Input pcap file "{pcap_path}" does not exist', file=sys.stderr)
    sys.exit(-1)


if __name__ == '__main__':
    main()
I get an IndexError: Layer [IGMPv3] not found error, despite the packets being detected in Wireshark:
I have a project that needs to take a recorded audio file, extract the text from it in code, and then match the extracted text against another text to verify it.
My problem is:
I can't use a recorded file in the code; it doesn't read the file.
The init function is the foundation of the code.
The verify function confirms that the recognized speech matches the text.
import argparse
import json
import os
import queue
import random
import sys
from difflib import SequenceMatcher
import numpy as np
import sounddevice as sd
import vosk
q = queue.Queue()
def int_or_str(text):
    """Argument-parsing helper: give back an ``int`` if *text* is numeric.

    Any value that ``int()`` rejects with ``ValueError`` is returned
    untouched, so ``--device`` accepts both an ID and a name substring.
    """
    try:
        as_number = int(text)
    except ValueError:
        as_number = text
    return as_number
def callback(indata, frames, time, status):
    """Queue one captured audio block for the recogniser.

    This is called (from a separate thread) for each audio block. A truthy
    ``status`` is reported on standard error; the block itself is copied to
    ``bytes`` and put on the module-level queue ``q``.
    """
    if status:
        print(status, file=sys.stderr)
    block_bytes = bytes(indata)
    q.put(block_bytes)
def init():
# Parse the command line, fill in defaults and load the Vosk model.
# Returns the tuple (model, args).
#
# Stage 1: a help-less parser that only knows -l/--list-devices, so the
# device list can be printed without supplying any other option.
parser = argparse.ArgumentParser(add_help=False)
parser.add_argument(
'-l', '--list-devices', action='store_true',
help='show list of audio devices and exit')
args, remaining = parser.parse_known_args()
if args.list_devices:
print(sd.query_devices())
parser.exit(0)
# Stage 2: the full parser inherits -l from stage 1 and parses what is left.
parser = argparse.ArgumentParser(
description=__doc__,
formatter_class=argparse.RawDescriptionHelpFormatter,
parents=[parser])
parser.add_argument(
'-f', '--filename', type=str, metavar='FILENAME',
help='audio file to store recording to')
parser.add_argument(
'-m', '--model', type=str, metavar='MODEL_PATH',
help='Path to the model')
parser.add_argument(
'-d', '--device', type=int_or_str,
help='input device (numeric ID or substring)')
parser.add_argument(
'-r', '--samplerate', type=int, help='sampling rate')
args = parser.parse_args(remaining)
try:
# The model path defaults to a folder named "model".
if args.model is None:
args.model = "model"
if not os.path.exists(args.model):
print("Please download a model for your language from https://alphacephei.com/vosk/models")
print("and unpack as 'model' in the current folder.")
parser.exit(0)
# Without -r, fall back to the default sample rate of the input device.
if args.samplerate is None:
device_info = sd.query_devices(args.device, 'input')
# soundfile expects an int, sounddevice provides a float:
args.samplerate = int(device_info['default_samplerate'])
model = vosk.Model(args.model)
# NOTE(review): -f opens a file for *writing* ("audio file to store
# recording to"); it is not read as input. `dump_fn` is neither returned
# nor used or closed anywhere in this function.
if args.filename:
dump_fn = open(args.filename, "wb")
else:
dump_fn = None
except KeyboardInterrupt:
print('\nDone')
parser.exit(0)
except Exception as e:
# NOTE(review): the message string is passed as the first positional
# argument of parser.exit(); confirm it is meant as the exit status.
parser.exit(type(e).__name__ + ': ' + str(e))
return model, args
def verify(random_sentence, model, args):
    """Listen on the microphone and compare one utterance with a sentence.

    Audio blocks delivered to the queue ``q`` by ``callback`` are fed to a
    recogniser until it reports a completed utterance. The recognised text
    counts as a match when its ``SequenceMatcher`` ratio against
    ``random_sentence`` is above 0.65. The true/false counts are printed
    at the end.

    Arguments:
        random_sentence: the text the speaker is expected to say.
        model: a loaded ``vosk.Model``.
        args: namespace providing ``samplerate`` and ``device``.
    """
    num, T_num, F_num, num_word = 0, 0, 0, 1
    with sd.RawInputStream(samplerate=args.samplerate, blocksize=8000, device=args.device, dtype='int16',
                           channels=1, callback=callback):
        rec = vosk.KaldiRecognizer(model, args.samplerate)
        print("{}) ".format(num_word), random_sentence, end='\n')
        print('=' * 30, end='\n')
        run = True
        while run:
            data = q.get()
            if rec.AcceptWaveform(data):
                res = json.loads(rec.FinalResult())
                # Normalise Arabic yeh to Persian yeh before comparing.
                res['text'] = res['text'].replace('ي', 'ی')
                if SequenceMatcher(None, random_sentence, res['text']).ratio() > 0.65:
                    T_num += 1
                else:
                    F_num += 1
                # Augmented assignment cannot target a tuple, so each
                # counter is incremented on its own.
                num += 1
                num_word += 1
                run = False
    print('=' * 30)
    print('True Cases : {}\n False Cases : {}'.format(T_num, F_num))
# Script entry point: load the model once, then run a single verification.
# NOTE(review): `random_sentences` is not defined anywhere in the visible
# source; confirm where the expected sentence is supposed to come from.
if __name__ == "__main__":
model, args = init()
verify(random_sentences, model, args)
I have been working on a similar project. I modified the code from VOSK Git repo and wrote the following function that takes file name / path as the input and outputs the captured text. Sometimes, when there is a long pause (~seconds) in the audio file, the returned text would be an empty string. To remedy this problem, I had to write additional code that picks out the longest string that was captured. I could make do with this fix.
def get_text_from_voice(filename, model=None):
    """Transcribe a mono 16-bit PCM WAV file and return the recognised text.

    The text of every completed segment, including the one flushed at the
    end of the file, is joined with single spaces. If no segment produced
    any text, the longest partial hypothesis seen is returned instead, and
    an empty string if there was none.

    Arguments:
        filename: name / path of the WAV file.
        model: optional, already loaded Vosk ``Model``. When omitted, the
            model is loaded from the folder "model" in the current
            directory; pass a model to avoid reloading it on every call.

    Returns:
        The recognised text as a string.

    Exits with status 1 when the model folder is missing or the file is not
    mono 16-bit uncompressed PCM.
    """
    if model is None and not os.path.exists("model"):
        print ("Please download the model from https://alphacephei.com/vosk/models and unpack as 'model' in the current folder.")
        raise SystemExit(1)

    finals = []
    partials = []
    # The context manager closes the file on every path out of the block.
    with wave.open(filename, "rb") as wf:
        if wf.getnchannels() != 1 or wf.getsampwidth() != 2 or wf.getcomptype() != "NONE":
            print ("Audio file must be WAV format mono PCM.")
            raise SystemExit(1)
        if model is None:
            model = Model("model")
        rec = KaldiRecognizer(model, wf.getframerate())
        rec.SetWords(True)
        while True:
            data = wf.readframes(4000)
            if len(data) == 0:
                break
            # Fetch each result exactly once and reuse it for printing.
            if rec.AcceptWaveform(data):
                result = rec.Result()
                finals.append(result)
                print(result)
            else:
                partial = rec.PartialResult()
                partials.append(partial)
                print(partial)
        # Flush whatever is still buffered after the last chunk.
        finals.append(rec.FinalResult())

    segments = [json.loads(item).get("text", "") for item in finals]
    txt_str = " ".join(segment for segment in segments if segment)
    if txt_str:
        return txt_str

    # Nothing was finalised: fall back to the longest partial hypothesis.
    hypotheses = [json.loads(item).get("partial", "") for item in partials]
    return max(hypotheses, key=len, default='')
Make sure that the correct model is present in the same directory or put in the path to the model. Also, note that VOSK accepts audio files only in wav mono PCM format.
I found a Python script to list all vCenter VM attributes, but now I need to store some of the attributes in a Python list (or array, dict, ...).
But it doesn't work.
My getVminfos.py :
EDIT : the right file :
import argparse
import atexit
import itertools
import unicodedata
import pyVmomi
from pyVmomi import vmodl
from pyVmomi import vim
from pyVim.connect import SmartConnect, Disconnect
def GetArgs():
    """Parse the connection options from the command line.

    Returns:
        Namespace with ``host`` and ``user`` (both required), ``port``
        (int, default 443) and ``password`` (optional).
    """
    cli = argparse.ArgumentParser(description='Process args for retrieving all the Virtual Machines')
    option_table = (
        (('-s', '--host'), dict(required=True, action='store', help='Remote host to connect to')),
        (('-o', '--port'), dict(type=int, default=443, action='store', help='Port to connect on')),
        (('-u', '--user'), dict(required=True, action='store', help='User name to use when connecting to host')),
        (('-p', '--password'), dict(required=False, action='store', help='Password to use when connecting to host')),
    )
    for flags, settings in option_table:
        cli.add_argument(*flags, **settings)
    return cli.parse_args()
def print_vm_info(virtual_machine):
"""
Print the name, template flag, guest OS name and power state of one
virtual machine and, when the guest reports an IP address, append that
address to Ansible_Hosts.
"""
# NOTE(review): these lists are created anew on every call, so nothing
# collected for one virtual machine is still there for the next one.
Ansible_Hosts = []
Ansible_Groups = []
Ansible_Names = []
summary = virtual_machine.summary
print("Name : ", summary.config.name)
print("Template : ", summary.config.template)
#print("Path : ", summary.config.vmPathName)
# NOTE(review): the print lines without parentheses below are Python 2
# print statements, mixed with the print() calls above.
print"Guest : ", str(unicodedata.normalize('NFKD', summary.config.guestFullName))
#print("Instance UUID : ", summary.config.instanceUuid)
#print("Bios UUID : ", summary.config.uuid)
print"State : ", summary.runtime.powerState
if summary.guest is not None:
ip_address = summary.guest.ipAddress
if ip_address:
# The address is appended wrapped in its own one-element list.
Ansible_Hosts.append([ip_address])
# The slice starts at index 1, so the first (index 0) entry is never shown;
# with at most one entry in the list this prints an empty list.
print "Ansible_Hosts[1:15]", Ansible_Hosts[1:15]
def main():
# Connect to the host given on the command line and call print_vm_info()
# for every virtual machine found below the root folder.
# Returns 0 on success and -1 when the connection fails or a vmodl fault
# is caught.
args = GetArgs()
try:
si = SmartConnect(host=args.host,user=args.user,pwd=args.password,port=int(args.port))
if not si:
print("Could not connect to the specified host using specified "
"username and password")
return -1
# Disconnect automatically when the interpreter exits.
atexit.register(Disconnect, si)
content = si.RetrieveContent() # get root folder
container = content.rootFolder # starting point to look into
viewType = [vim.VirtualMachine] # object types to look for
recursive = True # whether we should look into it recursively
containerView = content.viewManager.CreateContainerView(
container, viewType, recursive)
children = containerView.view
for child in children:
print_vm_info(child)
except vmodl.MethodFault as error:
print("Caught vmodl fault : " + error.msg)
return -1
return 0
# Start program
# NOTE(review): the value returned by main() is discarded here, so it does
# not become the exit status of the process.
if __name__ == "__main__":
main()
The prints work like a charm, but my lists (Ansible_Hosts, ...) are always empty...
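The list initialization statements (Ansible_Hosts = [] etc.) should go in main(), with the lists then passed to (or otherwise shared with) print_vm_info(); as written, every call to print_vm_info() starts over with new, empty lists.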