text_generator.py "Access is Denied"

text_generator.py "Access is Denied" - python

I'm having trouble with this code. For some reason whenever I use the test client, I get this message.
import stdio
import sys
from markov_model import MarkovModel
def main():
k = int(sys.argv[1])
T = int(sys.argv[2])
# text from standard input initialized to variable
text = sys.stdin.read()
# slices text and initializes it to kgram
kgram = text[:k]
model = markov_model(text, k)
# model is used to generate random length of text
stdio.writeln(model.gen(kgram, T))
if __name__ == '__main__':
main()
text_generator.py 5 50 < data / bible.txt
Access is denied.

Related

Computer crashing when using python tools in same script

I am attempting to use the speech recognition toolkit VOSK and the speech diarization package Resemblyzer to transcibe audio and then identify the speakers in the audio.
Tools:
https://github.com/alphacep/vosk-api
https://github.com/resemble-ai/Resemblyzer
I can do both things individually but run into issues when trying to do them when running the one python script.
I used the following guide when setting up the diarization system:
https://medium.com/saarthi-ai/who-spoke-when-build-your-own-speaker-diarization-module-from-scratch-e7d725ee279
Computer specs are as follows:
Intel(R) Core(TM) i3-7100 CPU # 3.90GHz, 3912 Mhz, 2 Core(s), 4 Logical Processor(s)
32GB RAM
The following is my code, I am not to sure if using threading is appropriate or if I even implemented it correctly, how can I best optimize this code as to achieve the results I am looking for and not crash.
from vosk import Model, KaldiRecognizer
from pydub import AudioSegment
import json
import sys
import os
import subprocess
import datetime
from resemblyzer import preprocess_wav, VoiceEncoder
from pathlib import Path
from resemblyzer.hparams import sampling_rate
from spectralcluster import SpectralClusterer
import threading
import queue
import gc
def recognition(queue, audio, FRAME_RATE):
model = Model("Vosk_Models/vosk-model-small-en-us-0.15")
rec = KaldiRecognizer(model, FRAME_RATE)
rec.SetWords(True)
rec.AcceptWaveform(audio.raw_data)
result = rec.Result()
transcript = json.loads(result)#["text"]
#return transcript
queue.put(transcript)
def diarization(queue, audio):
wav = preprocess_wav(audio)
encoder = VoiceEncoder("cpu")
_, cont_embeds, wav_splits = encoder.embed_utterance(wav, return_partials=True, rate=16)
print(cont_embeds.shape)
clusterer = SpectralClusterer(
min_clusters=2,
max_clusters=100,
p_percentile=0.90,
gaussian_blur_sigma=1)
labels = clusterer.predict(cont_embeds)
def create_labelling(labels, wav_splits):
times = [((s.start + s.stop) / 2) / sampling_rate for s in wav_splits]
labelling = []
start_time = 0
for i, time in enumerate(times):
if i > 0 and labels[i] != labels[i - 1]:
temp = [str(labels[i - 1]), start_time, time]
labelling.append(tuple(temp))
start_time = time
if i == len(times) - 1:
temp = [str(labels[i]), start_time, time]
labelling.append(tuple(temp))
return labelling
#return
labelling = create_labelling(labels, wav_splits)
queue.put(labelling)
def identify_speaker(queue1, queue2):
transcript = queue1.get()
labelling = queue2.get()
for speaker in labelling:
speakerID = speaker[0]
speakerStart = speaker[1]
speakerEnd = speaker[2]
result = transcript['result']
words = [r['word'] for r in result if speakerStart < r['start'] < speakerEnd]
#return
print("Speaker",speakerID,":",' '.join(words), "\n")
def main():
queue1 = queue.Queue()
queue2 = queue.Queue()
FRAME_RATE = 16000
CHANNELS = 1
podcast = AudioSegment.from_mp3("Podcast_Audio/Film-Release-Clip.mp3")
podcast = podcast.set_channels(CHANNELS)
podcast = podcast.set_frame_rate(FRAME_RATE)
first_thread = threading.Thread(target=recognition, args=(queue1, podcast, FRAME_RATE))
second_thread = threading.Thread(target=diarization, args=(queue2, podcast))
third_thread = threading.Thread(target=identify_speaker, args=(queue1, queue2))
first_thread.start()
first_thread.join()
gc.collect()
second_thread.start()
second_thread.join()
gc.collect()
third_thread.start()
third_thread.join()
gc.collect()
# transcript = recognition(podcast,FRAME_RATE)
#
# labelling = diarization(podcast)
#
# print(identify_speaker(transcript, labelling))
if __name__ == '__main__':
main()
When I say crash I mean everything freezes, I have to hold down the power button on the desktop and turn it back on again. No blue/blank screen, just frozen in my IDE looking at my code. Any help in resolving this issue would be greatly appreciated.

Pydubs AudioSegment was not returning a suitable type for the Resembylzer function preprocess_wav.
podcast = AudioSegment.from_mp3("Podcast_Audio/Film-Release-Clip.mp3")
preprocess_wav instead requires a Numpy Array / Path.
audio_file_path = 'Podcast_Audio/WAV-Film-Release-Clip.wav'
wav_fpath = Path(audio_file_path)
wav = preprocess_wav(wav_fpath)
Additionally preprocess_wav functionality can be achieved using Librosa if desired.
import librosa
def preprocess_wav(waveform, sr):
waveform = librosa.resample(waveform, orig_sr=sr, target_sr=16000)
waveform = waveform.astype(np.float32) / np.max(np.abs(waveform))
return waveform
waveform, sr = librosa.load('Podcast_Audio/WAV-Film-Release-Clip.wav')
wav = preprocess_wav(waveform, sr)

iterating intergers over a method from a range

Im trying to create a list of CommandLinks in Revit Dynamo using python with a forloop that checks the length of the inputs list and creates an array of Commandlinks from a range generated from that list. Is it possible to insert the integer into the CommandLink method using a forloop?
`
import clr
import sys
import System
clr.AddReference("System.Windows.Forms")
from System.Windows.Forms import Clipboard
# import Revit API
clr.AddReference("RevitAPI")
import Autodesk
from Autodesk.Revit.DB import *
clr.AddReference("RevitAPIUI")
from Autodesk.Revit.UI import (TaskDialog, TaskDialogCommonButtons,
TaskDialogCommandLinkId, TaskDialogResult)
title = IN[0]
buttonlists = IN[1]
resultslist = IN[2]
dialog = TaskDialog(title)
buttonNum = len(resultslist)
# Properties
dialog.MainInstruction = title
# dialog.ExpandedContent = expanded_content
# Settings and buttons
dialog.TitleAutoPrefix = False
dialog.AllowCancellation = True
dialog.CommonButtons = TaskDialogCommonButtons.Cancel
dialog.DefaultButton = TaskDialogResult.Cancel
# Add Command Link
for n in range(buttonNum+1):
dialog.AddCommandLink(TaskDialogCommandLinkId.CommandLink+(n), buttontext+(n))
result = dialog.Show()
if result == TaskDialogResult.Cancel:
OUT = 'Dialog was Cancelled'
if result == TaskDialogResult.CommandLink(n):
OUT = result(n)
`
enter image description here
I need to iterate (n) into the commandlink numbers and results, so it creates commandlinks based on the length of the input list.

ImportError: cannot import name 'BaseSSLError' from 'urllib3.connection'

I am trying to use mllfow library to log the variables. The code is very simple and basic. After running my script, I am receiving an error like cannot import name 'BaseSSLError' from 'urllib3.connection'. Please refer below code.
import mlflow
def calculate_nthpower(x, n):
return x**n
if __name__ == '__main__':
# context manager
with mlflow.start_run():
x, n = 2, 5
y = calculate_nthpower(x, n)
mlflow.log_param('x', x)
mlflow.log_param('n', n)
mlflow.log_metric("y", y)

How to delete X% of entries from a DynamoDB Table?

I want to remove 10% of entries from a DDB table every time a script is ran. So far, I have created a Python script using boto3 that will delete all items from a DDB table:
import boto3
import sys
src_region = sys.argv[1]
src_profile_name = sys.argv[2]
src_ddb_table = sys.argv[3]
# Create source session.
src_session = boto3.session.Session(profile_name=src_profile_name)
dynamoclient = src_session.client('dynamodb', region_name=src_region)
dynamoresponse = dynamoclient.get_paginator('scan').paginate(
TableName=src_ddb_table,
Select='ALL_ATTRIBUTES',
ReturnConsumedCapacity='NONE',
ConsistentRead=True
)
for page in dynamoresponse:
for item in page['Items']:
dynamoclient.delete_item(
Key={'testTableDest': item['testTableDest']},
TableName=src_ddb_table)
How can I modify this script to allow the user to select a percentage of entries they want to delete?
Thank you for any help!

If you want to delete them at random and are ok with a non-exact percentage you can do this easily with the python random package.
import boto3
import sys
import random
src_region = sys.argv[1]
src_profile_name = sys.argv[2]
src_ddb_table = sys.argv[3]
percent_delete = int(sys.argv[4]) # 20
# Create source session.
src_session = boto3.session.Session(profile_name=src_profile_name)
dynamoclient = src_session.client('dynamodb', region_name=src_region)
dynamoresponse = dynamoclient.get_paginator('scan').paginate(
TableName=src_ddb_table,
Select='ALL_ATTRIBUTES',
ReturnConsumedCapacity='NONE',
ConsistentRead=True
)
for page in dynamoresponse:
for item in page['Items']:
if random.random() * 100 < percent_delete:
dynamoclient.delete_item(
Key={'testTableDest': item['testTableDest']},
TableName=src_ddb_table)
This isn't "perfectly random" but will suffice.

Python : ValueError: could not convert string to float in real time data

I want to build an ecg. the filter is built in udoo, then I want to plot the signal in python. however it keeps getting this while I run my code:
ValueError: could not convert string to float.
import serial
import sys
import time
from pyqtgraph.Qt import QtGui, QtCore
import numpy as np
import pyqtgraph as pg
# constants
BAUDE_RATE = 9600
ARDUINO_MAX_INT = 2 ** 10
ARDUINO_MAX_VOLTAGE = 3.3
WINDOW_SIZE = 30
MAX_DATA_SIZE = 1024
# declare the Window
app = QtGui.QApplication([])
win = pg.GraphicsWindow(title="Arduino Analog Plotter")
win.resize(1000, 600)
# initialize plots
raw_plot = win.addPlot(title="Raw Pin Data")
raw_curve = raw_plot.plot(pen='y')
raw_plot.addLegend()
raw_plot.showGrid(True, True)
raw_plot.setYRange(0, 1200)
raw_plot.setXRange(0, 1024)
# disable auto size of the x-y axis
raw_plot.enableAutoRange('xy', False)
raw_data = np.zeros(1024)
# open serial
ser = serial.Serial('COM10', 115200, timeout=1)
line = pg.InfiniteLine(pos=1024, angle=0, pen=(24, 215, 248))
raw_plot.addItem(line)
ser.flushInput()
def gettemp(ser):
ser.write('t')
ser.flush()
return ser.readline().strip('\r').strip('\n').split(' ').pop(7)
def update():
global raw_data
# open serial port
raw_capture = []
for x in range(WINDOW_SIZE):
sensoroutput=gettemp()
r=sensoroutput
ser.readline().strip('\r').strip('\n').split(' ').pop(7)
raw_capture.append(float(r).pop(7))
raw_data = np.concatenate([raw_data, raw_capture])
# remove first bin to make room for new bin
if len(raw_data) > MAX_DATA_SIZE:
raw_data = raw_data[WINDOW_SIZE:]
# plot data
raw_curve.setData(raw_data)
def savecounter():
ser.close()
import atexit
atexit.register(savecounter)
timer = QtCore.QTimer()
timer.timeout.connect(update)
timer.start(0)
## Start Qt event loop unless running in interactive mode or using pyside.
if __name__ == '__main__':
import sys
if (sys.flags.interactive != 1) or not hasattr(QtCore, 'PYQT_VERSION'):
QtGui.QApplication.instance().exec_()
Does anybody know how to fix this?

Your sensoroutput seems to be a string.
You cannot convert it directly with float() :
raw_capture.append(float(r).pop(7))
Can you post what is the output of sensoroutput?

I'm just taking a wild stab here, but usually if you have both \r and \n for line endings the \r comes first and the \n comes second. The way you're stripping off those characters, the \r will remain because you try to strip it first, before the \n has been removed; float() will fail on the non-numeric character in the string. Try this instead, it will remove both end-of-line characters at the same time:
ser.readline().strip('\r\n').split(' ').pop(7)

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

text_generator.py "Access is Denied" - python

Related

Computer crashing when using python tools in same script

iterating intergers over a method from a range

ImportError: cannot import name 'BaseSSLError' from 'urllib3.connection'

How to delete X% of entries from a DynamoDB Table?

Python : ValueError: could not convert string to float in real time data

Categories

Resources