pyttsx3 makes pauses every 82 words - python

I am learning to use the pyttsx3 module.
I want to produce continuous speech without any pauses, so I deleted every '.' and ',', but pauses still appear in what I thought were random places. I dug deeper and figured out that it makes a pause every 82 (+/- 1) words.
Any idea how to fix it?
Here is my code:
import pyttsx3

# Use a raw string so the backslashes in the Windows path aren't treated as escapes
with open(r'D:\D-Chilldom\QuickMovieRecap\Movie.txt', 'r') as f:
    text = f.read()

text = text.replace('.', '')
text = text.replace(',', '')

# Initialize the TTS engine
engine = pyttsx3.init()

# Set the rate of speech (words per minute)
rate = 250
engine.setProperty('rate', rate)

# Set the volume of the voice
volume = 1
engine.setProperty('volume', volume)

# Set the voice to use
voice_id = "com.apple.speech.synthesis.voice.samantha"
engine.setProperty('voice', voice_id)

engine.save_to_file(text, 'D:/D-Chilldom/QuickMovieRecap/rec/zzzz.mp3')
engine.runAndWait()

Related

Change language of text to speech

I want to change the voice of Azure from Python, with these characteristics:
languageCode = 'es-MX'
ssmlGender = 'FEMALE'
voiceName = 'es-MX-DaliaNeural'
but I'm new to Azure so I don't know how. This is my code:
import PyPDF2
import azure.cognitiveservices.speech as sdk

key = "fake key"
region = "fake region"
config = sdk.SpeechConfig(subscription=key, region=region)
synthesizer = sdk.SpeechSynthesizer(speech_config=config)

book = open("prueba.pdf", "rb")
reader = PyPDF2.PdfFileReader(book)
for num in range(0, reader.numPages):
    text = reader.getPage(num).extractText()
    result = synthesizer.speak_text_async(text).get()
According to the documentation https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/how-to-speech-synthesis?tabs=browserjs%2Cterminal&pivots=programming-language-python#select-synthesis-language-and-voice you should be able to do:
config.speech_synthesis_language = "es-MX"
config.speech_synthesis_voice_name = "es-MX-DaliaNeural"
The list of voices is here https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support?tabs=tts

How to display text on the screen as it is said over the audio

As a personal project, I decided to create one of those Reddit text-to-speech bots.
I pulled all the data from Reddit with praw:
import praw, random

def scrapeData(subredditName):
    # Instantiate praw
    reddit = praw.Reddit()
    # Get subreddit
    subreddit = reddit.subreddit(subredditName)
    # Get a bunch of posts and convert them into a list
    posts = list(subreddit.new(limit=100))
    # Pick a random index, bounded by the list length (which may be under 100)
    randomNumber = random.randint(0, len(posts) - 1)
    # Store the post's title and description in variables
    postTitle = posts[randomNumber].title
    postDesc = posts[randomNumber].selftext
    return postTitle + " " + postDesc
Then, I converted it to speech stored in an .mp3 file with Google Cloud Text-to-Speech:
from google.cloud import texttospeech

def convertTextToSpeech(textString):
    # Instantiate the TTS client (from_service_account_json is a classmethod,
    # so it is called on the class rather than on an instance)
    client = texttospeech.TextToSpeechClient.from_service_account_json("path/to/json")
    # Set the text input to be synthesized
    synthesisInput = texttospeech.SynthesisInput(text=textString)
    # Build the voice request
    voice = texttospeech.VoiceSelectionParams(
        language_code="en-US",
        ssml_gender=texttospeech.SsmlVoiceGender.MALE)
    # Select the type of audio file
    audioConfig = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3)
    # Perform the TTS request on the text input
    response = client.synthesize_speech(
        input=synthesisInput, voice=voice, audio_config=audioConfig)
    # Write the binary audio content to an .mp3 file
    with open("output.mp3", "wb") as out:
        out.write(response.audio_content)
I've created an .mp4 with moviepy that has generic footage in the background with the audio synced over it:
from moviepy.editor import *
from moviepy.video.tools.subtitles import SubtitlesClip

# Get video and audio source files
clip = VideoFileClip("background.mp4").subclip(20, 30)
audio = AudioFileClip("output.mp3").subclip(0, 10)
# Set audio and create the final video
videoClip = clip.set_audio(audio)
videoClip.write_videofile("output.mp4")
but my issue is that I can't find a way to display only the current word or sentence on screen as a subtitle, rather than the entire post.

How to integrate Azure text to speech with Streamlit?

I am trying to integrate Azure text to speech with Streamlit.
import azure.cognitiveservices.speech as speechsdk
import streamlit as st

st.title("Let's learn Math!")

def recognize_from_microphone():
    speech_config = speechsdk.SpeechConfig(subscription="YourSubscriptionKey", region="australiaeast")
    speech_config.speech_recognition_language = "en-US"
    # To recognize speech from an audio file, use `filename` instead of `use_default_microphone`:
    # audio_config = speechsdk.audio.AudioConfig(filename="YourAudioFile.wav")
    audio_config = speechsdk.audio.AudioConfig(use_default_microphone=True)
    speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_config)
    st.text("Speak into your microphone.")
    speech_recognition_result = speech_recognizer.recognize_once_async().get()
    if speech_recognition_result.reason == speechsdk.ResultReason.RecognizedSpeech:
        st.text("Recognized: {}".format(speech_recognition_result.text))
    elif speech_recognition_result.reason == speechsdk.ResultReason.NoMatch:
        st.text("No speech could be recognized: {}".format(speech_recognition_result.no_match_details))
    elif speech_recognition_result.reason == speechsdk.ResultReason.Canceled:
        cancellation_details = speech_recognition_result.cancellation_details
        st.text("Speech Recognition canceled: {}".format(cancellation_details.reason))
        if cancellation_details.reason == speechsdk.CancellationReason.Error:
            st.text("Error details: {}".format(cancellation_details.error_details))
            st.text("Did you set the speech resource key and region values?")

text = st.text_input("Enter text", value="Hi", max_chars=5)

def audio_output(text):
    speech_config = speechsdk.SpeechConfig(subscription="YourSubscriptionKey", region="australiaeast")
    audio_config = speechsdk.audio.AudioOutputConfig(use_default_speaker=True)
    # The language of the voice that speaks.
    speech_config.speech_synthesis_voice_name = 'en-US-JennyNeural'
    speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)
    st.write("Enter some text that you want to speak >")
    speech_synthesis_result = speech_synthesizer.speak_text_async(text).get()
    if speech_synthesis_result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
        st.write("Speech synthesized for text [{}]".format(text))
    elif speech_synthesis_result.reason == speechsdk.ResultReason.Canceled:
        cancellation_details = speech_synthesis_result.cancellation_details
        st.write("Speech synthesis canceled: {}".format(cancellation_details.reason))
        if cancellation_details.reason == speechsdk.CancellationReason.Error:
            if cancellation_details.error_details:
                st.write("Error details: {}".format(cancellation_details.error_details))
            st.write("Did you set the speech resource key and region values?")

recognize_from_microphone()
audio_output(text)
This is my code, but Streamlit is not loading the functions at all. Is there a fix? I am new to Streamlit and Azure.
You declared the functions but didn't call them; add calls such as recognize_from_microphone() and audio_output(text) at the end of the script.

How to change the volume of stem files while playing using Python

I'm attempting to write a Python project that plays multiple parts of a song at the same time.
For background information, a song is split into "stems", and each stem is played simultaneously to recreate the full song. What I am trying to achieve is using potentiometers to control the volume of each stem, so that the user can mix songs differently. For a commercial comparison, Kanye West's Stem Player is what I am trying to replicate.
I can change the volume of the overlaid song at the end, but what I want is to change the volume of each stem with a potentiometer while the song is playing. Is this even possible using pydub? Below is the code I have right now.
from pydub import AudioSegment
from pydub.playback import play

vocals = AudioSegment.from_file("walkin_vocals.mp3")
drums = AudioSegment.from_file("walkin_drums.mp3")
bass = AudioSegment.from_file("walkin_bass.mp3")

# Layer the stems on top of each other to rebuild the full song
vocalsDrums = vocals.overlay(drums)
bassVocalsDrums = vocalsDrums.overlay(bass)

# Reduce the gain of the whole mix by 20 dB
songQuiet = bassVocalsDrums - 20
play(songQuiet)
Solved: I ended up using pyaudio instead of pydub.
With pyaudio, I was able to define a custom stream_callback function. Within this callback, I multiply each stem by a volume modifier, then sum the stems into one output buffer.
def callback(in_data, frame_count, time_info, status):
    global drumsMod, vocalsMod, bassMod, otherMod
    # Read the next block of frames from each stem
    drums = drumsWF.readframes(frame_count)
    vocals = vocalsWF.readframes(frame_count)
    bass = bassWF.readframes(frame_count)
    other = otherWF.readframes(frame_count)
    # Decode the raw bytes into 16-bit integer samples
    decodedDrums = numpy.frombuffer(drums, numpy.int16)
    decodedVocals = numpy.frombuffer(vocals, numpy.int16)
    decodedBass = numpy.frombuffer(bass, numpy.int16)
    decodedOther = numpy.frombuffer(other, numpy.int16)
    # Scale each stem by its volume modifier and mix into a single buffer
    newdata = (decodedDrums*drumsMod + decodedVocals*vocalsMod + decodedBass*bassMod + decodedOther*otherMod).astype(numpy.int16)
    return (newdata.tobytes(), pyaudio.paContinue)

DialoGPT output using pyttsx3?

I want to use DialoGPT to have a conversation with me using a microphone and speaker.
However, before I get there, I want to begin by connecting this chatbot to a speaker using pyttsx3. I am able to use pyttsx3 to produce sound from my computer speakers with pre-inserted text (i.e. "hello world").
I am also able to use DialoGPT to have a conversation in the terminal.
Therein lies the first issue: connecting the two. I have pasted my current code below and was wondering if someone could offer me some assistance.
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import pyttsx3

engine = pyttsx3.init()
# Set properties before adding things to say
# Set speed percent (can be more than 100)
engine.setProperty('rate', 190)
# Set volume 0-1
engine.setProperty('volume', 0.7)
# Set ID of voice (a female voice)
voice_id = "com.apple.speech.synthesis.voice.karen"
engine.setProperty('voice', voice_id)

tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-large")
model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-large")

# Let's chat for 5 lines
for step in range(5):
    # encode the new user input, add the eos_token and return a tensor in PyTorch
    new_user_input_ids = tokenizer.encode(input(">> User:") + tokenizer.eos_token, return_tensors='pt')
    # append the new user input tokens to the chat history
    bot_input_ids = torch.cat([chat_history_ids, new_user_input_ids], dim=-1) if step > 0 else new_user_input_ids
    # generate a response while limiting the total chat history to 1000 tokens
    chat_history_ids = model.generate(bot_input_ids, max_length=1000, pad_token_id=tokenizer.eos_token_id)
    # print the last output tokens from the bot
    engine.say("DialoGPT: {}").format(tokenizer.decode(chat_history_ids[:, bot_input_ids.shape[-1]:][0], skip_special_tokens=True))
Solved~
import pyttsx3

engine = pyttsx3.init()
# Set properties before adding things to say
# Set speed percent (can be more than 100)
engine.setProperty('rate', 190)
# Set volume 0-1
engine.setProperty('volume', 0.7)
# Set ID of voice (a female voice)
voice_id = "com.apple.speech.synthesis.voice.karen"
engine.setProperty('voice', voice_id)

from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-large")
model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-large")

# Let's chat for 3 lines
for step in range(3):
    # encode the new user input, add the eos_token and return a tensor in PyTorch
    new_user_input_ids = tokenizer.encode(input("User:") + tokenizer.eos_token, return_tensors='pt')
    # append the new user input tokens to the chat history
    bot_input_ids = torch.cat([chat_history_ids, new_user_input_ids], dim=-1) if step > 0 else new_user_input_ids
    # generate a response while limiting the total chat history to 1000 tokens
    chat_history_ids = model.generate(bot_input_ids, max_length=1000, pad_token_id=tokenizer.eos_token_id)
    # decode the last output tokens once, then print and speak them
    # (the variable is named reply rather than str, which would shadow the builtin)
    reply = tokenizer.decode(chat_history_ids[:, bot_input_ids.shape[-1]:][0], skip_special_tokens=True)
    print("DialoGPT: {}".format(reply))
    engine.say(reply)
    engine.runAndWait()
