Change languag of text to speech

Change languag of text to speech - python

I want to change the voice of azure from python, with these characteristics
languageCode = 'es‑MX'
ssmlGender = 'FEMALE'
voicName = 'es‑MX‑DaliaNeural'
but i'm new to azure so i don't know how, this is my code:
import PyPDF2
import azure.cognitiveservices.speech as sdk
key = "fake key"
region = "fake region"
config = sdk.SpeechConfig(subscription=key, region=region)
synthesizer = sdk.SpeechSynthesizer(speech_config=config)
book = open("prueba.pdf", "rb")
reader = PyPDF2.PdfFileReader(book)
for num in range(0,reader.numPages):
text = reader.getPage(num).extractText()
result = synthesizer.speak_text_async(text).get()

Acording to the documentation https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/how-to-speech-synthesis?tabs=browserjs%2Cterminal&pivots=programming-language-python#select-synthesis-language-and-voice you should be able to do:
config.speech_synthesis_language = "es‑MX"
config.speech_synthesis_voice_name ="es-MX-DaliaNeural"
The list of voices is here https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support?tabs=tts

Related

How to display text on the screen it is said over the audio

As a personal project, I decided to create one of the reddit text-to-speech bot.
I pulled all the data from reddit with praw
import praw, random
def scrapeData(subredditName):
# Instantiate praw
reddit = praw.Reddit()
# Get subreddit
subreddit = reddit.subreddit(subredditName)
# Get a bunch of posts and convert them into a list
posts = list(subreddit.new(limit=100))
# Get random number
randomNumber = random.randint(0, 100)
# Store post's title and description in variables
postTitle = posts[randomNumber].title
postDesc = posts[randomNumber].selftext
return postTitle + " " + postDesc
Then, I converted it to speech stored in a .mp3 file with gTTS.
from google.cloud import texttospeech
def convertTextToSpeech(textString):
# Instantiate TTS
client = texttospeech.TextToSpeechClient().from_service_account_json("path/to/json")
# Set text input to be synthesized
synthesisInput = texttospeech.SynthesisInput(text=textString)
# Build the voice request
voice = texttospeech.VoiceSelectionParams(language_code = "en-us",
ssml_gender = texttospeech.SsmlVoiceGender.MALE)
# Select the type of audio file
audioConfig = texttospeech.AudioConfig(audio_encoding =
texttospeech.AudioEncoding.MP3)
# Perform the TTS request on the text input
response = client.synthesize_speech(input = synthesisInput, voice =
voice, audio_config= audioConfig)
# Convert from binary to mp3
with open("output.mp3", "wb") as out:
out.write(response.audio_content)
I've created an .mp4 with moviepy that has generic footage in the background with the audio synced over it,
from moviepy.editor import *
from moviepy.video.tools.subtitles import SubtitlesClip
# get vide and audio source files
clip = VideoFileClip("background.mp4").subclip(20,30)
audio = AudioFileClip("output.mp3").subclip(0, 10)
# Set audio and create final video
videoClip = clip.set_audio(audio)
videoClip.write_videofile("output.mp4")
but my issue is I can't find a way to have only the current word or sentence displayed on screen as a subtitle, rather than the entire post.

How to integrate Azure text to speech with streamlit?

I am trying to integrate azure text to speech with streamlit.
import azure.cognitiveservices.speech as speechsdk
import streamlit as st
st.title("Let's learn Math!")
def recognize_from_microphone():
speech_config = speechsdk.SpeechConfig(subscription="743ae1f5555f49f9a5de4457d4e91b2d", region="australiaeast")
speech_config.speech_recognition_language="en-US"
#To recognize speech from an audio file, use `filename` instead of `use_default_microphone`:
#audio_config = speechsdk.audio.AudioConfig(filename="YourAudioFile.wav")
audio_config = speechsdk.audio.AudioConfig(use_default_microphone=True)
speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_config)
st.text("Speak into your microphone.")
speech_recognition_result = speech_recognizer.recognize_once_async().get()
if speech_recognition_result.reason == speechsdk.ResultReason.RecognizedSpeech:
st.text("Recognized: {}".format(speech_recognition_result.text))
elif speech_recognition_result.reason == speechsdk.ResultReason.NoMatch:
st.text("No speech could be recognized: {}".format(speech_recognition_result.no_match_details))
elif speech_recognition_result.reason == speechsdk.ResultReason.Canceled:
cancellation_details = speech_recognition_result.cancellation_details
st.text("Speech Recognition canceled: {}".format(cancellation_details.reason))
if cancellation_details.reason == speechsdk.CancellationReason.Error:
st.text("Error details: {}".format(cancellation_details.error_details))
st.text("Did you set the speech resource key and region values?")
text = st.text_input("Enter text", value="Hi", max_chars=5)
def audio_output(text):
speech_config = speechsdk.SpeechConfig(subscription="743ae1f5555f49f9a5de4457d4e91b2d", region="australiaeast")
audio_config = speechsdk.audio.AudioOutputConfig(use_default_speaker=True)
# The language of the voice that speaks.
speech_config.speech_synthesis_voice_name='en-US-JennyNeural'
speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)
# Get text from the console and synthesize to the default speaker.
st.write("Enter some text that you want to speak >")
speech_synthesis_result = speech_synthesizer.speak_text_async(text).get()
if speech_synthesis_result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
st.write("Speech synthesized for text [{}]".format(text))
elif speech_synthesis_result.reason == speechsdk.ResultReason.Canceled:
cancellation_details = speech_synthesis_result.cancellation_details
st.write("Speech synthesis canceled: {}".format(cancellation_details.reason))
if cancellation_details.reason == speechsdk.CancellationReason.Error:
if cancellation_details.error_details:
st.write("Error details: {}".format(cancellation_details.error_details))
st.write("Did you set the speech resource key and region values?")
recognize_from_microphone()
audio_output(text)
This is my code, but streamlit is not loading the functions at all. Is there any fix? I am new to streamlit and azure.

You declared the functions but didn't called them.

Google Vision API problem with batch annotations

I wanted to use Cloud Vision API to detect labels from ca. 40K photographs and download the results as CSV files. I uploaded photos into the cloud storage and used the following code, but the error occured. I asked a person who uses python in his job but he cannot deal with this error. Can you help mi with fixing it?
TypeError: Invalid constructor input for BatchAnnotateImagesRequest: [{'image': source {
image_uri: "gs://bucket/image-path.jpg"
}
, 'features': [{'type': <Type.LABEL_DETECTION: 4>}]}]
The code I used:
from google.cloud import
from google.cloud import storage
from google.cloud.vision_v1 import ImageAnnotatorClient
from google.cloud.vision_v1 import types
import os
import json
import numpy as np
os.environ["GOOGLE_APPLICATION_CREDENTIALS"]='C://file-path.json'
#(created in step 1)
# Get GCS bucket
storage_client = storage.Client()
bucket = storage_client.bucket('bucket_name')
image_paths = []
for blob in list(bucket.list_blobs()):
image_paths.append("gs://bucket_name/"+blob.name)
# We can send a maximum of 16 images per request.
start = 0
end = 16
label_output = []
for i in range(int(np.floor(len(image_paths)/16))+1):
requests = []
client = vision.ImageAnnotatorClient()
for image_path in image_paths[start:end]:
image = types.Image()
image.source.image_uri = image_path
requests.append({'image': image,'features': [{'type': vision.Feature.Type.LABEL_DETECTION}]})
response = client.batch_annotate_images(requests)
for image_path, i in zip(image_paths[start:end], response.responses):
labels = [{label.description: label.score} for label in i.label_annotations]
labels = {k: v for d in labels for k, v in d.items()}
filename = os.path.basename(image_path)
l = {'filename': filename, 'labels': labels}
label_output.append(l)
start = start+16
end = end+16
#export results to CSV file
for l in label_output:
print('"' + label_output[l]['filename'] + '";', end = '')
for label in label_output[l]["labels"]:
print('"' + label + '";"' + label_output[l][label] + '";', end = '')
print("")

batch_annotate_images() is not getting the contents of requests properly. To fix this, just assign your variable requests explicitly to the parameter requests of batch_annotate_images().
response = client.batch_annotate_images(requests=requests)
See batch_annotate_images() for reference. Also if you are planning to update your Vision API to 2.3.1, you might encounter errors on features: see this reference for the updated usage of its parameters.

How to set title to a powerpoint slide using win32com client

Could anyone please help me in setting a title to a powerpoint slide using win32com library in Python. The following is the code. I have used the slide layout 11 which denotes Title only
import openpyxl as op
import pptx
import os
import win32com.client
import smtplib
os.chdir(r'C:\Users\aju.mathew.thomas\Desktop\PBC\Pepsi\PBC\Performance Reports\2019\PPT')
path= r'C:\Users\aju.mathew.thomas\Desktop\PBC\Pepsi\PBC\Performance Reports\2019\PPT\Summary2.xlsx'
wb = op.load_workbook(path)
ExcelApp = win32com.client.Dispatch("Excel.Application")
ExcelApp.Visible = False
workbook = ExcelApp.Workbooks.open(r'C:\Users\aju.mathew.thomas\Desktop\PBC\Pepsi\PBC\Performance Reports\2019\PPT\Summary2.xlsx')
worksheet = workbook.Worksheets("Summary")
excelrange = worksheet.Range("A2:R24")
PptApp = win32com.client.Dispatch("Powerpoint.Application")
PptApp.Visible = True
z= excelrange.Copy()
PPtPresentation = PptApp.Presentations.Open(r'C:\Users\aju.mathew.thomas\Desktop\PBC\Pepsi\PBC\Performance Reports\2019\PPT\PBC Performance Update.pptx')
pptSlide = PPtPresentation.Slides.Add(1,11)
#pptSlide.Title.Characters.Text ='Metrics'
#title = pptSlide.Shapes.Title
#title.Text ='Metrics Summary'
pptSlide.Shapes.PasteSpecial(z)
PPtPresentation.Save()

Just a small syntax issue:
PptApp = win32com.client.Dispatch("Powerpoint.Application")
PptApp.Visible = True
z= excelrange.Copy()
PPtPresentation = PptApp.Presentations.Open(r'C:\Users\aju.mathew.thomas\Desktop\PBC\Pepsi\PBC\Performance Reports\2019\PPT\PBC Performance Update.pptx')
pptSlide = PPtPresentation.Slides.Add(1,11)
title = pptSlide.Shapes.Title
title.TextFrame.TextRange.Text = 'My title here'

Page orientation doesn't work in OpenOffice with Python

I am trying to set the pageorientation in an OpenOffice document with python. The following code i use to try this:
import json
from win32com.client import Dispatch as Dispatch
svm = Dispatch("com.sun.star.ServiceManager")
svm._FlagAsMethod("Bridge_GetStruct")
coreflect = svm.createInstance("com.sun.star.reflection.CoreReflection")
desktop = svm.createInstance("com.sun.star.frame.Desktop")
doc = desktop.loadComponentFromURL("private:factory/swriter", "_blank",0, [])
txt = doc.getText()
cur = txt.createTextCursor()
Then i tried two different approaches:
p = doc.getPagePrintSettings()
p[8].Value = True
doc.setPagePrintSettings(p)
and
oStyleFamilies = doc.getStyleFamilies()
oObj1 = oStyleFamilies.getByName("PageStyles")
oObj2 = oObj1.getByName("Default")
oObj2.IsLandscape = True
Both give no error, but the page is still in Portrait.. Anybody has an idea?
Thanks in advance!!

try : oObj2.setPropertyValue("IsLandscape",True)
see http://codesnippets.services.openoffice.org/Calc/Calc.SwitchOrientation.snip

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Change languag of text to speech - python

Related

How to display text on the screen it is said over the audio

How to integrate Azure text to speech with streamlit?

Google Vision API problem with batch annotations

How to set title to a powerpoint slide using win32com client

Page orientation doesn't work in OpenOffice with Python

Categories

Resources