Change language of text to speech - python

I want to change the Azure text-to-speech voice from Python so that it has these characteristics:
languageCode = 'es-MX'
ssmlGender = 'FEMALE'
voiceName = 'es-MX-DaliaNeural'
I'm new to Azure, so I don't know how to do this. This is my code:
import PyPDF2
import azure.cognitiveservices.speech as sdk
# Credentials for the Azure Speech resource (placeholders).
key = "fake key"
region = "fake region"

# Build the speech configuration and a synthesizer that uses it.
config = sdk.SpeechConfig(subscription=key, region=region)
synthesizer = sdk.SpeechSynthesizer(speech_config=config)

# Open the PDF in a context manager so the file handle is closed even if
# reading or synthesis raises part-way through the document.
with open("prueba.pdf", "rb") as book:
    reader = PyPDF2.PdfFileReader(book)
    # Read the document page by page and speak each page's extracted text.
    for num in range(reader.numPages):
        text = reader.getPage(num).extractText()
        # Calling .get() on the async result before moving to the next page
        # keeps the pages in order.
        result = synthesizer.speak_text_async(text).get()

According to the documentation https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/how-to-speech-synthesis?tabs=browserjs%2Cterminal&pivots=programming-language-python#select-synthesis-language-and-voice you should be able to do:
# Set the synthesis language and voice on `config` before it is passed to
# sdk.SpeechSynthesizer(...). The locale must use plain ASCII hyphens
# ("es-MX"); a typographic non-breaking hyphen is a different character
# and will not match the locale name.
config.speech_synthesis_language = "es-MX"
config.speech_synthesis_voice_name = "es-MX-DaliaNeural"
The list of voices is here https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support?tabs=tts

Related

How to display text on the screen as it is said over the audio

As a personal project, I decided to create one of those Reddit text-to-speech bots.
I pulled all the data from reddit with praw
import praw, random
def scrapeData(subredditName):
    """Return the title and body text of one random recent post.

    Args:
        subredditName: Name of the subreddit to read; passed to
            ``reddit.subreddit``.

    Returns:
        The chosen post's ``title`` and ``selftext`` joined by one space.

    Raises:
        IndexError: If the subreddit listing came back empty.
    """
    # Instantiate praw
    reddit = praw.Reddit()
    # Get subreddit
    subreddit = reddit.subreddit(subredditName)
    # Ask for up to 100 of the newest posts; the list can be shorter.
    posts = list(subreddit.new(limit=100))
    if not posts:
        raise IndexError("no posts returned for subreddit " + repr(subredditName))
    # random.choice always picks an existing element. The earlier
    # random.randint(0, 100) is inclusive at both ends, so it could produce
    # index 100 (or any index >= len(posts)) and overrun the list.
    post = random.choice(posts)
    # Return the post's title and description as a single string.
    return post.title + " " + post.selftext
Then, I converted it to speech stored in a .mp3 file with gTTS.
from google.cloud import texttospeech
def convertTextToSpeech(textString):
    """Synthesize ``textString`` to speech and write it to ``output.mp3``.

    Args:
        textString: The plain text to be spoken.

    Returns:
        None. The MP3 bytes from the response are written to ``output.mp3``
        in the current working directory.
    """
    # Build the client straight from the service-account file.
    # from_service_account_json is invoked on the class itself: calling it
    # on TextToSpeechClient() first constructs an extra client that is
    # immediately discarded.
    client = texttospeech.TextToSpeechClient.from_service_account_json("path/to/json")
    # Set text input to be synthesized
    synthesisInput = texttospeech.SynthesisInput(text=textString)
    # Build the voice request
    voice = texttospeech.VoiceSelectionParams(
        language_code="en-us",
        ssml_gender=texttospeech.SsmlVoiceGender.MALE,
    )
    # Select the type of audio file
    audioConfig = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3,
    )
    # Perform the TTS request on the text input
    response = client.synthesize_speech(
        input=synthesisInput,
        voice=voice,
        audio_config=audioConfig,
    )
    # The response carries the encoded audio as bytes; write it out as-is.
    with open("output.mp3", "wb") as out:
        out.write(response.audio_content)
I've created an .mp4 with moviepy that has generic footage in the background with the audio synced over it,
from moviepy.editor import *
from moviepy.video.tools.subtitles import SubtitlesClip
# Load the video and audio source files, trimming the footage to the
# 20s-30s window and the narration to its first 10 seconds.
background_footage = VideoFileClip("background.mp4")
footage_window = background_footage.subclip(20, 30)
narration_track = AudioFileClip("output.mp3")
narration_window = narration_track.subclip(0, 10)

# Attach the narration to the footage and render the final video.
narrated_video = footage_window.set_audio(narration_window)
narrated_video.write_videofile("output.mp4")
but my issue is I can't find a way to have only the current word or sentence displayed on screen as a subtitle, rather than the entire post.

How to integrate Azure text to speech with streamlit?

I am trying to integrate azure text to speech with streamlit.
import azure.cognitiveservices.speech as speechsdk
import streamlit as st
st.title("Let's learn Math!")
def recognize_from_microphone():
    """Run one speech recognition from the default microphone.

    The outcome (recognized text, no-match details, or cancellation
    details) is rendered on the Streamlit page with ``st.text``.
    """
    # NOTE(review): the subscription key is hard-coded in source; consider
    # loading it from configuration or the environment instead.
    speech_config = speechsdk.SpeechConfig(
        subscription="743ae1f5555f49f9a5de4457d4e91b2d",
        region="australiaeast",
    )
    speech_config.speech_recognition_language = "en-US"

    # To recognize speech from an audio file, pass `filename` instead of
    # `use_default_microphone`:
    # speechsdk.audio.AudioConfig(filename="YourAudioFile.wav")
    microphone_input = speechsdk.audio.AudioConfig(use_default_microphone=True)
    recognizer = speechsdk.SpeechRecognizer(
        speech_config=speech_config,
        audio_config=microphone_input,
    )

    st.text("Speak into your microphone.")
    outcome = recognizer.recognize_once_async().get()

    # Report success and no-match early; what remains is cancellation.
    if outcome.reason == speechsdk.ResultReason.RecognizedSpeech:
        st.text(f"Recognized: {outcome.text}")
        return
    if outcome.reason == speechsdk.ResultReason.NoMatch:
        st.text(f"No speech could be recognized: {outcome.no_match_details}")
        return
    if outcome.reason != speechsdk.ResultReason.Canceled:
        return

    cancellation = outcome.cancellation_details
    st.text(f"Speech Recognition canceled: {cancellation.reason}")
    if cancellation.reason == speechsdk.CancellationReason.Error:
        st.text(f"Error details: {cancellation.error_details}")
        st.text("Did you set the speech resource key and region values?")
text = st.text_input("Enter text", value="Hi", max_chars=5)
def audio_output(text):
    """Speak ``text`` through the default speaker and report the outcome.

    Args:
        text: The text to synthesize.

    Status and error messages are rendered on the Streamlit page with
    ``st.write``.
    """
    # NOTE(review): the subscription key is hard-coded in source; consider
    # loading it from configuration or the environment instead.
    speech_config = speechsdk.SpeechConfig(
        subscription="743ae1f5555f49f9a5de4457d4e91b2d",
        region="australiaeast",
    )
    speaker_output = speechsdk.audio.AudioOutputConfig(use_default_speaker=True)
    # Voice used for synthesis.
    speech_config.speech_synthesis_voice_name = 'en-US-JennyNeural'
    synthesizer = speechsdk.SpeechSynthesizer(
        speech_config=speech_config,
        audio_config=speaker_output,
    )

    # Synthesize the text passed in to the default speaker.
    st.write("Enter some text that you want to speak >")
    outcome = synthesizer.speak_text_async(text).get()

    if outcome.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
        st.write(f"Speech synthesized for text [{text}]")
        return
    if outcome.reason != speechsdk.ResultReason.Canceled:
        return

    cancellation = outcome.cancellation_details
    st.write(f"Speech synthesis canceled: {cancellation.reason}")
    # Error details and the configuration hint are shown only for real
    # errors that carry details.
    if (cancellation.reason == speechsdk.CancellationReason.Error
            and cancellation.error_details):
        st.write(f"Error details: {cancellation.error_details}")
        st.write("Did you set the speech resource key and region values?")
recognize_from_microphone()
audio_output(text)
This is my code, but streamlit is not loading the functions at all. Is there any fix? I am new to streamlit and azure.
You declared the functions but didn't call them.

Google Vision API problem with batch annotations

I wanted to use the Cloud Vision API to detect labels in about 40K photographs and download the results as CSV files. I uploaded the photos to Cloud Storage and used the following code, but an error occurred. I asked a person who uses Python in his job, but he could not resolve this error. Can you help me fix it?
TypeError: Invalid constructor input for BatchAnnotateImagesRequest: [{'image': source {
image_uri: "gs://bucket/image-path.jpg"
}
, 'features': [{'type': <Type.LABEL_DETECTION: 4>}]}]
The code I used:
from google.cloud import vision
from google.cloud import storage
from google.cloud.vision_v1 import ImageAnnotatorClient
from google.cloud.vision_v1 import types
import os
import json
import numpy as np

os.environ["GOOGLE_APPLICATION_CREDENTIALS"]='C://file-path.json'
#(created in step 1)

BUCKET_NAME = 'bucket_name'
# We can send a maximum of 16 images per request.
BATCH_SIZE = 16

# Get GCS bucket and build the gs:// URI of every object in it.
storage_client = storage.Client()
bucket = storage_client.bucket(BUCKET_NAME)
image_paths = ["gs://" + BUCKET_NAME + "/" + blob.name for blob in bucket.list_blobs()]

# One client is enough for every batch; it does not need to be rebuilt
# on each iteration.
client = vision.ImageAnnotatorClient()

label_output = []
# Step through the paths in slices of BATCH_SIZE. Stepping with range()
# never yields an empty trailing batch, which the floor(len/16)+1 loop did
# whenever the image count was an exact multiple of 16.
for batch_start in range(0, len(image_paths), BATCH_SIZE):
    batch_paths = image_paths[batch_start:batch_start + BATCH_SIZE]

    requests = []
    for image_path in batch_paths:
        image = types.Image()
        image.source.image_uri = image_path
        # NOTE(review): newer google-cloud-vision releases may expect the
        # feature key to be spelled `type_` - confirm against the installed
        # version.
        requests.append({'image': image, 'features': [{'type': vision.Feature.Type.LABEL_DETECTION}]})

    # The list must be passed by keyword. Passed positionally it is taken
    # as the request object itself, which raises "Invalid constructor input
    # for BatchAnnotateImagesRequest".
    response = client.batch_annotate_images(requests=requests)

    # Pair each path with its response by position.
    for image_path, annotation in zip(batch_paths, response.responses):
        labels = {label.description: label.score for label in annotation.label_annotations}
        label_output.append({'filename': os.path.basename(image_path), 'labels': labels})

# Print the results as quoted, semicolon-separated rows: the filename
# followed by label/score pairs. Each entry is a dict, so it is used
# directly (not as an index into label_output), and the numeric score is
# converted to str before concatenation.
for entry in label_output:
    print('"' + entry['filename'] + '";', end='')
    for label, score in entry['labels'].items():
        print('"' + label + '";"' + str(score) + '";', end='')
    print("")
batch_annotate_images() is not getting the contents of requests properly. To fix this, just assign your variable requests explicitly to the parameter requests of batch_annotate_images().
response = client.batch_annotate_images(requests=requests)
See batch_annotate_images() for reference. Also if you are planning to update your Vision API to 2.3.1, you might encounter errors on features: see this reference for the updated usage of its parameters.

How to set title to a powerpoint slide using win32com client

Could anyone please help me in setting a title to a powerpoint slide using win32com library in Python. The following is the code. I have used the slide layout 11 which denotes Title only
import openpyxl as op
import pptx
import os
import win32com.client
import smtplib
# Copy the range A2:R24 of the "Summary" worksheet out of Excel and paste it
# onto a newly added slide of an existing PowerPoint presentation, via COM.
# Work from the folder that holds the workbook and the presentation.
os.chdir(r'C:\Users\aju.mathew.thomas\Desktop\PBC\Pepsi\PBC\Performance Reports\2019\PPT')
path= r'C:\Users\aju.mathew.thomas\Desktop\PBC\Pepsi\PBC\Performance Reports\2019\PPT\Summary2.xlsx'
# NOTE(review): `wb` (the openpyxl workbook) is not used again in the lines shown.
wb = op.load_workbook(path)
# Open the same workbook through the Excel COM application, keeping Excel hidden.
ExcelApp = win32com.client.Dispatch("Excel.Application")
ExcelApp.Visible = False
workbook = ExcelApp.Workbooks.open(r'C:\Users\aju.mathew.thomas\Desktop\PBC\Pepsi\PBC\Performance Reports\2019\PPT\Summary2.xlsx')
worksheet = workbook.Worksheets("Summary")
excelrange = worksheet.Range("A2:R24")
# Get the PowerPoint COM application and make it visible.
PptApp = win32com.client.Dispatch("Powerpoint.Application")
PptApp.Visible = True
# Copy the Excel range (presumably onto the clipboard - confirm); `z` keeps
# whatever Copy() returned.
z= excelrange.Copy()
PPtPresentation = PptApp.Presentations.Open(r'C:\Users\aju.mathew.thomas\Desktop\PBC\Pepsi\PBC\Performance Reports\2019\PPT\PBC Performance Update.pptx')
# Add a slide with arguments (1, 11); 11 is the "Title only" layout per the
# question text, and 1 is presumably the slide position - confirm.
pptSlide = PPtPresentation.Slides.Add(1,11)
# Earlier attempts at setting the slide title, left disabled:
#pptSlide.Title.Characters.Text ='Metrics'
#title = pptSlide.Shapes.Title
#title.Text ='Metrics Summary'
# NOTE(review): `z` (the return value of Copy()) is passed as PasteSpecial's
# argument - confirm this is the intended value for that parameter.
pptSlide.Shapes.PasteSpecial(z)
# Save the modified presentation.
PPtPresentation.Save()
Just a small syntax issue:
# Get the PowerPoint COM application and make it visible.
PptApp = win32com.client.Dispatch("Powerpoint.Application")
PptApp.Visible = True
# `excelrange` is the Excel range object created earlier in the question's script.
z= excelrange.Copy()
PPtPresentation = PptApp.Presentations.Open(r'C:\Users\aju.mathew.thomas\Desktop\PBC\Pepsi\PBC\Performance Reports\2019\PPT\PBC Performance Update.pptx')
# Add the slide with the same (1, 11) arguments as in the question.
pptSlide = PPtPresentation.Slides.Add(1,11)
# Take the slide's title shape from Shapes.Title and set its text through
# TextFrame.TextRange.Text.
title = pptSlide.Shapes.Title
title.TextFrame.TextRange.Text = 'My title here'

Page orientation doesn't work in OpenOffice with Python

I am trying to set the page orientation in an OpenOffice document with Python. This is the code I am using:
import json
from win32com.client import Dispatch as Dispatch
# Connect to OpenOffice through its COM service manager.
svm = Dispatch("com.sun.star.ServiceManager")
# Mark "Bridge_GetStruct" as a method name on the COM wrapper object.
svm._FlagAsMethod("Bridge_GetStruct")
# NOTE(review): `coreflect` is not used again in the lines shown.
coreflect = svm.createInstance("com.sun.star.reflection.CoreReflection")
desktop = svm.createInstance("com.sun.star.frame.Desktop")
# Load from the "private:factory/swriter" URL into the "_blank" target -
# presumably a new, empty Writer document (confirm).
doc = desktop.loadComponentFromURL("private:factory/swriter", "_blank",0, [])
# Get the document's text object and create a text cursor on it.
txt = doc.getText()
cur = txt.createTextCursor()
Then I tried two different approaches:
# Approach 1: read the page print settings, set entry 8 to True and write
# the settings back to the document.
# NOTE(review): entry 8 is presumably the landscape flag - confirm against
# the order of the returned settings.
p = doc.getPagePrintSettings()
p[8].Value = True
doc.setPagePrintSettings(p)
and
# Approach 2: look up the "Default" style in the "PageStyles" family and
# assign its IsLandscape attribute directly.
oStyleFamilies = doc.getStyleFamilies()
oObj1 = oStyleFamilies.getByName("PageStyles")
oObj2 = oObj1.getByName("Default")
oObj2.IsLandscape = True
Neither gives an error, but the page is still in portrait. Does anybody have an idea?
Thanks in advance!!
try : oObj2.setPropertyValue("IsLandscape",True)
see http://codesnippets.services.openoffice.org/Calc/Calc.SwitchOrientation.snip

Categories

Resources