I'm working on a Virtual Assistant project that recognizes speech, converts it to text, and follows instructions based on what was said.
My issue is that the recognizer never picks up any word I say, and no errors are raised; however, when I print the recognized speech it is always blank.
Please see the code below:
import wikipedia
import webbrowser
import speech_recognition as sr
import pyttsx3
import subprocess
import os
from gtts import gTTS
import datetime
import warnings
import calendar
import random
import pyaudio
#ignore warnings
warnings.filterwarnings('ignore')
def record_audio():
# Listens once through sr.Microphone(), sends the captured audio to Google's
# speech recognition via recognize_google(), and returns `data`.
# (Indentation was lost when the code was posted.)
#record
r = sr.Recognizer() #creating recognizer object
#open the mic and record
with sr.Microphone() as source:
print('say somthing!')
audio = r.listen(source)
#use google speech recognition
data = ''
try:
# NOTE(review): the recognition result is bound to `date`, but `data` is the
# name that is printed and returned, so the output is always the empty string.
date = r.recognize_google(audio)
print('you said: '+data)
except sr.UnknownValueError:
print('google cant understand the audio !')
except sr.RequestError as e:
# NOTE(review): `e` is an exception object; concatenating it to a str raises
# TypeError, so this handler would itself fail (str(e) would be needed).
print('request results from google speech recognition service error '+ e)
return data
record_audio()
the output is always as per the below:
you said :
what I have tried to solve this:
r.adjust_for_ambient_noise(source, duration=1)
in terminal: pip install pipwin
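None of the above worked.
The indentation is fine in my actual code; it was only lost in the formatting of this post.
There is a typo in your code: the result is assigned to `date` instead of `data`, so the empty `data` string is printed and you never see what Google understood. The line should read: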
data = r.recognize_google(audio)
Related
import speech_recognition as sr
import requests
from gtts import gTTS
from playsound import playsound
import os
import subprocess
bot_message = ""
message = ""

# Announce that the assistant is ready before entering the listen/respond loop.
myobj = gTTS(text="Hello I am Shilpa Sheety Speak Anything I am Listening", lang='en', tld='com.au')
myobj.save("starting.mp3")
playsound("starting.mp3")

# One recognizer is enough for the whole session; it is kept under its own
# name so that `r` below refers only to the HTTP response.
recognizer = sr.Recognizer()

# Keep talking until the bot's latest reply is exactly "Bye".
while bot_message != "Bye":
    # Reset every turn so that a failed recognition cannot silently re-send
    # the message from the previous turn.
    message = ""
    with sr.Microphone() as source:
        audio = recognizer.listen(source)
    try:
        message = recognizer.recognize_google(audio)
        print("You said : {}".format(message))
    except (sr.UnknownValueError, sr.RequestError):
        # Only recognition failures are expected here; anything else
        # (including KeyboardInterrupt) should propagate.
        print("Sorry Could not recognize your voice")
    if not message:
        continue
    print("Sending Message Now")
    r = requests.post("http://localhost:5002/webhooks/rest/webhook", json={'message': message})
    print("Bot Says,", end=' ')
    # The webhook answers with a list of messages; speak each one in turn.
    for i in r.json():
        bot_message = i['text']
        print(f"{i['text']}")
        myobj = gTTS(text=bot_message)
        myobj.save("Welcome.mp3")
        playsound("Welcome.mp3")
In the above program I am playing Welcome.mp3 in a loop. It works fine for the first 2 iterations, but in the 3rd iteration of the for loop I get the following error:
Error 263 for command:
open Welcome.mp3
The specified device is not open or is not recognized by MCI.
Error 263 for command:
close Welcome.mp3
The specified device is not open or is not recognized by MCI. Failed to close the file: Welcome.mp3 Traceback (most recent call last): File "Voice_bot.py", line 31, in <module>
playsound("Welcome.mp3") File "C:\Users\DJ9004\anaconda4\lib\site-packages\playsound.py", line 72, in _playsoundWin
winCommand(u'open {}'.format(sound)) File "C:\Users\DJ9004\anaconda4\lib\site-packages\playsound.py", line 64, in winCommand
raise PlaysoundException(exceptionMessage) playsound.PlaysoundException:
Error 263 for command:
open Welcome.mp3
The specified device is not open or is not recognized by MCI.*
It worked for me when I uninstalled the playsound module and installed an older version, like this:
pip uninstall playsound
pip install playsound==1.2.2
It works if you uninstall the current version of playsound and install version 1.2.2, like this:
pip uninstall playsound
pip install playsound==1.2.2
Try it
I had the same issue, it looks like the file is still being saved when you are trying to open it.
I added a couple of lines and it worked just fine:
for i in r.json():
    bot_message = i['text']
    print(f"{i['text']}")
    myobj = gTTS(text=bot_message)
    # Delete the previous clip first so save() writes a fresh file. The guard
    # is needed because the file may not exist yet (e.g. on the first reply),
    # in which case a bare os.remove() raises FileNotFoundError.
    if os.path.exists('Welcome.mp3'):
        os.remove('Welcome.mp3')  # LINE ADDED
    myobj.save("Welcome.mp3")
    time.sleep(1)  # LINE ADDED (needs `import time` at the top of the script)
    playsound("Welcome.mp3")
I had the same error as you and, not finding any answers, I started running tests. The approach I found is not very practical, but it works for me. In a new file I wrote this piece of code (for this example we will call the file 'function_sound_file'):
from playsound import playsound
def function_sound():
    """Play the sound file at the hard-coded path using playsound."""
    sound_path = 'complete/path/file.wav'
    playsound(sound_path)
And in the file where I had the problem, I call the function I created after importing it (below).
from function_sound_file import function_sound
function_sound()
I tried an illogical solution but it worked every time.
Just change the name of your audio file to 'audio.mp3',
and don't forget to delete it afterwards with os.remove('audio.mp3'), as the code below does (os.close() takes a file descriptor, not a file name).
The code below never worked:
from gtts import gTTS
import os
#greetings
def start():
# Builds a fixed greeting with gTTS, saves it as 'lol.mp3', plays it with
# playsound and then deletes the file. (Indentation was lost in the post;
# presumably everything up to the final start() call is the function body.)
tts = gTTS(text="hi, its kate here! how may i help you?", lang='en' ,
slow=False)
tts.save('lol.mp3')
from playsound import playsound
playsound('lol.mp3')
# Remove the generated clip once playsound() has returned.
os.remove('lol.mp3')
start()
But it worked every time:
from gtts import gTTS
import os
#greetings
def start():
# Same flow as the 'lol.mp3' variant: build a fixed greeting with gTTS, save
# it, play it with playsound, delete it. The only difference is the file
# name, 'audio.mp3'. (Indentation was lost in the post.)
tts = gTTS(text="hi, its kate here! how may i help you?", lang='en' ,
slow=False)
tts.save('audio.mp3')
from playsound import playsound
playsound('audio.mp3')
# Remove the generated clip once playsound() has returned.
os.remove('audio.mp3')
start()
As you can see I just changed 'lol.mp3' to 'audio.mp3'.
Hope it works.
import os
import sys
import time
from naoqi import ALProxy
from naoqi import ALBroker
from naoqi import ALModule
import speech_recognition as sr
# Proxy to the ALAudioRecorder service at 192.168.8.104:9559.
record = ALProxy("ALAudioRecorder", "192.168.8.104", 9559)
# Presumably clears any recording still in progress before starting a new one.
record.stopMicrophonesRecording()
print('Start recording...')
# tts.say("start recording...")
record.startMicrophonesRecording('/home/nao/recordings/cameras/maha1', 'wav', 16000, (0,0, 1, 0))
# Let the recorder run for five seconds, then stop it.
time.sleep(5)
record.stopMicrophonesRecording()
# NOTE(review): this proxy is created but never used in the lines shown.
audio_player_service = ALProxy("ALSpeechRecognition", "192.168.8.104", 9559)
recognizer = sr.Recognizer()
# NOTE(review): sr.AudioFile opens this path on the machine running the script.
# The recording above was started with the name '.../maha1' (no extension) but
# is opened here as '.../maha1.wav' -- confirm the name the recorder writes.
audio_file_ = sr.AudioFile('/home/nao/recordings/cameras/maha1.wav') #problem is here
print(type(audio_file_))
with audio_file_ as source:
# NOTE(review): asks for up to 8.0 s of audio although only 5 s were recorded.
audio_file = recognizer.record(source, duration = 8.0)
# Transcribe with Google's recognizer, requesting language code "ar".
result = recognizer.recognize_google(audio_data=audio_file, language="ar")
print(result)
This is my code. I need to retrieve the audio recording from the Pepper cloud, but it seems the script can't see this path because it is running on my laptop. How can I retrieve the audio file from the Pepper cloud?
You may have a look at the Pepper Controller Python library which contains the speech recognition implementation (see robot.py: listen()).
I am making a simple speech recognition system for a project. I am following a YouTube video in which the following code works, but when I try it I get an error.
This is the code
import speech_recognition as sr
# Opens the WAV file as an audio source and tries to read it into `audio`.
with sr.AudioFile('/content/male.wav') as source:
# NOTE(review): `r` is never assigned in this snippet, so this line raises
# NameError.
audio = r.record(source)
And this is the error
NameError Traceback (most recent call last)
<ipython-input-52-428b394f05e3> in <module>()
1 import speech_recognition as sr
2 with sr.AudioFile('/content/male.wav') as source:
----> 3 audio = r.record(source)
NameError: name 'r' is not defined
This is the full code
pip install SpeechRecognition
import speech_recognition as sr
# The Recognizer instance is bound to the name `catch` here.
catch = sr.Recognizer()
songss = sr.AudioFile('/content/male.wav')
print(type(songss))
import speech_recognition as sr
with sr.AudioFile('/content/male.wav') as source:
# NOTE(review): `r` is not defined anywhere above -- the Recognizer was bound
# to `catch` -- so this line raises NameError.
audio = r.record(source)
The r is not defined. It should be defined as:
r = sr.Recognizer()
Hope this will work.
This text-to-speech code in Python fails on the line that imports gTTs; importing just gtts works fine, but the problem persists with gTTs. The code is below.
# NOTE(review): this is the import that raises the ImportError quoted below;
# the name requested here ends in a lowercase 's'.
from gtts import gTTs
import os
# Text to synthesize and the options passed to the speech object.
text_to_read = "Read any text written "
language = 'en'
slow_audio_speed = False
# NOTE(review): `filename` is declared but the literal "myfile.mp3" is used below.
filename = 'myfile.mp3'
def reading_from_string():
# Synthesizes `text_to_read`, saves it as myfile.mp3 and plays it with mpg321.
# NOTE(review): a from-import does not bind the module name `gtts`, so
# `gtts.gTTs` would raise NameError even if the import above succeeded.
audio_created = gtts.gTTs(text=text_to_read,lang = language,slow = slow_audio_speed)
audio_created.save("myfile.mp3")
os.system("mpg321 myfile.mp3")
if __name__ == "__main__":
reading_from_string()
the error is as belows
*ImportError: cannot import name 'gTTs' from 'gtts' *
i tried to uninstall and install pip gtts and pip gTTs
again and again but the problem seems to be with gTTs.
Also if possible can you suggest a solution to add some natural voice in this code to make it sound more natural
this should work.
from gtts import gTTS
import os
text_to_read = "Read any text written "
language = 'en'
slow_audio_speed = False
filename = 'myfile.mp3'
def reading_from_string():
    """Synthesize ``text_to_read`` with gTTS, save it and open the result.

    Reads the module-level settings ``text_to_read``, ``language``,
    ``slow_audio_speed`` and ``filename``; returns nothing.
    """
    audio_created = gTTS(text=text_to_read, lang=language, slow=slow_audio_speed)
    # Use the declared `filename` constant rather than repeating the literal,
    # so the saved file and the opened file cannot drift apart.
    audio_created.save(filename)
    # `start` is the Windows shell command that opens a file with its
    # associated application.
    os.system(f"start {filename}")


if __name__ == "__main__":
    reading_from_string()
First, the class name is gTTS, not gTTs (note the capital final 'S'); second, inside reading_from_string() it should be called as gTTS(...), not gtts.gTTs(...), because only the class name is imported.
I want to make a chatbot's response in audio and text.
All the example code using gTTS seem like one needs to 'save the text into a file then play the file'.
Is there another way to simplify the process such as, play the 'response from chatbot' automatically, using gTTS?
If you look even briefly at the docs, you'll see that, of the three examples, only one of them requires you to call save, and the third one is specifically called "Playing sound directly".
So, just do exactly what's in that example, but substitute your string in place of the literal 'hello':
>>> from gtts import gTTS
>>> from io import BytesIO
>>>
>>> my_variable = 'hello' # your real code gets this from the chatbot
>>>
>>> mp3_fp = BytesIO()
>>> tts = gTTS(my_variable, 'en')
>>> tts.write_to_fp(mp3_fp)
But notice that gTTS doesn't come with an MP3 player; you need a separate audio library to play that mp3_fp buffer:
>>> # Load `audio_fp` as an mp3 file in
>>> # the audio library of your choice
As the docs say, there are many such libraries, and Stack Overflow is not a good place to get recommendations for libraries. I happen to have a library installed, named musicplayer, and a sample app that can be easily adapted here, but it's probably not the simplest one by a long shot (it's made for doing more powerful, low-level stuff):
>>> import musicplayer
class Song:
    """Thin wrapper exposing ``readPacket``/``seekRaw`` over a file-like object.

    Instances are what the example puts into ``player.queue``.

    Args:
        f: a seekable binary file-like object (e.g. the ``BytesIO`` holding
            the MP3 data).
    """

    def __init__(self, f):
        self.f = f

    def readPacket(self, size):
        """Return up to *size* bytes read from the wrapped file object."""
        return self.f.read(size)

    def seekRaw(self, offset, whence):
        """Seek the wrapped file object and return its new absolute position."""
        self.f.seek(offset, whence)
        # Was `f.tell()`: `f` is not defined in this scope, so every seek
        # raised NameError. The position must come from the wrapped object.
        return self.f.tell()
>>> player = musicplayer.createPlayer()
>>> player.queue = [Song(mp3_fp)]
>>> player.playing = True
If you want to call the speak function repeatedly without errors,
the following serves the purpose:
from gtts import gTTS
import os
import playsound
def speak(text):
    """Speak *text* aloud using gTTS for synthesis and playsound for playback.

    The audio is written to "abc.mp3" in the current working directory,
    played, and then deleted so the next call starts from a clean state.

    Args:
        text: the string to be spoken (synthesized with lang='en').
    """
    tts = gTTS(text=text, lang='en')
    filename = "abc.mp3"
    tts.save(filename)
    try:
        playsound.playsound(filename)
    finally:
        # Delete the clip even when playback raises; otherwise a stale
        # abc.mp3 is left behind for the next call.
        os.remove(filename)
One of the solution that I found is by using pygame.mixer. In this case, import time is only used to ensure audio finishes before program ends.
from gtts import gTTS
from io import BytesIO
from pygame import mixer
import time
def speak():
    """Synthesize a fixed greeting with gTTS into an in-memory MP3 buffer.

    Returns:
        The ``BytesIO`` buffer the MP3 data was written to. It is returned
        without rewinding, so the caller must ``seek(0)`` before reading.
    """
    mp3_fp = BytesIO()
    tts = gTTS('hello, Welcome to Python Text-to-Speech!', lang='en')
    tts.write_to_fp(mp3_fp)
    return mp3_fp


mixer.init()
sound = speak()
sound.seek(0)  # rewind so the mixer reads the MP3 from its first byte
mixer.music.load(sound, "mp3")
mixer.music.play()
# Keep the program alive until playback has actually finished. A fixed
# five-second sleep cuts off longer audio and wastes time on shorter audio.
while mixer.music.get_busy():
    time.sleep(0.1)
[Linux] Speech in Python
Installation
[Terminal] Upgrade pip: pip install --upgrade pip
[Terminal] Install Google Text to Speech: pip install gTTS
[Terminal] Install pygame: pip install pygame
[Coding IDE] Add speech.py: See listing below
[Coding IDE] Call speak: See listing below
speech.py
from gtts import gTTS
from io import BytesIO
import pygame
class Speech():
    """Text-to-speech helper: synthesize with gTTS, play through pygame."""

    # Restored decorator: the listing showed `#classmethod`, which leaves
    # speak() an ordinary method, so `Speech.speak('hello world')` fails with
    # a missing-argument TypeError.
    @classmethod
    def speak(cls, text):
        """Synthesize *text* (lang='en') in memory and start playing it.

        Args:
            text: the string to be spoken.
        """
        mp3_file_object = BytesIO()
        tts = gTTS(text, lang='en')
        tts.write_to_fp(mp3_file_object)
        # After the write the buffer position is at the end of the data;
        # rewind so the mixer loads the MP3 from its first byte.
        mp3_file_object.seek(0)
        pygame.init()
        pygame.mixer.init()
        pygame.mixer.music.load(mp3_file_object, 'mp3')
        pygame.mixer.music.play()
Example
from .speech import Speech
Speech.speak('hello world')
Warning
It's a female voice and sounds realistic. It sounds like there's a woman in the room, fwiw.
You can also use the playsound library.
>>>import playsound
>>>playsound.playsound('sound.mp3')
For more information on playsound, visit the playsound docs.