pepper robot speech Recognition - python

import os
import sys
import time
from naoqi import ALProxy
from naoqi import ALBroker
from naoqi import ALModule
import speech_recognition as sr
# Address of the NAOqi broker on the robot.
robot_ip = "192.168.8.104"
robot_port = 9559

# Record from the robot's microphones for five seconds. A recording that may
# still be running from an earlier session is stopped first.
record = ALProxy("ALAudioRecorder", robot_ip, robot_port)
record.stopMicrophonesRecording()
print('Start recording...')
# The channel tuple presumably enables only one of the four microphones;
# confirm the channel order against the ALAudioRecorder documentation.
record.startMicrophonesRecording(
    '/home/nao/recordings/cameras/maha1', 'wav', 16000, (0, 0, 1, 0))
time.sleep(5)
record.stopMicrophonesRecording()

audio_player_service = ALProxy("ALSpeechRecognition", robot_ip, robot_port)

# Transcribe the recording with Google's recognizer (language code "ar").
recognizer = sr.Recognizer()
# NOTE(review): the recorder above wrote this file on the robot's filesystem,
# but AudioFile opens the path on the machine running this script.
audio_file_ = sr.AudioFile('/home/nao/recordings/cameras/maha1.wav')  # problem is here
print(type(audio_file_))
with audio_file_ as source:
    audio_file = recognizer.record(source, duration=8.0)
result = recognizer.recognize_google(audio_data=audio_file, language="ar")
print(result)
This is my code. I have to retrieve the audio recording from the Pepper cloud, but it seems the script can't see this path because it is running on my laptop. How can I retrieve the audio file from the Pepper cloud?

You may have a look at the Pepper Controller Python library which contains the speech recognition implementation (see robot.py: listen()).

Related

trouble importing enums from google.cloud.speech_v1

I have this code:
from google.cloud import speech_v1
from google.cloud.speech_v1 import enums
import os
import importlib
# Import the enums module from the google.cloud.speech_v1 package
enums = importlib.import_module("google.cloud.speech_v1.enums")

# Set your Google Cloud project and service account credentials
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "creds.json"

# Create a client for the Google Cloud Speech-to-Text API
stt_client = speech_v1.SpeechClient()

# Describe where the audio lives and how it is encoded
audio = speech_v1.types.RecognitionAudio(
    uri="gs://focus-0/speech-to-text-sample.wav",
)
config = speech_v1.types.RecognitionConfig(
    encoding=enums.RecognitionConfig.AudioEncoding.FLAC,
    sample_rate_hertz=48000,
    language_code="en-US",
)

# Transcribe the audio data
response = stt_client.recognize(audio=audio, config=config)

# Print the first alternative of every result
for result in response.results:
    print("Transcription: {}".format(result.alternatives[0].transcript))
When I run it, I get this:
Traceback (most recent call last):
File "/Users/dir/git/fp-scrapers/speech/1-STT.py", line 5, in <module>
from google.cloud.speech_v1 import enums
ImportError: cannot import name 'enums' from 'google.cloud.speech_v1' (/opt/homebrew/lib/python3.9/site-packages/google/cloud/speech_v1/__init__.py)
I have tried several ways to import enums, but none of them have worked.
Does anyone see what I'm doing wrong?
enums and types have been removed in the 2.x versions of the library
This is mentioned on GitHub. Refer to this migration guide. You can also refer to this quick start for setup instructions and an updated client library.
Before:
from google.cloud import speech
encoding = speech.enums.RecognitionConfig.AudioEncoding.LINEAR16
audio = speech.types.RecognitionAudio(content=content)
After:
from google.cloud import speech
encoding = speech.RecognitionConfig.AudioEncoding.LINEAR16
audio = speech.RecognitionAudio(content=content)

How to get data just from one channel using python and alsaaudio

I am using Python 3 and reading data from the microphone with alsaaudio, but I have to work with each channel separately. Is there a way to get data from just one channel, or to parse the data from each channel separately?
import wave
import sys
import threading
import time
import os
import alsaaudio, audioop
# Input: non-blocking capture, 2 channels, 8000 Hz, signed 16-bit
# little-endian samples, period size 80.
inp = alsaaudio.PCM(alsaaudio.PCM_CAPTURE, alsaaudio.PCM_NONBLOCK)
inp.setchannels(2)
inp.setrate(8000)
inp.setformat(alsaaudio.PCM_FORMAT_S16_LE)
inp.setperiodsize(80)

# Output file: same channel count, sample width (2 bytes) and rate as the input
output = wave.open("test2.wav", 'wb')
output.setnchannels(2)
output.setsampwidth(2)
output.setframerate(8000)

try:
    while True:
        # read() returns a (length, data) pair; data is only used when
        # length is positive.
        l, frames = inp.read()
        if l > 0:
            print(frames)
            output.writeframes(frames)
        else:
            # The device is opened non-blocking, so yield briefly instead of
            # spinning at full CPU when nothing was read.
            time.sleep(0.001)
except KeyboardInterrupt:
    # Ctrl-C is the only way out of the capture loop; continue to cleanup.
    pass
finally:
    # Close the WAV file and release the capture device even when the loop
    # is interrupted.
    output.close()
    inp.close()

speech recognition not working in my code

I am building a simple speech recognition system for a project. I am following a YouTube video in which the following code works, but when I try it on my system it raises an error.
This is the code
import speech_recognition as sr
# Open the WAV file as an audio source and read it through r.record().
with sr.AudioFile('/content/male.wav') as source:
# NOTE(review): `r` is not bound anywhere in this snippet.
audio = r.record(source)
And this is the error
NameError Traceback (most recent call last)
<ipython-input-52-428b394f05e3> in <module>()
1 import speech_recognition as sr
2 with sr.AudioFile('/content/male.wav') as source:
----> 3 audio = r.record(source)
NameError: name 'r' is not defined
This is the full code
pip install SpeechRecognition
import speech_recognition as sr
# The Recognizer instance is bound to the name `catch`.
catch = sr.Recognizer()
songss = sr.AudioFile('/content/male.wav')
print(type(songss))
import speech_recognition as sr
with sr.AudioFile('/content/male.wav') as source:
# NOTE(review): `r` is never assigned above; the only Recognizer created in
# this snippet is `catch`.
audio = r.record(source)
The r is not defined. It should be defined as:
r = sr.Recognizer()
Hope this will work.

why speech recognition is not recognizing any sound

I'm working on a virtual assistant project that recognizes speech, converts it to text, and follows instructions based on what was said.
My issue is that it never recognizes any word I say. No errors are raised; however, when I print the recognized speech, it is always blank.
please see the below code:
import wikipedia
import webbrowser
import speech_recognition as sr
import pyttsx3
import subprocess
import os
from gtts import gTTS
import datetime
import warnings
import calendar
import random
import pyaudio
#ignore warnings
warnings.filterwarnings('ignore')
def record_audio():
    """Listen on the microphone and send the captured audio to Google.

    Prints a prompt, records one phrase from ``sr.Microphone()``, and passes
    it to ``recognize_google``. Recognition failures are reported with
    ``print`` rather than raised.

    Returns:
        The value of ``data``.
    """
    # record
    r = sr.Recognizer()  # creating recognizer object
    # open the mic and record
    with sr.Microphone() as source:
        print('say somthing!')
        audio = r.listen(source)
    # use google speech recognition
    data = ''
    try:
        # NOTE(review): the transcript is bound to `date`, not `data`, so
        # `data` is still '' when it is printed and returned.
        date = r.recognize_google(audio)
        print('you said: '+data)
    except sr.UnknownValueError:
        print('google cant understand the audio !')
    except sr.RequestError as e:
        # str() is required: concatenating a str with the exception object
        # would raise TypeError inside this handler.
        print('request results from google speech recognition service error ' + str(e))
    return data
record_audio()
the output is always as per the below:
you said :
what I have tried to solve this:
r.adjust_for_ambient_noise(source, duration=1)
in terminal: pip install pipwin
none of the above worked.
indentations are fine, it is just the formatting of the thread.
There is a typo in your code: the recognized text is assigned to `date` instead of `data`, so `data` stays empty and you never see what Google understood. The line should be:
data = r.recognize_google(audio)

gTTS direct output

I want to make a chatbot's response in audio and text.
All the example code using gTTS seem like one needs to 'save the text into a file then play the file'.
Is there another way to simplify the process such as, play the 'response from chatbot' automatically, using gTTS?
If you look even briefly at the docs, you'll see that, of the three examples, only one of them requires you to call save, and the third one is specifically called "Playing sound directly".
So, just do exactly what's in that example, but substitute your string in place of the literal 'hello':
>>> from gtts import gTTS
>>> from io import BytesIO
>>>
>>> my_variable = 'hello' # your real code gets this from the chatbot
>>>
>>> mp3_fp = BytesIO()
>>> tts = gTTS(my_variable, 'en')
>>> tts.write_to_fp(mp3_fp)
But notice that gTTS doesn't come with an MP3 player; you need a separate audio library to play that mp3_fp buffer:
>>> # Load `audio_fp` as an mp3 file in
>>> # the audio library of your choice
As the docs say, there are many such libraries, and Stack Overflow is not a good place to get recommendations for libraries. I happen to have a library installed, named musicplayer, and a sample app that can be easily adapted here, but it's probably not the simplest one by a long shot (it's made for doing more powerful, low-level stuff):
>>> import musicplayer
>>> class Song:
... def __init__(self, f):
... self.f = f
... def readPacket(self, size):
... return self.f.read(size)
... def seekRaw(self, offset, whence):
... self.f.seek(offset, whence)
... return f.tell()
>>> player = musicplayer.createPlayer()
>>> player.queue = [Song(mp3_fp)]
>>> player.playing = True
If you want to call the speak function again and again without any error, remove the temporary file after each playback.
Basically, the following serves that purpose.
from gtts import gTTS
import os
import playsound
def speak(text):
    """Speak *text* aloud through a temporary MP3 file.

    The text is synthesized with gTTS (English), saved to ``abc.mp3`` in the
    current working directory, played with ``playsound``, and the file is
    removed afterwards so that repeated calls start from a clean state.

    Args:
        text: The string to synthesize and play.
    """
    tts = gTTS(text=text, lang='en')
    filename = "abc.mp3"
    tts.save(filename)
    try:
        playsound.playsound(filename)
    finally:
        # Remove the file even if playback fails, so a stale abc.mp3 is not
        # left behind for the next call.
        os.remove(filename)
One solution I found is to use pygame.mixer. In this case, import time is only used to ensure the audio finishes before the program ends.
from gtts import gTTS
from io import BytesIO
from pygame import mixer
import time
def speak():
    """Synthesize the fixed welcome phrase into an in-memory MP3 buffer.

    Returns:
        The ``BytesIO`` object that gTTS wrote the MP3 data into. The buffer
        is not rewound here; the caller seeks to 0 before loading it.
    """
    buffer = BytesIO()
    gTTS('hello, Welcome to Python Text-to-Speech!', lang='en').write_to_fp(buffer)
    return buffer
# Initialise the mixer, load the synthesized MP3 from memory and play it.
mixer.init()
sound = speak()
sound.seek(0)  # rewind so the mixer reads the buffer from the beginning
mixer.music.load(sound, "mp3")
mixer.music.play()
# play() returns immediately, so wait until the mixer reports that playback
# has stopped. A fixed sleep would cut off longer audio and over-wait on
# shorter audio.
while mixer.music.get_busy():
    time.sleep(0.1)
[Linux] Speech in Python
Installation
[Terminal] Upgrade pip: pip install --upgrade pip
[Terminal] Install Google Text to Speech: pip install gTTS
[Terminal] Install pygame: pip install pygame
[Coding IDE] Add speech.py: See listing below
[Coding IDE] Call speak: See listing below
speech.py
from gtts import gTTS
from io import BytesIO
import pygame
class Speech:
    """Text-to-speech helper that plays gTTS output through pygame."""

    @classmethod
    def speak(cls, text):
        """Synthesize *text* in English and start playing it.

        The MP3 data is kept in memory; no file is written. Playback is
        started with ``pygame.mixer.music.play()`` and this method does not
        wait for it to finish.

        Args:
            text: The string to synthesize and play.
        """
        mp3_file_object = BytesIO()
        tts = gTTS(text, lang='en')
        tts.write_to_fp(mp3_file_object)
        # Rewind so the mixer reads the MP3 data from the start of the buffer.
        mp3_file_object.seek(0)
        pygame.init()
        pygame.mixer.init()
        pygame.mixer.music.load(mp3_file_object, 'mp3')
        pygame.mixer.music.play()
Example
from .speech import Speech
Speech.speak('hello world')
Warning
It's a female voice and sounds realistic. It sounds like there's a woman in the room, fwiw.
You can also use the playsound library.
>>>import playsound
>>>playsound.playsound('sound.mp3')
For more information on playsound, visit the Playsound docs.

Categories

Resources