I am connecting a Raspberry Pi to the Google Assistant, and it gives me the following error.
The command I run:
googlesamples-assistant-pushtotalk
File "/home/pi/env/bin/googlesamples-assistant-pushtotalk", line 6, in <module>
from googlesamples.assistant.grpc.pushtotalk import main
File "/home/pi/env/lib/python3.9/site-packages/googlesamples/assistant/grpc/pushtotalk.py", line 30, in <module>
from tenacity import retry, stop_after_attempt, retry_if_exception
File "/home/pi/env/lib/python3.9/site-packages/tenacity/__init__.py", line 292
from tenacity.async import AsyncRetrying
^
SyntaxError: invalid syntax```
The file:``` # Copyright (C) 2017 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Sample that implements gRPC client for Google Assistant API."""
#pip install grpc
import grpc
import json
import logging
import os.path
import click
#import grpc
import google.auth.transport.grpc
import google.auth.transport.requests
import google.oauth2.credentials
from google.assistant.embedded.v1alpha1 import embedded_assistant_pb2
from google.rpc import code_pb2
from tenacity import retry, stop_after_attempt, retry_if_exception
try:
from . import (
assistant_helpers,
audio_helpers
)
except SystemError:
import assistant_helpers
import audio_helpers
# Default host of the Google Assistant API; used as the default value of the
# --api-endpoint command-line option.
ASSISTANT_API_ENDPOINT = 'embeddedassistant.googleapis.com'
# Short aliases for protobuf enum values that converse() compares against
# resp.event_type and resp.result.microphone_mode.
END_OF_UTTERANCE = embedded_assistant_pb2.ConverseResponse.END_OF_UTTERANCE
DIALOG_FOLLOW_ON = embedded_assistant_pb2.ConverseResult.DIALOG_FOLLOW_ON
CLOSE_MICROPHONE = embedded_assistant_pb2.ConverseResult.CLOSE_MICROPHONE
# Default gRPC deadline in seconds (3 minutes + 5 seconds = 185); used as the
# default value of the --grpc-deadline command-line option.
DEFAULT_GRPC_DEADLINE = 60 * 3 + 5
class SampleAssistant(object):
    """Sample Assistant that supports follow-on conversations.

    Instances are context managers: leaving the ``with`` block closes the
    conversation stream, whether or not the block raised.

    Args:
      conversation_stream(ConversationStream): audio stream
        for recording query and playing back assistant answer.
      channel: authorized gRPC channel for connection to the
        Google Assistant API.
      deadline_sec: gRPC deadline in seconds for Google Assistant API call.
    """

    def __init__(self, conversation_stream, channel, deadline_sec):
        self.conversation_stream = conversation_stream

        # Opaque blob provided in ConverseResponse that,
        # when provided in a follow-up ConverseRequest,
        # gives the Assistant a context marker within the current state
        # of the multi-Converse()-RPC "conversation".
        # This value, along with MicrophoneMode, supports a more natural
        # "conversation" with the Assistant.
        self.conversation_state = None

        # Create Google Assistant API gRPC client.
        self.assistant = embedded_assistant_pb2.EmbeddedAssistantStub(channel)
        self.deadline = deadline_sec

    def __enter__(self):
        """Return the assistant itself as the ``with`` target."""
        return self

    def __exit__(self, etype, e, traceback):
        """Close the conversation stream and never suppress exceptions.

        The stream is closed on the error path too, so the audio
        source/sink is not leaked when the ``with`` body raises.
        """
        self.conversation_stream.close()
        return False

    def is_grpc_error_unavailable(e):
        """Return True if `e` is a gRPC error with status UNAVAILABLE.

        Deliberately takes no ``self``: it is only used as the predicate
        handed to the ``retry`` decorator on converse() while the class
        body is being executed.
        """
        is_grpc_error = isinstance(e, grpc.RpcError)
        if is_grpc_error and (e.code() == grpc.StatusCode.UNAVAILABLE):
            logging.error('grpc unavailable error: %s', e)
            return True
        return False

    # Retry the whole conversation turn (at most 3 attempts) when the API
    # reports UNAVAILABLE; the last error is re-raised unchanged.
    @retry(reraise=True, stop=stop_after_attempt(3),
           retry=retry_if_exception(is_grpc_error_unavailable))
    def converse(self):
        """Send a voice request to the Assistant and playback the response.

        Returns: True if conversation should continue.
        """
        continue_conversation = False

        self.conversation_stream.start_recording()
        logging.info('Recording audio request.')

        def iter_converse_requests():
            """Yield (and log) every request, then switch to playback."""
            for c in self.gen_converse_requests():
                assistant_helpers.log_converse_request_without_audio(c)
                yield c
            # All requests have been generated: start playing the answer.
            self.conversation_stream.start_playback()

        # This generator yields ConverseResponse proto messages
        # received from the gRPC Google Assistant API.
        for resp in self.assistant.Converse(iter_converse_requests(),
                                            self.deadline):
            assistant_helpers.log_converse_response_without_audio(resp)
            if resp.error.code != code_pb2.OK:
                logging.error('server error: %s', resp.error.message)
                break
            if resp.event_type == END_OF_UTTERANCE:
                logging.info('End of audio request detected')
                self.conversation_stream.stop_recording()
            if resp.result.spoken_request_text:
                logging.info('Transcript of user request: "%s".',
                             resp.result.spoken_request_text)
                logging.info('Playing assistant response.')
            if len(resp.audio_out.audio_data) > 0:
                self.conversation_stream.write(resp.audio_out.audio_data)
            if resp.result.spoken_response_text:
                logging.info(
                    'Transcript of TTS response '
                    '(only populated from IFTTT): "%s".',
                    resp.result.spoken_response_text)
            if resp.result.conversation_state:
                # Remember the state so the next turn continues this dialog.
                self.conversation_state = resp.result.conversation_state
            if resp.result.volume_percentage != 0:
                self.conversation_stream.volume_percentage = (
                    resp.result.volume_percentage
                )
            if resp.result.microphone_mode == DIALOG_FOLLOW_ON:
                continue_conversation = True
                logging.info('Expecting follow-on query from user.')
            elif resp.result.microphone_mode == CLOSE_MICROPHONE:
                continue_conversation = False
        logging.info('Finished playing assistant response.')
        self.conversation_stream.stop_playback()
        return continue_conversation

    def gen_converse_requests(self):
        """Yields: ConverseRequest messages to send to the API."""
        converse_state = None
        if self.conversation_state:
            logging.debug('Sending converse_state: %s',
                          self.conversation_state)
            converse_state = embedded_assistant_pb2.ConverseState(
                conversation_state=self.conversation_state,
            )
        config = embedded_assistant_pb2.ConverseConfig(
            audio_in_config=embedded_assistant_pb2.AudioInConfig(
                encoding='LINEAR16',
                sample_rate_hertz=self.conversation_stream.sample_rate,
            ),
            audio_out_config=embedded_assistant_pb2.AudioOutConfig(
                encoding='LINEAR16',
                sample_rate_hertz=self.conversation_stream.sample_rate,
                volume_percentage=self.conversation_stream.volume_percentage,
            ),
            converse_state=converse_state
        )
        # The first ConverseRequest must contain the ConverseConfig
        # and no audio data.
        yield embedded_assistant_pb2.ConverseRequest(config=config)
        for data in self.conversation_stream:
            # Subsequent requests need audio data, but not config.
            yield embedded_assistant_pb2.ConverseRequest(audio_in=data)
@click.command()
@click.option('--api-endpoint', default=ASSISTANT_API_ENDPOINT,
              metavar='<api endpoint>', show_default=True,
              help='Address of Google Assistant API service.')
@click.option('--credentials',
              metavar='<credentials>', show_default=True,
              default=os.path.join(click.get_app_dir('google-oauthlib-tool'),
                                   'credentials.json'),
              help='Path to read OAuth2 credentials.')
@click.option('--verbose', '-v', is_flag=True, default=False,
              help='Verbose logging.')
@click.option('--input-audio-file', '-i',
              metavar='<input file>',
              help='Path to input audio file. '
              'If missing, uses audio capture')
@click.option('--output-audio-file', '-o',
              metavar='<output file>',
              help='Path to output audio file. '
              'If missing, uses audio playback')
@click.option('--audio-sample-rate',
              default=audio_helpers.DEFAULT_AUDIO_SAMPLE_RATE,
              metavar='<audio sample rate>', show_default=True,
              help='Audio sample rate in hertz.')
@click.option('--audio-sample-width',
              default=audio_helpers.DEFAULT_AUDIO_SAMPLE_WIDTH,
              metavar='<audio sample width>', show_default=True,
              help='Audio sample width in bytes.')
@click.option('--audio-iter-size',
              default=audio_helpers.DEFAULT_AUDIO_ITER_SIZE,
              metavar='<audio iter size>', show_default=True,
              help='Size of each read during audio stream iteration in bytes.')
@click.option('--audio-block-size',
              default=audio_helpers.DEFAULT_AUDIO_DEVICE_BLOCK_SIZE,
              metavar='<audio block size>', show_default=True,
              help=('Block size in bytes for each audio device '
                    'read and write operation..'))
@click.option('--audio-flush-size',
              default=audio_helpers.DEFAULT_AUDIO_DEVICE_FLUSH_SIZE,
              metavar='<audio flush size>', show_default=True,
              help=('Size of silence data in bytes written '
                    'during flush operation'))
@click.option('--grpc-deadline', default=DEFAULT_GRPC_DEADLINE,
              metavar='<grpc deadline>', show_default=True,
              help='gRPC deadline in seconds')
@click.option('--once', default=False, is_flag=True,
              help='Force termination after a single conversation.')
def main(api_endpoint, credentials, verbose,
         input_audio_file, output_audio_file,
         audio_sample_rate, audio_sample_width,
         audio_iter_size, audio_block_size, audio_flush_size,
         grpc_deadline, once, *args, **kwargs):
    """Samples for the Google Assistant API.

    Examples:
      Run the sample with microphone input and speaker output:

        $ python -m googlesamples.assistant

      Run the sample with file input and speaker output:

        $ python -m googlesamples.assistant -i <input file>

      Run the sample with file input and output:

        $ python -m googlesamples.assistant -i <input file> -o <output file>
    """
    # NOTE: the docstring above is left untouched on purpose; it sits on a
    # click command, so its text is user-facing.
    #
    # Every parameter is filled in by the click option of the same name.
    # The function returns None in all cases; a credentials failure is
    # logged and ends the command early instead of raising.

    # Setup logging.
    logging.basicConfig(level=logging.DEBUG if verbose else logging.INFO)

    # Load OAuth 2.0 credentials.
    try:
        with open(credentials, 'r') as f:
            credentials = google.oauth2.credentials.Credentials(token=None,
                                                                **json.load(f))
            http_request = google.auth.transport.requests.Request()
            credentials.refresh(http_request)
    except Exception as e:
        logging.error('Error loading credentials: %s', e)
        logging.error('Run google-oauthlib-tool to initialize '
                      'new OAuth 2.0 credentials.')
        return

    # Create an authorized gRPC channel.
    grpc_channel = google.auth.transport.grpc.secure_authorized_channel(
        credentials, http_request, api_endpoint)
    logging.info('Connecting to %s', api_endpoint)

    # Configure audio source and sink.
    # `audio_device` is shared: when both capture and playback use the sound
    # device, the same SoundDeviceStream instance serves as source and sink.
    audio_device = None
    if input_audio_file:
        audio_source = audio_helpers.WaveSource(
            open(input_audio_file, 'rb'),
            sample_rate=audio_sample_rate,
            sample_width=audio_sample_width
        )
    else:
        audio_source = audio_device = (
            audio_device or audio_helpers.SoundDeviceStream(
                sample_rate=audio_sample_rate,
                sample_width=audio_sample_width,
                block_size=audio_block_size,
                flush_size=audio_flush_size
            )
        )
    if output_audio_file:
        audio_sink = audio_helpers.WaveSink(
            open(output_audio_file, 'wb'),
            sample_rate=audio_sample_rate,
            sample_width=audio_sample_width
        )
    else:
        audio_sink = audio_device = (
            audio_device or audio_helpers.SoundDeviceStream(
                sample_rate=audio_sample_rate,
                sample_width=audio_sample_width,
                block_size=audio_block_size,
                flush_size=audio_flush_size
            )
        )
    # Create conversation stream with the given audio source and sink.
    conversation_stream = audio_helpers.ConversationStream(
        source=audio_source,
        sink=audio_sink,
        iter_size=audio_iter_size,
        sample_width=audio_sample_width,
    )

    with SampleAssistant(conversation_stream,
                         grpc_channel, grpc_deadline) as assistant:
        # If file arguments are supplied:
        # exit after the first turn of the conversation.
        if input_audio_file or output_audio_file:
            assistant.converse()
            return

        # If no file arguments supplied:
        # keep recording voice requests using the microphone
        # and playing back assistant response using the speaker.
        # When the once flag is set, don't wait for a trigger. Otherwise, wait.
        wait_for_user_trigger = not once
        while True:
            if wait_for_user_trigger:
                click.pause(info='Press Enter to send a new request...')
            continue_conversation = assistant.converse()
            # wait for user trigger if there is no follow-up turn in
            # the conversation.
            wait_for_user_trigger = not continue_conversation

            # If we only want one conversation, break.
            if once and (not continue_conversation):
                break


if __name__ == '__main__':
    main()
thanks for your kind support.
Related
I'm trying to transcribe a conversation audio file into text with Azure's SpeechToText. I got it working using the SDK and made another attempt with the API (following these instructions: https://github.com/Azure-Samples/cognitive-services-speech-sdk/blob/master/samples/batch/python/python-client/main.py), but I also want to split the resulting text by the different voices. Is that possible?
I know the conversation service is available in beta, but as my audio files are in Spanish, I can't use it. Is there a configuration option to split the result by speaker?
This is the call with SDK:
all_results = []
def speech_recognize_continuous_from_file(file_to_transcript):
    """Perform continuous speech recognition on an audio file.

    The text of every recognized result is appended to the module-level
    `all_results` list. The call blocks, polling every half second, until
    the recognizer fires a session-stopped or canceled event.

    Args:
        file_to_transcript: value passed as `filename` to
            `speechsdk.audio.AudioConfig`.

    NOTE(review): relies on `speechsdk`, `speech_key`, `service_region` and
    `time` being available at module level; none of them is defined in the
    visible snippet.
    """
    # <SpeechContinuousRecognitionWithFile>
    speech_config = speechsdk.SpeechConfig(subscription=speech_key,
                                           region=service_region,
                                           speech_recognition_language='es-ES')
    # The body previously read the undefined name `file_to_transcribe`;
    # use the actual parameter.
    audio_config = speechsdk.audio.AudioConfig(filename=file_to_transcript)

    speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_config)

    done = False

    def stop_cb(evt):
        """callback that stops continuous recognition upon receiving an event `evt`"""
        nonlocal done
        print('CLOSING on {}'.format(evt))
        speech_recognizer.stop_continuous_recognition()
        done = True

    def handle_final_result(evt):
        """Collect the text of a recognized result."""
        all_results.append(evt.result.text)

    # Connect callbacks to the events fired by the speech recognizer
    speech_recognizer.recognized.connect(lambda evt: print('RECOGNIZED: {}'.format(evt)))
    speech_recognizer.session_started.connect(lambda evt: print('SESSION STARTED: {}'.format(evt)))
    speech_recognizer.session_stopped.connect(lambda evt: print('SESSION STOPPED {}'.format(evt)))
    speech_recognizer.canceled.connect(lambda evt: print('CANCELED {}'.format(evt)))
    # stop continuous recognition on either session stopped or canceled events
    speech_recognizer.session_stopped.connect(stop_cb)
    speech_recognizer.canceled.connect(stop_cb)

    speech_recognizer.recognized.connect(handle_final_result)

    # Start continuous speech recognition
    speech_recognizer.start_continuous_recognition()
    # `done` is flipped by stop_cb from a recognizer callback.
    while not done:
        time.sleep(.5)
    # </SpeechContinuousRecognitionWithFile>
And this with the API:
from __future__ import print_function
from typing import List
import logging
import sys
import requests
import time
import swagger_client as cris_client
# Send every log record (DEBUG and above) to stdout, message text only.
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG, format="%(message)s")
# Value sent as the 'Ocp-Apim-Subscription-Key' API key by transcribe().
# NOTE(review): `subscription_key` is not defined in the visible snippet;
# presumably a placeholder for the real key - confirm.
SUBSCRIPTION_KEY = subscription_key
# NOTE(review): HOST_NAME and PORT are not referenced by the visible code.
HOST_NAME = "westeurope.cris.ai"
PORT = 443
# Name, description and locale given to the transcription definition.
NAME = "Simple transcription"
DESCRIPTION = "Simple transcription description"
LOCALE = "es-ES"
# Passed as `recordings_url` of the transcription definition.
# NOTE(review): `bobl_url` is not defined in the visible snippet; presumably
# a placeholder (and possibly a typo for "blob_url") - confirm.
RECORDINGS_BLOB_URI = bobl_url
# ADAPTED_ACOUSTIC_ID = None # guid of a custom acoustic model
# ADAPTED_LANGUAGE_ID = None # guid of a custom language model
def transcribe():
    """Create one batch transcription and poll it until it finishes.

    Steps, in order:
      1. Build an API client authenticated with SUBSCRIPTION_KEY.
      2. Request deletion of every transcription currently listed for the
         subscription.
      3. Create a transcription for RECORDINGS_BLOB_URI using base models.
      4. Poll every 5 seconds until the transcription created here is
         "Succeeded" or "Failed"; on success, download and log the result
         for "channel_0".
      5. Wait for the user to press Enter.

    The running / not-started counters are recomputed on every poll, so the
    status line shows the current state rather than a running total. No
    extra sleep is done once the transcription has completed.
    """
    logging.info("Starting transcription client...")

    # configure API key authorization: subscription_key
    configuration = cris_client.Configuration()
    configuration.api_key['Ocp-Apim-Subscription-Key'] = SUBSCRIPTION_KEY

    # create the client object and authenticate
    client = cris_client.ApiClient(configuration)

    # create an instance of the transcription api class
    transcription_api = cris_client.CustomSpeechTranscriptionsApi(api_client=client)

    # get all transcriptions for the subscription
    transcriptions: List[cris_client.Transcription] = transcription_api.get_transcriptions()

    logging.info("Deleting all existing completed transcriptions.")

    # delete all pre-existing completed transcriptions
    # if transcriptions are still running or not started, they will not be deleted
    for transcription in transcriptions:
        transcription_api.delete_transcription(transcription.id)

    logging.info("Creating transcriptions.")

    # transcription definition using custom models
    # transcription_definition = cris_client.TranscriptionDefinition(
    #     name=NAME, description=DESCRIPTION, locale=LOCALE, recordings_url=RECORDINGS_BLOB_URI,
    #     models=[cris_client.ModelIdentity(ADAPTED_ACOUSTIC_ID), cris_client.ModelIdentity(ADAPTED_LANGUAGE_ID)]
    # )

    # comment out the previous statement and uncomment the following to use base models for transcription
    transcription_definition = cris_client.TranscriptionDefinition(
        name=NAME, description=DESCRIPTION, locale=LOCALE, recordings_url=RECORDINGS_BLOB_URI
    )

    data, status, headers = transcription_api.create_transcription_with_http_info(transcription_definition)

    # extract transcription location from the headers
    transcription_location: str = headers["location"]

    # get the transcription Id from the location URI
    created_transcriptions = [transcription_location.split('/')[-1]]

    logging.info("Checking status.")

    completed = 0

    while completed < 1:
        # Recount on every poll; otherwise the same running job would be
        # added to the totals again each time round.
        running, not_started = 0, 0

        # get all transcriptions for the user
        transcriptions: List[cris_client.Transcription] = transcription_api.get_transcriptions()

        # for each transcription in the list we check the status
        for transcription in transcriptions:
            if transcription.status in ("Failed", "Succeeded"):
                # we check to see if it was one of the transcriptions we created from this client
                if transcription.id not in created_transcriptions:
                    continue
                completed += 1

                if transcription.status == "Succeeded":
                    results_uri = transcription.results_urls["channel_0"]
                    results = requests.get(results_uri)
                    logging.info("Transcription succeeded. Results: ")
                    logging.info(results.content.decode("utf-8"))
            elif transcription.status == "Running":
                running += 1
            elif transcription.status == "NotStarted":
                not_started += 1

        logging.info(f"Transcriptions status: {completed} completed, {running} running, {not_started} not started yet")

        # wait for 5 seconds before polling again (not needed once done)
        if completed < 1:
            time.sleep(5)

    input("Press any key...")
def main():
    """Script entry point: run the batch transcription workflow once."""
    transcribe()


if __name__ == "__main__":
    main()
I also want to split the result text by the different voices.
The transcript received does not contain any notion of speaker. Here you are just calling an endpoint that performs transcription; there is no speaker-recognition feature in it.
Two things:
If your audio has separate channels for each speaker, then you will have your result (see transcript results_urls channels)
If not, you may use Speaker Recognition API (doc here) to do this identification but:
it needs some training first
you don't have the offsets in the reply, so it will be complicated to map with your transcript result
As you mentioned, the Speech SDK's ConversationTranscriber API (doc here) is currently limited to en-US and zh-CN languages
Contrary to the previous answer, I did get a result where speakers are recognized without any further training or other difficulties. I followed this Github issue:
https://github.com/Azure-Samples/cognitive-services-speech-sdk/issues/286
Which led me to the following change:
transcription_definition = cris_client.TranscriptionDefinition(
name=NAME, description=DESCRIPTION, locale=LOCALE, recordings_url=RECORDINGS_BLOB_URI,
properties={"AddDiarization": "True"}
)
Which gives the desired result.
I have the youtube script that YouTube supplies installed and working, but I can only figure out how to use it from the command line.
For example to upload a file, I use the command prompt and enter
python upload_video.py --file="temp/british.mp4" --title="Summer vacation in California" --description="Had fun surfing in Santa Cruz" --keywords="surfing,Santa Cruz" --category="22" --privacyStatus="private" which will upload the file using the parameters.
My question is, how would I go about having script start.py, and then run something like the above command prompt command from within start.py, calling upon the parameters and passing them though to video_upload.py? I am trying to call the command three times in a row in the start.py script for three different videos. I am using Python 2.7.
I have figured out how to do it via os.system with
import os
os.system("python video_upload.py --file=\"temp/british.mp4\" --title=\"Summer vacation in California\" --description=\"Had fun surfing in Santa Cruz\" --keywords=\"surfing,Santa Cruz\" --category=\"22\" --privacyStatus=\"private\"")
But this is done externally rather than directly in Python, and I would like to capture the 'id' that is printed out at the end.
#!/usr/bin/python
import httplib
import httplib2
import os
import random
import sys
import time
from apiclient.discovery import build
from apiclient.errors import HttpError
from apiclient.http import MediaFileUpload
from oauth2client.client import flow_from_clientsecrets
from oauth2client.file import Storage
from oauth2client.tools import argparser, run_flow
# Explicitly tell the underlying HTTP transport library not to retry, since
# we are handling retry logic ourselves.
httplib2.RETRIES = 1
# Maximum number of times to retry before giving up.
MAX_RETRIES = 10
# Always retry when these exceptions are raised.
RETRIABLE_EXCEPTIONS = (httplib2.HttpLib2Error, IOError, httplib.NotConnected,
httplib.IncompleteRead, httplib.ImproperConnectionState,
httplib.CannotSendRequest, httplib.CannotSendHeader,
httplib.ResponseNotReady, httplib.BadStatusLine)
# Always retry when an apiclient.errors.HttpError with one of these status
# codes is raised.
RETRIABLE_STATUS_CODES = [500, 502, 503, 504]
# The CLIENT_SECRETS_FILE variable specifies the name of a file that contains
# the OAuth 2.0 information for this application, including its client_id and
# client_secret. You can acquire an OAuth 2.0 client ID and client secret from
# the Google Developers Console at
# https://console.developers.google.com/.
# Please ensure that you have enabled the YouTube Data API for your project.
# For more information about using OAuth2 to access the YouTube Data API, see:
# https://developers.google.com/youtube/v3/guides/authentication
# For more information about the client_secrets.json file format, see:
# https://developers.google.com/api-client-library/python/guide/aaa_client_secrets
CLIENT_SECRETS_FILE = "client_secrets.json"
# This OAuth 2.0 access scope allows an application to upload files to the
# authenticated user's YouTube channel, but doesn't allow other types of access.
YOUTUBE_UPLOAD_SCOPE = "https://www.googleapis.com/auth/youtube.upload"
YOUTUBE_API_SERVICE_NAME = "youtube"
YOUTUBE_API_VERSION = "v3"
# This variable defines a message to display if the CLIENT_SECRETS_FILE is
# missing.
MISSING_CLIENT_SECRETS_MESSAGE = """
WARNING: Please configure OAuth 2.0
To make this sample run you will need to populate the client_secrets.json file
found at:
%s
with information from the Developers Console
https://console.developers.google.com/
For more information about the client_secrets.json file format, please visit:
https://developers.google.com/api-client-library/python/guide/aaa_client_secrets
""" % os.path.abspath(os.path.join(os.path.dirname(__file__),
CLIENT_SECRETS_FILE))
VALID_PRIVACY_STATUSES = ("public", "private", "unlisted")
def get_authenticated_service(args):
    """Build a YouTube API client authorized with stored OAuth 2.0 credentials.

    Credentials are read from the "<script name>-oauth2.json" storage file.
    If none are stored, or the stored ones are marked invalid, the OAuth
    flow is run (with `args`) to obtain new ones.

    Args:
        args: parsed command-line arguments, forwarded to `run_flow`.

    Returns:
        The service object returned by `build` for the YouTube API.
    """
    oauth_flow = flow_from_clientsecrets(
        CLIENT_SECRETS_FILE,
        scope=YOUTUBE_UPLOAD_SCOPE,
        message=MISSING_CLIENT_SECRETS_MESSAGE,
    )

    token_store = Storage("%s-oauth2.json" % sys.argv[0])
    credentials = token_store.get()

    needs_authorization = credentials is None or credentials.invalid
    if needs_authorization:
        credentials = run_flow(oauth_flow, token_store, args)

    authorized_http = credentials.authorize(httplib2.Http())
    return build(YOUTUBE_API_SERVICE_NAME, YOUTUBE_API_VERSION,
                 http=authorized_http)
def initialize_upload(youtube, options):
    """Create the videos.insert request for `options.file` and upload it.

    Args:
        youtube: YouTube API service object (see get_authenticated_service).
        options: object with the attributes `file`, `title`, `description`,
            `keywords` (comma-separated string, may be empty), `category`
            and `privacyStatus`.

    Returns:
        Whatever `resumable_upload` returns for the request. Passing the
        value through lets a caller capture the upload result directly
        instead of parsing printed output.
    """
    # An empty keyword string means "no tags" rather than [""].
    tags = options.keywords.split(",") if options.keywords else None

    body = dict(
        snippet=dict(
            title=options.title,
            description=options.description,
            tags=tags,
            categoryId=options.category
        ),
        status=dict(
            privacyStatus=options.privacyStatus
        )
    )

    # Call the API's videos.insert method to create and upload the video.
    insert_request = youtube.videos().insert(
        part=",".join(body.keys()),
        body=body,
        # The chunksize parameter specifies the size of each chunk of data, in
        # bytes, that will be uploaded at a time. Set a higher value for
        # reliable connections as fewer chunks lead to faster uploads. Set a lower
        # value for better recovery on less reliable connections.
        #
        # Setting "chunksize" equal to -1 in the code below means that the entire
        # file will be uploaded in a single HTTP request. (If the upload fails,
        # it will still be retried where it left off.) This is usually a best
        # practice, but if you're using Python older than 2.6 or if you're
        # running on App Engine, you should set the chunksize to something like
        # 1024 * 1024 (1 megabyte).
        media_body=MediaFileUpload(options.file, chunksize=-1, resumable=True)
    )

    return resumable_upload(insert_request)
# This method implements an exponential backoff strategy to resume a
# failed upload.
def resumable_upload(insert_request):
    """Drive `insert_request` to completion, retrying transient failures.

    `insert_request.next_chunk()` is called until it yields a response.
    Errors listed in RETRIABLE_EXCEPTIONS, and HttpError with a status in
    RETRIABLE_STATUS_CODES, trigger a randomized exponential backoff
    (up to 2 ** retry seconds); any other HttpError is re-raised.

    Args:
        insert_request: object exposing `next_chunk()`, which returns a
            `(status, response)` pair; `response` stays None while the
            upload is still in progress.

    Returns:
        The `id` of the uploaded video.

    Raises:
        SystemExit: if the final response has no `id`, or after more than
            MAX_RETRIES retriable failures.
    """
    response = None
    retry = 0
    while response is None:
        # Cleared on every pass so that one retriable failure does not make
        # all later (successful) chunks sleep and count as retries.
        error = None
        try:
            print("Uploading file...")
            _, response = insert_request.next_chunk()
            # A None response only means "more chunks to send"; checking
            # `'id' in None` would raise TypeError.
            if response is not None:
                if 'id' in response:
                    print("Video id '%s' was successfully uploaded." % response['id'])
                else:
                    sys.exit("The upload failed with an unexpected response: %s" % response)
        except HttpError as e:
            if e.resp.status in RETRIABLE_STATUS_CODES:
                error = "A retriable HTTP error %d occurred:\n%s" % (e.resp.status,
                                                                     e.content)
            else:
                raise
        except RETRIABLE_EXCEPTIONS as e:
            error = "A retriable error occurred: %s" % e

        if error is not None:
            print(error)
            retry += 1
            if retry > MAX_RETRIES:
                sys.exit("No longer attempting to retry.")

            max_sleep = 2 ** retry
            sleep_seconds = random.random() * max_sleep
            print("Sleeping %f seconds and then retrying..." % sleep_seconds)
            time.sleep(sleep_seconds)

    return response['id']
if __name__ == '__main__':
    # Command-line options as (flag, keyword settings) pairs, registered on
    # the shared oauth2client `argparser` in this order.
    option_specs = (
        ("--file", dict(required=True, help="Video file to upload")),
        ("--title", dict(help="Video title", default="Test Title")),
        ("--description", dict(help="Video description",
                               default="Test Description")),
        ("--category", dict(
            default="22",
            help="Numeric video category. " +
                 "See https://developers.google.com/youtube/v3/docs/videoCategories/list")),
        ("--keywords", dict(help="Video keywords, comma separated",
                            default="")),
        ("--privacyStatus", dict(choices=VALID_PRIVACY_STATUSES,
                                 default=VALID_PRIVACY_STATUSES[0],
                                 help="Video privacy status.")),
    )
    for flag, settings in option_specs:
        argparser.add_argument(flag, **settings)
    args = argparser.parse_args()

    # Refuse to start when the video file does not exist.
    if not os.path.exists(args.file):
        exit("Please specify a valid file using the --file= parameter.")

    youtube = get_authenticated_service(args)
    try:
        initialize_upload(youtube, args)
    except HttpError as e:
        print("An HTTP error %d occurred:\n%s" % (e.resp.status, e.content))
Have you tried using
your_string = raw_input('Enter your string here: ')
Then you would be able to type your string in the console running your start.py
I have code which streams audio data from a laptop microphone to Google Speech Recognition, but I want to stream audio from another source. From that source I can get a buffer of raw data, and this buffer is what I want to stream to Google. Can somebody help me or give me some useful advice?
I tried to search for a solution and solve this by myself, but I couldn't figure it out.
Here is code:
from __future__ import division
import contextlib
import functools
import re
import signal
import sys
import google.auth
import google.auth.transport.grpc
import google.auth.transport.requests
from google.cloud.proto.speech.v1beta1 import cloud_speech_pb2
from google.rpc import code_pb2
import grpc
import pyaudio
from six.moves import queue
# Audio sampling rate in hertz; passed to both record_audio() and
# request_stream() by main().
RATE = 16000
# Frames per audio buffer: one tenth of a second of audio at RATE.
CHUNK = int(RATE / 10) # 100ms
# Deadline in seconds (3 minutes + 5 seconds = 185) passed to
# StreamingRecognize() by main().
DEADLINE_SECS = 60 * 3 + 5
# OAuth scope requested for the application default credentials.
SPEECH_SCOPE = 'https://www.googleapis.com/auth/cloud-platform'
def make_channel(host, port):
    """Create a secure, authorized gRPC channel to ``host:port``.

    The credentials are the application default credentials taken from the
    environment, requested with the SPEECH_SCOPE scope.
    """
    # Second element of the returned pair is not needed here.
    scoped_credentials, _unused = google.auth.default(scopes=[SPEECH_SCOPE])

    auth_request = google.auth.transport.requests.Request()
    endpoint = '{}:{}'.format(host, port)

    return google.auth.transport.grpc.secure_authorized_channel(
        scoped_credentials, auth_request, endpoint)
def _audio_data_generator(buff):
    """Yield the audio queued in `buff`, batched into single byte strings.

    Each iteration blocks until at least one item is available, then drains
    whatever else is already queued and yields everything joined together.
    A `None` item marks the end of the stream: the data collected alongside
    it is yielded, and the generator then finishes.

    Args:
        buff: queue of byte chunks, terminated by a `None` item.
    """
    while True:
        # Blocking get: guarantees at least one item per batch.
        chunks = [buff.get()]

        # Non-blocking drain of everything that is already waiting.
        try:
            while True:
                chunks.append(buff.get_nowait())
        except queue.Empty:
            pass

        stream_closed = None in chunks
        if stream_closed:
            chunks.remove(None)

        yield b''.join(chunks)

        if stream_closed:
            return
def _fill_buffer(buff, in_data, frame_count, time_info, status_flags):
    """Stream callback: queue the captured audio and keep the stream going.

    Only `buff` and `in_data` are used; the remaining parameters are
    accepted because the audio stream passes them to its callback.

    Returns:
        `(None, pyaudio.paContinue)`.
    """
    buff.put(in_data)
    callback_result = (None, pyaudio.paContinue)
    return callback_result
# [START audio_stream]
@contextlib.contextmanager
def record_audio(rate, chunk):
    """Opens a recording stream in a context manager.

    The ``with`` target is a generator of raw audio byte strings (see
    `_audio_data_generator`). On exit the stream is stopped and closed, the
    end-of-stream marker is queued and the audio interface is terminated.
    This also happens when the ``with`` body raises, so a consumer blocked
    on the buffer is always released.

    Args:
        rate: sampling rate in hertz.
        chunk: number of frames per buffer.
    """
    # Create a thread-safe buffer of audio data
    buff = queue.Queue()

    audio_interface = pyaudio.PyAudio()
    try:
        audio_stream = audio_interface.open(
            format=pyaudio.paInt16,
            # The API currently only supports 1-channel (mono) audio
            channels=1, rate=rate,
            input=True, frames_per_buffer=chunk,
            # Run the audio stream asynchronously to fill the buffer object.
            # This is necessary so that the input device's buffer doesn't
            # overflow
            # while the calling thread makes network requests, etc.
            stream_callback=functools.partial(_fill_buffer, buff),
        )
        try:
            yield _audio_data_generator(buff)
        finally:
            audio_stream.stop_stream()
            audio_stream.close()
            # Signal the _audio_data_generator to finish
            buff.put(None)
    finally:
        audio_interface.terminate()
# [END audio_stream]
def request_stream(data_stream, rate, interim_results=True):
    """Generate the `StreamingRecognizeRequest` sequence for an audio stream.

    The first request carries only the streaming configuration; every
    following request carries one piece of audio from `data_stream`.

    Args:
        data_stream: A generator that yields raw audio data to send.
        rate: The sampling rate in hertz.
        interim_results: Whether to return intermediate results, before the
            transcription is finalized.
    """
    # The server needs the stream metadata up front to know how to
    # interpret the audio that follows.
    opening_request = cloud_speech_pb2.StreamingRecognizeRequest(
        streaming_config=cloud_speech_pb2.StreamingRecognitionConfig(
            interim_results=interim_results,
            config=cloud_speech_pb2.RecognitionConfig(
                encoding='LINEAR16',  # raw 16-bit signed LE samples
                sample_rate=rate,  # the rate in hertz
                language_code='sk-SK',  # a BCP-47 language tag
            ),
        ))
    yield opening_request

    # Audio-only requests from here on.
    for audio_chunk in data_stream:
        yield cloud_speech_pb2.StreamingRecognizeRequest(
            audio_content=audio_chunk)
def listen_print_loop(recognize_stream):
    """Print transcriptions as the server responses arrive.

    `recognize_stream` is iterated until it is exhausted or a final
    transcript contains the word "exit" or "quit". Interim transcripts are
    written followed by a carriage return so the next one overwrites them;
    final transcripts are printed on their own line.

    Raises:
        RuntimeError: if a response carries an error code other than OK.
    """
    printed_width = 0
    for response in recognize_stream:
        if response.error.code != code_pb2.OK:
            raise RuntimeError('Server error: ' + response.error.message)

        if not response.results:
            continue

        # Only the top alternative of the first result is displayed.
        top_result = response.results[0]
        transcript = top_result.alternatives[0].transcript

        # Pad with spaces when the previous interim line was longer, so
        # none of its characters stay visible.
        padding = ' ' * max(0, printed_width - len(transcript))

        if top_result.is_final:
            print(transcript + padding)

            # Stop as soon as a finalized phrase contains a keyword.
            if re.search(r'\b(exit|quit)\b', transcript, re.I):
                print('Exiting..')
                break

            printed_width = 0
            continue

        # Interim result: stay on the same line.
        sys.stdout.write(transcript + padding + '\r')
        sys.stdout.flush()
        printed_width = len(transcript)
def main():
    """Stream microphone audio to the Speech API and print the transcripts.

    Opens an authorized channel to ``speech.googleapis.com:443``, records
    audio with ``record_audio``, feeds it through ``request_stream`` and hands
    the response stream to ``listen_print_loop``.

    Raises:
        grpc.RpcError: for any RPC failure other than a cancellation.
    """
    service = cloud_speech_pb2.SpeechStub(
        make_channel('speech.googleapis.com', 443))

    # Audio collection, request sending and response handling overlap:
    # the recorder fills a buffer while requests are built from it and
    # responses are consumed here.
    with record_audio(RATE, CHUNK) as buffered_audio_data:
        requests = request_stream(buffered_audio_data, RATE)
        recognize_stream = service.StreamingRecognize(
            requests, DEADLINE_SECS)

        # Exit things cleanly on interrupt.
        signal.signal(signal.SIGINT, lambda *_: recognize_stream.cancel())

        try:
            listen_print_loop(recognize_stream)
            recognize_stream.cancel()
        except grpc.RpcError as e:
            # CANCELLED is caused by the interrupt handler (or the cancel()
            # above), which is expected. Compare against the StatusCode enum
            # itself instead of reaching CANCELLED through the returned
            # member.
            if e.code() is not grpc.StatusCode.CANCELLED:
                raise


if __name__ == '__main__':
    main()
I implemented this for java, maybe it will help.
Source: https://github.com/achernetsov/java-docs-samples/blob/stream-from-file-example/speech/cloud-client/src/main/java/com/example/speech/RecognizeStreamFromFile.java
I want to merge two pieces of code.
A.py is web.py code.
B.py is the Google Cloud Speech (STT) example code.
But when I merge the two, web.py raises this error:
type 'exceptions.keyerror'
I inserted the A.py code at the first line of main() in B.py.
How can I merge this code?
This is A.py
import web
# URL routing table: every path is handled by the class named "hello".
urls = ("/.*", "hello")

# Handler names in ``urls`` are looked up in this module's globals.
app = web.application(urls, globals())


class hello:
    """Request handler that answers every GET with a fixed greeting."""

    def GET(self):
        return 'Hello, world!'


if __name__ == "__main__":
    # Start the web.py server only when this file is run as a script.
    app.run()
This is B.py (the Google Cloud Speech (STT) example code)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Sample that streams audio to the Google Cloud Speech API via GRPC."""
from __future__ import division
import contextlib
import functools
import re
import signal
import sys
import web
import google.auth
import google.auth.transport.grpc
import google.auth.transport.requests
from google.cloud.proto.speech.v1beta1 import cloud_speech_pb2
from google.rpc import code_pb2
import grpc
import pyaudio
from six.moves import queue
# Audio recording parameters: sample rate in hertz, and the number of frames
# per buffer (one tenth of a second of audio).
RATE = 48000
CHUNK = RATE // 10  # 100ms

# The Speech API has a streaming limit of 60 seconds of audio*, so keep the
# connection alive for that long, plus some more to give the API time to figure
# out the transcription.
# * https://g.co/cloud/speech/limits#content
DEADLINE_SECS = 3 * 60 + 5

# OAuth scope requested for the application default credentials.
SPEECH_SCOPE = 'https://www.googleapis.com/auth/cloud-platform'
def make_channel(host, port):
    """Return a secure gRPC channel to ``host:port``.

    The channel is authorized with the application default credentials found
    in the environment, requested for ``SPEECH_SCOPE``.
    """
    creds, _project = google.auth.default(scopes=[SPEECH_SCOPE])
    return google.auth.transport.grpc.secure_authorized_channel(
        creds,
        google.auth.transport.requests.Request(),
        '{}:{}'.format(host, port))
def _audio_data_generator(buff):
"""A generator that yields all available data in the given buffer.
Args:
buff - a Queue object, where each element is a chunk of data.
Yields:
A chunk of data that is the aggregate of all chunks of data in `buff`.
The function will block until at least one data chunk is available.
"""
stop = False
while not stop:
# Use a blocking get() to ensure there's at least one chunk of data.
data = [buff.get()]
# Now consume whatever other data's still buffered.
while True:
try:
data.append(buff.get(block=False))`enter code here`
except queue.Empty:
break
# `None` in the buffer signals that the audio stream is closed. Yield
# the final bit of the buffer and exit the loop.
if None in data:
stop = True
data.remove(None)
yield b''.join(data)
def _fill_buffer(buff, in_data, frame_count, time_info, status_flags):
    """Append the captured ``in_data`` to ``buff`` and keep the stream going.

    ``frame_count``, ``time_info`` and ``status_flags`` are accepted but not
    used. Returns ``(None, pyaudio.paContinue)``.
    """
    buff.put(in_data)
    return (None, pyaudio.paContinue)
# [START audio_stream]
@contextlib.contextmanager
def record_audio(rate, chunk):
    """Opens a recording stream in a context manager.

    Args:
        rate: The sampling rate in hertz passed to the audio device.
        chunk: The number of frames per buffer.
    Yields:
        The generator returned by `_audio_data_generator`, which produces the
        recorded audio data.

    On exit (normal or through an exception) the stream is stopped and
    closed, the data generator is told to finish, and the audio interface is
    terminated.
    """
    # Create a thread-safe buffer of audio data
    buff = queue.Queue()

    audio_interface = pyaudio.PyAudio()
    try:
        audio_stream = audio_interface.open(
            format=pyaudio.paInt16,
            # The API currently only supports 1-channel (mono) audio
            channels=1, rate=rate,
            input=True, frames_per_buffer=chunk,
            # Run the audio stream asynchronously to fill the buffer object.
            # This is necessary so that the input device's buffer doesn't
            # overflow while the calling thread makes network requests, etc.
            stream_callback=functools.partial(_fill_buffer, buff),
        )
        try:
            yield _audio_data_generator(buff)
        finally:
            audio_stream.stop_stream()
            audio_stream.close()
            # Signal the _audio_data_generator to finish
            buff.put(None)
    finally:
        audio_interface.terminate()
# [END audio_stream]
def request_stream(data_stream, rate, interim_results=True):
    """Generate the `StreamingRecognizeRequest`s for one recognition session.

    The first request carries only the streaming configuration; each request
    after it carries one chunk of audio taken from ``data_stream``.

    Args:
        data_stream: A generator that yields raw audio data to send.
        rate: The sampling rate in hertz.
        interim_results: Whether to return intermediate results, before the
            transcription is finalized.
    """
    # The server needs metadata about the stream before any audio arrives,
    # so that it knows how to interpret it.
    audio_format = cloud_speech_pb2.RecognitionConfig(
        encoding='LINEAR16',  # raw 16-bit signed LE samples
        sample_rate=rate,  # the rate in hertz
        # See http://g.co/cloud/speech/docs/languages
        # for a list of supported languages.
        language_code='ko-KR',  # a BCP-47 language tag
    )
    opening_request = cloud_speech_pb2.StreamingRecognizeRequest(
        streaming_config=cloud_speech_pb2.StreamingRecognitionConfig(
            interim_results=interim_results,
            config=audio_format,
        ))
    yield opening_request

    # Every later request carries audio content only.
    for audio_chunk in data_stream:
        yield cloud_speech_pb2.StreamingRecognizeRequest(
            audio_content=audio_chunk)
def listen_print_loop(recognize_stream):
    """Consume the server's responses and show each transcription.

    ``recognize_stream`` is iterated one response at a time. An interim
    transcript is written with a trailing carriage return so that the next
    one overwrites it in place; a final transcript is printed with a newline
    and therefore kept.

    The function returns when the stream ends or as soon as a final
    transcript contains "exit" or "quit" (case-insensitive).

    Raises:
        RuntimeError: if a response carries an error code other than OK.
    """
    shown_length = 0
    for reply in recognize_stream:
        if reply.error.code != code_pb2.OK:
            raise RuntimeError('Server error: ' + reply.error.message)
        if not reply.results:
            continue

        # Work with the best alternative of the first result only.
        best = reply.results[0]
        spoken = best.alternatives[0].transcript

        # Blank out whatever is left of a longer line printed earlier.
        filler = ' ' * max(0, shown_length - len(spoken))
        line = spoken + filler

        if best.is_final:
            print(line)
            # Stop recognizing once one of the keywords has been spoken.
            if re.search(r'\b(exit|quit)\b', spoken, re.I):
                print('Exiting..')
                return
            shown_length = 0
        else:
            sys.stdout.write(line + '\r')
            sys.stdout.flush()
            shown_length = len(spoken)
# web.py resolves the handler names in ``urls`` through the mapping passed to
# web.application(), so the handler class has to live at module scope; a class
# defined inside main() is not in globals() and cannot be found.
urls = ("/.*", "hello")


class hello:
    """web.py handler that answers every GET with a fixed greeting."""

    def GET(self):
        return 'Hello, world!'


def main():
    """Serve the web.py application and run streaming speech recognition.

    The web server is started on a daemon thread because ``app.run()`` blocks
    for as long as the server is up; calling it directly would keep the
    speech recognition code below from ever executing. Recognition stays on
    the main thread, which is where ``signal.signal`` must be called.

    Raises:
        grpc.RpcError: for any RPC failure other than a cancellation.
    """
    import threading

    # The reloader is switched off: it is not needed to resolve ``hello`` and
    # the server is not running on the main thread.
    app = web.application(urls, globals(), autoreload=False)
    web_thread = threading.Thread(target=app.run)
    web_thread.daemon = True  # do not keep the process alive after main()
    web_thread.start()

    service = cloud_speech_pb2.SpeechStub(
        make_channel('speech.googleapis.com', 443))

    # Audio collection, request sending and response handling overlap:
    # the recorder fills a buffer while requests are built from it and
    # responses are consumed here.
    with record_audio(RATE, CHUNK) as buffered_audio_data:
        requests = request_stream(buffered_audio_data, RATE)
        recognize_stream = service.StreamingRecognize(
            requests, DEADLINE_SECS)

        # Exit things cleanly on interrupt.
        signal.signal(signal.SIGINT, lambda *_: recognize_stream.cancel())

        try:
            listen_print_loop(recognize_stream)
            recognize_stream.cancel()
        except grpc.RpcError as e:
            # CANCELLED is caused by the interrupt handler (or the cancel()
            # above), which is expected. Compare against the StatusCode enum
            # itself instead of reaching CANCELLED through the returned
            # member.
            if e.code() is not grpc.StatusCode.CANCELLED:
                raise


if __name__ == '__main__':
    main()
The error comes from the fact that web.py looks for a class named hello that is accessible from the global scope. You've defined your class hello within main(), so web.py will never find it.
That being said, there are other issues. Your call to app.run() within main starts the web.py webserver and never returns so nothing after that will ever get executed.
Combining two code examples requires one to understand both snippets. Read the docs, and keep trying.
The question is: how do I upload a user's video to their own YouTube channel?
For that I am using the YouTube API v3, and after a quick Google search I found the code below.
client_secrets.json
{
"web": {
"client_id": "[[INSERT CLIENT ID HERE]]",
"client_secret": "[[INSERT CLIENT SECRET HERE]]",
"redirect_uris": [],
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
"token_uri": "https://accounts.google.com/o/oauth2/token"
}
}
from the terminal
python upload_video.py --file="/tmp/test_video_file.flv" \
  --title="Summer vacation in California" \
  --description="Had a great time surfing in Santa Cruz" \
  --keywords="surfing,Santa Cruz" \
  --category="22" \
  --privacyStatus="private"
How to upload my video to other channels in youtube API
The complete working sample for the upload_video.py script is listed below:
#!/usr/bin/python
import httplib
import httplib2
import os
import random
import sys
import time
from apiclient.discovery import build
from apiclient.errors import HttpError
from apiclient.http import MediaFileUpload
from oauth2client.file import Storage
from oauth2client.client import flow_from_clientsecrets
from oauth2client.tools import run
from optparse import OptionParser
# Retry logic is implemented by this script, so the underlying HTTP transport
# library is told explicitly not to retry on its own.
httplib2.RETRIES = 1

# Maximum number of times to retry before giving up.
MAX_RETRIES = 10

# Always retry when these exceptions are raised.
RETRIABLE_EXCEPTIONS = (
    httplib2.HttpLib2Error,
    IOError,
    httplib.NotConnected,
    httplib.IncompleteRead,
    httplib.ImproperConnectionState,
    httplib.CannotSendRequest,
    httplib.CannotSendHeader,
    httplib.ResponseNotReady,
    httplib.BadStatusLine,
)

# Always retry when an apiclient.errors.HttpError with one of these status
# codes is raised.
RETRIABLE_STATUS_CODES = [500, 502, 503, 504]

# CLIENT_SECRETS_FILE, name of a file containing the OAuth 2.0 information for
# this application, including client_id and client_secret. You can acquire an
# ID/secret pair from the API Access tab on the Google APIs Console
# http://code.google.com/apis/console#access
# For more information about using OAuth2 to access Google APIs, please visit:
# https://developers.google.com/accounts/docs/OAuth2
# For more information about the client_secrets.json file format, please visit:
# https://developers.google.com/api-client-library/python/guide/aaa_client_secrets
# Please ensure that you have enabled the YouTube Data API for your project.
CLIENT_SECRETS_FILE = "client_secrets.json"

# A limited OAuth 2 access scope that allows for uploading files, but not other
# types of account access.
YOUTUBE_UPLOAD_SCOPE = "https://www.googleapis.com/auth/youtube.upload"
YOUTUBE_API_SERVICE_NAME = "youtube"
YOUTUBE_API_VERSION = "v3"

# Helpful message to display if the CLIENT_SECRETS_FILE is missing. The %s is
# filled with the absolute path of that file next to this script.
MISSING_CLIENT_SECRETS_MESSAGE = """
WARNING: Please configure OAuth 2.0
To make this sample run you will need to populate the client_secrets.json file
found at:
%s
with information from the APIs Console
https://code.google.com/apis/console#access
For more information about the client_secrets.json file format, please visit:
https://developers.google.com/api-client-library/python/guide/aaa_client_secrets
""" % os.path.abspath(
    os.path.join(os.path.dirname(__file__), CLIENT_SECRETS_FILE))
def get_authenticated_service():
    """Return a YouTube API client authorized with stored OAuth credentials.

    Credentials are read from the storage file "<script name>-oauth2.json".
    When none are stored, or the stored ones are invalid, the OAuth flow built
    from CLIENT_SECRETS_FILE is run to obtain them.
    """
    oauth_flow = flow_from_clientsecrets(
        CLIENT_SECRETS_FILE,
        scope=YOUTUBE_UPLOAD_SCOPE,
        message=MISSING_CLIENT_SECRETS_MESSAGE)

    token_store = Storage("%s-oauth2.json" % sys.argv[0])
    creds = token_store.get()
    needs_login = creds is None or creds.invalid
    if needs_login:
        creds = run(oauth_flow, token_store)

    authorized_http = creds.authorize(httplib2.Http())
    return build(
        YOUTUBE_API_SERVICE_NAME,
        YOUTUBE_API_VERSION,
        http=authorized_http)
def initialize_upload(options):
    """Build the ``videos().insert`` request for ``options`` and upload it.

    Reads ``keywords``, ``title``, ``description``, ``category``,
    ``privacyStatus`` and ``file`` from ``options`` and passes the resulting
    request to ``resumable_upload``.
    """
    youtube = get_authenticated_service()

    # An empty keyword string means "no tags" rather than one empty tag.
    tags = options.keywords.split(",") if options.keywords else None

    video_resource = {
        "snippet": {
            "title": options.title,
            "description": options.description,
            "tags": tags,
            "categoryId": options.category,
        },
        "status": {
            "privacyStatus": options.privacyStatus,
        },
    }

    # chunksize=-1 means that the entire file will be uploaded in a single
    # HTTP request. (If the upload fails, it will still be retried where it
    # left off.) This is usually a best practice, but if you're using Python
    # older than 2.6 or if you're running on App Engine, you should set the
    # chunksize to something like 1024 * 1024 (1 megabyte).
    media = MediaFileUpload(options.file, chunksize=-1, resumable=True)

    insert_request = youtube.videos().insert(
        part="snippet,status",
        body=video_resource,
        media_body=media)
    resumable_upload(insert_request)
def resumable_upload(insert_request, title=None):
    """Upload ``insert_request`` chunk by chunk, retrying transient failures.

    Args:
        insert_request: The request object whose ``next_chunk()`` performs
            the upload; it returns ``(status, response)`` and ``response``
            stays ``None`` until the upload has finished.
        title: Video title used in the success message. When omitted, the
            title of the module-level ``options`` object (set when the file
            runs as a script) is used if it exists.

    Retriable errors are retried up to MAX_RETRIES times, sleeping a random
    time below ``2 ** retry`` seconds before each new attempt.

    Raises:
        HttpError: when the status code is not in RETRIABLE_STATUS_CODES.
        SystemExit: on an unexpected response or when retries are exhausted.
    """
    if title is None:
        # Keeps the script working unchanged while letting importers (for
        # example a web view) call this without a global ``options``.
        title = getattr(globals().get("options"), "title", None)

    response = None
    retry = 0
    while response is None:
        # Reset per attempt so an earlier failure does not trigger another
        # sleep after a chunk that went through.
        error = None
        try:
            print("Uploading file...")
            _status, response = insert_request.next_chunk()
            # ``response`` is None while more chunks remain to be sent.
            if response is not None:
                if 'id' in response:
                    print("'%s' (video id: %s) was successfully uploaded." % (
                        title, response['id']))
                else:
                    exit("The upload failed with an unexpected response: %s"
                         % response)
        except HttpError as e:
            if e.resp.status in RETRIABLE_STATUS_CODES:
                error = "A retriable HTTP error %d occurred:\n%s" % (
                    e.resp.status, e.content)
            else:
                raise
        except RETRIABLE_EXCEPTIONS as e:
            error = "A retriable error occurred: %s" % e

        if error is not None:
            print(error)
            retry += 1
            if retry > MAX_RETRIES:
                exit("No longer attempting to retry.")

            # Randomized exponential backoff.
            max_sleep = 2 ** retry
            sleep_seconds = random.random() * max_sleep
            print("Sleeping %f seconds and then retrying..." % sleep_seconds)
            time.sleep(sleep_seconds)
if __name__ == '__main__':
    # Command-line entry point: parse the upload options, check that the
    # video file exists, then start the upload.
    parser = OptionParser()
    parser.add_option(
        "--file", dest="file", help="Video file to upload")
    parser.add_option(
        "--title", dest="title", help="Video title", default="Test Title")
    parser.add_option(
        "--description", dest="description", help="Video description",
        default="Test Description")
    parser.add_option(
        "--category", dest="category",
        help="Numeric video category. " +
        "See https://developers.google.com/youtube/v3/docs/videoCategories/list",
        default="22")
    parser.add_option(
        "--keywords", dest="keywords",
        help="Video keywords, comma separated", default="")
    parser.add_option(
        "--privacyStatus", dest="privacyStatus",
        help="Video privacy status: public, private or unlisted",
        default="public")
    options, args = parser.parse_args()

    file_is_usable = (
        options.file is not None and os.path.exists(options.file))
    if not file_is_usable:
        exit("Please specify a valid file using the --file= parameter.")
    initialize_upload(options)
Question 1 :
How to integrate with django view function ?
For example: when a user opens localhost:8000/upload, chooses a file with a browse button and presses submit, Google should ask for the user's credentials, and after a successful login the video should be uploaded to that user's YouTube channel. How do I integrate the code above into a Django view function?
Question 2 :
How to change below command to view
python upload_video.py --file="/tmp/test_video_file.flv" \
  --title="Summer vacation in California" \
  --description="Had a great time surfing in Santa Cruz" \
  --keywords="surfing,Santa Cruz" \
  --category="22" \
  --privacyStatus="private"
There is no way to do it, because you can't build a user's CLIENT SECRETS FILE until they give you their
client_id
client_secret
And here is what google says
Warning: Keep your client secret private. If someone obtains your client secret, they could use it to consume your quota, incur charges against your Google APIs Console project, and request access to user data.
You will almost certainly find it easier using a library like python-social-auth or django-allauth, personally I'd use python-social-auth but as you seem to be just starting out with Django use django-allauth.
https://github.com/pennersr/django-allauth
There is a good tutorial that will take you through it:
http://www.sarahhagstrom.com/2013/09/the-missing-django-allauth-tutorial/
As for turning that script into a view, just call it from inside any Django view. You should check whether the user is logged into Google; if they are, use the access token stored in the database. If not, you should redirect them to the Google login URL pattern provided by django-allauth.
Save your upload_video.py file into your video app.
In your view.py try following the post of the form:
# Bind the submitted fields and the uploaded file to the form.
form_upload = VideoForm(request.POST, request.FILES)
if form_upload.is_valid():
    # Save first so that the video exists before it is uploaded.
    uploaded_video = form_upload.save(commit=True)
    # send this file to youtube
    credentials = get_authenticated_service(uploaded_video)
    initialize_upload(credentials, uploaded_video)
In your videos models.py
# The uploaded video file; optional (null=True, blank=True).
file_on_server = models.FileField(max_length=100, null=True, blank=True)
# NOTE(review): the next four names look like the flags that oauth2client's
# authorization flow reads from its flags object - confirm against the
# oauth2client version in use.
auth_host_name = 'localhost'
noauth_local_webserver = True
auth_host_port = [8080, 8090]
logging_level = 'ERROR'
# NOTE(review): presumably stand in for the --category and --privacyStatus
# options of upload_video.py - confirm against the caller.
category = 23
privacyStatus = 'public'
@property
def file(self):
    """Return the filesystem path of ``file_on_server``.

    Exposed as a property so it can be read as a plain attribute
    (``obj.file``), the same way the upload script reads ``options.file``.
    """
    return self.file_on_server.path
Under the Google Developer Console:
Create a Google OAuth Client ID for native application not web application.
Download JSON to your video app.
Once submitted you should get the following message in your console:
Uploading file...
Video id '' was successfully uploaded.