Stream video and audio frames from YouTube - python

I am trying to stream video and audio data from a YouTube video so that I can do some video and audio analysis separately, the results of which are then superimposed on the frames using OpenCV. I have this working perfectly fine with files but want to extend it to streaming from YouTube.
At the moment, I've thought of using the VLC Python bindings to stream from YouTube, but I'm not sure how to extract the frames from the video.
Here is the vlc code that performs YouTube streaming at the moment:
import vlc
import time
import numpy as np
from ctypes import *
# Experimental subclass of the python-vlc video-display callback type.
# NOTE(review): the only method is named ``__doc__``, which replaces the class
# docstring attribute rather than implementing a callback -- confirm intent.
class MyDisplay(vlc.VideoDisplayCb):
    def __doc__(o, p):
        # Parenthesised form prints the same text on Python 2 and Python 3.
        print("fsdfs")
# Experimental subclass of the python-vlc video-lock callback type.
# NOTE(review): the only method is named ``__doc__`` and takes no parameters,
# so it is not usable as a regular instance method -- confirm intent.
class MyLock(vlc.VideoLockCb):
    def __doc__():
        # Always raises, so the return statement below is never reached.
        raise Exception("sdsds")
        # Unreachable. NOTE(review): np.array(500,500) passes 500 as the dtype
        # argument; np.zeros((500, 500)) was presumably intended -- confirm.
        return np.array(500,500).__array_interface__['data']
# Experimental subclass of the python-vlc audio-play callback type.
# NOTE(review): ``from_param`` is a name ctypes itself uses for argument
# conversion; overriding it with this signature is presumably unintended.
class MyPlayback(vlc.AudioPlayCb):
    def from_param(self, a, b, c, d):
        # Parenthesised form prints the same text on Python 2 and Python 3.
        print("asfds")
def callbck(a, b, c, d):
    """Debug callback: print a marker line and each argument, then return 'a'.

    The four positional arguments are printed one per line in order. Each
    ``print`` call has a single argument, so the output is identical on
    Python 2 and Python 3.

    Returns:
        The string ``'a'``.
    """
    print('aa')
    for arg in (a, b, c, d):
        print(arg)
    return 'a'
if __name__ == '__main__':
    url = 'https://www.youtube.com/watch?v=F82XtLmL0tU'
    # Create the libVLC instance with verbose logging enabled.
    i = vlc.Instance('--verbose 2'.split())
    media = i.media_new(url)
    media_list = i.media_list_new([url])
    p = i.media_player_new()
    p.set_media(media)
    # Drive the same player through a media-list player.
    lp = i.media_list_player_new()
    lp.set_media_player(p)
    lp.set_media_list(media_list)
    CMPFUNC = CFUNCTYPE(c_char, c_void_p, c_void_p, c_uint, c_long)
    lp.next()
    lock = MyLock()
    display = MyDisplay()
    playback = MyPlayback()
    # Keep a named reference to the ctypes callback object: if it is only
    # created inline in the call, it can be garbage-collected while native
    # code still holds the function pointer.
    # NOTE(review): the declared return type is c_char while callbck returns
    # the str 'a'; confirm this matches what libVLC expects from the callback.
    play_cb = CMPFUNC(callbck)
    p.audio_set_callbacks(play_cb, None, None, None, None, None)
    p.play()
    # Give playback a few seconds to start before taking the snapshot.
    time.sleep(5)
    r = p.video_take_snapshot(0,'rnd.pong',0,0)
How could I produce a stream of frames and audio data using VLC (with Python bindings)? Also is there another way to do this (using ffmpeg for example)?
Thanks

Related

How to connect video stream from python to Kurento Media Server

I am working on a Real Time Video Streaming project using RTMP protocol. I have to use DirectX to capture the screen and then Kurento Media Server to stream.
For capturing, I am using dxcam in python:
import dxcam
import cv2
# import time
# camera = dxcam.create() # returns a DXCamera instance on primary monitor
# Capture the primary output with dxcam and write 1000 frames to video.mp4.
target_fps = 30
camera = dxcam.create(output_idx=0, output_color="BGR")
# NOTE(review): the writer's frame size is hard-coded to 1920x1080; it is
# assumed to match the captured output's resolution -- confirm.
writer = cv2.VideoWriter(
    "video.mp4", cv2.VideoWriter_fourcc(*"mp4v"), target_fps, (1920, 1080)
)
camera.start(target_fps=target_fps, video_mode=True)
try:
    for _ in range(1000):
        writer.write(camera.get_latest_frame())
finally:
    # Always stop the capture and finalise the file, even if a frame
    # grab or write fails part-way through.
    camera.stop()
    writer.release()
    del camera
I need help starting with the Kurento Media server to stream the video captured in real time but I can't find any tutorial to do that. Can someone help me with that?

save microphone audio input when using azure speech to text

I'm currently using Azure speech to text in my project. It is recognizing speech input directly from microphone (which is what I want) and saving the text output, but I'm also interested in saving that audio input so that I can listen to it later on. Before moving to Azure I was using the python speech recognition library with recognize_google, that allowed me to use get_wav_data() to save the input as a .wav file. Is there something similar I can use with Azure? I read the documentation but could only find ways to save audio files for text to speech. My temporary solution is to save the audio input myself first and then use the azure stt on that audio file rather than directly using the microphone for input, but I'm worried this will slow down the process. Any ideas?
Thank you in advance!
This is Darren from the Microsoft Speech SDK Team. Unfortunately, at the moment there is no built-in support for simultaneously doing live recognition from a microphone and writing the audio to a WAV file. We have heard this customer request before and we will consider adding this feature in a future version of the Speech SDK.
What I think you can do at the moment (it will require a bit of programming on your part), is use Speech SDK with a push stream. You can write code to read audio buffers from the microphone and write it to a WAV file. At the same time, you can push the same audio buffers into Speech SDK for recognition. We have Python samples showing how to use Speech SDK with push stream. See function "speech_recognition_with_push_stream" in this file: https://github.com/Azure-Samples/cognitive-services-speech-sdk/blob/master/samples/python/console/speech_sample.py. However, I'm not familiar with Python options for reading real-time audio buffers from a Microphone, and writing to WAV file.
Darren
If you use Azure's speech_recognizer.recognize_once_async(), you can simultaneously capture the microphone with pyaudio. Below is the code I use:
#!/usr/bin/env python3
# enter your output path here:
output_file='/Users/username/micaudio.wav'
import pyaudio, signal, sys, os, requests, wave
pa = pyaudio.PyAudio()
import azure.cognitiveservices.speech as speechsdk
def vocrec_callback(in_data, frame_count, time_info, status):
    """Stream callback: store the captured buffer and keep the stream running.

    Appends ``in_data`` to the global ``voc_data['frames']`` list and returns
    the ``(in_data, pyaudio.paContinue)`` pair. ``frame_count``, ``time_info``
    and ``status`` are accepted but not used.
    """
    global voc_data
    captured = voc_data['frames']
    captured.append(in_data)
    return in_data, pyaudio.paContinue
def vocrec_start():
    """Reset the global recording state and open the input stream.

    Rebinds the global ``voc_data`` to a fresh dict (channel count, sample
    rate, sample width, sample format, empty frame list) and the global
    ``voc_stream`` to a new input-only stream whose callback is
    ``vocrec_callback``.
    """
    global voc_stream, voc_data
    sample_format = pyaudio.paInt16
    # One channel on macOS ('darwin'), two everywhere else.
    channel_count = 1 if sys.platform == 'darwin' else 2
    voc_data = {
        'channels': channel_count,
        'rate': 44100,
        'width': pa.get_sample_size(sample_format),
        'format': sample_format,
        'frames': [],
    }
    voc_stream = pa.open(
        format=voc_data['format'],
        channels=voc_data['channels'],
        rate=voc_data['rate'],
        input=True,
        output=False,
        stream_callback=vocrec_callback,
    )
def vocrec_stop():
    """Close the global microphone stream, if one has been opened.

    The Ctrl+C handler in this script is installed before ``vocrec_start()``
    runs, so this function can be called while ``voc_stream`` does not exist
    yet. In that case it returns without doing anything instead of raising
    ``NameError``.
    """
    stream = globals().get('voc_stream')
    if stream is None:
        return
    stream.close()
def vocrec_write():
    """Write the recorded frames in ``voc_data`` to ``output_file`` as WAV.

    Channel count, sample width and frame rate are taken from the global
    ``voc_data`` dict; the frame buffers are concatenated and written in one
    call.
    """
    recording = voc_data
    with wave.open(output_file, 'wb') as wave_file:
        wave_file.setnchannels(recording['channels'])
        wave_file.setsampwidth(recording['width'])
        wave_file.setframerate(recording['rate'])
        payload = b''.join(recording['frames'])
        wave_file.writeframes(payload)
class SIGINT_handler():
    """Ctrl+C handler that closes the microphone stream and exits.

    Attributes are documented inline; ``signal_handler`` has the
    ``(signum, frame)`` shape that ``signal.signal`` expects of a handler.
    """

    def __init__(self):
        # Becomes True once the handler has been invoked.
        self.SIGINT = False

    def signal_handler(self, signal, frame):
        """Record the interrupt, close the audio stream and exit.

        Raises:
            SystemExit: always, after ``vocrec_stop()`` has run.
        """
        self.SIGINT = True
        print('You pressed Ctrl+C!')
        vocrec_stop()
        # sys.exit() instead of quit(): quit() is an interactive helper
        # injected by the site module and is not guaranteed to exist.
        sys.exit()
def init_azure():
    """Validate the Azure credentials and build the global speech recognizer.

    Reads ``SPEECH_KEY`` and ``SPEECH_REGION`` from the environment, checks
    them with a POST to the region's ``issueToken`` endpoint, and binds the
    global ``speech_recognizer`` to a recognizer configured for the default
    microphone and ``en-US``. Each failed check is reported through
    ``error_and_quit``.
    """
    global speech_recognizer

    # --- check azure keys
    my_speech_key = os.getenv('SPEECH_KEY')
    if my_speech_key is None:
        error_and_quit("Error: No Azure Key.")
    my_speech_region = os.getenv('SPEECH_REGION')
    if my_speech_region is None:
        error_and_quit("Error: No Azure Region.")

    token_url = f"https://{my_speech_region}.api.cognitive.microsoft.com/sts/v1.0/issueToken"
    token_headers = {
        'Ocp-Apim-Subscription-Key': my_speech_key,
        'Content-type': 'application/x-www-form-urlencoded',
    }
    token_response = requests.post(token_url, headers=token_headers)
    # The response's string form includes its status code.
    if "200" not in str(token_response):
        error_and_quit("Error: Wrong Azure Key Or Region.")

    # --- keys correct. continue
    speech_config = speechsdk.SpeechConfig(
        subscription=my_speech_key,
        region=my_speech_region,
    )
    audio_config_stt = speechsdk.audio.AudioConfig(use_default_microphone=True)
    speech_config.set_property(
        speechsdk.PropertyId.SpeechServiceResponse_RequestSentenceBoundary,
        'true',
    )
    # --- disable profanity filter:
    speech_config.set_property(
        speechsdk.PropertyId.SpeechServiceResponse_ProfanityOption,
        "2",
    )
    speech_config.speech_recognition_language = "en-US"
    speech_recognizer = speechsdk.SpeechRecognizer(
        speech_config=speech_config,
        audio_config=audio_config_stt,
    )
def error_and_quit(_error):
    """Print ``_error`` and terminate the program with exit status 1.

    Args:
        _error: The message to print before exiting.

    Raises:
        SystemExit: always.
    """
    # The original printed the undefined name ``error``, which raised
    # NameError instead of showing the message.
    print(_error)
    # Non-zero status so callers of the script can detect the failure.
    sys.exit(1)
def recognize_speech ():
    """Record the microphone to a WAV file while one recognition pass runs.

    Starts the recording, blocks on a single ``recognize_once_async()`` call
    on the global ``speech_recognizer``, then stops the recording, writes the
    WAV file and exits the program.

    Raises:
        SystemExit: always, once the recording has been written.
    """
    vocrec_start()
    print("Say something: ")
    # Only the blocking wait matters here; the recognition result itself
    # is not used by this script.
    speech_recognizer.recognize_once_async().get()
    print("Recording done.")
    vocrec_stop()
    vocrec_write()
    # sys.exit() instead of quit(): quit() is an interactive helper
    # injected by the site module and is not guaranteed to exist.
    sys.exit()
# Install the Ctrl+C handler before any Azure or microphone work starts.
handler = SIGINT_handler()
signal.signal(signal.SIGINT, handler.signal_handler)
# Build the global recognizer, then run one record-and-recognize pass.
init_azure()
recognize_speech()

Passing a video stream from Python to VB.NET through UDP

I need to pass a live video stream from a Python analytics backend to a VB.NET WPF frontend. I'm using a Python server and a VB.NET client communicating via UDP.
This is what I have so far on both ends:
Python Server:
import cv2
from socket import *
import socket
import numpy as np
import sys
# Read frames from an RTSP source, JPEG-encode each one and send it as a
# single UDP datagram to localhost:5000.
_s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
_address = ('localhost', 5000)
_quality = [int(cv2.IMWRITE_JPEG_QUALITY), 80]
# Largest payload that fits in one IPv4 UDP datagram (65535 - 20 - 8 bytes).
_max_datagram = 65507
vcap = cv2.VideoCapture("rtsp://wowzaec2demo.streamlock.net/vod/mp4:BigBuckBunny_115k.mov")
try:
    while True:
        ret, frame = vcap.read()
        if not ret:
            # End of stream or read failure: frame is not usable.
            break
        image = frame
        # processing
        result, imgencode = cv2.imencode('.jpg', image, _quality)
        if not result:
            continue
        stringData = np.array(imgencode).tobytes()
        if len(stringData) > _max_datagram:
            # Too large for one datagram; sendto() would raise. Skip it.
            continue
        _s.sendto(stringData, _address)
finally:
    # Release the capture and the socket however the loop ends.
    vcap.release()
    _s.close()
VB.NET client (Using a console app for testing):
Imports System.Net
Imports System.Net.Sockets
Module Program
    ' Destination buffer that Imdecode fills with each decoded frame.
    Dim Mat = New Emgu.CV.Mat(New System.Drawing.Size(640, 480), Emgu.CV.CvEnum.DepthType.Cv8U, 3)

    ' Receives JPEG-encoded frames on UDP port 5000 and shows each one.
    Sub Main(args As String())
        Using socket As UdpClient = New UdpClient(5000)
            While True
                Dim remoteEP = New IPEndPoint(IPAddress.Any, 5000)
                Dim data As Byte() = socket.Receive(remoteEP)
                ' NOTE(review): ReducedColor8 decodes at 1/8 size; ImreadModes.Color
                ' may be what was intended - confirm.
                Emgu.CV.CvInvoke.Imdecode(data, Emgu.CV.CvEnum.ImreadModes.ReducedColor8, Mat)
                Emgu.CV.CvInvoke.Imshow("", Mat)
                ' Imshow only queues the image; WaitKey runs the window's event
                ' loop so the frame is actually painted.
                Emgu.CV.CvInvoke.WaitKey(1)
            End While
        End Using
    End Sub
End Module
The code I've got so far doesn't throw any errors, but the output isn't displayed in EmguCV ImShow() window. Any help or alternative solutions for my use case are appreciated!

How to play streaming audio from internet radio on Python 3.5.3

I am using Python 3.5.3 on Windows 8.1 x64 and I need to play audio from here
I have tried pyaudio, but it gives me only white noise and error occurred after a few runs of pyaudio (pyaudio module 'pyaudio' has no attribute 'PyAudio').
Please advise me on how best to play streaming audio from a URL using Python...
P.S. I already got the song title and artist name with this code:
import requests
import time
import datetime
print(datetime.datetime.now())
import re
# Poll an Icecast/SHOUTcast stream once per second and print the track
# title whenever it changes.
url = 'http://prem1.rockradio.com:80/bluesrock?9555ae7caa92404c73cade1d'
encoding = 'latin1'
info = ''
radio_session = requests.Session()
# Compiled once; the pattern is the same on every iteration.
title_pattern = re.compile(br"StreamTitle='([^']*)';")
while True:
    # The context manager closes the streamed response every iteration;
    # otherwise each pass leaves an open, partially read connection behind.
    with radio_session.get(url, headers={'Icy-MetaData': '1'}, stream=True) as radio:
        metaint = int(radio.headers['icy-metaint'])
        stream = radio.raw
        # Skip the audio bytes that precede the metadata block.
        audio_data = stream.read(metaint)
        meta_byte = stream.read(1)
        if meta_byte:
            # The length byte counts 16-byte units.
            meta_length = ord(meta_byte) * 16
            meta_data = stream.read(meta_length).rstrip(b'\0')
            stream_title = title_pattern.search(meta_data)
            if stream_title:
                stream_title = stream_title.group(1).decode(encoding, errors='replace')
                if info != stream_title:
                    print('Now playing: ', stream_title)
                    info = stream_title
            else:
                print('No StreamTitle!')
    time.sleep(1)
If you are open for external libraries, you can install vlc binding for python using pip install python-vlc
And use player method to play audio file directly from URL as below.
import vlc
import time
# Address of the internet radio stream to play.
url = 'http://prem1.rockradio.com:80/bluesrock?9555ae7caa92404c73cade1d'
# Create the VLC instance, passing two command-line style options.
# NOTE(review): --input-repeat=-1 presumably repeats the input indefinitely,
# and --fullscreen has no obvious use for audio-only playback -- confirm.
instance = vlc.Instance('--input-repeat=-1', '--fullscreen')
# Create a media player from that instance.
player=instance.media_player_new()
# Wrap the stream URL in a media object.
media=instance.media_new(url)
# Attach the media to the player.
player.set_media(media)
# Start playback.
# NOTE(review): play() appears to return immediately, so a non-interactive
# script needs to stay alive for audio to be heard -- confirm.
player.play()
An advantage of the VLC player is that it can play most media types directly from a URL (not just MP3) and also supports player-like operations such as
>>> player.pause() #pause play back
>>> player.play() #resume play back
>>> player.stop() #stop play back

play MIDI files in python?

I'm looking for a method to play midi files in python.
It seems python does not support MIDI in its standard library.
After searching, I found some Python MIDI libraries such as pythonmidi.
However, most of them can only create and read MIDI files, without any playback function.
I would like to find a python midi library including playing method.
Any recommendations? Thanks!
The pygame module can be used to play midi files.
http://www.pygame.org/docs/ref/music.html
See the example here:
http://www.daniweb.com/software-development/python/code/216979
a whole bunch of options available at:
http://wiki.python.org/moin/PythonInMusic
and also here which you can modify to suit your purpose:
http://xenon.stanford.edu/~geksiong/code/playmus/playmus.py
Just to add a minimal example (via DaniWeb):
# conda install -c cogsci pygame
import pygame
def play_music(midi_filename):
    """Play ``midi_filename`` through the pygame mixer, blocking until done.

    Loads the file into ``pygame.mixer.music``, starts playback, and polls
    ``get_busy()`` with a clock tick of 30 until it reports that playback
    has finished.
    """
    ticker = pygame.time.Clock()
    music = pygame.mixer.music
    music.load(midi_filename)
    music.play()
    while music.get_busy():
        # Wait a tick, then check again whether playback has finished.
        ticker.tick(30)
midi_filename = 'FishPolka.mid'
# Mixer configuration, passed positionally to pygame.mixer.init below.
freq = 44100 # audio CD quality
bitsize = -16 # signed 16 bit (a negative size selects signed samples)
channels = 2 # 1 is mono, 2 is stereo
buffer = 1024 # number of samples
pygame.mixer.init(freq, bitsize, channels, buffer)
# optional volume 0 to 1.0
pygame.mixer.music.set_volume(0.8)
# Play the file; on Ctrl+C fade out, stop and exit.
try:
    # play_music blocks until playback has finished
    play_music(midi_filename)
except KeyboardInterrupt:
    # if user hits Ctrl/C then exit
    # (works only in console mode)
    pygame.mixer.music.fadeout(1000)
    pygame.mixer.music.stop()
    raise SystemExit
pretty_midi can generate the waveform for you, you can then play it with e.g. IPython.display.Audio
from IPython.display import Audio
from pretty_midi import PrettyMIDI
# Render a MIDI file to a waveform with pretty_midi and play it in a notebook.
sf2_path = 'path/to/sf2' # path to sound font file
midi_file = 'music.mid'
# Parse the MIDI file.
music = PrettyMIDI(midi_file=midi_file)
# Synthesize audio samples using the given sound font.
waveform = music.fluidsynth(sf2_path=sf2_path)
# Final expression, so a notebook displays the audio widget.
# NOTE(review): rate=44100 assumes fluidsynth() rendered at 44100 Hz -- confirm.
Audio(waveform, rate=44100)
Use pygame to play your midi file. Examples are here or here
I find that midi2audio works well.
Example:
from midi2audio import FluidSynth
# Play a MIDI file directly, using a FluidSynth object built with defaults.
FluidSynth().play_midi('input.mid')
# Synthesize MIDI to audio files instead of playing it.
# Note: the default sound font is in 44100 Hz sample rate
fs = FluidSynth()
# Render to WAV.
fs.midi_to_audio('input.mid', 'output.wav')
# FLAC, a lossless codec, is recommended
fs.midi_to_audio('input.mid', 'output.flac')
On macOS, you can use the pyObjC library to access the OS's own MIDI handling routines. This script will play midi files given as arguments.
#!/usr/bin/env python3
from AVFoundation import AVMIDIPlayer
from Foundation import NSURL
import time
import sys
def myCompletionHandler():
    """Do nothing; passed to the MIDI player's ``play_`` call as its handler."""
    return None
def playMIDIFile(filepath):
    """Play the MIDI file at ``filepath`` and block for its duration.

    Builds an ``AVMIDIPlayer`` for the file, starts playback, and sleeps
    for the player's reported duration. If the player reports that it is
    not playing right after the start call, it is stopped instead.

    Args:
        filepath: Path of the MIDI file to play.

    Raises:
        SystemExit: with status 1 if the player cannot be created.
    """
    midiFile = NSURL.fileURLWithPath_(filepath)
    midiPlayer, error = AVMIDIPlayer.alloc().initWithContentsOfURL_soundBankURL_error_(midiFile, None, None)
    if error:
        print (error)
        sys.exit(1)
    MIDItime = midiPlayer.duration()
    midiPlayer.prepareToPlay()
    midiPlayer.play_(myCompletionHandler)
    # isPlaying must be called, like duration() above: the bare attribute is
    # a bound method, which is always truthy, so the stop branch never ran.
    if not midiPlayer.isPlaying():
        midiPlayer.stop()
    else:
        time.sleep(MIDItime)
    return
if __name__ == "__main__":
    # Play each file named on the command line, one after another.
    for filename in sys.argv[1:]:
        playMIDIFile(filename)

Categories

Resources