I am trying to scrape data from a websocket news site. I have a main script (not shown) that handles postprocessing and calls pool.apply_async(main_news, ()) to get the data back. My problem is that I can only return data from a message if I call ws.close() before "return"; however, ws.close() takes around 1 second, which is too slow and seems unnecessary.
What options do I have to just directly return "data" to my main script without having to wait for ws.close()?
import websocket, ssl, json, re, time
from datetime import datetime
def on_open(ws):
    """Announce on stdout that the websocket connection is open.

    Args:
        ws: The websocket app that opened; not used here.
    """
    print("Opened connection")
def on_message(ws, message):
    """Handle one incoming news message.

    Parses ``message`` as JSON, prints its title with a time stamp and, when
    the title contains ``DATA``, stores the match in the module-level ``data``
    global and closes the socket.

    Args:
        ws: The websocket app that received the message; only ``close()`` is
            called on it.
        message: JSON text containing a ``"title"`` key.

    Returns:
        The most recently captured match, or ``None`` if no title has matched
        so far.
    """
    global data
    title = json.loads(message)["title"]  # load json string into dict format
    print(title, str(datetime.now())[11:-4])
    data_match = re.search(r"DATA", title)
    if data_match:
        data = data_match.group(0)
        # NOTE(review): websocket-client's close() reportedly accepts a
        # ``timeout`` for the closing handshake; passing a small value may
        # shorten the ~1 s wait -- confirm against the installed version.
        ws.close()  # Takes around 1 second!!
    # ``data`` does not exist until the first match, so a plain ``return data``
    # raised NameError on every earlier message.
    return globals().get("data")
def on_error(ws, error):
    """Print an error reported for the websocket connection.

    Args:
        ws: The websocket app the error belongs to; not used here.
        error: The error object or message to print.
    """
    print(error)
def on_close(ws, close_status_code, close_msg):
    """Log the close event and reconnect unless a result was already captured.

    Args:
        ws: The websocket app that was closed; ``run_forever`` is called on it
            again to reconnect.
        close_status_code: Close status code, printed when truthy.
        close_msg: Close message, printed when truthy.
    """
    print("on_close args:")
    if close_status_code or close_msg:
        print("close status code: " + str(close_status_code))
        print("close message: " + str(close_msg))
    # on_message closes the socket on purpose once it has stored a match in
    # the ``data`` global. Reconnecting in that case would keep main_news from
    # ever returning the result, so only restart while nothing was captured.
    # NOTE(review): assumes this callback also runs after an intentional
    # ws.close() -- confirm for the installed websocket-client version.
    if globals().get("data") is not None:
        return
    # automatically restart websocket in case of error
    print("Retry: %s" % time.ctime())
    time.sleep(3)
    ws.run_forever(sslopt={"cert_reqs": ssl.CERT_NONE})
def main_news(socket="wss://www.madnews.io/ws"):
    """Run the news websocket client and return the captured match.

    Args:
        socket: URL of the websocket endpoint to connect to.

    Returns:
        The value stored in the module-level ``data`` global by on_message,
        or ``None`` when the client loop ended without any match.
    """
    ws = websocket.WebSocketApp(
        socket,
        on_open=on_open,
        on_message=on_message,
        on_close=on_close,
        on_error=on_error,
    )
    # NOTE(security): CERT_NONE turns off TLS certificate verification; kept
    # as in the original, but it should be reviewed before production use.
    ws.run_forever(sslopt={"cert_reqs": ssl.CERT_NONE})
    # ``data`` only exists after a match; a plain ``return data`` raised
    # NameError when the loop stopped for any other reason.
    return globals().get("data")


if __name__ == "__main__":
    main_news()
import pyaudio
from ibm_watson import SpeechToTextV1
from ibm_watson.websocket import RecognizeCallback, AudioSource
from threading import Thread
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
try:
from Queue import Queue, Full
except ImportError:
from queue import Queue, Full
###############################################
#### Initialize queue to store the recordings #
###############################################
CHUNK = 1024
# Note: It will discard if the websocket client can't consume fast enough
# So, increase the max size as per your choice
BUF_MAX_SIZE = CHUNK * 10
# Buffer to store audio; its capacity is expressed in chunks, not bytes
q = Queue(maxsize=BUF_MAX_SIZE // CHUNK)
# Create an instance of AudioSource backed by the queue
audio_source = AudioSource(q, True, True)
###############################################
#### Prepare Speech to Text Service ###########
###############################################
# initialize speech to text service
# NOTE(review): 'apikey' looks like a placeholder for a real IAM API key -- confirm
authenticator = IAMAuthenticator('apikey')
speech_to_text = SpeechToTextV1(authenticator=authenticator)
#speech_to_text.set_service_url('https://api.us-south.speech-to-text.watson.cloud.ibm.com/instances/62a2f19f-959f-4c3c-a276-27ab0e458341/v1/recognize')
speech_to_text.set_service_url('https://stream.watsonplatform.net/speech-to-text/api')
# define callback for the speech to text service
class MyRecognizeCallback(RecognizeCallback):
    """Recognize callback that echoes every service event to stdout."""

    def __init__(self):
        super(MyRecognizeCallback, self).__init__()

    def on_transcription(self, transcript):
        """Print the transcript handed over by the service."""
        print(transcript)

    def on_connected(self):
        """Report that the connection was established."""
        print('Connection was successful')

    def on_error(self, error):
        """Print an error received from the service."""
        print('Error received: {}'.format(error))

    def on_inactivity_timeout(self, error):
        """Print the inactivity-timeout notification."""
        print('Inactivity timeout: {}'.format(error))

    def on_listening(self):
        """Report that the service is listening."""
        print('Service is listening')

    def on_hypothesis(self, hypothesis):
        """Print the hypothesis handed over by the service."""
        print(hypothesis)

    def on_data(self, data):
        """Print the data payload handed over by the service."""
        print(data)

    def on_close(self):
        """Report that the connection was closed."""
        print("Connection closed")
# this function will initiate the recognize service and pass in the AudioSource
def recognize_using_weboscket(*args):
    """Start websocket recognition on the module-level ``audio_source``.

    Args:
        *args: Accepted and ignored, so the function can be used as a thread
            target with arbitrary positional arguments.
    """
    speech_to_text.recognize_using_websocket(
        audio=audio_source,
        content_type='audio/l16; rate=44100',
        recognize_callback=MyRecognizeCallback(),
        interim_results=True,
    )
###############################################
#### Prepare for recording using PyAudio ######
###############################################
# Variables for recording the speech
FORMAT = pyaudio.paInt16  # sample format passed to audio.open()
CHANNELS = 1  # channel count passed to audio.open()
RATE = 44100  # same value as the rate=44100 in the recognize content_type
# define callback for pyaudio to store the recording in queue
def pyaudio_callback(in_data, frame_count, time_info, status):
    """Queue one recorded audio buffer, discarding it when the queue is full.

    Args:
        in_data: The recorded audio buffer to enqueue.
        frame_count: Unused.
        time_info: Unused.
        status: Unused.

    Returns:
        The tuple ``(None, pyaudio.paContinue)``.
    """
    try:
        # Queue.put() blocks by default and therefore never raises Full; the
        # non-blocking variant is what makes the discard branch reachable and
        # keeps the audio callback from stalling.
        q.put_nowait(in_data)
    except Full:
        pass  # discard
    return (None, pyaudio.paContinue)
# instantiate pyaudio
audio = pyaudio.PyAudio()
# open an input stream that hands every recorded buffer to pyaudio_callback;
# start=False is passed here and stream.start_stream() is called further down
stream = audio.open(
    input=True,
    format=FORMAT,
    channels=CHANNELS,
    rate=RATE,
    frames_per_buffer=CHUNK,
    stream_callback=pyaudio_callback,
    start=False,
)
#########################################################################
#### Start the recording and start service to recognize the stream ######
#########################################################################
print("Enter CTRL+C to end recording...")
stream.start_stream()
try:
    recognize_thread = Thread(target=recognize_using_weboscket, args=())
    recognize_thread.start()
    # Wait with short timed joins instead of ``while True: pass``: the busy
    # loop kept the main thread spinning, whereas a timed join sleeps between
    # checks, still lets Ctrl+C through, and ends once recognition has stopped.
    while recognize_thread.is_alive():
        recognize_thread.join(0.1)
except KeyboardInterrupt:
    pass
finally:
    # stop recording -- now also runs when the recognize thread ends on its own
    stream.stop_stream()
    stream.close()
    audio.terminate()
    audio_source.completed_recording()
This is the code for IBM's Speech-To-Text service using a mic as input. May I know what the output of the program should be? This is the output I'm getting:
Enter CTRL+C to end recording...
Connection was successful
Service is listening
File "C:\Users\---\AppData\Local\Programs\Python\Python38-32\lib\site-packages\websocket\_app.py", line 320, in _callback
callback(self, *args)
File "C:\Users\---\AppData\Local\Programs\Python\Python38-32\lib\site-packages\ibm_watson\websocket\recognize_listener.py", line 199, in on_data
hypothesis = json_object['results'][0]['alternatives'][0][
Connection closed
It suddenly works when I tested with my wireless headset mic. Not sure why though as both devices are functioning well. The output is the transcript in the console.
This is happening to me too. I think the cause of your problem is that the audio you sent to the websocket was probably difficult to recognize, so the websocket's response was none/null; when the hypothesis function then tries to read the result, the error occurs because the result does not exist.
The output of the hypothesis function (on_hypothesis) will be a string with the transcript of the audio, and the output of the data function (on_data) will be JSON like this:
{'result_index': 0, 'results': [{'final': True, 'alternatives': [{'transcript': 'hello ', 'confidence': 0.66}], 'keywords_result': {}}]}
I am VERY new to coding and Python and I am trying to just receive live trade data from the Bitfinex API, and filter out specific messages as they come in because it gives duplicates. I want to take these filtered messages and then output them to a csv file continuously.
Specifically, I want to save the messages titled "te" (see output from API below) because these are the trades that are executed as they are executed. The stream gives "tu" as well, which are duplicates. I want to just take the "te" and download them live into a csv for other processing and saving.
Here is my code, which is a stripped down version of one that I found online:
import websocket
import time
import sys
from datetime import datetime, timedelta, timezone
import sched, time
import json
import csv
import requests
class BitfinexWebSocketReader():
    """Websocket client that subscribes to tBTCUSD trades and prints messages.

    Creating an instance builds the WebSocketApp and calls ``run()`` from the
    constructor.
    """

    endpoint = "wss://api.bitfinex.com/ws/2"

    def __init__(self):
        #websocket.enableTrace(True)
        self.ws = websocket.WebSocketApp(
            BitfinexWebSocketReader.endpoint,
            on_message=self.on_message,
            on_error=self.on_error,
            on_close=self.on_close,
        )
        self.ws.on_open = self.on_open
        try:
            self.run()
        except KeyboardInterrupt:
            self.ws.close()

    def run(self):
        """Run the websocket loop, then print a marker once it returns."""
        self.ws.run_forever()
        print("### run ###")

    def on_message(self, ws, message):
        """Print every raw message received from the server."""
        print(message)

    def on_error(self, ws, error):
        """Print the error and exit the program."""
        print(error)
        sys.exit()

    def on_close(self, ws, close_status_code=None, close_msg=None):
        """Print a marker when the connection is closed.

        The two extra parameters are optional so the callback works whether
        the library calls it with only ``ws`` or with the three-argument form
        (``ws, close_status_code, close_msg``).
        """
        print("### closed ###")

    def on_open(self, ws):
        """Send the subscription request for the tBTCUSD trades channel."""
        #print("### open ###")
        ws.send(json.dumps({"event": "subscribe", "channel": "Trades", "symbol": "tBTCUSD"}))


if __name__ == "__main__":
    BitfinexWebSocketReader()
And here is an example of a couple seconds of the output:
{"event":"info","version":2,"serverId":"88c6df7e-5159-4a8e-b1c4-f08904aeeb0a","platform":{"status":1}}
{"event":"subscribed","channel":"trades","chanId":23,"symbol":"tBTCUSD","pair":"BTCUSD"}
[23,[[281534165,1534448458635,0.005,6401.5],[281534164,1534448457975,0.01999998,6401.5],[281534139,1534448438766,-0.31749096,6401.4],[281534132,1534448438051,0.005,6401.5],[281534116,1534448432624,-0.051,6401.4],[281534099,1534448425380,0.18699482,6401.5],[281534097,1534448424900,0.013558,6401.5],[281534096,1534448424718,0.0514726,6401.5],[281534083,1534448415788,0.005,6401.8],[281534080,1534448415568,-1,6400.8],[281534079,1534448415566,-1,6401.8],[281534073,1534448409395,-0.0325,6403],[281534053,1534448398108,-0.2498,6405.1],[281534048,1534448396370,-0.25,6404.9],[281534043,1534448394675,0.42406762,6400],[281534029,1534448390257,0.30000001,6400],[281534028,1534448390236,0.30000001,6400],[281534027,1534448389714,1,6400],[281534025,1534448389033,1.18922278,6400],[281534024,1534448389030,0.41523564,6399.7],[281534023,1534448389028,0.39554158,6399.7],[281534013,1534448384920,0.025,6399.7],[281534011,1534448382885,0.018794,6399.7],[281534008,1534448380817,-1.49155951,6399.6],[281534007,1534448380815,-2.5,6399.6],[281534006,1534448380813,-0.34,6399.6],[281534005,1534448380811,-0.15098794,6399.6],[281534004,1534448380808,-0.29899445,6399.6],[281534000,1534448379152,-0.005,6399.6],[281533999,1534448377821,-0.16825162,6399.6]]]
[23,"hb"]
[23,"te",[281534199,1534448478028,-0.00937287,6401.4]]
[23,"te",[281534200,1534448478031,-0.29062714,6401.4]]
[23,"te",[281534201,1534448478036,-0.30000001,6401.4]]
[23,"tu",[281534201,1534448478036,-0.30000001,6401.4]]
[23,"tu",[281534199,1534448478028,-0.00937287,6401.4]]
[23,"tu",[281534200,1534448478031,-0.29062714,6401.4]]
[23,"te",[281534204,1534448478180,-0.65915285,6401.4]]
[23,"tu",[281534204,1534448478180,-0.65915285,6401.4]]
[23,"hb"]
[23,"te",[281534224,1534448479402,-0.114,6399.9]]
[23,"tu",[281534224,1534448479402,-0.114,6399.9]]
[23,"te",[281534232,1534448480466,-0.00012512,6399.9]]
[23,"tu",[281534232,1534448480466,-0.00012512,6399.9]]
Bonus question: why does that super long first entry pop up every time I execute the code?
You can initialize some kind of data structure in the constructor, like a list() or a set() to store the desired messages and then filter them in the on_message method.
So in your constructor
def __init__(self):
    """Create the message store and the websocket app, then run the client."""
    #websocket.enableTrace(True)
    # Container for the messages that on_message decides to keep.
    self.store = []
    self.ws = websocket.WebSocketApp(
        BitfinexWebSocketReader.endpoint,
        on_message=self.on_message,
        on_error=self.on_error,
        on_close=self.on_close,
    )
    self.ws.on_open = self.on_open
    try:
        self.run()
    except KeyboardInterrupt:
        self.ws.close()
And in your on_message method
def on_message(self, ws, message):
    """Keep executed-trade ("te") messages in ``self.store`` and print all.

    Args:
        ws: The websocket app that received the message; not used here.
        message: Raw message text as received from the server.
    """
    try:
        payload = json.loads(message)
    except ValueError:
        payload = None
    # Trade executions look like [chan_id, "te", [...]]. A plain substring
    # test ('"te" in message') would also match unrelated messages that merely
    # contain the letters "te", so check the message-type field itself.
    if isinstance(payload, list) and len(payload) > 1 and payload[1] == "te":
        self.store.append(message)
    print(message)
While trying to connect to a Jupyter kernel from Python code, I encountered a problem.
In the Jupyter source code, the connection to the zmq channels is established when the websocket is opened:
def open(self, kernel_id):
    """Open the websocket and attach the reply callback to every zmq stream.

    If ``create_stream`` raises ``web.HTTPError``, every stream that is not
    yet closed is closed and the handler itself is closed.

    Args:
        kernel_id: Accepted from the caller; not used directly in this method.
    """
    super(ZMQChannelsHandler, self).open()
    try:
        self.create_stream()
    except web.HTTPError:
        for stream in self.channels.values():
            if not stream.closed():
                stream.close()
        self.close()
    else:
        for stream in self.channels.values():
            # this is the callback function invoked when a message is received
            # from the zmq channel
            stream.on_recv_stream(self._on_zmq_reply)
while in the create_stream function, the zmq channel was established.
def create_stream(self):
    """Connect the shell, iopub and stdin channels and store their streams.

    For each channel name the kernel manager's ``connect_<channel>`` method
    is called with ``self.kernel_id`` and the session identity. The returned
    stream is stored in ``self.channels`` and tagged with its channel name.
    """
    km = self.kernel_manager
    identity = self.session.bsession
    for channel in ('shell', 'iopub', 'stdin'):
        meth = getattr(km, 'connect_' + channel)
        self.channels[channel] = stream = meth(self.kernel_id, identity=identity)
        stream.channel = channel
    # ... (remaining code omitted by the author as not significant)
When the server receives a message, on_message is invoked:
def on_message(self, msg):
    """Decode an incoming websocket message and forward it to its channel.

    Bytes are decoded with ``deserialize_binary_message`` and text with
    ``json.loads``. The ``channel`` key is removed from the message and
    defaults to ``'shell'`` when absent or ``None``. Messages for unknown
    channels, or received while no channels exist, are dropped.

    Args:
        msg: The raw websocket message, as bytes or text.
    """
    if not self.channels:
        return
    decoded = deserialize_binary_message(msg) if isinstance(msg, bytes) else json.loads(msg)
    requested = decoded.pop('channel', None)
    target = 'shell' if requested is None else requested
    if target in self.channels:
        self.session.send(self.channels[target], decoded)
At this point, the zmq channel receives the Python code to be executed. After that, the execution result should be returned, so the callback registered with on_recv_stream above should be called, and we finally get the result.
So I write the python code snippet like this:
from jupyter_client.multikernelmanager import MultiKernelManager
from jupyter_client.session import Session
from tornado import gen, web
from tornado.concurrent import Future
from tornado.ioloop import IOLoop
km = MultiKernelManager()
kernelid = km.start_kernel()
kernel = km.get_kernel(kernelid)
channel = km.connect_shell(kernelid)
# Single %-formatted argument so the line prints the same text whether print
# is a statement (Python 2) or a function (Python 3).
print('channel %s' % (channel,))


def on_reply(msg):
    """Callback registered on the shell channel; reports that a message came in."""
    print('we got return')


def on_timeout():
    """Report that no kernel_info_reply arrived for the started kernel."""
    # The original referenced the undefined name ``kernel_id`` and passed the
    # id as a second print argument instead of formatting it into the text.
    print("Timeout waiting for kernel_info_reply: %s" % kernelid)


kernel.session.send(channel, 'kernel_info_request')
# NOTE(review): on_recv callbacks are presumably dispatched by the tornado
# IOLoop, which this script never starts -- that may be why on_reply is never
# invoked; confirm before relying on this approach.
channel.on_recv(on_reply)
Actually, I did not get the return message; that is to say, the on_reply function was not invoked. I do not know what the problem is. Can anyone help me?
I solve the problem like this:
from jupyter_client.multikernelmanager import MultiKernelManager
# Start a kernel, execute a snippet through a kernel client and print the
# iopub messages that belong to that request until the kernel reports idle.
km = MultiKernelManager()
kernelid = km.start_kernel('python2')
kn = km.get_kernel(kernelid)
kc = kn.client()
kc.start_channels()
msg_id = kc.execute('import math\nprint(math.sqrt(2))')
while True:
    try:
        msg = kc.get_iopub_msg()
        print('\niopub msg is')
        print(msg)
    # The original ``except Excption,e`` misspelled the exception name and
    # used Python 2-only syntax; ``as`` works on both Python 2.6+ and 3.
    except Exception as e:
        print(e)
        break
    # Skip messages that were produced by other requests.
    if msg['parent_header'].get('msg_id') != msg_id:
        continue
    msg_type = msg['msg_type']
    content = msg['content']
    print('content is :')
    print(content)
    # Stop once the kernel reports it is idle again for this request.
    if msg_type == 'status' and content['execution_state'] == 'idle':
        break
I've created a simple async client and server, but I'm unable to get the client to reply after it receives data for the first time. It seems the server can send back a reply after receiving from the client, but the client can't:
here is the client's session:
[mike#mike Public]$ python cl.py
buf got your stuff
dded callback ## this is a log addded to see if execution got where i wanted
and here is the server's log:
[mike#mike Public]$ python that.py
buf ehlo localhost
I was expecting some sort of ping-pong effect where one side sends, then the other, and so on.
here is the client's code:
import socket
import fcntl, os, io, time, functools
from tornado import ioloop
class Punk(object):
    """Unix-socket client that greets the server and answers what it receives."""

    def __init__(self):
        self.loop = ioloop.IOLoop.instance()
        self.address = 'blah.sock'
        self.authkey = "none"
        self.sock = socket.socket(socket.AF_UNIX)

    def setup(self):
        """Connect, send the greeting and start the IO loop."""
        self.sock.connect(self.address)
        self.sock.setblocking(False)
        # Bytes literal: sockets carry bytes, and b"..." works on Python 2 too.
        self.sock.sendall(b"ehlo localhost")
        self.fd = self.sock.fileno()
        self.loop.add_handler(self.fd, self.reader, self.loop.READ)
        self.loop.start()

    def reader(self, fd, event):
        """Read one chunk from the server and hand it to ``prnt``.

        Args:
            fd: File descriptor the event was reported for.
            event: Event bit mask reported by the IO loop.
        """
        # ``event`` is a bit mask; an equality test would skip a read that is
        # reported together with another flag.
        if not event & self.loop.READ:
            return
        try:
            servrep = self.sock.recv(1024)
        except socket.error as e:
            print("this %s happend" % e)
            return
        if not servrep:
            # Empty read means the peer closed; stop watching the descriptor,
            # otherwise the handler would keep firing on the closed socket.
            self.loop.remove_handler(fd)
            self.sock.close()
            return
        self.prnt(servrep)

    def prnt(self, buf):
        """Print the received data and schedule ``reply`` two seconds later."""
        text = buf.decode("utf-8", "replace") if isinstance(buf, bytes) else buf
        print("buf %s" % text)
        tim = time.time() + 2
        self.loop.add_timeout(tim, self.reply)
        print("added callback")

    def reply(self):
        """Send the client's answer to the server."""
        self.sock.sendall(b" clent got your stuff")


if __name__ == "__main__":
    bob = Punk()
    bob.setup()
and here is the server:
import socket
import fcntl, os, io, time, functools
from array import array
from tornado import ioloop
class Player(object):
    """Unix-socket server that answers every message a client sends."""

    def __init__(self):
        self.loop = ioloop.IOLoop.instance()
        self.address = 'blah.sock'
        self.authkey = "none"
        self.sock = socket.socket(socket.AF_UNIX)

    def setup(self):
        """Bind and listen on the socket path, then start the IO loop."""
        self.sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        self.sock.bind(self.address)
        self.sock.setblocking(False)
        self.sock.listen(1)
        self.fd = self.sock.fileno()
        self.loop.add_handler(self.fd, self.reader, self.loop.READ)
        self.loop.start()

    def reader(self, fd, event):
        """Accept a pending connection and start watching it for data.

        The original accepted and read inside this handler, which is only
        registered for the listening socket. Later client messages arrive on
        the connection socket, which was never watched, so they were never
        read. The connection now gets its own read handler.

        Args:
            fd: File descriptor of the listening socket.
            event: Event bit mask reported by the IO loop.
        """
        if not event & self.loop.READ:
            return
        try:
            conn, addr = self.sock.accept()
        except socket.error as e:
            # The listening socket is non-blocking: accept() raises when no
            # connection is actually pending.
            print("this %s happend" % e)
            return
        conn.setblocking(False)
        self.conn, self.addr = conn, addr
        self.loop.add_handler(
            conn.fileno(),
            functools.partial(self._on_client_data, conn),
            self.loop.READ,
        )

    def _on_client_data(self, conn, fd, event):
        """Read one chunk from ``conn`` and pass it to ``prnt``."""
        if not event & self.loop.READ:
            return
        try:
            maxrep = conn.recv(1024)
        except socket.error as e:
            print("this %s happend" % e)
            return
        if not maxrep:
            # Peer closed the connection: stop watching it and release it.
            self.loop.remove_handler(fd)
            conn.close()
            return
        # Answer on the connection the data came from.
        self.conn = conn
        self.prnt(maxrep)

    def prnt(self, buf):
        """Print the received data and schedule ``reply`` two seconds later."""
        text = buf.decode("utf-8", "replace") if isinstance(buf, bytes) else buf
        print("buf %s" % text)
        tim = time.time() + 2
        self.loop.add_timeout(tim, self.reply)

    def reply(self):
        """Send the server's answer on the current connection."""
        self.conn.sendall(b"got your stuff")


if __name__ == "__main__":
    bob = Player()
    bob.setup()
I had set my sockets to non-blocking mode, but I did not catch the error that is raised
when accepting on a non-blocking socket while there is no pending connection:
here:
def reader(self,fd,event):
    """Excerpt of the original handler: accept a connection on a READ event."""
    result = b""
    if event == self.loop.READ:
        # Unguarded accept() -- this is the call the surrounding text is about.
        self.conn, self.addr = self.sock.accept()
should be
def reader(self,fd,event):
    """Accept a pending connection on a READ event and remember it.

    Args:
        fd: File descriptor the event was reported for.
        event: Event value reported by the IO loop.

    Raises:
        socket.error: For any accept failure other than "no connection
            pending" on the non-blocking socket.
    """
    import errno  # local import: the surrounding script does not import errno

    result = b""
    if event == self.loop.READ:
        try:
            self.conn, self.addr = self.sock.accept()  # we get stuck here
            self.connl.append(self.conn)
        except socket.error as e:
            # Only "nothing to accept right now" is expected on a
            # non-blocking socket; swallowing every Exception would also hide
            # real failures such as a closed socket or a missing attribute.
            if e.errno not in (errno.EAGAIN, errno.EWOULDBLOCK):
                raise