I would like to use a dataframe to store my portfolio information and update it every minute. But the code below produces an empty result. Am I missing something?
from ibapi.client import EClient
from ibapi.wrapper import EWrapper
from ibapi.contract import Contract
from ibapi.ticktype import TickTypeEnum
import pandas as pd
import time

class IBapi(EWrapper, EClient):
    def __init__(self):
        EClient.__init__(self, self)
        self.all_positions = pd.DataFrame([], columns=['ConID', 'Symbol', 'Quantity', 'Average Cost', 'MarketPrice', 'marketValue', 'unrealizedPNL', 'realizedPNL'])

    def updatePortfolio(self, contract: Contract, position: float, marketPrice: float, marketValue: float, averageCost: float, unrealizedPNL: float, realizedPNL: float, accountName: str):
        super().updatePortfolio(contract, position, marketPrice, marketValue, averageCost, unrealizedPNL, realizedPNL, accountName)
        index = str(contract.conId)
        self.all_positions.loc[index] = contract.conId, contract.symbol, position, averageCost, marketPrice, marketValue, unrealizedPNL, realizedPNL

def main():
    app = IBapi()
    app.connect('127.0.0.1', 7497, 0)
    app.reqAccountUpdates(True, "XXXXXXXX")
    current_positions = app.reqAccountUpdates(True, "XXXXXXX")
    app.run()
    print(current_positions.to_string())
    app.disconnect()

if __name__ == "__main__":
    main()
reqAccountUpdates is an asynchronous function call: it sends an outgoing message but does not wait for a response. (Since the message initiates a subscription to a data stream, there isn't a single response; a series of responses comes back instead.)
So current_positions in:
current_positions = app.reqAccountUpdates(True, "XXXXXXX")
will always be None. Instead the responses are stored in app.all_positions by the overridden updatePortfolio() function.
Also, the run() loop is an infinite loop, so the lines after it won't execute. Most commonly this kind of architecture is handled with Python futures/the asyncio module (as in the ib_insync library), or with an additional thread for the run loop, as in the sketch below.
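A minimal sketch of the threaded variant (the time.sleep is a crude placeholder; in practice you would set a threading.Event from the accountDownloadEnd() callback once the snapshot is complete):

import threading
import time

def main():
    app = IBapi()
    app.connect('127.0.0.1', 7497, 0)

    # run the socket message loop on a background thread so main() can continue
    api_thread = threading.Thread(target=app.run, daemon=True)
    api_thread.start()

    app.reqAccountUpdates(True, "XXXXXXXX")  # your account code here
    time.sleep(3)  # crude wait for the portfolio callbacks to arrive

    print(app.all_positions.to_string())
    app.disconnect()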
Summary
I want to use Streamlit to create a dashboard of all the trades (buy and sell) happening in a given market. I connect to a websocket stream to receive BTCUSDT data from the Binance exchange. Messages arrive every ~0.1 s and I would like to update my dashboard every ~0.09 s.
How can you handle this kind of situation, where messages are delivered at high frequency? With my code I successfully create a dashboard, but it doesn't update fast enough, and I suspect it is running behind.
The dashboard must display the buy and sell volumes at any moment in time as bar charts. I am also adding some metrics to show the total buy and sell volumes, as well as their change.
Steps to reproduce
My code is structured in the following way.
There is a streamer.py file that defines a class Stream. The Stream object is a websocket client: it connects to a stream, handles messages, and updates the dashboard. Whenever a new message is received, Stream acquires a threading.Lock() and updates the pandas dataframes (one dataframe for buy orders, one for sell orders). If multiple orders occur at the same timestamp, it combines them by summing the corresponding volumes. It then releases the threading.Lock() and spawns a new thread that executes the update function (defined in streamer.py). The update function acquires the lock to avoid concurrent access to the dataframes.
In the main.py file, the Streamlit dashboard and the Stream object are initialized.
To reproduce the following code you need to connect to the websocket from a region where Binance is not restricted. Since I live in the US, I must use a VPN to properly receive the data.
Code snippet:
main.py file
# main.py
import streamer
import pandas as pd
import streamlit as st  # web development
import numpy as np      # np mean, np random
import time             # to simulate real-time data, time loop
import plotly.express as px  # interactive charts

df_buy = pd.DataFrame(columns=['Price', 'Quantity', 'USD Value'])
df_sell = pd.DataFrame(columns=['Price', 'Quantity', 'USD Value'])

st.set_page_config(
    page_title='Real-Time Data Science Dashboard',
    page_icon='✅',
    layout='wide'
)

# dashboard title
st.title("Real-Time / Live Data Science Dashboard")
placeholder = st.empty()

streamer.Stream(df_buy, df_sell, placeholder).connect()
streamer.py file
# streamer.py
import websocket
import json
import streamlit as st
import plotly.express as px
import pandas as pd
from threading import Thread, Lock
from streamlit.script_run_context import add_script_run_ctx
from datetime import datetime
import time

def on_close(ws, close_status_code, close_msg):
    print('LOG', 'Closed orderbook client')

def update(df_buy, df_sell, placeholder, lock):
    lock.acquire()
    with placeholder.container():
        # create two columns for the KPI metrics
        kpi1, kpi2 = st.columns(2)

        current_sumSellVolumes = df_sell['Quantity'].sum()
        previous_sumSellVolumes = df_sell.iloc[:-1]['Quantity'].sum()
        current_sumBuyVolumes = df_buy['Quantity'].sum()
        previous_sumBuyVolumes = df_buy.iloc[:-1]['Quantity'].sum()

        # fill in those columns with the respective metrics/KPIs
        kpi2.metric(label="Sell quantity 📉", value=round(current_sumSellVolumes, 2),
                    delta=round(current_sumSellVolumes - previous_sumSellVolumes, 2))
        kpi1.metric(label="Buy quantity 📈", value=round(current_sumBuyVolumes, 2),
                    delta=round(current_sumBuyVolumes - previous_sumBuyVolumes, 2))

        # create two columns for charts
        fig_col1, fig_col2 = st.columns(2)
        with fig_col1:
            st.markdown("### Buy Volumes")
            fig = px.bar(data_frame=df_buy, x=df_buy.index, y='Quantity')
            st.write(fig)
        with fig_col2:
            st.markdown("### Sell Volumes")
            fig2 = px.bar(data_frame=df_sell, x=df_sell.index, y='Quantity')
            st.write(fig2)

        st.markdown("### Detailed Data View")
        st.dataframe(df_buy)
        st.dataframe(df_sell)
    lock.release()

class Stream():
    def __init__(self, df_buy, df_sell, placeholder):
        self.symbol = 'BTCUSDT'
        self.df_buy = df_buy
        self.df_sell = df_sell
        self.placeholder = placeholder
        self.lock = Lock()
        self.url = "wss://stream.binance.com:9443/ws"
        self.stream = f"{self.symbol.lower()}@aggTrade"
        self.times = []

    def on_error(self, ws, error):
        print(self.times)
        print('ERROR', error)

    def on_open(self, ws):
        print('LOG', f'Opening WebSocket stream for {self.symbol}')
        subscribe_message = {"method": "SUBSCRIBE",
                             "params": [self.stream],
                             "id": 1}
        ws.send(json.dumps(subscribe_message))

    def handle_message(self, message):
        self.lock.acquire()
        timestamp = datetime.utcfromtimestamp(int(message['T']) / 1000)
        price = float(message['p'])
        qty = float(message['q'])
        USDvalue = price * qty
        side = 'BUY' if message['m'] == False else 'SELL'
        if side == 'BUY':
            df = self.df_buy
        else:
            df = self.df_sell
        if timestamp not in df.index:
            df.loc[timestamp] = [price, qty, USDvalue]
        else:
            df.loc[df.index == timestamp, 'Quantity'] += qty
            df.loc[df.index == timestamp, 'USD Value'] += USDvalue
        self.lock.release()

    def on_message(self, ws, message):
        message = json.loads(message)
        self.times.append(time.time())
        if 'e' in message:
            self.handle_message(message)
            thr = Thread(target=update, args=(self.df_buy, self.df_sell, self.placeholder, self.lock,))
            add_script_run_ctx(thr)
            thr.start()

    def connect(self):
        print('LOG', 'Connecting to websocket')
        self.ws = websocket.WebSocketApp(self.url, on_close=on_close, on_error=self.on_error,
                                         on_open=self.on_open, on_message=self.on_message)
        self.ws.run_forever()
Debug info
Streamlit version: 1.4.0
Python version: 3.10.4
OS version: MacOS 13.1
Browser version: Safari 16.2
I am trying to use the cryptofeed module to receive OHLC data from the API, store the data in a global variable by running the cryptofeed stream in a separate process, and then access that global variable from a separate asyncio loop.
I am having trouble storing the data across the process boundary: the async close() function only ever prints an empty pandas dataframe. I would appreciate a suggestion on how to approach this problem.
from cryptofeed import FeedHandler
from cryptofeed.backends.aggregate import OHLCV
from cryptofeed.defines import TRADES
from cryptofeed.exchanges import BinanceFutures
import pandas as pd
import multiprocessing
from multiprocessing import Process
from concurrent.futures import ProcessPoolExecutor
import asyncio

data1 = pd.DataFrame()  # create an empty DataFrame
queue = multiprocessing.Queue()

async def ohlcv(data):
    global data1
    # Convert data to a pandas DataFrame
    df = pd.DataFrame.from_dict(data, orient='index')
    # Reset the index
    df.reset_index(inplace=True)
    df.index = [pd.Timestamp.now()]
    # Append the rows of df to data1
    data1 = data1.append(df)
    queue.put('nd')

async def close(data):
    while True:
        print(data)
        await asyncio.sleep(15)

def main1():
    f = FeedHandler()
    f.add_feed(BinanceFutures(symbols=['BTC-USDT-PERP'], channels=[TRADES], callbacks={TRADES: OHLCV(ohlcv, window=10)}))
    f.run()

if __name__ == '__main__':
    p = Process(target=main1)
    p.start()
    asyncio.run(close(data1))
It appears that you are trying to combine asyncio with multiprocessing in some fashion. I don't have access to your FeedHandler and BinanceFutures classes, so I will just have main1 call ohlcv directly, and since main1 runs in a separate process from the main process (which is using asyncio), I can't see any reason in the code you posted why ohlcv would need to be a coroutine (an asyncio function).
asyncio has a provision for running multiprocessing tasks, and that is the way to proceed. So we run close as a coroutine and it runs main1 in a child process (a multiprocessing pool process, actually), returning the result that main1 returned. There is no need for explicit queue operations to return the result:
import asyncio
import pandas as pd
from concurrent.futures import ProcessPoolExecutor

def ohlcv(data):
    # Convert data to a pandas DataFrame
    df = pd.DataFrame.from_dict(data, orient='index')
    # Reset the index
    df.reset_index(inplace=True)
    df.index = [pd.Timestamp.now()]
    return df

def main1():
    """
    f = FeedHandler()
    f.add_feed(BinanceFutures(symbols=['BTC-USDT-PERP'], channels=[TRADES], callbacks={TRADES: OHLCV(ohlcv, window=10)}))
    f.run()
    """
    return ohlcv({'a': 1})

async def close():
    loop = asyncio.get_running_loop()
    with ProcessPoolExecutor(1) as executor:
        return await loop.run_in_executor(executor, main1)

if __name__ == '__main__':
    df = asyncio.run(close())
    print(df)
Prints:

                           index  0
2023-01-08 15:59:44.939261     a  1
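If the real FeedHandler needs to stream rows continuously rather than return a single result, the usual bridge is a multiprocessing.Queue that the feed process fills and the asyncio side drains. A minimal sketch of that pattern (the feed_worker body is a stand-in for the actual FeedHandler callback, which is an assumption on my part):

import asyncio
import multiprocessing

def feed_worker(q):
    # stand-in for FeedHandler.run() with an ohlcv callback doing q.put(df)
    for i in range(3):
        q.put({'tick': i})
    q.put(None)  # sentinel: feed finished

async def consume(q):
    loop = asyncio.get_running_loop()
    while True:
        # q.get() blocks, so run it in a thread to keep the event loop free
        item = await loop.run_in_executor(None, q.get)
        if item is None:
            break
        print(item)

if __name__ == '__main__':
    q = multiprocessing.Queue()
    p = multiprocessing.Process(target=feed_worker, args=(q,))
    p.start()
    asyncio.run(consume(q))
    p.join()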
The goal is to pull real-time data in the background (say every 5 seconds) and pull it into the dashboard when needed. Here is my code. It kind of works, but I am seeing two issues:
1. If I move st.write("TESTING!") to the end, it never gets executed because of the while loop. Is there a way to improve this? I can imagine that as the dashboard grows there will be multiple pages/tables etc., so this won't give much flexibility.
2. The return px line in the async function: I am not very comfortable with it, because I only got it right via trial and error.
Sorry for being such a newbie, but if there are better ways to do it, I would really appreciate it. Thank you!
import asyncio
import streamlit as st
import numpy as np

st.set_page_config(layout="wide")

async def data_generator(test):
    while True:
        with test:
            px = np.random.randn(5, 1)
            await asyncio.sleep(1)
            return px

test = st.empty()
st.write("TESTING!")

with test:
    while True:
        px = asyncio.run(data_generator(test))
        st.write(px[0])
From my experience, the trick to using asyncio with Streamlit is to create your layout ahead of time, using empty widgets where you need to display async info. The async coroutine takes in these empty slots and fills them out. This should help you create a more complex application.
The asyncio.run command can then be the last Streamlit action taken. Any Streamlit commands after it won't be processed, as you have observed.
I would also recommend arranging any input widgets outside of the async function, during the initial layout, and then sending the widget output in for processing. You could draw your input widgets inside the function, but the layout might then become tricky.
If you still want input widgets inside your async function, you'd definitely have to put them outside of the while loop, otherwise you would get a duplicated-widget error. (You might try to overcome this by creating new widgets all the time, but then the input widgets would be "reset" and interaction isn't achieved, not to mention possible memory issues.)
Here's a complete example of what I mean:
import asyncio
import pandas as pd
import plotly.express as px
import streamlit as st
from datetime import datetime

CHOICES = [1, 2, 3]

def main():
    print('\nmain...')
    # lay out your app beforehand, with st.empty
    # for the widgets that the async function will populate
    graph = st.empty()
    radio = st.radio('Choose', CHOICES, horizontal=True)
    table = st.empty()
    try:
        # async-run the draw function, sending in all the
        # widgets it needs to use/populate
        asyncio.run(draw_async(radio, graph, table))
    except Exception as e:
        print(f'error...{type(e)}')
        raise
    finally:
        # some additional code to handle the user clicking stop
        print('finally')
        # this doesn't actually get called, I think :(
        table.write('User clicked stop!')

async def draw_async(choice, graph, table):
    # must send in all the streamlit widgets that
    # this fn would interact with...

    # this could possibly work, but layout is tricky
    # choice2 = st.radio('Choose 2', CHOICES)

    while True:
        # this would not work because you'd be creating duplicated
        # radio widgets
        # choice3 = st.radio('Choose 3', CHOICES)

        timestamp = datetime.now()
        sec = timestamp.second
        graph_df = pd.DataFrame({
            'x': [0, 1, 2],
            'y': [max(CHOICES), choice, choice * sec / 60.0],
            'color': ['max', 'current', 'ticking']
        })
        df = pd.DataFrame({
            'choice': CHOICES,
            'current_choice': len(CHOICES) * [choice],
            'time': len(CHOICES) * [timestamp]
        })
        graph.plotly_chart(px.bar(graph_df, x='x', y='y', color='color'))
        table.dataframe(df)
        _ = await asyncio.sleep(1)

if __name__ == '__main__':
    main()
Would something like this work?
import asyncio
import streamlit as st

async def tick(placeholder):
    tick = 0
    while True:
        with placeholder:
            tick += 1
            st.write(tick)
            await asyncio.sleep(1)

async def main():
    st.header("Async")
    placeholder = st.empty()
    await tick(placeholder)

asyncio.run(main())
This is my take on async, based on the article "How to use AsyncHTTPProvider in web3py?". However, upon running this code, it executes like a synchronous function.
For web3.js there is support for batch requests (https://dapp-world.com/smartbook/web3-batch-request-Eku8), but web3.py does not have an equivalent.
I am using the Ethereum Alchemy API, which supports about 19 API calls per second, and I have about 1000 Ethereum addresses. How do I modify the code so that I can batch 19 addresses per second?
from web3 import Web3
from web3.eth import AsyncEth
import time
import pandas as pd
import aiohttp
import asyncio

alchemy_url = "https://eth-mainnet.g.alchemy.com/v2/zCTn-wyjipF5DvGFVNEx_XqCKZakaB57"
w3 = Web3(Web3.AsyncHTTPProvider(alchemy_url), modules={'eth': (AsyncEth,)}, middlewares=[])

start = time.time()

df = pd.read_csv('Ethereum/ethereumaddresses.csv')
Wallet_Address = (df.loc[:, 'Address'])
#Balance_storage = []
session_timeout = aiohttp.ClientTimeout(total=None)

async def get_balances():
    for address in Wallet_Address:
        balance = await w3.eth.get_balance(address)
        print(address, balance)

asyncio.run(get_balances())

end = time.time()
total_time = end - start
print(f"It took {total_time} seconds to make {len(Wallet_Address)} API calls")
I don't think my idea is the best, but you can use it as a temporary solution. It relies on ThreadPoolExecutor.
I ran a benchmark and found these results:
Test 1: without ThreadPoolExecutor, using the BSC public RPC and just running a for loop, the process takes more than 3 minutes to finish.
Test 2: with ThreadPoolExecutor, the BSC public RPC, and a 100 ms delay via time.sleep(0.1), it finishes in under 40 seconds.
Test 3: with ThreadPoolExecutor, QuickNode, and a 100 ms delay, it finishes in 35 seconds.
Doing simple math (1000 wallets / 19 calls per second ≈ 53 seconds), your process needs at least about 50 seconds. Try running with 100 ms delays, and if that doesn't work you can increase the delay.
One problem with time.sleep is that it blocks the thread, so anything like a GUI will freeze during the process. (You could use multiprocessing to work around this.)
A second problem is that the threads finish out of order, so the addresses will probably not keep their original CSV order. (You could attach an _id or similar to each address and reorder the results afterwards.)
Code: works fine on BSC (just change the RPC). It finds all the balances, stores them in self.data (a defaultdict), and then saves them to a new CSV file called "newBalances.csv" (you can change this):
from collections import defaultdict
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime
from web3 import Web3
import pandas as pd
import time

class multiGetBalanceExample():
    def __init__(self):
        self.initialtime = datetime.now()  # initial time

        # === Setup Web3 === #
        self.bsc = "https://bsc-dataseed.binance.org/"  # rpc (change this)
        self.web3 = Web3(Web3.HTTPProvider(self.bsc))  # web3 connect

        # === Load the csv file === #
        self.df = pd.read_csv(r"./Ethereum/ethereumaddresses.csv")
        self.wallet_address = (self.df.loc[:, 'Address'])

        # === Temporary address/balance store (defaultdict) === #
        self.data = defaultdict(list)

        # === Start === #
        self.start_workers(self.data)

        # === Finish === #
        self.saveCsv()  # save to a new csv file
        self.finaltime = datetime.now()  # end time
        print(f"\nFinished! Process takes: {self.finaltime - self.initialtime}")

    def start_workers(self, data, workers=10):
        # each worker handles a ~100-address slice of the csv
        with ThreadPoolExecutor(max_workers=workers) as executor:
            executor.submit(self.getBalances, _data=data, _from=0, _to=101)
            executor.submit(self.getBalances, _data=data, _from=101, _to=201)
            executor.submit(self.getBalances, _data=data, _from=201, _to=301)
            executor.submit(self.getBalances, _data=data, _from=301, _to=401)
            executor.submit(self.getBalances, _data=data, _from=401, _to=501)
            executor.submit(self.getBalances, _data=data, _from=501, _to=601)
            executor.submit(self.getBalances, _data=data, _from=601, _to=701)
            executor.submit(self.getBalances, _data=data, _from=701, _to=801)
            executor.submit(self.getBalances, _data=data, _from=801, _to=901)
            executor.submit(self.getBalances, _data=data, _from=901, _to=1000)
        return data

    def getBalances(self, _data, _from, _to):
        for i in range(_from, _to):
            # == get the balance of each wallet == #
            get_balance = self.web3.eth.get_balance(self.wallet_address[i])
            # == append to self.data == #
            _data["Address"].append(self.wallet_address[i])
            _data["Balance"].append(get_balance)
            # == print and sleep 100ms == #
            print(f"Found: {self.wallet_address[i], get_balance}\n")
            time.sleep(0.1)  # tune this to your rate limit (100ms took ~40s in my test)
        return _data

    def saveCsv(self):
        # == create the new csv file == #
        headers = ["Address", "Balance"]
        new_df = pd.DataFrame(columns=headers)
        new_df["Address"] = self.data["Address"]
        new_df["Balance"] = self.data["Balance"]
        new_df.to_csv(r"./Ethereum/newBalances.csv", index=False)  # save

multiGetBalanceExample()
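For completeness, since the question started from AsyncHTTPProvider: the loop in get_balances awaits each call one at a time, which is why it behaves synchronously. A sketch of batching with asyncio.gather, firing 19 requests at once and pausing between batches (this reuses w3 and Wallet_Address from the question and is an untested sketch, not a drop-in solution):

import asyncio

async def get_balances_batched(addresses, batch_size=19):
    results = {}
    for i in range(0, len(addresses), batch_size):
        batch = addresses[i:i + batch_size]
        # fire the whole batch concurrently instead of awaiting one by one
        balances = await asyncio.gather(*(w3.eth.get_balance(a) for a in batch))
        results.update(dict(zip(batch, balances)))
        await asyncio.sleep(1)  # stay under ~19 calls per second
    return results

# balances = asyncio.run(get_balances_batched(list(Wallet_Address)))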
I was trying to export IB position/account values into a dataframe for further processing in Python, but I failed to figure out how to achieve this. Can anyone help?
import pandas as pd
import numpy as np
import time
import ibapi
from ibapi.client import EClient
from ibapi.wrapper import EWrapper
import threading
import sys
import queue
from ibapi.contract import Contract

class MyWrapper(EWrapper):
    def updatePortfolio(self, contract: Contract, position: float, marketPrice: float, marketValue: float, averageCost: float, unrealizedPNL: float, realizedPNL: float, accountName: str):
        super().updatePortfolio(contract, position, marketPrice, marketValue, averageCost, unrealizedPNL, realizedPNL, accountName)
        if (len(contract.symbol) < 5) & (contract.secType == 'STK'):
            new_symbol = contract.symbol.zfill(5)
        else:
            new_symbol = contract.symbol
        print(contract.secType, contract.exchange, new_symbol, "Position:", position, "MarketPrice:", marketPrice, "MarketValue:", marketValue, "AverageCost:", averageCost, "UnrealizedPNL:", unrealizedPNL, "RealizedPNL:", realizedPNL)

accountName = ''
callback = MyWrapper()  # instantiate the wrapper
tws = EClient(callback)  # instantiate EClient; data is returned to the callback

host = '127.0.0.1'
port = 4001
clientID = 8
tws.connect(host, port, clientID)
print("serverVersion:%s connectionTime:%s" % (tws.serverVersion(), tws.twsConnectionTime()))
print(tws.isConnected())

tws.reqAccountUpdates(1, accountName)
time.sleep(2)
tws.run()

accvalue = pd.DataFrame(callback.updatePortfolio, columns=['Symbol', 'Position', 'MarketPrice', 'MarketValue',
                                                           'AverageCost', 'UnrealisedPnL', 'RealisedPnL'])
#accvalue = callback.updateAccountValue
print('Account: \n' + accvalue)
You are on the right track. You need to set up queue objects inside the wrapper to collect the responses from the client function you are calling; then you can do whatever you want with the data. Take a look at this blog, which has some code you can reuse for the implementation: https://qoppac.blogspot.com/2017/03/interactive-brokers-native-python-api.html
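As a minimal sketch of that queue pattern (the queue name and the 10-second timeout are my own placeholders, and the run loop goes on a background thread, as in the first answer above):

import queue
import threading
import pandas as pd
from ibapi.client import EClient
from ibapi.wrapper import EWrapper
from ibapi.contract import Contract

class MyWrapper(EWrapper):
    def __init__(self):
        self.portfolio_queue = queue.Queue()

    def updatePortfolio(self, contract: Contract, position, marketPrice, marketValue,
                        averageCost, unrealizedPNL, realizedPNL, accountName):
        # push each row into the queue instead of printing it
        self.portfolio_queue.put((contract.symbol, position, marketPrice, marketValue,
                                  averageCost, unrealizedPNL, realizedPNL))

    def accountDownloadEnd(self, accountName):
        self.portfolio_queue.put(None)  # sentinel: snapshot is complete

callback = MyWrapper()
tws = EClient(callback)
tws.connect('127.0.0.1', 4001, 8)
threading.Thread(target=tws.run, daemon=True).start()
tws.reqAccountUpdates(1, '')

rows = []
while True:
    row = callback.portfolio_queue.get(timeout=10)  # raises queue.Empty if nothing arrives
    if row is None:
        break
    rows.append(row)

accvalue = pd.DataFrame(rows, columns=['Symbol', 'Position', 'MarketPrice', 'MarketValue',
                                       'AverageCost', 'UnrealisedPnL', 'RealisedPnL'])
print(accvalue)
tws.disconnect()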