Overlap graphs with figurefactory - python

I am trying to plot some finance graphs and I used the plotly libraries for it.
I am quite new to this, so had to look for a way to do it, and wanted to do it offline, so I found this way:
import pandas as pd
pd.core.common.is_list_like = pd.api.types.is_list_like
from pandas_datareader import data as web
import datetime
import fix_yahoo_finance as yf
import plotly
from plotly import figure_factory as ff
import os
yf.pdr_override()
class Stock:
#We need a constructor to get all the data when we create the class
def __init__(self, name): #This needs a string as argument to work
self.name = name
#If we already have the sotcks data we don't download it again
if os.path.exists('stock_prices_yahoo/' + name + '.csv'):
self.df = pd.read_csv('stock_prices_yahoo/' + name + '.csv')
else:
start_date = '1950-01-01'
end_date = datetime.date.today()
df = web.get_data_yahoo(name, start_date, end_date)
df.to_csv('stock_prices_yahoo/' + name + '.csv')
def plot_stock(self): #This will print a plot of the stock prices
fig = ff.create_candlestick(self.df.Open, self.df.High, self.df.Low, self.df.Close, dates = self.df.Date)
fig['layout'].update({
'title': self.name + ' prices',
'yaxis': {'title': 'Price'},
'xaxis': {'title': 'Date'},
})
plotly.offline.plot(fig, filename = self.name + '-candlestick.html', validate = True)
AAPL = Stock('AAPL')
AAPL.plot_stock()
The point is I want to overlap some moving averages on it, and, after looking for it around the whole internet, peaple always do it via matplotlib.
Is there any way to do it with figure_factory?

Related

Handling high frequency updates with streamlit

Summary
I want to use streamlit to create a dashboard of all the trades (buy and sell) happening in a given market. I connect to a websocket stream to receive data of BTCUSDT from the Binance exchange. Messages are received every ~0.1s and I would like to update my dashboard in ~0.09s.
How can you handle this kind of situation where messages are delivered at high frequency? With my code, I successfully create a dashboard but it doesn't get updated fast enough. I am wondering if the dashboard is running behind.
The dashboard must display the buy and sell volumes at any moment in time as bar charts. I am also adding some metrics to show the total volume of buy and sell, as well as their change.
Steps to reproduce
My code is structured in the following way.
There is a streamer.py file, that defines a class Streamer. The Streamer object is a Websocket client. It connects to a stream, handles messages, and updates the dashboard. Whenever a new message is received, Streamer acquires a threading.Lock() and updates the pandas dataframes (one dataframe for buy orders and one dataframe for sell orders). If there are multiple orders happening at the same timestamp, it combines them by summing the corresponding volumes. Then, it releases the threading.Lock() and it creates a new thread where the update function (defined in streamer.py) is executed. The update function acquires the lock to avoid messing up with memory.
In the main.py file, streamlit's dashboard and the Streamerobject are initialized.
To reproduce the following code you need to connect to the Websocket from a region where Binance is not restricted. Since I live in the US, I must use a VPN to properly receive the data.
Code snippet:
main.py file
# main.py
import streamer
import pandas as pd
import streamlit as st # web development
import numpy as np # np mean, np random
import time # to simulate a real time data, time loop
import plotly.express as px # interactive charts
df_buy = pd.DataFrame(columns = [ 'Price', 'Quantity', 'USD Value'])
df_sell = pd.DataFrame(columns = [ 'Price', 'Quantity', 'USD Value'])
st.set_page_config(
page_title='Real-Time Data Science Dashboard',
page_icon='✅',
layout='wide'
)
# dashboard title
st.title("Real-Time / Live Data Science Dashboard")
placeholder = st.empty()
streamer.Stream(df_buy,df_sell,placeholder).connect()
streamer.py file
# streamer.py
import websocket
import json
import streamlit as st
import plotly.express as px
import pandas as pd
from threading import Thread, Lock
from streamlit.script_run_context import add_script_run_ctx
from datetime import datetime
import time
def on_close(ws, close_status_code, close_msg):
print('LOG', 'Closed orderbook client')
def update(df_buy,df_sell, placeholder, lock):
lock.acquire()
with placeholder.container():
# create three columns
kpi1, kpi2 = st.columns(2)
current_sumSellVolumes = df_sell['Quantity'].sum()
previous_sumSellVolumes = df_sell.iloc[:-1]['Quantity'].sum()
current_sumBuyVolumes = df_buy['Quantity'].sum()
previous_sumBuyVolumes = df_buy.iloc[:-1]['Quantity'].sum()
# fill in those three columns with respective metrics or KPIs
kpi2.metric(label="Sell quantity 📉", value=round(current_sumSellVolumes, 2),
delta=round(current_sumSellVolumes - previous_sumSellVolumes, 2))
kpi1.metric(label="Buy quantity 📈", value=round(current_sumBuyVolumes, 2),
delta=round(current_sumBuyVolumes - previous_sumBuyVolumes, 2))
# create two columns for charts
fig_col1, fig_col2 = st.columns(2)
with fig_col1:
st.markdown("### Buy Volumes")
fig = px.bar(data_frame=df_buy, x=df_buy.index, y='Quantity')
st.write(fig)
with fig_col2:
st.markdown("### Sell Volumes")
fig2 = px.bar(data_frame=df_sell, x=df_sell.index, y='Quantity')
st.write(fig2)
st.markdown("### Detailed Data View")
st.dataframe(df_buy)
st.dataframe(df_sell)
lock.release()
class Stream():
def __init__(self, df_buy, df_sell, placeholder):
self.symbol = 'BTCUSDT'
self.df_buy = df_buy
self.df_sell = df_sell
self.placeholder = placeholder
self.lock = Lock()
self.url = "wss://stream.binance.com:9443/ws"
self.stream = f"{self.symbol.lower()}#aggTrade"
self.times = []
def on_error(self, ws, error):
print(self.times)
print('ERROR', error)
def on_open(self, ws):
print('LOG', f'Opening WebSocket stream for {self.symbol}')
subscribe_message = {"method": "SUBSCRIBE",
"params": [self.stream],
"id": 1}
ws.send(json.dumps(subscribe_message))
def handle_message(self, message):
self.lock.acquire()
timestamp = datetime.utcfromtimestamp(int(message['T']) / 1000)
price = float(message['p'])
qty = float(message['q'])
USDvalue = price * qty
side = 'BUY' if message['m'] == False else 'SELL'
if side == 'BUY':
df = self.df_buy
else:
df = self.df_sell
if timestamp not in df.index:
df.loc[timestamp] = [price, qty, USDvalue]
else:
df.loc[df.index == timestamp, 'Quantity'] += qty
df.loc[df.index == timestamp, 'USD Value'] += USDvalue
self.lock.release()
def on_message(self, ws, message):
message = json.loads(message)
self.times.append(time.time())
if 'e' in message:
self.handle_message(message)
thr = Thread(target=update, args=(self.df_buy, self.df_sell, self.placeholder, self.lock,))
add_script_run_ctx(thr)
thr.start()
def connect(self):
print('LOG', 'Connecting to websocket')
self.ws = websocket.WebSocketApp(self.url, on_close=on_close, on_error=self.on_error,
on_open=self.on_open, on_message=self.on_message)
self.ws.run_forever()
Debug info
Streamlit version: 1.4.0
Python version: 3.10.4
OS version: MacOS 13.1
Browser version: Safari 16.2

'numpy.int64' object has no attribute 'to_pydatetime' in backtrader feed

I am unable to fix the error - 'numpy.int64' object has no attribute 'to_pydatetime', I will be really grateful, if anyone could please help me out in this? I have already tried uninstalling pyfolio and itstalling it from git. Please see the complete code below
import os
import glob
import requests
import pandas as pd
from nsepy import *
from datetime import datetime
import backtrader as bt
import backtrader.feeds as btfeeds
from __future__ import (absolute_import, division, print_function,
unicode_literals)
class TestStrategy(bt.Strategy):
def log(self, txt, dt=None):
''' Logging function for this strategy'''
dt = dt or self.datas[0].datetime.date(0)
print('%s, %s' % (dt.isoformat(), txt))
def __init__(self):
# Keep a reference to the "close" line in the data[0] dataseries
self.dataclose = self.datas[0].close
def next(self):
# Simply log the closing price of the series from the reference
self.log('Close, %.2f' % self.dataclose[0])
if __name__ == '__main__':
cerebro = bt.Cerebro()
cerebro.addstrategy(TestStrategy)
#Data feed block
data_path = "/Users/kumarun/Documents/data/files"
joined_files = os.path.join(data_path, "Oct-MONTHLY-Expirydata_2020.csv")
joined_list = glob.glob(joined_files)
df = pd.concat(map(pd.read_csv, joined_list), ignore_index=True)
df.columns=['Ticker','date', 'open', 'high', 'low', 'close', 'volume','Open Interest']
filtered = df[(df['Ticker'] == 'BANKNIFTY')]
#Cerebro block
filtered.date = pd.to_datetime(filtered.date, format='%d-%m-%Y %H:%M:%S')
feed = bt.feeds.PandasData(dataname=filtered)
cerebro.adddata(feed)
cerebro.broker.setcash(100000.0)
print('Starting Portfolio Value: %.2f' % cerebro.broker.getvalue())
cerebro.run()
print('Final Portfolio Value: %.2f' % cerebro.broker.getvalue())
I feel like you might be running into issues with the column names. Try ascribing your column names using:
data = bt.feeds.PandasData(
dataname=filtered,
datetime="date",
open='open',
high='high',
low='low',
close='close',
volume='volume',
openinterest="Open Interest"
)
Also, could drop the Ticker name column, and add the name using:
ticker = 'BANKNIFTY'
cerebro.adddata(feed, name=ticker)

How do I import various Data Frames from different python file?

I have a python file called 'clean_data.py' which has all the data frames I need, and I want to import them for use in another python file called 'main.py' to use in creating a dashboard.
Is it possible to create a class in my clean_data.py, and if so can someone direct me to an article (which I struggled to find so far) so that I can figure it out?
The aim is to shift from CSV to an API overtime, so I wanted to keep data side wrangling side of things in a different file while the web app components in the main.py file.
Any help would be much appreciated.
The code from the clean_data.py is:
import pandas as pd
import csv
import os # To access my file directory
print(os.getcwd()) # Let's me know the Current Work Directory
fdi_data = pd.read_csv(r'Data/fdi_data.csv')
fdi_meta = pd.read_csv(r'Data/fdi_metadata.csv')
debt_data = pd.read_csv(r'Data/debt_data.csv')
debt_meta = pd.read_csv(r'Data/debt_metadata.csv')
gdp_percap_data = pd.read_csv(r'Data/gdp_percap_data.csv', header=2)
gdp_percap_meta = pd.read_csv(r'Data/gdp_percap_metadata.csv')
gov_exp_data = pd.read_csv(r'Data/gov_exp_data.csv', header=2)
gov_exp_meta = pd.read_csv(r'Data/gov_exp_metadata.csv')
pop_data = pd.read_csv(r'Data/pop_data.csv', header=2)
pop_meta = pd.read_csv(r'Data/pop_metadata.csv')
"""
'wb' stands for World Bank
"""
def wb_merge_data(data, metadata):
merge = pd.merge(
data,
metadata,
on = 'Country Code',
how = 'inner'
)
return merge
fdi_merge = wb_merge_data(fdi_data, fdi_meta)
debt_merge = wb_merge_data(debt_data, debt_meta)
gdp_percap_merge = wb_merge_data(gdp_percap_data, gdp_percap_meta)
gov_exp_merge = wb_merge_data(gov_exp_data, gov_exp_meta)
pop_merge = wb_merge_data(pop_data, pop_meta)
def wb_drop_data(data):
drop = data.drop(['Country Code','Indicator Name','Indicator Code','TableName','SpecialNotes','Unnamed: 5'], axis=1)
return drop
fdi_merge = wb_drop_data(fdi_merge)
debt_merge = wb_drop_data(debt_merge)
gdp_percap_merge = wb_drop_data(gdp_percap_merge)
gov_exp_merge = wb_drop_data(gov_exp_merge)
pop_merge = wb_drop_data(pop_merge)
def wb_mr_data(data, value_name):
data = data.melt(['Country Name','Region','IncomeGroup']).reset_index()
data = data.rename(columns={'variable': 'Year', 'value': value_name})
data = data.drop('index', axis = 1)
return data
fdi_merge = wb_mr_data(fdi_merge, 'FDI')
debt_merge = wb_mr_data(debt_merge, 'Debt')
gdp_percap_merge = wb_mr_data(gdp_percap_merge, 'GDP per Cap')
gov_exp_merge = wb_mr_data(gov_exp_merge, 'Gov Expend.')
pop_merge = wb_mr_data(pop_merge, 'Population')
def avg_groupby(data, col_cal, cn=False, ig=False, rg=False):
if cn == True:
return data.groupby('Country Name')[col_cal].mean().reset_index()
elif ig == True:
return data.groupby('IncomeGroup')[col_cal].mean().reset_index()
elif rg == True:
return data.groupby('Region')[col_cal].mean().reset_index()
"""
avg_cn_... For country
avg_ig_... Income Group
avg_rg_... Region
"""
avg_cn_fdi = avg_groupby(fdi_merge, 'FDI', cn=True)
avg_ig_fdi = avg_groupby(fdi_merge, 'FDI', ig=True)
avg_rg_fdi = avg_groupby(fdi_merge, 'FDI', rg=True)
avg_cn_debt = avg_groupby(debt_merge, 'Debt', cn=True)
avg_ig_debt = avg_groupby(debt_merge, 'Debt', ig=True)
avg_rg_debt = avg_groupby(debt_merge, 'Debt', rg=True)
avg_cn_gdp_percap = avg_groupby(gdp_percap_merge, 'GDP per Cap', cn=True)
avg_ig_gdp_percap = avg_groupby(gdp_percap_merge, 'GDP per Cap', ig=True)
avg_rg_gdp_percap = avg_groupby(gdp_percap_merge, 'GDP per Cap', rg=True)
avg_cn_gexp = avg_groupby(gov_exp_merge, 'Gov Expend.', cn=True)
avg_ig_gexp = avg_groupby(gov_exp_merge, 'Gov Expend.', ig=True)
avg_rg_gexp = avg_groupby(gov_exp_merge, 'Gov Expend.', rg=True)
avg_cn_pop = avg_groupby(pop_merge, 'Population', cn=True)
avg_ig_pop = avg_groupby(pop_merge, 'Population', ig=True)
avg_rg_pop = avg_groupby(pop_merge, 'Population', rg=True)
In Python, every file is a module. So if you want to re-use your code, you can simple import this module. For example,
# main.py
import clean_data
print(clean_data.avg_cn_fdi)
Maybe you needn't create a class for this
You can import the whole python file like you'd import any other locally created files and have access to the DataFrames in them. Here's an example:
I created a file called temporary.py:
import pandas as pd
data = pd.read_csv("temp.csv")
And then in a separate file I was able to use data like so:
import temporary
print(temporary.data)
Or, you could also do:
from temporary import data
print(data)
All that being said, I don't believe that this would be the best way to handle your data.

the DataFrameClinet class of python's package for influxdb uploads only the last line from the dataframe

i am trying to use python's package for influxdb to upload dataframe into the database
i am using the write_points class to write point into the database as given in the documentation(https://influxdb-python.readthedocs.io/en/latest/api-documentation.html)
every time i try to use the class it only updates the last line of the dataframe instead of the complete dataframe.
is this a usual behavior or there is some problem here?
given below is my script:
from influxdb import InfluxDBClient, DataFrameClient
import pathlib
import numpy as np
import pandas as pd
import datetime
db_client = DataFrameClient('dbserver', port, 'username', 'password', 'database',
ssl=True, verify_ssl=True)
today = datetime.datetime.now().strftime('%Y%m%d')
path = pathlib.Path('/dir1/dir/2').glob(f'pattern_to_match*/{today}.filename.csv')
for file in path:
order_start = pd.read_csv(f'{file}')
if not order_start.empty:
order_start['data_line1'] = (order_start['col1'] - \
order_start['col2'])*1000
order_start['data_line2'] = (order_start['col3'] - \
order_start['col4'])*1000
d1 = round(order_start['data_line1'].quantile(np.arange(0,1.1,0.1)), 3)
d2 = round(order_start['data_line2'].quantile(np.arange(0,1.1,0.1)), 3)
out_file = pd.DataFrame()
out_file = out_file.append(d1)
out_file = out_file.append(d2)
out_file = out_file.T
out_file.index = out_file.index.set_names(['percentile'])
out_file = out_file.reset_index()
out_file['percentile'] = out_file.percentile.apply(lambda x: f'{100*x:.0f}%')
out_file['tag_col'] = str(file).split('/')[2]
out_file['time'] = pd.to_datetime('today').strftime('%Y%m%d')
out_file = out_file.set_index('time')
out_file.index = pd.to_datetime(out_file.index)
db_client.write_points(out_file, 'measurement', database='database',
retention_policy='rp')
can anyone please help?

Resampling Live Websocket Ticks to Candles using Pandas in python

I am trying to resample live ticks from KiteTicker websocket into OHLC candles using pandas and this is the code I have written, which works fine with single instrument (The commented trd_portfolio on line 9) but doesn't work with multiple instruments (Line 8) as it mixes up data of different instruments.
Is there any way to relate the final candles df to instrument tokens? or make this work with multiple intruments?
I would like to run my algo on multiple instruments at once, please suggest if there is a better way around it.
from kiteconnect import KiteTicker;
from kiteconnect import KiteConnect;
import logging
import time,os,datetime,math;
import winsound
import pandas as pd
trd_portfolio = {954883:"USDINR19MARFUT",4632577:"JUBLFOOD"}
# trd_portfolio = {954883:"USDINR19MARFUT"}
trd_tkn1 = [];
for x in trd_portfolio:
trd_tkn1.append(x)
c_id = '****************'
ak = '************'
asecret = '*************************'
kite = KiteConnect(api_key=ak)
print('[*] Generate access Token : ',kite.login_url())
request_tkn = input('[*] Enter Your Request Token Here : ')[-32:];
data = kite.generate_session(request_tkn, api_secret=asecret)
kite.set_access_token(data['access_token'])
kws = KiteTicker(ak, data['access_token'])
#columns in data frame
df_cols = ["Timestamp", "Token", "LTP"]
data_frame = pd.DataFrame(data=[],columns=df_cols, index=[])
def on_ticks(ws, ticks):
global data_frame, df_cols
data = dict()
for company_data in ticks:
token = company_data["instrument_token"]
ltp = company_data["last_price"]
timestamp = company_data['timestamp']
data[timestamp] = [timestamp, token, ltp]
tick_df = pd.DataFrame(data.values(), columns=df_cols, index=data.keys()) #
data_frame = data_frame.append(tick_df)
ggframe=data_frame.set_index(['Timestamp'],['Token'])
print ggframe
gticks=ggframe.ix[:,['LTP']]
candles=gticks['LTP'].resample('1min').ohlc().dropna()
print candles
def on_connect(kws , response):
print('Connected')
kws.subscribe(trd_tkn1)
kws.set_mode(kws.MODE_FULL, trd_tkn1)
def on_close(ws, code, reason):
print('Connection Error')
kws.on_ticks = on_ticks
kws.on_connect = on_connect
kws.on_close = on_close
kws.connect()
I don't have access to the Kite API, but I've been looking at some code snippets that use it trying to figure out another issue I'm having related to websockets. I came across this open question, and I think I can help, though I can't really test this solution.
The problem I think is that you're not calculating OHLC for each "token"... it just does it for all tokens.
data_frame = data_frame.append(tick_df)
ggframe=data_frame.set_index('Timestamp')
candles=ggframe.groupby('token').resample('1min').agg({'LTP':'ohlc'})
You'll get a multi-index output, but the column names might not quite line up for the rest of your code. To fix that:
candles.columns=['open','high','low','close']

Categories

Resources