The Bokeh DateRangeSlider widget requires int values for its step attribute which must be the time value in milliseconds. It works well when the step is set to seconds, minutes, hours, days or years. However I need a month resolution on the slider.
When the step is set to 31 days it works well for the start date until March when instead of 1 March I get 4 March. Then the shift from 1th of the month in the displayed value gets bigger and bigger.
I want to be able to set and get displayed the slider range on both sides always to be the 1th day of the month e.g. 1 March, 1 April, 1 May, 1 June etc... like it is in the DataFrame.
Considering the following code, what would be the best way to realize it (possibly using a JS callback) ?
import pandas as pd
from bokeh.plotting import show
from bokeh.models import DateRangeSlider
data = {'date_start': ['201812', '201901', '201902', '201903', '201904', '201905', '201906', '201907', '201908', '201909', '201910', '201911'],
'date_end': [ '201901', '201902', '201903', '201904', '201905', '201906', '201907', '201908', '201909', '201910', '201911', '201912'],
'values' : [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]}
df = pd.DataFrame(data)
df['Start'] = pd.to_datetime(df['date_start'], format='%Y%m')
df['End'] = pd.to_datetime(df['date_end'], format='%Y%m')
start_date = df['Start'].min()
end_date = df['End'].max()
range_slider = DateRangeSlider(start=start_date, end=end_date, value=(start_date, end_date), step=31*24*60*60*1000, title="Date Range", callback_policy = 'mouseup', tooltips = False, width=600)
show(range_slider)import pandas as pd
from bokeh.plotting import show
from bokeh.models import DateRangeSlider
data = {'date_start': ['201812', '201901', '201902', '201903', '201904', '201905', '201906', '201907', '201908', '201909', '201910', '201911'],
'date_end': [ '201901', '201902', '201903', '201904', '201905', '201906', '201907', '201908', '201909', '201910', '201911', '201912'],
'values' : [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]}
df = pd.DataFrame(data)
df['Start'] = pd.to_datetime(df['date_start'], format='%Y%m')
df['End'] = pd.to_datetime(df['date_end'], format='%Y%m')
start_date = df['Start'].min()
end_date = df['End'].max()
range_slider = DateRangeSlider(start=start_date, end=end_date, value=(start_date, end_date), step=31*24*60*60*1000, title="Date Range", callback_policy = 'mouseup', tooltips = False, width=600)
show(range_slider)
After some struggling I came up with this JS callback which temporary changes the step to 1 day in order to be able to correct the date. It also changes temporary the range so that when the step is restored the slider handle remains on its position. Far from perfect but working:
import pandas as pd
from bokeh.plotting import show
from bokeh.models import CustomJS, DateRangeSlider
data = {'date_start': ['201812', '201901', '201902', '201903', '201904', '201905', '201906', '201907', '201908', '201909', '201910', '201911'],
'date_end': [ '201901', '201902', '201903', '201904', '201905', '201906', '201907', '201908', '201909', '201910', '201911', '201912'],
'values' : [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]}
df = pd.DataFrame(data)
df['Start'] = pd.to_datetime(df['date_start'], format='%Y%m')
df['End'] = pd.to_datetime(df['date_end'], format='%Y%m')
start_date = df['Start'].min()
end_date = df['End'].max()
range_slider = DateRangeSlider(start=start_date, end=end_date, value=(start_date, end_date), step=31*24*60*60*1000, title="Date Range", callback_policy = 'mouseup', tooltips = False, width=600)
code = '''
console.log('start, end', cb_obj.start, cb_obj.end)
for (i in cb_obj.value) {
if (getDay(cb_obj.value[i]) != 1) {
correctDate(day, i)
}
}
function getDay(value) {
date = new Date(value)
str_date = date.toString()
day = str_date.split(' ')[2]
return Number(day)
}
function correctDate(day, side) {
if (day < 15) {
console.log('day < 15')
difference = day - 1
difference_milliseconds = -1 * difference*24*60*60*1000
}
else {
console.log('day >= 15')
difference = 0
new_day = -1
while(new_day != 1) {
difference_milliseconds = difference*24*60*60*1000
new_date = new Date(cb_obj.value[0] + difference_milliseconds)
new_day = Number(new_date.getDate())
difference += 1
}
}
cb_obj.step = 1*24*60*60*1000 // set slider step to 1 day to be able to correct
if (side == 0) {
cb_obj.start = cb_obj.start + difference_milliseconds
cb_obj.value = [cb_obj.value[0] + difference_milliseconds, cb_obj.value[1]]
}
else if (side == 1) {
cb_obj.end = cb_obj.end + difference_milliseconds + 4*24*60*60*1000
cb_obj.value = [cb_obj.value[0], cb_obj.value[1] + difference_milliseconds]
}
setTimeout(resetStep, 50, cb_obj) // reset step to 31 days
}
function resetStep(cb_obj) {
cb_obj.step = 31*24*60*60*1000
}
'''
range_slider.js_on_change('value_throttled', CustomJS(args = {'end_date': end_date}, code=code))
show(range_slider)
Or maybe the best option is not to use the DateRangeSlider at all fo the month step. The solution below uses a RangeSlider in combination with a Div to realize the same functionality which looks much nicer:
import pandas as pd
from bokeh.plotting import show
from bokeh.models import RangeSlider, Div, Column, CustomJS
data = {'date_start': ['2018-12', '2019-01', '2019-02', '2019-03', '2019-04', '2019-05', '2019-06', '2019-07', '2019-08', '2019-09', '2019-10', '2019-11'],
'date_end': [ '2019-01', '2019-02', '2019-03', '2019-04', '2019-05', '2019-06', '2019-07', '2019-08', '2019-09', '2019-10', '2019-11', '2019-12'],
'values' : [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]}
df = pd.DataFrame(data)
df['Start'] = pd.to_datetime(df['date_start'], format='%Y-%m')
df['End'] = pd.to_datetime(df['date_end'], format='%Y-%m')
number_dates = len(list(df.date_start.unique()))
start_dates = df.date_start.to_list()
end_dates = df.date_end.to_list()
range_slider = RangeSlider(start=0, end=number_dates, value=(0, number_dates), step=1, title="", callback_policy = 'mouseup', tooltips = False, width=600, show_value = False)
div = Div(text = "Date Range: <b>" + str(start_dates[range_slider.value[0]]) + ' . . . ' + str(end_dates[range_slider.value[1]-1]) + '</b>', render_as_text = False, width = 575)
code = '''
range = Math.round(Number(cb_obj.value[1] - cb_obj.value[0]), 10)
range = range < 10 ? '0' + range : range
div.text = "Date Range: <b>" + start_dates[Math.round(cb_obj.value[0], 10)] + ' . . . ' + end_dates[Math.round(cb_obj.value[1], 10) + -1] + '</b>'
'''
range_slider.js_on_change('value_throttled', CustomJS(args = {'div': div, 'start_dates': start_dates, 'end_dates': end_dates}, code=code))
show(Column(div, range_slider))
Related
I would like to add images to the rows in a Plotly Table, but could not find a good solution.
As an alternative option, I am using the method add_layout_image() placing the images approximately at the row space - see the example of what I am trying to achieve.
Anyone has a better solution!?
import plotly.graph_objects as go
import pandas as pd
import base64
data = {'team': {1: 'Sales team 1', 2: 'Sales team 2', 3: 'Sales team 3'},
'award': {1: '', 2: '', 3: ''},
'performance': {1: '67.00%', 2: '45.00%', 3: '35.00%'}}
df = pd.DataFrame(data)
fig = go.Figure(data=[go.Table(
columnwidth=[40, 40, 40],
header=dict(
values=list(df.columns),
height=35),
cells=dict(
values=[df.team,
df.award,
df.performance],
align=['center', 'center', 'center'],
font=dict(color='black', size=18),
height=45)
)])
heightRow = fig.data[0].cells.height
numberRow = fig.data[0].cells.values[0].__len__()
image_1st = 'medal1st.png'
image_1st_base64 = base64.b64encode(open(image_1st, 'rb').read())
step_y = 1 / numberRow * .2
coordinate_y = 0
for index, eachRow in enumerate(df.iterrows()):
fig.add_layout_image(
source='data:image/png;base64,{}'.format(image_1st_base64.decode()),
x=0.5,
y=.9 - coordinate_y,
xref="x domain",
yref="y domain",
xanchor="center",
yanchor="bottom",
sizex=.055,
sizey=.055,
)
coordinate_y = coordinate_y + step_y
fig.show()
I've been trying to edit a dataframe into a formated Excel spreadsheet for work, and though most issues were solved elsewhere (Formatting an Excel file with XlsxWriter - Locale settings appear not to be applied to the output file) some remained.
The problem now is with the header and border formatting passed to date columns:
[]
I expected to have borders on all sides of each cell, but colmuns J, L and M, which receive information in datetime type, present border formatting issues (no borders actually). Also, there is an extra colmun (R) which is formated.
Keep in mind that columns B, C and F were also formated using xlsxwriter and have no border formatting problems.
Below is the code I have so far:
# Importar bibliotecas
import os
from typing import Self
import pandas as pd
import pandas.io.formats.excel
import pandas.io.excel
import numpy as np
import time
import xlsxwriter
template_excel_file = r"C:\CriarTabelaOpme\Modelo Material Alto Custo - Intranet.xlsx"
depara_nome_espec_file = r"C:\CriarTabelaOpme\Especialidade_Dicionario.csv"
report_csv_file = r"C:\CriarTabelaOpme\ReportServiceIntranet.csv"
csv_dataframe = pd.read_csv(report_csv_file, sep = ',', encoding = "ISO-8859-1", engine = 'python', index_col = None, names = ['TIPO', 'CODIGO', 'PTU',
'DESCRICAO', 'FORNECEDOR', 'VALOR', 'COD_PRINCP_ATIVO', 'PRINCIPIO_ATIVO', 'ANVISA', 'VALIDADE_RMS', 'FABRICANTE', 'DT_ATUALIZACAO', 'PTU_LIMITE', 'COD_ESP', 'NOME_ESPEC', 'REFERENCIA', 'OBSERVACAO'])
csv_dataframe.insert(16, "", "")
csv_dataframe["VALOR"] = csv_dataframe["VALOR"].apply(lambda x: x.replace(",", "")).astype('float')
csv_dataframe["VALIDADE_RMS"] = pd.to_datetime(csv_dataframe["VALIDADE_RMS"])
csv_dataframe["DT_ATUALIZACAO"] = pd.to_datetime(csv_dataframe["DT_ATUALIZACAO"])
csv_dataframe["PTU_LIMITE"] = pd.to_datetime(csv_dataframe["PTU_LIMITE"])
#print(csv_dataframe.dtypes)
csv_depara_espec = pd.read_csv(depara_nome_espec_file, sep = ',', header = None, encoding = "ISO-8859-1", engine = 'python')
#print(csv_depara_espec)
csv_dataframe = csv_dataframe.iloc[:, [0,1,2,3,4,5,6,7,8,9,10,11,12,13,16,14,15]]
#print(csv_dataframe)
dict = {'TIPO' : 'TIPO', 'CODIGO' : 'CODIGO', 'PTU': 'PTU',
'DESCRICAO' : 'DESCRICAO', 'FORNECEDOR' : 'FORNECEDOR', 'VALOR' : 'VALOR', 'COD_PRINCP_ATIVO' : 'COD_PRINCP_ATIVO',
'PRINCIPIO_ATIVO' : 'PRINCIPIO_ATIVO', 'ANVISA' : 'ANVISA', 'VALIDADE_RMS' : 'VALIDADE_RMS', 'FABRICANTE' : 'FABRICANTE',
'DT_ATUALIZACAO' : 'DT_ATUALIZACAO', 'PTU_LIMITE' : 'PTU_LIMITE', 'COD_ESP' : 'COD_ESP', '' : 'NOME_ESPEC',
'NOME_ESPEC' : 'REFERENCIA', 'REFERENCIA' : 'OBSERVACAO'}
csv_dataframe.rename(columns = dict, inplace = True)
for row in range(len(csv_dataframe)):
cod_esp_row = (csv_dataframe.iloc[row, 13])
csv_dataframe.iloc[row,14] = csv_depara_espec.iloc[cod_esp_row, 1]
pandas.io.formats.excel.header_style = None
writer = pd.ExcelWriter(template_excel_file, engine = 'xlsxwriter', date_format = 'dd/mm/yyyy', datetime_format = 'dd/mm/yyyy')
excel_dataframe = csv_dataframe.to_excel(writer, sheet_name = 'Material Alto Custo', index = False, header = ['TIPO', 'CODIGO', 'PTU',
'DESCRICAO', 'FORNECEDOR', 'VALOR', 'COD_PRINCP_ATIVO', 'PRINCIPIO_ATIVO', 'ANVISA', 'VALIDADE_RMS', 'FABRICANTE', 'DT_ATUALIZACAO',
'PTU_LIMITE', 'COD_ESP', 'NOME_ESPEC', 'REFERENCIA', 'OBSERVACAO'])
(max_row, max_col) = csv_dataframe.shape
workbook = writer.book
worksheet = writer.sheets['Material Alto Custo']
header_format = workbook.add_format({'bold' : True,
'font' : 'Arial',
'size' : 10,
'border' : 1})
font_size_and_border = workbook.add_format({'font' : 'Arial', 'size' : 10, 'border' : 1})
column_valor_format_and_border = workbook.add_format({'num_format': '[$R$-pt-BR] #,##0.00','font' : 'Arial', 'size' : 10, 'border' : 1})
column_date_format_and_border = workbook.add_format({'num_format' : 'dd/mm/yyyy','font' : 'Arial', 'size' : 10, 'border' : 1})
column_left_zeroes_format_and_border = workbook.add_format({'num_format' : '00000000','font' : 'Arial', 'size' : 10, 'border' : 1})
worksheet.set_row(0, None, header_format)
worksheet.set_column(0,max_col, 20.0, font_size_and_border)
worksheet.set_column(1, 1, 20.0, column_left_zeroes_format_and_border)
worksheet.set_column(2, 2, 20.0, column_left_zeroes_format_and_border)
worksheet.set_column(5, 5, 20.0, column_valor_format_and_border)
worksheet.set_column(9, 9, 20.0, column_date_format_and_border)
worksheet.set_column(11, 11, 20.0, column_date_format_and_border)
worksheet.set_column(12, 12, 20.0, column_date_format_and_border)
worksheet.set_row(0, None, header_format)
writer.close()
I've been trying to spot the error, but I have failed. Could someone help me out?
Thanks in advance for any help you can provide!
I am using Altair to generate my plots (As i need the linked bar-chart selection) and Panel to create my dashboard. I have two dropdowns, where the values in the second are conditional on the value in the first.
When I use a Single Select dropdown the dashboard works as expected. However when I try and use any Mulitiple select widget I get no data rendered on my chart
import panel as pn
import altair as alt
import pandas as pd
from vega_datasets import data
import datetime as dt
from altair import datum
alt.renderers.enable('default')
pn.extension('vega')
data = pd.read_excel('randomtestdata.xlsx')
df = pd.DataFrame(data, columns=['Parent Location','Location','Alert Definition','Alert Type','Initiated Date'])
df = df[(df['Parent Location'] == 'Zone 1') | (df['Parent Location'] == 'Zone 2' )| (df['Parent Location'] == 'Zone 3' )]
df.rename(columns={'Parent Location': 'ParentLocation'},
inplace=True, errors='raise')
source = df
title = '##Dashboard'
subtitle = 'This is a test dashboard. Use widgets below to show desired chart.'
_locations = {
'Zone 1': source.loc[source['ParentLocation'] == 'Zone 1']['Location'].unique().tolist(),
'Zone 2' : source.loc[source['ParentLocation'] == 'Zone 2']['Location'].unique().tolist(),
'Zone 3': source.loc[source['ParentLocation'] == 'Zone 3']['Location'].unique().tolist()
}
zone = pn.widgets.Select(
name = 'Select a Zone',
value ='Zone 1',
options =['Zone 1', 'Zone 2', 'Zone 3']
)
#The following does not work
location = pn.widgets.MultiSelect(
name = 'Select a Location',
value =[True],
options =_locations[zone.value]
)
# The following does works:
# location = pn.widgets.Select(
# name = 'Select a Location',
# value = _locations[zone.value][0],
# options =_locations[zone.value]
# )
date_range_slider = pn.widgets.DateRangeSlider(
name='Date range to consider',
start=dt.datetime(2021, 1, 1), end=dt.datetime(2022, 1, 1),
value=(dt.datetime(2021, 1, 1), dt.datetime(2022, 1, 1))
)
#pn.depends(zone.param.value, location.param.value, date_range_slider.param.value, watch=True)
def get_plot(zone, location, date_range): # start function
df = source
df['Initiated Date'] = pd.to_datetime(df['Initiated Date']) # format date as datetime
start_date = date_range_slider.value[0]
end_date = date_range_slider.value[1]
mask = (df['Initiated Date'] > start_date) & (df['Initiated Date'] <= end_date)
df = df.loc[mask]
selection2 = alt.selection_single(fields=['Alert Type'])
chart = alt.Chart(df).mark_bar(
color="#0c1944",
opacity=0.8).encode(
x=alt.X('Alert Type:O', scale=alt.Scale(domain=source['Alert Type'].unique())),
y='count(Alert Type)').transform_filter(
(datum.Location == location)
).add_selection(selection2)
chart2 = alt.Chart(df).mark_bar(
color="#0c1944",
opacity=0.8).encode(
x='Alert Definition',
y='count(Alert Definition)').transform_filter(
(datum.Location == location)
).transform_filter(selection2)
return (chart|chart2)
#pn.depends(zone.param.value, watch=True)
def _update_locations(zone):
locations = _locations[zone]
location.options = locations
location.value = locations[0]
return
pn.Row(
pn.Column(title, subtitle, zone, location, date_range_slider,
get_plot )
)
Random test data:
https://github.com/KWSpittles/testdata
The reason this is not working is because you are filtering you Altair charts using
.transform_filter(
datum.Location == location
)
which allows filtering for a single value. When you pass a list of multiple values you need to instead use indexof like this
.transform_filter(
f'indexof({location}, datum.Location) != -1'
)
So I am trying to get multiple stock prices using pandas and panadas datareader. If I only try to import one ticker it will run fine, but if I use more than one then an error arises. The code is:
import pandas as pd
import pandas_datareader as web
import datetime as dt
stocks = ['BA', 'AMD']
start = dt.datetime(2018, 1, 1)
end = dt.datetime(2020, 1, 1)
d = web.DataReader(stocks, 'yahoo', start, end)
Though I get the error:
ValueError: Wrong number of items passed 2, placement implies 1
So how do I get around it only allowing to pass 1 stock.
So far I have tried using quandl and google instead, which dont work either. I also have tried pdr.get_data_yahoo but I get the same result. I have also tried yf.download() and still get the same issue. Does anyone have any ideas to get around this? Thank you.
EDIT: Full code:
import pandas as pd
import pandas_datareader as web
import datetime as dt
import yfinance as yf
import numpy as np
stocks = ['BA', 'AMD', 'AAPL']
start = dt.datetime(2018, 1, 1)
end = dt.datetime(2020, 1, 1)
d = web.DataReader(stocks, 'yahoo', start, end)
d['sma50'] = np.round(d['Close'].rolling(window=2).mean(), decimals=2)
d['sma200'] = np.round(d['Close'].rolling(window=14).mean(), decimals=2)
d['200-50'] = d['sma200'] - d['sma50']
_buy = -2
d['Crossover_Long'] = np.where(d['200-50'] < _buy, 1, 0)
d['Crossover_Long_Change']=d.Crossover_Long.diff()
d['buy'] = np.where(d['Crossover_Long_Change'] == 1, 'buy', 'n/a')
d['sell'] = np.where(d['Crossover_Long_Change'] == -1, 'sell', 'n/a')
pd.set_option('display.max_rows', 5093)
d.drop(['High', 'Low', 'Close', 'Volume', 'Open'], axis=1, inplace=True)
d.dropna(inplace=True)
#make 2 dataframe
d.set_index(d['Adj Close'], inplace=True)
buy_price = d.index[d['Crossover_Long_Change']==1]
sell_price = d.index[d['Crossover_Long_Change']==-1]
d['Crossover_Long_Change'].value_counts()
profit_loss = (sell_price - buy_price)*10
commision = buy_price*.01
position_value = (buy_price + commision)*10
percent_return = (profit_loss/position_value)*100
percent_rounded = np.round(percent_return, decimals=2)
prices = {
"Buy Price" : buy_price,
"Sell Price" : sell_price,
"P/L" : profit_loss,
"Return": percent_rounded
}
df = pd.DataFrame(prices)
print('The return was {}%, and profit or loss was ${} '.format(np.round(df['Return'].sum(), decimals=2),
np.round(df['P/L'].sum(), decimals=2)))
d
I tried 3 stocks in your code and it returns data for all 3, not sure I understood the problem you're facing?
import pandas as pd
import pandas_datareader as web
import datetime as dt
stocks = ['BA', 'AMD', 'AAPL']
start = dt.datetime(2018, 1, 1)
end = dt.datetime(2020, 1, 1)
d = web.DataReader(stocks, 'yahoo', start, end)
print(d)
Output:
Attributes Adj Close Close ... Open Volume
Symbols BA AMD AAPL BA AMD AAPL ... BA AMD AAPL BA AMD AAPL
Date ...
2018-01-02 282.886383 10.980000 166.353714 296.839996 10.980000 172.259995 ... 295.750000 10.420000 170.160004 2978900.0 44146300.0 25555900.0
2018-01-03 283.801239 11.550000 166.324722 297.799988 11.550000 172.229996 ... 295.940002 11.610000 172.529999 3211200.0 154066700.0 29517900.0
2018-01-04 282.724396 12.120000 167.097290 296.670013 12.120000 173.029999 ... 297.940002 12.100000 172.539993 4171700.0 109503000.0 22434600.0
2018-01-05 294.322296 11.880000 168.999741 308.839996 11.880000 175.000000 ... 296.769989 12.190000 173.440002 6177700.0 63808900.0 23660000.0
2018-01-08 295.570740 12.280000 168.372040 310.149994 12.280000 174.350006 ... 308.660004 12.010000 174.350006 4124900.0 63346000.0 20567800.0
... ... ... ... ... ... ... ... ... ... ... ... ... ...
2019-12-24 331.030457 46.540001 282.831299 333.000000 46.540001 284.269989 ... 339.510010 46.099998 284.690002 4120100.0 44432200.0 12119700.0
2019-12-26 327.968689 46.630001 288.442780 329.920013 46.630001 289.910004 ... 332.700012 46.990002 284.820007 4593400.0 57562800.0 23280300.0
2019-12-27 328.187408 46.180000 288.333313 330.140015 46.180000 289.799988 ... 330.200012 46.849998 291.119995 4124000.0 36581300.0 36566500.0
2019-12-30 324.469513 45.520000 290.044617 326.399994 45.520000 291.519989 ... 330.500000 46.139999 289.459991 4525500.0 41149700.0 36028600.0
2019-12-31 323.833313 45.860001 292.163818 325.760010 45.860001 293.649994 ... 325.410004 45.070000 289.929993 4958800.0 31673200.0 25201400.0
I think the error comes from your moving average and the line
d['sma50'] = np.round(d['Close'].rolling(window=2).mean(), decimals=2)
because d represent 3 stocks, I think you have to separate each stock and compute the moving average separately
EDIT : I tried something for two stocks only (BA and AMD) but it is not the best solution because I'm always repeating myself for every line.
I'm just a beginner in Python but maybe this will help you to find a solution to your problem
PS : The last line doesn't work really well (which is the printing of the P&L and Return)
"
import pandas as pd
import pandas_datareader as web
import datetime as dt
stock1 = ['BA']
stock2=['AMD']
start = dt.datetime(2018, 1, 1)
end = dt.datetime(2020, 1, 1)
d1 = web.DataReader(stock1, 'yahoo', start, end)
d2 = web.DataReader(stock2, 'yahoo', start, end)
d1['sma50'] = np.round(d1['Close'].rolling(window=2).mean(), decimals=2)
d2['sma50'] = np.round(d2['Close'].rolling(window=2).mean(), decimals=2)
d1['sma200'] = np.round(d1['Close'].rolling(window=14).mean(), decimals=2)
d2['sma200'] = np.round(d2['Close'].rolling(window=14).mean(), decimals=2)
d1['200-50'] = d1['sma200'] - d1['sma50']
d2['200-50'] = d2['sma200'] - d2['sma50']
_buy = -2
d1['Crossover_Long'] = np.where(d1['200-50'] < _buy, 1, 0)
d2['Crossover_Long'] = np.where(d2['200-50'] < _buy, 1, 0)
d1['Crossover_Long_Change']=d1.Crossover_Long.diff()
d2['Crossover_Long_Change']=d2.Crossover_Long.diff()
d1['buy'] = np.where(d1['Crossover_Long_Change'] == 1, 'buy', 'n/a')
d2['buy'] = np.where(d2['Crossover_Long_Change'] == 1, 'buy', 'n/a')
d1['sell_BA'] = np.where(d1['Crossover_Long_Change'] == -1, 'sell', 'n/a')
d2['sell_AMD'] = np.where(d2['Crossover_Long_Change'] == -1, 'sell', 'n/a')
pd.set_option('display.max_rows', 5093)
d1.drop(['High', 'Low', 'Close', 'Volume', 'Open'], axis=1, inplace=True)
d2.drop(['High', 'Low', 'Close', 'Volume', 'Open'], axis=1, inplace=True)
d2.dropna(inplace=True)
d1.dropna(inplace=True)
d1.set_index("Adj Close",inplace=True)
d2.set_index("Adj Close",inplace=True)
buy_price_BA = np.array(d1.index[d1['Crossover_Long_Change']==1])
buy_price_AMD = np.array(d2.index[d2['Crossover_Long_Change']==1])
sell_price_BA = np.array(d1.index[d1['Crossover_Long_Change']==-1])
sell_price_AMD = np.array(d2.index[d2['Crossover_Long_Change']==-1])
d1['Crossover_Long_Change'].value_counts()
d2['Crossover_Long_Change'].value_counts()
profit_loss_BA = (sell_price_BA - buy_price_BA)*10
profit_loss_AMD = (sell_price_AMD - buy_price_AMD)*10
commision_BA = buy_price_BA*.01
commision_AMD = buy_price_AMD*.01
position_value_BA = (buy_price_BA + commision_BA)*10
position_value_AMD = (buy_price_AMD + commision_AMD)*10
percent_return_BA = np.round(((profit_loss_BA/position_value_BA)*100),decimals=2)
percent_return_AMD = np.round(((profit_loss_AMD/position_value_AMD)*100),decimals=2)
prices_BA = {
"Buy Price BA" : [buy_price_BA],
"Sell Price BA" : [sell_price_BA],
"P/L BA" : [profit_loss_BA],
"Return BA": [percent_return_BA]}
df = pd.DataFrame(prices_BA)
print('The return was {}%, and profit or loss was ${} '.format(np.round(df['Return BA'].sum(), decimals=2),
np.round(df['P/L BA'].sum(), decimals=2)))
prices_AMD = {
"Buy Price AMD" : [buy_price_AMD],
"Sell Price AMD" : [sell_price_AMD],
"P/L AMD" : [profit_loss_AMD],
"Return AMD": [percent_return_AMD]}
df = pd.DataFrame(prices_AMD)
print('The return was {}%, and profit or loss was ${} '.format(np.round(df['Return AMD'].sum(), decimals=2),
np.round(df['P/L AMD'].sum(), decimals=2)))
It seems like there's a bug in the pandas data reader. I work around it by initialising with one symbol and then setting the symbols property on the instantiated object. After doing that, it works fine to call read() on tmp below.
import pandas_datareader as pdr
all_symbols = ['ibb', 'xly', 'fb', 'exx1.de']
tmp = pdr.yahoo.daily.YahooDailyReader(symbols=all_symbols[0])
# this is a work-around, pdr is broken...
tmp.symbols = all_symbols
data = tmp.read()
I am trying out algorithmic trading with python, for backtesting.
First I downloaded some tick data, then resampled them as 10 second OHLC-data, and then used ATR-indicator, as a defined function:
df = pd.read_csv( 'gbpusd jan17.csv',
names = ['instrument', 'time', 'bid', 'ask'],
index_col = 1,
parse_dates = True,
nrows = 1000
)
df = df['ask'].resample('10s').ohlc()
n = list( range( 0, len( df.index ) ) ) # I changed index because my indicator
# doesn't work on datetime index
df.index = n
def ATR(df, n): #________________________# Average True Range
i = 0
TR_l = [0]
while i < df.index[-1]:
TR = max( df.get_value( i + 1, 'high' ),
df.get_value( i, 'close' )
)
- min( df.get_value( i + 1, 'low' ),
df.get_value( i, 'close' )
)
TR_l.append( TR )
i = i + 1
TR_s = pd.Series( TR_l )
ATR = pd.Series( TR_s.rolling( window = 12,
min_periods = 12,
center = False
).mean(),
name = 'ATR_' + str(n)
)
df = df.join( ATR )
return df
Outputs: head()
instrument bid ask
time
2017-01-02 00:00:01.105 GBP/USD 1.23399 1.23551
2017-01-02 00:00:01.561 GBP/USD 1.23399 1.23551
2017-01-02 00:00:05.122 GBP/USD 1.23399 1.23551
2017-01-02 00:00:05.525 GBP/USD 1.23365 1.23577
2017-01-02 00:00:06.139 GBP/USD 1.23365 1.23577
open high low close ATR_8
\
1.23562 1.23562 1.23562 1.23562 0.000120 0.596718
1.23562 1.23575 1.23548 1.23548 0.000121 0.619445
1.23548 1.23548 1.23541 1.23541 0.000122 0.645532
1.23541 1.23541 1.23541 1.23541 0.000117 0.674178
1.23541 1.23548 1.23541 1.23548 0.000123 0.687229
But, the problem starts when I connect to Oanda API to get streaming rates, the while loop for ATR calculation doesn't seem to work, firstly I thought, it's not working because of, not enough rows of data at the begining, so I made ATR calculation start after certain ticks, still not working.
Can any one help with the while loop, how should I change for the streaming data?