So I'm trying to plot Heiken Ashi candles, and then I want to plot them on graph.
My code so far:
def heikin_ashi():
historical_data = client.get_historical_klines(symbol=SYMBOL, interval=TIME_PERIOD, start_str="15 days ago UTC", klines_type=HistoricalKlinesType.FUTURES)
hist_df = pd.DataFrame(historical_data)
hist_df.columns = ['Open Time', 'Open', 'High', 'Low', 'Close', 'Volume', 'Close Time', 'Quote Asset Volume',
'Number of Trades', 'TB Base Volume', 'TB Quote Volume', 'Ignore']
hist_df['Open Time'] = pd.to_datetime(hist_df['Open Time']/1000, unit='s')
hist_df['Close Time'] = pd.to_datetime(hist_df['Close Time']/1000, unit='s')
df_HA = hist_df
df_HA['Close'] = (hist_df['Open'] + hist_df['High'] + hist_df['Low'] + hist_df['Close']) / 4
# idx = df_HA.index.name
# df_HA.reset_index(inplace=True)
for i in range(0, len(hist_df)):
if i == 0:
df_HA['Open'][i] = ((hist_df['Open'][i] + hist_df['Close'][i]) / 2)
else:
df_HA['Open'][i] = ((hist_df['Open'][i - 1] + hist_df['Close'][i - 1]) / 2)
# if idx:
# df_HA.set_index(idx, inplace=True)
df_HA['High'] = hist_df[['Open', 'Close', 'High']].max(axis=1)
df_HA['Low'] = hist_df[['Open', 'Close', 'Low']].min(axis=1)
print(df_HA)
Error:
result[mask] = op(xrav[mask], y)
TypeError: unsupported operand type(s) for /: 'str' and 'int'
Also I came across this:
import pandas as pd
def heikin_ashi(df):
heikin_ashi_df = pd.DataFrame(index=df.index.values, columns=['open', 'high', 'low', 'close'])
heikin_ashi_df['close'] = (df['open'] + df['high'] + df['low'] + df['close']) / 4
for i in range(len(df)):
if i == 0:
heikin_ashi_df.iat[0, 0] = df['open'].iloc[0]
else:
heikin_ashi_df.iat[i, 0] = (heikin_ashi_df.iat[i-1, 0] + heikin_ashi_df.iat[i-1, 3]) / 2
heikin_ashi_df['high'] = heikin_ashi_df.loc[:, ['open', 'close']].join(df['high']).max(axis=1)
heikin_ashi_df['low'] = heikin_ashi_df.loc[:, ['open', 'close']].join(df['low']).min(axis=1)
return heikin_ashi_df
How do I use the above code with my data? I'm a novice, so I'm confused. I'd appreciate it if someone could provide me with a proper way to do this.
Link to the source: https://github.com/emreturan/heikin-ashi/blob/master/heikin_ashi.py
I need to plot this on a graph too. Thanks.
I will use the 'heikin_ashi' code to answer the example of using mplfinance, a popular finance library, for the graph. There are many other libraries available for visualizing investments, so we will use this as a basic form for data acquisition and visualization. A sample of mplfinance can be found here for reference.
import yfinance as yf
import pandas as pd
import mplfinance as mpf
data = yf.download("AAPL", start="2021-07-01", end="2022-01-01", progress=False)
data.columns = ['open', 'high', 'low', 'close', 'adj close', 'volume']
def heikin_ashi(df):
heikin_ashi_df = df.copy()
#heikin_ashi_df = pd.DataFrame(index=df.index.values, columns=['open', 'high', 'low', 'close'])
heikin_ashi_df['close'] = (df['open'] + df['high'] + df['low'] + df['close']) / 4
for i in range(len(df)):
if i == 0:
heikin_ashi_df.iat[0, 0] = df['open'].iloc[0]
else:
heikin_ashi_df.iat[i, 0] = (heikin_ashi_df.iat[i-1, 0] + heikin_ashi_df.iat[i-1, 3]) / 2
heikin_ashi_df['high'] = heikin_ashi_df.loc[:, ['open', 'close']].join(df['high']).max(axis=1)
heikin_ashi_df['low'] = heikin_ashi_df.loc[:, ['open', 'close']].join(df['low']).min(axis=1)
return heikin_ashi_df
df_ha = heikin_ashi(data)
# mpf plotting
mpf.plot(df_ha, type='candle', figratio=(8,4), title='APPL', style='yahoo')
Related
I have the following custom function that generates a row with EMA data for a specific asset based on the current time.
Here's the complete code for the function:
def find_ema(futures_symbol):
futures_symbol = futures_symbol
def fetch_ohlc(symbol,timeframe, timesymbol):
symbol = symbol
timeframe = timeframe
timesymbol = timesymbol
#fetch data-binance api
candlestick_url = 'https://fapi.binance.com/fapi/v1/continuousKlines?pair='+symbol+'&contractType=PERPETUAL&interval='+str(timeframe)+timesymbol+'&limit=1500'
candlestick_chart = requests.get(candlestick_url).json()
candlestick_df = pd.DataFrame(candlestick_chart)
candlestick_df = candlestick_df.iloc[:,1:7]
candlestick_df.columns = ['open', 'high', 'low','close','volume', 'date']
candlestick_df['date'] = pd.to_datetime(candlestick_df['date'], unit='ms').round('1s')
candlestick_df.insert(0, 'date', candlestick_df.pop('date') )
# reset to midnight
candlestick_df.date = pd.to_datetime(candlestick_df.date)
min_date = candlestick_df.date.min()
NextDay_Date = (min_date + datetime.timedelta(days=1)).replace(hour=0, minute=0, second=0, microsecond=0)
candlestick_df = candlestick_df[candlestick_df.date >= NextDay_Date].copy()
candlestick_df = candlestick_df.set_index('date')
candlestick_df['symbol'] = symbol
ohlc_data = candlestick_df
cols= ['open', 'high', 'low','close','volume']
ohlc_data[cols] = ohlc_data[cols].apply(pd.to_numeric, errors = 'coerce')
ohlc_data[cols] = ohlc_data[cols].round(decimals=2)
return ohlc_data
#separate df for limited candle stick data
ohlc_smaller = fetch_ohlc(futures_symbol,5,'m')
ohlc_larger = fetch_ohlc(futures_symbol,1,'h')
ema_df = ohlc_smaller
#calculating ema with 200 row data
ema_df['15m'] = ohlc_smaller.resample('15T').apply({'close':'last'}).ewm(span = 200, min_periods = 200).mean()
ema_df['30m'] = ohlc_smaller.resample('30T').apply({'close':'last'}).ewm(span = 200, min_periods = 200).mean()
ema_df['1h'] = ohlc_larger.resample('60T').apply({'close':'last'}).ewm(span = 200, min_periods = 200).mean()
ema_df['2h'] = ohlc_larger.resample('120T').apply({'close':'last'}).ewm(span = 200, min_periods = 200).mean()
ema_df['4h'] = ohlc_larger.resample('240T').apply({'close':'last'}).ewm(span = 200, min_periods = 200).mean()
#forward fill larger tf data to smaller tf
ema_df = ema_df.fillna(method='ffill').tail(1)
ema_df.insert(0, 'symbol', ema_df.pop('symbol'))
ema_df = ema_df.drop(['high','low','close','volume'], axis=1)
return ema_df
When I apply this function to a single symbol it returns the dataframe perfectly. for example:
working example on single symbol
However, I now have a list of symbols on which I want to apply this function and create a new dataframe.
Here's how I am generating my list of symbols:
symbols_url = 'https://fapi.binance.com/fapi/v1/ticker/price'
symbols_data = requests.get(symbols_url).json()
symbols_df = pd.DataFrame(symbols_data)
symbols_df = symbols_df[symbols_df['symbol'].str.contains('USDT')]
futures_tickers_binance = list(symbols_df['symbol'])
#some ends with numbers(eg:Quarterly Contracts), hence filter:
futures_tickers_binance = list(filter(lambda x: x.endswith(('USDT')), futures_tickers_binance))
Here's what I thought would work:
for symbol in futures_tickers_binance:
for j in range(len(futures_tickers_binance)):
df = df.append(find_ema(futures_tickers_binance[j]))
df = df.drop_duplicates()
However, this returns a valueError:
ValueError: If using all scalar values, you must pass an index
Is there a way to apply this function and generate a new dataframe with the values for the complete list in a faster way?
Thank you in advance for your patience to read this!
The final result would look something like this, however my loop is not working the way it is supposed to be working:
Expected (almost) perfect result
Here's my complete code if needed:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import datetime
import requests
symbols_url = 'https://fapi.binance.com/fapi/v1/ticker/price'
symbols_data = requests.get(symbols_url).json()
symbols_df = pd.DataFrame(symbols_data)
symbols_df = symbols_df[symbols_df['symbol'].str.contains('USDT')]
futures_tickers_binance = list(symbols_df['symbol'])
#some ends with numbers(eg:Quarterly Contracts), hence filter:
futures_tickers_binance = list(filter(lambda x: x.endswith(('USDT')), futures_tickers_binance))
def find_ema(futures_symbol):
futures_symbol = futures_symbol
def fetch_ohlc(symbol,timeframe, timesymbol):
symbol = symbol
timeframe = timeframe
timesymbol = timesymbol
#fetch data-binance api
candlestick_url = 'https://fapi.binance.com/fapi/v1/continuousKlines?pair='+symbol+'&contractType=PERPETUAL&interval='+str(timeframe)+timesymbol+'&limit=1500'
candlestick_chart = requests.get(candlestick_url).json()
candlestick_df = pd.DataFrame(candlestick_chart)
candlestick_df = candlestick_df.iloc[:,1:7]
candlestick_df.columns = ['open', 'high', 'low','close','volume', 'date']
candlestick_df['date'] = pd.to_datetime(candlestick_df['date'], unit='ms').round('1s')
candlestick_df.insert(0, 'date', candlestick_df.pop('date') )
# reset to midnight
candlestick_df.date = pd.to_datetime(candlestick_df.date)
min_date = candlestick_df.date.min()
NextDay_Date = (min_date + datetime.timedelta(days=1)).replace(hour=0, minute=0, second=0, microsecond=0)
candlestick_df = candlestick_df[candlestick_df.date >= NextDay_Date].copy()
candlestick_df = candlestick_df.set_index('date')
candlestick_df['symbol'] = symbol
ohlc_data = candlestick_df
cols= ['open', 'high', 'low','close','volume']
ohlc_data[cols] = ohlc_data[cols].apply(pd.to_numeric, errors = 'coerce')
ohlc_data[cols] = ohlc_data[cols].round(decimals=2)
return ohlc_data
#separate df for limited candle stick data
ohlc_smaller = fetch_ohlc(futures_symbol,5,'m')
ohlc_larger = fetch_ohlc(futures_symbol,1,'h')
ema_df = ohlc_smaller
#calculating ema with 200 row data
ema_df['15m'] = ohlc_smaller.resample('15T').apply({'close':'last'}).ewm(span = 200, min_periods = 200).mean()
ema_df['30m'] = ohlc_smaller.resample('30T').apply({'close':'last'}).ewm(span = 200, min_periods = 200).mean()
ema_df['1h'] = ohlc_larger.resample('60T').apply({'close':'last'}).ewm(span = 200, min_periods = 200).mean()
ema_df['2h'] = ohlc_larger.resample('120T').apply({'close':'last'}).ewm(span = 200, min_periods = 200).mean()
ema_df['4h'] = ohlc_larger.resample('240T').apply({'close':'last'}).ewm(span = 200, min_periods = 200).mean()
#forward fill larger tf data to smaller tf
ema_df = ema_df.fillna(method='ffill').tail(1)
ema_df.insert(0, 'symbol', ema_df.pop('symbol'))
ema_df = ema_df.drop(['high','low','close','volume'], axis=1)
return ema_df
for symbol in futures_tickers_binance:
for j in range(len(futures_tickers_binance)):
df = df.append(find_ema(futures_tickers_binance[j]))
df = df.drop_duplicates()
I am trying to run the following script. I am getting a KeyError on the function trying to plot stock returns.
It seems to be coming from fig = px.line(grouped_metrics, x="Date Snapshot", y="value", color='variable'). However, it is a valid column in my df. I have tried adding different columns in the color= argument but I get the same error. My three columns are 'variable', 'value', 'Date Snapshot'. Appologies for the block of code:
Data for ref
import pandas as pd
import numpy as np
import datetime
import plotly.express as px
import yfinance as yf
import pandas_market_calendars as mcal
from plotly.offline import init_notebook_mode, plot
init_notebook_mode(connected=True)
def create_market_cal(start, end):
nyse = mcal.get_calendar('NYSE')
schedule = nyse.schedule(stocks_start, stocks_end)
market_cal = mcal.date_range(schedule, frequency='1D')
market_cal = market_cal.tz_localize(None)
market_cal = [i.replace(hour=0) for i in market_cal]
return market_cal
def get_data(stocks, start, end):
def data(ticker):
df = yf.download(ticker, start=start, end=(end + datetime.timedelta(days=1)))
df['symbol'] = ticker
df.index = pd.to_datetime(df.index)
return df
datas = map(data, stocks)
return(pd.concat(datas, keys=stocks, names=['Ticker', 'Date'], sort=True))
def get_benchmark(benchmark, start, end):
benchmark = get_data(benchmark, start, end)
benchmark = benchmark.drop(['symbol'], axis=1)
benchmark.reset_index(inplace=True)
return benchmark
portfolio_df = pd.read_csv('C:\\tmp\\stock_transactions.csv')
portfolio_df['Open date'] = pd.to_datetime(portfolio_df['Open date'])
symbols = portfolio_df.Symbol.unique()
stocks_start = datetime.datetime(2018, 3, 1)
stocks_end = datetime.datetime(2021, 3, 10)
daily_adj_close = get_data(symbols, stocks_start, stocks_end)
daily_adj_close = daily_adj_close[['Close']].reset_index()
daily_benchmark = get_benchmark(['SPY'], stocks_start, stocks_end)
daily_benchmark = daily_benchmark[['Date', 'Close']]
market_cal = create_market_cal(stocks_start, stocks_end)
def position_adjust(daily_positions, sale):
stocks_with_sales = pd.DataFrame()
buys_before_start = daily_positions[daily_positions['Type'] == 'Buy'].sort_values(by='Open date')
for position in buys_before_start[buys_before_start['Symbol'] == sale[1]['Symbol']].iterrows():
if position[1]['Qty'] <= sale[1]['Qty']:
sale[1]['Qty'] -= position[1]['Qty']
position[1]['Qty'] = 0
else:
position[1]['Qty'] -= sale[1]['Qty']
sale[1]['Qty'] -= sale[1]['Qty']
stocks_with_sales = stocks_with_sales.append(position[1])
return stocks_with_sales
def portfolio_start_balance(portfolio, start_date):
positions_before_start = portfolio[portfolio['Open date'] <= start_date]
future_sales = portfolio[(portfolio['Open date'] >= start_date) & (portfolio['Type'] == 'Sell')]
sales = positions_before_start[positions_before_start['Type'] =='Sell'].groupby(['Symbol'])['Qty'].sum()
sales = sales.reset_index()
positions_no_change = positions_before_start[~positions_before_start['Symbol'].isin(sales['Symbol'].unique())]
adj_positions_df = pd.DataFrame()
for sale in sales.iterrows():
adj_positions = position_adjust(positions_before_start, sale)
adj_positions_df = adj_positions_df.append(adj_positions)
adj_positions_df = adj_positions_df.append(positions_no_change)
adj_positions_df = adj_positions_df.append(future_sales)
adj_positions_df = adj_positions_df[adj_positions_df['Qty'] > 0]
return adj_positions_df
active_portfolio = portfolio_start_balance(portfolio_df, stocks_start)
def fifo(daily_positions, sales, date):
sales = sales[sales['Open date'] == date]
daily_positions = daily_positions[daily_positions['Open date'] <= date]
positions_no_change = daily_positions[~daily_positions['Symbol'].isin(sales['Symbol'].unique())]
adj_positions = pd.DataFrame()
for sale in sales.iterrows():
adj_positions = adj_positions.append(position_adjust(daily_positions, sale))
adj_positions = adj_positions.append(positions_no_change)
adj_positions = adj_positions[adj_positions['Qty'] > 0]
return adj_positions
def time_fill(portfolio, market_cal):
sales = portfolio[portfolio['Type'] == 'Sell'].groupby(['Symbol','Open date'])['Qty'].sum()
sales = sales.reset_index()
per_day_balance = []
for date in market_cal:
if (sales['Open date'] == date).any():
portfolio = fifo(portfolio, sales, date)
daily_positions = portfolio[portfolio['Open date'] <= date]
daily_positions = daily_positions[daily_positions['Type'] == 'Buy']
daily_positions['Date Snapshot'] = date
per_day_balance.append(daily_positions)
return per_day_balance
positions_per_day = time_fill(active_portfolio, market_cal)
def modified_cost_per_share(portfolio, adj_close, start_date):
df = pd.merge(portfolio, adj_close, left_on=['Date Snapshot', 'Symbol'],
right_on=['Date', 'Ticker'], how='left')
df.rename(columns={'Close': 'Symbol Adj Close'}, inplace=True)
df['Adj cost daily'] = df['Symbol Adj Close'] * df['Qty']
df = df.drop(['Ticker', 'Date'], axis=1)
return df
def benchmark_portfolio_calcs(portfolio, benchmark):
portfolio = pd.merge(portfolio, benchmark, left_on=['Date Snapshot'],
right_on=['Date'], how='left')
portfolio = portfolio.drop(['Date'], axis=1)
portfolio.rename(columns={'Close': 'Benchmark Close'}, inplace=True)
benchmark_max = benchmark[benchmark['Date'] == benchmark['Date'].max()]
portfolio['Benchmark End Date Close'] = portfolio.apply(lambda x: benchmark_max['Close'], axis=1)
benchmark_min = benchmark[benchmark['Date'] == benchmark['Date'].min()]
portfolio['Benchmark Start Date Close'] = portfolio.apply(lambda x: benchmark_min['Close'], axis=1)
return portfolio
def portfolio_end_of_year_stats(portfolio, adj_close_end):
adj_close_end = adj_close_end[adj_close_end['Date'] == adj_close_end['Date'].max()]
portfolio_end_data = pd.merge(portfolio, adj_close_end, left_on='Symbol',
right_on='Ticker')
portfolio_end_data.rename(columns={'Close': 'Ticker End Date Close'}, inplace=True)
portfolio_end_data = portfolio_end_data.drop(['Ticker', 'Date'], axis=1)
return portfolio_end_data
def portfolio_start_of_year_stats(portfolio, adj_close_start):
adj_close_start = adj_close_start[adj_close_start['Date'] == adj_close_start['Date'].min()]
portfolio_start = pd.merge(portfolio, adj_close_start[['Ticker', 'Close', 'Date']],
left_on='Symbol', right_on='Ticker')
portfolio_start.rename(columns={'Close': 'Ticker Start Date Close'}, inplace=True)
portfolio_start['Adj cost per share'] = np.where(portfolio_start['Open date'] <= portfolio_start['Date'],
portfolio_start['Ticker Start Date Close'],
portfolio_start['Adj cost per share'])
portfolio_start['Adj cost'] = portfolio_start['Adj cost per share'] * portfolio_start['Qty']
portfolio_start = portfolio_start.drop(['Ticker', 'Date'], axis=1)
portfolio_start['Equiv Benchmark Shares'] = portfolio_start['Adj cost'] / portfolio_start['Benchmark Start Date Close']
portfolio_start['Benchmark Start Date Cost'] = portfolio_start['Equiv Benchmark Shares'] * portfolio_start['Benchmark Start Date Close']
return portfolio_start
def calc_returns(portfolio):
portfolio['Benchmark Return'] = portfolio['Benchmark Close'] / portfolio['Benchmark Start Date Close'] - 1
portfolio['Ticker Return'] = portfolio['Symbol Adj Close'] / portfolio['Adj cost per share'] - 1
portfolio['Ticker Share Value'] = portfolio['Qty'] * portfolio['Symbol Adj Close']
portfolio['Benchmark Share Value'] = portfolio['Equiv Benchmark Shares'] * portfolio['Benchmark Close']
portfolio['Abs Value Compare'] = portfolio['Ticker Share Value'] - portfolio['Benchmark Start Date Cost']
portfolio['Abs Value Return'] = portfolio['Abs Value Compare']/portfolio['Benchmark Start Date Cost']
portfolio['Stock Gain / (Loss)'] = portfolio['Ticker Share Value'] - portfolio['Adj cost']
portfolio['Benchmark Gain / (Loss)'] = portfolio['Benchmark Share Value'] - portfolio['Adj cost']
portfolio['Abs. Return Compare'] = portfolio['Ticker Return'] - portfolio['Benchmark Return']
return portfolio
def per_day_portfolio_calcs(per_day_holdings, daily_benchmark, daily_adj_close, stocks_start):
df = pd.concat(per_day_holdings, sort=True)
mcps = modified_cost_per_share(df, daily_adj_close, stocks_start)
bpc = benchmark_portfolio_calcs(mcps, daily_benchmark)
pes = portfolio_end_of_year_stats(bpc, daily_adj_close)
pss = portfolio_start_of_year_stats(pes, daily_adj_close)
returns = calc_returns(pss)
return returns
combined_df = per_day_portfolio_calcs(positions_per_day, daily_benchmark, daily_adj_close, stocks_start)
def line(df, val_1, val_2):
grouped_metrics = combined_df.groupby(['Date Snapshot'])[[val_1, val_2]].sum().reset_index()
grouped_metrics = pd.melt(grouped_metrics, id_vars=['Date Snapshot'],
value_vars=[val_1, val_2])
fig = px.line(grouped_metrics, x="Date Snapshot", y="value",
color='variable')
plot(fig)
line(combined_df, 'Stock Gain / (Loss)', 'Benchmark Gain / (Loss)')
def line_facets(df, val_1, val_2):
grouped_metrics = combined_df.groupby(['Symbol', 'Date Snapshot'])[[val_1, val_2]].sum().reset_index()
grouped_metrics = pd.melt(grouped_metrics, id_vars=['Symbol', 'Date Snapshot'],
value_vars=[val_1, val_2])
fig = px.line(grouped_metrics, x="Date Snapshot", y="value",
color='variable', facet_col="Symbol", facet_col_wrap=5)
plot(fig)
line_facets(combined_df, 'Ticker Return', 'Benchmark Return')
The above throws the following error:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-4-337cc930bd36> in <module>
183
184
--> 185 line(combined_df, 'Stock Gain / (Loss)', 'Benchmark Gain / (Loss)')
186
187
<ipython-input-4-337cc930bd36> in line(df, val_1, val_2)
179 value_vars=[val_1, val_2])
180 fig = px.line(grouped_metrics, x="Date Snapshot", y="value",
--> 181 color='variable')
182 plot(fig)
183
~\anaconda3\lib\site-packages\plotly\express\_chart_types.py in line(data_frame, x, y, line_group, color, line_dash, hover_name, hover_data, custom_data, text, facet_row, facet_col, facet_col_wrap, facet_row_spacing, facet_col_spacing, error_x, error_x_minus, error_y, error_y_minus, animation_frame, animation_group, category_orders, labels, orientation, color_discrete_sequence, color_discrete_map, line_dash_sequence, line_dash_map, log_x, log_y, range_x, range_y, line_shape, render_mode, title, template, width, height)
250 a polyline mark in 2D space.
251 """
--> 252 return make_figure(args=locals(), constructor=go.Scatter)
253
254
~\anaconda3\lib\site-packages\plotly\express\_core.py in make_figure(args, constructor, trace_patch, layout_patch)
1887 prefix = get_label(args, args["facet_row"]) + "="
1888 row_labels = [prefix + str(s) for s in sorted_group_values[m.grouper]]
-> 1889 for val in sorted_group_values[m.grouper]:
1890 if val not in m.val_map:
1891 m.val_map[val] = m.sequence[len(m.val_map) % len(m.sequence)]
KeyError: 'variable'
In case someone comes across this issue: I had the same situation, in my case the error message was misleading: root cause was that the dataframe in px.line() was emtpy (no rows).
I tried running the code to get stock data but it fails, showing the following error:
'DatetimeProperties' object has no attribute 'weekday_name'
'NoneType' object has no attribute 'to_csv'
from pandas_datareader import data as web
import os
import pandas as pd
from pandas.testing import assert_frame_equal
def get_stock(ticker, start_date, end_date, s_window, l_window):
try:
df = web.get_data_yahoo(ticker, start=start_date, end=end_date)
df['Return'] = df['Adj Close'].pct_change()
df['Return'].fillna(0, inplace = True)
df['Date'] = df.index
df['Date'] = pd.to_datetime(df['Date'])
df['Month'] = df['Date'].dt.month
df['Year'] = df['Date'].dt.year
df['Day'] = df['Date'].dt.day
for col in ['Open', 'High', 'Low', 'Close', 'Adj Close']:
df[col] = df[col].round(2)
df['Weekday'] = df['Date'].dt.weekday_name
df['Week_Number'] = df['Date'].dt.strftime('%U')
df['Year_Week'] = df['Date'].dt.strftime('%Y-%U')
df['Short_MA'] = df['Adj Close'].rolling(window=s_window, min_periods=1).mean()
df['Long_MA'] = df['Adj Close'].rolling(window=l_window, min_periods=1).mean()
col_list = ['Date', 'Year', 'Month', 'Day', 'Weekday',
'Week_Number', 'Year_Week', 'Open',
'High', 'Low', 'Close', 'Volume', 'Adj Close',
'Return', 'Short_MA', 'Long_MA']
num_lines = len(df)
df = df[col_list]
print('read ', num_lines, ' lines of data for ticker: ' , ticker)
return df
except Exception as error:
print(error)
return None
try:
ticker='MSFT'
input_dir = r'/Users/sirishpulijala/Desktop'
output_file = os.path.join(input_dir, ticker + '.csv')
df = get_stock(ticker, start_date='2014-01-01', end_date='2019-12-31',
s_window=14, l_window=50)
df.to_csv(output_file, index=False)
print('wrote ' + str(len(df)) + ' lines to file: ' + output_file)
except Exception as e:
print(e)
print('failed to get Yahoo stock data for ticker: ', ticker)
Your problem is the following line:
df['Weekday'] = df['Date'].dt.weekday_name
Change it to:
df['Weekday'] = df['Date'].dt.day_name()
and you're fine to go.
We can use
df['Weekday'] = df['Date'].dt.strftime("%A")
This will give the weekday names
More formatting options include:
%A -Full weekday name like MONDAY, TUESDAY etc
%w -Weekday as a decimal number like 1,2,3 etc
%a -Abbreviated weekday name like SUN,MON etc
%Y -year
%m -month
%d -day
%H -hours
%M -minutes
%S -seconds
Good afternoon,
I am a student and I was trying to implement the WaveTrend Oscillator strategy on the Quantopian Platform: https://www.tradingview.com/script/2KE8wTuF-Indicator-WaveTrend-Oscillator-WT/
what I wanted to do is selling AAPL when the indicator is high and buying it when is low.
It keeps giving me this error:
AttributeError: 'zipline.assets._assets.Equity' object has no attribute 'history'
Can anyone help me?
import talib
import pandas
# ---------------------------------------------------
n1, n2, period, stock = 10, 21, 12, sid(24)
# ---------------------------------------------------
def initialize(context):
schedule_function(open_positions, date_rules.week_start(), time_rules.market_open())
def handle_data(context, data):
if get_open_orders(): return
close = stock.history(stock, 'close', period + 1, '1d')
low = stock.history(stock, 'low', period + 1, '1d')
high = stock.history(stock, 'high', period + 1, '1d')
ap = (high+low+close)/3
esa = talib.EMA(ap, timeperiod=n1)
d = talib.EMA(abs(ap - esa), timeperiod=n1)
ci = (ap - esa) / (0.015 * d)
wt1 = talib.EMA(ci, timeperiod=n2)
wt1 = wt1.dropna()
wt2 = talib.SMA(wt1, timeperiod=4)
wt2 = wt2.dropna()
def open_positions(context, data):
if data.can_trade(stock < wt1):
order_target_percent(stock, 2)
elif data.can_trade(stock > wt2):
order_target_percent(stock, -1)
ok, I think I made it work properly:
import talib
# ---------------------------------------------------
n1, n2, period, stock = 10, 21, 60, sid(24)
# ---------------------------------------------------
def initialize(context):
schedule_function(trade, date_rules.week_start(), time_rules.market_open())
def trade(context, data):
ob = 80 #"Over Bought Level"
os = -80 #"Over Sold Level"
if get_open_orders(): return
close = data.history(stock, 'close', period + 1, '1d').dropna()
low = data.history(stock, 'low', period + 1, '1d').dropna()
high = data.history(stock, 'high', period + 1, '1d').dropna()
ap = (high + low + close) / 3
esa = talib.EMA(ap, timeperiod=n1)
d = talib.EMA(abs(ap - esa), timeperiod=n1)
ci = (ap - esa) / (0.015 * d)
wt1 = talib.EMA(ci, timeperiod=n2)
record(wt1 = wt1[-1], ob = ob,os = os)
if data.can_trade(stock):
if wt1[-1] > os:
order_target_percent(stock, 2)
elif wt1[-1] < ob:
order_target_percent(stock, 0)
I am attempting to save a dataframe to Csv. When I print the dataframe it produces the output Im looking for, but when I save the dataframe to csv I only get the last line of the dataframe saved to the csv file.. what I have attempted so far.....
Index_tickers = pd.read_csv('C:\\Users\\ME\\Dropbox\\MktData\\Index_list\\Index_tickers.csv')
Ticker = Index_tickers.ticker
for ticker in Index_tickers.ticker:
index_data = pd.read_csv('C:\\Users\\ME\\Dropbox\\MktData\\Index_list\\' + ticker + '_1.csv')
mkt_data = index_data[['Date', 'Time', 'Open', 'High', 'Low', 'Close', 'Volume']]
numRow = 2
while numRow < endRow:
dOpen0 = mkt_data.ix[numRow, 'Open']
dHigh0 = mkt_data.ix[numRow, 'High']
dLow0 = mkt_data.ix[numRow, 'Low']
dClose0 = mkt_data.ix[numRow, 'Close']
dDate0 = mkt_data.ix[numRow, 'Date']
dTime0 = mkt_data.ix[numRow, 'Time']
dTicker0 = index_data.ix[numRow, 'Ticker']
dHaClose0 = (dOpen0 + dHigh0 + dLow0 + dClose0) / 4
dClose1 = mkt_data.ix[numRow -2 , 'Close']
dOpen1 = mkt_data.ix[numRow -2 , 'Open']
dHaOpen0 = (dClose1 + dOpen1) / 2
dHaHigh0 = max(dHigh0, dHaOpen0, dHaClose0)
dHaLow0 = min(dLow0, dHaOpen0, dHaClose0)
dHaGreen0 = dHaClose0 > dHaOpen0
dHaRed0 = dHaClose0 < dHaOpen0
dNumRow = numRow
numRow = numRow + 1
df = pd.DataFrame({'numRow' : pd.Series(dNumRow), 'Time' : pd.Series(dTime0), 'Date' : pd.Series(dDate0), 'Ticker' : pd.Series(dTicker0), 'Open0' : pd.Series(dOpen0), 'High0' : pd.Series(dHigh0), 'Low0' : pd.Series(dLow0), 'Close0' : pd.Series(dClose0)})
#print df
df.to_csv('C:\Users\\ME\\Dropbox\\MktData\HaDetail.csv')
any help hugely appreciated. Im new to python and learning on the job..
You are overwriting your csv on each iteration because the default mode is 'w' which will overwrite if it exists, additionally you are writing out your header and you only need to do this on the first iteration so I would do the following:
Index_tickers = pd.read_csv('C:\\Users\\ME\\Dropbox\\MktData\\Index_list\\Index_tickers.csv')
Ticker = Index_tickers.ticker
writeHeader = True
for ticker in Index_tickers.ticker:
index_data = pd.read_csv('C:\\Users\\ME\\Dropbox\\MktData\\Index_list\\' + ticker + '_1.csv')
mkt_data = index_data[['Date', 'Time', 'Open', 'High', 'Low', 'Close', 'Volume']]
numRow = 2
while numRow < endRow:
dOpen0 = mkt_data.ix[numRow, 'Open']
dHigh0 = mkt_data.ix[numRow, 'High']
dLow0 = mkt_data.ix[numRow, 'Low']
dClose0 = mkt_data.ix[numRow, 'Close']
dDate0 = mkt_data.ix[numRow, 'Date']
dTime0 = mkt_data.ix[numRow, 'Time']
dTicker0 = index_data.ix[numRow, 'Ticker']
dHaClose0 = (dOpen0 + dHigh0 + dLow0 + dClose0) / 4
dClose1 = mkt_data.ix[numRow -2 , 'Close']
dOpen1 = mkt_data.ix[numRow -2 , 'Open']
dHaOpen0 = (dClose1 + dOpen1) / 2
dHaHigh0 = max(dHigh0, dHaOpen0, dHaClose0)
dHaLow0 = min(dLow0, dHaOpen0, dHaClose0)
dHaGreen0 = dHaClose0 > dHaOpen0
dHaRed0 = dHaClose0 < dHaOpen0
dNumRow = numRow
numRow = numRow + 1
df = pd.DataFrame({'numRow' : pd.Series(dNumRow), 'Time' : pd.Series(dTime0), 'Date' : pd.Series(dDate0), 'Ticker' : pd.Series(dTicker0), 'Open0' : pd.Series(dOpen0), 'High0' : pd.Series(dHigh0), 'Low0' : pd.Series(dLow0), 'Close0' : pd.Series(dClose0)})
#print df
if writeHeader:
df.to_csv('C:\Users\\ME\\Dropbox\\MktData\HaDetail.csv')
writeHeader = False
else:
df.to_csv('C:\Users\\ME\\Dropbox\\MktData\HaDetail.csv', header=False, mode='a')
So we only write the header on first iteration and then for each subsequent iteration change the mode to 'a' so it appends to the file, see the docs