I am attempting to save a dataframe to Csv. When I print the dataframe it produces the output Im looking for, but when I save the dataframe to csv I only get the last line of the dataframe saved to the csv file.. what I have attempted so far.....
Index_tickers = pd.read_csv('C:\\Users\\ME\\Dropbox\\MktData\\Index_list\\Index_tickers.csv')
Ticker = Index_tickers.ticker
for ticker in Index_tickers.ticker:
index_data = pd.read_csv('C:\\Users\\ME\\Dropbox\\MktData\\Index_list\\' + ticker + '_1.csv')
mkt_data = index_data[['Date', 'Time', 'Open', 'High', 'Low', 'Close', 'Volume']]
numRow = 2
while numRow < endRow:
dOpen0 = mkt_data.ix[numRow, 'Open']
dHigh0 = mkt_data.ix[numRow, 'High']
dLow0 = mkt_data.ix[numRow, 'Low']
dClose0 = mkt_data.ix[numRow, 'Close']
dDate0 = mkt_data.ix[numRow, 'Date']
dTime0 = mkt_data.ix[numRow, 'Time']
dTicker0 = index_data.ix[numRow, 'Ticker']
dHaClose0 = (dOpen0 + dHigh0 + dLow0 + dClose0) / 4
dClose1 = mkt_data.ix[numRow -2 , 'Close']
dOpen1 = mkt_data.ix[numRow -2 , 'Open']
dHaOpen0 = (dClose1 + dOpen1) / 2
dHaHigh0 = max(dHigh0, dHaOpen0, dHaClose0)
dHaLow0 = min(dLow0, dHaOpen0, dHaClose0)
dHaGreen0 = dHaClose0 > dHaOpen0
dHaRed0 = dHaClose0 < dHaOpen0
dNumRow = numRow
numRow = numRow + 1
df = pd.DataFrame({'numRow' : pd.Series(dNumRow), 'Time' : pd.Series(dTime0), 'Date' : pd.Series(dDate0), 'Ticker' : pd.Series(dTicker0), 'Open0' : pd.Series(dOpen0), 'High0' : pd.Series(dHigh0), 'Low0' : pd.Series(dLow0), 'Close0' : pd.Series(dClose0)})
#print df
df.to_csv('C:\Users\\ME\\Dropbox\\MktData\HaDetail.csv')
any help hugely appreciated. Im new to python and learning on the job..
You are overwriting your csv on each iteration because the default mode is 'w' which will overwrite if it exists, additionally you are writing out your header and you only need to do this on the first iteration so I would do the following:
Index_tickers = pd.read_csv('C:\\Users\\ME\\Dropbox\\MktData\\Index_list\\Index_tickers.csv')
Ticker = Index_tickers.ticker
writeHeader = True
for ticker in Index_tickers.ticker:
index_data = pd.read_csv('C:\\Users\\ME\\Dropbox\\MktData\\Index_list\\' + ticker + '_1.csv')
mkt_data = index_data[['Date', 'Time', 'Open', 'High', 'Low', 'Close', 'Volume']]
numRow = 2
while numRow < endRow:
dOpen0 = mkt_data.ix[numRow, 'Open']
dHigh0 = mkt_data.ix[numRow, 'High']
dLow0 = mkt_data.ix[numRow, 'Low']
dClose0 = mkt_data.ix[numRow, 'Close']
dDate0 = mkt_data.ix[numRow, 'Date']
dTime0 = mkt_data.ix[numRow, 'Time']
dTicker0 = index_data.ix[numRow, 'Ticker']
dHaClose0 = (dOpen0 + dHigh0 + dLow0 + dClose0) / 4
dClose1 = mkt_data.ix[numRow -2 , 'Close']
dOpen1 = mkt_data.ix[numRow -2 , 'Open']
dHaOpen0 = (dClose1 + dOpen1) / 2
dHaHigh0 = max(dHigh0, dHaOpen0, dHaClose0)
dHaLow0 = min(dLow0, dHaOpen0, dHaClose0)
dHaGreen0 = dHaClose0 > dHaOpen0
dHaRed0 = dHaClose0 < dHaOpen0
dNumRow = numRow
numRow = numRow + 1
df = pd.DataFrame({'numRow' : pd.Series(dNumRow), 'Time' : pd.Series(dTime0), 'Date' : pd.Series(dDate0), 'Ticker' : pd.Series(dTicker0), 'Open0' : pd.Series(dOpen0), 'High0' : pd.Series(dHigh0), 'Low0' : pd.Series(dLow0), 'Close0' : pd.Series(dClose0)})
#print df
if writeHeader:
df.to_csv('C:\Users\\ME\\Dropbox\\MktData\HaDetail.csv')
writeHeader = False
else:
df.to_csv('C:\Users\\ME\\Dropbox\\MktData\HaDetail.csv', header=False, mode='a')
So we only write the header on first iteration and then for each subsequent iteration change the mode to 'a' so it appends to the file, see the docs
Related
I am not getting correct calculations for 3 columns I am trying to write on a data sheet for specific date and time in time series data. I want to calculate difference between various times and the closing price time. I am having problem for some reason I can't get correct output for the calculations.
This is the output from this code.
import pandas as pd
import os
import numpy as np
from openpyxl import Workbook
# Read the data into a Pandas DataFrame
directory_path = "C:/Users/bean/Desktop/_L"
os.chdir(directory_path)
book = Workbook()
book.remove(book.active) # remove the first sheet
for file in os.listdir(directory_path):
if file.endswith(".csv"):
file_path = os.path.join(directory_path, file)
df = pd.read_csv(file_path)
# Create a new DataFrame for each file
df_diff = df[['Date', 'CET', 'NA', 'UTC', 'Name', 'BOLLBU', 'BOLLBM', 'BOLLBL',
'VWAP', 'VWAPSD1U', 'VWAPSD1L', 'VWAPSD2U', 'VWAPSD2L', 'ATR', 'ATRMA']]
df['Date'] = pd.to_datetime(df['Date'])
df['CET'] = pd.to_datetime(df['Date'])
df['UTC'] = pd.to_datetime(df['Date'])
df['NA'] = pd.to_datetime(df['Date'])
df_diff['Date'] = pd.to_datetime(df['Date'])
df_diff['CET'] = pd.to_datetime(df['CET'])
df_diff['UTC'] = pd.to_datetime(df['UTC'])
df_diff['NA'] = pd.to_datetime(df['NA'])
df_diff['Open'] = df['Open']
df_diff['High'] = df['High']
df_diff['Low'] = df['Low']
df_diff['Close'] = df['Close']
# Calculate the differences and add them as new columns
df_diff['Open Diff'] = (df['Open'].shift(-1) -
df['Open']) / df['Open'] * 100
df_diff['High Diff'] = (df['High'].shift(-1) -
df['High']) / df['High'] * 100
df_diff['Low Diff'] = (df['Low'].shift(-1) -
df['Low']) / df['Low'] * 100
df_diff['Close Diff'] = (
df['Close'].shift(-1) - df['Close']) / df['Close'] * 100
df_1635 = df[(df['Date'].dt.hour == 16) & (
df['Date'].dt.minute == 35)].sort_values(by='Date', ascending=False)
df_1625 = df[(df['Date'].dt.hour == 16) & (
df['Date'].dt.minute == 25)].sort_values(by='Date', ascending=False)
df_1620 = df[(df['Date'].dt.hour == 16) & (
df['Date'].dt.minute == 20)].sort_values(by='Date', ascending=False)
df_1615 = df[(df['Date'].dt.hour == 16) & (
df['Date'].dt.minute == 15)].sort_values(by='Date', ascending=False)
df_1610 = df[(df['Date'].dt.hour == 16) & (
df['Date'].dt.minute == 10)].sort_values(by='Date', ascending=False)
df_1605 = df[(df['Date'].dt.hour == 16) & (
df['Date'].dt.minute == 5)].sort_values(by='Date', ascending=False)
df_1600 = df[(df['Date'].dt.hour == 16) & (
df['Date'].dt.minute == 0)].sort_values(by='Date', ascending=False)
df_1545 = df[(df['Date'].dt.hour == 15) & (
df['Date'].dt.minute == 45)].sort_values(by='Date', ascending=False)
df_1530 = df[(df['Date'].dt.hour == 15) & (
df['Date'].dt.minute == 30)].sort_values(by='Date', ascending=False)
df_1500 = df[(df['Date'].dt.hour == 15) & (
df['Date'].dt.minute == 0)].sort_values(by='Date', ascending=False)
df_1445 = df[(df['Date'].dt.hour == 14) & (
df['Date'].dt.minute == 45)].sort_values(by='Date', ascending=False)
df_1430 = df[(df['Date'].dt.hour == 14) & (
df['Date'].dt.minute == 30)].sort_values(by='Date', ascending=False)
df_1400 = df[(df['Date'].dt.hour == 14) & (
df['Date'].dt.minute == 0)].sort_values(by='Date', ascending=False)
df_1330 = df[(df['Date'].dt.hour == 13) & (
df['Date'].dt.minute == 30)].sort_values(by='Date', ascending=False)
df_1300 = df[(df['Date'].dt.hour == 13) & (
df['Date'].dt.minute == 0)].sort_values(by='Date', ascending=False)
df_1230 = df[(df['Date'].dt.hour == 12) & (
df['Date'].dt.minute == 30)].sort_values(by='Date', ascending=False)
df_0800 = df[(df['Date'].dt.hour == 8) & (
df['Date'].dt.minute == 0)].sort_values(by='Date', ascending=False)
# Calculate difference between Close price of df_1635 and other DataFrames
df_diff_1635_1625 = df_1635['Close'] - df_1625['Close']
df_diff_1635_1620 = df_1635['Close'].subtract(df_1620['Close'])
df_diff_1635_1615 = df_1635['Close'].subtract(df_1615['Close'])
df_diff_1635_1610 = df_1635['Close'].subtract(df_1610['Close'])
df_diff_1635_1605 = df_1635['Close'].subtract(df_1605['Close'])
df_diff_1635_1600 = df_1635['Close'].subtract(df_1600['Close'])
df_diff_1635_1545 = df_1635['Close'].subtract(df_1545['Close'])
df_diff_1635_1530 = df_1635['Close'].subtract(df_1530['Close'])
df_diff_1635_1500 = df_1635['Close'].subtract(df_1500['Close'])
df_diff_1635_1445 = df_1635['Close'].subtract(df_1445['Close'])
df_diff_1635_1430 = df_1635['Close'].subtract(df_1430['Close'])
df_diff_1635_1400 = df_1635['Close'].subtract(df_1400['Close'])
df_diff_1635_1330 = df_1635['Close'].subtract(df_1330['Close'])
df_diff_1635_1300 = df_1635['Close'].subtract(df_1300['Close'])
df_diff_1635_1230 = df_1635['Close'].subtract(df_1230['Close'])
df_diff_1635_0800 = df_1635['Close'].subtract(df_0800['Close'])
print(df_diff_1635_1625)
# Add Difference, Percent_Diff, and U/D columns to each DataFrame
df_1635['Difference'] = df_1635['Close'].subtract(
df_1635['Close'].shift())
df_1635['Percent_Diff'] = (df_1635['Difference'] /
df_1635['Close']) * 100
df_1635['U/D'] = np.where(df_1635['Difference'] > 0, 'U', 'D')
df_1625['Difference'] = df_diff_1635_1625
df_1625['Percent_Diff'] = (df_diff_1635_1625 / df_1635['Close']) * 100
df_1625['U/D'] = np.where(df_1625['Percent_Diff'] > 0, 'U', 'D')
print(df_1625.dtypes)
df_1620['Difference'] = df_diff_1635_1620
df_1620['Percent_Diff'] = (df_diff_1635_1620 / df_1635['Close']) * 100
df_1620['U/D'] = np.where(df_1620['Percent_Diff'] > 0, 'U', 'D')
df_1615['Difference'] = df_diff_1635_1615
df_1615['Percent_Diff'] = (df_diff_1635_1615 / df_1635['Close']) * 100
df_1615['U/D'] = np.where(df_1615['Percent_Diff'] > 0, 'U', 'D')
df_1610['Difference'] = df_diff_1635_1610
df_1610['Percent_Diff'] = (df_diff_1635_1610 / df_1635['Close']) * 100
df_1610['U/D'] = np.where(df_1610['Percent_Diff'] > 0, 'U', 'D')
df_1605['Difference'] = df_diff_1635_1605
df_1605['Percent_Diff'] = (df_diff_1635_1605 / df_1635['Close']) * 100
df_1605['U/D'] = np.where(df_1605['Percent_Diff'] > 0, 'U', 'D')
df_1600['Difference'] = df_diff_1635_1600
df_1600['Percent_Diff'] = (df_diff_1635_1600 / df_1635['Close']) * 100
df_1600['U/D'] = np.where(df_1600['Percent_Diff'] > 0, 'U', 'D')
df_1545['Difference'] = df_diff_1635_1545
df_1545['Percent_Diff'] = (df_diff_1635_1545 / df_1635['Close']) * 100
df_1545['U/D'] = np.where(df_1545['Percent_Diff'] > 0, 'U', 'D')
df_1530['Percent_Diff'] = (df_diff_1635_1530 / df_1635['Close']) * 100
df_1530['U/D'] = np.where(df_1530['Percent_Diff'] > 0, 'U', 'D')
df_1500['Difference'] = df_diff_1635_1500
df_1500['Percent_Diff'] = (df_diff_1635_1500 / df_1635['Close']) * 100
df_1500['U/D'] = np.where(df_1500['Percent_Diff'] > 0, 'U', 'D')
df_1445['Difference'] = df_diff_1635_1445
df_1445['Percent_Diff'] = (df_diff_1635_1445 / df_1635['Close']) * 100
df_1445['U/D'] = np.where(df_1445['Percent_Diff'] > 0, 'U', 'D')
df_1430['Difference'] = df_diff_1635_1430
df_1430['Percent_Diff'] = (df_diff_1635_1430 / df_1635['Close']) * 100
df_1430['U/D'] = np.where(df_1430['Percent_Diff'] > 0, 'U', 'D')
df_1400['Difference'] = df_diff_1635_1400
df_1400['Percent_Diff'] = (df_diff_1635_1400 / df_1635['Close']) * 100
df_1400['U/D'] = np.where(df_1400['Percent_Diff'] > 0, 'U', 'D')
df_1330['Difference'] = df_diff_1635_1330
df_1330['Percent_Diff'] = (df_diff_1635_1330 / df_1635['Close']) * 100
df_1330['U/D'] = np.where(df_1330['Percent_Diff'] > 0, 'U', 'D')
df_1300['Difference'] = df_diff_1635_1300
df_1300['Percent_Diff'] = (df_diff_1635_1300 / df_1635['Close']) * 100
df_1300['U/D'] = np.where(df_1300['Percent_Diff'] > 0, 'U', 'D')
df_1230['Difference'] = df_diff_1635_1230
df_1230['Percent_Diff'] = (df_diff_1635_1230 / df_1635['Close']) * 100
df_1230['U/D'] = np.where(df_1230['Percent_Diff'] > 0, 'U', 'D')
df_0800['Difference'] = df_diff_1635_0800
df_0800['Percent_Diff'] = (df_diff_1635_0800 / df_1635['Close']) * 100
df_0800['U/D'] = np.where(df_0800['Percent_Diff'] > 0, 'U', 'D')
df_25 = df[(df['Date'].dt.hour == 16) & (
df['Date'].dt.minute == 25)].sort_values(by='Date', ascending=False)
df_35 = df[(df['Date'].dt.hour == 16) & (
df['Date'].dt.minute == 35)].sort_values(by='Date', ascending=False)
# Concat all results for each time into this sheet.
df_35 = df_35[['Date', 'CET', 'NA', 'UTC', 'Name', 'Open', 'Open Diff', 'High', 'High Diff', 'Low', 'Low Diff',
'Close', 'Close Diff', 'BOLLBU', 'BOLLBM', 'BOLLBL', 'VWAP', 'VWAPSD1U', 'VWAPSD1L', 'VWAPSD2U',
'VWAPSD2L', 'ATR', 'ATRMA']]
df_diff = df_diff.sort_values(by='Date', ascending=False)
df_diff = df_diff[['Date', 'CET', 'NA', 'UTC', 'Name', 'Open', 'Open Diff', 'High', 'High Diff', 'Low', 'Low Diff',
'Close', 'Close Diff', 'BOLLBU', 'BOLLBM', 'BOLLBL', 'VWAP', 'VWAPSD1U', 'VWAPSD1L', 'VWAPSD2U',
'VWAPSD2L', 'ATR', 'ATRMA']]
writer = pd.ExcelWriter(
f'{file.split(".")[0]}.xlsx', engine='openpyxl')
df_diff.to_excel(writer, sheet_name='df_diff', index=False, startrow=0)
df_35.to_excel(writer, sheet_name='Sheet_35min', index=False)
dataframes = [df_1625, df_1620, df_1615, df_1610, df_1605, df_1600, df_1545,
df_1530, df_1500, df_1445, df_1430, df_1400, df_1330, df_1300, df_1230, df_0800]
for i, df in enumerate(dataframes):
df.to_excel(writer, sheet_name=f"df_{i}", index=False)
writer.save()
Essentially, the calculations under the for loop and for df_35 are not coming out properly correctly? How am I doing the operations wrong? The date is datetime but I am accessing column in that specific time value so I don't understand why it doesn't work. I tried various ways here are a few calculation methods I tried that were still wrong,
Neither of these work
df_diff_1635_1625 = df_1635['Close'] - df_1625['Close']
df_diff_1635_1620 = df_1635['Close'].subtract(df_1620['Close'])
all my columns are mostly float64 including close ones except date which is datetime. i check and print the calculation i get nan values so its clearly not processing it.
`
I'm making a script that, based on input variables from an excel file (script.xlsm), connects to the database, processes the data, and at the output it should add the processed data to a new sheet in the script.xlsm file. Code execution result error:
import pandas as pd
from openpyxl import load_workbook
def matrixf():
ex = pd.read_excel('C:\\Users\\admin\\Documents\\Project Python\\script.xlsm', sheet_name='Sheet1')
file_path = ex['Unnamed: 2'][1]
bank = ex['Unnamed: 2'][2]
obligations_type = ex['Unnamed: 2'][3]
date1='2020-01-01'
date2='2019-01-01'
df = pd.read_csv(file_path, sep=';', encoding = 'cp1251')
df.columns = ['id', 'date', 'delay_day', 'oblig_type', 'bank']
df.date = pd.to_datetime(df.date, format='%d/%m/%Y')
df = df[df.bank == bank]
df = df[df.oblig_type==obligations_type]
def delay_category(val):
if val == 0:
return 'Без просрочки'
elif 0 < val <= 30:
return '0 - 30 days'
elif 30 < val <= 60:
return '31-60 days'
elif 60 < val <= 90:
return '61-90 days'
else:
return '91+ days'
df['delay_cat'] = df['delay_day'].apply(delay_category)
df = df.drop(columns=['oblig_type', 'bank', 'delay_day'])
df1 = df[df['date'] == date1]
df2 = df[df['date'] == date2]
suff1 = '_'+date1
suff2 = '_'+date2
df_final = df1.merge(df2, on='id', how='outer', suffixes=(suff1, suff2))
matrix = df_final.pivot_table(index=['date'+suff1, 'delay_cat'+suff1],
columns=['date'+suff2, 'delay_cat'+suff2],
values='id',
aggfunc='count')
book = load_workbook('script.xlsm')
writer = pd.ExcelWriter('script.xlsm', mode='a', engine='openpyxl')
writer.book = book
writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
matrix.to_excel(writer, "matrix")
writer.save()
[Errno 13] Permission denied: 'script.xlsm'
def auto_program(ticker, file_name, your_name):
ticker_df = ticker.upper()
company_name = quandl.get_table('SHARADAR/TICKERS', ticker=ticker, table='SF1')['name'][0]
today = datetime.today().strftime('%Y-%m-%d')
header = {'label': ['Company Name', 'Ticker Symbol', 'Created By', 'Last Retrieved'],
'value': [company_name, ticker_df, your_name, today]}
header_df = pd.DataFrame(header)
initial_quarterly = quandl.get_table('SHARADAR/SF1',dimension='ARQ', ticker=ticker)
initial_quarterly['calendardate'] = pd.to_datetime(initial_quarterly['calendardate'])
ordered_quarterly = initial_quarterly.sort_values(by='calendardate', ascending = True)
initial_annual = quandl.get_table('SHARADAR/SF1',dimension='ARY', ticker=ticker)
initial_annual['calendardate'] = pd.to_datetime(initial_annual['calendardate'])
ordered_annual = initial_annual.sort_values(by='calendardate', ascending = True)
fixed_quarterly = ordered_quarterly.drop(['assetsavg'], axis=1)\
.swapaxes('index', 'columns', copy=True)
idx = [2] + [i for i in range(len(fixed_quarterly)) if i != 2]
fixed_annual = ordered_annual.drop(['assetsavg'], axis=1)\
.swapaxes('index', 'columns', copy=True)
idx = [2] + [i for i in range(len(fixed_annual)) if i != 2]
annual_pd = fixed_annual.iloc[idx]
quarterly_pd = fixed_quarterly.iloc[idx]
# '/Users/user/Downloads/' is a specific local path to file, will need to change for others
book = load_workbook('/Users/user/Downloads/' + file_name+ '.xlsx')
writer = pd.ExcelWriter(file_name+'.xlsx', engine='openpyxl')
writer.book = book
header_df.to_excel(writer,sheet_name='Header', index = False,header= False)
annual_pd.to_excel(writer,sheet_name='Annual', index = True,header= False)
quarterly_pd.to_excel(writer,sheet_name='Quarterly', index = True,header= False)
writer.save()
I would like to be able to run this program more than once and have a new ticker symbol in the input, and have the header, annual, and quarterly sheets simply be over written with new data. How can this be done?
So I'm trying to plot Heiken Ashi candles, and then I want to plot them on graph.
My code so far:
def heikin_ashi():
historical_data = client.get_historical_klines(symbol=SYMBOL, interval=TIME_PERIOD, start_str="15 days ago UTC", klines_type=HistoricalKlinesType.FUTURES)
hist_df = pd.DataFrame(historical_data)
hist_df.columns = ['Open Time', 'Open', 'High', 'Low', 'Close', 'Volume', 'Close Time', 'Quote Asset Volume',
'Number of Trades', 'TB Base Volume', 'TB Quote Volume', 'Ignore']
hist_df['Open Time'] = pd.to_datetime(hist_df['Open Time']/1000, unit='s')
hist_df['Close Time'] = pd.to_datetime(hist_df['Close Time']/1000, unit='s')
df_HA = hist_df
df_HA['Close'] = (hist_df['Open'] + hist_df['High'] + hist_df['Low'] + hist_df['Close']) / 4
# idx = df_HA.index.name
# df_HA.reset_index(inplace=True)
for i in range(0, len(hist_df)):
if i == 0:
df_HA['Open'][i] = ((hist_df['Open'][i] + hist_df['Close'][i]) / 2)
else:
df_HA['Open'][i] = ((hist_df['Open'][i - 1] + hist_df['Close'][i - 1]) / 2)
# if idx:
# df_HA.set_index(idx, inplace=True)
df_HA['High'] = hist_df[['Open', 'Close', 'High']].max(axis=1)
df_HA['Low'] = hist_df[['Open', 'Close', 'Low']].min(axis=1)
print(df_HA)
Error:
result[mask] = op(xrav[mask], y)
TypeError: unsupported operand type(s) for /: 'str' and 'int'
Also I came across this:
import pandas as pd
def heikin_ashi(df):
heikin_ashi_df = pd.DataFrame(index=df.index.values, columns=['open', 'high', 'low', 'close'])
heikin_ashi_df['close'] = (df['open'] + df['high'] + df['low'] + df['close']) / 4
for i in range(len(df)):
if i == 0:
heikin_ashi_df.iat[0, 0] = df['open'].iloc[0]
else:
heikin_ashi_df.iat[i, 0] = (heikin_ashi_df.iat[i-1, 0] + heikin_ashi_df.iat[i-1, 3]) / 2
heikin_ashi_df['high'] = heikin_ashi_df.loc[:, ['open', 'close']].join(df['high']).max(axis=1)
heikin_ashi_df['low'] = heikin_ashi_df.loc[:, ['open', 'close']].join(df['low']).min(axis=1)
return heikin_ashi_df
How do I use the above code with my data? I'm a novice, so I'm confused. I'd appreciate it if someone could provide me with a proper way to do this.
Link to the source: https://github.com/emreturan/heikin-ashi/blob/master/heikin_ashi.py
I need to plot this on a graph too. Thanks.
I will use the 'heikin_ashi' code to answer the example of using mplfinance, a popular finance library, for the graph. There are many other libraries available for visualizing investments, so we will use this as a basic form for data acquisition and visualization. A sample of mplfinance can be found here for reference.
import yfinance as yf
import pandas as pd
import mplfinance as mpf
data = yf.download("AAPL", start="2021-07-01", end="2022-01-01", progress=False)
data.columns = ['open', 'high', 'low', 'close', 'adj close', 'volume']
def heikin_ashi(df):
heikin_ashi_df = df.copy()
#heikin_ashi_df = pd.DataFrame(index=df.index.values, columns=['open', 'high', 'low', 'close'])
heikin_ashi_df['close'] = (df['open'] + df['high'] + df['low'] + df['close']) / 4
for i in range(len(df)):
if i == 0:
heikin_ashi_df.iat[0, 0] = df['open'].iloc[0]
else:
heikin_ashi_df.iat[i, 0] = (heikin_ashi_df.iat[i-1, 0] + heikin_ashi_df.iat[i-1, 3]) / 2
heikin_ashi_df['high'] = heikin_ashi_df.loc[:, ['open', 'close']].join(df['high']).max(axis=1)
heikin_ashi_df['low'] = heikin_ashi_df.loc[:, ['open', 'close']].join(df['low']).min(axis=1)
return heikin_ashi_df
df_ha = heikin_ashi(data)
# mpf plotting
mpf.plot(df_ha, type='candle', figratio=(8,4), title='APPL', style='yahoo')
I am trying to split up a json file from alpha-vantages api into separate files depending on the date. I'm also trying to reformat the file to have blank values in the gaps where dates are missing. The following code is what I have come up with but it gives me the TypeError: 'list' object is not callable". I'm fairly new to python and pandas so I'm sure there is a better way to go about this.
import requests
import pandas as pd
from datetime import datetime, timedelta
from dateutil import parser
import numpy as np
from pandas import DataFrame
import json
symbol = "MSFT"
symbol_list = symbol.split(",")
def num_el(list):
count = 0
for element in list:
count += 1
return count
def csv_make(sy, dar, dat):
csv_file = open(f"{sy}_1min_{dar}.csv", "w", newline="")
csv_file.write(dat)
csv_file.close()
i = 0
x = -1
n = num_el(symbol_list)
while i < n:
namesym = symbol_list[x]
ticker = namesym
api_key = 'APIKEYHERE'
url = f'https://www.alphavantage.co/query?function=TIME_SERIES_INTRADAY&symbol={ticker}&outputsize=full&interval=1min&apikey={api_key}'
data = requests.get(url)
dsf = data.json()
daf = pd.DataFrame(dsf['Time Series (1min)'])
dxf: DataFrame = daf.T
dxf.index.name = 'time'
dxf.reset_index(inplace=True)
dxf['time'] = pd.to_datetime(dxf['time'])
dxf['minute'] = dxf['time'].dt.time
dxf['day'] = dxf['time'].dt.day
dxf['date'] = dxf['time'].dt.date
agg = dxf.groupby([dxf['day']])
length1 = dxf.groupby([dxf['day']]).size()
length = pd.DataFrame(length1)
length.index.name = 'day'
length.reset_index(inplace=True)
length_sum = length[0].sum()
v = 0
d = length_sum
b = len(length)
x2 = length_sum
while v < b:
a = length[0][v]
x2 -= length[0][v]
xd = agg.get_group(length['day'][v])
date = xd['date'][x2]
max_dt = parser.parse(str(max(xd['minute'])))
min_dt = parser.parse(str(min(xd['minute'])))
dt_range = []
while min_dt <= max_dt:
dt_range.append(min_dt.strftime("%H:%M:%S"))
min_dt += timedelta(seconds=60)
complete_df = pd.DataFrame({'minute': dt_range})
xy = complete_df.astype('str')
yx = xd.astype('str')
dasf = xy.merge(yx, how='left', on='minute')
dasf['ev'] = np.where(dasf['1. open'].notnull(), 'False', 'True')
time = []
open = []
high = []
low = []
close = []
volume = []
empty_value = []
for ib in range(len(dasf)):
time.append(dasf['minute'][ib])
open.append(dasf['1. open'][ib])
high.append(dasf['2. high'][ib])
low.append(dasf['3. low'][ib])
close.append(dasf['4. close'][ib])
volume.append(dasf['5. volume'][ib])
empty_value.append(dasf['ev'][ib])
time_df = pd.DataFrame(time).rename(columns={0: 'Time'})
open_df = pd.DataFrame(open).rename(columns={0: 'Open'})
high_df = pd.DataFrame(high).rename(columns={0: 'High'})
low_df = pd.DataFrame(low).rename(columns={0: 'Low'})
close_df = pd.DataFrame(close).rename(columns={0: 'Close'})
volume_df = pd.DataFrame(volume).rename(columns={0: 'Volume'})
empty_value_df = pd.DataFrame(empty_value).rename(columns={0: 'Empty Value'})
frames = [time_df, open_df, high_df, low_df, close_df, volume_df, empty_value_df]
df = pd.concat(frames, axis=1, join='inner')
df = df.set_index('Time')
ad = df.to_csv()
csv_make(namesym, date, ad)
v += 1
i += 1