import yfinance as yf
import pandas as pd
dataF = yf.download("EURUSD=X", start="2022-12-22", end="2022-12-24", interval='60m')
print(dataF.iloc[:])
def signal_generator(df):
    open = df.Open.iloc[-1]
    close = df.Close.iloc[1]
    one_open = df.Open.iloc[-2]
    one_close = df.Close.iloc[-2]
    # Bearish Pattern
    if (open<=close and
        one_open>one_close):
        return 1
    # Bullish Pattern
    elif (open>=close and
          one_open<one_close):
        return 2
    # No clear pattern
    else:
        return 0

signal = []
signal.append(0)
for i in range(1,len(dataF)):
    df = dataF[i-1:i+1]
    signal.append(signal_generator(df))
#signal_generator(data)
dataF["signal"] = signal
print(dataF.signal.value_counts())
In the first example it seems to be working, grabbing 2 candlesticks, but when I grab 4 as in the next code example, it shows me an error:
import yfinance as yf
import pandas as pd
dataF = yf.download("EURUSD=X", start="2022-12-22", end="2022-12-24", interval='60m')
print(dataF.iloc[:])
def signal_generator(df):
    open = df.Open.iloc[-1]
    close = df.Close.iloc[1]
    one_open = df.Open.iloc[-2]
    one_close = df.Close.iloc[-2]
    two_open = df.Close.iloc[-3]
    two_close = df.Close.iloc[-3]
    three_open = df.Close.iloc[-3]
    three_close = df.Close.iloc[-3]
    # Bearish Pattern
    if (open<=close and
        one_open>one_close and
        two_open<two_close and
        three_open>=three_close):
        return 1
    # Bullish Pattern
    elif (open>=close and
          one_open<one_close and
          two_open>two_close and
          three_open<=three_close):
        return 2
    # No clear pattern
    else:
        return 0

signal = []
signal.append(0)
for i in range(1,len(dataF)):
    df = dataF[i-1:i+1]
    signal.append(signal_generator(df))
#signal_generator(data)
dataF["signal"] = signal
print(dataF.signal.value_counts())
I believe the problem is in this line:
df = dataF[i-1:i+1]
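That slice always contains exactly two rows, while the four-candle generator indexes back three positions (iloc[-3]), so the lookup falls outside the window. A minimal sketch of the slicing change that seems to be needed, assuming the generator really should see four candles (note that close = df.Close.iloc[1] would probably also need to become iloc[-1] on a four-row window):
signal = [0, 0, 0]  # no signal for the first three candles
for i in range(3, len(dataF)):
    df = dataF[i-3:i+1]  # four rows: candles i-3 .. i
    signal.append(signal_generator(df))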
Try this and report back.
from utils import *
import time
import numpy as np
import pandas as pd
import datetime
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
import math
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
import xgboost as xgb
from sklearn.metrics import accuracy_score
import warnings
warnings.filterwarnings("ignore")
from pandas_datareader import data as wb
import yfinance as yf  # yf.download is used below but was not imported above
tickers = ['SBUX']
start = '2022-09-01'
end = '2022-12-13'
price_data = []
for ticker in tickers:
    data = yf.download(ticker, start, end)
    data = data.reset_index()
    prices = data.loc[:,['Date','Adj Close']]
    price_data.append(prices.assign(ticker=ticker)[['ticker', 'Date', 'Adj Close']])
df = pd.concat(price_data)
df.dtypes
df.head()
df.shape
# Technical Indicators
data = df
num_training_days = int(data.shape[0]*.7)
print('Number of training days: {}. Number of test days: {}.'.format(num_training_days, data.shape[0]-num_training_days))
data['ma7'] = data['Adj Close'].rolling(window=7).mean()
data['ma21'] = data['Adj Close'].rolling(window=21).mean()
# Create exponential weighted moving average
data['26ema'] = data['Adj Close'].ewm(span=26).mean()
data['12ema'] = data['Adj Close'].ewm(span=12).mean()
data['MACD'] = (data['12ema']-data['26ema'])
# Create Bollinger Bands
data['20sd'] = data['Adj Close'].rolling(window=20).std()
data['upper_band'] = data['ma21'] + (data['20sd']*2)
data['lower_band'] = data['ma21'] - (data['20sd']*2)
# Create Exponential moving average
data['ema'] = data['Adj Close'].ewm(com=0.5).mean()
# Create Momentum
data['momentum'] = data['Adj Close']-1
dataset_TI_df = data
dataset = data
#def plot_technical_indicators(dataset, last_days):
last_days = 250
plt.figure(figsize=(16, 10), dpi=100)
shape_0 = dataset.shape[0]
xmacd_ = shape_0-last_days
dataset = dataset.iloc[-last_days:, :]
x_ = range(3, dataset.shape[0])
x_ =list(dataset.index)
# Plot first subplot
plt.subplot(2, 1, 1)
plt.plot(dataset['ma7'],label='MA 7', color='g',linestyle='--')
plt.plot(dataset['Adj Close'],label='Closing Price', color='b')
plt.plot(dataset['ma21'],label='MA 21', color='r',linestyle='--')
plt.plot(dataset['upper_band'],label='Upper Band', color='c')
plt.plot(dataset['lower_band'],label='Lower Band', color='c')
plt.fill_between(x_, dataset['lower_band'], dataset['upper_band'], alpha=0.35)
plt.title('Technical indicators for Starbucks - last {} days.'.format(last_days))
plt.legend()
# Plot second subplot
plt.subplot(2, 1, 2)
plt.title('MACD')
plt.plot(dataset['MACD'],label='MACD', linestyle='-.')
plt.hlines(15, xmacd_, shape_0, colors='g', linestyles='--')
plt.hlines(-15, xmacd_, shape_0, colors='g', linestyles='--')
# plt.plot(dataset['log_momentum'],label='Momentum', color='b',linestyle='-')
plt.legend()
plt.show()
# Trade Signals
signalBuy = []
signalSell = []
position = False
for i in range(len(data)):
    if data['ma7'][i] > data['ma21'][i]:
        if position == False:
            signalBuy.append(data['Adj Close'][i])
            signalSell.append(np.nan)
            position = True
        else:
            signalBuy.append(np.nan)
            signalSell.append(np.nan)
    elif data['ma7'][i] < data['ma21'][i]:
        if position == True:
            signalBuy.append(np.nan)
            signalSell.append(data['Adj Close'][i])
            position = False
        else:
            signalBuy.append(np.nan)
            signalSell.append(np.nan)
    else:
        signalBuy.append(np.nan)
        signalSell.append(np.nan)
data['Buy_Signal_price'] = signalBuy
data['Sell_Signal_price'] = signalSell
data
# Plotting Buy and Sell Points
fig, ax = plt.subplots(figsize=(14,8))
ax.plot(data['Adj Close'] , label = 'stock' ,linewidth=0.5, color='blue', alpha = 0.9)
ax.plot(data['ma7'], label = 'ma7', alpha = 0.85)
ax.plot(data['ma21'], label = 'ma21' , alpha = 0.85)
ax.scatter(data.index , data['Buy_Signal_price'] , label = 'Buy' , marker = '^', color = 'green',alpha =1 )
ax.scatter(data.index , data['Sell_Signal_price'] , label = 'Sell' , marker = 'v', color = 'red',alpha =1 )
ax.set_title(" Price History with buy and sell signals",fontsize=10, backgroundcolor='blue', color='white')
ax.set_xlabel(f'{start} - {end}', fontsize=18)
ax.set_ylabel('Close Price INR (₨)' , fontsize=18)
legend = ax.legend()
ax.grid()
plt.tight_layout()
plt.show()
moving_average_window = 30
data = df
# readjusting the DataFrame
data = data[["Adj Close"]]
# creating the moving average (note: the window is 30 days although the column is named ma20)
data["ma20"] = data["Adj Close"].rolling(window=moving_average_window).mean()
#calculating daily returns
data["daily returns"] = np.log(data["Adj Close"] / data["Adj Close"].shift(1))
data["position"] = [0] * len(data)
data.reset_index(inplace=True)
data = data.drop(["index"], axis=1)
pos_exit = False
pos = "N"
std = round(data["daily returns"].std(),4)
mean = round(data["daily returns"].mean(),4)
print("Std on daily returns :", std)
print("Mean on daily returns :", mean,"\n")
print(data.head(7))
# Event Driven Testing
for i in range(1, len(data)):
    # Signal to go short and reset position
    if pos_exit:
        pos_exit = False
        pos = "N"
        continue
    # going long, if return goes beyond lower bound
    # (1 standard deviation). The assumption here is
    # that the stock will revert back to its mean value
    if data["Adj Close"][i] < ((1 - std) * data["ma20"][i]):
        data.at[i, "position"] = 1
        pos = "L"
    # scenario if return in between lower and upper bounds
    if pos == "L":
        data.at[i, "position"] = 1
    # updating strategy returns
    data["strategy returns"] = data["daily returns"] * data["position"]
    # exiting if the strategy return drops by 3%
    if data["strategy returns"][i] < -0.03:
        data.at[i, "position"] = 0
        pos_exit = True
data.tail(10)
# taking positions after one day of signals being generated
data["position"].shift(1)
print("Buy and hold returns =",round(list(data["daily returns"].cumsum())[-1],4)*100,"%")
print("Strategy returns =", round(list(data["strategy returns"].cumsum())[-1],4)*100,"%")
I am trying to create a time series of sea surface temperature data over the whole year for six consecutive years and plot them using subplots. I want to mark the x-ticks as the months. I tried using the matplotlib.dates option; however, the year doesn't change on the subsequent subplots.
import numpy as np
import sys
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib.dates import set_epoch
arrays14 = [np.asarray(list(map(str, line.split()))) for line in open('/home/swadhin/project/sst/daily/sst15n90e_dy_2014.ascii')] #loading the data
arrays15 = [np.asarray(list(map(str, line.split()))) for line in open('/home/swadhin/project/sst/daily/sst15n90e_dy_2015.ascii')]
arrays16 = [np.asarray(list(map(str, line.split()))) for line in open('/home/swadhin/project/sst/daily/sst15n90e_dy_2016.ascii')]
arrays17 = [np.asarray(list(map(str, line.split()))) for line in open('/home/swadhin/project/sst/daily/sst15n90e_dy_2017.ascii')]
arrays18 = [np.asarray(list(map(str, line.split()))) for line in open('/home/swadhin/project/sst/daily/sst15n90e_dy_2018.ascii')]
arrays19 = [np.asarray(list(map(str, line.split()))) for line in open('/home/swadhin/project/sst/daily/sst15n90e_dy_2019.ascii')]
arrays14 = np.delete(arrays14,[0,1,2,3,4],0) #deleting the headers
arrays15 = np.delete(arrays15,[0,1,2,3,4],0)
arrays16 = np.delete(arrays16,[0,1,2,3,4],0)
arrays17 = np.delete(arrays17,[0,1,2,3,4],0)
arrays18 = np.delete(arrays18,[0,1,2,3,4],0)
arrays19 = np.delete(arrays19,[0,1,2,3,4,215,216,217],0)
sst14 = []
datetime1 = []  # these two lists are appended to below, so they need initializing
datetime2 = []
for i in arrays14:
    d1 = i[0]
    d2 = i[2]
    sst1 = i[2]
    sst14.append(sst1)
    datetime1.append(d1)
    datetime2.append(d2)
sst14 = np.array(sst14,dtype = np.float64)
sst_14_m = np.ma.masked_equal(sst14,-9.99) #masking the fillvalues
sst15 = []
for i in arrays15:
    sst2 = i[2]
    sst15.append(sst2)
sst15 = np.array(sst15,dtype = np.float64)
sst_15_m = np.ma.masked_equal(sst15,-9.99)
sst16 = []
for i in arrays16:
    sst3 = i[2]
    sst16.append(sst3)
sst16 = np.array(sst16,dtype = np.float64)
sst_16_m = np.ma.masked_equal(sst16,-9.99)
sst17 = []
for i in arrays17:
    sst4 = i[2]
    sst17.append(sst4)
sst17 = np.array(sst17,dtype = np.float64)
sst_17_m = np.ma.masked_equal(sst17,-9.99)
sst18 = []
for i in arrays18:
    sst5 = i[2]
    sst18.append(sst5)
sst18 = np.array(sst18,dtype = np.float64)
sst_18_m = np.ma.masked_equal(sst18,-9.99)
np.shape(sst18)
sst19 = []
for i in arrays19:
    sst6 = i[2]
    sst19.append(sst6)
sst19 = np.array(sst19,dtype = np.float64)
sst19_u = np.zeros(len(sst14), dtype = np.float64)
sst19_fill = np.full([118],-9.99,dtype=np.float64)
sst19_u[0:211] = sst19[0:211]
sst19_u[211:329] = sst19_fill
sst19_u[329:365] = sst19[211:247]
sst_19_m = np.ma.masked_equal(sst19_u,-9.99)
##########Plotting
new_epoch = '2016-01-01T00:00:00'
mdates.set_epoch(new_epoch)
fig, axs=plt.subplots(3, 2, figsize=(12, 8),constrained_layout=True)
axs = axs.ravel()
axs[0].plot(sst_14_m)
axs[1].plot(sst_15_m)
axs[2].plot(sst_16_m)
axs[3].plot(sst_17_m)
axs[4].plot(sst_18_m)
axs[5].plot(sst_19_m)
for i in range(6):
    axs[i].xaxis.set_major_locator(mdates.MonthLocator())
    axs[i].xaxis.set_minor_locator(mdates.MonthLocator())
    axs[i].xaxis.set_major_formatter(mdates.ConciseDateFormatter(axs[i].xaxis.get_major_locator()))
    #axs[i].grid(True)
    axs[i].set_ylim(bottom=25, top=32)
    axs[i].set_ylabel('SST')
plt.show()
I got an output like the following:
I would like the x-labels to show 2016, 2017, 2018, 2019, etc.
The data can be found in the folder - https://drive.google.com/drive/folders/1bETa7PjWKIUNS13xg3RgIMa5L7bpYn5W?usp=sharing
I love NumPy as much as the next person, but this is a good use case for pandas. Pandas has the advantage of being able to label rows with more meaningful things than just a positional index. For example, you can use dates. This is very convenient.
First, load your data:
import pandas as pd
import glob
dfs = []
for fname in glob.glob('./sst15n90e_dy_*.ascii'):
    df = pd.read_csv(fname, skiprows=4, delimiter=r'\s+')
    dfs.append(df)
df = pd.concat(dfs, axis=0, ignore_index=True)
Now do df.head() and you'll see this:
Let's convert that date to a 'datetime' object, and use it as the index instead of the default row numbers. We'll also deal with those -9.99 values.
import numpy as np
df['ds'] = pd.to_datetime(df['YYYYMMDD'], format='%Y%m%d')
df = df.set_index('ds')
df = df.sort_index()
df.loc[df['SST'] == -9.99, 'SST'] = np.nan
Now you have a dataset you can do all sorts of magic with, like df.resample('Y')['SST'].sum() shows you the annual sum of SST.
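For instance, a couple of quick aggregations along those lines (using means, which arguably suit a temperature series better than sums; this assumes the 'SST' column and datetime index built above):
annual_mean = df.resample('Y')['SST'].mean()    # one value per year
monthly_mean = df.resample('M')['SST'].mean()   # one value per month
print(annual_mean.round(2))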
Anyway, now we can make plots in various ways. You can plot DataFrames directly, e.g. check out df.groupby(df.index.year)['SST'].plot(). Or you can use seaborn (check out the gallery!), which understands DataFrames. Or you can construct a plot with matplotlib in the usual way. For instance:
import matplotlib.pyplot as plt
from matplotlib.dates import DateFormatter
fig, axs = plt.subplots(nrows=3, ncols=2, figsize=(12, 8), sharey=True)
for ax, (year, group) in zip(axs.flat, df.groupby(df.index.year)):
    ax.plot(group['SST'])
    ax.set_title(year)
    ax.grid(c='k', alpha=0.15)
    date_form = DateFormatter("%b")
    ax.xaxis.set_major_formatter(date_form)
plt.tight_layout()
This is close to what you wanted, but with a more useful data structure and quite a bit less code:
I did some modifications and got the results as desired:
from pickletools import float8
import os
import numpy as np
import sys
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib.dates import set_epoch
from datetime import datetime
# for files in os.listdir('/home/swadhin/project/sst/daily'):
#     path = (files)
#     print(path)
#     arrays = [np.asarray(list(map(str, line.split()))) for line in open(files)]
arrays14 = [np.asarray(list(map(str, line.split()))) for line in open('/home/swadhin/project/sst/daily/sst15n90e_dy_2014.ascii')] #loading the data
arrays15 = [np.asarray(list(map(str, line.split()))) for line in open('/home/swadhin/project/sst/daily/sst15n90e_dy_2015.ascii')]
arrays16 = [np.asarray(list(map(str, line.split()))) for line in open('/home/swadhin/project/sst/daily/sst15n90e_dy_2016.ascii')]
arrays17 = [np.asarray(list(map(str, line.split()))) for line in open('/home/swadhin/project/sst/daily/sst15n90e_dy_2017.ascii')]
arrays18 = [np.asarray(list(map(str, line.split()))) for line in open('/home/swadhin/project/sst/daily/sst15n90e_dy_2018.ascii')]
arrays08 = [np.asarray(list(map(str, line.split()))) for line in open('/home/swadhin/project/sst/daily/sst15n90e_dy_2008.ascii')]
arrays14 = np.delete(arrays14,[0,1,2,3,4],0) #deleting the headers
arrays15 = np.delete(arrays15,[0,1,2,3,4],0)
arrays16 = np.delete(arrays16,[0,1,2,3,4],0)
arrays17 = np.delete(arrays17,[0,1,2,3,4],0)
arrays18 = np.delete(arrays18,[0,1,2,3,4],0)
arrays08 = np.delete(arrays08,[0,1,2,3,4,215,216,217],0)
sst14 = []
datetime1 = [] #year, month,date
#datetime2 = [] #hour,min,second
for i in arrays14:
    d1 = i[0]
    #d2 = i[2]
    sst1 = i[2]
    sst14.append(sst1)
    datetime1.append(d1)
    #datetime2.append(d2)
#reading the data
# datetime1 = np.array(datetime1,dtype = np.float64)
# datetime2 = np.array(datetime2,dtype = np.float64)
sst14 = np.array(sst14,dtype = np.float64)
sst_14_m = np.ma.masked_equal(sst14,-9.99) #masking the fillvalues
sst15 = []
datetime2 = []
for i in arrays15:
    d2 = i[0]
    sst2 = i[2]
    sst15.append(sst2)
    datetime2.append(d2)
sst15 = np.array(sst15,dtype = np.float64)
sst_15_m = np.ma.masked_equal(sst15,-9.99)
sst16 = []
datetime3 = []
for i in arrays16:
    d3 = i[0]
    sst3 = i[2]
    sst16.append(sst3)
    datetime3.append(d3)
sst16 = np.array(sst16,dtype = np.float64)
sst_16_m = np.ma.masked_equal(sst16,-9.99)
sst17 = []
datetime4 = []
for i in arrays17:
    d4 = i[0]
    sst4 = i[2]
    sst17.append(sst4)
    datetime4.append(d4)
sst17 = np.array(sst17,dtype = np.float64)
sst_17_m = np.ma.masked_equal(sst17,-9.99)
sst18 = []
datetime5 = []
for i in arrays18:
    d5 = i[0]
    sst5 = i[2]
    sst18.append(sst5)
    datetime5.append(d5)
sst18 = np.array(sst18,dtype = np.float64)
sst_18_m = np.ma.masked_equal(sst18,-9.99)
sst08 = []
datetime6 = []
for i in arrays08:
    d6 = i[0]
    sst6 = i[2]
    sst08.append(sst6)
    datetime6.append(d6)
sst08 = np.array(sst08,dtype = np.float64)
# sst08_u = np.zeros(len(sst14), dtype = np.float64)
# sst08_fill = np.full([118],-9.99,dtype=np.float64)
# sst08_u[0:211] = sst08[0:211]
# sst08_u[211:329] = sst08_fill
# sst08_u[329:365] = sst08[211:247]
sst_08_m = np.ma.masked_equal(sst08,-9.99)
dt = np.asarray([datetime1,datetime2,datetime3,datetime4,datetime5,datetime6])
dt_m = []
for i in dt:
    dt_m1 = []
    for j in i:
        datetime_object = datetime.strptime(j,'%Y%m%d')
        dt_m1.append(datetime_object)
    dt_m.append(dt_m1)
##########Plotting
# new_epoch = '2016-01-01T00:00:00'
# mdates.set_epoch(new_epoch)
fig, axs=plt.subplots(3, 2, figsize=(12, 8),constrained_layout=True)
axs = axs.ravel()
axs[0].plot_date(dt_m[5],sst_08_m,'-')
axs[1].plot_date(dt_m[0],sst_14_m,'-')
axs[2].plot_date(dt_m[1],sst_15_m,'-')
axs[3].plot_date(dt_m[2],sst_16_m,'-')
axs[4].plot_date(dt_m[3],sst_17_m,'-')
axs[5].plot_date(dt_m[4],sst_18_m,'-')
for i in range(6):
    axs[i].xaxis.set_major_locator(mdates.MonthLocator())
    axs[i].xaxis.set_minor_locator(mdates.MonthLocator())
    axs[i].xaxis.set_major_formatter(mdates.ConciseDateFormatter(axs[i].xaxis.get_major_locator()))
    axs[i].grid(True)
    axs[i].set_ylim(bottom=25, top=32)
    axs[i].set_ylabel('SST')
plt.show()
And it solved the issue.
I am trying to split up a JSON file from Alpha Vantage's API into separate files depending on the date. I'm also trying to reformat the file to have blank values in the gaps where dates are missing. The following code is what I have come up with, but it gives me TypeError: 'list' object is not callable. I'm fairly new to Python and pandas, so I'm sure there is a better way to go about this.
import requests
import pandas as pd
from datetime import datetime, timedelta
from dateutil import parser
import numpy as np
from pandas import DataFrame
import json
symbol = "MSFT"
symbol_list = symbol.split(",")
def num_el(list):
    count = 0
    for element in list:
        count += 1
    return count

def csv_make(sy, dar, dat):
    csv_file = open(f"{sy}_1min_{dar}.csv", "w", newline="")
    csv_file.write(dat)
    csv_file.close()
i = 0
x = -1
n = num_el(symbol_list)
while i < n:
    namesym = symbol_list[x]
    ticker = namesym
    api_key = 'APIKEYHERE'
    url = f'https://www.alphavantage.co/query?function=TIME_SERIES_INTRADAY&symbol={ticker}&outputsize=full&interval=1min&apikey={api_key}'
    data = requests.get(url)
    dsf = data.json()
    daf = pd.DataFrame(dsf['Time Series (1min)'])
    dxf: DataFrame = daf.T
    dxf.index.name = 'time'
    dxf.reset_index(inplace=True)
    dxf['time'] = pd.to_datetime(dxf['time'])
    dxf['minute'] = dxf['time'].dt.time
    dxf['day'] = dxf['time'].dt.day
    dxf['date'] = dxf['time'].dt.date
    agg = dxf.groupby([dxf['day']])
    length1 = dxf.groupby([dxf['day']]).size()
    length = pd.DataFrame(length1)
    length.index.name = 'day'
    length.reset_index(inplace=True)
    length_sum = length[0].sum()
    v = 0
    d = length_sum
    b = len(length)
    x2 = length_sum
    while v < b:
        a = length[0][v]
        x2 -= length[0][v]
        xd = agg.get_group(length['day'][v])
        date = xd['date'][x2]
        max_dt = parser.parse(str(max(xd['minute'])))
        min_dt = parser.parse(str(min(xd['minute'])))
        dt_range = []
        while min_dt <= max_dt:
            dt_range.append(min_dt.strftime("%H:%M:%S"))
            min_dt += timedelta(seconds=60)
        complete_df = pd.DataFrame({'minute': dt_range})
        xy = complete_df.astype('str')
        yx = xd.astype('str')
        dasf = xy.merge(yx, how='left', on='minute')
        dasf['ev'] = np.where(dasf['1. open'].notnull(), 'False', 'True')
        time = []
        open = []
        high = []
        low = []
        close = []
        volume = []
        empty_value = []
        for ib in range(len(dasf)):
            time.append(dasf['minute'][ib])
            open.append(dasf['1. open'][ib])
            high.append(dasf['2. high'][ib])
            low.append(dasf['3. low'][ib])
            close.append(dasf['4. close'][ib])
            volume.append(dasf['5. volume'][ib])
            empty_value.append(dasf['ev'][ib])
        time_df = pd.DataFrame(time).rename(columns={0: 'Time'})
        open_df = pd.DataFrame(open).rename(columns={0: 'Open'})
        high_df = pd.DataFrame(high).rename(columns={0: 'High'})
        low_df = pd.DataFrame(low).rename(columns={0: 'Low'})
        close_df = pd.DataFrame(close).rename(columns={0: 'Close'})
        volume_df = pd.DataFrame(volume).rename(columns={0: 'Volume'})
        empty_value_df = pd.DataFrame(empty_value).rename(columns={0: 'Empty Value'})
        frames = [time_df, open_df, high_df, low_df, close_df, volume_df, empty_value_df]
        df = pd.concat(frames, axis=1, join='inner')
        df = df.set_index('Time')
        ad = df.to_csv()
        csv_make(namesym, date, ad)
        v += 1
    i += 1
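For what it's worth, the error message itself suggests name shadowing: the inner loop rebinds the module-level names time, open, high, low, close and volume to plain lists, so when csv_make later calls the built-in open(), that name now resolves to a list. A minimal sketch of the mechanism, separate from the code above:
def write_file(text):
    f = open("out.csv", "w")  # looks up the name 'open' in the global scope
    f.write(text)
    f.close()

open = []            # a module-level list now shadows the built-in open()
write_file("data")   # TypeError: 'list' object is not callable
Renaming those lists (e.g. times, opens, highs) should clear the error.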
I am trying to load an Excel file using the openpyxl library, but I keep getting a ValueError. I also created a new Excel file and tried loading it using pandas, but I still get an exception related to cell "C7".
openpyxl
DestFile ="C:\\Users\\yaxee\\OneDrive\\Desktop\\NBET Extraction data\\December Data Extration 2020 NBET\\XYX.xlsx"
wb2 = xl.load_workbook(DestFile)
Error
ValueError: invalid literal for int() with base 10: '7.0'
pandas
df = pd.read_excel (r'C:\Users\yaxee\OneDrive\Desktop\NBET Extraction data\December Data Extration 2020 NBET\XYX.xlsx')
Error
Exception: cell name 'C7.0' but row number is '7'
I can post the full error script if needed.
Here is the full script I'm working with:
import openpyxl as xl
ExtractionFile ="C:\\Users\\yaxee\\OneDrive\\Desktop\\NBET Extraction data\\August Data Extration 2020 NBET\\NOR04082020.xlsx"
wb1 = xl.load_workbook(ExtractionFile, data_only=True)
daily_broadcast = wb1.worksheets[0]
DestFile ="C:\\Users\\yaxee\\OneDrive\\Desktop\\NBET Extraction data\\August Data Extration 2020 NBET\\SampleOct.xlsx"
wb2 = xl.load_workbook(DestFile)
peak_gen = wb2.worksheets[0]
off_gen = wb2.worksheets[1]
energy_gen = wb2.worksheets[2]
energy_sent = wb2.worksheets[3]
instlld_cap = wb2.worksheets[4]
gen_cap = wb2.worksheets[5]
onBar_cap = wb2.worksheets[6]
gen_6am = wb2.worksheets[7]
unutilized = wb2.worksheets[8]
col_count = 6
step = 2
read_start_row = 73
write_start_row = 4
amount_of_rows = 54
#peak generation capability code
for row in range(5, 34):
    a = daily_broadcast.cell(row = row, column = 25)
    peak_gen.cell(row = row-1, column = col_count).value = a.value
wb2.save(str(DestFile))
#off generation capability code
for row in range(5, 34):
    b = daily_broadcast.cell(row = row, column = 27)
    off_gen.cell(row = row-1, column = col_count).value = b.value
wb2.save(str(DestFile))
#Energy generated code
for row in range(39, 68):
    c = daily_broadcast.cell(row = row, column = 25)
    energy_gen.cell(row = row-35, column = col_count).value = c.value
wb2.save(str(DestFile))
#Energy dispatched code
for row in range(39, 68):
    d = daily_broadcast.cell(row = row, column = 27)
    energy_sent.cell(row = row-35, column = col_count).value = d.value
wb2.save(str(DestFile))
#Installed Capacity code
for i in range(0, amount_of_rows, step):
    e = daily_broadcast.cell(row = read_start_row + i, column = 13)
    instlld_cap.cell(row = write_start_row+(i/step), column = col_count).value = e.value
wb2.save(str(DestFile))
#Generation Capablity code
for i in range(0, amount_of_rows, step):
    f = daily_broadcast.cell(row = read_start_row + i, column = 15)
    gen_cap.cell(row = write_start_row+(i/step), column = col_count).value = f.value
wb2.save(str(DestFile))
#On Bar Capablity code
for i in range(0, amount_of_rows, step):
    g = daily_broadcast.cell(row = read_start_row + i, column = 19)
    onBar_cap.cell(row = write_start_row+(i/step), column = col_count).value = g.value
wb2.save(str(DestFile))
#Generation at 6am code
for i in range(0, amount_of_rows, step):
    g = daily_broadcast.cell(row = read_start_row + i, column = 21)
    gen_6am.cell(row = write_start_row+(i/step), column = col_count).value = g.value
wb2.save(str(DestFile))
This happens when a worksheet cell is written with a row or column designation that is a floating-point number, for example worksheet.cell(row=1.0, column=1).value = 'some value'. While Excel reads the file without any issues, having openpyxl open the file causes the error. A simple remedy is to always use an integer for the row and the column designations.
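In the script above, the float presumably creeps in through i/step: in Python 3 the / operator performs true division and returns a float even for two integers, so rows are written as 7.0 rather than 7, which matches the 'C7.0' in the error. A sketch of the remedy on one of the loops, using integer division:
#Installed Capacity code, with an integer row designation
for i in range(0, amount_of_rows, step):
    e = daily_broadcast.cell(row = read_start_row + i, column = 13)
    instlld_cap.cell(row = write_start_row + (i // step), column = col_count).value = e.value
wb2.save(str(DestFile))
Wrapping the expression as int(write_start_row + i/step) would work just as well.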
I am trying to write the results from the loop into an Excel file (keys = column names, values = row data). This code generates the file for me, but it only prints one row of data in the file. How can I make it append the other rows to the file?
import requests
import json
import pandas as pd
# BearerAuth is assumed to be a custom requests auth class defined elsewhere
p = (('BusinessName', 'CustomerNameToSearch'), ('PageSize', '2'), ('CountryCode', 'CA'))
prepare_link = requests.get('https://api.myapiloopuplink?', auth=BearerAuth('PMay4TY5K577b76154i97yC9DlbPytqd'), params=p)
test = requests.get(prepare_link.url, auth=BearerAuth('PMay4TY5K577b76154i97yC9DlbPytqd'), params=p)
data = json.loads(test.text)
CustomerIdList = []
for customer in data['Data']:
    BusinessID = customer['BusinessId']
    BusinessName = customer['BusinessName']
    CustomerIdList.append(str(customer['BusinessId']))

for i in CustomerIdList:
    links2 = ("https://api.myapiloopuplink/"+i+"/History?count=1")
    test2 = requests.get(links2, auth=BearerAuth('PMay4TY5K577b76154i97yC9DlbPytqd'))
    data2 = json.loads(test2.text)
    start_row = 0
    for extradetails in data2['Data']:
        myDict = {}
        myDict["BusinessId"] = customer['BusinessId']
        myDict["BusinessName"] = customer['BusinessName']
        myDict["Year"] = extradetails['Year']
        myDict["Rate"] = extradetails['Rate']
        print(myDict)
        k = list(myDict.keys())
        v = list(myDict.values())
        #print(k)
        #print(v)
        x = [myDict]
        df = pd.DataFrame(x)
        df.to_excel('locationandnameoffile.xlsx', sheet_name='sheet1', index=False, startrow=start_row)
        start_row = start_row + len(df) + 1
This is the output I currently get:
This is the output I am trying to get:
In the loop I get the right results when I print (it shows multiple rows):
print(myDict)
I think the problem is here:
for extradetails in data2['Data']:
    myDict = {}
    myDict["BusinessId"] = customer['BusinessId']
    myDict["BusinessName"] = customer['BusinessName']
    myDict["Year"] = extradetails['Year']
    myDict["Rate"] = extradetails['Rate']
    print(myDict)
    k = list(myDict.keys())
    v = list(myDict.values())
    #print(k)
    #print(v)
    x = [myDict]
    df = pd.DataFrame(x) #problem
    df.to_excel('locationandnameoffile.xlsx', sheet_name='sheet1', index=False, startrow=start_row) #problem
    start_row = start_row + len(df) + 1
You are creating an Excel file on every iteration of the loop, so each write overwrites the previous one. How about creating the Excel file after the loop completes, like this:
datas = []
for extradetails in data2['Data']:
    myDict = {}
    myDict["BusinessId"] = customer['BusinessId']
    myDict["BusinessName"] = customer['BusinessName']
    myDict["Year"] = extradetails['Year']
    myDict["Rate"] = extradetails['Rate']
    print(myDict)
    datas.append(myDict)  # append the dict itself so each one becomes a row
df = pd.DataFrame(datas)
df.to_excel('locationandnameoffile.xlsx', sheet_name='sheet1', index=False)