When running this code, I am expecting a table to print from data_period, however pycharm is returning an empty index when it is run. My friend, using the same code on pycharm, gets the desired table. So we think it has to do with the files that pycharm is using/storing information in. Any other ideas?
import yfinance as yf
import pandas as pd
import numpy as np
from pandas_market_calendars import get_calendar
def _as_naive_dates(index):
    """Return *index* as a timezone-naive DatetimeIndex at midnight.

    `Index.isin` matches nothing when one side is timezone-aware and the
    other is not, so both sides are brought to the same representation
    before they are compared.
    """
    index = pd.DatetimeIndex(index)
    if index.tz is not None:
        # Drop the timezone but keep the local wall-clock date.
        index = index.tz_localize(None)
    return index.normalize()


# Get the calendar for the NYSE
nyse = get_calendar('NYSE')

# Get the trading days for the NYSE between the start and end date
start_date = '2020-01-01'
end_date = '2023-01-26'
schedule = nyse.schedule(start_date, end_date)
trading_days = _as_naive_dates(schedule.index)

# Download the daily price action of the S&P 500
data = yf.download("SPY", start=start_date, end=end_date)

# NOTE(review): depending on the installed yfinance version the download may
# come back with two-level (field, ticker) columns and/or a timezone-aware
# index -- presumably the reason the same script behaves differently on two
# machines; confirm with `print(data.index.tz, data.columns)`.
if isinstance(data.columns, pd.MultiIndex):
    data.columns = data.columns.get_level_values(0)
data.index = _as_naive_dates(data.index)

# Filter out days when the market is closed. `.copy()` makes the result an
# independent frame, so the column assignments below do not write into a
# slice of `data`.
data_period = data.loc[data.index.isin(trading_days)].copy()

# Number of periods to calculate the standard deviation
averagingPeriod = 30

# Previous session's excursions from its open, as a percentage of its close.
prev_open = data_period['Open'].shift(1)
prev_close = data_period['Close'].shift(1)
data_period['up'] = 100 * (data_period['High'].shift(1) - prev_open) / prev_close
data_period['down'] = 100 * np.abs(prev_open - data_period['Low'].shift(1)) / prev_close

# Rolling standard deviation and mean of both excursions.
rolling_moves = data_period[['up', 'down']].rolling(window=averagingPeriod)
std_dev = rolling_moves.std().rename(
    columns={'up': 'up_std_dev', 'down': 'down_std_dev'})
average = rolling_moves.mean().rename(
    columns={'up': 'ave_up', 'down': 'ave_down'})

# H1 / H2 resistance levels: open plus the average up-move (plus one std dev).
data_period['H1'] = data_period['Open'] + (average['ave_up'] / 100) * data_period['Open']
data_period['H2'] = data_period['Open'] + ((average['ave_up'] + std_dev['up_std_dev']) / 100) * data_period['Open']

# L1 / L2 support levels: open minus the average down-move (minus one std dev).
data_period['L1'] = data_period['Open'] - (average['ave_down'] / 100) * data_period['Open']
data_period['L2'] = data_period['Open'] - ((average['ave_down'] + std_dev['down_std_dev']) / 100) * data_period['Open']

print(data_period)
I tried to get data_period to print out a full table, but I instead received an empty index.
Related
Goal is to pull stock tickers from Wikipedia, use yfinance to grab the historical closing prices, and perform the Relative Strength Index (RSI) calculation for each of the tickers listed in the S&P500. Once this is accomplished, the tickers will be grouped into a 'buy', 'sell', or 'donothing' category based on their respective calculated RSI values. However, I am receiving an Index Error that refers to [ if RSI[245]>=30 and RSI[245-10]<30: ] claiming 245 is out of bounds for axis 0 with size 126, with size 126 being the number of days of closing prices collected.
import yfinance as yf
import pandas as pd
# Question snippet: download S&P 500 closing prices, compute a 14-period RSI
# per ticker and sort the tickers into buy / sell / donothing lists.
# NOTE(review): the loop bodies below carry no indentation in this copy.
# Read and print the stock tickers that make up S&P500
tickers = pd.read_html(
'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')[0]
print(tickers.head())
# Get the data for this tickers from yahoo finance
df = yf.download(tickers.Symbol.to_list(),'2021-9-24','2022-3-24', auto_adjust=True)['Close']
# NOTE(review): `data` is not assigned anywhere in this snippet; the frame
# downloaded on the previous line is named `df`.
print(data.head())
#-------------------------------------------------------
df2 = df
#print(df2)
total_columns = df2.shape[1]
#print(total_columns)
print(df2)
#-------------------------------------------------------
buy = []
sell = []
donothing = []
for i in range(total_columns):
#company_name = header
# iloc[:, i] selects a single column, i.e. a Series; the item assignments
# below therefore store entries on a Series rather than adding DataFrame
# columns (the answer that follows switches to .to_frame()).
df_RSI = df2.iloc[:,i]
#print(df_RSI)
df_RSI['diff']=df_RSI.diff(1)
#print(df_RSI['diff'])
# Calculate Avg. Gains/Losses
df_RSI['gain'] = df_RSI['diff'].clip(lower=0).round(2)
df_RSI['loss'] = df_RSI['diff'].clip(upper=0).abs().round(2)
#print(df_RSI['gain'])
window_length = 14
df_RSI['avg_gain'] = df_RSI['gain'].rolling(window=window_length,min_periods=window_length).mean()
#print(df_RSI['avg_gain'][:30]) #yay working!
df_RSI['avg_loss'] = df_RSI['loss'].rolling(window=window_length, min_periods=window_length).mean()
#print(df_RSI['avg_loss'][:30]) #yay working!
#print(df_RSI.name) #prints out the tickers wooooo
# Get WMS averages
# Each value after the first window is recomputed as
# (previous average * (window_length - 1) + current value) / window_length.
# Average Gains
for k, row in enumerate(df_RSI['avg_gain'].iloc[window_length+1:]):
df_RSI['avg_gain'].iloc[k + window_length + 1] =\
(df_RSI['avg_gain'].iloc[k + window_length] *
(window_length - 1) +
df_RSI['gain'].iloc[k + window_length + 1])\
/ window_length
# Average Losses
for j, row in enumerate(df_RSI['avg_loss'].iloc[window_length+1:]):
df_RSI['avg_loss'].iloc[j + window_length + 1] =\
(df_RSI['avg_loss'].iloc[j + window_length] *
(window_length - 1) +
df_RSI['loss'].iloc[j + window_length + 1])\
/ window_length
# View initial results
#print(df_RSI[:,window_length-1:window_length+5])
# Calculate RS Values
df_RSI['rs'] = df_RSI['avg_gain'] / df_RSI['avg_loss']
#print(df_RSI['rs'][:30])
#print(df_RSI['rs'][30:]) #yay working!
# Calculate RSI
df_RSI['rsi'] = 100 - (100 / (1.0 + df_RSI['rs']))
# View Result RSI
RSI = (df_RSI['rsi'])
#print(RSI[200:])
#print(RSI[199])
# NOTE(review): positions 245, 235 and 240 are hard-coded; the question text
# reports only 126 rows of prices, which is the reported IndexError. The elif
# also mixes 240 with 245-10 -- confirm which row was intended.
if RSI[245]>=30 and RSI[245-10]<30:
buy.append(df_RSI.name)
elif RSI[240]<=70 and RSI[245-10]>70:
sell.append(df_RSI.name)
else:
donothing.append(df_RSI.name)
print(buy)
print(sell)
print(donothing)
I have made a few changes to your code so that it works (I received an error because you didn't set df_RSI to a dataframe initially, and I changed your if statements at the end - explained below code).
import yfinance as yf
import pandas as pd
# Read and print the stock tickers that make up S&P500
tickers = pd.read_html(
    'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')[0]
print(tickers.head())

# Get the data for this tickers from yahoo finance
df = yf.download(tickers.Symbol.to_list(), '2021-9-24', '2022-3-24',
                 auto_adjust=True)['Close']
print(df.head())

df2 = df
total_columns = df2.shape[1]
print(df2)

buy = []
sell = []
donothing = []

# Look-back used for the initial simple average and for the smoothing below.
window_length = 14

for i in range(total_columns):
    # One ticker at a time, as a DataFrame so the intermediate results can be
    # stored as columns next to the closing prices.
    df_RSI = df2.iloc[:, i].to_frame()
    df_RSI['diff'] = df_RSI.iloc[:, 0].diff(1)

    # Split the daily change into its positive and negative parts.
    df_RSI['gain'] = df_RSI['diff'].clip(lower=0).round(2)
    df_RSI['loss'] = df_RSI['diff'].clip(upper=0).abs().round(2)

    # Seed values: plain rolling means over the first full window.
    seed_gain = df_RSI['gain'].rolling(
        window=window_length, min_periods=window_length).mean()
    seed_loss = df_RSI['loss'].rolling(
        window=window_length, min_periods=window_length).mean()

    # Smooth on plain arrays and write each column back once. Assigning
    # through df_RSI['col'].iloc[pos] is chained indexing, which pandas does
    # not guarantee to write into df_RSI.
    gain = df_RSI['gain'].to_numpy()
    loss = df_RSI['loss'].to_numpy()
    avg_gain = seed_gain.to_numpy(copy=True)
    avg_loss = seed_loss.to_numpy(copy=True)
    for pos in range(window_length + 1, len(df_RSI)):
        avg_gain[pos] = (avg_gain[pos - 1] * (window_length - 1)
                         + gain[pos]) / window_length
        avg_loss[pos] = (avg_loss[pos - 1] * (window_length - 1)
                         + loss[pos]) / window_length
    df_RSI['avg_gain'] = avg_gain
    df_RSI['avg_loss'] = avg_loss

    # Calculate RS and RSI
    df_RSI['rs'] = df_RSI['avg_gain'] / df_RSI['avg_loss']
    df_RSI['rsi'] = 100 - (100 / (1.0 + df_RSI['rs']))
    RSI = df_RSI['rsi']

    # Compare the last row with the row 10 before it. `.iloc` is positional;
    # plain RSI[-1] is looked up as a label on a date-indexed Series in
    # recent pandas versions.
    latest = RSI.iloc[-1]
    earlier = RSI.iloc[-1 - 10]
    if latest >= 30 and earlier < 30:
        buy.append(df2.columns[i])
    elif latest <= 70 and earlier > 70:
        sell.append(df2.columns[i])
    else:
        # Also reached when either value is NaN (all comparisons are False).
        donothing.append(df2.columns[i])

print(buy)
print(sell)
print(donothing)
The changes to your if statement:
df_RSI.name was changed to df2.columns[i], as I assume you wanted to append the ticker to buy, sell or donothing (and I changed df_RSI to a dataframe at the start of the for loop, so that you were appending columns in the rest of the loop, rather than additional rows to a series - and changing to_frame means that there is no longer a name for df_RSI).
The if statement itself was changed, to if RSI[-1]>=30.... This is because you are currently trying to find the index values: 245, 235 (in if), 240 and 235 (in elif), but the length of the series RSI is only 126 (so, because the index starts at 0, the very last row is RSI[125]). I changed this to be the last row RSI[-1], and the row 10 before that. Let me know if this is not what you were looking for, and I can change it (if you need more help with it).
How do I print the dataframe, where the population is within 5% of the mean? (2.5% below and 2.5% above)
Here is what I've tried:
# Bounds of a band 2.5% below and 2.5% above the mean population.
mean = df['population'].mean()
minimum = mean - (0.025*mean)
maximum = mean + (0.025*mean)
# NOTE(review): only the upper bound is applied here; `minimum` is computed
# above but not used in the filter.
df[df.population < maximum]
Use:
# Keep the rows whose population lies strictly between both bounds.
df.loc[(df['population'] > minimum) & (df['population'] < maximum)]
import pandas as pd
df = pd.read_csv("fileName.csv")
# The 'population' column is expected to hold numeric values.
population = df['population']
mean = population.mean()
# Band of 2.5% on either side of the mean.
minimum = mean - (0.025 * mean)
maximum = mean + (0.025 * mean)
above_lower = population.gt(minimum)
below_upper = population.lt(maximum)
# Rows strictly inside the band.
ans = df.loc[above_lower & below_upper]
ans
you can use this
I built this dataframe for testing.
import numpy as np
import pandas as pd
# 200 random integer populations in [1_000_000, 100_000_000), as a one-column frame.
random_data = np.random.randint(low=1_000_000, high=100_000_000, size=200)
random_df = pd.DataFrame({'population': random_data})
random_df
Here's the answer to specifically what you were asking for.
pop = random_df.population
# Band of 2.5% of the mean on either side of the mean.
pop_mean = pop.mean()
band = pop_mean * 0.025
top_boundary = pop_mean + band
low_boundary = pop_mean - band
# between() includes both boundaries.
criteria_boundary_limits = pop.between(low_boundary, top_boundary)
criteria_boundary_df = random_df[criteria_boundary_limits]
criteria_boundary_df
But, maybe, another answer could be had by using quantiles. I used 40 quantiles because 1/40 = 0.025.
# Labels 1..40, one per quantile bucket.
groups_list = [*range(1, 41)]
# Assign every row to one of 40 equal-frequency buckets of 'population'.
random_df['groups'] = pd.qcut(random_df['population'], 40, labels=groups_list)
# Keep the two buckets labelled 20 and 21.
criteria_groups_limits = random_df['groups'].between(20, 21)
criteria_groups_df = random_df[criteria_groups_limits]
criteria_groups_df
First of all I will share objective of running python code.
Getting Daily High and Low Prices for a stock from Yahoo.
Converting the daily high and lows to Weekly High/Lows, monthly High Lows, Yearly High Lows.
Getting exact dates of Weekly or Monthly High Lows from a daily dataframe
Finally, after fetching the dates of the weekly (or monthly) highs and lows, I want to arrange the data by which occurred first during the period, the high or the low. For example, for the week ending 12th December 2020, I get a weekly high of 100 and a weekly low of 97 (after completing step 2), and also the high date and low date from the daily dataframe (from step 3); I want to arrange the prices in order of occurrence. So if the high happened on 9th December and the low happened on 12th December, the prices will be arranged as 100 in row 1 and then 97 in row 2, and this process repeats for the entire dataframe.
What I have been able to achieve.
I have completed step 1 and step 2. I am struggling with step 3 as of now.
Have accomplished Step 1 by
import pandas as pd
import yfinance as yf
Ticker = '^NSEI'
# Full daily history for the ticker.
f = yf.download(Ticker, period="max")
# Only High and Low are needed; remove the other price/volume columns.
f = f.drop(columns=['Adj Close', 'Open', 'Close', 'Volume'])
# Turn the index into a regular column and put the ticker symbol first.
f = f.reset_index()
f.insert(0, 'Ticker', Ticker)
Step 2 by
def _high_low_by(freq):
    """Return the highest High and lowest Low of `f` per ticker and `freq` bucket of 'Date'."""
    buckets = f.groupby(['Ticker', pd.Grouper(key='Date', freq=freq)])
    summary = buckets.agg(
        High=pd.NamedAgg(column='High', aggfunc='max'),
        Low=pd.NamedAgg(column='Low', aggfunc='min'),
    )
    return summary.reset_index()


# Weekly, monthly, quarterly and yearly high/low tables.
fw = _high_low_by('W')
fm = _high_low_by('M')
fq = _high_low_by('Q')
fy = _high_low_by('Y')
Struggling with step 3. I used pd.merge, pd.join and pd.concat, but was unable to combine the weekly dataframe with the dataframe of highs and lows. The number of weekly records increases after performing the merge, and drop_duplicates also didn't work properly when keep='last' was specified.
So if you all can help me in step 3 and 4 would be grateful. Thanks
I solved the query which I posted above. Hope this helps others. Thanks
import pandas as pd
import yfinance as yf
import datetime as dt
import numpy as np
Ticker = '^NSEI'
df = yf.download(Ticker, period='max')
# NOTE(review): errors='ignore' because 'Adj Close' is presumably absent when
# yfinance returns auto-adjusted prices -- confirm for the installed version.
df = df.drop(['Open', 'Close', 'Adj Close', 'Volume'], axis=1,
             errors='ignore').reset_index()

# Adding period keys: week end, month end, half-year label, year end.
df['WkEnd'] = df.Date.dt.to_period('W').dt.start_time + dt.timedelta(days=6)
df['MEnd'] = df.Date.dt.to_period('M').dt.end_time.dt.date
df['6Mend'] = np.where(df.Date.dt.month <= 6,
                       df.Date.dt.year.astype(str) + '-1H',
                       df.Date.dt.year.astype(str) + '-2H')
df['YEnd'] = df.Date.dt.to_period('Y').dt.end_time.dt.date

# key variable for melting: stacks (Hidate, High) and (Lodate, Low) into
# (Date, Price) rows.
d = {'Date': ['Hidate', 'Lodate'], 'Price': ['High', 'Low']}


def _ordered_extremes(daily, period_col):
    """Return each period's high and low as rows ordered by the day they occurred.

    Parameters:
        daily: frame with 'Date', 'High', 'Low' and the `period_col` key.
        period_col: name of the column that identifies the period.

    Returns:
        Frame with `period_col`, 'Date' and 'Price', two rows per period,
        sorted by 'Date'.
    """
    # idxmax/idxmin cannot pick a row from missing prices.
    valid = daily.dropna(subset=['High', 'Low'])
    by_period = valid.groupby(period_col)
    out = by_period.agg({'High': 'max', 'Low': 'min'}).reset_index()
    # Exactly one date per period (the first day the extreme was reached).
    # Merging on (period, price) instead yields extra rows whenever the same
    # extreme occurs twice in a period, which shifts every later date.
    out['Hidate'] = valid.loc[by_period['High'].idxmax(), 'Date'].reset_index(drop=True)
    out['Lodate'] = valid.loc[by_period['Low'].idxmin(), 'Date'].reset_index(drop=True)
    out = pd.lreshape(out, d)
    # Stable sort keeps the high before the low when both fall on one day.
    return out.sort_values(by=['Date'], kind='stable').reset_index(drop=True)


# creating weekly neoformat
dw = _ordered_extremes(df, 'WkEnd')
# creating Monthly neoformat
dm = _ordered_extremes(df, 'MEnd')
# creating 6mth neoformat
d6m = _ordered_extremes(df, '6Mend')
# creating Yearly neoformat
dy = _ordered_extremes(df, 'YEnd')
I guess this is supposed to be simple, but I can't seem to make it work.
I have some stock data
import pandas as pd
import numpy as np
# One random value in [0, 100) per calendar day from 2018-06-01 to 2018-08-01.
values = np.random.rand(62) * 100
days = pd.date_range(start="06/01/2018", end="08/01/2018")
df = pd.DataFrame(data=values, index=days)
I am doing some analysis on it, which results in my drawing some lines on the graph.
And I want to plot a 45° line somewhere on the graph as a reference for the lines I drew on the graph.
What I have tried is
# Attempt from the question: take the index of the last 1/20th of the rows
# and read a starting value from it.
# NOTE(review): len(df)/20 is a float under Python 3 true division; tail()
# presumably needs an integer count -- confirm.
x = df.tail(len(df)/20).index
x = x.reset_index()
# NOTE(review): the df built in the question has no `adj_close` column --
# presumably this refers to the real stock data; confirm.
x_first_val = df.loc[x.loc[0].date].adj_close
In order to get some point and then use slope = 1 and calculate y values.. but this sounds all wrong.
Any ideas?
Here is a possibility:
import pandas as pd
import numpy as np
# pyplot is used for the legend and axis label below but was never imported.
import matplotlib.pyplot as plt

# Example data: one random value per calendar day.
dates = pd.date_range(start="06/01/2018", end="08/01/2018")
df = pd.DataFrame(index=dates,
                  data=np.random.rand(len(dates)) * 100,
                  columns=['data'])

# Get values for the time:
index_range = df.index[('2018-06-18' < df.index) & (df.index < '2018-07-21')]

# Elapsed time since the first selected timestamp, as a relative number of
# days (could be seconds, for example, with pd.Timedelta(seconds=1)).
# Dividing timedeltas does not depend on the index's internal resolution,
# unlike converting to integers and scaling by 1e9.
time_day = ((index_range - index_range[0]) / pd.Timedelta(days=1)).to_numpy()

# Define y-data for a line:
slope = 3  # unit: "something" per day
something = time_day * slope
trendline = pd.Series(something, index=index_range)

# Graph:
df.plot(label='data', alpha=0.8)
trendline.plot(label='some trend')
plt.legend()
plt.ylabel('something')
which gives:
edit - first answer, using dayofyear instead of the timestamps:
import pandas as pd
import numpy as np
# pyplot is used for the legend and axis label below but was never imported.
import matplotlib.pyplot as plt

# Example data: one random value per calendar day.
dates = pd.date_range(start="06/01/2018", end="08/01/2018")
df = pd.DataFrame(index=dates,
                  data=np.random.rand(len(dates)) * 100,
                  columns=['data'])

# Define data for a line:
slope = 3  # unit: "something" per day
index_range = df.index[('2018-06-18' < df.index) & (df.index < '2018-07-21')]
# Day number within the year, shifted so the first selected day is 0.
dayofyear = index_range.dayofyear  # it will not work around the new year...
dayofyear = dayofyear - dayofyear[0]
something = dayofyear * slope
trendline = pd.Series(something, index=index_range)

# Graph:
df.plot(label='data', alpha=0.8)
trendline.plot(label='some trend')
plt.legend()
plt.ylabel('something')
I am a Python beginner and wrote a function for a simple moving average strategy. I created a portfolio DataFrame inside the function and now I want to use this DataFrame outside of the function for plotting some graphs. My solution is: return portfolio - but this does not work. Can anybody help me?
This is my code:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Import a data source - FSE-Data with Index 'Date'
all_close_prices = pd.read_csv('FSE_daily_close.csv')
all_close_prices = all_close_prices.set_index('Date')

# Zalando 'FSE/ZO1_X' doesn't begin in 2004, so it is left out.
# drop() returns a new frame; the result has to be assigned, otherwise the
# column stays in all_close_prices.
all_close_prices = all_close_prices.drop('FSE/ZO1_X', axis=1)

# Fill NaN values with the last available stock price.
# (.ffill() replaces the deprecated fillna(method='ffill') spelling.)
all_close_prices = all_close_prices.ffill()

# Ticker symbols = remaining column names (Zalando is already gone).
ticker_list = list(all_close_prices)

# Create an empty signal dataframe with the same index as the stock prices
signals = pd.DataFrame(index=all_close_prices.index)
def ma_strategy(ticker, long_window, short_window, prices=None,
                initial_capital=100000.0):
    """Backtest a moving-average crossover for one ticker.

    Holds 100 shares of `ticker` while its short moving average is above its
    long moving average, and nothing otherwise.

    Parameters:
        ticker: column name of the stock to trade.
        long_window: number of rows in the long moving average.
        short_window: number of rows in the short moving average.
        prices: optional frame of closing prices, one column per ticker.
            When omitted, the module-level `all_close_prices`, `ticker_list`
            and `signals` are used and `signals` is filled in, as before.
        initial_capital: starting cash.

    Returns:
        DataFrame indexed like the prices with columns 'holdings', 'cash',
        'total', 'return' and 'return_cum'. The caller must keep the returned
        frame, e.g. `portfolio = ma_strategy(...)`.
    """
    if prices is None:
        prices = all_close_prices
        tickers = ticker_list
        signal_frame = signals  # keep filling the shared module-level frame
    else:
        tickers = list(prices.columns)
        signal_frame = pd.DataFrame(index=prices.index)

    # Calculate the moving averages
    moving_avg_long = prices.rolling(window=long_window, min_periods=1).mean()
    moving_avg_short = prices.rolling(window=short_window, min_periods=1).mean()

    # Add the two MAs for every ticker to the signals dataframe
    for name in tickers:
        signal_frame['moving_avg_short_' + name] = moving_avg_short[name]
        signal_frame['moving_avg_long_' + name] = moving_avg_long[name]

    # Set up the signals: 1 while the short MA is above the long MA, else 0;
    # 'positions_' holds the row-to-row change of that signal.
    for name in tickers:
        is_long = (signal_frame['moving_avg_short_' + name]
                   > signal_frame['moving_avg_long_' + name])
        signal_frame['signal_' + name] = np.where(is_long, 1, 0)
        signal_frame['positions_' + name] = signal_frame['signal_' + name].diff(periods=1)

    # Backtest
    # 100 shares on rows where the signal is 1, none where it is 0.
    positions = 100 * signal_frame[['signal_' + ticker]]
    # Shares bought (+) or sold (-) on each row.
    pos_diff = positions.diff()
    close = prices[ticker]

    portfolio = pd.DataFrame(index=prices.index)
    # Market value of the shares held.
    portfolio['holdings'] = positions.multiply(close, axis=0).sum(axis=1)
    # Starting cash minus the running cost of all trades so far.
    portfolio['cash'] = initial_capital - pos_diff.multiply(close, axis=0).sum(axis=1).cumsum()
    portfolio['total'] = portfolio['cash'] + portfolio['holdings']
    # Row-to-row percentage change of the total, and its running sum.
    portfolio['return'] = portfolio['total'].pct_change()
    portfolio['return_cum'] = portfolio['return'].cumsum()
    return portfolio
# Run the strategy for Volkswagen with a 20-row long and 5-row short window.
# NOTE(review): the DataFrame returned by this call is not bound to a name.
ma_strategy('FSE/VOW3_X',20,5)
# Visualize the total value of the portfolio
portfolio_value = plt.figure(figsize=(12, 8))
ax1 = portfolio_value.add_subplot(1, 1, 1, ylabel='Portfolio value in $')
# Plot the equity curve in dollars
# NOTE(review): in the visible code `portfolio` is only assigned inside
# ma_strategy, so this module-level lookup has nothing to refer to.
portfolio['total'].plot(ax=ax1, lw=2.)
You need to assign your function return value to a variable. The line which says
ma_strategy('FSE/VOW3_X',20,5)
probably needs to change to
portfolio = ma_strategy('FSE/VOW3_X',20,5)