I am trying to build trading software, and I am using code from an online YouTuber. I gather the data for all of the companies on the S&P 500 in the get_data_from_yahoo() function. When I run that code it says "Already have" followed by the given ticker, which is fine, but when I go to print the data in the following function, complied_data(), it only prints one ticker, which is ZTS.
Anyone have any ideas?
import bs4 as bs
import datetime as dt
import os
import pandas as pd
from pandas_datareader import data as pdr
import pickle
import requests
import fix_yahoo_finance as yf

def save_sp500_tickers():
    resp = requests.get('http://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
    soup = bs.BeautifulSoup(resp.text, 'lxml')
    table = soup.find('table', {'class': 'wikitable sortable'})
    tickers = []
    for row in table.findAll('tr')[1:]:
        ticker = row.findAll('td')[0].text.replace('.', '-')
        ticker = ticker[:-1]
        tickers.append(ticker)
    with open("sp500tickers.pickle", "wb") as f:
        pickle.dump(tickers, f)
    print(tickers)
    return tickers

save_sp500_tickers()
def get_data_from_yahoo(reload_sp500=False):
    if reload_sp500:
        tickers = save_sp500_tickers()
    else:
        with open("sp500tickers.pickle", "rb") as f:
            tickers = pickle.load(f)
    if not os.path.exists('stock_dfs'):
        os.makedirs('stock_dfs')
    start = dt.datetime(2019, 6, 8)
    end = dt.datetime.now()
    for ticker in tickers:
        print(ticker)
        if not os.path.exists('stock_dfs/{}.csv'.format(ticker)):
            df = pdr.get_data_yahoo(ticker, start, end)
            df.reset_index(inplace=True)
            df.set_index("Date", inplace=True)
            df.to_csv('stock_dfs/{}.csv'.format(ticker))
        else:
            print('Already have {}'.format(ticker))

save_sp500_tickers()
get_data_from_yahoo()
def complied_data():
    with open("sp500tickers.pickle", "rb") as f:
        tickers = pickle.load(f)

    main_df = pd.DataFrame()

    for count, ticker in enumerate(tickers):
        df = pd.read_csv('stock_dfs/{}.csv'.format(ticker))
        df.set_index('Date', inplace=True)
        df.rename(columns={'Adj Close': ticker}, inplace=True)
        df.drop(['Open', 'High', 'Low', 'Close', 'Volume'], 1, inplace=True)

    if main_df.empty:
        main_df = df
    else:
        main_df = main_df.join(df, how='outer')

    if count % 10 == 0:
        print(count)

    print(main_df.head())
    main_df.to_csv('sp500_joined_closes.csv')

complied_data()
When I run this code this is what it says:
MMM
Already have MMM
ABT
Already have ABT
ABBV
Already have ABBV
ABMD
Already have ABMD
ACN
Already have ACN
ATVI
Already have ATVI
ADBE
Already have ADBE
AMD
Already have AMD
AAP
Already have AAP
AES
Already have AES
AMG
Already have AMG
AFL
Already have AFL
A
Already have A
APD
Already have APD
AKAM
Already have AKAM
ALK
Already have ALK
ALB
Already have ALB
It then continues to say that it already has all 500 companies (I did not show the whole thing because the list is very long). But when I run the complied_data() function it only prints the data for one ticker:
ZTS
Date
2019-01-02 83.945038
2019-01-03 81.043526
2019-01-04 84.223267
2019-01-07 84.730026
2019-01-08 85.991997
The problem is in a for loop, specifically the one in complied_data.
The if-else and if blocks should be included in the for loop:
for count, ticker in enumerate(tickers):
    df = pd.read_csv('stock_dfs/{}.csv'.format(ticker))
    df.set_index('Date', inplace=True)
    df.rename(columns={'Adj Close': ticker}, inplace=True)
    df.drop(['Open', 'High', 'Low', 'Close', 'Volume'], 1, inplace=True)

    if main_df.empty:
        main_df = df
    else:
        main_df = main_df.join(df, how='outer')

    if count % 10 == 0:
        print(count)
Otherwise they are evaluated only once, after the loop has finished, and operate on the last element alone.
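To see the scoping issue in isolation, here is a minimal illustration with toy data (unrelated to the tickers):

names = ['MMM', 'ABT', 'ZTS']
for name in names:
    pass            # the loop body runs once per element
print(name)         # dedented: runs once, after the loop, and prints 'ZTS'

A statement dedented out of the loop only ever sees the final loop variable, which is exactly why main_df ended up holding nothing but ZTS.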
The following is the output when changing to the above indentation:
(... omitted counting from 0)
470
480
490
500
MMM ABT ABBV ABMD ... YUM ZBH ZION ZTS
Date ...
2019-06-10 165.332672 80.643486 74.704918 272.429993 ... 107.794380 121.242027 43.187107 109.920105
2019-06-11 165.941788 80.494644 75.889320 262.029999 ... 106.722885 120.016762 43.758469 109.860268
2019-06-12 166.040024 81.318237 76.277657 254.539993 ... 108.082100 120.225945 43.512192 111.136780
2019-06-13 165.882843 81.655624 76.646561 255.529999 ... 108.121788 119.329407 44.063854 109.730621
2019-06-14 163.760803 81.586166 76.394157 250.960007 ... 108.925407 116.998398 44.211620 110.488556
[5 rows x 505 columns]
Could someone please explain how to rewrite this code to run faster using pools? Sorry for the question, but I'm a beginner and I've spent a lot of time trying without being able to figure it out.
for i in constituents:
    print(i)  # print out the ticker so we know the downloading progress
    prc = yf.download(i, interval="1d", start=start_date, end=end_date)
    prc = pd.DataFrame(prc['Adj Close'])  # select adjusted close price only
    prc.columns = [i]  # rename the column with the ticker of the stock
    try:
        df_prc = pd.concat([df_prc, prc], axis=1)  # if the dataframe already exists, join the newly downloaded data to the existing table
    except:
        df_prc = prc  # create the dataframe for the first ticker
    stk = yf.Ticker(i)
    try:
        stk.info['floatShares']
    except:
        stk.info['floatShares'] = None
    try:
        stk.info['sharesOutstanding']
    except:
        stk.info['sharesOutstanding'] = None
    if stk.info['floatShares']:
        mcap = prc * stk.info['floatShares']
    elif stk.info['sharesOutstanding']:
        mcap = prc * stk.info['sharesOutstanding']
    else:
        mcap = prc * ((stk.info['marketCap']) / (stk.info['previousClose']))
    try:
        df_mcap = pd.concat([df_mcap, mcap], axis=1)
    except:
        df_mcap = mcap
To clarify my question, here is the code that runs before the one I posted:
import yfinance as yf
import pandas as pd

start_date = "2021-01-04"
end_date = "2021-11-29"

idx = "^STOXX50E"
Index = yf.download(idx,                 # ticker
                    interval="1d",       # daily frequency
                    start=start_date, end=end_date)  # sampling period
Index = pd.DataFrame(Index['Adj Close'].rename(idx))  # select adjusted close price

page = pd.read_html('https://en.wikipedia.org/wiki/EURO_STOXX_50')
constituents = page[2]['Ticker']  # we only need tickers
constituents.pop(46)  # ticker UMG.AS is removed because otherwise the for loop produces an error
I was able to reduce the running time from 386 seconds to 17 seconds with the following code. Note that I had to import the ssl module and issue ssl._create_default_https_context = ssl._create_unverified_context to overcome an SSL certificate error I was getting from the pd.read_html() method call.
import yfinance as yf
import pandas as pd
from multiprocessing.pool import ThreadPool
from functools import partial
import ssl

ssl._create_default_https_context = ssl._create_unverified_context

start_date = "2021-01-04"
end_date = "2021-11-29"

def process_constituent(data, constituent):
    prc = pd.DataFrame(data[constituent]['Adj Close'])  # select adjusted close price only
    prc.columns = [constituent]  # rename the column with the ticker of the stock
    stk = yf.Ticker(constituent)
    try:
        stk.info['floatShares']
    except:
        stk.info['floatShares'] = None
    try:
        stk.info['sharesOutstanding']
    except:
        stk.info['sharesOutstanding'] = None
    if stk.info['floatShares']:
        mcap = prc * stk.info['floatShares']
    elif stk.info['sharesOutstanding']:
        mcap = prc * stk.info['sharesOutstanding']
    else:
        mcap = prc * ((stk.info['marketCap']) / (stk.info['previousClose']))
    return mcap

def process_constituents(constituents):
    # Download all the tickers:
    data = yf.download(
        tickers=' '.join(constituents),
        interval='1d',
        start=start_date,
        end=end_date,
        group_by='ticker',
        adjust=False,
        threads=True,
        proxy=None
    )
    pool = ThreadPool(len(constituents))
    for idx, mcap in enumerate(pool.imap(partial(process_constituent, data), constituents)):
        if idx == 0:
            df_mcap = mcap
        else:
            df_mcap = pd.concat([df_mcap, mcap], axis=1)
    return df_mcap

def main():
    idx = "^STOXX50E"
    Index = yf.download(idx,                 # ticker
                        interval="1d",       # daily frequency
                        start=start_date, end=end_date)  # sampling period
    Index = pd.DataFrame(Index['Adj Close'].rename(idx))  # select adjusted close price
    page = pd.read_html('https://en.wikipedia.org/wiki/EURO_STOXX_50')
    constituents = page[2]['Ticker']  # we only need tickers
    constituents.pop(46)  # ticker UMG.AS is removed because otherwise the for loop produces an error
    df_mcap = process_constituents(constituents)
    print(df_mcap)

if __name__ == '__main__':
    main()
Prints:
[*********************100%***********************] 1 of 1 completed
[*********************100%***********************] 49 of 49 completed
ADS.DE ADYEN.AS AD.AS AI.PA ... TTE.PA DG.PA VOW.DE VNA.DE
Date ...
2021-01-04 5.083322e+10 4.880933e+10 2.414267e+10 6.200227e+10 ... 8.326552e+10 3.746300e+10 3.842743e+10 3.322534e+10
2021-01-05 4.983515e+10 4.800875e+10 2.403104e+10 6.134340e+10 ... 8.545638e+10 3.682896e+10 3.849667e+10 3.338207e+10
2021-01-06 5.019652e+10 4.548888e+10 2.411223e+10 6.147971e+10 ... 8.921219e+10 3.824197e+10 3.886594e+10 3.203872e+10
2021-01-07 4.964585e+10 4.500328e+10 2.407163e+10 6.195684e+10 ... 9.018724e+10 3.830537e+10 3.946601e+10 3.183722e+10
2021-01-08 5.078160e+10 4.610573e+10 2.400059e+10 6.232034e+10 ... 9.024743e+10 3.879449e+10 3.893518e+10 3.225142e+10
... ... ... ... ... ... ... ... ... ...
2021-11-22 4.851034e+10 6.454539e+10 3.178177e+10 7.108912e+10 ... 1.073903e+11 4.175263e+10 6.518791e+10 2.937961e+10
2021-11-23 4.727562e+10 6.298360e+10 3.187473e+10 7.017166e+10 ... 1.086315e+11 4.224230e+10 6.532881e+10 2.843048e+10
2021-11-24 4.667566e+10 6.206490e+10 3.153388e+10 7.028287e+10 ... 1.092141e+11 4.271798e+10 6.326233e+10 2.985586e+10
2021-11-25 4.659740e+10 6.453227e+10 3.159068e+10 7.013459e+10 ... 1.091381e+11 4.279726e+10 6.298054e+10 3.005144e+10
2021-11-26 4.405841e+10 6.358732e+10 3.132214e+10 6.882791e+10 ... 1.026661e+11 3.918302e+10 6.072620e+10 2.859604e+10
[233 rows x 49 columns]
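Two details of the code above are worth flagging. ThreadPool lives in multiprocessing.pool but is thread-based, so the if __name__ == '__main__' guard is not strictly required here the way it would be for a process Pool; it is simply harmless. And ThreadPool(len(constituents)) starts one thread per ticker, which is fine for ~50 names but heavy-handed for larger universes; a hedged tweak (32 is an arbitrary cap, not anything yfinance mandates):

pool = ThreadPool(min(32, len(constituents)))  # cap the thread count for big ticker lists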
import yfinance as yf
import pandas as pd
from multiprocessing.pool import ThreadPool
from functools import partial
import ssl

ssl._create_default_https_context = ssl._create_unverified_context

start_date = "2021-01-04"
end_date = "2021-11-29"

# download data
idx = "^STOXX50E"
Index = yf.download(idx,                 # ticker
                    interval="1d",       # daily frequency
                    start=start_date, end=end_date)  # sampling period
Index = pd.DataFrame(Index['Adj Close'].rename(idx))  # select adjusted close price

page = pd.read_html('https://en.wikipedia.org/wiki/EURO_STOXX_50')
constituents = page[2]['Ticker']  # we only need tickers
constituents.pop(46)  # ticker UMG.AS is removed because otherwise the for loop produces an error

data = yf.download(
    tickers=' '.join(constituents),
    interval='1d',
    start=start_date,
    end=end_date,
    group_by='ticker',
    adjust=False,
    threads=True,
    proxy=None
)

def process_prc(data, constituent):
    prc = pd.DataFrame(data[constituent]['Adj Close'])  # select adjusted close price only
    prc.columns = [constituent]  # rename the column with the ticker of the stock
    return prc

def process_constituent(data, constituent):
    prc = pd.DataFrame(data[constituent]['Adj Close'])  # select adjusted close price only
    prc.columns = [constituent]  # rename the column with the ticker of the stock
    stk = yf.Ticker(constituent)
    try:
        stk.info['floatShares']
    except:
        stk.info['floatShares'] = None
    try:
        stk.info['sharesOutstanding']
    except:
        stk.info['sharesOutstanding'] = None
    if stk.info['floatShares']:
        mcap = prc * stk.info['floatShares']
    elif stk.info['sharesOutstanding']:
        mcap = prc * stk.info['sharesOutstanding']
    else:
        mcap = prc * ((stk.info['marketCap']) / (stk.info['previousClose']))
    return mcap

def process_dfprc(constituents):
    pool = ThreadPool(len(constituents))
    for idx, prc in enumerate(pool.imap(partial(process_prc, data), constituents)):
        try:
            df_prc = pd.concat([df_prc, prc], axis=1)  # join the newly downloaded data to the existing table
        except:
            df_prc = prc  # create the dataframe for the first ticker
    return df_prc

def process_constituents(constituents):
    pool = ThreadPool(len(constituents))
    for idx, mcap in enumerate(pool.imap(partial(process_constituent, data), constituents)):
        try:
            df_mcap = pd.concat([df_mcap, mcap], axis=1)
        except:
            df_mcap = mcap
    return df_mcap
calculating df_mcap:
if __name__ == '__main__':
    df_mcap = process_constituents(constituents)
df_mcap
print:
ADS.DE ADYEN.AS AD.AS AI.PA AIR.PA ALV.DE ABI.BR ASML.AS CS.PA BAS.DE ... SAF.PA SAN.PA SAP.DE SU.PA SIE.DE STLA.MI TTE.PA DG.PA VOW.DE VNA.DE
Date
2021-01-04 5.083322e+10 4.880933e+10 2.414267e+10 6.200227e+10 6.001130e+10 7.936595e+10 5.306992e+10 1.647683e+11 3.507867e+10 5.679746e+10 ... 4.005571e+10 8.460465e+10 1.080643e+11 6.363292e+10 9.197665e+10 2.174885e+10 8.763420e+10 3.746300e+10 3.842743e+10 3.316382e+10
2021-01-05 4.983515e+10 4.800875e+10 2.403104e+10 6.134340e+10 5.997124e+10 7.855080e+10 5.304209e+10 1.650319e+11 3.506423e+10 5.636858e+10 ... 4.014193e+10 8.459394e+10 1.077770e+11 6.303187e+10 9.178897e+10 2.169038e+10 8.994003e+10 3.682896e+10 3.849667e+10 3.332026e+10
2021-01-06 5.019652e+10 4.548888e+10 2.411223e+10 6.147971e+10 6.019823e+10 8.263458e+10 5.451703e+10 1.633893e+11 3.656208e+10 5.896818e+10 ... 4.010744e+10 8.407989e+10 1.082285e+11 6.478275e+10 9.530789e+10 2.123436e+10 9.389289e+10 3.824197e+10 3.886594e+10 3.197940e+10
2021-01-07 4.964585e+10 4.500328e+10 2.407163e+10 6.195684e+10 5.983105e+10 8.195529e+10 5.417381e+10 1.638151e+11 3.678766e+10 5.987848e+10 ... 3.993501e+10 8.323385e+10 1.072435e+11 6.611552e+10 9.720029e+10 2.161438e+10 9.491911e+10 3.830537e+10 3.946601e+10 3.177828e+10
2021-01-08 5.078160e+10 4.610573e+10 2.400059e+10 6.232034e+10 6.015150e+10 8.221501e+10 5.367288e+10 1.687430e+11 3.675157e+10 6.002728e+10 ... 4.012468e+10 8.437975e+10 1.089467e+11 6.682110e+10 9.696569e+10 2.121390e+10 9.498246e+10 3.879449e+10 3.893518e+10 3.219170e+10
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2021-11-22 4.851034e+10 6.454539e+10 3.178177e+10 7.108912e+10 7.262242e+10 8.481195e+10 4.905974e+10 3.047505e+11 4.871214e+10 5.686775e+10 ... 3.862038e+10 9.936060e+10 1.269160e+11 8.638617e+10 1.251472e+11 3.187450e+10 1.074238e+11 4.175263e+10 6.518791e+10 2.937961e+10
2021-11-23 4.727562e+10 6.298360e+10 3.187473e+10 7.017166e+10 7.361048e+10 8.489549e+10 4.868553e+10 2.879491e+11 4.890396e+10 5.780438e+10 ... 3.883494e+10 9.827092e+10 1.247044e+11 8.521374e+10 1.230764e+11 3.105655e+10 1.086654e+11 4.224230e+10 6.532881e+10 2.843048e+10
2021-11-24 4.667566e+10 6.206490e+10 3.153388e+10 7.028287e+10 7.441161e+10 8.483284e+10 4.900361e+10 2.891317e+11 4.899028e+10 5.683102e+10 ... 3.892492e+10 9.708117e+10 1.240785e+11 8.322063e+10 1.219527e+11 3.068774e+10 1.092482e+11 4.271798e+10 6.326233e+10 2.985586e+10
2021-11-25 4.659740e+10 6.453227e+10 3.159068e+10 7.013459e+10 7.494570e+10 8.464487e+10 5.084663e+10 2.899473e+11 4.885600e+10 5.657391e+10 ... 3.898721e+10 9.634731e+10 1.249965e+11 8.351906e+10 1.232691e+11 3.050516e+10 1.091722e+11 4.279726e+10 6.298054e+10 3.005144e+10
2021-11-26 4.405841e+10 6.358732e+10 3.132214e+10 6.882791e+10 6.633355e+10 7.996257e+10 4.789032e+10 2.795891e+11 4.646787e+10 5.317635e+10 ... 3.498675e+10 9.452378e+10 1.201978e+11 8.077986e+10 1.165751e+11 2.842012e+10 1.026981e+11 3.918302e+10 6.072620e+10 2.859604e+10
233 rows × 49 columns
calculating df_prc:
if __name__ == '__main__':
    df_prc = process_dfprc(constituents)
df_prc
print:
ADS.DE ADYEN.AS AD.AS AI.PA AIR.PA ALV.DE ABI.BR ASML.AS CS.PA BAS.DE ... SAF.PA SAN.PA SAP.DE SU.PA SIE.DE STLA.MI TTE.PA DG.PA VOW.DE VNA.DE
Date
2021-01-04 292.307343 1859.5 23.374092 133.809341 89.889999 190.011627 56.726482 404.039764 18.287489 61.853455 ... 115.747612 76.089233 103.588997 119.404495 114.593018 11.912073 34.584999 80.331764 163.641632 57.650417
2021-01-05 286.568085 1829.0 23.266014 132.387405 89.830002 188.060059 56.696735 404.686218 18.279963 61.386387 ... 115.996742 76.079597 103.313599 118.276649 114.359184 11.880051 35.494999 78.972191 163.936478 57.922352
2021-01-06 288.646088 1733.0 23.344616 132.681595 90.169998 197.837112 58.273296 400.658264 19.060837 64.217407 ... 115.897095 75.617287 103.746368 121.562111 118.743378 11.630281 37.055000 82.002113 165.509003 55.591476
2021-01-07 285.479584 1714.5 23.305313 133.711288 89.620003 196.210800 57.906425 401.702545 19.178438 65.208740 ... 115.398827 74.856400 102.802139 124.062988 121.101105 11.838422 37.459999 82.138069 168.064377 55.241844
2021-01-08 292.010498 1756.5 23.236538 134.495789 90.099998 196.832611 57.370987 413.786438 19.159622 65.370781 ... 115.946915 75.886971 104.434860 125.386978 120.808823 11.619075 37.485001 83.186890 165.803864 55.960529
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2021-11-22 278.950012 2459.0 30.770000 153.419998 108.779999 203.050003 52.439999 747.299988 25.395000 61.930000 ... 111.599998 89.360001 121.660004 162.100006 155.919998 17.458000 42.395000 89.529999 277.600006 51.072109
2021-11-23 271.850006 2399.5 30.860001 151.440002 110.260002 203.250000 52.040001 706.099976 25.495001 62.950001 ... 112.220001 88.379997 119.540001 159.899994 153.339996 17.010000 42.884998 90.580002 278.200012 49.422203
2021-11-24 268.399994 2364.5 30.530001 151.679993 111.459999 203.100006 52.380001 709.000000 25.540001 61.889999 ... 112.480003 87.309998 118.940002 156.160004 151.940002 16.808001 43.115002 91.599998 269.399994 51.900002
2021-11-25 267.950012 2458.5 30.584999 151.360001 112.260002 202.649994 54.349998 711.000000 25.469999 61.610001 ... 112.660004 86.650002 119.820000 156.720001 153.580002 16.708000 43.084999 91.769997 268.200012 52.240002
2021-11-26 253.350006 2422.5 30.325001 148.539993 99.360001 191.440002 51.189999 685.599976 24.225000 57.910000 ... 101.099998 85.010002 115.220001 151.580002 145.240005 15.566000 40.529999 84.019997 258.600006 49.709999
233 rows × 49 columns
I'm getting a KeyError on my 'Ticker' column when trying to select only AAPL stock, and I'm not sure why. I've also tried using aapl = all_data.loc[all_data['Ticker'] == 'AAPL'].
import pandas_datareader as pdr
import datetime
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

def get(tickers, startdate, enddate):
    def data(ticker):
        return pdr.get_data_yahoo(ticker, start=startdate, end=enddate)
    datas = map(data, tickers)
    return pd.concat(datas, keys=tickers, names=['Ticker', 'Date'])

tickers = ['AAPL', 'MSFT', 'IBM', 'GOOG']
all_data = get(tickers, datetime.datetime(2006, 10, 1), datetime.datetime(2012, 1, 1))
print(all_data.head())

aapl = all_data[all_data['Ticker'] == 'AAPL']
This is probably what you are looking for:
all_data[all_data.index.get_loc("AAPL")]
outputs
High Low Open Close Volume \
Ticker Date
AAPL 2006-10-02 2.709643 2.653571 2.682143 2.673571 712639200.0
2006-10-03 2.676786 2.613929 2.658929 2.645714 790708800.0
2006-10-04 2.695000 2.612857 2.646429 2.692143 829082800.0
2006-10-05 2.720000 2.647500 2.661786 2.672500 683883200.0
2006-10-06 2.680000 2.636071 2.657857 2.650714 466958800.0
... ... ... ... ... ...
2011-12-23 14.413929 14.267500 14.274643 14.404643 269399200.0
2011-12-27 14.610357 14.393572 14.396428 14.518929 265076000.0
2011-12-28 14.580358 14.333571 14.531786 14.380000 228662000.0
2011-12-29 14.487500 14.303928 14.407143 14.468572 215978000.0
2011-12-30 14.510000 14.410357 14.411072 14.464286 179662000.0
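For completeness: the KeyError happens because 'Ticker' is an index level, not a column, in the frame built by pd.concat(..., keys=tickers, names=['Ticker', 'Date']). Level-based selection is a clearer spelling of the same thing; a small sketch of two equivalent forms:

aapl = all_data.loc['AAPL']                 # select by the first index level
aapl = all_data.xs('AAPL', level='Ticker')  # same rows, selected by level name

Both return a Date-indexed frame with the 'Ticker' level dropped.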
So I am trying to get multiple stock prices using pandas and pandas-datareader. If I only import one ticker it runs fine, but if I use more than one, an error arises. The code is:
import pandas as pd
import pandas_datareader as web
import datetime as dt
stocks = ['BA', 'AMD']
start = dt.datetime(2018, 1, 1)
end = dt.datetime(2020, 1, 1)
d = web.DataReader(stocks, 'yahoo', start, end)
Though I get the error:
ValueError: Wrong number of items passed 2, placement implies 1
So how do I get around it only allowing one stock to be passed?
So far I have tried using quandl and google instead, which don't work either. I have also tried pdr.get_data_yahoo, but I get the same result, and yf.download() still gives the same issue. Does anyone have any ideas to get around this? Thank you.
EDIT: Full code:
import pandas as pd
import pandas_datareader as web
import datetime as dt
import yfinance as yf
import numpy as np
stocks = ['BA', 'AMD', 'AAPL']
start = dt.datetime(2018, 1, 1)
end = dt.datetime(2020, 1, 1)
d = web.DataReader(stocks, 'yahoo', start, end)
d['sma50'] = np.round(d['Close'].rolling(window=2).mean(), decimals=2)
d['sma200'] = np.round(d['Close'].rolling(window=14).mean(), decimals=2)
d['200-50'] = d['sma200'] - d['sma50']
_buy = -2
d['Crossover_Long'] = np.where(d['200-50'] < _buy, 1, 0)
d['Crossover_Long_Change']=d.Crossover_Long.diff()
d['buy'] = np.where(d['Crossover_Long_Change'] == 1, 'buy', 'n/a')
d['sell'] = np.where(d['Crossover_Long_Change'] == -1, 'sell', 'n/a')
pd.set_option('display.max_rows', 5093)
d.drop(['High', 'Low', 'Close', 'Volume', 'Open'], axis=1, inplace=True)
d.dropna(inplace=True)
#make 2 dataframe
d.set_index(d['Adj Close'], inplace=True)
buy_price = d.index[d['Crossover_Long_Change']==1]
sell_price = d.index[d['Crossover_Long_Change']==-1]
d['Crossover_Long_Change'].value_counts()
profit_loss = (sell_price - buy_price)*10
commision = buy_price*.01
position_value = (buy_price + commision)*10
percent_return = (profit_loss/position_value)*100
percent_rounded = np.round(percent_return, decimals=2)
prices = {
    "Buy Price": buy_price,
    "Sell Price": sell_price,
    "P/L": profit_loss,
    "Return": percent_rounded
}
df = pd.DataFrame(prices)
print('The return was {}%, and profit or loss was ${} '.format(np.round(df['Return'].sum(), decimals=2),
                                                               np.round(df['P/L'].sum(), decimals=2)))
d
I tried 3 stocks in your code and it returns data for all 3; I'm not sure I understood the problem you're facing.
import pandas as pd
import pandas_datareader as web
import datetime as dt
stocks = ['BA', 'AMD', 'AAPL']
start = dt.datetime(2018, 1, 1)
end = dt.datetime(2020, 1, 1)
d = web.DataReader(stocks, 'yahoo', start, end)
print(d)
Output:
Attributes Adj Close Close ... Open Volume
Symbols BA AMD AAPL BA AMD AAPL ... BA AMD AAPL BA AMD AAPL
Date ...
2018-01-02 282.886383 10.980000 166.353714 296.839996 10.980000 172.259995 ... 295.750000 10.420000 170.160004 2978900.0 44146300.0 25555900.0
2018-01-03 283.801239 11.550000 166.324722 297.799988 11.550000 172.229996 ... 295.940002 11.610000 172.529999 3211200.0 154066700.0 29517900.0
2018-01-04 282.724396 12.120000 167.097290 296.670013 12.120000 173.029999 ... 297.940002 12.100000 172.539993 4171700.0 109503000.0 22434600.0
2018-01-05 294.322296 11.880000 168.999741 308.839996 11.880000 175.000000 ... 296.769989 12.190000 173.440002 6177700.0 63808900.0 23660000.0
2018-01-08 295.570740 12.280000 168.372040 310.149994 12.280000 174.350006 ... 308.660004 12.010000 174.350006 4124900.0 63346000.0 20567800.0
... ... ... ... ... ... ... ... ... ... ... ... ... ...
2019-12-24 331.030457 46.540001 282.831299 333.000000 46.540001 284.269989 ... 339.510010 46.099998 284.690002 4120100.0 44432200.0 12119700.0
2019-12-26 327.968689 46.630001 288.442780 329.920013 46.630001 289.910004 ... 332.700012 46.990002 284.820007 4593400.0 57562800.0 23280300.0
2019-12-27 328.187408 46.180000 288.333313 330.140015 46.180000 289.799988 ... 330.200012 46.849998 291.119995 4124000.0 36581300.0 36566500.0
2019-12-30 324.469513 45.520000 290.044617 326.399994 45.520000 291.519989 ... 330.500000 46.139999 289.459991 4525500.0 41149700.0 36028600.0
2019-12-31 323.833313 45.860001 292.163818 325.760010 45.860001 293.649994 ... 325.410004 45.070000 289.929993 4958800.0 31673200.0 25201400.0
I think the error comes from your moving average, specifically the line

d['sma50'] = np.round(d['Close'].rolling(window=2).mean(), decimals=2)

Because d holds 3 stocks, you have to separate each stock and compute each moving average separately, as sketched below.
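A minimal sketch of that separation (assuming d comes from web.DataReader(stocks, 'yahoo', start, end) as above): d['Close'] is itself a DataFrame with one column per symbol, so keep the rolling means as per-symbol frames instead of squeezing them into a single d['sma50'] column, which is what raises "Wrong number of items passed":

close = d['Close']                                 # one column per symbol, e.g. BA, AMD, AAPL
sma50 = close.rolling(window=2).mean().round(2)    # short moving average, per symbol
sma200 = close.rolling(window=14).mean().round(2)  # long moving average, per symbol
spread = sma200 - sma50                            # the '200-50' values, per symbol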
EDIT: I tried something for two stocks only (BA and AMD), but it is not the best solution because I keep repeating myself on every line.
I'm just a beginner in Python, but maybe this will help you find a solution to your problem.
PS: the last part (printing the P&L and Return) doesn't work very well.
"
import pandas as pd
import pandas_datareader as web
import datetime as dt
import numpy as np

stock1 = ['BA']
stock2 = ['AMD']
start = dt.datetime(2018, 1, 1)
end = dt.datetime(2020, 1, 1)
d1 = web.DataReader(stock1, 'yahoo', start, end)
d2 = web.DataReader(stock2, 'yahoo', start, end)

d1['sma50'] = np.round(d1['Close'].rolling(window=2).mean(), decimals=2)
d2['sma50'] = np.round(d2['Close'].rolling(window=2).mean(), decimals=2)
d1['sma200'] = np.round(d1['Close'].rolling(window=14).mean(), decimals=2)
d2['sma200'] = np.round(d2['Close'].rolling(window=14).mean(), decimals=2)
d1['200-50'] = d1['sma200'] - d1['sma50']
d2['200-50'] = d2['sma200'] - d2['sma50']
_buy = -2
d1['Crossover_Long'] = np.where(d1['200-50'] < _buy, 1, 0)
d2['Crossover_Long'] = np.where(d2['200-50'] < _buy, 1, 0)
d1['Crossover_Long_Change'] = d1.Crossover_Long.diff()
d2['Crossover_Long_Change'] = d2.Crossover_Long.diff()
d1['buy'] = np.where(d1['Crossover_Long_Change'] == 1, 'buy', 'n/a')
d2['buy'] = np.where(d2['Crossover_Long_Change'] == 1, 'buy', 'n/a')
d1['sell_BA'] = np.where(d1['Crossover_Long_Change'] == -1, 'sell', 'n/a')
d2['sell_AMD'] = np.where(d2['Crossover_Long_Change'] == -1, 'sell', 'n/a')
pd.set_option('display.max_rows', 5093)
d1.drop(['High', 'Low', 'Close', 'Volume', 'Open'], axis=1, inplace=True)
d2.drop(['High', 'Low', 'Close', 'Volume', 'Open'], axis=1, inplace=True)
d2.dropna(inplace=True)
d1.dropna(inplace=True)
d1.set_index("Adj Close", inplace=True)
d2.set_index("Adj Close", inplace=True)

buy_price_BA = np.array(d1.index[d1['Crossover_Long_Change'] == 1])
buy_price_AMD = np.array(d2.index[d2['Crossover_Long_Change'] == 1])
sell_price_BA = np.array(d1.index[d1['Crossover_Long_Change'] == -1])
sell_price_AMD = np.array(d2.index[d2['Crossover_Long_Change'] == -1])
d1['Crossover_Long_Change'].value_counts()
d2['Crossover_Long_Change'].value_counts()

profit_loss_BA = (sell_price_BA - buy_price_BA) * 10
profit_loss_AMD = (sell_price_AMD - buy_price_AMD) * 10
commision_BA = buy_price_BA * .01
commision_AMD = buy_price_AMD * .01
position_value_BA = (buy_price_BA + commision_BA) * 10
position_value_AMD = (buy_price_AMD + commision_AMD) * 10
percent_return_BA = np.round(((profit_loss_BA / position_value_BA) * 100), decimals=2)
percent_return_AMD = np.round(((profit_loss_AMD / position_value_AMD) * 100), decimals=2)

prices_BA = {
    "Buy Price BA": [buy_price_BA],
    "Sell Price BA": [sell_price_BA],
    "P/L BA": [profit_loss_BA],
    "Return BA": [percent_return_BA]}
df = pd.DataFrame(prices_BA)
print('The return was {}%, and profit or loss was ${} '.format(np.round(df['Return BA'].sum(), decimals=2),
                                                               np.round(df['P/L BA'].sum(), decimals=2)))

prices_AMD = {
    "Buy Price AMD": [buy_price_AMD],
    "Sell Price AMD": [sell_price_AMD],
    "P/L AMD": [profit_loss_AMD],
    "Return AMD": [percent_return_AMD]}
df = pd.DataFrame(prices_AMD)
print('The return was {}%, and profit or loss was ${} '.format(np.round(df['Return AMD'].sum(), decimals=2),
                                                               np.round(df['P/L AMD'].sum(), decimals=2)))
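Since the code above repeats every step for each ticker, one possible refactor is to wrap the per-stock pipeline in a function and call it once per symbol. This is only a sketch, assuming each ticker is downloaded individually as a plain string (which keeps the columns flat) and that the same windows and threshold apply to every stock:

def crossover_prices(ticker, start, end, buy_threshold=-2):
    # Download one symbol and flag its SMA crossovers.
    d = web.DataReader(ticker, 'yahoo', start, end)
    d['sma50'] = np.round(d['Close'].rolling(window=2).mean(), decimals=2)
    d['sma200'] = np.round(d['Close'].rolling(window=14).mean(), decimals=2)
    d['200-50'] = d['sma200'] - d['sma50']
    d['Crossover_Long'] = np.where(d['200-50'] < buy_threshold, 1, 0)
    d['Crossover_Long_Change'] = d.Crossover_Long.diff()
    d.dropna(inplace=True)
    d.set_index('Adj Close', inplace=True)
    buy = np.array(d.index[d['Crossover_Long_Change'] == 1])
    sell = np.array(d.index[d['Crossover_Long_Change'] == -1])
    return buy, sell

buy_price_BA, sell_price_BA = crossover_prices('BA', start, end)
buy_price_AMD, sell_price_AMD = crossover_prices('AMD', start, end)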
It seems like there's a bug in the pandas data reader. I work around it by initialising with one symbol and then setting the symbols property on the instantiated object. After doing that, it works fine to call read() on tmp below.
import pandas_datareader as pdr
all_symbols = ['ibb', 'xly', 'fb', 'exx1.de']
tmp = pdr.yahoo.daily.YahooDailyReader(symbols=all_symbols[0])
# this is a work-around, pdr is broken...
tmp.symbols = all_symbols
data = tmp.read()
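If the workaround ever stops being enough, the batch download in yfinance (the same call the threaded answers above rely on) reaches a similar multi-symbol frame without pandas-datareader; a sketch, where the start/end dates are placeholders rather than values from the original post:

import yfinance as yf

data = yf.download(' '.join(all_symbols), start='2018-01-01', end='2020-01-01', group_by='ticker')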
I'm attempting to create a table as follows, where equities in a list get appended as columns to the dataframe:
Fundamentals CTRP EBAY ...... MPNGF
price
dividend
five_year_dividend
pe_ratio
pegRatio
priceToBook
price_to_sales
book_value
ebit
net_income
EPS
DebtEquity
threeYearAverageReturn
At the moment, based on the code below, only the last equity in the list is showing up:
Fundamentals MPNGF
price
dividend
five_year_dividend
pe_ratio
pegRatio
priceToBook
price_to_sales
book_value
ebit
net_income
EPS
DebtEquity
threeYearAverageReturn
from yahoofinancials import YahooFinancials
import pandas as pd
import lxml
from lxml import html
import requests
import numpy as np
from datetime import datetime

def scrape_table(url):
    page = requests.get(url)
    tree = html.fromstring(page.content)
    table = tree.xpath('//table')
    assert len(table) == 1
    df = pd.read_html(lxml.etree.tostring(table[0], method='html'))[0]
    df = df.set_index(0)
    df = df.dropna()
    df = df.transpose()
    df = df.replace('-', '0')
    df[df.columns[0]] = pd.to_datetime(df[df.columns[0]])
    cols = list(df.columns)
    cols[0] = 'Date'
    df = df.set_axis(cols, axis='columns', inplace=False)
    numeric_columns = list(df.columns)[1::]
    df[numeric_columns] = df[numeric_columns].astype(np.float64)
    return df

ecommerce = ['CTRP', 'EBAY', 'GRUB', 'BABA', 'JD', 'EXPE', 'AMZN', 'BKNG', 'MPNGF']

price = []
dividend = []
five_year_dividend = []
pe_ratio = []
pegRatio = []
priceToBook = []
price_to_sales = []
book_value = []
ebit = []
net_income = []
EPS = []
DebtEquity = []
threeYearAverageReturn = []

for i, symbol in enumerate(ecommerce):
    yahoo_financials = YahooFinancials(symbol)
    balance_sheet_url = 'https://finance.yahoo.com/quote/' + symbol + '/balance-sheet?p=' + symbol
    df_balance_sheet = scrape_table(balance_sheet_url)
    df_balance_sheet_de = pd.DataFrame(df_balance_sheet, columns=["Total Liabilities", "Total stockholders' equity"])
    j = df_balance_sheet_de.loc[[1]]
    j['DebtEquity'] = j["Total Liabilities"] / j["Total stockholders' equity"]
    k = j.iloc[0]['DebtEquity']
    X = yahoo_financials.get_key_statistics_data()
    for d in X.values():
        PEG = d['pegRatio']
        PB = d['priceToBook']
        three_year_ave_return = d['threeYearAverageReturn']
    data = [['price', yahoo_financials.get_current_price()],
            ['dividend', yahoo_financials.get_dividend_yield()],
            ['five_year_dividend', yahoo_financials.get_five_yr_avg_div_yield()],
            ['pe_ratio', yahoo_financials.get_pe_ratio()],
            ['pegRatio', PEG],
            ['priceToBook', PB],
            ['price_to_sales', yahoo_financials.get_price_to_sales()],
            ['book_value', yahoo_financials.get_book_value()],
            ['ebit', yahoo_financials.get_ebit()],
            ['net_income', yahoo_financials.get_net_income()],
            ['EPS', yahoo_financials.get_earnings_per_share()],
            ['DebtEquity', k],
            ['threeYearAverageReturn', three_year_ave_return]]
    df = pd.DataFrame(data, columns=['Fundamentals', symbol])

df
Seeking your kind advice as to where I may have gone wrong in the above table. Thank you so very much!
You need to build your df outside of your for loop. Your code as currently written recreates df from scratch on every iteration, so only the last symbol's column survives.
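A minimal sketch of that fix, reusing the question's YahooFinancials calls but trimming the fundamentals list for brevity: accumulate one column per symbol inside the loop, then construct the DataFrame once at the end.

rows = ['price', 'dividend', 'pe_ratio']  # extend with the remaining fundamentals
columns = {}
for symbol in ecommerce:
    yf_data = YahooFinancials(symbol)
    columns[symbol] = [
        yf_data.get_current_price(),
        yf_data.get_dividend_yield(),
        yf_data.get_pe_ratio(),
    ]
df = pd.DataFrame(columns, index=rows)  # built once, outside the loop
df.index.name = 'Fundamentals'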
I am trying to loop over 2 lists to get all possible combinations in the loop below. I have some difficulty understanding why the first part works and the second does not. Basically both query the same data, just across every pattern from the lists. Any help would be much appreciated.
THE CODE:
base = ['BTC', 'ETH']
quoted = ['USDT', 'AUD', 'USD']

def daily_volume_historical(symbol, comparison_symbol, all_data=False, limit=90, aggregate=1, exchange=''):
    url = 'https://min-api.cryptocompare.com/data/histoday?fsym={}&tsym={}&limit={}&aggregate={}'\
        .format(symbol.upper(), comparison_symbol.upper(), limit, aggregate)
    if exchange:
        url += '&e={}'.format(exchange)
    if all_data:
        url += '&allData=true'
    page = requests.get(url)
    data = page.json()['Data']
    df = pd.DataFrame(data)
    df.drop(df.index[-1], inplace=True)
    df['timestamp'] = [datetime.datetime.fromtimestamp(d) for d in df.time]
    df.set_index('timestamp')
    return df

## THIS CODE GIVES SOME DATA ##
volu = daily_volume_historical('BTC', 'USD', 'CCCAGG').set_index('timestamp').volumefrom

## THIS CODE GIVES AN EMPTY DATA FRAME ##
d_volu = []
for a, b in [(a, b) for a in base for b in quoted]:
    volu = daily_volume_historical(a, b, exchange='CCCAGG').volumefrom
    d_volu.append

d_volu = pd.concat(d_volu, axis=1)
volu output sample:
timestamp
2010-07-17 09:00:00 20.00
2010-07-18 09:00:00 75.01
2010-07-19 09:00:00 574.00
2010-07-20 09:00:00 262.00
2010-07-21 09:00:00 575.00
2010-07-22 09:00:00 2160.00
2010-07-23 09:00:00 2402.50
2010-07-24 09:00:00 496.32
The second loop produces nothing because d_volu.append is only referenced, never called with the series, so the list stays empty. In addition, the bare df.set_index('timestamp') inside the function discards its result, which is why the working one-liner had to call .set_index('timestamp') again. Both are fixed below, with itertools.product generating the pairs:

import itertools
import datetime
import requests
import pandas as pd

base = ['BTC', 'ETH']
quoted = ['USDT', 'AUD', 'USD']
combinations = list(itertools.product(base, quoted))

def daily_volume_historical(symbol, comparison_symbol, all_data=False, limit=90, aggregate=1, exchange=''):
    url = 'https://min-api.cryptocompare.com/data/histoday?fsym={}&tsym={}&limit={}&aggregate={}'\
        .format(symbol.upper(), comparison_symbol.upper(), limit, aggregate)
    if exchange:
        url += '&e={}'.format(exchange)
    if all_data:
        url += '&allData=true'
    page = requests.get(url)
    data = page.json()['Data']
    df = pd.DataFrame(data)
    df.drop(df.index[-1], inplace=True)
    df['timestamp'] = [datetime.datetime.fromtimestamp(d) for d in df.time]
    df.set_index('timestamp', inplace=True)  # assign the index; the bare call was a no-op
    return df

d_volu = []
for a, b in combinations:
    volu = daily_volume_historical(a, b, exchange='CCCAGG').volumefrom
    d_volu.append(volu)  # actually call append and pass the series
d_volu = pd.concat(d_volu, axis=1)
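One further refinement worth considering (an assumption about the desired output, not something the question asked for): every volumefrom series carries the same name, so the concatenated frame ends up with six identically labelled columns. Renaming each series after its pair keeps them distinguishable:

d_volu = []
for a, b in combinations:
    volu = daily_volume_historical(a, b, exchange='CCCAGG').volumefrom
    d_volu.append(volu.rename('{}-{}'.format(a, b)))  # e.g. 'BTC-USDT'
d_volu = pd.concat(d_volu, axis=1)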