How To: Python Pandas get current stock data - python
I've used:
data = DataReader("yhoo", "yahoo", datetime.datetime(2000, 1, 1),
datetime.datetime.today())
in pandas (Python) to get historical data for Yahoo, but it cannot show today's price (the market has not yet closed). How can I resolve this problem? Thanks in advance.
import pandas
import pandas.io.data
import datetime
import urllib2
import csv
# URL template for Yahoo's legacy CSV quote service.
# f=sd1ohgl1vl1 requests: symbol, date, open, high, low, last trade,
# volume, last trade (last trade duplicated to stand in for close/adj close).
YAHOO_TODAY="http://download.finance.yahoo.com/d/quotes.csv?s=%s&f=sd1ohgl1vl1"

def get_quote_today(symbol):
    """Fetch today's quote row for *symbol* from Yahoo's CSV endpoint.

    Returns the raw CSV row (a list of strings) whose first field equals
    *symbol*, or None if no matching row is found.

    NOTE(review): Python 2 code (urllib2); the Yahoo endpoint used here
    has been retired, so this is of historical interest only.
    """
    response = urllib2.urlopen(YAHOO_TODAY % symbol)
    reader = csv.reader(response, delimiter=",", quotechar='"')
    for row in reader:
        if row[0] == symbol:
            return row
## main ##
symbol = "TSLA"
# Daily history via the legacy pandas.io.data Yahoo reader.
# NOTE(review): Python 2 code; pandas.io.data was removed from pandas
# and the Yahoo endpoint is retired.
history = pandas.io.data.DataReader(symbol, "yahoo", start="2014/1/1")
print history.tail(2)
today = datetime.date.today()
# One-row frame for today's date with the same columns as `history`.
df = pandas.DataFrame(index=pandas.DatetimeIndex(start=today, end=today, freq="D"),
                      columns=["Open", "High", "Low", "Close", "Volume", "Adj Close"],
                      dtype=float)
row = get_quote_today(symbol)
# row[2:] holds open/high/low/last/volume/last as strings; convert to float.
# (`.ix` is long-deprecated; map() returns a list only on Python 2.)
df.ix[0] = map(float, row[2:])
history = history.append(df)
print "today is %s" % today
print history.tail(2)
Just to complete perigee's answer — it cost me quite some time to find a way to append the data.
Open High Low Close Volume Adj Close
Date
2014-02-04 180.7 181.60 176.20 178.73 4686300 178.73
2014-02-05 178.3 180.59 169.36 174.42 7268000 174.42
today is 2014-02-06
Open High Low Close Volume Adj Close
2014-02-05 178.30 180.59 169.36 174.420 7268000 174.420
2014-02-06 176.36 180.11 176.00 178.793 5199297 178.793
Find a way to work around, just use urllib to fetch the data with:
http://download.finance.yahoo.com/d/quotes.csv?s=yhoo&f=sd1ohgl1l1v
then add it to dataframe
This code uses the pandas read_csv method to get the new quote from yahoo, and it checks if the new quote is an update from the current date or a new date in order to update the last record in history or append a new record.
If you add a while(true) loop and a sleep around the new_quote section, you can have the code refresh the quote during the day.
It also has duplicate last trade price to fill in the Close and the Adjusted Close, given that intraday close and adj close are always the same value.
import pandas as pd
import pandas.io.data as web
def get_quote_today(symbol):
    """Download today's quote for *symbol* from Yahoo's legacy CSV
    endpoint and return it as a one-row DataFrame indexed by the quote
    timestamp, with Open/High/Low/Close/Volume/Adj Close columns.
    """
    url="http://download.finance.yahoo.com/d/quotes.csv?s=%s&f=d1t1ohgl1vl1"
    columns = [u'Date', u'time', u'Open', u'High', u'Low',
               u'Close', u'Volume', u'Adj Close']
    quote = pd.read_csv(url % symbol, names=columns)
    # Index the frame by the combined date + time of the quote.
    quote.index = pd.to_datetime(quote.Date + " " + quote.time)
    # Drop the raw Date/time columns; keep only the price/volume data.
    return quote.iloc[:, 2:]
if __name__ == "__main__":
    symbol = "TSLA"
    # Daily history via the legacy pandas.io.data Yahoo reader.
    # NOTE(review): Python 2 code; this API has since been removed.
    history = web.DataReader(symbol, "yahoo", start="2014/1/1")
    print history.tail()
    new_quote = get_quote_today(symbol)
    # Only touch `history` when the fetched quote is newer than its last row.
    if new_quote.index > history.index[-1]:
        if new_quote.index[-1].date() == history.index[-1].date():
            # if both quotes are for the first date, update history's last record.
            history.iloc[-1]= new_quote.iloc[-1]
        else:
            # New trading day: append the quote as a fresh row.
            history=history.append(new_quote)
    history.tail()
So from trying this out and looking at the dataframe, it doesn't look possible. You tell it to go from a specific day until today, yet the dataframe stops at May 31st, 2013. This tells me that Yahoo probably has not made it available for you to use in the past couple of days, or somehow pandas is just not picking it up. It is not just missing 1 day; it is missing 3.
If I do the following:
>>> df = DataReader("yhoo", "yahoo", datetime.datetime(2013, 6, 1),datetime.datetime.today())
>>> len(df)
0
it shows me that there simply is no data to pick up in those days so far. If there is some way around this then I cannot figure it out, but it just seems that the data is not available for you yet, which is hard to believe.
The module from pandas doesn't work anymore, because Google and Yahoo no longer provide support. Instead, you can create a function to take the data directly from Google Finance using the URL. Here is part of the code to do this:
import csv
import datetime
import re
import codecs
import requests
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
You can write a function to get data from Google Finance using the URL; you have to indent the part below.
#You have to indent this part
def get_google_finance_intraday(ticker, period=60, days=1, exchange='NASD'):
    """
    Retrieve intraday stock data from Google Finance.

    NOTE(review): this Google Finance endpoint has been shut down, so the
    request will no longer return price data; the parsing logic is kept
    for reference.

    Parameters
    ----------------
    ticker : str
        Company ticker symbol.
    period : int
        Interval between stock values in seconds.
        i = 60 corresponds to one minute tick data
        i = 86400 corresponds to daily data
    days : int
        Number of days of data to retrieve.
    exchange : str
        Exchange from which the quotes should be fetched

    Returns
    ---------------
    df : pandas.DataFrame
        DataFrame containing the opening price, high price, low price,
        closing price, and volume. The index contains the times associated with
        the retrieved price values.
    """
    # build url
    url = 'https://finance.google.com/finance/getprices?p={days}d&f=d,o,h,l,c,v&q={ticker}&i={period}&x={exchange}'.format(ticker=ticker, period=period, days=days, exchange=exchange)
    page = requests.get(url)
    reader = csv.reader(codecs.iterdecode(page.content.splitlines(), "utf-8"))
    columns = ['Open', 'High', 'Low', 'Close', 'Volume']
    rows = []
    times = []
    for row in reader:
        # Data rows start either with 'a' + absolute Unix timestamp, or with
        # a bare integer offset (in `period` units) from the last 'a' row.
        if re.match(r'^[a\d]', row[0]):
            if row[0].startswith('a'):
                start = datetime.datetime.fromtimestamp(int(row[0][1:]))
                times.append(start)
            else:
                times.append(start + datetime.timedelta(seconds=period * int(row[0])))
            # list(...) is required on Python 3, where map() is a lazy
            # iterator; without it the DataFrame would hold map objects.
            rows.append(list(map(float, row[1:])))
    if rows:
        return pd.DataFrame(rows, index=pd.DatetimeIndex(times, name='Date'),
                            columns=columns)
    # No data rows: return an empty frame (times is empty too, since both
    # lists are appended together).
    return pd.DataFrame(rows, index=pd.DatetimeIndex(times, name='Date'))
Now you can just call the function with the ticker that you want — in my case AAPL — and the result is a pandas DataFrame containing the opening price, high price, low price, closing price, and volume.
# Parameters for the intraday request.
ticker = 'AAPL'
period = 60       # one quote per 60 seconds (minute bars)
days = 1
exchange = 'NASD'
# Fix: the original defined `exchange` but never passed it to the call,
# so any value other than the default 'NASD' was silently ignored.
df = get_google_finance_intraday(ticker, period=period, days=days, exchange=exchange)
df
The simplest way to extract Indian stock price data into Python is to use the nsepy library.
In case you do not have the nsepy library do the following:
pip install nsepy
The following code allows you to extract HDFC stock price for 10 years.
from nsepy import get_history
from datetime import date
# Fetch ~5 years of daily HDFC Bank price history from the NSE via nsepy.
dfc=get_history(symbol="HDFCBANK",start=date(2015,5,12),end=date(2020,5,18))
This is so far the easiest code I have found.
Related
Alpha Vantage: Iterate through list of stocks to get technical indicators
I have a list of 5 stocks for which I would like to obtain data using Alpha Vantage's TechIndicators. Below are what I have imported and defined: from alpha_vantage.timeseries import TimeSeries from alpha_vantage.foreignexchange import ForeignExchange from alpha_vantage.cryptocurrencies import CryptoCurrencies from alpha_vantage.techindicators import TechIndicators from alpha_vantage.sectorperformance import SectorPerformances import datetime import numpy as np import pandas as pd top5 = ['CELH', 'MAXR', 'CD', 'WDC', 'IMAB'] Querying for technical indicators returns a dataframe (data) and a dictionary (meta_data) data, meta_data = ti.get_sma(symbol=ticker, interval='weekly') How do I run the query for my top5 list? I first thought it should look like this: for ticker in top5: ticker_sma[ticker], ticker_meta_sma[ticker] = ti.get_sma(symbol=ticker, interval='weekly') or like this: sma = {} for ticker in top5gainers: gainers_sma[ticker], gainers_sma_data[ticker] = ti.get_sma(symbol=ticker, interval='weekly') gainers_sma.keys() gainers_sma.values() But I have had zero luck with that approach. I get a name error... NameError: name 'ticker_sma' is not defined ...which is odd to me because I have success getting data for an individual stock, e.g., Microsoft, below: msft_sma, msft_meta_sma = ti.get_sma(symbol='msft', interval='weekly') If I remove [ticker], and just run this get request... for ticker in top5: data_sma, meta_sma = ti.get_sma(symbol=ticker, interval='weekly') ...then according to the meta data in meta_sma, I've only obtained data for the 4th ticker out of the 5 tickers: {'1: Symbol': 'IMAB', '2: Indicator': 'Simple Moving Average (SMA)', '3: Last Refreshed': '2020-12-31', '4: Interval': 'weekly', '5: Time Period': 20, '6: Series Type': 'close', '7: Time Zone': 'US/Eastern'} Thank you for reading this question and for your answer!
You are trying to dynamically create variables, which isn't exactly straightforward. The following shows you how to do that, but you may also want to consider just reading each symbol into its own dataframe and either concatenating them together or saving individually. That really just depends on what you are doing with the data (using live or saving for later). This will create each variable in the loop and assign text to it, just so you can what's going on. top5 = ['CELH', 'MAXR', 'CD', 'WDC', 'IMAB'] for ticker in top5: globals()['{}_sma'.format(ticker)] = ticker + 'sma_value' globals()['{}_meta_sma'.format(ticker)] = ticker + 'sma_meta_value' Then print a few for proof: In [6]: print(CELH_sma) ...: print(WDC_sma) ...: print(IMAB_meta_sma) CELHsma_value WDCsma_value IMABsma_meta_value I can't test but what I think will work for you is this: for ticker in top5: globals()['{}_sma'.format(ticker)], globals()['{}_meta_sma'.format(ticker)] = ti.get_sma(symbol=ticker, interval='weekly')
How to start a for loop for this given DataFrame in Pandas for multiple same name rows?
I need some help, I am working on a .ipynb file to filter data and get certain things from that Dataframe. This is DataFrame I'm working with. From this dataframe, as you can see there are multiple rows of the same SYMBOL. I need help to open a "for" loop which will get me the highest CHG_IN_OI for every symbol, take the row of that highest CHG_IN_OI for that row. For example if there are 14 rows of ACC as a symbol, I need to find highest CHG_IN_OI for ACC from the CHG_IN_OI column and get that row of the highest change and Retain the remaining columns as well!. I have made a list named, Multisymbols which has these symbols: multisymbols = [ 'ACC', 'ADANIENT', 'ADANIPORTS', 'AMARAJABAT', 'AMBUJACEM', 'APOLLOHOSP', 'APOLLOTYRE', 'ASHOKLEY', 'ASIANPAINT', 'AUROPHARMA', 'AXISBANK', 'BAJAJ-AUTO', 'BAJAJFINSV', 'BAJFINANCE', 'BALKRISIND', 'BANDHANBNK', 'BANKBARODA', 'BATAINDIA', 'BEL', 'BERGEPAINT', 'BHARATFORG', 'BHARTIARTL', 'BHEL', 'BIOCON', 'BOSCHLTD', 'BPCL', 'BRITANNIA', 'CADILAHC', 'CANBK', 'CENTURYTEX', 'CHOLAFIN', 'CIPLA', 'COALINDIA', 'COLPAL', 'CONCOR', 'CUMMINSIND', 'DABUR', 'DIVISLAB', 'DLF', 'DRREDDY', 'EICHERMOT', 'EQUITAS', 'ESCORTS', 'EXIDEIND', 'FEDERALBNK', 'GAIL', 'GLENMARK', 'GMRINFRA', 'GODREJCP', 'GODREJPROP', 'GRASIM', 'HAVELLS', 'HCLTECH', 'HDFC', 'HDFCBANK', 'HDFCLIFE', 'HEROMOTOCO', 'HINDALCO', 'HINDPETRO', 'HINDUNILVR', 'IBULHSGFIN', 'ICICIBANK', 'ICICIPRULI', 'IDEA', 'IDFCFIRSTB', 'IGL', 'INDIGO', 'INDUSINDBK', 'INFRATEL', 'INFY', 'IOC', 'ITC', 'JINDALSTEL', 'JSWSTEEL', 'JUBLFOOD', 'KOTAKBANK', 'L&TFH', 'LICHSGFIN', 'LT', 'LUPIN', 'M&M', 'M&MFIN', 'MANAPPURAM', 'MARICO', 'MARUTI', 'MCDOWELL-N', 'MFSL', 'MGL', 'MINDTREE', 'MOTHERSUMI', 'MRF', 'MUTHOOTFIN', 'NATIONALUM', 'NAUKRI', 'NESTLEIND', 'NIITTECH', 'NMDC', 'NTPC', 'ONGC', 'PAGEIND', 'PEL', 'PETRONET', 'PFC', 'PIDILITIND', 'PNB', 'POWERGRID', 'PVR', 'RAMCOCEM', 'RBLBANK', 'RECLTD', 'RELIANCE', 'SAIL', 'SBILIFE', 'SBIN', 'SHREECEM', 'SEIMENS', 'SRF', 'SRTRANSFIN', 'SUNPHARMA', 'SUNTV', 
'TATACHEM', 'TATACONSUM', 'TATAMOTORS', 'TATAPOWER', 'TATASTEEL', 'TCS', 'TECHM', 'TITAN', 'TORNTPHARM', 'TORNTPOWER', 'TVSMOTOR', 'UBL', 'UJJIVAN', 'ULTRACEMCO', 'UPL', 'VEDL', 'VOLTAS', 'WIPRO', 'ZEEL' ] df = df[df['SYMBOL'].isin(multisymbols)] df These are all the shares in the NSE. Hope you can understand and help me out. I used .groupby(),it successfully gave me the highest CHG_IN_OI and .agg() to retain the remaining columns but the data was not correct. I just simply want the row for every symbols "HIGHEST" CHG_IN_OI. Thanks in Advance!
Although different from the data presented in the question, we have answered the same financial data using equity data as an example. import pandas as pd import pandas_datareader.data as web import datetime with open('./alpha_vantage_api_key.txt') as f: api_key = f.read() start = datetime.datetime(2019, 1, 1) end = datetime.datetime(2020, 8,1) df_all = pd.DataFrame() symbol = ['AAPL','TSLA'] for i in symbol: df = web.DataReader(i, 'av-daily', start, end, api_key=api_key) df['symbol'] = i df_all = pd.concat([df_all, df], axis=0) df.index = pd.to_datetime(df.index) Aggregating a single column df_all.groupby('symbol')['volume'].agg('max').reset_index() symbol volume 0 AAPL 106721200 1 TSLA 60938758 Multi-Column Aggregation df_all.groupby('symbol')[['high','volume']].agg(high=('high','max'), volume=('volume','max')) high volume symbol AAPL 425.66 106721200 TSLA 1794.99 60938758 Extract the target line symbol_max = df_all.groupby('symbol').apply(lambda x: x.loc[x['volume'].idxmax()]).reset_index(drop=True) symbol_max open high low close volume symbol 0 257.26 278.4100 256.37 273.36 106721200 AAPL 1 882.96 968.9899 833.88 887.06 60938758 TSLA
How to View Several Excel Rows in Python
So, I am trying to create a Python program that reads a password protected excel file. The program is intended to report any names expiring between 90 and 105 days. The problem I am running into right now is getting the program to read multiple rows. I've been using import xlrd. I was hoping that 'counter' would change the row being read, but only the first row is being read. Edit: Solved. I was able to use the code below to get my program to display entries that are expiring within my time field. import pandas as pd from datetime import date, timedelta today = date.today() ninety_Days = (date.today()+timedelta(days=90)) hundred_Days = (date.today()+timedelta(days=105)) hundred_Days = '%s-%s-%s' % (hundred_Days.month, hundred_Days.day, hundred_Days.year) ninety_Days = '%s-%s-%s' % (ninety_Days.month, ninety_Days.day, ninety_Days.year) wkbk = pd.read_excel('Practice Inventory.xlsx', 'Sheet1') mask = (wkbk['Expiration'] >= ninety_Days) & (wkbk['Expiration'] <= hundred_Days) wkbk = wkbk.loc[mask] print(wkbk)
Use Pandas! import pandas as pd df = pd.read_excel('Practice Inventory.xlsx') new_df = df[df['days to expiration'] >= 90] final_df = pd.concat([df[df['days to expiration'] <= 120], new_df] The final_df will hold all the rows with days of expiration greater than 90 and less than 120.
Continuing during an exception in a try/except statement
I have read numerous StackOverflow threads about looping during try/except statements, using else and finally, if/else statements, and while statements, but none of them address what I want. That or I don't know how to utilise that information to get what I want done. Basically, I am trying to get adjusted closing stock prices for various companies on a given date. I pasted some dummy data in the code block below to demonstrate (NOTE: you'll have to install pandas and pandas_datareader to get the dummy code to run). The get_stock_adj_close function returns the adj_close price given a ticker and date. The dummy_dataframe contains 4 companies with their tickers and random dates. And the add_days function takes a date and adds any number of days. I would like to append the adjusted close stock prices for each company in the dataframe on the listed date into the stock_prices list. Because the yahoo stock price database isn't that reliable for older entries and because some dates fall on days when the market is closed, whenever a price isn't available it raises a KeyError: 'Date'. Thus, what I would like to do is keep adding days indefinitely until it finds a date where a price does exist. The problem is it only adds the day once and then raises the same KeyError. I want it to keep adding days until it finds a day where the database has a stock price available and then return back to the dataframe and keep going with the next row. Right now the whole thing breaks on the first GM date (fourth row), which raises the KeyError and the fifth row/second GM date is ignored. Any help is appreciated! 
Dummy data: from datetime import datetime, date, timedelta import pandas as pd import pandas_datareader as pdr from dateutil.relativedelta import relativedelta def add_days(d, num_days): return d + timedelta(days=num_days) def get_stock_adj_close(ticker, chosen_date): stock_df = pdr.get_data_yahoo(ticker, start = chosen_date, end = chosen_date) return stock_df.iloc[0]['Adj Close'] d = {'TICKER': ['AMD','AMD','CHTR','GM'], 'DATE': [datetime(2020,2,4), datetime(2019,2,8),datetime(2019,1,31), datetime(2010,4,7)]} dummy_dataframe = pd.DataFrame(data=d) stock_prices = [] for i, row in dummy_dataframe.iterrows(): given_date = row['DATE'] try: stock_price = get_stock_adj_close(row['TICKER'], given_date) print(stock_price) stock_prices.append(stock_price) except KeyError: given_date = add_days(given_date,1) stock_price = get_stock_adj_close(row['TICKER'], given_date) stock_prices.append(stock_price) print(stock_prices)
I think while loop will help you. For example: for i, row in dummy_dataframe.iterrows(): given_date = row['DATE'] stock_price_found = False while not stock_price_found: try: stock_price = get_stock_adj_close(row['TICKER'], given_date) print(stock_price) stock_prices.append(stock_price) stock_price_found = False except KeyError: given_date = add_days(given_date,1) Or you can also use while True together with break: for i, row in dummy_dataframe.iterrows(): given_date = row['DATE'] while True: try: stock_price = get_stock_adj_close(row['TICKER'], given_date) print(stock_price) stock_prices.append(stock_price) break except KeyError: given_date = add_days(given_date,1) Don't forget to make sure that you are not stuck in indefinite loop, would be also helpful some other exit conditions from while loop, for example, after 10 failures.
Grabbing per minute stock data from a large time range Python
So I'm trying to grab per minute stock data over a one year time gap and I know the Google Finance API doesn't work anymore so I did some digging around I found some code from a old github thread that could find the range within 5 days from yahoo finance data; however, it does not do anymore than that even when I put a keyword like '1Y' which defaults to 1 day. Here is the code below: import requests import pandas as pd import arrow import datetime import os def get_quote_data(symbol='AAPL', data_range='5d', data_interval='1m'): res = requests.get('https://query1.finance.yahoo.com/v8/finance/chart/{symbol}?range={data_range}&interval={data_interval}'.format(**locals())) data = res.json() body = data['chart']['result'][0] dt = datetime.datetime dt = pd.Series(map(lambda x: arrow.get(x).datetime.replace(tzinfo=None), body['timestamp']), name='Datetime') df = pd.DataFrame(body['indicators']['quote'][0], index=dt) dg = pd.DataFrame(body['timestamp']) df = df.loc[:, ('open', 'high', 'low', 'close', 'volume')] df.dropna(inplace=True) #removing NaN rows df.columns = ['OPEN', 'HIGH','LOW','CLOSE','VOLUME'] #Renaming columns in pandas return df
body['meta']['validRanges'] tells you: ['1d', '5d', '1mo', '3mo', '6mo', '1y', '2y', '5y', '10y', 'ytd', 'max'] You are requesting 1Y instead of 1y. This difference is important. By the way you can load the timestamps much more easily like this: pd.to_datetime(body['timestamp'], unit='s')
print('stock ticker: {0}'.format(get_quote_data(symbol='AAPL', data_range='1d', data_interval='1m'))) works