Connection to Yahoo Finance with requests.get() not working anymore - python

Until recently I was able to connect to Yahoo Finance, which allowed me to fill my stock screener with JSON data. For the past few weeks, however, I have not been able to connect anymore. Apparently Yahoo Finance blocked the traditional way of accessing its data, and I am wondering if there is a workaround to re-establish such a connection.
Here are a few technical details of how I was connecting to Yahoo Finance (this worked without any problems for the last 2 years):
r = requests.get(url_root_yahoo + ticker + key_stats_yahoo + ticker)
data = json.loads(p.findall(r.text)[0])
quote_store = data['context']['dispatcher']['stores']['QuoteSummaryStore']
statistics = quote_store['defaultKeyStatistics']
profit_margin = statistics['profitMargins']['raw']
But now I am getting the following error message:
File "stock_screener_4.py", line 80, in <module>
quote_store = data['context']['dispatcher']['stores']['QuoteSummaryStore']
NameError: name 'data' is not defined
Any hints on how to re-establish the connection?
Thanks a lot.
P.S. Minimum working example:
import os
import urllib.request
import requests, re, json

p = re.compile(r'root\.App\.main = (.*);')
url_root_yahoo = 'https://finance.yahoo.com/quote/'
key_stats_yahoo = '/key-statistics?p='
ticker = 'AAPL'
execute_next_block = 1

r = requests.get(url_root_yahoo + ticker + key_stats_yahoo + ticker)
try:
    data = json.loads(p.findall(r.text)[0])
except (KeyError, IndexError, TypeError):
    execute_next_block = 0
try:
    quote_store = data['context']['dispatcher']['stores']['QuoteSummaryStore']
    statistics = quote_store['defaultKeyStatistics']
except (KeyError, IndexError, TypeError):
    execute_next_block = 0
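For what it's worth, the NameError itself is only a downstream symptom: the first try block swallows the IndexError raised when p.findall matches nothing, so data is never assigned, and the second try does not catch NameError. As for the root cause, a frequent culprit is that Yahoo rejects requests carrying the default python-requests User-Agent and serves a page without the root.App.main payload. A minimal sketch of that workaround, assuming a header-based block (the header value is just an example browser string, not a requirement):

import json
import re
import requests

p = re.compile(r'root\.App\.main = (.*);')
url = 'https://finance.yahoo.com/quote/AAPL/key-statistics?p=AAPL'

# Assumption: Yahoo serves the full page (including root.App.main)
# only to browser-like clients, so we mimic one.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'}
r = requests.get(url, headers=headers)

matches = p.findall(r.text)
if matches:
    data = json.loads(matches[0])
    quote_store = data['context']['dispatcher']['stores']['QuoteSummaryStore']
    print(quote_store['defaultKeyStatistics']['profitMargins']['raw'])
else:
    print('root.App.main not found; Yahoo may have changed the page again.')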

Related

Web scraping code using BS4+request not refreshing

I have a problem with code that scrapes a weather website. It's supposed to update hourly, but for some reason the data it returns is not the current data on the website; it also never refreshes, and keeps feeding back the same data continuously. Please help!
Also, I need help scraping the weather icon from the site.
Here is my code:
from bs4 import BeautifulSoup
from plyer import notification
import requests
import time

if __name__ == '__main__':
    while True:
        def notifyMe(title, message):
            notification.notify(
                title = title,
                message = message,
                #app_icon = icon,
                timeout = 7
            )
        try:
            # site = requests.get('https://weather.com/weather/today/l/5.02,7.97?par=google')
            site = requests.get('https://weather.com/en-NG/weather/today/l/4dce0117809bca3e9ecdaa65fb45961a9718d6829adeb72b6a670240e10bd8c9')
            # site = requests.get('http://localhost/weather.com/weather/today/l/5.02,7.97.html')
            soup = BeautifulSoup(site.content, 'html.parser')
            day = soup.find(class_='CurrentConditions--CurrentConditions--14ztG')
            location = day.find(class_='CurrentConditions--location--2_osB').get_text()
            timestamp = day.find(class_='CurrentConditions--timestamp--3_-CV').get_text()
            tempValue = day.find(class_='CurrentConditions--tempValue--1RYJJ').get_text()
            phraseValue = day.find(class_='CurrentConditions--phraseValue--17s79').get_text()
            precipValue = day.find(class_='CurrentConditions--precipValue--1RgXi').get_text()
            #icon = day.find(id='svg-symbol-cloud').get_icon()
            weather = timestamp + "\n" + tempValue + " " + phraseValue + "\n" + precipValue
        except requests.exceptions.ConnectionError:
            location = "Couldn't get a location."
            weather = "Error connecting to website."
        except AttributeError:
            weather = timestamp + "\n" + tempValue + " " + phraseValue
        # print (weather)
        notifyMe(location, weather)
        time.sleep(30)
Expected output:
Uyo, Akwa Ibom Weather
As of 13:28 WAT
30° Mostly Cloudy
55% chance of rain until 14:00
import requests
from bs4 import BeautifulSoup

def main(url):
    r = requests.get(url)
    soup = BeautifulSoup(r.text, 'lxml')
    x = list(soup.select_one('.card').stripped_strings)
    del x[4:8]
    print(x)

main('https://weather.com/en-NG/weather/today/l/4dce0117809bca3e9ecdaa65fb45961a9718d6829adeb72b6a670240e10bd8c9')
Output:
['Uyo, Akwa Ibom Weather', 'As of 8:03 WAT', '24°', 'Cloudy', '47% chance of rain until 9:00']
It appears the error might have been from the site, because it's working now without the issues. Thank you all for the suggestions. @Ahmed American, your code is beautiful; I've learnt from it. @furas, I'll try to construct the SVG as you suggested.
That's the output.
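A side note on robustness for this scraper: suffixes like --1RYJJ in those class names look like build-generated hashes, which can change whenever the site redeploys, and cached responses can also serve stale data. Below is a sketch that matches only the stable part of each class name; the Cache-Control request header is an assumption about the staleness cause, and intermediaries are not required to honor it:

import requests
from bs4 import BeautifulSoup

url = ('https://weather.com/en-NG/weather/today/l/'
       '4dce0117809bca3e9ecdaa65fb45961a9718d6829adeb72b6a670240e10bd8c9')
# Best-effort request for a fresh (non-cached) copy of the page.
site = requests.get(url, headers={'Cache-Control': 'no-cache'})
soup = BeautifulSoup(site.content, 'html.parser')

# [class*="..."] matches a substring of the class attribute, so the
# hashed suffix can change without breaking the selector.
temp = soup.select_one('[class*="CurrentConditions--tempValue"]')
phrase = soup.select_one('[class*="CurrentConditions--phraseValue"]')
if temp and phrase:
    print(temp.get_text(), phrase.get_text())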

Can't crawl data from Yahoo Finance

I am trying to crawl data from Yahoo Finance. I found this link:
https://query1.finance.yahoo.com/v7/finance/download/BVH?period1=923729900&period2=1618039708&interval=1d&events=history&includeAdjustedClose=true
import pandas as pd
import requests

def createLink(symbol, table):
    s = "https://query1.finance.yahoo.com/v7/finance/download/BVH?period1=923729900&period2=1618039708&interval=1d&events=history&includeAdjustedClose=true"
    return s.replace("BVH", symbol).replace("history", table)

def getData(symbol, table):
    URL = createLink(symbol, table)
    web = requests.get(URL)
    if web.status_code == 200:
        reader = pd.read_csv(URL)
    else:
        # "Date" (not "Data"), so the fallback frame can merge on "Date"
        reader = pd.DataFrame({"Date": [], "Dividends": [], "Stock Splits": []})
    return reader

def history(symbol):
    history_close = getData(symbol, 'history')
    if history_close.empty:
        return history_close
    divend = getData(symbol, 'div')
    stock = getData(symbol, 'split')
    x = pd.merge(divend, stock, how="outer", on="Date")
    data = pd.merge(history_close, x, how="outer", on="Date")
    return data

df = pd.read_excel("/content/drive/MyDrive/Colab Notebooks/symbolNYSE.xlsx")
count = 0
count_fail = 0
for i in range(0, len(df["Symbol"])):
    try:
        count += 1
        print(df["Symbol"][i], count)
        a = history(df["Symbol"][i])
        if not a.empty:
            a.to_excel("/content/drive/MyDrive/ColabNotebooks/GetCloseYahoo/" + df["Symbol"][i] + ".xlsx")
    except:
        count_fail += 1
print("success:", count)
print("fail:", count_fail)
I am using Python, requests, and pandas in Jupyter to crawl it.
The errors:
Error tokenizing data. C error: Expected 2 fields in line 3, saw 12
Key error
At first I can crawl about 100-200 companies; then the program fails on some arbitrary symbol. If I wait a minute and run it again, it works without error.
What is the reason? Thank you so much.
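The pattern described (a few hundred symbols succeed, then everything fails until you wait) is characteristic of rate limiting: Yahoo starts answering with an error page instead of CSV, which is exactly what produces "Expected 2 fields in line 3" from read_csv, and a missing "Date" column then gives the KeyError in the merges. A sketch of a more defensive getData, reusing createLink from the question; the backoff times and the User-Agent header are assumptions, not documented limits:

import io
import time
import pandas as pd
import requests

def getData(symbol, table, retries=3):
    URL = createLink(symbol, table)
    headers = {'User-Agent': 'Mozilla/5.0'}  # assumption: reduces blocking
    for attempt in range(retries):
        web = requests.get(URL, headers=headers)
        # A real CSV body starts with the "Date,..." header row; throttled
        # responses come back as JSON or HTML and would break read_csv.
        if web.status_code == 200 and web.text.lstrip().startswith('Date'):
            # Parse the body we already downloaded instead of fetching twice.
            return pd.read_csv(io.StringIO(web.text))
        time.sleep(60 * (attempt + 1))  # back off, then retry
    return pd.DataFrame({"Date": [], "Dividends": [], "Stock Splits": []})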

NameError: name 'all_df' is not defined

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Aug 14 2017
Modified on Wed Aug 16 2017
Author: Yanfei Wu
Get the past 500 S&P 500 stocks data
"""
from bs4 import BeautifulSoup
import requests
from datetime import datetime
import pandas as pd
import pandas_datareader.data as web

def get_ticker_and_sector(url='https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'):
    """
    get the s&p 500 stocks from Wikipedia:
    https://en.wikipedia.org/wiki/List_of_S%26P_500_companies
    ---
    return: a dictionary with ticker names as keys and sectors as values
    """
    r = requests.get(url)
    data = r.text
    soup = BeautifulSoup(data, 'lxml')
    # we only want to parse the first table of this wikipedia page
    table = soup.find('table')
    sp500 = {}
    # loop over the rows and get ticker symbol and sector name
    for tr in table.find_all('tr')[1:]:
        tds = tr.find_all('td')
        ticker = tds[0].text
        sector = tds[3].text
        sp500[ticker] = sector
    return sp500

def get_stock_data(ticker, start_date, end_date):
    """ get stock data from google with stock ticker, start and end dates """
    data = web.DataReader(ticker, 'google', start_date, end_date)
    return data

if __name__ == '__main__':
    """ get the stock data from the past 5 years """
    # end_date = datetime.now()
    end_date = datetime(2017, 8, 14)
    start_date = datetime(end_date.year - 5, end_date.month, end_date.day)
    sp500 = get_ticker_and_sector()
    sp500['SPY'] = 'SPY'  # also include SPY as reference
    print('Total number of tickers (including SPY): {}'.format(len(sp500)))
    bad_tickers = []
    for i, (ticker, sector) in enumerate(sp500.items()):
        try:
            stock_df = get_stock_data(ticker, start_date, end_date)
            stock_df['Name'] = ticker
            stock_df['Sector'] = sector
            if stock_df.shape[0] == 0:
                bad_tickers.append(ticker)
            #output_name = ticker + '_data.csv'
            #stock_df.to_csv(output_name)
            if i == 0:
                all_df = stock_df
            else:
                all_df = all_df.append(stock_df)
        except:
            bad_tickers.append(ticker)
    print(bad_tickers)
    all_df.to_csv('./data/all_sp500_data_2.csv')

    """ Write failed queries to a text file """
    if len(bad_tickers) > 0:
        with open('./data/failed_queries_2.txt', 'w') as outfile:
            for ticker in bad_tickers:
                outfile.write(ticker + '\n')
Your problem is in your try/except block. It is good style to always catch a specific exception rather than blindly wrapping a long block of code in a bare except. The problem with that approach, as your case demonstrates, is that an unrelated or unexpected error goes unnoticed: here, every call to get_stock_data raises, the bare except silently appends the ticker to bad_tickers, so all_df is never assigned, and all_df.to_csv then fails with the NameError. This is the exception I get from running your code:
NotImplementedError: data_source='google' is not implemented
That means the Google source is no longer available in pandas_datareader; the pandas_datareader.data.DataReader docs have good information about which data sources are still supported and how to use the DataReader correctly.
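To make that advice concrete, here is a sketch of the loop with specific exceptions and list-plus-concat accumulation, so a failure is neither silently swallowed nor able to leave all_df undefined. The 'stooq' source is only an example of a reader pandas_datareader still ships; substitute whichever source you actually use:

import pandas as pd
import pandas_datareader.data as web

frames = []
bad_tickers = []
for ticker, sector in sp500.items():
    try:
        # 'stooq' is an example source; 'google' raises NotImplementedError.
        # RemoteDataError subclasses IOError, so IOError covers fetch failures.
        stock_df = web.DataReader(ticker, 'stooq', start_date, end_date)
    except (NotImplementedError, IOError) as e:
        print('{} failed: {}'.format(ticker, e))  # surface the real error
        bad_tickers.append(ticker)
        continue
    if stock_df.empty:
        bad_tickers.append(ticker)
        continue
    stock_df['Name'] = ticker
    stock_df['Sector'] = sector
    frames.append(stock_df)

# concat once at the end; all_df always exists, even if every ticker failed
all_df = pd.concat(frames) if frames else pd.DataFrame()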

Python How to retrieve a stock's last current stock price from the dictionary and put it into a variable?

I am trying to obtain a stock's current price and put it into a variable so I can run if/else statements on it. I have used the Google API to retrieve current stock prices, but I am unable to figure out how to put one into a variable. Thanks!
import json
import sys
try:
    from urllib.request import Request, urlopen
except ImportError:  # python 2
    from urllib2 import Request, urlopen

googleFinanceKeyToFullName = {
    u'id'     : u'ID',
    u't'      : u'StockSymbol',
    u'e'      : u'Index',
    u'l'      : u'LastTradePrice',
    u'l_cur'  : u'LastTradeWithCurrency',
    u'ltt'    : u'LastTradeTime',
    u'lt_dts' : u'LastTradeDateTime',
    u'lt'     : u'LastTradeDateTimeLong',
    u'div'    : u'Dividend',
    u'yld'    : u'Yield'
}

def buildUrl(symbols):
    symbol_list = ','.join([symbol for symbol in symbols])
    # a deprecated but still active & correct api
    return 'http://finance.google.com/finance/info?client=ig&q=' \
        + symbol_list

def request(symbols):
    url = buildUrl(symbols)
    req = Request(url)
    resp = urlopen(req)
    # remove special symbols such as the pound symbol
    content = resp.read().decode('ascii', 'ignore').strip()
    content = content[3:]
    return content

def replaceKeys(quotes):
    global googleFinanceKeyToFullName
    quotesWithReadableKey = []
    for q in quotes:
        qReadableKey = {}
        for k in googleFinanceKeyToFullName:
            if k in q:
                qReadableKey[googleFinanceKeyToFullName[k]] = q[k]
        quotesWithReadableKey.append(qReadableKey)
    return quotesWithReadableKey

def getQuotes(symbols):
    if type(symbols) == type('str'):
        symbols = [symbols]
    content = json.loads(request(symbols))
    return replaceKeys(content)

if __name__ == '__main__':
    try:
        symbols = sys.argv[1]
    except:
        symbols = "GOOG,AAPL,MSFT,AMZN,SBUX"
    symbols = symbols.split(',')
    try:
        print(json.dumps(getQuotes(symbols), indent=2))
    except:
        print("Fail")
You can get the last current stock price from the dictionary and put it into a variable, say price, by changing the last part of the code to

try:
    quotes = getQuotes(symbols)
    price = quotes[-1]['LastTradePrice']  # -1 means last in a list
    print(price)
except Exception as e:
    print(e)

but it is very unreliable, because if the order of the prices changes, you will get the price of a different stock. What you should do is learn how to define a data structure that's suitable to solve your problem.
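For example, keying the quotes by symbol removes the ordering problem entirely. A minimal sketch using getQuotes from the question; note that Google returned LastTradePrice as a string (possibly with thousands separators), so the float conversion here is an assumption about its formatting:

quotes = getQuotes(['GOOG', 'AAPL', 'MSFT'])
# Build {symbol: price} so lookups no longer depend on list order.
prices = {q['StockSymbol']: float(q['LastTradePrice'].replace(',', ''))
          for q in quotes if 'LastTradePrice' in q}

threshold = 150.0  # hypothetical trigger level for the if/else logic
if prices.get('AAPL', 0.0) > threshold:
    print('AAPL is above', threshold, ':', prices['AAPL'])
else:
    print('AAPL price:', prices.get('AAPL'))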

How do I catch a 404 error in urllib? (python 3)

I've been reading tens of examples of similar issues, but I can't get any of the solutions I've seen, or their variants, to run. I'm screen scraping, and I just want to ignore 404 errors (skip those pages). I get:
AttributeError: 'module' object has no attribute 'HTTPError'
I've tried URLError as well. I've seen nearly identical syntax accepted in working answers. Any ideas? Here's what I've got:
import urllib.request
import datetime
from bs4 import BeautifulSoup

class EarningsAnnouncement:
    def __init__(self, Company, Ticker, EPSEst, AnnouncementDate, AnnouncementTime):
        self.Company = Company
        self.Ticker = Ticker
        self.EPSEst = EPSEst
        self.AnnouncementDate = AnnouncementDate
        self.AnnouncementTime = AnnouncementTime

webBaseStr = 'http://biz.yahoo.com/research/earncal/'
earningsAnnouncements = []
dayVar = datetime.date.today()
for dte in range(1, 30):
    currDay = str(dayVar.day)
    currMonth = str(dayVar.month)
    currYear = str(dayVar.year)
    if (len(currDay) == 1): currDay = '0' + currDay
    if (len(currMonth) == 1): currMonth = '0' + currMonth
    dateStr = currYear + currMonth + currDay
    webString = webBaseStr + dateStr + '.html'
    try:
        #with urllib.request.urlopen(webString) as url: page = url.read()
        page = urllib.request.urlopen(webString).read()
        soup = BeautifulSoup(page)
        tbls = soup.findAll('table')
        tbl6 = tbls[6]
        rows = tbl6.findAll('tr')
        rows = rows[2:len(rows)-1]
        for earn in rows:
            earningsAnnouncements.append(EarningsAnnouncement(earn.contents[0], earn.contents[1],
                earn.contents[3], dateStr, earn.contents[3]))
    except urllib.HTTPError as err:
        if err.code == 404:
            continue
        else:
            raise
    dayVar += datetime.timedelta(days=1)
It looks like for urllib (not urllib2) that the exception is urllib.error.HTTPError, not urllib.HTTPError. See the documentation for more information.
Do this:

import urllib.error  # the exception classes live in the urllib.error submodule

except urllib.error.URLError as e:  # use 'urllib.error.URLError', not 'urllib.HTTPError'
    print('Error code: ', e.code)  # or whatever you want
    return e.code
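Putting that fix back into the loop from the question, a minimal sketch. Note two things: the original continue would also skip the dayVar increment at the bottom of the loop, so a 404 day would be fetched forever; and strftime('%Y%m%d') is equivalent to the manual zero-padding of day and month:

import datetime
import urllib.error
import urllib.request

dayVar = datetime.date.today()
for dte in range(1, 30):
    webString = ('http://biz.yahoo.com/research/earncal/'
                 + dayVar.strftime('%Y%m%d') + '.html')
    try:
        page = urllib.request.urlopen(webString).read()
        # ... parse `page` with BeautifulSoup as before ...
    except urllib.error.HTTPError as err:  # HTTPError is in urllib.error
        if err.code != 404:
            raise
        # a 404 just means no page for this date; fall through and advance
    dayVar += datetime.timedelta(days=1)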
