Python KeyError: 0, can you help me find the error?

I have a KeyError: 0 in my Python code.
I don't really understand what it means in my case. I have read a lot about it, but I can't find the error on my own.
Can somebody help me find it and maybe explain it to me?
Regards,
import requests
import pandas as pd

# use a function to pull all info from the website
def getdata(stock):
    # company quote group of items
    company_quote = requests.get(f"https://financialmodelingprep.com/api/v3/quote/{stock}")
    company_quote = company_quote.json()
    share_price = float("{0:.2f}".format(company_quote[0]['price']))
    # balance sheet
    BS = requests.get(f"https://financialmodelingprep.com/api/v3/financials/balance-sheet-statement/{stock}?period=quarter")
    BS = BS.json()
    # total debt
    debt = float("{0:.2f}".format(float(BS['financials'][0]['Total debt'])/10**9))
    # total cash
    cash = float("{0:.2f}".format(float(BS['financials'][0]['Cash and short-term investments'])/10**9))
    # income statement group of items
    IS = requests.get(f"https://financialmodelingprep.com/api/v3/financials/income-statement/{stock}?period=quarter")
    IS = IS.json()
    # most recent quarterly revenue
    qRev = float("{0:.2f}".format(float(IS['financials'][0]['Revenue'])/10**9))
    # company profile group of items
    company_info = requests.get(f"https://financialmodelingprep.com/api/v3/company/profile/{stock}")
    company_info = company_info.json()
    # CEO
    ceo = company_info['profile']['ceo']
    return (share_price, cash, debt, qRev, ceo)

tickers = ('AAPL', 'MSFT', 'GOOG', 'MVIS')
data = map(getdata, tickers)
# create the dataframe with pandas to store all of the info
df = pd.DataFrame(data, columns=['Share Price', 'Total Cash', 'Total Debt', 'Q3 2019 Revenue', 'CEO'], index=tickers)
print(df)
# writing to excel
writer = pd.ExcelWriter('example.xlsx')
df.to_excel(writer, 'Statistics')
writer.save()

I just executed the code you pasted, and it seems the issue is that you are not using the API correctly: it is missing an API key. From your code I get this:
{'Error Message': 'Invalid API KEY. Please retry or visit our documentation to create one FREE https://financialmodelingprep.com/developer/docs'}
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "<stdin>", line 5, in getdata
KeyError: 0
So take a look at the API documentation and send the correct values (it is probably missing an API key parameter or a header).
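For reference, here is a minimal sketch of the quote request with an API key passed as a query parameter. The apikey parameter name and the YOUR_API_KEY placeholder are assumptions based on the provider's docs, so verify them there:

import requests

API_KEY = "YOUR_API_KEY"  # hypothetical placeholder; create a free key via the link in the error message

def get_quote(stock):
    resp = requests.get(
        f"https://financialmodelingprep.com/api/v3/quote/{stock}",
        params={"apikey": API_KEY},  # assumed parameter name; check the API docs
    )
    data = resp.json()
    # on failure the API returns a dict containing 'Error Message'; on success, a list.
    # Checking for the dict avoids the opaque KeyError: 0 from data[0]
    if isinstance(data, dict) and "Error Message" in data:
        raise RuntimeError(data["Error Message"])
    return data[0]["price"]

This also explains the KeyError: 0 itself: on failure the JSON is a dict, so company_quote[0] looks up the key 0 in a dict instead of indexing a list, and that key does not exist.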

Related

BeautifulSoup4 and Requests Module 'IndexError: list index out of range'

I'm new to web scraping with Python and am having a problem with the weather web scraping script I wrote. Here is the whole code, 'weather.py':
#! python3
import bs4, requests
weatherSite = requests.get('https://weather.com/en-CA/weather/today/l/eef019cb4dca2160f08eb9714e30f28e05e624bbae351ccb6a855dbc7f14f017')
weatherSoup = bs4.BeautifulSoup(weatherSite.text, 'html.parser')
weatherLoc = weatherSoup.select('.CurrentConditions--location--kyTeL')
weatherTime = weatherSoup.select('.CurrentConditions--timestamp--23dfw')
weatherTemp = weatherSoup.select('.CurrentConditions--tempValue--3a50n')
weatherCondition = weatherSoup.select('.CurrentConditions--phraseValue--2Z18W')
weatherDet = weatherSoup.select('.CurrentConditions--precipValue--3nxCj > span:nth-child(1)')
location = weatherLoc[0].text
time = weatherTime[0].text
temp = weatherTemp[0].text
condition = weatherCondition[0].text
det = weatherDet[0].text
print(location)
print(time)
print(temp + 'C')
print(condition)
print(det)
It basically parses the weather information from 'The Weather Channel' and prints it out. This code was working fine yesterday when I wrote it, but when I tried it today it gave me the following error:
Traceback (most recent call last):
File "C:\Users\username\filesAndStuff\weather.py", line 16, in <module>
location = weatherLoc[0].text
IndexError: list index out of range
Replace:
weatherLoc = weatherSoup.select('.CurrentConditions--location--kyTeL')
# print(weatherLoc)
# []
By:
weatherLoc = weatherSoup.select('h1[class*="CurrentConditions--location--"]')
# print(weatherLoc)
# [<h1 class="CurrentConditions--location--2_osB">Hamilton, Ontario Weather</h1>]
As you can see, your suffix kyTeL is not the same for me (2_osB): the site regenerates these class-name suffixes, so they change over time. You need a partial match on the class attribute with class*= (note the *).
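Applying the same idea to the other selectors in your script would look like this; the class-name prefixes are copied from your code, and the assumption (worth verifying) is that only the suffixes change between builds:

weatherLoc = weatherSoup.select('h1[class*="CurrentConditions--location--"]')
weatherTime = weatherSoup.select('[class*="CurrentConditions--timestamp--"]')
weatherTemp = weatherSoup.select('[class*="CurrentConditions--tempValue--"]')
weatherCondition = weatherSoup.select('[class*="CurrentConditions--phraseValue--"]')
weatherDet = weatherSoup.select('[class*="CurrentConditions--precipValue--"] > span:nth-child(1)')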

Scraping Google News with pygooglenews

I am trying to scrape Google News with pygooglenews.
I am trying to scrape more than 100 articles at a time (as Google sets the limit at 100) by changing the target dates with a for loop. Below is what I have so far, but I keep getting this error message:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-84-4ada7169ebe7> in <module>
----> 1 df = pd.DataFrame(get_news('Banana'))
2 writer = pd.ExcelWriter('My Result.xlsx', engine='xlsxwriter')
3 df.to_excel(writer, sheet_name='Results', index=False)
4 writer.save()
<ipython-input-79-c5266f97934d> in get_titles(search)
9
10 for date in date_list[:-1]:
---> 11 search = gn.search(search, from_=date, to_=date_list[date_list.index(date)])
12 newsitem = search['entries']
13
~\AppData\Roaming\Python\Python37\site-packages\pygooglenews\__init__.py in search(self, query, helper, when, from_, to_, proxies, scraping_bee)
140 if from_ and not when:
141 from_ = self.__from_to_helper(validate=from_)
--> 142 query += ' after:' + from_
143
144 if to_ and not when:
TypeError: unsupported operand type(s) for +=: 'dict' and 'str'
import pandas as pd
from pygooglenews import GoogleNews
import datetime

gn = GoogleNews()

def get_news(search):
    stories = []
    start_date = datetime.date(2021, 3, 1)
    end_date = datetime.date(2021, 3, 5)
    delta = datetime.timedelta(days=1)
    date_list = pd.date_range(start_date, end_date).tolist()
    for date in date_list[:-1]:
        search = gn.search(search, from_=date.strftime('%Y-%m-%d'), to_=(date+delta).strftime('%Y-%m-%d'))
        newsitem = search['entries']
        for item in newsitem:
            story = {
                'title': item.title,
                'link': item.link,
                'published': item.published
            }
            stories.append(story)
    return stories

df = pd.DataFrame(get_news('Banana'))
Thank you in advance.
It looks like you are correctly passing a string into get_news(), which is then passed on as the first argument (search) into gn.search().
However, you're reassigning search to the result of gn.search() in the line:
search = gn.search(search, from_=date.strftime('%Y-%m-%d'), to_=(date+delta).strftime('%Y-%m-%d'))
# ^^^^^^
# gets overwritten with the result of gn.search()
In the next iteration this reassigned search is passed into gn.search() as the query, which it doesn't accept.
If you look at the code in pygooglenews, gn.search() returns a dict, which explains the error: query += ' after:' + from_ fails because query is now a dict rather than a string.
To fix this, simply use a different variable, e.g.:
result = gn.search(search, from_=date.strftime('%Y-%m-%d'), to_=(date+delta).strftime('%Y-%m-%d'))
newsitem = result['entries']
pygooglenews has a limit of 100 articles per query, so you do need a loop that scrapes each day separately.
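Putting the fix together, a sketch of the corrected loop body (the same code as in the question, with only the result variable renamed):

    for date in date_list[:-1]:
        # keep the query string in `search`; store the response separately
        result = gn.search(search, from_=date.strftime('%Y-%m-%d'), to_=(date + delta).strftime('%Y-%m-%d'))
        for item in result['entries']:
            stories.append({'title': item.title, 'link': item.link, 'published': item.published})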

My script doesn't scrape all of Yelps restaurants

My script stops scraping after the 449th Yelp restaurant.
Entire Code: https://pastebin.com/5U3irKZp
for idx, item in enumerate(yelp_containers, 1):
    print("--- Restaurant number #", idx)
    restaurant_title = item.h3.get_text(strip=True)
    restaurant_title = re.sub(r'^[\d.\s]+', '', restaurant_title)
    restaurant_address = item.select_one('[class*="secondaryAttributes"]').get_text(separator='|', strip=True).split('|')[1]
The error I am getting is:
Traceback (most recent call last):
File "/Users/kenny/MEGA/Python/yelp scraper.py", line 41, in
restaurant_address = item.select_one('[class*="secondaryAttributes"]').get_text(separator='|', strip=True).split('|')[1]
IndexError: list index out of range
The problem is that some restaurants are missing the address, so the split produces a single-element list and the index [1] fails.
What you should do is check first, if the address has enough elements before indexing it. Change this line of code:
restaurant_address = item.select_one('[class*="secondaryAttributes"]').get_text(separator='|', strip=True).split('|')[1]
to these:
restaurant_address = item.select_one('[class*="secondaryAttributes"]').get_text(separator='|', strip=True).split('|')
restaurant_address = restaurant_address[1] if len(restaurant_address) > 1 else restaurant_address[0]
I ran your parser for all pages and it worked.
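If the secondaryAttributes element itself can ever be absent, a slightly more defensive sketch (the empty-string fallback is an assumption about what your downstream code tolerates):

address_el = item.select_one('[class*="secondaryAttributes"]')
parts = address_el.get_text(separator='|', strip=True).split('|') if address_el else []
restaurant_address = parts[1] if len(parts) > 1 else (parts[0] if parts else '')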

Not iterating through whole dictionary

So basically, I have an API that serves several dictionaries/arrays. (http://dev.c0l.in:5984/income_statements/_all_docs)
When getting the financial information for each company from the API (e.g. sector = technology and statement = income), Python is supposed to return 614 technology companies; however, I get this error:
Traceback (most recent call last):
File "C:\Users\samuel\Desktop\Python Project\Mastercopy.py", line 83, in <module>
user_input1()
File "C:\Users\samuel\Desktop\Python Project\Mastercopy.py", line 75, in user_input1
income_statement_fn()
File "C:\Users\samuel\Desktop\Python Project\Mastercopy.py", line 51, in income_statement_fn
if is_response ['sector'] == user_input3:
KeyError: 'sector'
on a random company (usually one of the 550th-600th ones).
Here is the function for income statements:
def income_statement_fn():
    user_input3 = raw_input("Which sector would you like to iterate through in Income Statement?: ")
    print 'Starting...'
    for item in income_response['rows']:
        is_url = "http://dev.c0l.in:5984/income_statements/" + item['id']
        is_request = urllib2.urlopen(is_url).read()
        is_response = json.loads(is_request)
        if is_response['sector'] == user_input3:
            csv.writerow([
                is_response['company']['name'],
                is_response['company']['sales'],
                is_response['company']['opening_stock'],
                is_response['company']['purchases'],
                is_response['company']['closing_stock'],
                is_response['company']['expenses'],
                is_response['company']['interest_payable'],
                is_response['company']['interest_receivable']])
            print 'loading...'
    print 'done!'
    print end - start
Any idea what could be causing this error?
(I don't believe that it is the api itself)
Cheers
Well, on testing the URL you pass in the urlopen call with a random id, I got this:
{"error":"not_found","reason":"missing"}
In that case, your function will fail with exactly the error you get. If you want your program to handle the error gracefully and write a "missing data" line instead of actual data, you could do this, for instance:
def income_statement_fn():
    user_input3 = raw_input("Which sector would you like to iterate through in Income Statement?: ")
    print 'Starting...'
    for item in income_response['rows']:
        is_url = "http://dev.c0l.in:5984/income_statements/" + item['id']
        is_request = urllib2.urlopen(is_url).read()
        is_response = json.loads(is_request)
        if is_response.get('sector', False) == user_input3:
            csv.writerow([
                is_response['company']['name'],
                is_response['company']['sales'],
                is_response['company']['opening_stock'],
                is_response['company']['purchases'],
                is_response['company']['closing_stock'],
                is_response['company']['expenses'],
                is_response['company']['interest_payable'],
                is_response['company']['interest_receivable']])
            print 'loading...'
        else:
            csv.writerow(['missing data'])
    print 'done!'
    print end - start
The problem seems to be with the final row of your income_response data:
{"id":"_design/auth","key":"_design/auth","value":{"rev":"1-3d8f282ec7c26779194caf1d62114dc7"}}
This row does not have a sector value. You need to alter your code to handle it, for example by ignoring any row where the sector key is not present.
You could easily have debugged this with a few print statements; for example, insert
print item['id'], is_response.get('sector', None)
into your code before the part that outputs the CSV.
A KeyError means that the key you tried to use does not exist in the dictionary. When checking for a key, it is much safer to use .get(). So you would replace this line:
if is_response['sector'] == user_input3:
With this:
if is_response.get('sector') == user_input3:
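A quick illustration of the difference, using the problematic design row from above (plain dict behavior, no assumptions about the API):

row = {'id': '_design/auth'}         # the final row: no 'sector' key
row.get('sector')                    # -> None, no exception raised
row.get('sector') == 'technology'    # -> False, so the loop simply skips this row
row['sector']                        # -> raises KeyError: 'sector'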

Python reading date from excel throws error

I am trying to read a date from an Excel file using the xlrd module. Below is my code:
# Variables
myfile = '/home/mobaxterm/.git/Operation_Documentation/docs/Servicing Portal User & Certificate Inventory.xlsx'
mydate = 'Expiration Date'
row_head = 0

# Import required modules
import xlrd
import datetime

today = datetime.date.today()
book = xlrd.open_workbook(myfile)
sheet = book.sheet_by_index(1)
for col_index in range(sheet.ncols):
    print xlrd.cellname(row_head, col_index), "-",
    print sheet.cell(row_head, col_index).value
    if sheet.cell(row_head, col_index).value == mydate:
        for raw_index in range(sheet.nrows):
            expire = sheet.cell(raw_index, col_index).value
            print expire
            expire_date = datetime.datetime(*xlrd.xldate_as_tuple(expire, book.datemode))
            print 'datetime: %s' % expire_date
        break
While running the code, I am getting the following error:
Traceback (most recent call last):
File "cert_monitor.py", line 31, in <module>
expire_date = datetime.datetime(*xlrd.xldate_as_tuple(expire, book.datemode))
File "/usr/lib/python2.6/site-packages/xlrd/xldate.py", line 61, in xldate_as_tuple
xldays = int(xldate)
ValueError: invalid literal for int() with base 10: 'Expiration Date'
Can anyone suggest what could be the issue here?
Thanks for your time.
I believe you just need to skip the header row:
for raw_index in range(1, sheet.nrows):
    ...
You check that sheet.cell(row_head, col_index).value == mydate and then iterate over all the rows, but you should skip row_head first: its value is mydate itself, which is not a date but the plain string 'Expiration Date', and that is what xldate_as_tuple chokes on.
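A sketch of the fixed inner loop, identical to the question's code apart from starting the range at 1:

        for raw_index in range(1, sheet.nrows):  # start at 1 to skip the 'Expiration Date' header cell
            expire = sheet.cell(raw_index, col_index).value
            expire_date = datetime.datetime(*xlrd.xldate_as_tuple(expire, book.datemode))
            print 'datetime: %s' % expire_date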
