I've obtained the historical values for an example stock (Apple in this case) and was following an example I saw online. However, while their code succeeded, mine failed with a KeyError.
Could anyone tell/show me what's wrong and, hopefully, how to fix it? The error is:
KeyError Traceback (most recent call last)
~\anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2645 try:
-> 2646 return self._engine.get_loc(key)
2647 except KeyError:
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 1
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-60-d89407b24e87> in <module>
3
4 for i in range(1, len(typical_price)):
----> 5 if typical_price[i] > typical_price[i-1]:
6 positive_flow.append(money_flow[i-1])
7 negative_flow.append(0)
~\anaconda3\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
2798 if self.columns.nlevels > 1:
2799 return self._getitem_multilevel(key)
-> 2800 indexer = self.columns.get_loc(key)
2801 if is_integer(indexer):
2802 indexer = [indexer]
~\anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2646 return self._engine.get_loc(key)
2647 except KeyError:
-> 2648 return self._engine.get_loc(self._maybe_cast_indexer(key))
2649 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
2650 if indexer.ndim > 1 or indexer.size > 1:
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 1
Code is:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import math
import pandas_datareader as pdr
stocks = ['AAPL']
data_close = pdr.get_data_yahoo(stocks, start='2020-01-01')['Close']
data_high = pdr.get_data_yahoo(stocks, start='2020-01-01')['High']
data_low = pdr.get_data_yahoo(stocks, start='2020-01-01')['Low']
data_volume = pdr.get_data_yahoo(stocks, start='2020-01-01')['Volume']
typical_price = (data_close + data_high + data_low)/3;
money_flow = typical_price * data_volume;
positive_flow = []
negative_flow = []
for i in range(1, len(typical_price)):
if typical_price[i] > typical_price[i-1]:
positive_flow.append(money_flow[i-1])
negative_flow.append(0)
elif typical_price[i] < typical_price[i-1]:
positive_flow.append(0)
negative_flow.append(money_flow[i-1])
else:
positive_flow.append(0)
negative_flow.append(0)
The error appears when I run the final part of the code, where I try to compute the positive and negative money flow for my MFI (Money Flow Index) algorithm.
Use iloc indexing
# Walk consecutive rows by integer position. Plain typical_price[i] is a
# column lookup on a DataFrame, so positional access has to go through .iloc.
for i in range(1, len(typical_price)):
    # Assumes a single ticker column (as in the question), so each row holds
    # exactly one value and .item() unwraps it to a plain scalar.
    current_price = typical_price.iloc[i].item()
    previous_price = typical_price.iloc[i - 1].item()
    # Store scalars rather than one-element row Series, so the flow lists
    # contain only numbers alongside the literal 0 entries.
    previous_flow = money_flow.iloc[i - 1].item()
    if current_price > previous_price:
        positive_flow.append(previous_flow)
        negative_flow.append(0)
    elif current_price < previous_price:
        positive_flow.append(0)
        negative_flow.append(previous_flow)
    else:
        positive_flow.append(0)
        negative_flow.append(0)
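typical_price and money_flow are DataFrames that are most likely indexed by date rather than by integers, so typical_price[i] is treated as a lookup of a column named i and raises KeyError: 1 (note that the traceback goes through DataFrame.__getitem__ and self.columns.get_loc). If you want to access a row by integer location, use iloc.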
Related
When I try to print a single column of my data set, it shows the following error:
KeyError Traceback (most recent call
last) ~\anaconda3\lib\site-packages\pandas\core\indexes\base.py in
get_loc(self, key, method, tolerance) 2645 try:
-> 2646 return self._engine.get_loc(key) 2647 except KeyError:
pandas_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas_libs\hashtable_class_helper.pxi in
pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas_libs\hashtable_class_helper.pxi in
pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'Label'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call
last) in
----> 1 data['Label']
~\anaconda3\lib\site-packages\pandas\core\frame.py in
getitem(self, key) 2798 if self.columns.nlevels > 1: 2799 return self._getitem_multilevel(key)
-> 2800 indexer = self.columns.get_loc(key) 2801 if is_integer(indexer): 2802 indexer = [indexer]
~\anaconda3\lib\site-packages\pandas\core\indexes\base.py in
get_loc(self, key, method, tolerance) 2646 return
self._engine.get_loc(key) 2647 except KeyError:
-> 2648 return self._engine.get_loc(self._maybe_cast_indexer(key)) 2649
indexer = self.get_indexer([key], method=method, tolerance=tolerance)
2650 if indexer.ndim > 1 or indexer.size > 1:
pandas_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas_libs\hashtable_class_helper.pxi in
pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas_libs\hashtable_class_helper.pxi in
pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'Label'
data['Label']
It is possible that the column name has trailing spaces. Print the column names to verify:
print(data.columns)
or strip the surrounding whitespace from the column names first and then print them again:
data.columns = data.columns.str.strip()
If you have a DataFrame and would like to access or select specific rows/columns from it, you can use square brackets.
Now suppose that you want to select a column from the data DataFrame (as per your question):
data["Label"]
If you do not know the column names, you can get the list of columns and then select a column by its position in that list:
columns = data.columns.values.tolist()
data[columns[index]]
I want to calculate the total return, (last price - first price) / first price. I would like to do it by downloading the data directly from Yahoo, without first downloading the CSV file and then loading it into Python.
Any idea why I get the error below and how to solve it?
Thank you!
import pandas as pd
import numpy as np
import yfinance as yf
import matplotlib.pyplot as plt
import datetime as dt
start=dt.datetime(2019,5,1)
end=dt.datetime.now()
data= yf.download('TSLA',start,end)['Adj Close']
data = pd.DataFrame(data).dropna()
total_return = (data[-1] - data[0]) / data[0]
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
~\anaconda4\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2894 try:
-> 2895 return self._engine.get_loc(casted_key)
2896 except KeyError as err:
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 0
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
<ipython-input-45-bb800ce55aa9> in <module>
----> 1 total_return = (data[0]- data[0])/ data[0]
~\anaconda4\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
2900 if self.columns.nlevels > 1:
2901 return self._getitem_multilevel(key)
-> 2902 indexer = self.columns.get_loc(key)
2903 if is_integer(indexer):
2904 indexer = [indexer]
~\anaconda4\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2895 return self._engine.get_loc(casted_key)
2896 except KeyError as err:
-> 2897 raise KeyError(key) from err
2898
2899 if tolerance is not None:
KeyError: 0
The problem is that you neglected to specify the DataFrame column you want to use: data is a DataFrame, so data[0] looks for a column named 0.
In that last line, replace each reference to data with data["Adj Close"].
# Use .iloc for the first/last rows: the series is presumably indexed by date,
# and treating plain integer keys as positions on such a Series is deprecated
# in recent pandas.
total_return = (
    data["Adj Close"].iloc[-1] - data["Adj Close"].iloc[0]
) / data["Adj Close"].iloc[0]
For the code
# Ensure we have a DataFrame and drop rows with missing prices.
data = pd.DataFrame(data).dropna()
# .iloc selects by position; plain [-1] / [0] on a Series that is not
# integer-indexed relies on a deprecated positional fallback.
print(data["Adj Close"].iloc[-1], data["Adj Close"].iloc[0])
# Total return = (last price - first price) / first price. The expression is
# wrapped in parentheses so it can span lines without a syntax error.
total_return = (
    data["Adj Close"].iloc[-1] - data["Adj Close"].iloc[0]
) / data["Adj Close"].iloc[0]
print(total_return)
The output is
570.22998046875 46.801998138427734
11.1838810980284
I have loaded a CSV file into a Pandas dataframe:
import pandas as pd
Name ID Sex M_Status DaysOff
Joe 3 M S 1
NaN NaN NaN NaN 2
NaN NaN NaN NaN 3
df = pd.read_csv('People.csv')
This data will then be loaded into an HTML file.
test = """
HTML code
"""
Now for preparing data for the HTML file:
df1 = df.filter(['Name','ID','Sex','M_Status','DaysOff'])
file = ""
for i, rows in df1.iterrows():
name = (df1['Name'][i])
id = (df1['ID'][i])
sex = (df1['Sex'][i])
m_status = (df1['M_Status'][i])
days_off = (df1['DaysOff'][i])
with open(f"personInfo{i}.html", "w") as file:
file.write(test.format(name,id,sex,m_status,days_off))
file.close()
And the error:
KeyError: 'days_off'
Note: This error occurs within the for loop.
Can anyone see where I'm going wrong? This error is generated when you try to grab data from a column whose name doesn't match, or when the column doesn't have that header name. However, it does.
Error Information:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in
get_loc(self, key, method, tolerance)
2656 try:
-> 2657 return self._engine.get_loc(key)
2658 except KeyError:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in
pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in
pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'days_off'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-16-35e6b916521b> in <module>
1 name = (df1['Name'][i])
2 id = (df1['ID'][i])
3 sex = (df1['Sex'][i])
4 m_status = (df1['M_Status'][i])
----> 5 days_off = (df1['DaysOff'][i])
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py in
__getitem__(self, key)
2925 if self.columns.nlevels > 1:
2926 return self._getitem_multilevel(key)
-> 2927 indexer = self.columns.get_loc(key)
2928 if is_integer(indexer):
2929 indexer = [indexer]
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in
get_loc(self, key, method, tolerance)
2657 return self._engine.get_loc(key)
2658 except KeyError:
-> 2659 return
self._engine.get_loc(self._maybe_cast_indexer(key))
2660 indexer = self.get_indexer([key], method=method,
tolerance=tolerance)
2661 if indexer.ndim > 1 or indexer.size > 1:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in
pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in
pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'days_off'
Just a hunch, but your error message suggests that you are trying to access your dataframe column with the key days_off, when it should be DaysOff. I don't see any place in the code you provided where this happens, but I would double-check your source code file to make sure that you are using the right key name.
I've resolved this and what a stupid error it was!
Basically there was a space at the end of the header name.
What Python wanted/was expecting:
days_off = (df1['DaysOff '][i])
whereas I was giving it:
days_off = (df1['DaysOff'][i])
Very stupid human error. Thanks to all that looked into it though
I am a beginner in Python. I merged two columns. After that, I tried to replace the 'Not assigned' value of one column with the value from another column, but I can't do that. If I use the pre-modified dataframe, then the replacement works.
I scraped a table from a page and am then modifying the data in that dataframe.
import pandas as pd
import numpy as np
import requests
pip install lxml
toronto_url='https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
toronto_df1= pd.read_html('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')[0]
toronto_df1.head()
toronto_df1.drop(toronto_df1.loc[toronto_df1['Borough']=="Not assigned"].index, inplace=True)
toronto_df2=toronto_df1.groupby(['Postcode','Borough'],sort=False).agg(lambda x: ','.join(x))
toronto_df2.loc[toronto_df2['Neighbourhood'] == "Not assigned", 'Neighbourhood'] = toronto_df2['Borough']
This is the code I have used.
I expect to replace the Neighbourhood value with the Borough value.
Instead, I got this error:
KeyError Traceback (most recent call
last)
/usr/local/lib/python3.6/dist-packages/pandas/core/indexes/base.py in
get_loc(self, key, method, tolerance) 2656 try:
-> 2657 return self._engine.get_loc(key) 2658 except KeyError:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in
pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in
pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'Borough'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call
last) 9 frames
/usr/local/lib/python3.6/dist-packages/pandas/core/indexes/base.py in
get_loc(self, key, method, tolerance) 2657 return
self._engine.get_loc(key) 2658 except KeyError:
-> 2659 return self._engine.get_loc(self._maybe_cast_indexer(key)) 2660
indexer = self.get_indexer([key], method=method, tolerance=tolerance)
2661 if indexer.ndim > 1 or indexer.size > 1:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in
pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in
pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'Borough'
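The reason for your KeyError is that Borough is not a column but an index level: after groupby(['Postcode','Borough']), the grouping keys become the index of the result. The solution is to add reset_index: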
toronto_df1= pd.read_html('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')[0]
#boolean indexing
toronto_df1 = toronto_df1.loc[toronto_df1['Borough']!="Not assigned"]
toronto_df2 = toronto_df1.groupby(['Postcode','Borough'],sort=False)['Neighbourhood'].agg(','.join).reset_index()
toronto_df2.loc[toronto_df2['Neighbourhood'] == "Not assigned", 'Neighbourhood'] = toronto_df2['Borough']
Or pass the parameter as_index=False to groupby:
toronto_df1= pd.read_html('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')[0]
#boolean indexing
toronto_df1 = toronto_df1.loc[toronto_df1['Borough']!="Not assigned"]
toronto_df2=toronto_df1.groupby(['Postcode','Borough'],sort=False, as_index=False)['Neighbourhood'].agg(','.join)
toronto_df2.loc[toronto_df2['Neighbourhood'] == "Not assigned", 'Neighbourhood'] = toronto_df2['Borough']
I am attempting logistic regression for a class, and I got stuck on something very strange, I haven't seen this error before. I am a complete python noob so I'd appreciate being very specific in details. Please dumb it down for me!
Here is my code:
import pandas as pd
import matplotlib.pyplot as plt
cancer=pd.read_csv('C:\\Users\\kpic1\\Desktop\\Spring 2019\\IDIS 450\\Second exam\\Provided documents\\Cancer_Research_Q2.csv')
cancer.set_index('PatientID', inplace=True)
cancer=pd.get_dummies(cancer, columns=['Class'], drop_first=True)
cancer.head()
from sklearn.model_selection import train_test_split
cancer.rename(columns={'Class_Malignant':'Malignant'}, inplace=True)
cancer.head()
The table that is returned shows the dummy variable on the very end still being named "Class_Malignant."
When I try to print just this column using
print(cancer['Class_Malignant'])
I get the following long error:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
~\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
3077 try:
-> 3078 return self._engine.get_loc(key)
3079 except KeyError:
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'Class_Malignant'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-15-09d1356e98ac> in <module>
1 #for some reason, "Class_Malignant" is not registering as a column head?
----> 2 print(cancer['Class_Malignant'])
~\Anaconda3\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
2686 return self._getitem_multilevel(key)
2687 else:
-> 2688 return self._getitem_column(key)
2689
2690 def _getitem_column(self, key):
~\Anaconda3\lib\site-packages\pandas\core\frame.py in _getitem_column(self, key)
2693 # get column
2694 if self.columns.is_unique:
-> 2695 return self._get_item_cache(key)
2696
2697 # duplicate columns & possible reduce dimensionality
~\Anaconda3\lib\site-packages\pandas\core\generic.py in _get_item_cache(self, item)
2487 res = cache.get(item)
2488 if res is None:
-> 2489 values = self._data.get(item)
2490 res = self._box_item_values(item, values)
2491 cache[item] = res
~\Anaconda3\lib\site-packages\pandas\core\internals.py in get(self, item, fastpath)
4113
4114 if not isna(item):
-> 4115 loc = self.items.get_loc(item)
4116 else:
4117 indexer = np.arange(len(self.items))[isna(self.items)]
~\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
3078 return self._engine.get_loc(key)
3079 except KeyError:
-> 3080 return self._engine.get_loc(self._maybe_cast_indexer(key))
3081
3082 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'Class_Malignant'
From what I can tell from this, the column name "Class_Malignant" does not exist in the data set? Anyone have any ideas what's going on?