I got a csv file with the following columns:
Province/State Country/Region Lat Long 1/22/20 1/23/20 ...
This is the dataframe:
Country/Region 1/22/20 1/23/20 1/24/20
Afghanistan 100 200 300
Albania 400 500 0
Algeria 20 30 70
I'm trying to get a function with inputs: csv-file and last date I want to filter.
Here I show the function I did:
def create_covid_pickle (csv_doc, date):
csv_doc = pd.read_csv(csv_doc)
# assign correct format to date
date = datetime.datetime.strptime(date,"%m-%d-%y")
date = date.strftime("%m/%d/%y")
# delete columns I don't need
csv_doc = csv_doc.loc[:, 'Country/Region': date]
csv_doc = csv_doc.drop(columns = ['Lat', 'Long'])
# to_dict
# csv_dictionary = csv_doc.to_dict()
csv_dictionary = [{c: {'time': d.columns.tolist(), 'cases': d.values.tolist()[0]}}
for c, d in csv_doc.set_index(['Country/Region']).groupby('Country/Region')]
return csv_dictionary
I am getting 2 errors in csv_doc = csv_doc.loc[:, 'Country/Region': date] The first, when using .loc[] and the second because of date date
The complete message of error shown is
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
~/.local/lib/python3.8/site-packages/pandas/core/indexes/base.py in get_slice_bound(self, label, side, kind)
4844 try:
-> 4845 return self._searchsorted_monotonic(label, side)
4846 except ValueError:
~/.local/lib/python3.8/site-packages/pandas/core/indexes/base.py in _searchsorted_monotonic(self, label, side)
4805
-> 4806 raise ValueError("index must be monotonic increasing or decreasing")
4807
ValueError: index must be monotonic increasing or decreasing
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-2-a84a2fe01741> in <module>
1 # Test
----> 2 create_covid_pickle("data/time_series_covid19_confirmed_global.csv", "01-06-20")
3
4 # Load and print some data
5 # country_cases = pickle.load(open("primera_ola.pkl", "rb"))
<ipython-input-1-1c34e9c20811> in create_covid_pickle(csv_doc, date)
29
30 # seleccionar columnas - crear funcion 1 indepte?
---> 31 csv_doc = csv_doc.loc[:, 'Country/Region': date]
32
33 # REVISAR FUNCION CON NUEVOS CAMBIOS DE LAS LINEAS ANTERIORES
~/.local/lib/python3.8/site-packages/pandas/core/indexing.py in __getitem__(self, key)
1760 except (KeyError, IndexError, AttributeError):
1761 pass
-> 1762 return self._getitem_tuple(key)
1763 else:
1764 # we by definition only have the 0th axis
~/.local/lib/python3.8/site-packages/pandas/core/indexing.py in _getitem_tuple(self, tup)
1287 continue
1288
-> 1289 retval = getattr(retval, self.name)._getitem_axis(key, axis=i)
1290
1291 return retval
~/.local/lib/python3.8/site-packages/pandas/core/indexing.py in _getitem_axis(self, key, axis)
1910 if isinstance(key, slice):
1911 self._validate_key(key, axis)
-> 1912 return self._get_slice_axis(key, axis=axis)
1913 elif com.is_bool_indexer(key):
1914 return self._getbool_axis(key, axis=axis)
~/.local/lib/python3.8/site-packages/pandas/core/indexing.py in _get_slice_axis(self, slice_obj, axis)
1794
1795 labels = obj._get_axis(axis)
-> 1796 indexer = labels.slice_indexer(
1797 slice_obj.start, slice_obj.stop, slice_obj.step, kind=self.name
1798 )
~/.local/lib/python3.8/site-packages/pandas/core/indexes/base.py in slice_indexer(self, start, end, step, kind)
4711 slice(1, 3)
4712 """
-> 4713 start_slice, end_slice = self.slice_locs(start, end, step=step, kind=kind)
4714
4715 # return a slice
~/.local/lib/python3.8/site-packages/pandas/core/indexes/base.py in slice_locs(self, start, end, step, kind)
4930 end_slice = None
4931 if end is not None:
-> 4932 end_slice = self.get_slice_bound(end, "right", kind)
4933 if end_slice is None:
4934 end_slice = len(self)
~/.local/lib/python3.8/site-packages/pandas/core/indexes/base.py in get_slice_bound(self, label, side, kind)
4846 except ValueError:
4847 # raise the original KeyError
-> 4848 raise err
4849
4850 if isinstance(slc, np.ndarray):
~/.local/lib/python3.8/site-packages/pandas/core/indexes/base.py in get_slice_bound(self, label, side, kind)
4840 # we need to look up the label
4841 try:
-> 4842 slc = self.get_loc(label)
4843 except KeyError as err:
4844 try:
~/.local/lib/python3.8/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2646 return self._engine.get_loc(key)
2647 except KeyError:
-> 2648 return self._engine.get_loc(self._maybe_cast_indexer(key))
2649 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
2650 if indexer.ndim > 1 or indexer.size > 1:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: '01/06/20'
I thought that the lines referred to datetime should avoid this problem, but it doesn't.
How can I solve it?
Thank you
date.strftime("%m/%d/%y") returns 01/31/20, whereas the same column in your dataframe is labelled 1/31/20, thus the mismatch.
I would suggest that you try this:
def create_covid_pickle (csv_doc, date):
csv_doc = pd.read_csv(csv_doc)
# properly format csv_doc columns
csv_doc.columns = [
datetime.datetime.strptime(col, "%m/%d/%y").strftime("%m/%d/%y")
if col.replace("/", "").isnumeric()
else col
for col in csv_doc.columns
]
# assign correct format to date
date = datetime.datetime.strptime(date,"%m-%d-%y")
date = date.strftime("%m/%d/%y")
# Sort columns
csv_doc = csv_doc.sort_index(axis=1, ascending=False)
# delete columns I don't need
csv_doc = csv_doc.loc[:, 'Country/Region': date]
...
Related
I inherited this code from previous employee, and I tried to run this code but I'm getting an error.
def replaceitem(x):
if x in ['ORION', 'ACTION', 'ICE', 'IRIS', 'FOCUS']:
return 'CRM Application'
else:
return x
def clean_list(row):
new_list = sorted(set(row['APLN_NM']), key=lambda x: row['APLN_NM'].index(x))
for idx,i in enumerate(new_list):
new_list[idx] = replaceitem(i)
new_list = sorted(set(new_list), key=lambda x: new_list.index(x))
return new_list
#*********************************************************************************************************************************************
df_agg['APLN_NM_DISTINCT'] = df_agg.apply(clean_list, axis = 1)
df_agg_single['APLN_NM_DISTINCT'] = df_agg_single.apply(clean_list, axis = 1)
While running the code I got this error:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
/opt/rh/rh-python36/root/usr/lib64/python3.6/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2890 try:
-> 2891 return self._engine.get_loc(casted_key)
2892 except KeyError as err:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'APLN_NM_DISTINCT'
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
/opt/rh/rh-python36/root/usr/lib64/python3.6/site-packages/pandas/core/generic.py in _set_item(self, key, value)
3570 try:
-> 3571 loc = self._info_axis.get_loc(key)
3572 except KeyError:
/opt/rh/rh-python36/root/usr/lib64/python3.6/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2892 except KeyError as err:
-> 2893 raise KeyError(key) from err
2894
KeyError: 'APLN_NM_DISTINCT'
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
<ipython-input-71-e8b5e8d5b514> in <module>
431 #*********************************************************************************************************************************************
432 df_agg['APLN_NM_DISTINCT'] = df_agg.apply(clean_list, axis = 1)
--> 433 df_agg_single['APLN_NM_DISTINCT'] = df_agg_single.apply(clean_list, axis = 1)
434
435 df_agg['TOTAL_HOLD_TIME'] = df_agg_single['TOTAL_HOLD_TIME'].astype(int)
/opt/rh/rh-python36/root/usr/lib64/python3.6/site-packages/pandas/core/frame.py in __setitem__(self, key, value)
3038 else:
3039 # set column
-> 3040 self._set_item(key, value)
3041
3042 def _setitem_slice(self, key: slice, value):
/opt/rh/rh-python36/root/usr/lib64/python3.6/site-packages/pandas/core/frame.py in _set_item(self, key, value)
3115 self._ensure_valid_index(value)
3116 value = self._sanitize_column(key, value)
-> 3117 NDFrame._set_item(self, key, value)
3118
3119 # check if we are modifying a copy
/opt/rh/rh-python36/root/usr/lib64/python3.6/site-packages/pandas/core/generic.py in _set_item(self, key, value)
3572 except KeyError:
3573 # This item wasn't present, just insert at end
-> 3574 self._mgr.insert(len(self._info_axis), key, value)
3575 return
3576
/opt/rh/rh-python36/root/usr/lib64/python3.6/site-packages/pandas/core/internals/managers.py in insert(self, loc, item, value, allow_duplicates)
1187 value = _safe_reshape(value, (1,) + value.shape)
1188
-> 1189 block = make_block(values=value, ndim=self.ndim, placement=slice(loc, loc + 1))
1190
1191 for blkno, count in _fast_count_smallints(self.blknos[loc:]):
/opt/rh/rh-python36/root/usr/lib64/python3.6/site-packages/pandas/core/internals/blocks.py in make_block(values, placement, klass, ndim, dtype)
2717 values = DatetimeArray._simple_new(values, dtype=dtype)
2718
-> 2719 return klass(values, ndim=ndim, placement=placement)
2720
2721
/opt/rh/rh-python36/root/usr/lib64/python3.6/site-packages/pandas/core/internals/blocks.py in __init__(self, values, placement, ndim)
2373 values = np.array(values, dtype=object)
2374
-> 2375 super().__init__(values, ndim=ndim, placement=placement)
2376
2377 #property
/opt/rh/rh-python36/root/usr/lib64/python3.6/site-packages/pandas/core/internals/blocks.py in __init__(self, values, placement, ndim)
128 if self._validate_ndim and self.ndim and len(self.mgr_locs) != len(self.values):
129 raise ValueError(
--> 130 f"Wrong number of items passed {len(self.values)}, "
131 f"placement implies {len(self.mgr_locs)}"
132 )
ValueError: Wrong number of items passed 3, placement implies 1
df_agg and df_agg_single are dataframes with same column names.
But the data is present only in df_agg
data in df_agg dataframe looks like this
data in df_agg_single dataframe looks like this
so if the data frame is empty I am getting this type of error while applying clean_list method on the data frame.
I identified the error is occurring only if the data frame is empty, so I tried if else to filter the empty data frame and it worked.
if df_agg.empty:
df_agg['APLN_NM_DISTINCT'] = ''
else:
df_agg['APLN_NM_DISTINCT'] = df_agg.apply(clean_list, axis = 1)
if df_agg_single.empty:
df_agg_single['APLN_NM_DISTINCT'] = ''
else:
df_agg_single['APLN_NM_DISTINCT'] = df_agg_single.apply(clean_list, axis = 1)
I have a dataframe with a lot of columns, but in this case I am trying to create an if conditional just for one of them.
The idea is compare one row with the previous one to check if they are equal. But the code it doesn't work.
Proyectonevera2['CodProducto']
Out:
0 10390792
1 10390792
2 10390792
3 10390792
4 10390792
...
12685 10229147
12686 10229147
12687 10229147
12688 10229147
12689 10229147
Name: CodProducto, Length: 12690, dtype: object
The column is called "CodProducto" and the type is object
for i in range(0,len(Proyectonevera2)):
if Proyectonevera2.loc[i+1,'CodProducto'] == Proyectonevera2.loc[i,'CodProducto']:
Proyectonevera2.loc[i,'Prueba'] = 1
else:
Proyectonevera2.loc[i,'Prueba'] = 0
But when I run the code, it appears this error:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
3360 try:
-> 3361 return self._engine.get_loc(casted_key)
3362 except KeyError as err:
C:\ProgramData\Anaconda3\lib\site-packages\pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
C:\ProgramData\Anaconda3\lib\site-packages\pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
KeyError: 12690
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_17980/3964080695.py in <module>
1 for i in range(0,len(Proyectonevera2)):
----> 2 if Proyectonevera2.loc[i+1,'CodProducto'] == Proyectonevera2.loc[i,'CodProducto']:
3 Proyectonevera2.loc[i,'Prueba'] = 1
4 else:
5 Proyectonevera2.loc[i,'Prueba'] = 0
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexing.py in __getitem__(self, key)
923 with suppress(KeyError, IndexError):
924 return self.obj._get_value(*key, takeable=self._takeable)
--> 925 return self._getitem_tuple(key)
926 else:
927 # we by definition only have the 0th axis
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexing.py in _getitem_tuple(self, tup)
1098 def _getitem_tuple(self, tup: tuple):
1099 with suppress(IndexingError):
-> 1100 return self._getitem_lowerdim(tup)
1101
1102 # no multi-index, so validate all of the indexers
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexing.py in _getitem_lowerdim(self, tup)
836 # We don't need to check for tuples here because those are
837 # caught by the _is_nested_tuple_indexer check above.
--> 838 section = self._getitem_axis(key, axis=i)
839
840 # We should never have a scalar section here, because
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexing.py in _getitem_axis(self, key, axis)
1162 # fall thru to straight lookup
1163 self._validate_key(key, axis)
-> 1164 return self._get_label(key, axis=axis)
1165
1166 def _get_slice_axis(self, slice_obj: slice, axis: int):
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexing.py in _get_label(self, label, axis)
1111 def _get_label(self, label, axis: int):
1112 # GH#5667 this will fail if the label is not present in the axis.
-> 1113 return self.obj.xs(label, axis=axis)
1114
1115 def _handle_lowerdim_multi_index_axis0(self, tup: tuple):
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\generic.py in xs(self, key, axis, level, drop_level)
3774 raise TypeError(f"Expected label or tuple of labels, got {key}") from e
3775 else:
-> 3776 loc = index.get_loc(key)
3777
3778 if isinstance(loc, np.ndarray):
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
3361 return self._engine.get_loc(casted_key)
3362 except KeyError as err:
-> 3363 raise KeyError(key) from err
3364
3365 if is_scalar(key) and isna(key) and not self.hasnans:
KeyError: 12690
You don't need a loop to do that:
Proyectonevera2['Prueba'] = (Proyectonevera2['CodProducto'] == Proyectonevera2['CodProducto'].shift()).astype('int')
Wrong number of items passed 2, placement implies 1
I want to calculate 'overnight returns' & 'Intraday returns' of AAMZN & AAPL Error
df['intradayReturn'] = (df1["adj_close"]/df1["open"])-1
df['overnightReturn'] = (df1["open_shift"]/df1["adj_close"])-1
in python , below is my code :
import numpy as np
import pandas as pd
import datetime
import matplotlib.pyplot as plt
import warnings
df = yf.download(['AAPL','AMZN'],start='2006-01-01',end='2020-12-31')**
df.head()
Adj Close Close High Low Open Volume
AAPL AMZN AAPL AMZN AAPL AMZN AAPL AMZN AAPL AMZN AAPL AMZN
Date
2006-01-03 2.299533 47.580002 2.669643 47.580002 2.669643 47.849998 2.580357 46.250000 2.585000 47.470001 807234400 7582200
2006-01-04 2.306301 47.250000 2.677500 47.250000 2.713571 47.730000 2.660714 46.689999 2.683214 47.490002 619603600 7440900
2006-01-05 2.288151 47.650002 2.656429 47.650002 2.675000 48.200001 2.633929 47.110001 2.672500 47.160000 449422400 5417200
2006-01-06 2.347216 47.869999 2.725000 47.869999 2.739286 48.580002 2.662500 47.320000 2.687500 47.970001 704457600 6152900
2006-01-09 2.339525 47.080002 2.716071 47.080002 2.757143 47.099998 2.705000 46.400002 2.740357 46.549999 675040800 8943100**
Upto this it was working fine
But when i used this formula
df['intradayReturn'] = (df["Adj Close"]/df["Open"])-1
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
~\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
3079 try:
-> 3080 return self._engine.get_loc(casted_key)
3081 except KeyError as err:
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'intradayReturn'
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
~\Anaconda3\lib\site-packages\pandas\core\generic.py in _set_item(self, key, value)
3824 try:
-> 3825 loc = self._info_axis.get_loc(key)
3826 except KeyError:
~\Anaconda3\lib\site-packages\pandas\core\indexes\multi.py in get_loc(self, key, method)
2875 if not isinstance(key, tuple):
-> 2876 loc = self._get_level_indexer(key, level=0)
2877 return _maybe_to_slice(loc)
~\Anaconda3\lib\site-packages\pandas\core\indexes\multi.py in _get_level_indexer(self, key, level, indexer)
3157
-> 3158 idx = self._get_loc_single_level_index(level_index, key)
3159
~\Anaconda3\lib\site-packages\pandas\core\indexes\multi.py in _get_loc_single_level_index(self, level_index, key)
2808 else:
-> 2809 return level_index.get_loc(key)
2810
~\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
3081 except KeyError as err:
-> 3082 raise KeyError(key) from err
3083
KeyError: 'intradayReturn'
enter code here
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
<ipython-input-24-b2a4b46270a9> in <module>
----> 1 df['intradayReturn'] = (df["Adj Close"]/df["Open"])-1
~\Anaconda3\lib\site-packages\pandas\core\frame.py in __setitem__(self, key, value)
3161 else:
3162 # set column
-> 3163 self._set_item(key, value)
3164
3165 def _setitem_slice(self, key: slice, value):
~\Anaconda3\lib\site-packages\pandas\core\frame.py in _set_item(self, key, value)
3238 self._ensure_valid_index(value)
3239 value = self._sanitize_column(key, value)
-> 3240 NDFrame._set_item(self, key, value)
3241
3242 # check if we are modifying a copy
~\Anaconda3\lib\site-packages\pandas\core\generic.py in _set_item(self, key, value)
3826 except KeyError:
3827 # This item wasn't present, just insert at end
-> 3828 self._mgr.insert(len(self._info_axis), key, value)
3829 return
3830
~\Anaconda3\lib\site-packages\pandas\core\internals\managers.py in insert(self, loc, item, value, allow_duplicates)
1201 value = safe_reshape(value, (1,) + value.shape)
1202
-> 1203 block = make_block(values=value, ndim=self.ndim, placement=slice(loc, loc + 1))
1204
1205 for blkno, count in _fast_count_smallints(self.blknos[loc:]):
~\Anaconda3\lib\site-packages\pandas\core\internals\blocks.py in make_block(values, placement, klass, ndim, dtype)
2730 values = DatetimeArray._simple_new(values, dtype=dtype)
2731
-> 2732 return klass(values, ndim=ndim, placement=placement)
2733
2734
~\Anaconda3\lib\site-packages\pandas\core\internals\blocks.py in __init__(self, values, placement, ndim)
141 if self._validate_ndim and self.ndim and len(self.mgr_locs) != len(self.values):
142 raise ValueError(
--> 143 f"Wrong number of items passed {len(self.values)}, "
144 f"placement implies {len(self.mgr_locs)}"
145 )
its showing this error
ValueError: Wrong number of items passed 2, placement implies 1
It's EZ PZ you can easily do this by the code below.
gf = (df['Adj Close']['AAPL']/df['Open']['AAPL']-1).to_frame()
sd = (df['Adj Close']['AMZN']/df['Open']['AMZN']-1).to_frame()
df['intradayReturn', 'AAPL'] = gf
df['intradayReturn', 'AMZN'] = sd
df.head()
so your output is like:
you need to assign separately when it comes to multi-index columns.
now it's your turn for overnight return 😉.
I'm getting a key error when trying to pull a row of my data by index. I'm choosing the index based off looking at the .head() command so I know the value is there. My index is based off of a policy id.
I've looked at a few posts like the following but they don't answer the question:
KeyError in Python Pandas dataframe using .loc
Here is a screenshot of the data frame:
Then I try:
df_test.loc[92026963:]
This gives me a key error. Then I tried:
df_test.loc[92026963]
This gave me the same error. I even tried putting the policy id in quotes. Full traceback below:
ValueError Traceback (most recent call last)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_slice_bound(self, label, side, kind)
4804 try:
-> 4805 return self._searchsorted_monotonic(label, side)
4806 except ValueError:
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in _searchsorted_monotonic(self, label, side)
4764
-> 4765 raise ValueError('index must be monotonic increasing or decreasing')
4766
ValueError: index must be monotonic increasing or decreasing
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-45-85aa3a940778> in <module>
----> 1 df_test.loc[92026963:]
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexing.py in __getitem__(self, key)
1498
1499 maybe_callable = com.apply_if_callable(key, self.obj)
-> 1500 return self._getitem_axis(maybe_callable, axis=axis)
1501
1502 def _is_scalar_access(self, key):
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexing.py in _getitem_axis(self, key, axis)
1865 if isinstance(key, slice):
1866 self._validate_key(key, axis)
-> 1867 return self._get_slice_axis(key, axis=axis)
1868 elif com.is_bool_indexer(key):
1869 return self._getbool_axis(key, axis=axis)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexing.py in _get_slice_axis(self, slice_obj, axis)
1531 labels = obj._get_axis(axis)
1532 indexer = labels.slice_indexer(slice_obj.start, slice_obj.stop,
-> 1533 slice_obj.step, kind=self.name)
1534
1535 if isinstance(indexer, slice):
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in slice_indexer(self, start, end, step, kind)
4671 """
4672 start_slice, end_slice = self.slice_locs(start, end, step=step,
-> 4673 kind=kind)
4674
4675 # return a slice
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in slice_locs(self, start, end, step, kind)
4870 start_slice = None
4871 if start is not None:
-> 4872 start_slice = self.get_slice_bound(start, 'left', kind)
4873 if start_slice is None:
4874 start_slice = 0
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_slice_bound(self, label, side, kind)
4806 except ValueError:
4807 # raise the original KeyError
-> 4808 raise err
4809
4810 if isinstance(slc, np.ndarray):
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_slice_bound(self, label, side, kind)
4800 # we need to look up the label
4801 try:
-> 4802 slc = self._get_loc_only_exact_matches(label)
4803 except KeyError as err:
4804 try:
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in _get_loc_only_exact_matches(self, key)
4770 get_slice_bound.
4771 """
-> 4772 return self.get_loc(key)
4773
4774 def get_slice_bound(self, label, side, kind):
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2657 return self._engine.get_loc(key)
2658 except KeyError:
-> 2659 return self._engine.get_loc(self._maybe_cast_indexer(key))
2660 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
2661 if indexer.ndim > 1 or indexer.size > 1:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
KeyError: 92026963
It seems here is necessary assign back DataFrame.set_index:
df_test = df_test.set_index('policy')
Or use inplace=True:
df_test.set_index('policy', inplace=True)
I'm trying to makea column of boolean values based on if one column has the word 'hazard' and does not contain the word 'roof' (thus I get all non-roof hazards).
I'm using the below code and I'm getting an error:
labels['h_count2'] = labels[(labels['Description'].str.contains('Hazard')) & (labels['Description'].str.contains('Roof'))]
This is the traceback:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
C:\ProgramData\Anaconda3\envs\tensorflowenvironment\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2656 try:
-> 2657 return self._engine.get_loc(key)
2658 except KeyError:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'h_count2'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
C:\ProgramData\Anaconda3\envs\tensorflowenvironment\lib\site-packages\pandas\core\internals\managers.py in set(self, item, value)
1052 try:
-> 1053 loc = self.items.get_loc(item)
1054 except KeyError:
C:\ProgramData\Anaconda3\envs\tensorflowenvironment\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2658 except KeyError:
-> 2659 return self._engine.get_loc(self._maybe_cast_indexer(key))
2660 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'h_count2'
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
<ipython-input-46-51360ea6f27f> in <module>
1 labels['h_count'] = labels['Description'].str.contains('Roof Hazard')
2 labels['b_count'] = labels['Description'].str.contains('Brush')
----> 3 labels['h_count2'] = labels[(labels['Description'].str.contains('Hazard')) & (labels['Description'].str.contains('Roof'))]
4
5 def target(row):
C:\ProgramData\Anaconda3\envs\tensorflowenvironment\lib\site-packages\pandas\core\frame.py in __setitem__(self, key, value)
3368 else:
3369 # set column
-> 3370 self._set_item(key, value)
3371
3372 def _setitem_slice(self, key, value):
C:\ProgramData\Anaconda3\envs\tensorflowenvironment\lib\site-packages\pandas\core\frame.py in _set_item(self, key, value)
3444 self._ensure_valid_index(value)
3445 value = self._sanitize_column(key, value)
-> 3446 NDFrame._set_item(self, key, value)
3447
3448 # check if we are modifying a copy
C:\ProgramData\Anaconda3\envs\tensorflowenvironment\lib\site-packages\pandas\core\generic.py in _set_item(self, key, value)
3170
3171 def _set_item(self, key, value):
-> 3172 self._data.set(key, value)
3173 self._clear_item_cache()
3174
C:\ProgramData\Anaconda3\envs\tensorflowenvironment\lib\site-packages\pandas\core\internals\managers.py in set(self, item, value)
1054 except KeyError:
1055 # This item wasn't present, just insert at end
-> 1056 self.insert(len(self.items), item, value)
1057 return
1058
C:\ProgramData\Anaconda3\envs\tensorflowenvironment\lib\site-packages\pandas\core\internals\managers.py in insert(self, loc, item, value, allow_duplicates)
1156
1157 block = make_block(values=value, ndim=self.ndim,
-> 1158 placement=slice(loc, loc + 1))
1159
1160 for blkno, count in _fast_count_smallints(self._blknos[loc:]):
C:\ProgramData\Anaconda3\envs\tensorflowenvironment\lib\site-packages\pandas\core\internals\blocks.py in make_block(values, placement, klass, ndim, dtype, fastpath)
3093 values = DatetimeArray._simple_new(values, dtype=dtype)
3094
-> 3095 return klass(values, ndim=ndim, placement=placement)
3096
3097
C:\ProgramData\Anaconda3\envs\tensorflowenvironment\lib\site-packages\pandas\core\internals\blocks.py in __init__(self, values, placement, ndim)
2629
2630 super(ObjectBlock, self).__init__(values, ndim=ndim,
-> 2631 placement=placement)
2632
2633 #property
C:\ProgramData\Anaconda3\envs\tensorflowenvironment\lib\site-packages\pandas\core\internals\blocks.py in __init__(self, values, placement, ndim)
85 raise ValueError(
86 'Wrong number of items passed {val}, placement implies '
---> 87 '{mgr}'.format(val=len(self.values), mgr=len(self.mgr_locs)))
88
89 def _check_ndim(self, values, ndim):
ValueError: Wrong number of items passed 5, placement implies 1
What am i doing wrong?
labels:
A Description
0 1 Roof
1 2 Hazard
2 3 Roof Hazard
labels['h_count2'] = labels.Description.str.contains('Hazard') & ~labels.Description.str.contains('Roof')
Results in
A Description h_count2
0 1 Roof False
1 2 Hazard True
2 3 Roof Hazard False
labels = pd.DataFrame({'Description': ['Hazard Roof test', 'test', 'Hazard is not', 'test2']})
labels['h_count2'] = (labels['Description'].str.upper().str.contains('HAZARD')) & ~(labels['Description'].str.upper().str.contains('ROOF'))
Description h_count2
0 Hazard Roof test False
1 test False
2 Hazard is not True
3 test2 False