If conditional | Dataframe | Key error: 12690 - python

I have a dataframe with a lot of columns, but in this case I am trying to create an if conditional just for one of them.
The idea is to compare each row with the previous one to check whether they are equal, but the code doesn't work.
Proyectonevera2['CodProducto']
Out:
0 10390792
1 10390792
2 10390792
3 10390792
4 10390792
...
12685 10229147
12686 10229147
12687 10229147
12688 10229147
12689 10229147
Name: CodProducto, Length: 12690, dtype: object
The column is called "CodProducto" and the type is object
for i in range(0,len(Proyectonevera2)):
if Proyectonevera2.loc[i+1,'CodProducto'] == Proyectonevera2.loc[i,'CodProducto']:
Proyectonevera2.loc[i,'Prueba'] = 1
else:
Proyectonevera2.loc[i,'Prueba'] = 0
But when I run the code, this error appears:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
3360 try:
-> 3361 return self._engine.get_loc(casted_key)
3362 except KeyError as err:
C:\ProgramData\Anaconda3\lib\site-packages\pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
C:\ProgramData\Anaconda3\lib\site-packages\pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
KeyError: 12690
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_17980/3964080695.py in <module>
1 for i in range(0,len(Proyectonevera2)):
----> 2 if Proyectonevera2.loc[i+1,'CodProducto'] == Proyectonevera2.loc[i,'CodProducto']:
3 Proyectonevera2.loc[i,'Prueba'] = 1
4 else:
5 Proyectonevera2.loc[i,'Prueba'] = 0
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexing.py in __getitem__(self, key)
923 with suppress(KeyError, IndexError):
924 return self.obj._get_value(*key, takeable=self._takeable)
--> 925 return self._getitem_tuple(key)
926 else:
927 # we by definition only have the 0th axis
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexing.py in _getitem_tuple(self, tup)
1098 def _getitem_tuple(self, tup: tuple):
1099 with suppress(IndexingError):
-> 1100 return self._getitem_lowerdim(tup)
1101
1102 # no multi-index, so validate all of the indexers
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexing.py in _getitem_lowerdim(self, tup)
836 # We don't need to check for tuples here because those are
837 # caught by the _is_nested_tuple_indexer check above.
--> 838 section = self._getitem_axis(key, axis=i)
839
840 # We should never have a scalar section here, because
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexing.py in _getitem_axis(self, key, axis)
1162 # fall thru to straight lookup
1163 self._validate_key(key, axis)
-> 1164 return self._get_label(key, axis=axis)
1165
1166 def _get_slice_axis(self, slice_obj: slice, axis: int):
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexing.py in _get_label(self, label, axis)
1111 def _get_label(self, label, axis: int):
1112 # GH#5667 this will fail if the label is not present in the axis.
-> 1113 return self.obj.xs(label, axis=axis)
1114
1115 def _handle_lowerdim_multi_index_axis0(self, tup: tuple):
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\generic.py in xs(self, key, axis, level, drop_level)
3774 raise TypeError(f"Expected label or tuple of labels, got {key}") from e
3775 else:
-> 3776 loc = index.get_loc(key)
3777
3778 if isinstance(loc, np.ndarray):
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
3361 return self._engine.get_loc(casted_key)
3362 except KeyError as err:
-> 3363 raise KeyError(key) from err
3364
3365 if is_scalar(key) and isna(key) and not self.hasnans:
KeyError: 12690

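The KeyError is raised because on the last iteration i+1 equals 12690, which is one past the last index label (12689). You don't need a loop to do that: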
Proyectonevera2['Prueba'] = (Proyectonevera2['CodProducto'] == Proyectonevera2['CodProducto'].shift()).astype('int')

Related

Cannot slice DatetimeIndex using the datetime object

I create a dataframe indexed by a datetime object and the index becomes a DatetimeIndex.
from datetime import datetime
from datetime import timedelta
from dateutil.parser import parse
import pandas as pd
datestr=["2021/2/3","2021/01/6","2021/2/4","2021/2/7","2021/2/7","2021/2/9"]
time_data_2=pd.DataFrame({"data_1":[0.3,0.4,0.9,0.5,0.3,0.3],"data_2":[1,2,3,4,5,6]},index=pd.to_datetime(datestr))
I want to slice it using datetime objects, but I get an error.
time_data_2.loc[datetime(2021,2,1):datetime(2021,2,7),]
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
~\anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
3079 try:
-> 3080 return self._engine.get_loc(casted_key)
3081 except KeyError as err:
pandas\_libs\index.pyx in pandas._libs.index.DatetimeEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.DatetimeEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine._get_loc_duplicates()
pandas\_libs\index_class_helper.pxi in pandas._libs.index.Int64Engine._maybe_get_bool_indexer()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine._unpack_bool_indexer()
KeyError: 1612137600000000000
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
~\anaconda3\lib\site-packages\pandas\core\indexes\datetimes.py in get_loc(self, key, method, tolerance)
685 try:
--> 686 return Index.get_loc(self, key, method, tolerance)
687 except KeyError as err:
~\anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
3081 except KeyError as err:
-> 3082 raise KeyError(key) from err
3083
KeyError: Timestamp('2021-02-01 00:00:00')
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
<ipython-input-45-db7af75f07ac> in <module>
1 time_data_2=pd.DataFrame({"data_1":[0.3,0.4,0.9,0.5,0.3,0.3],"data_2":[1,2,3,4,5,6]},index=pd.to_datetime(datestr))
----> 2 time_data_2.loc[datetime(2021,2,1):datetime(2021,2,7),]
~\anaconda3\lib\site-packages\pandas\core\indexing.py in __getitem__(self, key)
887 # AttributeError for IntervalTree get_value
888 return self.obj._get_value(*key, takeable=self._takeable)
--> 889 return self._getitem_tuple(key)
890 else:
891 # we by definition only have the 0th axis
~\anaconda3\lib\site-packages\pandas\core\indexing.py in _getitem_tuple(self, tup)
1067 return self._multi_take(tup)
1068
-> 1069 return self._getitem_tuple_same_dim(tup)
1070
1071 def _get_label(self, label, axis: int):
~\anaconda3\lib\site-packages\pandas\core\indexing.py in _getitem_tuple_same_dim(self, tup)
773 continue
774
--> 775 retval = getattr(retval, self.name)._getitem_axis(key, axis=i)
776 # We should never have retval.ndim < self.ndim, as that should
777 # be handled by the _getitem_lowerdim call above.
~\anaconda3\lib\site-packages\pandas\core\indexing.py in _getitem_axis(self, key, axis)
1100 if isinstance(key, slice):
1101 self._validate_key(key, axis)
-> 1102 return self._get_slice_axis(key, axis=axis)
1103 elif com.is_bool_indexer(key):
1104 return self._getbool_axis(key, axis=axis)
~\anaconda3\lib\site-packages\pandas\core\indexing.py in _get_slice_axis(self, slice_obj, axis)
1134
1135 labels = obj._get_axis(axis)
-> 1136 indexer = labels.slice_indexer(
1137 slice_obj.start, slice_obj.stop, slice_obj.step, kind="loc"
1138 )
~\anaconda3\lib\site-packages\pandas\core\indexes\datetimes.py in slice_indexer(self, start, end, step, kind)
782
783 try:
--> 784 return Index.slice_indexer(self, start, end, step, kind=kind)
785 except KeyError:
786 # For historical reasons DatetimeIndex by default supports
~\anaconda3\lib\site-packages\pandas\core\indexes\base.py in slice_indexer(self, start, end, step, kind)
5275 slice(1, 3, None)
5276 """
-> 5277 start_slice, end_slice = self.slice_locs(start, end, step=step, kind=kind)
5278
5279 # return a slice
~\anaconda3\lib\site-packages\pandas\core\indexes\base.py in slice_locs(self, start, end, step, kind)
5474 start_slice = None
5475 if start is not None:
-> 5476 start_slice = self.get_slice_bound(start, "left", kind)
5477 if start_slice is None:
5478 start_slice = 0
~\anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_slice_bound(self, label, side, kind)
5394 except ValueError:
5395 # raise the original KeyError
-> 5396 raise err
5397
5398 if isinstance(slc, np.ndarray):
~\anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_slice_bound(self, label, side, kind)
5388 # we need to look up the label
5389 try:
-> 5390 slc = self.get_loc(label)
5391 except KeyError as err:
5392 try:
~\anaconda3\lib\site-packages\pandas\core\indexes\datetimes.py in get_loc(self, key, method, tolerance)
686 return Index.get_loc(self, key, method, tolerance)
687 except KeyError as err:
--> 688 raise KeyError(orig_key) from err
689
690 def _maybe_cast_for_get_loc(self, key) -> Timestamp:
KeyError: Timestamp('2021-02-01 00:00:00')
The problem is that the index isn't sorted, so you can't properly slice on a date range. It's not a great error message from pandas. See here:
https://github.com/pandas-dev/pandas/issues/5821
So, to fix your example:
from datetime import datetime
from datetime import timedelta
from dateutil.parser import parse
import pandas as pd
datestr=["2021/2/3","2021/01/6","2021/2/4","2021/2/7","2021/2/7","2021/2/9"]
time_data_2=pd.DataFrame({"data_1":[0.3,0.4,0.9,0.5,0.3,0.3],"data_2":[1,2,3,4,5,6]},index=pd.to_datetime(datestr))
time_data_2 = time_data_2.sort_index() # I only added this
time_data_2.loc[datetime(2021,2,1):datetime(2021,2,7),] # now this works

Why I can't create two columns with different moving average windows on Pandas?

I'm honestly out of ideas with this one. I've been trying to create a couple of columns from a dataframe, but I get the ValueError: Wrong number of items passed 2, placement implies 1. Somehow I can create one column, no matter whether it is the window=7 or the window=14 one, but I am only able to create one. Here's my code:
import pandas as pd
from datetime import datetime, timedelta
suspects_url = 'https://raw.githubusercontent.com/mariorz/covid19-mx-time-series/master/data/covid19_suspects_mx.csv'
suspects = pd.read_csv(suspects_url, index_col=0)
suspects = suspects.loc['Colima']
suspects = pd.DataFrame(suspects)
suspects.index = pd.to_datetime(suspects.index, format='%d-%m-%Y')
suspects['suspects_ma_7'] = suspects.rolling(window=7).mean()
suspects['suspects_ma_14'] = suspects.rolling(window=14).mean()
suspects.columns = ['suspects','suspects_ma_7','suspects_ma_14']
suspects
And this is the error I am getting:
KeyError Traceback (most recent call last)
/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2645 try:
-> 2646 return self._engine.get_loc(key)
2647 except KeyError:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'suspects_ma_14'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/pandas/core/internals/managers.py in set(self, item, value)
1070 try:
-> 1071 loc = self.items.get_loc(item)
1072 except KeyError:
/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2647 except KeyError:
-> 2648 return self._engine.get_loc(self._maybe_cast_indexer(key))
2649 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'suspects_ma_14'
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
<ipython-input-6-4138f1be9342> in <module>
5 suspects.index = pd.to_datetime(suspects.index, format='%d-%m-%Y')
6 suspects['suspects_ma_7'] = suspects.rolling(window=7).mean()
----> 7 suspects['suspects_ma_14'] = suspects.rolling(window=14).mean()
8 suspects.columns = ['suspects','suspects_ma_7','suspects_ma_14']
9
/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/pandas/core/frame.py in __setitem__(self, key, value)
2936 else:
2937 # set column
-> 2938 self._set_item(key, value)
2939
2940 def _setitem_slice(self, key, value):
/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/pandas/core/frame.py in _set_item(self, key, value)
2999 self._ensure_valid_index(value)
3000 value = self._sanitize_column(key, value)
-> 3001 NDFrame._set_item(self, key, value)
3002
3003 # check if we are modifying a copy
/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/pandas/core/generic.py in _set_item(self, key, value)
3622
3623 def _set_item(self, key, value) -> None:
-> 3624 self._data.set(key, value)
3625 self._clear_item_cache()
3626
/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/pandas/core/internals/managers.py in set(self, item, value)
1072 except KeyError:
1073 # This item wasn't present, just insert at end
-> 1074 self.insert(len(self.items), item, value)
1075 return
1076
/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/pandas/core/internals/managers.py in insert(self, loc, item, value, allow_duplicates)
1179 new_axis = self.items.insert(loc, item)
1180
-> 1181 block = make_block(values=value, ndim=self.ndim, placement=slice(loc, loc + 1))
1182
1183 for blkno, count in _fast_count_smallints(self._blknos[loc:]):
/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/pandas/core/internals/blocks.py in make_block(values, placement, klass, ndim, dtype)
3045 values = DatetimeArray._simple_new(values, dtype=dtype)
3046
-> 3047 return klass(values, ndim=ndim, placement=placement)
3048
3049
/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/pandas/core/internals/blocks.py in __init__(self, values, placement, ndim)
122
123 if self._validate_ndim and self.ndim and len(self.mgr_locs) != len(self.values):
--> 124 raise ValueError(
125 f"Wrong number of items passed {len(self.values)}, "
126 f"placement implies {len(self.mgr_locs)}"
ValueError: Wrong number of items passed 2, placement implies 1
How can I solve this issue?
I insist, at my attempts with only one suspects_ma_# it works. But when I'm trying to create both, I just get the error.
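suspects is already a DataFrame, so once you run suspects['suspects_ma_7'] = suspects.rolling(window=7).mean() it has two columns. The next call, suspects.rolling(window=14).mean(), is then computed over both columns and returns two columns, which cannot be assigned to a single new column; hence "Wrong number of items passed 2, placement implies 1".
So compute each rolling mean from the original column only, for example: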
suspects['suspects_ma_7'] = suspects.Colima.rolling(window=7).mean()
Note the "suspects.Colima" in the code above.

Selecting rows with a string index that contains a bracket

My table review_cp is indexed on beer names. I got the top three beer names through the following code.
top_3_spacy = review_cp.groupby('Name')['Average Evaluation Score'].mean().sort_values(by='Average Evaluation Score', ascending = False).index[:3].tolist()
The results are ['Rodenbach Caractère Rouge', 'Dorothy (Wine Barrel Aged)', 'Doubleganger']
However, when I tried to select rows using review_cp.loc[top_3_spacy[0]], it gave me a key error.
KeyError Traceback (most recent call
last) ~\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in
get_loc(self, key, method, tolerance) 2896 try:
-> 2897 return self._engine.get_loc(key) 2898 except KeyError:
pandas_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas_libs\index_class_helper.pxi in
pandas._libs.index.Int64Engine._check_type()
KeyError: 'Rodenbach Caractère Rouge'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call
last) in
----> 1 review_cp.loc[top_3_spacy[0]]
~\Anaconda3\lib\site-packages\pandas\core\indexing.py in
getitem(self, key) 1422 1423 maybe_callable = com.apply_if_callable(key, self.obj)
-> 1424 return self._getitem_axis(maybe_callable, axis=axis) 1425 1426 def _is_scalar_access(self, key:
Tuple):
~\Anaconda3\lib\site-packages\pandas\core\indexing.py in
_getitem_axis(self, key, axis) 1848 # fall thru to straight lookup 1849 self._validate_key(key, axis)
-> 1850 return self._get_label(key, axis=axis) 1851 1852
~\Anaconda3\lib\site-packages\pandas\core\indexing.py in
_get_label(self, label, axis)
158 raise IndexingError("no slices here, handle elsewhere")
159
--> 160 return self.obj._xs(label, axis=axis)
161
162 def _get_loc(self, key: int, axis: int):
~\Anaconda3\lib\site-packages\pandas\core\generic.py in xs(self, key,
axis, level, drop_level) 3735 loc, new_index =
self.index.get_loc_level(key, drop_level=drop_level) 3736
else:
-> 3737 loc = self.index.get_loc(key) 3738 3739 if isinstance(loc, np.ndarray):
~\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in
get_loc(self, key, method, tolerance) 2897 return
self._engine.get_loc(key) 2898 except KeyError:
-> 2899 return self._engine.get_loc(self._maybe_cast_indexer(key)) 2900
indexer = self.get_indexer([key], method=method, tolerance=tolerance)
2901 if indexer.ndim > 1 or indexer.size > 1:
pandas_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas_libs\index_class_helper.pxi in
pandas._libs.index.Int64Engine._check_type()
KeyError: 'Rodenbach Caractère Rouge'
I tried another method using review_cp[review_cp['Name'].str.contains(top_3_spacy[0])], it worked for 'Rodenbach Caractère Rouge' and 'Doubleganger', but not for 'Dorothy (Wine Barrel Aged)'. I wonder if it was because of the bracket?
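The bracket is not a problem for an exact match, as it is simply part of the string; it only matters for str.contains, which treats its pattern as a regular expression by default, so the parentheses are read as a group rather than as literal characters (pass regex=False to match them literally). As long as the string matches a name in the "Name" column, there shouldn't be a problem. If you want to get the rows of your top 3 list, instead of using loc, you can use: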
review_cp[review_cp['Name'].isin(top_3_spacy)]
That will isolate your top3 names (and it should include Dorothy).
I guess the problem is that Name is not in your index. Otherwise, your groupby statement would not be able to access the value. So your index is most likely an automatic integer index.
But .loc expects values in the index and can't find the name in the index of your DataFrame.
You can resolve this by using an indexer like in the post of DDD1.
review_cp['Name'].isin(top_3_spacy)
This creates a boolean indexer that selects the rows whose names are in the list.

Why am I receiving a key error using .loc on a known observation

I'm getting a key error when trying to pull a row of my data by index. I'm choosing the index based off looking at the .head() command so I know the value is there. My index is based off of a policy id.
I've looked at a few posts like the following but they don't answer the question:
KeyError in Python Pandas dataframe using .loc
Here is a screenshot of the data frame:
Then I try:
df_test.loc[92026963:]
This gives me a key error. Then I tried:
df_test.loc[92026963]
This gave me the same error. I even tried putting the policy id in quotes. Full traceback below:
ValueError Traceback (most recent call last)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_slice_bound(self, label, side, kind)
4804 try:
-> 4805 return self._searchsorted_monotonic(label, side)
4806 except ValueError:
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in _searchsorted_monotonic(self, label, side)
4764
-> 4765 raise ValueError('index must be monotonic increasing or decreasing')
4766
ValueError: index must be monotonic increasing or decreasing
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-45-85aa3a940778> in <module>
----> 1 df_test.loc[92026963:]
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexing.py in __getitem__(self, key)
1498
1499 maybe_callable = com.apply_if_callable(key, self.obj)
-> 1500 return self._getitem_axis(maybe_callable, axis=axis)
1501
1502 def _is_scalar_access(self, key):
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexing.py in _getitem_axis(self, key, axis)
1865 if isinstance(key, slice):
1866 self._validate_key(key, axis)
-> 1867 return self._get_slice_axis(key, axis=axis)
1868 elif com.is_bool_indexer(key):
1869 return self._getbool_axis(key, axis=axis)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexing.py in _get_slice_axis(self, slice_obj, axis)
1531 labels = obj._get_axis(axis)
1532 indexer = labels.slice_indexer(slice_obj.start, slice_obj.stop,
-> 1533 slice_obj.step, kind=self.name)
1534
1535 if isinstance(indexer, slice):
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in slice_indexer(self, start, end, step, kind)
4671 """
4672 start_slice, end_slice = self.slice_locs(start, end, step=step,
-> 4673 kind=kind)
4674
4675 # return a slice
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in slice_locs(self, start, end, step, kind)
4870 start_slice = None
4871 if start is not None:
-> 4872 start_slice = self.get_slice_bound(start, 'left', kind)
4873 if start_slice is None:
4874 start_slice = 0
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_slice_bound(self, label, side, kind)
4806 except ValueError:
4807 # raise the original KeyError
-> 4808 raise err
4809
4810 if isinstance(slc, np.ndarray):
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_slice_bound(self, label, side, kind)
4800 # we need to look up the label
4801 try:
-> 4802 slc = self._get_loc_only_exact_matches(label)
4803 except KeyError as err:
4804 try:
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in _get_loc_only_exact_matches(self, key)
4770 get_slice_bound.
4771 """
-> 4772 return self.get_loc(key)
4773
4774 def get_slice_bound(self, label, side, kind):
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2657 return self._engine.get_loc(key)
2658 except KeyError:
-> 2659 return self._engine.get_loc(self._maybe_cast_indexer(key))
2660 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
2661 if indexer.ndim > 1 or indexer.size > 1:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
KeyError: 92026963
It seems you need to assign the result of DataFrame.set_index back to the variable:
df_test = df_test.set_index('policy')
Or use inplace=True:
df_test.set_index('policy', inplace=True)

Making a column of boolean values based on two conditions in pandas dataframe

I'm trying to make a column of boolean values based on whether one column contains the word 'hazard' and does not contain the word 'roof' (so that I get all non-roof hazards).
I'm using the below code and I'm getting an error:
labels['h_count2'] = labels[(labels['Description'].str.contains('Hazard')) & (labels['Description'].str.contains('Roof'))]
This is the traceback:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
C:\ProgramData\Anaconda3\envs\tensorflowenvironment\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2656 try:
-> 2657 return self._engine.get_loc(key)
2658 except KeyError:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'h_count2'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
C:\ProgramData\Anaconda3\envs\tensorflowenvironment\lib\site-packages\pandas\core\internals\managers.py in set(self, item, value)
1052 try:
-> 1053 loc = self.items.get_loc(item)
1054 except KeyError:
C:\ProgramData\Anaconda3\envs\tensorflowenvironment\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2658 except KeyError:
-> 2659 return self._engine.get_loc(self._maybe_cast_indexer(key))
2660 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'h_count2'
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
<ipython-input-46-51360ea6f27f> in <module>
1 labels['h_count'] = labels['Description'].str.contains('Roof Hazard')
2 labels['b_count'] = labels['Description'].str.contains('Brush')
----> 3 labels['h_count2'] = labels[(labels['Description'].str.contains('Hazard')) & (labels['Description'].str.contains('Roof'))]
4
5 def target(row):
C:\ProgramData\Anaconda3\envs\tensorflowenvironment\lib\site-packages\pandas\core\frame.py in __setitem__(self, key, value)
3368 else:
3369 # set column
-> 3370 self._set_item(key, value)
3371
3372 def _setitem_slice(self, key, value):
C:\ProgramData\Anaconda3\envs\tensorflowenvironment\lib\site-packages\pandas\core\frame.py in _set_item(self, key, value)
3444 self._ensure_valid_index(value)
3445 value = self._sanitize_column(key, value)
-> 3446 NDFrame._set_item(self, key, value)
3447
3448 # check if we are modifying a copy
C:\ProgramData\Anaconda3\envs\tensorflowenvironment\lib\site-packages\pandas\core\generic.py in _set_item(self, key, value)
3170
3171 def _set_item(self, key, value):
-> 3172 self._data.set(key, value)
3173 self._clear_item_cache()
3174
C:\ProgramData\Anaconda3\envs\tensorflowenvironment\lib\site-packages\pandas\core\internals\managers.py in set(self, item, value)
1054 except KeyError:
1055 # This item wasn't present, just insert at end
-> 1056 self.insert(len(self.items), item, value)
1057 return
1058
C:\ProgramData\Anaconda3\envs\tensorflowenvironment\lib\site-packages\pandas\core\internals\managers.py in insert(self, loc, item, value, allow_duplicates)
1156
1157 block = make_block(values=value, ndim=self.ndim,
-> 1158 placement=slice(loc, loc + 1))
1159
1160 for blkno, count in _fast_count_smallints(self._blknos[loc:]):
C:\ProgramData\Anaconda3\envs\tensorflowenvironment\lib\site-packages\pandas\core\internals\blocks.py in make_block(values, placement, klass, ndim, dtype, fastpath)
3093 values = DatetimeArray._simple_new(values, dtype=dtype)
3094
-> 3095 return klass(values, ndim=ndim, placement=placement)
3096
3097
C:\ProgramData\Anaconda3\envs\tensorflowenvironment\lib\site-packages\pandas\core\internals\blocks.py in __init__(self, values, placement, ndim)
2629
2630 super(ObjectBlock, self).__init__(values, ndim=ndim,
-> 2631 placement=placement)
2632
2633 #property
C:\ProgramData\Anaconda3\envs\tensorflowenvironment\lib\site-packages\pandas\core\internals\blocks.py in __init__(self, values, placement, ndim)
85 raise ValueError(
86 'Wrong number of items passed {val}, placement implies '
---> 87 '{mgr}'.format(val=len(self.values), mgr=len(self.mgr_locs)))
88
89 def _check_ndim(self, values, ndim):
ValueError: Wrong number of items passed 5, placement implies 1
What am I doing wrong?
labels:
A Description
0 1 Roof
1 2 Hazard
2 3 Roof Hazard
labels['h_count2'] = labels.Description.str.contains('Hazard') & ~labels.Description.str.contains('Roof')
Results in
A Description h_count2
0 1 Roof False
1 2 Hazard True
2 3 Roof Hazard False
labels = pd.DataFrame({'Description': ['Hazard Roof test', 'test', 'Hazard is not', 'test2']})
labels['h_count2'] = (labels['Description'].str.upper().str.contains('HAZARD')) & ~(labels['Description'].str.upper().str.contains('ROOF'))
Description h_count2
0 Hazard Roof test False
1 test False
2 Hazard is not True
3 test2 False

Categories

Resources