Join results in KeyError on index column
# Import libraries
import pandas as pd
import numpy as np
# Open and load all files indexed by 'ISI_LOC'
df_all = pd.read_csv('AUTHORS.csv', index_col='ISI_LOC', dtype={'ISI_LOC':str, 'POSITION':int}, engine='c', low_memory=False)
df_addresses = pd.read_csv('ADDRESSES.csv', index_col='ISI_LOC', dtype={'ISI_LOC': str, 'POSITION':int, 'Seg1':str }, engine='c', low_memory=False)
# There are more, but for the sake of brevity...
An inspection of the dataframes shows the index is on strings, as expected.
# Goal: df_all.join([df_addresses, df_catagories, df_keywordsplus, df_articles])
df_all.join(df_addresses, on='ISI_LOC')
This results in:
KeyError Traceback (most recent call last)
<ipython-input-17-35d37498b69e> in <module>()
1 # df_all.join([df_addresses, df_catagories, df_keywordsplus, df_articles])
----> 2 df_all.join(df_addresses, on='ISI_LOC')
C:\Users\430010958\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\core\frame.py in join(self, other, on, how, lsuffix, rsuffix, sort)
3865 # For SparseDataFrame's benefit
3866 return self._join_compat(other, on=on, how=how, lsuffix=lsuffix,
-> 3867 rsuffix=rsuffix, sort=sort)
3868
3869 def _join_compat(self, other, on=None, how='left', lsuffix='', rsuffix='',
C:\Users\430010958\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\core\frame.py in _join_compat(self, other, on, how, lsuffix, rsuffix, sort)
3879 return merge(self, other, left_on=on, how=how,
3880 left_index=on is None, right_index=True,
-> 3881 suffixes=(lsuffix, rsuffix), sort=sort)
3882 else:
3883 if on is not None:
C:\Users\430010958\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\tools\merge.py in merge(left, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy)
36 right_on=right_on, left_index=left_index,
37 right_index=right_index, sort=sort, suffixes=suffixes,
---> 38 copy=copy)
39 return op.get_result()
40 if __debug__:
C:\Users\430010958\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\tools\merge.py in __init__(self, left, right, how, on, left_on, right_on, axis, left_index, right_index, sort, suffixes, copy)
182 (self.left_join_keys,
183 self.right_join_keys,
--> 184 self.join_names) = self._get_merge_keys()
185
186 def get_result(self):
C:\Users\430010958\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\tools\merge.py in _get_merge_keys(self)
359 join_names.append(None)
360 else:
--> 361 left_keys.append(left[k].values)
362 join_names.append(k)
363 if isinstance(self.right.index, MultiIndex):
C:\Users\430010958\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
1778 return self._getitem_multilevel(key)
1779 else:
-> 1780 return self._getitem_column(key)
1781
1782 def _getitem_column(self, key):
C:\Users\430010958\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\core\frame.py in _getitem_column(self, key)
1785 # get column
1786 if self.columns.is_unique:
-> 1787 return self._get_item_cache(key)
1788
1789 # duplicate columns & possible reduce dimensionaility
C:\Users\430010958\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\core\generic.py in _get_item_cache(self, item)
1066 res = cache.get(item)
1067 if res is None:
-> 1068 values = self._data.get(item)
1069 res = self._box_item_values(item, values)
1070 cache[item] = res
C:\Users\430010958\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\core\internals.py in get(self, item, fastpath)
2847
2848 if not isnull(item):
-> 2849 loc = self.items.get_loc(item)
2850 else:
2851 indexer = np.arange(len(self.items))[isnull(self.items)]
C:\Users\430010958\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\core\index.py in get_loc(self, key)
1400 loc : int if unique index, possibly slice or mask if not
1401 """
-> 1402 return self._engine.get_loc(_values_from_object(key))
1403
1404 def get_value(self, series, key):
pandas\index.pyx in pandas.index.IndexEngine.get_loc (pandas\index.c:3807)()
pandas\index.pyx in pandas.index.IndexEngine.get_loc (pandas\index.c:3687)()
pandas\hashtable.pyx in pandas.hashtable.PyObjectHashTable.get_item (pandas\hashtable.c:12310)()
pandas\hashtable.pyx in pandas.hashtable.PyObjectHashTable.get_item (pandas\hashtable.c:12261)()
KeyError: 'ISI_LOC'
Yes, I can use other methods. In fact, I have it working in a rather fugly way using this syntax, so I know that the data is formatted correctly:
df_catagories = pd.concat([df_catagories, df_keywordsplus], keys='ISI_LOC')
Which works, but not in the way I'd like it to. What am I missing in the join statement? I have played with 'how=' and other parameters without success.
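Judging from the traceback, the likely cause is that join's on= argument must name a column in the calling frame (in this pandas version it cannot be the index name): since ISI_LOC is the index of df_all rather than a column, merge looks up left['ISI_LOC'] and raises the KeyError. A minimal sketch of the fix, assuming both frames stay indexed by ISI_LOC as in the read_csv calls above, is to drop on= and join index-on-index:
# Both frames are indexed by ISI_LOC, so join on the indexes directly;
# a suffix is needed because both frames carry a POSITION column.
result = df_all.join(df_addresses, how='left', rsuffix='_addr')

# The multi-frame goal should then also work without on=, provided the
# remaining column names don't overlap (the list form accepts no suffixes):
# df_all.join([df_addresses, df_catagories, df_keywordsplus, df_articles])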
Related
I'm trying to import the dataset and stack the files one under the other
months = {'jan': 1, 'fev': 2, 'mar': 3, 'abr': 4, 'mai': 5, 'jun': 6, 'jul': 7, 'ago': 8, 'set': 9, 'out': 10, 'nov': 11, 'dez': 12}
base_path = pathlib.Path('dataset')
base_airbnb = pd.DataFrame()
for file in base_path.iterdir():
    month_name = file.name[:3]
    month = months[month_name]
    year = file.name[-8:]
    year = int(year.replace('.csv', ''))
    df = pd.read_csv(base_path / file.name)
    df['year'] = year
    df['month'] = month
    base_airbnb = base_airbnb.append(df)
display(base_airbnb)
Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.
To accept the future behavior, pass 'sort=False'.
To retain the current behavior and silence the warning, pass
'sort=True'.
Then I get a MemoryError, but I don't understand why; I'm sure I'm doing everything correctly in Jupyter Notebook. The full output is:
To accept the future behavior, pass 'sort=False'.
To retain the current behavior and silence the warning, pass
'sort=True'.
sort=sort,
C:\Users\CASA\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3058: DtypeWarning: Columns (87) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)
---------------------------------------------------------------------------
MemoryError                               Traceback (most recent call last)
<ipython-input-7-e9e45fb3206d> in <module>
     15     df['year'] = year
     16     df['month'] = month
---> 17     base_airbnb = base_airbnb.append(df)
     18
     19 big_data=pd.concat(months, axis=0)

~\Anaconda3\lib\site-packages\pandas\core\frame.py in append(self, other, ignore_index, verify_integrity, sort)
   7121             ignore_index=ignore_index,
   7122             verify_integrity=verify_integrity,
-> 7123             sort=sort,
   7124         )
   7125

~\Anaconda3\lib\site-packages\pandas\core\reshape\concat.py in concat(objs, axis, join, join_axes, ignore_index, keys, levels, names, verify_integrity, sort, copy)
    253         verify_integrity=verify_integrity,
    254         copy=copy,
--> 255         sort=sort,
    256     )
    257

~\Anaconda3\lib\site-packages\pandas\core\reshape\concat.py in __init__(self, objs, axis, join, join_axes, keys, levels, names, ignore_index, verify_integrity, copy, sort)
    333
    334             # consolidate
--> 335             obj._consolidate(inplace=True)
    336             ndims.add(obj.ndim)
    337

~\Anaconda3\lib\site-packages\pandas\core\generic.py in _consolidate(self, inplace)
   5268         inplace = validate_bool_kwarg(inplace, "inplace")
   5269         if inplace:
-> 5270             self._consolidate_inplace()
   5271         else:
   5272             f = lambda: self._data.consolidate()

~\Anaconda3\lib\site-packages\pandas\core\generic.py in _consolidate_inplace(self)
   5250             self._data = self._data.consolidate()
   5251
-> 5252         self._protect_consolidate(f)
   5253
   5254     def _consolidate(self, inplace=False):

~\Anaconda3\lib\site-packages\pandas\core\generic.py in _protect_consolidate(self, f)
   5239         """
   5240         blocks_before = len(self._data.blocks)
-> 5241         result = f()
   5242         if len(self._data.blocks) != blocks_before:
   5243             self._clear_item_cache()

~\Anaconda3\lib\site-packages\pandas\core\generic.py in f()
   5248
   5249         def f():
-> 5250             self._data = self._data.consolidate()
   5251
   5252         self._protect_consolidate(f)

~\Anaconda3\lib\site-packages\pandas\core\internals\managers.py in consolidate(self)
    930         bm = self.__class__(self.blocks, self.axes)
    931         bm._is_consolidated = False
--> 932         bm._consolidate_inplace()
    933         return bm
    934

~\Anaconda3\lib\site-packages\pandas\core\internals\managers.py in _consolidate_inplace(self)
    935     def _consolidate_inplace(self):
    936         if not self.is_consolidated():
--> 937             self.blocks = tuple(_consolidate(self.blocks))
    938             self._is_consolidated = True
    939             self._known_consolidated = True

~\Anaconda3\lib\site-packages\pandas\core\internals\managers.py in _consolidate(blocks)
   1911     for (_can_consolidate, dtype), group_blocks in grouper:
   1912         merged_blocks = _merge_blocks(
-> 1913             list(group_blocks), dtype=dtype, _can_consolidate=_can_consolidate
   1914         )
   1915         new_blocks = _extend_blocks(merged_blocks, new_blocks)

~\Anaconda3\lib\site-packages\pandas\core\internals\blocks.py in _merge_blocks(blocks, dtype, _can_consolidate)
   3318     # combination of those slices is a slice, too.
   3319     new_mgr_locs = np.concatenate([b.mgr_locs.as_array for b in blocks])
-> 3320     new_values = np.vstack([b.values for b in blocks])
   3321
   3322     argsort = np.argsort(new_mgr_locs)

~\Anaconda3\lib\site-packages\numpy\core\shape_base.py in vstack(tup)
    281     """
    282     _warn_for_nonsequence(tup)
--> 283     return _nx.concatenate([atleast_2d(_m) for _m in tup], 0)
    284
    285

MemoryError:
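A hedged suggestion, not from the original post: growing base_airbnb with .append() inside the loop copies the accumulated frame on every iteration and triggers the block consolidation where the traceback finally dies in np.vstack. A sketch that collects the monthly frames in a list and concatenates once, assuming the same dataset layout:
import pathlib
import pandas as pd

months = {'jan': 1, 'fev': 2, 'mar': 3, 'abr': 4, 'mai': 5, 'jun': 6,
          'jul': 7, 'ago': 8, 'set': 9, 'out': 10, 'nov': 11, 'dez': 12}
base_path = pathlib.Path('dataset')

frames = []
for file in base_path.iterdir():
    df = pd.read_csv(file)  # or pass dtype=... per the DtypeWarning above
    df['year'] = int(file.name[-8:].replace('.csv', ''))
    df['month'] = months[file.name[:3]]
    frames.append(df)

# A single concat at the end avoids the repeated copies made by .append()
base_airbnb = pd.concat(frames, ignore_index=True, sort=False)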
This is my code. I need to find the element (pokemon) with the highest Speed, HP, Attack, and Defense for each Type 2. They are sorted so the strongest is on top. I do not see where the error is coming from.
def strongestPokemon():
    strongPokemon_df = pd.DataFrame()
    each_type_df = []
    for type2 in pokemon_data['Type 2'].unique():
        sub_type = pokemon_data[pokemon_data['Type 2'] == type2]
        sub_type.sort_values(by=['HP','Attack','Defense','Speed'], ascending=[False,False,False,False], inplace=True)
        sub_type.reset_index(drop=True, inplace=True)
        top_pokemon = sub_type.loc[1]
        each_type_df.append(top_pokemon)

    strongPokemon_df = pd.DataFrame(each_type_df, columns=pk_data.columns)
    return strongPokemon_df

pk_strongest = strongestPokemon()
pk_strongest
pk_strongest = strongestPokemon()
pk_strongest
These are the errors I get. Sorry for the long question; I am not sure why this error is occurring.
ValueError: 1 is not in range
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-87-fe71fd5a4e0f> in <module>()
14 return strongPokemon_df
15
---> 16 pk_strongest = strongestPokemon()
17 pk_strongest
<ipython-input-87-fe71fd5a4e0f> in strongestPokemon()
7 sub_type.sort_values(by=['HP','Attack','Defense','Speed'],ascending=[False,False,False,False],inplace=True)
8 sub_type.reset_index(drop=True,inplace=True)
----> 9 top_pokemon = sub_type.loc[1]
10 each_type_df.append(top_pokemon)
11
/usr/local/lib/python3.6/dist-packages/pandas/core/indexing.py in __getitem__(self, key)
1766
1767 maybe_callable = com.apply_if_callable(key, self.obj)
-> 1768 return self._getitem_axis(maybe_callable, axis=axis)
1769
1770 def _is_scalar_access(self, key: Tuple):
/usr/local/lib/python3.6/dist-packages/pandas/core/indexing.py in _getitem_axis(self, key, axis)
1963 # fall thru to straight lookup
1964 self._validate_key(key, axis)
-> 1965 return self._get_label(key, axis=axis)
1966
1967
/usr/local/lib/python3.6/dist-packages/pandas/core/indexing.py in _get_label(self, label, axis)
623 raise IndexingError("no slices here, handle elsewhere")
624
--> 625 return self.obj._xs(label, axis=axis)
626
627 def _get_loc(self, key: int, axis: int):
/usr/local/lib/python3.6/dist-packages/pandas/core/generic.py in xs(self, key, axis, level, drop_level)
3535 loc, new_index = self.index.get_loc_level(key, drop_level=drop_level)
3536 else:
-> 3537 loc = self.index.get_loc(key)
3538
3539 if isinstance(loc, np.ndarray):
/usr/local/lib/python3.6/dist-packages/pandas/core/indexes/range.py in get_loc(self, key, method, tolerance)
350 return self._range.index(new_key)
351 except ValueError:
--> 352 raise KeyError(key)
353 return super().get_loc(key, method=method, tolerance=tolerance)
354
KeyError: 1
Try:
top_pokemon = sub_type.iloc[1]
iloc is positional, and since after reset_index(drop=True) the labels start at 0, the first (strongest) row is:
top_pokemon = sub_type.iloc[0]
That also explains the KeyError: .loc[1] looks for the label 1, which doesn't exist whenever a Type 2 group has only one row.
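As a side note, a hedged alternative sketch (assuming pokemon_data has the columns used above): sort the whole frame once, then keep the first row of each Type 2 group, which avoids the per-group indexing entirely.
# Strongest pokemon per Type 2: sort once, take the top row of each group
pk_strongest = (
    pokemon_data
    .sort_values(['HP', 'Attack', 'Defense', 'Speed'], ascending=False)
    .groupby('Type 2', sort=False)
    .head(1)
    .reset_index(drop=True)
)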
My dataframe ag_data contains a column called state that holds state abbreviations; however, not all the values in state are valid, recognized US state abbreviations. SA is a list of state abbreviations. I wanted to check whether the abbreviation in my state column is in the SA list and filter my dataset down to those rows, but I keep getting an error. Is there another way to do this?
ag_data[ag_data.state.isin(SA)]
---------------------------------------------------------------------------
MemoryError Traceback (most recent call last)
<ipython-input-26-8e3eefc5127e> in <module>()
1 #ag_data["state"] = ag_data[ag_data[ag_data.columns[0]].isin(SA)]
----> 2 ag_data[ag_data.state.isin(SA)]
~\AppData\Local\Continuum\anaconda3\envs\acnPJ\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
1956 if isinstance(key, (Series, np.ndarray, Index, list)):
1957 # either boolean or fancy integer index
-> 1958 return self._getitem_array(key)
1959 elif isinstance(key, DataFrame):
1960 return self._getitem_frame(key)
~\AppData\Local\Continuum\anaconda3\envs\acnPJ\lib\site-packages\pandas\core\frame.py in _getitem_array(self, key)
1998 key = check_bool_indexer(self.index, key)
1999 indexer = key.nonzero()[0]
-> 2000 return self.take(indexer, axis=0, convert=False)
2001 else:
2002 indexer = self.loc._convert_to_indexer(key, axis=1)
~\AppData\Local\Continuum\anaconda3\envs\acnPJ\lib\site-packages\pandas\core\generic.py in take(self, indices, axis, convert, is_copy, **kwargs)
1926 new_data = self._data.take(indices,
1927 axis=self._get_block_manager_axis(axis),
-> 1928 convert=True, verify=True)
1929 result = self._constructor(new_data).__finalize__(self)
1930
~\AppData\Local\Continuum\anaconda3\envs\acnPJ\lib\site-packages\pandas\core\internals.py in take(self, indexer, axis, verify, convert)
4009 new_labels = self.axes[axis].take(indexer)
4010 return self.reindex_indexer(new_axis=new_labels, indexer=indexer,
-> 4011 axis=axis, allow_dups=True)
4012
4013 def merge(self, other, lsuffix='', rsuffix=''):
~\AppData\Local\Continuum\anaconda3\envs\acnPJ\lib\site-packages\pandas\core\internals.py in reindex_indexer(self, new_axis, indexer, axis, fill_value, allow_dups, copy)
3895 new_blocks = [blk.take_nd(indexer, axis=axis, fill_tuple=(
3896 fill_value if fill_value is not None else blk.fill_value,))
-> 3897 for blk in self.blocks]
3898
3899 new_axes = list(self.axes)
~\AppData\Local\Continuum\anaconda3\envs\acnPJ\lib\site-packages\pandas\core\internals.py in <listcomp>(.0)
3895 new_blocks = [blk.take_nd(indexer, axis=axis, fill_tuple=(
3896 fill_value if fill_value is not None else blk.fill_value,))
-> 3897 for blk in self.blocks]
3898
3899 new_axes = list(self.axes)
~\AppData\Local\Continuum\anaconda3\envs\acnPJ\lib\site-packages\pandas\core\internals.py in take_nd(self, indexer, axis, new_mgr_locs, fill_tuple)
1044 fill_value = fill_tuple[0]
1045 new_values = algos.take_nd(values, indexer, axis=axis,
-> 1046 allow_fill=True, fill_value=fill_value)
1047
1048 if new_mgr_locs is None:
~\AppData\Local\Continuum\anaconda3\envs\acnPJ\lib\site-packages\pandas\core\algorithms.py in take_nd(arr, indexer, axis, out, fill_value, mask_info, allow_fill)
1465 out = np.empty(out_shape, dtype=dtype, order='F')
1466 else:
-> 1467 out = np.empty(out_shape, dtype=dtype)
1468
1469 func = _get_take_nd_function(arr.ndim, arr.dtype, out.dtype, axis=axis,
MemoryError:
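A hedged reading of the traceback: the isin mask itself is cheap; the MemoryError comes from allocating the filtered copy of the frame (the np.empty call at the bottom), which suggests the frame is already close to your memory limit. One sketch that keeps only matching rows in memory is to filter chunk by chunk at read time (the file name here is an assumption):
import pandas as pd

# Filter each chunk as it is read so the full unfiltered frame
# never needs to be copied in one piece.
chunks = pd.read_csv('ag_data.csv', chunksize=100_000)
ag_data = pd.concat(chunk[chunk['state'].isin(SA)] for chunk in chunks)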
Why do I keep getting a key error?
[edit] Here is the data:
GEO,LAT,LON
AALBORG DENMARK,57.0482206,9.9193939
AARHUS DENMARK,56.1496278,10.2134046
ABBOTSFORD BC CANADA,49.0519047,-122.3290473
ABEOKUTA NIGERIA,7.161,3.348
ABERDEEN SCOTLAND,57.1452452,-2.0913745
[end edit]
Can't find a row by index, but it's clearly there:
geocache = pd.read_csv('geolog.csv',index_col=['GEO']) # index_col=['GEO']
geocache.head()
Shows
LAT LON
GEO
AALBORG DENMARK 57.048221 9.919394
AARHUS DENMARK 56.149628 10.213405
ABBOTSFORD BC CANADA 49.051905 -122.329047
ABEOKUTA NIGERIA 7.161000 3.348000
ABERDEEN SCOTLAND 57.145245 -2.091374
So then I test it:
x = 'AARHUS DENMARK'
print(x)
geocache[x]
And this is what I get:
AARHUS DENMARK
KeyError Traceback (most recent call last)
in ()
2 x = u'AARHUS DENMARK'
3 print(x)
----> 4 geocache[x]
C:\Users\g\Anaconda3\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
1785 return self._getitem_multilevel(key)
1786 else:
-> 1787 return self._getitem_column(key)
1788
1789 def _getitem_column(self, key):
C:\Users\g\Anaconda3\lib\site-packages\pandas\core\frame.py in _getitem_column(self, key)
1792 # get column
1793 if self.columns.is_unique:
-> 1794 return self._get_item_cache(key)
1795
1796 # duplicate columns & possible reduce dimensionaility
C:\Users\g\Anaconda3\lib\site-packages\pandas\core\generic.py in _get_item_cache(self, item)
1077 res = cache.get(item)
1078 if res is None:
-> 1079 values = self._data.get(item)
1080 res = self._box_item_values(item, values)
1081 cache[item] = res
C:\Users\g\Anaconda3\lib\site-packages\pandas\core\internals.py in get(self, item, fastpath)
2841
2842 if not isnull(item):
-> 2843 loc = self.items.get_loc(item)
2844 else:
2845 indexer = np.arange(len(self.items))[isnull(self.items)]
C:\Users\g\Anaconda3\lib\site-packages\pandas\core\index.py in get_loc(self, key, method)
1435 """
1436 if method is None:
-> 1437 return self._engine.get_loc(_values_from_object(key))
1438
1439 indexer = self.get_indexer([key], method=method)
pandas\index.pyx in pandas.index.IndexEngine.get_loc (pandas\index.c:3824)()
pandas\index.pyx in pandas.index.IndexEngine.get_loc (pandas\index.c:3704)()
pandas\hashtable.pyx in pandas.hashtable.PyObjectHashTable.get_item (pandas\hashtable.c:12349)()
pandas\hashtable.pyx in pandas.hashtable.PyObjectHashTable.get_item (pandas\hashtable.c:12300)()
KeyError: 'AARHUS DENMARK'
No extra spaces or non-visible chars. I tried putting r and u before the string assignment with no change in behavior.
Ok, what am I missing?
Since you didn't pass a sep (separator) arg to read_csv, the default is comma-separated. Your csv contains spaces/tabs after the commas, so these get treated as part of the data, and your index values end up with embedded spaces.
So you need to pass additional params to read_csv:
pd.read_csv('geolog.csv', index_col=['GEO'], sep=r',\s+', engine='python')
The sep arg is a regex matching a comma followed by one or more whitespace characters (use a raw string so \s isn't treated as an escape); we pass engine='python' because the C engine does not accept a regex separator.
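Two hedged side notes, not from the original answer: read_csv's skipinitialspace flag strips spaces after each delimiter while keeping the C engine, and bracket indexing with a string (geocache[x]) selects a column, so a row lookup by index label goes through .loc:
import pandas as pd

# skipinitialspace drops whitespace after each delimiter (C engine still works)
geocache = pd.read_csv('geolog.csv', index_col=['GEO'], skipinitialspace=True)

# geocache['AARHUS DENMARK'] looks for a *column* of that name;
# select the row by its index label with .loc instead
row = geocache.loc['AARHUS DENMARK']
print(row['LAT'], row['LON'])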
I am trying to build groups of my data using the groupby function of pandas over a DataFrame with a DateTimeIndex. Using pd.TimeGrouper, I want to group by day.
When I define this DataFrame, the operation below, n.groupby(pd.TimeGrouper("d")), does not work.
n = pd.DataFrame(
{"value": [5462,5462,3185]},
index=[pd.to_datetime("2013-10-13 19:03:54"),
pd.to_datetime("2013-10-12 19:03:54"),
pd.to_datetime("2013-10-11 13:19:23")])
Error:
n.groupby(pd.TimeGrouper("d"))
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-248-120eaa65b064> in <module>()
----> 1 n.groupby(pd.TimeGrouper("d"))
\lib\site-packages\pandas\core\generic.pyc in groupby(self, by, axis, level, as_index, sort, group_keys, squeeze)
184 return groupby(self, by, axis=axis, level=level, as_index=as_index,
185 sort=sort, group_keys=group_keys,
--> 186 squeeze=squeeze)
187
188 def asfreq(self, freq, method=None, how=None, normalize=False):
\lib\site-packages\pandas\core\groupby.pyc in groupby(obj, by, **kwds)
531 raise TypeError('invalid type: %s' % type(obj))
532
--> 533 return klass(obj, by, **kwds)
534
535
\lib\site-packages\pandas\core\groupby.pyc in __init__(self, obj, keys, axis, level, grouper, exclusions, selection, as_index, sort, group_keys, squeeze)
195 if grouper is None:
196 grouper, exclusions = _get_grouper(obj, keys, axis=axis,
--> 197 level=level, sort=sort)
198
199 self.grouper = grouper
\lib\site-packages\pandas\core\groupby.pyc in _get_grouper(obj, key, axis, level, sort)
1268
1269 if isinstance(key, CustomGrouper):
-> 1270 gpr = key.get_grouper(obj)
1271 return gpr, []
1272 elif isinstance(key, Grouper):
\lib\site-packages\pandas\tseries\resample.pyc in get_grouper(self, obj)
106 def get_grouper(self, obj):
107 # Only return grouper
--> 108 return self._get_time_grouper(obj)[1]
109
110 def _get_time_grouper(self, obj):
\lib\site-packages\pandas\tseries\resample.pyc in _get_time_grouper(self, obj)
112
113 if self.kind is None or self.kind == 'timestamp':
--> 114 binner, bins, binlabels = self._get_time_bins(axis)
115 else:
116 binner, bins, binlabels = self._get_time_period_bins(axis)
\lib\site-packages\pandas\tseries\resample.pyc in _get_time_bins(self, axis)
146
147 # general version, knowing nothing about relative frequencies
--> 148 bins = lib.generate_bins_dt64(ax_values, bin_edges, self.closed)
149
150 if self.closed == 'right':
\lib\site-packages\pandas\lib.pyd in pandas.lib.generate_bins_dt64 (pandas\lib.c:16139)()
ValueError: Invalid length for values or for binner
Surprisingly, when I define the DataFrame like below, it works just fine. Notice that I changed the last day to be 2013-10-12 instead of 2013-10-11.
n = pd.DataFrame(
{"value": [5462,5462,3185]},
index=[pd.to_datetime("2013-10-13 19:03:54"),
pd.to_datetime("2013-10-13 19:03:54"),
pd.to_datetime("2013-10-12 13:19:23")])
In this case I get a correct group object:
n.groupby(pd.TimeGrouper("d"))
<pandas.core.groupby.DataFrameGroupBy object at 0x000000000A3D84E0>
I already looked up some of the core functions of pandas in the source code, but I am not sure whether this is a bug or whether I just do not know how to use the function properly.
Notice also that aggregating on month works just fine.
Thank you for your help.
This is a bug that occurs because the index is not ordered monotonically, see here. But there's no reason to use TimeGrouper; it is somewhat internal at the moment, so use resample instead.
In [3]: df
Out[3]:
value
2013-10-13 19:03:54 5462
2013-10-12 19:03:54 5462
2013-10-11 13:19:23 3185
In [4]: df.resample('d')
Out[4]:
value
2013-10-11 3185
2013-10-12 5462
2013-10-13 5462
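A hedged note for current pandas versions (an addition, not part of the original answer): resample now returns a Resampler that needs an explicit aggregation, and TimeGrouper was removed in favor of pd.Grouper, so the equivalent today would look like:
# Modern equivalents of the resample answer above
df.resample('d').sum()

# or, via groupby, the public replacement for TimeGrouper:
df.groupby(pd.Grouper(freq='d')).sum()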