Matplotlib errorbar fails to read a pandas data frame - python

I have two data frames in pandas format that I am trying to plot as values and error bars, but the Python interpreter raises an error I cannot understand. I tested a colleague's nearly identical code, and it appears that the source of the error is that I run Python 3.5 while he uses 2.7: when I run his code on my computer (Python 3.5), I get the same error message.
Below is a subset of my troubling code:
"Using pandas library to combine the three white spruce data sets"
trees = [white_spruce_1,white_spruce_2,white_spruce_3]
ntrees = pd.concat(trees) # Concatenate list into a series
spruce_stat = ntrees.groupby("Wvl") #Converted the series into a panda object
mean_spruce = spruce_stat.mean()
std_spruce = spruce_stat.std()
#mean_spruce.head()
mean_spruce['wvl']=mean_spruce.index
mean_spruce.head()
Chan.# Rad. (Ref.) Rad. (Target) Tgt./Ref. %
Wvl
350 0 0 0.000014 0.686176
351 0 0 0.000015 0.707577
std_spruce.head()
Chan.# Rad. (Ref.) Rad. (Target) Tgt./Ref. %
Wvl
350 0 0 0.000014 0.686176
351 0 0 0.000015 0.707577
plt.errorbar(mean_spruce['wvl'],mean_spruce['Tgt./Ref. %'], xerr = None, yerr = std_spruce['Rad. (Ref.)'])
Below is the error message I receive:
KeyError Traceback (most recent call last)
<ipython-input-52-13352d94b09c> in <module>()
2 #plt.errorbar(mean_spruce['wvl'],mean_spruce['Tgt./Ref. %'], xerr = None,yerr=std_spruce['Tgt./Ref. %'],c='k',ecolor='r', elinewidth=0.5, errorevery=5)
3 #plt.errorbar( x, y, xerr = None , yerr = sd_white_spruce['Tgt./Ref. %'],c = 'green', ecolor = 'red', capsize = 0,elinewidth = 0.5, errorevery = 5 )
----> 4 plt.errorbar(mean_spruce['wvl'],mean_spruce['Tgt./Ref. %'], xerr = None, yerr = std_spruce['Rad. (Ref.)'])# ,c = 'green', ecolor = 'red', capsize = 0,elinewidth = 0.5, errorevery = 5)
5
C:\Users\mike\Anaconda3\lib\site-packages\matplotlib\pyplot.py in errorbar(x, y, yerr, xerr, fmt, ecolor, elinewidth, capsize, barsabove, lolims, uplims, xlolims, xuplims, errorevery, capthick, hold, data, **kwargs)
2828 xlolims=xlolims, xuplims=xuplims,
2829 errorevery=errorevery, capthick=capthick, data=data,
-> 2830 **kwargs)
2831 finally:
2832 ax.hold(washold)
C:\Users\mike\Anaconda3\lib\site-packages\matplotlib\__init__.py in inner(ax, *args, **kwargs)
1809 warnings.warn(msg % (label_namer, func.__name__),
1810 RuntimeWarning, stacklevel=2)
-> 1811 return func(ax, *args, **kwargs)
1812 pre_doc = inner.__doc__
1813 if pre_doc is None:
C:\Users\mike\Anaconda3\lib\site-packages\matplotlib\axes\_axes.py in errorbar(self, x, y, yerr, xerr, fmt, ecolor, elinewidth, capsize, barsabove, lolims, uplims, xlolims, xuplims, errorevery, capthick, **kwargs)
2961 # Check for scalar or symmetric, as in xerr.
2962 if len(yerr) > 1 and not ((len(yerr) == len(y) and not (
-> 2963 iterable(yerr[0]) and len(yerr[0]) > 1))):
2964 raise ValueError("yerr must be a scalar, the same "
2965 "dimensions as y, or 2xN.")
C:\Users\mike\Anaconda3\lib\site-packages\pandas\core\series.py in __getitem__(self, key)
555 def __getitem__(self, key):
556 try:
--> 557 result = self.index.get_value(self, key)
558
559 if not np.isscalar(result):
C:\Users\mike\Anaconda3\lib\site-packages\pandas\core\index.py in get_value(self, series, key)
3882
3883 k = _values_from_object(key)
-> 3884 loc = self.get_loc(k)
3885 new_values = _values_from_object(series)[loc]
3886
C:\Users\mike\Anaconda3\lib\site-packages\pandas\core\index.py in get_loc(self, key, method, tolerance)
3940 pass
3941 return super(Float64Index, self).get_loc(key, method=method,
-> 3942 tolerance=tolerance)
3943
3944 @property
C:\Users\mike\Anaconda3\lib\site-packages\pandas\core\index.py in get_loc(self, key, method, tolerance)
1757 'backfill or nearest lookups')
1758 key = _values_from_object(key)
-> 1759 return self._engine.get_loc(key)
1760
1761 indexer = self.get_indexer([key], method=method,
pandas\index.pyx in pandas.index.IndexEngine.get_loc (pandas\index.c:3979)()
pandas\index.pyx in pandas.index.IndexEngine.get_loc (pandas\index.c:3843)()
pandas\hashtable.pyx in pandas.hashtable.Float64HashTable.get_item (pandas\hashtable.c:9556)()
pandas\hashtable.pyx in pandas.hashtable.Float64HashTable.get_item (pandas\hashtable.c:9494)()
KeyError: 0.0
Thanks for the help

The problem is a discrepancy between pandas indexing and the indexing used inside Matplotlib's internal functions. One way to resolve it, albeit not an elegant one, is to create a dummy dataframe just for plotting purposes. In your case:
mean_spruce_dummy = mean_spruce
mean_spruce_dummy.columns = np.arange(0, len(mean_spruce))
In principle, this discrepancy is resolved in newer versions of pandas.

I'm seeing a similar error in Python 2.7. My solution is to access the underlying data directly. This should work for you:
x = mean_spruce['wvl'].values
y = mean_spruce['Tgt./Ref. %'].values
yerr = std_spruce['Rad. (Ref.)'].values
plt.errorbar(x, y, yerr=yerr)

Related

How do I solve this error message- I'm trying to draw a scatter plot with a line of best fit

Please, I'm trying to use Jupyter Notebook to draw a scatter plot and add a line of best fit to it. When I write the code without this line:
plt.plot (X, regression.predict(X), color='red')
it draws the scatter plot, but when I insert the line above to draw a line of best fit, the error below is output.
import pandas
from pandas import DataFrame
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
data = pandas.read_csv("cost_revenue_clean.csv")
data.describe()
X = DataFrame (data, columns = ['production_budget_usd'])
y = DataFrame (data, columns = ['worldwide_gross_usd'])
regression = LinearRegression()
regression.fit(X, y)
regression.coef_
regression.intercept_
plt.figure(figsize = (10,6))
plt.scatter(X,y, alpha=0.3)
plt.plot (X, regression.predict(X), color='red')
plt.title ('revenue vs budget')
plt.xlabel('prod. budget $')
plt.ylabel('worldwide revenue $')
plt.xlim(0,450000000)
plt.ylim(0,3000000000)
plt.show()
I get the error pasted below. I have no idea what the problem could be. I have tried
restarting the kernel several times. I'm pretty new to python and jupyter notebook
TypeError Traceback (most recent call last)
File ~\anaconda3\lib\site-packages\pandas\core\indexes\base.py:3621, in
Index.get_loc(self, key, method, tolerance)
3620 try:
-> 3621 return self._engine.get_loc(casted_key)
3622 except KeyError as err:
File ~\anaconda3\lib\site-packages\pandas\_libs\index.pyx:136, in
pandas._libs.index.IndexEngine.get_loc()
File ~\anaconda3\lib\site-packages\pandas\_libs\index.pyx:142, in
pandas._libs.index.IndexEngine.get_loc()
TypeError: '(slice(None, None, None), None)' is an invalid key
During handling of the above exception, another exception occurred:
InvalidIndexError Traceback (most recent call last)
Input In [9], in <cell line: 3>()
1 plt.figure(figsize = (10,6))
2 plt.scatter(X,y, alpha=0.3)
----> 3 plt.plot (X, regression.predict(X), color='red')
4 plt.title ('revenue vs budget')
5 plt.xlabel('prod. budget $')
File ~\anaconda3\lib\site-packages\matplotlib\pyplot.py:2757, in plot(scalex, scaley,
data, *args, **kwargs)
2755 @_copy_docstring_and_deprecators(Axes.plot)
2756 def plot(*args, scalex=True, scaley=True, data=None, **kwargs):
-> 2757 return gca().plot(
2758 *args, scalex=scalex, scaley=scaley,
2759 **({"data": data} if data is not None else {}), **kwargs)
File ~\anaconda3\lib\site-packages\matplotlib\axes\_axes.py:1632, in Axes.plot(self,
scalex, scaley, data, *args, **kwargs)
1390 """
1391 Plot y versus x as lines and/or markers.
1392
(...)
1629 (``'green'``) or hex strings (``'#008000'``).
1630 """
1631 kwargs = cbook.normalize_kwargs(kwargs, mlines.Line2D)
-> 1632 lines = [*self._get_lines(*args, data=data, **kwargs)]
1633 for line in lines:
1634 self.add_line(line)
File ~\anaconda3\lib\site-packages\matplotlib\axes\_base.py:312, in
_process_plot_var_args.__call__(self, data, *args, **kwargs)
310 this += args[0],
311 args = args[1:]
--> 312 yield from self._plot_args(this, kwargs)
File ~\anaconda3\lib\site-packages\matplotlib\axes\_base.py:487, in
_process_plot_var_args._plot_args(self, tup, kwargs, return_kwargs)
484 kw[prop_name] = val
486 if len(xy) == 2:
--> 487 x = _check_1d(xy[0])
488 y = _check_1d(xy[1])
489 else:
File ~\anaconda3\lib\site-packages\matplotlib\cbook\__init__.py:1327, in _check_1d(x)
1321 with warnings.catch_warnings(record=True) as w:
1322 warnings.filterwarnings(
1323 "always",
1324 category=Warning,
1325 message='Support for multi-dimensional indexing')
-> 1327 ndim = x[:, None].ndim
1328 # we have definitely hit a pandas index or series object
1329 # cast to a numpy array.
1330 if len(w) > 0:
File ~\anaconda3\lib\site-packages\pandas\core\frame.py:3505, in
DataFrame.__getitem__(self, key)
3503 if self.columns.nlevels > 1:
3504 return self._getitem_multilevel(key)
-> 3505 indexer = self.columns.get_loc(key)
3506 if is_integer(indexer):
3507 indexer = [indexer]
File ~\anaconda3\lib\site-packages\pandas\core\indexes\base.py:3628, in
Index.get_loc(self, key, method, tolerance)
3623 raise KeyError(key) from err
3624 except TypeError:
3625 # If we have a listlike key, _check_indexing_error will raise
3626 # InvalidIndexError. Otherwise we fall through and re-raise
3627 # the TypeError.
-> 3628 self._check_indexing_error(key)
3629 raise
3631 # GH#42269
File ~\anaconda3\lib\site-packages\pandas\core\indexes\base.py:5637, in
Index._check_indexing_error(self, key)
5633 def _check_indexing_error(self, key):
5634 if not is_scalar(key):
5635 # if key is not a scalar, directly raise an error (the code below
5636 # would convert to numpy arrays and raise later any way) - GH29926
-> 5637 raise InvalidIndexError(key)
InvalidIndexError: (slice(None, None, None), None)

KeyError when using for loop on dataframe to plot histograms

I have a dataframe similar to:
df = pd.DataFrame({'Date': ['2016-01-05', '2016-01-05', '2016-01-05', '2016-01-05', '2016-01-08', '2016-01-08', '2016-02-01'], 'Count': [1, 2, 2, 3, 2, 0, 2]})
and I am trying to plot a histogram of Count for each unique Date
I've tried:
for date in df.Date.unique():
plt.hist([df[df.Date == '%s' %(date)]['Count']])
plt.title('%s' %(date))
which results in
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-17-971a1cf07250> in <module>()
1 for date in df.Date.unique():
----> 2 plt.hist([df[df.Date == '%s' %(date)]['Count']])
3 plt.title('%s' %(date))
c:~\anaconda3\lib\site-packages\matplotlib\pyplot.py in hist(x, bins, range, normed, weights, cumulative, bottom, histtype, align, orientation, rwidth, log, color, label, stacked, hold, data, **kwargs)
2963 histtype=histtype, align=align, orientation=orientation,
2964 rwidth=rwidth, log=log, color=color, label=label,
-> 2965 stacked=stacked, data=data, **kwargs)
2966 finally:
2967 ax.hold(washold)
c:~\anaconda3\lib\site-packages\matplotlib\__init__.py in inner(ax, *args, **kwargs)
1816 warnings.warn(msg % (label_namer, func.__name__),
1817 RuntimeWarning, stacklevel=2)
-> 1818 return func(ax, *args, **kwargs)
1819 pre_doc = inner.__doc__
1820 if pre_doc is None:
c:~\anaconda3\lib\site-packages\matplotlib\axes\_axes.py in hist(self, x, bins, range, normed, weights, cumulative, bottom, histtype, align, orientation, rwidth, log, color, label, stacked, **kwargs)
5925
5926 # basic input validation
-> 5927 flat = np.ravel(x)
5928
5929 input_empty = len(flat) == 0
c:~\anaconda3\lib\site-packages\numpy\core\fromnumeric.py in ravel(a, order)
1482 return asarray(a).ravel(order=order)
1483 else:
-> 1484 return asanyarray(a).ravel(order=order)
1485
1486
c:~\anaconda3\lib\site-packages\numpy\core\numeric.py in asanyarray(a, dtype, order)
581
582 """
--> 583 return array(a, dtype, copy=False, order=order, subok=True)
584
585
c:~\anaconda3\lib\site-packages\pandas\core\series.py in __getitem__(self, key)
581 key = com._apply_if_callable(key, self)
582 try:
--> 583 result = self.index.get_value(self, key)
584
585 if not lib.isscalar(result):
c:~\anaconda3\lib\site-packages\pandas\indexes\base.py in get_value(self, series, key)
1978 try:
1979 return self._engine.get_value(s, k,
-> 1980 tz=getattr(series.dtype, 'tz', None))
1981 except KeyError as e1:
1982 if len(self) > 0 and self.inferred_type in ['integer', 'boolean']:
pandas\index.pyx in pandas.index.IndexEngine.get_value (pandas\index.c:3332)()
pandas\index.pyx in pandas.index.IndexEngine.get_value (pandas\index.c:3035)()
pandas\index.pyx in pandas.index.IndexEngine.get_loc (pandas\index.c:4018)()
pandas\hashtable.pyx in pandas.hashtable.Int64HashTable.get_item (pandas\hashtable.c:6610)()
pandas\hashtable.pyx in pandas.hashtable.Int64HashTable.get_item (pandas\hashtable.c:6554)()
KeyError: 0
But when I try to simply print it, there is no problem:
for date in df.Date.unique():
print([df[df.Date == '%s' %(date)]['Count']])
[0 1
1 2
2 2
3 3
Name: Count, dtype: int64]
[4 2
5 0
Name: Count, dtype: int64]
[6 2
Name: Count, dtype: int64]
What is the issue with calling plt.hist on my dataframe the way that I have it here?
Essentially, you have two square brackets too many in your code.
plt.hist([series]) # <- wrong
plt.hist(series) # <- correct
In the first case matplotlib would try to plot a histogram of a list of one element, which is non-numeric. That won't work.
Instead, removing the brackets and directly supplying the series works fine:
for date in df.Date.unique():
plt.hist(df[df.Date == '%s' %(date)]['Count'])
plt.title('%s' %(date))
Now this will create all histograms in the same plot. Not sure if this is desired. If not, consider the incredibly short alternative:
df.hist(by="Date")
You're passing a list of dataframes, which is causing a problem here. You could deconstruct a groupby object and plot each one separately.
gps = df.groupby('Date').Count
_, axes = plt.subplots(nrows=gps.ngroups)
for (_, g), ax in zip(df.groupby('Date').Count, axes):
g.plot.hist(ax=ax)
plt.show()
Take a look at the Visualisation docs if you need more sugar in your graph.

pandas 'Series' object has no attribute 'find'

I am trying to do a simple plot of some data and am getting the following error. Any help is very much appreciated.
AttributeError: 'Series' object has no attribute 'find'
Versions :
python3 ,
matplotlib (2.0.2) ,
pandas (0.20.3) ,
jupyter (1.0.0).
Code:
import pandas as pd
import matplotlib.pyplot as plt
pd_hr_data = pd.read_csv("/Users/pc/Downloads/HR_comma_sep.csv")
#print(pd_hr_data['average_montly_hours'],pd_hr_data['sales'])
take_ten_data = pd_hr_data[0:19]
x = take_ten_data['average_montly_hours'].astype(int)
y = take_ten_data['sales'].astype(str)
print(type(x[0]))
print(type(y[0]))
#print(x,y) ---- this gives me all the 20 values
#print(type(y[0]))
plt.plot(x,y)
plt.show()
Output / Error:
-
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
in ()
9 #print(type(y[0]))
10
---> 11 plt.plot(x,y)
12 plt.show()
/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/matplotlib/pyplot.py
in plot(*args, **kwargs)
3315 mplDeprecation)
3316 try:
-> 3317 ret = ax.plot(*args, **kwargs)
3318 finally:
3319 ax._hold = washold
/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/matplotlib/__init__.py
in inner(ax, *args, **kwargs)
1896 warnings.warn(msg % (label_namer, func.__name__),
1897 RuntimeWarning, stacklevel=2)
-> 1898 return func(ax, *args, **kwargs)
1899 pre_doc = inner.__doc__
1900 if pre_doc is None:
/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/matplotlib/axes/_axes.py
in plot(self, *args, **kwargs)
1404 kwargs = cbook.normalize_kwargs(kwargs, _alias_map)
1405
-> 1406 for line in self._get_lines(*args, **kwargs):
1407 self.add_line(line)
1408 lines.append(line)
/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/matplotlib/axes/_base.py
in _grab_next_args(self, *args, **kwargs)
405 return
406 if len(remaining) <= 3:
--> 407 for seg in self._plot_args(remaining, kwargs):
408 yield seg
409 return
/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/matplotlib/axes/_base.py
in _plot_args(self, tup, kwargs)
355 ret = []
356 if len(tup) > 1 and is_string_like(tup[-1]):
--> 357 linestyle, marker, color = _process_plot_format(tup[-1])
358 tup = tup[:-1]
359 elif len(tup) == 3:
/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/matplotlib/axes/_base.py
in _process_plot_format(fmt)
92 # handle the multi char special cases and strip them from the
93 # string
---> 94 if fmt.find('--') >= 0:
95 linestyle = '--'
96 fmt = fmt.replace('--', '')
/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pandas/core/generic.py
in __getattr__(self, name)
3079 if name in self._info_axis:
3080 return self[name]
-> 3081 return object.__getattribute__(self, name)
3082
3083 def __setattr__(self, name, value):
AttributeError: 'Series' object has no attribute 'find'
I think you can use DataFrame.plot and define x and y by column names, because it has better support for plotting non-numeric values:
take_ten_data = pd_hr_data[0:19]
x = take_ten_data['average_montly_hours'].astype(int)
y = take_ten_data['sales'].astype(str)
take_ten_data.plot(x='average_montly_hours', y='sales')
#working without x,y also, but less readable
#take_ten_data.plot('average_montly_hours','sales')
plt.show()
Sample:
take_ten_data = pd.DataFrame({'average_montly_hours':[3,10,12], 'sales':[10,20,30]})
x = take_ten_data['average_montly_hours'].astype(int)
y = take_ten_data['sales'].astype(str)
take_ten_data.plot(x='average_montly_hours', y='sales')
plt.show()
But if all values are numeric, it works nicely:
take_ten_data = pd.DataFrame({'average_montly_hours':[3,10,12], 'sales':['10','20','30']})
x = take_ten_data['average_montly_hours'].astype(int)
#convert to int if necessary
y = take_ten_data['sales'].astype(int)
plt.plot(x,y)
plt.show()
The following worked for me; I hope it helps. The issue was mixing different data types for plotting.
import pandas as pd
import matplotlib.pyplot as plt
pd_hr_data = pd.read_csv("/Users/pc/Downloads/HR_comma_sep.csv")
take_ten_data = pd_hr_data[0:4]
y = take_ten_data['average_montly_hours'].astype(int)
x = [1,2,3,4]  # this can be autogenerated based on the series/matrix size
names = take_ten_data['sales']
plt.bar(x,y, align='center')
#plt.plot(x,y) ---- use this if you want
plt.xticks(x, names)
plt.show()

Pie chart with pandas & matplotlib

I'm trying to plot a pie chart of the titanic survivor data. I have been trying to plot this as a pie chart but I keep getting a KeyError 0. How can I fix this?
figure(1, figsize=(6,6))
ax = axes([0.1, 0.1, 0.8, 0.8])
s_survival = (titanic_data.Survived[titanic_data.Embarked == 'S'][titanic_data.Survived == 1].value_counts()
) / survivors.sum()
c_survival = (titanic_data.Survived[titanic_data.Embarked == 'C'][titanic_data.Survived == 1].value_counts()
) / survivors.sum()
q_survival = (titanic_data.Survived[titanic_data.Embarked == 'Q'][titanic_data.Survived == 1].value_counts()
) / survivors.sum()
labels = ['s_survival', 'c_survival', 'q_survival']
sizes = [s_survival, c_survival, q_survival]
pie(sizes, explode=None, labels=labels, autopct='%1.1f%%', shadow=False, startangle=90)
title('Survivor Percentage from Embarked Port')
Any advice would be greatly appreciated.
StackTrace :
KeyError Traceback (most recent call last)
<ipython-input-18-a8c68ae3422f> in <module>()
11 sizes = [s_survival, c_survival, q_survival]
12 # add a list zip here
---> 13 plt.pie(sizes, explode=None, labels=labels, autopct='%1.1f%%', shadow=False, startangle=90)
14 title('Survivor Percentage from Embarked Port')
//anaconda/lib/python2.7/site-packages/matplotlib/pyplot.pyc in pie(x, explode, labels, colors, autopct, pctdistance, shadow, labeldistance, startangle, radius, counterclock, wedgeprops, textprops, center, frame, hold, data)
3135 radius=radius, counterclock=counterclock,
3136 wedgeprops=wedgeprops, textprops=textprops, center=center,
-> 3137 frame=frame, data=data)
3138 finally:
3139 ax.hold(washold)
//anaconda/lib/python2.7/site-packages/matplotlib/__init__.pyc in inner(ax, *args, **kwargs)
1810 warnings.warn(msg % (label_namer, func.__name__),
1811 RuntimeWarning, stacklevel=2)
-> 1812 return func(ax, *args, **kwargs)
1813 pre_doc = inner.__doc__
1814 if pre_doc is None:
//anaconda/lib/python2.7/site-packages/matplotlib/axes/_axes.pyc in pie(self, x, explode, labels, colors, autopct, pctdistance, shadow, labeldistance, startangle, radius, counterclock, wedgeprops, textprops, center, frame)
2546 """
2547
-> 2548 x = np.asarray(x).astype(np.float32)
2549
2550 sx = float(x.sum())
//anaconda/lib/python2.7/site-packages/numpy/core/numeric.pyc in asarray(a, dtype, order)
472
473 """
--> 474 return array(a, dtype, copy=False, order=order)
475
476 def asanyarray(a, dtype=None, order=None):
//anaconda/lib/python2.7/site-packages/pandas/core/series.pyc in __getitem__(self, key)
558 def __getitem__(self, key):
559 try:
--> 560 result = self.index.get_value(self, key)
561
562 if not lib.isscalar(result):
//anaconda/lib/python2.7/site-packages/pandas/indexes/base.pyc in get_value(self, series, key)
1909 try:
1910 return self._engine.get_value(s, k,
-> 1911 tz=getattr(series.dtype, 'tz', None))
1912 except KeyError as e1:
1913 if len(self) > 0 and self.inferred_type in ['integer', 'boolean']:
pandas/index.pyx in pandas.index.IndexEngine.get_value (pandas/index.c:3234)()
pandas/index.pyx in pandas.index.IndexEngine.get_value (pandas/index.c:2931)()
pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:3891)()
pandas/hashtable.pyx in pandas.hashtable.Int64HashTable.get_item (pandas/hashtable.c:6527)()
pandas/hashtable.pyx in pandas.hashtable.Int64HashTable.get_item (pandas/hashtable.c:6465)()
KeyError: 0

KeyError when plotting a sliced pandas dataframe with datetimes

I get a KeyError when I try to plot a slice of a pandas DataFrame column with datetimes in it. Does anybody know what could cause this?
I managed to reproduce the error in a little self contained example (which you can also view here: http://nbviewer.ipython.org/3714142/):
import numpy as np
from pandas import DataFrame
import datetime
from pylab import *
test = DataFrame({'x' : [datetime.datetime(2012,9,10) + datetime.timedelta(n) for n in range(10)],
'y' : range(10)})
Now if I plot:
plot(test['x'][0:5])
there is no problem, but when I plot:
plot(test['x'][5:10])
I get the KeyError below (and the error message is not very helpful to me). This only happens with datetime columns, not with other columns (as far as I have seen). E.g. plot(test['y'][5:10]) is not a problem.
The error message:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-7-aa076e3fc4e0> in <module>()
----> 1 plot(test['x'][5:10])
C:\Python27\lib\site-packages\matplotlib\pyplot.pyc in plot(*args, **kwargs)
2456 ax.hold(hold)
2457 try:
-> 2458 ret = ax.plot(*args, **kwargs)
2459 draw_if_interactive()
2460 finally:
C:\Python27\lib\site-packages\matplotlib\axes.pyc in plot(self, *args, **kwargs)
3846 lines = []
3847
-> 3848 for line in self._get_lines(*args, **kwargs):
3849 self.add_line(line)
3850 lines.append(line)
C:\Python27\lib\site-packages\matplotlib\axes.pyc in _grab_next_args(self, *args, **kwargs)
321 return
322 if len(remaining) <= 3:
--> 323 for seg in self._plot_args(remaining, kwargs):
324 yield seg
325 return
C:\Python27\lib\site-packages\matplotlib\axes.pyc in _plot_args(self, tup, kwargs)
298 x = np.arange(y.shape[0], dtype=float)
299
--> 300 x, y = self._xy_from_xy(x, y)
301
302 if self.command == 'plot':
C:\Python27\lib\site-packages\matplotlib\axes.pyc in _xy_from_xy(self, x, y)
215 if self.axes.xaxis is not None and self.axes.yaxis is not None:
216 bx = self.axes.xaxis.update_units(x)
--> 217 by = self.axes.yaxis.update_units(y)
218
219 if self.command!='plot':
C:\Python27\lib\site-packages\matplotlib\axis.pyc in update_units(self, data)
1277 neednew = self.converter!=converter
1278 self.converter = converter
-> 1279 default = self.converter.default_units(data, self)
1280 #print 'update units: default=%s, units=%s'%(default, self.units)
1281 if default is not None and self.units is None:
C:\Python27\lib\site-packages\matplotlib\dates.pyc in default_units(x, axis)
1153 'Return the tzinfo instance of *x* or of its first element, or None'
1154 try:
-> 1155 x = x[0]
1156 except (TypeError, IndexError):
1157 pass
C:\Python27\lib\site-packages\pandas\core\series.pyc in __getitem__(self, key)
374 def __getitem__(self, key):
375 try:
--> 376 return self.index.get_value(self, key)
377 except InvalidIndexError:
378 pass
C:\Python27\lib\site-packages\pandas\core\index.pyc in get_value(self, series, key)
529 """
530 try:
--> 531 return self._engine.get_value(series, key)
532 except KeyError, e1:
533 if len(self) > 0 and self.inferred_type == 'integer':
C:\Python27\lib\site-packages\pandas\_engines.pyd in pandas._engines.IndexEngine.get_value (pandas\src\engines.c:1479)()
C:\Python27\lib\site-packages\pandas\_engines.pyd in pandas._engines.IndexEngine.get_value (pandas\src\engines.c:1374)()
C:\Python27\lib\site-packages\pandas\_engines.pyd in pandas._engines.DictIndexEngine.get_loc (pandas\src\engines.c:2498)()
C:\Python27\lib\site-packages\pandas\_engines.pyd in pandas._engines.DictIndexEngine.get_loc (pandas\src\engines.c:2460)()
KeyError: 0
HYRY explained why you get the KeyError.
To plot with slices using matplotlib you can do:
In [157]: plot(test['x'][5:10].values)
Out[157]: [<matplotlib.lines.Line2D at 0xc38348c>]
In [158]: plot(test['x'][5:10].reset_index(drop=True))
Out[158]: [<matplotlib.lines.Line2D at 0xc37e3cc>]
x, y plotting in one go with 0.7.3
In [161]: test[5:10].set_index('x')['y'].plot()
Out[161]: <matplotlib.axes.AxesSubplot at 0xc48b1cc>
Instead of calling plot(test["x"][5:10]), you can call the plot method of Series object:
test["x"][5:10].plot()
The reason: test["x"][5:10] is a Series object with an integer index from 5 to 9. plot() tries to get the element at index 0, which does not exist in that index, and that causes the KeyError.
I encountered this error with pd.groupby in Pandas 0.14.0 and solved it with df = df[df['col']!= 0].reset_index()

Categories

Resources