I'm trouble in error of pandas "DataFrame"

I'm trouble in error of pandas "DataFrame" - python

I'm a bigginer who are learning pandas.
I tried two things as follows, but I didn't solve it.
I use Jupyter notebook.
Could you please help me?
#1
import sys
sys.getdefaultencoding()
#2
from importlib import reload
import sys
reload(sys)
#I wanted to make it.
#I succeeded in it at another file, but it couldn't work at current file.
from pandas import Series,DataFrame
import pandas as pd
data={"ID":["001","002","003"],
"city":["hyougo","tiba","gihu"],
"people":["100","230","249"]}
data_frame=DataFrame(data)
print(data_frame)
#error
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-133-640865466ed4> in <module>
3 "people":["100","230","249"]}
4 data_frame=DataFrame(data)
----> 5 print(data_frame)
~\anaconda3\lib\site-packages\pandas\core\frame.py in __repr__(self)
678 else:
679 width = None
--> 680 self.to_string(
681 buf=buf,
682 max_rows=max_rows,
~\anaconda3\lib\site-packages\pandas\core\frame.py in to_string(self, buf, columns, col_space, header, index, na_rep, formatters, float_format, sparsify, index_names, justify, max_rows, min_rows, max_cols, show_dimensions, decimal, line_width, max_colwidth, encoding)
818 line_width=line_width,
819 )
--> 820 return formatter.to_string(buf=buf, encoding=encoding)
821
822 # ----------------------------------------------------------------------
~\anaconda3\lib\site-packages\pandas\io\formats\format.py in to_string(self, buf, encoding)
912 encoding: Optional[str] = None,
913 ) -> Optional[str]:
--> 914 return self.get_result(buf=buf, encoding=encoding)
915
916 def to_latex(
~\anaconda3\lib\site-packages\pandas\io\formats\format.py in get_result(self, buf, encoding)
519 """
520 with self.get_buffer(buf, encoding=encoding) as f:
--> 521 self.write_result(buf=f)
522 if buf is None:
523 return f.getvalue()
~\anaconda3\lib\site-packages\pandas\io\formats\format.py in write_result(self, buf)
821 else:
822
--> 823 strcols = self._to_str_columns()
824 if self.line_width is None: # no need to wrap around just print
825 # the whole frame
~\anaconda3\lib\site-packages\pandas\io\formats\format.py in _to_str_columns(self)
717 # may include levels names also
718
--> 719 str_index = self._get_formatted_index(frame)
720
721 if not is_list_like(self.header) and not self.header:
~\anaconda3\lib\site-packages\pandas\io\formats\format.py in _get_formatted_index(self, frame)
1057 )
1058 else:
-> 1059 fmt_index = [index.format(name=self.show_row_idx_names, formatter=fmt)]
1060
1061 fmt_index = [
AttributeError: 'list' object has no attribute 'format'

You need to import pandas first and then add the alias pd before DataFrame
import pandas as pd
data={"ID":["001","002","003"],
"city":["hyougo","tiba","gihu"],
"people":["100","230","249"]}
data_frame=pd.DataFrame(data)
print(data_frame)
Prints:
ID city people
0 001 hyougo 100
1 002 tiba 230
2 003 gihu 249
If you don't have pandas installed you will need to do that first
pip install pandas

Related

I can't seem to find a fix for the "ValueError: Unknown subheader signature" raised while reading sas file using pd.read_sas?

I am trying to load a sasbdat file in python using pd.read_sas() and I fail to load the data due to the below error.
ValueError Traceback (most recent call last)
<ipython-input-148-64f915da8256> in <module>
----> 1 df_sas = pd.read_sas('input_sasfile.sas7bdat', format='sas7bdat')
~\.conda\envs\overloaded-new\lib\site-packages\pandas\io\sas\sasreader.py in read_sas(filepath_or_buffer, format, index, encoding, chunksize, iterator)
121
122 reader = SAS7BDATReader(
--> 123 filepath_or_buffer, index=index, encoding=encoding, chunksize=chunksize
124 )
125 else:
~\.conda\envs\overloaded-new\lib\site-packages\pandas\io\sas\sas7bdat.py in __init__(self, path_or_buf, index, convert_dates, blank_missing, chunksize, encoding, convert_text, convert_header_text)
144
145 self._get_properties()
--> 146 self._parse_metadata()
147
148 def column_data_lengths(self):
~\.conda\envs\overloaded-new\lib\site-packages\pandas\io\sas\sas7bdat.py in _parse_metadata(self)
349 self.close()
350 raise ValueError("Failed to read a meta data page from the SAS file.")
--> 351 done = self._process_page_meta()
352
353 def _process_page_meta(self):
~\.conda\envs\overloaded-new\lib\site-packages\pandas\io\sas\sas7bdat.py in _process_page_meta(self)
355 pt = [const.page_meta_type, const.page_amd_type] + const.page_mix_types
356 if self._current_page_type in pt:
--> 357 self._process_page_metadata()
358 is_data_page = self._current_page_type & const.page_data_type
359 is_mix_page = self._current_page_type in const.page_mix_types
~\.conda\envs\overloaded-new\lib\site-packages\pandas\io\sas\sas7bdat.py in _process_page_metadata(self)
388 subheader_signature = self._read_subheader_signature(pointer.offset)
389 subheader_index = self._get_subheader_index(
--> 390 subheader_signature, pointer.compression, pointer.ptype
391 )
392 self._process_subheader(subheader_index, pointer)
~\.conda\envs\overloaded-new\lib\site-packages\pandas\io\sas\sas7bdat.py in _get_subheader_index(self, signature, compression, ptype)
401 else:
402 self.close()
--> 403 raise ValueError("Unknown subheader signature")
404 return index
405
ValueError: Unknown subheader signature
Though I found relevant github issue (https://github.com/pandas-dev/pandas/issues/24794), but it was closed because the issue got resolved by updating the pandas.
Any help is greatly appreciated.

How to convert 2 column dataframe to dictionary without turning keys into a list

I will preface this by saying I'm a very amateur user and though I've researched my problem extensively I have not found a solution. I assume the solution is simple, but we will see.
Simplified, I have a dataframe with column names A, B, C, D, etc., and I want to change those names to a, b, c, d, etc.. The list of column names is long so in order to achieve these I've imported a dataframe from an excel file with 2 columns (I used excel here because I want to create an easily reproducible method for the entire program I'm creating). The first column has A, B, C, D... and the second column has a, b, c, d.
I then took this dataframe, set the index to column 0, and transposed it. I then used .to_dict('list') and the resulting dictionary looks almost correct except that the values are in lists: {'A':['a'], 'B':['b']...}. So when I try to execute df.rename(columns=dictionary) I get the unhashable type list error.
I know this is because my values are stored as lists, if the dictionary looked like {'A':'a', 'B':'b'...} I'm betting it would work fine.
So basically, how do I turn my dataframe into a dictionary without lists that is formatted as such? Or is this not possible and I should approach this in a different way?
Thanks!
Here is my actual code:
INPUT
df_plate = pd.read_excel('plate.xlsx',index_col='sample')
df_plate_t = df_plate.T
dict_plate = df_plate_t.to_dict('list')
df_sorted2 = df_sorted.rename(columns=dict_plate)
df_sorted2
OUTPUT
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\IPython\core\formatters.py in __call__(self, obj)
700 type_pprinters=self.type_printers,
701 deferred_pprinters=self.deferred_printers)
--> 702 printer.pretty(obj)
703 printer.flush()
704 return stream.getvalue()
~\AppData\Local\Continuum\anaconda3\lib\site-packages\IPython\lib\pretty.py in pretty(self, obj)
400 if cls is not object \
401 and callable(cls.__dict__.get('__repr__')):
--> 402 return _repr_pprint(obj, self, cycle)
403
404 return _default_pprint(obj, self, cycle)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\IPython\lib\pretty.py in _repr_pprint(obj, p, cycle)
695 """A pprint that just redirects to the normal repr function."""
696 # Find newlines and replace them with p.break_()
--> 697 output = repr(obj)
698 for idx,output_line in enumerate(output.splitlines()):
699 if idx:
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\base.py in __repr__(self)
76 Yields Bytestring in Py2, Unicode String in py3.
77 """
---> 78 return str(self)
79
80
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\base.py in __str__(self)
55
56 if compat.PY3:
---> 57 return self.__unicode__()
58 return self.__bytes__()
59
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\frame.py in __unicode__(self)
632 width = None
633 self.to_string(buf=buf, max_rows=max_rows, max_cols=max_cols,
--> 634 line_width=width, show_dimensions=show_dimensions)
635
636 return buf.getvalue()
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\frame.py in to_string(self, buf, columns, col_space, header, index, na_rep, formatters, float_format, sparsify, index_names, justify, max_rows, max_cols, show_dimensions, decimal, line_width)
719 decimal=decimal,
720 line_width=line_width)
--> 721 formatter.to_string()
722
723 if buf is None:
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\io\formats\format.py in to_string(self)
596 else:
597
--> 598 strcols = self._to_str_columns()
599 if self.line_width is None: # no need to wrap around just print
600 # the whole frame
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\io\formats\format.py in _to_str_columns(self)
527 str_columns = [[label] for label in self.header]
528 else:
--> 529 str_columns = self._get_formatted_column_labels(frame)
530
531 stringified = []
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\io\formats\format.py in _get_formatted_column_labels(self, frame)
770 need_leadsp[x] else x]
771 for i, (col, x) in enumerate(zip(columns,
--> 772 fmt_columns))]
773
774 if self.show_row_idx_names:
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\io\formats\format.py in <listcomp>(.0)
769 str_columns = [[' ' + x if not self._get_formatter(i) and
770 need_leadsp[x] else x]
--> 771 for i, (col, x) in enumerate(zip(columns,
772 fmt_columns))]
773
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\io\formats\format.py in _get_formatter(self, i)
363 if is_integer(i) and i not in self.columns:
364 i = self.columns[i]
--> 365 return self.formatters.get(i, None)
366
367
TypeError: unhashable type: 'list'
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\IPython\core\formatters.py in __call__(self, obj)
343 method = get_real_method(obj, self.print_method)
344 if method is not None:
--> 345 return method()
346 return None
347 else:
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\frame.py in _repr_html_(self)
672
673 return self.to_html(max_rows=max_rows, max_cols=max_cols,
--> 674 show_dimensions=show_dimensions, notebook=True)
675 else:
676 return None
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\frame.py in to_html(self, buf, columns, col_space, header, index, na_rep, formatters, float_format, sparsify, index_names, justify, max_rows, max_cols, show_dimensions, decimal, bold_rows, classes, escape, notebook, border, table_id, render_links)
2263 render_links=render_links)
2264 # TODO: a generic formatter wld b in DataFrameFormatter
-> 2265 formatter.to_html(classes=classes, notebook=notebook, border=border)
2266
2267 if buf is None:
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\io\formats\format.py in to_html(self, classes, notebook, border)
727 from pandas.io.formats.html import HTMLFormatter, NotebookFormatter
728 Klass = NotebookFormatter if notebook else HTMLFormatter
--> 729 html = Klass(self, classes=classes, border=border).render()
730 if hasattr(self.buf, 'write'):
731 buffer_put_lines(self.buf, html)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\io\formats\html.py in render(self)
527 self.write('<div>')
528 self.write_style()
--> 529 super(NotebookFormatter, self).render()
530 self.write('</div>')
531 return self.elements
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\io\formats\html.py in render(self)
144
145 def render(self):
--> 146 self._write_table()
147
148 if self.should_show_dimensions:
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\io\formats\html.py in _write_table(self, indent)
180 self._write_header(indent + self.indent_delta)
181
--> 182 self._write_body(indent + self.indent_delta)
183
184 self.write('</table>', indent)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\io\formats\html.py in _write_body(self, indent)
323 def _write_body(self, indent):
324 self.write('<tbody>', indent)
--> 325 fmt_values = {i: self.fmt._format_col(i) for i in range(self.ncols)}
326
327 # write values
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\io\formats\html.py in <dictcomp>(.0)
323 def _write_body(self, indent):
324 self.write('<tbody>', indent)
--> 325 fmt_values = {i: self.fmt._format_col(i) for i in range(self.ncols)}
326
327 # write values
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\io\formats\format.py in _format_col(self, i)
702 def _format_col(self, i):
703 frame = self.tr_frame
--> 704 formatter = self._get_formatter(i)
705 values_to_format = frame.iloc[:, i]._formatting_values()
706 return format_array(values_to_format, formatter,
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\io\formats\format.py in _get_formatter(self, i)
363 if is_integer(i) and i not in self.columns:
364 i = self.columns[i]
--> 365 return self.formatters.get(i, None)
366
367
TypeError: unhashable type: 'list'

Yup it was an easy solution. If you want to do this and don't know how (probably not many of you out there...) then you want to use a series rather than a dataframe with keys=index and values=column.
dict_plate = pd.Series(df_plate['condition'].values,index=df_plate['sample']).to_dict()

appending json files in python

I am trying to append some json files in python. I have the following code. It seems right. However, I am getting an error.
The code is as follows.
import pandas as pd
df1=pd.DataFrame()
for i in range(0,49):
df = pd.read_json ('/media/michael/extHDD/Kaggle/DeepFAke/DF_all/metadata{}.json'.format(i))
df1.append(df.T)
The error is as follows.
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-76-ddb355627155> in <module>
3 df1=pd.DataFrame()
4 for i in range(0,49):
----> 5 df = pd.read_json ('/media/michael/extHDD/Kaggle/DeepFAke/DF_all/metadata{}.json'.format(i))
6 df1.append(df.T)
~/myenv/lib/python3.5/site-packages/pandas/io/json/_json.py in read_json(path_or_buf, orient, typ, dtype, convert_axes, convert_dates, keep_default_dates, numpy, precise_float, date_unit, encoding, lines, chunksize, compression)
590 return json_reader
591
--> 592 result = json_reader.read()
593 if should_close:
594 try:
~/myenv/lib/python3.5/site-packages/pandas/io/json/_json.py in read(self)
715 obj = self._get_object_parser(self._combine_lines(data.split("\n")))
716 else:
--> 717 obj = self._get_object_parser(self.data)
718 self.close()
719 return obj
~/myenv/lib/python3.5/site-packages/pandas/io/json/_json.py in _get_object_parser(self, json)
737 obj = None
738 if typ == "frame":
--> 739 obj = FrameParser(json, **kwargs).parse()
740
741 if typ == "series" or obj is None:
~/myenv/lib/python3.5/site-packages/pandas/io/json/_json.py in parse(self)
847
848 else:
--> 849 self._parse_no_numpy()
850
851 if self.obj is None:
~/myenv/lib/python3.5/site-packages/pandas/io/json/_json.py in _parse_no_numpy(self)
1091 if orient == "columns":
1092 self.obj = DataFrame(
-> 1093 loads(json, precise_float=self.precise_float), dtype=None
1094 )
1095 elif orient == "split":
ValueError: Expected object or value
The code works when I do it for each file individually. Would anyone be able to help me regarding this.
Thanks & Best Regards
Michael

The error occurs on a df = pd.read_json (...) line. It is likely that one of the file is non existent or incorrect. My advice is to use a try catch to identify it:
for i in range(0,49):
try:
df = pd.read_json ('/media/michael/extHDD/Kaggle/DeepFAke/DF_all/metadata{}.json'.format(i))
except:
print('Error on iteration', i, ', file',
'/media/michael/extHDD/Kaggle/DeepFAke/DF_all/metadata{}.json'.format(i))
raise
df1.append(df.T)
Catching any exception is normally bad practice because it can hide truely abnormal conditions like an IO or memory error. That is the reason why I re-raise the original exception in above code.

Pandas Dataframe to plotly plot showing TypeError

I wrote a script which uses nltk's FreqDist module then converts it into a pandas dataframe. The code snippet is as follows:
.......
import unicodedata
str2 = unicodedata.normalize('NFKD', str1).encode('ascii','ignore')
words = nltk.tokenize.word_tokenize(str2)
fdist = nltk.FreqDist(words)
df = pd.DataFrame.from_dict(fdist, orient='index').reset_index()
df = df.rename(columns={'index':'query_word', 0:'count'})
df2 = df.sort_values(['count'], ascending=[False])
Now, I am trying to plot it using plotly for which my code snippet looks as follows:
import plotly.plotly as py
import plotly.graph_objs as go
data = [go.Bar(x= df.query_word, y= df.count)]
py.iplot(data, filename='basic-bar')
When I run this part, I get the following error:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-15-87d0c9af254b> in <module>()
----> 1 py.iplot(data, filename='basic-bar')
/usr/local/lib/python2.7/dist-packages/plotly/plotly/plotly.pyc in iplot(figure_or_data, **plot_options)
150 if 'auto_open' not in plot_options:
151 plot_options['auto_open'] = False
--> 152 url = plot(figure_or_data, **plot_options)
153
154 if isinstance(figure_or_data, dict):
/usr/local/lib/python2.7/dist-packages/plotly/plotly/plotly.pyc in plot(figure_or_data, validate, **plot_options)
239
240 plot_options = _plot_option_logic(plot_options)
--> 241 res = _send_to_plotly(figure, **plot_options)
242
243 if res['error'] == '':
/usr/local/lib/python2.7/dist-packages/plotly/plotly/plotly.pyc in _send_to_plotly(figure, **plot_options)
1407 fig = tools._replace_newline(figure) # does not mutate figure
1408 data = json.dumps(fig['data'] if 'data' in fig else [],
-> 1409 cls=utils.PlotlyJSONEncoder)
1410 credentials = get_credentials()
1411 validate_credentials(credentials)
/usr/lib/python2.7/json/__init__.pyc in dumps(obj, skipkeys, ensure_ascii, check_circular, allow_nan, cls, indent, separators, encoding, default, sort_keys, **kw)
249 check_circular=check_circular, allow_nan=allow_nan, indent=indent,
250 separators=separators, encoding=encoding, default=default,
--> 251 sort_keys=sort_keys, **kw).encode(obj)
252
253
/usr/local/lib/python2.7/dist-packages/plotly/utils.pyc in encode(self, o)
144
145 # this will raise errors in a normal-expected way
--> 146 encoded_o = super(PlotlyJSONEncoder, self).encode(o)
147
148 # now:
/usr/lib/python2.7/json/encoder.pyc in encode(self, o)
205 # exceptions aren't as detailed. The list call should be roughly
206 # equivalent to the PySequence_Fast that ''.join() would do.
--> 207 chunks = self.iterencode(o, _one_shot=True)
208 if not isinstance(chunks, (list, tuple)):
209 chunks = list(chunks)
/usr/lib/python2.7/json/encoder.pyc in iterencode(self, o, _one_shot)
268 self.key_separator, self.item_separator, self.sort_keys,
269 self.skipkeys, _one_shot)
--> 270 return _iterencode(o, 0)
271
272 def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
/usr/local/lib/python2.7/dist-packages/plotly/utils.pyc in default(self, obj)
211 except NotEncodable:
212 pass
--> 213 return json.JSONEncoder.default(self, obj)
214
215 #staticmethod
/usr/lib/python2.7/json/encoder.pyc in default(self, o)
182
183 """
--> 184 raise TypeError(repr(o) + " is not JSON serializable")
185
186 def encode(self, o):
TypeError: <bound method DataFrame.count of query_word count
0 1,2 1
1 four 1
2 prefix 1
.. ...... ..
.. ...... ..
3 francesco 1
As far as I understand from the other SF questions on the topic "is not json serializable" and from the error message, it is the problem with encoding? and not of the datatype.
Because, when I print type(df2.query_word) it says <class 'pandas.core.series.Series'>. So how to make a series serializable? Since the traceback doesn't show any encoding error such as in here or here.
What is the easy turn-around? My main intension to post this question is to understand whether this is a problem with dataframe, the data, ipython or plotly.

Pandas groupby and describe flags AttributeError

I have a bunch of data stored in vals. The indices are monotonic, but not continuous. I'm attempting to do some analysis on histograms of the data, so I've created the following structure:
hist = pd.DataFrame(vals)
hist['bins'] = pd.cut(vals, 100)
This is data taken from an experimental instrument and I know that some of the bins have only 1 or 2 counts in them, which I'm trying to remove. I've tried using groupby as follows and get the following error (Full traceback included at the end of the note):
hist.groupby('bins').describe()
AttributeError: 'Categorical' object has no attribute 'flags'
However, when I do the following, the error does not show up and I get the expected result:
In[]: hist.index = hist.bins
In[]: hist['bins'] = hist.index
In[]: desc = hist.groupby('bins').describe()
In[]: desc.index.names = ['bins', 'describe']
Out[]: **describe with MultiIndex for rows.**
If I don't include the second line hist['bins'] = hist.index, I still get an AttributeError: 'Categorical' object has no attribute 'flags' and to the best that I can tell, the traceback is identical.
Can someone explain what the flags are and why they only seem to work when I set the index to bins and then replace the bins by the version stored in the index?
My end goal is to remove the data for bins with counts <= 6. If someone has an easier workaround than the way I'm going after it, I'd also be grateful.
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-11-f606a051f2e4> in <module>()
----> 1 hist.groupby('bins').describe()
C:\Users\balterma\AppData\Local\Enthought\Canopy\App\appdata\canopy-1.4.1.1975.win-x86_64\lib\site-packages\IPython\core\displayhook.pyc in __call__(self, result)
245 self.start_displayhook()
246 self.write_output_prompt()
--> 247 format_dict, md_dict = self.compute_format_data(result)
248 self.write_format_data(format_dict, md_dict)
249 self.update_user_ns(result)
C:\Users\balterma\AppData\Local\Enthought\Canopy\App\appdata\canopy-1.4.1.1975.win-x86_64\lib\site-packages\IPython\core\displayhook.pyc in compute_format_data(self, result)
155
156 """
--> 157 return self.shell.display_formatter.format(result)
158
159 def write_format_data(self, format_dict, md_dict=None):
C:\Users\balterma\AppData\Local\Enthought\Canopy\App\appdata\canopy-1.4.1.1975.win-x86_64\lib\site-packages\IPython\core\formatters.pyc in format(self, obj, include, exclude)
150 md = None
151 try:
--> 152 data = formatter(obj)
153 except:
154 # FIXME: log the exception
C:\Users\balterma\AppData\Local\Enthought\Canopy\App\appdata\canopy-1.4.1.1975.win-x86_64\lib\site-packages\IPython\core\formatters.pyc in __call__(self, obj)
479 type_pprinters=self.type_printers,
480 deferred_pprinters=self.deferred_printers)
--> 481 printer.pretty(obj)
482 printer.flush()
483 return stream.getvalue()
C:\Users\balterma\AppData\Local\Enthought\Canopy\App\appdata\canopy-1.4.1.1975.win-x86_64\lib\site-packages\IPython\lib\pretty.pyc in pretty(self, obj)
360 if callable(meth):
361 return meth(obj, self, cycle)
--> 362 return _default_pprint(obj, self, cycle)
363 finally:
364 self.end_group()
C:\Users\balterma\AppData\Local\Enthought\Canopy\App\appdata\canopy-1.4.1.1975.win-x86_64\lib\site-packages\IPython\lib\pretty.pyc in _default_pprint(obj, p, cycle)
480 if getattr(klass, '__repr__', None) not in _baseclass_reprs:
481 # A user-provided repr.
--> 482 p.text(repr(obj))
483 return
484 p.begin_group(1, '<')
C:\Users\balterma\AppData\Local\Enthought\Canopy\User\lib\site-packages\pandas\core\base.pyc in __repr__(self)
62 Yields Bytestring in Py2, Unicode String in py3.
63 """
---> 64 return str(self)
65
66
C:\Users\balterma\AppData\Local\Enthought\Canopy\User\lib\site-packages\pandas\core\base.pyc in __str__(self)
42 if compat.PY3:
43 return self.__unicode__()
---> 44 return self.__bytes__()
45
46 def __bytes__(self):
C:\Users\balterma\AppData\Local\Enthought\Canopy\User\lib\site-packages\pandas\core\base.pyc in __bytes__(self)
54
55 encoding = get_option("display.encoding")
---> 56 return self.__unicode__().encode(encoding, 'replace')
57
58 def __repr__(self):
C:\Users\balterma\AppData\Local\Enthought\Canopy\User\lib\site-packages\pandas\core\frame.pyc in __unicode__(self)
507 width = None
508 self.to_string(buf=buf, max_rows=max_rows, max_cols=max_cols,
--> 509 line_width=width, show_dimensions=show_dimensions)
510
511 return buf.getvalue()
C:\Users\balterma\AppData\Local\Enthought\Canopy\User\lib\site-packages\pandas\core\frame.pyc in to_string(self, buf, columns, col_space, colSpace, header, index, na_rep, formatters, float_format, sparsify, index_names, justify, line_width, max_rows, max_cols, show_dimensions)
1340 max_rows=max_rows,
1341 max_cols=max_cols,
-> 1342 show_dimensions=show_dimensions)
1343 formatter.to_string()
1344
C:\Users\balterma\AppData\Local\Enthought\Canopy\User\lib\site-packages\pandas\core\format.pyc in __init__(self, frame, buf, columns, col_space, header, index, na_rep, formatters, justify, float_format, sparsify, index_names, line_width, max_rows, max_cols, show_dimensions, **kwds)
345 self.columns = frame.columns
346
--> 347 self._chk_truncate()
348
349 def _chk_truncate(self):
C:\Users\balterma\AppData\Local\Enthought\Canopy\User\lib\site-packages\pandas\core\format.pyc in _chk_truncate(self)
410 else:
411 row_num = max_rows_adj // 2
--> 412 frame = concat((frame.iloc[:row_num, :], frame.iloc[-row_num:, :]))
413 self.tr_row_num = row_num
414
C:\Users\balterma\AppData\Local\Enthought\Canopy\User\lib\site-packages\pandas\tools\merge.pyc in concat(objs, axis, join, join_axes, ignore_index, keys, levels, names, verify_integrity, copy)
752 keys=keys, levels=levels, names=names,
753 verify_integrity=verify_integrity,
--> 754 copy=copy)
755 return op.get_result()
756
C:\Users\balterma\AppData\Local\Enthought\Canopy\User\lib\site-packages\pandas\tools\merge.pyc in __init__(self, objs, axis, join, join_axes, keys, levels, names, ignore_index, verify_integrity, copy)
884 self.copy = copy
885
--> 886 self.new_axes = self._get_new_axes()
887
888 def get_result(self):
C:\Users\balterma\AppData\Local\Enthought\Canopy\User\lib\site-packages\pandas\tools\merge.pyc in _get_new_axes(self)
957 new_axes[i] = ax
958
--> 959 new_axes[self.axis] = self._get_concat_axis()
960 return new_axes
961
C:\Users\balterma\AppData\Local\Enthought\Canopy\User\lib\site-packages\pandas\tools\merge.pyc in _get_concat_axis(self)
1009
1010 if self.keys is None:
-> 1011 concat_axis = _concat_indexes(indexes)
1012 else:
1013 concat_axis = _make_concat_multiindex(indexes, self.keys,
C:\Users\balterma\AppData\Local\Enthought\Canopy\User\lib\site-packages\pandas\tools\merge.pyc in _concat_indexes(indexes)
1027
1028 def _concat_indexes(indexes):
-> 1029 return indexes[0].append(indexes[1:])
1030
1031
C:\Users\balterma\AppData\Local\Enthought\Canopy\User\lib\site-packages\pandas\core\index.pyc in append(self, other)
4603 arrays = []
4604 for i in range(self.nlevels):
-> 4605 label = self.get_level_values(i)
4606 appended = [o.get_level_values(i) for o in other]
4607 arrays.append(label.append(appended))
C:\Users\balterma\AppData\Local\Enthought\Canopy\User\lib\site-packages\pandas\core\index.pyc in get_level_values(self, level)
4239 unique = self.levels[num] # .values
4240 labels = self.labels[num]
-> 4241 filled = com.take_1d(unique.values, labels, fill_value=unique._na_value)
4242 values = unique._simple_new(filled, self.names[num],
4243 freq=getattr(unique, 'freq', None),
C:\Users\balterma\AppData\Local\Enthought\Canopy\User\lib\site-packages\pandas\core\common.pyc in take_nd(arr, indexer, axis, out, fill_value, mask_info, allow_fill)
829 out_shape[axis] = len(indexer)
830 out_shape = tuple(out_shape)
--> 831 if arr.flags.f_contiguous and axis == arr.ndim - 1:
832 # minor tweak that can make an order-of-magnitude difference
833 # for dataframes initialized directly from 2-d ndarrays
AttributeError: 'Categorical' object has no attribute 'flags'

This looks to be be a bug with Categorical data that will be corrected in version 0.17.0 (issue here).
In the meantime, you could just cast the category to an object dtype - this is what was happening when you assigned to the index and back.
df['bins'] = df['bins'].astype(str)

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

I'm trouble in error of pandas "DataFrame" - python

Related

I can't seem to find a fix for the "ValueError: Unknown subheader signature" raised while reading sas file using pd.read_sas?

How to convert 2 column dataframe to dictionary without turning keys into a list

appending json files in python

Pandas Dataframe to plotly plot showing TypeError

Pandas groupby and describe flags AttributeError

Categories

Resources