I am completely new to PySpark. I am getting an error while executing the following commands:
from pyspark.sql import SparkSession
spark = SparkSession.builder.appName("basics").getOrCreate()
df = spark.read.csv("data.csv", inferSchema=True, header=True)
df.columns
My data has 1,000,000 rows and 50 columns. I am getting the following error:
ValueError Traceback (most recent call last)
<ipython-input-71-b666bf274d0a> in <module>
----> 1 df.columns
~/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py in columns(self)
935 ['age', 'name']
936 """
--> 937 return [f.name for f in self.schema.fields]
938
939 @since(2.3)
~/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py in schema(self)
253 if self._schema is None:
254 try:
--> 255 self._schema = _parse_datatype_json_string(self._jdf.schema().json())
256 except AttributeError as e:
257 raise Exception(
~/anaconda3/lib/python3.7/site-packages/pyspark/sql/types.py in _parse_datatype_json_string(json_string)
867 >>> check_datatype(complex_maptype)
868 """
--> 869 return _parse_datatype_json_value(json.loads(json_string))
870
871
~/anaconda3/lib/python3.7/site-packages/pyspark/sql/types.py in _parse_datatype_json_value(json_value)
884 tpe = json_value["type"]
885 if tpe in _all_complex_types:
--> 886 return _all_complex_types[tpe].fromJson(json_value)
887 elif tpe == 'udt':
888 return UserDefinedType.fromJson(json_value)
~/anaconda3/lib/python3.7/site-packages/pyspark/sql/types.py in fromJson(cls, json)
575 @classmethod
576 def fromJson(cls, json):
--> 577 return StructType([StructField.fromJson(f) for f in json["fields"]])
578
579 def fieldNames(self):
~/anaconda3/lib/python3.7/site-packages/pyspark/sql/types.py in <listcomp>(.0)
575 @classmethod
576 def fromJson(cls, json):
--> 577 return StructType([StructField.fromJson(f) for f in json["fields"]])
578
579 def fieldNames(self):
~/anaconda3/lib/python3.7/site-packages/pyspark/sql/types.py in fromJson(cls, json)
432 def fromJson(cls, json):
433 return StructField(json["name"],
--> 434 _parse_datatype_json_value(json["type"]),
435 json["nullable"],
436 json["metadata"])
~/anaconda3/lib/python3.7/site-packages/pyspark/sql/types.py in _parse_datatype_json_value(json_value)
880 return DecimalType(int(m.group(1)), int(m.group(2)))
881 else:
--> 882 raise ValueError("Could not parse datatype: %s" % json_value)
883 else:
884 tpe = json_value["type"]
ValueError: Could not parse datatype: decimal(6,-8)
Can anyone please help me understand why I am getting this error and how to overcome it? If the error is caused by a wrongly inferred schema, how can I define the schema for 50 columns? TIA!
According to what you commented, use inferSchema=True, and this UNTESTED code should help you out:
from pyspark.sql import SparkSession
from pyspark.sql.types import *
spark = SparkSession.builder.appName("basics").getOrCreate()
df = spark.read.csv("data.csv", inferSchema=True, header=True)
for column_type in df.dtypes:
    if 'string' in column_type[1]:
        df = df.withColumn(column_type[0], df[column_type[0]].cast(StringType()))
    elif 'double' in column_type[1]:
        df = df.withColumn(column_type[0], df[column_type[0]].cast(DoubleType()))
    elif 'int' in column_type[1]:
        df = df.withColumn(column_type[0], df[column_type[0]].cast(IntegerType()))
    elif 'bool' in column_type[1]:
        df = df.withColumn(column_type[0], df[column_type[0]].cast(BooleanType()))
    elif 'decimal' in column_type[1]:
        # recast problematic decimal columns to double
        df = df.withColumn(column_type[0], df[column_type[0]].cast(DoubleType()))
    # add as many conditions as you need for other types
df.schema
Let me know if it worked for you; if not, I'll test and update it.
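If schema inference itself is what produces the invalid decimal(6,-8) type, another untested option in the same spirit is to skip inference entirely: read with inferSchema=False so every column arrives as a string, then cast the numeric columns yourself. The column names below are placeholders:

from pyspark.sql.functions import col
from pyspark.sql.types import DoubleType

# With inferSchema=False nothing is parsed as a decimal, so the schema is
# always valid; every column starts out as a string.
df = spark.read.csv("data.csv", inferSchema=False, header=True)

# Cast the columns you know are numeric; "col_a" and "col_b" are placeholders.
for c in ["col_a", "col_b"]:
    df = df.withColumn(c, col(c).cast(DoubleType()))

df.columns  # the schema can now be read without the ValueError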
I'm trying to make a Telegram alarm bot but encountered an error: "TypeError: Object of type DataFrame is not JSON serializable".
Here is my code:
import FinanceDataReader as fdr
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import pandas as pd
import datetime
from datetime import date
import sys
fdr.__version__
import schedule
import time
import pytz
import telegram
count = 1

def job():
    # use a global counter
    global count
    count += 1
    # Korea local time and weekend check
    now = datetime.datetime.now(pytz.timezone('Asia/Seoul'))
    today = date.today()
    weekend = today.weekday()
    # excluded hours: do not send an alarm before 09:00 or after 15:00
    if now.hour >= 15 or now.hour <= 9 or weekend in [5, 6]:
        return
    API_KEY = 'My key'
    bot = telegram.Bot(token=API_KEY)
    bot.get_updates()
    # for i in updates:
    #     print(i.message['chat']['id'])
    # KOSDAQ index
    code = 'KQ11'
    df = fdr.DataReader('KQ11', '2022-08').reset_index()
    # assign the 3-, 5- and 10-day moving averages
    df['close_sma3d'] = df['Close'].rolling(3).mean()
    df['close_sma5d'] = df['Close'].rolling(5).mean()
    df['close_sma10d'] = df['Close'].rolling(10).mean()
    # rebuild the dataframe
    # df = df.to_dict()
    # df.rename(columns={0: 'Date', 1: 'Close', 2: 'Open', 3: 'High', 4: 'Low', 5: 'Volume', 6: 'Change', 7: 'close_sma3d', 8: 'close_sma5d', 9: 'close_sma10d'}, inplace=True)
    df2 = df.loc[:, ['Date', 'Close', 'close_sma3d', 'close_sma5d', 'close_sma10d']].iloc[-1:]
    alerts = df2[(df2['Close'] > df2['close_sma3d']) | (df2['Close'] > df2['close_sma5d']) | (df2['Close'] > df2['close_sma10d'])]

    def display(row):
        print(f" - {row['Date']} signal! KOSDAQ current price {row['Close']} 3-day MA {row['close_sma3d']:.2f} 5-day MA {row['close_sma5d']:.2f} 10-day MA {row['close_sma10d']:.2f}")

    Market_timing = alerts.apply(display, axis=1)
    if count % 1 == 0:
        bot.sendMessage(chat_id='Mykey', text=Market_timing)
    else:
        print('Waiting..')

# run every 2 hours
schedule.every(1).minutes.do(job)
print('Start App..')
while True:
    schedule.run_pending()
    time.sleep(1)
And here is the error:
TypeError Traceback (most recent call last)
Input In [3], in <cell line: 49>()
46 print('Start App..')
48 while True:
---> 49 schedule.run_pending()
50 time.sleep(1)
File ~\miniconda3\envs\py38\lib\site-packages\schedule\__init__.py:780, in run_pending()
776 def run_pending() -> None:
777 """Calls :meth:`run_pending <Scheduler.run_pending>` on the
778 :data:`default scheduler instance <default_scheduler>`.
779 """
--> 780 default_scheduler.run_pending()
File ~\miniconda3\envs\py38\lib\site-packages\schedule\__init__.py:100, in Scheduler.run_pending(self)
98 runnable_jobs = (job for job in self.jobs if job.should_run)
99 for job in sorted(runnable_jobs):
--> 100 self._run_job(job)
File ~\miniconda3\envs\py38\lib\site-packages\schedule\__init__.py:172, in Scheduler._run_job(self, job)
171 def _run_job(self, job: "Job") -> None:
--> 172 ret = job.run()
173 if isinstance(ret, CancelJob) or ret is CancelJob:
174 self.cancel_job(job)
File ~\miniconda3\envs\py38\lib\site-packages\schedule\__init__.py:661, in Job.run(self)
658 return CancelJob
660 logger.debug("Running job %s", self)
--> 661 ret = self.job_func()
662 self.last_run = datetime.datetime.now()
663 self._schedule_next_run()
Input In [3], in job()
35 Market_timing = alerts.apply(display, axis=1)
37 if count % 1 == 0:
---> 38 bot.sendMessage(chat_id = '1760120639', text = Market_timing)
39 else:
40 print('대기 중입니다..')
File ~\miniconda3\envs\py38\lib\site-packages\telegram\bot.py:133, in log.<locals>.decorator(*args, **kwargs)
130 @functools.wraps(func)
131 def decorator(*args: object, **kwargs: object) -> RT: # pylint: disable=W0613
132 logger.debug('Entering: %s', func.__name__)
--> 133 result = func(*args, **kwargs)
134 logger.debug(result)
135 logger.debug('Exiting: %s', func.__name__)
File ~\miniconda3\envs\py38\lib\site-packages\telegram\bot.py:525, in Bot.send_message(self, chat_id, text, parse_mode, disable_web_page_preview, disable_notification, reply_to_message_id, reply_markup, timeout, api_kwargs, allow_sending_without_reply, entities, protect_content)
522 if entities:
523 data['entities'] = [me.to_dict() for me in entities]
--> 525 return self._message( # type: ignore[return-value]
526 'sendMessage',
527 data,
528 disable_notification=disable_notification,
529 reply_to_message_id=reply_to_message_id,
530 reply_markup=reply_markup,
531 allow_sending_without_reply=allow_sending_without_reply,
532 timeout=timeout,
533 api_kwargs=api_kwargs,
534 protect_content=protect_content,
535 )
File ~\miniconda3\envs\py38\lib\site-packages\telegram\bot.py:339, in Bot._message(self, endpoint, data, reply_to_message_id, disable_notification, reply_markup, allow_sending_without_reply, timeout, api_kwargs, protect_content)
336 else:
337 data['media'].parse_mode = None
--> 339 result = self._post(endpoint, data, timeout=timeout, api_kwargs=api_kwargs)
341 if result is True:
342 return result
File ~\miniconda3\envs\py38\lib\site-packages\telegram\bot.py:298, in Bot._post(self, endpoint, data, timeout, api_kwargs)
295 # Drop any None values because Telegram doesn't handle them well
296 data = {key: value for key, value in data.items() if value is not None}
--> 298 return self.request.post(
299 f'{self.base_url}/{endpoint}', data=data, timeout=effective_timeout
300 )
File ~\miniconda3\envs\py38\lib\site-packages\telegram\utils\request.py:364, in Request.post(self, url, data, timeout)
359 result = self._request_wrapper('POST', url, fields=data, **urlopen_kwargs)
360 else:
361 result = self._request_wrapper(
362 'POST',
363 url,
--> 364 body=json.dumps(data).encode('utf-8'),
365 headers={'Content-Type': 'application/json'},
366 **urlopen_kwargs,
367 )
369 return self._parse(result)
File ~\miniconda3\envs\py38\lib\json\__init__.py:231, in dumps(obj, skipkeys, ensure_ascii, check_circular, allow_nan, cls, indent, separators, default, sort_keys, **kw)
226 # cached encoder
227 if (not skipkeys and ensure_ascii and
228 check_circular and allow_nan and
229 cls is None and indent is None and separators is None and
230 default is None and not sort_keys and not kw):
--> 231 return _default_encoder.encode(obj)
232 if cls is None:
233 cls = JSONEncoder
File ~\miniconda3\envs\py38\lib\json\encoder.py:199, in JSONEncoder.encode(self, o)
195 return encode_basestring(o)
196 # This doesn't pass the iterator directly to ''.join() because the
197 # exceptions aren't as detailed. The list call should be roughly
198 # equivalent to the PySequence_Fast that ''.join() would do.
--> 199 chunks = self.iterencode(o, _one_shot=True)
200 if not isinstance(chunks, (list, tuple)):
201 chunks = list(chunks)
File ~\miniconda3\envs\py38\lib\json\encoder.py:257, in JSONEncoder.iterencode(self, o, _one_shot)
252 else:
253 _iterencode = _make_iterencode(
254 markers, self.default, _encoder, self.indent, floatstr,
255 self.key_separator, self.item_separator, self.sort_keys,
256 self.skipkeys, _one_shot)
--> 257 return _iterencode(o, 0)
File ~\miniconda3\envs\py38\lib\json\encoder.py:179, in JSONEncoder.default(self, o)
160 def default(self, o):
161 """Implement this method in a subclass such that it returns
162 a serializable object for ``o``, or calls the base implementation
163 (to raise a ``TypeError``).
(...)
177
178 """
--> 179 raise TypeError(f'Object of type {o.__class__.__name__} '
180 f'is not JSON serializable')
TypeError: Object of type DataFrame is not JSON serializable
I tried the method from "TypeError: Object of type 'DataFrame' is not JSON serializable", but had a problem making a column.
Any help would be appreciated.
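For reference, the traceback bottoms out in json.dumps inside python-telegram-bot because text=Market_timing is still a pandas object (alerts.apply(display, axis=1) returns a Series of None values from print). A minimal untested sketch of one way around this is to build a plain string from the alert rows before sending; the exact formatting below is an assumption:

# Build a plain-text message from the alert rows instead of passing a
# pandas object to sendMessage (its text parameter expects a str).
def format_row(row):
    return (f" - {row['Date']} signal! KOSDAQ current price {row['Close']} "
            f"3-day MA {row['close_sma3d']:.2f} "
            f"5-day MA {row['close_sma5d']:.2f} "
            f"10-day MA {row['close_sma10d']:.2f}")

message = '\n'.join(format_row(row) for _, row in alerts.iterrows())
if message:
    bot.sendMessage(chat_id='Mykey', text=message)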
I'm a beginner who is learning pandas.
I tried the two things below, but neither solved the problem.
I use Jupyter Notebook.
Could you please help me?
#1
import sys
sys.getdefaultencoding()
#2
from importlib import reload
import sys
reload(sys)
# I wanted to create the DataFrame below.
# It worked in another file, but it does not work in the current file.
from pandas import Series,DataFrame
import pandas as pd
data={"ID":["001","002","003"],
"city":["hyougo","tiba","gihu"],
"people":["100","230","249"]}
data_frame=DataFrame(data)
print(data_frame)
#error
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-133-640865466ed4> in <module>
3 "people":["100","230","249"]}
4 data_frame=DataFrame(data)
----> 5 print(data_frame)
~\anaconda3\lib\site-packages\pandas\core\frame.py in __repr__(self)
678 else:
679 width = None
--> 680 self.to_string(
681 buf=buf,
682 max_rows=max_rows,
~\anaconda3\lib\site-packages\pandas\core\frame.py in to_string(self, buf, columns, col_space, header, index, na_rep, formatters, float_format, sparsify, index_names, justify, max_rows, min_rows, max_cols, show_dimensions, decimal, line_width, max_colwidth, encoding)
818 line_width=line_width,
819 )
--> 820 return formatter.to_string(buf=buf, encoding=encoding)
821
822 # ----------------------------------------------------------------------
~\anaconda3\lib\site-packages\pandas\io\formats\format.py in to_string(self, buf, encoding)
912 encoding: Optional[str] = None,
913 ) -> Optional[str]:
--> 914 return self.get_result(buf=buf, encoding=encoding)
915
916 def to_latex(
~\anaconda3\lib\site-packages\pandas\io\formats\format.py in get_result(self, buf, encoding)
519 """
520 with self.get_buffer(buf, encoding=encoding) as f:
--> 521 self.write_result(buf=f)
522 if buf is None:
523 return f.getvalue()
~\anaconda3\lib\site-packages\pandas\io\formats\format.py in write_result(self, buf)
821 else:
822
--> 823 strcols = self._to_str_columns()
824 if self.line_width is None: # no need to wrap around just print
825 # the whole frame
~\anaconda3\lib\site-packages\pandas\io\formats\format.py in _to_str_columns(self)
717 # may include levels names also
718
--> 719 str_index = self._get_formatted_index(frame)
720
721 if not is_list_like(self.header) and not self.header:
~\anaconda3\lib\site-packages\pandas\io\formats\format.py in _get_formatted_index(self, frame)
1057 )
1058 else:
-> 1059 fmt_index = [index.format(name=self.show_row_idx_names, formatter=fmt)]
1060
1061 fmt_index = [
AttributeError: 'list' object has no attribute 'format'
You need to import pandas first and then add the alias pd before DataFrame:
import pandas as pd
data={"ID":["001","002","003"],
"city":["hyougo","tiba","gihu"],
"people":["100","230","249"]}
data_frame=pd.DataFrame(data)
print(data_frame)
Prints:
ID city people
0 001 hyougo 100
1 002 tiba 230
2 003 gihu 249
If you don't have pandas installed, you will need to do that first:
pip install pandas
Here's my dataset:
result  score
1       0.786
1       0.896
0       0.435
1       0.563
0       0.145
Here's my code
import pandas as pd
intervals = data.groupby('result')['score'].transform(pd.qcut, 10)
Here's the error
TypeError Traceback (most recent call last)
/opt/conda/lib/python3.8/site-packages/pandas/core/arrays/interval.py in astype(self, dtype, copy)
708 try:
--> 709 return np.asarray(self).astype(dtype, copy=copy)
710 except (TypeError, ValueError) as err:
TypeError: float() argument must be a string or a number, not 'pandas._libs.interval.Interval'
The above exception was the direct cause of the following exception:
TypeError Traceback (most recent call last)
<ipython-input-88-429b3ee3f973> in <module>
1 data['score'] = pd.to_numeric(data['score'])
----> 2 intervals = data.groupby('result')['score'].transform(pd.qcut, 10)
3 data['Bin_low'] = pd.IntervalIndex(intervals).left
4 data['Bin_high'] = pd.IntervalIndex(intervals).right
/opt/conda/lib/python3.8/site-packages/pandas/core/groupby/generic.py in transform(self, func, engine, engine_kwargs, *args, **kwargs)
491
492 if not isinstance(func, str):
--> 493 return self._transform_general(
494 func, *args, engine=engine, engine_kwargs=engine_kwargs, **kwargs
495 )
/opt/conda/lib/python3.8/site-packages/pandas/core/groupby/generic.py in _transform_general(self, func, engine, engine_kwargs, *args, **kwargs)
557 dtype = self._selected_obj.dtype
558 if is_numeric_dtype(dtype):
--> 559 result = maybe_downcast_to_dtype(result, dtype)
560
561 result.name = self._selected_obj.name
/opt/conda/lib/python3.8/site-packages/pandas/core/dtypes/cast.py in maybe_downcast_to_dtype(result, dtype)
150 dtype = np.dtype(dtype)
151
--> 152 converted = maybe_downcast_numeric(result, dtype, do_round)
153 if converted is not result:
154 return converted
/opt/conda/lib/python3.8/site-packages/pandas/core/dtypes/cast.py in maybe_downcast_numeric(result, dtype, do_round)
250 and not is_string_dtype(result.dtype)
251 ):
--> 252 return result.astype(dtype)
253
254 return result
/opt/conda/lib/python3.8/site-packages/pandas/core/generic.py in astype(self, dtype, copy, errors)
5544 else:
5545 # else, only a single dtype is given
-> 5546 new_data = self._mgr.astype(dtype=dtype, copy=copy, errors=errors,)
5547 return self._constructor(new_data).__finalize__(self, method="astype")
5548
/opt/conda/lib/python3.8/site-packages/pandas/core/internals/managers.py in astype(self, dtype, copy, errors)
593 self, dtype, copy: bool = False, errors: str = "raise"
594 ) -> "BlockManager":
--> 595 return self.apply("astype", dtype=dtype, copy=copy, errors=errors)
596
597 def convert(
/opt/conda/lib/python3.8/site-packages/pandas/core/internals/managers.py in apply(self, f, align_keys, **kwargs)
404 applied = b.apply(f, **kwargs)
405 else:
--> 406 applied = getattr(b, f)(**kwargs)
407 result_blocks = _extend_blocks(applied, result_blocks)
408
/opt/conda/lib/python3.8/site-packages/pandas/core/internals/blocks.py in astype(self, dtype, copy, errors)
568 if self.is_extension:
569 try:
--> 570 values = self.values.astype(dtype)
571 except (ValueError, TypeError):
572 if errors == "ignore":
/opt/conda/lib/python3.8/site-packages/pandas/core/arrays/interval.py in astype(self, dtype, copy)
710 except (TypeError, ValueError) as err:
711 msg = f"Cannot cast {type(self).__name__} to dtype {dtype}"
--> 712 raise TypeError(msg) from err
713
714 @classmethod
TypeError: Cannot cast IntervalArray to dtype float64
What should I do to cast the IntervalArray?
Since qcut returns a Series with the same indexing, you can just use apply, which works fine:
intervals = data.groupby('result')['score'].apply(pd.qcut, 10)
Output:
0 (0.741, 0.786]
1 (0.874, 0.896]
2 (0.406, 0.435]
3 (0.5619999999999999, 0.608]
4 (0.144, 0.174]
Name: score, dtype: interval
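If you also need the interval bounds as separate columns, as in the follow-up lines visible in your traceback, the same accessors should work on the apply result (an untested sketch):

# Extract the bin edges from the intervals produced by apply; this is the
# same IntervalIndex pattern the question attempted on the transform result.
data['Bin_low'] = pd.IntervalIndex(intervals).left
data['Bin_high'] = pd.IntervalIndex(intervals).right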
I am trying to append some JSON files in Python. I have the following code, which seems right; however, I am getting an error.
The code is as follows:
import pandas as pd

df1 = pd.DataFrame()
for i in range(0, 49):
    df = pd.read_json('/media/michael/extHDD/Kaggle/DeepFAke/DF_all/metadata{}.json'.format(i))
    df1.append(df.T)
The error is as follows.
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-76-ddb355627155> in <module>
3 df1=pd.DataFrame()
4 for i in range(0,49):
----> 5 df = pd.read_json ('/media/michael/extHDD/Kaggle/DeepFAke/DF_all/metadata{}.json'.format(i))
6 df1.append(df.T)
~/myenv/lib/python3.5/site-packages/pandas/io/json/_json.py in read_json(path_or_buf, orient, typ, dtype, convert_axes, convert_dates, keep_default_dates, numpy, precise_float, date_unit, encoding, lines, chunksize, compression)
590 return json_reader
591
--> 592 result = json_reader.read()
593 if should_close:
594 try:
~/myenv/lib/python3.5/site-packages/pandas/io/json/_json.py in read(self)
715 obj = self._get_object_parser(self._combine_lines(data.split("\n")))
716 else:
--> 717 obj = self._get_object_parser(self.data)
718 self.close()
719 return obj
~/myenv/lib/python3.5/site-packages/pandas/io/json/_json.py in _get_object_parser(self, json)
737 obj = None
738 if typ == "frame":
--> 739 obj = FrameParser(json, **kwargs).parse()
740
741 if typ == "series" or obj is None:
~/myenv/lib/python3.5/site-packages/pandas/io/json/_json.py in parse(self)
847
848 else:
--> 849 self._parse_no_numpy()
850
851 if self.obj is None:
~/myenv/lib/python3.5/site-packages/pandas/io/json/_json.py in _parse_no_numpy(self)
1091 if orient == "columns":
1092 self.obj = DataFrame(
-> 1093 loads(json, precise_float=self.precise_float), dtype=None
1094 )
1095 elif orient == "split":
ValueError: Expected object or value
The code works when I run it for each file individually. Would anyone be able to help me with this?
Thanks & Best Regards
Michael
The error occurs on the df = pd.read_json(...) line. It is likely that one of the files is nonexistent or malformed. My advice is to use a try/except to identify it:
for i in range(0, 49):
    try:
        df = pd.read_json('/media/michael/extHDD/Kaggle/DeepFAke/DF_all/metadata{}.json'.format(i))
    except:
        print('Error on iteration', i, ', file',
              '/media/michael/extHDD/Kaggle/DeepFAke/DF_all/metadata{}.json'.format(i))
        raise
    df1 = df1.append(df.T)  # append returns a new DataFrame, so assign it back
Catching any exception is normally bad practice because it can hide truly abnormal conditions like an IO or memory error. That is why I re-raise the original exception in the code above.
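As a side note, growing a DataFrame by calling append in a loop copies the data on every iteration; a common alternative (a sketch, untested against these files) is to collect the pieces in a list and concatenate once at the end:

import pandas as pd

frames = []
for i in range(0, 49):
    path = '/media/michael/extHDD/Kaggle/DeepFAke/DF_all/metadata{}.json'.format(i)
    try:
        frames.append(pd.read_json(path).T)
    except ValueError:
        # identify the offending file, then re-raise as above
        print('Error on file', path)
        raise
df1 = pd.concat(frames)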
I wrote a script which uses nltk's FreqDist module and then converts the result into a pandas dataframe. The code snippet is as follows:
.......
import unicodedata
str2 = unicodedata.normalize('NFKD', str1).encode('ascii','ignore')
words = nltk.tokenize.word_tokenize(str2)
fdist = nltk.FreqDist(words)
df = pd.DataFrame.from_dict(fdist, orient='index').reset_index()
df = df.rename(columns={'index':'query_word', 0:'count'})
df2 = df.sort_values(['count'], ascending=[False])
Now I am trying to plot it using plotly, for which my code snippet is as follows:
import plotly.plotly as py
import plotly.graph_objs as go
data = [go.Bar(x= df.query_word, y= df.count)]
py.iplot(data, filename='basic-bar')
When I run this part, I get the following error:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-15-87d0c9af254b> in <module>()
----> 1 py.iplot(data, filename='basic-bar')
/usr/local/lib/python2.7/dist-packages/plotly/plotly/plotly.pyc in iplot(figure_or_data, **plot_options)
150 if 'auto_open' not in plot_options:
151 plot_options['auto_open'] = False
--> 152 url = plot(figure_or_data, **plot_options)
153
154 if isinstance(figure_or_data, dict):
/usr/local/lib/python2.7/dist-packages/plotly/plotly/plotly.pyc in plot(figure_or_data, validate, **plot_options)
239
240 plot_options = _plot_option_logic(plot_options)
--> 241 res = _send_to_plotly(figure, **plot_options)
242
243 if res['error'] == '':
/usr/local/lib/python2.7/dist-packages/plotly/plotly/plotly.pyc in _send_to_plotly(figure, **plot_options)
1407 fig = tools._replace_newline(figure) # does not mutate figure
1408 data = json.dumps(fig['data'] if 'data' in fig else [],
-> 1409 cls=utils.PlotlyJSONEncoder)
1410 credentials = get_credentials()
1411 validate_credentials(credentials)
/usr/lib/python2.7/json/__init__.pyc in dumps(obj, skipkeys, ensure_ascii, check_circular, allow_nan, cls, indent, separators, encoding, default, sort_keys, **kw)
249 check_circular=check_circular, allow_nan=allow_nan, indent=indent,
250 separators=separators, encoding=encoding, default=default,
--> 251 sort_keys=sort_keys, **kw).encode(obj)
252
253
/usr/local/lib/python2.7/dist-packages/plotly/utils.pyc in encode(self, o)
144
145 # this will raise errors in a normal-expected way
--> 146 encoded_o = super(PlotlyJSONEncoder, self).encode(o)
147
148 # now:
/usr/lib/python2.7/json/encoder.pyc in encode(self, o)
205 # exceptions aren't as detailed. The list call should be roughly
206 # equivalent to the PySequence_Fast that ''.join() would do.
--> 207 chunks = self.iterencode(o, _one_shot=True)
208 if not isinstance(chunks, (list, tuple)):
209 chunks = list(chunks)
/usr/lib/python2.7/json/encoder.pyc in iterencode(self, o, _one_shot)
268 self.key_separator, self.item_separator, self.sort_keys,
269 self.skipkeys, _one_shot)
--> 270 return _iterencode(o, 0)
271
272 def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
/usr/local/lib/python2.7/dist-packages/plotly/utils.pyc in default(self, obj)
211 except NotEncodable:
212 pass
--> 213 return json.JSONEncoder.default(self, obj)
214
215 @staticmethod
/usr/lib/python2.7/json/encoder.pyc in default(self, o)
182
183 """
--> 184 raise TypeError(repr(o) + " is not JSON serializable")
185
186 def encode(self, o):
TypeError: <bound method DataFrame.count of query_word count
0 1,2 1
1 four 1
2 prefix 1
.. ...... ..
.. ...... ..
3 francesco 1
As far as I understand from the other SO questions on the topic "is not JSON serializable" and from the error message, is this a problem with encoding rather than with the datatype?
When I print type(df2.query_word) it says <class 'pandas.core.series.Series'>, so how do I make a Series serializable? The traceback doesn't show any encoding error such as in here or here.
What is the easy workaround? My main intention in posting this question is to understand whether this is a problem with the dataframe, the data, IPython, or plotly.
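For what it's worth, the repr in the error message ("bound method DataFrame.count") suggests this is neither an encoding nor an IPython issue: count is also a DataFrame method name, so the attribute access df.count returns the bound method rather than the column. A sketch of the bracket-access fix, assuming df2 (the sorted frame from above) is what you want to plot:

import plotly.plotly as py
import plotly.graph_objs as go

# Bracket indexing retrieves the columns; df.count would resolve to the
# DataFrame.count method, which the JSON encoder cannot serialize.
data = [go.Bar(x=df2['query_word'], y=df2['count'])]
py.iplot(data, filename='basic-bar')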