KeyError in `plotnine` (ggplot wrapper for python) - python

I am trying to use plotnine to build graphs and I keep coming across the same KeyError problem when I want to plot just the x-axis. See the traceback error below.
A sample of my data is:
WORD TAG TOPIC Value
0 hey aa 1 234
1 working bb 1 123
2 lullaby cc 2 32
3 Doggy cc 2 63
4 document aa 3 84
sample of my code:
from plotnine import *
import pandas as pd
inFile = 'infile.csv'
df = pd.read_csv(inFile, names = ['WORD', 'TAG','TOPIC','VALUE'], header=0,sep='\t')
df.sort_values('value',ascending=False)
sortedDf = df[:5]
plot1 = ggplot(sortedDf) + aes(x='TOPIC') + geom_histogram(binwidth=3)
where the final goal is to plot the count of each topic in a histogram.
I am not sure what data is missing that is raising the following KeyError, since there is no need for a weight: I am only interested in plotting the count of that one particular variable, i.e. topic 1 = 2, topic 2 = 2, topic 3 = 1.
Does anyone have a link to more detailed documentation of plotnine, or any experience with the library, to help me understand in more detail what I am missing?
Traceback Error:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-112-71707b4cf21a> in <module>()
1 plot2 = ggplot(sortedDf) + aes(x='TOPIC') + geom_histogram(binwidth=3)
----> 2 print plot2
/Users/anaconda/lib/python2.7/site-packages/plotnine/ggplot.pyc in __repr__(self)
82 Print/show the plot
83 """
---> 84 self.draw()
85 plt.show()
86 return '<ggplot: (%d)>' % self.__hash__()
/Users/anaconda/lib/python2.7/site-packages/plotnine/ggplot.pyc in draw(self)
139 # assign a default theme
140 self = deepcopy(self)
--> 141 self._build()
142
143 # If no theme we use the default
/Users/anaconda/lib/python2.7/site-packages/plotnine/ggplot.pyc in _build(self)
235
236 # Apply and map statistics
--> 237 layers.compute_statistic(layout)
238 layers.map_statistic(self)
239
/Users/anaconda/lib/python2.7/site-packages/plotnine/layer.pyc in compute_statistic(self, layout)
92 def compute_statistic(self, layout):
93 for l in self:
---> 94 l.compute_statistic(layout)
95
96 def map_statistic(self, plot):
/Users/anaconda/lib/python2.7/site-packages/plotnine/layer.pyc in compute_statistic(self, layout)
369 data = self.stat.use_defaults(data)
370 data = self.stat.setup_data(data)
--> 371 data = self.stat.compute_layer(data, params, layout)
372 self.data = data
373
/Users/anaconda/lib/python2.7/site-packages/plotnine/stats/stat.pyc in compute_layer(cls, data, params, layout)
194 return cls.compute_panel(pdata, pscales, **params)
195
--> 196 return groupby_apply(data, 'PANEL', fn)
197
198 #classmethod
/Users/anaconda/lib/python2.7/site-packages/plotnine/utils.pyc in groupby_apply(df, cols, func, *args, **kwargs)
615 # do not mark d as a slice of df i.e no SettingWithCopyWarning
616 d.is_copy = None
--> 617 lst.append(func(d, *args, **kwargs))
618 return pd.concat(lst, axis=axis, ignore_index=True)
619
/Users/anaconda/lib/python2.7/site-packages/plotnine/stats/stat.pyc in fn(pdata)
192 return pdata
193 pscales = layout.get_scales(pdata['PANEL'].iat[0])
--> 194 return cls.compute_panel(pdata, pscales, **params)
195
196 return groupby_apply(data, 'PANEL', fn)
/Users/anaconda/lib/python2.7/site-packages/plotnine/stats/stat.pyc in compute_panel(cls, data, scales, **params)
221 for _, old in data.groupby('group'):
222 old.is_copy = None
--> 223 new = cls.compute_group(old, scales, **params)
224 unique = uniquecols(old)
225 missing = unique.columns.difference(new.columns)
/Users/anaconda/lib/python2.7/site-packages/plotnine/stats/stat_bin.pyc in compute_group(cls, data, scales, **params)
107 new_data = assign_bins(
108 data['x'], breaks, data.get('weight'),
--> 109 params['pad'], params['closed'])
110 return new_data
/Users/anaconda/lib/python2.7/site-packages/plotnine/stats/binning.pyc in assign_bins(x, breaks, weight, pad, closed)
163 df = pd.DataFrame({'bin_idx': bin_idx, 'weight': weight})
164 wftable = df.pivot_table(
--> 165 'weight', index=['bin_idx'], aggfunc=np.sum)['weight']
166
167 # Empty bins get no value in the computed frequency table.
/Users/anaconda/lib/python2.7/site-packages/pandas/core/series.pyc in __getitem__(self, key)
601 result = self.index.get_value(self, key)
602
--> 603 if not is_scalar(result):
604 if is_list_like(result) and not isinstance(result, Series):
605
/Users/anaconda/lib/python2.7/site-packages/pandas/indexes/base.pyc in get_value(self, series, key)
pandas/index.pyx in pandas.index.IndexEngine.get_value (pandas/index.c:3557)()
pandas/index.pyx in pandas.index.IndexEngine.get_value (pandas/index.c:3240)()
pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:4363)()
KeyError: 'weight'

Nesting aes in ggplot like it is done in R may solve your issue:
plot1 = ggplot(sortedDf, aes(x='TOPIC')) + geom_histogram(binwidth=3)

Related

Cannot plot datetime64[ns] on hvplot axis ('pandas_datetime_types' is not defined' error)

I am simply trying to plot some values against datetime64[ns] timestamps with holoviews.
That is,
x-axis = nx1 datetime64[ns] values
y-axis = nx1 data.
Here is a screen shot of what I have:
Screenshot of my dataframe
<class 'pandas._libs.tslibs.timestamps.Timestamp'>
and my overall code:
import hvplot.pandas
import pandas as pd
##
Code omitted at the start to extract data and create dictionary to convert to data frame
##
#create dictionary
temp_dict = dict(sampling_time=time_y_value_is_taken, y_axis_values = y_values)
df = pd.Dataframe.from_dict(temp_dict)
df.sampling_time=df.sampling_time.astype('datetime64[ns]')
df=df.set_index('sampling_time')
##The following code cannot run this line- it throws error
df.hvplot.line()
I keep getting the error: 'pandas_datetime_types' is not defined. I have also tried importing datetime as datetime, but that does not work.
EDIT: Here is the traceback:
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
~\miniconda3\envs\mpess_visual\lib\site-packages\IPython\core\formatters.py in __call__(self, obj, include, exclude)
968
969 if method is not None:
--> 970 return method(include=include, exclude=exclude)
971 return None
972 else:
~\miniconda3\envs\mpess_visual\lib\site-packages\holoviews\core\dimension.py in _repr_mimebundle_(self, include, exclude)
1315 combined and returned.
1316 """
-> 1317 return Store.render(self)
1318
1319
~\miniconda3\envs\mpess_visual\lib\site-packages\holoviews\core\options.py in render(cls, obj)
1403 data, metadata = {}, {}
1404 for hook in hooks:
-> 1405 ret = hook(obj)
1406 if ret is None:
1407 continue
~\miniconda3\envs\mpess_visual\lib\site-packages\holoviews\ipython\display_hooks.py in pprint_display(obj)
280 if not ip.display_formatter.formatters['text/plain'].pprint:
281 return None
--> 282 return display(obj, raw_output=True)
283
284
~\miniconda3\envs\mpess_visual\lib\site-packages\holoviews\ipython\display_hooks.py in display(obj, raw_output, **kwargs)
250 elif isinstance(obj, (CompositeOverlay, ViewableElement)):
251 with option_state(obj):
--> 252 output = element_display(obj)
253 elif isinstance(obj, (Layout, NdLayout, AdjointLayout)):
254 with option_state(obj):
~\miniconda3\envs\mpess_visual\lib\site-packages\holoviews\ipython\display_hooks.py in wrapped(element)
144 try:
145 max_frames = OutputSettings.options['max_frames']
--> 146 mimebundle = fn(element, max_frames=max_frames)
147 if mimebundle is None:
148 return {}, {}
~\miniconda3\envs\mpess_visual\lib\site-packages\holoviews\ipython\display_hooks.py in element_display(element, max_frames)
190 return None
191
--> 192 return render(element)
193
194
~\miniconda3\envs\mpess_visual\lib\site-packages\holoviews\ipython\display_hooks.py in render(obj, **kwargs)
66 renderer = renderer.instance(fig='png')
67
---> 68 return renderer.components(obj, **kwargs)
69
70
~\miniconda3\envs\mpess_visual\lib\site-packages\holoviews\plotting\renderer.py in components(self, obj, fmt, comm, **kwargs)
408 doc = Document()
409 with config.set(embed=embed):
--> 410 model = plot.layout._render_model(doc, comm)
411 if embed:
412 return render_model(model, comm)
~\miniconda3\envs\mpess_visual\lib\site-packages\panel\viewable.py in _render_model(self, doc, comm)
453 if comm is None:
454 comm = state._comm_manager.get_server_comm()
--> 455 model = self.get_root(doc, comm)
456
457 if config.embed:
~\miniconda3\envs\mpess_visual\lib\site-packages\panel\viewable.py in get_root(self, doc, comm, preprocess)
510 """
511 doc = init_doc(doc)
--> 512 root = self._get_model(doc, comm=comm)
513 if preprocess:
514 self._preprocess(root)
~\miniconda3\envs\mpess_visual\lib\site-packages\panel\layout\base.py in _get_model(self, doc, root, parent, comm)
120 if root is None:
121 root = model
--> 122 objects = self._get_objects(model, [], doc, root, comm)
123 props = dict(self._init_params(), objects=objects)
124 model.update(**self._process_param_change(props))
~\miniconda3\envs\mpess_visual\lib\site-packages\panel\layout\base.py in _get_objects(self, model, old_objects, doc, root, comm)
110 else:
111 try:
--> 112 child = pane._get_model(doc, root, model, comm)
113 except RerenderError:
114 return self._get_objects(model, current_objects[:i], doc, root, comm)
~\miniconda3\envs\mpess_visual\lib\site-packages\panel\pane\holoviews.py in _get_model(self, doc, root, parent, comm)
237 plot = self.object
238 else:
--> 239 plot = self._render(doc, comm, root)
240
241 plot.pane = self
~\miniconda3\envs\mpess_visual\lib\site-packages\panel\pane\holoviews.py in _render(self, doc, comm, root)
304 kwargs['comm'] = comm
305
--> 306 return renderer.get_plot(self.object, **kwargs)
307
308 def _cleanup(self, root):
~\miniconda3\envs\mpess_visual\lib\site-packages\holoviews\plotting\bokeh\renderer.py in get_plot(self_or_cls, obj, doc, renderer, **kwargs)
71 combining the bokeh model with another plot.
72 """
---> 73 plot = super(BokehRenderer, self_or_cls).get_plot(obj, doc, renderer, **kwargs)
74 if plot.document is None:
75 plot.document = Document() if self_or_cls.notebook_context else curdoc()
~\miniconda3\envs\mpess_visual\lib\site-packages\holoviews\plotting\renderer.py in get_plot(self_or_cls, obj, doc, renderer, comm, **kwargs)
241 init_key = tuple(v if d is None else d for v, d in
242 zip(plot.keys[0], defaults))
--> 243 plot.update(init_key)
244 else:
245 plot = obj
~\miniconda3\envs\mpess_visual\lib\site-packages\holoviews\plotting\plot.py in update(self, key)
980 def update(self, key):
981 if len(self) == 1 and ((key == 0) or (key == self.keys[0])) and not self.drawn:
--> 982 return self.initialize_plot()
983 item = self.__getitem__(key)
984 self.traverse(lambda x: setattr(x, '_updated', True))
~\miniconda3\envs\mpess_visual\lib\site-packages\holoviews\plotting\bokeh\element.py in initialize_plot(self, ranges, plot, plots, source)
1388 element = self.hmap.last
1389 key = util.wrap_tuple(self.hmap.last_key)
-> 1390 ranges = self.compute_ranges(self.hmap, key, ranges)
1391 self.current_ranges = ranges
1392 self.current_frame = element
~\miniconda3\envs\mpess_visual\lib\site-packages\holoviews\plotting\plot.py in compute_ranges(self, obj, key, ranges)
636 if (not (axiswise and not isinstance(obj, HoloMap)) or
637 (not framewise and isinstance(obj, HoloMap))):
--> 638 self._compute_group_range(group, elements, ranges, framewise,
639 axiswise, robust, self.top_level,
640 prev_frame)
~\miniconda3\envs\mpess_visual\lib\site-packages\holoviews\plotting\plot.py in _compute_group_range(cls, group, elements, ranges, framewise, axiswise, robust, top_level, prev_frame)
853 continue
854 matching &= (
--> 855 len({'date' if isinstance(v, util.datetime_types) else 'number'
856 for rng in rs for v in rng if util.isfinite(v)}) < 2
857 )
~\miniconda3\envs\mpess_visual\lib\site-packages\holoviews\plotting\plot.py in <setcomp>(.0)
854 matching &= (
855 len({'date' if isinstance(v, util.datetime_types) else 'number'
--> 856 for rng in rs for v in rng if util.isfinite(v)}) < 2
857 )
858 if matching:
~\miniconda3\envs\mpess_visual\lib\site-packages\holoviews\core\util.py in isfinite(val)
902 return finite
903 elif isinstance(val, datetime_types+timedelta_types):
--> 904 return not isnat(val)
905 elif isinstance(val, (basestring, bytes)):
906 return True
~\miniconda3\envs\mpess_visual\lib\site-packages\holoviews\core\util.py in isnat(val)
866 elif pd and val is pd.NaT:
867 return True
--> 868 elif pd and isinstance(val, pandas_datetime_types+pandas_timedelta_types):
869 return pd.isna(val)
870 else:
NameError: name 'pandas_datetime_types' is not defined
Any suggestions? Thank you
Although I couldn't find any official doc to support my statement, it's a compatibility issue (HoloViews 1.14.4 was released before Pandas 1.3.0).
Looking at [GitHub]: holoviz/holoviews - (v1.14.4) holoviews/holoviews/core/util.py (starting with line #83), there are some conditional imports. One of them is ABCIndexClass.
[GitHub]: pandas-dev/pandas - (v1.3.0) pandas/pandas/core/dtypes/dtypes.py, on the other hand, does not provide it (as opposed to, say, its v1.2.5 counterpart), yielding a (silent) exception and the behavior you're experiencing.
Ways to go:
Upgrade HoloViews to v1.14.5, which no longer has this problem (or at least, there's a Pandas 1.3.0 conditional as well - fixed by [GitHub]: holoviz/holoviews - Add support for pandas>=1.3)
You could also downgrade Pandas to (e.g.) v1.2.5, although this is not the way to go

Dask dataframe compute failed

I'm playing around with Python Dask. I followed their dataframe example jupyter notebook but failed at the step when converting a dask dataframe to pandas data frame by calling the compute() function. Would anyone please advise what I did wrong?
Code:
### Cell0
!pip install "dask[complete]"
!pip install pandas
### Cell1
import dask
import dask.dataframe as dd
df = dask.datasets.timeseries()
df
### Cell2
df2 = df[df.y > 0]
df3 = df2.groupby('name').x.std()
df3
### Cell3
computed_df = df3.compute()
type(computed_df)
Error raised when executing computed_df = df3.compute() in cell 3.
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-6-6da1eef50c1d> in <module>
----> 1 computed_df = df3.compute()
2 type(computed_df)
~/.pyenv/versions/3.9.0/lib/python3.9/site-packages/dask/base.py in compute(self, **kwargs)
283 dask.base.compute
284 """
--> 285 (result,) = compute(self, traverse=False, **kwargs)
286 return result
287
~/.pyenv/versions/3.9.0/lib/python3.9/site-packages/dask/base.py in compute(*args, **kwargs)
559 )
560
--> 561 dsk = collections_to_dsk(collections, optimize_graph, **kwargs)
562 keys, postcomputes = [], []
563 for x in collections:
~/.pyenv/versions/3.9.0/lib/python3.9/site-packages/dask/base.py in collections_to_dsk(collections, optimize_graph, optimizations, **kwargs)
335 for opt, val in groups.items():
336 dsk, keys = _extract_graph_and_keys(val)
--> 337 dsk = opt(dsk, keys, **kwargs)
338
339 for opt in optimizations:
~/.pyenv/versions/3.9.0/lib/python3.9/site-packages/dask/dataframe/optimize.py in optimize(dsk, keys, **kwargs)
20 else:
21 # Perform Blockwise optimizations for HLG input
---> 22 dsk = optimize_dataframe_getitem(dsk, keys=keys)
23 dsk = optimize_blockwise(dsk, keys=keys)
24 dsk = fuse_roots(dsk, keys=keys)
~/.pyenv/versions/3.9.0/lib/python3.9/site-packages/dask/dataframe/optimize.py in optimize_dataframe_getitem(dsk, keys)
103 # Project columns and update blocks
104 old = layers[k]
--> 105 new = old.project_columns(columns)[0]
106 if new.name != old.name:
107 columns = list(columns)
~/.pyenv/versions/3.9.0/lib/python3.9/site-packages/dask/layers.py in project_columns(self, columns)
941 # Apply column projection in IO function
942 try:
--> 943 io_func = self.io_func.project_columns(list(columns))
944 except AttributeError:
945 io_func = self.io_func
~/.pyenv/versions/3.9.0/lib/python3.9/site-packages/dask/dataframe/io/demo.py in project_columns(self, columns)
87 func = copy.deepcopy(self)
88 func.columns = columns
---> 89 func.dtypes = {c: self.dtypes[c] for c in columns}
90 return func
91
~/.pyenv/versions/3.9.0/lib/python3.9/site-packages/dask/dataframe/io/demo.py in <dictcomp>(.0)
87 func = copy.deepcopy(self)
88 func.columns = columns
---> 89 func.dtypes = {c: self.dtypes[c] for c in columns}
90 return func
91
KeyError: 'gt-d5f81fc97f91e68c389fc34631419acc'
Interesting, I can reproduce this bug with:
python=3.9.4
pandas=1.2.4
dask=2021.5.0
distributed=2021.5.0
Specifically, the error occurs in this step:
df2 = df[df.y > 0]
I raised an issue on GitHub, but in the meantime downgrading the dask version to 2021.4.1 resolves the problem (the computed result will show):
python=3.9.4
pandas=1.2.4
dask=2021.4.1
distributed=2021.4.1
(note Python here is 3.9, which seems to be the case in your environment)

Hvplot AttributeError: 'list' object has no attribute 'xy'

I have trouble making an interactive map with PySAL. I want to visualize the Dutch 'gemeente' (municipalities). You can download shape files from the Dutch bureau of statistics: https://www.cbs.nl/nl-nl/dossier/nederland-regionaal/geografische-data/wijk-en-buurtkaart-2019
import geopandas as gpd
import hvplot.pandas
coords = gpd.read_file('gemeente_2019_v2.shp')
coords.hvplot(geo = True)
This raises an error that 'list' object has no attribute 'xy'.
I used PySAL 2.1.0 and tried upgrading to 2.4.0, but this made no difference.
Output:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
~\.conda\envs\jh-open\lib\site-packages\IPython\core\formatters.py in __call__(self, obj, include, exclude)
968
969 if method is not None:
--> 970 return method(include=include, exclude=exclude)
971 return None
972 else:
~\.conda\envs\jh-open\lib\site-packages\holoviews\core\dimension.py in _repr_mimebundle_(self, include, exclude)
1315 combined and returned.
1316 """
-> 1317 return Store.render(self)
1318
1319
~\.conda\envs\jh-open\lib\site-packages\holoviews\core\options.py in render(cls, obj)
1403 data, metadata = {}, {}
1404 for hook in hooks:
-> 1405 ret = hook(obj)
1406 if ret is None:
1407 continue
~\.conda\envs\jh-open\lib\site-packages\holoviews\ipython\display_hooks.py in pprint_display(obj)
280 if not ip.display_formatter.formatters['text/plain'].pprint:
281 return None
--> 282 return display(obj, raw_output=True)
283
284
~\.conda\envs\jh-open\lib\site-packages\holoviews\ipython\display_hooks.py in display(obj, raw_output, **kwargs)
250 elif isinstance(obj, (CompositeOverlay, ViewableElement)):
251 with option_state(obj):
--> 252 output = element_display(obj)
253 elif isinstance(obj, (Layout, NdLayout, AdjointLayout)):
254 with option_state(obj):
~\.conda\envs\jh-open\lib\site-packages\holoviews\ipython\display_hooks.py in wrapped(element)
144 try:
145 max_frames = OutputSettings.options['max_frames']
--> 146 mimebundle = fn(element, max_frames=max_frames)
147 if mimebundle is None:
148 return {}, {}
~\.conda\envs\jh-open\lib\site-packages\holoviews\ipython\display_hooks.py in element_display(element, max_frames)
190 return None
191
--> 192 return render(element)
193
194
~\.conda\envs\jh-open\lib\site-packages\holoviews\ipython\display_hooks.py in render(obj, **kwargs)
66 renderer = renderer.instance(fig='png')
67
---> 68 return renderer.components(obj, **kwargs)
69
70
~\.conda\envs\jh-open\lib\site-packages\holoviews\plotting\renderer.py in components(self, obj, fmt, comm, **kwargs)
408 doc = Document()
409 with config.set(embed=embed):
--> 410 model = plot.layout._render_model(doc, comm)
411 if embed:
412 return render_model(model, comm)
~\.conda\envs\jh-open\lib\site-packages\panel\viewable.py in _render_model(self, doc, comm)
425 if comm is None:
426 comm = state._comm_manager.get_server_comm()
--> 427 model = self.get_root(doc, comm)
428
429 if config.embed:
~\.conda\envs\jh-open\lib\site-packages\panel\viewable.py in get_root(self, doc, comm, preprocess)
482 """
483 doc = init_doc(doc)
--> 484 root = self._get_model(doc, comm=comm)
485 if preprocess:
486 self._preprocess(root)
~\.conda\envs\jh-open\lib\site-packages\panel\layout\base.py in _get_model(self, doc, root, parent, comm)
111 if root is None:
112 root = model
--> 113 objects = self._get_objects(model, [], doc, root, comm)
114 props = dict(self._init_params(), objects=objects)
115 model.update(**self._process_param_change(props))
~\.conda\envs\jh-open\lib\site-packages\panel\layout\base.py in _get_objects(self, model, old_objects, doc, root, comm)
101 else:
102 try:
--> 103 child = pane._get_model(doc, root, model, comm)
104 except RerenderError:
105 return self._get_objects(model, current_objects[:i], doc, root, comm)
~\.conda\envs\jh-open\lib\site-packages\panel\pane\holoviews.py in _get_model(self, doc, root, parent, comm)
237 plot = self.object
238 else:
--> 239 plot = self._render(doc, comm, root)
240
241 plot.pane = self
~\.conda\envs\jh-open\lib\site-packages\panel\pane\holoviews.py in _render(self, doc, comm, root)
302 kwargs['comm'] = comm
303
--> 304 return renderer.get_plot(self.object, **kwargs)
305
306 def _cleanup(self, root):
~\.conda\envs\jh-open\lib\site-packages\holoviews\plotting\bokeh\renderer.py in get_plot(self_or_cls, obj, doc, renderer, **kwargs)
71 combining the bokeh model with another plot.
72 """
---> 73 plot = super(BokehRenderer, self_or_cls).get_plot(obj, doc, renderer, **kwargs)
74 if plot.document is None:
75 plot.document = Document() if self_or_cls.notebook_context else curdoc()
~\.conda\envs\jh-open\lib\site-packages\holoviews\plotting\renderer.py in get_plot(self_or_cls, obj, doc, renderer, comm, **kwargs)
241 init_key = tuple(v if d is None else d for v, d in
242 zip(plot.keys[0], defaults))
--> 243 plot.update(init_key)
244 else:
245 plot = obj
~\.conda\envs\jh-open\lib\site-packages\holoviews\plotting\plot.py in update(self, key)
980 def update(self, key):
981 if len(self) == 1 and ((key == 0) or (key == self.keys[0])) and not self.drawn:
--> 982 return self.initialize_plot()
983 item = self.__getitem__(key)
984 self.traverse(lambda x: setattr(x, '_updated', True))
~\.conda\envs\jh-open\lib\site-packages\geoviews\plotting\bokeh\plot.py in initialize_plot(self, ranges, plot, plots, source)
111 def initialize_plot(self, ranges=None, plot=None, plots=None, source=None):
112 opts = {} if isinstance(self, HvOverlayPlot) else {'source': source}
--> 113 fig = super(GeoPlot, self).initialize_plot(ranges, plot, plots, **opts)
114 if self.geographic and self.show_bounds and not self.overlaid:
115 from . import GeoShapePlot
~\.conda\envs\jh-open\lib\site-packages\holoviews\plotting\bokeh\element.py in initialize_plot(self, ranges, plot, plots, source)
1394 # Initialize plot, source and glyph
1395 if plot is None:
-> 1396 plot = self._init_plot(key, style_element, ranges=ranges, plots=plots)
1397 self._init_axes(plot)
1398 else:
~\.conda\envs\jh-open\lib\site-packages\holoviews\plotting\bokeh\element.py in _init_plot(self, key, element, plots, ranges)
492 subplots = list(self.subplots.values()) if self.subplots else []
493
--> 494 axis_types, labels, plot_ranges = self._axes_props(plots, subplots, element, ranges)
495 xlabel, ylabel, _ = labels
496 x_axis_type, y_axis_type = axis_types
~\.conda\envs\jh-open\lib\site-packages\holoviews\plotting\bokeh\element.py in _axes_props(self, plots, subplots, element, ranges)
403 # Get the Element that determines the range and get_extents
404 range_el = el if self.batched and not isinstance(self, OverlayPlot) else element
--> 405 l, b, r, t = self.get_extents(range_el, ranges)
406 if self.invert_axes:
407 l, b, r, t = b, l, t, r
~\.conda\envs\jh-open\lib\site-packages\geoviews\plotting\plot.py in get_extents(self, element, ranges, range_type)
71 extents = None
72 else:
---> 73 extents = project_extents(extents, element.crs, proj)
74 return (np.NaN,)*4 if not extents else extents
~\.conda\envs\jh-open\lib\site-packages\geoviews\util.py in project_extents(extents, src_proj, dest_proj, tol)
95 geom_in_src_proj = geom_clipped_to_dest_proj
96 try:
---> 97 geom_in_crs = dest_proj.project_geometry(geom_in_src_proj, src_proj)
98 except ValueError:
99 src_name =type(src_proj).__name__
~\.conda\envs\jh-open\lib\site-packages\cartopy\crs.py in project_geometry(self, geometry, src_crs)
216 raise ValueError('Unsupported geometry '
217 'type {!r}'.format(geom_type))
--> 218 return getattr(self, method_name)(geometry, src_crs)
219
220 def _project_point(self, point, src_crs):
~\.conda\envs\jh-open\lib\site-packages\cartopy\crs.py in _project_polygon(self, polygon, src_crs)
352 is_ccw = True
353 else:
--> 354 is_ccw = polygon.exterior.is_ccw
355 # Project the polygon exterior/interior rings.
356 # Each source ring will result in either a ring, or one or more
~\.conda\envs\jh-open\lib\site-packages\shapely\geometry\polygon.py in is_ccw(self)
86 def is_ccw(self):
87 """True is the ring is oriented counter clock-wise"""
---> 88 return bool(self.impl['is_ccw'](self))
89
90 #property
~\.conda\envs\jh-open\lib\site-packages\shapely\algorithms\cga.py in is_ccw_op(ring)
12 """Predicate implementation"""
13 def is_ccw_op(ring):
---> 14 return signed_area(ring) >= 0.0
15 return is_ccw_op
16
~\.conda\envs\jh-open\lib\site-packages\shapely\algorithms\cga.py in signed_area(ring)
4 algorithm at: https://web.archive.org/web/20080209143651/http://cgafaq.info:80/wiki/Polygon_Area
5 """
----> 6 xs, ys = ring.coords.xy
7 xs.append(xs[1])
8 ys.append(ys[1])
AttributeError: 'list' object has no attribute 'xy'
:Polygons [Longitude,Latitude]
The issue is related to the Coordinate Reference System of the shapefile.
Looking at coords.crs shows EPSG:28992.
You can get hvplot to work with this:
from cartopy import crs
coords.hvplot(geo = True, crs=crs.epsg(28992))
I am not sure if this would be considered an issue or not. It may be worth reporting in the github repo.

pandas_profiling TypeError when using HTML format

I follow the pandas_profiling document script, but this problem always arises.
My dataset is the boston from sklearn.
I have the report, but without the features of an html version:
profile2 = ProfileReport(data, title="Relatório DATASET -data-", html={'style': {'full_width': True}}, sort="None")
The image below refers to this code:
from pandas_profiling import ProfileReport
profile = ProfileReport(data, title='Pandas Profiling Report', explorative=True)
[![enter image description here][1]][1]
My version of pandas_profiling
[![enter image description here][2]][2]
I don't have the problem described above if I use this code:
profile = ProfileReport (data)
UPDATE:
Uninstalled the previous version and got the new one (2.9.0), but this problem happens:
Summarize dataset: 75%
21/28 [00:07<00:02, 2.84it/s, Get scatter matrix]
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\formatters.py in __call__(self, obj)
343 method = get_real_method(obj, self.print_method)
344 if method is not None:
--> 345 return method()
346 return None
347 else:
C:\ProgramData\Anaconda3\lib\site-packages\pandas_profiling\profile_report.py in _repr_html_(self)
407 def _repr_html_(self):
408 """The ipython notebook widgets user interface gets called by the jupyter notebook."""
--> 409 self.to_notebook_iframe()
410
411 def __repr__(self):
C:\ProgramData\Anaconda3\lib\site-packages\pandas_profiling\profile_report.py in to_notebook_iframe(self)
387 with warnings.catch_warnings():
388 warnings.simplefilter("ignore")
--> 389 display(get_notebook_iframe(self))
390
391 def to_widgets(self):
C:\ProgramData\Anaconda3\lib\site-packages\pandas_profiling\report\presentation\flavours\widget\notebook.py in get_notebook_iframe(profile)
63 output = get_notebook_iframe_src(profile)
64 elif attribute == "srcdoc":
---> 65 output = get_notebook_iframe_srcdoc(profile)
66 else:
67 raise ValueError(
C:\ProgramData\Anaconda3\lib\site-packages\pandas_profiling\report\presentation\flavours\widget\notebook.py in get_notebook_iframe_srcdoc(profile)
21 width = config["notebook"]["iframe"]["width"].get(str)
22 height = config["notebook"]["iframe"]["height"].get(str)
---> 23 src = html.escape(profile.to_html())
24
25 iframe = f'<iframe width="{width}" height="{height}" srcdoc="{src}" frameborder="0" allowfullscreen></iframe>'
C:\ProgramData\Anaconda3\lib\site-packages\pandas_profiling\profile_report.py in to_html(self)
357
358 """
--> 359 return self.html
360
361 def to_json(self) -> str:
C:\ProgramData\Anaconda3\lib\site-packages\pandas_profiling\profile_report.py in html(self)
177 def html(self):
178 if self._html is None:
--> 179 self._html = self._render_html()
180 return self._html
181
C:\ProgramData\Anaconda3\lib\site-packages\pandas_profiling\profile_report.py in _render_html(self)
284 from pandas_profiling.report.presentation.flavours import HTMLReport
285
--> 286 report = self.report
287
288 disable_progress_bar = not config["progress_bar"].get(bool)
C:\ProgramData\Anaconda3\lib\site-packages\pandas_profiling\profile_report.py in report(self)
171 def report(self):
172 if self._report is None:
--> 173 self._report = get_report_structure(self.description_set)
174 return self._report
175
C:\ProgramData\Anaconda3\lib\site-packages\pandas_profiling\profile_report.py in description_set(self)
152 def description_set(self):
153 if self._description_set is None:
--> 154 self._description_set = describe_df(self.title, self.df, self._sample)
155 return self._description_set
156
C:\ProgramData\Anaconda3\lib\site-packages\pandas_profiling\model\describe.py in describe(title, df, sample)
100 # Scatter matrix
101 pbar.set_postfix_str("Get scatter matrix")
--> 102 scatter_matrix = get_scatter_matrix(df, variables)
103 pbar.update()
104
C:\ProgramData\Anaconda3\lib\site-packages\pandas_profiling\model\summary.py in get_scatter_matrix(df, variables)
696 for y in continuous_variables:
697 if x in continuous_variables:
--> 698 scatter_matrix[x][y] = scatter_pairwise(df[x], df[y], x, y)
699 else:
700 scatter_matrix = {}
C:\ProgramData\Anaconda3\lib\contextlib.py in inner(*args, **kwds)
71 #wraps(func)
72 def inner(*args, **kwds):
---> 73 with self._recreate_cm():
74 return func(*args, **kwds)
75 return inner
C:\ProgramData\Anaconda3\lib\contextlib.py in __enter__(self)
110 del self.args, self.kwds, self.func
111 try:
--> 112 return next(self.gen)
113 except StopIteration:
114 raise RuntimeError("generator didn't yield") from None
C:\ProgramData\Anaconda3\lib\site-packages\pandas_profiling\visualisation\context.py in manage_matplotlib_context()
77 register_matplotlib_converters()
78 matplotlib.rcParams.update(customRcParams)
---> 79 sns.set_style(style="white")
80 yield
81 finally:
AttributeError: module 'seaborn' has no attribute 'set_style'
The solution was to uninstall and reinstall Anaconda and pandas-profiling. It was probably a version issue, as suggested by Paul H in the comments.

PatsyError, name error, name is not defined when using smf.ols

I am trying to use multiple linear regression to analyze some time series data and their lags. Basically, the variables are some currency rates and their lag1 and lag2 values. The code is below.
I tried to check each variable and there is nothing abnormal..
rate = pd.read_csv('P2training.csv', header=0)
#change date format in csv
rate['Date'] = pd.to_datetime(rate['Date'], format='%Y-%m-%d')
rate.set_index('Date', inplace=True, drop=True)
lags = [1,2]
lagdata = rate
for i in lags:
tmp = rate.shift(i).copy();
lagdata = lagdata.join(tmp, rsuffix='_lag{}'.format(i));
# fit the linear regression models
collist = list(lagdata.columns);
collist.remove('AUD/USD')
collist.remove('GBP/USD')
collist.remove('CAD/USD')
collist.remove('NLG/USD')
collist.remove('FRF/USD')
collist.remove('DEM/USD')
collist.remove('JPY/USD')
collist.remove('CHF/USD')
form = 'JPY/USD' + '~' + '+'.join(collist);
lagdata.dropna(inplace=True)
model = smf.ols(formula=form, data = lagdata).fit()
The error occurs in the last step, when calling smf.ols. Several NameErrors are raised saying that some variables are not defined.
NameError Traceback (most recent call last)
C:\Users\yaojia\AppData\Local\Continuum\Anaconda3\lib\site-packages\patsy\compat.py in call_and_wrap_exc(msg, origin, f, *args, **kwargs)
116 try:
--> 117 return f(*args, **kwargs)
118 except Exception as e:
C:\Users\yaojia\AppData\Local\Continuum\Anaconda3\lib\site-packages\patsy\eval.py in eval(self, expr, source_name, inner_namespace)
165 return eval(code, {}, VarLookupDict([inner_namespace]
--> 166 + self._namespaces))
167
<string> in <module>()
NameError: name 'USD_lag2' is not defined
The above exception was the direct cause of the following exception:
PatsyError Traceback (most recent call last)
<ipython-input-26-1985b8d39238> in <module>()
51 #print(collist)
52 #print(lagdata)
---> 53 model = smf.ols(formula=form, data = lagdata).fit()
54
55 #print(model.summary())
C:\Users\yaojia\AppData\Local\Continuum\Anaconda3\lib\site-packages\statsmodels\base\model.py in from_formula(cls, formula, data, subset, drop_cols, *args, **kwargs)
153
154 tmp = handle_formula_data(data, None, formula, depth=eval_env,
--> 155 missing=missing)
156 ((endog, exog), missing_idx, design_info) = tmp
157
C:\Users\yaojia\AppData\Local\Continuum\Anaconda3\lib\site-packages\statsmodels\formula\formulatools.py in handle_formula_data(Y, X, formula, depth, missing)
63 if data_util._is_using_pandas(Y, None):
64 result = dmatrices(formula, Y, depth, return_type='dataframe',
---> 65 NA_action=na_action)
66 else:
67 result = dmatrices(formula, Y, depth, return_type='dataframe',
C:\Users\yaojia\AppData\Local\Continuum\Anaconda3\lib\site-packages\patsy\highlevel.py in dmatrices(formula_like, data, eval_env, NA_action, return_type)
308 eval_env = EvalEnvironment.capture(eval_env, reference=1)
309 (lhs, rhs) = _do_highlevel_design(formula_like, data, eval_env,
--> 310 NA_action, return_type)
311 if lhs.shape[1] == 0:
312 raise PatsyError("model is missing required outcome variables")
C:\Users\yaojia\AppData\Local\Continuum\Anaconda3\lib\site-packages\patsy\highlevel.py in _do_highlevel_design(formula_like, data, eval_env, NA_action, return_type)
163 return iter([data])
164 design_infos = _try_incr_builders(formula_like, data_iter_maker, eval_env,
--> 165 NA_action)
166 if design_infos is not None:
167 return build_design_matrices(design_infos, data,
C:\Users\yaojia\AppData\Local\Continuum\Anaconda3\lib\site-packages\patsy\highlevel.py in _try_incr_builders(formula_like, data_iter_maker, eval_env, NA_action)
68 data_iter_maker,
69 eval_env,
---> 70 NA_action)
71 else:
72 return None
C:\Users\yaojia\AppData\Local\Continuum\Anaconda3\lib\site-packages\patsy\build.py in design_matrix_builders(termlists, data_iter_maker, eval_env, NA_action)
694 factor_states,
695 data_iter_maker,
--> 696 NA_action)
697 # Now we need the factor infos, which encapsulate the knowledge of
698 # how to turn any given factor into a chunk of data:
C:\Users\yaojia\AppData\Local\Continuum\Anaconda3\lib\site-packages\patsy\build.py in _examine_factor_types(factors, factor_states, data_iter_maker, NA_action)
441 for data in data_iter_maker():
442 for factor in list(examine_needed):
--> 443 value = factor.eval(factor_states[factor], data)
444 if factor in cat_sniffers or guess_categorical(value):
445 if factor not in cat_sniffers:
C:\Users\yaojia\AppData\Local\Continuum\Anaconda3\lib\site-packages\patsy\eval.py in eval(self, memorize_state, data)
564 return self._eval(memorize_state["eval_code"],
565 memorize_state,
--> 566 data)
567
568 __getstate__ = no_pickling
C:\Users\yaojia\AppData\Local\Continuum\Anaconda3\lib\site-packages\patsy\eval.py in _eval(self, code, memorize_state, data)
549 memorize_state["eval_env"].eval,
550 code,
--> 551 inner_namespace=inner_namespace)
552
553 def memorize_chunk(self, state, which_pass, data):
C:\Users\yaojia\AppData\Local\Continuum\Anaconda3\lib\site-packages\patsy\compat.py in call_and_wrap_exc(msg, origin, f, *args, **kwargs)
122 origin)
123 # Use 'exec' to hide this syntax from the Python 2 parser:
--> 124 exec("raise new_exc from e")
125 else:
126 # In python 2, we just let the original exception escape -- better
C:\Users\yaojia\AppData\Local\Continuum\Anaconda3\lib\site-packages\patsy\compat.py in <module>()
PatsyError: Error evaluating factor: NameError: name 'USD_lag2' is not defined
JPY/USD~AUD/USD_lag1+GBP/USD_lag1+CAD/USD_lag1+NLG/USD_lag1+FRF/USD_lag1+DEM/USD_lag1+JPY/USD_lag1+CHF/USD_lag1+AUD/USD_lag2+GBP/USD_lag2+CAD/USD_lag2+NLG/USD_lag2+FRF/USD_lag2+DEM/USD_lag2+JPY/USD_lag2+CHF/USD_lag2

Categories

Resources