I am simply trying to plot some values against datetime64[ns] timestamps with holoviews.
That is,
x-axis = nx1 datetime64[ns] values
y-axis = nx1 data.
Here is a screenshot of what I have:
Screenshot of my dataframe
<class 'pandas._libs.tslibs.timestamps.Timestamp'>
and my overall code:
import hvplot.pandas
import pandas as pd
##
## Code omitted at the start to extract data and create a dictionary
## to convert to a data frame
##

# create dictionary
temp_dict = dict(sampling_time=time_y_value_is_taken, y_axis_values=y_values)
df = pd.DataFrame.from_dict(temp_dict)
df.sampling_time = df.sampling_time.astype('datetime64[ns]')
df = df.set_index('sampling_time')

# the following line throws the error
df.hvplot.line()
I keep getting the error: NameError: name 'pandas_datetime_types' is not defined. I have also tried importing datetime, but it does not help.
EDIT: Here is the traceback:
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
~\miniconda3\envs\mpess_visual\lib\site-packages\IPython\core\formatters.py in __call__(self, obj, include, exclude)
968
969 if method is not None:
--> 970 return method(include=include, exclude=exclude)
971 return None
972 else:
~\miniconda3\envs\mpess_visual\lib\site-packages\holoviews\core\dimension.py in _repr_mimebundle_(self, include, exclude)
1315 combined and returned.
1316 """
-> 1317 return Store.render(self)
1318
1319
~\miniconda3\envs\mpess_visual\lib\site-packages\holoviews\core\options.py in render(cls, obj)
1403 data, metadata = {}, {}
1404 for hook in hooks:
-> 1405 ret = hook(obj)
1406 if ret is None:
1407 continue
~\miniconda3\envs\mpess_visual\lib\site-packages\holoviews\ipython\display_hooks.py in pprint_display(obj)
280 if not ip.display_formatter.formatters['text/plain'].pprint:
281 return None
--> 282 return display(obj, raw_output=True)
283
284
~\miniconda3\envs\mpess_visual\lib\site-packages\holoviews\ipython\display_hooks.py in display(obj, raw_output, **kwargs)
250 elif isinstance(obj, (CompositeOverlay, ViewableElement)):
251 with option_state(obj):
--> 252 output = element_display(obj)
253 elif isinstance(obj, (Layout, NdLayout, AdjointLayout)):
254 with option_state(obj):
~\miniconda3\envs\mpess_visual\lib\site-packages\holoviews\ipython\display_hooks.py in wrapped(element)
144 try:
145 max_frames = OutputSettings.options['max_frames']
--> 146 mimebundle = fn(element, max_frames=max_frames)
147 if mimebundle is None:
148 return {}, {}
~\miniconda3\envs\mpess_visual\lib\site-packages\holoviews\ipython\display_hooks.py in element_display(element, max_frames)
190 return None
191
--> 192 return render(element)
193
194
~\miniconda3\envs\mpess_visual\lib\site-packages\holoviews\ipython\display_hooks.py in render(obj, **kwargs)
66 renderer = renderer.instance(fig='png')
67
---> 68 return renderer.components(obj, **kwargs)
69
70
~\miniconda3\envs\mpess_visual\lib\site-packages\holoviews\plotting\renderer.py in components(self, obj, fmt, comm, **kwargs)
408 doc = Document()
409 with config.set(embed=embed):
--> 410 model = plot.layout._render_model(doc, comm)
411 if embed:
412 return render_model(model, comm)
~\miniconda3\envs\mpess_visual\lib\site-packages\panel\viewable.py in _render_model(self, doc, comm)
453 if comm is None:
454 comm = state._comm_manager.get_server_comm()
--> 455 model = self.get_root(doc, comm)
456
457 if config.embed:
~\miniconda3\envs\mpess_visual\lib\site-packages\panel\viewable.py in get_root(self, doc, comm, preprocess)
510 """
511 doc = init_doc(doc)
--> 512 root = self._get_model(doc, comm=comm)
513 if preprocess:
514 self._preprocess(root)
~\miniconda3\envs\mpess_visual\lib\site-packages\panel\layout\base.py in _get_model(self, doc, root, parent, comm)
120 if root is None:
121 root = model
--> 122 objects = self._get_objects(model, [], doc, root, comm)
123 props = dict(self._init_params(), objects=objects)
124 model.update(**self._process_param_change(props))
~\miniconda3\envs\mpess_visual\lib\site-packages\panel\layout\base.py in _get_objects(self, model, old_objects, doc, root, comm)
110 else:
111 try:
--> 112 child = pane._get_model(doc, root, model, comm)
113 except RerenderError:
114 return self._get_objects(model, current_objects[:i], doc, root, comm)
~\miniconda3\envs\mpess_visual\lib\site-packages\panel\pane\holoviews.py in _get_model(self, doc, root, parent, comm)
237 plot = self.object
238 else:
--> 239 plot = self._render(doc, comm, root)
240
241 plot.pane = self
~\miniconda3\envs\mpess_visual\lib\site-packages\panel\pane\holoviews.py in _render(self, doc, comm, root)
304 kwargs['comm'] = comm
305
--> 306 return renderer.get_plot(self.object, **kwargs)
307
308 def _cleanup(self, root):
~\miniconda3\envs\mpess_visual\lib\site-packages\holoviews\plotting\bokeh\renderer.py in get_plot(self_or_cls, obj, doc, renderer, **kwargs)
71 combining the bokeh model with another plot.
72 """
---> 73 plot = super(BokehRenderer, self_or_cls).get_plot(obj, doc, renderer, **kwargs)
74 if plot.document is None:
75 plot.document = Document() if self_or_cls.notebook_context else curdoc()
~\miniconda3\envs\mpess_visual\lib\site-packages\holoviews\plotting\renderer.py in get_plot(self_or_cls, obj, doc, renderer, comm, **kwargs)
241 init_key = tuple(v if d is None else d for v, d in
242 zip(plot.keys[0], defaults))
--> 243 plot.update(init_key)
244 else:
245 plot = obj
~\miniconda3\envs\mpess_visual\lib\site-packages\holoviews\plotting\plot.py in update(self, key)
980 def update(self, key):
981 if len(self) == 1 and ((key == 0) or (key == self.keys[0])) and not self.drawn:
--> 982 return self.initialize_plot()
983 item = self.__getitem__(key)
984 self.traverse(lambda x: setattr(x, '_updated', True))
~\miniconda3\envs\mpess_visual\lib\site-packages\holoviews\plotting\bokeh\element.py in initialize_plot(self, ranges, plot, plots, source)
1388 element = self.hmap.last
1389 key = util.wrap_tuple(self.hmap.last_key)
-> 1390 ranges = self.compute_ranges(self.hmap, key, ranges)
1391 self.current_ranges = ranges
1392 self.current_frame = element
~\miniconda3\envs\mpess_visual\lib\site-packages\holoviews\plotting\plot.py in compute_ranges(self, obj, key, ranges)
636 if (not (axiswise and not isinstance(obj, HoloMap)) or
637 (not framewise and isinstance(obj, HoloMap))):
--> 638 self._compute_group_range(group, elements, ranges, framewise,
639 axiswise, robust, self.top_level,
640 prev_frame)
~\miniconda3\envs\mpess_visual\lib\site-packages\holoviews\plotting\plot.py in _compute_group_range(cls, group, elements, ranges, framewise, axiswise, robust, top_level, prev_frame)
853 continue
854 matching &= (
--> 855 len({'date' if isinstance(v, util.datetime_types) else 'number'
856 for rng in rs for v in rng if util.isfinite(v)}) < 2
857 )
~\miniconda3\envs\mpess_visual\lib\site-packages\holoviews\plotting\plot.py in <setcomp>(.0)
854 matching &= (
855 len({'date' if isinstance(v, util.datetime_types) else 'number'
--> 856 for rng in rs for v in rng if util.isfinite(v)}) < 2
857 )
858 if matching:
~\miniconda3\envs\mpess_visual\lib\site-packages\holoviews\core\util.py in isfinite(val)
902 return finite
903 elif isinstance(val, datetime_types+timedelta_types):
--> 904 return not isnat(val)
905 elif isinstance(val, (basestring, bytes)):
906 return True
~\miniconda3\envs\mpess_visual\lib\site-packages\holoviews\core\util.py in isnat(val)
866 elif pd and val is pd.NaT:
867 return True
--> 868 elif pd and isinstance(val, pandas_datetime_types+pandas_timedelta_types):
869 return pd.isna(val)
870 else:
NameError: name 'pandas_datetime_types' is not defined
Any suggestions? Thank you
Although I couldn't find any official doc to support my statement, it's a compatibility issue (HoloViews 1.14.4 was released before Pandas 1.3.0).
Looking at [GitHub]: holoviz/holoviews - (v1.14.4) holoviews/holoviews/core/util.py (starting at line #83), there are some conditional imports. One of them is ABCIndexClass.
[GitHub]: pandas-dev/pandas - (v1.3.0) pandas/pandas/core/dtypes/dtypes.py, on the other hand, no longer provides it (as opposed to, say, its v1.2.5 counterpart), yielding a (silent) exception and the behavior you're experiencing (see the sketch after the list below).
Ways to go:
Upgrade HoloViews to v1.14.5, which no longer has this problem (or at least has a Pandas 1.3.0 conditional as well - fixed by [GitHub]: holoviz/holoviews - Add support for pandas>=1.3)
You could also downgrade Pandas to (e.g.) v1.2.5, although that is not the way to go
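To see why the failure is silent rather than an import-time error, here is a schematic sketch (an assumed structure, not the actual holoviews source) of the conditional-import pattern involved:

import pandas as pd

# All pandas-specific setup runs inside one try/except, so when pandas 1.3
# removed ABCIndexClass, the names defined after the failing import never
# came into existence.
try:
    from pandas.core.dtypes.generic import ABCIndexClass  # gone in pandas >= 1.3
    pandas_datetime_types = (pd.Timestamp, pd.Period)     # never reached
except Exception:
    pass  # swallowed, so the missing name only surfaces later, at plot time

# Code like util.isnat() then references pandas_datetime_types and raises
# NameError instead of the original ImportError.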
I'm getting the error below when trying to compute a dask dataframe. Here's what I'm doing (taking a pandas dataframe, converting year to datetime, then merging it with another dataframe):
from dask import dataframe as dd

# set up variables
df1x = dd.from_pandas(df1, npartitions=4).reset_index()  # cudf.DataFrame.from_pandas(FullMerge)
df2x = dd.from_pandas(df2, npartitions=4).reset_index()  # cudf.DataFrame.from_pandas(emissions)

# add year
df1x['year'] = dd.to_datetime(df1x.date_x, unit='ns')  # pd.to_datetime(df1['date_x'])
df2x['year'] = dd.to_datetime(df2x.year, unit='ns')

# rename emissions DF columns to match fullMerge so the data can merge correctly
df2x = df2x.rename(columns={'reference_name': 'Name'})

# map revenueOutput to df1 and store it in `value`
df1x['value'] = df1x[['year', 'Name']].merge(df2x, how='left').revenueOutput
It seems to work (no errors), but when I try to write out the results, the error appears:
df1x.to_csv('myfiles.csv', single_file=True)
Here is the stack trace (if it helps):
ValueError Traceback (most recent call last)
<ipython-input-10-78b6500075c4> in <module>
----> 1 df1x.to_csv('myfiles.csv', single_file = True)
2 # dd.compute(Full_df)
20 frames
/usr/local/lib/python3.7/dist-packages/dask/dataframe/core.py in to_csv(self, filename, **kwargs)
1344 from .io import to_csv
1345
-> 1346 return to_csv(self, filename, **kwargs)
1347
1348 def to_json(self, filename, *args, **kwargs):
/usr/local/lib/python3.7/dist-packages/dask/dataframe/io/csv.py in to_csv(df, filename, single_file, encoding, mode, name_function, compression, compute, scheduler, storage_options, header_first_partition_only, **kwargs)
787 )
788 if compute:
--> 789 delayed(values).compute(scheduler=scheduler)
790 return [f.path for f in files]
791 else:
/usr/local/lib/python3.7/dist-packages/dask/base.py in compute(self, **kwargs)
164 dask.base.compute
165 """
--> 166 (result,) = compute(self, traverse=False, **kwargs)
167 return result
168
/usr/local/lib/python3.7/dist-packages/dask/base.py in compute(*args, **kwargs)
435 keys = [x.__dask_keys__() for x in collections]
436 postcomputes = [x.__dask_postcompute__() for x in collections]
--> 437 results = schedule(dsk, keys, **kwargs)
438 return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)])
439
/usr/local/lib/python3.7/dist-packages/dask/threaded.py in get(dsk, result, cache, num_workers, pool, **kwargs)
82 get_id=_thread_get_id,
83 pack_exception=pack_exception,
---> 84 **kwargs
85 )
86
/usr/local/lib/python3.7/dist-packages/dask/local.py in get_async(apply_async, num_workers, dsk, result, cache, get_id, rerun_exceptions_locally, pack_exception, raise_exception, callbacks, dumps, loads, **kwargs)
484 _execute_task(task, data) # Re-execute locally
485 else:
--> 486 raise_exception(exc, tb)
487 res, worker_id = loads(res_info)
488 state["cache"][key] = res
/usr/local/lib/python3.7/dist-packages/dask/local.py in reraise(exc, tb)
314 if exc.__traceback__ is not tb:
315 raise exc.with_traceback(tb)
--> 316 raise exc
317
318
/usr/local/lib/python3.7/dist-packages/dask/local.py in execute_task(key, task_info, dumps, loads, get_id, pack_exception)
220 try:
221 task, data = loads(task_info)
--> 222 result = _execute_task(task, data)
223 id = get_id()
224 result = dumps((result, id))
/usr/local/lib/python3.7/dist-packages/dask/core.py in _execute_task(arg, cache, dsk)
119 # temporaries by their reference count and can execute certain
120 # operations in-place.
--> 121 return func(*(_execute_task(a, cache) for a in args))
122 elif not ishashable(arg):
123 return arg
/usr/local/lib/python3.7/dist-packages/dask/optimization.py in __call__(self, *args)
980 if not len(args) == len(self.inkeys):
981 raise ValueError("Expected %d args, got %d" % (len(self.inkeys), len(args)))
--> 982 return core.get(self.dsk, self.outkey, dict(zip(self.inkeys, args)))
983
984 def __reduce__(self):
/usr/local/lib/python3.7/dist-packages/dask/core.py in get(dsk, out, cache)
149 for key in toposort(dsk):
150 task = dsk[key]
--> 151 result = _execute_task(task, cache)
152 cache[key] = result
153 result = _execute_task(out, cache)
/usr/local/lib/python3.7/dist-packages/dask/core.py in _execute_task(arg, cache, dsk)
119 # temporaries by their reference count and can execute certain
120 # operations in-place.
--> 121 return func(*(_execute_task(a, cache) for a in args))
122 elif not ishashable(arg):
123 return arg
/usr/local/lib/python3.7/dist-packages/dask/core.py in <genexpr>(.0)
119 # temporaries by their reference count and can execute certain
120 # operations in-place.
--> 121 return func(*(_execute_task(a, cache) for a in args))
122 elif not ishashable(arg):
123 return arg
/usr/local/lib/python3.7/dist-packages/dask/core.py in _execute_task(arg, cache, dsk)
119 # temporaries by their reference count and can execute certain
120 # operations in-place.
--> 121 return func(*(_execute_task(a, cache) for a in args))
122 elif not ishashable(arg):
123 return arg
/usr/local/lib/python3.7/dist-packages/dask/utils.py in apply(func, args, kwargs)
28 def apply(func, args, kwargs=None):
29 if kwargs:
---> 30 return func(*args, **kwargs)
31 else:
32 return func(*args)
/usr/local/lib/python3.7/dist-packages/dask/dataframe/core.py in apply_and_enforce(*args, **kwargs)
5072 func = kwargs.pop("_func")
5073 meta = kwargs.pop("_meta")
-> 5074 df = func(*args, **kwargs)
5075 if is_dataframe_like(df) or is_series_like(df) or is_index_like(df):
5076 if not len(df):
/usr/local/lib/python3.7/dist-packages/dask/dataframe/shuffle.py in partitioning_index(df, npartitions)
604 An array of int64 values mapping each record to a partition.
605 """
--> 606 return hash_object_dispatch(df, index=False) % int(npartitions)
607
608
/usr/local/lib/python3.7/dist-packages/dask/utils.py in __call__(self, arg, *args, **kwargs)
504 """
505 meth = self.dispatch(type(arg))
--> 506 return meth(arg, *args, **kwargs)
507
508 #property
/usr/local/lib/python3.7/dist-packages/dask/dataframe/utils.py in hash_object_pandas(obj, index, encoding, hash_key, categorize)
470 ):
471 return pd.util.hash_pandas_object(
--> 472 obj, index=index, encoding=encoding, hash_key=hash_key, categorize=categorize
473 )
474
/usr/local/lib/python3.7/dist-packages/pandas/core/util/hashing.py in hash_pandas_object(obj, index, encoding, hash_key, categorize)
134 h = _combine_hash_arrays(hashes, num_items)
135
--> 136 h = Series(h, index=obj.index, dtype="uint64", copy=False)
137 else:
138 raise TypeError(f"Unexpected type for hashing {type(obj)}")
/usr/local/lib/python3.7/dist-packages/pandas/core/series.py in __init__(self, data, index, dtype, name, copy, fastpath)
312 if len(index) != len(data):
313 raise ValueError(
--> 314 f"Length of passed values is {len(data)}, "
315 f"index implies {len(index)}."
316 )
ValueError: Length of passed values is 0, index implies 41478.
I'm not sure what to do, since the plain pandas version works.
IIUC, the following line is not something dask will handle well:
df1x['value'] = df1x[['year', 'Name']].merge(df2x, how='left').revenueOutput
The reason is that partitions must be aligned when assigning the variable (df1x['value'] = ...), while merge (in general) does not yield the same alignment (df1x[['year', 'Name']].merge(df2x, how='left')). This is not an issue when all data is in memory.
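A tiny, self-contained illustration (toy frames, not the question's data) of why the alignment breaks:

import pandas as pd
from dask import dataframe as dd

# A left merge can change the row count and ordering, so its result no longer
# lines up with the partitions of the frame being assigned to.
left = dd.from_pandas(pd.DataFrame({'k': [1, 2, 3]}), npartitions=2)
right = dd.from_pandas(pd.DataFrame({'k': [2, 2, 3], 'v': [10, 11, 12]}),
                       npartitions=2)
merged = left.merge(right, how='left')

print(len(left), len(merged))  # 3 vs 4: row-wise assignment back is unsafe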
If df2y defined below fits into memory, then one possible option is to do it with .map_partitions:
# make sure this fits into memory
df2y = df2x[['year', 'Name', 'revenueOutput']].compute()

def add_value(df):
    df = df.merge(df2y, how='left')
    df['value'] = df['revenueOutput']
    return df

df1x = df1x.map_partitions(add_value)
If df2y does not fit into memory, then it might be possible to do an explicit dask merge and then use the merged dataframe for further analysis:
merged_df = dd.merge(df1x, df2x, on=['year', 'Name'], how='left')
merged_df['value'] = merged_df['revenueOutput']
# I assume that the line above is needed for some further
# transformation, but if that's not the case, then
# a simple column rename is more efficient
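For completeness, the rename variant hinted at in the comment above could look like this (assuming value is only ever a copy of revenueOutput):

# Renaming the merged column avoids materializing a duplicate copy of the data.
merged_df = dd.merge(df1x, df2x, on=['year', 'Name'], how='left')
merged_df = merged_df.rename(columns={'revenueOutput': 'value'})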
I have multiple fields that store the same kind of value (a price) from different sources.
class Product(models.Model):
    price_amazon = ...
    price_ebay = ...
    price_etsy = ...

    @property
    def price(self):
        return self.price_amazon or self.price_ebay or self.price_etsy
I'm looking for a way to annotate price onto each Product in a queryset. It should behave exactly like the price property:
price is either price_amazon, price_ebay or price_etsy - the first non-None value.
How would you do that?
I tried:
Product.objects.all().annotate(price=F('price_amazon') or F('price_ebay') or F('price_etsy'))
which raises:
AttributeError Traceback (most recent call last)
~/PycharmProjects/.virtualenvs/.cloud/lib/python3.8/site-packages/IPython/core/formatters.py in __call__(self, obj)
700 type_pprinters=self.type_printers,
701 deferred_pprinters=self.deferred_printers)
--> 702 printer.pretty(obj)
703 printer.flush()
704 return stream.getvalue()
~/PycharmProjects/.virtualenvs/.cloud/lib/python3.8/site-packages/IPython/lib/pretty.py in pretty(self, obj)
392 if cls is not object \
393 and callable(cls.__dict__.get('__repr__')):
--> 394 return _repr_pprint(obj, self, cycle)
395
396 return _default_pprint(obj, self, cycle)
~/PycharmProjects/.virtualenvs/.cloud/lib/python3.8/site-packages/IPython/lib/pretty.py in _repr_pprint(obj, p, cycle)
698 """A pprint that just redirects to the normal repr function."""
699 # Find newlines and replace them with p.break_()
--> 700 output = repr(obj)
701 lines = output.splitlines()
702 with p.group():
~/PycharmProjects/.virtualenvs/.cloud/lib/python3.8/site-packages/django/db/models/query.py in __repr__(self)
261
262 def __repr__(self):
--> 263 data = list(self[:REPR_OUTPUT_SIZE + 1])
264 if len(data) > REPR_OUTPUT_SIZE:
265 data[-1] = "...(remaining elements truncated)..."
~/PycharmProjects/.virtualenvs/.cloud/lib/python3.8/site-packages/django/db/models/query.py in __len__(self)
267
268 def __len__(self):
--> 269 self._fetch_all()
270 return len(self._result_cache)
271
~/PycharmProjects/.virtualenvs/.cloud/lib/python3.8/site-packages/django/db/models/query.py in _fetch_all(self)
1306 def _fetch_all(self):
1307 if self._result_cache is None:
-> 1308 self._result_cache = list(self._iterable_class(self))
1309 if self._prefetch_related_lookups and not self._prefetch_done:
1310 self._prefetch_related_objects()
~/PycharmProjects/.virtualenvs/.cloud/lib/python3.8/site-packages/django/db/models/query.py in __iter__(self)
74 if annotation_col_map:
75 for attr_name, col_pos in annotation_col_map.items():
---> 76 setattr(obj, attr_name, row[col_pos])
77
78 # Add the known related objects to the model.
AttributeError: can't set attribute
This is what Coalesce [Django-doc] is all about: it returns the first non-null value among its arguments. You can annotate with:
from django.db.models.functions import Coalesce
Product.objects.annotate(
price=Coalesce('price_amazon', 'price_ebay', 'price_etsy')
)
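Note that the AttributeError: can't set attribute in your traceback comes from the annotation name price colliding with the read-only price property on the model: Django assigns annotations onto instances with setattr, which fails for a property that has no setter. Either drop the property or annotate under a different name; a sketch using the hypothetical name best_price:

from django.db.models.functions import Coalesce

# `best_price` is a hypothetical name, chosen so the annotation does not
# clash with the read-only `price` property defined on the model.
products = Product.objects.annotate(
    best_price=Coalesce('price_amazon', 'price_ebay', 'price_etsy')
)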
Using Django 1.8 and Postgres 9.3, I have the following Django model, which stores log lines. I would like to extract all domain_name values that were never allowed access.
class Logs(models.Model):
    date = DateTimeField(db_index=True)
    action = TextField(default='a', null=True, blank=True, db_index=True)
    url = TextField(null=True, blank=True)
    stats = JsonField(null=True, blank=True)
    domain_name = TextField(null=True, blank=True, db_index=True)
    <snip>
This SQL query works nicely, but I'm having trouble translating it to a Django ORM queryset.
SELECT domain_name FROM reporter_log
GROUP BY domain_name
HAVING COUNT(CASE WHEN action = 'a' OR action = 'ae' THEN 1 END) = 0;
I would have expected the following queryset to work:
Logs.objects.annotate(
    allowed=Count(Case(
        When(action='a', then=Value(1)),
        When(action='ae', then=Value(1)),
        default=Value(0),
        output_field=IntegerField(),
    ))
)
But I get a traceback:
ProgrammingError Traceback (most recent call last)
<ipython-input-4-feb056328fe8> in <module>()
4 When(action='ae', then=Value(1)),
5 default=Value(0),
----> 6 output_field=IntegerField(),
7 )
8 ))
/usr/lib/python3/dist-packages/IPython/core/displayhook.py in __call__(self, result)
245 self.start_displayhook()
246 self.write_output_prompt()
--> 247 format_dict, md_dict = self.compute_format_data(result)
248 self.write_format_data(format_dict, md_dict)
249 self.update_user_ns(result)
/usr/lib/python3/dist-packages/IPython/core/displayhook.py in compute_format_data(self, result)
155
156 """
--> 157 return self.shell.display_formatter.format(result)
158
159 def write_format_data(self, format_dict, md_dict=None):
/usr/lib/python3/dist-packages/IPython/core/formatters.py in format(self, obj, include, exclude)
150 md = None
151 try:
--> 152 data = formatter(obj)
153 except:
154 # FIXME: log the exception
/usr/lib/python3/dist-packages/IPython/core/formatters.py in __call__(self, obj)
478 type_pprinters=self.type_printers,
479 deferred_pprinters=self.deferred_printers)
--> 480 printer.pretty(obj)
481 printer.flush()
482 return stream.getvalue()
/usr/lib/python3/dist-packages/IPython/lib/pretty.py in pretty(self, obj)
361 if isinstance(meth, collections.Callable):
362 return meth(obj, self, cycle)
--> 363 return _default_pprint(obj, self, cycle)
364 finally:
365 self.end_group()
/usr/lib/python3/dist-packages/IPython/lib/pretty.py in _default_pprint(obj, p, cycle)
481 if getattr(klass, '__repr__', None) not in _baseclass_reprs:
482 # A user-provided repr.
--> 483 p.text(repr(obj))
484 return
485 p.begin_group(1, '<')
/usr/local/lib/python3.4/dist-packages/django/db/models/query.py in __repr__(self)
136
137 def __repr__(self):
--> 138 data = list(self[:REPR_OUTPUT_SIZE + 1])
139 if len(data) > REPR_OUTPUT_SIZE:
140 data[-1] = "...(remaining elements truncated)..."
/usr/local/lib/python3.4/dist-packages/django/db/models/query.py in __iter__(self)
160 - Responsible for turning the rows into model objects.
161 """
--> 162 self._fetch_all()
163 return iter(self._result_cache)
164
/usr/local/lib/python3.4/dist-packages/django/db/models/query.py in _fetch_all(self)
963 def _fetch_all(self):
964 if self._result_cache is None:
--> 965 self._result_cache = list(self.iterator())
966 if self._prefetch_related_lookups and not self._prefetch_done:
967 self._prefetch_related_objects()
/usr/local/lib/python3.4/dist-packages/django/db/models/query.py in iterator(self)
236 # Execute the query. This will also fill compiler.select, klass_info,
237 # and annotations.
--> 238 results = compiler.execute_sql()
239 select, klass_info, annotation_col_map = (compiler.select, compiler.klass_info,
240 compiler.annotation_col_map)
/usr/local/lib/python3.4/dist-packages/django/db/models/sql/compiler.py in execute_sql(self, result_type)
838 cursor = self.connection.cursor()
839 try:
--> 840 cursor.execute(sql, params)
841 except Exception:
842 cursor.close()
/usr/local/lib/python3.4/dist-packages/django/db/backends/utils.py in execute(self, sql, params)
77 start = time()
78 try:
---> 79 return super(CursorDebugWrapper, self).execute(sql, params)
80 finally:
81 stop = time()
/usr/local/lib/python3.4/dist-packages/django/db/backends/utils.py in execute(self, sql, params)
62 return self.cursor.execute(sql)
63 else:
---> 64 return self.cursor.execute(sql, params)
65
66 def executemany(self, sql, param_list):
/usr/local/lib/python3.4/dist-packages/django/db/utils.py in __exit__(self, exc_type, exc_value, traceback)
95 if dj_exc_type not in (DataError, IntegrityError):
96 self.wrapper.errors_occurred = True
---> 97 six.reraise(dj_exc_type, dj_exc_value, traceback)
98
99 def __call__(self, func):
/usr/local/lib/python3.4/dist-packages/django/utils/six.py in reraise(tp, value, tb)
656 value = tp()
657 if value.__traceback__ is not tb:
--> 658 raise value.with_traceback(tb)
659 raise value
660
/usr/local/lib/python3.4/dist-packages/django/db/backends/utils.py in execute(self, sql, params)
62 return self.cursor.execute(sql)
63 else:
---> 64 return self.cursor.execute(sql, params)
65
66 def executemany(self, sql, param_list):
ProgrammingError: could not identify an equality operator for type json
LINE 1: ...e"."domain_name", "reporter_logs"."rule_type", "reporter_...
^
Here is the output of the query that fails.
[{'sql': 'SELECT "reporter_logs"."id", "reporter_logs"."date", "reporter_logs"."userip", "reporter_logs"."action", "reporter_logs"."url", "reporter_logs"."method", "reporter_logs"."status", "reporter_logs"."mimetype", "reporter_logs"."content_length", "reporter_logs"."pruned", "reporter_logs"."page_title", "reporter_logs"."user_agent", "reporter_logs"."domain_name", "reporter_logs"."rule_type", "reporter_logs"."tally_stats", "reporter_logs"."cat_stats", "reporter_logs"."grade_stats", "reporter_logs"."ignored", "reporter_logs"."category", "reporter_logs"."genre", "reporter_logs"."grade", "reporter_logs"."top_grade", "reporter_logs"."category_confidence", "reporter_logs"."grade_confidence", COUNT(CASE WHEN "reporter_logs"."action" = \'a\' THEN 1 WHEN "reporter_logs"."action" = \'ae\' THEN 1 ELSE 0 END) AS "allowed" FROM "reporter_logs" GROUP BY "reporter_logs"."id", "reporter_logs"."date", "reporter_logs"."userip", "reporter_logs"."action", "reporter_logs"."url", "reporter_logs"."method", "reporter_logs"."status", "reporter_logs"."mimetype", "reporter_logs"."content_length", "reporter_logs"."pruned", "reporter_logs"."page_title", "reporter_logs"."user_agent", "reporter_logs"."domain_name", "reporter_logs"."rule_type", "reporter_logs"."tally_stats", "reporter_logs"."cat_stats", "reporter_logs"."grade_stats", "reporter_logs"."ignored", "reporter_logs"."category", "reporter_logs"."genre", "reporter_logs"."grade", "reporter_logs"."top_grade", "reporter_logs"."category_confidence", "reporter_logs"."grade_confidence" LIMIT 21', 'time': '0.002'}]
Any suggestions would be appreciated. Including suggestions about alternate ways to write the query.
Try not using the Q function; use separate When clauses instead:
Logs.objects.annotate(
    allowed=Count(Case(
        When(action='a', then=1),
        When(action='ae', then=1),
        default=0,
        output_field=IntegerField()
    ))
).values('date')
Well, thanks @knbk and @Gocht for trying. I finally found the solution to my problem. I had to specify that I only wanted the domain_name column, which took care of the traceback.
Also, I had to use Sum instead of Count to return 0 for domains with no matches: Count counts every non-null value, and since the Case default Value(0) is still non-null, non-matching rows were counted as 1. Sum adds up the actual 0/1 values instead.
Logs.objects.values('domain_name').annotate(
    allowed=Sum(Case(
        When(Q(action='a') | Q(action='ae'), then=Value(1)),
        default=Value(0),
        output_field=IntegerField()
    ))
)
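To finish the stated goal (domains that were never allowed), you would presumably filter on the annotation, mirroring the HAVING clause of the original SQL; a sketch:

from django.db.models import Case, IntegerField, Q, Sum, Value, When

# Keep only the domains whose log lines were never allowed
# (HAVING COUNT(...) = 0 in the SQL from the question).
never_allowed = (
    Logs.objects
    .values('domain_name')
    .annotate(allowed=Sum(Case(
        When(Q(action='a') | Q(action='ae'), then=Value(1)),
        default=Value(0),
        output_field=IntegerField(),
    )))
    .filter(allowed=0)
)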
I have a bunch of data stored in vals. The indices are monotonic, but not continuous. I'm attempting to do some analysis on histograms of the data, so I've created the following structure:
hist = pd.DataFrame(vals)
hist['bins'] = pd.cut(vals, 100)
This is data taken from an experimental instrument, and I know that some of the bins have only 1 or 2 counts in them, which I'm trying to remove. I've tried using groupby as follows and get the following error (full traceback included at the end of the post):
hist.groupby('bins').describe()
AttributeError: 'Categorical' object has no attribute 'flags'
However, when I do the following, the error does not show up and I get the expected result:
In[]: hist.index = hist.bins
In[]: hist['bins'] = hist.index
In[]: desc = hist.groupby('bins').describe()
In[]: desc.index.names = ['bins', 'describe']
Out[]: **describe with MultiIndex for rows.**
If I don't include the second line, hist['bins'] = hist.index, I still get AttributeError: 'Categorical' object has no attribute 'flags', and as best I can tell the traceback is identical.
Can someone explain what the flags are and why they only seem to work when I set the index to bins and then replace the bins by the version stored in the index?
My end goal is to remove the data for bins with counts <= 6. If someone has an easier workaround than the way I'm going after it, I'd also be grateful.
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-11-f606a051f2e4> in <module>()
----> 1 hist.groupby('bins').describe()
C:\Users\balterma\AppData\Local\Enthought\Canopy\App\appdata\canopy-1.4.1.1975.win-x86_64\lib\site-packages\IPython\core\displayhook.pyc in __call__(self, result)
245 self.start_displayhook()
246 self.write_output_prompt()
--> 247 format_dict, md_dict = self.compute_format_data(result)
248 self.write_format_data(format_dict, md_dict)
249 self.update_user_ns(result)
C:\Users\balterma\AppData\Local\Enthought\Canopy\App\appdata\canopy-1.4.1.1975.win-x86_64\lib\site-packages\IPython\core\displayhook.pyc in compute_format_data(self, result)
155
156 """
--> 157 return self.shell.display_formatter.format(result)
158
159 def write_format_data(self, format_dict, md_dict=None):
C:\Users\balterma\AppData\Local\Enthought\Canopy\App\appdata\canopy-1.4.1.1975.win-x86_64\lib\site-packages\IPython\core\formatters.pyc in format(self, obj, include, exclude)
150 md = None
151 try:
--> 152 data = formatter(obj)
153 except:
154 # FIXME: log the exception
C:\Users\balterma\AppData\Local\Enthought\Canopy\App\appdata\canopy-1.4.1.1975.win-x86_64\lib\site-packages\IPython\core\formatters.pyc in __call__(self, obj)
479 type_pprinters=self.type_printers,
480 deferred_pprinters=self.deferred_printers)
--> 481 printer.pretty(obj)
482 printer.flush()
483 return stream.getvalue()
C:\Users\balterma\AppData\Local\Enthought\Canopy\App\appdata\canopy-1.4.1.1975.win-x86_64\lib\site-packages\IPython\lib\pretty.pyc in pretty(self, obj)
360 if callable(meth):
361 return meth(obj, self, cycle)
--> 362 return _default_pprint(obj, self, cycle)
363 finally:
364 self.end_group()
C:\Users\balterma\AppData\Local\Enthought\Canopy\App\appdata\canopy-1.4.1.1975.win-x86_64\lib\site-packages\IPython\lib\pretty.pyc in _default_pprint(obj, p, cycle)
480 if getattr(klass, '__repr__', None) not in _baseclass_reprs:
481 # A user-provided repr.
--> 482 p.text(repr(obj))
483 return
484 p.begin_group(1, '<')
C:\Users\balterma\AppData\Local\Enthought\Canopy\User\lib\site-packages\pandas\core\base.pyc in __repr__(self)
62 Yields Bytestring in Py2, Unicode String in py3.
63 """
---> 64 return str(self)
65
66
C:\Users\balterma\AppData\Local\Enthought\Canopy\User\lib\site-packages\pandas\core\base.pyc in __str__(self)
42 if compat.PY3:
43 return self.__unicode__()
---> 44 return self.__bytes__()
45
46 def __bytes__(self):
C:\Users\balterma\AppData\Local\Enthought\Canopy\User\lib\site-packages\pandas\core\base.pyc in __bytes__(self)
54
55 encoding = get_option("display.encoding")
---> 56 return self.__unicode__().encode(encoding, 'replace')
57
58 def __repr__(self):
C:\Users\balterma\AppData\Local\Enthought\Canopy\User\lib\site-packages\pandas\core\frame.pyc in __unicode__(self)
507 width = None
508 self.to_string(buf=buf, max_rows=max_rows, max_cols=max_cols,
--> 509 line_width=width, show_dimensions=show_dimensions)
510
511 return buf.getvalue()
C:\Users\balterma\AppData\Local\Enthought\Canopy\User\lib\site-packages\pandas\core\frame.pyc in to_string(self, buf, columns, col_space, colSpace, header, index, na_rep, formatters, float_format, sparsify, index_names, justify, line_width, max_rows, max_cols, show_dimensions)
1340 max_rows=max_rows,
1341 max_cols=max_cols,
-> 1342 show_dimensions=show_dimensions)
1343 formatter.to_string()
1344
C:\Users\balterma\AppData\Local\Enthought\Canopy\User\lib\site-packages\pandas\core\format.pyc in __init__(self, frame, buf, columns, col_space, header, index, na_rep, formatters, justify, float_format, sparsify, index_names, line_width, max_rows, max_cols, show_dimensions, **kwds)
345 self.columns = frame.columns
346
--> 347 self._chk_truncate()
348
349 def _chk_truncate(self):
C:\Users\balterma\AppData\Local\Enthought\Canopy\User\lib\site-packages\pandas\core\format.pyc in _chk_truncate(self)
410 else:
411 row_num = max_rows_adj // 2
--> 412 frame = concat((frame.iloc[:row_num, :], frame.iloc[-row_num:, :]))
413 self.tr_row_num = row_num
414
C:\Users\balterma\AppData\Local\Enthought\Canopy\User\lib\site-packages\pandas\tools\merge.pyc in concat(objs, axis, join, join_axes, ignore_index, keys, levels, names, verify_integrity, copy)
752 keys=keys, levels=levels, names=names,
753 verify_integrity=verify_integrity,
--> 754 copy=copy)
755 return op.get_result()
756
C:\Users\balterma\AppData\Local\Enthought\Canopy\User\lib\site-packages\pandas\tools\merge.pyc in __init__(self, objs, axis, join, join_axes, keys, levels, names, ignore_index, verify_integrity, copy)
884 self.copy = copy
885
--> 886 self.new_axes = self._get_new_axes()
887
888 def get_result(self):
C:\Users\balterma\AppData\Local\Enthought\Canopy\User\lib\site-packages\pandas\tools\merge.pyc in _get_new_axes(self)
957 new_axes[i] = ax
958
--> 959 new_axes[self.axis] = self._get_concat_axis()
960 return new_axes
961
C:\Users\balterma\AppData\Local\Enthought\Canopy\User\lib\site-packages\pandas\tools\merge.pyc in _get_concat_axis(self)
1009
1010 if self.keys is None:
-> 1011 concat_axis = _concat_indexes(indexes)
1012 else:
1013 concat_axis = _make_concat_multiindex(indexes, self.keys,
C:\Users\balterma\AppData\Local\Enthought\Canopy\User\lib\site-packages\pandas\tools\merge.pyc in _concat_indexes(indexes)
1027
1028 def _concat_indexes(indexes):
-> 1029 return indexes[0].append(indexes[1:])
1030
1031
C:\Users\balterma\AppData\Local\Enthought\Canopy\User\lib\site-packages\pandas\core\index.pyc in append(self, other)
4603 arrays = []
4604 for i in range(self.nlevels):
-> 4605 label = self.get_level_values(i)
4606 appended = [o.get_level_values(i) for o in other]
4607 arrays.append(label.append(appended))
C:\Users\balterma\AppData\Local\Enthought\Canopy\User\lib\site-packages\pandas\core\index.pyc in get_level_values(self, level)
4239 unique = self.levels[num] # .values
4240 labels = self.labels[num]
-> 4241 filled = com.take_1d(unique.values, labels, fill_value=unique._na_value)
4242 values = unique._simple_new(filled, self.names[num],
4243 freq=getattr(unique, 'freq', None),
C:\Users\balterma\AppData\Local\Enthought\Canopy\User\lib\site-packages\pandas\core\common.pyc in take_nd(arr, indexer, axis, out, fill_value, mask_info, allow_fill)
829 out_shape[axis] = len(indexer)
830 out_shape = tuple(out_shape)
--> 831 if arr.flags.f_contiguous and axis == arr.ndim - 1:
832 # minor tweak that can make an order-of-magnitude difference
833 # for dataframes initialized directly from 2-d ndarrays
AttributeError: 'Categorical' object has no attribute 'flags'
This looks to be a bug with Categorical data that will be corrected in version 0.17.0 (issue here). For background, flags (e.g. f_contiguous) is an attribute of NumPy arrays that describes their memory layout; the display code assumed it was dealing with a plain ndarray, which a Categorical is not.
In the meantime, you can just cast the category to an object dtype - this is effectively what happened when you assigned the bins to the index and back:
hist['bins'] = hist['bins'].astype(str)
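For the stated end goal (dropping data in bins with six or fewer counts), a short sketch building on the question's hist frame after the cast above:

# With `bins` now plain strings, count the rows per bin and keep only
# the well-populated bins.
counts = hist['bins'].map(hist['bins'].value_counts())
hist = hist[counts > 6]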