Trying to create a pivot table with pandas and numpy - Python

I'm using the following to try to get a pivot table that multiplies Quantity by EffectivePrice and groups the result by ProductName:
df_sorted.pivot_table(values=['Quantity', 'EffectivePrice'], index=['ProductName'], aggfunc=np.multiply )
This is the stack trace - not sure why this isn't working.
ValueError Traceback (most recent call last)
/usr/local/lib/python3.9/site-packages/pandas/core/groupby/generic.py in aggregate(self, func, engine, engine_kwargs, *args, **kwargs)
970 try:
--> 971 result = self._aggregate_multiple_funcs([func], _axis=self.axis)
972
/usr/local/lib/python3.9/site-packages/pandas/core/base.py in _aggregate_multiple_funcs(self, arg, _axis)
544 if not len(results):
--> 545 raise ValueError("no results")
546
ValueError: no results
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
<ipython-input-35-f616d7b46a13> in <module>
----> 1 df_sorted.pivot_table(values=['Quantity', 'EffectivePrice'], index=['ProductName'], aggfunc=np.multiply )
/usr/local/lib/python3.9/site-packages/pandas/core/frame.py in pivot_table(self, values, index, columns, aggfunc, fill_value, margins, dropna, margins_name, observed)
6824 from pandas.core.reshape.pivot import pivot_table
6825
-> 6826 return pivot_table(
6827 self,
6828 values=values,
/usr/local/lib/python3.9/site-packages/pandas/core/reshape/pivot.py in pivot_table(data, values, index, columns, aggfunc, fill_value, margins, dropna, margins_name, observed)
110
111 grouped = data.groupby(keys, observed=observed)
--> 112 agged = grouped.agg(aggfunc)
113 if dropna and isinstance(agged, ABCDataFrame) and len(agged.columns):
114 agged = agged.dropna(how="all")
/usr/local/lib/python3.9/site-packages/pandas/core/groupby/generic.py in aggregate(self, func, engine, engine_kwargs, *args, **kwargs)
981 # raised directly by _aggregate_multiple_funcs
982 raise
--> 983 result = self._aggregate_frame(func)
984 except AttributeError:
985 # catch exception from line 969
/usr/local/lib/python3.9/site-packages/pandas/core/groupby/generic.py in _aggregate_frame(self, func, *args, **kwargs)
1173 if axis != obj._info_axis_number:
1174 for name, data in self:
-> 1175 fres = func(data, *args, **kwargs)
1176 result[name] = fres
1177 else:
ValueError: invalid number of arguments

My understanding is that you cannot apply a multi-column operation in pivot_table; the aggregation function sees each column separately. Computing the product first and then aggregating with groupby can do it. Based on that, I recommend this code (I am not sure the expected result is what you want):
df_sorted['TotalPrice'] = df_sorted['Quantity'] * df_sorted['EffectivePrice']
result = df_sorted.groupby('ProductName')['TotalPrice'].sum()
For this sample dataframe:
Quantity EffectivePrice ProductName
0 1 12 A
1 1 13 B
2 2 14 A
The output is like this:
ProductName
A 40
B 13
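If you would rather keep the pivot_table call itself, the same idea works once the product exists as its own column; a minimal sketch reusing the names from the question:
# Compute the row-wise product first, then aggregate the single column
df_sorted['TotalPrice'] = df_sorted['Quantity'] * df_sorted['EffectivePrice']
df_sorted.pivot_table(values='TotalPrice', index=['ProductName'], aggfunc='sum')
This yields the same totals as the groupby above, returned as a one-column DataFrame rather than a Series.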

Related

Not able to perform mean aggregation on a groupby DataFrame in pandas

I have the dataset below.
I want to compute the mean of the 'horsepower' column after grouping by the 'cylinders' and 'model year' columns using pandas. I am running the code in a Jupyter notebook.
Below is my code:
df = pd.read_csv('auto_mpg.csv')
df.groupby(['cylinders','model year']).agg({'horsepower':'mean'})
Basically, I first group by the 'cylinders' and 'model year' columns and then apply the aggregation to get the mean value.
I am getting below error:
DataError Traceback (most recent call last)
<ipython-input-105-967f7e0151c3> in <module>
2 #Creating a DataFrame grouped on cylinders and model_year and finding mean, min and max of horsepower
3 df = pd.read_csv('auto_mpg.csv')
----> 4 df.groupby(['cylinders','model year']).agg({'horsepower':['mean']})
~\anaconda3\lib\site-packages\pandas\core\groupby\generic.py in aggregate(self, func, engine, engine_kwargs, *args, **kwargs)
949 func = maybe_mangle_lambdas(func)
950
--> 951 result, how = self._aggregate(func, *args, **kwargs)
952 if how is None:
953 return result
~\anaconda3\lib\site-packages\pandas\core\base.py in _aggregate(self, arg, *args, **kwargs)
414
415 try:
--> 416 result = _agg(arg, _agg_1dim)
417 except SpecificationError:
418
~\anaconda3\lib\site-packages\pandas\core\base.py in _agg(arg, func)
381 result = {}
382 for fname, agg_how in arg.items():
--> 383 result[fname] = func(fname, agg_how)
384 return result
385
~\anaconda3\lib\site-packages\pandas\core\base.py in _agg_1dim(name, how, subset)
365 "nested dictionary is ambiguous in aggregation"
366 )
--> 367 return colg.aggregate(how)
368
369 def _agg_2dim(how):
~\anaconda3\lib\site-packages\pandas\core\groupby\generic.py in aggregate(self, func, engine, engine_kwargs, *args, **kwargs)
244 # but not the class list / tuple itself.
245 func = maybe_mangle_lambdas(func)
--> 246 ret = self._aggregate_multiple_funcs(func)
247 if relabeling:
248 ret.columns = columns
~\anaconda3\lib\site-packages\pandas\core\groupby\generic.py in _aggregate_multiple_funcs(self, arg)
317 obj._reset_cache()
318 obj._selection = name
--> 319 results[base.OutputKey(label=name, position=idx)] = obj.aggregate(func)
320
321 if any(isinstance(x, DataFrame) for x in results.values()):
~\anaconda3\lib\site-packages\pandas\core\groupby\generic.py in aggregate(self, func, engine, engine_kwargs, *args, **kwargs)
238
239 if isinstance(func, str):
--> 240 return getattr(self, func)(*args, **kwargs)
241
242 elif isinstance(func, abc.Iterable):
~\anaconda3\lib\site-packages\pandas\core\groupby\groupby.py in mean(self, numeric_only)
1391 Name: B, dtype: float64
1392 """
-> 1393 return self._cython_agg_general(
1394 "mean",
1395 alt=lambda x, axis: Series(x).mean(numeric_only=numeric_only),
~\anaconda3\lib\site-packages\pandas\core\groupby\groupby.py in _cython_agg_general(self, how, alt, numeric_only, min_count)
1049
1050 if len(output) == 0:
-> 1051 raise DataError("No numeric types to aggregate")
1052
1053 return self._wrap_aggregated_output(output, index=self.grouper.result_index)
DataError: No numeric types to aggregate
Meanwhile, min and max aggregation on the 'horsepower' column works successfully:
df = pd.read_csv('auto_mpg.csv')
df.groupby(['cylinders','model year']).agg({'horsepower':['min','max']})
I loaded the auto-mpg dataset from https://www.kaggle.com/uciml/autompg-dataset/version/3nd and managed to replicate the problem.
The root cause is that the horsepower column is loaded as type object, with missing values represented as question-mark strings (?). For example:
df[df.horsepower.str.contains(r"\?")]
Pandas doesn't know how to take the mean of question marks, so the solution would be casting the column to float:
# Convert non digit strings to NaN
df.loc[~df.horsepower.str.isdigit(), "horsepower"] = np.NaN
# Cast to float
df.horsepower = df.horsepower.astype("float")
# Aggregate
df.groupby(["cylinders", "model year"]).agg({"horsepower": "mean"})
Used pandas==1.1.5 and numpy==1.19.5.
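As a more concise alternative (my addition, assuming the only non-numeric values really are the ? placeholders), pd.to_numeric with errors="coerce" performs the NaN conversion and the cast in one step:
# Coerce anything unparseable (the "?" strings) to NaN, yielding float64
df["horsepower"] = pd.to_numeric(df["horsepower"], errors="coerce")
df.groupby(["cylinders", "model year"]).agg({"horsepower": "mean"})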
Check the data type. The root cause is the error at the bottom of your post:
raise DataError("No numeric types to aggregate")
If the data type is right, putting 'mean' into brackets also works:
agg({'horsepower': ['mean']})
Try this:
df = pd.read_csv('auto_mpg.csv')
df.groupby(['cylinders','model year']).mean()["horsepower"]
df.groupby(['cylinders','model year']).mean() gives you the mean of each numeric column, and you then select horsepower to get the desired column from the grouped result. (Note this still requires horsepower to be numeric; while it has the object dtype, mean() silently drops it and the selection fails.)

Why am I getting an error while using a lambda within apply

Could someone help me understand why the following gives an error?
import numpy as np
from pydataset import data
mtcars = data('mtcars')
mtcars.apply(['mean', lambda x: max(x)-min(x), lambda x: np.percentile(x, 0.15)])
I am trying to create a data frame with the mean, max-min, and 15th percentile for all columns of the mtcars dataset.
Error Message:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/aggregation.py in agg_list_like(obj, arg, _axis)
674 try:
--> 675 return concat(results, keys=keys, axis=1, sort=False)
676 except TypeError as err:
~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/reshape/concat.py in concat(objs, axis, join, ignore_index, keys, levels, names, verify_integrity, sort, copy)
284 """
--> 285 op = _Concatenator(
286 objs,
~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/reshape/concat.py in __init__(self, objs, axis, join, keys, levels, names, ignore_index, verify_integrity, copy, sort)
369 )
--> 370 raise TypeError(msg)
371
TypeError: cannot concatenate object of type '<class 'float'>'; only Series and DataFrame objs are valid
The above exception was the direct cause of the following exception:
ValueError Traceback (most recent call last)
<ipython-input-645-51b8f1de1855> in <module>
----> 1 mtcars.apply(['mean', lambda x: max(x)-min(x), lambda x: np.percentile(x, 0.15)])
~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/frame.py in apply(self, func, axis, raw, result_type, args, **kwds)
7766 kwds=kwds,
7767 )
-> 7768 return op.get_result()
7769
7770 def applymap(self, func, na_action: Optional[str] = None) -> DataFrame:
~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/apply.py in get_result(self)
145 # pandas\core\apply.py:144: error: "aggregate" of "DataFrame" gets
146 # multiple values for keyword argument "axis"
--> 147 return self.obj.aggregate( # type: ignore[misc]
148 self.f, axis=self.axis, *self.args, **self.kwds
149 )
~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/frame.py in aggregate(self, func, axis, *args, **kwargs)
7576 result = None
7577 try:
-> 7578 result, how = self._aggregate(func, axis, *args, **kwargs)
7579 except TypeError as err:
7580 exc = TypeError(
~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/frame.py in _aggregate(self, arg, axis, *args, **kwargs)
7607 result = result.T if result is not None else result
7608 return result, how
-> 7609 return aggregate(self, arg, *args, **kwargs)
7610
7611 agg = aggregate
~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/aggregation.py in aggregate(obj, arg, *args, **kwargs)
584 # we require a list, but not an 'str'
585 arg = cast(List[AggFuncTypeBase], arg)
--> 586 return agg_list_like(obj, arg, _axis=_axis), None
587 else:
588 result = None
~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/aggregation.py in agg_list_like(obj, arg, _axis)
651 colg = obj._gotitem(col, ndim=1, subset=selected_obj.iloc[:, index])
652 try:
--> 653 new_res = colg.aggregate(arg)
654 except (TypeError, DataError):
655 pass
~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/series.py in aggregate(self, func, axis, *args, **kwargs)
3972 func = dict(kwargs.items())
3973
-> 3974 result, how = aggregate(self, func, *args, **kwargs)
3975 if result is None:
3976
~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/aggregation.py in aggregate(obj, arg, *args, **kwargs)
584 # we require a list, but not an 'str'
585 arg = cast(List[AggFuncTypeBase], arg)
--> 586 return agg_list_like(obj, arg, _axis=_axis), None
587 else:
588 result = None
~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/aggregation.py in agg_list_like(obj, arg, _axis)
683 result = Series(results, index=keys, name=obj.name)
684 if is_nested_object(result):
--> 685 raise ValueError(
686 "cannot combine transform and aggregation operations"
687 ) from err
ValueError: cannot combine transform and aggregation operations
But, the following works:
mtcars.apply(['mean', lambda x: max(x)-min(x)])
Both type(mtcars.apply(lambda x: np.percentile(x, 0.15))) and type(mtcars.apply(lambda x: max(x)-min(x))) give a pandas Series, so why does the problem happen only with the percentile?
Thanks
Reading the answer by @James, my guess is that you need to write the custom function so that it is applied to the Series as a whole rather than to each element. Maybe someone more familiar with the underlying pandas code can chip in:
def min_max(x):
    return max(x) - min(x)

def perc(x):
    return x.quantile(0.15)

mtcars.agg(['mean', min_max, perc])
mpg cyl disp hp drat wt qsec vs am gear carb
mean 20.090625 6.1875 230.721875 146.6875 3.596563 3.21725 17.84875 0.4375 0.40625 3.6875 2.8125
min_max 23.500000 4.0000 400.900000 283.0000 2.170000 3.91100 8.40000 1.0000 1.00000 2.0000 7.0000
perc 14.895000 4.0000 103.485000 82.2500 3.070000 2.17900 16.24300 0.0000 0.00000 3.0000 1.0000
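A note worth adding here (my addition, not from the original answer): np.percentile expects a percentage between 0 and 100, while Series.quantile expects a fraction between 0 and 1, so np.percentile(x, 0.15) in the question asks for the 0.15th percentile, not the 15th. Two equivalent spellings of the 15th percentile:
def perc_np(x):
    return np.percentile(x, 15)   # np.percentile takes 0-100

def perc_pd(x):
    return x.quantile(0.15)       # Series.quantile takes 0-1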

How to minimize code when using sqlalchemy query and pandas dataframes

I am trying to upgrade my query code by modernizing it.
My old code is below. The first query joins two tables and selects the rating for each song title together with the artist, and the second gets the genres for each title (an association table is used):
items = []
query = db.session.query(Rating, Song).filter(Rating.id == Song.id).all()
for x in query:
    dic = {
        "rating": x[0],
        "title": x[1].title,
        "artist": x[1].artist,
        "genre": Genre.query.filter(Genre.songs.any(title=x[1].title)).all(),
    }
    items.append(dic)
My cleaner code uses pandas DataFrames instead of dictionaries. This gives me the error ArgumentError: SQL expression element or literal value expected, got somethingsomething
query = db.session.query(Rating, Song).filter(Rating.id == Song.id).all()
df = pd.DataFrame(query, columns=["rating", "title"])
for item in df.title:
    df['genre'] = Genre.query.filter(Genre.songs.any(title=item)).all()
How do I get this to work?
Are there more efficient ways of coding this?
Complete error produced
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
~\AppData\Local\Programs\Python\Python37\lib\site-packages\sqlalchemy\engine\base.py in execute(self, statement, *multiparams, **params)
1194 try:
-> 1195 meth = statement._execute_on_connection
1196 except AttributeError as err:
AttributeError: 'BaseQuery' object has no attribute '_execute_on_connection'
The above exception was the direct cause of the following exception:
ObjectNotExecutableError Traceback (most recent call last)
<ipython-input-4-99ffacdf2d91> in <module>
1 query = db.session.query(Rating, Song).filter(Rating.id==Song.id)
----> 2 df = pd.read_sql_query(query, db.engine)
3 df
~\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas\io\sql.py in read_sql_query(sql, con, index_col, coerce_float, params, parse_dates, chunksize)
381 coerce_float=coerce_float,
382 parse_dates=parse_dates,
--> 383 chunksize=chunksize,
384 )
385
~\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas\io\sql.py in read_query(self, sql, index_col, coerce_float, parse_dates, params, chunksize)
1292 args = _convert_params(sql, params)
1293
-> 1294 result = self.execute(*args)
1295 columns = result.keys()
1296
~\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas\io\sql.py in execute(self, *args, **kwargs)
1160 def execute(self, *args, **kwargs):
1161 """Simple passthrough to SQLAlchemy connectable"""
-> 1162 return self.connectable.execution_options().execute(*args, **kwargs)
1163
1164 def read_table(
<string> in execute(self, statement, *multiparams, **params)
~\AppData\Local\Programs\Python\Python37\lib\site-packages\sqlalchemy\util\deprecations.py in warned(fn, *args, **kwargs)
388 if not skip_warning:
389 _warn_with_version(message, version, wtype, stacklevel=3)
--> 390 return fn(*args, **kwargs)
391
392 doc = func.__doc__ is not None and func.__doc__ or ""
~\AppData\Local\Programs\Python\Python37\lib\site-packages\sqlalchemy\engine\base.py in execute(self, statement, *multiparams, **params)
3036 """
3037 connection = self.connect(close_with_result=True)
-> 3038 return connection.execute(statement, *multiparams, **params)
3039
3040 @util.deprecated_20(
~\AppData\Local\Programs\Python\Python37\lib\site-packages\sqlalchemy\engine\base.py in execute(self, statement, *multiparams, **params)
1196 except AttributeError as err:
1197 util.raise_(
-> 1198 exc.ObjectNotExecutableError(statement), replace_context=err
1199 )
1200 else:
~\AppData\Local\Programs\Python\Python37\lib\site-packages\sqlalchemy\util\compat.py in raise_(***failed resolving arguments***)
209
210 try:
--> 211 raise exception
212 finally:
213 # credit to
ObjectNotExecutableError: Not an executable object: <flask_sqlalchemy.BaseQuery object at 0x000001CBC5F14A48>
First, create just one query to return all the data you need in one go, where the grouping of genres is done using the GROUP_CONCAT function:
query = (
    db.session
    .query(
        Rating.rating,
        Song.title,
        Song.artist,
        db.func.GROUP_CONCAT(Genre.category, ", ").label("genres")
    )
    .select_from(Song)
    .where(Rating.id == Song.id)
    .join(Genre, Song.genres)
    .group_by(
        Rating.rating,
        Song.title,
        Song.artist,
    )
)
Then use pd.read_sql_query to get it into a DataFrame:
df = pd.read_sql_query(query.statement, db.engine)
Where print(df) should produce something like this:
rating title artist genres
0 2.0 title 2 art-2 pop
1 3.0 title 3 art-3 rock, pop
2 4.0 title 4 art-3 other
3 5.0 title 5 art-4 rock, pop, other
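One portability note (my addition, not from the original answer): GROUP_CONCAT is a SQLite/MySQL aggregate; on PostgreSQL the equivalent is string_agg, so the labelled column would be written as:
db.func.string_agg(Genre.category, ", ").label("genres")  # PostgreSQL equivalent of GROUP_CONCAT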

astype(str) in pandas raises TypeError

I have a simple dataframe that produced an error.
stdcos.head() produces:
PartNo Cost
0 180 8.95
1 213 0.32
2 215 2.77
3 216 3.02
4 218 1.37
stdcos.dtypes returns:
PartNo object
Cost float64
dtype: object
Why does stdcos['PartNo'].astype(str) raise a TypeError in this case?
Could it have something to do with the weird PartNo dtype?
Sorry, this is the error message:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-48-791196d10a7a> in <module>
----> 1 stdcos['PartNo'].astype(str)
~\AppData\Local\Continuum\miniconda3\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
2774 if self.columns.nlevels > 1:
2775 return self._getitem_multilevel(key)
-> 2776 return self._get_item_cache(key)
2777
2778 # Do we have a slicer (on rows)?
~\AppData\Local\Continuum\miniconda3\lib\site-packages\pandas\core\generic.py in _get_item_cache(self, item)
3584 res = cache.get(item)
3585 if res is None:
-> 3586 values = self._data.get(item)
3587 res = self._box_item_values(item, values)
3588 cache[item] = res
~\AppData\Local\Continuum\miniconda3\lib\site-packages\pandas\core\internals\managers.py in get(self, item)
966 raise ValueError("cannot label index with a null key")
967
--> 968 return self.iget(loc)
969 else:
970
~\AppData\Local\Continuum\miniconda3\lib\site-packages\pandas\core\internals\managers.py in iget(self, i)
983 Otherwise return as a ndarray
984 """
--> 985 block = self.blocks[self._blknos[i]]
986 values = block.iget(self._blklocs[i])
987
TypeError: only integer scalar arrays can be converted to a scalar index
Thanks to @Juanpa.arrivillaga, I found the bug.
It comes from stdcos.columns = [['PartNo', 'Cost']]
The column names are actually nested (a MultiIndex), but for some reason they display like flat ones in a Jupyter notebook.
Changing it back to stdcos.columns = ['PartNo', 'Cost'] fixed the issue.
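A quick way to confirm and fix this kind of accident (a sketch, assuming the nested assignment above) is to inspect the columns directly and flatten the unintended MultiIndex instead of retyping the names:
# The columns repr reveals the nesting that head() hides
print(stdcos.columns)  # a MultiIndex, not a flat Index
# Flatten the accidental MultiIndex back to a plain Index
stdcos.columns = stdcos.columns.get_level_values(0)
stdcos['PartNo'].astype(str)  # now works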

ValueError when using .diff() with dask dataframe

I have a large time series data set which I want to process with Dask.
Apart from a few other columns, there is a column 'id' which identifies individuals, a column 'transc_date' which gives the date, and a column 'transc_time' which gives the time when an individual made a transaction.
The data is sorted using:
df = df.map_partitions(lambda x: x.sort_values(['id', 'transc_date', 'transc_time'], ascending=[True, True, True]))
transc_time is of type int and transc_date is of type datetime64.
I want to create a new column which gives me for each individual the number of days since the last transaction. For this I created the following function:
def get_diff_since_last_trans(df, plot=True):
    df['diff_last'] = df.map_overlap(lambda x: x.groupby('id')['transc_date'].diff(), before=10, after=10)
    diffs = df[['id', 'diff_last']].groupby(['id']).agg('max')['diff_last'].dt.days.compute()
    if plot:
        sns.distplot(diffs.values, kde=False, rug=False)
    return diffs
When I try this function on a small subset of the data (200k rows) it works as intended, but when I use it on the full data set I get the ValueError below.
I dropped all ids with fewer than 10 occurrences first. transc_date does not contain NaNs; it only contains datetime64 entries.
Any idea what's going wrong?
ValueError Traceback (most recent call last)
<ipython-input-12-551d7256f328> in <module>()
1 a = get_diff_first_last_trans(df, plot=False)
----> 2 b = get_diff_since_last_trans(df, plot=False)
3 plot_trans_diff(a,b)
<ipython-input-10-8f83d4571659> in get_diff_since_last_trans(df, plot)
12 def get_diff_since_last_trans(df, plot=True):
13 df['diff_last'] = df.map_overlap(lambda x: x.groupby('id')['transc_date'].diff(), before=10, after=10)
---> 14 diffs = df[['id', 'diff_last']].groupby(['id']).agg('max')['diff_last'].dt.days.compute()
15 if plot:
16 sns.distplot(diffs.values, kde = False, rug = False)
~/venv/lib/python3.6/site-packages/dask/base.py in compute(self, **kwargs)
133 dask.base.compute
134 """
--> 135         (result,) = compute(self, traverse=False, **kwargs)
    136         return result
137
~/venv/lib/python3.6/site-packages/dask/base.py in compute(*args, **kwargs)
331 postcomputes = [a.__dask_postcompute__() if is_dask_collection(a)
332 else (None, a) for a in args]
--> 333 results = get(dsk, keys, **kwargs)
334 results_iter = iter(results)
335 return tuple(a if f is None else f(next(results_iter), *a)
~/venv/lib/python3.6/site-packages/distributed/client.py in get(self, dsk, keys, restrictions, loose_restrictions, resources, sync, asynchronous, **kwargs)
1997 secede()
1998 try:
-> 1999 results = self.gather(packed, asynchronous=asynchronous)
2000 finally:
2001 for f in futures.values():
~/venv/lib/python3.6/site-packages/distributed/client.py in gather(self, futures, errors, maxsize, direct, asynchronous)
1435 return self.sync(self._gather, futures, errors=errors,
1436 direct=direct, local_worker=local_worker,
-> 1437 asynchronous=asynchronous)
1438
1439 @gen.coroutine
~/venv/lib/python3.6/site-packages/distributed/client.py in sync(self, func, *args, **kwargs)
590 return future
591 else:
--> 592         return sync(self.loop, func, *args, **kwargs)
    593
    594     def __repr__(self):
~/venv/lib/python3.6/site-packages/distributed/utils.py in sync(loop, func, *args, **kwargs)
252 e.wait(1000000)
253 if error[0]:
--> 254 six.reraise(*error[0])
255 else:
256 return result[0]
~/venv/lib/python3.6/site-packages/six.py in reraise(tp, value, tb)
691 if value.__traceback__ is not tb:
692 raise value.with_traceback(tb)
--> 693         raise value
    694     finally:
    695         value = None
~/venv/lib/python3.6/site-packages/distributed/utils.py in f()
236 yield gen.moment
237 thread_state.asynchronous = True
--> 238 result[0] = yield make_coro()
239 except Exception as exc:
240 logger.exception(exc)
~/venv/lib/python3.6/site-packages/tornado/gen.py in run(self)
1053
1054 try:
-> 1055 value = future.result()
1056 except Exception:
1057 self.had_exception = True
~/venv/lib/python3.6/site-packages/tornado/concurrent.py in result(self, timeout)
236 if self._exc_info is not None:
237 try:
--> 238 raise_exc_info(self._exc_info)
239 finally:
240 self = None
~/venv/lib/python3.6/site-packages/tornado/util.py in raise_exc_info(exc_info)
~/venv/lib/python3.6/site-packages/tornado/gen.py in run(self)
1061 if exc_info is not None:
1062 try:
-> 1063 yielded = self.gen.throw(*exc_info)
1064 finally:
1065 # Break up a reference to itself
~/venv/lib/python3.6/site-packages/distributed/client.py in _gather(self, futures, errors, direct, local_worker)
1313 six.reraise(type(exception),
1314 exception,
-> 1315 traceback)
1316 if errors == 'skip':
1317 bad_keys.add(key)
~/venv/lib/python3.6/site-packages/six.py in reraise(tp, value, tb)
690 value = tp()
691 if value.__traceback__ is not tb:
--> 692             raise value.with_traceback(tb)
    693         raise value
    694     finally:
~/venv/lib/python3.6/site-packages/dask/dataframe/rolling.py in overlap_chunk()
30 parts = [p for p in (prev_part, current_part, next_part) if p is not None]
31 combined = pd.concat(parts)
---> 32 out = func(combined, *args, **kwargs)
33 if prev_part is None:
34 before = None
<ipython-input-10-8f83d4571659> in <lambda>()
11
12 def get_diff_since_last_trans(df, plot=True):
---> 13 df['diff_last'] = df.map_overlap(lambda x: x.groupby('id')['transc_date'].diff(), before=10, after=10)
14 diffs = df[['id', 'diff_last']].groupby(['id']).agg('max')['diff_last'].dt.days.compute()
15 if plot:
~/venv/lib/python3.6/site-packages/pandas/core/groupby.py in wrapper()
737 *args, **kwargs)
738 except (AttributeError):
--> 739             raise ValueError
    740
    741     return wrapper
ValueError:
