Why am I getting error using .map in python function - python

I'm trying to map a dictionary value to a dataset in a function. I keep getting the following error:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-114-f1360d45f8fc> in <module>
----> 1 df['unit_value_factor_4'] = df.apply(map_value, axis=1)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py in apply(self, func, axis, broadcast, raw, reduce, result_type, args, **kwds)
6012 args=args,
6013 kwds=kwds)
-> 6014 return op.get_result()
6015
6016 def applymap(self, func):
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\apply.py in get_result(self)
140 return self.apply_raw()
141
--> 142 return self.apply_standard()
143
144 def apply_empty_result(self):
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\apply.py in apply_standard(self)
246
247 # compute the result using the series generator
--> 248 self.apply_series_generator()
249
250 # wrap results
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\apply.py in apply_series_generator(self)
275 try:
276 for i, v in enumerate(series_gen):
--> 277 results[i] = self.f(v)
278 keys.append(v.name)
279 except Exception as e:
<ipython-input-113-2ec7fc46c34e> in map_value(row)
2 def map_value(row):
3 if row['RATING_CLASS_CODE'] == 'G':
----> 4 val = row['unit_value_model'].map(g_cn_value)
5
6 elif row['RATING_CLASS_CODE'] == 'CN':
AttributeError: ("'float' object has no attribute 'map'", 'occurred at index 40')
Below is the function. This is simply looking up the RATING_CLASS_CODE on each row, then mapping a value from a dictionary that corresponds to the unit_value_model which matches my dictionary key.
def map_value(row):
if row['RATING_CLASS_CODE'] == 'G':
val = row['unit_value_model'].map(g_cn_value)
elif row['RATING_CLASS_CODE'] == 'CN':
val = row['unit_value_model'].map(g_cn_value)
elif row['RATING_CLASS_CODE'] == 'NE':
val = row['unit_value_model'].map(ne_gv_value)
elif row['RATING_CLASS_CODE'] == 'GV':
val = row['unit_value_model'].map(ne_gv_value)
elif row['RATING_CLASS_CODE'] == 'LA':
val = row['unit_value_model'].map(la_coll_value)
else:
val = None
print(val)
return val
df['unit_value_factor_4'] = df.apply(map_value, axis=1)

I think you need np.select with multiple conditions.
Look at this answer for an explicit example.

Related

Cannot plot datetime64[ns] on hvplot axis ('pandas_datetime_types' is not defined' error)

I am simply trying to plot some values against datetime64[ns] timestamps with holoviews.
That is,
x-axis = nx1 datetime64[ns] values
y-axis = nx1 data.
Here is a screen shot of what I have:
Screenshot of my dataframe
<class 'pandas._libs.tslibs.timestamps.Timestamp'>
and my overall code:
import hvplot.pandas
import pandas as pd
##
Code ommitted at the start to extract data and create dictionary to convert to data frame
##
#create dictionary
temp_dict = dict(sampling_time=time_y_value_is_taken, y_axis_values = y_values)
df = pd.Dataframe.from_dict(temp_dict)
df.sampling_time=df.sampling_time.astype('datetime64[ns]')
df=df.set_index('sampling_time')
##The following code cannot run this line- it throws error
df.hvplot.line()
I keep getting the error: name 'pandas_datetime_types' is not defined. I have also tried importing datetime as datetime, but that does not help.
EDIT: Here is the traceback:
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
~\miniconda3\envs\mpess_visual\lib\site-packages\IPython\core\formatters.py in __call__(self, obj, include, exclude)
968
969 if method is not None:
--> 970 return method(include=include, exclude=exclude)
971 return None
972 else:
~\miniconda3\envs\mpess_visual\lib\site-packages\holoviews\core\dimension.py in _repr_mimebundle_(self, include, exclude)
1315 combined and returned.
1316 """
-> 1317 return Store.render(self)
1318
1319
~\miniconda3\envs\mpess_visual\lib\site-packages\holoviews\core\options.py in render(cls, obj)
1403 data, metadata = {}, {}
1404 for hook in hooks:
-> 1405 ret = hook(obj)
1406 if ret is None:
1407 continue
~\miniconda3\envs\mpess_visual\lib\site-packages\holoviews\ipython\display_hooks.py in pprint_display(obj)
280 if not ip.display_formatter.formatters['text/plain'].pprint:
281 return None
--> 282 return display(obj, raw_output=True)
283
284
~\miniconda3\envs\mpess_visual\lib\site-packages\holoviews\ipython\display_hooks.py in display(obj, raw_output, **kwargs)
250 elif isinstance(obj, (CompositeOverlay, ViewableElement)):
251 with option_state(obj):
--> 252 output = element_display(obj)
253 elif isinstance(obj, (Layout, NdLayout, AdjointLayout)):
254 with option_state(obj):
~\miniconda3\envs\mpess_visual\lib\site-packages\holoviews\ipython\display_hooks.py in wrapped(element)
144 try:
145 max_frames = OutputSettings.options['max_frames']
--> 146 mimebundle = fn(element, max_frames=max_frames)
147 if mimebundle is None:
148 return {}, {}
~\miniconda3\envs\mpess_visual\lib\site-packages\holoviews\ipython\display_hooks.py in element_display(element, max_frames)
190 return None
191
--> 192 return render(element)
193
194
~\miniconda3\envs\mpess_visual\lib\site-packages\holoviews\ipython\display_hooks.py in render(obj, **kwargs)
66 renderer = renderer.instance(fig='png')
67
---> 68 return renderer.components(obj, **kwargs)
69
70
~\miniconda3\envs\mpess_visual\lib\site-packages\holoviews\plotting\renderer.py in components(self, obj, fmt, comm, **kwargs)
408 doc = Document()
409 with config.set(embed=embed):
--> 410 model = plot.layout._render_model(doc, comm)
411 if embed:
412 return render_model(model, comm)
~\miniconda3\envs\mpess_visual\lib\site-packages\panel\viewable.py in _render_model(self, doc, comm)
453 if comm is None:
454 comm = state._comm_manager.get_server_comm()
--> 455 model = self.get_root(doc, comm)
456
457 if config.embed:
~\miniconda3\envs\mpess_visual\lib\site-packages\panel\viewable.py in get_root(self, doc, comm, preprocess)
510 """
511 doc = init_doc(doc)
--> 512 root = self._get_model(doc, comm=comm)
513 if preprocess:
514 self._preprocess(root)
~\miniconda3\envs\mpess_visual\lib\site-packages\panel\layout\base.py in _get_model(self, doc, root, parent, comm)
120 if root is None:
121 root = model
--> 122 objects = self._get_objects(model, [], doc, root, comm)
123 props = dict(self._init_params(), objects=objects)
124 model.update(**self._process_param_change(props))
~\miniconda3\envs\mpess_visual\lib\site-packages\panel\layout\base.py in _get_objects(self, model, old_objects, doc, root, comm)
110 else:
111 try:
--> 112 child = pane._get_model(doc, root, model, comm)
113 except RerenderError:
114 return self._get_objects(model, current_objects[:i], doc, root, comm)
~\miniconda3\envs\mpess_visual\lib\site-packages\panel\pane\holoviews.py in _get_model(self, doc, root, parent, comm)
237 plot = self.object
238 else:
--> 239 plot = self._render(doc, comm, root)
240
241 plot.pane = self
~\miniconda3\envs\mpess_visual\lib\site-packages\panel\pane\holoviews.py in _render(self, doc, comm, root)
304 kwargs['comm'] = comm
305
--> 306 return renderer.get_plot(self.object, **kwargs)
307
308 def _cleanup(self, root):
~\miniconda3\envs\mpess_visual\lib\site-packages\holoviews\plotting\bokeh\renderer.py in get_plot(self_or_cls, obj, doc, renderer, **kwargs)
71 combining the bokeh model with another plot.
72 """
---> 73 plot = super(BokehRenderer, self_or_cls).get_plot(obj, doc, renderer, **kwargs)
74 if plot.document is None:
75 plot.document = Document() if self_or_cls.notebook_context else curdoc()
~\miniconda3\envs\mpess_visual\lib\site-packages\holoviews\plotting\renderer.py in get_plot(self_or_cls, obj, doc, renderer, comm, **kwargs)
241 init_key = tuple(v if d is None else d for v, d in
242 zip(plot.keys[0], defaults))
--> 243 plot.update(init_key)
244 else:
245 plot = obj
~\miniconda3\envs\mpess_visual\lib\site-packages\holoviews\plotting\plot.py in update(self, key)
980 def update(self, key):
981 if len(self) == 1 and ((key == 0) or (key == self.keys[0])) and not self.drawn:
--> 982 return self.initialize_plot()
983 item = self.__getitem__(key)
984 self.traverse(lambda x: setattr(x, '_updated', True))
~\miniconda3\envs\mpess_visual\lib\site-packages\holoviews\plotting\bokeh\element.py in initialize_plot(self, ranges, plot, plots, source)
1388 element = self.hmap.last
1389 key = util.wrap_tuple(self.hmap.last_key)
-> 1390 ranges = self.compute_ranges(self.hmap, key, ranges)
1391 self.current_ranges = ranges
1392 self.current_frame = element
~\miniconda3\envs\mpess_visual\lib\site-packages\holoviews\plotting\plot.py in compute_ranges(self, obj, key, ranges)
636 if (not (axiswise and not isinstance(obj, HoloMap)) or
637 (not framewise and isinstance(obj, HoloMap))):
--> 638 self._compute_group_range(group, elements, ranges, framewise,
639 axiswise, robust, self.top_level,
640 prev_frame)
~\miniconda3\envs\mpess_visual\lib\site-packages\holoviews\plotting\plot.py in _compute_group_range(cls, group, elements, ranges, framewise, axiswise, robust, top_level, prev_frame)
853 continue
854 matching &= (
--> 855 len({'date' if isinstance(v, util.datetime_types) else 'number'
856 for rng in rs for v in rng if util.isfinite(v)}) < 2
857 )
~\miniconda3\envs\mpess_visual\lib\site-packages\holoviews\plotting\plot.py in <setcomp>(.0)
854 matching &= (
855 len({'date' if isinstance(v, util.datetime_types) else 'number'
--> 856 for rng in rs for v in rng if util.isfinite(v)}) < 2
857 )
858 if matching:
~\miniconda3\envs\mpess_visual\lib\site-packages\holoviews\core\util.py in isfinite(val)
902 return finite
903 elif isinstance(val, datetime_types+timedelta_types):
--> 904 return not isnat(val)
905 elif isinstance(val, (basestring, bytes)):
906 return True
~\miniconda3\envs\mpess_visual\lib\site-packages\holoviews\core\util.py in isnat(val)
866 elif pd and val is pd.NaT:
867 return True
--> 868 elif pd and isinstance(val, pandas_datetime_types+pandas_timedelta_types):
869 return pd.isna(val)
870 else:
NameError: name 'pandas_datetime_types' is not defined
Any suggestions? Thank you
Although I couldn't find any official doc to support my statement, it's a compatibility issue (HoloViews 1.14.4 was released before Pandas 1.3.0).
Looking at [GitHub]: holoviz/holoviews - (v1.14.4) holoviews/holoviews/core/util.py (starting with line #83), there are some conditional imports. One of them is ABCIndexClass.
[GitHub]: pandas-dev/pandas - (v1.3.0) pandas/pandas/core/dtypes/dtypes.py, on the other hand, does not provide it (as opposed to, say, its v1.2.5 counterpart), yielding a (silent) exception and the behavior you're experiencing.
Ways to go:
Upgrade HoloViews to v1.14.5 which no longer has this problem, (or at least, there's a Pandas 1.3.0 conditional as well - fixed by [GitHub]: holoviz/holoviews - Add support for pandas>=1.3)
You could also downgrade Pandas to (e.g.) v1.2.5, although this is not the way to go

Trackpy tp.batch() gives generator already executing error

I am trying to track some particles in a video using trackpy.
I'm following the walkthrough from the website:
http://soft-matter.github.io/trackpy/v0.4.2/tutorial/walkthrough.html
After processing a few frames (usually around 14 frames, sometimes 0), it gives me a ValueError that says: "generator already executing"
I cannot figure out how to solve this issue, I hope someone does.
Python: 3.9.4
Trackpy: 0.5.0
The full error:
ValueError Traceback (most recent call last)
<ipython-input-8-ff6dcf7a7595> in <module>
----> 1 f = tp.batch(frames[100:300], masksize, minmass=minmass, invert=True);
~\.conda\envs\trackpyenv\lib\site-packages\trackpy\feature.py in batch(frames, diameter, output, meta, processes, after_locate, **kwargs)
556 all_features = []
557 for i, features in enumerate(map_func(curried_locate, frames)):
--> 558 image = frames[i]
559 if hasattr(image, 'frame_no') and image.frame_no is not None:
560 frame_no = image.frame_no
~\.conda\envs\trackpyenv\lib\site-packages\slicerator\__init__.py in __getitem__(self, key)
234 if not (isinstance(key, slice) or
235 isinstance(key, collections.Iterable)):
--> 236 return self._get(self._map_index(key))
237 else:
238 rel_indices, new_length = key_to_indices(key, len(self))
~\.conda\envs\trackpyenv\lib\site-packages\slicerator\__init__.py in _get(self, key)
205
206 def _get(self, key):
--> 207 return self._ancestor[key]
208
209 def _map_index(self, key):
~\.conda\envs\trackpyenv\lib\site-packages\slicerator\__init__.py in __getitem__(self, i)
478 indices, new_length = key_to_indices(i, len(self))
479 if new_length is None:
--> 480 return self._get(indices)
481 else:
482 return Slicerator(self, indices, new_length, self._propagate_attrs)
~\.conda\envs\trackpyenv\lib\site-packages\slicerator\__init__.py in _get(self, key)
459 # We need to copy here: else any _proc_func that acts inplace would
460 # change the ancestor value.
--> 461 return self._proc_func(*(copy(a[key]) for a in self._ancestors))
462
463 def __repr__(self):
~\.conda\envs\trackpyenv\lib\site-packages\slicerator\__init__.py in <genexpr>(.0)
459 # We need to copy here: else any _proc_func that acts inplace would
460 # change the ancestor value.
--> 461 return self._proc_func(*(copy(a[key]) for a in self._ancestors))
462
463 def __repr__(self):
~\.conda\envs\trackpyenv\lib\site-packages\slicerator\__init__.py in __getitem__(self, i)
186 indices, new_length = key_to_indices(i, len(self))
187 if new_length is None:
--> 188 return self._get(indices)
189 else:
190 return cls(self, indices, new_length, propagate_attrs)
~\.conda\envs\trackpyenv\lib\site-packages\pims\base_frames.py in __getitem__(self, key)
96 """__getitem__ is handled by Slicerator. In all pims readers, the data
97 returning function is get_frame."""
---> 98 return self.get_frame(key)
99
100 def __iter__(self):
~\.conda\envs\trackpyenv\lib\site-packages\pims\base_frames.py in get_frame(self, i)
590 coords.update(**{k: v for k, v in zip(self.iter_axes, iter_coords)})
591
--> 592 result = self._get_frame_wrapped(**coords)
593 if hasattr(result, 'metadata'):
594 metadata = result.metadata
~\.conda\envs\trackpyenv\lib\site-packages\pims\imageio_reader.py in get_frame_2D(self, **coords)
100 def get_frame_2D(self, **coords):
101 i = coords['t'] if 't' in coords else 0
--> 102 frame = self.reader.get_data(i)
103 return Frame(frame, frame_no=i, metadata=frame.meta)
104
~\.conda\envs\trackpyenv\lib\site-packages\imageio\core\format.py in get_data(self, index, **kwargs)
344 self._BaseReaderWriter_last_index = index
345 try:
--> 346 im, meta = self._get_data(index, **kwargs)
347 except StopIteration:
348 raise IndexError(index)
~\.conda\envs\trackpyenv\lib\site-packages\imageio\plugins\ffmpeg.py in _get_data(self, index)
379 else:
380 if (index < self._pos) or (index > self._pos + 100):
--> 381 self._initialize(index)
382 else:
383 self._skip_frames(index - self._pos - 1)
~\.conda\envs\trackpyenv\lib\site-packages\imageio\plugins\ffmpeg.py in _initialize(self, index)
393 # Close the current generator, and thereby terminate its subprocess
394 if self._read_gen is not None:
--> 395 self._read_gen.close()
396
397 iargs = []
ValueError: generator already executing
I stumbled on the same (or similar) issue.
The root cause seems to be trying to use more than one process to execute the batch code, while some internal function call isn't thread-safe.
A workaround would be to disable multi-processes by calling batch with processes=1, e.g.:
f = tp.batch(frames[100:300], masksize, minmass=minmass, invert=True, processes=1);
See trackpy.batch for reference.
Calling it a workaround as this would cause the code to execute serially, one frame at a time. Then again, better than not executing at all...

How to use .map on an integer column in python pandas

I'm trying to take an integer column and map discrete values to another column. Basically, if a credit tier is marked 1, 2, or 3, another column maps those to no credit state, thin file, or no hit, respectively. Then the null values are filled with 'valid'. I tried the approach shown below. However, I keep getting this error:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-129-926e6625f2b6> in <module>
1 #train.dtypes
----> 2 df['discrete_52278'] = df.apply(lambda row: discrete_credit(row, 'credit_52278'), axis = 1)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py in apply(self, func, axis, broadcast, raw, reduce, result_type, args, **kwds)
6012 args=args,
6013 kwds=kwds)
-> 6014 return op.get_result()
6015
6016 def applymap(self, func):
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\apply.py in get_result(self)
140 return self.apply_raw()
141
--> 142 return self.apply_standard()
143
144 def apply_empty_result(self):
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\apply.py in apply_standard(self)
246
247 # compute the result using the series generator
--> 248 self.apply_series_generator()
249
250 # wrap results
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\apply.py in apply_series_generator(self)
275 try:
276 for i, v in enumerate(series_gen):
--> 277 results[i] = self.f(v)
278 keys.append(v.name)
279 except Exception as e:
<ipython-input-129-926e6625f2b6> in <lambda>(row)
1 #train.dtypes
----> 2 df['discrete_52278'] = df.apply(lambda row: discrete_credit(row, 'credit_52278'), axis = 1)
<ipython-input-126-462888d46184> in discrete_credit(row, variable)
6
7 """
----> 8 score = row[variable].map({1:'no_credit_state', 2:'thin_file', 3:"no_hit"})
9 score = row[score].fillna('valid')
10 score = pd.Categorical(row[score], ['valid', 'no_credit_state','thin_file', 'no_hit'])
AttributeError: ("'numpy.int64' object has no attribute 'map'", 'occurred at index 0')
Here is a code example that is throwing the same error:
import pandas as pd
credit = {'credit_52278':[1,2,3,500,550,600,650,700,750,800,900]
}
df = pd.DataFrame(credit)
def discrete_credit(row, variable):
"""
allows thin files, no hits and no credit scores to float which will then allow the rest of the credit score to be fit \
with a spline
"""
score = row[variable].map({1:'no_credit_state', 2:'thin_file', 3:"no_hit"})
score = row[score].fillna('valid')
score = pd.Categorical(row[score], ['valid', 'no_credit_state','thin_file', 'no_hit'])
return score
df['discrete_52278'] = df.apply(lambda row: discrete_credit(row, 'credit_52278'), axis = 1)
map is a Series method, but you are trying to use it on a scalar value (a numpy.int64 here, per the traceback).
You could simply do something like:
df['discrete_52278'] = (
df['credit_52278']
.map({
1: 'no_credit_state',
2: 'thin_file',
3: 'no_hit'
})
.fillna('valid')
.astype('category')
)

How to clean sms data messages extracted from mobile phone. I want to do this with Python-Pandas

How to clean sms data messages extracted from mobile phone. I want to do this with Python-Pandas.
I need to clean data from sms messages, and I want to extract the body of the message and exclude square brackets.
Example of sms message is:
' <sms protocol="0" address="+14242380303" date="1407256816998" type="1" subject="null" body="ChatON : 3630 Message is sent from the ChatON service." toa="null" sc_toa="null" service_center="null" read="1" status="-1" locked="0" date_sent="0" readable_date="5. kol 2014. 06:40:16 PM" contact_name="(Unknown)" />'
I use this code to extract body of the message.
func = lambda x: re.findall('(?<=\[)[^]]+(?=\])', x)
df=df.applymap(func)
This is the DataFrame with 'body column' which I want to clean.
Text
2 [Ok]
3 [Ok]
4 [Ok]
5 [U sedam u Dramaru kafa]
6 [Ok]
And I get this error
TypeError Traceback (most recent call last)
<ipython-input-10-f8a330ca2fe3> in <module>
1 func = lambda x: re.findall('(?<=\[)[^]]+(?=\])', x)
----> 2 df=df.applymap(func)
3
4
~/.local/lib/python3.6/site-packages/pandas/core/frame.py in applymap(self, func)
6070 return lib.map_infer(x.astype(object).values, func)
6071
-> 6072 return self.apply(infer)
6073
6074 # ----------------------------------------------------------------------
~/.local/lib/python3.6/site-packages/pandas/core/frame.py in apply(self, func, axis, broadcast, raw, reduce, result_type, args, **kwds)
6012 args=args,
6013 kwds=kwds)
-> 6014 return op.get_result()
6015
6016 def applymap(self, func):
~/.local/lib/python3.6/site-packages/pandas/core/apply.py in get_result(self)
316 *self.args, **self.kwds)
317
--> 318 return super(FrameRowApply, self).get_result()
319
320 def apply_broadcast(self):
~/.local/lib/python3.6/site-packages/pandas/core/apply.py in get_result(self)
140 return self.apply_raw()
141
--> 142 return self.apply_standard()
143
144 def apply_empty_result(self):
~/.local/lib/python3.6/site-packages/pandas/core/apply.py in apply_standard(self)
246
247 # compute the result using the series generator
--> 248 self.apply_series_generator()
249
250 # wrap results
~/.local/lib/python3.6/site-packages/pandas/core/apply.py in apply_series_generator(self)
275 try:
276 for i, v in enumerate(series_gen):
--> 277 results[i] = self.f(v)
278 keys.append(v.name)
279 except Exception as e:
~/.local/lib/python3.6/site-packages/pandas/core/frame.py in infer(x)
6068 if x.empty:
6069 return lib.map_infer(x, func)
-> 6070 return lib.map_infer(x.astype(object).values, func)
6071
6072 return self.apply(infer)
pandas/_libs/src/inference.pyx in pandas._libs.lib.map_infer()
<ipython-input-10-f8a330ca2fe3> in <lambda>(x)
----> 1 func = lambda x: re.findall('(?<=\[)[^]]+(?=\])', x)
2 df=df.applymap(func)
3
4
/usr/lib/python3.6/re.py in findall(pattern, string, flags)
220
221 Empty matches are included in the result."""
--> 222 return _compile(pattern, flags).findall(string)
223
224 def finditer(pattern, string, flags=0):
TypeError: ('expected string or bytes-like object', 'occurred at index Text')
Your Text values are lists of strings, not strings. Best to first extract them. If all lists have only one message you can call
df['Text'] = df['Text'].apply(lambda x: x[0] if len(x) > 0 else x)

ValueError when using .diff() with dask dataframe

I have a large time series data set which I want to process with Dask.
Apart from a few other columns, there is a column called 'id' which identifies individuals, a column transc_date which identifies the date, and a column transc_time identifying the time when an individual made a transaction.
The data is sorted using:
df = df.map_partitions(lambda x: x.sort_values(['id', 'transc_date', 'transc_time'], ascending=[True, True, True]))
transc_time is of type int and transc_date is of type datetime64.
I want to create a new column which gives me for each individual the number of days since the last transaction. For this I created the following function:
def get_diff_since_last_trans(df, plot=True):
df['diff_last'] = df.map_overlap(lambda x: x.groupby('id')['transc_date'].diff(), before=10, after=10)
diffs = df[['id', 'diff_last']].groupby(['id']).agg('max')['diff_last'].dt.days.compute()
if plot:
sns.distplot(diffs.values, kde = False, rug = False)
return diffs
When I try this function on a small subset of the data (200k rows) it works as intended. But when I use it on the full data set I get the ValueError below.
I dropped all ids which have fewer than 10 occurrences first. transc_date does not contain nans, it only contains datetime64 entries.
Any idea what's going wrong?
ValueError Traceback (most recent call last)
<ipython-input-12-551d7256f328> in <module>()
1 a = get_diff_first_last_trans(df, plot=False)
----> 2 b = get_diff_since_last_trans(df, plot=False)
3 plot_trans_diff(a,b)
<ipython-input-10-8f83d4571659> in get_diff_since_last_trans(df, plot)
12 def get_diff_since_last_trans(df, plot=True):
13 df['diff_last'] = df.map_overlap(lambda x: x.groupby('id')['transc_date'].diff(), before=10, after=10)
---> 14 diffs = df[['id', 'diff_last']].groupby(['id']).agg('max')['diff_last'].dt.days.compute()
15 if plot:
16 sns.distplot(diffs.values, kde = False, rug = False)
~/venv/lib/python3.6/site-packages/dask/base.py in compute(self, **kwargs)
133 dask.base.compute
134 """
--> 135 (result,) = compute(self, traverse=False, **kwargs)
136 return result
137
~/venv/lib/python3.6/site-packages/dask/base.py in compute(*args, **kwargs)
331 postcomputes = [a.__dask_postcompute__() if is_dask_collection(a)
332 else (None, a) for a in args]
--> 333 results = get(dsk, keys, **kwargs)
334 results_iter = iter(results)
335 return tuple(a if f is None else f(next(results_iter), *a)
~/venv/lib/python3.6/site-packages/distributed/client.py in get(self, dsk, keys, restrictions, loose_restrictions, resources, sync, asynchronous, **kwargs)
1997 secede()
1998 try:
-> 1999 results = self.gather(packed, asynchronous=asynchronous)
2000 finally:
2001 for f in futures.values():
~/venv/lib/python3.6/site-packages/distributed/client.py in gather(self, futures, errors, maxsize, direct, asynchronous)
1435 return self.sync(self._gather, futures, errors=errors,
1436 direct=direct, local_worker=local_worker,
-> 1437 asynchronous=asynchronous)
1438
1439 #gen.coroutine
~/venv/lib/python3.6/site-packages/distributed/client.py in sync(self, func, *args, **kwargs)
590 return future
591 else:
--> 592 return sync(self.loop, func, *args, **kwargs)
593
594 def __repr__(self):
~/venv/lib/python3.6/site-packages/distributed/utils.py in sync(loop, func, *args, **kwargs)
252 e.wait(1000000)
253 if error[0]:
--> 254 six.reraise(*error[0])
255 else:
256 return result[0]
~/venv/lib/python3.6/site-packages/six.py in reraise(tp, value, tb)
691 if value.__traceback__ is not tb:
692 raise value.with_traceback(tb)
--> 693 raise value
694 finally:
695 value = None
~/venv/lib/python3.6/site-packages/distributed/utils.py in f()
236 yield gen.moment
237 thread_state.asynchronous = True
--> 238 result[0] = yield make_coro()
239 except Exception as exc:
240 logger.exception(exc)
~/venv/lib/python3.6/site-packages/tornado/gen.py in run(self)
1053
1054 try:
-> 1055 value = future.result()
1056 except Exception:
1057 self.had_exception = True
~/venv/lib/python3.6/site-packages/tornado/concurrent.py in result(self, timeout)
236 if self._exc_info is not None:
237 try:
--> 238 raise_exc_info(self._exc_info)
239 finally:
240 self = None
~/venv/lib/python3.6/site-packages/tornado/util.py in raise_exc_info(exc_info)
~/venv/lib/python3.6/site-packages/tornado/gen.py in run(self)
1061 if exc_info is not None:
1062 try:
-> 1063 yielded = self.gen.throw(*exc_info)
1064 finally:
1065 # Break up a reference to itself
~/venv/lib/python3.6/site-packages/distributed/client.py in _gather(self, futures, errors, direct, local_worker)
1313 six.reraise(type(exception),
1314 exception,
-> 1315 traceback)
1316 if errors == 'skip':
1317 bad_keys.add(key)
~/venv/lib/python3.6/site-packages/six.py in reraise(tp, value, tb)
690 value = tp()
691 if value.__traceback__ is not tb:
--> 692 raise value.with_traceback(tb)
693 raise value
694 finally:
~/venv/lib/python3.6/site-packages/dask/dataframe/rolling.py in overlap_chunk()
30 parts = [p for p in (prev_part, current_part, next_part) if p is not None]
31 combined = pd.concat(parts)
---> 32 out = func(combined, *args, **kwargs)
33 if prev_part is None:
34 before = None
<ipython-input-10-8f83d4571659> in <lambda>()
11
12 def get_diff_since_last_trans(df, plot=True):
---> 13 df['diff_last'] = df.map_overlap(lambda x: x.groupby('id')['transc_date'].diff(), before=10, after=10)
14 diffs = df[['id', 'diff_last']].groupby(['id']).agg('max')['diff_last'].dt.days.compute()
15 if plot:
~/venv/lib/python3.6/site-packages/pandas/core/groupby.py in wrapper()
737 *args, **kwargs)
738 except (AttributeError):
--> 739 raise ValueError
740
741 return wrapper
ValueError:

Categories

Resources