Value Error: Couldn't Convert String to Float - python

I have two input spreadsheets.
Sheet 1 has 7 columns and 3 rows:
   FID  Total        A1           B1   A2           B2
1  1    0.720168405  0.635589112  XXX  0.031112358  YYY
1  2    0.760438562  0.328168557  YYY  0.311172576  ZZZ
Sheet 2 has 2 columns and 4 rows:
      0
XXX   0.55
YYY   0.52
ZZZ   0.35
This is the code:
import pandas as pd
df = pd.read_excel("C:/Users/Sheet1.xls")
df2 = pd.read_excel("C:/Users/Sheet2.xlsx")
dictionary = df2.to_dict(orient='dict')
b = df.filter(like ='A').values
c = df.filter(like ='B').replace(dictionary[0]).astype(float).values
df['AA'] = ((c * b).sum(axis =1))
df['BB'] = df.AA / df.Total
def custom_round(x, base=5):
    return base * round(float(x)/base)
df['C'] = df['BB'].apply(lambda x: custom_round(x, base=.05))
df['C'] = "X = " + df['C'].apply(lambda s: '{:,.2f}'.format(s))
df.to_excel("C:/Users/Results.xlsx")
print(df)
I got this error message: ValueError: could not convert string to float: XXX
ValueError Traceback (most recent call last)
<ipython-input-1-f42c7cb99da5> in <module>()
8
9 b = df.filter(like ='A').values
---> 10 c = df.filter(like ='B').replace(dictionary[0]).astype(float).values
11
12 df['AA'] = ((c * b).sum(axis =1))
C:\ProgramData\Anaconda2\lib\site-packages\pandas\core\generic.pyc in astype(self, dtype, copy, errors, **kwargs)
5689 # else, only a single dtype is given
5690 new_data = self._data.astype(dtype=dtype, copy=copy, errors=errors,
-> 5691 **kwargs)
5692 return self._constructor(new_data).__finalize__(self)
5693
C:\ProgramData\Anaconda2\lib\site-packages\pandas\core\internals\managers.pyc in astype(self, dtype, **kwargs)
529
530 def astype(self, dtype, **kwargs):
--> 531 return self.apply('astype', dtype=dtype, **kwargs)
532
533 def convert(self, **kwargs):
C:\ProgramData\Anaconda2\lib\site-packages\pandas\core\internals\managers.pyc in apply(self, f, axes, filter, do_integrity_check, consolidate, **kwargs)
393 copy=align_copy)
394
--> 395 applied = getattr(b, f)(**kwargs)
396 result_blocks = _extend_blocks(applied, result_blocks)
397
C:\ProgramData\Anaconda2\lib\site-packages\pandas\core\internals\blocks.pyc in astype(self, dtype, copy, errors, values, **kwargs)
532 def astype(self, dtype, copy=False, errors='raise', values=None, **kwargs):
533 return self._astype(dtype, copy=copy, errors=errors, values=values,
--> 534 **kwargs)
535
536 def _astype(self, dtype, copy=False, errors='raise', values=None,
C:\ProgramData\Anaconda2\lib\site-packages\pandas\core\internals\blocks.pyc in _astype(self, dtype, copy, errors, values, **kwargs)
631
632 # _astype_nansafe works fine with 1-d only
--> 633 values = astype_nansafe(values.ravel(), dtype, copy=True)
634
635 # TODO(extension)
C:\ProgramData\Anaconda2\lib\site-packages\pandas\core\dtypes\cast.pyc in astype_nansafe(arr, dtype, copy, skipna)
700 if copy or is_object_dtype(arr) or is_object_dtype(dtype):
701 # Explicit copy, or required since NumPy can't view from / to object.
--> 702 return arr.astype(dtype, copy=True)
703
704 return arr.view(dtype)
ValueError: could not convert string to float: XXX

I see that in the 6th line of your code you are trying to replace a set of labels in the dataframe (XXX, YYY, ... with 0.55, 0.52, ...), but you end up supplying a dictionary like {0: 0.55, 1: 0.52, ...} whose keys are the row indices, so the labels are never matched.
I have changed your sheet 2 header for easier indexing, like
0     1
XXX   0.55
YYY   0.52
ZZZ   0.35
and set the index using the existing column 0 by replacing your line 4 with
dictionary = df2.set_index(0)[1].to_dict()
and your line 6 with
c = df.filter(like='B').replace(dictionary).astype(float).values
This supplies a proper {label: value} dictionary for replace to use.
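Putting it together, a minimal sketch of the corrected script (assuming the renamed Sheet 2 header above; the paths are the asker's):
import pandas as pd

df = pd.read_excel("C:/Users/Sheet1.xls")
df2 = pd.read_excel("C:/Users/Sheet2.xlsx")  # header row renamed to 0 / 1 as above

# Proper {label: value} mapping, e.g. {'XXX': 0.55, 'YYY': 0.52, 'ZZZ': 0.35}
dictionary = df2.set_index(0)[1].to_dict()

b = df.filter(like='A').values
c = df.filter(like='B').replace(dictionary).astype(float).values
df['AA'] = (c * b).sum(axis=1)
df['BB'] = df.AA / df.Total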

Related

Replace multiple "less than values" in different columns in pandas dataframe

I am working with Python and pandas. I have a dataset of lab analyses where I am dealing with multiple parameters and detection limits (dl). Many of the samples are reported as below the dl (e.g. <dl, <4).
For example:
import pandas as pd
df=pd.DataFrame([['<4','88.72','<0.09'],['<1','5','<0.09'],['2','17.6','<0.09']], columns=['var_1','var_2','var_3'])
df
My goal is to replace all <dl with dl/2 as a float value.
I can do this for one column pretty easily.
df['var_3'] = df.var_3.str.replace('<' ,'').astype(float)
df['var_3'] = df['var_3'].apply(lambda x: x/2 if x == 0.09 else x)
df
but this requires me to look at the dl and input it by hand.
I would like to streamline it to apply across all variables, with one or more detection limits per variable, since I have many variables and the detection limit will not always be constant from one data frame to the next.
I found something similar in R but am not sure how to apply it in Python. Any solutions would be appreciated.
Update
So the
df=df.replace(r'<(.*)', r'\1/2', regex=True).apply(pd.eval)
approach works well on a dataframe whose columns contain only numbers; I assume that is a limitation of the eval function. For some reason I can get the code to work on smaller dataframes, but after I concatenate them it will not work on the larger dataframe, and I get this error message:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
/var/folders/9_/w2qcdj_x2x5852py8xl6b0sh0000gn/T/ipykernel_9403/3946462310.py in <module>
----> 1 MS=MS.replace(r'<(.*)', r'\1/2', regex=True).apply(pd.eval)
~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/frame.py in apply(self, func, axis, raw, result_type, args, **kwargs)
8738 kwargs=kwargs,
8739 )
-> 8740 return op.apply()
8741
8742 def applymap(
~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/apply.py in apply(self)
686 return self.apply_raw()
687
--> 688 return self.apply_standard()
689
690 def agg(self):
~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/apply.py in apply_standard(self)
810
811 def apply_standard(self):
--> 812 results, res_index = self.apply_series_generator()
813
814 # wrap results
~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/apply.py in apply_series_generator(self)
826 for i, v in enumerate(series_gen):
827 # ignore SettingWithCopy here in case the user mutates
--> 828 results[i] = self.f(v)
829 if isinstance(results[i], ABCSeries):
830 # If we have a view on v, we need to make a copy because
~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/computation/eval.py in eval(expr, parser, engine, truediv, local_dict, global_dict, resolvers, level, target, inplace)
351 eng = ENGINES[engine]
352 eng_inst = eng(parsed_expr)
--> 353 ret = eng_inst.evaluate()
354
355 if parsed_expr.assigner is None:
~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/computation/engines.py in evaluate(self)
78
79 # make sure no names in resolvers and locals/globals clash
---> 80 res = self._evaluate()
81 return reconstruct_object(
82 self.result_type, res, self.aligned_axes, self.expr.terms.return_type
~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/computation/engines.py in _evaluate(self)
119 scope = env.full_scope
120 _check_ne_builtin_clash(self.expr)
--> 121 return ne.evaluate(s, local_dict=scope)
122
123
~/opt/anaconda3/lib/python3.9/site-packages/numexpr/necompiler.py in evaluate(ex, local_dict, global_dict, out, order, casting, **kwargs)
821
822 # Create a signature
--> 823 signature = [(name, getType(arg)) for (name, arg) in
824 zip(names, arguments)]
825
~/opt/anaconda3/lib/python3.9/site-packages/numexpr/necompiler.py in <listcomp>(.0)
821
822 # Create a signature
--> 823 signature = [(name, getType(arg)) for (name, arg) in
824 zip(names, arguments)]
825
~/opt/anaconda3/lib/python3.9/site-packages/numexpr/necompiler.py in getType(a)
703 if kind == 'U':
704 raise ValueError('NumExpr 2 does not support Unicode as a dtype.')
--> 705 raise ValueError("unknown type %s" % a.dtype.name)
706
707
ValueError: unknown type object
Use replace instead of str.replace, then eval all the expressions:
>>> df.replace(r'<(.*)', r'\1/2', regex=True).apply(pd.eval)
var_1 var_2 var_3
0 2.0 88.72 0.045
1 0.5 5.00 0.045
2 2.0 17.60 0.045
\1 will be replaced by the first capture group (.*).
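To see the two steps separately, replace alone still leaves strings behind; it is pd.eval that turns '4/2' into 2.0:
>>> df.replace(r'<(.*)', r'\1/2', regex=True)
  var_1  var_2   var_3
0   4/2  88.72  0.09/2
1   1/2      5  0.09/2
2     2   17.6  0.09/2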
Update
Alternative:
out = df.melt(ignore_index=False)
m = out['value'].str.startswith('<')
out.loc[m, 'value'] = out.loc[m, 'value'].str.strip('<').astype(float) / 2
out = out.reset_index().pivot(index='index', columns='variable', values='value') \
         .rename_axis(index=None, columns=None)
Output:
>>> out
var_1 var_2 var_3
0 2.0 88.72 0.045
1 0.5 5 0.045
2 2 17.6 0.045
Update
Alternative using melt to flatten your dataframe and pivot_table to reshape it back to its original form:
df1 = df.melt(ignore_index=False)
m = df1['value'].str.startswith('<')
df1['value'] = df1['value'].mask(~m).str[1:].astype(float).div(2) \
                           .fillna(df1['value']).astype(float)
df1 = df1.reset_index().pivot_table('value', 'index', 'variable') \
         .rename_axis(index=None, columns=None)
Output:
>>> df1
var_1 var_2 var_3
0 2.0 88.72 0.045
1 0.5 5.00 0.045
2 2.0 17.60 0.045
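For what it's worth, pd.eval defaults to the numexpr engine when that library is installed, and numexpr cannot evaluate object-dtype data, which is what the "unknown type object" error on the concatenated frame points to. A sketch that avoids eval entirely, assuming every cell is a string as in the example frame:
import pandas as pd

df = pd.DataFrame([['<4', '88.72', '<0.09'],
                   ['<1', '5', '<0.09'],
                   ['2', '17.6', '<0.09']],
                  columns=['var_1', 'var_2', 'var_3'])

# True wherever a value was reported below the detection limit
mask = df.apply(lambda col: col.str.startswith('<'))
# Strip the '<' so every cell parses, then halve only the masked cells
numeric = df.apply(lambda col: pd.to_numeric(col.str.lstrip('<')))
out = numeric.where(~mask, numeric / 2)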

Eliminate characters in a string and convert it in float64 with Pandas

Fellows,
I am trying to eliminate some characters in an object column of a pandas dataframe in order to convert it to int64 and do some calculations. The column ("populacaoTCU2019") holds population values, but some values were listed wrongly as 10.320(2), 23.320(14), 43.223(23), etc. I need to eliminate the "." (which I think can be done with str.replace(".", "")), but the values in parentheses are more complicated, as they vary through the rows. Can anyone give me a hand with this?
<class 'pandas.core.frame.DataFrame'>
Int64Index: 617658 entries, 4145 to 624062
Data columns (total 17 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
9 populacaoTCU2019 617658 non-null object
dtypes: datetime64[ns](1), float64(5), int64(6), object(5)
memory usage: 84.8+ MB
And I get the error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-23-4b87875ecf91> in <module>
1 #vamos criar uma coluna nova para indicar o valor dividido pela população. Mas precisa converter os dados de objet e int64 em float64
2 #Para as cidades
----> 3 df_cidades['novos_obitos_rel'] = 100000*df_cidades['obitosNovos'].astype('float64')/df_cidades['populacaoTCU2019'].astype('float64')
4 df_cidades['obitos_acum_rel'] = 100000*df_cidades['obitosAcumulado'].astype('float64')/df_cidades['populacaoTCU2019'].astype('float64')
~/opt/anaconda3/lib/python3.7/site-packages/pandas/core/generic.py in astype(self, dtype, copy, errors)
5696 else:
5697 # else, only a single dtype is given
-> 5698 new_data = self._data.astype(dtype=dtype, copy=copy, errors=errors)
5699 return self._constructor(new_data).__finalize__(self)
5700
~/opt/anaconda3/lib/python3.7/site-packages/pandas/core/internals/managers.py in astype(self, dtype, copy, errors)
580
581 def astype(self, dtype, copy: bool = False, errors: str = "raise"):
--> 582 return self.apply("astype", dtype=dtype, copy=copy, errors=errors)
583
584 def convert(self, **kwargs):
~/opt/anaconda3/lib/python3.7/site-packages/pandas/core/internals/managers.py in apply(self, f, filter, **kwargs)
440 applied = b.apply(f, **kwargs)
441 else:
--> 442 applied = getattr(b, f)(**kwargs)
443 result_blocks = _extend_blocks(applied, result_blocks)
444
~/opt/anaconda3/lib/python3.7/site-packages/pandas/core/internals/blocks.py in astype(self, dtype, copy, errors)
623 vals1d = values.ravel()
624 try:
--> 625 values = astype_nansafe(vals1d, dtype, copy=True)
626 except (ValueError, TypeError):
627 # e.g. astype_nansafe can fail on object-dtype of strings
~/opt/anaconda3/lib/python3.7/site-packages/pandas/core/dtypes/cast.py in astype_nansafe(arr, dtype, copy, skipna)
895 if copy or is_object_dtype(arr) or is_object_dtype(dtype):
896 # Explicit copy, or required since NumPy can't view from / to object.
--> 897 return arr.astype(dtype, copy=True)
898
899 return arr.view(dtype)
ValueError: could not convert string to float: '32.105(2)'
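Assuming the column looks like the sample values (a thousands "." plus an optional footnote in parentheses), one possible cleanup is to drop the footnote first and the separator second; a sketch on illustrative data:
import pandas as pd

s = pd.Series(['10.320(2)', '23.320(14)', '43.223(23)', '32.105(2)'])

cleaned = (s.str.replace(r'\(\d+\)$', '', regex=True)  # drop the trailing "(n)" footnote
            .str.replace('.', '', regex=False)         # drop the thousands separator "."
            .astype('int64'))
# cleaned: 10320, 23320, 43223, 32105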

Python Pandas Style to every nth row

I'm working on a Python project with pandas and am looking to apply a style to every Nth row. I've been able to select every Nth row using iloc but cannot get the style to work with a basic function. Here's my example in context:
import pandas as pd

data = [[1,2,3],[2,3,4],[3,4,5],[4,5,6]]
df = pd.DataFrame(data)
df
0 1 2
0 1 2 3
1 2 3 4
2 3 4 5
3 4 5 6
df.iloc[1::2, :]
0 1 2
1 2 3 4
3 4 5 6
At this point everything returns as expected, but when I apply the function below, I receive a "Too many indexers" error which I can't seem to resolve:
def highlight_everyother(s):
    if s.iloc[1::2, :]:
        return ['background-color: yellow']*3

df.style.apply(highlight_everyother, axis=1)
ERROR:
IndexingError Traceback (most recent call last)
~\Anaconda3\lib\site-packages\IPython\core\formatters.py in __call__(self, obj)
343 method = get_real_method(obj, self.print_method)
344 if method is not None:
--> 345 return method()
346 return None
347 else:
~\Anaconda3\lib\site-packages\pandas\io\formats\style.py in _repr_html_(self)
180 Hooks into Jupyter notebook rich display system.
181 """
--> 182 return self.render()
183
184 #Appender(
~\Anaconda3\lib\site-packages\pandas\io\formats\style.py in render(self, **kwargs)
535 * table_attributes
536 """
--> 537 self._compute()
538 # TODO: namespace all the pandas keys
539 d = self._translate()
~\Anaconda3\lib\site-packages\pandas\io\formats\style.py in _compute(self)
610 r = self
611 for func, args, kwargs in self._todo:
--> 612 r = func(self)(*args, **kwargs)
613 return r
614
~\Anaconda3\lib\site-packages\pandas\io\formats\style.py in _apply(self, func, axis, subset, **kwargs)
618 data = self.data.loc[subset]
619 if axis is not None:
--> 620 result = data.apply(func, axis=axis, result_type="expand", **kwargs)
621 result.columns = data.columns
622 else:
~\Anaconda3\lib\site-packages\pandas\core\frame.py in apply(self, func, axis, raw, result_type, args, **kwds)
6876 kwds=kwds,
6877 )
-> 6878 return op.get_result()
6879
6880 def applymap(self, func) -> "DataFrame":
~\Anaconda3\lib\site-packages\pandas\core\apply.py in get_result(self)
184 return self.apply_raw()
185
--> 186 return self.apply_standard()
187
188 def apply_empty_result(self):
~\Anaconda3\lib\site-packages\pandas\core\apply.py in apply_standard(self)
311
312 # compute the result using the series generator
--> 313 results, res_index = self.apply_series_generator()
314
315 # wrap results
~\Anaconda3\lib\site-packages\pandas\core\apply.py in apply_series_generator(self)
339 else:
340 for i, v in enumerate(series_gen):
--> 341 results[i] = self.f(v)
342 keys.append(v.name)
343
<ipython-input-49-a5b996f8d6c8> in highlight_everyother(s)
11
12 def highlight_everyother(s):
---> 13 if s.iloc[1::2, :]:
14 return ['background-color: yellow']*3
15
~\Anaconda3\lib\site-packages\pandas\core\indexing.py in __getitem__(self, key)
1760 except (KeyError, IndexError, AttributeError):
1761 pass
-> 1762 return self._getitem_tuple(key)
1763 else:
1764 # we by definition only have the 0th axis
~\Anaconda3\lib\site-packages\pandas\core\indexing.py in _getitem_tuple(self, tup)
2065 def _getitem_tuple(self, tup: Tuple):
2066
-> 2067 self._has_valid_tuple(tup)
2068 try:
2069 return self._getitem_lowerdim(tup)
~\Anaconda3\lib\site-packages\pandas\core\indexing.py in _has_valid_tuple(self, key)
699 for i, k in enumerate(key):
700 if i >= self.ndim:
--> 701 raise IndexingError("Too many indexers")
702 try:
703 self._validate_key(k, i)
IndexingError: Too many indexers
Any help would be appreciated. Thank you.
I would apply on axis=0 in case df is not indexed by a RangeIndex:
def highlight_everyother(s):
    return ['background-color: yellow; color:blue' if x % 2 == 1 else ''
            for x in range(len(s))]

df.style.apply(highlight_everyother)
Output: (the table renders with every other row highlighted)
You are passing one row at a time to highlight_everyother; that's why you were getting the error. The below should work:
def highlight_everyother(s):
    if s.name % 2 == 1:
        return ['background-color: yellow']*3
    else:
        return ['background-color: white']*3

df.style.apply(highlight_everyother, axis=1)
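Since the title asks about every Nth row, the same pattern generalizes; a minimal sketch assuming the frame keeps its default RangeIndex (N and highlight_every_nth are illustrative names):
import pandas as pd

data = [[1, 2, 3], [2, 3, 4], [3, 4, 5], [4, 5, 6]]
df = pd.DataFrame(data)

N = 2  # style every Nth row

def highlight_every_nth(s):
    # s.name is the row label; with a RangeIndex it doubles as the position
    if s.name % N == N - 1:
        return ['background-color: yellow'] * len(s)
    return [''] * len(s)

styled = df.style.apply(highlight_every_nth, axis=1)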

Remove % symbol from all the rows of a column of dataframe and convert the entire column values into float

A column of a dataframe named 'int.rate' has values like 11.26%, 13.67%, ... I need to remove the '%' symbol from every row of the 'int.rate' column and convert the whole column to float. I have already tried the other code snippets mentioned elsewhere, but they threw errors too, so I need to know the exact code for this task. Please help! Below is the code that I tried:
x = data['int.rate'].str.split('%').astype(float)
which gives
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-1-52f9e2c36b19> in <module>()
4
5 #Code starts here
----> 6 x = data['int.rate'].str.split('%').astype(float)
7 print(x)
/opt/greyatom/kernel-gateway/runtime-environments/python/lib/python3.6/site-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs)
176 else:
177 kwargs[new_arg_name] = new_arg_value
--> 178 return func(*args, **kwargs)
179 return wrapper
180 return _deprecate_kwarg
/opt/greyatom/kernel-gateway/runtime-environments/python/lib/python3.6/site-packages/pandas/core/generic.py in astype(self, dtype, copy, errors, **kwargs)
4999 # else, only a single dtype is given
5000 new_data = self._data.astype(dtype=dtype, copy=copy, errors=errors,
-> 5001 **kwargs)
5002 return self._constructor(new_data).__finalize__(self)
5003
/opt/greyatom/kernel-gateway/runtime-environments/python/lib/python3.6/site-packages/pandas/core/internals.py in astype(self, dtype, **kwargs)
3712
3713 def astype(self, dtype, **kwargs):
-> 3714 return self.apply('astype', dtype=dtype, **kwargs)
3715
3716 def convert(self, **kwargs):
/opt/greyatom/kernel-gateway/runtime-environments/python/lib/python3.6/site-packages/pandas/core/internals.py in apply(self, f, axes, filter, do_integrity_check, consolidate, **kwargs)
3579
3580 kwargs['mgr'] = self
-> 3581 applied = getattr(b, f)(**kwargs)
3582 result_blocks = _extend_blocks(applied, result_blocks)
3583
/opt/greyatom/kernel-gateway/runtime-environments/python/lib/python3.6/site-packages/pandas/core/internals.py in astype(self, dtype, copy, errors, values, **kwargs)
573 def astype(self, dtype, copy=False, errors='raise', values=None, **kwargs):
574 return self._astype(dtype, copy=copy, errors=errors, values=values,
--> 575 **kwargs)
576
577 def _astype(self, dtype, copy=False, errors='raise', values=None,
/opt/greyatom/kernel-gateway/runtime-environments/python/lib/python3.6/site-packages/pandas/core/internals.py in _astype(self, dtype, copy, errors, values, klass, mgr, **kwargs)
662
663 # _astype_nansafe works fine with 1-d only
--> 664 values = astype_nansafe(values.ravel(), dtype, copy=True)
665 values = values.reshape(self.shape)
666
/opt/greyatom/kernel-gateway/runtime-environments/python/lib/python3.6/site-packages/pandas/core/dtypes/cast.py in astype_nansafe(arr, dtype, copy)
728
729 if copy:
--> 730 return arr.astype(dtype, copy=True)
731 return arr.view(dtype)
732
ValueError: setting an array element with a sequence.
split just splits the str; when you need to remove characters at the ends of a str, use strip instead. Try doing:
x = data['int.rate'].str.strip('%').astype(float)
in place of:
x = data['int.rate'].str.split('%').astype(float)
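For contrast, a quick look at what each call returns on sample values shows why the original line failed: split yields a list per row, which is what "setting an array element with a sequence" is complaining about.
import pandas as pd

s = pd.Series(['11.26%', '13.67%'])

s.str.split('%')  # [['11.26', ''], ['13.67', '']] -> lists; astype(float) fails
s.str.strip('%')  # ['11.26', '13.67'] -> plain strings; astype(float) works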
Instead of split, use replace. See the demo code:
import pandas as pd
# initialize list of lists
data = [['tom', '10.2%'], ['nick', '15.7%'], ['juli', '14.67%']]
# Create the pandas DataFrame
df = pd.DataFrame(data, columns = ['Name', 'Interest'])
# print dataframe.
df
Name Interest
0 tom 10.2%
1 nick 15.7%
2 juli 14.67%
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 2 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Name 3 non-null object
1 Interest 3 non-null object
dtypes: object(2)
memory usage: 176.0+ bytes
df['Interest'] = df['Interest'].str.replace('%', '').astype(float)
df
Name Interest
0 tom 10.20
1 nick 15.70
2 juli 14.67
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 2 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Name 3 non-null object
1 Interest 3 non-null float64
dtypes: float64(1), object(1)
memory usage: 176.0+ bytes

Possible bug with `xarray.Dataset.groupby()`?

I'm using Xarray version 0.8.0, Python 3.5.1, on Mac OS X El Capitan 10.11.6.
The following code works as expected.
import numpy
import xarray

id_data_array = xarray.DataArray([280, 306, 280], coords={"index": range(3)})
random = numpy.random.rand(3)
score_data_array = xarray.DataArray(random, coords={"index": range(3)})
score_dataset = xarray.Dataset({"id": id_data_array, "score": score_data_array})
print(score_dataset)
print("======")
print(score_dataset.groupby("id").count())
Output:
<xarray.Dataset>
Dimensions: (index: 3)
Coordinates:
* index (index) int64 0 1 2
Data variables:
id (index) int64 280 306 280
score (index) float64 0.8358 0.7536 0.9495
======
<xarray.Dataset>
Dimensions: (id: 2)
Coordinates:
* id (id) int64 280 306
Data variables:
score (id) int64 2 1
However, if I change just one little thing, to make the elements of id_data_array all distinct, then there is an error.
Code:
id_data_array = xarray.DataArray([280, 306, 120], coords={"index": range(3)})
random = numpy.random.rand(3)
score_data_array = xarray.DataArray(random, coords={"index": range(3)})
score_dataset = xarray.Dataset({"id": id_data_array, "score": score_data_array})
print(score_dataset)
print("======")
print(score_dataset.groupby("id").count())
Output:
<xarray.Dataset>
Dimensions: (index: 3)
Coordinates:
* index (index) int64 0 1 2
Data variables:
id (index) int64 280 306 120
score (index) float64 0.1353 0.0437 0.1687
======
---------------------------------------------------------------------------
InvalidIndexError Traceback (most recent call last)
<ipython-input-92-cc412270ba2e> in <module>()
5 print(score_dataset)
6 print("======")
----> 7 print(score_dataset.groupby("id").count())
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/xarray/core/common.py in wrapped_func(self, dim, keep_attrs, **kwargs)
44 return self.reduce(func, dim, keep_attrs,
45 numeric_only=numeric_only, allow_lazy=True,
---> 46 **kwargs)
47 return wrapped_func
48
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/xarray/core/groupby.py in reduce(self, func, dim, keep_attrs, **kwargs)
605 def reduce_dataset(ds):
606 return ds.reduce(func, dim, keep_attrs, **kwargs)
--> 607 return self.apply(reduce_dataset)
608
609 def assign(self, **kwargs):
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/xarray/core/groupby.py in apply(self, func, **kwargs)
562 kwargs.pop('shortcut', None) # ignore shortcut if set (for now)
563 applied = (func(ds, **kwargs) for ds in self._iter_grouped())
--> 564 combined = self._concat(applied)
565 result = self._maybe_restore_empty_groups(combined)
566 return result
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/xarray/core/groupby.py in _concat(self, applied)
570 concat_dim, positions = self._infer_concat_args(applied_example)
571
--> 572 combined = concat(applied, concat_dim)
573 reordered = _maybe_reorder(combined, concat_dim, positions)
574 return reordered
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/xarray/core/combine.py in concat(objs, dim, data_vars, coords, compat, positions, indexers, mode, concat_over)
114 raise TypeError('can only concatenate xarray Dataset and DataArray '
115 'objects, got %s' % type(first_obj))
--> 116 return f(objs, dim, data_vars, coords, compat, positions)
117
118
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/xarray/core/combine.py in _dataset_concat(datasets, dim, data_vars, coords, compat, positions)
276 if coord is not None:
277 # add concat dimension last to ensure that its in the final Dataset
--> 278 result[coord.name] = coord
279
280 return result
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/xarray/core/dataset.py in __setitem__(self, key, value)
536 raise NotImplementedError('cannot yet use a dictionary as a key '
537 'to set Dataset values')
--> 538 self.update({key: value})
539
540 def __delitem__(self, key):
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/xarray/core/dataset.py in update(self, other, inplace)
1434 dataset.
1435 """
-> 1436 variables, coord_names, dims = dataset_update_method(self, other)
1437
1438 return self._replace_vars_and_dims(variables, coord_names, dims,
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/xarray/core/merge.py in dataset_update_method(dataset, other)
490 priority_arg = 1
491 indexes = dataset.indexes
--> 492 return merge_core(objs, priority_arg=priority_arg, indexes=indexes)
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/xarray/core/merge.py in merge_core(objs, compat, join, priority_arg, explicit_coords, indexes)
371
372 coerced = coerce_pandas_values(objs)
--> 373 aligned = deep_align(coerced, join=join, copy=False, indexes=indexes)
374 expanded = expand_variable_dicts(aligned)
375
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/xarray/core/alignment.py in deep_align(list_of_variable_maps, join, copy, indexes)
146 out.append(variables)
147
--> 148 aligned = partial_align(*targets, join=join, copy=copy, indexes=indexes)
149
150 for key, aligned_obj in zip(keys, aligned):
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/xarray/core/alignment.py in partial_align(*objects, **kwargs)
109 valid_indexers = dict((k, v) for k, v in joined_indexes.items()
110 if k in obj.dims)
--> 111 result.append(obj.reindex(copy=copy, **valid_indexers))
112 return tuple(result)
113
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/xarray/core/dataset.py in reindex(self, indexers, method, tolerance, copy, **kw_indexers)
1216
1217 variables = alignment.reindex_variables(
-> 1218 self.variables, self.indexes, indexers, method, tolerance, copy=copy)
1219 return self._replace_vars_and_dims(variables)
1220
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/xarray/core/alignment.py in reindex_variables(variables, indexes, indexers, method, tolerance, copy)
218 target = utils.safe_cast_to_index(indexers[name])
219 indexer = index.get_indexer(target, method=method,
--> 220 **get_indexer_kwargs)
221
222 to_shape[name] = len(target)
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/pandas/indexes/base.py in get_indexer(self, target, method, limit, tolerance)
2080
2081 if not self.is_unique:
-> 2082 raise InvalidIndexError('Reindexing only valid with uniquely'
2083 ' valued Index objects')
2084
InvalidIndexError: Reindexing only valid with uniquely valued Index objects
To me this seems buggy, because if this is the desired behaviour it would be very strange. Surely it should handle the case where all the elements of the DataArray we're grouping by are distinct?
Update
I've now uninstalled and reinstalled Xarray. The new Xarray is version 0.8.1, and it seems to work fine. So it may indeed be a bug in Xarray 0.8.0.
