ValueError: could not convert string to float: '571,2' - python

I wrote this code: df['Liquid Milk'] = df['Liquid Milk'].replace("", np.nan).astype('float64')
I got the error below and I am not sure what is causing it. I have tried many different approaches, but I still get the same error. Any help is appreciated.
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-52-607dcacd5a1a> in <module>
----> 1 m['Liquid Milk(Mil Litres)']=m['Liquid Milk(Mil Litres)'].replace("", np.nan).astype('float64')
2
3
4
/usr/local/lib/python3.6/dist-packages/pandas/core/generic.py in astype(self, dtype, copy, errors, **kwargs)
5679 # else, only a single dtype is given
5680 new_data = self._data.astype(dtype=dtype, copy=copy, errors=errors,
-> 5681 **kwargs)
5682 return self._constructor(new_data).__finalize__(self)
5683
/usr/local/lib/python3.6/dist-packages/pandas/core/internals/managers.py in astype(self, dtype, **kwargs)
529
530 def astype(self, dtype, **kwargs):
--> 531 return self.apply('astype', dtype=dtype, **kwargs)
532
533 def convert(self, **kwargs):
/usr/local/lib/python3.6/dist-packages/pandas/core/internals/managers.py in apply(self, f, axes, filter, do_integrity_check, consolidate, **kwargs)
393 copy=align_copy)
394
--> 395 applied = getattr(b, f)(**kwargs)
396 result_blocks = _extend_blocks(applied, result_blocks)
397
/usr/local/lib/python3.6/dist-packages/pandas/core/internals/blocks.py in astype(self, dtype, copy, errors, values, **kwargs)
532 def astype(self, dtype, copy=False, errors='raise', values=None, **kwargs):
533 return self._astype(dtype, copy=copy, errors=errors, values=values,
--> 534 **kwargs)
535
536 def _astype(self, dtype, copy=False, errors='raise', values=None,
/usr/local/lib/python3.6/dist-packages/pandas/core/internals/blocks.py in _astype(self, dtype, copy, errors, values, **kwargs)
631
632 # _astype_nansafe works fine with 1-d only
--> 633 values = astype_nansafe(values.ravel(), dtype, copy=True)
634
635 # TODO(extension)
/usr/local/lib/python3.6/dist-packages/pandas/core/dtypes/cast.py in astype_nansafe(arr, dtype, copy, skipna)
700 if copy or is_object_dtype(arr) or is_object_dtype(dtype):
701 # Explicit copy, or required since NumPy can't view from / to object.
--> 702 return arr.astype(dtype, copy=True)
703
704 return arr.view(dtype)
ValueError: could not convert string to float: '571,2'

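The integer and fractional parts of a float must be separated by a period (.), not a comma (,). Replace every comma with a period before converting, for example with .str.replace(',', '.') on the column.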
float("571.2")
would work
float("571,2")
would fail.

Related

Convert comma separator objects to numeric in Pandas

I have a table with columns of data type object and int.
One of them is a dollar amount with a dollar sign ($) and a comma separator. I would like to use describe() to summarise the dataframe, so I tried to read the file, strip the $ sign, and then convert the object column to integer:
df= pd.read_excel(r'C:\Users\xxxx\df.xlsx','my_df' ,engine="openpyxl", thousands=',')
df['my_col'] = df['my_col'].replace({'\$':''}, regex = True)
df['my_col'].astype(str).astype(int)
df.describe(datetime_is_numeric=True)
but it raised an error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-133-2011d1ad889e> in <module>
4
5 df['my_col'] = df['my_col'].replace({'\$':''}, regex = True)
----> 6 df['my_col'].astype(str).astype(int)
7 df.describe(datetime_is_numeric=True)
~\AppData\Roaming\Python\Python38\site-packages\pandas\core\generic.py in astype(self, dtype, copy, errors)
5535 else:
5536 # else, only a single dtype is given
-> 5537 new_data = self._mgr.astype(dtype=dtype, copy=copy, errors=errors,)
5538 return self._constructor(new_data).__finalize__(self, method="astype")
5539
~\AppData\Roaming\Python\Python38\site-packages\pandas\core\internals\managers.py in astype(self, dtype, copy, errors)
565 self, dtype, copy: bool = False, errors: str = "raise"
566 ) -> "BlockManager":
--> 567 return self.apply("astype", dtype=dtype, copy=copy, errors=errors)
568
569 def convert(
~\AppData\Roaming\Python\Python38\site-packages\pandas\core\internals\managers.py in apply(self, f, align_keys, **kwargs)
394 applied = b.apply(f, **kwargs)
395 else:
--> 396 applied = getattr(b, f)(**kwargs)
397 result_blocks = _extend_blocks(applied, result_blocks)
398
~\AppData\Roaming\Python\Python38\site-packages\pandas\core\internals\blocks.py in astype(self, dtype, copy, errors)
588 vals1d = values.ravel()
589 try:
--> 590 values = astype_nansafe(vals1d, dtype, copy=True)
591 except (ValueError, TypeError):
592 # e.g. astype_nansafe can fail on object-dtype of strings
~\AppData\Roaming\Python\Python38\site-packages\pandas\core\dtypes\cast.py in astype_nansafe(arr, dtype, copy, skipna)
964 # work around NumPy brokenness, #1987
965 if np.issubdtype(dtype.type, np.integer):
--> 966 return lib.astype_intsafe(arr.ravel(), dtype).reshape(arr.shape)
967
968 # if we have a datetime/timedelta array of objects
pandas\_libs\lib.pyx in pandas._libs.lib.astype_intsafe()
ValueError: invalid literal for int() with base 10: '500.00'
If I change df['my_col'].astype(str).astype(int) to df['my_col'].astype(str).astype(float), it raises this error instead:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-134-65da7cbc042f> in <module>
4
5 df['my_col'] = df['my_col'].replace({'\$':''}, regex = True)
----> 6 df['my_col'].astype(str).astype(int)
7 df.describe(datetime_is_numeric=True)
~\AppData\Roaming\Python\Python38\site-packages\pandas\core\generic.py in astype(self, dtype, copy, errors)
5535 else:
5536 # else, only a single dtype is given
-> 5537 new_data = self._mgr.astype(dtype=dtype, copy=copy, errors=errors,)
5538 return self._constructor(new_data).__finalize__(self, method="astype")
5539
~\AppData\Roaming\Python\Python38\site-packages\pandas\core\internals\managers.py in astype(self, dtype, copy, errors)
565 self, dtype, copy: bool = False, errors: str = "raise"
566 ) -> "BlockManager":
--> 567 return self.apply("astype", dtype=dtype, copy=copy, errors=errors)
568
569 def convert(
~\AppData\Roaming\Python\Python38\site-packages\pandas\core\internals\managers.py in apply(self, f, align_keys, **kwargs)
394 applied = b.apply(f, **kwargs)
395 else:
--> 396 applied = getattr(b, f)(**kwargs)
397 result_blocks = _extend_blocks(applied, result_blocks)
398
~\AppData\Roaming\Python\Python38\site-packages\pandas\core\internals\blocks.py in astype(self, dtype, copy, errors)
588 vals1d = values.ravel()
589 try:
--> 590 values = astype_nansafe(vals1d, dtype, copy=True)
591 except (ValueError, TypeError):
592 # e.g. astype_nansafe can fail on object-dtype of strings
~\AppData\Roaming\Python\Python38\site-packages\pandas\core\dtypes\cast.py in astype_nansafe(arr, dtype, copy, skipna)
987 if copy or is_object_dtype(arr) or is_object_dtype(dtype):
988 # Explicit copy, or required since NumPy can't view from / to object.
--> 989 return arr.astype(dtype, copy=True)
990
991 return arr.view(dtype)
ValueError: could not convert string to float: '5,000.00'
Add one more pattern to the replace call so that the comma (thousands separator) is removed as well as the dollar sign:
df['my_col'] = df['my_col'].replace({'\$':'',',':''}, regex = True)

Lifetimes package: float() argument must be a string or a number, not 'Day'

I am getting the following error while using the summary_data_from_transaction_data utility function included in the Lifetimes Python package. Using pandas version 0.2 on Google Colab.
TypeError: float() argument must be a string or a number, not 'Day'
Any help will be much appreciated.
Code:
data_summary = summary_data_from_transaction_data(data_final, customer_id_col = "CustomerID", datetime_col = "InvoiceDate", monetary_value_col = "Sales", observation_period_end = "2011-12-09", freq = "D")
Stacktrace:
/usr/local/lib/python3.6/dist-packages/lifetimes/utils.py in summary_data_from_transaction_data(transactions, customer_id_col, datetime_col, monetary_value_col, datetime_format, observation_period_end, freq)
194 summary_columns.append('monetary_value')
195
--> 196 return customers[summary_columns].astype("float64")
197
198
/usr/local/lib/python3.6/dist-packages/pandas/core/generic.py in astype(self, dtype, copy, errors, **kwargs)
5880 # else, only a single dtype is given
5881 new_data = self._data.astype(
-> 5882 dtype=dtype, copy=copy, errors=errors, **kwargs
5883 )
5884 return self._constructor(new_data).__finalize__(self)
/usr/local/lib/python3.6/dist-packages/pandas/core/internals/managers.py in astype(self, dtype, **kwargs)
579
580 def astype(self, dtype, **kwargs):
--> 581 return self.apply("astype", dtype=dtype, **kwargs)
582
583 def convert(self, **kwargs):
/usr/local/lib/python3.6/dist-packages/pandas/core/internals/managers.py in apply(self, f, axes, filter, do_integrity_check, consolidate, **kwargs)
436 kwargs[k] = obj.reindex(b_items, axis=axis, copy=align_copy)
437
--> 438 applied = getattr(b, f)(**kwargs)
439 result_blocks = _extend_blocks(applied, result_blocks)
440
/usr/local/lib/python3.6/dist-packages/pandas/core/internals/blocks.py in astype(self, dtype, copy, errors, values, **kwargs)
557
558 def astype(self, dtype, copy=False, errors="raise", values=None, **kwargs):
--> 559 return self._astype(dtype, copy=copy, errors=errors, values=values, **kwargs)
560
561 def _astype(self, dtype, copy=False, errors="raise", values=None, **kwargs):
/usr/local/lib/python3.6/dist-packages/pandas/core/internals/blocks.py in _astype(self, dtype, copy, errors, values, **kwargs)
641 # _astype_nansafe works fine with 1-d only
642 vals1d = values.ravel()
--> 643 values = astype_nansafe(vals1d, dtype, copy=True, **kwargs)
644
645 # TODO(extension)
/usr/local/lib/python3.6/dist-packages/pandas/core/dtypes/cast.py in astype_nansafe(arr, dtype, copy, skipna)
727 if copy or is_object_dtype(arr) or is_object_dtype(dtype):
728 # Explicit copy, or required since NumPy can't view from / to object.
--> 729 return arr.astype(dtype, copy=True)
730
731 return arr.view(dtype)
TypeError: float() argument must be a string or a number, not 'Day'
Sample data in the data_final df and associated dtypes are as per the attachments.
sample data
dtypes
Thanks for any help.
Apologies folks - I was able to resolve my issue after updating the Lifetimes package to the latest 0.11.1 version in Colab!

The astype function is not working mysteriously

I'm trying to convert these values to float so that I can sum() them. The problem is that something odd is preventing the conversion.
Data:
cw= pd.DataFrame({ "campaign": "151515151515" ,
"Media_Cost": "$ 14,52" })
cw.dtypes
Media_Cost object
My attempts,
I tried each of the lines of code below, one at a time, and none of them works.
cw["Media_Cost"] = cw["Media_Cost"].str.replace('$','')
# Attempt 1
cw.Media_Cost = cw.Media_Cost.astype(float)
# Attempt 3
cw.Media_Cost = len(float(cw.Media_Cost))
# Attempt 4
cw.Media_Cost = cw.Media_Cost.apply(lambda x: float(cw.Media_Cost))
The error persists:
cw["Media_Cost"] = cw["Media_Cost"].str.replace('$','').str.replace(',', '.').astype(float)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-382-f5688d76abed> in <module>
1 # cw.Media_Cost = cw.Media_Cost.apply(lambda x: float(cw.Media_Cost))
----> 2 cw["Media_Cost"] = cw["Media_Cost"].str.replace('$','').str.replace(',', '.').astype(float)
3
4 # cw.Media_Cost = float(cw.Media_Cost)
5
~\Anaconda3\lib\site-packages\pandas\core\generic.py in astype(self, dtype, copy, errors, **kwargs)
5689 # else, only a single dtype is given
5690 new_data = self._data.astype(dtype=dtype, copy=copy, errors=errors,
-> 5691 **kwargs)
5692 return self._constructor(new_data).__finalize__(self)
5693
~\Anaconda3\lib\site-packages\pandas\core\internals\managers.py in astype(self, dtype, **kwargs)
529
530 def astype(self, dtype, **kwargs):
--> 531 return self.apply('astype', dtype=dtype, **kwargs)
532
533 def convert(self, **kwargs):
~\Anaconda3\lib\site-packages\pandas\core\internals\managers.py in apply(self, f, axes, filter, do_integrity_check, consolidate, **kwargs)
393 copy=align_copy)
394
--> 395 applied = getattr(b, f)(**kwargs)
396 result_blocks = _extend_blocks(applied, result_blocks)
397
~\Anaconda3\lib\site-packages\pandas\core\internals\blocks.py in astype(self, dtype, copy, errors, values, **kwargs)
532 def astype(self, dtype, copy=False, errors='raise', values=None, **kwargs):
533 return self._astype(dtype, copy=copy, errors=errors, values=values,
--> 534 **kwargs)
535
536 def _astype(self, dtype, copy=False, errors='raise', values=None,
~\Anaconda3\lib\site-packages\pandas\core\internals\blocks.py in _astype(self, dtype, copy, errors, values, **kwargs)
631
632 # _astype_nansafe works fine with 1-d only
--> 633 values = astype_nansafe(values.ravel(), dtype, copy=True)
634
635 # TODO(extension)
~\Anaconda3\lib\site-packages\pandas\core\dtypes\cast.py in astype_nansafe(arr, dtype, copy, skipna)
700 if copy or is_object_dtype(arr) or is_object_dtype(dtype):
701 # Explicit copy, or required since NumPy can't view from / to object.
--> 702 return arr.astype(dtype, copy=True)
703
704 return arr.view(dtype)
ValueError: could not convert string to float: '1.443.48'
You can try:
cw = pd.DataFrame({"campaign": "151515151515", "Media_Cost": "$ 1,443.48" }, index=[0])
cw["Media_Cost"] = cw["Media_Cost"].str.replace('$','').str.replace(',', '').astype(float)
cw.dtypes
Result:
campaign object
Media_Cost float64
dtype: object

Why aren't my objects being converted to strings?

Starting with a Python object, I get an error when I try to convert the string to a float using astype(str).astype(float).
I've used regular expressions to remove the units and spaces and removed rows with NA.
df['Length'] = df['Length'].astype(str).astype(float)
ValueError Traceback (most recent call last)
<ipython-input-137-724df1c0091a> in <module>
1 df['Length'] = df['Length'].astype(str).astype(float)
2 #df['Length'].astype(str).astype(float)
3 #df['Width'].astype(str).astype(float)
/anaconda3/lib/python3.7/site-packages/pandas/core/generic.py in astype(self, dtype, copy, errors, **kwargs)
5689 # else, only a single dtype is given
5690 new_data = self._data.astype(dtype=dtype, copy=copy, errors=errors,
-> 5691 **kwargs)
5692 return self._constructor(new_data).__finalize__(self)
5693
/anaconda3/lib/python3.7/site-packages/pandas/core/internals/managers.py in astype(self, dtype, **kwargs)
529
530 def astype(self, dtype, **kwargs):
--> 531 return self.apply('astype', dtype=dtype, **kwargs)
532
533 def convert(self, **kwargs):
/anaconda3/lib/python3.7/site-packages/pandas/core/internals/managers.py in apply(self, f, axes, filter, do_integrity_check, consolidate, **kwargs)
393 copy=align_copy)
394
--> 395 applied = getattr(b, f)(**kwargs)
396 result_blocks = _extend_blocks(applied, result_blocks)
397
/anaconda3/lib/python3.7/site-packages/pandas/core/internals/blocks.py in astype(self, dtype, copy, errors, values, **kwargs)
532 def astype(self, dtype, copy=False, errors='raise', values=None, **kwargs):
533 return self._astype(dtype, copy=copy, errors=errors, values=values,
--> 534 **kwargs)
535
536 def _astype(self, dtype, copy=False, errors='raise', values=None,
/anaconda3/lib/python3.7/site-packages/pandas/core/internals/blocks.py in _astype(self, dtype, copy, errors, values, **kwargs)
631
632 # _astype_nansafe works fine with 1-d only
--> 633 values = astype_nansafe(values.ravel(), dtype, copy=True)
634
635 # TODO(extension)
/anaconda3/lib/python3.7/site-packages/pandas/core/dtypes/cast.py in astype_nansafe(arr, dtype, copy, skipna)
700 if copy or is_object_dtype(arr) or is_object_dtype(dtype):
701 # Explicit copy, or required since NumPy can't view from / to object.
--> 702 return arr.astype(dtype, copy=True)
703
704 return arr.view(dtype)
ValueError: could not convert string to float:
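As John pointed out, the error occurs when converting a string to a float. The message shows nothing after the colon, which suggests that the offending value is an empty string.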
To visually check for empty strings use df['Length'] == ''.
To count the number of empty strings use: sum(df['Length'] == '')
To drop the rows with empty strings use: df = df[df['Length'] != '']. This will modify your whole data frame and not just df['Length'].
Hope that helps.

Error setting an array element with a sequence, when using Pandas, astype (float)

I have a large dataset which I am reading from a text file, and I want to perform an operation on it. I use
T[fields[0:-1]]=T[fields[0:-1]].astype(float)
to be sure that all the values are float. I get the error "setting an array element with a sequence" on one of the columns. I replaced the 'NaN' strings with np.nan using T.replace('NaN', np.nan), but I still get the same error. I used
dtypeCount =[T.iloc[:,i].apply(type).value_counts() for i in range(T.shape[1])]
to determine the types of the data in that column, and these are the results:
Name: PD_PRESSURE, dtype: int64, <class 'NoneType'> 3676479
<class 'float'> 192217
Due to the size of the dataset, I can't figure out where this is coming from. Any thoughts on how I can solve this, or how I can find what is causing it?
Thanks in advance.
Update: Full Error message
ValueError Traceback (most recent call last)
<ipython-input-33-fa0a78194654> in <module>()
162 if Aggregate_Flag==1:
163 # This line make sure that all the data are defined as float
--> 164 T[fields[0:-1]]=T[fields[0:-1]].astype(float)
165 # defining the function inside the loop is not the best practice. However, since the number of iterations
166 #( number of file are small), I put it insider the loop to improve the readibility of the code.
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\util\_decorators.py in wrapper(*args, **kwargs)
116 else:
117 kwargs[new_arg_name] = new_arg_value
--> 118 return func(*args, **kwargs)
119 return wrapper
120 return _deprecate_kwarg
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\generic.py in astype(self, dtype, copy, errors, **kwargs)
4002 # else, only a single dtype is given
4003 new_data = self._data.astype(dtype=dtype, copy=copy, errors=errors,
-> 4004 **kwargs)
4005 return self._constructor(new_data).__finalize__(self)
4006
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\internals.py in astype(self, dtype, **kwargs)
3460
3461 def astype(self, dtype, **kwargs):
-> 3462 return self.apply('astype', dtype=dtype, **kwargs)
3463
3464 def convert(self, **kwargs):
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\internals.py in apply(self, f, axes, filter, do_integrity_check, consolidate, **kwargs)
3327
3328 kwargs['mgr'] = self
-> 3329 applied = getattr(b, f)(**kwargs)
3330 result_blocks = _extend_blocks(applied, result_blocks)
3331
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\internals.py in astype(self, dtype, copy, errors, values, **kwargs)
542 def astype(self, dtype, copy=False, errors='raise', values=None, **kwargs):
543 return self._astype(dtype, copy=copy, errors=errors, values=values,
--> 544 **kwargs)
545
546 def _astype(self, dtype, copy=False, errors='raise', values=None,
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\internals.py in _astype(self, dtype, copy, errors, values, klass, mgr, **kwargs)
623
624 # _astype_nansafe works fine with 1-d only
--> 625 values = astype_nansafe(values.ravel(), dtype, copy=True)
626 values = values.reshape(self.shape)
627
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\dtypes\cast.py in astype_nansafe(arr, dtype, copy)
701
702 if copy:
--> 703 return arr.astype(dtype)
704 return arr.view(dtype)
705
ValueError: setting an array element with a sequence.
If I exclude column PD_PRESSURE, I don't receive any error.
I also tried T['PD_PRESSURE'].dtype(float) and I get the error, but for the other columns it works fine.
If I run T[fields[0:-1]]=T[fields[0:-1]] by itself, it works fine. Based on this, I think the error is probably coming from the PD_PRESSURE column.
T.info() returns
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 3868696 entries, 2000-01-01 to 2017-04-11
columns (total 6 columns):
A object
B object
C object
D object
PD_PRESSURE object
F object
dtypes: object(6)
memory usage: 1.0+ GB

Categories

Resources