Starting with a column of Python object dtype, I'm getting an error when I try to convert the strings to floats using astype(str).astype(float).
I've used regular expressions to remove the units and spaces and removed rows with NA.
df['Length'] = df['Length'].astype(str).astype(float)
ValueError Traceback (most recent call last)
<ipython-input-137-724df1c0091a> in <module>
1 df['Length'] = df['Length'].astype(str).astype(float)
2 #df['Length'].astype(str).astype(float)
3 #df['Width'].astype(str).astype(float)
/anaconda3/lib/python3.7/site-packages/pandas/core/generic.py in astype(self, dtype, copy, errors, **kwargs)
5689 # else, only a single dtype is given
5690 new_data = self._data.astype(dtype=dtype, copy=copy, errors=errors,
-> 5691 **kwargs)
5692 return self._constructor(new_data).__finalize__(self)
5693
/anaconda3/lib/python3.7/site-packages/pandas/core/internals/managers.py in astype(self, dtype, **kwargs)
529
530 def astype(self, dtype, **kwargs):
--> 531 return self.apply('astype', dtype=dtype, **kwargs)
532
533 def convert(self, **kwargs):
/anaconda3/lib/python3.7/site-packages/pandas/core/internals/managers.py in apply(self, f, axes, filter, do_integrity_check, consolidate, **kwargs)
393 copy=align_copy)
394
--> 395 applied = getattr(b, f)(**kwargs)
396 result_blocks = _extend_blocks(applied, result_blocks)
397
/anaconda3/lib/python3.7/site-packages/pandas/core/internals/blocks.py in astype(self, dtype, copy, errors, values, **kwargs)
532 def astype(self, dtype, copy=False, errors='raise', values=None, **kwargs):
533 return self._astype(dtype, copy=copy, errors=errors, values=values,
--> 534 **kwargs)
535
536 def _astype(self, dtype, copy=False, errors='raise', values=None,
/anaconda3/lib/python3.7/site-packages/pandas/core/internals/blocks.py in _astype(self, dtype, copy, errors, values, **kwargs)
631
632 # _astype_nansafe works fine with 1-d only
--> 633 values = astype_nansafe(values.ravel(), dtype, copy=True)
634
635 # TODO(extension)
/anaconda3/lib/python3.7/site-packages/pandas/core/dtypes/cast.py in astype_nansafe(arr, dtype, copy, skipna)
700 if copy or is_object_dtype(arr) or is_object_dtype(dtype):
701 # Explicit copy, or required since NumPy can't view from / to object.
--> 702 return arr.astype(dtype, copy=True)
703
704 return arr.view(dtype)
ValueError: could not convert string to float:
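As John pointed out, the error occurs when converting a string to a float. Nothing is printed after the colon in "could not convert string to float:", which means the offending value is an empty string.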
To visually check for empty strings use df['Length'] == ''.
To count the number of empty strings use: sum(df['Length'] == '')
To drop the rows with empty strings use: df = df[df['Length'] != '']. This will modify your whole data frame and not just df['Length'].
Hope that helps.
Related
I wrote this code: df['Liquid Milk'] = df['Liquid Milk'].replace("", np.nan).astype('float64')
I got the error below and I'm not sure what is causing it. I have tried many different approaches but still get the same error. Any help is appreciated.
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-52-607dcacd5a1a> in <module>
----> 1 m['Liquid Milk(Mil Litres)']=m['Liquid Milk(Mil Litres)'].replace("", np.nan).astype('float64')
2
3
4
/usr/local/lib/python3.6/dist-packages/pandas/core/generic.py in astype(self, dtype, copy, errors, **kwargs)
5679 # else, only a single dtype is given
5680 new_data = self._data.astype(dtype=dtype, copy=copy, errors=errors,
-> 5681 **kwargs)
5682 return self._constructor(new_data).__finalize__(self)
5683
/usr/local/lib/python3.6/dist-packages/pandas/core/internals/managers.py in astype(self, dtype, **kwargs)
529
530 def astype(self, dtype, **kwargs):
--> 531 return self.apply('astype', dtype=dtype, **kwargs)
532
533 def convert(self, **kwargs):
/usr/local/lib/python3.6/dist-packages/pandas/core/internals/managers.py in apply(self, f, axes, filter, do_integrity_check, consolidate, **kwargs)
393 copy=align_copy)
394
--> 395 applied = getattr(b, f)(**kwargs)
396 result_blocks = _extend_blocks(applied, result_blocks)
397
/usr/local/lib/python3.6/dist-packages/pandas/core/internals/blocks.py in astype(self, dtype, copy, errors, values, **kwargs)
532 def astype(self, dtype, copy=False, errors='raise', values=None, **kwargs):
533 return self._astype(dtype, copy=copy, errors=errors, values=values,
--> 534 **kwargs)
535
536 def _astype(self, dtype, copy=False, errors='raise', values=None,
/usr/local/lib/python3.6/dist-packages/pandas/core/internals/blocks.py in _astype(self, dtype, copy, errors, values, **kwargs)
631
632 # _astype_nansafe works fine with 1-d only
--> 633 values = astype_nansafe(values.ravel(), dtype, copy=True)
634
635 # TODO(extension)
/usr/local/lib/python3.6/dist-packages/pandas/core/dtypes/cast.py in astype_nansafe(arr, dtype, copy, skipna)
700 if copy or is_object_dtype(arr) or is_object_dtype(dtype):
701 # Explicit copy, or required since NumPy can't view from / to object.
--> 702 return arr.astype(dtype, copy=True)
703
704 return arr.view(dtype)
ValueError: could not convert string to float: '571,2'
The integer and fractional parts of a float must be separated by a period (.), not a comma (,). Replace every comma with a period before converting.
float("571.2")
would work
float("571,2")
would fail.
I have a table with columns of data type object and int.
One of them is dollar amount with dollar sign($) and comma separator. I would like to use describe() to summarise the dataframe so I tried to read the file by taking into account the $ sign, then convert the object into integer:
df= pd.read_excel(r'C:\Users\xxxx\df.xlsx','my_df' ,engine="openpyxl", thousands=',')
df['my_col'] = df['my_col'].replace({'\$':''}, regex = True)
df['my_col'].astype(str).astype(int)
df.describe(datetime_is_numeric=True)
but it raised an error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-133-2011d1ad889e> in <module>
4
5 df['my_col'] = df['my_col'].replace({'\$':''}, regex = True)
----> 6 df['my_col'].astype(str).astype(int)
7 df.describe(datetime_is_numeric=True)
~\AppData\Roaming\Python\Python38\site-packages\pandas\core\generic.py in astype(self, dtype, copy, errors)
5535 else:
5536 # else, only a single dtype is given
-> 5537 new_data = self._mgr.astype(dtype=dtype, copy=copy, errors=errors,)
5538 return self._constructor(new_data).__finalize__(self, method="astype")
5539
~\AppData\Roaming\Python\Python38\site-packages\pandas\core\internals\managers.py in astype(self, dtype, copy, errors)
565 self, dtype, copy: bool = False, errors: str = "raise"
566 ) -> "BlockManager":
--> 567 return self.apply("astype", dtype=dtype, copy=copy, errors=errors)
568
569 def convert(
~\AppData\Roaming\Python\Python38\site-packages\pandas\core\internals\managers.py in apply(self, f, align_keys, **kwargs)
394 applied = b.apply(f, **kwargs)
395 else:
--> 396 applied = getattr(b, f)(**kwargs)
397 result_blocks = _extend_blocks(applied, result_blocks)
398
~\AppData\Roaming\Python\Python38\site-packages\pandas\core\internals\blocks.py in astype(self, dtype, copy, errors)
588 vals1d = values.ravel()
589 try:
--> 590 values = astype_nansafe(vals1d, dtype, copy=True)
591 except (ValueError, TypeError):
592 # e.g. astype_nansafe can fail on object-dtype of strings
~\AppData\Roaming\Python\Python38\site-packages\pandas\core\dtypes\cast.py in astype_nansafe(arr, dtype, copy, skipna)
964 # work around NumPy brokenness, #1987
965 if np.issubdtype(dtype.type, np.integer):
--> 966 return lib.astype_intsafe(arr.ravel(), dtype).reshape(arr.shape)
967
968 # if we have a datetime/timedelta array of objects
pandas\_libs\lib.pyx in pandas._libs.lib.astype_intsafe()
ValueError: invalid literal for int() with base 10: '500.00'
If I change df['my_col'].astype(str).astype(int) to df['my_col'].astype(str).astype(float), it raises this error instead:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-134-65da7cbc042f> in <module>
4
5 df['my_col'] = df['my_col'].replace({'\$':''}, regex = True)
----> 6 df['my_col'].astype(str).astype(int)
7 df.describe(datetime_is_numeric=True)
~\AppData\Roaming\Python\Python38\site-packages\pandas\core\generic.py in astype(self, dtype, copy, errors)
5535 else:
5536 # else, only a single dtype is given
-> 5537 new_data = self._mgr.astype(dtype=dtype, copy=copy, errors=errors,)
5538 return self._constructor(new_data).__finalize__(self, method="astype")
5539
~\AppData\Roaming\Python\Python38\site-packages\pandas\core\internals\managers.py in astype(self, dtype, copy, errors)
565 self, dtype, copy: bool = False, errors: str = "raise"
566 ) -> "BlockManager":
--> 567 return self.apply("astype", dtype=dtype, copy=copy, errors=errors)
568
569 def convert(
~\AppData\Roaming\Python\Python38\site-packages\pandas\core\internals\managers.py in apply(self, f, align_keys, **kwargs)
394 applied = b.apply(f, **kwargs)
395 else:
--> 396 applied = getattr(b, f)(**kwargs)
397 result_blocks = _extend_blocks(applied, result_blocks)
398
~\AppData\Roaming\Python\Python38\site-packages\pandas\core\internals\blocks.py in astype(self, dtype, copy, errors)
588 vals1d = values.ravel()
589 try:
--> 590 values = astype_nansafe(vals1d, dtype, copy=True)
591 except (ValueError, TypeError):
592 # e.g. astype_nansafe can fail on object-dtype of strings
~\AppData\Roaming\Python\Python38\site-packages\pandas\core\dtypes\cast.py in astype_nansafe(arr, dtype, copy, skipna)
987 if copy or is_object_dtype(arr) or is_object_dtype(dtype):
988 # Explicit copy, or required since NumPy can't view from / to object.
--> 989 return arr.astype(dtype, copy=True)
990
991 return arr.view(dtype)
ValueError: could not convert string to float: '5,000.00'
Change the replace call by adding one more pattern so that the commas are removed as well:
df['my_col'] = df['my_col'].replace({'\$':'',',':''}, regex = True)
I'm getting the following error while using the summary_data_from_transaction_data utility function included in the Lifetimes Python package. Using pandas version 0.2 on Google Colab.
TypeError: float() argument must be a string or a number, not 'Day'
Any help will be much appreciated.
Code:
data_summary = summary_data_from_transaction_data(data_final, customer_id_col = "CustomerID", datetime_col = "InvoiceDate", monetary_value_col = "Sales", observation_period_end = "2011-12-09", freq = "D")
Stacktrace:
/usr/local/lib/python3.6/dist-packages/lifetimes/utils.py in summary_data_from_transaction_data(transactions, customer_id_col, datetime_col, monetary_value_col, datetime_format, observation_period_end, freq)
194 summary_columns.append('monetary_value')
195
--> 196 return customers[summary_columns].astype("float64")
197
198
/usr/local/lib/python3.6/dist-packages/pandas/core/generic.py in astype(self, dtype, copy, errors, **kwargs)
5880 # else, only a single dtype is given
5881 new_data = self._data.astype(
-> 5882 dtype=dtype, copy=copy, errors=errors, **kwargs
5883 )
5884 return self._constructor(new_data).__finalize__(self)
/usr/local/lib/python3.6/dist-packages/pandas/core/internals/managers.py in astype(self, dtype, **kwargs)
579
580 def astype(self, dtype, **kwargs):
--> 581 return self.apply("astype", dtype=dtype, **kwargs)
582
583 def convert(self, **kwargs):
/usr/local/lib/python3.6/dist-packages/pandas/core/internals/managers.py in apply(self, f, axes, filter, do_integrity_check, consolidate, **kwargs)
436 kwargs[k] = obj.reindex(b_items, axis=axis, copy=align_copy)
437
--> 438 applied = getattr(b, f)(**kwargs)
439 result_blocks = _extend_blocks(applied, result_blocks)
440
/usr/local/lib/python3.6/dist-packages/pandas/core/internals/blocks.py in astype(self, dtype, copy, errors, values, **kwargs)
557
558 def astype(self, dtype, copy=False, errors="raise", values=None, **kwargs):
--> 559 return self._astype(dtype, copy=copy, errors=errors, values=values, **kwargs)
560
561 def _astype(self, dtype, copy=False, errors="raise", values=None, **kwargs):
/usr/local/lib/python3.6/dist-packages/pandas/core/internals/blocks.py in _astype(self, dtype, copy, errors, values, **kwargs)
641 # _astype_nansafe works fine with 1-d only
642 vals1d = values.ravel()
--> 643 values = astype_nansafe(vals1d, dtype, copy=True, **kwargs)
644
645 # TODO(extension)
/usr/local/lib/python3.6/dist-packages/pandas/core/dtypes/cast.py in astype_nansafe(arr, dtype, copy, skipna)
727 if copy or is_object_dtype(arr) or is_object_dtype(dtype):
728 # Explicit copy, or required since NumPy can't view from / to object.
--> 729 return arr.astype(dtype, copy=True)
730
731 return arr.view(dtype)
TypeError: float() argument must be a string or a number, not 'Day'
Sample data in the data_final df and associated dtypes are as per the attachments.
sample data
dtypes
Thanks for any help.
Apologies folks - I was able to resolve my issue after updating the Lifetimes package to the latest 0.11.1 version in Colab!
I'm trying to convert these values to float so that I can sum() them. The problem is that something odd is preventing the conversion.
Data:
cw= pd.DataFrame({ "campaign": "151515151515" ,
"Media_Cost": "$ 14,52" })
cw.dtypes
Media_Cost object
My attempts,
I tried each of the lines of code below, one at a time; mysteriously, none of them works.
cw["Media_Cost"] = cw["Media_Cost"].str.replace('$','')
# Attempt 1
cw.Media_Cost = cw.Media_Cost.astype(float)
# Attempt 3
cw.Media_Cost = len(float(cw.Media_Cost))
# Attempt 4
cw.Media_Cost = cw.Media_Cost.apply(lambda x: float(cw.Media_Cost))
The error persists:
cw["Media_Cost"] = cw["Media_Cost"].str.replace('$','').str.replace(',', '.').astype(float)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-382-f5688d76abed> in <module>
1 # cw.Media_Cost = cw.Media_Cost.apply(lambda x: float(cw.Media_Cost))
----> 2 cw["Media_Cost"] = cw["Media_Cost"].str.replace('$','').str.replace(',', '.').astype(float)
3
4 # cw.Media_Cost = float(cw.Media_Cost)
5
~\Anaconda3\lib\site-packages\pandas\core\generic.py in astype(self, dtype, copy, errors, **kwargs)
5689 # else, only a single dtype is given
5690 new_data = self._data.astype(dtype=dtype, copy=copy, errors=errors,
-> 5691 **kwargs)
5692 return self._constructor(new_data).__finalize__(self)
5693
~\Anaconda3\lib\site-packages\pandas\core\internals\managers.py in astype(self, dtype, **kwargs)
529
530 def astype(self, dtype, **kwargs):
--> 531 return self.apply('astype', dtype=dtype, **kwargs)
532
533 def convert(self, **kwargs):
~\Anaconda3\lib\site-packages\pandas\core\internals\managers.py in apply(self, f, axes, filter, do_integrity_check, consolidate, **kwargs)
393 copy=align_copy)
394
--> 395 applied = getattr(b, f)(**kwargs)
396 result_blocks = _extend_blocks(applied, result_blocks)
397
~\Anaconda3\lib\site-packages\pandas\core\internals\blocks.py in astype(self, dtype, copy, errors, values, **kwargs)
532 def astype(self, dtype, copy=False, errors='raise', values=None, **kwargs):
533 return self._astype(dtype, copy=copy, errors=errors, values=values,
--> 534 **kwargs)
535
536 def _astype(self, dtype, copy=False, errors='raise', values=None,
~\Anaconda3\lib\site-packages\pandas\core\internals\blocks.py in _astype(self, dtype, copy, errors, values, **kwargs)
631
632 # _astype_nansafe works fine with 1-d only
--> 633 values = astype_nansafe(values.ravel(), dtype, copy=True)
634
635 # TODO(extension)
~\Anaconda3\lib\site-packages\pandas\core\dtypes\cast.py in astype_nansafe(arr, dtype, copy, skipna)
700 if copy or is_object_dtype(arr) or is_object_dtype(dtype):
701 # Explicit copy, or required since NumPy can't view from / to object.
--> 702 return arr.astype(dtype, copy=True)
703
704 return arr.view(dtype)
ValueError: could not convert string to float: '1.443.48'
You can try:
cw = pd.DataFrame({"campaign": "151515151515", "Media_Cost": "$ 1,443.48" }, index=[0])
cw["Media_Cost"] = cw["Media_Cost"].str.replace('$','').str.replace(',', '').astype(float)
cw.dtypes
Result:
campaign object
Media_Cost float64
dtype: object
My dataframe contains a categorical feature 'Street' which can take 1 of the 2 possible values 'Grvl' or 'Pave'. I am trying to convert this categorical feature in to numerical value before fitting ML algorithm. My code looks like this
dataset['Street']=dataset['Street'].map({'Grvl':0,'Pave':1}).astype(int)
I have filled missing values with the most occurring value in the dataframe
dataset['Street'].isnull().sum()
I am getting following error
ValueError Traceback (most recent call last)
<ipython-input-59-86f0b031335a> in <module>()
2 print dataset['Street'].isnull().sum()
3 #dataset['MSZoning'] = dataset['MSZoning'].map( {'A': 0, 'C': 1,'FV': 2,'I':3,'RH':4,'RL':5,'RP':6,'RM':7} ).astype(int)
----> 4 dataset['Street']=dataset['Street'].map({'Grvl':0,'Pave':1}).astype(int)
5 dataset['LotShape']=dataset['LotShape'].map({'Reg':0,'IR1':1,'IR2':2,'IR3':3}).astype(int)
6 dataset['LandContour']=dataset['LandContour'].map({'Lvl':0,'Bnk':1,'HLS':2,'Low':3}).astype(int)
C:\Users\JAYASHREE\Anaconda2\lib\site-packages\pandas\core\generic.pyc in astype(self, dtype, copy, raise_on_error, **kwargs)
2948
2949 mgr = self._data.astype(dtype=dtype, copy=copy,
-> 2950 raise_on_error=raise_on_error, **kwargs)
2951 return self._constructor(mgr).__finalize__(self)
2952
C:\Users\JAYASHREE\Anaconda2\lib\site-packages\pandas\core\internals.pyc in astype(self, dtype, **kwargs)
2936
2937 def astype(self, dtype, **kwargs):
-> 2938 return self.apply('astype', dtype=dtype, **kwargs)
2939
2940 def convert(self, **kwargs):
C:\Users\JAYASHREE\Anaconda2\lib\site-packages\pandas\core\internals.pyc in apply(self, f, axes, filter, do_integrity_check, consolidate, raw, **kwargs)
2888
2889 kwargs['mgr'] = self
-> 2890 applied = getattr(b, f)(**kwargs)
2891 result_blocks = _extend_blocks(applied, result_blocks)
2892
C:\Users\JAYASHREE\Anaconda2\lib\site-packages\pandas\core\internals.pyc in astype(self, dtype, copy, raise_on_error, values, **kwargs)
432 **kwargs):
433 return self._astype(dtype, copy=copy, raise_on_error=raise_on_error,
--> 434 values=values, **kwargs)
435
436 def _astype(self, dtype, copy=False, raise_on_error=True, values=None,
C:\Users\JAYASHREE\Anaconda2\lib\site-packages\pandas\core\internals.pyc in _astype(self, dtype, copy, raise_on_error, values, klass, mgr, **kwargs)
475
476 # _astype_nansafe works fine with 1-d only
--> 477 values = com._astype_nansafe(values.ravel(), dtype, copy=True)
478 values = values.reshape(self.shape)
479
C:\Users\JAYASHREE\Anaconda2\lib\site-packages\pandas\core\common.pyc in _astype_nansafe(arr, dtype, copy)
1912
1913 if np.isnan(arr).any():
-> 1914 raise ValueError('Cannot convert NA to integer')
1915 elif arr.dtype == np.object_ and np.issubdtype(dtype.type, np.integer):
1916 # work around NumPy brokenness, #1987
ValueError: Cannot convert NA to integer
You have NaN values in your dataframe! You cannot convert a Series to integer with astype(int) while it contains missing values, so you should fill the missing values first.
dataset['Street'].isnull().sum() doesn't fill missing values; it only counts them.
You can fill them using pandas.DataFrame.fillna or sklearn.preprocessing.Imputer.