Why aren't my objects being converted to strings? - python

Starting with a column of Python objects, I'm getting an error when I try to convert the strings to floats using astype(str).astype(float).
I've used regular expressions to remove the units and spaces and removed rows with NA.
df['Length'] = df['Length'].astype(str).astype(float)
ValueError Traceback (most recent call last)
<ipython-input-137-724df1c0091a> in <module>
1 df['Length'] = df['Length'].astype(str).astype(float)
2 #df['Length'].astype(str).astype(float)
3 #df['Width'].astype(str).astype(float)
/anaconda3/lib/python3.7/site-packages/pandas/core/generic.py in astype(self, dtype, copy, errors, **kwargs)
5689 # else, only a single dtype is given
5690 new_data = self._data.astype(dtype=dtype, copy=copy, errors=errors,
-> 5691 **kwargs)
5692 return self._constructor(new_data).__finalize__(self)
5693
/anaconda3/lib/python3.7/site-packages/pandas/core/internals/managers.py in astype(self, dtype, **kwargs)
529
530 def astype(self, dtype, **kwargs):
--> 531 return self.apply('astype', dtype=dtype, **kwargs)
532
533 def convert(self, **kwargs):
/anaconda3/lib/python3.7/site-packages/pandas/core/internals/managers.py in apply(self, f, axes, filter, do_integrity_check, consolidate, **kwargs)
393 copy=align_copy)
394
--> 395 applied = getattr(b, f)(**kwargs)
396 result_blocks = _extend_blocks(applied, result_blocks)
397
/anaconda3/lib/python3.7/site-packages/pandas/core/internals/blocks.py in astype(self, dtype, copy, errors, values, **kwargs)
532 def astype(self, dtype, copy=False, errors='raise', values=None, **kwargs):
533 return self._astype(dtype, copy=copy, errors=errors, values=values,
--> 534 **kwargs)
535
536 def _astype(self, dtype, copy=False, errors='raise', values=None,
/anaconda3/lib/python3.7/site-packages/pandas/core/internals/blocks.py in _astype(self, dtype, copy, errors, values, **kwargs)
631
632 # _astype_nansafe works fine with 1-d only
--> 633 values = astype_nansafe(values.ravel(), dtype, copy=True)
634
635 # TODO(extension)
/anaconda3/lib/python3.7/site-packages/pandas/core/dtypes/cast.py in astype_nansafe(arr, dtype, copy, skipna)
700 if copy or is_object_dtype(arr) or is_object_dtype(dtype):
701 # Explicit copy, or required since NumPy can't view from / to object.
--> 702 return arr.astype(dtype, copy=True)
703
704 return arr.view(dtype)
ValueError: could not convert string to float:

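As John pointed out, the error occurs while converting a string to a float. Note that the message shows nothing after the colon, which suggests the offending value is an empty string.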
To visually check for empty strings use df['Length'] == ''.
To count the number of empty strings use: sum(df['Length'] == '')
To drop the rows with empty strings use: df = df[df['Length'] != '']. Note that this removes those rows from the whole data frame, not just from df['Length'].
Hope that helps.

Related

ValueError: could not convert string to float: '571,2'

I wrote this code: df['Liquid Milk'] = df['Liquid Milk'].replace("", np.nan).astype('float64')
I got the error below and am not sure where it comes from. I have tried many different ways, but I still get the same error. Any help is appreciated.
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-52-607dcacd5a1a> in <module>
----> 1 m['Liquid Milk(Mil Litres)']=m['Liquid Milk(Mil Litres)'].replace("", np.nan).astype('float64')
2
3
4
/usr/local/lib/python3.6/dist-packages/pandas/core/generic.py in astype(self, dtype, copy, errors, **kwargs)
5679 # else, only a single dtype is given
5680 new_data = self._data.astype(dtype=dtype, copy=copy, errors=errors,
-> 5681 **kwargs)
5682 return self._constructor(new_data).__finalize__(self)
5683
/usr/local/lib/python3.6/dist-packages/pandas/core/internals/managers.py in astype(self, dtype, **kwargs)
529
530 def astype(self, dtype, **kwargs):
--> 531 return self.apply('astype', dtype=dtype, **kwargs)
532
533 def convert(self, **kwargs):
/usr/local/lib/python3.6/dist-packages/pandas/core/internals/managers.py in apply(self, f, axes, filter, do_integrity_check, consolidate, **kwargs)
393 copy=align_copy)
394
--> 395 applied = getattr(b, f)(**kwargs)
396 result_blocks = _extend_blocks(applied, result_blocks)
397
/usr/local/lib/python3.6/dist-packages/pandas/core/internals/blocks.py in astype(self, dtype, copy, errors, values, **kwargs)
532 def astype(self, dtype, copy=False, errors='raise', values=None, **kwargs):
533 return self._astype(dtype, copy=copy, errors=errors, values=values,
--> 534 **kwargs)
535
536 def _astype(self, dtype, copy=False, errors='raise', values=None,
/usr/local/lib/python3.6/dist-packages/pandas/core/internals/blocks.py in _astype(self, dtype, copy, errors, values, **kwargs)
631
632 # _astype_nansafe works fine with 1-d only
--> 633 values = astype_nansafe(values.ravel(), dtype, copy=True)
634
635 # TODO(extension)
/usr/local/lib/python3.6/dist-packages/pandas/core/dtypes/cast.py in astype_nansafe(arr, dtype, copy, skipna)
700 if copy or is_object_dtype(arr) or is_object_dtype(dtype):
701 # Explicit copy, or required since NumPy can't view from / to object.
--> 702 return arr.astype(dtype, copy=True)
703
704 return arr.view(dtype)
ValueError: could not convert string to float: '571,2'
The integer and fractional parts of a float must be separated by a period (.), not a comma (,). Replace every comma with a period before converting, e.g. with .str.replace(',', '.').
float("571.2")
would work
float("571,2")
would fail.

Convert comma separator objects to numeric in Pandas

I have a table with columns of data type object and int.
One of them is dollar amount with dollar sign($) and comma separator. I would like to use describe() to summarise the dataframe so I tried to read the file by taking into account the $ sign, then convert the object into integer:
df= pd.read_excel(r'C:\Users\xxxx\df.xlsx','my_df' ,engine="openpyxl", thousands=',')
df['my_col'] = df['my_col'].replace({'\$':''}, regex = True)
df['my_col'].astype(str).astype(int)
df.describe(datetime_is_numeric=True)
but it raised an error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-133-2011d1ad889e> in <module>
4
5 df['my_col'] = df['my_col'].replace({'\$':''}, regex = True)
----> 6 df['my_col'].astype(str).astype(int)
7 df.describe(datetime_is_numeric=True)
~\AppData\Roaming\Python\Python38\site-packages\pandas\core\generic.py in astype(self, dtype, copy, errors)
5535 else:
5536 # else, only a single dtype is given
-> 5537 new_data = self._mgr.astype(dtype=dtype, copy=copy, errors=errors,)
5538 return self._constructor(new_data).__finalize__(self, method="astype")
5539
~\AppData\Roaming\Python\Python38\site-packages\pandas\core\internals\managers.py in astype(self, dtype, copy, errors)
565 self, dtype, copy: bool = False, errors: str = "raise"
566 ) -> "BlockManager":
--> 567 return self.apply("astype", dtype=dtype, copy=copy, errors=errors)
568
569 def convert(
~\AppData\Roaming\Python\Python38\site-packages\pandas\core\internals\managers.py in apply(self, f, align_keys, **kwargs)
394 applied = b.apply(f, **kwargs)
395 else:
--> 396 applied = getattr(b, f)(**kwargs)
397 result_blocks = _extend_blocks(applied, result_blocks)
398
~\AppData\Roaming\Python\Python38\site-packages\pandas\core\internals\blocks.py in astype(self, dtype, copy, errors)
588 vals1d = values.ravel()
589 try:
--> 590 values = astype_nansafe(vals1d, dtype, copy=True)
591 except (ValueError, TypeError):
592 # e.g. astype_nansafe can fail on object-dtype of strings
~\AppData\Roaming\Python\Python38\site-packages\pandas\core\dtypes\cast.py in astype_nansafe(arr, dtype, copy, skipna)
964 # work around NumPy brokenness, #1987
965 if np.issubdtype(dtype.type, np.integer):
--> 966 return lib.astype_intsafe(arr.ravel(), dtype).reshape(arr.shape)
967
968 # if we have a datetime/timedelta array of objects
pandas\_libs\lib.pyx in pandas._libs.lib.astype_intsafe()
ValueError: invalid literal for int() with base 10: '500.00'
If I change df['my_col'].astype(str).astype(int) to df['my_col'].astype(str).astype(float), it raises a different error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-134-65da7cbc042f> in <module>
4
5 df['my_col'] = df['my_col'].replace({'\$':''}, regex = True)
----> 6 df['my_col'].astype(str).astype(int)
7 df.describe(datetime_is_numeric=True)
~\AppData\Roaming\Python\Python38\site-packages\pandas\core\generic.py in astype(self, dtype, copy, errors)
5535 else:
5536 # else, only a single dtype is given
-> 5537 new_data = self._mgr.astype(dtype=dtype, copy=copy, errors=errors,)
5538 return self._constructor(new_data).__finalize__(self, method="astype")
5539
~\AppData\Roaming\Python\Python38\site-packages\pandas\core\internals\managers.py in astype(self, dtype, copy, errors)
565 self, dtype, copy: bool = False, errors: str = "raise"
566 ) -> "BlockManager":
--> 567 return self.apply("astype", dtype=dtype, copy=copy, errors=errors)
568
569 def convert(
~\AppData\Roaming\Python\Python38\site-packages\pandas\core\internals\managers.py in apply(self, f, align_keys, **kwargs)
394 applied = b.apply(f, **kwargs)
395 else:
--> 396 applied = getattr(b, f)(**kwargs)
397 result_blocks = _extend_blocks(applied, result_blocks)
398
~\AppData\Roaming\Python\Python38\site-packages\pandas\core\internals\blocks.py in astype(self, dtype, copy, errors)
588 vals1d = values.ravel()
589 try:
--> 590 values = astype_nansafe(vals1d, dtype, copy=True)
591 except (ValueError, TypeError):
592 # e.g. astype_nansafe can fail on object-dtype of strings
~\AppData\Roaming\Python\Python38\site-packages\pandas\core\dtypes\cast.py in astype_nansafe(arr, dtype, copy, skipna)
987 if copy or is_object_dtype(arr) or is_object_dtype(dtype):
988 # Explicit copy, or required since NumPy can't view from / to object.
--> 989 return arr.astype(dtype, copy=True)
990
991 return arr.view(dtype)
ValueError: could not convert string to float: '5,000.00'
Change the replace call by adding one more pattern, so that the thousands separator (,) is removed along with the dollar sign:
df['my_col'] = df['my_col'].replace({'\$':'',',':''}, regex = True)

Lifetimes package: float() argument must be a string or a number, not 'Day'

Getting the following error while using the summary_data_from_transaction_data utility function included within the Lifetimes python package. Using pandas version 0.2 on Google Colab.
TypeError: float() argument must be a string or a number, not 'Day'
Any help will be much appreciated.
Code:
data_summary = summary_data_from_transaction_data(data_final, customer_id_col = "CustomerID", datetime_col = "InvoiceDate", monetary_value_col = "Sales", observation_period_end = "2011-12-09", freq = "D")
Stacktrace:
/usr/local/lib/python3.6/dist-packages/lifetimes/utils.py in summary_data_from_transaction_data(transactions, customer_id_col, datetime_col, monetary_value_col, datetime_format, observation_period_end, freq)
194 summary_columns.append('monetary_value')
195
--> 196 return customers[summary_columns].astype("float64")
197
198
/usr/local/lib/python3.6/dist-packages/pandas/core/generic.py in astype(self, dtype, copy, errors, **kwargs)
5880 # else, only a single dtype is given
5881 new_data = self._data.astype(
-> 5882 dtype=dtype, copy=copy, errors=errors, **kwargs
5883 )
5884 return self._constructor(new_data).__finalize__(self)
/usr/local/lib/python3.6/dist-packages/pandas/core/internals/managers.py in astype(self, dtype, **kwargs)
579
580 def astype(self, dtype, **kwargs):
--> 581 return self.apply("astype", dtype=dtype, **kwargs)
582
583 def convert(self, **kwargs):
/usr/local/lib/python3.6/dist-packages/pandas/core/internals/managers.py in apply(self, f, axes, filter, do_integrity_check, consolidate, **kwargs)
436 kwargs[k] = obj.reindex(b_items, axis=axis, copy=align_copy)
437
--> 438 applied = getattr(b, f)(**kwargs)
439 result_blocks = _extend_blocks(applied, result_blocks)
440
/usr/local/lib/python3.6/dist-packages/pandas/core/internals/blocks.py in astype(self, dtype, copy, errors, values, **kwargs)
557
558 def astype(self, dtype, copy=False, errors="raise", values=None, **kwargs):
--> 559 return self._astype(dtype, copy=copy, errors=errors, values=values, **kwargs)
560
561 def _astype(self, dtype, copy=False, errors="raise", values=None, **kwargs):
/usr/local/lib/python3.6/dist-packages/pandas/core/internals/blocks.py in _astype(self, dtype, copy, errors, values, **kwargs)
641 # _astype_nansafe works fine with 1-d only
642 vals1d = values.ravel()
--> 643 values = astype_nansafe(vals1d, dtype, copy=True, **kwargs)
644
645 # TODO(extension)
/usr/local/lib/python3.6/dist-packages/pandas/core/dtypes/cast.py in astype_nansafe(arr, dtype, copy, skipna)
727 if copy or is_object_dtype(arr) or is_object_dtype(dtype):
728 # Explicit copy, or required since NumPy can't view from / to object.
--> 729 return arr.astype(dtype, copy=True)
730
731 return arr.view(dtype)
TypeError: float() argument must be a string or a number, not 'Day'
Sample data in the data_final df and associated dtypes are as per the attachments.
sample data
dtypes
Thanks for any help.
Apologies folks - I was able to resolve my issue after updating the Lifetimes package to the latest 0.11.1 version in Colab!

The astype function is not working mysteriously

I'm trying to convert these values to floats so that I can sum() them. The problem is that something odd keeps the conversion from working.
Data:
cw= pd.DataFrame({ "campaign": "151515151515" ,
"Media_Cost": "$ 14,52" })
cw.dtypes
Media_Cost object
My attempts,
I tried each of the lines of code below, one at a time, and mysteriously none of them works.
cw["Media_Cost"] = cw["Media_Cost"].str.replace('$','')
# Attempt 1
cw.Media_Cost = cw.Media_Cost.astype(float)
# Attempt 3
cw.Media_Cost = len(float(cw.Media_Cost))
# Attempt 4
cw.Media_Cost = cw.Media_Cost.apply(lambda x: float(cw.Media_Cost))
The error persists:
cw["Media_Cost"] = cw["Media_Cost"].str.replace('$','').str.replace(',', '.').astype(float)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-382-f5688d76abed> in <module>
1 # cw.Media_Cost = cw.Media_Cost.apply(lambda x: float(cw.Media_Cost))
----> 2 cw["Media_Cost"] = cw["Media_Cost"].str.replace('$','').str.replace(',', '.').astype(float)
3
4 # cw.Media_Cost = float(cw.Media_Cost)
5
~\Anaconda3\lib\site-packages\pandas\core\generic.py in astype(self, dtype, copy, errors, **kwargs)
5689 # else, only a single dtype is given
5690 new_data = self._data.astype(dtype=dtype, copy=copy, errors=errors,
-> 5691 **kwargs)
5692 return self._constructor(new_data).__finalize__(self)
5693
~\Anaconda3\lib\site-packages\pandas\core\internals\managers.py in astype(self, dtype, **kwargs)
529
530 def astype(self, dtype, **kwargs):
--> 531 return self.apply('astype', dtype=dtype, **kwargs)
532
533 def convert(self, **kwargs):
~\Anaconda3\lib\site-packages\pandas\core\internals\managers.py in apply(self, f, axes, filter, do_integrity_check, consolidate, **kwargs)
393 copy=align_copy)
394
--> 395 applied = getattr(b, f)(**kwargs)
396 result_blocks = _extend_blocks(applied, result_blocks)
397
~\Anaconda3\lib\site-packages\pandas\core\internals\blocks.py in astype(self, dtype, copy, errors, values, **kwargs)
532 def astype(self, dtype, copy=False, errors='raise', values=None, **kwargs):
533 return self._astype(dtype, copy=copy, errors=errors, values=values,
--> 534 **kwargs)
535
536 def _astype(self, dtype, copy=False, errors='raise', values=None,
~\Anaconda3\lib\site-packages\pandas\core\internals\blocks.py in _astype(self, dtype, copy, errors, values, **kwargs)
631
632 # _astype_nansafe works fine with 1-d only
--> 633 values = astype_nansafe(values.ravel(), dtype, copy=True)
634
635 # TODO(extension)
~\Anaconda3\lib\site-packages\pandas\core\dtypes\cast.py in astype_nansafe(arr, dtype, copy, skipna)
700 if copy or is_object_dtype(arr) or is_object_dtype(dtype):
701 # Explicit copy, or required since NumPy can't view from / to object.
--> 702 return arr.astype(dtype, copy=True)
703
704 return arr.view(dtype)
ValueError: could not convert string to float: '1.443.48'
You can try:
cw = pd.DataFrame({"campaign": "151515151515", "Media_Cost": "$ 1,443.48" }, index=[0])
cw["Media_Cost"] = cw["Media_Cost"].str.replace('$','').str.replace(',', '').astype(float)
cw.dtypes
Result:
campaign object
Media_Cost float64
dtype: object

Error while converting categorical feature to numerical feature in Pandas

My dataframe contains a categorical feature 'Street' which can take 1 of the 2 possible values 'Grvl' or 'Pave'. I am trying to convert this categorical feature in to numerical value before fitting ML algorithm. My code looks like this
dataset['Street']=dataset['Street'].map({'Grvl':0,'Pave':1}).astype(int)
I have filled missing values with the most occurring value in the dataframe
dataset['Street'].isnull().sum()
I am getting following error
ValueError Traceback (most recent call last)
<ipython-input-59-86f0b031335a> in <module>()
2 print dataset['Street'].isnull().sum()
3 #dataset['MSZoning'] = dataset['MSZoning'].map( {'A': 0, 'C': 1,'FV': 2,'I':3,'RH':4,'RL':5,'RP':6,'RM':7} ).astype(int)
----> 4 dataset['Street']=dataset['Street'].map({'Grvl':0,'Pave':1}).astype(int)
5 dataset['LotShape']=dataset['LotShape'].map({'Reg':0,'IR1':1,'IR2':2,'IR3':3}).astype(int)
6 dataset['LandContour']=dataset['LandContour'].map({'Lvl':0,'Bnk':1,'HLS':2,'Low':3}).astype(int)
C:\Users\JAYASHREE\Anaconda2\lib\site-packages\pandas\core\generic.pyc in astype(self, dtype, copy, raise_on_error, **kwargs)
2948
2949 mgr = self._data.astype(dtype=dtype, copy=copy,
-> 2950 raise_on_error=raise_on_error, **kwargs)
2951 return self._constructor(mgr).__finalize__(self)
2952
C:\Users\JAYASHREE\Anaconda2\lib\site-packages\pandas\core\internals.pyc in astype(self, dtype, **kwargs)
2936
2937 def astype(self, dtype, **kwargs):
-> 2938 return self.apply('astype', dtype=dtype, **kwargs)
2939
2940 def convert(self, **kwargs):
C:\Users\JAYASHREE\Anaconda2\lib\site-packages\pandas\core\internals.pyc in apply(self, f, axes, filter, do_integrity_check, consolidate, raw, **kwargs)
2888
2889 kwargs['mgr'] = self
-> 2890 applied = getattr(b, f)(**kwargs)
2891 result_blocks = _extend_blocks(applied, result_blocks)
2892
C:\Users\JAYASHREE\Anaconda2\lib\site-packages\pandas\core\internals.pyc in astype(self, dtype, copy, raise_on_error, values, **kwargs)
432 **kwargs):
433 return self._astype(dtype, copy=copy, raise_on_error=raise_on_error,
--> 434 values=values, **kwargs)
435
436 def _astype(self, dtype, copy=False, raise_on_error=True, values=None,
C:\Users\JAYASHREE\Anaconda2\lib\site-packages\pandas\core\internals.pyc in _astype(self, dtype, copy, raise_on_error, values, klass, mgr, **kwargs)
475
476 # _astype_nansafe works fine with 1-d only
--> 477 values = com._astype_nansafe(values.ravel(), dtype, copy=True)
478 values = values.reshape(self.shape)
479
C:\Users\JAYASHREE\Anaconda2\lib\site-packages\pandas\core\common.pyc in _astype_nansafe(arr, dtype, copy)
1912
1913 if np.isnan(arr).any():
-> 1914 raise ValueError('Cannot convert NA to integer')
1915 elif arr.dtype == np.object_ and np.issubdtype(dtype.type, np.integer):
1916 # work around NumPy brokenness, #1987
ValueError: Cannot convert NA to integer
You have NaN values in your dataframe! You cannot convert a series from object to integer (using astype(int)) if there are missing values, so you should fill the missing values first.
dataset['Street'].isnull().sum() doesn't fill missing values; it only counts them.
You can fill them using pandas.DataFrame.fillna or sklearn.preprocessing.Imputer.

Categories

Resources