I wrote this code: df['Liquid Milk'] = df['Liquid Milk'].replace("", np.nan).astype('float64')
I got an error below, not sure where is an error, have tried many different way, but still same error. Any help, appreciated.
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-52-607dcacd5a1a> in <module>
----> 1 m['Liquid Milk(Mil Litres)']=m['Liquid Milk(Mil Litres)'].replace("", np.nan).astype('float64')
2
3
4
/usr/local/lib/python3.6/dist-packages/pandas/core/generic.py in astype(self, dtype, copy, errors, **kwargs)
5679 # else, only a single dtype is given
5680 new_data = self._data.astype(dtype=dtype, copy=copy, errors=errors,
-> 5681 **kwargs)
5682 return self._constructor(new_data).__finalize__(self)
5683
/usr/local/lib/python3.6/dist-packages/pandas/core/internals/managers.py in astype(self, dtype, **kwargs)
529
530 def astype(self, dtype, **kwargs):
--> 531 return self.apply('astype', dtype=dtype, **kwargs)
532
533 def convert(self, **kwargs):
/usr/local/lib/python3.6/dist-packages/pandas/core/internals/managers.py in apply(self, f, axes, filter, do_integrity_check, consolidate, **kwargs)
393 copy=align_copy)
394
--> 395 applied = getattr(b, f)(**kwargs)
396 result_blocks = _extend_blocks(applied, result_blocks)
397
/usr/local/lib/python3.6/dist-packages/pandas/core/internals/blocks.py in astype(self, dtype, copy, errors, values, **kwargs)
532 def astype(self, dtype, copy=False, errors='raise', values=None, **kwargs):
533 return self._astype(dtype, copy=copy, errors=errors, values=values,
--> 534 **kwargs)
535
536 def _astype(self, dtype, copy=False, errors='raise', values=None,
/usr/local/lib/python3.6/dist-packages/pandas/core/internals/blocks.py in _astype(self, dtype, copy, errors, values, **kwargs)
631
632 # _astype_nansafe works fine with 1-d only
--> 633 values = astype_nansafe(values.ravel(), dtype, copy=True)
634
635 # TODO(extension)
/usr/local/lib/python3.6/dist-packages/pandas/core/dtypes/cast.py in astype_nansafe(arr, dtype, copy, skipna)
700 if copy or is_object_dtype(arr) or is_object_dtype(dtype):
701 # Explicit copy, or required since NumPy can't view from / to object.
--> 702 return arr.astype(dtype, copy=True)
703
704 return arr.view(dtype)
ValueError: could not convert string to float: '571,2'
A float number's integer and fraction part must be separated by a . not a ,. Replace all ,s with .s.
float("571.2")
would work
float("571,2")
would fail.
Related
I have a table with columns of data type object and int.
One of them is dollar amount with dollar sign($) and comma separator. I would like to use describe() to summarise the dataframe so I tried to read the file by taking into account the $ sign, then convert the object into integer:
df= pd.read_excel(r'C:\Users\xxxx\df.xlsx','my_df' ,engine="openpyxl", thousands=',')
df['my_col'] = df['my_col'].replace({'\$':''}, regex = True)
df['my_col'].astype(str).astype(int)
df.describe(datetime_is_numeric=True)
but it caught error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-133-2011d1ad889e> in <module>
4
5 df['my_col'] = df['my_col'].replace({'\$':''}, regex = True)
----> 6 df['my_col'].astype(str).astype(int)
7 df.describe(datetime_is_numeric=True)
~\AppData\Roaming\Python\Python38\site-packages\pandas\core\generic.py in astype(self, dtype, copy, errors)
5535 else:
5536 # else, only a single dtype is given
-> 5537 new_data = self._mgr.astype(dtype=dtype, copy=copy, errors=errors,)
5538 return self._constructor(new_data).__finalize__(self, method="astype")
5539
~\AppData\Roaming\Python\Python38\site-packages\pandas\core\internals\managers.py in astype(self, dtype, copy, errors)
565 self, dtype, copy: bool = False, errors: str = "raise"
566 ) -> "BlockManager":
--> 567 return self.apply("astype", dtype=dtype, copy=copy, errors=errors)
568
569 def convert(
~\AppData\Roaming\Python\Python38\site-packages\pandas\core\internals\managers.py in apply(self, f, align_keys, **kwargs)
394 applied = b.apply(f, **kwargs)
395 else:
--> 396 applied = getattr(b, f)(**kwargs)
397 result_blocks = _extend_blocks(applied, result_blocks)
398
~\AppData\Roaming\Python\Python38\site-packages\pandas\core\internals\blocks.py in astype(self, dtype, copy, errors)
588 vals1d = values.ravel()
589 try:
--> 590 values = astype_nansafe(vals1d, dtype, copy=True)
591 except (ValueError, TypeError):
592 # e.g. astype_nansafe can fail on object-dtype of strings
~\AppData\Roaming\Python\Python38\site-packages\pandas\core\dtypes\cast.py in astype_nansafe(arr, dtype, copy, skipna)
964 # work around NumPy brokenness, #1987
965 if np.issubdtype(dtype.type, np.integer):
--> 966 return lib.astype_intsafe(arr.ravel(), dtype).reshape(arr.shape)
967
968 # if we have a datetime/timedelta array of objects
pandas\_libs\lib.pyx in pandas._libs.lib.astype_intsafe()
ValueError: invalid literal for int() with base 10: '500.00'
If I were to change df['my_col'].astype(str).astype(int) to df['my_col'].astype(str).astype(float), it would catch the error
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-134-65da7cbc042f> in <module>
4
5 df['my_col'] = df['my_col'].replace({'\$':''}, regex = True)
----> 6 df['my_col'].astype(str).astype(int)
7 df.describe(datetime_is_numeric=True)
~\AppData\Roaming\Python\Python38\site-packages\pandas\core\generic.py in astype(self, dtype, copy, errors)
5535 else:
5536 # else, only a single dtype is given
-> 5537 new_data = self._mgr.astype(dtype=dtype, copy=copy, errors=errors,)
5538 return self._constructor(new_data).__finalize__(self, method="astype")
5539
~\AppData\Roaming\Python\Python38\site-packages\pandas\core\internals\managers.py in astype(self, dtype, copy, errors)
565 self, dtype, copy: bool = False, errors: str = "raise"
566 ) -> "BlockManager":
--> 567 return self.apply("astype", dtype=dtype, copy=copy, errors=errors)
568
569 def convert(
~\AppData\Roaming\Python\Python38\site-packages\pandas\core\internals\managers.py in apply(self, f, align_keys, **kwargs)
394 applied = b.apply(f, **kwargs)
395 else:
--> 396 applied = getattr(b, f)(**kwargs)
397 result_blocks = _extend_blocks(applied, result_blocks)
398
~\AppData\Roaming\Python\Python38\site-packages\pandas\core\internals\blocks.py in astype(self, dtype, copy, errors)
588 vals1d = values.ravel()
589 try:
--> 590 values = astype_nansafe(vals1d, dtype, copy=True)
591 except (ValueError, TypeError):
592 # e.g. astype_nansafe can fail on object-dtype of strings
~\AppData\Roaming\Python\Python38\site-packages\pandas\core\dtypes\cast.py in astype_nansafe(arr, dtype, copy, skipna)
987 if copy or is_object_dtype(arr) or is_object_dtype(dtype):
988 # Explicit copy, or required since NumPy can't view from / to object.
--> 989 return arr.astype(dtype, copy=True)
990
991 return arr.view(dtype)
ValueError: could not convert string to float: '5,000.00'
Change replace adding one more condition
df['my_col'] = df['my_col'].replace({'\$':'',',':''}, regex = True)
Getting the following error while using the summary_data_from_transaction_data utility function included within the Lifestyles python package. Using pandas version 0.2 on Google Colab.
TypeError: float() argument must be a string or a number, not 'Day'
Any help will be much appreciated.
Code:
data_summary = summary_data_from_transaction_data(data_final, customer_id_col = "CustomerID", datetime_col = "InvoiceDate", monetary_value_col = "Sales", observation_period_end = "2011-12-09", freq = "D")
Stacktrace:
/usr/local/lib/python3.6/dist-packages/lifetimes/utils.py in summary_data_from_transaction_data(transactions, customer_id_col, datetime_col, monetary_value_col, datetime_format, observation_period_end, freq)
194 summary_columns.append('monetary_value')
195
--> 196 return customers[summary_columns].astype("float64")
197
198
/usr/local/lib/python3.6/dist-packages/pandas/core/generic.py in astype(self, dtype, copy, errors, **kwargs)
5880 # else, only a single dtype is given
5881 new_data = self._data.astype(
-> 5882 dtype=dtype, copy=copy, errors=errors, **kwargs
5883 )
5884 return self._constructor(new_data).__finalize__(self)
/usr/local/lib/python3.6/dist-packages/pandas/core/internals/managers.py in astype(self, dtype, **kwargs)
579
580 def astype(self, dtype, **kwargs):
--> 581 return self.apply("astype", dtype=dtype, **kwargs)
582
583 def convert(self, **kwargs):
/usr/local/lib/python3.6/dist-packages/pandas/core/internals/managers.py in apply(self, f, axes, filter, do_integrity_check, consolidate, **kwargs)
436 kwargs[k] = obj.reindex(b_items, axis=axis, copy=align_copy)
437
--> 438 applied = getattr(b, f)(**kwargs)
439 result_blocks = _extend_blocks(applied, result_blocks)
440
/usr/local/lib/python3.6/dist-packages/pandas/core/internals/blocks.py in astype(self, dtype, copy, errors, values, **kwargs)
557
558 def astype(self, dtype, copy=False, errors="raise", values=None, **kwargs):
--> 559 return self._astype(dtype, copy=copy, errors=errors, values=values, **kwargs)
560
561 def _astype(self, dtype, copy=False, errors="raise", values=None, **kwargs):
/usr/local/lib/python3.6/dist-packages/pandas/core/internals/blocks.py in _astype(self, dtype, copy, errors, values, **kwargs)
641 # _astype_nansafe works fine with 1-d only
642 vals1d = values.ravel()
--> 643 values = astype_nansafe(vals1d, dtype, copy=True, **kwargs)
644
645 # TODO(extension)
/usr/local/lib/python3.6/dist-packages/pandas/core/dtypes/cast.py in astype_nansafe(arr, dtype, copy, skipna)
727 if copy or is_object_dtype(arr) or is_object_dtype(dtype):
728 # Explicit copy, or required since NumPy can't view from / to object.
--> 729 return arr.astype(dtype, copy=True)
730
731 return arr.view(dtype)
TypeError: float() argument must be a string or a number, not 'Day'
Sample data in the data_final df and associated dtypes are as per the attachments.
sample data
dtypes
Thanks for any help.
Apologies folks - I was able to resolve my issue after updating the Lifetimes package to the latest 0.11.1 version in Colab!
so Im trying to transforming this values in a float to be able to sum(). The problem is there is something weird that wont let me accomplish it
Data:
cw= pd.DataFrame({ "campaign": "151515151515" ,
"Media_Cost": "$ 14,52" })
cw.dtypes
Media_Cost object
My attempts,
I tried all lines of code bellow, one at the time, neither works mysteriously..
cw["Media_Cost"] = cw["Media_Cost"].str.replace('$','')
# Attempt 1
cw.Media_Cost = cw.Media_Cost.astype(float)
# Attempt 3
cw.Media_Cost = len(float(cw.Media_Cost))
# Attempt 4
cw.Media_Cost = cw.Media_Cost.apply(lambda x: float(cw.Media_Cost))
Error persist..
cw["Media_Cost"] = cw["Media_Cost"].str.replace('$','').str.replace(',', '.').astype(float)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-382-f5688d76abed> in <module>
1 # cw.Media_Cost = cw.Media_Cost.apply(lambda x: float(cw.Media_Cost))
----> 2 cw["Media_Cost"] = cw["Media_Cost"].str.replace('$','').str.replace(',', '.').astype(float)
3
4 # cw.Media_Cost = float(cw.Media_Cost)
5
~\Anaconda3\lib\site-packages\pandas\core\generic.py in astype(self, dtype, copy, errors, **kwargs)
5689 # else, only a single dtype is given
5690 new_data = self._data.astype(dtype=dtype, copy=copy, errors=errors,
-> 5691 **kwargs)
5692 return self._constructor(new_data).__finalize__(self)
5693
~\Anaconda3\lib\site-packages\pandas\core\internals\managers.py in astype(self, dtype, **kwargs)
529
530 def astype(self, dtype, **kwargs):
--> 531 return self.apply('astype', dtype=dtype, **kwargs)
532
533 def convert(self, **kwargs):
~\Anaconda3\lib\site-packages\pandas\core\internals\managers.py in apply(self, f, axes, filter, do_integrity_check, consolidate, **kwargs)
393 copy=align_copy)
394
--> 395 applied = getattr(b, f)(**kwargs)
396 result_blocks = _extend_blocks(applied, result_blocks)
397
~\Anaconda3\lib\site-packages\pandas\core\internals\blocks.py in astype(self, dtype, copy, errors, values, **kwargs)
532 def astype(self, dtype, copy=False, errors='raise', values=None, **kwargs):
533 return self._astype(dtype, copy=copy, errors=errors, values=values,
--> 534 **kwargs)
535
536 def _astype(self, dtype, copy=False, errors='raise', values=None,
~\Anaconda3\lib\site-packages\pandas\core\internals\blocks.py in _astype(self, dtype, copy, errors, values, **kwargs)
631
632 # _astype_nansafe works fine with 1-d only
--> 633 values = astype_nansafe(values.ravel(), dtype, copy=True)
634
635 # TODO(extension)
~\Anaconda3\lib\site-packages\pandas\core\dtypes\cast.py in astype_nansafe(arr, dtype, copy, skipna)
700 if copy or is_object_dtype(arr) or is_object_dtype(dtype):
701 # Explicit copy, or required since NumPy can't view from / to object.
--> 702 return arr.astype(dtype, copy=True)
703
704 return arr.view(dtype)
ValueError: could not convert string to float: '1.443.48'
You can try:
cw = pd.DataFrame({"campaign": "151515151515", "Media_Cost": "$ 1,443.48" }, index=[0])
cw["Media_Cost"] = cw["Media_Cost"].str.replace('$','').str.replace(',', '').astype(float)
cw.dtypes
Result:
campaign object
Media_Cost float64
dtype: object
Starting with a python object, I'm getting an error when I try to convert the string to a float using astype(string).astype(float).
I've used regular expressions to remove the units and spaces and removed rows with NA.
df['Length'] = df['Length'].astype(str).astype(float)
ValueError Traceback (most recent call last)
<ipython-input-137-724df1c0091a> in <module>
1 df['Length'] = df['Length'].astype(str).astype(float)
2 #df['Length'].astype(str).astype(float)
3 #df['Width'].astype(str).astype(float)
/anaconda3/lib/python3.7/site-packages/pandas/core/generic.py in astype(self, dtype, copy, errors, **kwargs)
5689 # else, only a single dtype is given
5690 new_data = self._data.astype(dtype=dtype, copy=copy, errors=errors,
-> 5691 **kwargs)
5692 return self._constructor(new_data).__finalize__(self)
5693
/anaconda3/lib/python3.7/site-packages/pandas/core/internals/managers.py in astype(self, dtype, **kwargs)
529
530 def astype(self, dtype, **kwargs):
--> 531 return self.apply('astype', dtype=dtype, **kwargs)
532
533 def convert(self, **kwargs):
/anaconda3/lib/python3.7/site-packages/pandas/core/internals/managers.py in apply(self, f, axes, filter, do_integrity_check, consolidate, **kwargs)
393 copy=align_copy)
394
--> 395 applied = getattr(b, f)(**kwargs)
396 result_blocks = _extend_blocks(applied, result_blocks)
397
/anaconda3/lib/python3.7/site-packages/pandas/core/internals/blocks.py in astype(self, dtype, copy, errors, values, **kwargs)
532 def astype(self, dtype, copy=False, errors='raise', values=None, **kwargs):
533 return self._astype(dtype, copy=copy, errors=errors, values=values,
--> 534 **kwargs)
535
536 def _astype(self, dtype, copy=False, errors='raise', values=None,
/anaconda3/lib/python3.7/site-packages/pandas/core/internals/blocks.py in _astype(self, dtype, copy, errors, values, **kwargs)
631
632 # _astype_nansafe works fine with 1-d only
--> 633 values = astype_nansafe(values.ravel(), dtype, copy=True)
634
635 # TODO(extension)
/anaconda3/lib/python3.7/site-packages/pandas/core/dtypes/cast.py in astype_nansafe(arr, dtype, copy, skipna)
700 if copy or is_object_dtype(arr) or is_object_dtype(dtype):
701 # Explicit copy, or required since NumPy can't view from / to object.
--> 702 return arr.astype(dtype, copy=True)
703
704 return arr.view(dtype)
ValueError: could not convert string to float:
As John pointed out the error is on converting string to float.
To visually check for empty strings use df['Length'] == ''.
To count the number of empty strings use: sum(df['Length'] == '')
To drop the rows with empty strings use: df = df[df['Length'] != '']. This will modify your whole data frame and not just df['Length'].
Hope that helps.
I have large dataset which i am reading from text file and I want to perform an operation on it. I use
T[fields[0:-1]]=T[fields[0:-1]].astype(float)
to be sure that all the values are float. I get the Error setting an array element with a sequence on one the columns. I changed T.replace('NaN', np.nan) the NaN to nan but still the same issue. I used
dtypeCount =[T.iloc[:,i].apply(type).value_counts() for i in range(T.shape[1])]
to determine the type of the data on that column and this is the results
Name: PD_PRESSURE, dtype: int64, <class 'NoneType'> 3676479
<class 'float'> 192217
Due to size of the dataset I can't figure out where this coming from. Any thought on how I can solve this or how how I can find what is causing this?
Thanks in advance.
Update: Full Error message
ValueError Traceback (most recent call last)
<ipython-input-33-fa0a78194654> in <module>()
162 if Aggregate_Flag==1:
163 # This line make sure that all the data are defined as float
--> 164 T[fields[0:-1]]=T[fields[0:-1]].astype(float)
165 # defining the function inside the loop is not the best practice. However, since the number of iterations
166 #( number of file are small), I put it insider the loop to improve the readibility of the code.
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\util\_decorators.py in wrapper(*args, **kwargs)
116 else:
117 kwargs[new_arg_name] = new_arg_value
--> 118 return func(*args, **kwargs)
119 return wrapper
120 return _deprecate_kwarg
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\generic.py in astype(self, dtype, copy, errors, **kwargs)
4002 # else, only a single dtype is given
4003 new_data = self._data.astype(dtype=dtype, copy=copy, errors=errors,
-> 4004 **kwargs)
4005 return self._constructor(new_data).__finalize__(self)
4006
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\internals.py in astype(self, dtype, **kwargs)
3460
3461 def astype(self, dtype, **kwargs):
-> 3462 return self.apply('astype', dtype=dtype, **kwargs)
3463
3464 def convert(self, **kwargs):
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\internals.py in apply(self, f, axes, filter, do_integrity_check, consolidate, **kwargs)
3327
3328 kwargs['mgr'] = self
-> 3329 applied = getattr(b, f)(**kwargs)
3330 result_blocks = _extend_blocks(applied, result_blocks)
3331
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\internals.py in astype(self, dtype, copy, errors, values, **kwargs)
542 def astype(self, dtype, copy=False, errors='raise', values=None, **kwargs):
543 return self._astype(dtype, copy=copy, errors=errors, values=values,
--> 544 **kwargs)
545
546 def _astype(self, dtype, copy=False, errors='raise', values=None,
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\internals.py in _astype(self, dtype, copy, errors, values, klass, mgr, **kwargs)
623
624 # _astype_nansafe works fine with 1-d only
--> 625 values = astype_nansafe(values.ravel(), dtype, copy=True)
626 values = values.reshape(self.shape)
627
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\dtypes\cast.py in astype_nansafe(arr, dtype, copy)
701
702 if copy:
--> 703 return arr.astype(dtype)
704 return arr.view(dtype)
705
ValueError: setting an array element with a sequence.
If I exclude column PD_PRESSURE, I don't receive any error.
I also tried T['PD_PRESSURE'].dtype(float) and I get the error but for other columns it works fine.
If I run T[fields[0:-1]]=T[fields[0:-1]] it works fine by itself, based on these I thought probably the error is coming from PD_Pressure column.
T.info() returns
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 3868696 entries, 2000-01-01 to 2017-04-11
columns (total 6 columns):
A object
B object
C object
D object
PD_PRESSURE object
F object
dtypes: object(6)
memory usage: 1.0+ GB