Applying if-else statement to create new column - python

I applied an if-else statement in the function change_test, but it resulted in None values in every row of the test column. Here is my code:
# Row-wise function as posted in the question. It assigns into the row it is
# given and never returns anything, so ``df.apply`` collects ``None`` for
# every row.
def change_test(df):
    # ``df`` is a single row here because the function is applied with axis = 1.
    if ((df['product_id'] == 7.99) & (df['refunded'] == 1)):
        df['test'] = 0
    elif (df['product_id'] == 49.99):
        # NOTE: ``==`` is a comparison, not an assignment; its result is discarded.
        df['test'] == 49.99
    else :
        df['test'] = df['product_id'] * (df['days_used_app'] / 7)
df['test'] = df.apply(change_test, axis = 1)
And here is my dataframe before applying this function

You need to return a value from the function you pass to apply:
def change_test(row):
    """Compute the ``test`` value for a single DataFrame row.

    Meant to be used as ``df.apply(change_test, axis=1)``: the value returned
    for each row becomes that row's entry in the resulting Series.

    Args:
        row: Mapping-like row exposing ``product_id``, ``refunded`` and
            ``days_used_app``.

    Returns:
        ``0`` when ``product_id`` is 7.99 and ``refunded`` is 1, ``49.99``
        when ``product_id`` is 49.99, otherwise ``product_id`` multiplied by
        ``days_used_app / 7``.
    """
    product_id = row['product_id']
    # Plain ``and`` is sufficient: both operands are scalar comparisons, so
    # the element-wise ``&`` operator is not needed here.
    if product_id == 7.99 and row['refunded'] == 1:
        return 0
    if product_id == 49.99:
        return 49.99
    return product_id * (row['days_used_app'] / 7)
df['test'] = df.apply(change_test, axis = 1)

Related

Python: Is there a pandas function to detect persons Online login status and assign identification numbers to them based on the login status?

I have this pandas DataFrame, and I am trying to assign a status identification number in the Status column to each user (AD_USER_NAME), depending on the user's LAST_ACCESSED value: Offline = 0, Available/Online = 1, Away = 2.
[Table before][1]
So that the resulting table will be like this
[Table After][2]
[1]: https://i.stack.imgur.com/3I5BA.png
[2]: https://i.stack.imgur.com/bXuTJ.png
I tried to use the following if/elif function, but it didn't work:
# Row-wise status function as posted in the question. It does not run as
# written; see the notes on the individual conditions.
def flag(df1):
    # NOTE: ``=`` is assignment; a comparison needs ``==``. The ``]`` before
    # the closing parenthesis is also unbalanced.
    if (df1['LAST_ACCESSED'] = df1['None'] and df1['LAST_ACCESSED'] = 0]):
        return 0
    # NOTE: this condition opens one more ``(`` than it closes and again uses
    # ``=`` where ``==`` is needed.
    elif (df1['LAST_ACCESSED'] > 0 and (df1['LAST_ACCESSED'] = df1['Run_date_Time']):
        return 1
    elif (df1['LAST_ACCESSED'] > 5 and df1['LAST_ACCESSED'] < df1['Run_date_Time']):
        return 2
df1['Status'] = df1.apply(flag, axis = 1)
Make a boolean array for each condition then use them when assigning values. Use & instead of and. Use == for comparison, = is used for assignments.
# Build one boolean mask per status code, then write the codes in order.
last_accessed = df1['LAST_ACCESSED']
run_date_time = df1['Run_date_Time']

zero_mask = (last_accessed == 0) & (last_accessed == df1['None'])
one_mask = (last_accessed > 0) & (last_accessed == run_date_time)
two_mask = (last_accessed > 5) & (last_accessed < run_date_time)

# The position of each mask in the tuple is the status code it assigns.
for status, mask in enumerate((zero_mask, one_mask, two_mask)):
    df1.loc[mask, 'Status'] = status

TypeError: 'float' object is not subscriptable (while trying to apply a function on a pandas DataFrame)

I am trying to apply this function on a pandas dataframe. But I am getting this error. I'd like to know what does it mean, and how to rectify it?
# Age-filling function as posted in the question: returns a known Age as is
# and otherwise returns a constant chosen by Pclass/Sex plus normal noise.
def fill_age(x):
    # Expects ``x`` to support lookup by 'Age', 'Pclass' and 'Sex'.
    Age = x['Age']
    Pclass = x['Pclass']
    Sex = x['Sex']
    if pd.isnull(Age):
        # NOTE: this branch matches every Pclass == 1 case, so the
        # ``(Pclass == 1) & (Sex == 'male')`` branch below can never run.
        if Pclass == 1:
            return 34.61 + np.random.normal(loc =0, scale = 13.61)
        elif (Pclass == 1) & (Sex == 'male'):
            return 41.2813 + np.random.normal(loc = 0, scale = 15.14)
        elif (Pclass == 2) & (Sex == 'female'):
            return 28.72 + np.random.normal(loc = 0, scale = 12.87)
        elif (Pclass == 2) & (Sex == 'male'):
            return 30.74 + np.random.normal(loc = 0, scale= 14.79)
        elif (Pclass == 3) & (Sex == 'female'):
            return 21.75 + np.random.normal(loc = 0, scale = 12.73)
        elif (Pclass == 3) & (Sex == 'male'):
            return 26.51 + np.random.normal(loc = 0, scale= 12.16)
        else:
            # No branch matched: the function falls through and returns None.
            pass
    else:
        return Age
# NOTE: ``train['Age']`` is a single column, so ``apply`` calls ``fill_age``
# with one Age value at a time rather than with a whole row; ``x['Age']`` is
# then a lookup on a float, which matches the TypeError in the title.
train['Age'] = train['Age'].apply(fill_age)
Note: train is a pandas dataframe
You don't show where:
train['Age'] = train['Age'].apply(fill_age)
is coming from, but I suspect `train` is actually a float, not a dict.

Apply result to dataset after df.iterrows

# Script as posted in the question: reads a CSV, replaces NaN with None and
# evaluates a per-row 'yes'/'No' decision from the date1/date2 columns.
df = pd.read_csv('./test22.csv')
df.head(5)
df = df.replace(np.nan, None)
# NOTE: ``load`` is a plain local that is reset on every iteration and never
# stored back into ``df``, so the loop leaves the DataFrame unchanged.
for index,col in df.iterrows():
    # Extract only if date1 happened earlier than date2
    load = 'No'
    if col['date1'] == None or col['date2'] == None:
        load = 'yes'
    # Compare the first 4 characters; counts only if one value is 4 characters long.
    elif int(str(col['date1'])[:4]) >= int(str(col['date2'])[:4]) and \
    (len(str(col['date1'])) == 4 or len(str(col['date2'])) == 4):
        load = 'yes'
    # Compare the first 6 characters; counts only if one value is 6 characters long.
    elif int(str(col['date1'])[:6]) >= int(str(col['date2'])[:6]) and \
    (len(str(col['date1'])) == 6 or len(str(col['date2'])) == 6):
        load = 'yes'
    # Otherwise compare the first 8 characters.
    elif int(str(col['date1'])[:8]) >= int(str(col['date2'])[:8]):
        load = 'yes'
df.head(5)
After preprocessing the dataset with iterrows (see the code above), the result is not reflected in the actual dataset. I want the result to be reflected in the actual dataset.
How can I apply it to the actual dataset?
Replace your for loop with a function that returns a boolean, then you can use df.apply to apply it to all rows, and then filter your dataframe by that value:
def should_load(x):
    """Decide whether the row ``x`` passes the date1/date2 check.

    Intended for ``df.apply(should_load, axis=1)`` so the result can be used
    as a boolean row filter.

    Args:
        x: Mapping-like row exposing ``date1`` and ``date2``. Each value is
            either ``None`` or something whose ``str()`` starts with digits
            (the code slices 4, 6 and 8 leading characters).

    Returns:
        ``True`` when either date is ``None``, or when ``date1 >= date2`` on
        the first 4 characters (if one value is exactly 4 characters long),
        on the first 6 characters (if one value is exactly 6 characters
        long), or on the first 8 characters; ``False`` otherwise.

    Raises:
        ValueError: If a compared prefix cannot be converted with ``int``.
    """
    date1, date2 = x['date1'], x['date2']
    # ``None`` is a singleton, so test identity rather than equality.
    if date1 is None or date2 is None:
        return True

    # Convert once instead of repeating str()/len() in every branch.
    text1, text2 = str(date1), str(date2)
    lengths = (len(text1), len(text2))

    # A 4- or 6-character comparison only counts when one of the two values
    # is given at exactly that length.
    # NOTE(review): when that comparison is False the check falls through to
    # longer prefixes, so values of different lengths end up compared as
    # plain integers (e.g. 20200101 vs 202002). Confirm this is intended.
    for width in (4, 6):
        if int(text1[:width]) >= int(text2[:width]) and width in lengths:
            return True
    return int(text1[:8]) >= int(text2[:8])
df[df.apply(should_load, axis=1)].head(5)

Difference between two similar if loops in Python

I have two pieces of code that should do the same thing, but the first one does not give me a result while the second one produces output:
# First variant from the question: DSCR_Post is assigned only when the
# denominator is non-zero.
if (Method == "EMM" ):
    if ((Loan_Obligation/12)+EMI) !=0:
        DSCR_Post = EBITDA_EMM/((Loan_Obligation/12)+EMI)
    else:
        # NOTE: a bare ``0`` is an expression statement; nothing is assigned.
        0
elif (Method != "EMM" ):
    if ((Loan_Obligation/12)+EMI) !=0:
        DSCR_Post = EBITDA/((Loan_Obligation/12)+EMI)
    else:
        # NOTE: same as above, DSCR_Post is left unassigned on this path.
        0
and other one is:
# Second variant from the question: each branch assigns DSCR_Post through a
# conditional expression that yields 0 when the denominator is zero.
if (Method == "EMM"):
    DSCR_Post = EBITDA_EMM/((Loan_Obligation/12)+EMI) if ((Loan_Obligation/12)+EMI) !=0 else 0
else:
    DSCR_Post = EBITDA/((Loan_Obligation/12)+EMI) if ((Loan_Obligation/12)+EMI) !=0 else 0
print('DSCR_Post:',DSCR_Post)
Can someone help me understand the difference between the two pieces of code?
In your first code snippet, you are not assigning the 0 to DSCR_Post as you do in the second. Modify as follows:
# Both ratios share this denominator; computing it once keeps the zero check
# and the division in sync.
denominator = (Loan_Obligation / 12) + EMI
if Method == "EMM":
    if denominator != 0:
        DSCR_Post = EBITDA_EMM / denominator
    else:
        DSCR_Post = 0  # the 0 has to be assigned!
else:  # no condition needed: Method either equals "EMM" or it does not
    if denominator != 0:
        DSCR_Post = EBITDA / denominator
    else:
        DSCR_Post = 0
print('DSCR_Post:', DSCR_Post)
Which can be simplified to the following:
# Pick the numerator once instead of duplicating the whole branch.
ebid = EBITDA_EMM if Method == "EMM" else EBITDA
denominator = (Loan_Obligation / 12) + EMI
# A zero denominator yields 0 instead of raising ZeroDivisionError.
DSCR_Post = ebid / denominator if denominator != 0 else 0
print('DSCR_Post:', DSCR_Post)

Conditional column in DataFrame: where's the mistake?

Let's start with a Pandas DataFrame df with numerical columns pS, pS0 and pE:
import pandas as pd
# Sample data: three rows of numerical values for the columns pS, pE and pS0.
rows = [
    [0.1, 0.2, 0.7],
    [0.3, 0.6, 0.1],
    [0.9, 0.1, 0.0],
]
df = pd.DataFrame(rows, columns=['pS', 'pE', 'pS0'])
We want to build a column indicating which of the 3 previous is dominating. I achieved it this way:
def class_morph(x):
    """Return the label of the largest of ``pE``, ``pS`` and ``pS0``.

    Intended for ``df.apply(class_morph, axis=1)``.

    Args:
        x: Mapping-like row exposing ``pE``, ``pS`` and ``pS0``.

    Returns:
        ``'E'``, ``'S'`` or ``'S0'`` for the column holding the single
        largest value, or ``'U'`` when the largest value is not unique.
    """
    scores = {'E': x['pE'], 'S': x['pS'], 'S0': x['pS0']}
    top = max(scores.values())
    leaders = [label for label, value in scores.items() if value == top]
    # Exactly one leader is a clear winner. Anything else (a tie for first
    # place, or no value comparing equal to the maximum) is reported as 'U'
    # rather than falling off the end of the function with None.
    return leaders[0] if len(leaders) == 1 else 'U'
df['Morph'] = df.apply(class_morph, axis=1)
Which gives the correct result:
But my initial try was the following:
# Initial attempt from the question: label a row by whichever column is
# strictly greater than the other two, with 'U' as the fallback.
def class_morph(x):
    # NOTE(review): np.max is called with two positional values here. Its
    # second positional parameter is ``axis``, not a second operand, so this
    # presumably does not compute the larger of the two numbers. The built-in
    # ``max(a, b)`` or ``np.maximum(a, b)`` looks like what was intended;
    # confirm against the NumPy documentation.
    if (x['pE'] > np.max(x['pS'],x['pS0'])):
        return 'E'
    elif (x['pS'] > np.max(x['pE'],x['pS0'])):
        return 'S'
    elif (x['pS0'] > np.max(x['pS'],x['pE'])):
        return 'S0'
    else:
        return 'U'
Which returned something wrong:
Could somebody explain to me what the mistake in my initial try is?

Categories

Resources