If condition true for loop other rows

If condition true for loop other rows - python

I need to check whether the status in column 'bad_loan' is 'Yes'. If it is, I need to look for the same client in 'name_client' and, if that client has other loans, set the value to 'Yes' for all of his loans.
# Question snippet (indentation was lost in this copy). Intended behaviour:
# once any loan of a client is 'Yes', mark all loans of that client 'Yes'.
def bad_loan(df):
# NOTE(review): iterating a DataFrame directly yields its column labels, so
# `row` is a string here and `row['bad_loan']` raises the reported
# "TypeError: string indices must be integers".
for row in df:
status = row['bad_loan']
name = row['name_client']
if status == 'Yes':
for n in df:
# NOTE(review): this reads the whole column, not the value of one row.
name_client = df['name_client']
if name == name_client:
# NOTE(review): assigns to every row of the column, not only the matches.
df['bad_loan'] = 'Yes'
else:
df['bad_loan'] = 'No'
bad_loan(df)
It raises TypeError: string indices must be integers

for row in df:
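You can't iterate over the rows of a pandas DataFrame this way: iterating over a DataFrame yields its column names, not its rows.
Use iloc to read each row by position, like this: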
def bad_loan(df):
    """Flag every loan of a client who has at least one bad loan.

    The frame is modified in place: ``bad_loan`` becomes ``'Yes'`` on every
    row whose ``name_client`` appears on at least one row that was already
    ``'Yes'``, and ``'No'`` on all other rows.

    Args:
        df: DataFrame with the columns ``bad_loan`` and ``name_client``.
    """
    flagged_clients = set()
    # Iterating a DataFrame yields column labels, so rows are read by
    # position with iloc instead.
    for i in range(len(df)):
        row = df.iloc[i]
        if row['bad_loan'] == 'Yes':
            flagged_clients.add(row['name_client'])
    # Write the whole column once; assigning inside the loop would overwrite
    # every row on each iteration.
    df['bad_loan'] = [
        'Yes' if client in flagged_clients else 'No'
        for client in df['name_client']
    ]
bad_loan(df)

import numpy as np
import pandas as pd
# Clients that have at least one loan already marked as bad.
is_bad = df["bad_loan"] == "Yes"
names = df.loc[is_bad, "name_client"]
# Every loan of one of those clients becomes "Yes"; all other loans "No".
has_bad_loan = df["name_client"].isin(names)
df["bad_loan"] = np.where(has_bad_loan, "Yes", "No")

Related

Python: Is there a pandas function to detect persons Online login status and assign identification numbers to them based on the login status?

I have this pandas DataFrame, and I am trying to assign a status identification number in the Status column for each user (AD_USER_NAME), depending on the user's LAST_ACCESSED value: Offline = 0, Available/Online = 1, Away = 2.
[Table before][1]
So that the resulting table will be like this
[Table After][2]
[1]: https://i.stack.imgur.com/3I5BA.png
[2]: https://i.stack.imgur.com/bXuTJ.png
I tried the following row-wise function with if/elif conditions, but it didn't work:
# Question snippet: row-wise classifier meant for df1.apply(flag, axis=1);
# it returns 0, 1 or 2 depending on how LAST_ACCESSED compares with the
# other columns.
# NOTE(review): as written this does not parse -- the conditions use `=`
# (assignment) where `==` (comparison) is needed, and the brackets in the
# first two conditions are unbalanced.
def flag(df1):
if (df1['LAST_ACCESSED'] = df1['None'] and df1['LAST_ACCESSED'] = 0]):
return 0
elif (df1['LAST_ACCESSED'] > 0 and (df1['LAST_ACCESSED'] = df1['Run_date_Time']):
return 1
elif (df1['LAST_ACCESSED'] > 5 and df1['LAST_ACCESSED'] < df1['Run_date_Time']):
return 2
df1['Status'] = df1.apply(flag, axis = 1)

Make a boolean array for each condition then use them when assigning values. Use & instead of and. Use == for comparison, = is used for assignments.
# Pull the two columns once so every condition reads as a plain comparison.
last_accessed = df1['LAST_ACCESSED']
run_time = df1['Run_date_Time']
# One boolean mask per status code.
zero_mask = (last_accessed == 0) & (last_accessed == df1['None'])
one_mask = (last_accessed > 0) & (last_accessed == run_time)
two_mask = (last_accessed > 5) & (last_accessed < run_time)
# Write the codes in the order 0, 1, 2; rows matching no mask are not touched.
for status, mask in ((0, zero_mask), (1, one_mask), (2, two_mask)):
    df1.loc[mask, 'Status'] = status

Why is df.loc[0][0] giving the second row and not the first in Python pandas?

I am coming back to this project after a while, and I am confused because I don't know why df.loc[0][0] is giving the second row and not the first.
below is my code:
from datetime import datetime
import pandas as pd
#import numpy as np
import os
import time
def get_month():
    """Return the current month formatted with ``%b`` (abbreviated name)."""
    return time.strftime("%b")
def get_full_month():
    """Return the current month formatted with ``%B`` (full name)."""
    return time.strftime("%B")
def get_year():
    """Return the current year as a string, formatted with ``%Y``."""
    return time.strftime("%Y")
def get_day():
    """Return today's weekday name, formatted with ``%A``."""
    return f"{datetime.now():%A}"
def get_date():
    """Return today's day of the month as an int."""
    today = datetime.now()
    return today.day
# Write today's weekday name into the row labelled 0, at the entry selected
# by get_date().
# NOTE(review): `df.loc[0][...] = ...` is chained indexing -- the assignment
# goes to the Series returned by `df.loc[0]` and may not reach `df` itself.
# The key is the int from get_date(), while other helpers in this script
# address the same column as str(get_date()); confirm which label is meant.
def update_day(df):
df.loc[0][get_date()] = get_day()
# Write today's day-of-month number into the row labelled 1, at the entry
# selected by get_date().
# NOTE(review): same chained-indexing pattern as update_day -- the value is
# assigned to the Series returned by `df.loc[1]` and may not reach `df`.
def update_date(df):
df.loc[1][get_date()] = get_date()
def update_marks(df):
    """Turn every ``"0"`` mark in today's column into ``"3"``.

    Args:
        df: Attendance sheet whose columns are labelled with day numbers as
            strings; today's column is ``str(get_date())``.
    """
    today = str(get_date())
    # Assign the result back instead of calling replace(inplace=True) on the
    # column selection, which is not guaranteed to modify `df` itself.
    df[today] = df[today].replace({"0": "3"})
def create_file(df):
    """Save ``df`` as ``<month>-<year>.csv`` for the current month, without the index."""
    target = "{}-{}.csv".format(get_month(), get_year())
    df.to_csv(target, index=False)
def update_last(df):
    """Drop the day columns that do not exist in the current month.

    Columns are expected to be labelled with day numbers as strings. Every
    column whose label is a day after the last day of the current month (for
    example ``"31"`` in April, or ``"30"`` and ``"31"`` in February) is
    removed from ``df`` in place. Months with 31 days are left untouched.

    Args:
        df: Attendance sheet to trim in place.
    """
    # Local import keeps the block self-contained. The calendar gives the real
    # month length, so February is covered and the check no longer depends on
    # locale-specific month names.
    import calendar

    now = datetime.now()
    days_in_month = calendar.monthrange(now.year, now.month)[1]
    if days_in_month == 31:
        return
    print("update_last")
    for day in range(days_in_month + 1, 32):
        label = str(day)
        if label in df.columns:
            df.pop(label)
def update_sunday(df):
    """On a Saturday, pre-fill tomorrow's column as a Sunday.

    When today is Saturday and the sheet has a column for tomorrow, every
    ``"0"`` in that column becomes ``"3"`` and the entry in the row labelled
    0 is set to ``"Sunday"``. On any other day, or when tomorrow's column
    does not exist (e.g. the last day of the month), nothing happens.

    Args:
        df: Attendance sheet whose columns are labelled with day numbers as
            strings.
    """
    # get_day() formats with %A, which capitalises the name ("Saturday" in an
    # English locale); the previous lowercase comparison could never match.
    if get_day() != "Saturday":
        return
    tomorrow = str(get_date() + 1)
    if tomorrow not in df.columns:
        return
    # Assign back rather than replace(inplace=True) on a column selection.
    df[tomorrow] = df[tomorrow].replace({"0": "3"})
    # A single .loc call writes into `df`; chained indexing may not.
    df.loc[0, tomorrow] = "Sunday"
def update_late(df):
    """Interactively record late or absent teachers in today's column.

    Asks how many teachers to record. For each one it prints the teacher
    code list, asks for a teacher code and an option, and writes the mark
    (LATE = 2, V.LATE = 1, ABSENT = 0) into today's column on the teacher's
    row. Non-numeric input prints a message and restarts ``main()``, as
    before.

    NOTE(review): the nesting of the two ``else`` branches was reconstructed
    from their messages (the copy this came from had no indentation) --
    confirm against the original script.

    Args:
        df: Attendance sheet; teacher names are read from the first column,
            starting at the third row.
    """
    # Mark written to the sheet for each accepted option.
    marks = {"LATE": 2, "V.LATE": 1, "ABSENT": 0}

    late_teacher_numbers = input("how many teachers are late(TYPE IN NUMBERS) : ")
    # isnumeric has to be *called*; the bare method object is always truthy,
    # so non-numeric input used to reach int() and crash.
    if not late_teacher_numbers.isnumeric():
        print("PLEASE USE NUMERIC VALUES")
        main()
        return

    today = str(get_date())
    # Teacher names start at the third row (iloc position 2).
    all_teacher = df.iloc[2:, 0].tolist()
    for _ in range(int(late_teacher_numbers)):
        print("TEACHERS CODE LIST:\n ")
        for code, teacher in enumerate(all_teacher):
            print(f"CODE NO {code} = TEACHER NAME {teacher}")
        user = input("enter teacher code : ")
        if not user.isnumeric():
            print("PLEASE TYPE CODE FROM TEACHERS CODE LIST\n THANKYOU")
            print('PLEASE TRY AGAIN')
            main()
            continue
        opt = input("decide between \"LATE\" \n \"V.LATE\" \n \"ABSENT\" \nenter option : ")
        opt = opt.upper()
        # An unrecognised option is ignored, as in the original chain.
        if opt in marks:
            # Teacher code 0 lives on row label 2, hence the offset.
            df.at[int(user) + 2, today] = marks[opt]
def update_csv(df):
    """Write ``df`` to the current month's ``<month>-<year>.csv``, without the index."""
    month, year = get_month(), get_year()
    df.to_csv(f"{month}-{year}.csv", index=False)
def check_file(file):
    """Return True when ``file`` is an existing regular file, otherwise False."""
    return os.path.isfile(file)
def main():
    """Run one interactive attendance session for the current month.

    If this month's CSV does not exist yet it is created from the template
    ``reg format.csv``. The sheet is then loaded, today's date and day are
    filled in, surplus day columns are dropped and marks are updated. The
    user is asked whether any teacher is late; if so, the late marks are
    recorded and the sheet is saved. The month sheet and today's column are
    printed at the end.

    NOTE(review): as in the original, the sheet is only written back when
    the user answers "Y" -- confirm whether the other updates should also be
    saved.
    """
    file_name = f"{get_month()}-{get_year()}.csv"
    is_new_month = not check_file(file_name)
    if is_new_month:
        # First run of the month: start the sheet from the blank template.
        create_file(pd.read_csv("reg format.csv"))

    df = pd.read_csv(file_name)
    update_last(df)
    update_date(df)
    update_day(df)
    update_marks(df)
    if is_new_month:
        # The original only pre-fills Sunday on the run that creates the file.
        update_sunday(df)

    # The prompt used to be split across two source lines, which is a syntax
    # error for a plain string literal; it is joined into one literal here.
    late = input("If any teacher is late \npress \"Y\" else press\"N\" : ")
    if late.upper() == "Y":
        update_late(df)
        update_csv(df)

    print("ATTENDACE SHEET OF THIS MONTH : ")
    print(df)
    print("TODAYS ATTENDANCE SHEET")
    print(df[str(get_date())])
main()
reg format file
For example:
# Same helper as in the script above, quoted again to point at the line
# that writes to an unexpected row.
def update_day(df):
df.loc[0][get_date()] = get_day()
This updates the second row, not the first.
I want to update the first row.

You might want to check your column/row names as that might interfere with the results returned by loc. Remember that loc is a label-based indexer, while iloc is position-based.
Therefore, for the first cell in the first row, you can use iloc, as follows:
df.iloc[0, 0]
To get the entire first row, you can use either:
df.iloc[0, :]
Or, more succinctly:
df.iloc[0]

More efficient way than iterrows

I have a DataFrame df with columns action and pointerID. In the code snippet below I'm iterating through every row which is pretty inefficient because the DataFrame is pretty large. Is there a more efficient way to do this?
# Walks the rows in order and assigns a running number to every pointer that
# goes down; later events for the same pointerID reuse that number.
# annotated     -- one number appended per handled event
# curr_pointers -- number currently assigned to each pointerID (list index)
# count         -- next number to hand out
annotated = []
curr_pointers = []
count = 1
# NOTE(review): iterrows builds a Series for every row; iterating
# zip(df["action"], df["pointerID"], df["actionIndex"]) would read the same
# values in the same order -- presumably the cheaper option, to be measured.
for index, row in df.iterrows():
action = row["action"]
# NOTE(review): `id` shadows the builtin of the same name.
id = row["pointerID"]
if action == "ACTION_MOVE":
annotated.append(curr_pointers[id])
elif (action == "ACTION_POINTER_DOWN") or (action == "ACTION_DOWN"):
# Events whose actionIndex differs from the pointerID are skipped.
if row["actionIndex"] != id:
continue
if id >= len(curr_pointers):
curr_pointers.append(count)
else:
curr_pointers[id] = count
annotated.append(count)
count = count + 1
elif (action == "ACTION_POINTER_UP") or (action == "ACTION_UP") or (action == "ACTION_CANCEL"):
if row["actionIndex"] != id:
continue
annotated.append(curr_pointers[id])
else:
print("{} unknown".format(action))

Apply result to dataset after df.iterrows

# Question snippet: classifies every row as load = 'yes' / 'No' by comparing
# date1 with date2 at year (4 digits), month (6) or day (8) precision.
df = pd.read_csv('./test22.csv')
df.head(5)
# Missing values become None so they can be tested with `== None` below.
df = df.replace(np.nan, None)
# NOTE(review): `load` is a plain local that is overwritten on every
# iteration and never stored in `df`, so the frame is unchanged afterwards.
for index,col in df.iterrows():
# Extract only if date1 happened earlier than date2
load = 'No'
if col['date1'] == None or col['date2'] == None:
load = 'yes'
elif int(str(col['date1'])[:4]) >= int(str(col['date2'])[:4]) and \
(len(str(col['date1'])) == 4 or len(str(col['date2'])) == 4):
load = 'yes'
elif int(str(col['date1'])[:6]) >= int(str(col['date2'])[:6]) and \
(len(str(col['date1'])) == 6 or len(str(col['date2'])) == 6):
load = 'yes'
elif int(str(col['date1'])[:8]) >= int(str(col['date2'])[:8]):
load = 'yes'
df.head(5)
The loop above preprocesses the dataset with iterrows, but its result is not reflected in the actual dataset. I want the result to be reflected in the actual dataset.
How can I apply it to the actual dataset?

Replace your for loop with a function that returns a boolean, then you can use df.apply to apply it to all rows, and then filter your dataframe by that value:
def should_load(x):
    """Return True when the row should be loaded.

    A row is loaded when either date is missing, or when ``date1`` is not
    earlier than ``date2``. Dates are digit sequences of the form ``YYYY``,
    ``YYYYMM`` or ``YYYYMMDD``; two dates of different precision are
    compared at the coarser of the two (at most 8 digits), so a year is
    never compared against a full date digit-for-digit.

    Args:
        x: Row-like mapping with the keys ``date1`` and ``date2``.

    Returns:
        bool: True to keep the row, False otherwise.
    """
    first, second = x['date1'], x['date2']
    # pd.isna covers both None and NaN, so the check also works on a frame
    # where NaN was not replaced by None beforehand.
    if pd.isna(first) or pd.isna(second):
        return True
    first_digits = _date_digits(first)
    second_digits = _date_digits(second)
    precision = min(len(first_digits), len(second_digits), 8)
    return int(first_digits[:precision]) >= int(second_digits[:precision])


def _date_digits(value):
    """Return ``value`` as a digit string, dropping a float's trailing ``.0``."""
    # Numeric columns that contain missing values are read as floats, so
    # 2020 arrives as 2020.0 and would otherwise not parse as an int.
    if isinstance(value, float) and value.is_integer():
        value = int(value)
    return str(value).strip()
df[df.apply(should_load, axis=1)].head(5)

Confused by using too many for loops, if and else statements

I am developing a plugin for the GIS software, QGIS. I created a QTableWidget and wish to extract values from it:
The problem is that I use a lot of for loops and if/else statements which, up until the last few lines, seem to work fine. I can't follow the logic now, as the line print constraint_name only prints the last value, "Example_2". I could take it out of its corresponding else statement, and then it would print all values correctly, but I need it to sit inside a condition:
# Question snippet (Python 2 print statements, QGIS plugin code). Reads the
# "Constraint name" and "Rank" columns of the table widget, then prints a
# result for the group named "Sub group".
# NOTE(review): the indentation was lost in this copy, so the nesting of the
# loops and of the if/else branches cannot be read off the text.
qTable = self.dockwidget.tableWidget # QTableWidget
example_group = root.findGroup('Main group') # Group containing sub groups
all_items = []
gis_map = QgsMapLayerRegistry.instance().mapLayersByName( "Map" )[0] # Layer map in QGIS
idx = gis_map.fieldNameIndex("Rank") # Get "Rank" attribute field from gis_map
for row in range(qTable.rowCount()):
for col in [0]: # For first column "Constraint name"
constraint_item = qTable.item(row, col)
constraint_name = str(constraint_item.text())
for col in [1]: # For second column "Rank"
item = qTable.item(row, col)
item_string = str(item.text())
all_items.append(item_string)
for group in example_group.children(): # Search for specific group
if group.name() == "Sub group":
if len(set(all_items)) == 1: # If all items are the same
# If "Rank" field exists in layer map
if idx == -1:
print 'success'
else:
print 'fail'
else:
if idx == -1:
print constraint_name
else:
print 'fail'
Is there a way to tidy this up and still get the correct results?

My sincere thanks to the commenters who directed me to a much more efficient solution. Here is the working code (I'm sure it can be refined further):
# Answer snippet (Python 2 print statements, QGIS plugin code): the
# single-element column loops of the question are replaced by direct
# qTable.item(row, 0) / qTable.item(row, 1) lookups.
# NOTE(review): the indentation was lost in this copy; which `else` belongs
# to which `if` below cannot be read off the text.
qTable = self.dockwidget.tableWidget
example_group = root.findGroup('Main group')
# Collects the text of the second column of every row.
all_items = []
gis_map = QgsMapLayerRegistry.instance().mapLayersByName( "Map" )[0]
idx = gis_map.fieldNameIndex("Rank")
for row in range(qTable.rowCount()):
# First column: constraint name.
constraint_item = qTable.item(row, 0)
constraint_name = str(constraint_item.text())
# Second column: the value gathered into all_items.
item = qTable.item(row, 1)
item_string = str(item.text())
all_items.append(item_string)
for group in example_group.children():
if group.name() == "Sub group":
if idx == -1:
# A set of size 1 means every collected item is the same.
if len(set(all_items)) == 1:
print 'success'
else:
print 'fail'
else:
print constraint_name

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

If condition true for loop other rows - python

import numpy as np
import pandas as pd

# make a list of all the names with bad loans:
names = df[df["bad_loan"]=="Yes"]["name_client"]
# set bad loan to yes if name in list
df["bad_loan"] = np.where(df["name_client"].isin(names),"Yes","No")

Related

Python: Is there a pandas function to detect persons Online login status and assign identification numbers to them based on the login status?

Why is df.loc[0][0] giving the second row and not the first in Python pandas?

More efficient way than iterrows

Apply result to dataset after df.iterrows

Confused by using too many for loops, if and else statements

Categories

Resources