I need to check whether the status in column 'bad_loan' is 'Yes'. If it is, I want to look up that client's name in 'name_client' and, if the client has other loans, set 'bad_loan' to 'Yes' for all of that client's loans.
def bad_loan(df):
    # Asker's original attempt, kept as posted because it is the code that
    # produces the reported TypeError.
    # NOTE(review): the original indentation was lost; the nesting below is the
    # most plausible reading — confirm against the original post.
    # Iterating a DataFrame directly yields its column labels (strings), so
    # ``row`` is a string and ``row['bad_loan']`` is what raises the error.
    for row in df:
        status = row['bad_loan']
        name = row['name_client']
        if status == 'Yes':
            for n in df:
                # This reads the whole 'name_client' column, not a single value.
                name_client = df['name_client']
                if name == name_client:
                    df['bad_loan'] = 'Yes'
                else:
                    df['bad_loan'] = 'No'
bad_loan(df)
It raises `TypeError: string indices must be integers`.
for row in df:
You can't iterate over the rows this way with pandas: looping over a DataFrame directly yields its column labels, not its rows.
Try `iloc` like this:
def bad_loan(df):
    """Mark every loan of a client as bad if any of that client's loans is bad.

    Walks the rows positionally with ``iloc`` to collect the names of clients
    that have at least one loan with ``bad_loan == 'Yes'``, then rewrites the
    ``bad_loan`` column in place: 'Yes' for every loan of those clients and
    'No' for every other loan.

    Args:
        df: DataFrame with the columns ``'bad_loan'`` and ``'name_client'``.

    Returns:
        None. ``df`` is modified in place.
    """
    bad_clients = set()
    for i in range(len(df)):
        row = df.iloc[i]
        if row['bad_loan'] == 'Yes':
            bad_clients.add(row['name_client'])

    # Compare per row via a boolean mask. Comparing one name against the whole
    # 'name_client' column inside an ``if`` is ambiguous, and assigning to
    # df['bad_loan'] without a mask would overwrite every row at once.
    flagged = df['name_client'].isin(bad_clients)
    df['bad_loan'] = flagged.map({True: 'Yes', False: 'No'})
bad_loan(df)
import numpy as np
import pandas as pd
# Collect the names of all clients that have at least one loan marked "Yes".
names = df.loc[df["bad_loan"] == "Yes", "name_client"]
# Every loan of such a client becomes "Yes"; all remaining loans become "No".
has_bad_loan = df["name_client"].isin(names)
df["bad_loan"] = np.where(has_bad_loan, "Yes", "No")
Related
I have this pandas dataframe. I am trying to assign a status identification number in the Status column to each user (AD_USER_NAME), depending on the user's LAST_ACCESSED column: Offline = 0, Available/Online = 1, Away = 2.
[Table before][1]
So that the resulting table will be like this
[Table After][2]
[1]: https://i.stack.imgur.com/3I5BA.png
[2]: https://i.stack.imgur.com/bXuTJ.png
I tried the following function with if/elif conditions, but it didn't work:
def flag(df1):
    # Asker's non-working attempt, kept as posted: the conditions use ``=``
    # (assignment) where a comparison needs ``==``, and the brackets and
    # parentheses on the first two conditions are unbalanced, so this does not
    # parse.
    # Intended mapping per the question: Offline = 0, Available/Online = 1,
    # Away = 2.
    if (df1['LAST_ACCESSED'] = df1['None'] and df1['LAST_ACCESSED'] = 0]):
        return 0
    elif (df1['LAST_ACCESSED'] > 0 and (df1['LAST_ACCESSED'] = df1['Run_date_Time']):
        return 1
    elif (df1['LAST_ACCESSED'] > 5 and df1['LAST_ACCESSED'] < df1['Run_date_Time']):
        return 2
# Applies flag to each row (axis=1) and stores the result in 'Status'.
df1['Status'] = df1.apply(flag, axis = 1)
Make a boolean array for each condition then use them when assigning values. Use & instead of and. Use == for comparison, = is used for assignments.
# Pull the two columns used by every condition out once.
last_accessed = df1['LAST_ACCESSED']
run_date_time = df1['Run_date_Time']

# One boolean mask per status code (0 = Offline, 1 = Available/Online, 2 = Away).
zero_mask = (last_accessed == 0) & (last_accessed == df1['None'])
one_mask = (last_accessed > 0) & (last_accessed == run_date_time)
two_mask = (last_accessed > 5) & (last_accessed < run_date_time)

# Write each status code into the rows selected by its mask.
for status_code, mask in ((0, zero_mask), (1, one_mask), (2, two_mask)):
    df1.loc[mask, 'Status'] = status_code
I am coming back to this project after a while, and I am confused: I don't know why df.loc[0][0] gives me the second row and not the first.
Below is my code:
from datetime import datetime
import pandas as pd
#import numpy as np
import os
import time
def get_month():
    """Return the abbreviated name of the current month (``%b``, e.g. "Jan")."""
    return time.strftime("%b")
def get_full_month():
    """Return the full name of the current month (``%B``, e.g. "January")."""
    return time.strftime("%B")
def get_year():
    """Return the current year as a four-digit string (``%Y``)."""
    return time.strftime("%Y")
def get_day():
    """Return the full weekday name of today (``%A``, e.g. "Monday")."""
    return datetime.now().strftime('%A')
def get_date():
    """Return today's day of the month as an ``int``."""
    return datetime.now().day
def update_day(df):
    # Writes today's weekday name (get_day()) into the cell selected by row
    # label 0 and key get_date(). Kept as posted: this is the statement the
    # question is about.
    # NOTE(review): df.loc[0] is label-based, and [get_date()] then indexes the
    # resulting row with an int although other functions in this script address
    # the day column as str(get_date()); the chained assignment may also act on
    # a temporary object — confirm the intended target cell.
    df.loc[0][get_date()] = get_day()
def update_date(df):
    # Writes today's day-of-month number into the cell selected by row label 1
    # and key get_date().
    # NOTE(review): same chained-indexing pattern as update_day, with an int key
    # where other functions use str(get_date()) — confirm the intended target.
    df.loc[1][get_date()] = get_date()
def update_marks(df):
    """Replace the mark "0" with "3" in today's column of ``df``, in place.

    Args:
        df: Attendance DataFrame whose day columns are named "1", "2", ...
    """
    today = str(get_date())
    # The original mapping listed the key "0" twice, which collapses to this
    # single entry.
    # NOTE(review): one of the two keys may have been meant to be the int 0 —
    # confirm against the data.
    # Assign the result back rather than calling replace(..., inplace=True) on
    # the selected column: that form operates on an intermediate object and is
    # not guaranteed to update ``df``.
    df[today] = df[today].replace({"0": "3"})
def create_file(df):
    """Write ``df`` to ``<abbreviated month>-<year>.csv`` without the index.

    The file name is built from get_month() and get_year().
    """
    df.to_csv(f"{get_month()}-{get_year()}.csv", index=False)
def update_last(df):
    """Drop day columns that do not exist in the current month.

    The original only removed column "31" for the four 30-day months and left
    February untouched. This version uses the real length of the current
    month, so "29"/"30"/"31" are removed as needed. For 30-day months the
    behaviour is unchanged: "update_last" is printed and "31" is dropped.

    Args:
        df: Attendance DataFrame whose day columns are named "1" ... "31".
            Modified in place.
    """
    import calendar

    today = datetime.now()
    days_in_month = calendar.monthrange(today.year, today.month)[1]
    if days_in_month < 31:
        print("update_last")
        for day in range(days_in_month + 1, 32):
            column = str(day)
            # Tolerate sheets from which the column was already removed.
            if column in df.columns:
                df.pop(column)
def update_sunday(df):
    """On a Saturday, pre-fill tomorrow's column as a Sunday.

    Replaces "0" marks with "3" in tomorrow's column and writes "Sunday" into
    row label 0 of that column. Does nothing on other weekdays, or when
    tomorrow's column is not in the sheet (e.g. on the last day of the month).

    Args:
        df: Attendance DataFrame whose day columns are named "1", "2", ...
            Modified in place.
    """
    # get_day() returns a capitalised name ("Saturday"), so compare
    # case-insensitively; the original lowercase literal could never match it.
    if get_day().lower() != "saturday":
        return

    tomorrow = str(get_date() + 1)
    if tomorrow not in df.columns:
        return

    # Assign back instead of an in-place replace on a column selection, and use
    # a single .loc[row, column] write instead of chained indexing.
    df[tomorrow] = df[tomorrow].replace({"0": "3"})
    df.loc[0, tomorrow] = "Sunday"
def update_late(df):
    """Interactively record late / very late / absent marks for today.

    Asks how many teachers to record, then for each one prints the teacher
    code list, reads a teacher code and a status, and writes the status code
    into today's column (LATE = 2, V.LATE = 1, ABSENT = 0). An unrecognised
    status is ignored, as in the original.

    Args:
        df: Attendance DataFrame; teachers start at row 2 and their names are
            in the first column. Modified in place.
    """
    late_teacher_numbers = input("how many teachers are late(TYPE IN NUMBERS) : ")
    # The original tested the bound method ``isnumeric`` without calling it,
    # which is always truthy, so non-numeric input crashed in int() below.
    if not late_teacher_numbers.isnumeric():
        print("PLEASE USE NUMERIC VALUES")
        # NOTE(review): restarting via main() recurses into the whole program;
        # kept to preserve the original flow.
        main()
        return

    mark_codes = {"LATE": 2, "V.LATE": 1, "ABSENT": 0}
    today = str(get_date())
    all_teacher = df.iloc[2:, 0].tolist()

    for _ in range(int(late_teacher_numbers)):
        print("TEACHERS CODE LIST:\n ")
        for i, v in enumerate(all_teacher):
            print(f"CODE NO {i} = TEACHER NAME {v}")

        user = input("enter teacher code : ")
        if not user.isnumeric():
            print("PLEASE TYPE CODE FROM TEACHERS CODE LIST\n THANKYOU")
            print('PLEASE TRY AGAIN')
            main()
            continue

        opt = input("decide between \"LATE\" \n \"V.LATE\" \n \"ABSENT\" \nenter option : ")
        opt = opt.upper()
        if opt in mark_codes:
            # Teacher codes are 0-based offsets into the rows after the two
            # header rows, hence the +2.
            df.at[int(user) + 2, today] = mark_codes[opt]
def update_csv(df):
    """Save ``df`` to ``<abbreviated month>-<year>.csv`` without the index.

    The file name is built from get_month() and get_year(), i.e. the same
    file that create_file() writes.
    """
    df.to_csv(f"{get_month()}-{get_year()}.csv", index=False)
def check_file(file):
    """Return True if ``file`` is the path of an existing regular file."""
    return os.path.isfile(file)
def main():
    """Run one interactive attendance update for today.

    If this month's sheet does not exist yet it is created from
    "reg format.csv". The sheet is then loaded, today's date/day/marks are
    filled in, the user is asked whether any teacher is late, and the month
    sheet and today's column are printed.
    """
    file_name = f"{get_month()}-{get_year()}.csv"
    if not check_file(file_name):
        # First run of the month: seed the sheet from the template, then
        # reload it from disk.
        df = pd.read_csv("reg format.csv")
        create_file(df)
        df = pd.read_csv(file_name)
        update_date(df)
        update_day(df)
        update_last(df)
        update_marks(df)
        update_sunday(df)
    else:
        df = pd.read_csv(file_name)
        update_last(df)
        update_date(df)
        update_day(df)
        update_marks(df)
        # NOTE(review): update_sunday(df) was disabled in this branch in the
        # original; left disabled.

    # The original prompt literal was broken across two source lines, which is
    # an unterminated string; it is rejoined here on one line.
    late = input("If any teacher is late \npress \"Y\" else press\"N\" : ")
    if late.upper() == "Y":
        update_late(df)
        # NOTE(review): as in the original, the sheet is only written back to
        # disk on this path — confirm whether the "N" path should save too.
        update_csv(df)

    print("ATTENDACE SHEET OF THIS MONTH : ")
    print(df)
    print("TODAYS ATTENDANCE SHEET")
    print(df[str(get_date())])


if __name__ == "__main__":
    main()
reg format file
For example, suppose I have:
def update_day(df):
    # Repeated from the script as the example under discussion: writes get_day()
    # into the cell selected by row label 0 and key get_date().
    df.loc[0][get_date()] = get_day()
This updates the second row but not the first.
I want to update the first row.
You might want to check your column/row names as that might interfere with the results returned by loc. Remember that loc is a label-based indexer, while iloc is position-based.
Therefore, for the first cell in the first row, you can use iloc, as follows:
df.iloc[0, 0]
To get the entire first row, you can use either:
df.iloc[0, :]
Or more succinctly:
df.iloc[0]
I have a DataFrame df with columns action and pointerID. In the code snippet below I'm iterating through every row which is pretty inefficient because the DataFrame is pretty large. Is there a more efficient way to do this?
# For every processed event, ``annotated`` receives the running gesture number
# of the pointer that the event belongs to.
annotated = []
# curr_pointers[pointer_id] holds the gesture number currently assigned to that
# pointer id.
curr_pointers = []
count = 1

down_actions = {"ACTION_POINTER_DOWN", "ACTION_DOWN"}
up_actions = {"ACTION_POINTER_UP", "ACTION_UP", "ACTION_CANCEL"}

# Iterate the three needed columns directly instead of df.iterrows(), which
# builds a Series per row and is much slower on large frames.
for action, pointer_id, action_index in zip(
    df["action"], df["pointerID"], df["actionIndex"]
):
    if action == "ACTION_MOVE":
        annotated.append(curr_pointers[pointer_id])
    elif action in down_actions:
        # Only the row of the pointer that actually went down starts a gesture.
        if action_index != pointer_id:
            continue
        if pointer_id >= len(curr_pointers):
            curr_pointers.append(count)
        else:
            curr_pointers[pointer_id] = count
        annotated.append(count)
        count += 1
    elif action in up_actions:
        if action_index != pointer_id:
            continue
        annotated.append(curr_pointers[pointer_id])
    else:
        print("{} unknown".format(action))
df = pd.read_csv('./test22.csv')
df.head(5)
# Turn NaN cells into None so the ``== None`` checks below can match them.
df = df.replace(np.nan, None)
# Asker's original loop, kept as posted: ``load`` is recomputed for every row
# but never stored anywhere, which is why the DataFrame is unchanged afterwards.
# NOTE(review): the original indentation was lost; the nesting below is the
# most plausible reading.
for index,col in df.iterrows():
    # Extract only if date1 happened earlier than date2
    load = 'No'
    if col['date1'] == None or col['date2'] == None:
        load = 'yes'
    elif int(str(col['date1'])[:4]) >= int(str(col['date2'])[:4]) and \
            (len(str(col['date1'])) == 4 or len(str(col['date2'])) == 4):
        load = 'yes'
    elif int(str(col['date1'])[:6]) >= int(str(col['date2'])[:6]) and \
            (len(str(col['date1'])) == 6 or len(str(col['date2'])) == 6):
        load = 'yes'
    elif int(str(col['date1'])[:8]) >= int(str(col['date2'])[:8]):
        load = 'yes'
df.head(5)
After processing the dataset with iterrows as in the code above, the result is not reflected in the actual dataset. I want the result to be reflected in the actual dataset.
How can I apply it to the actual dataset?
Replace your for loop with a function that returns a boolean, then you can use df.apply to apply it to all rows, and then filter your dataframe by that value:
def should_load(x):
    """Return True if the row should be flagged for loading.

    A row is flagged when either date is missing (``None``), or when ``date1``
    is not earlier than ``date2`` at the coarsest precision of the two values:
    year if either value has 4 characters, otherwise year-month if either has
    6, otherwise the first 8 characters (year-month-day).

    The original fell through to the finer comparisons when the year
    comparison failed, comparing prefixes of different lengths. With a
    year-only ``date2`` (e.g. "20191231" vs "2020") it therefore always
    returned True. Both values are now cut to the same precision and compared
    once.

    Args:
        x: Row-like mapping with the keys ``'date1'`` and ``'date2'``; values
            are expected to be digit strings or ints such as 2020, 202001 or
            20200115 — presumably YYYY[MM[DD]]; verify against the data.

    Returns:
        bool
    """
    date1, date2 = x['date1'], x['date2']
    if date1 is None or date2 is None:
        return True

    first, second = str(date1), str(date2)
    lengths = (len(first), len(second))
    if 4 in lengths:
        precision = 4
    elif 6 in lengths:
        precision = 6
    else:
        precision = 8
    return int(first[:precision]) >= int(second[:precision])
df[df.apply(should_load, axis=1)].head(5)
I am developing a plugin for the GIS software, QGIS. I created a QTableWidget and wish to extract values from it:
Problem is, I use a lot of for loops and if else statements which, up until the last few lines, seems to work fine. I can't seem to follow the logic now as the line print constraint_name only prints off the last value "Example_2". I could take it out of its corresponding else statement and then it will print all values correctly but I need to set it inside a condition:
# Asker's original (Python 2) code, kept as posted.
# NOTE(review): the original indentation was lost. The group loop is placed
# after the row loop because the question reports that only the last
# constraint name is printed — confirm against the original post.
qTable = self.dockwidget.tableWidget # QTableWidget
example_group = root.findGroup('Main group') # Group containing sub groups
all_items = []
gis_map = QgsMapLayerRegistry.instance().mapLayersByName( "Map" )[0] # Layer map in QGIS
idx = gis_map.fieldNameIndex("Rank") # Get "Rank" attribute field from gis_map
for row in range(qTable.rowCount()):
    for col in [0]: # For first column "Constraint name"
        constraint_item = qTable.item(row, col)
        constraint_name = str(constraint_item.text())
    for col in [1]: # For second column "Rank"
        item = qTable.item(row, col)
        item_string = str(item.text())
        all_items.append(item_string)
# At this point constraint_name holds the value from the last table row only.
for group in example_group.children(): # Search for specific group
    if group.name() == "Sub group":
        if len(set(all_items)) == 1: # If all items are the same
            # If "Rank" field exists in layer map
            if idx == -1:
                print 'success'
            else:
                print 'fail'
        else:
            if idx == -1:
                print constraint_name
            else:
                print 'fail'
Is there a way to tidy this up and still get the correct results?
My sincere thanks to the commenters who directed me to a much more efficient solution. Here is the working code (I'm sure it can be refined further):
# Python 2 code from the accepted solution.
# NOTE(review): the original indentation was lost. The group loop is placed
# inside the row loop because the stated goal is to print every constraint
# name rather than only the last one — confirm against the original post.
qTable = self.dockwidget.tableWidget
example_group = root.findGroup('Main group')
all_items = []
gis_map = QgsMapLayerRegistry.instance().mapLayersByName( "Map" )[0]
idx = gis_map.fieldNameIndex("Rank")
for row in range(qTable.rowCount()):
    # Column 0 holds the constraint name, column 1 the rank.
    constraint_item = qTable.item(row, 0)
    constraint_name = str(constraint_item.text())
    item = qTable.item(row, 1)
    item_string = str(item.text())
    all_items.append(item_string)
    for group in example_group.children():
        if group.name() == "Sub group":
            if idx == -1:
                # True when every rank collected so far is the same value.
                if len(set(all_items)) == 1:
                    print 'success'
                else:
                    print 'fail'
            else:
                print constraint_name