Get excel data in order using python - python

I'm using python to get a list functions from the excel in order.
Excel image
I want to get result like that
Login
Submit
forgot password
Delete
(Submitted)
Next>confirm
save
cancel
upload
delete
...
I struggling with the excel indexes.
I think my logic is totally wrong.
import pandas as pd
df = pd.read_excel(fileNameMatrix,sheet_name="doTestCase")
lastRowNumber = len(df)
newDict = df.to_dict()
newJson = df.to_json()
document = docx.Document()
# print(newDict)
datalist = []
def sorting():
controller = True
dataexist = ""
j = 0
listindex = 0
while controller:
try:
if j == 0:
menulist = []
func2 = newDict['Menu']
for i in range(len(func2)):
if type(func2[i]) == str:
menulist.append()
listindex = listindex + 1
dataexist = "1"
datalist.append(menulist)
elif j > 0:
func2 = newDict['Unnamed: '+str(j)]
unnamelist = []
for i in range(len(func2)):
if type(func2[i]) == str:
dataexist = "1"
unnamelist.append()
datalist.append(unnamelist)
# unnamelist = []
if dataexist == "1":
j = j+1
except:
return datalist
controller = False
sorting()
Result
[['Login', 'delete ', 'userinfo'], ['Submit', '(submitted)',
'test'], ['forgot password', 'Next>confirm', 'Save'], ['save',
'delete'], ['cancel'], ['upload']]
Any logic suggesting would be appreciated.

IIUC, use bfill and dropna:
df = (pd.read_excel(fileNameMatrix,sheet_name='doTestCase', header=None)
.bfill(axis=1)[0].dropna())
print(df)
# Output
0 Login
1 Submit
2 forgot password
5 delete
6 (submitted)
8 Next>confirm
9 save
10 cancel
11 upload
12 delete
13 Save
15 test
18 userinfo
Name: 0, dtype: object

Related

While and Append in pandas python

I am trying to Call Api in a while loop and append the dataframe. But it is not appending .
#Max timestamp
MaxTs = 1635876000
api_key = "api_key"
cnt = 0
while cnt < 4:
url = f"https://min-api.cryptocompare.com/data/v2/histohour?fsym=BTC&tsym=USD&limit=2000&toTs={MaxTs}&api_key={api_key}"
r = requests.get(url)
data = r.json()
price_df = pd.DataFrame(data['Data']['Data'])
i = 0
reccnt = 2000
while i < reccnt:
currTs = price_df.iloc[i]['time']
if currTs < MaxTs:
MaxTs = currTs
i = i + 1
if cnt == 0:
#Copying the Orginal df to new df.
newdf = price_df.copy()
else:
#when counter increases append the df.
newdf.append(price_df)
print(MaxTs)
cnt = cnt + 1
You should increase cnt inside the while loop, not outside.
But after you perform a correction you will get several copies of the same price_df. Is that what you are trying to get?

Create an organized DF from a List of mixed type items (Python)

I have a list of items in a 'variable:value' format, but the same 'variable' can appear multiple times. The only thing I know is that all values that follow the 'ID' category belong to the same 'ID', so I know how many rows I need (3 in this example).
I need to create a dataframe from this list. The problem I am encountering is that I cannot add a string value to my DF ('could not convert str to float'). I am not sure how to proceed.
mylist = ['ID:1', 'Date: Oct 2', 'B:88', 'C:noun', 'D:44', 'ID:2', 'B:55', 'C:noun', 'D:45', 'ID:3',
'Date:Sept 5', 'B:55', 'C:verb']
categories = []
for i in mylist:
var = i.split(":")
categories.append(var[0])
variables = list(set(categories))
df = np.empty((3,len(variables)))
df = pd.DataFrame(df)
counter = -1
for i in mylist:
item = i.split(":")
category = item[0]
value = item[1]
tracker = -1
for j in variables:
tracker = tracker + 1
if j == category:
float(value)
df[counter, tracker] = value
if category == "ID":
counter = counter + 1
float(value)
df[counter, 0] = value
In addition, I've tried converting the items in the list to dictionary, but I am not sure if that's the best way to achieve my goal:
df = np.empty((3,len(variables)))
df = pd.DataFrame(df, columns = variables)
mydict = {}
counter = -1
for i in mylist:
item = i.split(":")
category = item[0]
value = item[1]
mydict = {category:value}
if category == "ID":
counter = counter + 1
df[counter] = pd.DataFrame.from_dict(mydict)
else:
df[counter] = pd.DataFrame.from_dict(mydict)
Edit:
I solved it. Code below:
df = np.empty((0,len(variables)))
df = pd.DataFrame(df, columns = variables)
mydict = {}
counter = 0
for i in mylist:
item = i.split(":")
category = item[0]
value = item[1]
mynewdef = {category:value}
counter = counter + 1
if counter == len(mylist):
df = df.append(mydict, ignore_index = True)
df = df.iloc[1:]
elif category == 'ID':
df = df.append(mydict, ignore_index = True)
mydict = {}
mydict.update(mynewdef)
else:
mydict.update(mynewdef)
Perhaps this works
df = pd.DataFrame([e.split(':') for e in my_list],
columns=['key', 'value'])
df = df.pivot(columns='key', values='value') #not tested

Export data efficiently to CSV using python

I am using my arduino to analyze analog inputs and I am accessing the arduino using the pyfirmata library and I ambasically measuring voltages using the 6 analog inputs on my arduino Uno. I need to find a way to live time feed this data into a CSV efficiently... I am not sure on the best way to do that
Any suggestion would help but please write out the code you suggest. I would prefer to use Pandas if possible because it's easier
voltage0 through voltage5 are my variables and I am trying to report those in a nice format that will later have to be analyzed
import time
from datetime import datetime
import pyfirmata
import pandas as pd
board = pyfirmata.Arduino('/dev/ttyACM1')
analog_pin0 = board.get_pin('a:0:i')
analog_pin1 = board.get_pin('a:1:i')
analog_pin2 = board.get_pin('a:2:i')
analog_pin3 = board.get_pin('a:3:i')
analog_pin4 = board.get_pin('a:4:i')
analog_pin5 = board.get_pin('a:5:i')
it = pyfirmata.util.Iterator(board)
it.start()
analog_pin0.enable_reporting()
analog_pin1.enable_reporting()
analog_pin2.enable_reporting()
analog_pin3.enable_reporting()
analog_pin4.enable_reporting()
analog_pin5.enable_reporting()
data = []
count = 0
x = 0
start = 0
while x <= 1000:
reading0 = analog_pin0.read()
if reading0 != None:
voltage0 = reading0 * 5
voltage0 = round(voltage0,2)
else:
voltage0 = float('nan')
reading1 = analog_pin1.read()
if reading1 != None:
voltage1 = reading1 * 5
voltage1 = round(voltage1,2)
else:
voltage1 = float('nan')
reading2 = analog_pin2.read()
if reading2 != None:
voltage2 = reading2 * 5
voltage2 = round(voltage2,2)
else:
voltage2 = float('nan')
reading3 = analog_pin3.read()
if reading3 != None:
voltage3 = reading3 * 5
voltage3 = round(voltage3,2)
else:
voltage3 = float('nan')
reading4 = analog_pin4.read()
if reading4 != None:
voltage4 = reading4 * 5
voltage4 = round(voltage4,2)
else:
voltage4 = float('nan')
reading5 = analog_pin5.read()
if reading5 != None:
voltage5 = reading5 * 5
voltage5 = round(voltage5,2)
else:
voltage5 = float('nan')
datarow = {'Voltage0': voltage0, 'Voltage1': voltage1, 'Voltage2' : voltage2, 'Voltage3': voltage3, 'Voltage4' : voltage4, 'Voltage5' : voltage5, 'Time' : time.strftime("%Y-%m-%d_%H:%M:%S")}
data.append(datarow)
if count%500 == 0:
dataframe = pd.DataFrame(data)
dataframe.to_csv('data.csv')
x += 1
count += 1
#time.sleep(1)enter code here
Your code seems to work, but it's not very efficient. Every 500 iterations, you rewrite all your data instead of updating your file with the new data in the end. You might consider saving it this way instead:
if count%500 == 0:
dataframe = pd.DataFrame(data)
dataframe.to_csv('data.csv',mode='a',header=False)
data = []
If it's still not fast enough, you might consider saving your data to a binary format such as .npy (numpy format), and convert it later to csv.

Single list.count instead of multiple

Im parsed list of crew witch one looks like:
20;mechanic;0;68
21;cook;0;43
22;scientist;0;79
23;manager;1;65
24;mechanic;1;41
etc
And now I'm trying to figure out how to count number of workers who have 60 or more stamina( the last element in each employee )
There is my code:
with open('employee.txt', 'r') as employee_list:
count = 0
for employee in employee_list.readlines():
employee_data = employee.rstrip().split(';')
if int(employee_data[3]) >= 60:
count += 1
print(count)
Print from terminal:
1
2
3
...
90
And there is the right answer I think, but is there anyway to get only one 'total' count, not a 90ty strings ?
Just print one line after the loop is done.
with open('employee.txt', 'r') as employee_list:
count = 0
for employee in employee_list.readlines():
employee_data = employee.rstrip().split(';')
if int(employee_data[3]) >= 60:
count += 1
print(count)
But I would also recommend using pandas for data manipulation. For example:
df = pd.read_csv('employee.txt', sep=';')
df.columns = ['col1', 'col2', 'col3', 'stamina']
Then just filter and get the size:
df[df.stamina >= 60].size
So after a day of thinking I wrote this and get right answer ( maybe someone will find this helpful):
def total_resist_count():
# with open('employee.txt', 'r') as employee_list:
employee_list = [input() for i in range(120)]
candidates = []
for employee in employee_list:
employee_data = employee.rstrip().split(';')
if int(employee_data[3]) >= 60:
candidates.append(employee_data)
return candidates
required_professionals = {
'computers specialist': 5,
'cook': 3,
'doctor': 5,
'electrical engineer': 4,
'manager': 1,
'mechanic': 8,
'scientist': 14
}
expedition_total = 40
female_min = 21
male_min = 12
def validate_solution(cur_team, num_females, num_males):
global expedition_total, female_min, male_min
if sum(cur_team) != expedition_total or num_females < female_min or num_males < male_min:
return False
num_of_free_vacancies = 0
for k in required_professionals:
num_of_free_vacancies += required_professionals[k]
if num_of_free_vacancies > 0:
return False
return True
TEAM = None
def backtrack(candidates, cur_team, num_females, num_males):
global required_professionals, expedition_total, TEAM
if sum(cur_team) > expedition_total or TEAM is not None:
return
if validate_solution(cur_team, num_females, num_males):
team = []
for i, used in enumerate(cur_team):
if used == 1:
team.append(candidates[i])
TEAM = team
return
for i in range(len(candidates)):
if cur_team[i] == 0 and required_professionals[candidates[i][1]] > 0:
cur_team[i] = 1
required_professionals[candidates[i][1]] -= 1
if candidates[i][2] == '1':
backtrack(candidates, cur_team, num_females, num_males + 1)
else:
backtrack(candidates, cur_team, num_females + 1, num_males)
required_professionals[candidates[i][1]] += 1
cur_team[i] = 0
if __name__ == '__main__':
ec = decode_fcc_message()
candidates = total_resist_count(ec)
cur_team = [0] * len(candidates)
backtrack(candidates, cur_team, 0, 0)
s = ""
for t in TEAM:
s += str(t[0]) + ';'
print(s)

Why is my code looping at just one line in the while loop instead over the whole block?

Sorry for the unsophisticated question title but I need help desperately:
My objective at work is to create a script that pulls all the records from exacttarget salesforce marketing cloud API. I have successfully setup the API calls, and successfully imported the data into DataFrames.
The problem I am running into is two-fold that I need to keep pulling records till "Results_Message" in my code stops reading "MoreDataAvailable" and I need to setup logic which allows me to control the date from either within the API call or from parsing the DataFrame.
My code is getting stuck at line 44 where "print Results_Message" is looping around the string "MoreDataAvailable"
Here is my code so far, on lines 94 and 95 you will see my attempt at parsing the date directly from the dataframe but no luck and no luck on line 32 where I have specified the date:
import ET_Client
import pandas as pd
AggreateDF = pd.DataFrame()
Data_Aggregator = pd.DataFrame()
#Start_Date = "2016-02-20"
#End_Date = "2016-02-25"
#retrieveDate = '2016-07-25T13:00:00.000'
Export_Dir = 'C:/temp/'
try:
debug = False
stubObj = ET_Client.ET_Client(False, debug)
print '>>>BounceEvents'
getBounceEvent = ET_Client.ET_BounceEvent()
getBounceEvent.auth_stub = stubObj
getBounceEvent.search_filter = {'Property' : 'EventDate','SimpleOperator' : 'greaterThan','Value' : '2016-02-22T13:00:00.000'}
getResponse1 = getBounceEvent.get()
ResponseResultsBounces = getResponse1.results
Results_Message = getResponse1.message
print(Results_Message)
#EventDate = "2016-05-09"
print "This is orginial " + str(Results_Message)
#print ResponseResultsBounces
i = 1
while (Results_Message == 'MoreDataAvailable'):
#if i > 5: break
print Results_Message
results1 = getResponse1.results
#print(results1)
i = i + 1
ClientIDBounces = []
partner_keys1 = []
created_dates1 = []
modified_date1 = []
ID1 = []
ObjectID1 = []
SendID1 = []
SubscriberKey1 = []
EventDate1 = []
EventType1 = []
TriggeredSendDefinitionObjectID1 = []
BatchID1 = []
SMTPCode = []
BounceCategory = []
SMTPReason = []
BounceType = []
for BounceEvent in ResponseResultsBounces:
ClientIDBounces.append(str(BounceEvent['Client']['ID']))
partner_keys1.append(BounceEvent['PartnerKey'])
created_dates1.append(BounceEvent['CreatedDate'])
modified_date1.append(BounceEvent['ModifiedDate'])
ID1.append(BounceEvent['ID'])
ObjectID1.append(BounceEvent['ObjectID'])
SendID1.append(BounceEvent['SendID'])
SubscriberKey1.append(BounceEvent['SubscriberKey'])
EventDate1.append(BounceEvent['EventDate'])
EventType1.append(BounceEvent['EventType'])
TriggeredSendDefinitionObjectID1.append(BounceEvent['TriggeredSendDefinitionObjectID'])
BatchID1.append(BounceEvent['BatchID'])
SMTPCode.append(BounceEvent['SMTPCode'])
BounceCategory.append(BounceEvent['BounceCategory'])
SMTPReason.append(BounceEvent['SMTPReason'])
BounceType.append(BounceEvent['BounceType'])
df1 = pd.DataFrame({'ClientID': ClientIDBounces, 'PartnerKey': partner_keys1,
'CreatedDate' : created_dates1, 'ModifiedDate': modified_date1,
'ID':ID1, 'ObjectID': ObjectID1,'SendID':SendID1,'SubscriberKey':SubscriberKey1,
'EventDate':EventDate1,'EventType':EventType1,'TriggeredSendDefinitionObjectID':TriggeredSendDefinitionObjectID1,
'BatchID':BatchID1,'SMTPCode':SMTPCode,'BounceCategory':BounceCategory,'SMTPReason':SMTPReason,'BounceType':BounceType})
#print df1
#df1 = df1[(df1.EventDate > "2016-02-20") & (df1.EventDate < "2016-02-25")]
#AggreateDF = AggreateDF[(AggreateDF.EventDate > Start_Date) and (AggreateDF.EventDate < End_Date)]
print(df1['ID'].max())
AggreateDF = AggreateDF.append(df1)
print(AggreateDF.shape)
#df1 = df1[(df1.EventDate > "2016-02-20") and (df1.EventDate < "2016-03-25")]
#AggreateDF = AggreateDF[(AggreateDF.EventDate > Start_Date) and (AggreateDF.EventDate < End_Date)]
print("Final Aggregate DF is: " + str(AggreateDF.shape))
#EXPORT TO CSV
AggreateDF.to_csv(Export_Dir +'DataTest1.csv')
#with pd.option_context('display.max_rows',10000):
#print (df_masked1.shape)
#print df_masked1
except Exception as e:
print 'Caught exception: ' + str(e.message)
print e
Before my code parses the data, the orginal format I get of the data is a SOAP response, this is what it look like(below). Is it possible to directly parse records based on EventDate from the SOAP response?
}, (BounceEvent){
Client =
(ClientID){
ID = 1111111
}
PartnerKey = None
CreatedDate = 2016-05-12 07:32:20.000937
ModifiedDate = 2016-05-12 07:32:20.000937
ID = 1111111
ObjectID = "1111111"
SendID = 1111111
SubscriberKey = "aaa#aaaa.com"
EventDate = 2016-05-12 07:32:20.000937
EventType = "HardBounce"
TriggeredSendDefinitionObjectID = "aa111aaa"
BatchID = 1111111
SMTPCode = "1111111"
BounceCategory = "Hard bounce - User Unknown"
SMTPReason = "aaaa"
BounceType = "immediate"
Hope this makes sense, this is my desperately plea for help.
Thank you in advance!
You don't seem to be updating Results_Message in your loop, so it's always going to have the value it gets in line 29: Results_Message = getResponse1.message. Unless there's code involved that you didn't share, that is.

Categories

Resources