Duplicated data in excel using Openpyxl - python

I have created a Python script that appends data to an Excel file. However, the data written to Excel contains multiple duplicates. Can someone help me fix my script?
# Question code: reads the logins listed in users.xml, looks each one up via
# tls.getUserByLogin() (tls is not defined in this snippet) and writes the
# active users to userData.xlsx. Indentation was lost when the post was
# extracted, so the nesting below has to be inferred.
tree = ET.parse('users.xml')
root = tree.getroot()
# Create the Excel workbook and name its default sheet
wb = Workbook()
ws = wb.active
ws.title = ("Active Users")
df=pd.DataFrame(columns=["Login", "User Name", "Role", "Status"])
for user in root.findall('user'):
login = user.find('login').text
for m in tls.getUserByLogin(login):
user_status = int(m.get("isActive"))
if user_status == 1:
lastname = m.get("lastName")
firstname = m.get("firstName")
userLogin = m.get("login")
activeStatus = ("Active User")
role = m.get("globalRole")
# NOTE(review): tproject is assigned but never read in this snippet.
tproject = m.get("tprojectRoles")
print("Login: " + userLogin + " " + lastname + " " + firstname + " Role: " + str(role['name']) + " " + str(activeStatus))
# Add one row for this active user at the end of the DataFrame
df.loc[len(df.index)] =[userLogin, lastname, str(role['name']), str(activeStatus)]
# NOTE(review): this loop appends every row of df (header included) to the
# sheet. It appears to run once per active user, which would explain the
# repeated header and rows in the output shown below -- confirm against the
# original indentation.
for row in dataframe_to_rows(df, index = False):
ws.append(row)
else:
# Inactive users are only reported on stdout, not written to the workbook
inactive = (str(m.get("firstName")) + " " + str(m.get("lastName")) +": User is not Active")
print(inactive)
wb.save(filename = 'userData.xlsx')
The output in excel is this:
Login = A1 , User Name = B1, Role = C1, Status = D1
Login User Name Role Status
admin Administrator Admin Active
Login User Name Role Status
admin Administrator Admin Active
user1 Pedro leader Active
Login User Name Role Status
admin Administrator Admin Active
user1 Pedro leader Active
user2 Juan leader Active
Also, for the else branch that handles inactive users, is it possible to append them to another sheet in the same Excel file? Thank you all.

Hi @Redox and @taipei, thank you for your quick responses and answers.
I have resolved my duplication issue using a different approach :)
def getUserDetail():
    """Export the active users listed in users.xml to userData.xlsx.

    Every login found in users.xml is looked up with tls.getUserByLogin().
    Users whose "isActive" value is 1 are appended as one row each to the
    "Active Users" sheet; all other users are only reported on stdout.
    The header row is written once and the workbook is saved once, after
    every login has been processed, so no row is duplicated.
    """
    root = ET.parse('users.xml').getroot()

    # Create the workbook and write the header row exactly once.
    workbook = Workbook()
    ws = workbook.active
    ws.title = "Active Users"
    ws.append(['Login', 'User Name', 'Role', 'Status'])

    for user in root.findall('user'):
        login = user.find('login').text
        for m in tls.getUserByLogin(login):
            if int(m.get("isActive")) != 1:
                print(str(m.get("firstName")) + " " + str(m.get("lastName")) + ": User is not Active")
                continue

            lastname = m.get("lastName")
            firstname = m.get("firstName")
            userLogin = m.get("login")
            activeStatus = "Active User"
            role_name = str(m.get("globalRole")['name'])
            print("Login: " + userLogin + " " + lastname + " " + firstname + " Role: " + role_name + " " + activeStatus)
            # One sheet row per active user. The two name parts are joined
            # with a space so they do not run together in the "User Name" cell.
            ws.append([userLogin, lastname + " " + firstname, role_name, activeStatus])

    # Save once, outside all of the loops.
    workbook.save(filename='userData.xlsx')


getUserDetail()

The ws.append() loop and wb.save() should be outside ALL of the for loops, including the first one. The updated code is below.
# Collect every active user first, then write the table to the sheet and
# save the workbook once, after all loops have finished.
tree = ET.parse('users.xml')
root = tree.getroot()

# Create the Excel workbook and name its default sheet
wb = Workbook()
ws = wb.active
ws.title = "Active Users"

columns = ["Login", "User Name", "Role", "Status"]
rows = []  # one entry per active user
for user in root.findall('user'):
    login = user.find('login').text
    for m in tls.getUserByLogin(login):
        user_status = int(m.get("isActive"))
        if user_status == 1:
            lastname = m.get("lastName")
            firstname = m.get("firstName")
            userLogin = m.get("login")
            activeStatus = "Active User"
            role = m.get("globalRole")
            print("Login: " + userLogin + " " + lastname + " " + firstname + " Role: " + str(role['name']) + " " + str(activeStatus))
            rows.append([userLogin, lastname, str(role['name']), str(activeStatus)])
        else:
            inactive = (str(m.get("firstName")) + " " + str(m.get("lastName")) + ": User is not Active")
            print(inactive)

# Build the DataFrame in one step instead of growing it row by row.
df = pd.DataFrame(rows, columns=columns)

# Outside ALL for loops: the header and each user row are appended exactly once.
for row in dataframe_to_rows(df, index=False):
    ws.append(row)
wb.save(filename='userData.xlsx')

Are you sure that users.xml contains each user only once?
If you're not sure, I think it's better to add a check for users that have already been processed.
To achieve that, you can use a set (or a list or dictionary) to temporarily store the logins seen so far inside the loop, and check whether the current user already exists:
. . .
user_tmp = set()  # logins seen so far; a set gives constant-time membership tests
for user in root.findall('user'):
    login = user.find('login').text
    # If this login was already handled, skip to the next user
    if login in user_tmp:
        continue
    # First time this login is seen: remember it and carry on processing
    user_tmp.add(login)
. . .
Since you are using a pandas DataFrame, you can write multiple sheets when saving the data by passing a pd.ExcelWriter to DataFrame.to_excel:
# Example: with the active users in df_active and the inactive users in
# df_inactive, create an Excel writer object and write one sheet per frame.
# The path is a raw string so the backslash stays literal; in a normal
# string "\f" would be read as a form-feed escape.
with pd.ExcelWriter(r"path to file\filename.xlsx") as writer:
    # use the to_excel function and specify the sheet_name and index
    # to store each dataframe in its own sheet
    df_active.to_excel(writer, sheet_name="Active", index=False)
    df_inactive.to_excel(writer, sheet_name="Inactive", index=False)
I hope you can get hints to solve your issues from my suggestions.

Related

List index out of range when writing to a document with selenium

I am trying to write uni names, department names and ratings to a file from https://www.whatuni.com/university-course-reviews/?pageno=14. It goes well until I reach a post without a department name, at which point it gives me the error:
file.write(user_name[k].text + ";" + uni_names[k].text + ";" + department[k].text + ";" + date_posted[k].text +
IndexError: list index out of range
Here is the code I use. I believe I need to somehow write null or a space when the department doesn't exist. I tried using if not / else, but it didn't work for me. I would appreciate any help. Thank you.
# Question code: for up to 20 pages, collects parallel lists of review fields
# with page-wide XPath queries, appends one ';'-separated line per review to
# uni_scraping.csv, then clicks the element with class 'mr0' to move on.
# Indentation was lost when the post was extracted.
# NOTE(review): '#class' inside the XPath strings below looks like a garbled
# '@class' (XPath's attribute syntax) -- confirm against the original post.
for i in range(20):
try:
driver.refresh()
uni_names = driver.find_elements_by_xpath('//div[#class="rlst_wrap"]/h2/a')
department_names = driver.find_elements_by_xpath('//div[#class="rlst_wrap"]/h3/a')
user_name = driver.find_elements_by_xpath('//div[#class="rev_name"]')
date_posted = driver.find_elements_by_xpath('//div[#class="rev_dte"]')
uni_rev = driver.find_elements_by_xpath('(//div[#class="reviw_rating"]/div[#class="rate_new"]/p)')
uni_rating = driver.find_elements_by_xpath('(//div[#class="reviw_rating"]/div[#class="rate_new"]/span[starts-with(#class,"ml5")])')
job_prospects = driver.find_elements_by_xpath('//span[text()="Job Prospects"]/following-sibling::span')
course_and_lecturers = driver.find_elements_by_xpath('//span[text()="Course and Lecturers"]/following-sibling::span')
# NOTE(review): each "None" fallback below is a plain string, but the write
# further down indexes the value and calls .get_attribute()/.text on the
# item, which a one-character string does not provide.
if not course_and_lecturers:
lecturers= "None"
else:
lecturers = course_and_lecturers
# NOTE(review): in XPath, `text()= "Facilities" or "Uni Facilities"` is true
# for every span because a non-empty string literal is truthy; comparing
# text() against each label separately was presumably intended -- confirm.
uni_facilities = driver.find_elements_by_xpath('//span[text()= "Facilities" or "Uni Facilities"]/following-sibling::span')
if not uni_facilities:
facilities = "None"
else:
facilities = uni_facilities
student_support = driver.find_elements_by_xpath('//span[text()="Student Support"]/following-sibling::span')
if not student_support:
support = "None"
else:
support = student_support
with open('uni_scraping.csv', 'a') as file:
# Every list is indexed with the same k, although each list came from its own
# page-wide query and may have a different length than uni_names.
for k in range(len(uni_names)):
# This check is only true when the page has no department links at all; a
# list that is merely shorter than uni_names still reaches department[k].
if not department_names:
department = "None"
else:
department = department_names
file.write(user_name[k].text + ";" + uni_names[k].text + ";" + department[k].text + ";" + date_posted[k].text +
";" + uni_rating[k].get_attribute("class") + ";" + job_prospects[k].get_attribute("class") +
";" + lecturers[k].get_attribute("class") + ";" + facilities[k].get_attribute("class") +
";" + support[k].get_attribute("class") + ";" + uni_rev[k].text + "\n")
next_page = driver.find_element_by_class_name('mr0')
next_page.click()
# NOTE(review): the with-statement above already closes the file on exit, so
# this explicit close() looks redundant.
file.close()
except exceptions.StaleElementReferenceException as e:
# Prints the literal string 'e', not the caught exception object
print('e')
pass
driver.close()
Thank you Vimizen for the answer. I did what you suggested and it worked for me. I wrote something like this.
driver = webdriver.Chrome()
driver.get("https://www.whatuni.com/university-course-reviews/?pageno=14")
driver.refresh()

posts = []
# Work on one review container at a time, so a review without a department
# only affects its own entry instead of shifting the indices of later ones.
# XPath selects attributes with '@', hence [@class="rlst_row"].
for post_element in driver.find_elements_by_xpath('//div[@class="rlst_row"]'):
    uni_name = post_element.find_element_by_tag_name('h2')
    try:
        department = post_element.find_element_by_tag_name('h3').text
    except NoSuchElementException:
        # Placeholder stored when the review has no <h3> department heading
        department = "aaaaaaaa"
    user_name = post_element.find_element_by_class_name('rev_name')
    posts.append({
        "uni_name": uni_name.text,
        "department": department,
        "user_name": user_name.text,
    })

print(posts)
driver.close()
Best
You had the right idea when you tried if not department_names, but that check only works if the list is empty. In your case, the issue is that the list is too short.
Because of the universities without departments, department_names will be a shorter list than uni_names.
As a result, in your loop for k in range(len(uni_names)): the value department[k].text will not always be the department of the uni with the same index, and at some point k will be greater than the last index of your department list. That's why department[k] causes an error.
I don't know what the most efficient way around this is, but I think you could fetch larger elements containing the full details of each uni (the whole rlst_wrap, for example) and then search inside each one for that uni's details (with a regexp, for example). That way you would know when there is no department and avoid the issue.

GCP Security Command Center API - how to get source_properties

When you're on the Google Console, Security Command Center, Findings, you can click on an item to view the details. There is a section that lists "Attributes" and "Source Properties". I would like to get some of these values. The code below is taken from this page (https://cloud.google.com/security-command-center/docs/how-to-api-list-findings) and modified to get what I need:
# Question code: lists findings filtered to severity="HIGH" and copies a few
# fields from each result into local variables. Indentation was lost when the
# post was extracted.
from google.cloud import securitycenter
client = securitycenter.SecurityCenterClient()
organization_id = "<my organization id>"
org_name = "organizations/{org_id}".format(org_id=organization_id)
# NOTE(review): all_sources is not defined anywhere in this snippet; it
# presumably should be derived from org_name before this call -- confirm.
finding_result_iterator = client.list_findings(request={"parent": all_sources, "filter": 'severity="HIGH"'})
for i, finding_result in enumerate(finding_result_iterator):
sourceId = finding_result.finding.resource_name
title = finding_result.finding.category
alertTime = finding_result.finding.event_time
serviceName = finding_result.resource.type_
# Placeholders the question wants to fill from the finding's source properties
description = ""
additionalInfo = ""
I would like to get the "explanation" and "recommendation" values from Source Properties, but I don't know where to get them. The reference page shows the output for each finding_result in the loop. The Console displays these properties, but I don't know how to retrieve them through the API, and I've been searching the web for an answer. I'm hoping someone here has one.
So, I was being a bit impatient with my question, both here and with Google Support. When I tightened up the filters for my call, I found records that do indeed have the two values I was looking for. For those who are interested, I've included some junky test code below.
from google.cloud import securitycenter

# Print selected details of the findings returned by Security Command Center,
# including the "Explanation" and "Recommendation" source properties.
client = securitycenter.SecurityCenterClient()
organization_id = "<my org id>"
org_name = "organizations/{org_id}".format(org_id=organization_id)
all_sources = "{org_name}/sources/-".format(org_name=org_name)

# A tighter filter than severity alone; with it the returned findings did
# carry the two source properties of interest.
finding_filter = (
    'severity="HIGH" AND state="ACTIVE" '
    'AND category!="Persistence: IAM Anomalous Grant" '
    'AND category!="MFA_NOT_ENFORCED"'
)
finding_result_iterator = client.list_findings(
    request={"parent": all_sources, "filter": finding_filter}
)

for i, finding_result in enumerate(finding_result_iterator):
    finding = finding_result.finding
    resource = finding_result.resource

    sourceId = finding.resource_name
    projectId = resource.project_display_name
    title = finding.category
    alertTime = finding.event_time
    serviceName = resource.type_
    # Fall back to an empty string when the finding has no external_uri
    externalUri = getattr(finding, "external_uri", "")

    # Not every finding carries both properties, so default to "".
    sourceProps = finding.source_properties
    description = str(sourceProps["Explanation"]) if "Explanation" in sourceProps else ""
    additionalInfo = str(sourceProps["Recommendation"]) if "Recommendation" in sourceProps else ""

    print("TITLE: " + title)
    print(" PROJECT ID: " + projectId)
    print(" DESCRIPTION: " + description)
    print(" SOURCE ID: " + sourceId)
    print(" ALERT TIME: {}".format(alertTime))
    print(" SERVICE NAME: " + serviceName)
    print(" ADDITIONAL INFO: Recommendation: " + additionalInfo)
    if externalUri:
        print(", External URI: " + externalUri)

    # Test limiter: i is 0 on the first pass, so this stops after one
    # finding. Remove it to print every finding.
    if i < 1:
        break
So while the question was a bit of a waste, the code might help someone else trying to work with the Security Command Center API.

Read Outlook Shared Calendars via Python

Using Python, how do you read Outlook's Shared Calendar events, and hopefully, also using a time filter?
Here is a relevant post, but to answer this fully:
"""Read the events of a shared Outlook calendar that fall in a date window."""
import datetime

import win32com.client  # for outlook

# set variables
days = 3  # size of the window, counted from today
begin = datetime.date.today()
end = begin + datetime.timedelta(days=days)
events = []  # to write results from calendar loop

# begin importing calendar
Outlook = win32com.client.Dispatch("Outlook.Application")
ns = Outlook.GetNamespace("MAPI")

# turn this into a list to read more calendars
recipient = ns.CreateRecipient("username")  # cmd whoami to find this
resolved = recipient.Resolve()  # checks for username in address book
if not resolved:
    # Fail with a readable message instead of going on with an
    # unresolved recipient.
    raise ValueError("Outlook could not resolve the calendar owner in the address book")

# olFolderCalendar = 9
# For the personal calendar use: ns.GetDefaultFolder(9).Items
appointments = ns.GetSharedDefaultFolder(recipient, 9).Items

# filtering criteria
# https://learn.microsoft.com/en-us/office/vba/api/outlook.items.includerecurrences
# The items are sorted by [Start] first and IncludeRecurrences is enabled
# afterwards, as that page describes; the property is a boolean.
appointments.Sort("[Start]")
appointments.IncludeRecurrences = True
restriction = (
    "[Start] >= '" + begin.strftime("%m/%d/%Y")
    + "' AND [End] <= '" + end.strftime("%m/%d/%Y") + "'"
)

# list of appointments
restrictedItems = appointments.Restrict(restriction)

# loop through all calendar events, and add elements to list
count = 0  # stays 0 when nothing matches; the items are counted while iterating
for count, app in enumerate(restrictedItems, start=1):
    # display values
    print()
    print("item: " + str(count))
    print("start: \t\t" + str(app.Start))
    print("subject: \t" + app.Subject)
    print("end: \t\t" + str(app.End))
    print("recurring: \t" + str(app.IsRecurring))
    print("status: \t" + str(app.MeetingStatus))
    # collect values
    events.append([app.Subject, app.Start, app.End, app.BusyStatus])

Creating a Linux Userlist with Python

I am writing a script in Python that will look at all of the groups AND all of the users on a linux system, and output me a file.
I have a situation where if an account is in a certain group, I want to set the user ID myself (in this example, it is because the account/s is a Non User Account)
Code below:
#/usr/bin/python
# NOTE(review): the line above looks like a shebang that is missing its '!'.
# Question code (Python 2 print statements): recreates ~/newfile.txt and
# writes one 'ACCOUNTID|USERLINKID|APPLICATIONROLE|APPLICATION' line per
# member of every group returned by grp.getgrall(). Indentation was lost when
# the post was extracted.
import grp,pwd,os
from os.path import expanduser
destdir = expanduser("~")
destfile = '/newfile.txt'
appname = 'app name'
groupid = ''
userid = ''
# Delete the old feed file if present, then create the file and write the header
# NOTE(review): both branches open the file and write the same header line;
# only the printed message differs.
if os.path.exists(destdir + destfile):
os.remove(destdir + destfile)
print "file deleted...creating new file"
output = open(destdir + destfile, 'w+')
output.write('ACCOUNTID|USERLINKID|APPLICATIONROLE|APPLICATION' + '\n')
else:
print "no file to delete...creating file"
output = open(destdir + destfile, 'w+')
output.write('ACCOUNTID|USERLINKID|APPLICATIONROLE|APPLICATION' + '\n')
# Get user/group data for the members listed in each group entry
#documentation: https://docs.python.org/2/library/grp.html
groups = grp.getgrall()
for group in groups:
# Per the grp documentation linked above, index 2 is gr_gid, an integer
groupid = group[2]
print groupid #checking to see if group ids print correctly. Yes it does
# Index 3 is gr_mem, the list of member user names
for user in group[3]:
# NOTE(review): groupid is an integer, so comparing it with the string '33'
# is never true and userid keeps its initial ''.
if groupid == '33': #Issue is here!
userid = 'qwerty'
print userid #testing var
# NOTE(review): userid is never reset inside the loops, so once assigned it
# would also be written for members of every later group -- confirm intent.
output.write(user + '|' + userid + '|' + group[0] + '|' + appname + '\n')
The issue is here:
if groupid == '33': #Issue is here!
userid = 'qwerty'
print userid #testing var
The variable "userid" is never set to its value, and nothing is printed while testing.
The group "33" exists and does have users in it. I cannot figure out why this doesn't work :(
I have another piece of code that does this for users (as I am looking at both Primary and Secondary groups, and once I figure out this part, I can fix the rest)
Your check compares groupid against a string, but it is an integer:
if groupid == 33:  # compare with an int: the grp module reports group ids as integers
    userid = 'qwerty'

I keep getting list out of range trying to convert mysqldatabase into html

I don't get why it keeps giving me the "list index out of range" error for sys.argv[1]. From my understanding, I am passing data to users_database. Help please.
# Question code (Python 2 print statements): prints the rows of report_table
# as an HTML table. The database and table names are read from the command
# line. Indentation was lost when the post was extracted.
import sys, MySQLdb
# PrintFields(database, table): connect to MySQL, run a fixed SELECT and print
# one <tr> per returned row.
# NOTE(review): the `database` parameter is never used in the body.
def PrintFields(database, table):
host = 'localhost'
user = 'root'
# NOTE(review): hard-coded credential in source.
password = 'boysnblue1'
# NOTE(review): parking_report, localhost, root and boysnblue1 are passed as
# bare names that are not defined in this snippet (the string variables above
# are not used), so this call would raise NameError as written.
conn = MySQLdb.Connection(db=parking_report, host=localhost, user=root, passwd=boysnblue1)
mysql = conn.cursor()
# NOTE(review): sql is built here but never executed; the execute() call
# below uses a literal query instead.
sql = """ SHOW COLUMNS FROM %s """ % table
# NOTE(review): the comma after `status` directly before `from` looks like a
# SQL syntax error -- confirm.
mysql.execute("select id, date, time, status, from report_table ")
fields=mysql.fetchall()
print '<table border="0"><tr><th>order</th><th>name</th><th>type</th><th>description</th></tr>'
print '<tbody>'
# counter numbers the printed rows starting at 1
counter = 0
for field in fields:
counter = counter + 1
id = field[0]
date = field[1]
time = field[2]
status = field[3]
print '<tr><td>' + str(counter) + '</td><td>' + id + '</td><td>' + date + '</td><td>' + time + '</td><td>' + status + ' </td></tr>'
print '</tbody>'
print '</table>'
mysql.close()
conn.close()
# sys.argv[0] is the script name, so these two lines need two command-line
# arguments; with fewer, the indexing raises IndexError (the error asked about).
users_database = sys.argv[1]
users_table = sys.argv[2]
print "Wikified HTML for " + users_database + "." + users_table
print "========================"
PrintFields(users_database, users_table)
sys.argv is a list containing the name of the program's file and all of the arguments it was passed on the command line.
If you run python script2.py, the contents of sys.argv will be ['script2.py'].
If you run python script2.py database_name table_name, the contents of sys.argv will be ['script2.py', 'database_name', 'table_name'], which is what your program is currently configured to expect:
users_database = sys.argv[1]
users_table = sys.argv[2]
Since you are calling it the first way, sys.argv[1] does not exist, and you get your error that the index (1) is out of range (it only goes to 0).

Categories

Resources