When I use gspread_dataframe and gspread to export data from a Jupyter Notebook to a Google Spreadsheet, rows are shuffled, which is not what I want. Can anyone help?
emp_uniq = gspread_df['E_code'].unique()
dict_e_code = {}
x_df = pd.DataFrame()
for emp in list(emp_uniq):
    dict_e_code[emp] = gspread_df[gspread_df['E_code']==emp]
    a = dict_e_code[emp][["Month","E_code","Allocation_ID","Project","Effective_days","Percentage_Util","Onsite/Offshore","Mapping","Payroll"]]
    a_fin = a.groupby(['Month','E_code','Project','Allocation_ID','Percentage_Util','Onsite/Offshore']).sum()['Effective_days']
    new_df = a_fin.to_frame().reset_index()
    new_df['Percentage_Util'] = (new_df['Effective_days']/new_df['Effective_days'].sum())*100
    x_df = x_df.append(new_df)
x_df.reset_index(inplace=True)
x_df.drop(['index'],axis=1,inplace=True)
sheet = client.open(input("Please Enter Main Spreadsheet as Output Data: "))
worksheet_new = sheet.worksheet(input("Please Enter Sheet Name as Output Data: "))
existing = get_as_dataframe(worksheet_new)
a = existing.append(x_df)
gsdf.set_with_dataframe(worksheet_new, a)
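If the reordering is coming from the groupby step, that would explain it: groupby sorts by the group keys by default, so the output order no longer matches the input order. A minimal sketch of the change, assuming the rest of the pipeline stays as above:

# Sketch only: pass sort=False so groups keep the order in which they first appear
# in the data, instead of being sorted by the group keys (the pandas default).
a_fin = a.groupby(
    ['Month', 'E_code', 'Project', 'Allocation_ID', 'Percentage_Util', 'Onsite/Offshore'],
    sort=False
).sum()['Effective_days']
new_df = a_fin.to_frame().reset_index()

If that is not the cause, it at least rules the grouping out as the source of the shuffle.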
I made a script that compares data from different sheets; all good. Now I want to put this updated sheet into the workbook in place of the old one, while keeping the other sheets.
import numpy as np
import pandas as pd
from timestampdirectory import createdir
import openpyxl
from openpyxl import workbook
from openpyxl import worksheet
import os
import time
def svnanalysis():
    dest = createdir()
    dfSvnUsers = pd.read_excel(os.path.join(dest, "SvnUsers.xlsx"))
    dfSvnGroupMembership = pd.read_excel(os.path.join(dest, "SvnGroupMembership.xlsx"))
    dfSvnRepoGroupAccess = pd.read_excel(os.path.join(dest, "SvnRepoGroupAccess.xlsx"))
    dfsvnReposSize = pd.read_excel(os.path.join(dest, "svnReposSize.xlsx"))
    dfsvnRepoLastChangeDate = pd.read_excel(os.path.join(dest, "svnRepoLastChangeDate.xlsx"))
    dfUserDetails = pd.read_excel(r"D:\GIT-files\Automate-Stats\SVN_sample_files\CM_UsersDetails.xlsx")
    timestr = time.strftime("%Y-%m-%d-")
    xlwriter = pd.ExcelWriter(os.path.join(dest, f'{timestr}Usage-SvnAnalysis.xlsx'))
    dfUserDetails.to_excel(xlwriter, sheet_name='UserDetails', index=False)
    dfSvnUsers.to_excel(xlwriter, sheet_name='SvnUsers', index=False)
    dfSvnGroupMembership.to_excel(xlwriter, sheet_name='SvnGroupMembership', index=False)
    dfSvnRepoGroupAccess.to_excel(xlwriter, sheet_name='SvnRepoGroupAccess', index=False)
    dfsvnReposSize.to_excel(xlwriter, sheet_name='svnReposSize', index=False)
    dfsvnRepoLastChangeDate.to_excel(xlwriter, sheet_name='svnRepoLastChangeDate', index=False)
    xlwriter.close()
What's above is in the same script, where I take several xlsx files and create a single xlsx with those files as sheets. Below, I make some changes to the SvnUsers sheet and want to write it back to that workbook in place of the old SvnUsers sheet, keeping the other sheets.
# xlwriter = pd.ExcelWriter(os.path.join(dest, f'{timestr}Usage-SvnAnalysis.xlsx'))
svnUsers = pd.read_excel(os.path.join(dest,f'{timestr}Usage-SvnAnalysis.xlsx'), sheet_name="SvnUsers")
details = pd.read_excel(os.path.join(dest,f'{timestr}Usage-SvnAnalysis.xlsx'), sheet_name="UserDetails")
svnUsers = svnUsers.assign(SVNaccount=svnUsers["accountName"].isin(details["Account"]).astype(bool))
print(svnUsers)
# dfSvnUsers.to_excel(xlwriter, sheet_name='SvnUsers', index = False )
# xlwriter.close()
The easiest way to achieve that would be to overwrite the entire excel sheet, for example like this:
import pandas as pd
# create dataframe from excel file
df = pd.read_excel(
    '2022-06-15-Usage-SvnAnalysis.xlsx',
    engine='openpyxl',
    sheet_name='UserDetails'
)
res_df = ..... # calculate the df you want to write
# overwrite excel sheet with dataframe
with pd.ExcelWriter(
    '2022-06-15-Usage-SvnAnalysis.xlsx',
    engine='openpyxl',
    mode='a',
    if_sheet_exists='replace'
) as writer:
    res_df.to_excel(writer, sheet_name='UserDetails')
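Applied to the snippet from the question, a sketch might look like this, assuming dest, timestr and the modified svnUsers frame from above:

# Sketch only: replaces the SvnUsers sheet; the other sheets in the workbook are untouched.
path = os.path.join(dest, f'{timestr}Usage-SvnAnalysis.xlsx')
with pd.ExcelWriter(path, engine='openpyxl', mode='a', if_sheet_exists='replace') as writer:
    svnUsers.to_excel(writer, sheet_name='SvnUsers', index=False)

Note that if_sheet_exists='replace' needs a reasonably recent pandas (1.3 or later).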
I am trying to copy the data from multiple Excel files into one. I am a novice with Python and openpyxl, so I have opened each file and copied it row by row. I want to do this with multiple files. How do I loop through the rows and columns and copy the data, given that the columns are in the same order in all the files?
import openpyxl as xl
from openpyxl import workbook
incident_wb = xl.load_workbook('incident resolved yesterday.xlsx')
incident_sheet = incident_wb['Page 1']
combined_wb = xl.Workbook()
combined_sheet = combined_wb.active
combined_sheet.title = "combined_sheet"
combined_wb.save('combined_sheet.xlsx')
for row in range(1, incident_sheet.max_row+1):
    incident_no = incident_sheet.cell(row,1)
    opened_date = incident_sheet.cell(row,2)
    shrt_desc = incident_sheet.cell(row,3)
    requester = incident_sheet.cell(row,4)
    incdnt_type = incident_sheet.cell(row,5)
    priority = incident_sheet.cell(row,6)
    assgn_grp = incident_sheet.cell(row,7)
    assgn_to = incident_sheet.cell(row,8)
    updated = incident_sheet.cell(row,9)
    status = incident_sheet.cell(row,10)
    sub_status = incident_sheet.cell(row,11)
    ##copy the data into the new sheet
    incident_no_1 = combined_sheet.cell(row,1)
    incident_no_1.value = incident_no.value
    opened_date_1 = combined_sheet.cell(row,2)
    opened_date_1.value = opened_date.value
    shrt_desc_1 = combined_sheet.cell(row,3)
    shrt_desc_1.value = shrt_desc.value
    requester_1 = combined_sheet.cell(row,4)
    requester_1.value = requester.value
    incdnt_type_1 = combined_sheet.cell(row,5)
    incdnt_type_1.value = incdnt_type.value
    priority_1 = combined_sheet.cell(row,6)
    priority_1.value = priority.value
    assgn_grp_1 = combined_sheet.cell(row,7)
    assgn_grp_1.value = assgn_grp.value
    assgn_to_1 = combined_sheet.cell(row,8)
    assgn_to_1.value = assgn_to.value
    updated_1 = combined_sheet.cell(row,9)
    updated_1.value = updated.value
    status_1 = combined_sheet.cell(row,10)
    status_1.value = status.value
    sub_status_1 = combined_sheet.cell(row,11)
    sub_status_1.value = sub_status.value
    ##print(f"The incident resolved yesterday {incident_no.value}")
combined_wb.save('combined_sheet.xlsx')
An alternative approach would be to build a list of data from multiple Excel files and then write it to another file.
As a proof of concept:
import openpyxl as xl
from openpyxl import workbook
def provide_data(workbookName, sheetName):
    wb = xl.load_workbook(workbookName)
    sheet = wb[sheetName]
    return [[y.value for y in x] for x in sheet.iter_rows()]
# This creates an array of rows, which contain an array of cell values.
# It will be much better to provide mapping for cells and return business object.
def save_data(list_of_sheets):
    combined_wb = xl.Workbook()
    combined_sheet = combined_wb.active
    combined_sheet.title = "combined_sheet"
    for sheet in list_of_sheets:
        for row in sheet:
            combined_sheet.append(row) # combining multiple rows.
    combined_wb.save('combined_sheet.xlsx')
workSheetsToCopy = [['incident resolved yesterday.xlsx', 'Page 1'], ['other.xlsx', 'Page 1']]
workSheetsToCopy = [provide_data(x[0], x[1]) for x in workSheetsToCopy]
save_data(workSheetsToCopy)
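One caveat, assuming each source sheet has a header row: appending every row verbatim will repeat those headers in the combined sheet. A small variation of provide_data that skips the header of each sheet would avoid that:

# Variation (sketch): skip row 1 of each source sheet, assuming it holds column headers.
def provide_data(workbookName, sheetName, skip_header=True):
    wb = xl.load_workbook(workbookName)
    sheet = wb[sheetName]
    min_row = 2 if skip_header else 1
    return [[cell.value for cell in row] for row in sheet.iter_rows(min_row=min_row)]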
I want to push data to a sheet.
When I print from my code it works, but on my sheet only the last element of my list appears.
Here is my code :
import json
from urllib.request import urlopen
import pygsheets
import pandas as pd
with urlopen("myapiurl") as response: source = response.read()
data = json.loads(source)
#authorization
gc = pygsheets.authorize(service_file='myjsonfile')
#open the google spreadsheet
sh = gc.open('Test')
#select the first sheet
wks = sh[0]
# Create empty dataframe
df = pd.DataFrame()
#update the first sheet with df, starting at cell B2.
wks.set_dataframe(df,(1,1))
for item in data["resultsPage"]["results"]["Entry"]:
    id = item["event"]["id"]
    print(id)
    df['id_string'] = id
You can save the ids in a list id_s and finally copy it into a column of the DataFrame. This works because you don't overwrite the column on every iteration.
import json
from urllib.request import urlopen
import pygsheets
import pandas as pd
with urlopen("myapiurl") as response: source = response.read()
data = json.loads(source)
#authorization
gc = pygsheets.authorize(service_file='myjsonfile')
#open the google spreadsheet
sh = gc.open('Test')
#select the first sheet
wks = sh[0]
# Create empty dataframe
df = pd.DataFrame()
# collect the ids in a list first, then assign the whole list to one column
id_s = []
for item in data["resultsPage"]["results"]["Entry"]:
    id = item["event"]["id"]
    print(id)
    id_s.append(id)
df['id_string'] = id_s
# update the first sheet with df, starting at cell A1
wks.set_dataframe(df, (1, 1))
I am trying to read an Excel sheet and save its contents into my database. This is my Excel table:
My code, where I am trying to get the data and save it into a data array, looks like this (the workbook was loaded before with load_workbook):
def getExampleData(workbook):
    sheet = workbook['ExampleData']
    titleCell = sheet['D1']
    assert titleCell.value == u'Overview'
    startRow = 2
    endRow = 6  # this is the first empty row; I have it hardcoded for now,
                # but it needs to be found dynamically in the future
    data['ExData'] = {}
    for i in range(startRow, endRow):
        exData = {}
        exData['Name'] = sheet.cell(row=i, column=1).value
        exData['Param1'] = sheet.cell(row=i, column=2).value
        exData['Param2'] = sheet.cell(row=i, column=3).value
        exData['Param3'] = sheet.cell(row=i, column=4).value
        data['ExData'] = exData
    return data['ExData']
and then I want to load it into my database table named ExampleDB (the whole project is made with Django, so I just import ExampleDB) like this:
def saveExampleData():
    xData = data['ExData']
    ex = ExampleDB(name=xData['Name'], param1=xData['Param1'],
                   param2=xData['Param2'], param3=xData['Param3'])
    ex.save()
I just want to say that I KNOW these functions don't work, but I think they show what I want to do. Maybe someone can help me understand how this should work.
I am thankful for any help!
IIUC, here is a solution, using pandas and sqlalchemy:
from sqlalchemy import create_engine
import pandas as pd
db = create_engine('sqlite:///stocks.db')
df = pd.read_excel('excelfile.xlsx')
df.to_sql('required_table', db, if_exists='append')
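If the data lives on a specific sheet and you don't want the DataFrame index stored as an extra column, both calls take parameters for that. A small variation, assuming the sheet is named ExampleData as in the question:

# Variation (sketch): read a specific sheet and skip writing the DataFrame index.
df = pd.read_excel('excelfile.xlsx', sheet_name='ExampleData')
df.to_sql('required_table', db, if_exists='append', index=False)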
I am trying to use an Add-In for Excel that gets removed when I use win32com.client, forcing me to restart my computer. I have found a workaround using xlrd, openpyxl, and pandas, but I have run into a completely new issue.
I first open the Excel file with pandas and read through it, extracting the information that I require.
xl = pandas.ExcelFile(xlsx)
sheets = xl.sheet_names
df = xl.parse(sheets[2])
I then have to go into the same workbook and update the Meter Name and the date.
for i, value in enumerate(dataList):
    wb = openpyxl.load_workbook(xlsx)
    worksheets = wb.sheetnames
    worksheet = wb.get_sheet_by_name(worksheets[0])
    rowCoordinate = i
    meterName = value[0]
    creationDate = value[1]
    units = value[2]
    worksheet.cell(row=1, column=2).value = meterName
    wb.save(copyXlsx)
    dateList = []
    for k, dateRange in enumerate(value[3]):
        sDate = dateRange[0]
        eDate = dateRange[1]
        wb = openpyxl.load_workbook(copyXlsx)
        worksheets = wb.sheetnames
        worksheet = wb.get_sheet_by_name(worksheets[0])
        worksheet.cell(row=2, column=2).value = sDate
        worksheet.cell(row=3, column=2).value = eDate
        wb.save(copyXlsx1)
        print meterName, dateRange
        xl1 = pandas.ExcelFile(copyXlsx1)
        sheets = xl1.sheet_names
        df = xl.parse(sheets[0])
        print df
My issue is that the Excel file opens and writes the information perfectly, but while pandas shows the header information updated, the numbers are still the same as in the original document. I have gone in and explored the intermediate Excel document and it doesn't match the numbers pandas shows.
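One thing that might be worth checking, as a guess: if those numbers are produced by formulas, openpyxl writes the new inputs but never recalculates anything, so pandas reads back the cached results from the last time Excel itself calculated the file. A small check along those lines (the cell address is only a placeholder):

# Guess: see whether the number cells actually hold formulas.
# openpyxl does not evaluate formulas, so pandas would read stale cached values.
wb_f = openpyxl.load_workbook(copyXlsx1)                      # formulas as stored
ws_f = wb_f[wb_f.sheetnames[0]]
print(ws_f.cell(row=5, column=2).value)                       # placeholder cell; shows '=...' if it is a formula
wb_v = openpyxl.load_workbook(copyXlsx1, data_only=True)      # cached values only
print(wb_v[wb_v.sheetnames[0]].cell(row=5, column=2).value)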