I am trying to add a Pandas dataframe to all the worksheets in an Excel file. However, the header always ends up starting at B1, whereas I am trying to make it start at A1.
Below is the code:
import os
import xlwt
from xlwt.Workbook import *
from pandas import ExcelWriter
import xlsxwriter
from openpyxl import Workbook, load_workbook
import pandas as pd  # `pd` is used below but was missing from the imports

# Header labels "Column1" .. "Column9" for the (otherwise empty) frame.
Categories = ["Column" + str(column) for column in range(1, 10)]

# Build the workbook skeleton: sheets "1_1".."1_4", then "2_0".."6_4".
wb1 = Workbook()
for i in range(1, 5):
    wb1.create_sheet("1_" + str(i))
for group in range(2, 7):
    for i in range(5):
        wb1.create_sheet(str(group) + "_" + str(i))
wb1.save('FrameFiles.xlsx')

# A frame that has column labels only, no rows.
df = pd.DataFrame(columns=Categories)

# Re-open the saved file and attach both the workbook and its sheets to the
# pandas writer before writing the frame onto every sheet.
book = load_workbook('FrameFiles.xlsx')
writer = pd.ExcelWriter('FrameFiles.xlsx', engine='openpyxl')
writer.book = book
writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
for i in wb1.sheetnames:
    df.to_excel(writer, sheet_name=i, index=True, startrow=1, startcol=1)
writer.save()
And the output is coming as following :
enter image description here
I want the header to start at position A1, not B1. I have also tried startrow=0 and startcol=0, but the result is the same. Any suggestion to solve this issue would be highly appreciated.
The empty column is your index column, but you have no index in your dataframe, so it is empty. Try using index=False and you should get what you are expecting:
# index=False leaves out the (empty) index column, so the header begins in A1.
df.to_excel(writer, sheet_name=i, index=False, startrow=0, startcol=0)
I am trying to write 3 different dataframes (df, df_OK and df_KO) to 3 separate worksheets of the same Excel file. However, the code below only outputs df; the other 2 dataframes, df_OK and df_KO, do not end up in their own worksheets in that file.
Any suggestions? Thanks
class blah:
    """Split an Excel sheet on its ``Status`` column and write the parts out.

    ``path`` is concatenated directly with ``file_in`` / ``file_out``, so it
    must already end with a path separator.
    """

    def __init__(self, path, file_in, file_out):
        """Store the directory prefix and the input/output file names."""
        self.path = path
        self.file_in = file_in
        self.file_out = file_out

    def process_file(self):
        """Read the input file and write the 'All', 'OK' and 'KO' frames.

        The 'OK' frame gets an extra row labelled 'Total' holding the sum of
        its ``Price`` column.
        """
        df = pd.read_excel(self.path + self.file_in)
        # .copy() gives df_OK its own data, so adding the 'Total' row below
        # does not assign into a slice of df (SettingWithCopyWarning).
        df_OK = df.loc[df['Status'] == 'OK'].copy()
        df_KO = df.loc[df['Status'] == 'KO']
        df_OK.loc['Total'] = df_OK[['Price']].sum(axis=0)
        writer = pd.ExcelWriter(self.path + self.file_out, engine='xlsxwriter')
        dfs = {
            'All': df,
            'OK': df_OK,
            'KO': df_KO
        }
        for sheet_name in dfs.keys():
            dfs[sheet_name].to_excel(writer, sheet_name=sheet_name, index=False)
            # NOTE(review): save() is called on every pass of this loop.
            writer.save()
# blah() accepts exactly three arguments: the directory prefix, the input
# file name and the output file name (a fourth one raises TypeError).
b = blah('C:/Users/......./',
         'file_in....',
         'file_out...')
b.process_file()
It is because you overwrite the same Excel file in every iteration of your for sheet_name in dfs.keys() loop. So every time you write an Excel file with only a single sheet to the same filename, thus overwriting the previous document.
You should move the writer.save() outside your loop like so:
# Write every frame to its own sheet first ...
for sheet_name, frame in dfs.items():
    frame.to_excel(writer, sheet_name=sheet_name, index=False)
# ... and save the workbook once, after the loop has finished.
writer.save()
import pandas as pd
from openpyxl import load_workbook
# This is how to make a new dataframe and transfer it to a demo excel file in sheet1
# dataframe with '#', Name and Age columns
df = pd.DataFrame({
    '#': [1, 2, 3, 4],
    'Name': ['John Doe', 'Jamie Perkins', 'Daryl Walker', 'Dave Mann'],
    'Age': [10, 20, 30, 45],
})
# Create a Pandas Excel writer using XlsxWriter as the engine.
writer = pd.ExcelWriter('demo.xlsx', engine='xlsxwriter')
# Convert the dataframe to an XlsxWriter Excel object.
df.to_excel(writer, sheet_name='Sheet1', index=False)
# Close the Pandas Excel writer and output the Excel file.
writer.save()

# Up next is how to add new information to sheet1
# new dataframe with same columns
df2 = pd.DataFrame({
    '#': [5, 6, 7, 8],
    'Name': ['E', 'F', 'G', 'H'],
    'Age': [100, 70, 40, 60],
})
# Read everything that is needed from the existing file BEFORE a writer is
# opened on the same path.
# NOTE(review): in some pandas versions a write-mode ExcelWriter opens (and
# empties) its target when it is constructed - confirm for the version in use.
book = load_workbook('demo.xlsx')
reader = pd.read_excel(r'demo.xlsx')
writer = pd.ExcelWriter('demo.xlsx', engine='openpyxl')
# hand the existing workbook to the writer
writer.book = book
# copy existing sheets
writer.sheets = dict((ws.title, ws) for ws in writer.book.worksheets)
# write the new rows below the existing ones (header row + len(reader) rows)
df2.to_excel(writer, index=False, header=False, startrow=len(reader) + 1)
writer.close()
# What I want to figure out is how to modify a name based on input from user and update the information to the existing cell in the excel file
num = input("Which name do you want to change?(choose a number)\n")
number = int(num)

# One branch per selectable entry; each one writes the new name with
# startrow equal to the chosen number.
if number == 1:
    nName = input("What is the new Name\n")
    df3 = pd.DataFrame({'Name': [nName]})
    writer = pd.ExcelWriter('demo.xlsx', engine='openpyxl')
    writer.book = load_workbook('demo.xlsx')
    reader = pd.read_excel(r'demo.xlsx')
    df3.to_excel(writer, index=False, header=False, startrow=1)
    writer.close()
elif number == 2:
    nName = input("What is the new Name\n")
    df4 = pd.DataFrame({'Name': [nName]})
    writer = pd.ExcelWriter('demo.xlsx', engine='openpyxl')
    writer.book = load_workbook('demo.xlsx')
    reader = pd.read_excel(r'demo.xlsx')
    df4.to_excel(writer, index=False, header=False, startrow=2)
    writer.close()
elif number == 3:
    nName = input("What is the new Name\n")
    df5 = pd.DataFrame({'Name': [nName]})
    writer = pd.ExcelWriter('demo.xlsx', engine='openpyxl')
    writer.book = load_workbook('demo.xlsx')
    reader = pd.read_excel(r'demo.xlsx')
    df5.to_excel(writer, index=False, header=False, startrow=3)
    writer.close()
elif number == 4:
    nName = input("What is the new Name\n")
    df6 = pd.DataFrame({'Name': [nName]})
    writer = pd.ExcelWriter('demo.xlsx', engine='openpyxl')
    writer.book = load_workbook('demo.xlsx')
    reader = pd.read_excel(r'demo.xlsx')
    df6.to_excel(writer, index=False, header=False, startrow=4)
    writer.close()
elif number == 5:
    nName = input("What is the new Name\n")
    df7 = pd.DataFrame({'Name': [nName]})
    writer = pd.ExcelWriter('demo.xlsx', engine='openpyxl')
    writer.book = load_workbook('demo.xlsx')
    reader = pd.read_excel(r'demo.xlsx')
    df7.to_excel(writer, index=False, header=False, startrow=5)
    writer.close()
elif number == 6:
    nName = input("What is the new Name\n")
    df8 = pd.DataFrame({'Name': [nName]})
    writer = pd.ExcelWriter('demo.xlsx', engine='openpyxl')
    writer.book = load_workbook('demo.xlsx')
    reader = pd.read_excel(r'demo.xlsx')
    df8.to_excel(writer, index=False, header=False, startrow=6)
    writer.close()
elif number == 7:
    nName = input("What is the new Name\n")
    df9 = pd.DataFrame({'Name': [nName]})
    writer = pd.ExcelWriter('demo.xlsx', engine='openpyxl')
    writer.book = load_workbook('demo.xlsx')
    reader = pd.read_excel(r'demo.xlsx')
    df9.to_excel(writer, index=False, header=False, startrow=7)
    writer.close()
elif number == 8:
    nName = input("What is the new Name\n")
    df10 = pd.DataFrame({'Name': [nName]})
    writer = pd.ExcelWriter('demo.xlsx', engine='openpyxl')
    writer.book = load_workbook('demo.xlsx')
    reader = pd.read_excel(r'demo.xlsx')
    df10.to_excel(writer, index=False, header=False, startrow=8)
    writer.close()
else:
    print("Wrong input please try again")
I know how to make a new dataframe and transfer it to a demo excel file and call it sheet1. I also know how to append new data into a current existing excel file.
What I want to know is how to modify a cell based on input from user and update the information to the existing cell in the excel file. Using the if statements seems like it works but does not update the excel file. I also want to know if there is an easier way around this besides using a lot of else if's. I only want to change one name at a time.
I'd appreciate any help I can get!
Edit: Thank you for the feedback on the syntax. Unfortunately, I'm still running into the problem of the Excel file not updating: there is no error in the terminal, yet the file is not being updated.
When I ran your code, I was getting a new worksheet "Sheet11" with the updated data, so you need to specify the sheetname and startcol in your to_excel.
If your excel is small, you can modify the DataFrame and rewrite the entire sheet each time. Below I concatenate the 2 DataFrames into one then modify from there.
# What I want to figure out is how to modify a name based on input from user and update the information to the existing cell in the excel file
# Work on one frame that holds the original and the appended rows.
df = pd.concat([df, df2], ignore_index=True)
print(df)
num = input("Which name do you want to change?(choose a number)\n")
number = int(num)
if number in [1, 2, 3, 4, 5, 6, 7, 8]:
    nName = input("What is the new Name\n")
    # Replace the name on the row whose '#' matches the chosen number.
    df.loc[df['#'] == number, 'Name'] = nName
    print(df)
    # Open the writer only once there is something to write, so that a wrong
    # choice neither touches demo.xlsx nor leaves an unclosed writer behind.
    writer = pd.ExcelWriter('demo.xlsx', engine='openpyxl')
    # Rewrite the whole sheet from the modified frame.
    df.to_excel(writer, sheet_name='Sheet1', index=False)
    writer.close()
else:
    print("Wrong input please try again")
EDIT: If you just want to update the one cell, this works:
# What I want to figure out is how to modify a name based on input from user and update the information to the existing cell in the excel file
df = pd.concat([df, df2], ignore_index=True)
print(df)
num = input("Which name do you want to change?(choose a number)\n")
number = int(num)
if number in [1, 2, 3, 4, 5, 6, 7, 8]:
    nName = input("What is the new Name\n")
    df.loc[df['#'] == number, 'Name'] = nName
    print(df)
    # Need to load the workbook, or it overwrites all data on the sheet.
    # It is loaded before the writer is created, and the writer is created
    # only for a valid choice, so the else branch never leaves one unclosed.
    book = load_workbook('demo.xlsx')
    writer = pd.ExcelWriter('demo.xlsx', engine='openpyxl')
    writer.book = book
    # Evidently need to load the sheet names in order to write to an existing sheet
    writer.sheets = dict((ws.title, ws) for ws in writer.book.worksheets)
    # Write just the one name: sheet row `number` (row 0 is the header),
    # column 1 (the 'Name' column, right after '#').
    df.loc[df['#'] == number, 'Name'].to_excel(
        writer,
        sheet_name='Sheet1',
        index=False,
        header=False,
        startrow=number,
        startcol=1,
    )
    writer.save()
else:
    print("Wrong input please try again")
Does the requirement for writer.book = load_workbook('demo.xlsx') imply that the writer is just rewriting the entire book each time?
I am updating an existing Excel workbook using pandas. When using an ExcelWriter object, can I overwrite a sheet if it exists and otherwise create a new sheet? The code I have appends new sheets, but when I try to overwrite an existing sheet it appends a new sheet with a slightly varied name (ex: If sheet 'data1' exists, running the code appends a new sheet named 'data1 1').
import pandas as pd
import openpyxl
# Workbook that already exists on disk and should be updated in place.
path = 'test-out.xlsx'
# Load the existing workbook with openpyxl.
book = openpyxl.load_workbook(path)
# Sample frame: two columns, 'a' and 'b', each holding 0..9.
df1 = pd.DataFrame({'a': range(10), 'b': range(10)})
# Open the writer in append mode and hand it the loaded workbook.
writer = pd.ExcelWriter(path, mode='a')
writer.book = book
# NOTE(review): writer.sheets is not populated before this write.
df1.to_excel(writer, sheet_name='data1')
writer.save()
Pass the sheets to the writer with writer.sheets = dict((ws.title, ws) for ws in book.worksheets):
import pandas as pd
import openpyxl
# Workbook that already exists on disk and should be updated in place.
path = 'test-out.xlsx'
# Load the existing workbook with openpyxl.
book = openpyxl.load_workbook(path)
# Sample frame: two columns, 'a' and 'b', each holding 0..9.
df1 = pd.DataFrame({'a': range(10), 'b': range(10)})
# Open the writer in append mode and hand it the loaded workbook.
writer = pd.ExcelWriter(path, mode='a')
writer.book = book
# Register every existing worksheet with the writer, keyed by its title.
writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
df1.to_excel(writer, sheet_name='data1')
writer.save()
Edit:
Seems like you don't even need mode='w'; writer = pd.ExcelWriter(path, mode='a') still works...
I am trying to create a timed backup system for my excel document with Python as multiple users will be accessing it.
I want to change the path the file is saved to, so that it does not end up in my local directory.
Here's the code;
import pandas as pd
import datetime
import numpy
# Timestamp for the backup file name, formatted as "YYYY-MM-DD HH.MM".
now = datetime.datetime.now()
ct = now.strftime("%Y-%m-%d %H.%M")

# Read columns A:AY of the 'Jan18' sheet, taking the first row as header
# and the first column as index.
table = pd.read_excel(
    r'Z:\new\Planner_New.xlsx',
    sheet_name='Jan18',
    header=0,
    index_col=0,
    usecols="A:AY",
    convert_float=True,
)

# The backup is named "Planner<timestamp>.xlsx".
writer = pd.ExcelWriter(f'Planner{ct}.xlsx', engine='xlsxwriter')
table.to_excel(writer, sheet_name="Jan18")

# Apply a percentage number format to columns H:AY of the written sheet.
workbook = writer.book
worksheet = writer.sheets['Jan18']
format1 = workbook.add_format({'num_format': '0%'})
worksheet.set_column('H:AY', None, format1)

writer.save()
writer.close()
I have tried
# Attempt: hand the target directory to save() as an argument.
outpath = (r'Z:\backup')
writer.save(outpath)
writer.close()
But get back
TypeError: save() takes 1 positional argument but 2 were given
You need to specify the save location when you create the ExcelWriter object:
# The destination is fixed when the writer is constructed, so the backup
# directory goes into the path passed to ExcelWriter.
writer = pd.ExcelWriter(rf'Z:\backup\Planner{ct}.xlsx', engine='xlsxwriter')
...
writer.save()
writer.close()