ExcelWriter overwriting sheets when writing - python

This program should take the contents of individual sheets and put them into one excel workbook. It almost does that, but it is overwriting instead of appending new sheets into the final workbook. I read that pandas excel writer is the way to go with this, any ideas as to why its having this behavior?
import xlwt, csv, os
from openpyxl import load_workbook
import pandas as pd
from pandas import ExcelWriter
csv_folder = r'C:\Users\Me\Desktop\Test_Folder\\'
for fil in os.listdir(csv_folder):
if '.xlsx' not in fil:
continue
else:
pass
df = pd.read_excel(csv_folder+fil, encoding = 'utf8')
file_name = fil.replace('.xlsx','')
writer = pd.ExcelWriter('condensed_output.xlsx', engine = 'xlsxwriter')
df.to_excel(writer, sheet_name = file_name)
writer.save()
#writer.close()

Make sure the writer.save() is outside of the loop. Also be aware of the character limit on sheetnames, so if the file names are the same up to a certain point, you run the risk of writing over a sheetname that way as well.
import xlwt, csv, os
from openpyxl import load_workbook
import pandas as pd
from pandas import ExcelWriter
csv_folder = r'C:\Users\Me\Desktop\Test_Folder\\'
writer = pd.ExcelWriter('condensed_output.xlsx', engine = 'xlsxwriter')
for fil in os.listdir(csv_folder):
if '.xlsx' not in fil:
continue
else:
pass
df = pd.read_excel(csv_folder+fil, encoding = 'utf8')
file_name = fil.replace('.xlsx','')
df.to_excel(writer, sheet_name = file_name)
writer.save() #make sure this is outside of the loop.
ETA: establish the writer outside of the loop as well

Related

Excel(1) with 10 sheets sorted wrto specific column. Loaded data to excel(2). excel(2) has only 1 sheet which is last sheet of excel(1)

import xlrd
import pandas as pd
wb = xlrd.open_workbook("excel_1.xlsx")
sheets = wb.sheet_names()
xl = pd.ExcelFile("excel_1.xlsx")
for sheet in sheets:
df = xl.parse(sheet)
df = df.sort_values(by="column2")
writer = pd.ExcelWriter("excel_2.xlsx")
df.to_excel(writer, sheet_name=sheet, index=False)
writer.save()
In excel_2.xlsx i can only find the sorted sheet of the last sheet of excel_1.xlsx.
Please help me in this regard.
Thank You
Create and save the writer outside of the loop.
You're overwriting the file for each sheet now.
import xlrd
import pandas as pd
wb = xlrd.open_workbook("excel_1.xlsx")
xl = pd.ExcelFile("excel_1.xlsx")
writer = pd.ExcelWriter("excel_2.xlsx")
for sheet in wb.sheet_names():
df = xl.parse(sheet).sort_values(by="column2")
df.to_excel(writer, sheet_name=sheet, index=False)
writer.save()

exporting lists in python as excel rows, why it just export first row?

I try to export 2 specific lines from multiple (.txt) files in path as excel rows, and exporting them into excel files.. But the (.xlsx) file just containing first row (just exported lines of one text file).
import pandas as pd
import linecache
import xlsxwriter as xlsw
import os
import glob
directory=('C:\Users\john\Desktop')
os.chdir(directory)
files=glob.glob('*.txt')
for filename in files:
name = linecache.getline(filename,5)
id = linecache.getline(filename,13)
info = [name,id]
final_list = []
for i in info:
final_list.append(i.strip())
print (final_list)
df = pd.DataFrame(final_list)
df = df.transpose()
writer = pd.ExcelWriter('test.xlsx', engine='xlsxwriter')
df.to_excel(writer, sheet_name='welcome',startrow=1,startcol=0, header=False, index=False)
writer.save()
Use:
import pandas as pd
import linecache
import xlsxwriter as xlsw
import os
import glob
directory=('test')
os.chdir(directory)
files=glob.glob('*.txt')
final_list = []
for filename in files:
name = linecache.getline(filename,5)
id = linecache.getline(filename,13)
info = [name,id]
final_list.append([i.strip() for i in info])
print (final_list)
df = pd.DataFrame(final_list)
writer = pd.ExcelWriter('test.xlsx', engine='xlsxwriter')
df.to_excel(writer, sheet_name='welcome',startrow=1,startcol=0, header=False, index=False)
writer.save()

pandas ExcelWriter 'if_sheet_exists' is not working

Here is a multi sheet excel file opened and operated on one sheet taken in a dataframe and then copied back. Now, a new sheet (sheet1) is being created while doing this. Objective however is to overwrite the old target sheet. When I am trying deleting the sheet before pasting data from dataframe, it says 'sheet' does not exist.
Here is the code:
import openpyxl as op
import pandas as pd
basePath = filePath
wbk = op.load_workbook(basePath + "file.xlsx")
writer = pd.ExcelWriter(basePath + "file.xlsx", engine = 'openpyxl', mode="a", if_sheet_exists="replace")
writer.book = wbk
df = pd.read_excel(basePath + "file.xlsx", sheet_name="sheet")
df.insert(0,"newCol2","")
#wbk.remove_sheet(wbk.get_sheet_by_name('sheet'))
df.to_excel(writer, sheet_name = 'sheet', index=False)
writer.save()
writer.close()
What am I doing wrong?

Append row to top of excel sheet python

How can I append a row at the top of an excel sheet? Goal as follows:
The file itself is written by using pandas.df.to_excel as follows:
import pandas
with pandas.ExcelWriter(output_filename) as writer:
for file in files:
df = pandas.read_csv(file)
df.to_excel(writer, sheet_name=file.replace(".csv", "").replace("_", " ").title(), index=False)
Here is one way to do it using XlsxWriter as the Excel engine:
with pandas.ExcelWriter(output_filename, engine='xlsxwriter') as writer:
for file in files:
df = pandas.read_csv(file)
sheet_name = file.replace(".csv", "").replace("_", " ").title()
df.to_excel(writer, sheet_name=sheet_name, index=False, startrow=1)
worksheet = writer.sheets[sheet_name]
worksheet.write('A1', 'Here is some additional text')
You can use openpyxl to edit your Excel file afterwards:
import contextlib
import openpyxl
import pandas as pd
new_row = "THIS ROW IS APPENDED AFTER THE FILE IS WRITTEN BY PANDAS"
with contextlib.closing(openpyxl.open(output_filename)) as wb:
for file in files:
sheet_name = file.replace(".csv", "").replace("_", " ").title()
sheet = wb[sheet_name]
sheet.insert_rows(0)
sheet["A1"] = new_row
wb.save(output_filename)

Update SQL output data into Existing Excel in respective sheet using python

I am new to Python programming and seeking for some help/guidance in correcting my python code.
Here my query is.
I have one Excel file which has (7 Tabs).
I have one Folder which contains 7 different text file and each text file contains respective Tab SQL Query and each text file name is same as the Tab Name which is available in Excel File.
I have a written a Python code to loop through all the text file one by one and execute that each text file SQL query and whatever data will come in output that output data should dump into existing excel file in that respective sheet/tab. i am using pandas to do this, however, code is working fine but while updating data into excel pandas is removing all existing sheets from the file and updating only current output data into excel file.
Example: if Python code execute a text file(Filename: Data) and after executing this SQL query we got some data and this data should dump into excel file (sheetname: Data).
<pre><code>
import pypyodbc
import pandas as pd
import os
import ctypes
from pandas import ExcelWriter
fpath = r"C:\MNaveed\DataScience\Python Practice New\SQL Queries"
xlfile = r"C:\MNaveed\DataScience\Python Practice New\SQL Queries\Open_Case_Data.xlsx"
cnxn = pypyodbc.connect('Driver={SQL Server};Server=MyServerName;Database=MyDatabaseName;Trusted_Connection=Yes')
cursor = cnxn.cursor()
for subdir, dirs, files in os.walk(fpath):
for file in files:
#print(os.path.join(subdir,file))
filepath = os.path.join(subdir,file)
#print("FilePath: ", filepath)
if filepath.endswith(".txt"):
if file != "ClosedAging_Cont.txt":
txtdata = open(filepath, 'r')
script = txtdata.read().strip()
txtdata.close()
cursor.execute(script)
if file == "ClosedAging.txt":
txtdata = open(os.path.join(subdir,"ClosedAging_Cont.txt"), 'r')
script = txtdata.read().strip()
txtdata.close()
cursor.execute(script)
col = [desc[0] for desc in cursor.description]
data = cursor.fetchall()
df = pd.DataFrame(list(data),columns=col)
#save_xls(df,xlfile)
writer = pd.ExcelWriter(xlfile)
flnm = file.replace('.txt','').strip()
df.to_excel(writer,sheet_name=flnm,index=False)
writer.save()
print(file, " : Successfully Updated.")
else:
print(file, " : Ignoring this File")
else:
print(file, " : Ignoring this File")
ctypes.windll.user32.MessageBoxW(0,"Open Case Reporting Data Successfully Updated","Open Case Reporting",1)
</pre></code>
By looping through the text files, you overwrite the Excel file inside the loop each time. Instead instantiate pd.ExcelWriter(xlfile) and call writer.save() outside the loop.
The following example is adapted from the xlswriter documentation
You can find more information about multiple sheets here: xlswriter documentaion - multiple sheets
import pandas as pd
# Create a Pandas Excel writer using XlsxWriter as the engine outside the loop.
writer = pd.ExcelWriter('pandas_simple.xlsx', engine='xlsxwriter')
# Sample loop, replace with directory browsing loop
for i in range(7):
# Sample Pandas dataframe. Replace with SQL query and resulting data frame.
df = pd.DataFrame({'DataFromSQLQuery': ['SQL query result {0}'.format(i)]})
# Convert the dataframe to an XlsxWriter Excel object.
df.to_excel(writer, sheet_name='Sheet{0}'.format(i))
# Close the Pandas Excel writer and output the Excel file.
writer.save()
The following code addresses the concrete question but is untested.
import pypyodbc
import pandas as pd
import os
import ctypes
from pandas import ExcelWriter
fpath = r"C:\MNaveed\DataScience\Python Practice New\SQL Queries"
xlfile = r"C:\MNaveed\DataScience\Python Practice New\SQL Queries\Open_Case_Data.xlsx"
cnxn = pypyodbc.connect('Driver={SQL Server};Server=MyServerName;Database=MyDatabaseName;Trusted_Connection=Yes')
cursor = cnxn.cursor()
# Create a Pandas Excel writer using XlsxWriter as the engine outside the loop
writer = pd.ExcelWriter('pandas_simple.xlsx', engine='xlsxwriter')
# File loop
for subdir, dirs, files in os.walk(fpath):
for file in files:
filepath = os.path.join(subdir,file)
if filepath.endswith(".txt"):
if file != "ClosedAging_Cont.txt":
txtdata = open(filepath, 'r')
script = txtdata.read().strip()
txtdata.close()
cursor.execute(script)
if file == "ClosedAging.txt":
txtdata = open(os.path.join(subdir,"ClosedAging_Cont.txt"), 'r')
script = txtdata.read().strip()
txtdata.close()
cursor.execute(script)
col = [desc[0] for desc in cursor.description]
data = cursor.fetchall()
# Data frame from original question
df = pd.DataFrame(list(data),columns=col)
# Convert the dataframe to an XlsxWriter Excel object
flnm = file.replace('.txt','').strip()
df.to_excel(writer, sheet_name=flnm, index=False)
print(file, " : Successfully Updated.")
else:
print(file, " : Ignoring this File")
else:
print(file, " : Ignoring this File")
# Close the Pandas Excel writer and output the Excel file
writer.save()
ctypes.windll.user32.MessageBoxW(0,"Open Case Reporting Data Successfully Updated","Open Case Reporting",1)

Categories

Resources