Writing xls from python row wise using pandas

Writing xls from python row wise using pandas - python

I have successfully created .xlsx files using pandas:
df = pd.DataFrame([list of array])
'''
:param data: Data Rows
:param filename: name of the file
:return:
'''
df = pd.DataFrame(data)
# my "Excel" file, which is an in-memory output file (buffer)
# for the new workbook
excel_file = BytesIO()
writer = pd.ExcelWriter(excel_file, engine='xlsxwriter')
df.to_excel(writer, sheet_name='Sheet1_test')
writer.save()
writer.close()
# important step, rewind the buffer or when it is read() you'll get nothing
# but an error message when you try to open your zero length file in Excel
excel_file.seek(0)
# set the mime type so that the browser knows what to do with the file
response = HttpResponse(excel_file.read(),
content_type='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet')
# set the file name in the Content-Disposition header
response['Content-Disposition'] = 'attachment; filename=' + filename + '.xlsx'
return response
But I have an issue here:
There is an unnecessary serial-number (SNo.) column which I don't want.
It appears as the first row and the first column of the sheet. How do I remove it?

According to the pandas documentation,
to_excel writes the DataFrame index as an extra column by default.
Pass index=False to suppress it (and header=False if you also want to drop the header row):
df.to_excel(writer, sheet_name='Sheet1_test',index=False)

For further reference, see this Medium post: https://medium.com/better-programming/using-python-pandas-with-excel-d5082102ca27

Related

How to save a pandas dataframe as excel table to SharePoint using Python

I am trying to save a pandas DataFrame as an Excel table to a SharePoint site. I have two separate blocks of code which achieve the following (thanks to the Stack Overflow community):
A script which saves a pandas DataFrame as an Excel table to local storage using ExcelWriter.
A script which uploads a local file to SharePoint Online.
I am confused about how to combine these two so that the DataFrame is saved to SharePoint Online, with the Excel file containing a real table and not just a range of data. Kindly help.
SCRIPT 1 to save excel range as table on local
##############################################################################
#
# An example of adding a dataframe to an worksheet table in an xlsx file
# using Pandas and XlsxWriter.
#
# Tables in Excel are used to group rows and columns of data into a single
# structure that can be referenced in a formula or formatted collectively.
#
# SPDX-License-Identifier: BSD-2-Clause
# Copyright 2013-2021, John McNamara, jmcnamara@cpan.org
#
import pandas as pd
# Create a Pandas dataframe from some data.
df = pd.DataFrame({
    'Country': ['China', 'India', 'United States', 'Indonesia'],
    'Population': [1404338840, 1366938189, 330267887, 269603400],
    'Rank': [1, 2, 3, 4]})
# Order the columns if necessary.
df = df[['Rank', 'Country', 'Population']]
# Create a Pandas Excel writer using XlsxWriter as the engine.
writer = pd.ExcelWriter('pandas_table.xlsx', engine='xlsxwriter')
# Write the dataframe data to XlsxWriter. Turn off the default header and
# index and skip one row to allow us to insert a user defined header.
df.to_excel(writer, sheet_name='Sheet1', startrow=1, header=False, index=False)
# Get the xlsxwriter workbook and worksheet objects.
workbook = writer.book
worksheet = writer.sheets['Sheet1']
# Get the dimensions of the dataframe.
(max_row, max_col) = df.shape
# Create a list of column headers, to use in add_table().
column_settings = [{'header': column} for column in df.columns]
# Add the Excel table structure. Pandas will add the data.
# The table spans rows 0..max_row: one header row plus max_row data rows.
worksheet.add_table(0, 0, max_row, max_col - 1, {'columns': column_settings})
# Make the columns wider for clarity.
worksheet.set_column(0, max_col - 1, 12)
# Close the Pandas Excel writer and output the Excel file.
# ExcelWriter.save() was deprecated in pandas 1.5 and removed in 2.0;
# close() writes the workbook to disk as well.
writer.close()
SCRIPT 2 to save any file to sharepoint
from office365.runtime.auth.authentication_context import AuthenticationContext
from office365.sharepoint.client_context import ClientContext
import os
baseurl = 'https://testsite.sharepoint.com/'
basesite = '/sites/project' # every share point has a home.
siteurl = baseurl + basesite
localpath = "pandas_table.xlsx"
remotepath = "Shared Documents/General/file.xlsx" # existing folder path under sharepoint site.
ctx_auth = AuthenticationContext(siteurl)
ctx_auth.acquire_token_for_user(<username>, <password>)
ctx = ClientContext(siteurl, ctx_auth) # make sure you auth to the siteurl.
with open(localpath, 'rb') as content_file:
file_content = content_file.read()
dir, name = os.path.split(remotepath)
file = ctx.web.get_folder_by_server_relative_url(dir).upload_file(name, file_content).execute_query()
I am not sure how to use writer.save() together with the SharePoint connector. Kindly advise.
Thanks in advance.

Given that you have your data in a DataFrame named df, the following code will write it to SharePoint using the O365 library.
from io import BytesIO
from tempfile import gettempdir
from O365 import Account, FileSystemTokenBackend
import pandas as pd
O365_CLIENT_ID = "client"
O365_SECRET = "secret"
O365_TENANT_ID = "<name>"
O365_SHAREPOINT = "<name>.sharepoint.com"
O365_SITE = "/sites/..."
def save_file(folder_path, filename, data):
    """Upload an in-memory file to the site's default document library.

    Args:
        folder_path: "/"-separated folder path inside the document library.
            Empty components are ignored; an empty path uploads to the
            library's root folder.
        filename: Name given to the uploaded item.
        data: Binary buffer to upload. It must support ``getbuffer()``
            (as ``io.BytesIO`` does) so its size can be determined.

    Raises:
        ConnectionError: If ``account.authenticate()`` reports failure.
        FileNotFoundError: If a component of ``folder_path`` cannot be
            resolved.
    """
    account = Account(
        (O365_CLIENT_ID, O365_SECRET),
        auth_flow_type="credentials",
        tenant_id=O365_TENANT_ID,
        token_backend=FileSystemTokenBackend(
            token_path=gettempdir(), token_filename="o365_token.txt"
        ),
    )
    # Fail loudly instead of falling through to an unbound ``drive``.
    if not account.authenticate():
        raise ConnectionError("O365 authentication failed.")

    drive = (
        account.sharepoint()
        .get_site(O365_SHAREPOINT, O365_SITE)
        .get_default_document_library()
    )

    # str.split() never returns an empty list, and an empty component would
    # match every item name below, so drop empty components up front.
    subfolders = [part for part in folder_path.split("/") if part]
    if subfolders:
        items = drive.get_items()
        for subfolder in subfolders:
            try:
                # NOTE(review): substring match kept from the original;
                # an exact comparison may be what is intended -- confirm.
                subfolder_drive = next(
                    item for item in items if subfolder in item.name
                )
                items = subfolder_drive.get_items()
            except Exception as excep:  # pylint: disable=broad-except
                # Raising a plain string is a TypeError in Python 3; raise a
                # real exception and keep the original cause chained.
                raise FileNotFoundError(
                    f"Path {folder_path} does not exist."
                ) from excep
    else:
        subfolder_drive = drive.get_root_folder()

    subfolder_drive.upload_file(
        item=None,
        item_name=filename,
        stream=data,
        stream_size=data.getbuffer().nbytes,
    )
# Serialize the dataframe into an in-memory workbook, then upload it.
with BytesIO() as workbook_buffer:
    df.to_excel(workbook_buffer, index=False)
    # Rewind the buffer before handing it to save_file.
    workbook_buffer.seek(0)
    save_file('folder/to/upload/to', 'filename.xlsx', workbook_buffer)
Writing to multiple sheets:
# Write the same dataframe to two sheets of one in-memory workbook.
with BytesIO() as workbook_buffer:
    with pd.ExcelWriter(  # pylint: disable=abstract-class-instantiated
        workbook_buffer,
        engine="xlsxwriter",
    ) as xlsx_writer:
        for sheet in ('sheet1', 'sheet2'):
            df.to_excel(xlsx_writer, sheet_name=sheet, index=False)
    # The writer is closed at this point; rewind the buffer and upload it.
    workbook_buffer.seek(0)
    save_file('folder/to/upload/to', 'filename.xlsx', workbook_buffer)

How to fix ValueError in pandas

I am moving an application from a classic Tkinter GUI to a Django cloud-based application and am receiving a
ValueError: Invalid file path or buffer object type: <class 'bool'>
when trying to run a function which calls on pandas.
Exception Location: C:\Users\alfor\AppData\Local\Programs\Python\Python37-32\lib\site-packages\pandas\io\common.py in get_filepath_or_buffer, line 232
I have not tried much because I could not find this error in my searches.
I do not believe this function runs at all, because my media folder is not getting the new directory where the file would be saved, but I could be wrong.
The beginning of the function that is having issues looks like this:
def runpayroll():
man_name = 'Jessica Jones'
sar_file = os.path.isfile('media/reports/Stylist_Analysis.xls')
sar_file2 = os.path.isfile('media/reports/Stylist_Analysis.xls')
tips_file = os.path.isfile('media/reports/Tips_By_Employee_Report.xls')
hours_wk1_file = os.path.isfile('media/reports/Employee_Hours1.xls')
hours_wk2_file = os.path.isfile('media/reports/Employee_Hours2.xls')
retention_file = os.path.isfile('media/reports/SC_Client_Retention_Report.xls')
efficiency_file = os.path.isfile('media/reports/Employee_Service_Efficiency.xls')
df_sar = pd.read_excel(sar_file,
sheet_name=0, header=None, skiprows=4)
df_sar2 = pd.read_excel(sar_file2,
sheet_name=0, header=None, skiprows=4)
df_tips = pd.read_excel(tips_file,
sheet_name=0, header=None, skiprows=0)
df_hours1 = pd.read_excel(hours_wk1_file,
header=None, skiprows=5)
df_hours2 = pd.read_excel(hours_wk2_file,
header=None, skiprows=5)
df_retention = pd.read_excel(retention_file, sheet_name=0,
header=None, skiprows=8)
df_efficiency = pd.read_excel(efficiency_file, sheet_name=0,
header=None, skiprows=5)
The only code I have changed in the rest of this function is the following, which I assume does not matter because it is only a file location:
writer = pd.ExcelWriter('/media/payroll.xlsx', engine='xlsxwriter')
and instead of asking the user for a file save location using tkinter I used...
with open(file_path, 'rb') as f:
response = HttpResponse(f, content_type=guess_type(file_path)[0])
response['Content-Length'] = len(response.content)
return response
Expected results are to open a few excel sheets, do some work to the dataframes, and to spit out an excel sheet to the user.

I believe you need to change the assignment for each file from:
sar_file = os.path.isfile('media/reports/Stylist_Analysis.xls')
to:
sar_file = 'media/reports/Stylist_Analysis.xls'
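because os.path.isfile returns a boolean, not the path, so pd.read_excel receives True/False instead of a file name. From the documentation of os.path.isfile: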
Return True if path is an existing regular file. This follows symbolic links, so both islink() and isfile() can be true for the same path.

printing a dataframe is showing updated data, but to_csv is writing old data to csv file

I am reading CSV data into a DataFrame; after that, the Python code performs some updates on the DataFrame. When I print the DataFrame I can see the updated values, but when I write it back to the CSV file I still get the old values.
Related question: "to_csv is not writing the updated DataFrame"
My issue is similar to the one in the question linked above, but I have just a single class with no multiple references to the same file, yet I still get the same issue, and I do not have enough reputation to comment on that post.
import win32com.client as win32
import pandas as pd
# Reads delivery_status.csv into a DataFrame, creates and sends one Outlook
# mail item per row, sets the row's element 4 to 'Y' when Send() returns None,
# and finally writes the DataFrame back to the same CSV file.
class MailDispatcher:
global file_location
file_location = 'C:\Pilot Run\Files\\'
delivery_status = pd.read_csv(file_location + "delivery_status.csv")
for index, row in delivery_status.iterrows():
# Mail trigger
outlook = win32.Dispatch('outlook.application')
mail = outlook.CreateItem(0)
mail.To = 'abc#gmail.com'
mail.Subject = 'test ' + str(row[3])
if mail.Send() is None:
# NOTE(review): `row` is the Series yielded by iterrows(); the pandas docs
# warn that it may be a copy, so this assignment presumably never reaches
# delivery_status, which would explain the stale CSV. Writing through the
# frame itself (e.g. delivery_status.at[index, <column label>]) is the
# likely fix -- confirm.
row[4] = 'Y'
print(row) #printing correct data
outlook.Quit()
print(delivery_status)
#output = open(file_location + 'delivery_status.csv', 'w')
# writing old data in csv
delivery_status.to_csv(file_location + 'delivery_status.csv', index=None, header=True)
#output.close()
##Tried with the below code to as per some comments, still getting the same issue:
# Same DataFrame written through an explicitly opened file handle.
output = open(file_location + 'delivery_status.csv', 'w')
delivery_status.to_csv(output, index=None, header=True)
output.close()

Openpyxl remove pivot table fields

I am trying to export a workbook using openpyxl and its new pivot table functionality.
I currently have a workbook with two sheets (RawData and Pivot).
I load the workbook, put data into RawData, then close and export it.
The pivot table automatically updates its data when I open the file; however, Excel shows the PivotTable Fields / editing pane in the right sidebar. Does anyone know how to remove this?
Preferably I would like a solution using openpyxl.
wb = load_workbook(filename=template_file_path)
wb.remove_sheet(wb["RawData"])
ws = wb.create_sheet("RawData")
ws = wb["RawData"]
# ws.title = "RawData"
# #Add Headers
ws.append(data['headers'][1:])
# #Add raw data
for row in data['data']:
ws.append(row[1:])
#File settings
file_name = str(template_file_path).replace(' ', '_') + "_export.xlsx"
response = HttpResponse(save_virtual_workbook(wb), content_type='application/vnd.ms-excel')
response['Content-Disposition'] = 'attachment; filename= "{0}"'.format(file_name)
return response

After the first line you can add
ws_pivot = wb["Pivot"]
pivot = ws_pivot._pivots[0]
pivot.disableFieldList = True
to achieve this.

You can remove the pivot tables from a worksheet by setting the collection to an empty list: ws._pivots = []

Update SQL output data into Existing Excel in respective sheet using python

I am new to Python programming and am looking for some help/guidance in correcting my Python code.
Here is my problem.
I have one Excel file which has 7 tabs.
I have one folder which contains 7 different text files. Each text file contains the SQL query for one tab, and each text file has the same name as the corresponding tab in the Excel file.
I have written Python code that loops through the text files one by one, executes the SQL query in each file, and should dump the resulting data into the existing Excel file, in the respective sheet/tab. I am using pandas to do this. The code runs without errors, but while updating the Excel file pandas removes all existing sheets from the file and writes only the current output data.
Example: if the Python code executes a text file (file name: Data), the data returned by that SQL query should be dumped into the Excel file's sheet of the same name (sheet name: Data).
<pre><code>
import pypyodbc
import pandas as pd
import os
import ctypes
from pandas import ExcelWriter
# Walks fpath, executes the SQL stored in each .txt file and writes the
# result set to a sheet of xlfile named after the text file.
fpath = r"C:\MNaveed\DataScience\Python Practice New\SQL Queries"
xlfile = r"C:\MNaveed\DataScience\Python Practice New\SQL Queries\Open_Case_Data.xlsx"
cnxn = pypyodbc.connect('Driver={SQL Server};Server=MyServerName;Database=MyDatabaseName;Trusted_Connection=Yes')
cursor = cnxn.cursor()
for subdir, dirs, files in os.walk(fpath):
for file in files:
#print(os.path.join(subdir,file))
filepath = os.path.join(subdir,file)
#print("FilePath: ", filepath)
if filepath.endswith(".txt"):
# ClosedAging_Cont.txt is not processed on its own; it is executed right
# after ClosedAging.txt below.
if file != "ClosedAging_Cont.txt":
txtdata = open(filepath, 'r')
script = txtdata.read().strip()
txtdata.close()
cursor.execute(script)
if file == "ClosedAging.txt":
txtdata = open(os.path.join(subdir,"ClosedAging_Cont.txt"), 'r')
script = txtdata.read().strip()
txtdata.close()
cursor.execute(script)
# Column names come from the cursor description of the last executed query.
col = [desc[0] for desc in cursor.description]
data = cursor.fetchall()
df = pd.DataFrame(list(data),columns=col)
#save_xls(df,xlfile)
# NOTE(review): a new ExcelWriter on xlfile is created and saved for every
# text file, so each iteration rewrites the workbook; this matches the
# reported symptom that only the current sheet remains in the file.
writer = pd.ExcelWriter(xlfile)
# Sheet name is the text file's name without the .txt extension.
flnm = file.replace('.txt','').strip()
df.to_excel(writer,sheet_name=flnm,index=False)
writer.save()
print(file, " : Successfully Updated.")
else:
print(file, " : Ignoring this File")
else:
print(file, " : Ignoring this File")
ctypes.windll.user32.MessageBoxW(0,"Open Case Reporting Data Successfully Updated","Open Case Reporting",1)
</pre></code>

Because the pd.ExcelWriter is created and saved inside the loop over the text files, the Excel file is overwritten on every iteration. Instead, create the writer with pd.ExcelWriter(xlfile) once before the loop, and save/close it once after the loop.
The following example is adapted from the XlsxWriter documentation.
You can find more information about multiple sheets here: XlsxWriter documentation - multiple sheets
import pandas as pd
# Create a Pandas Excel writer using XlsxWriter as the engine outside the loop.
# The context manager closes the writer and writes the file on exit;
# ExcelWriter.save() was deprecated in pandas 1.5 and removed in 2.0.
with pd.ExcelWriter('pandas_simple.xlsx', engine='xlsxwriter') as writer:
    # Sample loop, replace with directory browsing loop
    for i in range(7):
        # Sample Pandas dataframe. Replace with SQL query and resulting data frame.
        df = pd.DataFrame({'DataFromSQLQuery': ['SQL query result {0}'.format(i)]})
        # Write the dataframe to its own sheet of the shared workbook.
        df.to_excel(writer, sheet_name='Sheet{0}'.format(i))
The following code addresses the concrete question but is untested.
import pypyodbc
import pandas as pd
import os
import ctypes
from pandas import ExcelWriter
fpath = r"C:\MNaveed\DataScience\Python Practice New\SQL Queries"
xlfile = r"C:\MNaveed\DataScience\Python Practice New\SQL Queries\Open_Case_Data.xlsx"
cnxn = pypyodbc.connect('Driver={SQL Server};Server=MyServerName;Database=MyDatabaseName;Trusted_Connection=Yes')
cursor = cnxn.cursor()


def _read_sql_script(path):
    """Return the whitespace-stripped text of the SQL script at *path*."""
    # The with-block closes the file even if reading fails.
    with open(path, 'r') as script_file:
        return script_file.read().strip()


# Create a Pandas Excel writer using XlsxWriter as the engine outside the loop.
# The context manager closes the writer and writes the file on exit;
# ExcelWriter.save() was deprecated in pandas 1.5 and removed in 2.0.
# NOTE(review): xlfile is defined above but the workbook is written to
# 'pandas_simple.xlsx', as in the original answer -- confirm the intended target.
with pd.ExcelWriter('pandas_simple.xlsx', engine='xlsxwriter') as writer:
    # File loop
    for subdir, dirs, files in os.walk(fpath):
        for file in files:
            filepath = os.path.join(subdir, file)
            # Only .txt files are processed; ClosedAging_Cont.txt is skipped
            # here because it is executed together with ClosedAging.txt below.
            if not filepath.endswith(".txt") or file == "ClosedAging_Cont.txt":
                print(file, " : Ignoring this File")
                continue
            cursor.execute(_read_sql_script(filepath))
            if file == "ClosedAging.txt":
                cursor.execute(
                    _read_sql_script(os.path.join(subdir, "ClosedAging_Cont.txt"))
                )
            # Column names come from the last executed query.
            col = [desc[0] for desc in cursor.description]
            data = cursor.fetchall()
            # Data frame from original question
            df = pd.DataFrame(list(data), columns=col)
            # Write the dataframe to a sheet named after the text file.
            flnm = file.replace('.txt', '').strip()
            df.to_excel(writer, sheet_name=flnm, index=False)
            print(file, " : Successfully Updated.")
ctypes.windll.user32.MessageBoxW(0,"Open Case Reporting Data Successfully Updated","Open Case Reporting",1)

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Writing xls from python row wise using pandas - python

According to the documentation, to_excel writes the index as a new column by default; pass index=False to suppress it: df.to_excel(writer, sheet_name='Sheet1_test',index=False)

You can take reference from this https://medium.com/better-programming/using-python-pandas-with-excel-d5082102ca27 post of medium for this.

Related

How to save a pandas dataframe as excel table to SharePoint using Python

How to fix ValueError in pandas

printing a dataframe is showing updated data, but to_csv is writing old data to csv file

Openpyxl remove pivot table fields

Update SQL output data into Existing Excel in respective sheet using python

Categories

Resources