merging .xlsx files into one without overwriting data

merging .xlsx files into one without overwriting data - python

import os
import tkinter as tk
from tkinter import filedialog
import pandas as pd
import xlrd
import openpyxl
from openpyxl import load_workbook
import datetime
from dataclasses import dataclass
from openpyxl.styles import Font,Color,Alignment,Border,Side,colors
import numpy as np
from xlsxwriter.utility import xl_rowcol_to_cell
import xlwt
from xlwt import Workbook
import functools
import numpy as np
from itertools import repeat, chain
import glob
root= tk.Tk()
canvas1 = tk.Canvas(root, width = 300, height = 300, bg = 'lightsteelblue')
canvas1.pack()
def getExcel():
    """Ask the user for a workbook and reshape it into the global ``df``.

    Opens a file dialog, loads the chosen workbook with pandas, drops the
    source columns the export does not use, inserts the export's own columns,
    prepends a row of price-list labels, interleaves filler rows, and leaves
    the result in the module-level ``df``.
    """
    global df
    import_file_path = filedialog.askopenfilename()
    df = pd.read_excel(import_file_path)

    # Source columns that are not carried into the export.
    for unused in (
        'PART CODE',
        'SUPPLIER CODE',
        'COMPANY OR SUB-CONT',
        'SUB-CONT UNIT COST',
        'HLR',
        'HOURLY MULTIPLIER',
        'MATERIAL MULTIPLIER',
        'ROUGH-IN HOURS',
        'FINISH HOURS',
        'PRELIMINARY TEXT',
        'FORMAL TEXT',
        'SUBCONT TEXT',
        'Part Image Path',
    ):
        del df[unused]

    # The insert positions depend on the columns already present, so the
    # order of these statements matters.
    df.insert(0, 'Id', '')
    df.insert(1, 'M', 'M')
    df.insert(3, 'SubCategory', '')
    df.insert(4, 'DrillDowns', '')
    df.insert(6, 'Name', '')
    df = df.rename(columns={'PART UNIT TYPE': 'MeasurementType'})
    df.insert(8, 'OnOffSwitch', 'No')
    df['SubCategory'] = df['CATEGORY']
    df = df.rename(columns={'PART DESCRIPTION': 'Note'})
    df['Name'] = df['Note']
    df = df.rename(columns={'COMPANY UNIT COST': 'PRICE'})

    # Label row placed above the data; it also introduces the
    # NeedToReplace* placeholder columns.
    new_row = pd.DataFrame(
        {
            'Id': '(BLANK = NEW)',
            'M': 'P',
            'CATEGORY': 'Brand',
            'SubCategory': 'Name',
            'DrillDowns': 'Price - Bathroom {nwPFGtikvZ}',
            'Name': 'Price - HVAC {cp7lAPx4IO}',
            'Note': 'Price - XPS1 {qX8FFEVmqP}',
            'MeasurementType': 'Price - PRIME {atGoZ7zLsE}',
            'OnOffSwitch': 'Price - ARCHIVE {NtbEEROpa9}',
            'NeedToReplace': 'Price - FLOORING {AskrHJL9ab}',
            'NeedToReplace1': 'Price - TEST {jOn0TaUDmU}',
            'NeedToReplace2': 'Price - Kitchen Refacing {9iFFUgrQBr}',
            'NeedToReplace3': 'Price - EAGLE EYES {X8ExSUDoFH}',
            'NeedToReplace4': 'Price - Basement {ajuemFbXaL}',
            'NeedToReplace5': 'Price - Egress Windows {69790nzjKb}',
        },
        index=[0],
    )
    df = pd.concat([new_row, df]).reset_index(drop=True)
    df = df.fillna('')

    # Comparing against a 16000-row shift flags every row of a frame shorter
    # than that, so one empty row (fractional index) is added per flagged row
    # and sorted into place.
    switches = df['M'].ne(df['M'].shift(16000))
    idx = switches[switches].index
    df_new = pd.DataFrame(index=idx + 1.5)
    df = pd.concat([df, df_new]).sort_index()

    # Mark the cells of the added rows, then give each column its filler.
    df = df.fillna('NO VALUE')
    df['M'] = df['M'].replace({'NO VALUE': "P"})
    df['Id'] = df['Id'].replace({'NO VALUE': ""})
    for filler_col in (
        'DrillDowns',
        'Name',
        'Note',
        'MeasurementType',
        'OnOffSwitch',
        'NeedToReplace',
        'NeedToReplace1',
        'NeedToReplace2',
        'NeedToReplace3',
        # Previously this column was overwritten with NeedToReplace3's values.
        'NeedToReplace5',
    ):
        df[filler_col] = df[filler_col].replace({'NO VALUE': "xxxx"})

    # NeedToReplace4 carries the price of the row above it.
    df['NeedToReplace4'] = df['PRICE'].shift(1)
    df = df.fillna("Price - Basement {ajuemFbXaL}")
    df['NeedToReplace4'] = df['NeedToReplace4'].replace({'NO VALUE': ""})
    del df['PRICE']

    # Blank out the placeholder headers (several columns end up named ' ').
    df = df.rename(columns={
        'NeedToReplace': '',
        'NeedToReplace1': ' ',
        'NeedToReplace2': ' ',
        'NeedToReplace3': ' ',
        'NeedToReplace4': ' ',
        'NeedToReplace5': ' ',
    })

    # Assign the result back instead of calling replace(inplace=True) on an
    # attribute-accessed column, which is not guaranteed to update ``df``.
    df['CATEGORY'] = df['CATEGORY'].replace('NO VALUE', df['Note'].shift(1))
    df['SubCategory'] = df['SubCategory'].replace('NO VALUE', df['Name'].shift(1))

    pd.set_option('display.max_rows', df.shape[0] + 1)
    pd.set_option('display.max_colwidth', None)
# Button that runs getExcel when clicked, placed at (150, 150) on the canvas.
browseButton_Excel = tk.Button(text='Select Excel File', command=getExcel, bg='green', fg='white', font=('helvetica', 12, 'bold'))
canvas1.create_window(150, 150, window=browseButton_Excel)
# Blocks until the window is closed; the statements below run only after that.
root.mainloop()
# NOTE(review): reset_index returns a new frame and the result is discarded
# here, so df keeps its current index.
df.reset_index(drop=True)
# The export is written to an absolute path under the user's Desktop.
df.to_excel(r'C:\Users\Larso\Desktop\ClearEstimatesEstimate\LeapPriceGuideExport.xlsx', sheet_name='Price Guide', index = False)
# NOTE(review): the same workbook is read back by bare file name, which is
# resolved against the current working directory rather than the folder
# written to above; the two only match when the script is started from there.
file1 = pd.read_excel("LeapPriceGuideExport.xlsx")
file2 = pd.read_excel("test.xlsx")
# NOTE(review): the merge key is "ID", while the column getExcel inserts is
# named 'Id' - confirm the key exists in both workbooks.
file3 = file1.merge(file2, on="ID", how="outer")
file3.to_excel("merged.xlsx")
This is my current code above. I'm editing an Excel file so that it can be merged into an existing .xlsx file, but every time I try to merge the files I get this error:
FileNotFoundError: [Errno 2] No such file or directory: 'LeapPriceGuideExport.xlsx'
I can clearly see the file in the folder, so I'm not sure what I am missing. Please let me know if anything didn't come through clearly; I am still getting used to posting and asking questions. Thank you.

Related

Loop through multiple xml files

I'm fairly new to python and would like to loop through multiple xml files. I'm currently using the existing code to pull in sample2 xml file:
import xml.etree.ElementTree as ET
import pandas as pd
import os
tree=ET.parse("sample2.xml")
root = tree.getroot()
qty=root.iterfind(".//Qty")
pri=root.iterfind(".//PriceAmount")
cor=root.iterfind(".//AuctionIdentification")
data =[]
for x, y, z in zip(qty, pri, cor):
#print(x.get("v"), y.get("v"))
a = x.get("v"), y.get("v"), z.get("v")
data.append(a)
df = pd.DataFrame(data, columns=["Qty", "Price" , "Border"])
df['Qty'] = df['Qty'].astype(float)
df['Price'] = df['Price'].astype(float)
#print(df)
total = df['Qty'].sum()
price = df['Price'].mean()
border = df.loc[0,'Border']
df2 = pd.DataFrame(columns=["Qty", "Price" , "Border"])
df2['Qty'] = [total]
df2['Price'] = [price]
df2['Border'] = [str(border)[0:12]]
I tried adding soup xml to the below line of code but this didn't work
tree=ET.parse("sample2.xml , "soup xml")
root = tree.getroot()

Consider turning your code into a function and calling it for the various files you need:
import xml.etree.ElementTree as ET
import pandas as pd
import os
def my_xml_processor(filename):
    """Summarise one XML file as a single-row DataFrame.

    Reads the ``v`` attribute of every ``Qty``, ``PriceAmount`` and
    ``AuctionIdentification`` element (paired in document order) and returns
    a frame with columns ``Qty`` (sum), ``Price`` (mean) and ``Border`` (the
    first 12 characters of the first ``AuctionIdentification`` value).

    Raises ``KeyError`` when the file contains no matching elements, because
    there is then no first row to read ``Border`` from.
    """
    root = ET.parse(filename).getroot()
    qty = root.iterfind(".//Qty")
    pri = root.iterfind(".//PriceAmount")
    cor = root.iterfind(".//AuctionIdentification")

    # zip stops at the shortest of the three sequences.
    data = [(x.get("v"), y.get("v"), z.get("v")) for x, y, z in zip(qty, pri, cor)]

    df = pd.DataFrame(data, columns=["Qty", "Price", "Border"])
    df['Qty'] = df['Qty'].astype(float)
    df['Price'] = df['Price'].astype(float)

    total = df['Qty'].sum()
    price = df['Price'].mean()
    border = df.loc[0, 'Border']

    return pd.DataFrame({
        "Qty": [total],
        "Price": [price],
        "Border": [str(border)[0:12]],
    })
You can then call it for your files:
my_xml_processor("sample2.xml")
my_xml_processor("soup.xml")
EDIT: these are some minor code changes that I'd recommend:
import xml.etree.ElementTree as ET
import pandas as pd
import os
def my_xml_processor(filename: str) -> pd.DataFrame:
    """Summarise one XML file as a single-row DataFrame.

    Pairs the ``v`` attributes of the ``Qty``, ``PriceAmount`` and
    ``AuctionIdentification`` elements in document order and returns one row
    with the summed ``Qty``, the mean ``Price`` and the first 12 characters
    of the first ``Border`` value.

    Raises ``IndexError`` when the file contains no matching elements.
    """
    root = ET.parse(filename).getroot()
    qty = root.iterfind(".//Qty")
    pri = root.iterfind(".//PriceAmount")
    cor = root.iterfind(".//AuctionIdentification")
    data = [
        (x.get('v'), y.get('v'), z.get('v'))
        for x, y, z in zip(qty, pri, cor)
    ]
    df = (pd
        .DataFrame(data, columns=["Qty", "Price", "Border"])
        .astype({
            'Qty': float,
            'Price': float,
        })
    )
    # Built explicitly rather than through ``agg`` with a lambda: whether
    # ``agg`` hands a callable the whole column or single elements has varied
    # between pandas versions, and ``x[0]`` is a label lookup, not "first row".
    return pd.DataFrame({
        'Qty': [df['Qty'].sum()],
        'Price': [df['Price'].mean()],
        'Border': [str(df['Border'].iloc[0])[:12]],
    })

You could use your existing code, but running it in a loop for each filename you have, something like:
import xml.etree.ElementTree as ET
import pandas as pd
import os
files = ['sample2.xml', 'sample3.xml', 'sample4.xml']

# One single-row summary per file. Without collecting them, every iteration
# would overwrite df2 and only the last file's summary would survive the loop.
summaries = []
for file in files:  # read each filename from the list above
    root = ET.parse(file).getroot()
    qty = root.iterfind(".//Qty")
    pri = root.iterfind(".//PriceAmount")
    cor = root.iterfind(".//AuctionIdentification")
    data = [(x.get("v"), y.get("v"), z.get("v")) for x, y, z in zip(qty, pri, cor)]

    df = pd.DataFrame(data, columns=["Qty", "Price", "Border"])
    df['Qty'] = df['Qty'].astype(float)
    df['Price'] = df['Price'].astype(float)

    total = df['Qty'].sum()
    price = df['Price'].mean()
    border = df.loc[0, 'Border']

    df2 = pd.DataFrame({
        "Qty": [total],
        "Price": [price],
        "Border": [str(border)[0:12]],
    })
    summaries.append(df2)

# One row per input file, in the order of `files`.
summary = pd.concat(summaries, ignore_index=True)

How to Edit Cell of Streamlit AgGrid's Row?

I have already created AgGrid by loading data from a csv file. I am adding rows one by one via an external button. But when I try to edit the line I added, it disappears. I would be very grateful if you could help me where the error is. The codes are as follows.
import pandas as pd
import streamlit as st
from st_aggrid import AgGrid, GridUpdateMode, JsCode
from st_aggrid.grid_options_builder import GridOptionsBuilder
import sys
import os
import altair as alt
from streamlit.runtime.legacy_caching import caching
def data_upload():
df = pd.read_csv("data.csv")
return df
if 'grid' in st.session_state:
grid_table = st.session_state['grid']
df = pd.DataFrame(grid_table['data'])
df.to_csv(“data.csv”, index=False)
else:
df = data_upload()
gd = GridOptionsBuilder.from_dataframe(df)
gd.configure_column("Location", editable=True)
gd.configure_column("HourlyRate", editable=True)
gd.configure_column("CollaboratorName", editable=True)
gridOptions = gd.build()
button = st.sidebar.button("Add Line")
if "button_state" not in st.session_state:
st.session_state.button_state = False
if button or st.session_state.button_state:
st.session_state.button_state = True
data = [['', '', 0]]
df_empty = pd.DataFrame(data, columns=['CollaboratorName', 'Location', "HourlyRate"])
df = pd.concat([df, df_empty], axis=0, ignore_index=True)
df.to_csv(“data.csv”, index=False)
gd= GridOptionsBuilder.from_dataframe(df)
grid_table = AgGrid(df,
gridOptions=gridOptions,
fit_columns_on_grid_load=True,
height=500,
width='100%',
theme="streamlit",
key= 'unique',
update_mode=GridUpdateMode.GRID_CHANGED,
reload_data=True,
allow_unsafe_jscode=True,
editable=True
)
if 'grid' not in st.session_state:
st.session_state['grid'] = grid_table
else:
grid_table_df = pd.DataFrame(grid_table['data'])
grid_table_df.to_csv(“data.csv”, index=False)
You can see the running app here (screenshot not reproduced).

This one has a different approach but the goal could be the same.
Two radio buttons are created, if value is yes new line will be created, if value is no there is no new line.
If you want to add a new line, select yes and then add your entry. Then press the update button in the sidebar.
If you want to edit but not add a new line, select no, edit existing entry and then press the update button.
Code
import streamlit as st
from st_aggrid import AgGrid, GridOptionsBuilder
import pandas as pd
def data_upload():
df = pd.read_csv("data.csv")
return df
def show_grid(newline):
    """Render the editable AG Grid and return the ``AgGrid`` result.

    The table is loaded through ``data_upload()``. When *newline* equals
    ``'yes'``, one empty row (blank name and location, hourly rate 0) is
    appended before the grid is drawn.
    """
    st.header("This is AG Grid Table")
    df = data_upload()

    if newline == 'yes':
        data = [['', '', 0]]
        df_empty = pd.DataFrame(data, columns=['CollaboratorName', 'Location', "HourlyRate"])
        df = pd.concat([df, df_empty], axis=0, ignore_index=True)

    gb = GridOptionsBuilder.from_dataframe(df)
    gb.configure_default_column(editable=True)
    grid_table = AgGrid(
        df,
        height=400,
        gridOptions=gb.build(),
        fit_columns_on_grid_load=True,
        allow_unsafe_jscode=True,
    )
    return grid_table
def update(grid_table):
    """Write the grid's current rows (``grid_table['data']``) to ``data.csv``."""
    grid_table_df = pd.DataFrame(grid_table['data'])
    grid_table_df.to_csv('data.csv', index=False)
# start
addline = st.sidebar.radio('Add New Line', options=['yes', 'no'], index=1, horizontal=True)
grid_table = show_grid(addline)
st.sidebar.button("Update", on_click=update, args=[grid_table])

That happens because of your `if button:` statement. A Streamlit button does not keep its state: any user input on a widget placed under `st.button()` reruns the script, the button is no longer "pressed", and you end up losing the data. To prevent this, you can either initialize a session state for your button or use `st.checkbox()` in place of `st.button()`.
In this case I am going to fix your code by initializing a session state for the button.
def data_upload():
df = pd.read_csv("data.csv")
return df
st.header("This is AG Grid Table")
if 'grid' in st.session_state:
grid_table = st.session_state['grid']
df = pd.DataFrame(grid_table['data'])
df.to_csv('data.csv', index=False)
else:
df = data_upload()
gd = GridOptionsBuilder.from_dataframe(df)
gd.configure_column("Location", editable=True)
gd.configure_column("HourlyRate", editable=True)
gd.configure_column("CollaboratorName", editable=True)
gridOptions = gd.build()
def update():
caching.clear_cache()
button = st.sidebar.button("Add Line")
# Initialized session states # New code
if "button_state" not in st.session_state:
st.session_state.button_state = False
if button or st.session_state.button_state:
st.session_state.button_state = True # End of new code
data = [['', '', 0]]
df_empty = pd.DataFrame(data, columns=['CollaboratorName', 'Location', "HourlyRate"])
df = pd.concat([df, df_empty], axis=0, ignore_index=True)
gd= GridOptionsBuilder.from_dataframe(df)
df.to_csv('data.csv', index=False)
gridOptions = gd.build()
grid_table = AgGrid(df,
gridOptions=gridOptions,
fit_columns_on_grid_load=True,
height=500,
width='100%',
theme="streamlit",
key= 'unique',
update_mode=GridUpdateMode.GRID_CHANGED,
reload_data=True,
allow_unsafe_jscode=True,
editable=True
)
if 'grid' not in st.session_state:
st.session_state['grid'] = grid_table
grid_table_df = pd.DataFrame(grid_table['data'])
grid_table_df.to_csv('data.csv', index=False)
I think your code should work fine now with regards to the button issue.

py to exe : failed to execute script pyi_rth_win32comgenpy

I'm creating a simple calculation program using the tkinter module and want to convert it to an exe so that it can be run on any PC. However, the error message "failed to execute script pyi_rth_win32comgenpy" is shown.
I've tried PyInstaller (both from cmd and through the tool on GitHub at https://github.com/brentvollebregt/auto-py-to-exe), but to no avail. I also tried both types of Python file (.py and .pyw).
from tkinter import *
from tkinter.filedialog import askopenfilename
import pandas as pd
from tkinter import messagebox
from pandastable import Table, TableModel
class Window(Frame):
    """Main application frame with buttons that build the SBO and CBO workbooks."""

    def __init__(self, master=None):
        Frame.__init__(self, master)
        self.master = master
        self.init_window()

    def init_window(self):
        """Set the window title and place the quit, SBO and CBO buttons."""
        self.master.title('GUI')
        self.pack(fill=BOTH, expand=1)

        quitButton = Button(self, text='quit', command=self.client_exit)
        quitButton.place(x=0, y=230)

        sboButton = Button(self, text='SBO', command=self.sbo)
        sboButton.place(x=200, y=50)

        cboButton = Button(self, text='CBO', command=self.cbo)
        cboButton.place(x=150, y=50)

    def client_exit(self):
        """Terminate the program."""
        # Same effect as exit(), but without relying on the helper that the
        # ``site`` module installs, which may be missing in a frozen build.
        raise SystemExit

    def sbo(self):
        """Build the SBO workbook from a user-selected Excel file.

        Writes three sheets - the top ten styles by summed ``BOVal``, the raw
        input, and the re-ordered, ranked rows - to a workbook named after
        the file's ``SOS`` and ``SnapShotDate`` values, then shows a
        completion message. Does nothing if the file dialog is cancelled.
        """
        csv_file_path = askopenfilename()
        if not csv_file_path:
            return  # dialog cancelled

        df = pd.read_excel(csv_file_path)

        data = df.drop(df.index[0])  # remove first row
        # Convert these columns to strings before the 'sum' pivot.
        for text_col in ('BOVal%', 'BOQty%', 'CustomerPONo', 'OrdNo', 'VendorNo'):
            data[text_col] = data[text_col].astype(str)

        pivot = data.pivot_table(index='Style', aggfunc='sum')  # first pivot
        pivoted = pd.DataFrame(pivot.to_records())  # flattened
        pivoted = pivoted.sort_values(by=['BOVal'], ascending=False)  # largest first
        pivoted['Ranking'] = range(1, len(pivoted) + 1)
        pivoted = pivoted.set_index('Ranking')

        col = df.columns.tolist()
        # Rearrange the input columns by position.
        col = (col[22:23] + col[15:17] + col[:14] + col[17:22] + col[23:37])
        data = df[col]
        data = data.sort_values(by=['BOVal'], ascending=False)
        data['Ranking'] = range(1, len(data) + 1)
        data = data.set_index('Ranking')

        # The file name is built from the second row's date and the SOS value
        # of the row ranked 2 (label lookup on the Ranking index).
        dates = data['SnapShotDate'].iloc[1].strftime('%d%m%Y')
        sos = data['SOS'][2]

        result = pivoted.iloc[:10, :3]

        # The context manager saves and closes the workbook; ExcelWriter.save()
        # is no longer available in current pandas releases.
        with pd.ExcelWriter('%s SBO %s .xlsx' % (sos, dates), engine='xlsxwriter') as writer:
            # Write each dataframe to a different worksheet.
            result.to_excel(writer, sheet_name='pivot')
            df.to_excel(writer, sheet_name=dates)
            data.to_excel(writer, sheet_name='SBO')

        messagebox.showinfo("Note", "Calculation Completed")

    def cbo(self):
        """Build the CBO workbook from a data file and a style-mapping file.

        Asks for both files, tags every data row whose column 7 value appears
        in the first column of the style file as ``'Horizon'`` (copying the
        style file's third column into ``Site``), pivots the tagged rows by
        style and region, and writes the pivot, the data and the style table
        to one workbook. Does nothing if either dialog is cancelled.
        """
        csv_file_path = askopenfilename()
        if not csv_file_path:
            return  # dialog cancelled
        Stylemat = askopenfilename()
        if not Stylemat:
            return  # dialog cancelled

        df = pd.read_excel(csv_file_path)
        sm = pd.read_excel(Stylemat)

        df = df.drop(df.index[0])
        df.insert(loc=8, column='PH', value='')
        df.insert(loc=9, column='Site', value='')
        df['Region'] = df['Region'].fillna('"NA"')
        df['S&OP Style Aggrt'] = df['S&OP Style Aggrt'].astype(str)
        sm['Style'] = sm['Style'].astype(str)

        # Rewrite the second row's date for use in names: characters 1 and 3
        # become '-' and characters 0 and 2 swap places.
        # NOTE(review): this assumes Date_Rp holds strings - confirm.
        w = list(df['Date_Rp'].iloc[1])
        w[1] = '-'
        w[3] = '-'
        w[0], w[2] = w[2], w[0]
        dates = "".join(w)

        rowcount = len(df)
        rowstyle = len(sm)
        # Positional columns: 7 is the key compared against the style file,
        # 8 and 9 are the 'PH' and 'Site' columns inserted above.
        for i in range(rowcount):
            for j in range(rowstyle):
                if df.iloc[i, 7] == sm.iloc[j, 0]:
                    df.iloc[i, 8] = 'Horizon'
                    df.iloc[i, 9] = sm.iloc[j, 2]

        table = pd.pivot_table(df[df.PH == 'Horizon'], index='S&OP Style Aggrt', columns='Region',
                               values='Net CBO Value', aggfunc='sum')
        table['Grand Total'] = table.sum(axis=1)
        table = table.sort_values(by=['Grand Total'], ascending=False)
        table['Ranking'] = range(1, len(table) + 1)

        # The context manager saves and closes the workbook; ExcelWriter.save()
        # is no longer available in current pandas releases.
        with pd.ExcelWriter('CBO %s .xlsx' % dates, engine='xlsxwriter') as writer:
            # Write each dataframe to a different worksheet.
            table.to_excel(writer, sheet_name='pivot')
            df.to_excel(writer, sheet_name=dates)
            sm.to_excel(writer, sheet_name='StyleMat')

        messagebox.showinfo("Note", "Calculation Completed")
root = Tk()
root.geometry('400x300')
app = Window(root)
root.mainloop()
I'd like to know how to find the main reason for this error and where to look for it: is my scripting method incorrect, or is there an additional file or module that I need? Thank you in advance for your help.

I uninstalled everything related to win32 (pypiwin32, pywin32, pywin32-ctypes, pywinpty) and then installed again and magically it worked.
Took the idea from here and here.

This is quite late, but the answer to that issue is simply that the py-to-exe build cannot run with numpy 1.17. After downgrading to numpy 1.16, the program runs normally.

You are getting the error "failed to execute script pyi_rth_win32comgenpy" as a result of not including the images you used for your icons and labels.
I included the images for the icon, the question mark and the title.
Copy these images into the directory that contains your executable.

Python: Pandas style background color not visible in excel file

I am creating an excel file with multiple sheets using xlsxwriter as engine.
In each sheet the row color is based on value of column named colour
But the color is not visible in my excel file.
import pandas as pd
def row_colour(row):
    """Return one background-colour CSS string per cell of *row*.

    The colour is the row's ``colour`` value, lower-cased.
    """
    css = 'background-color:' + row.colour.lower()
    return [css for _ in row]
writer = pd.ExcelWriter('try.xlsx', engine='xlsxwriter')
cols = ['subject','colour']
df1 = pd.DataFrame([['Math','DarkRed'],['Science','Yellow']],columns=cols)
df2 = pd.DataFrame([['English','Orange'],['History','Green']],columns=cols)
df3 = pd.DataFrame([['Geography','DarkRed'],['Civic','Yellow']],columns=cols)
df1.style.apply(row_colour,axis=1)
df2.style.apply(row_colour,axis=1)
df3.style.apply(row_colour,axis=1)
df1.to_excel(writer, sheet_name='Sheet 1')
df2.to_excel(writer, sheet_name='Sheet 2')
df3.to_excel(writer, sheet_name='Sheet 3')
writer.save()
In output no color is visible:
The accepted answer is right for the above question.
I have improved the task by deleting the color column since it's only use was to color the rows.
Code for it:
import pandas as pd
def row_colour(table, color):
    """Build a frame of CSS strings shaped like *table*, one colour per row.

    Row ``n`` of *table* is coloured with row ``n`` of ``color['colour']``
    (matched by position, so *table* may carry any index). ``"DarkRed"`` is
    written as ``red``; every other name is used as given. The result has
    the same index and columns as *table*, as ``Styler.apply(axis=None)``
    expects.
    """
    color_data = []
    for position in range(len(table)):
        name = str(color.iloc[position]['colour'])
        if name == "DarkRed":
            # NOTE(review): presumably substituted because the Excel export
            # does not recognise this colour name - confirm.
            css = 'background-color:red'
        else:
            css = 'background-color:' + name
        color_data.append([css] * table.shape[1])
    return pd.DataFrame(color_data, index=table.index, columns=table.columns)
writer = pd.ExcelWriter('try.xlsx', engine='xlsxwriter')
cols = ['subject','colour']
df1 = pd.DataFrame([['Math','DarkRed'],['Science','Yellow']],columns=cols)
df2 = pd.DataFrame([['English','Orange'],['History','Green']],columns=cols)
df3 = pd.DataFrame([['Geography','DarkRed'],['Civic','Yellow']],columns=cols)
color = pd.DataFrame(columns=['colour'])
color['colour']=df1['colour']
df1 = df1.drop(['colour'],axis=1)
df1=df1.style.apply(row_colour,axis=None,color=color)
color = pd.DataFrame(columns=['colour'])
color['colour']=df2['colour']
df2=df2.drop(['colour'],axis=1)
df2=df2.style.apply(row_colour,axis=None,color=color)
color = pd.DataFrame(columns=['colour'])
color['colour']=df3['colour']
df3=df3.drop(['colour'],axis=1)
df3=df3.style.apply(row_colour,axis=None,color=color)
df1.to_excel(writer, sheet_name='Sheet 1')
df2.to_excel(writer, sheet_name='Sheet 2')
df3.to_excel(writer, sheet_name='Sheet 3')
writer.save()

The function is ok, you just have to reassign df1, df2, df3. This should work:
import pandas as pd
def row_colour(row):
    """Style every cell of *row* with the row's own ``colour`` value.

    Returns a list with one ``background-color:<colour>`` string per cell;
    the colour name is lower-cased.
    """
    css = 'background-color:' + row.colour.lower()
    return [css] * len(row)
writer = pd.ExcelWriter('try.xlsx', engine='xlsxwriter')
cols = ['subject','colour']
df1 = pd.DataFrame([['Math','DarkRed'],['Science','Yellow']],columns=cols)
df2 = pd.DataFrame([['English','Orange'],['History','Green']],columns=cols)
df3 = pd.DataFrame([['Geography','DarkRed'],['Civic','Yellow']],columns=cols)
df1 = df1.style.apply(row_colour,axis=1)
df2 = df2.style.apply(row_colour,axis=1)
df3 = df3.style.apply(row_colour,axis=1)
df1.to_excel(writer, sheet_name='Sheet 1')
df2.to_excel(writer, sheet_name='Sheet 2')
df3.to_excel(writer, sheet_name='Sheet 3')
writer.save()
to_excel here is a method of pandas.io.formats.style.Styler rather than the original dataframe.

As an answer to your comment, I came up with a more complex solution.
The colours are now read from the DataFrame before being dropped. Then passed as an argument to a row-colouring function.
The key points are my use of zip and pd.IndexSlice for subsetting df.style.apply. I hope this suits your colouring needs.
import pandas as pd
def colour_row(row, colour):
    """Return one ``background-color`` CSS string per cell of *row*, using *colour* lower-cased."""
    css = 'background-color:' + colour.lower()
    return [css for _ in row]
def colour_df(df, colour_col):
colours = list(df['colour'])
df = df.drop('colour', axis = 1)
coloured_df = df.style
for i, colour in zip(range(len(df)), colours):
coloured_df = coloured_df.apply(colour_row, axis=1, subset=pd.IndexSlice[i,:], colour=colour)
return coloured_df
cols = ['subject','colour']
df1 = pd.DataFrame([['Math','DarkRed'],['Science','Yellow']],columns=cols)
df2 = pd.DataFrame([['English','Orange'],['History','Green']],columns=cols)
df3 = pd.DataFrame([['Geography','DarkRed'],['Civic','Yellow']],columns=cols)

# The context manager saves and closes the workbook on exit;
# ExcelWriter.save() is no longer available in current pandas releases.
with pd.ExcelWriter('try.xlsx', engine='xlsxwriter') as writer:
    # Sheets are numbered from 1 in the order the frames are listed.
    for sheet_num, df in enumerate([df1, df2, df3], start=1):
        sheet_name = 'Sheet ' + str(sheet_num)
        df = colour_df(df, 'colour')
        df.to_excel(writer, sheet_name=sheet_name)

Excel Styles and Chart from Pandas Dataframe in Single Excel

I am having a Pandas dataframe, which I need to write it to an excel and then do color formating and plot a chart on the same sheet.
I have used StyleFrame to apply the colouring and borders to my DataFrame, but StyleFrame does not work with the pandas XlsxWriter object. As a result, plotting a chart from this styled DataFrame does not work.
Can anyone please share a solution for this?
import pandas as pd
import xlfunc
import genfunc
from StyleFrame import StyleFrame, Styler, utils
def applystyle(df):
    """Wrap *df* in a StyleFrame and apply cell, header and sizing styles.

    Cells: white background, bold Calibri, size 8. Headers: blue background,
    bold white text, size 8. Every column is set to width 15 and every row
    to height 12. Returns the StyleFrame.
    """
    sf = StyleFrame(df)
    sf.apply_column_style(
        cols_to_style=df.columns,
        styler_obj=Styler(bg_color=utils.colors.white, bold=True, font=utils.fonts.calibri,
                          font_size=8),
        style_header=True,
    )
    # Applied after the column style, so it replaces the header styling set
    # by style_header=True above.
    sf.apply_headers_style(
        styler_obj=Styler(bg_color=utils.colors.blue, bold=True, font_size=8, font_color=utils.colors.white,
                          number_format=utils.number_formats.general, protection=False))
    sf.set_column_width(columns=sf.columns, width=15)
    sf.set_row_height(rows=sf.row_indexes, height=12)
    return sf
def createchart(regionname, workbook, outxl, sheetname, cellid, charttitle, startrow, startcol, endrow, endcol):
    """Insert a combined line/column chart into one worksheet.

    Columns ``startcol + 1`` up to (not including) ``endcol - 1`` become line
    series; column ``endcol - 1`` becomes a column series on the secondary
    y-axis. Row ``startrow`` supplies the series names, rows
    ``startrow + 1``..``endrow`` the values, and sheet column 1 the
    categories. The chart is placed at cell *cellid* of the sheet
    *sheetname* taken from ``outxl.sheets``.

    *regionname* is accepted for the callers' sake but not used here.
    """
    worksheet = outxl.sheets[sheetname]
    kpichart = workbook.add_chart({'type': 'line'})
    # Was bound to a different name than the one used below (NameError).
    nodekpi = workbook.add_chart({'type': 'column'})
    axis_font = {'name': 'Cambria', 'size': 10, 'color': '#000000'}

    # The title does not depend on the series, so set it once.
    kpichart.set_title(
        {'name': charttitle, 'name_font': {'name': 'Cambria', 'size': 18, 'color': '#000000'}})

    for col_num in range(startcol + 1, endcol - 1):
        kpichart.add_series({
            'name': [sheetname, startrow, col_num],
            'categories': [sheetname, startrow + 1, 1, endrow, 1],
            'values': [sheetname, startrow + 1, col_num, endrow, col_num],
        })

    nodekpi.add_series({
        'name': [sheetname, startrow, endcol - 1],
        'categories': [sheetname, startrow + 1, 1, endrow, 1],
        'values': [sheetname, startrow + 1, endcol - 1, endrow, endcol - 1],
        'y2_axis': True,
    })
    kpichart.combine(nodekpi)

    # One call per axis: a later set_*_axis call replaces the options given
    # by an earlier one, so the name and the font are passed together.
    kpichart.set_x_axis({'name': 'Date', 'num_font': axis_font})
    kpichart.set_y_axis({
        'name': charttitle,
        'minor_gridlines': {'visible': True, 'color': '#FFFFFF'},
        'num_font': axis_font,
    })
    worksheet.insert_chart(cellid, kpichart, {'x_scale': 1.75, 'y_scale': 1.25})
def df_to_xl(regionname,hostlist,timerdf,hostData):
# Writes two workbooks: "timer_audit_data.xlsx" through pandas/xlsxwriter
# (one sheet per host, each with a chart) and "timer_audit_data_styled.xlsx"
# through StyleFrame (a single 'Parameters' sheet).
# NOTE(review): the indentation of this snippet was lost; the three statements
# after the `for` line are presumably its body - confirm.
outxl = pd.ExcelWriter("timer_audit_data.xlsx", engine='xlsxwriter')
stylexl = StyleFrame.ExcelWriter("timer_audit_data_styled.xlsx")
outxl.sheets.clear()
workbook = outxl.book
# stylebook is assigned but not used again in this function.
stylebook = stylexl.book
# Styled copy of the raw timer data, written without its index.
style_timerdf = applystyle(timerdf)
style_timerdf.to_excel(stylexl, sheet_name='Parameters', index=False)
# This is a second name for the same frame, not a copy.
timerdfnew = timerdf
# Pivot: count of NODE values per HOST (rows) and timer (columns).
timer_summary = timerdfnew.pivot_table(values='NODE', index='HOST', columns='timer', aggfunc="count")
timer_summary = applystyle(timer_summary)
# Written to the same 'Parameters' sheet, starting at column offset 15.
timer_summary.to_excel(stylexl, sheet_name='Parameters', startrow=0, startcol=15)
for hostname in hostlist:
hostname = genfunc.toString(hostname)
# The same hostData frame is written to every host's sheet.
hostData.to_excel(outxl,sheet_name=hostname,startrow=1,startcol=1,index=False)
# NOTE(review): tr and tc are not defined in this function or anywhere in the
# visible code; presumably the row and column counts of hostData - confirm.
createchart(regionname,workbook,outxl,hostname,'G2',"Timer Performance"+hostname,1,1,tr+1,tc+1)
# In this section the dataframe is written to the Excel workbook and a chart is prepared in the same sheet.
# The goal is to also apply colours and borders to the hostData written to Excel before preparing the chart in that sheet.
outxl.save()
stylexl.save()

It would be much better if you draw your chart using openpyxl package since StyleFrame uses openpyxl engine and xlsxwriter package does not work with existing files as far as I know.
This example works for me with openpyxl version 2.2.5 and StyleFrame version 1.2
import pandas as pd
from openpyxl.charts import BarChart, Reference, Series
from StyleFrame import StyleFrame, Styler, utils
def apply_style(sf):
    """Style *sf* in place and return it.

    Headers get a black background with bold white text; every column gets
    a blue font.
    """
    sf.apply_headers_style(styler_obj=Styler(bg_color=utils.colors.black, font_color=utils.colors.white, bold=True))
    sf.apply_column_style(cols_to_style=sf.columns, styler_obj=Styler(font_color=utils.colors.blue))
    return sf
def draw_chart_openpyxl(worksheet):
    """Add a bar chart with series 'A' and 'B' to *worksheet* and return it.

    Labels come from rows 2-4 of column 1; series 'A' and 'B' read rows 2-4
    of columns 2 and 3. Positions are ``(row, column)`` pairs.
    """
    chart = BarChart()
    labels = Reference(worksheet, pos1=(2, 1), pos2=(4, 1))

    # One series per data column, sharing the same labels.
    for title, column in (('A', 2), ('B', 3)):
        values = Reference(worksheet, pos1=(2, column), pos2=(4, column))
        chart.append(Series(values, title=title, labels=labels))

    # Placement and size of the chart drawing.
    chart.drawing.top = 100
    chart.drawing.left = 200
    chart.drawing.width = 300
    chart.drawing.height = 200

    worksheet.add_chart(chart)
    return worksheet
if __name__ == '__main__':
ew = StyleFrame.ExcelWriter('output.xlsx')
df = pd.DataFrame({
'A': [1, 2, 3],
'B': [4, 5, 6]
}, columns=['A', 'B'], index=['Row 1', 'Row 2', 'Row 3'])
sf = StyleFrame(df)
sf = apply_style(sf)
sf.to_excel(excel_writer=ew, sheet_name='Sheet1')
draw_chart_openpyxl(ew.book.get_sheet_by_name('Sheet1'))
ew.save()
You can find here many usage examples.

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

merging .xlsx files into one without overwriting data - python

Related

Loop through multiple xml files

How to Edit Cell of Streamlit AgGrid's Row?

py to exe : failed to execute script pyi_rth_win32comgenpy

Python: Pandas style background color not visible in excel file

Excel Styles and Chart from Pandas Dataframe in Single Excel

Categories

Resources