how to set a column to DATE format in xlsxwriter - python

I am working on a project where I am writing out onto an xlsx spreadsheet and need to format the one column for 'Date'. I get the program to run and all but the column format is still set to 'General'.
I am trying this a different way, with different code, to see if anyone can answer:
# Asker's snippet, reproduced as posted (the original indentation was lost).
# It iterates over the rows returned by the AEReport query and writes each
# field of the row to an xlsx worksheet.
for row in cur.execute('''SELECT `Mapline`,`Plant`,`Date`,`Action` from AEReport'''):
lengthOfHeadings = len(row)
# Output file name is "AEReport-<timestamp>.xlsx".
output = '%s-%s.xlsx' % ("AEReport",now.strftime("%m%d%Y-%H%M"))
workbook = xlsxwriter.Workbook(output, {'strings_to_numbers':True})
worksheet = workbook.add_worksheet()
# Cell format used for every non-date field.
format=workbook.add_format({'font_size':'8','border':True})
# Same font/border settings plus a date-time number format.
format2=workbook.add_format({'font_size':'8','border':True,'num_format':'mm/dd/yy hh:mm'})
count = 0
for name in range(0,lengthOfHeadings):
# NOTE(review): `name` is a column index produced by range(), while row[2] is
# the value of the Date field, so this comparison presumably never selects the
# date column; it looks like `name == 2` was intended -- confirm.
if name==row[2]:
name=int(name)
worksheet.write(counter, count, row[name],format2)
else:
worksheet.write(counter, count, row[name],format)
count += 1
# NOTE(review): `counter` is not initialised anywhere in this excerpt.
counter += 1
Slihthinden

To get the date-time format working, you have to convert the date value to an Excel serial date value.
Here is an example showing how it works:
import pandas as pd

# Excel stores a date as the number of days since its epoch.  Because Excel
# treats 1900 as a leap year, serial numbers for any date after February 1900
# are measured from 1899-12-30 (using 1899-12-31 gives values one day short).
EXCEL_EPOCH = pd.Timestamp(1899, 12, 30)

# Twelve month-end dates starting with January 2011.
data = pd.DataFrame({
    'test_date': pd.date_range('1/1/2011', periods=12,
                               freq=pd.offsets.MonthEnd()),
})

# Convert the datetimes to plain floats (days since the Excel epoch).  Plain
# numbers are written without a per-cell format, so the column format set
# below is what Excel displays.
data.test_date = (data.test_date - EXCEL_EPOCH) / pd.Timedelta(days=1)

# The context manager saves and closes the file on exit; ExcelWriter.save()
# no longer exists in pandas 2.x.  The old `pd.core.format.header_style`
# tweak is dropped because that module path no longer exists either.
with pd.ExcelWriter('test.xlsx', engine='xlsxwriter') as writer:
    data.to_excel(writer, sheet_name='test', index=False)
    workbook = writer.book
    worksheet = writer.sheets['test']
    formatdict = {'num_format': 'mm/dd/yyyy'}
    fmt = workbook.add_format(formatdict)
    # Width None keeps the default width; only the number format is applied.
    worksheet.set_column('A:A', None, fmt)
This is how the output will look like:

from datetime import datetime
# Number format that the target cell will be displayed with.
# NOTE(review): `workbook` and `worksheet` are not created in this fragment;
# presumably they come from xlsxwriter.Workbook(...) / add_worksheet().
date_format = workbook.add_format({'num_format':'yyyy-mm-dd hh:mm:ss'})
# Write the current local date/time into row 0, column 0 using that format.
worksheet.write(0, 0, datetime.today(),date_format)
result:
image from Excel Generated

from datetime import datetime

# Cell format that displays the stored date as day-month-year.
date = workbook.add_format({'num_format': 'dd-mm-yyyy'})
# A date has to be passed as a datetime object: the bare expression
# 02-12-199 is integer subtraction (and a syntax error in Python 3 because
# of the leading zero), not a date.
# NOTE(review): the year is assumed to be 1999 -- confirm the intended value.
worksheet.write(1, 1, datetime.strptime('02-12-1999', '%d-%m-%Y'), date)

Related

Export DataFrame timedelta column to timestamp Excel column

I have a DataFrame that contains a datetime64 and a timedelta64. Unfortunately, I can't export the latter to a properly formatted hh:mm:ss column in an Excel file:
import pandas as pd

# Input values are plain strings: ISO dates and "HH:MM:SS" durations.
data = {
    "date": [
        "2023-02-05",
        "2023-02-05",
        "2022-12-02",
        "2022-11-29",
        "2022-11-18",
    ],
    "duration": [
        "01:07:48",
        "05:23:06",
        "02:41:58",
        "00:35:11",
        "02:00:20",
    ],
}

# Parse the strings into a datetime column and a timedelta column.
df = pd.DataFrame(data).assign(
    date=lambda frame: pd.to_datetime(frame["date"], format="%Y-%m-%d"),
    duration=lambda frame: pd.to_timedelta(frame["duration"]),
)

with pd.ExcelWriter("df.xlsx", datetime_format="YYYY-MM-DD", engine="xlsxwriter") as writer:
    workbook = writer.book
    time_format = workbook.add_format({"num_format": "HH:MM:SS"})
    df.to_excel(writer, sheet_name="sheet", index=False)
    worksheet = writer.sheets["sheet"]
    # Column A: width only.  Column B: width plus the time number format.
    worksheet.set_column("A:A", 20)
    worksheet.set_column("B:B", 50, cell_format=time_format)
The resulting Excel file will display like this:
So, the date_time format in the ExcelWriter object is applied correctly for column A, as well as the width setting for column B, but the number formatting isn't working.
What am I doing wrong?
The reason that the column format isn't being applied is that Pandas is applying a cell number format of "0" to the timedelta values. The cell format overrides the column format so that isn't applied. You can verify this by adding the following at the end of the with statement and you will see that it is formatted as expected:
worksheet.write(7, 1, .5)
I'm not sure what is the best way to work around but you could iterate over the timedelta values and rewrite them out to override the pandas formatted values. Something like this:
import pandas as pd

# Input values are plain strings: ISO dates and "HH:MM:SS" durations.
data = {
    "date": [
        "2023-02-05",
        "2023-02-05",
        "2022-12-02",
        "2022-11-29",
        "2022-11-18",
    ],
    "duration": [
        "01:07:48",
        "05:23:06",
        "02:41:58",
        "00:35:11",
        "02:00:20",
    ],
}

df = pd.DataFrame(data)
df["date"] = pd.to_datetime(df["date"], format="%Y-%m-%d")
df["duration"] = pd.to_timedelta(df["duration"])

with pd.ExcelWriter("df.xlsx", datetime_format="YYYY-MM-DD", engine="xlsxwriter") as writer:
    workbook = writer.book
    time_format = workbook.add_format({"num_format": "HH:MM:SS"})

    df.to_excel(writer, sheet_name="sheet", index=False)

    worksheet = writer.sheets["sheet"]
    worksheet.set_column("A:A", 20)
    worksheet.set_column("B:B", 50, cell_format=time_format)

    # Write the durations again without a cell format, replacing the cells
    # pandas wrote, so the column format set above is what gets displayed.
    # Sheet row 0 holds the header, so the data starts at row 1.
    col = df.columns.get_loc("duration")
    for sheet_row, duration in enumerate(df["duration"], start=1):
        worksheet.write(sheet_row, col, duration)
Output:
You could also convert the timedelta back to a number (like Pandas does), since dates and times in Excel are just numbers with a format applied.
Something like this, which will give the same result as above:
# `data` is the same dict of date and duration strings used in the example above.
df = pd.DataFrame(data)
df["date"] = pd.to_datetime(df["date"], format="%Y-%m-%d")

# Store each duration as a plain float: elapsed seconds divided by the
# number of seconds in a day.
elapsed_seconds = pd.to_timedelta(df["duration"]).dt.total_seconds()
df["duration"] = elapsed_seconds / 86400

with pd.ExcelWriter("df.xlsx", datetime_format="YYYY-MM-DD", engine="xlsxwriter") as writer:
    workbook = writer.book
    time_format = workbook.add_format({"num_format": "HH:MM:SS"})

    df.to_excel(writer, sheet_name="sheet", index=False)

    worksheet = writer.sheets["sheet"]
    worksheet.set_column("A:A", 20)
    worksheet.set_column("B:B", 50, cell_format=time_format)
The problem is that Excel measures time in units of days. For example, your first value (1:07:48 = 4068 s) is stored as a duration of 4068/(24*3600) ≈ 0.047 days.
You can find possible solutions here:
formatting timedelta64 when using pandas.to_excel

Using xlsx writer to write custom date format

I am writing a dataframe into excel and using xlsx writer to format my date columns to a custom format but the excel always contains a datetime value and ignores the custom formatting specified in my code. Here is the code:
writer = ExcelWriter(path+'test.xlsx', engine='xlsxwriter')
workbook = writer.book
# Write the data starting at sheet row 1 and without pandas' own header row;
# the header is added back by hand further down.
df.to_excel(writer, sheet_name='sheet1', index=False, startrow=1, header=False)
worksheet1 = writer.sheets['sheet1']
fmt = workbook.add_format({'num_format': 'd-mmm-yy'})
worksheet1.set_column('C:C', None, fmt)
# Adjusting column width
# NOTE(review): this call also covers column C and passes no format, so it
# presumably replaces the column settings made on the previous line -- confirm.
worksheet1.set_column(0, 20, 12)
# Adding back the header row
column_list = df.columns
for idx, val in enumerate(column_list):
    worksheet1.write(0, idx, val)
# ExcelWriter.save() was removed in pandas 2.0; close() saves and closes the file.
writer.close()
Here I want 'd-mmm-yy' format for column C but the exported excel contains datetime values. I also don't want to use strftime to convert my columns to strings to ensure easy date filtering in excel.
Excel output:
The reason this doesn't work as expected is that Pandas uses a default datetime format with datetime objects and applies this format at the cell level. In XlsxWriter, and Excel, a cell format overrides a column format, so your column format has no effect.
The easiest way to handle this is to specify the Pandas date (or datetime) format as a parameter in pd.ExcelWriter():
from datetime import date

import pandas as pd

# Five consecutive dates to demonstrate the formatting.
df = pd.DataFrame({'Dates': [date(2020, 2, 1),
                             date(2020, 2, 2),
                             date(2020, 2, 3),
                             date(2020, 2, 4),
                             date(2020, 2, 5)]})

# `date_format` sets the number format pandas applies to date cells, so no
# column format is needed.  The context manager saves and closes the file on
# exit; ExcelWriter.save() was removed in pandas 2.0.
with pd.ExcelWriter('pandas_datetime.xlsx',
                    engine='xlsxwriter',
                    date_format='d-mmm-yy') as writer:
    df.to_excel(writer, sheet_name='Sheet1')
Output:
See also this Pandas Datetime example from the XlsxWriter docs.

Use pandas to calculate month and week from a given date column in excel and append to another column in same sheet

I am working on a pandas program, where i fetch rows from other excel sheets and append them to the main file:
import pandas as pd
from openpyxl import load_workbook
# Read all three ticket Excel sheets
df1 = pd.read_excel("sheet a.xlsx")
df2 = pd.read_excel("sheet b.xlsx")
df3 = pd.read_excel("sheet c.xlsx")
# Create a pandas Excel writer using openpyxl as the engine
writer = pd.ExcelWriter(r"main_excel.xlsx", engine = "openpyxl")
# Attach the existing workbook to the writer.
# NOTE(review): assigning to writer.book and calling writer.save() rely on
# older pandas behaviour -- confirm against the pandas version in use.
writer.book = load_workbook(r"main_excel.xlsx")
sheets = writer.book.sheetnames
# Re-read the existing sheets; their lengths are used below to compute the row
# at which the new data is written.
reader1 = pd.read_excel(r"main_excel.xlsx", "sheet a")
reader2 = pd.read_excel(r"main_excel.xlsx", "sheet b")
reader3 = pd.read_excel(r"main_excel.xlsx", "sheet c")
# Write each new frame below the existing rows (+1 presumably skips the
# header row), without index or header.
df1.to_excel(writer, sheet_name =sheets[0], index = False, header = False,startrow=len(reader1)+1)
df2.to_excel(writer, sheet_name =sheets[2], index = False, header = False,startrow=len(reader2)+1)
df3.to_excel(writer, sheet_name =sheets[4], index = False, header = False,startrow=len(reader3)+1)
writer.save()
writer.close()
After writing the data to the excel file, I have to calculate the month and the week number from the dates in the data and fill in the missing columns.
The data gets appended each week, so I would like to append the new values to the pre-existing columns.
is there a way to do that without writing the formula in the excel sheet itself? by coding it in the program?
You can convert your date column using the code below:
df['Opened'] = pd.to_datetime(df['Opened'])
Then you can get your other columns using:
# Full month name (e.g. "February") for each date in 'Opened'.
df['Month'] = df['Opened'].dt.month_name()
# ISO week number.  Series.dt.week was removed in pandas 2.0;
# dt.isocalendar().week is its replacement (note the result dtype is UInt32).
df['Week'] = df['Opened'].dt.isocalendar().week

how to format a column in pandas using a column name

I want to format a column in a dataframe to have ',' between large numbers once i send the df to_excel. i have a code that works but it selects the column based on its position. I want a code to select the column based on its name and not position. can someone help me please?
# Asker's snippet: write the frame, then apply a thousands-separator number
# format to one column of the resulting worksheet.
df.to_excel(writer, sheet_name = 'Final Trade List')
wb = writer.book
ws = writer.sheets['Final Trade List']
# NOTE(review): the name `format` shadows the Python builtin of the same name.
format = wb.add_format({'num_format': '#,##'})
ws.set_column('O:O', 12, format) # this code works but its based on position and not name
# Passing the Series itself as the column argument is what produces the
# TypeError quoted below.
ws.set_column(df['$ to buy'], 12, format) # this gives me an error
writer.save()
TypeError: cannot convert the series to <class 'int'>
This should do the trick:
import pandas as pd

# Render every value with a thousands separator.  The column holds strings
# afterwards, not numbers.
with_commas = [f"{val:,}" for val in df['columnname']]
df['columnname'] = pd.Series(with_commas, index=df.index)

Subtract cell in one column from the one prior in xlsx file in Python

I have an xlsx file with multiple sheets. In the sheets there is column A
with time stamps (as strings). I need to subtract the cells from the one above it to see how much time has elapsed.
ex. COLUMN A
02/23/2017 08:25:39
02/23/2017 08:55:56
02/23/2017 08:55:57
02/23/2017 08:56:12
Here is what I have so far.....Thank you in advance.
import xlrd
from datetime import datetime
def open_file(path):
    """Print the time elapsed between the first two timestamps in column A.

    Opens the workbook at *path* with xlrd, reads the first column of the
    first worksheet, parses every value as an 'MM/DD/YYYY HH:MM:SS'
    timestamp and prints the difference between the second and the first.
    """
    workbook = xlrd.open_workbook(path)
    sheet = workbook.sheet_by_index(0)
    # Column 0, starting from row 0.
    raw_values = sheet.col_values(0, 0)
    column_list = [
        datetime.strptime(text, '%m/%d/%Y %H:%M:%S') for text in raw_values
    ]
    print(column_list[1] - column_list[0])


if __name__ == "__main__":
    path = '02-23-2017.xlsx'
    open_file(path)
You may want to check out pandas. It handles calculations like this quickly.
import pandas as pd

# Create a dictionary of data frames, one for each sheet.  The keyword is
# `sheet_name`; passing None loads every sheet, keyed by sheet name.
df_dict = pd.read_excel('C:/path/to/file.xlsx', sheet_name=None, header=None)

# Iterate over each data frame
for df_key in df_dict:
    # Pull the time data from the first column
    t = pd.to_datetime(df_dict[df_key].iloc[:, 0])
    # Difference to the previous row; the first row has no predecessor, so
    # fill it with a zero-length Timedelta to keep the column a timedelta.
    dt = t.diff(1).fillna(pd.Timedelta(0))
    # Assign the difference to a new column in the data frame
    df_dict[df_key]['time_delta'] = dt

# Write each sheet to a new Excel file.  The context manager saves and closes
# the file on exit; ExcelWriter.save() was removed in pandas 2.0.
with pd.ExcelWriter('C:/path/to/new_file.xlsx') as writer:
    for name, df in df_dict.items():
        df.to_excel(writer, sheet_name='sheet{}'.format(name))
Based on your existing code, you could do the following:
import xlrd
from datetime import datetime
def _elapsed_between(timestamps):
    """Return the difference between each timestamp and the one before it."""
    return [later - earlier for earlier, later in zip(timestamps, timestamps[1:])]


def open_file(path):
    """Print, for every sheet, the time elapsed between consecutive rows.

    Column A of each sheet is expected to hold timestamps stored as strings
    in 'MM/DD/YYYY HH:MM:SS' form.  For each sheet the sheet name is printed,
    followed by one line per pair of consecutive timestamps.  A sheet with
    fewer than two timestamps prints only its name.

    Raises:
        ValueError: if a cell in column A does not match the timestamp format.
    """
    # Open and read an Excel file
    # NOTE(review): xlrd 2.0 and later read only .xls files -- confirm the
    # installed version can open the .xlsx path used below.
    book = xlrd.open_workbook(path)

    # Open each sheet
    for sheet in book.sheet_names():
        current_sheet = book.sheet_by_name(sheet)

        # Read first column and convert to datetime objects
        column_values = [
            datetime.strptime(i, '%m/%d/%Y %H:%M:%S')
            for i in current_sheet.col_values(0, 0)
        ]

        print(sheet)
        # Pairing each value with its successor avoids indexing the first
        # element, which raised IndexError on an empty sheet.
        for d in _elapsed_between(column_values):
            print(" {}".format(d))


if __name__ == "__main__":
    path = '02-23-2017.xlsx'
    open_file(path)
Assuming each of the sheets has the same format, this would give you something like:
Sheet1
0:30:17
0:00:01
0:00:15
Sheet2
0:30:17
0:00:01
0:00:15
Sheet3
0:30:17
0:00:01
0:00:15

Categories

Resources