Apply borders to all cells in a range with openpyxl - python

I have a script that takes a pandas dataframe and chops it up into several hundred chunks and saves each chunk as a separate excel file. Each chunk will have the same number of columns but the number of rows varies. I've figured out how to apply all the other necessary formatting to these files with openpyxl, but I haven't yet determined the fastest way to apply borders. Also, I think I'm just not applying borders correctly, because the code below (which I suspect shouldn't need to loop over each cell individually) doesn't apply any borders.
from openpyxl.style import Border
wb = load_workbook(filename = _fname)
ws = wb.worksheets[0]
for _row in ws.range('A1:L'+str(ws.get_highest_row() ) ):
for _cell in _row:
_cell.style.borders.left.border_style = Border.BORDER_THIN
_cell.style.borders.right.border_style = Border.BORDER_THIN
_cell.style.borders.top.border_style = Border.BORDER_THIN
_cell.style.borders.bottom.border_style = Border.BORDER_THIN
wb.save(_fname)
So this code works, but it doesn't apply the border I expect (the default border in excel) and it takes a lot more steps than I'd prefer. My expectation is that I should be able to do something like this:
from openpyxl.style import Border
wb = load_workbook(filename = _fname)
ws = wb.worksheets[0]
_range = ws.some_range_func('A1:L'+str(ws.get_highest_row() ) ):
_range.style.borders.all_borders = Borders.BORDER_THIN
Does this functionality exist? If not, can someone please be so kind as to at least explain how to apply the default border style and not this slightly thicker border? None of Border.BORDER_THICK, Border.BORDER_MEDIUM, Border.BORDER_THIN, or Border.BORDER_HAIR seem correct.
Thanks!

In more pythonic way for openpyxl==3.0.5:
from openpyxl.styles import Border, Side
def set_border(ws, cell_range):
thin = Side(border_style="thin", color="000000")
for row in ws[cell_range]:
for cell in row:
cell.border = Border(top=thin, left=thin, right=thin, bottom=thin)
set_border(worksheet, 'A5:C10')

May be this is handy:
from openpyxl.reader.excel import load_workbook
from openpyxl.style import Border
def set_border(ws, cell_range):
rows = was[cell_range]
for row in rows:
row[0].style.borders.left.border_style = Border.BORDER_THIN
row[-1].style.borders.right.border_style = Border.BORDER_THIN
for c in rows[0]:
c.style.borders.top.border_style = Border.BORDER_THIN
for c in rows[-1]:
c.style.borders.bottom.border_style = Border.BORDER_THIN
#usage example:
ws = load_workbook('example.xlsx').get_active_sheet()
set_broder(ws, "C3:H10")
It performs reasonably fast.

There is a slight modification to answer from #Karimov
Below is how your code should be
from openpyxl.styles import Border, Side, Font, Alignment
def __format_ws__(self, ws, cell_range):
border = Border(left=Side(border_style='thin', color='000000'),
right=Side(border_style='thin', color='000000'),
top=Side(border_style='thin', color='000000'),
bottom=Side(border_style='thin', color='000000'))
rows = ws[cell_range]
for row in rows:
for cell in row:
cell.border = border
A much faster way that uses list comprehension is below:
def __format_ws__(self, ws, cell_range):
#applying border and alignment
font = Font(size=9)
align=Alignment(horizontal='left', vertical='center')
border = Border(left=Side(border_style='thin', color='000000'),
right=Side(border_style='thin', color='000000'),
top=Side(border_style='thin', color='000000'),
bottom=Side(border_style='thin', color='000000'))
rows = [rows for rows in ws[cell_range]]
flattened = [item for sublist in rows for item in sublist]
[(setattr(cell,'border',border), setattr(cell,'font',font), setattr(cell,'alignment',align)) for cell in flattened]
The way you use it is:
self.__format_ws__(ws=writer.book.worksheets[0], cell_range='A1:G10')

Decision that works on openpyxl 2.3.5
from openpyxl.styles import Border, Side
def set_border(ws, cell_range):
border = Border(left=Side(border_style='thin', color='000000'),
right=Side(border_style='thin', color='000000'),
top=Side(border_style='thin', color='000000'),
bottom=Side(border_style='thin', color='000000'))
rows = ws.iter_rows(cell_range)
for row in rows:
for cell in row:
cell.border = border
set_border(worksheet, 'A5:C10')

#user698585 your approach seems nice but it doesn't work anymore as the present version of the openpyxl change the implementation. So this should be updated into e.g.
ws.cell(row=1, column=1).style.border.top.border_style = borders.BORDER_MEDIUM
but it results with an error that changing the style is not allowed.
As a workaround I just defined a dedicated styles, but they are just a duplication of the present styles plus border definition - not so good solution as works only if you know what style has the cell under the change.
border_style = Style(font=Font(name='Console', size=10, bold=False,
color=Color(openpyxl.styles.colors.BLACK)),
fill=PatternFill(patternType='solid', fgColor=Color(rgb='00C5D9F1')),
border=Border(bottom=Side(border_style='medium', color=Color(rgb='FF000000'))))

if you need styling (borders...) for pandas excel dataframe my fork just got merged into master
https://github.com/pydata/pandas/pull/2370#issuecomment-10898427
as for you borders problems.
setting all borders at once does not seam to work in openpyxl.
In [34]: c.style.borders.all_borders.border_style = openpyxl.style.Border.BORDER_THIN
In [36]: c.style
'Calibri':11:False:False:False:False:'none':False:'FF000000':'none':0:'FFFFFFFF':'FF000000':'none':'FF000000':'none':'FF000000':'none':'FF000000':'none':'FF000000':'none':'FF000000':0:'thin':'FF000000':'none':'FF000000':'none':'FF000000':'none':'FF000000':'none':'FF000000':'general':'bottom':0:False:False:0:'General':0:'inherit':'inherit'
setting individually works ('thin':'FF000000')
In [37]: c.style.borders.top.border_style = openpyxl.style.Border.BORDER_THIN
In [38]: c.style
Out[38]: 'Calibri':11:False:False:False:False:'none':False:'FF000000':'none':0:'FFFFFFFF':'FF000000':'none':'FF000000':'none':'FF000000':'thin':'FF000000':'none':'FF000000':'none':'FF000000':0:'thin':'FF000000':'none':'FF000000':'none':'FF000000':'none':'FF000000':'none':'FF000000':'general':'bottom':0:False:False:0:'General':0:'inherit':'inherit'
maybe a bug in openpyxl. but no big deal just wrap setting bottom , top, left, right in function

Had the same issue but couldn't find anything that fixes this problem for 2019 because of depreciation. I have something that works below.. could be better but works for all intends and purposes.
def set_border(ws, cell_range):
rows = ws[cell_range]
for row in rows:
if row == rows[0][0] or row == rows[0][-1] or row == rows[-1][0] or row == rows[-1][-1]:
pass
else:
row[0].border = Border(left=Side(style='thin'))
row[-1].border = Border(right=Side(style='thin'))
for c in rows[0]:
c.border = Border(top=Side(style='thin'))
for c in rows[-1]:
c.border = Border(bottom=Side(style='thin'))
rows[0][0].border = Border(left=Side(style='thin'), top=Side(style='thin'))
rows[0][-1].border = Border(right=Side(style='thin'), top=Side(style='thin'))
rows[-1][0].border = Border(left=Side(style='thin'), bottom=Side(style='thin'))
rows[-1][-1].border = Border(right=Side(style='thin'), bottom=Side(style='thin'))

def set_border(ws, cell_range, style='thin'):
rows = ws[cell_range]
for row in rows:
temp_row = copy(row[0].border)
row[0].border = Border(left=Side(style=style), right=temp_row.right, top=temp_row.top, bottom=temp_row.bottom)
temp_row = copy(row[-1].border)
row[-1].border = Border(right=Side(style=style), left=temp_row.left, top=temp_row.top, bottom=temp_row.bottom)
for c in rows[0]:
temp_row = copy(c.border)
c.border = Border(top=Side(style=style), left=temp_row.left, bottom=temp_row.bottom, right=temp_row.right)
for c in rows[-1]:
temp_row = copy(c.border)
c.border = Border(bottom=Side(style=style), left=temp_row.left, top=temp_row.top, right=temp_row.right)
This keeps the existing borders of the side and you can also style your border

seems there is no built-in for this task, and we have to make some steps ourselves, like:
#need make conversion from alphabet to number due to range function
def A2N(s,e):
return range(ord(s), ord(e)+1)
#B1 is the border you defined
#Assume you trying border A1-Q1 ... A3-Q3
X = A2N('A','Q')
#print X
your_desired_sheet_range_rows = range(1,4)
#need two loop to go through cells
for row in your_desired_sheet_rows:
for col in X:
ca = chr(col)
sheet[ca+str(row)].border=B1

Related

Convert openpyxl to pandas

After doing some research I understood that pandas can give us better performace comparing to openpyxl.
I'm trying to convert this code from openpyxl to pandas:
def get_restaurant_data(self, res: Restaurant, sf_id: int) -> RestaurantSchema:
sheet = self.workbook["VendorInfo"]
for row in sheet.iter_rows(min_row=2):
if row[0].value == sf_id:
res.address = self.check_data(row[1].value, res.address)
if self.check_data(row[2].value) and self.check_data(row[3].value):
res.loc = Point(row[3].value, row[2].value)
res.phone = self.check_data(row[4].value, res.manager_phone)
logo = self.download_file(row[5].value)
cover = self.download_file(row[6].value)
if logo:
res.logo.save(self.check_data(row[5].value), logo, save=False)
if cover:
res.cover.save(self.check_data(row[6].value), cover, save=False)
return RestaurantSchema(**res.__dict__)
Anyone can give me a suggestion about how to convert this code?
df = pd.read_excel(self.path,index_col=None)
print(f'>>>{df}')```
Here is the equivalent code using pandas:
import pandas as pd
def get_restaurant_data(self, res: Restaurant, sf_id: int) ->
RestaurantSchema:
sheet = self.workbook["VendorInfo"]
df = pd.DataFrame(sheet.values)
for i, row in df[1:].iterrows():
if row[0] == sf_id:
res.address = self.check_data(row[1], res.address)
if self.check_data(row[2]) and self.check_data(row[3]):
res.loc = Point(row[3], row[2])
res.phone = self.check_data(row[4], res.manager_phone)
logo = self.download_file(row[5])
cover = self.download_file(row[6])
if logo:
res.logo.save(self.check_data(row[5]), logo, save=False)
if cover:
res.cover.save(self.check_data(row[6]), cover, save=False)
return RestaurantSchema(**res.__dict__)
This code uses pandas to read the data from the "VendorInfo" sheet into a dataframe, and then uses the iterrows() method to iterate over the rows of the dataframe. The rest of the code is mostly the same as the original, with the exception of using the [] operator to access the values in each row instead of the value attribute.
I hope this helps!

How to freeze first column using Openpyxl

I am using the following code to freeze the first column of my excel sheer generated using openpyxl. But this is not working and showing me an error.
dim_holder = DimensionHolder(worksheet=worksheet)
for col in range(worksheet.min_column, worksheet.max_column + 1):
dim_holder[get_column_letter(col)] = ColumnDimension(
worksheet, min=col, max=col, width=20)
# worksheet.column_dimensions.group(2, 10)
dim_holder['A'] = ColumnDimension(
worksheet, min=1, max=1, width=35)
worksheet.column_dimensions = dim_holder
worksheet.freeze_panes = 'A'
How can I freeze the entire first column, so that it will be sticky when I do horizontal scrolling ?
As not all code is present, but you have mentioned that you are using openpyxl, I am going to assume that. To freeze panes, you need to give the cell. That will freeze the panes above and to the left of the same. So, when you give A, it will not help, need to change it to B1 instead. A simple example is given below.
wb=openpyxl.load_workbook('YourFile.xlsx')
ws=wb['YourSheet']
c = ws['B1'] ## Freeze everything to left of B (that is A) and no columns to feeze
ws.freeze_panes = c
wb.save('YourFile.xlsx')

Conditional_formatting to find duplicate values using openpyxl

I am trying to have a class highlight any duplicate values in column A using openpyxl.
Currently column A has the following values:
A
A
B
C
A
C
A
The end result would have all of the A's and C's cells colored in red. My code below doesn't throw any errors, but the file when opened after being ran through this doesn't have the any coloring in the duplicated cells.
import openpyxl
from openpyxl import formatting, styles
from openpyxl.formatting import Rule
class Duplicates():
def __init__(self, wb2):
self.wb2 = wb2
ws2=self.wb2.active
self.red_fill = styles.PatternFill(start_color ='ffc7ce', end_color = 'ffc7ce', fill_type = 'solid')
dxf= styles.differential.DifferentialStyle(fill=self.red_fill)
rule = Rule(type='duplicateValues',dxf=dxf,stopIfTrue = None)
ws2.conditional_formatting.add('$A:$A',rule)
self.wb2.save('testing.xlsx')
Duplicates(wb2)
Any help would be much appreciated.
Try this code, it work for me
red_text = Font(color="9C0006")
red_fill = PatternFill(bgColor="FFC7CE")
dxf = DifferentialStyle(font=red_text, fill=red_fill)
rule = Rule(type="duplicateValues", text="highlight", dxf=dxf)
wsRes.conditional_formatting.add('B1:B10000', rule)

How to improve this script. Dividing a column by 3 in openpyxl

Looking for a way to simplify the division of a column. Is there a loop I can use?
from openpyxl import load_workbook
xfile = load_workbook('camdatatest.xlsx')
sheet =xfile.get_sheet_by_name('Sheet1')
sheet['D2'] = '=C2/3' # Want to divide all values in Column C with the new value in D.
sheet['D3'] = '=C4/3'
sheet['D4'] = '=C5/3'
sheet['D5'] = '=C6/3'
sheet['D6'] = '=C7/3'
sheet['D7'] = '=C8/3'
sheet['D8'] = '=C9/3'
sheet['D9'] = '=C10/3'
xfile.save("camdatatestoutput.xlsx")
This is just one of the possibilities.
for i in range(2,11):
sheet['D{}'.format(i)] = '=C{}/3'.format(i)
Notice that the way range works if you want the number 10 to be included, you need to make sure to pass in 11.

Simulate autofit column in xslxwriter

I would like to simulate the Excel autofit function in Python's xlsxwriter. According to this url, it is not directly supported:
http://xlsxwriter.readthedocs.io/worksheet.html
However, it should be quite straightforward to loop through each cell on the sheet and determine the maximum size for the column and just use worksheet.set_column(row, col, width) to set the width.
The complications that is keeping me from just writing this are:
That URL does not specify what the units are for the third argument to set_column.
I can not find a way to measure the width of the item that I want to insert into the cell.
xlsxwriter does not appear to have a method to read back a particular cell. This means I need to keep track of each cell width as I write the cell. It would be better if I could just loop through all the cells, that way a generic routine could be written.
[NOTE: as of Jan 2023 xslxwriter added a new method called autofit. See jmcnamara's answer below]
As a general rule, you want the width of the columns a bit larger than the size of the longest string in the column. The with of 1 unit of the xlsxwriter columns is about equal to the width of one character. So, you can simulate autofit by setting each column to the max number of characters in that column.
Per example, I tend to use the code below when working with pandas dataframes and xlsxwriter.
It first finds the maximum width of the index, which is always the left column for a pandas to excel rendered dataframe. Then, it returns the maximum of all values and the column name for each of the remaining columns moving left to right.
It shouldn't be too difficult to adapt this code for whatever data you are using.
def get_col_widths(dataframe):
# First we find the maximum length of the index column
idx_max = max([len(str(s)) for s in dataframe.index.values] + [len(str(dataframe.index.name))])
# Then, we concatenate this to the max of the lengths of column name and its values for each column, left to right
return [idx_max] + [max([len(str(s)) for s in dataframe[col].values] + [len(col)]) for col in dataframe.columns]
for i, width in enumerate(get_col_widths(dataframe)):
worksheet.set_column(i, i, width)
I agree with Cole Diamond. I needed to do something very similar, it worked fine for me. where self.columns is my list of columns
def set_column_width(self):
length_list = [len(x) for x in self.columns]
for i, width in enumerate(length_list):
self.worksheet.set_column(i, i, width)
That URL does not specify what the units are for the third argument to set_column.
The column widths are given in multiples of the width of the '0' character in the font Calibri, size 11 (that's the Excel standard).
I can not find a way to measure the width of the item that I want to insert into the cell.
In order to get a handle on the exact width of a string, you can use tkinter's ability to measure string lengths in pixels, depending on the font/size/weight/etc. If you define a font, e.g.
reference_font = tkinter.font.Font(family='Calibri', size=11)
you can afterwards use its measure method to determine string widths in pixels, e.g.
reference_font.measure('This is a string.')
In order to do this for a cell from your Excel table, you need to take its format into account (it contains all the information on the used font). That means, if you wrote something to your table using worksheet.write(row, col, cell_string, format), you can get the used font like this:
used_font = tkinter.font.Font(family = format.font_name,
size = format.font_size,
weight = ('bold' if format.bold else 'normal'),
slant = ('italic' if format.italic else 'roman'),
underline = format.underline,
overstrike = format.font_strikeout)
and afterwards determine the cell width as
cell_width = used_font.measure(cell_string+' ')/reference_font.measure('0')
The whitespace is added to the string to provide some margin. This way the results are actually very close to Excel's autofit results, so that I assume Excel is doing just that.
For the tkinter magic to work, a tkinter.Tk() instance (a window) has to be open, therefore the full code for a function that returns the required width of a cell would look like this:
import tkinter
import tkinter.font
def get_cell_width(cell_string, format = None):
root = tkinter.Tk()
reference_font = tkinter.font.Font(family='Calibri', size=11)
if format:
used_font = tkinter.font.Font(family = format.font_name,
size = format.font_size,
weight = ('bold' if format.bold else 'normal'),
slant = ('italic' if format.italic else 'roman'),
underline = format.underline,
overstrike = format.font_strikeout)
else:
used_font = reference_font
cell_width = used_font.measure(cell_string+' ')/reference_font.measure('0')
root.update_idletasks()
root.destroy()
return cell_width
Of course you would like to get the root handling and reference font creation out of the function, if it is meant to be executed frequently. Also, it might be faster to use a lookup table format->font for your workbook, so that you do not have to define the used font every single time.
Finally, one could take care of line breaks within the cell string:
pixelwidths = (used_font.measure(part) for part in cell_string.split('\n'))
cell_width = (max(pixelwidths) + used_font.measure(' '))/reference_font.measure('0')
Also, if you are using the Excel filter function, the dropdown arrow symbol needs another 18 pixels (at 100% zoom in Excel). And there might be merged cells spanning multiple columns... A lot of room for improvements!
xlsxwriter does not appear to have a method to read back a particular cell. This means I need to keep track of each cell width as I write the cell. It would be better if I could just loop through all the cells, that way a generic routine could be written.
If you do not like to keep track within your own data structure, there are at least three ways to go:
(A) Register a write handler to do the job:
You can register a write handler for all standard types. In the handler function, you simply pass on the write command, but also do the bookkeeping wrt. column widths. This way, you only need to read and set the optimal column width in the end (before closing the workbook).
# add worksheet attribute to store column widths
worksheet.colWidths = [0]*number_of_used_columns
# register write handler
for stdtype in [str, int, float, bool, datetime, timedelta]:
worksheet.add_write_handler(stdtype, colWidthTracker)
def colWidthTracker(sheet, row, col, value, format):
# update column width
sheet.colWidths[col] = max(sheet.colWidths[col], get_cell_width(value, format))
# forward write command
if isinstance(value, str):
if value == '':
sheet.write_blank(row, col, value, format)
else:
sheet.write_string(row, col, value, format)
elif isinstance(value, int) or isinstance(value, float):
sheet.write_number(row, col, value, format)
elif isinstance(value, bool):
sheet.write_boolean(row, col, value, format)
elif isinstance(value, datetime) or isinstance(value, timedelta):
sheet.write_datetime(row, col, value, format)
else:
raise TypeError('colWidthTracker cannot handle this type.')
# and in the end...
for col in columns_to_be_autofitted:
worksheet.set_column(col, col, worksheet.colWidths[col])
(B) Use karolyi's answer above to go through the data stored within XlsxWriter's internal variables. However, this is discouraged by the module's author, since it might break in future releases.
(C) Follow the recommendation of jmcnamara: Inherit from and override the default worksheet class and add in some autofit code, like this example: xlsxwriter.readthedocs.io/example_inheritance2.html
I recently ran into this same issue and this is what I came up with:
r = 0
c = 0
for x in list:
worksheet.set_column('{0}:{0}'.format(chr(c + ord('A'))), len(str(x)) + 2)
worksheet.write(r, c, x)
c += 1
In my example r would be the row number you are outputting to, c would be the column number you are outputting to (both 0 indexed), and x would be the value from list that you are wanting to be in the cell.
the '{0}:{0}'.format(chr(c + ord('A'))) piece takes the column number provided and converts it to the column letter accepted by xlsxwriter, so if c = 0 set_column would see 'A:A', if c = 1 then it would see 'B:B', and so on.
the len(str(x)) + 2 piece determines the length of the string you are trying to output then adds 2 to it to ensure that the excel cell is wide enough as the length of the string does not exactly correlate to the width of the cell. You may want to play with rather you add 2 or possibly more depending on your data.
The units that xlsxwriter accepts is a little harder to explain. When you are in excel and you hover over where you can change the column width you will see Width: 8.43 (64 pixels). In this example the unit it accepts is the 8.43, which I think is centimeters? But excel does not even provide a unit, at least not explicitly.
Note: I have only tried this answer on excel files that contain 1 row of data. If you will have multiple rows, you will need to have a way to determine which row will have the 'longest' information and only apply this to that row. But if each column will be roughly the same size regardless of row, then this should work fine for you.
Good luck and I hope this helps!
Update from January 2023.
XlsxWriter 3.0.6+ now supports a autofit() worksheet method:
from xlsxwriter.workbook import Workbook
workbook = Workbook('autofit.xlsx')
worksheet = workbook.add_worksheet()
# Write some worksheet data to demonstrate autofitting.
worksheet.write(0, 0, "Foo")
worksheet.write(1, 0, "Food")
worksheet.write(2, 0, "Foody")
worksheet.write(3, 0, "Froody")
worksheet.write(0, 1, 12345)
worksheet.write(1, 1, 12345678)
worksheet.write(2, 1, 12345)
worksheet.write(0, 2, "Some longer text")
worksheet.write(0, 3, "http://ww.google.com")
worksheet.write(1, 3, "https://github.com")
# Autofit the worksheet.
worksheet.autofit()
workbook.close()
Output:
Or using Pandas:
import pandas as pd
# Create a Pandas dataframe from some data.
df = pd.DataFrame({
'Country': ['China', 'India', 'United States', 'Indonesia'],
'Population': [1404338840, 1366938189, 330267887, 269603400],
'Rank': [1, 2, 3, 4]})
# Order the columns if necessary.
df = df[['Rank', 'Country', 'Population']]
# Create a Pandas Excel writer using XlsxWriter as the engine.
writer = pd.ExcelWriter('pandas_autofit.xlsx', engine='xlsxwriter')
df.to_excel(writer, sheet_name='Sheet1', index=False)
# Get the xlsxwriter workbook and worksheet objects.
workbook = writer.book
worksheet = writer.sheets['Sheet1']
worksheet.autofit()
# Close the Pandas Excel writer and output the Excel file.
writer.close()
Output:
Cole Diamond's answer is awesome. I just updated the subroutine to handle multiindex rows and columns.
def get_col_widths(dataframe):
# First we find the maximum length of the index columns
idx_max = [max([len(str(s)) for s in dataframe.index.get_level_values(idx)] + [len(str(idx))]) for idx in dataframe.index.names]
# Then, we concatenate this to the max of the lengths of column name and its values for each column, left to right
return idx_max + [max([len(str(s)) for s in dataframe[col].values] + \
[len(str(x)) for x in col] if dataframe.columns.nlevels > 1 else [len(str(col))]) for col in dataframe.columns]
There is another workaround to simulate Autofit that I've found on the Github site of xlsxwriter. I've modified it to return the approximate size of horizontal text (column width) or 90° rotated text (row height):
from PIL import ImageFont
def get_cell_size(value, font_name, font_size, dimension="width"):
""" value: cell content
font_name: The name of the font in the target cell
font_size: The size of the font in the target cell """
font = ImageFont.truetype(font_name, size=font_size)
(size, h) = font.getsize(str(value))
if dimension == "height":
return size * 0.92 # fit value experimentally determined
return size * 0.13 # fit value experimentally determined
This doesn't address bold text or other format elements that might affect the text size. Otherwise it works pretty well.
To find the width for your columns for autofit:
def get_col_width(data, font_name, font_size, min_width=1):
""" Assume 'data' to be an iterable (rows) of iterables (columns / cells)
Also, every cell is assumed to have the same font and font size.
Returns a list with the autofit-width per column """
colwidth = [min_width for col in data[0]]
for x, row in enumerate(data):
for y, value in enumerate(row):
colwidth[y] = max(colwidth[y], get_cell_size(value, font_name, font_size))
return colwidth
My version that will go over the one worksheet and autoset the field lengths:
from typing import Optional
from xlsxwriter.worksheet import (
Worksheet, cell_number_tuple, cell_string_tuple)
def get_column_width(worksheet: Worksheet, column: int) -> Optional[int]:
"""Get the max column width in a `Worksheet` column."""
strings = getattr(worksheet, '_ts_all_strings', None)
if strings is None:
strings = worksheet._ts_all_strings = sorted(
worksheet.str_table.string_table,
key=worksheet.str_table.string_table.__getitem__)
lengths = set()
for row_id, colums_dict in worksheet.table.items(): # type: int, dict
data = colums_dict.get(column)
if not data:
continue
if type(data) is cell_string_tuple:
iter_length = len(strings[data.string])
if not iter_length:
continue
lengths.add(iter_length)
continue
if type(data) is cell_number_tuple:
iter_length = len(str(data.number))
if not iter_length:
continue
lengths.add(iter_length)
if not lengths:
return None
return max(lengths)
def set_column_autowidth(worksheet: Worksheet, column: int):
"""
Set the width automatically on a column in the `Worksheet`.
!!! Make sure you run this function AFTER having all cells filled in
the worksheet!
"""
maxwidth = get_column_width(worksheet=worksheet, column=column)
if maxwidth is None:
return
worksheet.set_column(first_col=column, last_col=column, width=maxwidth)
just call set_column_autowidth with the column.
Some of the solutions given here were too elaborate for the rather simple thing that I was looking for: every column had to be sized so that all its values fits nicely. So I wrote my own solution. It basically iterates over all columns, and for each column it gets all string values (including the column name itself) and then takes the longest string as the maximal width for that column.
# Set the width of the columns to the max. string length in that column
# ~ simulates Excel's "autofit" functionality
for col_idx, colname in enumerate(df.columns):
max_width = max([len(colname)]+[len(str(s)) for s in df[colname]])
worksheet.set_column(col_idx, col_idx, max_width+1) # + 1 to add some padding
Here is a version of code that supports MultiIndex for row and column - it is not pretty but works for me. It expands on #cole-diamond answer:
def _xls_make_columns_wide_enough(dataframe, worksheet, padding=1.1, index=True):
def get_col_widths(dataframe, padding, index):
max_width_idx = []
if index and isinstance(dataframe.index, pd.MultiIndex):
# Index name lengths
max_width_idx = [len(v) for v in dataframe.index.names]
# Index value lengths
for column, content in enumerate(dataframe.index.levels):
max_width_idx[column] = max(max_width_idx[column],
max([len(str(v)) for v in content.values]))
elif index:
max_width_idx = [
max([len(str(s))
for s in dataframe.index.values] + [len(str(dataframe.index.name))])
]
if isinstance(dataframe.columns, pd.MultiIndex):
# Take care of columns - headers first.
max_width_column = [0] * len(dataframe.columns.get_level_values(0))
for level in range(len(dataframe.columns.levels)):
values = dataframe.columns.get_level_values(level).values
max_width_column = [
max(v1, len(str(v2))) for v1, v2 in zip(max_width_column, values)
]
# Now content.
for idx, col in enumerate(dataframe.columns):
max_width_column[idx] = max(max_width_column[idx],
max([len(str(v)) for v in dataframe[col].values]))
else:
max_width_column = [
max([len(str(s)) for s in dataframe[col].values] + [len(col)])
for col in dataframe.columns
]
return [round(v * padding) for v in max_width_idx + max_width_column]
for i, width in enumerate(get_col_widths(dataframe, padding, index)):
worksheet.set_column(i, i, width)
Openpyxl easily handles this task. Just install the module and insert the below line of code in your file
# Imorting the necessary modules
try:
from openpyxl.cell import get_column_letter
except ImportError:
from openpyxl.utils import get_column_letter
from openpyxl.utils import column_index_from_string
from openpyxl import load_workbook
import openpyxl
from openpyxl import Workbook
for column_cells in sheet.columns:
new_column_length = max(len(str(cell.value)) for cell in column_cells)
new_column_letter = (get_column_letter(column_cells[0].column))
if new_column_length > 0:
sheet.column_dimensions[new_column_letter].width = new_column_length*1.23

Categories

Resources