Writing worksheet with pandas not possible / Workbook-error? - python

I would like to save a dataframe to a new worksheet in an existing Excel file with the following function:
def save_xls(stock, content, filename):
    """Write ``content`` to a worksheet named ``stock`` in the workbook ``filename``.

    ``content`` is wrapped in a ``pd.DataFrame`` and written without header
    row and without index column.
    """
    # Load the workbook that already exists at ``filename``.
    book = load_workbook(filename)
    writer = pd.ExcelWriter(
        filename,
        # engine='openpyxl',
        engine_kwargs={"options": {"strings_to_numbers": True}},
    )
    # NOTE(review): with the engine left commented out, the traceback reported
    # for this function goes through pandas' _xlsxwriter.write_cells, so the
    # writer presumably uses the xlsxwriter engine while ``book`` comes from
    # load_workbook (presumably openpyxl) -- confirm; that mismatch matches the
    # "'Workbook' object has no attribute 'add_worksheet'" error.
    writer.book = book
    # NOTE(review): the writer is never saved or closed inside this function.
    pd.DataFrame(content).to_excel(writer, sheet_name=stock, header=False, index=False)
But unfortunately, I get this error:
Traceback (most recent call last):
File "C:\Users\Polzi\Documents\DEV\Python-Private\FairValueCalc.py", line 303, in <module>
save_xls (stock, output, OUT)
File "C:\Users\Polzi\Documents\DEV\Python-Private\FairValueCalc.py", line 36, in save_xls
pd.DataFrame (content).to_excel (writer,
File "C:\Users\Polzi\Documents\DEV\.venv\NormalScraping\lib\site-packages\pandas\core\generic.py", line 2357, in to_excel
formatter.write(
File "C:\Users\Polzi\Documents\DEV\.venv\NormalScraping\lib\site-packages\pandas\io\formats\excel.py", line 892, in write
writer.write_cells(
File "C:\Users\Polzi\Documents\DEV\.venv\NormalScraping\lib\site-packages\pandas\io\excel\_xlsxwriter.py", line 219, in write_cells
wks = self.book.add_worksheet(sheet_name)
AttributeError: 'Workbook' object has no attribute 'add_worksheet'
How can I write this dataframe to a new worksheet in the Excel file?

Related

Saving changes to workbook using openpyxl

An Excel sheet exported from an app adds blank columns and rows to the output data. I'm trying to remove them before massaging the data.
So I have some simple code to delete the first 3 rows and first 2 columns, then save the changes.
When I use Workbook.save(path) I get a traceback error. I researched this on Stack Overflow and it looks like this simple code should work. I've even tried passing just the filename, but that gives a similar error.
I'm not sure why I'm getting this error.
Update: I corrected the typo Workbook.save(path) to workbook.save(path).
import openpyxl
import pandas as pd
from openpyxl import Workbook
# Location of the workbook to clean up.
path = 'C:\\Users\\cbout\\Desktop\\2022 Data.xlsx'
filename = '2022 Data.xlsx'
DF = pd.read_excel(path, sheet_name=1)
# Open the workbook; load_workbook returns the workbook object.
workbook = openpyxl.load_workbook(path)
# Get the active worksheet from the workbook's ``active`` attribute.
worksheet = workbook.active
# Delete the first 2 columns. openpyxl row/column indices are 1-based, so the
# first column is index 1 (index 0 is not a valid position).
worksheet.delete_cols(1, 2)
# Delete the first 3 rows: start at row 1 and remove 3 rows.
worksheet.delete_rows(1, 3)
workbook.save(path)
New Traceback Error after updates
PS C:\Users\cbout\Documents\GitHub> & "C:/Program Files/Python310/python.exe" c:/Users/cbout/Documents/GitHub/Python/Stewardship.py
Traceback (most recent call last):
File "c:\Users\cbout\Documents\GitHub\Python\Stewardship.py", line 9, in <module>
stewardDF = pd.read_excel(path, sheet_name=1)
File "C:\Users\cbout\AppData\Roaming\Python\Python310\site-packages\pandas\util\_decorators.py", line 311, in wrapper
return func(*args, **kwargs)
File "C:\Users\cbout\AppData\Roaming\Python\Python310\site-packages\pandas\io\excel\_base.py", line 457, in read_excel
io = ExcelFile(io, storage_options=storage_options, engine=engine)
File "C:\Users\cbout\AppData\Roaming\Python\Python310\site-packages\pandas\io\excel\_base.py", line 1385, in __init__
engine = config.get_option(f"io.excel.{ext}.reader", silent=True)
File "C:\Users\cbout\AppData\Roaming\Python\Python310\site-packages\pandas\_config\config.py", line 256, in __call__
return self.__func__(*args, **kwds)
File "C:\Users\cbout\AppData\Roaming\Python\Python310\site-packages\pandas\_config\config.py", line 128, in _get_option
key = _get_single_key(pat, silent)
File "C:\Users\cbout\AppData\Roaming\Python\Python310\site-packages\pandas\_config\config.py", line 114, in _get_single_key
raise OptionError(f"No such keys(s): {repr(pat)}")
pandas._config.config.OptionError: No such keys(s): 'io.excel.zip.reader'

TypeError: expected <class 'openpyxl.styles.fills.Fill'>

I'm trying to download and then open an Excel file (a report) generated by a marketplace with openpyxl.
import requests
import config
import openpyxl
link = 'https://api.telegram.org/file/bot' + config.TOKEN + '/documents/file_66.xlsx'
def save_open(link):
    """Download the file at ``link``, open it with openpyxl and print cell B2.

    The file is saved in the current working directory under the last path
    segment of ``link``.
    """
    filename = link.split('/')[-1]
    r = requests.get(link)
    with open(filename, 'wb') as new_file:
        new_file.write(r.content)
    # Open the file that was just written instead of a hard-coded name, so the
    # function also works for links whose last segment is not 'file_66.xlsx'.
    wb = openpyxl.open(filename)
    ws = wb.active
    cell = ws['B2'].value
    print(cell)
save_open(link)
After running this code I got the following error:
Traceback (most recent call last):
File "C:\Python 3.9\lib\site-packages\openpyxl\descriptors\base.py", line 55, in _convert
value = expected_type(value)
TypeError: Fill() takes no arguments
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\Home\Documents\myPython\bot_WB\main.py", line 20, in <module>
save_open(link)
File "C:\Users\Home\Documents\myPython\bot_WB\main.py", line 14, in save_open
wb = openpyxl.open ('file_66.xlsx')
File "C:\Python 3.9\lib\site-packages\openpyxl\reader\excel.py", line 317, in load_workbook
reader.read()
File "C:\Python 3.9\lib\site-packages\openpyxl\reader\excel.py", line 281, in read
apply_stylesheet(self.archive, self.wb)
File "C:\Python 3.9\lib\site-packages\openpyxl\styles\stylesheet.py", line 198, in apply_stylesheet
stylesheet = Stylesheet.from_tree(node)
File "C:\Python 3.9\lib\site-packages\openpyxl\styles\stylesheet.py", line 103, in from_tree
return super(Stylesheet, cls).from_tree(node)
File "C:\Python 3.9\lib\site-packages\openpyxl\descriptors\serialisable.py", line 103, in from_tree
return cls(**attrib)
File "C:\Python 3.9\lib\site-packages\openpyxl\styles\stylesheet.py", line 74, in __init__
self.fills = fills
File "C:\Python 3.9\lib\site-packages\openpyxl\descriptors\sequence.py", line 26, in __set__
seq = [_convert(self.expected_type, value) for value in seq]
File "C:\Python 3.9\lib\site-packages\openpyxl\descriptors\sequence.py", line 26, in <listcomp>
seq = [_convert(self.expected_type, value) for value in seq]
File "C:\Python 3.9\lib\site-packages\openpyxl\descriptors\base.py", line 57, in _convert
raise TypeError('expected ' + str(expected_type))
TypeError: expected <class 'openpyxl.styles.fills.Fill'>
[Finished in 1.6s]
If you look at the file's properties/details you can see that this file was generated by "Go Excelize" (author: xuri). To process this file you currently need to split the code into two parts. First: download the file. Then you need to manually open it with MS Excel, save it and close it (after this, the generating application changes from "Go Excelize" to "Microsoft Excel"). Only after that can you run the second part of the code with no errors. Can anyone help me handle this problem?
I had the same problem, "TypeError('expected ' + str(expected_type))", using pandas.read_excel, which uses openpyxl. If I open the file in Excel, save it and close it, it works with both pandas and openpyxl.
Upon further attempts I could open the file using "read_only=True" in openpyxl, but while iterating over the rows I would still get the error, though only after all the rows had been read, at the end of the file.
I believe it could be something at the EOF (end of file) that openpyxl has no way of handling.
Here is the code that I used to test and worked for me:
import openpyxl
# Open in read-only mode; per the surrounding answer this is what allowed the
# problematic file to be opened at all.
wb = openpyxl.load_workbook(my_file_name, read_only=True)
ws = wb.worksheets[0]
lis = []
try:
    # Collect the cell values of every row of the first worksheet.
    for row in ws.iter_rows():
        lis.append([cell.value for cell in row])
except TypeError:
    # The answer reports the TypeError only after the last row; rows already
    # appended to ``lis`` are kept.
    print('Skip error in EOF')
Used openpyxl==3.0.10

raise BadZipFile("File is not a zip file") zipfile.BadZipFile: File is not a zip file

I am trying to append data to an Excel sheet.
If I create the Excel file the normal way (opening Excel, typing the data in manually and saving it) and then use load_workbook, it works. But when I create the Excel file using "writer = pd.ExcelWriter('demo.xlsx', engine='openpyxl')" I get an error.
import pandas as pd
from openpyxl import load_workbook
# New rows to append; intended to have the same columns as the existing sheet.
df = pd.DataFrame({'Name': ['E','F','G','H'],
'Age': [100,70,40,60]})
# Create the writer for demo.xlsx using the openpyxl engine.
writer = pd.ExcelWriter('demo.xlsx', engine='openpyxl')
# try to open an existing workbook
# NOTE(review): the BadZipFile traceback below is raised by this load_workbook
# call; presumably creating the ExcelWriter above has already replaced
# demo.xlsx with an empty file -- confirm against the installed pandas version.
writer.book = load_workbook('demo.xlsx')
# copy existing sheets (map each worksheet title to its worksheet object)
writer.sheets = dict((ws.title, ws) for ws in writer.book.worksheets)
# read existing file; its length is used to compute the row to start writing at
reader = pd.read_excel(r'demo.xlsx')
# write out the new rows below the existing data, without index or header
df.to_excel(writer,index=False,header=False,startrow=len(reader)+1)
writer.close()
Error
Traceback (most recent call last):
File "excel1.py", line 8, in <module>
writer.book = load_workbook('demo.xlsx')
File "/home/pi/.local/lib/python3.7/site-packages/openpyxl/reader/excel.py", line 316, in load_workbook
data_only, keep_links)
File "/home/pi/.local/lib/python3.7/site-packages/openpyxl/reader/excel.py", line 124, in __init__
self.archive = _validate_archive(fn)
File "/home/pi/.local/lib/python3.7/site-packages/openpyxl/reader/excel.py", line 96, in _validate_archive
archive = ZipFile(filename, 'r')
File "/usr/lib/python3.7/zipfile.py", line 1222, in __init__
self._RealGetContents()
File "/usr/lib/python3.7/zipfile.py", line 1289, in _RealGetContents
raise BadZipFile("File is not a zip file")
zipfile.BadZipFile: File is not a zip file
https://qastack.ru/programming/20219254/how-to-write-to-an-existing-excel-file-without-overwriting-data-using-pandas
FILE_NAME = 'pattern.xlsx'
# Load the existing workbook first, before the ExcelWriter for the same file
# is created on the next line.
wb = openpyxl.load_workbook(filename=FILE_NAME)
writer = pd.ExcelWriter(FILE_NAME, engine='openpyxl')
# Hand the already-loaded workbook to the writer.
writer.book = wb

Pandas apply().to_excel() got DataFrame is not callable

empty_stock_list = [
{
'row_index': <num>,
'column_index': <num>
},
...
]
with pd.ExcelWriter(OUTPUT_FILE, engine='xlsxwriter') as writer:
    df = pd.concat([header_row, data_price], ignore_index=False, sort=False).reset_index(drop=True)
    # Per-cell style table with the same shape as df; every cell starts black.
    df_color = df.copy()
    df_color.iloc[:,:] = 'font-color: black'
    # Mark the cells listed in empty_stock_list in red.
    for empty_stock in empty_stock_list:
        df_color.iloc[empty_stock['row_index'], empty_stock['column_index']] = 'font-color: #FF0000'
    # NOTE(review): Styler.apply is given the DataFrame itself here rather than
    # a function; this is the call behind the reported
    # "TypeError: 'DataFrame' object is not callable".
    df.style.apply(df_color, axis=None).\
        to_excel(writer, sheet_name=sheet_name, index=False, header=None)
I have the code above, but I always get this error: TypeError: 'DataFrame' object is not callable. Basically, what I'm trying to do is turn a cell red if a stock is empty (based on the row_index and column_index data).
I tried to follow the documentation, but I can't seem to get this right.
Below is the traceback error messages:
Traceback (most recent call last):
File "main.py", line 129, in <module>
df.style.apply(df_color, axis=None).\
File "/home/michaelharley/.local/lib/python3.8/site-packages/pandas/io/formats/style.py", line 229, in to_excel
formatter.write(
File "/home/michaelharley/.local/lib/python3.8/site-packages/pandas/io/formats/excel.py", line 734, in write
writer.write_cells(
File "/home/michaelharley/.local/lib/python3.8/site-packages/pandas/io/excel/_xlsxwriter.py", line 212, in write_cells
for cell in cells:
File "/home/michaelharley/.local/lib/python3.8/site-packages/pandas/io/formats/excel.py", line 688, in get_formatted_cells
for cell in itertools.chain(self._format_header(), self._format_body()):
File "/home/michaelharley/.local/lib/python3.8/site-packages/pandas/io/formats/excel.py", line 590, in _format_regular_rows
for cell in self._generate_body(coloffset):
File "/home/michaelharley/.local/lib/python3.8/site-packages/pandas/io/formats/excel.py", line 674, in _generate_body
styles = self.styler._compute().ctx
File "/home/michaelharley/.local/lib/python3.8/site-packages/pandas/io/formats/style.py", line 625, in _compute
r = func(self)(*args, **kwargs)
File "/home/michaelharley/.local/lib/python3.8/site-packages/pandas/io/formats/style.py", line 642, in _apply
result = func(data, **kwargs)
TypeError: 'DataFrame' object is not callable
I'm using these dependencies:
python 3.8.0
pandas 1.1.3
xlrd 1.2.0
XlsxWriter 1.3.7
I think you need to create a function, pass it to Styler.apply, and change font-color to color:
def func(df):
    """Build the style table to pass to ``df.style.apply(func, axis=None)``.

    Returns a DataFrame with the same index and columns as ``df`` in which
    every cell holds 'color: black', except the positions listed in the
    module-level ``empty_stock_list`` (dicts with 'row_index' and
    'column_index'), which hold 'color: #FF0000'.
    """
    df_color = pd.DataFrame('color: black', index=df.index, columns=df.columns)
    for empty_stock in empty_stock_list:
        i = empty_stock['row_index']
        j = empty_stock['column_index']
        # Positional (row, column) assignment of the red style.
        df_color.iloc[i, j] = 'color: #FF0000'
    return df_color
with pd.ExcelWriter(OUTPUT_FILE, engine='xlsxwriter') as writer:
    # Stack the header row on top of the price data and renumber the index.
    df = pd.concat([header_row, data_price],
                   ignore_index=False,
                   sort=False).reset_index(drop=True)
    # axis=None makes Styler.apply call ``func`` once with the whole frame.
    (df.style.apply(func, axis=None)
       .to_excel(writer, sheet_name=sheet_name, index=False, header=None))

Openpyxl Workbook.save function creates a corrupt and un-openable Excel (.xlsx) file

I have tried using August William's solution to this issue, but that also didn't work. I am not switching workbook types, i.e. .xlsm to .xlsx, which appears to be a separate issue. I have looked through Openpyxl's Manual trying to find maybe a bug report or bug fix, but to no avail. The below is my very simple code. Following that is the python error message which results in a workbook being created, but it is corrupted and fails to load. Any help is appreciated.
-Thanks!!
from openpyxl import Workbook
# Create a new workbook and rename its active worksheet.
dashbrd = Workbook()
fp = dashbrd.active
fp.title = 'Sheet Name Goes Here'
# Captions for row 1, columns A through I, in order.
header_titles = (
    'Header',
    '2nd Header',
    '3rd Header',
    '4th Header',
    '5th Header',
    'You get the idea',
    'Another Header',
    'Blah blah blah',
    'Yadda yadda yadda',
)
for column_number, title in enumerate(header_titles, start=1):
    fp.cell(row=1, column=column_number, value=title)
dashbrd.save("S:\\folder1\\folder2\\folder3\\MyBook.xlsx")
**************************************************************************************
Traceback (most recent call last):
File "C:\Users\NotaDirtyUser\Documents\Scripts\HeaderTest.py", line 26, in <module>
dashbrd.save("S:\\folder1\\folder2\\folder3\\MyBook.xlsx")
File "C:\ProgramData\Anaconda3\lib\site-packages\openpyxl\workbook\workbook.py", line 408, in save
save_workbook(self, filename)
File "C:\ProgramData\Anaconda3\lib\site-packages\openpyxl\writer\excel.py", line 293, in save_workbook
writer.save()
File "C:\ProgramData\Anaconda3\lib\site-packages\openpyxl\writer\excel.py", line 275, in save
self.write_data()
File "C:\ProgramData\Anaconda3\lib\site-packages\openpyxl\writer\excel.py", line 75, in write_data
self._write_worksheets()
File "C:\ProgramData\Anaconda3\lib\site-packages\openpyxl\writer\excel.py", line 215, in _write_worksheets
self.write_worksheet(ws)
File "C:\ProgramData\Anaconda3\lib\site-packages\openpyxl\writer\excel.py", line 200, in write_worksheet
writer.write()
File "C:\ProgramData\Anaconda3\lib\site-packages\openpyxl\worksheet\_writer.py", line 354, in write
self.write_top()
File "C:\ProgramData\Anaconda3\lib\site-packages\openpyxl\worksheet\_writer.py", line 98, in write_top
self.write_properties()
File "C:\ProgramData\Anaconda3\lib\site-packages\openpyxl\worksheet\_writer.py", line 60, in write_properties
self.xf.send(props.to_tree())
File "C:\ProgramData\Anaconda3\lib\site-packages\openpyxl\worksheet\_writer.py", line 294, in get_stream
xf.write(el)
File "src/lxml/serializer.pxi", line 1652, in lxml.etree._IncrementalFileWriter.write
TypeError: got invalid input value of type <class 'xml.etree.ElementTree.Element'>, expected string or Element
Like I said above, yesterday I had the same problem. I found the solution at this link:
https://python-forum.io/Thread-Need-help-in-understanding-this-particular-Traceback-TypeError
In reference to this error:
TypeError: got invalid input value of type <class 'xml.etree.ElementTree.Element'>, expected string or Element
In summary, the solution was to install a different version of openpyxl:
pip uninstall openpyxl
pip install openpyxl==3.0.1
I can't find a good reference, but I recall having stumbled upon the same problem, and the solution was to use the older format (.xls, which is a completely different format) instead. It seems to be a general openpyxl problem that had not been resolved at the time.
A working way to append to .xlsx (works for me):
from openpyxl import load_workbook
# Fragment: expects filename, df, sheet_name, startrow, truncate_sheet and
# to_excel_kwargs to be defined by the surrounding code.
writer = pd.ExcelWriter(filename, engine='openpyxl')
try:
    # try to open an existing workbook
    writer.book = load_workbook(filename)
    # get the last row in the existing Excel sheet
    # if it was not specified explicitly
    if startrow is None and sheet_name in writer.book.sheetnames:
        startrow = writer.book[sheet_name].max_row
    # truncate sheet
    if truncate_sheet and sheet_name in writer.book.sheetnames:
        # index of [sheet_name] sheet
        idx = writer.book.sheetnames.index(sheet_name)
        # remove [sheet_name]
        writer.book.remove(writer.book.worksheets[idx])
        # create an empty sheet [sheet_name] using old index
        writer.book.create_sheet(sheet_name, idx)
    # copy existing sheets
    writer.sheets = {ws.title: ws for ws in writer.book.worksheets}
except FileNotFoundError:
    # file does not exist yet, we will create it
    pass
if startrow is None:
    startrow = 0
# write out the new sheet
df.to_excel(writer, sheet_name=sheet_name, startrow=startrow, **to_excel_kwargs)
# save the workbook; close() also saves and, unlike save(), is still
# available in current pandas releases
writer.close()
Same problem for me: I am not able to reopen a file created by openpyxl version > 3:
in 3.0.3:
File "D:\MyProg.py", line 251, in chargerSynthese
self.wbs = load_workbook(filename=self.nomfichierXLSX)
File "D:\MyPython\python3.8.2-x64\lib\site-packages\openpyxl\reader\excel.py", line 314, in load_workbook
reader.read()
File "D:\MyPython\python3.8.2-x64\lib\site-packages\openpyxl\reader\excel.py", line 279, in read
self.read_worksheets()
File "D:\MyPython\python3.8.2-x64\lib\site-packages\openpyxl\reader\excel.py", line 227, in read_worksheets
ws_parser.bind_all()
File "D:\MyPython\python3.8.2-x64\lib\site-packages\openpyxl\worksheet_reader.py", line 426, in bind_all
self.bind_cells()
File "D:\MyPython\python3.8.2-x64\lib\site-packages\openpyxl\worksheet_reader.py", line 337, in bind_cells
for idx, row in self.parser.parse():
File "D:\MyPython\python3.8.2-x64\lib\site-packages\openpyxl\worksheet_reader.py", line 153, in parse
row = self.parse_row(element)
File "D:\MyPython\python3.8.2-x64\lib\site-packages\openpyxl\worksheet_reader.py", line 264, in parse_row
self.row_counter = int(attrs['r'])
ValueError: invalid literal for int() with base 10: '2.0'
attrs={'r':'2.0'}: I don't know where it comes from (its origin is a worksheet saved by openpyxl 3.0.3), but when _reader.py then does int(attrs['r']), it crashes!
Solution: go back to version 2.6.4!
Reply to myself!
openpyxl 3.0.3 works well but is less permissive than the 2.6 versions. Here is my test code; you must pass an int for row=, not a float:
from openpyxl import __version__
from openpyxl import load_workbook
from openpyxl import Workbook
# Reproduction script: write a workbook, then reload it and append rows.
wbs = Workbook()
wbs.active.title = 'titi'
# row is deliberately a float here: this is the input that makes the file
# written by openpyxl 3.0.3 fail to reload (pass an int to avoid it).
mycell = wbs['titi'].cell(row = 1.0, column = 1)
mycell.value = 22
wbs.save('toto.xlsx')
print('openpyxl __version__:',__version__)
wbi = load_workbook(filename='toto.xlsx')
# Append 30 rows to the reloaded sheet, then save once.
for i in range(0, 30):
    wbi['titi'].append([i, 'tata'])
wbi.save('toto.xlsx')
# result1:
# openpyxl __version__: 2.6.3
# result2:
# openpyxl __version__: 3.0.3
# Traceback (most recent call last):
# File "D:\Users\T0015039\Documents\Mes Outils Personnels\py\essais\crashxlsx.py", line 13, in <module>
# wbi = load_workbook(filename='toto.xlsx')
# File "D:\Users\T0015039\Documents\Mes Outils Personnels\python3.8.2_pyscripter3.6.3-x64\python3.8.2-x64\lib\site-packages\openpyxl\reader\excel.py", line 314, in load_workbook
# reader.read()
# File "D:\Users\T0015039\Documents\Mes Outils Personnels\python3.8.2_pyscripter3.6.3-x64\python3.8.2-x64\lib\site-packages\openpyxl\reader\excel.py", line 279, in read
# self.read_worksheets()
# File "D:\Users\T0015039\Documents\Mes Outils Personnels\python3.8.2_pyscripter3.6.3-x64\python3.8.2-x64\lib\site-packages\openpyxl\reader\excel.py", line 227, in read_worksheets
# ws_parser.bind_all()
# File "D:\Users\T0015039\Documents\Mes Outils Personnels\python3.8.2_pyscripter3.6.3-x64\python3.8.2-x64\lib\site-packages\openpyxl\worksheet\_reader.py", line 426, in bind_all
# self.bind_cells()
# File "D:\Users\T0015039\Documents\Mes Outils Personnels\python3.8.2_pyscripter3.6.3-x64\python3.8.2-x64\lib\site-packages\openpyxl\worksheet\_reader.py", line 337, in bind_cells
# for idx, row in self.parser.parse():
# File "D:\Users\T0015039\Documents\Mes Outils Personnels\python3.8.2_pyscripter3.6.3-x64\python3.8.2-x64\lib\site-packages\openpyxl\worksheet\_reader.py", line 153, in parse
# row = self.parse_row(element)
# File "D:\Users\T0015039\Documents\Mes Outils Personnels\python3.8.2_pyscripter3.6.3-x64\python3.8.2-x64\lib\site-packages\openpyxl\worksheet\_reader.py", line 264, in parse_row
# self.row_counter = int(attrs['r'])
# ValueError: invalid literal for int() with base 10: '1.0'
Same problem for me with openpyxl version 2.0.2. I uninstalled it, reinstalled 2.0.10 and added it to my project.

Categories

Resources