An excel sheet export from an app adds blank columns and rows to the output data. I'm trying to remove it before massaging the data.
So I have some simple code to delete the first 3 rows and first 2 columns, then save the changes.
When I use Workbook.save(path) I get a traceback error. I researched on Stack Overflow and it looks like this simple code should work. I've even tried passing just the filename, but that also gives a similar error.
I'm not sure why I'm getting this error.
Update: I corrected the typo Workbook.save(path) to workbook.save(path).
import openpyxl
import pandas as pd
from openpyxl import Workbook
# Location of the exported workbook that needs cleaning up.
path = 'C:\\Users\\cbout\\Desktop\\2022 Data.xlsx'
filename = '2022 Data.xlsx'
# NOTE(review): this reads the sheet at index 1 (the second sheet), while the
# edits below are applied to the workbook's active sheet - confirm both refer
# to the sheet you intend.
DF = pd.read_excel(path, sheet_name=1)

# Open the workbook with openpyxl and get its active worksheet.
workbook = openpyxl.load_workbook(path)
worksheet = workbook.active

# openpyxl row and column indices are 1-based, so deletion starts at index 1.
# Delete the first 2 columns.
worksheet.delete_cols(1, 2)
# Delete the first 3 rows (the second argument is the number of rows to drop).
worksheet.delete_rows(1, 3)

# Write the modified workbook back to the same file.
workbook.save(path)
New Traceback Error after updates
PS C:\Users\cbout\Documents\GitHub> & "C:/Program Files/Python310/python.exe" c:/Users/cbout/Documents/GitHub/Python/Stewardship.py
Traceback (most recent call last):
File "c:\Users\cbout\Documents\GitHub\Python\Stewardship.py", line 9, in <module>
stewardDF = pd.read_excel(path, sheet_name=1)
File "C:\Users\cbout\AppData\Roaming\Python\Python310\site-packages\pandas\util\_decorators.py", line 311, in wrapper
return func(*args, **kwargs)
File "C:\Users\cbout\AppData\Roaming\Python\Python310\site-packages\pandas\io\excel\_base.py", line 457, in read_excel
io = ExcelFile(io, storage_options=storage_options, engine=engine)
File "C:\Users\cbout\AppData\Roaming\Python\Python310\site-packages\pandas\io\excel\_base.py", line 1385, in __init__
engine = config.get_option(f"io.excel.{ext}.reader", silent=True)
File "C:\Users\cbout\AppData\Roaming\Python\Python310\site-packages\pandas\_config\config.py", line 256, in __call__
return self.__func__(*args, **kwds)
File "C:\Users\cbout\AppData\Roaming\Python\Python310\site-packages\pandas\_config\config.py", line 128, in _get_option
key = _get_single_key(pat, silent)
File "C:\Users\cbout\AppData\Roaming\Python\Python310\site-packages\pandas\_config\config.py", line 114, in _get_single_key
raise OptionError(f"No such keys(s): {repr(pat)}")
pandas._config.config.OptionError: No such keys(s): 'io.excel.zip.reader'
Related
I would like to save a dataframe to a new worksheet in an Excel sheet with the following code / function:
def save_xls(stock, content, filename):
    """Write *content* to a new worksheet named *stock* in an existing workbook.

    The workbook at *filename* is opened in append mode with the openpyxl
    engine, so the sheets it already contains are kept. The xlsxwriter-only
    ``strings_to_numbers`` option is not passed, because xlsxwriter cannot
    modify an existing file and openpyxl does not accept that option.

    Args:
        stock: Name of the worksheet to create.
        content: Data accepted by ``pd.DataFrame(...)``.
        filename: Path of an existing ``.xlsx`` file.

    Raises:
        FileNotFoundError: If *filename* does not exist (append mode needs it).
        ValueError: Presumably raised by pandas when a sheet named *stock*
            already exists (default ``if_sheet_exists`` behaviour in recent
            pandas) - verify against the installed pandas version.
    """
    # The context manager saves and closes the file on exit; the original
    # never saved the writer, so nothing would have reached the disk.
    with pd.ExcelWriter(filename, engine="openpyxl", mode="a") as writer:
        pd.DataFrame(content).to_excel(
            writer, sheet_name=stock, header=False, index=False
        )
But unfortunately, I get this error:
Traceback (most recent call last):
File "C:\Users\Polzi\Documents\DEV\Python-Private\FairValueCalc.py", line 303, in <module>
save_xls (stock, output, OUT)
File "C:\Users\Polzi\Documents\DEV\Python-Private\FairValueCalc.py", line 36, in save_xls
pd.DataFrame (content).to_excel (writer,
File "C:\Users\Polzi\Documents\DEV\.venv\NormalScraping\lib\site-packages\pandas\core\generic.py", line 2357, in to_excel
formatter.write(
File "C:\Users\Polzi\Documents\DEV\.venv\NormalScraping\lib\site-packages\pandas\io\formats\excel.py", line 892, in write
writer.write_cells(
File "C:\Users\Polzi\Documents\DEV\.venv\NormalScraping\lib\site-packages\pandas\io\excel\_xlsxwriter.py", line 219, in write_cells
wks = self.book.add_worksheet(sheet_name)
AttributeError: 'Workbook' object has no attribute 'add_worksheet'
How can I write this dataframe to a new worksheet in the Excel file?
I'm trying to download and then open an Excel file (a report) generated by a marketplace, using openpyxl.
import requests
import config
import openpyxl
link = 'https://api.telegram.org/file/bot' + config.TOKEN + '/documents/file_66.xlsx'


def save_open(link):
    """Download the workbook at *link* and print cell B2 of its active sheet.

    The file is saved in the current working directory under the last path
    component of *link*, and that same file is then opened with openpyxl.

    Args:
        link: URL of the ``.xlsx`` file to download.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    filename = link.split('/')[-1]
    r = requests.get(link)
    # Stop on an HTTP error instead of saving the error page as an .xlsx file.
    r.raise_for_status()
    with open(filename, 'wb') as new_file:
        new_file.write(r.content)
    # Open the file that was just written rather than a hard-coded name.
    wb = openpyxl.open(filename)
    ws = wb.active
    cell = ws['B2'].value
    print(cell)


save_open(link)
After running this code I got the following:
Traceback (most recent call last):
File "C:\Python 3.9\lib\site-packages\openpyxl\descriptors\base.py", line 55, in _convert
value = expected_type(value)
TypeError: Fill() takes no arguments
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\Home\Documents\myPython\bot_WB\main.py", line 20, in <module>
save_open(link)
File "C:\Users\Home\Documents\myPython\bot_WB\main.py", line 14, in save_open
wb = openpyxl.open ('file_66.xlsx')
File "C:\Python 3.9\lib\site-packages\openpyxl\reader\excel.py", line 317, in load_workbook
reader.read()
File "C:\Python 3.9\lib\site-packages\openpyxl\reader\excel.py", line 281, in read
apply_stylesheet(self.archive, self.wb)
File "C:\Python 3.9\lib\site-packages\openpyxl\styles\stylesheet.py", line 198, in apply_stylesheet
stylesheet = Stylesheet.from_tree(node)
File "C:\Python 3.9\lib\site-packages\openpyxl\styles\stylesheet.py", line 103, in from_tree
return super(Stylesheet, cls).from_tree(node)
File "C:\Python 3.9\lib\site-packages\openpyxl\descriptors\serialisable.py", line 103, in from_tree
return cls(**attrib)
File "C:\Python 3.9\lib\site-packages\openpyxl\styles\stylesheet.py", line 74, in __init__
self.fills = fills
File "C:\Python 3.9\lib\site-packages\openpyxl\descriptors\sequence.py", line 26, in __set__
seq = [_convert(self.expected_type, value) for value in seq]
File "C:\Python 3.9\lib\site-packages\openpyxl\descriptors\sequence.py", line 26, in <listcomp>
seq = [_convert(self.expected_type, value) for value in seq]
File "C:\Python 3.9\lib\site-packages\openpyxl\descriptors\base.py", line 57, in _convert
raise TypeError('expected ' + str(expected_type))
TypeError: expected <class 'openpyxl.styles.fills.Fill'>
[Finished in 1.6s]
If you check the file properties/details, you can see that this file was generated by "Go Excelize" (author: xuri). To process this file I currently have to split the code into two parts. First, download the file. Then manually open it with MS Excel, save it, and close it (after this, the generating application changes from "Go Excelize" to "Microsoft Excel"). Only after that does the second part of the code run with no errors. Can anyone help me handle this problem?
I had the same problem, "TypeError('expected ' + str(expected_type))", using pandas.read_excel, which uses openpyxl. If I open the file in Excel, save it, and close it, it works with both pandas and openpyxl.
After further attempts I could open the file using "read_only=True" in openpyxl, but while iterating over the rows I would still get the error, although only after all the rows had been read, at the end of the file.
I believe it could be something at the EOF (end of file) that openpyxl has no way of handling.
Here is the code that I used to test and worked for me:
import openpyxl
# Load the workbook in read-only mode and collect the cell values of the
# first worksheet, one list per row.
wb = openpyxl.load_workbook(filename=my_file_name, read_only=True)
ws = wb.worksheets[0]
lis = []
try:
    for row in ws.iter_rows():
        values = []
        for cell in row:
            values.append(cell.value)
        lis.append(values)
except TypeError:
    # Rows collected before the error are kept in `lis`.
    print('Skip error in EOF')
Used openpyxl==3.0.10
def getFile():
    """Ask for a file through a Qt open-file dialog and store the selection.

    The first item of the dialog's result is written to the module-level
    names ``filename`` and ``path``, and then printed.
    """
    global filename, path, path2
    chosen = QtWidgets.QFileDialog.getOpenFileName()[0]
    filename = chosen
    path = chosen
    print(path)
I think the problem is in this function
def getTo():
wb = load_workbook(filename = filename)
and there are some settings missing in the filename function.
I tried using unicode, but it doesn't solve the problem.
Traceback (most recent call last):
File "C:/Users/pro10/PycharmProjects/Program/gui5.py", line 118, in getTo
sheet['A' + str(rows)] = text1
File "C:\Users\pro10\PycharmProjects\pythonProject\venv\lib\site-packages\openpyxl\worksheet\worksheet.py", line 313, in __setitem__
self[key].value = value
File "C:\Users\pro10\PycharmProjects\pythonProject\venv\lib\site-packages\openpyxl\cell\cell.py", line 216, in value
self._bind_value(value)
File "C:\Users\pro10\PycharmProjects\pythonProject\venv\lib\site-packages\openpyxl\cell\cell.py", line 199, in _bind_value
raise ValueError("Cannot convert {0!r} to Excel".format(value))
ValueError: Cannot convert <function text1 at 0x0000023DFE3D3A60> to Excel
Did you import the load_workbook function, like this?
from openpyxl import load_workbook
I have tried using August William's solution to this issue, but that also didn't work. I am not switching workbook types, i.e. .xlsm to .xlsx, which appears to be a separate issue. I have looked through Openpyxl's Manual trying to find maybe a bug report or bug fix, but to no avail. The below is my very simple code. Following that is the python error message which results in a workbook being created, but it is corrupted and fails to load. Any help is appreciated.
-Thanks!!
from openpyxl import Workbook
# Create a workbook, rename its active sheet and fill cells A1..I1 with the
# header texts, then save it.
dashbrd = Workbook()
fp = dashbrd.active
fp.title = 'Sheet Name Goes Here'

headers = (
    'Header',
    '2nd Header',
    '3rd Header',
    '4th Header',
    '5th Header',
    'You get the idea',
    'Another Header',
    'Blah blah blah',
    'Yadda yadda yadda',
)
# Pair each header with its column letter; every header goes in row 1.
for column_letter, header_text in zip('ABCDEFGHI', headers):
    fp[column_letter + '1'] = header_text

dashbrd.save("S:\\folder1\\folder2\\folder3\\MyBook.xlsx")
**************************************************************************************
Traceback (most recent call last):
File "C:\Users\NotaDirtyUser\Documents\Scripts\HeaderTest.py", line 26, in <module>
dashbrd.save("S:\\folder1\\folder2\\folder3\\MyBook.xlsx")
File "C:\ProgramData\Anaconda3\lib\site-packages\openpyxl\workbook\workbook.py", line 408, in save
save_workbook(self, filename)
File "C:\ProgramData\Anaconda3\lib\site-packages\openpyxl\writer\excel.py", line 293, in save_workbook
writer.save()
File "C:\ProgramData\Anaconda3\lib\site-packages\openpyxl\writer\excel.py", line 275, in save
self.write_data()
File "C:\ProgramData\Anaconda3\lib\site-packages\openpyxl\writer\excel.py", line 75, in write_data
self._write_worksheets()
File "C:\ProgramData\Anaconda3\lib\site-packages\openpyxl\writer\excel.py", line 215, in _write_worksheets
self.write_worksheet(ws)
File "C:\ProgramData\Anaconda3\lib\site-packages\openpyxl\writer\excel.py", line 200, in write_worksheet
writer.write()
File "C:\ProgramData\Anaconda3\lib\site-packages\openpyxl\worksheet\_writer.py", line 354, in write
self.write_top()
File "C:\ProgramData\Anaconda3\lib\site-packages\openpyxl\worksheet\_writer.py", line 98, in write_top
self.write_properties()
File "C:\ProgramData\Anaconda3\lib\site-packages\openpyxl\worksheet\_writer.py", line 60, in write_properties
self.xf.send(props.to_tree())
File "C:\ProgramData\Anaconda3\lib\site-packages\openpyxl\worksheet\_writer.py", line 294, in get_stream
xf.write(el)
File "src/lxml/serializer.pxi", line 1652, in lxml.etree._IncrementalFileWriter.write
TypeError: got invalid input value of type <class 'xml.etree.ElementTree.Element'>, expected string or Element
Like I said above , yesterday I had the same problem ..I found the solution in this link:
https://python-forum.io/Thread-Need-help-in-understanding-this-particular-Traceback-TypeError
In reference to this error :
TypeError: got invalid input value of type <class 'xml.etree.ElementTree.Element'>, expected string or Element
In summary the solution was to install openpyxl to another version :
pip uninstall openpyxl
pip install openpyxl==3.0.1
I can't find a good reference, but I recall having stumbled upon the same, and the solution was to use the older format (.xls, which is a completely different format) instead. Seems like a generic problem of openpyxl that wasn't resolved at the moment.
A working way to append to .xlsx (works for me):
from openpyxl import load_workbook
# Append `df` to sheet `sheet_name` of `filename`, creating the file if needed.
# NOTE(review): assigning `writer.book` / `writer.sheets` and calling
# `writer.save()` may not be supported by newer pandas releases - confirm
# against the installed pandas version.
writer = pd.ExcelWriter(filename, engine='openpyxl')

try:
    # Reuse the existing workbook when the file is already on disk.
    writer.book = load_workbook(filename)
    sheet_exists = sheet_name in writer.book.sheetnames

    # Without an explicit start row, continue after the sheet's last row.
    if sheet_exists and startrow is None:
        startrow = writer.book[sheet_name].max_row

    # On request, swap the sheet for an empty one at the same index.
    if sheet_exists and truncate_sheet:
        idx = writer.book.sheetnames.index(sheet_name)
        writer.book.remove(writer.book.worksheets[idx])
        writer.book.create_sheet(sheet_name, idx)

    # Register the workbook's existing sheets with the writer.
    writer.sheets = {ws.title: ws for ws in writer.book.worksheets}
except FileNotFoundError:
    # The file does not exist yet, so there is nothing to load.
    pass

startrow = 0 if startrow is None else startrow

# Write the new data and save the workbook.
df.to_excel(writer, sheet_name, startrow=startrow, **to_excel_kwargs)
writer.save()
same problem for me not able to reopen a file created by openpyxl version > 3:
in 3.0.3:
File "D:\MyProg.py", line 251, in chargerSynthese
self.wbs = load_workbook(filename=self.nomfichierXLSX)
File "D:\MyPython\python3.8.2-x64\lib\site-packages\openpyxl\reader\excel.py", line 314, in load_workbook
reader.read()
File "D:\MyPython\python3.8.2-x64\lib\site-packages\openpyxl\reader\excel.py", line 279, in read
self.read_worksheets()
File "D:\MyPython\python3.8.2-x64\lib\site-packages\openpyxl\reader\excel.py", line 227, in read_worksheets
ws_parser.bind_all()
File "D:\MyPython\python3.8.2-x64\lib\site-packages\openpyxl\worksheet\_reader.py", line 426, in bind_all
self.bind_cells()
File "D:\MyPython\python3.8.2-x64\lib\site-packages\openpyxl\worksheet\_reader.py", line 337, in bind_cells
for idx, row in self.parser.parse():
File "D:\MyPython\python3.8.2-x64\lib\site-packages\openpyxl\worksheet\_reader.py", line 153, in parse
row = self.parse_row(element)
File "D:\MyPython\python3.8.2-x64\lib\site-packages\openpyxl\worksheet\_reader.py", line 264, in parse_row
self.row_counter = int(attrs['r'])
ValueError: invalid literal for int() with base 10: '2.0'
attrs={'r': '2.0'}: I don't know where it comes from (but it originates from a worksheet saved by openpyxl 3.0.3); then, when int(attrs['r']) is executed in _reader.py, it crashes!
solution back to 2.6.4 version!
reply to myself!
openpyxl 3.0.3 works well but is less permissive than the 2.6 versions. Here is my test code; you must pass an int for row=, not a float:
from openpyxl import __version__
from openpyxl import load_workbook
from openpyxl import Workbook
# Reproduction script: write one cell using a float row index, save the
# workbook, then reload it and append 30 rows.
wbs = Workbook()
wbs.active.title = 'titi'
# The row index is deliberately a float; it is the input being tested.
mycell = wbs['titi'].cell(row=1.0, column=1)
mycell.value = 22
wbs.save('toto.xlsx')
print('openpyxl __version__:', __version__)

wbi = load_workbook(filename='toto.xlsx')
target = wbi['titi']
for i in range(30):
    target.append([i, 'tata'])
wbi.save('toto.xlsx')
# result1:
# openpyxl __version__: 2.6.3
# result2:
# openpyxl __version__: 3.0.3
# Traceback (most recent call last):
# File "D:\Users\T0015039\Documents\Mes Outils Personnels\py\essais\crashxlsx.py", line 13, in <module>
# wbi = load_workbook(filename='toto.xlsx')
# File "D:\Users\T0015039\Documents\Mes Outils Personnels\python3.8.2_pyscripter3.6.3-x64\python3.8.2-x64\lib\site-packages\openpyxl\reader\excel.py", line 314, in load_workbook
# reader.read()
# File "D:\Users\T0015039\Documents\Mes Outils Personnels\python3.8.2_pyscripter3.6.3-x64\python3.8.2-x64\lib\site-packages\openpyxl\reader\excel.py", line 279, in read
# self.read_worksheets()
# File "D:\Users\T0015039\Documents\Mes Outils Personnels\python3.8.2_pyscripter3.6.3-x64\python3.8.2-x64\lib\site-packages\openpyxl\reader\excel.py", line 227, in read_worksheets
# ws_parser.bind_all()
# File "D:\Users\T0015039\Documents\Mes Outils Personnels\python3.8.2_pyscripter3.6.3-x64\python3.8.2-x64\lib\site-packages\openpyxl\worksheet\_reader.py", line 426, in bind_all
# self.bind_cells()
# File "D:\Users\T0015039\Documents\Mes Outils Personnels\python3.8.2_pyscripter3.6.3-x64\python3.8.2-x64\lib\site-packages\openpyxl\worksheet\_reader.py", line 337, in bind_cells
# for idx, row in self.parser.parse():
# File "D:\Users\T0015039\Documents\Mes Outils Personnels\python3.8.2_pyscripter3.6.3-x64\python3.8.2-x64\lib\site-packages\openpyxl\worksheet\_reader.py", line 153, in parse
# row = self.parse_row(element)
# File "D:\Users\T0015039\Documents\Mes Outils Personnels\python3.8.2_pyscripter3.6.3-x64\python3.8.2-x64\lib\site-packages\openpyxl\worksheet\_reader.py", line 264, in parse_row
# self.row_counter = int(attrs['r'])
# ValueError: invalid literal for int() with base 10: '1.0'
Same problem for me with openpyxl version 2.0.2. I uninstalled it, reinstalled version 2.0.10, and added it to my project.
I'm trying to load a CSV file into a spark DataFrame. This is what I have done so far:
# Configure a local Spark context and wrap it in a SQLContext.
appName = "testSpark"
master = "local"
conf = SparkConf().setAppName(appName).setMaster(master)
sc = SparkContext(conf=conf)
sqlContext = sql.SQLContext(sc)

# The CSV data source expects the file location as a string path, not an
# RDD: passing the result of sc.textFile() raises
# "'RDD' object has no attribute '_get_object_id'".
text_file = "hdfs:///path/to/sensordata20171008223515.csv"
df = sqlContext.load(source="com.databricks.spark.csv", header='true', path=text_file)
# NOTE(review): `schema` is accessed as an attribute here, assuming a
# DataFrame-era PySpark where it is a property - confirm for your version.
print(df.schema)
Here's the trace:
Traceback (most recent call last):
File "/home/centos/main.py", line 16, in <module>
df = sc.textFile(text_file).map(lambda line: (line.split(';')[0], line.split(';')[1])).collect()
File "/usr/hdp/2.5.6.0-40/spark/python/lib/pyspark.zip/pyspark/context.py", line 474, in textFile
File "/usr/hdp/2.5.6.0-40/spark/python/lib/py4j-0.9-src.zip/py4j/java_gateway.py", line 804, in __call__
File "/usr/hdp/2.5.6.0-40/spark/python/lib/py4j-0.9-src.zip/py4j/protocol.py", line 278, in get_command_part
AttributeError: 'RDD' object has no attribute '_get_object_id'
I'm new to spark. So if anyone could tell me what I've done wrong this would be very helpful.
You cannot pass RDD to csv reader. You should use path directly:
# Load the CSV by passing its HDFS location as a plain string path.
df = sqlContext.load(
    source="com.databricks.spark.csv",
    header='true',
    path="hdfs:///path/to/sensordata20171008223515.csv"
)
Only a limited number of formats (notably JSON) supports RDD as an input argument.