Python Pandas Errno 13 while saving dataframe to xlsx - python

I am slowly losing my mind. I'm trying to save a pandas DataFrame to xlsx.
save_path = self.SAVE_L['text']+ f"\df_{part}_.xlsx"
writer = pandas.ExcelWriter(save_path , engine='xlsxwriter',date_format='DD/MM/YYYY',datetime_format='DD/MM/YYYY')
df.to_excel(writer , index=False, sheet_name='df', engine='xlsxwriter')
writer.save()
However, for a bigger df (still within the size Excel can handle - around 550,000 rows) I'm getting this error:
//NETWORK_DISK/save_path\df_part1_.xlsx
Exception in Tkinter callback
Traceback (most recent call last):
File "C:\Program Files\Python37\lib\tkinter\__init__.py", line 1705, in __call__
return self.func(*args)
File "PATH to script/Prog 0.31.py", line 460, in Process
scr_t=self.scr_list)
File "PATH to script/Prog 0.31.py", line 829, in script_PROC
writer.save()
File "C:\Program Files\Python37\lib\site-packages\pandas\io\excel.py", line 1952, in save
return self.book.close()
File "C:\Program Files\Python37\lib\site-packages\xlsxwriter\workbook.py", line 310, in close
self._store_workbook()
File "C:\Program Files\Python37\lib\site-packages\xlsxwriter\workbook.py", line 636, in _store_workbook
xlsx_file.write(os_filename, xml_filename)
File "C:\Program Files\Python37\lib\zipfile.py", line 1746, in write
with open(filename, "rb") as src, self.open(zinfo, 'w') as dest:
PermissionError: [Errno 13] Permission denied: 'C:\\Users\\Pandriej\\AppData\\Local\\Temp\\tmpfi53pba3'
I can save it to .csv without any problem. I also can divide it into smaller chunks and save them individually. The problem is I need the entire output to be saved as xlsx.
Is there any way I can fix this? Make Pandas actually save the file?

Related

"OSError: [Errno 9] Bad file descriptor" error when trying to save excel file with openpyxl in python

I have a couple of excel files I want to merge into one.
I need the second column on all the files to be copied into separate columns in a new Microsoft Excel file.
For this, I am using the openpyxl library in a python script.
This is my code:
import os
from openpyxl import load_workbook
def mergeDataFiles():
path = "C:\\Users\\ethan\\Desktop\\Benzoyl Chloride\\Benzoyl Chloride"
# source excel files
origin_files = list()
for path, subdirs, files in os.walk(path):
for file_index in range(len(files)):
origin_files.append(files[file_index])
# destination excel file
destination_file = path + ".xlsx"
destination_workbook = load_workbook(destination_file)
destination_sheet = destination_workbook["Sheet1"]
# copy data from source files to destination file
for origin_file_index in range(1, len(origin_files)):
origin_workbook = load_workbook(path + "\\" + origin_files[origin_file_index - 1])
origin_sheet = origin_workbook['Data']
destination_sheet.cell(row=1, column=origin_file_index).value = origin_files[origin_file_index - 1]
for i in range(1, 500):
# read cell value from source excel file
data = origin_sheet.cell(row=i, column=2)
# write the value to destination excel file
destination_sheet.cell(row=i + 1, column=origin_file_index).value = data.value
# saving the destination excel file
destination_workbook.save(destination_file)
if __name__ == "__main__":
mergeDataFiles()
When I run the code, I get an error on the last line in the function: OSError: [Errno 9] Bad file descriptor.
Full traceback:
C:\Users\ethan\.venv\Scripts\python.exe "C:/Users/ethan/Coding/Python/Copy Excel Data/main.py"
Traceback (most recent call last):
File "C:\Users\ethan\Coding\Python\Copy Excel Data\main.py", line 32, in <module>
mergeDataFiles()
File "C:\Users\ethan\Coding\Python\Copy Excel Data\main.py", line 28, in mergeDataFiles
destination_workbook.save(destination_file)
File "C:\Users\ethan\.venv\Lib\site-packages\openpyxl\workbook\workbook.py", line 407, in save
save_workbook(self, filename)
File "C:\Users\ethan\.venv\Lib\site-packages\openpyxl\writer\excel.py", line 293, in save_workbook
writer.save()
File "C:\Users\ethan\.venv\Lib\site-packages\openpyxl\writer\excel.py", line 275, in save
self.write_data()
File "C:\Users\ethan\.venv\Lib\site-packages\openpyxl\writer\excel.py", line 67, in write_data
archive.writestr(ARC_APP, tostring(props.to_tree()))
File "C:\Program Files\Python311\Lib\zipfile.py", line 1830, in writestr
with self.open(zinfo, mode='w') as dest:
File "C:\Program Files\Python311\Lib\zipfile.py", line 1204, in close
self._fileobj.seek(self._zinfo.header_offset)
OSError: [Errno 9] Bad file descriptor
Exception ignored in: <function ZipFile.__del__ at 0x000001D101443D80>
Traceback (most recent call last):
File "C:\Program Files\Python311\Lib\zipfile.py", line 1870, in __del__
self.close()
File "C:\Program Files\Python311\Lib\zipfile.py", line 1892, in close
self._fpclose(fp)
File "C:\Program Files\Python311\Lib\zipfile.py", line 1992, in _fpclose
fp.close()
OSError: [Errno 9] Bad file descriptor
Process finished with exit code 1
I have tried changing the file names and locations, having the destination file open and closed, scouring the internet for solutions and at this point I'm not sure what else I can try.
I am running the code on Windows 10 22H2, with an intel i5 cpu.
Please assist me with this issue, if you know how to solve it.

Getting "At least one sheet must be visible" when trying to open excel spreadsheet. What to do?

When trying to open an Excel spreadsheet, make some changes, and save it:
import openpyxl
workbook = openpyxl.load_workbook(filename = 'sample.xlsx', read_only=False)
workbook.save('test.xlsx')
I get the following upon calling save():
Traceback (most recent call last):
File "read_pyxl.py", line 4, in <module>
workbook.save('test.xlsx')
File "/home/martin/myvenv/lib/python3.8/site-packages/openpyxl/workbook/workbook.py", line 407, in save
save_workbook(self, filename)
File "/home/martin/myvenv/lib/python3.8/site-packages/openpyxl/writer/excel.py", line 293, in save_workbook
writer.save()
File "/home/martin/myvenv/lib/python3.8/site-packages/openpyxl/writer/excel.py", line 275, in save
self.write_data()
File "/home/martin/myvenv/lib/python3.8/site-packages/openpyxl/writer/excel.py", line 89, in write_data
archive.writestr(ARC_WORKBOOK, writer.write())
File "/home/martin/myvenv/lib/python3.8/site-packages/openpyxl/workbook/_writer.py", line 148, in write
self.write_views()
File "/home/martin/myvenv/lib/python3.8/site-packages/openpyxl/workbook/_writer.py", line 135, in write_views
active = get_active_sheet(self.wb)
File "/home/martin/myvenv/lib/python3.8/site-packages/openpyxl/workbook/_writer.py", line 33, in get_active_sheet
raise IndexError("At least one sheet must be visible")
IndexError: At least one sheet must be visible
>>>
How do I fix this?
It turned out that in this case sample.xlsx had all sheets set to "hidden".
If at least one of the sheets is set to be visible the code works.
A weird limitation, but it exists.

Why can't I access the excel file in python?

I'm trying to use some data that I have in an Excel file. However, I'm getting an error saying that the file cannot be found. I've checked, and both the directory and the file name are correct. What am I doing wrong?
Here is the code:
import os
import pandas as pd
print(os.getcwd())
df = pd.read_excel(r'C:/Users/Eder/Desktop/TFG/Data/Interpolation_sample.xlsx',
index_col =0,parse_dates=True, sheet_name='sheet3')
And the answer from the console:
runcell(0, 'C:/Users/Eder/untitled0.py')
C:\Users\Eder\Desktop\TFG\Data
Traceback (most recent call last):
File "C:\Users\Eder\untitled0.py", line 14, in <module>
index_col =0,parse_dates=True, sheet_name='sheet3')
File "E:\Anaconda3\lib\site-packages\pandas\util\_decorators.py", line 299, in wrapper
return func(*args, **kwargs)
File "E:\Anaconda3\lib\site-packages\pandas\io\excel\_base.py", line 336, in read_excel
io = ExcelFile(io, storage_options=storage_options, engine=engine)
File "E:\Anaconda3\lib\site-packages\pandas\io\excel\_base.py", line 1072, in __init__
content=path_or_buffer, storage_options=storage_options
File "E:\Anaconda3\lib\site-packages\pandas\io\excel\_base.py", line 950, in inspect_excel_format
content_or_path, "rb", storage_options=storage_options, is_text=False
File "E:\Anaconda3\lib\site-packages\pandas\io\common.py", line 651, in get_handle
handle = open(handle, ioargs.mode)
FileNotFoundError: [Errno 2] No such file or directory: 'C:\\Users\\Eder\\Desktop\\TFG\\Data\\Interpolation_sample.xlsx'
I've figured out a way to solve the problem. I just changed the name of the file from 'Interpolation_sample' to 'Interpolation sample'. I don't know why, but the underscore in the file name is what was causing this error.

Tablib xlsx file badZip file issue

I am getting an error when opening a .xlsx file on Windows 8 using the tablib library.
python version - 2.7.14
error is as follows:
python suit_simple_sheet_product.py
Traceback (most recent call last):
File "suit_simple_sheet_product.py", line 19, in <module>
data = tablib.Dataset().load(open(BASE_PATH).read())
File "C:\Python27\lib\site-packages\tablib\core.py", line 446, in load
format = detect_format(in_stream)
File "C:\Python27\lib\site-packages\tablib\core.py", line 1157, in detect_format
if fmt.detect(stream):
File "C:\Python27\lib\site-packages\tablib\formats\_xls.py", line 25, in detect
xlrd.open_workbook(file_contents=stream)
File "C:\Python27\lib\site-packages\xlrd\__init__.py", line 120, in open_workbook
zf = zipfile.ZipFile(timemachine.BYTES_IO(file_contents))
File "C:\Python27\lib\zipfile.py", line 770, in __init__
self._RealGetContents()
File "C:\Python27\lib\zipfile.py", line 811, in _RealGetContents
raise BadZipfile, "File is not a zip file"
zipfile.BadZipfile: File is not a zip file
path location is as follows =
BASE_PATH = 'C:\Users\anju\Downloads\automate\catalog-5090 fabric detail and price list.xlsx'
Excel .xlsx files are actually zip files. In order for the unzip to work correctly, the file must be opened in binary mode, so you need to open the file using:
import tablib
BASE_PATH = r'c:\my folder\my_test.xlsx'
data = tablib.Dataset().load(open(BASE_PATH, 'rb').read())
print data
Add r before your string to stop Python from trying to interpret the backslash characters in your path.

Python Pandas: Error tokenizing data. C error: EOF inside string starting when reading 1GB CSV file

I'm reading a 1 GB CSV file in chunks of 10,000 rows. The file has 1106012 rows and 171 columns. Other, smaller files do not show any error and finish successfully, but when I read this 1 GB file it fails every time on exactly line number 1106011, which is the second-to-last line of the file. I could manually remove that line, but that is not a solution because I have hundreds of other files of the same size and I cannot fix all of them manually. Can anyone help me with this, please?
def extract_csv_to_sql(input_file_name, header_row, size_of_chunk, eachRow):
df = pd.read_csv(input_file_name,
header=None,
nrows=size_of_chunk,
skiprows=eachRow,
low_memory=False,
error_bad_lines=False,
sep=',')
# engine='python'
# quoting=csv.QUOTE_NONE
# encoding='utf-8'
df.columns = header_row
df = df.drop_duplicates(keep='first')
df = df.apply(lambda x: x.astype(str).str.lower())
return df
I'm then calling this function within a loop, and it works just fine.
huge_chunk_return = extract_csv_to_sql(huge_input_filename, huge_header_row, the_size_of_chunk_H, each_Row_H)
I read "Pandas ParserError EOF character when reading multiple csv files to HDF5", "read_csv() & EOF character in string cause parsing issue", https://github.com/pandas-dev/pandas/issues/11654 and many more, and tried adding read_csv parameters such as
engine='python'
quoting=csv.QUOTE_NONE // Hangs and even the python shell, don't know why
encoding='utf-8'
but none of them worked; it's still throwing the following error
Error:
Traceback (most recent call last):
File "C:\Users\WCan\Desktop\wcan_new_python\pandas_test_3.py", line 115, in <module>
huge_chunk_return = extract_csv_to_sql(huge_input_filename, huge_header_row, the_size_of_chunk_H, each_Row_H)
File "C:\Users\WCan\Desktop\wcan_new_python\pandas_test_3.py", line 24, in extract_csv_to_sql
sep=',')
File "C:\Users\WCan\AppData\Local\Programs\Python\Python36\lib\site-packages\pandas\io\parsers.py", line 655, in parser_f
return _read(filepath_or_buffer, kwds)
File "C:\Users\WCan\AppData\Local\Programs\Python\Python36\lib\site-packages\pandas\io\parsers.py", line 411, in _read
data = parser.read(nrows)
File "C:\Users\WCan\AppData\Local\Programs\Python\Python36\lib\site-packages\pandas\io\parsers.py", line 1005, in read
ret = self._engine.read(nrows)
File "C:\Users\WCan\AppData\Local\Programs\Python\Python36\lib\site-packages\pandas\io\parsers.py", line 1748, in read
data = self._reader.read(nrows)
File "pandas\_libs\parsers.pyx", line 893, in pandas._libs.parsers.TextReader.read (pandas\_libs\parsers.c:10885)
File "pandas\_libs\parsers.pyx", line 966, in pandas._libs.parsers.TextReader._read_rows (pandas\_libs\parsers.c:11884)
File "pandas\_libs\parsers.pyx", line 953, in pandas._libs.parsers.TextReader._tokenize_rows (pandas\_libs\parsers.c:11755)
File "pandas\_libs\parsers.pyx", line 2184, in pandas._libs.parsers.raise_parser_error (pandas\_libs\parsers.c:28765)
pandas.errors.ParserError: Error tokenizing data. C error: EOF inside string starting at line 1106011
>>>
If you are on Linux, try removing all non-printable characters.
Try to load your file after this operation.
tr -dc '[:print:]\n' < file > newfile
I tried many solutions; some of them worked, but they affected the calculations. I ended up using this one, which skips the line that is causing the error:
pd.read_csv(file,engine='python', error_bad_lines=False)
#engine='python' provides a better output

Categories

Resources