Pandas apply().to_excel() got DataFrame is not callable - python

empty_stock_list = [
{
'row_index': <num>,
'column_index': <num>
},
...
]
with pd.ExcelWriter(OUTPUT_FILE, engine='xlsxwriter') as writer:
df = pd.concat([header_row, data_price], ignore_index=False, sort=False).reset_index(drop=True)
df_color = df.copy()
df_color.iloc[:,:] = 'font-color: black'
for empty_stock in empty_stock_list:
df_color.iloc[empty_stock['row_index'], empty_stock['column_index']] = 'font-color: #FF0000'
df.style.apply(df_color, axis=None).\
to_excel(writer, sheet_name=sheet_name, index=False, header=None)
I have this code above, but always get this error: TypeError: 'DataFrame' object is not callable. Basically what I'm trying to do is to make the cell color into a red color if a stock is empty (Based on the data row_index and column_index).
Tried to follow the documentation, but I can't seem to make this right.
Below is the traceback error messages:
Traceback (most recent call last):
File "main.py", line 129, in <module>
df.style.apply(df_color, axis=None).\
File "/home/michaelharley/.local/lib/python3.8/site-packages/pandas/io/formats/style.py", line 229, in to_excel
formatter.write(
File "/home/michaelharley/.local/lib/python3.8/site-packages/pandas/io/formats/excel.py", line 734, in write
writer.write_cells(
File "/home/michaelharley/.local/lib/python3.8/site-packages/pandas/io/excel/_xlsxwriter.py", line 212, in write_cells
for cell in cells:
File "/home/michaelharley/.local/lib/python3.8/site-packages/pandas/io/formats/excel.py", line 688, in get_formatted_cells
for cell in itertools.chain(self._format_header(), self._format_body()):
File "/home/michaelharley/.local/lib/python3.8/site-packages/pandas/io/formats/excel.py", line 590, in _format_regular_rows
for cell in self._generate_body(coloffset):
File "/home/michaelharley/.local/lib/python3.8/site-packages/pandas/io/formats/excel.py", line 674, in _generate_body
styles = self.styler._compute().ctx
File "/home/michaelharley/.local/lib/python3.8/site-packages/pandas/io/formats/style.py", line 625, in _compute
r = func(self)(*args, **kwargs)
File "/home/michaelharley/.local/lib/python3.8/site-packages/pandas/io/formats/style.py", line 642, in _apply
result = func(data, **kwargs)
TypeError: 'DataFrame' object is not callable
I'm using these dependencies:
python 3.8.0
pandas 1.1.3
xlrd 1.2.0
XlsxWriter 1.3.7

I think you need to create a function and pass it to Styler.apply (which expects a callable, not a DataFrame), and also change font-color to color:
def func(df):
    """Return the per-cell CSS styles for ``df``, for use with ``Styler.apply(func, axis=None)``.

    Every cell starts as ``'color: black'``; each cell listed in the
    module-level ``empty_stock_list`` (dicts holding positional
    ``'row_index'`` and ``'column_index'``) is switched to red.

    Parameters
    ----------
    df : pandas.DataFrame
        The frame being styled; only its index and columns are used.

    Returns
    -------
    pandas.DataFrame
        A frame of CSS strings with the same index and columns as ``df``.
    """
    # Styler.apply with axis=None needs a frame labelled identically to the data.
    df_color = pd.DataFrame('color: black', index=df.index, columns=df.columns)
    for empty_stock in empty_stock_list:
        i = empty_stock['row_index']
        j = empty_stock['column_index']
        # Positional assignment: the indices are row/column positions, not labels.
        df_color.iloc[i, j] = 'color: #FF0000'
    return df_color
# Build the sheet and write it with the per-cell colours computed by func().
with pd.ExcelWriter(OUTPUT_FILE, engine='xlsxwriter') as writer:
    df = pd.concat([header_row, data_price],
                   ignore_index=False,
                   sort=False).reset_index(drop=True)
    # Pass the function itself (a callable), not a pre-built DataFrame of styles.
    (df.style.apply(func, axis=None)
       .to_excel(writer, sheet_name=sheet_name, index=False, header=None))

Related

Saving changes to workbook using openpyxl

An excel sheet export from an app adds blank columns and rows to the output data. I'm trying to remove it before massaging the data.
So I have some simple code to delete the first 3 rows and first 2 columns, then save the changes.
When I use Workbook.save(path) I get a traceback error. I researched on Stack Overflow and it looks like this simple code should be working. I've even tried passing just the filename, but that also gives a similar error.
I'm not sure why I'm getting this error.
Update: I corrected the typo Workbook.save(path) to workbook.save(path).
import openpyxl
import pandas as pd
from openpyxl import Workbook
# Give the location of the file
path = 'C:\\Users\\cbout\\Desktop\\2022 Data.xlsx'
filename = '2022 Data.xlsx'
DF = pd.read_excel(path, sheet_name=1)
# # To open the workbook
# # workbook object is created
workbook = openpyxl.load_workbook(path)
# # Get workbook active sheet object from the active attribute
worksheet = workbook.active
#Delete first 2 columns.
worksheet.delete_cols(0,2)
#Delete first 3 rows.
worksheet.delete_rows(0,4)
workbook.save(path)
New Traceback Error after updates
PS C:\Users\cbout\Documents\GitHub> & "C:/Program Files/Python310/python.exe" c:/Users/cbout/Documents/GitHub/Python/Stewardship.py
Traceback (most recent call last):
File "c:\Users\cbout\Documents\GitHub\Python\Stewardship.py", line 9, in <module>
stewardDF = pd.read_excel(path, sheet_name=1)
File "C:\Users\cbout\AppData\Roaming\Python\Python310\site-packages\pandas\util\_decorators.py", line 311, in wrapper
return func(*args, **kwargs)
File "C:\Users\cbout\AppData\Roaming\Python\Python310\site-packages\pandas\io\excel\_base.py", line 457, in read_excel
io = ExcelFile(io, storage_options=storage_options, engine=engine)
File "C:\Users\cbout\AppData\Roaming\Python\Python310\site-packages\pandas\io\excel\_base.py", line 1385, in __init__
engine = config.get_option(f"io.excel.{ext}.reader", silent=True)
File "C:\Users\cbout\AppData\Roaming\Python\Python310\site-packages\pandas\_config\config.py", line 256, in __call__
return self.__func__(*args, **kwds)
File "C:\Users\cbout\AppData\Roaming\Python\Python310\site-packages\pandas\_config\config.py", line 128, in _get_option
key = _get_single_key(pat, silent)
File "C:\Users\cbout\AppData\Roaming\Python\Python310\site-packages\pandas\_config\config.py", line 114, in _get_single_key
raise OptionError(f"No such keys(s): {repr(pat)}")
pandas._config.config.OptionError: No such keys(s): 'io.excel.zip.reader'

Writing worksheet with pandas not possible / Workbook-error?

I would like to save a dataframe to a new worksheet in an Excel sheet with the following code / function:
def save_xls(stock, content, filename):
book = load_workbook(filename)
writer = pd.ExcelWriter(
filename,
# engine='openpyxl',
engine_kwargs={"options": {"strings_to_numbers": True}},
)
writer.book = book
pd.DataFrame(content).to_excel(writer, sheet_name=stock, header=False, index=False)
But unfortunately, I get this error:
Traceback (most recent call last):
File "C:\Users\Polzi\Documents\DEV\Python-Private\FairValueCalc.py", line 303, in <module>
save_xls (stock, output, OUT)
File "C:\Users\Polzi\Documents\DEV\Python-Private\FairValueCalc.py", line 36, in save_xls
pd.DataFrame (content).to_excel (writer,
File "C:\Users\Polzi\Documents\DEV\.venv\NormalScraping\lib\site-packages\pandas\core\generic.py", line 2357, in to_excel
formatter.write(
File "C:\Users\Polzi\Documents\DEV\.venv\NormalScraping\lib\site-packages\pandas\io\formats\excel.py", line 892, in write
writer.write_cells(
File "C:\Users\Polzi\Documents\DEV\.venv\NormalScraping\lib\site-packages\pandas\io\excel\_xlsxwriter.py", line 219, in write_cells
wks = self.book.add_worksheet(sheet_name)
AttributeError: 'Workbook' object has no attribute 'add_worksheet'
How can I write this dataframe to a new worksheet in the Excel file?

Error when using pandas to convert dates on the dataframe or when reading the csv file

I need to import a csv file using pandas that have a date field in the format 'year.decimal day' such as '1980.042' which would be in the format 'DD/MM/YYYY', '11/02/1980'.
File sample:
data
1980.042
1980.125
1980.208
1980.292
1980.375
1980.458
1980.542
1980.625
1980.708
Using pd.to_datetime I can transform it like this:
d = '1980.042'
print(pd.to_datetime(d, format = '%Y.%j'))
Output:
1980-02-11 00:00:00
My first attempt was to read the file and convert the dataframe column:
import pandas as pd
df = pd.read_csv('datas.csv')
print(df.dtypes, '\n\n', df.head())
df['data'] = pd.to_datetime(df['data'], '%Y.%j')
Output:
data float64
dtype: object
data
0 1980.042
1 1980.125
2 1980.208
3 1980.292
4 1980.375
Traceback (most recent call last):
File "datas.py", line 4, in <module>
df['data'] = pd.to_datetime(df['data'], '%Y.%j')
File "/usr/lib/python3/dist-packages/pandas/core/tools/datetimes.py", line 451, in to_datetime
values = _convert_listlike(arg._values, True, format)
File "/usr/lib/python3/dist-packages/pandas/core/tools/datetimes.py", line 368, in _convert_listlike
require_iso8601=require_iso8601
File "pandas/_libs/tslib.pyx", line 492, in pandas._libs.tslib.array_to_datetime
File "pandas/_libs/tslib.pyx", line 513, in pandas._libs.tslib.array_to_datetime
AssertionError
The second attempt was to transform the column into a str and then a date:
import pandas as pd
df = pd.read_csv('datas.csv')
print(df.dtypes, '\n\n', df.head())
df['data'] = df['data'].astype(str)
df['data'] = pd.to_datetime(df['data'], '%Y.%j')
Output:
data float64
dtype: object
data
0 1980.042
1 1980.125
2 1980.208
3 1980.292
4 1980.375
Traceback (most recent call last):
File "datas.py", line 6, in <module>
df['data'] = pd.to_datetime(df['data'], '%Y.%j')
File "/usr/lib/python3/dist-packages/pandas/core/tools/datetimes.py", line 451, in to_datetime
values = _convert_listlike(arg._values, True, format)
File "/usr/lib/python3/dist-packages/pandas/core/tools/datetimes.py", line 368, in _convert_listlike
require_iso8601=require_iso8601
File "pandas/_libs/tslib.pyx", line 492, in pandas._libs.tslib.array_to_datetime
File "pandas/_libs/tslib.pyx", line 513, in pandas._libs.tslib.array_to_datetime
AssertionError
Then I realized that for some internal floating point issue the data was getting more than three decimal places. So I rounded it up to just three decimal places before convert:
import pandas as pd
df = pd.read_csv('datas.csv')
print(df.dtypes, '\n\n', df.head())
df['data'] = df['data'].round(3).astype(str)
print(df.dtypes, '\n\n', df.head())
df['data'] = pd.to_datetime(df['data'], '%Y.%j')
Output:
data float64
dtype: object
data
0 1980.042
1 1980.125
2 1980.208
3 1980.292
4 1980.375
data object
dtype: object
data
0 1980.042
1 1980.125
2 1980.208
3 1980.292
4 1980.375
Traceback (most recent call last):
File "datas.py", line 8, in <module>
df['data'] = pd.to_datetime(df['data'], '%Y.%j')
File "/usr/lib/python3/dist-packages/pandas/core/tools/datetimes.py", line 451, in to_datetime
values = _convert_listlike(arg._values, True, format)
File "/usr/lib/python3/dist-packages/pandas/core/tools/datetimes.py", line 368, in _convert_listlike
require_iso8601=require_iso8601
File "pandas/_libs/tslib.pyx", line 492, in pandas._libs.tslib.array_to_datetime
File "pandas/_libs/tslib.pyx", line 513, in pandas._libs.tslib.array_to_datetime
AssertionError
Finally, I saw in the pandas documentation and in some forums that I could define the data type when reading the file and also apply a lambda function:
import pandas as pd
date_parser = lambda col: pd.to_datetime(str(col), format = '%Y.%j')
df = pd.read_csv('datas.csv', parse_dates = ['data'], date_parser = date_parser)
print(df.dtypes, '\n\n', df.head())
Output:
Traceback (most recent call last):
File "/usr/lib/python3/dist-packages/pandas/core/tools/datetimes.py", line 377, in _convert_listlike
values, tz = conversion.datetime_to_datetime64(arg)
File "pandas/_libs/tslibs/conversion.pyx", line 188, in pandas._libs.tslibs.conversion.datetime_to_datetime64
TypeError: Unrecognized value type: <class 'str'>
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "datas.py", line 5, in <module>
df = pd.read_csv('datas.csv', parse_dates = ['data'], date_parser = date_parser)
File "/usr/lib/python3/dist-packages/pandas/io/parsers.py", line 678, in parser_f
return _read(filepath_or_buffer, kwds)
File "/usr/lib/python3/dist-packages/pandas/io/parsers.py", line 446, in _read
data = parser.read(nrows)
File "/usr/lib/python3/dist-packages/pandas/io/parsers.py", line 1036, in read
ret = self._engine.read(nrows)
File "/usr/lib/python3/dist-packages/pandas/io/parsers.py", line 1921, in read
names, data = self._do_date_conversions(names, data)
File "/usr/lib/python3/dist-packages/pandas/io/parsers.py", line 1675, in _do_date_conversions
self.index_names, names, keep_date_col=self.keep_date_col)
File "/usr/lib/python3/dist-packages/pandas/io/parsers.py", line 3066, in _process_date_conversion
data_dict[colspec] = converter(data_dict[colspec])
File "/usr/lib/python3/dist-packages/pandas/io/parsers.py", line 3033, in converter
return generic_parser(date_parser, *date_cols)
File "/usr/lib/python3/dist-packages/pandas/io/date_converters.py", line 39, in generic_parser
results[i] = parse_func(*args)
File "datas.py", line 3, in <lambda>
date_parser = lambda col: pd.to_datetime(str(col), format = '%Y.%j')
File "/usr/lib/python3/dist-packages/pandas/core/tools/datetimes.py", line 469, in to_datetime
result = _convert_listlike(np.array([arg]), box, format)[0]
File "/usr/lib/python3/dist-packages/pandas/core/tools/datetimes.py", line 380, in _convert_listlike
raise e
File "/usr/lib/python3/dist-packages/pandas/core/tools/datetimes.py", line 347, in _convert_listlike
errors=errors)
File "pandas/_libs/tslibs/strptime.pyx", line 163, in pandas._libs.tslibs.strptime.array_strptime
ValueError: unconverted data remains: 5
Anyway, nothing works, has anyone been there? Any suggestions for doing the file reading with the correct data type or for converting the column on the dataframe?
I really hadn't realized the problem with the data.
Removing those with decimal parts greater than 365, I tested Tuhin Sharma's idea.
Unfortunately, it returns the value of the first line for all dataframe lines.
But I used the datetime module, as suggested by Tuhin Sharma, in a lambda function when reading the file as follows:
Sample file:
data
1980.042
1980.125
1980.208
1980.292
Code:
import pandas as pd
import datetime
date_parser = lambda col: datetime.datetime.strptime(col, '%Y.%j')
df = pd.read_csv('datas.csv', parse_dates = ['data'], date_parser = date_parser)
print(df)
Output:
data
0 1980-02-11
1 1980-05-04
2 1980-07-26
3 1980-10-18
You could try using datetime module. You can try the following code:-
import pandas as pd
import numpy as np
import datetime
import pandas as pd
df = pd.read_csv('datas.csv',dtype=str)
df["data"] = df["data"].map(lambda x: datetime.datetime.strptime(x,'%Y.%j'))
However, this code will fail because your data has a problem.
1980.375
1980.458
1980.542
1980.625
1980.708
For these values, the day-of-year part (the three decimal places) is greater than 365, which is not a valid day of the year, and that is why it will throw an error.
Hope this helps!!
You can try the following code as well which is a lot cleaner:-
import pandas as pd
import datetime
date_parser = lambda x: datetime.datetime.strptime(x, '%Y.%j')
df = pd.read_csv('datas.csv', parse_dates = ['data'], date_parser = date_parser)
print(df)

Openpyxl Workbook.save function creates a corrupt and un-openable Excel (.xlsx) file

I have tried using August William's solution to this issue, but that also didn't work. I am not switching workbook types, i.e. .xlsm to .xlsx, which appears to be a separate issue. I have looked through Openpyxl's Manual trying to find maybe a bug report or bug fix, but to no avail. The below is my very simple code. Following that is the python error message which results in a workbook being created, but it is corrupted and fails to load. Any help is appreciated.
-Thanks!!
from openpyxl import Workbook
dashbrd = Workbook()
fp = dashbrd.active
fp.title = 'Sheet Name Goes Here'
fp['A1'] = 'Header'
fp['B1'] = '2nd Header'
fp['C1'] = '3rd Header'
fp['D1'] = '4th Header'
fp['E1'] = '5th Header'
fp['F1'] = 'You get the idea'
fp['G1'] = 'Another Header'
fp['H1'] = 'Blah blah blah'
fp['I1'] = 'Yadda yadda yadda'
dashbrd.save("S:\\folder1\\folder2\\folder3\\MyBook.xlsx")
**************************************************************************************
Traceback (most recent call last):
File "C:\Users\NotaDirtyUser\Documents\Scripts\HeaderTest.py", line 26, in <module>
dashbrd.save("S:\\folder1\\folder2\\folder3\\MyBook.xlsx")
File "C:\ProgramData\Anaconda3\lib\site-packages\openpyxl\workbook\workbook.py", line 408, in save
save_workbook(self, filename)
File "C:\ProgramData\Anaconda3\lib\site-packages\openpyxl\writer\excel.py", line 293, in save_workbook
writer.save()
File "C:\ProgramData\Anaconda3\lib\site-packages\openpyxl\writer\excel.py", line 275, in save
self.write_data()
File "C:\ProgramData\Anaconda3\lib\site-packages\openpyxl\writer\excel.py", line 75, in write_data
self._write_worksheets()
File "C:\ProgramData\Anaconda3\lib\site-packages\openpyxl\writer\excel.py", line 215, in _write_worksheets
self.write_worksheet(ws)
File "C:\ProgramData\Anaconda3\lib\site-packages\openpyxl\writer\excel.py", line 200, in write_worksheet
writer.write()
File "C:\ProgramData\Anaconda3\lib\site-packages\openpyxl\worksheet\_writer.py", line 354, in write
self.write_top()
File "C:\ProgramData\Anaconda3\lib\site-packages\openpyxl\worksheet\_writer.py", line 98, in write_top
self.write_properties()
File "C:\ProgramData\Anaconda3\lib\site-packages\openpyxl\worksheet\_writer.py", line 60, in write_properties
self.xf.send(props.to_tree())
File "C:\ProgramData\Anaconda3\lib\site-packages\openpyxl\worksheet\_writer.py", line 294, in get_stream
xf.write(el)
File "src/lxml/serializer.pxi", line 1652, in lxml.etree._IncrementalFileWriter.write
TypeError: got invalid input value of type <class 'xml.etree.ElementTree.Element'>, expected string or Element
Like I said above , yesterday I had the same problem ..I found the solution in this link:
https://python-forum.io/Thread-Need-help-in-understanding-this-particular-Traceback-TypeError
In reference to this error :
TypeError: got invalid input value of type <class 'xml.etree.ElementTree.Element'>, expected string or Element
In summary, the solution was to install a different version of openpyxl:
pip uninstall openpyxl
pip install openpyxl==3.0.1
I can't find a good reference, but I recall having stumbled upon the same, and the solution was to use the older format (.xls, which is a completely different format) instead. Seems like a generic problem of openpyxl that wasn't resolved at the moment.
A working way to append to .xlsx (works for me):
# Fragment: filename, sheet_name, startrow, truncate_sheet, df and
# to_excel_kwargs are expected to be supplied by the surrounding code.
from openpyxl import load_workbook

writer = pd.ExcelWriter(filename, engine='openpyxl')

try:
    # try to open an existing workbook
    writer.book = load_workbook(filename)

    # get the last row in the existing Excel sheet
    # if it was not specified explicitly
    if startrow is None and sheet_name in writer.book.sheetnames:
        startrow = writer.book[sheet_name].max_row

    # truncate sheet
    if truncate_sheet and sheet_name in writer.book.sheetnames:
        # index of [sheet_name] sheet
        idx = writer.book.sheetnames.index(sheet_name)
        # remove [sheet_name]
        writer.book.remove(writer.book.worksheets[idx])
        # create an empty sheet [sheet_name] using old index
        writer.book.create_sheet(sheet_name, idx)

    # copy existing sheets
    writer.sheets = {ws.title: ws for ws in writer.book.worksheets}
except FileNotFoundError:
    # file does not exist yet, we will create it
    pass

if startrow is None:
    startrow = 0

# write out the new sheet
df.to_excel(writer, sheet_name, startrow=startrow, **to_excel_kwargs)

# save the workbook
writer.save()
Same problem for me: I am not able to reopen a file created by openpyxl version > 3:
in 3.0.3:
File "D:\MyProg.py", line 251, in chargerSynthese
self.wbs = load_workbook(filename=self.nomfichierXLSX)
File "D:\MyPython\python3.8.2-x64\lib\site-packages\openpyxl\reader\excel.py", line 314, in load_workbook
reader.read()
File "D:\MyPython\python3.8.2-x64\lib\site-packages\openpyxl\reader\excel.py", line 279, in read
self.read_worksheets()
File "D:\MyPython\python3.8.2-x64\lib\site-packages\openpyxl\reader\excel.py", line 227, in read_worksheets
ws_parser.bind_all()
File "D:\MyPython\python3.8.2-x64\lib\site-packages\openpyxl\worksheet\_reader.py", line 426, in bind_all
self.bind_cells()
File "D:\MyPython\python3.8.2-x64\lib\site-packages\openpyxl\worksheet\_reader.py", line 337, in bind_cells
for idx, row in self.parser.parse():
File "D:\MyPython\python3.8.2-x64\lib\site-packages\openpyxl\worksheet\_reader.py", line 153, in parse
row = self.parse_row(element)
File "D:\MyPython\python3.8.2-x64\lib\site-packages\openpyxl\worksheet\_reader.py", line 264, in parse_row
self.row_counter = int(attrs['r'])
ValueError: invalid literal for int() with base 10: '2.0'
attrs={'r':'2.0'}: I don't know where it comes from (it originates in a worksheet saved by openpyxl 3.0.3), but when int(attrs['r']) is then executed in _reader.py, it crashes!
solution back to 2.6.4 version!
reply to myself!
openpyxl 3.0.3 works well but is less permissive than the 2.6 versions. Here is my test code; you must pass an int to row=, not a float:
from openpyxl import __version__
from openpyxl import load_workbook
from openpyxl import Workbook

# Write a workbook whose first cell is addressed with a float row index.
wbs = Workbook()
wbs.active.title = 'titi'
# row is deliberately a float (1.0): this is the input that reproduces the problem.
mycell=wbs['titi'].cell(row = 1.0, column = 1)
mycell.value=22
wbs.save('toto.xlsx')
print('openpyxl __version__:',__version__)

# Reload the file just written, append rows, and save it again.
wbi = load_workbook(filename='toto.xlsx')
for i in range(0,30):
    wbi['titi'].append([i,'tata'])
wbi.save('toto.xlsx')
# result1:
# openpyxl __version__: 2.6.3
# result2:
# openpyxl __version__: 3.0.3
# Traceback (most recent call last):
# File "D:\Users\T0015039\Documents\Mes Outils Personnels\py\essais\crashxlsx.py", line 13, in <module>
# wbi = load_workbook(filename='toto.xlsx')
# File "D:\Users\T0015039\Documents\Mes Outils Personnels\python3.8.2_pyscripter3.6.3-x64\python3.8.2-x64\lib\site-packages\openpyxl\reader\excel.py", line 314, in load_workbook
# reader.read()
# File "D:\Users\T0015039\Documents\Mes Outils Personnels\python3.8.2_pyscripter3.6.3-x64\python3.8.2-x64\lib\site-packages\openpyxl\reader\excel.py", line 279, in read
# self.read_worksheets()
# File "D:\Users\T0015039\Documents\Mes Outils Personnels\python3.8.2_pyscripter3.6.3-x64\python3.8.2-x64\lib\site-packages\openpyxl\reader\excel.py", line 227, in read_worksheets
# ws_parser.bind_all()
# File "D:\Users\T0015039\Documents\Mes Outils Personnels\python3.8.2_pyscripter3.6.3-x64\python3.8.2-x64\lib\site-packages\openpyxl\worksheet\_reader.py", line 426, in bind_all
# self.bind_cells()
# File "D:\Users\T0015039\Documents\Mes Outils Personnels\python3.8.2_pyscripter3.6.3-x64\python3.8.2-x64\lib\site-packages\openpyxl\worksheet\_reader.py", line 337, in bind_cells
# for idx, row in self.parser.parse():
# File "D:\Users\T0015039\Documents\Mes Outils Personnels\python3.8.2_pyscripter3.6.3-x64\python3.8.2-x64\lib\site-packages\openpyxl\worksheet\_reader.py", line 153, in parse
# row = self.parse_row(element)
# File "D:\Users\T0015039\Documents\Mes Outils Personnels\python3.8.2_pyscripter3.6.3-x64\python3.8.2-x64\lib\site-packages\openpyxl\worksheet\_reader.py", line 264, in parse_row
# self.row_counter = int(attrs['r'])
# ValueError: invalid literal for int() with base 10: '1.0'
Same problem for me with openpyxl version 2.0.2. I uninstalled it, reinstalled 2.0.10 and added it to my project.

pandas: write empty DataFrame to HDF file

Is there a way to force pandas to write an empty DataFrame to an HDF file?
import pandas as pd
df = pd.DataFrame(columns=['x','y'])
df.to_hdf('temp.h5', 'xxx')
df2 = pd.read_hdf('temp.h5', 'xxx')
Output:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File ".../Python-3.6.3/lib/python3.6/site-packages/pandas/io/pytables.py", line 389, in read_hdf
return store.select(key, auto_close=auto_close, **kwargs)
File ".../Python-3.6.3/lib/python3.6/site-packages/pandas/io/pytables.py", line 740, in select
return it.get_result()
File ".../Python-3.6.3/lib/python3.6/site-packages/pandas/io/pytables.py", line 1518, in get_result
results = self.func(self.start, self.stop, where)
File ".../Python-3.6.3/lib/python3.6/site-packages/pandas/io/pytables.py", line 733, in func
columns=columns)
File ".../Python-3.6.3/lib/python3.6/site-packages/pandas/io/pytables.py", line 2986, in read
idx=i), start=_start, stop=_stop)
File ".../Python-3.6.3/lib/python3.6/site-packages/pandas/io/pytables.py", line 2575, in read_index
_, index = self.read_index_node(getattr(self.group, key), **kwargs)
File ".../Python-3.6.3/lib/python3.6/site-packages/pandas/io/pytables.py", line 2676, in read_index_node
data = node[start:stop]
File ".../Python-3.6.3/lib/python3.6/site-packages/tables/vlarray.py", line 675, in __getitem__
return self.read(start, stop, step)
File ".../Python-3.6.3/lib/python3.6/site-packages/tables/vlarray.py", line 811, in read
listarr = self._read_array(start, stop, step)
File "tables/hdf5extension.pyx", line 2106, in tables.hdf5extension.VLArray._read_array (tables/hdf5extension.c:24649)
ValueError: cannot set WRITEABLE flag to True of this array
Writing with format='table':
import pandas as pd
df = pd.DataFrame(columns=['x','y'])
df.to_hdf('temp.h5', 'xxx', format='table')
df2 = pd.read_hdf('temp.h5', 'xxx')
Output:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File ".../Python-3.6.3/lib/python3.6/site-packages/pandas/io/pytables.py", line 389, in read_hdf
return store.select(key, auto_close=auto_close, **kwargs)
File ".../Python-3.6.3/lib/python3.6/site-packages/pandas/io/pytables.py", line 722, in select
raise KeyError('No object named {key} in the file'.format(key=key))
KeyError: 'No object named xxx in the file'
Pandas version: 0.24.2
Thank you for your help!
Putting empty DataFrame into HDFStore in fixed format should work (maybe you need to check versions of other packages, e.g. tables):
# Versions
pd.__version__
tables.__version__
# DF
df = pd.DataFrame(columns=['x','y'])
df
# Dump in fixed format
with pd.HDFStore('temp.h5') as store:
    store.put('df', df, format='f')
    print('Read:')
    # Read the empty frame back while the store is still open.
    store.select('df')
>>> '0.24.2'
>>> '3.5.1'
>>> x y
>>>
>>> Read:
>>> x y
PyTables really forbids doing so (or at least it did), but for the fixed format pandas has its own workaround.
But as discussed in the same GitHub issue, some effort has been made to fix this behavior for the table format as well. However, it looks like the solution is still 'hanging in the air', because that was still the state of things at the end of March.

Categories

Resources