python pandas Access Excel MemoryError - python

All,
I've been trying to use pandas in python to load a table from access then write the data to an excel file see following code:
When running it the code (python 3.5.2) I receive the following output:
<!-- language: Python -->
import pandas as pd
import pypyodbc
conn = 'DSN=MyDSNTest'
cnxn = pypyodbc.connect(conn)
crsr = cnxn.cursor()
qy = """select * from mytbl;"""
df = pd.read_sql(qy, cnxn)
cnxn.commit()
crsr.close()
cnxn.close()
print ("read into dataframe")
#writer = pd.ExcelWriter('c:/tmp/test.xlsx')
#df.to_excel(writer, 'Data')
df.to_excel('E:/Reports/AnalyticsInput/tblHistoryAC.xlsx', Data',index=False)
# Close the Pandas Excel writer and output the Excel file.
#writer.save()
read into dataframe 199966 Traceback (most recent call last):
File "C:\Users\jeff\test.py", line 23, in
df.to_excel('c:/tmp/MyTest.xlsx', 'Data', index=False) File "C:\Python35-32\lib\site-packages\pandas\core\frame.py", line 1466, in
to_excel
excel_writer.save() File "C:\Python35-32\lib\site-packages\pandas\io\excel.py", line 790, in
save
return self.book.save(self.path) File "C:\Python35-32\lib\site-packages\openpyxl\workbook\workbook.py", line
345, in save
save_workbook(self, filename) File "C:\Python35-32\lib\site-packages\openpyxl\writer\excel.py", line 266,
in save_workbook
writer.save(filename) File "C:\Python35-32\lib\site-packages\openpyxl\writer\excel.py", line 248,
in save
self.write_data() File "C:\Python35-32\lib\site-packages\openpyxl\writer\excel.py", line 81,
in write_data
self._write_worksheets() File "C:\Python35-32\lib\site-packages\openpyxl\writer\excel.py", line 197,
in _write_worksheets
xml = ws._write() File "C:\Python35-32\lib\site-packages\openpyxl\worksheet\worksheet.py",
line 870, in _write
return write_worksheet(self) File "C:\Python35-32\lib\site-packages\openpyxl\writer\worksheet.py", line
107, in write_worksheet
write_rows(xf, ws) MemoryError
While the file is 200,000 rows I'd have to believe there is something else or another way to produce the xlsx file without getting a memory error.
Any ideas? Thanks!
Jeff

Related

Working with json file to convert to a sqlite table format in python

I have data formatted in .json file. The end goal is to reformat the data to sqlite table and store into a database for further analysis.
Here is a sample of the data:
{"_id":{"$oid":"60551"},"barcode":"511111019862","category":"Baking","categoryCode":"BAKING","cpg":{"$id":{"$oid":"601ac114be37ce2ead437550"},"$ref":"Cogs"},"name":"test brand #1612366101024","topBrand":false}
{"_id":{"$oid":"601c5460be37ce2ead43755f"},"barcode":"511111519928","brandCode":"STARBUCKS","category":"Beverages","categoryCode":"BEVERAGES","cpg":{"$id":{"$oid":"5332f5fbe4b03c9a25efd0ba"},"$ref":"Cogs"},"name":"Starbucks","topBrand":false}
{"_id":{"$oid":"601ac142be37ce2ead43755d"},"barcode":"511111819905","brandCode":"TEST BRANDCODE #1612366146176","category":"Baking","categoryCode":"BAKING","cpg":{"$id":{"$oid":"601ac142be37ce2ead437559"},"$ref":"Cogs"},"name":"test brand #1612366146176","topBrand":false}
{"_id":{"$oid":"601ac142be37ce2ead43755a"},"barcode":"511111519874","brandCode":"TEST BRANDCODE #1612366146051","category":"Baking","categoryCode":"BAKING","cpg":{"$id":{"$oid":"601ac142be37ce2ead437559"},"$ref":"Cogs"},"name":"test brand #1612366146051","topBrand":false}
Followed by the code:
import pandas as pd
import json
import sqlite3
# Open json file and convert to a list
with open("users.json") as f:
dat = [json.loads(line.strip()) for line in f]
# create a datafrom from json file
df = pd.DataFrame(dat)
#open database connection
con = sqlite3.connect("fetch_rewards.db")
c = con.cursor()
df.to_sql("users", con)
c.close()
The error I am getting:
Traceback (most recent call last):
File "C:\Users\mohammed.alabbas\Desktop\sqlite\import_csv.py", line 16, in <module>
df.to_sql("users", con)
File "C:\Users\name\AppData\Roaming\Python\Python39\site-packages\pandas\core\generic.py", line 2605, in to_sql
sql.to_sql(
File "C:\Users\name\AppData\Roaming\Python\Python39\site-packages\pandas\io\sql.py", line 589, in to_sql
pandas_sql.to_sql(
File "C:\Users\name\AppData\Roaming\Python\Python39\site-packages\pandas\io\sql.py", line 1828, in to_sql
table.insert(chunksize, method)
File "C:\Users\mname\AppData\Roaming\Python\Python39\site-packages\pandas\io\sql.py", line 830, in insert
exec_insert(conn, keys, chunk_iter)
File "C:\Users\mname\AppData\Roaming\Python\Python39\site-packages\pandas\io\sql.py", line 1555, in _execute_insert
conn.executemany(self.insert_statement(num_rows=1), data_list)
sqlite3.InterfaceError: Error binding parameter 1 - probably unsupported type.
Thanks in advance

Python Pandas Errno 13 while saving dataframe to xlsx

I am slowly loosing my mind. I'm trying to save pandas dataframe to xlsx.
save_path = self.SAVE_L['text']+ f"\df_{part}_.xlsx"
writer = pandas.ExcelWriter(save_path , engine='xlsxwriter',date_format='DD/MM/YYYY',datetime_format='DD/MM/YYYY')
df.to_excel(writer , index=False, sheet_name='df', engine='xlsxwriter')
writer.save()
However, for bigger df (but still in range of excel files - around 550 000 lines) I'm getting this error:
//NETWORK_DISK/save_path\df_part1_.xlsx
Exception in Tkinter callback
Traceback (most recent call last):
File "C:\Program Files\Python37\lib\tkinter\__init__.py", line 1705, in __call__
return self.func(*args)
File "PATH to script/Prog 0.31.py", line 460, in Process
scr_t=self.scr_list)
File "PATH to script/Prog 0.31.py", line 829, in script_PROC
writer.save()
File "C:\Program Files\Python37\lib\site-packages\pandas\io\excel.py", line 1952, in save
return self.book.close()
File "C:\Program Files\Python37\lib\site-packages\xlsxwriter\workbook.py", line 310, in close
self._store_workbook()
File "C:\Program Files\Python37\lib\site-packages\xlsxwriter\workbook.py", line 636, in _store_workbook
xlsx_file.write(os_filename, xml_filename)
File "C:\Program Files\Python37\lib\zipfile.py", line 1746, in write
with open(filename, "rb") as src, self.open(zinfo, 'w') as dest:
PermissionError: [Errno 13] Permission denied: 'C:\\Users\\Pandriej\\AppData\\Local\\Temp\\tmpfi53pba3'
I can save it to .csv without any problem. I also can divide it into smaller chunks and save them individually. The problem is I need the entire output to be saved as xlsx.
Is there any way I can fix this? Make Pandas actually save the file?

How to import CSV files to SQLite3 with python

This is my code.
import sqlite3
import pandas
db = sqlite3.connect('testdb.db')
df = pandas.read_csv('testcsv.csv')
df.to_sql('testTable', 'db', if_exists='append', index=False)
I got the last two lines of code from another article on stackoverflow, but it doesn't work for me. This is the error I get, even after I installed sqlalchemy, because it complained that it wasn't installed.
Traceback (most recent call last):
File "C:/Users/pitye/PycharmProjects/gradeCalcV2/venv/sqlite.py", line 7, in <module>
df.to_sql('testTable', 'db', if_exists='append', index=False)
File "C:\Users\pitye\PycharmProjects\gradeCalcV2\venv\lib\site-packages\pandas\core\generic.py", line 2663, in to_sql
method=method,
File "C:\Users\pitye\PycharmProjects\gradeCalcV2\venv\lib\site-packages\pandas\io\sql.py", line 503, in to_sql
pandas_sql = pandasSQL_builder(con, schema=schema)
File "C:\Users\pitye\PycharmProjects\gradeCalcV2\venv\lib\site-packages\pandas\io\sql.py", line 577, in pandasSQL_builder
con = _engine_builder(con)
File "C:\Users\pitye\PycharmProjects\gradeCalcV2\venv\lib\site-packages\pandas\io\sql.py", line 564, in _engine_builder
con = sqlalchemy.create_engine(con)
File "C:\Users\pitye\PycharmProjects\gradeCalcV2\venv\lib\site-packages\sqlalchemy\engine\__init__.py", line 479, in create_engine
return strategy.create(*args, **kwargs)
File "C:\Users\pitye\PycharmProjects\gradeCalcV2\venv\lib\site-packages\sqlalchemy\engine\strategies.py", line 54, in create
u = url.make_url(name_or_url)
File "C:\Users\pitye\PycharmProjects\gradeCalcV2\venv\lib\site-packages\sqlalchemy\engine\url.py", line 229, in make_url
return _parse_rfc1738_args(name_or_url)
File "C:\Users\pitye\PycharmProjects\gradeCalcV2\venv\lib\site-packages\sqlalchemy\engine\url.py", line 291, in _parse_rfc1738_args
"Could not parse rfc1738 URL from string '%s'" % name
sqlalchemy.exc.ArgumentError: Could not parse rfc1738 URL from string 'db'
I just want to create a table from a CSV file in SQLite. Is this even the right way of doing it, or am I waaay off?
I think you just have to replace
df.to_sql('testTable', 'db', if_exists='append', index=False)
With
df.to_sql('testTable', db, if_exists='append', index=False)

Save self-generated Excel file as UploadedFile

I have a huge string that contains CSV data. I want to convert it to an Excel file (.xslx) and save it as an UploadedFile/SimpleUploadedFile. I googled as best as I could and came up with the following. result_data being the huge string, obviously.
from io import StringIO
import pandas
from django.core.files.uploadedfile import SimpleUploadedFile
### irrelevant code
result_data = StringIO(result_data)
df = pandas.DataFrame.from_csv(result_data, sep=';')
writer = pandas.ExcelWriter('file.xlsx', engine='xlsxwriter')
df.to_excel(writer, sheet_name='Sheet1')
result_file = writer.book
result_data.seek(0)
mimetype = 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
object.xls_file = SimpleUploadedFile('filename.xlsx', result_data.read(), content_type=mimetype)
object.save()
I've tried numerous replacements for result_data.read() such as result_data, result_file, result_file.read(), but so far none of them has worked.
EDIT: I modified my code according to jmcnamara's suggestions, but got an error from writer.save().
output = StringIO()
result_data = StringIO(result_data)
df = pandas.DataFrame.from_csv(result_data, sep=';')
writer = pandas.ExcelWriter(output, engine='xlsxwriter')
df.to_excel(writer, sheet_name='Sheet1')
writer.save()
Traceback:
Traceback (most recent call last):
File "manage.py", line 10, in <module>
execute_from_command_line(sys.argv)
File "/venv/lib/python3.4/site-packages/django/core/management/__init__.py", line 338, in execute_from_command_line
utility.execute()
File "/venv/lib/python3.4/site-packages/django/core/management/__init__.py", line 330, in execute
self.fetch_command(subcommand).run_from_argv(self.argv)
File "/venv/lib/python3.4/site-packages/django/core/management/base.py", line 390, in run_from_argv
self.execute(*args, **cmd_options)
File "/venv/lib/python3.4/site-packages/django/core/management/base.py", line 441, in execute
output = self.handle(*args, **options)
File "/commands/create.py", line 67, in handle
writer.save()
File "/venv/lib/python3.4/site-packages/pandas/io/excel.py", line 1413, in save
return self.book.close()
File "/venv/lib/python3.4/site-packages/xlsxwriter/workbook.py", line 296, in close
self._store_workbook()
File "/venv/lib/python3.4/site-packages/xlsxwriter/workbook.py", line 541, in _store_workbook
xlsx_file.write(os_filename, xml_filename)
File "/usr/lib/python3.4/zipfile.py", line 1373, in write
self.fp.write(zinfo.FileHeader(zip64))
TypeError: string argument expected, got 'bytes'
Exception ignored in: <bound method ZipFile.__del__ of <zipfile.ZipFile object at 0x7fe5fa2077f0>>
Traceback (most recent call last):
File "/usr/lib/python3.4/zipfile.py", line 1466, in __del__
self.close()
File "/usr/lib/python3.4/zipfile.py", line 1573, in close
self.fp.write(endrec)
TypeError: string argument expected, got 'bytes'
You probably need to close/save the xlsx file created by pandas before trying to read the data:
writer.save()
Also, with Pandas 0.17+ you can use a StringIO/BytesIO object as a filehandle to pd.ExcelWriter. For example:
import pandas as pd
import StringIO
output = StringIO.StringIO()
# Use the StringIO object as the filehandle.
writer = pd.ExcelWriter(output, engine='xlsxwriter')
# Write the data frame to the StringIO object.
pd.DataFrame().to_excel(writer, sheet_name='Sheet1')
writer.save()
xlsx_data = output.getvalue()
# Do something with the data...

Python Error when reading data from .xls file

I need to read a few xls files into Python.The sample data file can be found through Link:data.file. I tried:
import pandas as pd
pd.read_excel('data.xls',sheet=1)
But it gives an error message:
ERROR *** codepage 21010 -> encoding 'unknown_codepage_21010' ->
LookupError: unknown encoding: unknown_codepage_21010 Traceback (most
recent call last):
File "", line 1, in
pd.read_excel('data.xls',sheet=1)
File "C:\Anaconda3\lib\site-packages\pandas\io\excel.py", line 113,
in read_excel
return ExcelFile(io, engine=engine).parse(sheetname=sheetname, **kwds)
File "C:\Anaconda3\lib\site-packages\pandas\io\excel.py", line 150,
in init
self.book = xlrd.open_workbook(io)
File "C:\Anaconda3\lib\site-packages\xlrd__init__.py", line 435, in
open_workbook
ragged_rows=ragged_rows,
File "C:\Anaconda3\lib\site-packages\xlrd\book.py", line 116, in
open_workbook_xls
bk.parse_globals()
File "C:\Anaconda3\lib\site-packages\xlrd\book.py", line 1170, in
parse_globals
self.handle_codepage(data)
File "C:\Anaconda3\lib\site-packages\xlrd\book.py", line 794, in
handle_codepage
self.derive_encoding()
File "C:\Anaconda3\lib\site-packages\xlrd\book.py", line 775, in
derive_encoding
_unused = unicode(b'trial', self.encoding)
File "C:\Anaconda3\lib\site-packages\xlrd\timemachine.py", line 30,
in
unicode = lambda b, enc: b.decode(enc)
LookupError: unknown encoding: unknown_codepage_21010
Anyone could help with this problem?
PS: I know if I open the file in windows excel, and resave it, the code could work, but I am looking for a solution without manual adjustment.
using the ExcelFile class, I was successfully able to read the file into python.
let me know if this helps!
import xlrd
import pandas as pd
xls = pd.ExcelFile(’C:\data.xls’)
xls.parse(’Index Constituents Data’, index_col=None, na_values=[’NA’])
The below worked for me.
import xlrd
my_xls = xlrd.open_workbook('//myshareddrive/something/test.xls',encoding_override="gb2312")

Categories

Resources