Reading SQLite DB table into Python

I am having trouble reading data into Python from a sqlite database located in the same directory as the Jupyter notebook where I am doing my work.
The error message (below) led me to believe that the table Player_Attributes does not exist in the database, but after exploring it with DB Browser for SQLite, I see that it is indeed there.
Any guidance would be appreciated.
Code:
cnx = sqlite3.connect('database.sqlite')
df = pd.read_sql_query('SELECT * FROM Player_Attributes', cnx)
Error message:
---------------------------------------------------------------------------
OperationalError                          Traceback (most recent call last)
/home/captain/anaconda3/lib/python3.5/site-packages/pandas/io/sql.py in execute(self, *args, **kwargs)
   1403             else:
-> 1404                 cur.execute(*args)
   1405             return cur

OperationalError: no such table: Player_Attributes
During handling of the above exception, another exception occurred:
DatabaseError                             Traceback (most recent call last)
<ipython-input-4-c51fe6ea9537> in <module>()
5
6 cnx = sqlite3.connect('database.sqlite')
----> 7 df = pd.read_sql_query('SELECT * FROM Player_Attributes', cnx)
/home/captain/anaconda3/lib/python3.5/site-packages/pandas/io/sql.py in read_sql_query(sql, con, index_col, coerce_float, params, parse_dates, chunksize)
330 return pandas_sql.read_query(
331 sql, index_col=index_col, params=params, coerce_float=coerce_float,
--> 332 parse_dates=parse_dates, chunksize=chunksize)
333
334
/home/captain/anaconda3/lib/python3.5/site-packages/pandas/io/sql.py in read_query(self, sql, index_col, coerce_float, params, parse_dates, chunksize)
   1437
   1438         args = _convert_params(sql, params)
-> 1439         cursor = self.execute(*args)
   1440         columns = [col_desc[0] for col_desc in cursor.description]
   1441

/home/captain/anaconda3/lib/python3.5/site-packages/pandas/io/sql.py in execute(self, *args, **kwargs)
   1414             ex = DatabaseError(
   1415                 "Execution failed on sql '%s': %s" % (args[0], exc))
-> 1416             raise_with_traceback(ex)
   1417
   1418     @staticmethod
/home/captain/anaconda3/lib/python3.5/site-packages/pandas/compat/__init__.py in raise_with_traceback(exc, traceback)
342 if traceback == Ellipsis:
343 _, _, traceback = sys.exc_info()
--> 344 raise exc.with_traceback(traceback)
345 else:
346 # this version of raise is a syntax error in Python 3
/home/captain/anaconda3/lib/python3.5/site-packages/pandas/io/sql.py in execute(self, *args, **kwargs)
   1402                 cur.execute(*args, **kwargs)
   1403             else:
-> 1404                 cur.execute(*args)
   1405             return cur
   1406         except Exception as exc:
DatabaseError: Execution failed on sql 'SELECT * FROM Player_Attributes': no such table: Player_Attributes

cnx = sqlite3.connect('database.sqlite')
In this line, instead of just giving the file name, you should provide the complete path to the file. If the path does not point at the existing file, sqlite3.connect() silently creates a new, empty database in the current working directory, which is why the table appears to be missing.
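For example (a minimal sketch; the absolute path is a placeholder, not the asker's actual location):
import os
import sqlite3
import pandas as pd

db_path = '/full/path/to/database.sqlite'   # placeholder: substitute the real absolute path
print(os.path.exists(db_path))              # should print True before you connect

cnx = sqlite3.connect(db_path)
df = pd.read_sql_query('SELECT * FROM Player_Attributes', cnx)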

Related

How to minimize code when using sqlalchemy query and pandas dataframes

I am trying to upgrade my query code by modernizing it.
My old code is below. The first query joins two tables and selects the rating for each song title together with the artist, and the second gets the genres for each title (an association table is used):
items = []
query = db.session.query(Rating, Song).filter(Rating.id==Song.id).all()
for x in query:
    dic = {
        "rating": x[0],
        "title": x[1].title,
        "artist": x[1].artist,
        "genre": Genre.query.filter(Genre.songs.any(title=x[1].title)).all(),
    }
    items.append(dic)
My cleaner code is below. I use pandas DataFrames now instead of dictionaries. This gives me the error ArgumentError: SQL expression element or literal value expected, got somethingsomething
query = db.session.query(Rating, Song).filter(Rating.id==Song.id).all()
df = pd.DataFrame(query, columns=["rating", "title"])
for item in df.title:
    df['genre'] = (Genre.query.filter(Genre.songs.any(title=item)).all())
How do I get this to work?
Are there more efficient ways of coding this?
Complete error produced
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
~\AppData\Local\Programs\Python\Python37\lib\site-packages\sqlalchemy\engine\base.py in execute(self, statement, *multiparams, **params)
1194 try:
-> 1195 meth = statement._execute_on_connection
1196 except AttributeError as err:
AttributeError: 'BaseQuery' object has no attribute '_execute_on_connection'
The above exception was the direct cause of the following exception:
ObjectNotExecutableError Traceback (most recent call last)
<ipython-input-4-99ffacdf2d91> in <module>
1 query = db.session.query(Rating, Song).filter(Rating.id==Song.id)
----> 2 df = pd.read_sql_query(query, db.engine)
3 df
~\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas\io\sql.py in read_sql_query(sql, con, index_col, coerce_float, params, parse_dates, chunksize)
381 coerce_float=coerce_float,
382 parse_dates=parse_dates,
--> 383 chunksize=chunksize,
384 )
385
~\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas\io\sql.py in read_query(self, sql, index_col, coerce_float, parse_dates, params, chunksize)
1292 args = _convert_params(sql, params)
1293
-> 1294 result = self.execute(*args)
1295 columns = result.keys()
1296
~\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas\io\sql.py in execute(self, *args, **kwargs)
1160 def execute(self, *args, **kwargs):
1161 """Simple passthrough to SQLAlchemy connectable"""
-> 1162 return self.connectable.execution_options().execute(*args, **kwargs)
1163
1164 def read_table(
<string> in execute(self, statement, *multiparams, **params)
~\AppData\Local\Programs\Python\Python37\lib\site-packages\sqlalchemy\util\deprecations.py in warned(fn, *args, **kwargs)
388 if not skip_warning:
389 _warn_with_version(message, version, wtype, stacklevel=3)
--> 390 return fn(*args, **kwargs)
391
392 doc = func.__doc__ is not None and func.__doc__ or ""
~\AppData\Local\Programs\Python\Python37\lib\site-packages\sqlalchemy\engine\base.py in execute(self, statement, *multiparams, **params)
3036 """
3037 connection = self.connect(close_with_result=True)
-> 3038 return connection.execute(statement, *multiparams, **params)
3039
3040 #util.deprecated_20(
~\AppData\Local\Programs\Python\Python37\lib\site-packages\sqlalchemy\engine\base.py in execute(self, statement, *multiparams, **params)
1196 except AttributeError as err:
1197 util.raise_(
-> 1198 exc.ObjectNotExecutableError(statement), replace_context=err
1199 )
1200 else:
~\AppData\Local\Programs\Python\Python37\lib\site-packages\sqlalchemy\util\compat.py in raise_(***failed resolving arguments***)
209
210 try:
--> 211 raise exception
212 finally:
213 # credit to
ObjectNotExecutableError: Not an executable object: <flask_sqlalchemy.BaseQuery object at 0x000001CBC5F14A48>
First, create just one query that returns all the data you need in one go, where the grouping of genres is done using the GROUP_CONCAT function:
query = (
    db.session
    .query(
        Rating.rating,
        Song.title,
        Song.artist,
        db.func.GROUP_CONCAT(Genre.category, ", ").label("genres")
    )
    .select_from(Song)
    .where(Rating.id == Song.id)
    .join(Genre, Song.genres)
    .group_by(
        Rating.rating,
        Song.title,
        Song.artist,
    )
)
Then use the pandas method to get it into a dataframe:
df = pd.read_sql_query(query.statement, db.engine)
Where print(df) should produce something like this:
   rating    title artist            genres
0     2.0  title 2  art-2               pop
1     3.0  title 3  art-3         rock, pop
2     4.0  title 4  art-3             other
3     5.0  title 5  art-4  rock, pop, other
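As an optional follow-up (an assumption about the desired output, not part of the original answer): if you want the genres back as Python lists rather than one comma-joined string per row, split the aggregated column after reading it:
df['genres'] = df['genres'].str.split(', ')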

Python Multiprocessing Parallel Insert Into Oracle SQL

I'm currently trying to create a table and insert values into an Oracle SQL database.
I managed to make it work using df.to_sql(name=table_name, con=conn, if_exists='append', index=False), but it took 1h30m to upload a DataFrame of only 10000 rows * 5 columns.
This made me look into multiprocessing, so I tried following the answer Siddhi Kiran Bajracharya gave in this thread, which turned out like this:
import os
import math

import pandas as pd
from sqlalchemy import create_engine
from multiprocessing.dummy import Pool as ThreadPool

import config

LOCATION = r"C:\Oracle\instantclient_19_6"
os.environ["PATH"] = LOCATION + ";" + os.environ["PATH"]

conn = create_engine('oracle+cx_oracle://' + config.user + ':' + config.pw +
                     '@' + config.host + ':' + config.port +
                     '/?service_name=' + config.db + '?charset=latin-1')

def insert_df(df, *args, **kwargs):
    nworkers = 4  # number of workers that execute inserts in parallel

    chunk = math.floor(df.shape[0] / nworkers)  # number of rows per chunk
    chunks = [(chunk * i, (chunk * i) + chunk) for i in range(nworkers)]
    chunks.append((chunk * nworkers, df.shape[0]))

    pool = ThreadPool(nworkers)

    def worker(chunk):
        i, j = chunk
        df.iloc[i:j, :].to_sql(*args, **kwargs)

    pool.map(worker, chunks)
    pool.close()
    pool.join()

insert_df(df, f'{table_name}', conn, if_exists='append', index=False)
The problem is that this last code runs for 20 minutes, only inserts 9 rows into the table, and then raises the following error: DatabaseError: (cx_Oracle.DatabaseError) ORA-00955: name is already used by an existing object
Full Traceback:
---------------------------------------------------------------------------
DatabaseError Traceback (most recent call last)
C:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py in _execute_context(self, dialect, constructor, statement, parameters, *args)
1248 self.dialect.do_execute(
-> 1249 cursor, statement, parameters, context
1250 )
C:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\engine\default.py in do_execute(self, cursor, statement, parameters, context)
579 def do_execute(self, cursor, statement, parameters, context=None):
--> 580 cursor.execute(statement, parameters)
581
DatabaseError: ORA-00955: name is already used by an existing object
The above exception was the direct cause of the following exception:
DatabaseError Traceback (most recent call last)
<ipython-input-73-b50275447767> in <module>
20
21
---> 22 insert_df(df, f'{table_name}', conn, if_exists='append', index=False)
<ipython-input-73-b50275447767> in insert_df(df, *args, **kwargs)
14 df.iloc[i:j, :].to_sql(*args, **kwargs)
15
---> 16 pool.map(worker, chunks)
17 pool.close()
18 pool.join()
C:\ProgramData\Anaconda3\lib\multiprocessing\pool.py in map(self, func, iterable, chunksize)
266 in a list that is returned.
267 '''
--> 268 return self._map_async(func, iterable, mapstar, chunksize).get()
269
270 def starmap(self, func, iterable, chunksize=None):
C:\ProgramData\Anaconda3\lib\multiprocessing\pool.py in get(self, timeout)
655 return self._value
656 else:
--> 657 raise self._value
658
659 def _set(self, i, obj):
C:\ProgramData\Anaconda3\lib\multiprocessing\pool.py in worker(inqueue, outqueue, initializer, initargs, maxtasks, wrap_exception)
119 job, i, func, args, kwds = task
120 try:
--> 121 result = (True, func(*args, **kwds))
122 except Exception as e:
123 if wrap_exception and func is not _helper_reraises_exception:
C:\ProgramData\Anaconda3\lib\multiprocessing\pool.py in mapstar(args)
42
43 def mapstar(args):
---> 44 return list(map(*args))
45
46 def starmapstar(args):
<ipython-input-73-b50275447767> in worker(chunk)
12 def worker(chunk):
13 i, j = chunk
---> 14 df.iloc[i:j, :].to_sql(*args, **kwargs)
15
16 pool.map(worker, chunks)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\generic.py in to_sql(self, name, con, schema, if_exists, index, index_label, chunksize, dtype, method)
2710 chunksize=chunksize,
2711 dtype=dtype,
-> 2712 method=method,
2713 )
2714
C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\sql.py in to_sql(frame, name, con, schema, if_exists, index, index_label, chunksize, dtype, method)
516 chunksize=chunksize,
517 dtype=dtype,
--> 518 method=method,
519 )
520
C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\sql.py in to_sql(self, frame, name, if_exists, index, index_label, schema, chunksize, dtype, method)
1317 dtype=dtype,
1318 )
-> 1319 table.create()
1320 table.insert(chunksize, method=method)
1321 if not name.isdigit() and not name.islower():
C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\sql.py in create(self)
654 )
655 else:
--> 656 self._execute_create()
657
658 def _execute_insert(self, conn, keys, data_iter):
C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\sql.py in _execute_create(self)
636 # Inserting table into database, add to MetaData object
637 self.table = self.table.tometadata(self.pd_sql.meta)
--> 638 self.table.create()
639
640 def create(self):
C:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\sql\schema.py in create(self, bind, checkfirst)
868 if bind is None:
869 bind = _bind_or_error(self)
--> 870 bind._run_visitor(ddl.SchemaGenerator, self, checkfirst=checkfirst)
871
872 def drop(self, bind=None, checkfirst=False):
C:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py in _run_visitor(self, visitorcallable, element, connection, **kwargs)
2044 ):
2045 with self._optional_conn_ctx_manager(connection) as conn:
-> 2046 conn._run_visitor(visitorcallable, element, **kwargs)
2047
2048 class _trans_ctx(object):
C:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py in _run_visitor(self, visitorcallable, element, **kwargs)
1613
1614 def _run_visitor(self, visitorcallable, element, **kwargs):
-> 1615 visitorcallable(self.dialect, self, **kwargs).traverse_single(element)
1616
1617
C:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\sql\visitors.py in traverse_single(self, obj, **kw)
136 meth = getattr(v, "visit_%s" % obj.__visit_name__, None)
137 if meth:
--> 138 return meth(obj, **kw)
139
140 def iterate(self, obj):
C:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\sql\ddl.py in visit_table(self, table, create_ok, include_foreign_key_constraints, _is_metadata_operation)
824 table,
825 include_foreign_key_constraints= # noqa
--> 826 include_foreign_key_constraints,
827 )
828 # fmt: on
C:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py in execute(self, object_, *multiparams, **params)
986 raise exc.ObjectNotExecutableError(object_)
987 else:
--> 988 return meth(self, multiparams, params)
989
990 def _execute_function(self, func, multiparams, params):
C:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\sql\ddl.py in _execute_on_connection(self, connection, multiparams, params)
70
71 def _execute_on_connection(self, connection, multiparams, params):
---> 72 return connection._execute_ddl(self, multiparams, params)
73
74 def execute(self, bind=None, target=None):
C:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py in _execute_ddl(self, ddl, multiparams, params)
1048 compiled,
1049 None,
-> 1050 compiled,
1051 )
1052 if self._has_events or self.engine._has_events:
C:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py in _execute_context(self, dialect, constructor, statement, parameters, *args)
1251 except BaseException as e:
1252 self._handle_dbapi_exception(
-> 1253 e, statement, parameters, cursor, context
1254 )
1255
C:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py in _handle_dbapi_exception(self, e, statement, parameters, cursor, context)
1471 util.raise_from_cause(newraise, exc_info)
1472 elif should_wrap:
-> 1473 util.raise_from_cause(sqlalchemy_exception, exc_info)
1474 else:
1475 util.reraise(*exc_info)
C:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\util\compat.py in raise_from_cause(exception, exc_info)
396 exc_type, exc_value, exc_tb = exc_info
397 cause = exc_value if exc_value is not exception else None
--> 398 reraise(type(exception), exception, tb=exc_tb, cause=cause)
399
400
C:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\util\compat.py in reraise(tp, value, tb, cause)
150 value.__cause__ = cause
151 if value.__traceback__ is not tb:
--> 152 raise value.with_traceback(tb)
153 raise value
154
C:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py in _execute_context(self, dialect, constructor, statement, parameters, *args)
1247 if not evt_handled:
1248 self.dialect.do_execute(
-> 1249 cursor, statement, parameters, context
1250 )
1251 except BaseException as e:
C:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\engine\default.py in do_execute(self, cursor, statement, parameters, context)
578
579 def do_execute(self, cursor, statement, parameters, context=None):
--> 580 cursor.execute(statement, parameters)
581
582 def do_execute_no_params(self, cursor, statement, context=None):
DatabaseError: (cx_Oracle.DatabaseError) ORA-00955: name is already used by an existing object
[SQL:
CREATE TABLE "TEST_TABLE_DELETE" (
"id" CLOB,
"name" CLOB,
"var1" CLOB,
"var2" CLOB,
"var3" CLOB,
"var4" CLOB,
"var5" CLOB,
"var6" CLOB,
"var7" CLOB,
"var8" CLOB,
"var9" CLOB,
"var10" CLOB,
"var11" CLOB,
"var12" FLOAT,
"var13" CLOB,
"var14" CLOB
)
]
(Background on this error at: http://sqlalche.me/e/4xp6)
Any pointers to help me solve this issue would be greatly appreciated.
Thanks!
Luti
If you're using to_sql with string columns in your dataframe, you should pass an explicit dtype mapping so the string columns are created as VARCHAR rather than CLOB (which is what the CREATE TABLE in your traceback shows), something like this:
from sqlalchemy import types

dtyp = {c: types.VARCHAR(data[c].str.len().max())
        for c in data.columns[data.dtypes == 'object'].tolist()}

data.to_sql('table_name.....', con=..., if_exists='append',
            index=False,
            dtype=dtyp)
For 10k rows, it should be very fast.

Python Sql code error - sqlite3.OperationalError: too many SQL variables

I am trying the code below, but I am getting an error.
if not os.path.isfile('train.db'):
    disk_engine = create_engine('sqlite:///train.db')
    start = dt.datetime.now()
    chunksize = 15000
    j = 0
    index_start = 1
    for df in pd.read_csv('final_features.csv', names=['Unnamed: 0','id','is_duplicate','cwc_min','cwc_max','csc_min','csc_max','ctc_min','ctc_max','last_word_eq','first_word_eq','abs_len_diff','mean_len','token_set_ratio','token_sort_ratio','fuzz_ratio','fuzz_partial_ratio','longest_substr_ratio','freq_qid1','freq_qid2','q1len','q2len','q1_n_words','q2_n_words','word_Common','word_Total','word_share','freq_q1+q2','freq_q1-q2','0_x','1_x','2_x','3_x','4_x','5_x','6_x','7_x','8_x','9_x','10_x','11_x','12_x','13_x','14_x','15_x','16_x','17_x','18_x','19_x','20_x','21_x','22_x','23_x','24_x','25_x','26_x','27_x','28_x','29_x','30_x','31_x','32_x','33_x','34_x','35_x','36_x','37_x','38_x','39_x','40_x','41_x','42_x','43_x','44_x','45_x','46_x','47_x','48_x','49_x','50_x','51_x','52_x','53_x','54_x','55_x','56_x','57_x','58_x','59_x','60_x','61_x','62_x','63_x','64_x','65_x','66_x','67_x','68_x','69_x','70_x','71_x','72_x','73_x','74_x','75_x','76_x','77_x','78_x','79_x','80_x','81_x','82_x','83_x','84_x','85_x','86_x','87_x','88_x','89_x','90_x','91_x','92_x','93_x','94_x','95_x','0_y','1_y','2_y','3_y','4_y','5_y','6_y','7_y','8_y','9_y','10_y','11_y','12_y','13_y','14_y','15_y','16_y','17_y','18_y','19_y','20_y','21_y','22_y','23_y','24_y','25_y','26_y','27_y','28_y','29_y','30_y','31_y','32_y','33_y','34_y','35_y','36_y','37_y','38_y','39_y','40_y','41_y','42_y','43_y','44_y','45_y','46_y','47_y','48_y','49_y','50_y','51_y','52_y','53_y','54_y','55_y','56_y','57_y','58_y','59_y','60_y','61_y','62_y','63_y','64_y','65_y','66_y','67_y','68_y','69_y','70_y','71_y','72_y','73_y','74_y','75_y','76_y','77_y','78_y','79_y','80_y','81_y','82_y','83_y','84_y','85_y','86_y','87_y','88_y','89_y','90_y','91_y','92_y','93_y','94_y','95_y'], chunksize=chunksize, iterator=True, encoding='utf-8', ):
        df.index += index_start
        j+=1
        print('{} rows'.format(j*chunksize))
        df.to_sql('data', disk_engine, if_exists='append')
        index_start = df.index[-1] + 1
This is the output that I am getting:
15000 rows
---------------------------------------------------------------------------
OperationalError Traceback (most recent call last)
~\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py in _execute_context(self, dialect, constructor, statement, parameters, *args)
1192 parameters,
-> 1193 context)
1194 except BaseException as e:
~\Anaconda3\lib\site-packages\sqlalchemy\engine\default.py in do_execute(self, cursor, statement, parameters, context)
506 def do_execute(self, cursor, statement, parameters, context=None):
--> 507 cursor.execute(statement, parameters)
508
OperationalError: too many SQL variables
The above exception was the direct cause of the following exception:
OperationalError Traceback (most recent call last)
<ipython-input-83-b376654c990a> in <module>()
14 j+=1
15 print('{} rows'.format(j*chunksize))
---> 16 df.to_sql('data', disk_engine, if_exists='append')
17 index_start = df.index[-1] + 1
~\Anaconda3\lib\site-packages\pandas\core\generic.py in to_sql(self, name, con, schema, if_exists, index, index_label, chunksize, dtype)
2125 ... df2.to_excel(writer, sheet_name='Sheet_name_2')
2126
-> 2127 ExcelWriter can also be used to append to an existing Excel file:
2128
2129 >>> with pd.ExcelWriter('output.xlsx',
~\Anaconda3\lib\site-packages\pandas\io\sql.py in to_sql(frame, name, con, schema, if_exists, index, index_label, chunksize, dtype)
448 index=True,
449 index_label=None,
--> 450 chunksize=None,
451 dtype=None,
452 method=None,
~\Anaconda3\lib\site-packages\pandas\io\sql.py in to_sql(self, frame, name, if_exists, index, index_label, schema, chunksize, dtype)
1147
1148 #staticmethod
-> 1149 def _query_iterator(
1150 result, chunksize, columns, index_col=None, coerce_float=True, parse_dates=None
1151 ):
~\Anaconda3\lib\site-packages\pandas\io\sql.py in insert(self, chunksize)
661 ----------
662 conn : sqlalchemy.engine.Engine or sqlalchemy.engine.Connection
--> 663 keys : list of str
664 Column names
665 data_iter : generator of list
~\Anaconda3\lib\site-packages\pandas\io\sql.py in _execute_insert(self, conn, keys, data_iter)
636 return str(CreateTable(self.table).compile(self.pd_sql.connectable))
637
--> 638 def _execute_create(self):
639 # Inserting table into database, add to MetaData object
640 self.table = self.table.tometadata(self.pd_sql.meta)
~\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py in execute(self, object, *multiparams, **params)
946 raise exc.ObjectNotExecutableError(object)
947 else:
--> 948 return meth(self, multiparams, params)
949
950 def _execute_function(self, func, multiparams, params):
~\Anaconda3\lib\site-packages\sqlalchemy\sql\elements.py in _execute_on_connection(self, connection, multiparams, params)
267 def _execute_on_connection(self, connection, multiparams, params):
268 if self.supports_execution:
--> 269 return connection._execute_clauseelement(self, multiparams, params)
270 else:
271 raise exc.ObjectNotExecutableError(self)
~\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py in _execute_clauseelement(self, elem, multiparams, params)
1058 compiled_sql,
1059 distilled_params,
-> 1060 compiled_sql, distilled_params
1061 )
1062 if self._has_events or self.engine._has_events:
~\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py in _execute_context(self, dialect, constructor, statement, parameters, *args)
1198 parameters,
1199 cursor,
-> 1200 context)
1201
1202 if self._has_events or self.engine._has_events:
~\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py in _handle_dbapi_exception(self, e, statement, parameters, cursor, context)
1411 util.raise_from_cause(
1412 sqlalchemy_exception,
-> 1413 exc_info
1414 )
1415 else:
~\Anaconda3\lib\site-packages\sqlalchemy\util\compat.py in raise_from_cause(exception, exc_info)
201 exc_type, exc_value, exc_tb = exc_info
202 cause = exc_value if exc_value is not exception else None
--> 203 reraise(type(exception), exception, tb=exc_tb, cause=cause)
204
205 if py3k:
~\Anaconda3\lib\site-packages\sqlalchemy\util\compat.py in reraise(tp, value, tb, cause)
184 value.__cause__ = cause
185 if value.__traceback__ is not tb:
--> 186 raise value.with_traceback(tb)
187 raise value
188
~\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py in _execute_context(self, dialect, constructor, statement, parameters, *args)
1191 statement,
1192 parameters,
-> 1193 context)
1194 except BaseException as e:
1195 self._handle_dbapi_exception(
~\Anaconda3\lib\site-packages\sqlalchemy\engine\default.py in do_execute(self, cursor, statement, parameters, context)
505
506 def do_execute(self, cursor, statement, parameters, context=None):
--> 507 cursor.execute(statement, parameters)
508
509 def do_execute_no_params(self, cursor, statement, context=None):
I have already tried various chunksize values but it didn't work. Can anyone please suggest how to fix this error? I am running this code in a Jupyter notebook. I already have updated versions of pandas and the other libraries, so there is no compatibility issue.
This error is related to the number of parameters being passed to sqlite3. Behind the scenes, a SQL query like INSERT INTO myTable (col1, col2, col3, ..., col_n) VALUES (?, ?, ?, ..., ?) is issued to the database engine, where each ? is a value from your dataframe bound as a parameter.
The error occurs because your dataframe is very wide (it has lots of columns), so many parameters are bound to a single SQL statement during insertion. You can see the line in the error stack that confirms this:
--> 507 cursor.execute(statement, parameters)
SQLite can only handle a limited number of bound parameters per statement (the SQLITE_MAX_VARIABLE_NUMBER limit); this is a limitation of SQLite itself. You can scroll down to #9 on this page for more info.
Setting chunksize will not resolve the problem. My suggestion is to use another database such as PostgreSQL or MySQL.
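A minimal sketch of that suggestion (the connection string is a placeholder, and the long names=[...] list from the question is omitted for brevity):
import pandas as pd
from sqlalchemy import create_engine

# PostgreSQL does not share SQLite's low per-statement variable limit
disk_engine = create_engine('postgresql+psycopg2://user:password@localhost:5432/train_db')

chunksize = 15000
for df in pd.read_csv('final_features.csv', chunksize=chunksize,
                      iterator=True, encoding='utf-8'):
    df.to_sql('data', disk_engine, if_exists='append')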

"not all arguments converted during string formatting" when to_sql

I'm trying the following piece of code, which I found in a 2016 book:
import MySQLdb
import pandas as pd
# database setup omitted for the sake of brevity
nr_customers = 100
colnames = ["movie%i" %i for i in range(1, 33)]
pd.np.random.seed(2015)
generated_customers = pd.np.random.randint(0,2,32 * nr_customers).reshape(nr_customers,32)
data = pd.DataFrame(generated_customers, columns = list(colnames))
data.to_sql('cust',mc,index=True,if_exists='replace',index_label='cust_id')
And it's just giving me the following error:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
~/anaconda3/envs/TestEnv/lib/python3.7/site-packages/MySQLdb/cursors.py in execute(self, query, args)
242 try:
--> 243 query = query % args
244 except TypeError as m:
TypeError: not all arguments converted during string formatting
During handling of the above exception, another exception occurred:
ProgrammingError Traceback (most recent call last)
~/anaconda3/envs/TestEnv/lib/python3.7/site-packages/pandas/io/sql.py in execute(self, *args, **kwargs)
1430 else:
-> 1431 cur.execute(*args)
1432 return cur
~/anaconda3/envs/TestEnv/lib/python3.7/site-packages/MySQLdb/cursors.py in execute(self, query, args)
244 except TypeError as m:
--> 245 self.errorhandler(self, ProgrammingError, str(m))
246
~/anaconda3/envs/TestEnv/lib/python3.7/site-packages/MySQLdb/connections.py in defaulterrorhandler(***failed resolving arguments***)
51 if errorclass is not None:
---> 52 raise errorclass(errorvalue)
53 else:
ProgrammingError: not all arguments converted during string formatting
During handling of the above exception, another exception occurred:
DatabaseError Traceback (most recent call last)
<ipython-input-24-125bb185f2f4> in <module>
4 generated_customers = pd.np.random.randint(0,2,32 * nr_customers).reshape(nr_customers,32)
5 data = pd.DataFrame(generated_customers, columns = list(colnames))
----> 6 data.to_sql('cust',mc,index=True,if_exists='replace',index_label='cust_id')
~/anaconda3/envs/TestEnv/lib/python3.7/site-packages/pandas/core/generic.py in to_sql(self, name, con, schema, if_exists, index, index_label, chunksize, dtype, method)
2529 sql.to_sql(self, name, con, schema=schema, if_exists=if_exists,
2530 index=index, index_label=index_label, chunksize=chunksize,
-> 2531 dtype=dtype, method=method)
2532
2533 def to_pickle(self, path, compression='infer',
~/anaconda3/envs/TestEnv/lib/python3.7/site-packages/pandas/io/sql.py in to_sql(frame, name, con, schema, if_exists, index, index_label, chunksize, dtype, method)
458 pandas_sql.to_sql(frame, name, if_exists=if_exists, index=index,
459 index_label=index_label, schema=schema,
--> 460 chunksize=chunksize, dtype=dtype, method=method)
461
462
~/anaconda3/envs/TestEnv/lib/python3.7/site-packages/pandas/io/sql.py in to_sql(self, frame, name, if_exists, index, index_label, schema, chunksize, dtype, method)
1544 if_exists=if_exists, index_label=index_label,
1545 dtype=dtype)
-> 1546 table.create()
1547 table.insert(chunksize, method)
1548
~/anaconda3/envs/TestEnv/lib/python3.7/site-packages/pandas/io/sql.py in create(self)
570
571 def create(self):
--> 572 if self.exists():
573 if self.if_exists == 'fail':
574 raise ValueError(
~/anaconda3/envs/TestEnv/lib/python3.7/site-packages/pandas/io/sql.py in exists(self)
558
559 def exists(self):
--> 560 return self.pd_sql.has_table(self.name, self.schema)
561
562 def sql_schema(self):
~/anaconda3/envs/TestEnv/lib/python3.7/site-packages/pandas/io/sql.py in has_table(self, name, schema)
1556 "WHERE type='table' AND name={wld};").format(wld=wld)
1557
-> 1558 return len(self.execute(query, [name, ]).fetchall()) > 0
1559
1560 def get_table(self, table_name, schema=None):
~/anaconda3/envs/TestEnv/lib/python3.7/site-packages/pandas/io/sql.py in execute(self, *args, **kwargs)
1443 "Execution failed on sql '{sql}': {exc}".format(
1444 sql=args[0], exc=exc))
-> 1445 raise_with_traceback(ex)
1446
1447 #staticmethod
~/anaconda3/envs/TestEnv/lib/python3.7/site-packages/pandas/compat/__init__.py in raise_with_traceback(exc, traceback)
418 if traceback == Ellipsis:
419 _, _, traceback = sys.exc_info()
--> 420 raise exc.with_traceback(traceback)
421 else:
422 # this version of raise is a syntax error in Python 3
~/anaconda3/envs/TestEnv/lib/python3.7/site-packages/pandas/io/sql.py in execute(self, *args, **kwargs)
1429 cur.execute(*args, **kwargs)
1430 else:
-> 1431 cur.execute(*args)
1432 return cur
1433 except Exception as exc:
~/anaconda3/envs/TestEnv/lib/python3.7/site-packages/MySQLdb/cursors.py in execute(self, query, args)
243 query = query % args
244 except TypeError as m:
--> 245 self.errorhandler(self, ProgrammingError, str(m))
246
247 if isinstance(query, unicode):
~/anaconda3/envs/TestEnv/lib/python3.7/site-packages/MySQLdb/connections.py in defaulterrorhandler(***failed resolving arguments***)
50 raise errorvalue
51 if errorclass is not None:
---> 52 raise errorclass(errorvalue)
53 else:
54 raise Exception(errorvalue)
DatabaseError: Execution failed on sql 'SELECT name FROM sqlite_master WHERE type='table' AND name=?;': not all arguments converted during string formatting
Which I can summarize as "DatabaseError: Execution failed on sql 'SELECT name FROM sqlite_master WHERE type='table' AND name=?;': not all arguments converted during string formatting".
I've tried different approaches, like using f"${}" and so on, but the error is the same.
The code isn't completely the same as in the book, since I had to remove the flavor = 'mysql' argument used in to_sql.
I'm using:
mysql Ver 8.0.15 for osx10.13 on x86_64 (Homebrew)
Python 3.7.2
conda 4.6.7
pandas 0.24.2 py37h0a44026_0
mysql-connector-c 6.1.11 hccea1a4_0
mysqlclient 1.3.14 py37h1de35cc_0
Never mind. I just changed to SQLAlchemy with pymysql and saved a lot of time and lines of code:
from sqlalchemy import create_engine
engine = create_engine('mysql+pymysql://user:password@localhost/database')
...
data.to_sql(table, con = engine)
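For reference, a fuller sketch of the same fix (the engine URL is a placeholder; numpy is used directly instead of the now-deprecated pd.np):
import numpy as np
import pandas as pd
from sqlalchemy import create_engine

engine = create_engine('mysql+pymysql://user:password@localhost/database')

nr_customers = 100
colnames = ["movie%i" % i for i in range(1, 33)]
np.random.seed(2015)
generated_customers = np.random.randint(0, 2, 32 * nr_customers).reshape(nr_customers, 32)
data = pd.DataFrame(generated_customers, columns=colnames)

data.to_sql('cust', con=engine, index=True, if_exists='replace', index_label='cust_id')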

update pandas to postgres

I'm trying to upload a pandas dataframe to a PostgreSQL database but I'm encountering an error.
import pandas as pd
import psycopg2
import pandas.io.sql as sql
conn_string = "host='localhost' dbname='**' user='postgres' password='**' port=5432"
conn = psycopg2.connect(conn_string)
cursor = conn.cursor()
hh = pd.read_csv("C:/opus/data/mrcog/inputs/synthpop/synth_hhlds.csv")
hh.to_sql('buildings_updated', conn)
but when I try to upload the table, I get an error that I don't understand.
DatabaseError Traceback (most recent call last)
<ipython-input-12-b1b2758437b2> in <module>()
16
17 hh = pd.read_csv("C:/opus/data/mrcog/inputs/synthpop/synth_hhlds.csv")
---> 18 hh.to_sql('buildings_updated', conn)
C:\Anaconda2\lib\site-packages\pandas\core\generic.pyc in to_sql(self, name, con, flavor, schema, if_exists, index, index_label, chunksize, dtype)
1343 sql.to_sql(self, name, con, flavor=flavor, schema=schema,
1344 if_exists=if_exists, index=index, index_label=index_label,
-> 1345 chunksize=chunksize, dtype=dtype)
1346
1347 def to_pickle(self, path, compression='infer'):
C:\Anaconda2\lib\site-packages\pandas\io\sql.pyc in to_sql(frame, name, con, flavor, schema, if_exists, index, index_label, chunksize, dtype)
469 pandas_sql.to_sql(frame, name, if_exists=if_exists, index=index,
470 index_label=index_label, schema=schema,
--> 471 chunksize=chunksize, dtype=dtype)
472
473
C:\Anaconda2\lib\site-packages\pandas\io\sql.pyc in to_sql(self, frame, name, if_exists, index, index_label, schema, chunksize, dtype)
1503 if_exists=if_exists, index_label=index_label,
1504 dtype=dtype)
-> 1505 table.create()
1506 table.insert(chunksize)
1507
C:\Anaconda2\lib\site-packages\pandas\io\sql.pyc in create(self)
584
585 def create(self):
--> 586 if self.exists():
587 if self.if_exists == 'fail':
588 raise ValueError("Table '%s' already exists." % self.name)
C:\Anaconda2\lib\site-packages\pandas\io\sql.pyc in exists(self)
572
573 def exists(self):
--> 574 return self.pd_sql.has_table(self.name, self.schema)
575
576 def sql_schema(self):
C:\Anaconda2\lib\site-packages\pandas\io\sql.pyc in has_table(self, name, schema)
1515 "WHERE type='table' AND name=%s;") % wld
1516
-> 1517 return len(self.execute(query, [name, ]).fetchall()) > 0
1518
1519 def get_table(self, table_name, schema=None):
C:\Anaconda2\lib\site-packages\pandas\io\sql.pyc in execute(self, *args, **kwargs)
1414 ex = DatabaseError(
1415 "Execution failed on sql '%s': %s" % (args[0], exc))
-> 1416 raise_with_traceback(ex)
1417
1418 #staticmethod
C:\Anaconda2\lib\site-packages\pandas\io\sql.pyc in execute(self, *args, **kwargs)
1402 cur.execute(*args, **kwargs)
1403 else:
-> 1404 cur.execute(*args)
1405 return cur
1406 except Exception as exc:
DatabaseError: Execution failed on sql 'SELECT name FROM sqlite_master WHERE type='table' AND name=?;': relation "sqlite_master" does not exist
LINE 1: SELECT name FROM sqlite_master WHERE type='table' AND name=?...
How can I fix this error? Uploading the dataframe should be straightforward. I'm using pandas version 4.3.34.
the docs say:
flavor : ‘sqlite’, default None
Deprecated since version 0.19.0: ‘sqlite’ is the only supported option
if SQLAlchemy is not used.
http://pandas.pydata.org/pandas-docs/version/0.22/generated/pandas.DataFrame.to_sql.html
So it seems like you should be using SQLAlchemy for this, unless your error is about the table already existing; but it seems more likely it is because you are not using SQLite, or SQLAlchemy as a proxy to a real DB like Postgres.
another part of the doc:
if_exists : {‘fail’, ‘replace’, ‘append’}, default ‘fail’
fail: If table exists, do nothing. replace: If table exists, drop it,
recreate it, and insert data. append: If table exists, insert data.
Create if does not exist.
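Putting those two excerpts together, a minimal sketch of the SQLAlchemy route (the credentials mirror the placeholders in the question and are not verified):
import pandas as pd
from sqlalchemy import create_engine

engine = create_engine('postgresql+psycopg2://postgres:**@localhost:5432/**')

hh = pd.read_csv("C:/opus/data/mrcog/inputs/synthpop/synth_hhlds.csv")
hh.to_sql('buildings_updated', engine, if_exists='replace', index=False)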
