Related
I have a MySQL query that runs just fine in Workbench, but I can't get it to work in Python using SQLAlchemy. The statement ran fine until I added AND _deleted_at IS NULL. I have figured out that the column name is the problem, and I assume that the underscores are the issue. The "GroupId" column and the "_deleted_at" column are both in the same table, "contact_group_assign".
The full query string that works is
# Query text later passed to pd.read_sql. The inner SELECT starts from
# contact_group_assign (aliased "registry") filtered to GroupId 22492, left-joins
# the order items for the listed ProductIds and joins contact; the outer SELECT
# derives PurchaserId as the part of Purchaser20 before the first ";".
# NOTE(review): "%%" is presumably an escaped literal "%" for the driver's
# %-style parameter formatting -- confirm against the driver in use.
# NOTE(review): a trailing backslash inside a string literal joins the next line
# without inserting whitespace, so lines such as "registry\" rely on leading
# whitespace of the following line (not visible here) -- confirm the joined SQL.
sql ='SELECT *, CASE WHEN Purchaser20 <> "0" THEN SUBSTR(Purchaser20,1,POSITION(";" IN Purchaser20)-1)\
ELSE 0 END AS PurchaserId \
FROM (SELECT sales.ItemName, sales.Qty, sales.ProductId, contact.Id AS ContactId, concat(contact.FirstName, " ", contact.LastName) as NAME, \
CASE WHEN contact._data LIKE "%%PurchaserID%%" THEN SUBSTR(contact._data,POSITION("PurchaserID" IN contact._data)+15,20)\
ELSE 0 END as Purchaser20 \
FROM contact_group_assign registry\
LEFT JOIN (select order_item.ItemName, order_item.Qty, order_item.ProductId, invoice.ContactId \
FROM order_item\
LEFT JOIN invoice_item ON order_item.Id=invoice_item.OrderItemId\
LEFT JOIN invoice ON invoice_item.InvoiceId = invoice.Id\
LEFT JOIN payment ON invoice_item.InvoiceId = payment.InvoiceId\
LEFT JOIN invoice_payment ON invoice_item.InvoiceId = invoice_payment.InvoiceId\
WHERE ProductId IN (3682,3684,3686,3688,3690,3692,3694,3696,3698,3700,3702,3704,3706,3708)\
) sales\
ON registry.ContactId = sales.ContactId\
JOIN contact ON registry.ContactId=contact.Id\
WHERE GroupId = 22492\
) a'
The line that breaks is the Where clause
WHERE GroupId = 22492 AND _deleted_at IS NULL\
The error I receive is:
---------------------------------------------------------------------------
OperationalError Traceback (most recent call last)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\engine\base.py:1808, in Connection._execute_context(self, dialect, constructor, statement, parameters, execution_options, *args, **kw)
1807 if not evt_handled:
-> 1808 self.dialect.do_execute(
1809 cursor, statement, parameters, context
1810 )
1812 if self._has_events or self.engine._has_events:
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\engine\default.py:732, in DefaultDialect.do_execute(self, cursor, statement, parameters, context)
731 def do_execute(self, cursor, statement, parameters, context=None):
--> 732 cursor.execute(statement, parameters)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pymysql\cursors.py:148, in Cursor.execute(self, query, args)
146 query = self.mogrify(query, args)
--> 148 result = self._query(query)
149 self._executed = query
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pymysql\cursors.py:310, in Cursor._query(self, q)
309 self._clear_result()
--> 310 conn.query(q)
311 self._do_get_result()
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pymysql\connections.py:548, in Connection.query(self, sql, unbuffered)
547 self._execute_command(COMMAND.COM_QUERY, sql)
--> 548 self._affected_rows = self._read_query_result(unbuffered=unbuffered)
549 return self._affected_rows
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pymysql\connections.py:775, in Connection._read_query_result(self, unbuffered)
774 result = MySQLResult(self)
--> 775 result.read()
776 self._result = result
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pymysql\connections.py:1156, in MySQLResult.read(self)
1155 try:
-> 1156 first_packet = self.connection._read_packet()
1158 if first_packet.is_ok_packet():
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pymysql\connections.py:725, in Connection._read_packet(self, packet_type)
724 self._result.unbuffered_active = False
--> 725 packet.raise_for_error()
726 return packet
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pymysql\protocol.py:221, in MysqlPacket.raise_for_error(self)
220 print("errno =", errno)
--> 221 err.raise_mysql_exception(self._data)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pymysql\err.py:143, in raise_mysql_exception(data)
142 errorclass = InternalError if errno < 1000 else OperationalError
--> 143 raise errorclass(errno, errval)
OperationalError: (1052, "Column '_deleted_at' in where clause is ambiguous")
The above exception was the direct cause of the following exception:
OperationalError Traceback (most recent call last)
c:\Users\Chris Hill\OneDrive - Brave Thinking Institute\dblv_05_22\final\create list with guest designation.ipynb Cell 2' in <cell line: 1>()
----> 1 registry = pd.read_sql(sql, cnx)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pandas\io\sql.py:592, in read_sql(sql, con, index_col, coerce_float, params, parse_dates, columns, chunksize)
583 return pandas_sql.read_table(
584 sql,
585 index_col=index_col,
(...)
589 chunksize=chunksize,
590 )
591 else:
--> 592 return pandas_sql.read_query(
593 sql,
594 index_col=index_col,
595 params=params,
596 coerce_float=coerce_float,
597 parse_dates=parse_dates,
598 chunksize=chunksize,
599 )
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pandas\io\sql.py:1557, in SQLDatabase.read_query(self, sql, index_col, coerce_float, parse_dates, params, chunksize, dtype)
1509 """
1510 Read SQL query into a DataFrame.
1511
(...)
1553
1554 """
1555 args = _convert_params(sql, params)
-> 1557 result = self.execute(*args)
1558 columns = result.keys()
1560 if chunksize is not None:
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pandas\io\sql.py:1402, in SQLDatabase.execute(self, *args, **kwargs)
1400 def execute(self, *args, **kwargs):
1401 """Simple passthrough to SQLAlchemy connectable"""
-> 1402 return self.connectable.execution_options().execute(*args, **kwargs)
File <string>:2, in execute(self, statement, *multiparams, **params)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\util\deprecations.py:401, in _decorate_with_warning.<locals>.warned(fn, *args, **kwargs)
399 if not skip_warning:
400 _warn_with_version(message, version, wtype, stacklevel=3)
--> 401 return fn(*args, **kwargs)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\engine\base.py:3152, in Engine.execute(self, statement, *multiparams, **params)
3134 """Executes the given construct and returns a
3135 :class:`_engine.CursorResult`.
3136
(...)
3149
3150 """
3151 connection = self.connect(close_with_result=True)
-> 3152 return connection.execute(statement, *multiparams, **params)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\engine\base.py:1280, in Connection.execute(self, statement, *multiparams, **params)
1271 if isinstance(statement, util.string_types):
1272 util.warn_deprecated_20(
1273 "Passing a string to Connection.execute() is "
1274 "deprecated and will be removed in version 2.0. Use the "
(...)
1277 "driver-level SQL string."
1278 )
-> 1280 return self._exec_driver_sql(
1281 statement,
1282 multiparams,
1283 params,
1284 _EMPTY_EXECUTION_OPTS,
1285 future=False,
1286 )
1288 try:
1289 meth = statement._execute_on_connection
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\engine\base.py:1584, in Connection._exec_driver_sql(self, statement, multiparams, params, execution_options, future)
1574 (
1575 statement,
1576 distilled_params,
(...)
1580 statement, distilled_parameters, execution_options
1581 )
1583 dialect = self.dialect
-> 1584 ret = self._execute_context(
1585 dialect,
1586 dialect.execution_ctx_cls._init_statement,
1587 statement,
1588 distilled_parameters,
1589 execution_options,
1590 statement,
1591 distilled_parameters,
1592 )
1594 if not future:
1595 if self._has_events or self.engine._has_events:
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\engine\base.py:1851, in Connection._execute_context(self, dialect, constructor, statement, parameters, execution_options, *args, **kw)
1848 branched.close()
1850 except BaseException as e:
-> 1851 self._handle_dbapi_exception(
1852 e, statement, parameters, cursor, context
1853 )
1855 return result
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\engine\base.py:2032, in Connection._handle_dbapi_exception(self, e, statement, parameters, cursor, context)
2030 util.raise_(newraise, with_traceback=exc_info[2], from_=e)
2031 elif should_wrap:
-> 2032 util.raise_(
2033 sqlalchemy_exception, with_traceback=exc_info[2], from_=e
2034 )
2035 else:
2036 util.raise_(exc_info[1], with_traceback=exc_info[2])
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\util\compat.py:207, in raise_(***failed resolving arguments***)
204 exception.__cause__ = replace_context
206 try:
--> 207 raise exception
208 finally:
209 # credit to
210 # https://cosmicpercolator.com/2016/01/13/exception-leaks-in-python-2-and-3/
211 # as the __traceback__ object creates a cycle
212 del exception, replace_context, from_, with_traceback
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\engine\base.py:1808, in Connection._execute_context(self, dialect, constructor, statement, parameters, execution_options, *args, **kw)
1806 break
1807 if not evt_handled:
-> 1808 self.dialect.do_execute(
1809 cursor, statement, parameters, context
1810 )
1812 if self._has_events or self.engine._has_events:
1813 self.dispatch.after_cursor_execute(
1814 self,
1815 cursor,
(...)
1819 context.executemany,
1820 )
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\engine\default.py:732, in DefaultDialect.do_execute(self, cursor, statement, parameters, context)
731 def do_execute(self, cursor, statement, parameters, context=None):
--> 732 cursor.execute(statement, parameters)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pymysql\cursors.py:148, in Cursor.execute(self, query, args)
144 pass
146 query = self.mogrify(query, args)
--> 148 result = self._query(query)
149 self._executed = query
150 return result
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pymysql\cursors.py:310, in Cursor._query(self, q)
308 self._last_executed = q
309 self._clear_result()
--> 310 conn.query(q)
311 self._do_get_result()
312 return self.rowcount
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pymysql\connections.py:548, in Connection.query(self, sql, unbuffered)
546 sql = sql.encode(self.encoding, "surrogateescape")
547 self._execute_command(COMMAND.COM_QUERY, sql)
--> 548 self._affected_rows = self._read_query_result(unbuffered=unbuffered)
549 return self._affected_rows
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pymysql\connections.py:775, in Connection._read_query_result(self, unbuffered)
773 else:
774 result = MySQLResult(self)
--> 775 result.read()
776 self._result = result
777 if result.server_status is not None:
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pymysql\connections.py:1156, in MySQLResult.read(self)
1154 def read(self):
1155 try:
-> 1156 first_packet = self.connection._read_packet()
1158 if first_packet.is_ok_packet():
1159 self._read_ok_packet(first_packet)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pymysql\connections.py:725, in Connection._read_packet(self, packet_type)
723 if self._result is not None and self._result.unbuffered_active is True:
724 self._result.unbuffered_active = False
--> 725 packet.raise_for_error()
726 return packet
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pymysql\protocol.py:221, in MysqlPacket.raise_for_error(self)
219 if DEBUG:
220 print("errno =", errno)
--> 221 err.raise_mysql_exception(self._data)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pymysql\err.py:143, in raise_mysql_exception(data)
141 if errorclass is None:
142 errorclass = InternalError if errno < 1000 else OperationalError
--> 143 raise errorclass(errno, errval)
OperationalError: (pymysql.err.OperationalError) (1052, "Column '_deleted_at' in where clause is ambiguous")
This was resolved by qualifying _deleted_at with its table alias. The line
WHERE GroupId = 22492\
was changed to
WHERE GroupId = 22492 AND registry._deleted_at IS NULL\
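More than one table in the query has a column named "_deleted_at", so MySQL could not tell which one the unqualified name referred to (error 1052, "ambiguous"). The underscores in the column name were not the problem.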
I'm currently trying to create a table and insert values in an Oracle SQL database.
I managed to make it work using df.to_sql(name=table_name, con=conn, if_exists='append', index=False) but it took 1h30m to upload a DataFrame of only 10000 rows * 5 columns.
This made me look into Multiprocessing, so I tried following the answer Siddhi Kiran Bajracharya gave in this thread
Which turned out like this:
import pandas as pd
from sqlalchemy import create_engine
import config
# `os` is used below but was never imported in this snippet.
import os

# Put the Oracle Instant Client directory first on PATH.
# NOTE(review): presumably so the cx_Oracle driver can find its client
# libraries -- confirm for the target machine.
LOCATION = r"C:\Oracle\instantclient_19_6"
os.environ["PATH"] = LOCATION + ";" + os.environ["PATH"]
# SQLAlchemy database URLs separate the credentials from the host with '@'
# (dialect+driver://user:password@host:port/...); the original used '#'.
# NOTE(review): the query string contains a second '?' before "charset";
# additional URL parameters are normally joined with '&', and "charset" may
# not be an option the cx_Oracle dialect accepts -- confirm before relying on it.
conn = create_engine('oracle+cx_oracle://' + config.user + ':' + config.pw +
'@' + config.host + ':' + config.port + '/?service_name=' + config.db +'?charset=latin-1')
import math
from multiprocessing.dummy import Pool as ThreadPool
def insert_df(df, *args, **kwargs):
    """Upload ``df`` with several ``to_sql`` calls running in parallel threads.

    The target table is prepared first by one serial ``to_sql`` call on an
    empty slice of ``df`` that receives the caller's arguments unchanged, so
    the requested ``if_exists`` policy is applied exactly once. The row chunks
    are then written concurrently with ``if_exists`` forced to ``'append'``.
    Letting every thread run the caller's ``to_sql`` arguments at the same
    time makes several threads try to create the same table, which is what
    fails with "name is already used by an existing object".

    Args:
        df: Frame to upload. Only ``df.shape``, ``df.iloc`` and the ``to_sql``
            method of its slices are used.
        *args: Positional arguments forwarded to ``to_sql`` (name, con, ...).
        **kwargs: Keyword arguments forwarded to ``to_sql``.

    Raises:
        Whatever ``to_sql`` raises, either from the initial serial call or
        re-raised by ``pool.map`` from a worker thread.

    NOTE(review): because the table is created from an empty slice, column
    types that ``to_sql`` infers from values may differ from a data-bearing
    slice; pass ``dtype=`` explicitly if that matters.
    """
    nworkers = 4  # number of threads that execute inserts in parallel
    total_rows = df.shape[0]

    # Serial step: create / replace / validate the table before any thread runs.
    df.iloc[0:0, :].to_sql(*args, **kwargs)
    if total_rows == 0:
        return

    # Data chunks must always append to the table prepared above. ``if_exists``
    # is the 4th positional parameter of ``to_sql`` (name, con, schema,
    # if_exists), so override it wherever the caller supplied it.
    if len(args) > 3:
        chunk_args = args[:3] + ("append",) + args[4:]
        chunk_kwargs = kwargs
    else:
        chunk_args = args
        chunk_kwargs = {**kwargs, "if_exists": "append"}

    # At most ``nworkers`` contiguous, non-empty (start, stop) row ranges.
    step = math.ceil(total_rows / nworkers)
    bounds = [
        (start, min(start + step, total_rows))
        for start in range(0, total_rows, step)
    ]

    def worker(chunk):
        i, j = chunk
        df.iloc[i:j, :].to_sql(*chunk_args, **chunk_kwargs)

    pool = ThreadPool(nworkers)
    try:
        pool.map(worker, bounds)
    finally:
        # Always release the worker threads, even when a chunk failed.
        pool.close()
        pool.join()
# Upload df into the table named by table_name through the engine `conn`,
# appending to the table and leaving the DataFrame index out.
insert_df(df, f'{table_name}', conn, if_exists='append', index=False)
The problem is that this last code runs for 20 minutes, inserts only 9 rows into the table, and then raises the following error: DatabaseError: (cx_Oracle.DatabaseError) ORA-00955: name is already used by an existing object
Full Traceback:
---------------------------------------------------------------------------
DatabaseError Traceback (most recent call last)
C:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py in _execute_context(self, dialect, constructor, statement, parameters, *args)
1248 self.dialect.do_execute(
-> 1249 cursor, statement, parameters, context
1250 )
C:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\engine\default.py in do_execute(self, cursor, statement, parameters, context)
579 def do_execute(self, cursor, statement, parameters, context=None):
--> 580 cursor.execute(statement, parameters)
581
DatabaseError: ORA-00955: name is already used by an existing object
The above exception was the direct cause of the following exception:
DatabaseError Traceback (most recent call last)
<ipython-input-73-b50275447767> in <module>
20
21
---> 22 insert_df(df, f'{table_name}', conn, if_exists='append', index=False)
<ipython-input-73-b50275447767> in insert_df(df, *args, **kwargs)
14 df.iloc[i:j, :].to_sql(*args, **kwargs)
15
---> 16 pool.map(worker, chunks)
17 pool.close()
18 pool.join()
C:\ProgramData\Anaconda3\lib\multiprocessing\pool.py in map(self, func, iterable, chunksize)
266 in a list that is returned.
267 '''
--> 268 return self._map_async(func, iterable, mapstar, chunksize).get()
269
270 def starmap(self, func, iterable, chunksize=None):
C:\ProgramData\Anaconda3\lib\multiprocessing\pool.py in get(self, timeout)
655 return self._value
656 else:
--> 657 raise self._value
658
659 def _set(self, i, obj):
C:\ProgramData\Anaconda3\lib\multiprocessing\pool.py in worker(inqueue, outqueue, initializer, initargs, maxtasks, wrap_exception)
119 job, i, func, args, kwds = task
120 try:
--> 121 result = (True, func(*args, **kwds))
122 except Exception as e:
123 if wrap_exception and func is not _helper_reraises_exception:
C:\ProgramData\Anaconda3\lib\multiprocessing\pool.py in mapstar(args)
42
43 def mapstar(args):
---> 44 return list(map(*args))
45
46 def starmapstar(args):
<ipython-input-73-b50275447767> in worker(chunk)
12 def worker(chunk):
13 i, j = chunk
---> 14 df.iloc[i:j, :].to_sql(*args, **kwargs)
15
16 pool.map(worker, chunks)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\generic.py in to_sql(self, name, con, schema, if_exists, index, index_label, chunksize, dtype, method)
2710 chunksize=chunksize,
2711 dtype=dtype,
-> 2712 method=method,
2713 )
2714
C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\sql.py in to_sql(frame, name, con, schema, if_exists, index, index_label, chunksize, dtype, method)
516 chunksize=chunksize,
517 dtype=dtype,
--> 518 method=method,
519 )
520
C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\sql.py in to_sql(self, frame, name, if_exists, index, index_label, schema, chunksize, dtype, method)
1317 dtype=dtype,
1318 )
-> 1319 table.create()
1320 table.insert(chunksize, method=method)
1321 if not name.isdigit() and not name.islower():
C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\sql.py in create(self)
654 )
655 else:
--> 656 self._execute_create()
657
658 def _execute_insert(self, conn, keys, data_iter):
C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\sql.py in _execute_create(self)
636 # Inserting table into database, add to MetaData object
637 self.table = self.table.tometadata(self.pd_sql.meta)
--> 638 self.table.create()
639
640 def create(self):
C:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\sql\schema.py in create(self, bind, checkfirst)
868 if bind is None:
869 bind = _bind_or_error(self)
--> 870 bind._run_visitor(ddl.SchemaGenerator, self, checkfirst=checkfirst)
871
872 def drop(self, bind=None, checkfirst=False):
C:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py in _run_visitor(self, visitorcallable, element, connection, **kwargs)
2044 ):
2045 with self._optional_conn_ctx_manager(connection) as conn:
-> 2046 conn._run_visitor(visitorcallable, element, **kwargs)
2047
2048 class _trans_ctx(object):
C:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py in _run_visitor(self, visitorcallable, element, **kwargs)
1613
1614 def _run_visitor(self, visitorcallable, element, **kwargs):
-> 1615 visitorcallable(self.dialect, self, **kwargs).traverse_single(element)
1616
1617
C:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\sql\visitors.py in traverse_single(self, obj, **kw)
136 meth = getattr(v, "visit_%s" % obj.__visit_name__, None)
137 if meth:
--> 138 return meth(obj, **kw)
139
140 def iterate(self, obj):
C:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\sql\ddl.py in visit_table(self, table, create_ok, include_foreign_key_constraints, _is_metadata_operation)
824 table,
825 include_foreign_key_constraints= # noqa
--> 826 include_foreign_key_constraints,
827 )
828 # fmt: on
C:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py in execute(self, object_, *multiparams, **params)
986 raise exc.ObjectNotExecutableError(object_)
987 else:
--> 988 return meth(self, multiparams, params)
989
990 def _execute_function(self, func, multiparams, params):
C:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\sql\ddl.py in _execute_on_connection(self, connection, multiparams, params)
70
71 def _execute_on_connection(self, connection, multiparams, params):
---> 72 return connection._execute_ddl(self, multiparams, params)
73
74 def execute(self, bind=None, target=None):
C:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py in _execute_ddl(self, ddl, multiparams, params)
1048 compiled,
1049 None,
-> 1050 compiled,
1051 )
1052 if self._has_events or self.engine._has_events:
C:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py in _execute_context(self, dialect, constructor, statement, parameters, *args)
1251 except BaseException as e:
1252 self._handle_dbapi_exception(
-> 1253 e, statement, parameters, cursor, context
1254 )
1255
C:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py in _handle_dbapi_exception(self, e, statement, parameters, cursor, context)
1471 util.raise_from_cause(newraise, exc_info)
1472 elif should_wrap:
-> 1473 util.raise_from_cause(sqlalchemy_exception, exc_info)
1474 else:
1475 util.reraise(*exc_info)
C:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\util\compat.py in raise_from_cause(exception, exc_info)
396 exc_type, exc_value, exc_tb = exc_info
397 cause = exc_value if exc_value is not exception else None
--> 398 reraise(type(exception), exception, tb=exc_tb, cause=cause)
399
400
C:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\util\compat.py in reraise(tp, value, tb, cause)
150 value.__cause__ = cause
151 if value.__traceback__ is not tb:
--> 152 raise value.with_traceback(tb)
153 raise value
154
C:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py in _execute_context(self, dialect, constructor, statement, parameters, *args)
1247 if not evt_handled:
1248 self.dialect.do_execute(
-> 1249 cursor, statement, parameters, context
1250 )
1251 except BaseException as e:
C:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\engine\default.py in do_execute(self, cursor, statement, parameters, context)
578
579 def do_execute(self, cursor, statement, parameters, context=None):
--> 580 cursor.execute(statement, parameters)
581
582 def do_execute_no_params(self, cursor, statement, context=None):
DatabaseError: (cx_Oracle.DatabaseError) ORA-00955: name is already used by an existing object
[SQL:
CREATE TABLE "TEST_TABLE_DELETE" (
"id" CLOB,
"name" CLOB,
"var1" CLOB,
"var2" CLOB,
"var3" CLOB,
"var4" CLOB,
"var5" CLOB,
"var6" CLOB,
"var7" CLOB,
"var8" CLOB,
"var9" CLOB,
"var10" CLOB,
"var11" CLOB,
"var12" FLOAT,
"var13" CLOB,
"var14" CLOB
)
]
(Background on this error at: http://sqlalche.me/e/4xp6)
Any pointers to help me solve this issue would be greatly appreciated.
Thanks!
Luti
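If you're using to_sql with string columns in your DataFrame, you should pass explicit column types, something like the code below. Otherwise the string columns are created as CLOB (as in the CREATE TABLE statement in the question), which is typically what makes the insert so slow: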
# Map every object-dtype column of `data` to a VARCHAR whose length is the
# longest string found in that column, and hand that mapping to to_sql.
# NOTE(review): `types` is presumably sqlalchemy.types -- confirm the import.
# NOTE(review): .str.len().max() can come back as a float (or NaN for columns
# with missing values); VARCHAR presumably needs an int length -- confirm.
dtyp = {c:types.VARCHAR(data[c].str.len().max()) for c in data.columns[data.dtypes == 'object'].tolist()}
data.to_sql('table_name.....',con=...,if_exists='append'
, index=False
, dtype = dtyp)
For 10k rows, it should be very fast.
I am trying the code below, but I am getting an error:
# Only build the SQLite database when train.db does not exist yet.
if not os.path.isfile('train.db'):
disk_engine = create_engine('sqlite:///train.db')
# Timestamp taken before loading; not read again in the code shown here.
start = dt.datetime.now()
# Rows per chunk requested from pd.read_csv in the loop below.
chunksize = 15000
# Count of chunks processed so far (used for the progress message).
j = 0
# Offset added to the index of the next chunk.
index_start = 1
for df in pd.read_csv('final_features.csv', names=['Unnamed: 0','id','is_duplicate','cwc_min','cwc_max','csc_min','csc_max','ctc_min','ctc_max','last_word_eq','first_word_eq','abs_len_diff','mean_len','token_set_ratio','token_sort_ratio','fuzz_ratio','fuzz_partial_ratio','longest_substr_ratio','freq_qid1','freq_qid2','q1len','q2len','q1_n_words','q2_n_words','word_Common','word_Total','word_share','freq_q1+q2','freq_q1-q2','0_x','1_x','2_x','3_x','4_x','5_x','6_x','7_x','8_x','9_x','10_x','11_x','12_x','13_x','14_x','15_x','16_x','17_x','18_x','19_x','20_x','21_x','22_x','23_x','24_x','25_x','26_x','27_x','28_x','29_x','30_x','31_x','32_x','33_x','34_x','35_x','36_x','37_x','38_x','39_x','40_x','41_x','42_x','43_x','44_x','45_x','46_x','47_x','48_x','49_x','50_x','51_x','52_x','53_x','54_x','55_x','56_x','57_x','58_x','59_x','60_x','61_x','62_x','63_x','64_x','65_x','66_x','67_x','68_x','69_x','70_x','71_x','72_x','73_x','74_x','75_x','76_x','77_x','78_x','79_x','80_x','81_x','82_x','83_x','84_x','85_x','86_x','87_x','88_x','89_x','90_x','91_x','92_x','93_x','94_x','95_x','0_y','1_y','2_y','3_y','4_y','5_y','6_y','7_y','8_y','9_y','10_y','11_y','12_y','13_y','14_y','15_y','16_y','17_y','18_y','19_y','20_y','21_y','22_y','23_y','24_y','25_y','26_y','27_y','28_y','29_y','30_y','31_y','32_y','33_y','34_y','35_y','36_y','37_y','38_y','39_y','40_y','41_y','42_y','43_y','44_y','45_y','46_y','47_y','48_y','49_y','50_y','51_y','52_y','53_y','54_y','55_y','56_y','57_y','58_y','59_y','60_y','61_y','62_y','63_y','64_y','65_y','66_y','67_y','68_y','69_y','70_y','71_y','72_y','73_y','74_y','75_y','76_y','77_y','78_y','79_y','80_y','81_y','82_y','83_y','84_y','85_y','86_y','87_y','88_y','89_y','90_y','91_y','92_y','93_y','94_y','95_y'], chunksize=chunksize, iterator=True, encoding='utf-8', ):
# Offset this chunk's index by index_start.
df.index += index_start
j+=1
# Progress message: chunks seen so far times chunksize, so it over-counts
# when the final chunk holds fewer than chunksize rows.
print('{} rows'.format(j*chunksize))
# Append the chunk to table 'data'. No chunksize is passed to to_sql here;
# the chunksize above only controls how the CSV is read.
df.to_sql('data', disk_engine, if_exists='append')
# The next chunk's offset is one past this chunk's last index value.
index_start = df.index[-1] + 1
This is the output that I am getting:
15000 rows
---------------------------------------------------------------------------
OperationalError Traceback (most recent call last)
~\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py in _execute_context(self, dialect, constructor, statement, parameters, *args)
1192 parameters,
-> 1193 context)
1194 except BaseException as e:
~\Anaconda3\lib\site-packages\sqlalchemy\engine\default.py in do_execute(self, cursor, statement, parameters, context)
506 def do_execute(self, cursor, statement, parameters, context=None):
--> 507 cursor.execute(statement, parameters)
508
OperationalError: too many SQL variables
The above exception was the direct cause of the following exception:
OperationalError Traceback (most recent call last)
<ipython-input-83-b376654c990a> in <module>()
14 j+=1
15 print('{} rows'.format(j*chunksize))
---> 16 df.to_sql('data', disk_engine, if_exists='append')
17 index_start = df.index[-1] + 1
~\Anaconda3\lib\site-packages\pandas\core\generic.py in to_sql(self, name, con, schema, if_exists, index, index_label, chunksize, dtype)
2125 ... df2.to_excel(writer, sheet_name='Sheet_name_2')
2126
-> 2127 ExcelWriter can also be used to append to an existing Excel file:
2128
2129 >>> with pd.ExcelWriter('output.xlsx',
~\Anaconda3\lib\site-packages\pandas\io\sql.py in to_sql(frame, name, con, schema, if_exists, index, index_label, chunksize, dtype)
448 index=True,
449 index_label=None,
--> 450 chunksize=None,
451 dtype=None,
452 method=None,
~\Anaconda3\lib\site-packages\pandas\io\sql.py in to_sql(self, frame, name, if_exists, index, index_label, schema, chunksize, dtype)
1147
1148 #staticmethod
-> 1149 def _query_iterator(
1150 result, chunksize, columns, index_col=None, coerce_float=True, parse_dates=None
1151 ):
~\Anaconda3\lib\site-packages\pandas\io\sql.py in insert(self, chunksize)
661 ----------
662 conn : sqlalchemy.engine.Engine or sqlalchemy.engine.Connection
--> 663 keys : list of str
664 Column names
665 data_iter : generator of list
~\Anaconda3\lib\site-packages\pandas\io\sql.py in _execute_insert(self, conn, keys, data_iter)
636 return str(CreateTable(self.table).compile(self.pd_sql.connectable))
637
--> 638 def _execute_create(self):
639 # Inserting table into database, add to MetaData object
640 self.table = self.table.tometadata(self.pd_sql.meta)
~\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py in execute(self, object, *multiparams, **params)
946 raise exc.ObjectNotExecutableError(object)
947 else:
--> 948 return meth(self, multiparams, params)
949
950 def _execute_function(self, func, multiparams, params):
~\Anaconda3\lib\site-packages\sqlalchemy\sql\elements.py in _execute_on_connection(self, connection, multiparams, params)
267 def _execute_on_connection(self, connection, multiparams, params):
268 if self.supports_execution:
--> 269 return connection._execute_clauseelement(self, multiparams, params)
270 else:
271 raise exc.ObjectNotExecutableError(self)
~\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py in _execute_clauseelement(self, elem, multiparams, params)
1058 compiled_sql,
1059 distilled_params,
-> 1060 compiled_sql, distilled_params
1061 )
1062 if self._has_events or self.engine._has_events:
~\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py in _execute_context(self, dialect, constructor, statement, parameters, *args)
1198 parameters,
1199 cursor,
-> 1200 context)
1201
1202 if self._has_events or self.engine._has_events:
~\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py in _handle_dbapi_exception(self, e, statement, parameters, cursor, context)
1411 util.raise_from_cause(
1412 sqlalchemy_exception,
-> 1413 exc_info
1414 )
1415 else:
~\Anaconda3\lib\site-packages\sqlalchemy\util\compat.py in raise_from_cause(exception, exc_info)
201 exc_type, exc_value, exc_tb = exc_info
202 cause = exc_value if exc_value is not exception else None
--> 203 reraise(type(exception), exception, tb=exc_tb, cause=cause)
204
205 if py3k:
~\Anaconda3\lib\site-packages\sqlalchemy\util\compat.py in reraise(tp, value, tb, cause)
184 value.__cause__ = cause
185 if value.__traceback__ is not tb:
--> 186 raise value.with_traceback(tb)
187 raise value
188
~\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py in _execute_context(self, dialect, constructor, statement, parameters, *args)
1191 statement,
1192 parameters,
-> 1193 context)
1194 except BaseException as e:
1195 self._handle_dbapi_exception(
~\Anaconda3\lib\site-packages\sqlalchemy\engine\default.py in do_execute(self, cursor, statement, parameters, context)
505
506 def do_execute(self, cursor, statement, parameters, context=None):
--> 507 cursor.execute(statement, parameters)
508
509 def do_execute_no_params(self, cursor, statement, context=None):
I have already tried various chunksize values, but that did not work. Can anyone please suggest how to fix this error? I am running this code in a Jupyter notebook. I already have up-to-date versions of pandas and the other libraries, so there should be no compatibility issue.
This error is related to the number of parameters being passed to sqlite3. In essence, what's happening behind the scenes is that there's a SQL query being issued to the db engine: INSERT INTO myTable (col1, col2, col3,..., col_n) VALUES (?, ?, ?,..., ?), where the ? are the values from your dataframe being passed to the database.
This error occurred because your dataframe is very wide (it has lots of columns), so during insertion many parameters are passed into the SQL statement. You can see the call in the error stack that supports this explanation:
--> 507 cursor.execute(statement, parameters)
Simply, SQLite can only handle a limited number of parameters being passed. This is simply a limitation of SQLite. You can scroll down to #9 in this page for more info on this.
Setting chunksize will not resolve your problem. My suggestion is using another db like postgres or mysql.
I'm processing data from an Excel spreadsheet and uploading it to Oracle. Pandas fails on the command to_sql with a certain edge case. I have a particular column COMMENTS which usually contains strings, however in one row a user typed a number (i.e. 500).
I am forcing the column type using the dtype argument, so I would expect the number to be uploaded to the Oracle table as a VARCHAR:
dataWrite.to_sql(name = 'FB_DATA_HOURLY', schema = 'schemaName', index = False, con = conWrite, if_exists='append', dtype={'COMMENTS': VARCHAR(length=200)})
However there is still a TypeError as it tries to upload this number to the VARCHAR column.
TypeError Traceback (most recent call last)
<ipython-input-99-1864db124148> in readExcel_05142019(version, filepath)
72 conWrite = oracle_db.connect()
---> 73 dataWrite.to_sql(name = 'FB_DATA_HOURLY', schema = 'schemaName', index = False, con = conWrite, if_exists='append', dtype={'COMMENTS': VARCHAR(length=200)})
74
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\generic.py in to_sql(self, name, con, schema, if_exists, index, index_label, chunksize, dtype, method)
2710 chunksize=chunksize,
2711 dtype=dtype,
-> 2712 method=method,
2713 )
2714
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\io\sql.py in to_sql(frame, name, con, schema, if_exists, index, index_label, chunksize, dtype, method)
516 chunksize=chunksize,
517 dtype=dtype,
--> 518 method=method,
519 )
520
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\io\sql.py in to_sql(self, frame, name, if_exists, index, index_label, schema, chunksize, dtype, method)
1318 )
1319 table.create()
-> 1320 table.insert(chunksize, method=method)
1321 if not name.isdigit() and not name.islower():
1322 # check for potentially case sensitivity issues (GH7815)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\io\sql.py in insert(self, chunksize, method)
754
755 chunk_iter = zip(*[arr[start_i:end_i] for arr in data_list])
--> 756 exec_insert(conn, keys, chunk_iter)
757
758 def _query_iterator(
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\io\sql.py in _execute_insert(self, conn, keys, data_iter)
668 """
669 data = [dict(zip(keys, row)) for row in data_iter]
--> 670 conn.execute(self.table.insert(), data)
671
672 def _execute_insert_multi(self, conn, keys, data_iter):
~\AppData\Local\Continuum\anaconda3\lib\site-packages\sqlalchemy\engine\base.py in execute(self, object_, *multiparams, **params)
980 raise exc.ObjectNotExecutableError(object_)
981 else:
--> 982 return meth(self, multiparams, params)
983
984 def _execute_function(self, func, multiparams, params):
~\AppData\Local\Continuum\anaconda3\lib\site-packages\sqlalchemy\sql\elements.py in _execute_on_connection(self, connection, multiparams, params)
285 def _execute_on_connection(self, connection, multiparams, params):
286 if self.supports_execution:
--> 287 return connection._execute_clauseelement(self, multiparams, params)
288 else:
289 raise exc.ObjectNotExecutableError(self)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\sqlalchemy\engine\base.py in _execute_clauseelement(self, elem, multiparams, params)
1099 distilled_params,
1100 compiled_sql,
-> 1101 distilled_params,
1102 )
1103 if self._has_events or self.engine._has_events:
~\AppData\Local\Continuum\anaconda3\lib\site-packages\sqlalchemy\engine\base.py in _execute_context(self, dialect, constructor, statement, parameters, *args)
1248 except BaseException as e:
1249 self._handle_dbapi_exception(
-> 1250 e, statement, parameters, cursor, context
1251 )
1252
~\AppData\Local\Continuum\anaconda3\lib\site-packages\sqlalchemy\engine\base.py in _handle_dbapi_exception(self, e, statement, parameters, cursor, context)
1476 util.raise_from_cause(sqlalchemy_exception, exc_info)
1477 else:
-> 1478 util.reraise(*exc_info)
1479
1480 finally:
~\AppData\Local\Continuum\anaconda3\lib\site-packages\sqlalchemy\util\compat.py in reraise(tp, value, tb, cause)
151 if value.__traceback__ is not tb:
152 raise value.with_traceback(tb)
--> 153 raise value
154
155 def u(s):
~\AppData\Local\Continuum\anaconda3\lib\site-packages\sqlalchemy\engine\base.py in _execute_context(self, dialect, constructor, statement, parameters, *args)
1224 if not evt_handled:
1225 self.dialect.do_executemany(
-> 1226 cursor, statement, parameters, context
1227 )
1228 elif not parameters and context.no_parameters:
~\AppData\Local\Continuum\anaconda3\lib\site-packages\sqlalchemy\dialects\oracle\cx_oracle.py in do_executemany(self, cursor, statement, parameters, context)
1126 if isinstance(parameters, tuple):
1127 parameters = list(parameters)
-> 1128 cursor.executemany(statement, parameters)
1129
1130 def do_begin_twophase(self, connection, xid):
TypeError: expecting string or bytes object
I was able to resolve this problem by converting the column type from Object to String, while making sure to leave null values as nulls instead of a string "nan".
# convert every non-null value (including purely numeric ones) to a string, while preserving nulls
dataWrite['COMMENTS'] = dataWrite['COMMENTS'].where(dataWrite['COMMENTS'].isnull(), dataWrite['COMMENTS'].astype(str))
I am using SQLAlchemy and Python to dynamically run a SQL query, but it is giving an error.
This is my command to run the query:
data = engine.execute(m_query, week=Cohort_week, metric=metric, p1=val1, p2=val2).fetchall()
Here, Cohort_week, val1 and val2 are integers and metric is a string.
Here is my SQL query:
select cus.week,pdp_views, id, :metric,
case
when :metric <= :p2 then 3
when :metric > :p2 and :metric < :p1 then 2
when :metric >= :p1 then 1
end as HML
from
dev.master_abtest_customers cus
where cus.week= :week
The error is:
DataError Traceback (most recent call last)
<ipython-input-25-30f7e92cbea3> in <module>()
12 m_query = text(m_query)
13
---> 14 data = engine.execute(m_query, week=week, metric=metric, p1=val1, p2=val2).fetchall()
15
16 data = pd.DataFrame(data)
C:\Users\MI0185\AppData\Local\Enthought\Canopy\User\lib\site-packages\sqlalchemy-1.1.0b1-py2.7-win-amd64.egg\sqlalchemy\engine\base.pyc in execute(self, statement, *multiparams, **params)
2050
2051 connection = self.contextual_connect(close_with_result=True)
-> 2052 return connection.execute(statement, *multiparams, **params)
2053
2054 def scalar(self, statement, *multiparams, **params):
C:\Users\MI0185\AppData\Local\Enthought\Canopy\User\lib\site-packages\sqlalchemy-1.1.0b1-py2.7-win-amd64.egg\sqlalchemy\engine\base.pyc in execute(self, object, *multiparams, **params)
945 type(object))
946 else:
--> 947 return meth(self, multiparams, params)
948
949 def _execute_function(self, func, multiparams, params):
C:\Users\MI0185\AppData\Local\Enthought\Canopy\User\lib\site-packages\sqlalchemy-1.1.0b1-py2.7-win-amd64.egg\sqlalchemy\sql\elements.pyc in _execute_on_connection(self, connection, multiparams, params)
260
261 def _execute_on_connection(self, connection, multiparams, params):
--> 262 return connection._execute_clauseelement(self, multiparams, params)
263
264 def unique_params(self, *optionaldict, **kwargs):
C:\Users\MI0185\AppData\Local\Enthought\Canopy\User\lib\site-packages\sqlalchemy-1.1.0b1-py2.7-win-amd64.egg\sqlalchemy\engine\base.pyc in _execute_clauseelement(self, elem, multiparams, params)
1053 compiled_sql,
1054 distilled_params,
-> 1055 compiled_sql, distilled_params
1056 )
1057 if self._has_events or self.engine._has_events:
C:\Users\MI0185\AppData\Local\Enthought\Canopy\User\lib\site-packages\sqlalchemy-1.1.0b1-py2.7-win-amd64.egg\sqlalchemy\engine\base.pyc in _execute_context(self, dialect, constructor, statement, parameters, *args)
1189 parameters,
1190 cursor,
-> 1191 context)
1192
1193 if self._has_events or self.engine._has_events:
C:\Users\MI0185\AppData\Local\Enthought\Canopy\User\lib\site-packages\sqlalchemy-1.1.0b1-py2.7-win-amd64.egg\sqlalchemy\engine\base.pyc in _handle_dbapi_exception(self, e, statement, parameters, cursor, context)
1384 util.raise_from_cause(
1385 sqlalchemy_exception,
-> 1386 exc_info
1387 )
1388 else:
C:\Users\MI0185\AppData\Local\Enthought\Canopy\User\lib\site-packages\sqlalchemy-1.1.0b1-py2.7-win-amd64.egg\sqlalchemy\util\compat.pyc in raise_from_cause(exception, exc_info)
200 exc_type, exc_value, exc_tb = exc_info
201 cause = exc_value if exc_value is not exception else None
--> 202 reraise(type(exception), exception, tb=exc_tb, cause=cause)
203
204 if py3k:
C:\Users\MI0185\AppData\Local\Enthought\Canopy\User\lib\site-packages\sqlalchemy-1.1.0b1-py2.7-win-amd64.egg\sqlalchemy\engine\base.pyc in _execute_context(self, dialect, constructor, statement, parameters, *args)
1182 statement,
1183 parameters,
-> 1184 context)
1185 except Exception as e:
1186 self._handle_dbapi_exception(
C:\Users\MI0185\AppData\Local\Enthought\Canopy\User\lib\site-packages\sqlalchemy-1.1.0b1-py2.7-win-amd64.egg\sqlalchemy\engine\default.pyc in do_execute(self, cursor, statement, parameters, context)
460
461 def do_execute(self, cursor, statement, parameters, context=None):
--> 462 cursor.execute(statement, parameters)
463
464 def do_execute_no_params(self, cursor, statement, context=None):
DataError: (psycopg2.DataError) invalid input syntax for integer: "pdp_views"
[SQL: 'select cus.week, id, %(metric)s,\n case\n when %(metric)s <= %(p2)s then 3\n when %(metric)s > %(p2)s and %(metric)s < %(p1)s then 2 \n when %(metric)s >= %(p1)s then 1\n end as HML\n from \n dev.master_abtest_customers cus\n where cus.week= %(week)s\n\t\t\n\t\t'] [parameters: {'p2': 20L, 'week': 22L, 'metric': u'pdp_views', 'p1': 40L}]
I have tried type casting val1, val2 and week to int (i.e. using int(val1) etc.), but I still get the same error.
Can you please help me with this?
It looks like you want to compare the values in the column pdp_views with p1 and p2 but what you're actually doing is comparing the string 'pdp_views' with p1 and p2, which have incompatible types.
You should build the query dynamically instead:
def get_m_query(week, metric, p1, p2):
    """Build the HML bucketing query for a single cohort week.

    ``metric`` is the *name* of a column, so it cannot be passed to the
    database as a bound parameter (it would arrive as a string literal and
    be compared against the integer thresholds). It is resolved to a column
    of the aliased table here instead.

    :param week: cohort week used in the ``WHERE`` filter.
    :param metric: name of the column to bucket, e.g. ``"pdp_views"``.
    :param p1: upper threshold; rows with ``metric >= p1`` get HML 1.
    :param p2: lower threshold; rows with ``metric <= p2`` get HML 3.
    :returns: a ``select`` construct; nothing is executed here.
    :raises AttributeError: if ``metric`` is not an attribute of the
        alias's column collection.
    """
    cus = master_abtest_customers.alias("cus")
    # Keep the incoming name and the resolved column under separate names
    # so the string parameter is not silently replaced by a column object.
    metric_col = getattr(cus.c, metric)

    hml = case([
        (metric_col <= p2, 3),
        (and_(metric_col > p2, metric_col < p1), 2),
        (metric_col >= p1, 1),
    ]).label("HML")

    columns = [
        cus.c.week,
        cus.c.pdp_views,
        cus.c.id,
        metric_col,
        hml,
    ]
    # Columns of an alias live on ``cus.c``; ``cus.week`` does not exist and
    # raises AttributeError, so the filter must go through ``cus.c.week``.
    return select(columns).select_from(cus).where(cus.c.week == week)