How to get column names from custom TextClause query in SQL Alchemy? - python

I have an app that typically takes a SQL Alchemy selectable as an input and uses reflection.Inspector.from_engine(engine).get_columns(selectable.name, schema=selectable.schema) to get the columns for additional downstream logic.
This app also allows the user to pass a custom TextClause query as an input. Is it possible to reverse engineer the column names from the TextClause as is done with the selectable object?
What have I tried?
>>> import sqlalchemy as sa
>>> query = "SELECT col_1, col_2 FROM table"
>>> selectable = sa.text(query)
# The steps above cannot be altered
>>> type(selectable)
<class 'sqlalchemy.sql.elements.TextClause'>
>>> connection_string = "postgresql+psycopg2://<user>:<password>@localhost:5432/<db>"
>>> engine = sa.create_engine(connection_string)
>>> columns = sa.engine.reflection.Inspector.from_engine(engine).get_columns(selectable, schema=None)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/Users/user/opt/anaconda3/envs/env/lib/python3.7/site-packages/sqlalchemy/engine/reflection.py", line 498, in get_columns
conn, table_name, schema, info_cache=self.info_cache, **kw
File "<string>", line 2, in get_columns
File "/Users/user/opt/anaconda3/envs/env/lib/python3.7/site-packages/sqlalchemy/engine/reflection.py", line 55, in cache
ret = fn(self, con, *args, **kw)
File "/Users/user/opt/anaconda3/envs/env/lib/python3.7/site-packages/sqlalchemy/dialects/postgresql/base.py", line 3578, in get_columns
connection, table_name, schema, info_cache=kw.get("info_cache")
File "<string>", line 2, in get_table_oid
File "/Users/user/opt/anaconda3/envs/env/lib/python3.7/site-packages/sqlalchemy/engine/reflection.py", line 55, in cache
ret = fn(self, con, *args, **kw)
File "/Users/user/opt/anaconda3/envs/env/lib/python3.7/site-packages/sqlalchemy/dialects/postgresql/base.py", line 3457, in get_table_oid
raise exc.NoSuchTableError(table_name)
sqlalchemy.exc.NoSuchTableError: SELECT col_1, col_2 FROM table

Related

sqlalchemy core with postgresql in python, connection.execute(..) error

I am learning sqlalchemy core with postgresql database in python.
I tried to run the following script and got this error message:
from sqlalchemy import create_engine
from sqlalchemy import Table, MetaData, String
engine = create_engine('postgresql://postgres:123456@localhost:5432/red30')
with engine.connect() as connection:
meta = MetaData(engine)
sales_table = Table('sales', meta)
# Create
insert_statement = sales_table.insert().values(order_num=1105911,
order_type='Retail',
cust_name='Syman Mapstone',
prod_number='EB521',
prod_name='Understanding Artificial Intelligence',
quantity=3,
price=19.5,
discount=0,
order_total=58.5)
connection.execute(insert_statement)
# Read
select_statement = sales_table.select().limit(10)
result_set = connection.execute(select_statement)
for r in result_set:
print(r)
# Update
update_statement = sales_table.update().where(sales_table.c.order_num==1105910).values(quantity=2, order_total=39)
connection.execute(update_statement)
# Confirm Update: Read
reselect_statement = sales_table.select().where(sales_table.c.order_num==1105910)
updated_set = connection.execute(reselect_statement)
for u in updated_set:
print(u)
# Delete
delete_statement = sales_table.delete().where(sales_table.c.order_num==1105910)
connection.execute(delete_statement)
# Confirm Delete: Read
not_found_set = connection.execute(reselect_statement)
print(not_found_set.rowcount)
error message:
(postgres-prac) E:\xfile\postgresql\postgres-prac>python postgres-sqlalchemy-core.py
Traceback (most recent call last):
File "postgres-sqlalchemy-core.py", line 20, in <module>
connection.execute(insert_statement)
File "E:\xfile\postgresql\postgres-prac\lib\site-packages\sqlalchemy\engine\ba
se.py", line 1414, in execute
return meth(
File "E:\xfile\postgresql\postgres-prac\lib\site-packages\sqlalchemy\sql\eleme
nts.py", line 485, in _execute_on_connection
return connection._execute_clauseelement(
File "E:\xfile\postgresql\postgres-prac\lib\site-packages\sqlalchemy\engine\ba
se.py", line 1630, in _execute_clauseelement
compiled_sql, extracted_params, cache_hit = elem._compile_w_cache(
File "E:\xfile\postgresql\postgres-prac\lib\site-packages\sqlalchemy\sql\eleme
nts.py", line 651, in _compile_w_cache
compiled_sql = self._compiler(
File "E:\xfile\postgresql\postgres-prac\lib\site-packages\sqlalchemy\sql\eleme
nts.py", line 290, in _compiler
return dialect.statement_compiler(dialect, self, **kw)
File "E:\xfile\postgresql\postgres-prac\lib\site-packages\sqlalchemy\sql\compi
ler.py", line 1269, in __init__
Compiled.__init__(self, dialect, statement, **kwargs)
File "E:\xfile\postgresql\postgres-prac\lib\site-packages\sqlalchemy\sql\compi
ler.py", line 710, in __init__
self.string = self.process(self.statement, **compile_kwargs)
File "E:\xfile\postgresql\postgres-prac\lib\site-packages\sqlalchemy\sql\compi
ler.py", line 755, in process
return obj._compiler_dispatch(self, **kwargs)
File "E:\xfile\postgresql\postgres-prac\lib\site-packages\sqlalchemy\sql\visit
ors.py", line 143, in _compiler_dispatch
return meth(self, **kw) # type: ignore # noqa: E501
File "E:\xfile\postgresql\postgres-prac\lib\site-packages\sqlalchemy\sql\compi
ler.py", line 5317, in visit_insert
crud_params_struct = crud._get_crud_params(
File "E:\xfile\postgresql\postgres-prac\lib\site-packages\sqlalchemy\sql\crud.
py", line 326, in _get_crud_params
raise exc.CompileError(
sqlalchemy.exc.CompileError: Unconsumed column names: order_type, quantity, cust
_name, discount, prod_number, price, order_total, order_num, prod_name
You define your table as an empty table:
sales_table = Table('sales', meta)
So when trying to insert a record with all those keywords, they cannot be mapped to columns and do not get consumed, hence the Unconsumed column names error.
You need to define the table columns in your Table creation. See the following example from the docs:
from sqlalchemy import Table, Column, Integer, String
user = Table(
"user",
metadata_obj,
Column("user_id", Integer, primary_key=True),
Column("user_name", String(16), nullable=False),
Column("email_address", String(60)),
Column("nickname", String(50), nullable=False),
)

sqlalchemy update bindparam primary key

The following code throws "sqlalchemy.exc.CompileError: Unconsumed column names: _id".
User = Table('users', metadata,
Column('id', Integer, primary_key=True),
Column('score', Integer)
)
values = [
{'score': 2, '_id': 1},
{'score': 3, '_id': 3}
]
query = User.update().where(User.c.id == bindparam('_id')).values(score=bindparam('score'))
await db.execute_many(query, values)
db is an instance of databases.Database. Notice that I have to use the name '_id' because SQLAlchemy says 'id' is reserved.
Is there any solution other than updating each row individually?
Database.execute_many() calls Connection.execute_many() which breaks your query up into separate individual queries (one per element in values), here's the method (source):
async def execute_many(
self, query: typing.Union[ClauseElement, str], values: list
) -> None:
queries = [self._build_query(query, values_set) for values_set in values]
async with self._query_lock:
await self._connection.execute_many(queries)
Note that it calls the _build_query() method (source):
@staticmethod
def _build_query(
query: typing.Union[ClauseElement, str], values: dict = None
) -> ClauseElement:
if isinstance(query, str):
query = text(query)
return query.bindparams(**values) if values is not None else query
elif values:
return query.values(**values)
return query
As you aren't passing a str query and you are passing values, control enters the elif values: condition handling where the individual dict of values is unpacked into the .values() method on your query (which is Update.values()). That essentially makes the query it's trying to compile this:
query = (
User.update()
.where(User.c.id == bindparam("_id"))
.values(score=bindparam("score"))
.values(score=2, _id=1)
)
That second values clause results in a new Update with new bind params that are trying to set values for both score and _id. This causes compilation of the query to fail as there is no _id column on the table.
So the MCVE to reproduce the error is really this:
from sqlalchemy.dialects import postgresql
User.update().values(score=2, _id=1).compile(dialect=postgresql.dialect())
Which raises:
Traceback (most recent call last):
File ".\main.py", line 31, in <module>
User.update().values(score=2, _id=1).compile(dialect=postgresql.dialect())
File "<string>", line 1, in <lambda>
File "C:\Users\peter\Documents\git\stackoverflow\58668615-sqalchemy-update-bindparam-primary-key\.venv\lib\site-packages\sqlalchemy\sql\elements.py", line 462, in compile
return self._compiler(dialect, bind=bind, **kw)
File "C:\Users\peter\Documents\git\stackoverflow\58668615-sqalchemy-update-bindparam-primary-key\.venv\lib\site-packages\sqlalchemy\sql\elements.py", line 468, in _compiler
return dialect.statement_compiler(dialect, self, **kw)
File "C:\Users\peter\Documents\git\stackoverflow\58668615-sqalchemy-update-bindparam-primary-key\.venv\lib\site-packages\sqlalchemy\sql\compiler.py", line 571, in __init__
Compiled.__init__(self, dialect, statement, **kwargs)
File "C:\Users\peter\Documents\git\stackoverflow\58668615-sqalchemy-update-bindparam-primary-key\.venv\lib\site-packages\sqlalchemy\sql\compiler.py", line 319, in __init__
self.string = self.process(self.statement, **compile_kwargs)
File "C:\Users\peter\Documents\git\stackoverflow\58668615-sqalchemy-update-bindparam-primary-key\.venv\lib\site-packages\sqlalchemy\sql\compiler.py", line 350, in process
return obj._compiler_dispatch(self, **kwargs)
File "C:\Users\peter\Documents\git\stackoverflow\58668615-sqalchemy-update-bindparam-primary-key\.venv\lib\site-packages\sqlalchemy\sql\visitors.py", line 92, in _compiler_dispatch
return meth(self, **kw)
File "C:\Users\peter\Documents\git\stackoverflow\58668615-sqalchemy-update-bindparam-primary-key\.venv\lib\site-packages\sqlalchemy\sql\compiler.py", line 2569, in visit_update
self, update_stmt, crud.ISUPDATE, **kw
File "C:\Users\peter\Documents\git\stackoverflow\58668615-sqalchemy-update-bindparam-primary-key\.venv\lib\site-packages\sqlalchemy\sql\crud.py", line 62, in _setup_crud_params
return _get_crud_params(compiler, stmt, **kw)
File "C:\Users\peter\Documents\git\stackoverflow\58668615-sqalchemy-update-bindparam-primary-key\.venv\lib\site-packages\sqlalchemy\sql\crud.py", line 177, in _get_crud_params
% (", ".join("%s" % c for c in check))
sqlalchemy.exc.CompileError: Unconsumed column names: _id
To summarise the issue, you build a query with bind params passed to both Update.where() and Update.values(). You then pass that query and your values to Database.execute_many() where they unpack the individual elements of your values list into a second call of Update.values() on your query which replaces your query with one that tries to set a value for an _id column which doesn't exist.
Is there any solution other than updating each row individually?
Well the query works just fine when using sqlalchemy engine as well as query:
# using a sqlalchemy engine
engine.execute(query, values)
Otherwise, what should work is sending the query in as a string to Database.execute_many() as that will mean the query gets handled in the if isinstance(query, str): part of the _build_query() method which will avoid the second .values() call being made on the query:
db.execute_many(str(query), values)

KeyError while executing SQL with parameters

I am getting a KeyError while running the code below. I am trying to pass the parameters using a separate parameters variable.
Code:
import teradata
host,username,password = 'hostname','uname', 'pwd'
udaExec = teradata.UdaExec (appName="APtest", version="1.0", logConsole=False)
connect = udaExec.connect(method="odbc",system=host, username=username, password=password, dsn="dsnname")
val1='NULL'
val2='NULL'
parameters={'param1':val1, 'param2': val2}
qry="""
SELECT number
FROM table
WHERE number = %(param1)s
AND col=%(param2)s
"""
connect.execute(qry, parameters)
Error:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/tmp/lib/python2.7/site-packages/teradata/udaexec.py", line 675, in execute
self.internalCursor.execute(query, params, **kwargs)
File "/tmp/lib/python2.7/site-packages/teradata/udaexec.py", line 745, in execute
self._execute(self.cursor.execute, query, params, **kwargs)
File "/tmp/lib/python2.7/site-packages/teradata/udaexec.py", line 787, in _execute
logParamCharLimit)
File "/tmp/lib/python2.7/site-packages/teradata/udaexec.py", line 875, in _getParamsString
if isinstance(params[0], (list, tuple)):
KeyError: 0
If I write the query in the manner below then it works, but I have a very long list of parameters and therefore need them in a separate parameters variable.
This works:
qry="""
SELECT number
FROM table
WHERE number = '%s'
AND col='%s'
""" % (val1, val2)
Apparently, teradata does not support dictionaries for parameters. Use a list instead.
parameters = [val1, val2]
qry="""
SELECT number
FROM table
WHERE number = %s
AND col=%s
"""
connect.execute(qry, parameters)

superset slice query error

When I press the Query button on the slice page of Superset,
the following error occurs.
Traceback (most recent call last):
File "/home/sf400002285/superset_env/venv/lib/python3.4/site-packages/superset/viz.py", line 251, in get_payload
data = self.get_data(df)
File "/home/sf400002285/superset_env/venv/lib/python3.4/site-packages/superset/viz.py", line 1106, in get_data
row = df.groupby(self.groupby).sum()[self.metrics[0]].copy()
File "/home/sf400002285/superset_env/venv/lib/python3.4/site-packages/pandas/core/generic.py", line 4416, in groupby
**kwargs)
File "/home/sf400002285/superset_env/venv/lib/python3.4/site-packages/pandas/core/groupby.py", line 1699, in groupby
return klass(obj, by, **kwds)
File "/home/sf400002285/superset_env/venv/lib/python3.4/site-packages/pandas/core/groupby.py", line 392, in __init__
mutated=self.mutated)
File "/home/sf400002285/superset_env/venv/lib/python3.4/site-packages/pandas/core/groupby.py", line 2690, in _get_grouper
raise KeyError(gpr)
KeyError: 'SCODE'
The SQL query is as follows.
SELECT "SCODE",
"sum__COUNT"
FROM
(SELECT "SCODE" AS "SCODE",
sum(COUNT) AS "sum__COUNT"
FROM
(SELECT CASE 性別コード
WHEN 5 THEN
WHEN 6 THEN '男性既婚'
WHEN 7 THEN '女性独身'
WHEN 8 THEN '女性既婚'
else 'その他'
END as scode ,
COUNT(*) count
FROM 顧客特定
GROUP BY 性別コード) expr_qry
GROUP BY "SCODE"
ORDER BY sum(COUNT) DESC)
WHERE ROWNUM <= 50000
version
superset = 0.19.1
cx_Oracle = 5.3
Please tell me how to solve it

Streaming results with Blaze and SqlAlchemy

I am trying to use Blaze/Odo to read a large (~70M rows) result set from Redshift. By default SqlAlchemy will try to read the whole result into memory, before starting to process it. This can be prevented by either
execution_options(stream_results=True) on the engine/session or yield_per(sane_number) on the query. When working from Blaze, SqlAlchemy queries are generated behind the covers, leaving the execution_options approach. Unfortunately the following throws an error.
from sqlalchemy import create_engine
from blaze import Data
redshift_params = (redshift_user, redshift_pass, redshift_endpoint, port, dbname)
engine_string = "redshift+psycopg2://%s:%s@%s:%d/%s" % redshift_params
engine = create_engine(engine_string,
execution_options=dict(stream_results=True)
)
db = Data(engine)
The exception is:
...
/home/mahler/anaconda/lib/python2.7/site-packages/sqlalchemy/engine/result.pyc in __buffer_rows(self)
1124 return
1125 size = getattr(self, '_bufsize', 1)
-> 1126 self.__rowbuffer = collections.deque(self.cursor.fetchmany(size))
1127 self._bufsize = self.size_growth.get(size, size)
1128 if self._max_row_buffer is not None:
InternalError: (psycopg2.InternalError) opening multiple cursors from within the same client connection is not allowed.
If I leave out the execution_options=dict(stream_results=True) then the above works, but doing something like
odo(db.mytable, 'mytable.bcolz')
will run out of memory for large tables.
Using execution_options(stream_results=True) does work with pandas.read_sql_query. The following code works fine, using only moderate amounts of memory:
from sqlalchemy import create_engine
import pandas as pd
redshift_params = (redshift_user, redshift_pass, redshift_endpoint, port, dbname)
engine_string = "postgresql+psycopg2://%s:%s@%s:%d/%s" % redshift_params
engine = create_engine(engine_string,
execution_options=dict(stream_results=True)
)
compression='bz2'
res = pd.read_sql_query(queryString
engine,
chunksize=2**20)
for i, df in enumerate(res):
df.to_csv('results-%s.csv.%s' % (i, compression), compression=compression)
This is the complete stack trace:
...
Data(engine)
No handlers could be found for logger "sqlalchemy.pool.QueuePool"
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/home/mahler/anaconda/lib/python2.7/site-packages/blaze/interactive.py", line 122, in Data
dshape = discover(data)
File "/home/mahler/anaconda/lib/python2.7/site-packages/multipledispatch/dispatcher.py", line 164, in __call__
return func(*args, **kwargs)
File "/home/mahler/anaconda/lib/python2.7/site-packages/odo/backends/sql.py", line 242, in discover
return discover(metadata)
File "/home/mahler/anaconda/lib/python2.7/site-packages/multipledispatch/dispatcher.py", line 164, in __call__
return func(*args, **kwargs)
File "/home/mahler/anaconda/lib/python2.7/site-packages/odo/backends/sql.py", line 248, in discover
metadata.reflect(views=metadata.bind.dialect.supports_views)
File "/home/mahler/anaconda/lib/python2.7/site-packages/sqlalchemy/sql/schema.py", line 3623, in reflect
bind.dialect.get_view_names(conn, schema)
File "<string>", line 2, in get_view_names
File "/home/mahler/anaconda/lib/python2.7/site-packages/sqlalchemy/engine/reflection.py", line 42, in cache
return fn(self, con, *args, **kw)
File "/home/mahler/anaconda/lib/python2.7/site-packages/sqlalchemy/dialects/postgresql/base.py", line 2347, in get_view_names
for row in connection.execute(s)]
File "/home/mahler/anaconda/lib/python2.7/site-packages/sqlalchemy/engine/result.py", line 713, in __iter__
row = self.fetchone()
File "/home/mahler/anaconda/lib/python2.7/site-packages/sqlalchemy/engine/result.py", line 1026, in fetchone
self.cursor, self.context)
File "/home/mahler/anaconda/lib/python2.7/site-packages/sqlalchemy/engine/base.py", line 1341, in _handle_dbapi_exception
exc_info
File "/home/mahler/anaconda/lib/python2.7/site-packages/sqlalchemy/util/compat.py", line 200, in raise_from_cause
reraise(type(exception), exception, tb=exc_tb)
File "/home/mahler/anaconda/lib/python2.7/site-packages/sqlalchemy/engine/result.py", line 1017, in fetchone
row = self._fetchone_impl()
File "/home/mahler/anaconda/lib/python2.7/site-packages/sqlalchemy/engine/result.py", line 1139, in _fetchone_impl
self.__buffer_rows()
File "/home/mahler/anaconda/lib/python2.7/site-packages/sqlalchemy/engine/result.py", line 1126, in __buffer_rows
self.__rowbuffer = collections.deque(self.cursor.fetchmany(size))
sqlalchemy.exc.InternalError: (psycopg2.InternalError) opening multiple cursors from within the same client connection is not allowed.

Categories

Resources