Retrieve query results as dict in SQLAlchemy - python

I am using Flask-SQLAlchemy and I have the following code to get users from the database with a raw SQL query against a MySQL database:
connection = engine.raw_connection()
cursor = connection.cursor()
cursor.execute("SELECT * from User where id=0")
results = cursor.fetchall()
The results variable is a tuple and I want it to be of type dict(). Is there a way to achieve this?
When I was using pymysql to build the db connection I was able to do
cursor = connection.cursor(pymysql.cursors.DictCursor)
Is there something similar in SQLAlchemy?
Note: The reason I want to make this change is to get rid of pymysql in my code and use only SQLAlchemy features, i.e. I do not want to have `import pymysql` anywhere in my code.

results is a tuple and I want it to be of type dict()
Updated answer for SQLAlchemy 1.4:
Version 1.4 has deprecated the old engine.execute() pattern and changed the way .execute() operates internally. .execute() now returns a CursorResult object with a .mappings() method:
import sqlalchemy as sa
from pprint import pprint
# …
with engine.begin() as conn:
    qry = sa.text("SELECT FirstName, LastName FROM clients WHERE ID < 3")
    resultset = conn.execute(qry)
    results_as_dict = resultset.mappings().all()
pprint(results_as_dict)
"""
[{'FirstName': 'Gord', 'LastName': 'Thompson'},
{'FirstName': 'Bob', 'LastName': 'Loblaw'}]
"""
(Previous answer for SQLAlchemy 1.3)
SQLAlchemy already does this for you if you use engine.execute instead of raw_connection(). With engine.execute, fetchone will return a SQLAlchemy Row object and fetchall will return a list of Row objects. Row objects can be accessed by key, just like a dict:
sql = "SELECT FirstName, LastName FROM clients WHERE ID = 1"
result = engine.execute(sql).fetchone()
print(type(result)) # <class 'sqlalchemy.engine.result.Row'>
print(result['FirstName']) # Gord
If you need a true dict object then you can just convert it:
my_dict = dict(result)
print(my_dict) # {'FirstName': 'Gord', 'LastName': 'Thompson'}
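Note that under SQLAlchemy 1.4+ a Row is tuple-like, so dict(result) no longer works directly; the equivalent there is dict(result._mapping).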

If raw_connection() is returning a PyMySQL Connection object then you can continue to use DictCursor like so:
engine = create_engine("mysql+pymysql://root:whatever@localhost:3307/mydb")
connection = engine.raw_connection()
cursor = connection.cursor(pymysql.cursors.DictCursor)
cursor.execute("SELECT 1 AS foo, 'two' AS bar")
result = cursor.fetchall()
print(result) # [{'foo': 1, 'bar': 'two'}]

You can use the SQLAlchemy cursor and the cursor's description attribute:
from flask_sqlalchemy import SQLAlchemy

def rows_as_dicts(cursor):
    """Convert tuple results to dicts using the cursor's column metadata."""
    col_names = [i[0] for i in cursor.description]
    return [dict(zip(col_names, row)) for row in cursor]

db = SQLAlchemy(app)

# get the raw DBAPI cursor
cursor = db.session.execute(sql).cursor

# tuple result to dict
result = rows_as_dicts(cursor)

I would personally use pandas:
import pandas as pd
connection = engine.raw_connection()
df = pd.read_sql_query('SELECT * from User where id=0', connection)
mydict = df.to_dict()
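One caveat: df.to_dict() defaults to a column-oriented dict of dicts. If you want one dict per row (the shape a DictCursor would give you), pass orient='records':
# one dict per row, e.g. [{'id': 0, 'name': '...'}]
mydict = df.to_dict(orient='records')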

Increase speed of SQLAlchemy Insert.execute()

Consider the following working code that copies a source SQLite database to a target SQLite database:
# Create the two databases.
import sqlite3
import pandas as pd
import time
cn_src = sqlite3.connect('source.db')
df=pd.DataFrame({"x":[1,2],"y":[2.0,3.0]})
df.to_sql("A", cn_src, if_exists="replace", index=False)
cn_tgt = sqlite3.connect('target.db')
cn_src.close()
cn_tgt.close()
from sqlalchemy import create_engine, MetaData, event
from sqlalchemy.sql import sqltypes

# create sqlalchemy connection
src_engine = create_engine("sqlite:///source.db")
src_metadata = MetaData(bind=src_engine)
exclude_tables = ('sqlite_master', 'sqlite_sequence', 'sqlite_temp_master')

tgt_engine = create_engine("sqlite:///target.db")
tgt_metadata = MetaData(bind=tgt_engine)

@event.listens_for(src_metadata, "column_reflect")
def genericize_datatypes(inspector, tablename, column_dict):
    column_dict["type"] = column_dict["type"].as_generic(allow_nulltype=True)

tgt_conn = tgt_engine.connect()
tgt_metadata.reflect()

# delete tables in target database.
for table in reversed(tgt_metadata.sorted_tables):
    if table.name not in exclude_tables:
        print('dropping table =', table.name)
        table.drop()

tgt_metadata.clear()
tgt_metadata.reflect()
src_metadata.reflect()

# copy table
for table in src_metadata.sorted_tables:
    if table.name not in exclude_tables:
        table.create(bind=tgt_engine)

# Update meta information
tgt_metadata.clear()
tgt_metadata.reflect()

# Copy data
for table in tgt_metadata.sorted_tables:
    src_table = src_metadata.tables[table.name]
    stmt = table.insert()
    for index, row in enumerate(src_table.select().execute()):
        print("table =", table.name, "Inserting row", index)
        start = time.time()
        stmt.execute(row._asdict())
        end = time.time()
        print(end - start)
The code was mainly borrowed from another source. The problem is that the time end-start is about 0.017 seconds on my computer, which is too slow. Is there any way to speed this up? I have tried setting isolation_level=None in create_engine, but no luck.
It seems that the Insert object has no executemany method, so we can't use bulk inserting.

It seems that the Insert object has no executemany method, so we can't use bulk inserting.
SQLAlchemy does not implement separate execute() and executemany() methods. Its execute() method looks at the parameters it receives and:
- if they consist of a single dict object (i.e., a single row), it calls execute() at the driver level, or
- if they consist of a list of dict objects (i.e., multiple rows), it calls executemany() at the driver level (see the sketch below).
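For illustration, a minimal sketch of both cases, assuming the tbl table and conn connection defined in the snippet that follows:
import sqlalchemy as sa
# a single dict -> execute() at the driver level
conn.execute(sa.insert(tbl), {"id": 1, "txt": "foo"})
# a list of dicts -> executemany() at the driver level
conn.execute(sa.insert(tbl), [{"id": 2, "txt": "bar"}, {"id": 3, "txt": "baz"}])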
Note also that you are using deprecated usage patterns, specifically MetaData(bind=…). You should be doing something more like this:
import sqlalchemy as sa

engine = sa.create_engine("sqlite://")

tbl = sa.Table(
    "tbl",
    sa.MetaData(),
    sa.Column("id", sa.Integer, primary_key=True, autoincrement=False),
    sa.Column("txt", sa.String),
)
tbl.create(engine)

with engine.begin() as conn:
    stmt = sa.insert(tbl)
    params = [
        dict(id=1, txt="foo"),
        dict(id=2, txt="bar"),
    ]
    conn.execute(stmt, params)

# check results
with engine.begin() as conn:
    print(conn.exec_driver_sql("SELECT * FROM tbl").all())
    # [(1, 'foo'), (2, 'bar')]
I came up with a solution using a transaction:
# Copy data
trans = tgt_conn.begin()
for table in tgt_metadata.sorted_tables:
    src_table = src_metadata.tables[table.name]
    stmt = table.insert().execution_options(autocommit=False)
    for index, row in enumerate(src_table.select().execute()):
        tgt_conn.execute(stmt, row._asdict())  # must use tgt_conn.execute(), not stmt.execute()
trans.commit()
tgt_conn.close()
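The transaction helps because SQLite then performs a single commit (and disk sync) for the whole copy instead of one per insert, which is where most of the per-row overhead comes from.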

Read PostgreSQL array data in Python

After running this query with Python's psycopg2:
SELECT
id,
array_agg(еnty_pub_uuid) AS ptr_entity_public
FROM table
GROUP BY id
I get back an array:
{a630e0a3-c544-11ea-9b8c-b73c488956ba,c2f03d24-2402-11eb-ab91-3f8e49eb63e7}
How can I parse this to a list in python?
Is there a builtin function in psycopg2?
psycopg2 takes care of type conversions between Python and Postgres:
import psycopg2

conn = psycopg2.connect("...")
cur = conn.cursor()
cur.execute(
    "select user_id, array_agg(data_name) from user_circles where user_id = '81' group by user_id"
)
res = cur.fetchall()
print(res[0])
print(type(res[0][1]))
Out:
('81', ['f085b2e3-b943-429e-850f-4ecf358abcbc', '65546d63-be96-4711-a4c1-a09f48fbb7f0', '81d03c53-9d71-4b18-90c9-d33322b0d3c6', '00000000-0000-0000-0000-000000000000'])
<class 'list'>
You need to register the UUID type so Python and Postgres can infer the types:
import psycopg2.extras

psycopg2.extras.register_uuid()

sql = """
    SELECT
        id,
        array_agg(еnty_pub_uuid) AS ptr_entity_public
    FROM table
    GROUP BY id
"""

cursor = con.cursor()
cursor.execute(sql)
results = cursor.fetchall()

for r in results:
    print(type(r[1]))
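Once register_uuid() is in place, each array element comes back as a uuid.UUID instance rather than a string.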

Python Script to Execute SQL in Loop

I am trying to write a Python script which calls MySQL queries.
I am relying on the first query's result to run the second query. The problem is that it's throwing an error.
Error: cursor2.execute(field_sql, id)
import boto3
import importlib
import psutil
import pymysql
import pymysql.cursors
import subprocess
import sys

rdsConn = pymysql.connect(host='XXXX',
                          db='XXXX',
                          user='XXXX',
                          password='XXXX',
                          charset='utf8mb4',
                          cursorclass=pymysql.cursors.DictCursor)

cursor1 = rdsConn.cursor()
cursor2 = rdsConn.cursor()

name = 'Test'
sql = "select id from Table1 where name = %s"
cursor1.execute(sql, name)
result = cursor1.fetchall()

for id in result:
    field_sql = "select columnname from Table2 where id = %s"
    cursor2.execute(field_sql, id)
    fieldresult = cursor2.fetchall()
    for fieldrow in fieldresult:
        print(fieldrow)

cursor1.close()
cursor2.close()
rdsConn.close()
Your query uses a dict cursor, so it will return a list of dicts, e.g.:
[{'id': 1}, {'id': '2'}, ...]
That means your id* will be a dict, not a tuple as it would otherwise be, so you are passing your arguments to the second query as a dict. If you do that, you need to use named parameters in the pyformat style:
for rowdict in result:
    field_sql = "select columnname from Table2 where id = %(id)s"
    cursor2.execute(field_sql, rowdict)
    fieldresult = cursor2.fetchall()
    for fieldrow in fieldresult:
        print(fieldrow)
You'll see that the printed fieldrows are also dicts.
Also, query parameters should be passed either as a dict (named parameters) or as a tuple (positional parameters). pymysql accepts the form cursor.execute(sql, "name"), but other DB-API 2 connectors don't. The canonical form would be cursor.execute(sql, ("name",)).
* By the way, you shouldn't use id as a name; it shadows the built-in id function.
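For reference, a minimal sketch of both styles against the tables from the question:
# positional (tuple) parameters -- note the trailing comma for a 1-tuple
cursor1.execute("select id from Table1 where name = %s", (name,))
# named (dict) parameters -- keys must match the %(...)s placeholders
cursor2.execute("select columnname from Table2 where id = %(id)s", {"id": 1})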

Replace L, in SQL results in python

I'm running pyodbc connected to my db, and when I run a simple query I get a load of results back such as
(7L, ), (12L,) etc.
How do I replace the 'L, ' with '' so I can pass the ids into another query?
Thanks
Here's my code
import pyodbc

cnxn = pyodbc.connect('DSN=...;UID=...;PWD=...', ansi=True)
cursor = cnxn.cursor()
rows = cursor.execute("select id from orders")
for row in rows:
    test = cursor.execute("select name from customer where order_id = %(id)s" % {'id': row})
    print test
Use parameters:
...
test = cursor.execute("select name from customer where order_id = ?", row.id)
...
The L after the number indicates that the value is a long type.
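Note also that the question's loop re-executes on the same cursor it is iterating over, and in pyodbc a new execute() discards the cursor's pending result set, so only the first id would ever be processed. A sketch that fetches the ids up front (keeping the question's Python 2 style):
ids = [row.id for row in cursor.execute("select id from orders").fetchall()]
for order_id in ids:
    row = cursor.execute(
        "select name from customer where order_id = ?", order_id
    ).fetchone()
    if row is not None:
        print row.name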

Using dict_cursor in django

To get a cursor in django I do:
from django.db import connection
cursor = connection.cursor()
How would I get a dict cursor in Django, the equivalent of:
import MySQLdb
connection = (establish connection)
dict_cursor = connection.cursor(MySQLdb.cursors.DictCursor)
Is there a way to do this in Django? When I tried cursor = connection.cursor(MySQLdb.cursors.DictCursor) I got an Exception Value: cursor() takes exactly 1 argument (2 given). Or do I need to connect directly with the python-mysql driver?
The Django docs suggest using dictfetchall:
def dictfetchall(cursor):
    "Returns all rows from a cursor as a dict"
    desc = cursor.description
    return [
        dict(zip([col[0] for col in desc], row))
        for row in cursor.fetchall()
    ]
Is there a performance difference between using this and creating a dict_cursor?
No, there is no such support for DictCursor in Django, but you can write a small function to do that for you. See the docs: Executing custom SQL directly:
def dictfetchall(cursor):
    "Returns all rows from a cursor as a dict"
    desc = cursor.description
    return [
        dict(zip([col[0] for col in desc], row))
        for row in cursor.fetchall()
    ]
>>> cursor.execute("SELECT id, parent_id from test LIMIT 2");
>>> dictfetchall(cursor)
[{'parent_id': None, 'id': 54360982L}, {'parent_id': None, 'id': 54360880L}]
This is easily done with Postgres at least; I'm sure MySQL has something similar (Django 1.11):
from django.db import connections
from psycopg2.extras import NamedTupleCursor

def scan_tables(app):
    conn = connections['default']
    conn.ensure_connection()
    with conn.connection.cursor(cursor_factory=NamedTupleCursor) as cursor:
        cursor.execute("SELECT table_name, column_name "
                       "FROM information_schema.columns AS c "
                       "WHERE table_name LIKE '{}_%'".format(app))
        columns = cursor.fetchall()
    for column in columns:
        print(column.table_name, column.column_name)

scan_tables('django')
Obviously, feel free to use DictCursor, RealDictCursor, LoggingCursor, etc.
The following code converts the result set into a dictionary:
from django.db import connections

cursor = connections['default'].cursor()
# assumes a query has already been run via cursor.execute(...)
columns = (x.name for x in cursor.description)
result = cursor.fetchone()
result = dict(zip(columns, result))
If the result set has multiple rows, iterate over the cursor instead:
columns = [x.name for x in cursor.description]
for row in cursor:
    row = dict(zip(columns, row))
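Relying on x.name assumes a driver like psycopg2 whose cursor.description entries are named tuples; the DB-API itself only guarantees plain 7-item sequences, so x[0] is the portable spelling.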
The main purpose of using RealDictCursor is to get the data as a list of dictionaries. Here is a solution that does that without using the Django ORM:
def fun(request):
    from django.db import connections
    import json
    from psycopg2.extras import RealDictCursor

    con = connections['default']
    con.ensure_connection()
    cursor = con.connection.cursor(cursor_factory=RealDictCursor)
    cursor.execute("select * from Customer")
    rows = cursor.fetchall()
    rows = json.dumps(rows)
output:
[{...},{...},{......}]
