Backstory
I have a questionnaire that asks sensitive questions, most of which are true/false. The majority of the time the values are false, which poses a challenge when keeping the data private at rest. When each question is encrypted into a separate column, it is easy to tell which values are true and which are false with a bit of guessing. To combat this, the questions and answers are put into a dictionary object along with some salt (nonsense that changes randomly) and then encrypted, making it impossible to know the answers without the key.
Method
Below is an example of the model used to encrypt the data with salt at rest, making it impossible to look at the stored value and know its contents.
import random
from typing import Optional

import sqlalchemy as sa
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy_utils.types import JSONType
from sqlalchemy_utils.types.encrypted.encrypted_type import StringEncryptedType, AesEngine

Base = declarative_base()

class SensitiveQuestionnaire(Base):
    __tablename__ = 'sensitive_questionnaire'  # name assumed; declarative requires one

    user_id = sa.Column(sa.Integer, primary_key=True, autoincrement=True)
    _data: dict = sa.Column(StringEncryptedType(JSONType, 'secret', AesEngine, 'pkcs5'),
                            nullable=False, default=lambda: {'_salt': salt_shaker()})

    # values are viewed using a python property that looks into the `_data` dict
    @property
    def sensitive_question(self) -> Optional[bool]:
        return self._data.get('sensitive_question')

    # values are set into the `_data` dict
    @sensitive_question.setter
    def sensitive_question(self, value: bool) -> None:
        self._data['sensitive_question'] = value

    # in a real example there would be 20+ properties that map to questions

    def __init__(self, **kwargs):
        # SQLAlchemy does not call __init__ when loading rows, so we are free
        # to set object defaults here
        self._data = {'_salt': salt_shaker()}
        for key in kwargs:
            setattr(self, key, kwargs[key])

    @property
    def _salt(self) -> str:
        return self._data['_salt']

def salt_shaker() -> str:
    return ''.join(random.choice('hldjs..') for i in range(50))
The Problem
After the SensitiveQuestionnaire object is initialized, none of the changes are persisted to the database.
# GIVEN a questionnaire
questionnaire = model.SensitiveQuestionnaire(user_id=1)
db.session.add(questionnaire)
db.session.commit()

# WHEN updating the questionnaire and saving it to the database
questionnaire.sensitive_question = True
db.session.commit()

# AND getting the questionnaire from the database
db_questionnaire = model.SensitiveQuestionnaire.query \
    .filter(model.SensitiveQuestionnaire.user_id == 1).first()

# THEN the sensitive_question value is persisted
assert db_questionnaire.sensitive_question is True
The value of db_questionnaire.sensitive_question is None when it should be True.
After spending the better part of a day figuring this out, the cause of the issue is how SQLAlchemy knows when there is a change. The short version is that SQLAlchemy hooks into Python's __setitem__ to call its changed() method, letting it know there was a change. More info can be found in SQLAlchemy's docs.
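To make that concrete, here is a minimal sketch of how a mutable dict type reports changes, closely following the pattern in the sqlalchemy.ext.mutable docs (the class name TrackedDict is mine):

from sqlalchemy.ext.mutable import Mutable

class TrackedDict(Mutable, dict):
    """Dict subclass that notifies the ORM of in-place mutations."""

    @classmethod
    def coerce(cls, key, value):
        # convert plain dicts to TrackedDict as they are assigned
        if not isinstance(value, TrackedDict):
            if isinstance(value, dict):
                return TrackedDict(value)
            return Mutable.coerce(key, value)
        return value

    def __setitem__(self, key, value):
        dict.__setitem__(self, key, value)
        self.changed()  # flags the owning parent object as dirty

    def __delitem__(self, key):
        dict.__delitem__(self, key)
        self.changed()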
The answer is to wrap the StringEncryptedType in a MutableDict type.
Mutation Tracking
Provide support for tracking of in-place changes to scalar values, which are propagated into ORM change events on owning parent objects.
From SQLAlchemy's docs: https://docs.sqlalchemy.org/en/13/orm/extensions/mutable.html
Solution
Condensed version... wrapping the StringEncryptedType in a MutableDict:

_data: dict = sa.Column(
    MutableDict.as_mutable(StringEncryptedType(JSONType, 'secret', AesEngine, 'pkcs5')),
    nullable=False, default=lambda: {'_salt': salt_shaker()})
Full version from the question above
import random
from typing import Optional

import sqlalchemy as sa
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.ext.mutable import MutableDict
from sqlalchemy_utils.types import JSONType
from sqlalchemy_utils.types.encrypted.encrypted_type import StringEncryptedType, AesEngine

Base = declarative_base()

class SensitiveQuestionnaire(Base):
    __tablename__ = 'sensitive_questionnaire'  # name assumed; declarative requires one

    user_id: int = sa.Column(sa.Integer, primary_key=True, autoincrement=True)

    # The MutableDict.as_mutable below is what changed!
    _data: dict = sa.Column(
        MutableDict.as_mutable(StringEncryptedType(JSONType, 'secret', AesEngine, 'pkcs5')),
        nullable=False, default=lambda: {'_salt': salt_shaker()})

    @property
    def sensitive_question(self) -> Optional[bool]:
        return self._data.get('sensitive_question')

    # values are set into the `_data` dict
    @sensitive_question.setter
    def sensitive_question(self, value: bool) -> None:
        self._data['sensitive_question'] = value

    # in a real example there would be 20+ properties that map to questions

    def __init__(self, **kwargs):
        self._data = {'_salt': salt_shaker()}
        for key in kwargs:
            setattr(self, key, kwargs[key])

    @property
    def _salt(self) -> str:
        return self._data['_salt']

def salt_shaker() -> str:
    return ''.join(random.choice('hldjs..') for i in range(50))
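With the MutableDict wrapper in place, the property setter's self._data['sensitive_question'] = value goes through MutableDict.__setitem__, so the session sees the change and the original test should now pass:

questionnaire = model.SensitiveQuestionnaire(user_id=1)
db.session.add(questionnaire)
db.session.commit()

# the setter mutates _data in place; MutableDict reports it to the session
questionnaire.sensitive_question = True
db.session.commit()

db_questionnaire = model.SensitiveQuestionnaire.query \
    .filter(model.SensitiveQuestionnaire.user_id == 1).first()
assert db_questionnaire.sensitive_question is True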
I have the following relationship set up in a model:
role_profiles = Table('roleprofile', Base.metadata,
    Column('role_id', Integer, ForeignKey('role.id')),
    Column('profile_id', Integer, ForeignKey('profile.id'))
)

class profile(Base):
    __tablename__ = 'profile'
    # Columns...
    roles = relationship('role', secondary=role_profiles, backref='profiles')

class role(Base):
    __tablename__ = 'role'
    # Columns...
As I now understand it, the roles property on a profile object contains a list of role instances (which it does).
What I want to do is serialize each property of the model class generically. It works fine for the top-level profile class, where I detect the list of roles and recurse into it:
# I need a statement here to check if the field.value is a backref
# if field.value is backref:
#     continue
if isinstance(field.value, list):
    # Get the json for the list
    value = serialize.serialize_to_json(field.value)
else:
    # Get the json for the value
    value = cls._serialize(field.value)
The problem is that the backref of the relationship adds a pointer back to the profile. The same profile is then serialized again, and it recurses into the roles over and over until a stack overflow.
Is there a way to determine that the property is a backref added by the relationship?
Update
Maybe I should add that it works fine in this case if I remove the backref, since I don't strictly need it, but I would like to keep it in.
Update
As a temporary fix I added a class property to my base class:
class BaseModelMixin(object):
    """Base mixin for models using stamped data"""
    __backref__ = None
and add it like this:
class role(Base):
    __tablename__ = 'role'
    __backref__ = ('profiles', )
    # Columns...
and use it like this in my recursion:
if self.__backref__ and property_name in self.__backref__:
    continue
If there is a better way please let me know because this doesn't look optimal.
Not sure if this is the best practice, but this code works for me. It returns True if the attribute is a reference, False if a regular column type.
def is_relation(orm_object, attr_name):
    return hasattr(getattr(orm_object.__class__, attr_name).property, 'mapper')
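For example, with the profile model from the question (assuming it also has a plain id column):

p = profile()
print(is_relation(p, 'roles'))  # True:  a RelationshipProperty has a .mapper
print(is_relation(p, 'id'))     # False: a ColumnProperty does not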
You can create a __relationships__ @property in your BaseModelMixin class that lists the names of all relationships that were not created as a backref in the model.
class BaseModelMixin(object):
    """Base mixin for models using stamped data"""

    @property
    def __relationships__(self):
        """
        Return a list of relationship names which were not created as a
        backref name in the model.
        """
        back_ref_relationships = list()
        items = self.__mapper__.relationships.items()
        for (key, value) in items:
            if isinstance(value.backref, tuple):
                back_ref_relationships.append(key)
        return back_ref_relationships
Since you have the two classes profile and role:
>>> p = profile()
>>> p.__relationships__
# ['roles']
>>> r = role()
>>> r.__relationships__
# []
Have a look at inspect, e.g.:
from sqlalchemy import inspect
mapper = inspect(MyModelClass)
# dir(mapper)
# mapper.relationships.keys()
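Building on that, a hedged sketch of how inspect() could answer the original question: a relationship generated by a backref on the other side carries no backref argument of its own, so filtering on rel.backref is one heuristic (note it also matches ordinary relationships that never declared a backref, and the exact bookkeeping varies across SQLAlchemy versions):

from sqlalchemy import inspect

def likely_backref_keys(model_cls):
    """Keys of relationships that look backref-generated (no backref of their own)."""
    return {rel.key for rel in inspect(model_cls).relationships
            if rel.backref is None}

# in the serialization loop from the question:
# if property_name in likely_backref_keys(type(obj)):
#     continue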
So say I have some classes X, Y and Z using SQLAlchemy declarative syntax to define some simple columns and relationships.
Requirements:

- At the class level, (X|Y|Z).primary_keys returns a collection of the respective class's primary keys (InstrumentedAttribute objects). I also want (X|Y|Z).relations to reference the class's relations in the same way.
- At the instance level, I would like the same attributes to reference those attributes' instantiated values, whether they've been populated using my own constructors, individual attribute setters, or whatever SQLAlchemy does when it retrieves rows from the db.
So far I have the following:
import collections
import sqlalchemy
import sqlalchemy.ext.declarative
from sqlalchemy import MetaData, Column, Table, ForeignKey, Integer, String, Date, Text
from sqlalchemy.orm import relationship, backref

class IndexedMeta(sqlalchemy.ext.declarative.DeclarativeMeta):
    """Metaclass to initialize some class-level collections on models"""
    def __new__(cls, name, bases, defaultdict):
        cls.pk_columns = set()
        cls.relations = collections.namedtuple('RelationshipItem', 'one many')(set(), set())
        return super().__new__(cls, name, bases, defaultdict)

Base = sqlalchemy.ext.declarative.declarative_base(metaclass=IndexedMeta)

def build_class_lens(cls, key, inst):
    """Populates the 'indexes' of primary key and relationship attributes
    with the attributes' names. Additionally, separates "x to many"
    relationships from "x to one" relationships and associates "x to one"
    relationships with the local-side foreign key column."""
    if isinstance(inst.property, sqlalchemy.orm.properties.ColumnProperty):
        if inst.property.columns[0].primary_key:
            cls.pk_columns.add(inst.key)
    elif isinstance(inst.property, sqlalchemy.orm.properties.RelationshipProperty):
        if inst.property.direction.name in ('MANYTOONE', 'ONETOONE'):
            local_column = cls.__mapper__.get_property_by_column(inst.property.local_side[0]).key
            cls.relations.one.add((local_column, inst.key))
        else:
            cls.relations.many.add(inst.key)

sqlalchemy.event.listen(Base, 'attribute_instrument', build_class_lens)
sqlalchemy.event.listen(Base, 'attribute_instrument', build_class_lens)
class Meeting(Base):
    __tablename__ = 'meetings'

    def __init__(self, memo):
        self.memo = memo

    id = Column(Integer, primary_key=True)
    date = Column(Date)
    memo = Column('note', String(60), nullable=True)
    category_name = Column('category', String(60), ForeignKey('categories.name'))
    category = relationship("Category", backref=backref('meetings'))
    topics = relationship("Topic",
                          secondary=meetings_topics,
                          backref="meetings")
...
...
OK, so that gets me by at the class level, though I feel like I am doing silly things with metaclasses, and I get some strange intermittent errors where the sqlalchemy module allegedly isn't recognized in build_class_lens and evaluates to NoneType.
I am not quite sure how I should proceed at the instance level.
I've looked into the events interface. I see the ORM event init, but it seems to run prior to the __init__ function defined on my models, meaning the instance attributes haven't yet been populated at that time, so I can't build my 'lens' on them.
I also wonder if the Attribute event set might be of help. That is my next try, though I still wonder if it is the most appropriate way.
All in all I really wonder if I am missing some really elegant way to approach this problem.
I think the metaclass thing with declarative goes by the old XML saying, "if you have a problem, and use XML, now you have two problems". The metaclass in Python is useful pretty much as a hook to detect the construction of new classes, and that's about it. We now have enough events that there shouldn't be any need to use a metaclass beyond what declarative already does.
In this case I'd go a little further and say that the approach of trying to actively build up these collections is not really worth it - it's much easier to generate them lazily, as below:
from sqlalchemy import *
from sqlalchemy.orm import *
from sqlalchemy.ext.declarative import declarative_base
import collections
from sqlalchemy.orm.properties import RelationshipProperty

class memoized_classproperty(object):
    """A decorator that evaluates once at the class level,
    assigns the new value to the class.
    """
    def __init__(self, fget, doc=None):
        self.fget = fget
        self.__doc__ = doc or fget.__doc__
        self.__name__ = fget.__name__

    def __get__(desc, self, cls):
        result = desc.fget(cls)
        setattr(cls, desc.__name__, result)
        return result

class Lens(object):
    @memoized_classproperty
    def pk_columns(cls):
        return class_mapper(cls).primary_key

    @memoized_classproperty
    def relations(cls):
        props = collections.namedtuple('RelationshipItem', 'one many')(set(), set())
        # 0.8 will have "inspect(cls).relationships" here
        mapper = class_mapper(cls)
        for item in mapper.iterate_properties:
            if isinstance(item, RelationshipProperty):
                if item.direction.name in ('MANYTOONE', 'ONETOONE'):
                    local_column = mapper.get_property_by_column(item.local_side[0]).key
                    props.one.add((local_column, item.key))
                else:
                    props.many.add(item.key)
        return props
Base = declarative_base(cls=Lens)

meetings_topics = Table("meetings_topics", Base.metadata,
    Column('topic_id', Integer, ForeignKey('topic.id')),
    Column('meetings_id', Integer, ForeignKey('meetings.id')),
)

class Meeting(Base):
    __tablename__ = 'meetings'

    def __init__(self, memo):
        self.memo = memo

    id = Column(Integer, primary_key=True)
    date = Column(Date)
    memo = Column('note', String(60), nullable=True)
    category_name = Column('category', String(60), ForeignKey('categories.name'))
    category = relationship("Category", backref=backref('meetings'))
    topics = relationship("Topic",
                          secondary=meetings_topics,
                          backref="meetings")

class Category(Base):
    __tablename__ = 'categories'
    name = Column(String(50), primary_key=True)

class Topic(Base):
    __tablename__ = 'topic'
    id = Column(Integer, primary_key=True)

print Meeting.pk_columns
print Meeting.relations.one

# assignment is OK, since prop is memoized
Meeting.relations.one.add("FOO")
print Meeting.relations.one
I have a SQLAlchemy model where almost all tables/objects have a notes field. So, to follow the DRY principle, I moved the field to a mixin class.
class NotesMixin(object):
    notes = sa.Column(sa.String(4000), nullable=False, default='')

class Service(Base, NotesMixin):
    __tablename__ = "service"
    service_id = sa.Column(sa.Integer, primary_key=True)
    name = sa.Column(sa.String(255), nullable=False, index=True, unique=True)

class Datacenter(Base, NotesMixin):
    __tablename__ = "datacenter"
    datacenter_id = sa.Column(sa.Integer, primary_key=True)
    name = sa.Column(sa.String(255), nullable=False, index=True, unique=True)

class Network(Base, NotesMixin, StatusMixin):
    __tablename__ = "network"
    network_id = sa.Column(sa.Integer, primary_key=True)

etc...
Now the notes column is the first column in the model/db. I know it does not affect the functionality of my app, but it irritates me a bit to see notes before id, etc. Any way to move it to the end?
Found a cleaner solution:
Use the sqlalchemy.ext.declarative.declared_attr decorator in sqlalchemy 0.6.5 (sqlalchemy.util.classproperty in sqlalchemy <= 0.6.4)
class NotesMixin(object):
    @declared_attr
    def notes(cls):
        return sa.Column(sa.String(4000), nullable=False, default='')
According to the docs, this is "for columns that have foreign keys, as well as for the variety of mapper-level constructs that require destination-explicit context". While this is strictly speaking not the case here, it works because the method is called (and the column created) when the subclass is constructed, thus avoiding the need to make a copy, which means the mixin column will come at the end. Probably a better solution than hacking _creation_order...
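A quick sanity check, as a sketch against the Service model from the question (assuming the declared_attr version of NotesMixin above):

print([c.name for c in Service.__table__.columns])
# expected: ['service_id', 'name', 'notes'], with notes now last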
The easy answer: just create the database tables yourself, instead of having sqlalchemy do it with metadata.create_all().
If you don't find that acceptable, I'm afraid this would require a (small) change in sqlalchemy.ext.declarative itself, or you'd have to create your own metaclass and pass it to declarative_base() with the metaclass keyword argument. That class will then get used instead of the default DeclarativeMeta.
Explanation: sqlalchemy uses the creation order of the column properties, which it stores in the "private" attribute ._creation_order (generated when Column() is called). The declarative extension handles mixin columns by creating a copy of the column object from your mixin class and adding that to the subclass. The ._creation_order of this copy is set to the same value as the original property of the mixin class. As the mixin class is of course created first, its column properties will have a lower creation order than the subclass's.
So, to make your request possible, a new creation order should be assigned when the copy is made, rather than taking the original's. You could try to make your own metaclass based on this explanation and use that. But you might also ask the sqlalchemy developers; maybe they are willing to accept this as a bug/feature request? At least, it seems like a minor (one-line) change that would not have any effect other than the change you ask for (which arguably is better too).
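For reference, the private attribute described above is easy to observe directly (an implementation detail, not a public API):

import sqlalchemy as sa

a = sa.Column(sa.Integer)
b = sa.Column(sa.Integer)
# _creation_order is assigned when Column() runs, so later columns sort higher
assert a._creation_order < b._creation_order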
One can also change the order of columns upon CREATE TABLE compilation (here exemplified for the postgresql dialect):
from sqlalchemy.schema import CreateTable
from sqlalchemy.ext.compiler import compiles

@compiles(CreateTable, 'postgresql')
def _compile_create_table(element, compiler, **kwargs):
    element.columns = element.columns[::-1]  # reverse order of columns
    return compiler.visit_create_table(element)
This then works with metadata.create_all().
I know it has been a while, but I found a very simple solution for this:
class PriorityColumn(Column):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._creation_order = 1
This is a drop-in replacement for Column if you are working with mixins and want your derived class's attributes to come first.
class A:
    a = Column(Integer)
    b = Column(String)

class B(A, Base):
    c = PriorityColumn(Integer)
    d = PriorityColumn(Float)

# Your table will look like this:
# B(c, d, a, b)
I found that I could set the column order (to the last position) on the Mixin using:
@declared_attr
def notes(cls):
    col = sa.Column(sa.String(4000), nullable=False, default='')
    # get the highest creation order of all Column objects of this class
    last_position = max([value._creation_order
                         for key, value in vars(cls).items()
                         if isinstance(value, Column)])
    col._creation_order = last_position + 0.5
    return col

class Service(Base, NotesMixin):
    __tablename__ = "service"
    service_id = sa.Column(sa.Integer, primary_key=True)
    name = sa.Column(sa.String(255), nullable=False, index=True, unique=True)
To set the column order based on the location of another column (similar to

alter table `some_table` modify `some_column` `some_type` after `some_other_column`;

see https://stackoverflow.com/a/3822219/488331), you can use:
@declared_attr
def notes(cls):
    col = sa.Column(sa.String(4000), nullable=False, default='')
    col._creation_order = cls.some_other_column._creation_order + 0.5
    return col
NOTE: If you use + 1 you end up 2 columns back. I don't really understand why you can even use a decimal.
To set the column order based on the location of the first column (i.e. make this always the 4th column), you could do:
@declared_attr
def notes(cls):
    col = sa.Column(sa.String(4000), nullable=False, default='')
    # get the lowest creation order of all Column objects of this class
    start_position = min([value._creation_order
                          for key, value in vars(cls).items()
                          if isinstance(value, Column)])
    col._creation_order = start_position + 3.5
    return col
Is there a simple way to iterate over column name and value pairs?
My version of SQLAlchemy is 0.5.6
Here is the sample code where I tried using dict(row):
import sqlalchemy
from sqlalchemy import *
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker

print "sqlalchemy version:", sqlalchemy.__version__

engine = create_engine('sqlite:///:memory:', echo=False)
metadata = MetaData()
users_table = Table('users', metadata,
    Column('id', Integer, primary_key=True),
    Column('name', String),
)
metadata.create_all(engine)

class User(declarative_base()):
    __tablename__ = 'users'
    id = Column(Integer, primary_key=True)
    name = Column(String)

    def __init__(self, name):
        self.name = name

Session = sessionmaker(bind=engine)
session = Session()

user1 = User("anurag")
session.add(user1)
session.commit()

# uncommenting the next line throws 'TypeError: 'User' object is not iterable'
# print dict(user1)

# this one also throws 'TypeError: 'User' object is not iterable'
for u in session.query(User).all():
    print dict(u)
Running this code on my system outputs:
Traceback (most recent call last):
  File "untitled-1.py", line 37, in <module>
    print dict(u)
TypeError: 'User' object is not iterable
You may access the internal __dict__ of a SQLAlchemy object, like the following:
for u in session.query(User).all():
    print u.__dict__
As per @zzzeek in comments:

note that this is the correct answer for modern versions of SQLAlchemy, assuming "row" is a core row object, not an ORM-mapped instance.
for row in resultproxy:
    row_as_dict = row._mapping  # SQLAlchemy 1.4 and greater
    # row_as_dict = dict(row)  # SQLAlchemy 1.3 and earlier
background on row._mapping, new as of SQLAlchemy 1.4: https://docs.sqlalchemy.org/en/stable/core/connections.html#sqlalchemy.engine.Row._mapping
I couldn't get a good answer so I use this:
def row2dict(row):
    d = {}
    for column in row.__table__.columns:
        d[column.name] = str(getattr(row, column.name))
    return d
Edit: if the above function is too long and not suited for some tastes, here is a one-liner (Python 2.7+):
row2dict = lambda r: {c.name: str(getattr(r, c.name)) for c in r.__table__.columns}
In SQLAlchemy v0.8 and newer, use the inspection system.
from sqlalchemy import inspect

def object_as_dict(obj):
    return {c.key: getattr(obj, c.key)
            for c in inspect(obj).mapper.column_attrs}

user = session.query(User).first()
d = object_as_dict(user)
Note that .key is the attribute name, which can be different from the column name, e.g. in the following case:
class_ = Column('class', Text)
This method also works for column_property.
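For illustration, a hypothetical model where the attribute key differs from the column name and a column_property is present (the Lesson model and its names are invented for this example):

from sqlalchemy import Column, Integer, Text, func
from sqlalchemy.orm import column_property

class Lesson(Base):
    __tablename__ = 'lessons'
    id = Column(Integer, primary_key=True)
    class_ = Column('class', Text)  # attribute key 'class_', column name 'class'
    title = Column(Text)
    title_upper = column_property(func.upper(title))

# object_as_dict(lesson) uses the keys: 'id', 'class_', 'title', 'title_upper'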
Rows have an _asdict() method which gives a dict:
In [8]: r1 = db.session.query(Topic.name).first()
In [9]: r1
Out[9]: (u'blah',)
In [10]: r1.name
Out[10]: u'blah'
In [11]: r1._asdict()
Out[11]: {'name': u'blah'}
Assuming the following function is added to the class User, it will return all key-value pairs of all columns:
def columns_to_dict(self):
    dict_ = {}
    for key in self.__mapper__.c.keys():
        dict_[key] = getattr(self, key)
    return dict_
Unlike the other answers, this returns only those attributes of the object which are Column attributes at the class level. Therefore no _sa_instance_state, nor any other attribute that SQLAlchemy or you add to the object, is included. Reference

Edit: Forgot to say that this also works on inherited Columns.
hybrid_property extension
If you also want to include hybrid_property attributes the following will work:
from sqlalchemy import inspect
from sqlalchemy.ext.hybrid import hybrid_property

def publics_to_dict(self) -> dict:
    dict_ = {}
    for key in self.__mapper__.c.keys():
        if not key.startswith('_'):
            dict_[key] = getattr(self, key)

    for key, prop in inspect(self.__class__).all_orm_descriptors.items():
        if isinstance(prop, hybrid_property):
            dict_[key] = getattr(self, key)
    return dict_
I assume here that you mark Columns with a leading _ to indicate that you want to hide them, either because you access the attribute through a hybrid_property or because you simply do not want to show them. Reference

Tip: all_orm_descriptors also returns hybrid_method and AssociationProxy, if you want to include those as well.
Remarks to other answers
Every answer (like 1, 2) based on the __dict__ attribute simply returns all attributes of the object. This could be many more attributes than you want. As I said, this includes _sa_instance_state and any other attribute you define on the object.

Every answer (like 1, 2) based on the dict() function only works on SQLAlchemy row objects returned by session.execute(), not on the classes you define to work with, like the class User from the question.

The accepted answer, based on row.__table__.columns, will definitely not work in general: row.__table__.columns contains the column names of the SQL database. These are not necessarily equal to the attribute names of the Python object; if they differ, you get an AttributeError.
For answers (like 1, 2 ) based on class_mapper(obj.__class__).mapped_table.c it is the same.
As @balki mentioned:
The _asdict() method can be used if you're querying a specific field because it is returned as a KeyedTuple.
In [1]: foo = db.session.query(Topic.name).first()
In [2]: foo._asdict()
Out[2]: {'name': u'blah'}
Whereas, if you do not specify a column, you can use one of the other proposed methods, such as the one provided by @charlax. Note that this method is only valid for Python 2.7+.
In [1]: foo = db.session.query(Topic).first()
In [2]: {x.name: getattr(foo, x.name) for x in foo.__table__.columns}
Out[2]: {'name': u'blah'}
Old question, but since this is the first Google result for "sqlalchemy row to dict", it deserves a better answer.

The RowProxy object that SQLAlchemy returns has the items() method:
http://docs.sqlalchemy.org/en/latest/core/connections.html#sqlalchemy.engine.RowProxy.items
It simply returns a list of (key, value) tuples. So one can convert a row to dict using the following:
In Python <= 2.6:
rows = conn.execute(query)
list_of_dicts = [dict((key, value) for key, value in row.items()) for row in rows]
In Python >= 2.7:
rows = conn.execute(query)
list_of_dicts = [{key: value for (key, value) in row.items()} for row in rows]
A very simple solution: row._asdict().
sqlalchemy.engine.Row._asdict() (v1.4)
sqlalchemy.util.KeyedTuple._asdict() (v1.3)
> data = session.query(Table).all()
> [row._asdict() for row in data]
With SQLAlchemy 1.4:
session.execute(select(User.id, User.username)).mappings().all()
>> [{'id': 1, 'username': 'Bob'}, {'id': 2, 'username': 'Alice'}]
Following @balki's answer: since SQLAlchemy 0.8 you can use _asdict(), available for KeyedTuple objects. This renders a pretty straightforward answer to the original question. Just change the last two lines in your example (the for loop) to this:
for u in session.query(User).all():
    print u._asdict()
This works because in the above code u is an object of class KeyedTuple, since .all() returns a list of KeyedTuple. Therefore it has the method _asdict(), which nicely returns u as a dictionary.

WRT the answer by @STB: AFAIK, anything that .all() returns is a list of KeyedTuple. Therefore, the above works whether you specify a column or not, as long as you are dealing with the result of .all() as applied to a Query object.
from sqlalchemy.orm import class_mapper

def asdict(obj):
    return dict((col.name, getattr(obj, col.name))
                for col in class_mapper(obj.__class__).mapped_table.c)
Referring to Alex Brasetvik's answer, you can use one line of code to solve the problem:
row_as_dict = [dict(row) for row in resultproxy]
Under the comment section of Alex Brasetvik's Answer, zzzeek the creator of SQLAlchemy stated this is the "Correct Method" for the problem.
I've found this post because I was looking for a way to convert a SQLAlchemy row into a dict. I'm using SqlSoup... but the answer was built by myself, so if it helps someone, here are my two cents:
a = db.execute('select * from acquisizioni_motes')
b = a.fetchall()
c = b[0]

# and now, finally...
dict(zip(c.keys(), c.values()))
You could try to do it this way:
for u in session.query(User).all():
    print(u._asdict())
It uses a built-in method on the query result object that returns a dictionary version of the object.
references: https://docs.sqlalchemy.org/en/latest/orm/query.html
With python 3.8+, we can do this with dataclass, and the asdict method that comes with it:
from dataclasses import dataclass, asdict
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
from sqlalchemy import Column, String, Integer, create_engine

Base = declarative_base()
engine = create_engine('sqlite:///:memory:', echo=False)

@dataclass
class User(Base):
    __tablename__ = 'users'
    id: int = Column(Integer, primary_key=True)
    name: str = Column(String)
    email = Column(String)

    def __init__(self, name):
        self.name = name
        self.email = 'hello@example.com'

Base.metadata.create_all(engine)

SessionMaker = sessionmaker(bind=engine)
session = SessionMaker()

user1 = User("anurag")
session.add(user1)
session.commit()

query_result = session.query(User).one()  # type: User
print(f'{query_result.id=:}, {query_result.name=:}, {query_result.email=:}')
# query_result.id=1, query_result.name=anurag, query_result.email=hello@example.com

query_result_dict = asdict(query_result)
print(query_result_dict)
# {'id': 1, 'name': 'anurag'}
The key is to use the #dataclass decorator, and annotate each column with its type (the : str part of the name: str = Column(String) line).
Also note that since the email is not annotated, it is not included in query_result_dict.
The expression you are iterating through evaluates to a list of model objects, not rows. So the following is the correct usage of them:
for u in session.query(User).all():
    print u.id, u.name
Do you really need to convert them to dicts? Sure, there are a lot of ways, but then you don't need the ORM part of SQLAlchemy:
result = session.execute(User.__table__.select())
for row in result:
    print dict(row)
Update: Take a look at the sqlalchemy.orm.attributes module. It has a set of functions to work with object state, which might be useful for you, especially instance_dict().
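A minimal sketch of instance_dict; note it essentially returns the instance's __dict__, so SQLAlchemy's internal _sa_instance_state key can still appear and may need filtering:

from sqlalchemy.orm.attributes import instance_dict

state = instance_dict(user1)  # the instance's attribute dictionary
clean = {k: v for k, v in state.items() if not k.startswith('_sa_')}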
I've just been dealing with this issue for a few minutes.
The answer marked as correct doesn't respect the type of the fields.
The solution comes from dictalchemy, which adds some interesting features.
https://pythonhosted.org/dictalchemy/
I've just tested it and works fine.
from dictalchemy import DictableModel

Base = declarative_base(cls=DictableModel)

session.query(User).first().asdict()
{'id': 1, 'username': 'Gerald'}

session.query(User).first().asdict(exclude=['id'])
{'username': 'Gerald'}
class User(object):
    def to_dict(self):
        return dict([(k, getattr(self, k)) for k in self.__dict__.keys()
                     if not k.startswith("_")])
That should work.
You can convert a SQLAlchemy object to a dictionary like this and return it as JSON or a dict.
Helper functions:
import json
from collections import OrderedDict

def asdict(self):
    result = OrderedDict()
    for key in self.__mapper__.c.keys():
        if getattr(self, key) is not None:
            result[key] = str(getattr(self, key))
        else:
            result[key] = getattr(self, key)
    return result

def to_array(all_vendors):
    v = [ven.asdict() for ven in all_vendors]
    return json.dumps(v)
Driver Function:
def all_products():
    all_products = Products.query.all()
    return to_array(all_products)
Two ways:
1.

for row in session.execute(session.query(User).statement):
    print(dict(row))

2.

selected_columns = User.__table__.columns
rows = session.query(User).with_entities(*selected_columns).all()
for row in rows:
    print(row._asdict())
Here is how Elixir does it. The value of this solution is that it allows recursively including the dictionary representation of relations.
def to_dict(self, deep={}, exclude=[]):
    """Generate a JSON-style nested dict/list structure from an object."""
    col_prop_names = [p.key for p in self.mapper.iterate_properties
                      if isinstance(p, ColumnProperty)]
    data = dict([(name, getattr(self, name))
                 for name in col_prop_names if name not in exclude])
    for rname, rdeep in deep.iteritems():
        dbdata = getattr(self, rname)
        # FIXME: use attribute names (ie coltoprop) instead of column names
        fks = self.mapper.get_property(rname).remote_side
        exclude = [c.name for c in fks]
        if dbdata is None:
            data[rname] = None
        elif isinstance(dbdata, list):
            data[rname] = [o.to_dict(rdeep, exclude) for o in dbdata]
        else:
            data[rname] = dbdata.to_dict(rdeep, exclude)
    return data
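Hypothetical usage, assuming a Meeting model with a topics relation like the one earlier on this page; the deep dict names which relations to follow, and exclude strips columns:

meeting.to_dict(deep={'topics': {}}, exclude=['id'])
# -> {'date': ..., 'memo': ..., 'topics': [{...}, ...]}, with id excluded
#    and each topic converted recursively, its FK columns stripped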
With this code you can also add "filter" or "join" clauses to your query, and it still works:
query = session.query(User)

def query_to_dict(query):
    def _create_dict(r):
        return {c.get('name'): getattr(r, c.get('name')) for c in query.column_descriptions}
    return [_create_dict(r) for r in query]
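For example, with a filter applied (this works most cleanly when you query explicit columns, since column_descriptions then holds the column names; the data shown is assumed):

query = session.query(User.id, User.name).filter(User.name == 'anurag')
print(query_to_dict(query))
# [{'id': 1, 'name': 'anurag'}]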
For the sake of everyone and myself, here is how I use it:
from sqlalchemy import create_engine
from sqlalchemy.pool import NullPool

def run_sql(conn_string):
    output_connection = create_engine(conn_string, poolclass=NullPool).connect()
    rows = output_connection.execute('select * from db1.t1').fetchall()
    return [dict(row) for row in rows]
As OP stated, calling the dict initializer raises an exception with the message "User" object is not iterable. So the real question is how to make a SQLAlchemy Model iterable?
We'll have to implement the special methods __iter__ and __next__, but if we inherit directly from the declarative_base model, we would still run into the undesirable "_sa_instance_state" key. What's worse, we would have to loop through __dict__.keys() for every call to __next__, because the keys() method returns a view, an iterable that is not indexed. This would increase the time complexity by a factor of N, where N is the number of keys in __dict__, so generating the dict would cost O(N^2). We can do better.

We can implement our own Base class that implements the required special methods and stores a list of the column names that can be accessed by index, reducing the time complexity of generating the dict to O(N). This has the added benefit that we can define the logic once and inherit from our Base class anytime we want a model class to be iterable.
class IterableBase(declarative_base()):
    __abstract__ = True

    def _init_keys(self):
        self._keys = [c.name for c in self.__table__.columns]
        self._dict = {c.name: getattr(self, c.name) for c in self.__table__.columns}

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._init_keys()

    def __setattr__(self, name, value):
        super().__setattr__(name, value)
        if name not in ('_dict', '_keys', '_n') and '_dict' in self.__dict__:
            self._dict[name] = value

    def __iter__(self):
        self._n = 0
        return self

    def __next__(self):
        if self._n >= len(self._keys):
            raise StopIteration
        self._n += 1
        key = self._keys[self._n - 1]
        return (key, self._dict[key])
Now the User class can inherit directly from our IterableBase class.
class User(IterableBase):
    __tablename__ = 'users'
    id = Column(Integer, primary_key=True)
    name = Column(String)
You can confirm that calling the dict function with a User instance as an argument returns the desired dictionary, sans "_sa_instance_state". You may have noticed the __setattr__ method that was declared in the IterableBase class. This ensures the _dict is updated when attributes are mutated or set after initialization.
def main():
    user1 = User(name='Bob')
    print(dict(user1))
    # outputs {'id': None, 'name': 'Bob'}
    user1.id = 42
    print(dict(user1))
    # outputs {'id': 42, 'name': 'Bob'}

if __name__ == '__main__':
    main()
After querying the database using the following SQLAlchemy code:
import sqlalchemy
from sqlalchemy import create_engine

SQLALCHEMY_DATABASE_URL = 'sqlite:///./examples/sql_app.db'
engine = create_engine(SQLALCHEMY_DATABASE_URL, echo=True)

query = sqlalchemy.select(TABLE)
result = engine.execute(query).fetchall()
You can use this one-liner:
query_dict = [record._mapping for record in result]
I have a variation on Marco Mariani's answer, expressed as a decorator. The main difference is that it'll handle lists of entities, as well as safely ignoring some other types of return values (which is very useful when writing tests using mocks):
from decorator import decorator
from sqlalchemy.orm import class_mapper

@decorator
def to_dict(f, *args, **kwargs):
    result = f(*args, **kwargs)
    if is_iterable(result) and not is_dict(result):
        return map(asdict, result)
    return asdict(result)

def asdict(obj):
    return dict((col.name, getattr(obj, col.name))
                for col in class_mapper(obj.__class__).mapped_table.c)

def is_dict(obj):
    return isinstance(obj, dict)

def is_iterable(obj):
    return True if getattr(obj, '__iter__', False) else False
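Hypothetical usage; get_recent_users is an invented service function, and the imports added above (decorator, class_mapper) are assumed:

@to_dict
def get_recent_users(session, limit=10):
    return session.query(User).limit(limit).all()

# callers now receive dicts (or an iterable of dicts) instead of ORM objects
users = list(get_recent_users(session))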
To complete @Anurag Uniyal's answer, here is a method that will recursively follow relationships:
from sqlalchemy.inspection import inspect

def to_dict(obj, with_relationships=True):
    d = {}
    for column in obj.__table__.columns:
        if with_relationships and len(column.foreign_keys) > 0:
            # Skip foreign keys
            continue
        d[column.name] = getattr(obj, column.name)

    if with_relationships:
        for relationship in inspect(type(obj)).relationships:
            val = getattr(obj, relationship.key)
            d[relationship.key] = to_dict(val) if val else None
    return d
class User(Base):
    __tablename__ = 'users'
    id = Column(Integer, primary_key=True)
    first_name = Column(TEXT)
    address_id = Column(Integer, ForeignKey('addresses.id'))
    address = relationship('Address')

class Address(Base):
    __tablename__ = 'addresses'
    id = Column(Integer, primary_key=True)
    city = Column(TEXT)

user = User(first_name='Nathan', address=Address(city='Lyon'))
# Add and commit user to session to create ids

to_dict(user)
# {'id': 1, 'first_name': 'Nathan', 'address': {'city': 'Lyon'}}

to_dict(user, with_relationships=False)
# {'id': 1, 'first_name': 'Nathan', 'address_id': 1}
We can get a list of objects as dicts:
def queryset_to_dict(query_result):
    query_columns = query_result[0].keys()
    res = [list(ele) for ele in query_result]
    dict_list = [dict(zip(query_columns, l)) for l in res]
    return dict_list

query_result = db.session.query(LanguageMaster).all()
dictvalue = queryset_to_dict(query_result)
from copy import copy

def to_record(row):
    record = copy(row.__dict__)
    del record["_sa_instance_state"]
    return record
If not using copy, you might run into errors.