Related
Backstory
I have a questionnaire that asks sensitive questions most of which are true/false. The majority of the time the values are false which poses a challenge when keeping the data private at rest. When encrypting each question into a separate column, it is really easy to tell which value is true and which is false with a bit of guessing. To combat this, the questions and answers are put into a dictionary object with some salt (nonsense that changes randomly) then encrypted. Making it impossible without the key to know what the answers were.
Method
Below is an example of the model used to encrypt the data with salt at rest making it impossible to look at the data and know the contents.
import sqlalchemy as sa
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy_utils.types import JSONType
from sqlalchemy_utils.types.encrypted.encrypted_type import StringEncryptedType, AesEngine
Base = declarative_base()
class SensitiveQuestionnaire(Base):
user_id = sa.Column(sa.Integer, primary_key=True, autoincrement=True)
_data = data: dict = sa.Column(StringEncryptedType(JSONType, 'secret', AesEngine, 'pkcs5'),
nullable=False, default=lambda: {'_salt': salt_shaker()})
# values are viewed using a python property to look into the `_data` dict
#property
def sensitive_question(self) -> Optional[float]:
return self._data.get('sensitive_question')
# values are set into the `_data` dict
#sensitive_question.setter
def sensitive_question(self, value: bool) -> None:
self._data['sensitive_question'] = value
# in a real example there would be 20+ properties that map to questions
def __init__(self, **kwargs):
# Sqlalchemy does not use the __init__ method so we are free to set object defaults here
self._data = {'_salt': salt_shaker()}
for key in kwargs:
setattr(self, key, kwargs[key])
#property
def _salt(self) -> str:
return self._data['_salt']
def salt_shaker():
return ''.join([random.choice('hldjs..' for i in range(50)])
The Problem
After the SensitiveQuestionnaire object is initialized none of the changes are persisted in the database.
# GIVEN a questionnaire
questionnaire = model.SensitiveQuestionnaire(user_id=1)
db.session.add()
db.session.commit()
# WHEN updating the questionnaire and saving it to the database
questionnaire.sensitive_question= True
db.session.commit()
# THEN we get the questionnaire from the database
db_questionnaire = model.SensitiveQuestionnaire.query\
.filter(model.SensitiveQuestionnaire.user_id == 1).first()
# THEN the sensitive_question value is persisted
assert db_questionnaire.sensitive_question is True
Value from the db_questionnaire.sensitive_question is None when it should be True.
After spending the better part of the day to figure this out, the cause of the issue is how Sqlalchemy knows when there is a change. The short version is sqlalchemy uses python's __setitem__ to hook in sqlalchemy's change() method letting it know there was a change. More info can be found in sqlalchemy's docs.
The answer is to wrap the StringEncryptedType in a MultableDict Type
Mutation Tracking
Provide support for tracking of in-place changes to scalar values, which are propagated into ORM change events on owning parent objects.
From SqlAlchemy's docs: https://docs.sqlalchemy.org/en/13/orm/extensions/mutable.html
Solution
Condensed version... wrapping the StringEncryptedType in a MutableDict
_data = data: dict = sa.Column(
MutableDict.as_mutable(StringEncryptedType(JSONType, 'secret', AesEngine, 'pkcs5')),
nullable=False, default=lambda: {'_salt': salt_shaker()})
Full version from the question above
import sqlalchemy as sa
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.ext.mutable import MutableDict
from sqlalchemy_utils.types import JSONType
from sqlalchemy_utils.types.encrypted.encrypted_type import StringEncryptedType, AesEngine
Base = declarative_base()
class SensitiveQuestionnaire(Base):
user_id: int = sa.Column(sa.Integer, primary_key=True, autoincrement=True)
# The MutableDict.as_mutable below is what changed!
_data = data: dict = sa.Column(
MutableDict.as_mutable(StringEncryptedType(JSONType, 'secret', AesEngine, 'pkcs5')),
nullable=False, default=lambda: {'_salt': salt_shaker()})
#property
def sensitive_question(self) -> Optional[float]:
return self._data.get('sensitive_question')
# values are set into the `_data` dict
#sensitive_question.setter
def sensitive_question(self, value: bool) -> None:
self._data['sensitive_question'] = value
# in a real example there would be 20+ properties that map to questions
def __init__(self, **kwargs):
self._data = {'_salt': salt_shaker()}
for key in kwargs:
setattr(self, key, kwargs[key])
#property
def _salt(self) -> str:
return self._data['_salt']
def salt_shaker():
return ''.join([random.choice('hldjs..' for i in range(50)])
I have the following relationship set up in a model:
role_profiles = Table('roleprofile', Base.metadata,
Column('role_id', Integer, ForeignKey('role.id')),
Column('profile_id', Integer, ForeignKey('profile.id'))
)
class profile(Base):
__tablename__ = 'profile'
# Columns...
roles = relationship('role', secondary=role_profiles, backref='profiles')
class role(Base):
__tablename__ = 'role'
# Columns...
So as I now understand that it works is that the roles property on the profile object will contain a list of role classes (which it does).
What I want to do is to serialize for each property of the model class generically. It works fine for the top class profile and I determine that there is a list of roles that I should recurse into:
# I need a statement here to check if the field.value is a backref
#if field.value is backref:
# continue
if isinstance(field.value, list):
# Get the json for the list
value = serialize.serialize_to_json(field.value)
else:
# Get the json for the value
value = cls._serialize(field.value)
The problem is that the backref of the relationship adds a pointer back to the profile. The same profile is then serialized and it recurse the roles over and over again until stack overflow.
Is there a way to determine that the property is a backref added by the relationship?
Update
Maybe I should add that it works fine in this case if I remove the backref since I don't need it but I would like to keep it in.
Update
As a temporary fix I added a class property to my base class:
class BaseModelMixin(object):
"""Base mixin for models using stamped data"""
__backref__ = None
and add it like this:
class role(Base):
__tablename__ = 'role'
__backref__ = ('profiles', )
# Columns...
and use it like this in my recursion:
if self.__backref__ and property_name in self.__backref__:
continue
If there is a better way please let me know because this doesn't look optimal.
Not sure if this is the best practice, but this code works for me. It returns True if the attribute is a reference, False if a regular column type.
def is_relation(orm_object, attr_name):
return hasattr(getattr(orm_object.__class__, attr_name).property, 'mapper')
You can create a __relationships__ in your class BaseModelMixin as a #property, which has a list of all relationships name which are not as a backref name in a model.
class BaseModelMixin(object):
"""Base mixin for models using stamped data"""
#property
def __relationships__(self):
"""
Return a list of relationships name which are not as a backref
name in model
"""
back_ref_relationships = list()
items = self.__mapper__.relationships.items()
for (key, value) in items:
if isinstance(value.backref, tuple):
back_ref_relationships.append(key)
return back_ref_relationships
As you have two class profile and role, so
>>> p = profile()
>>> p.__relationships__
# ['roles']
>>> r = role()
>>> r.__relationships__
# []
have a look at inspect
e.g.
from sqlalchemy import inspect
mapper = inspect(MyModelClass)
# dir(mapper)
# mapper.relationships.keys()
I've been trying to figure out how to iterate over the list of columns defined in a SQLAlchemy model. I want it for writing some serialization and copy methods to a couple of models. I can't just iterate over the obj.__dict__ since it contains a lot of SA specific items.
Anyone know of a way to just get the id and desc names from the following?
class JobStatus(Base):
__tablename__ = 'jobstatus'
id = Column(Integer, primary_key=True)
desc = Column(Unicode(20))
In this small case I could easily create a:
def logme(self):
return {'id': self.id, 'desc': self.desc}
but I'd prefer something that auto-generates the dict (for larger objects).
You could use the following function:
def __unicode__(self):
return "[%s(%s)]" % (self.__class__.__name__, ', '.join('%s=%s' % (k, self.__dict__[k]) for k in sorted(self.__dict__) if '_sa_' != k[:4]))
It will exclude SA magic attributes, but will not exclude the relations. So basically it might load the dependencies, parents, children etc, which is definitely not desirable.
But it is actually much easier because if you inherit from Base, you have a __table__ attribute, so that you can do:
for c in JobStatus.__table__.columns:
print c
for c in JobStatus.__table__.foreign_keys:
print c
See How to discover table properties from SQLAlchemy mapped object - similar question.
Edit by Mike: Please see functions such as Mapper.c and Mapper.mapped_table. If using 0.8 and higher also see Mapper.attrs and related functions.
Example for Mapper.attrs:
from sqlalchemy import inspect
mapper = inspect(JobStatus)
for column in mapper.attrs:
print column.key
You can get the list of defined properties from the mapper. For your case you're interested in only ColumnProperty objects.
from sqlalchemy.orm import class_mapper
import sqlalchemy
def attribute_names(cls):
return [prop.key for prop in class_mapper(cls).iterate_properties
if isinstance(prop, sqlalchemy.orm.ColumnProperty)]
I realise that this is an old question, but I've just come across the same requirement and would like to offer an alternative solution to future readers.
As Josh notes, full SQL field names will be returned by JobStatus.__table__.columns, so rather than the original field name id, you will get jobstatus.id. Not as useful as it could be.
The solution to obtaining a list of field names as they were originally defined is to look the _data attribute on the column object, which contains the full data. If we look at JobStatus.__table__.columns._data, it looks like this:
{'desc': Column('desc', Unicode(length=20), table=<jobstatus>),
'id': Column('id', Integer(), table=<jobstatus>, primary_key=True, nullable=False)}
From here you can simply call JobStatus.__table__.columns._data.keys() which gives you a nice, clean list:
['id', 'desc']
Assuming you're using SQLAlchemy's declarative mapping, you can use __mapper__ attribute to get at the class mapper. To get all mapped attributes (including relationships):
obj.__mapper__.attrs.keys()
If you want strictly column names, use obj.__mapper__.column_attrs.keys(). See the documentation for other views.
https://docs.sqlalchemy.org/en/latest/orm/mapping_api.html#sqlalchemy.orm.mapper.Mapper.attrs
self.__table__.columns will "only" give you the columns defined in that particular class, i.e. without inherited ones. if you need all, use self.__mapper__.columns. in your example i'd probably use something like this:
class JobStatus(Base):
...
def __iter__(self):
values = vars(self)
for attr in self.__mapper__.columns.keys():
if attr in values:
yield attr, values[attr]
def logme(self):
return dict(self)
To get an as_dict method on all of my classes I used a Mixin class which uses the technics described by Ants Aasma.
class BaseMixin(object):
def as_dict(self):
result = {}
for prop in class_mapper(self.__class__).iterate_properties:
if isinstance(prop, ColumnProperty):
result[prop.key] = getattr(self, prop.key)
return result
And then use it like this in your classes
class MyClass(BaseMixin, Base):
pass
That way you can invoke the following on an instance of MyClass.
> myclass = MyClass()
> myclass.as_dict()
Hope this helps.
I've played arround with this a bit further, I actually needed to render my instances as dict as the form of a HAL object with it's links to related objects. So I've added this little magic down here, which will crawl over all properties of the class same as the above, with the difference that I will crawl deeper into Relaionship properties and generate links for these automatically.
Please note that this will only work for relationships have a single primary key
from sqlalchemy.orm import class_mapper, ColumnProperty
from functools import reduce
def deepgetattr(obj, attr):
"""Recurses through an attribute chain to get the ultimate value."""
return reduce(getattr, attr.split('.'), obj)
class BaseMixin(object):
def as_dict(self):
IgnoreInstrumented = (
InstrumentedList, InstrumentedDict, InstrumentedSet
)
result = {}
for prop in class_mapper(self.__class__).iterate_properties:
if isinstance(getattr(self, prop.key), IgnoreInstrumented):
# All reverse relations are assigned to each related instances
# we don't need to link these, so we skip
continue
if isinstance(prop, ColumnProperty):
# Add simple property to the dictionary with its value
result[prop.key] = getattr(self, prop.key)
if isinstance(prop, RelationshipProperty):
# Construct links relaions
if 'links' not in result:
result['links'] = {}
# Get value using nested class keys
value = (
deepgetattr(
self, prop.key + "." + prop.mapper.primary_key[0].key
)
)
result['links'][prop.key] = {}
result['links'][prop.key]['href'] = (
"/{}/{}".format(prop.key, value)
)
return result
self.__dict__
returns a dict where keys are attribute names and values the values of the object.
/!\ there is a supplementary attribute: '_sa_instance_state'
but you can handle it :)
While row._asdict() worked for most of the cases, I needed some approach that also works after object creation process (db.session.add etc.). The idea is to create a method to_dict accessing columns on the table object and use standard getattr.
class Inventory(db.Model):
__tablename__ = 'inventory'
id = db.Column('id', db.Integer(), primary_key=True)
date = db.Column('date', db.DateTime, nullable=False, default=datetime.utcnow)
item = db.Column('item', db.String(100))
def to_dict(self):
return {
column.name: getattr(self, column.name, None)
for column in Inventory.__table__.columns
}
record = Inventory(item="gloves")
db.session.add(record)
db.session.commit()
# print(record._asdict()) # << that doesn't work
print(record.to_dict()) # << that works as intended
This solution will produce dict with columns only - no meta attributes or anything that you will have to manually clean after the next major update (if any).
PS. I use flask-sqlalchemy but it does not change the idea
I want to get the data of a particular instance of Model dynamically. I used this code.
def to_json(instance):
# get columns data
data = {}
columns = list(instance.__table__.columns)
for column in columns:
data[column.name] = instance.__dict__[column.name]
return data
To map a model from sqlalchemy to a json, taking into account relationships, I use this code
from sqlalchemy.orm import class_mapper
from sqlalchemy.ext.declarative import DeclarativeMeta
from sqlalchemy.orm import ColumnProperty
from sqlalchemy.orm import RelationshipProperty
class BaseMixin(object):
"""BaseMixin"""
__repr_hide = ["created_at", "updated_at"]
__insert_hide = []
#property
def _repr_hide(self):
return self.__repr_hide
#_repr_hide.setter
def _repr_hide(self, k):
self.__repr_hide.append(k)
#property
def _insert_hide(self):
return self.__insert_hide
#_insert_hide.setter
def _insert_hide(self, k):
self.__insert_hide.append(k)
def serialize(self, obj):
"""serialize from json"""
for k, v in obj.items():
if k in self.__repr_hide:
continue
if k in self.__insert_hide:
continue
if k in self.__table__.c.keys():
setattr(self, k, v)
return self
def deserialize(self, backref=None):
"""deserialize to json"""
res = dict()
for prop in class_mapper(self.__class__).iterate_properties:
if prop.key in self.__repr_hide:
continue
if isinstance(prop, ColumnProperty):
res[prop.key] = getattr(self, prop.key)
for prop in class_mapper(self.__class__).iterate_properties:
if prop.key in self.__repr_hide:
continue
if isinstance(prop, RelationshipProperty):
if prop.key == str(backref):
continue
key, value = prop.key, getattr(self, prop.key)
if value is None:
res[key] = None
elif isinstance(value.__class__, DeclarativeMeta):
res[key] = value.deserialize(backref=self.__table__)
else:
res[key] = [i.deserialize(backref=self.__table__) for i in value]
return res
def __iter__(self):
return iter(self.deserialize().items())
def __repr__(self):
vals = ", ".join(
"%s=%r" % (n, getattr(self, n))
for n in self.__table__.c.keys()
if n not in self._repr_hide
)
return "<%s={%s}>" % (self.__class__.__name__, vals)
I know this is an old question, but what about:
class JobStatus(Base):
...
def columns(self):
return [col for col in dir(self) if isinstance(col, db.Column)]
Then, to get column names: jobStatus.columns()
That would return ['id', 'desc']
Then you can loop through, and do stuff with the columns and values:
for col in jobStatus.colums():
doStuff(getattr(jobStatus, col))
Is there a simple way to iterate over column name and value pairs?
My version of SQLAlchemy is 0.5.6
Here is the sample code where I tried using dict(row):
import sqlalchemy
from sqlalchemy import *
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
print "sqlalchemy version:",sqlalchemy.__version__
engine = create_engine('sqlite:///:memory:', echo=False)
metadata = MetaData()
users_table = Table('users', metadata,
Column('id', Integer, primary_key=True),
Column('name', String),
)
metadata.create_all(engine)
class User(declarative_base()):
__tablename__ = 'users'
id = Column(Integer, primary_key=True)
name = Column(String)
def __init__(self, name):
self.name = name
Session = sessionmaker(bind=engine)
session = Session()
user1 = User("anurag")
session.add(user1)
session.commit()
# uncommenting next line throws exception 'TypeError: 'User' object is not iterable'
#print dict(user1)
# this one also throws 'TypeError: 'User' object is not iterable'
for u in session.query(User).all():
print dict(u)
Running this code on my system outputs:
Traceback (most recent call last):
File "untitled-1.py", line 37, in <module>
print dict(u)
TypeError: 'User' object is not iterable
You may access the internal __dict__ of a SQLAlchemy object, like the following:
for u in session.query(User).all():
print u.__dict__
As per #zzzeek in comments:
note that this is the correct answer for modern versions of
SQLAlchemy, assuming "row" is a core row object, not an ORM-mapped
instance.
for row in resultproxy:
row_as_dict = row._mapping # SQLAlchemy 1.4 and greater
# row_as_dict = dict(row) # SQLAlchemy 1.3 and earlier
background on row._mapping, new as of SQLAlchemy 1.4: https://docs.sqlalchemy.org/en/stable/core/connections.html#sqlalchemy.engine.Row._mapping
I couldn't get a good answer so I use this:
def row2dict(row):
d = {}
for column in row.__table__.columns:
d[column.name] = str(getattr(row, column.name))
return d
Edit: if above function is too long and not suited for some tastes here is a one liner (python 2.7+)
row2dict = lambda r: {c.name: str(getattr(r, c.name)) for c in r.__table__.columns}
In SQLAlchemy v0.8 and newer, use the inspection system.
from sqlalchemy import inspect
def object_as_dict(obj):
return {c.key: getattr(obj, c.key)
for c in inspect(obj).mapper.column_attrs}
user = session.query(User).first()
d = object_as_dict(user)
Note that .key is the attribute name, which can be different from the column name, e.g. in the following case:
class_ = Column('class', Text)
This method also works for column_property.
rows have an _asdict() function which gives a dict
In [8]: r1 = db.session.query(Topic.name).first()
In [9]: r1
Out[9]: (u'blah')
In [10]: r1.name
Out[10]: u'blah'
In [11]: r1._asdict()
Out[11]: {'name': u'blah'}
Assuming the following functions will be added to the class User the following will return all key-value pairs of all columns:
def columns_to_dict(self):
dict_ = {}
for key in self.__mapper__.c.keys():
dict_[key] = getattr(self, key)
return dict_
unlike the other answers all but only those attributes of the object are returned which are Column attributes at class level of the object. Therefore no _sa_instance_state or any other attribute SQLalchemy or you add to the object are included. Reference
EDIT: Forget to say, that this also works on inherited Columns.
hybrid_property extention
If you also want to include hybrid_property attributes the following will work:
from sqlalchemy import inspect
from sqlalchemy.ext.hybrid import hybrid_property
def publics_to_dict(self) -> {}:
dict_ = {}
for key in self.__mapper__.c.keys():
if not key.startswith('_'):
dict_[key] = getattr(self, key)
for key, prop in inspect(self.__class__).all_orm_descriptors.items():
if isinstance(prop, hybrid_property):
dict_[key] = getattr(self, key)
return dict_
I assume here that you mark Columns with an beginning _ to indicate that you want to hide them, either because you access the attribute by an hybrid_property or you simply do not want to show them. Reference
Tipp all_orm_descriptors also returns hybrid_method and AssociationProxy if you also want to include them.
Remarks to other answers
Every answer (like 1, 2 ) which based on the __dict__ attribute simply returns all attributes of the object. This could be much more attributes then you want. Like I sad this includes _sa_instance_state or any other attribute you define on this object.
Every answer (like 1, 2 ) which is based on the dict() function only works on SQLalchemy row objects returned by session.execute() not on the classes you define to work with, like the class User from the question.
The solving answer which is based on row.__table__.columns will definitely not work. row.__table__.columns contains the column names of the SQL Database. These can only be equal to the attributes name of the python object. If not you get an AttributeError.
For answers (like 1, 2 ) based on class_mapper(obj.__class__).mapped_table.c it is the same.
as #balki mentioned:
The _asdict() method can be used if you're querying a specific field because it is returned as a KeyedTuple.
In [1]: foo = db.session.query(Topic.name).first()
In [2]: foo._asdict()
Out[2]: {'name': u'blah'}
Whereas, if you do not specify a column you can use one of the other proposed methods - such as the one provided by #charlax. Note that this method is only valid for 2.7+.
In [1]: foo = db.session.query(Topic).first()
In [2]: {x.name: getattr(foo, x.name) for x in foo.__table__.columns}
Out[2]: {'name': u'blah'}
Old question, but since this the first result for "sqlalchemy row to dict" in Google it deserves a better answer.
The RowProxy object that SqlAlchemy returns has the items() method:
http://docs.sqlalchemy.org/en/latest/core/connections.html#sqlalchemy.engine.RowProxy.items
It simply returns a list of (key, value) tuples. So one can convert a row to dict using the following:
In Python <= 2.6:
rows = conn.execute(query)
list_of_dicts = [dict((key, value) for key, value in row.items()) for row in rows]
In Python >= 2.7:
rows = conn.execute(query)
list_of_dicts = [{key: value for (key, value) in row.items()} for row in rows]
A very simple solution: row._asdict().
sqlalchemy.engine.Row._asdict() (v1.4)
sqlalchemy.util.KeyedTuple._asdict() (v1.3)
> data = session.query(Table).all()
> [row._asdict() for row in data]
with sqlalchemy 1.4
session.execute(select(User.id, User.username)).mappings().all()
>> [{'id': 1, 'username': 'Bob'}, {'id': 2, 'username': 'Alice'}]
Following #balki answer, since SQLAlchemy 0.8 you can use _asdict(), available for KeyedTuple objects. This renders a pretty straightforward answer to the original question. Just, change in your example the last two lines (the for loop) for this one:
for u in session.query(User).all():
print u._asdict()
This works because in the above code u is an object of type class KeyedTuple, since .all() returns a list of KeyedTuple. Therefore it has the method _asdict(), which nicely returns u as a dictionary.
WRT the answer by #STB: AFAIK, anything that .all() returns is a list of KeypedTuple. Therefore, the above works either if you specify a column or not, as long as you are dealing with the result of .all() as applied to a Query object.
from sqlalchemy.orm import class_mapper
def asdict(obj):
return dict((col.name, getattr(obj, col.name))
for col in class_mapper(obj.__class__).mapped_table.c)
Refer to Alex Brasetvik's Answer, you can use one line of code to solve the problem
row_as_dict = [dict(row) for row in resultproxy]
Under the comment section of Alex Brasetvik's Answer, zzzeek the creator of SQLAlchemy stated this is the "Correct Method" for the problem.
I've found this post because I was looking for a way to convert a SQLAlchemy row into a dict. I'm using SqlSoup... but the answer was built by myself, so, if it could helps someone here's my two cents:
a = db.execute('select * from acquisizioni_motes')
b = a.fetchall()
c = b[0]
# and now, finally...
dict(zip(c.keys(), c.values()))
You could try to do it in this way.
for u in session.query(User).all():
print(u._asdict())
It use a built-in method in the query object that return a dictonary object of the query object.
references: https://docs.sqlalchemy.org/en/latest/orm/query.html
With python 3.8+, we can do this with dataclass, and the asdict method that comes with it:
from dataclasses import dataclass, asdict
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
from sqlalchemy import Column, String, Integer, create_engine
Base = declarative_base()
engine = create_engine('sqlite:///:memory:', echo=False)
#dataclass
class User(Base):
__tablename__ = 'users'
id: int = Column(Integer, primary_key=True)
name: str = Column(String)
email = Column(String)
def __init__(self, name):
self.name = name
self.email = 'hello#example.com'
Base.metadata.create_all(engine)
SessionMaker = sessionmaker(bind=engine)
session = SessionMaker()
user1 = User("anurag")
session.add(user1)
session.commit()
query_result = session.query(User).one() # type: User
print(f'{query_result.id=:}, {query_result.name=:}, {query_result.email=:}')
# query_result.id=1, query_result.name=anurag, query_result.email=hello#example.com
query_result_dict = asdict(query_result)
print(query_result_dict)
# {'id': 1, 'name': 'anurag'}
The key is to use the #dataclass decorator, and annotate each column with its type (the : str part of the name: str = Column(String) line).
Also note that since the email is not annotated, it is not included in query_result_dict.
The expression you are iterating through evaluates to list of model objects, not rows. So the following is correct usage of them:
for u in session.query(User).all():
print u.id, u.name
Do you realy need to convert them to dicts? Sure, there is a lot of ways, but then you don't need ORM part of SQLAlchemy:
result = session.execute(User.__table__.select())
for row in result:
print dict(row)
Update: Take a look at sqlalchemy.orm.attributes module. It has a set of functions to work with object state, that might be useful for you, especially instance_dict().
I've just been dealing with this issue for a few minutes.
The answer marked as correct doesn't respect the type of the fields.
Solution comes from dictalchemy adding some interesting fetures.
https://pythonhosted.org/dictalchemy/
I've just tested it and works fine.
Base = declarative_base(cls=DictableModel)
session.query(User).asdict()
{'id': 1, 'username': 'Gerald'}
session.query(User).asdict(exclude=['id'])
{'username': 'Gerald'}
class User(object):
def to_dict(self):
return dict([(k, getattr(self, k)) for k in self.__dict__.keys() if not k.startswith("_")])
That should work.
You can convert sqlalchemy object to dictionary like this and return it as json/dictionary.
Helper functions:
import json
from collections import OrderedDict
def asdict(self):
result = OrderedDict()
for key in self.__mapper__.c.keys():
if getattr(self, key) is not None:
result[key] = str(getattr(self, key))
else:
result[key] = getattr(self, key)
return result
def to_array(all_vendors):
v = [ ven.asdict() for ven in all_vendors ]
return json.dumps(v)
Driver Function:
def all_products():
all_products = Products.query.all()
return to_array(all_products)
Two ways:
1.
for row in session.execute(session.query(User).statement):
print(dict(row))
2.
selected_columns = User.__table__.columns
rows = session.query(User).with_entities(*selected_columns).all()
for row in rows :
print(row._asdict())
Here is how Elixir does it. The value of this solution is that it allows recursively including the dictionary representation of relations.
def to_dict(self, deep={}, exclude=[]):
"""Generate a JSON-style nested dict/list structure from an object."""
col_prop_names = [p.key for p in self.mapper.iterate_properties \
if isinstance(p, ColumnProperty)]
data = dict([(name, getattr(self, name))
for name in col_prop_names if name not in exclude])
for rname, rdeep in deep.iteritems():
dbdata = getattr(self, rname)
#FIXME: use attribute names (ie coltoprop) instead of column names
fks = self.mapper.get_property(rname).remote_side
exclude = [c.name for c in fks]
if dbdata is None:
data[rname] = None
elif isinstance(dbdata, list):
data[rname] = [o.to_dict(rdeep, exclude) for o in dbdata]
else:
data[rname] = dbdata.to_dict(rdeep, exclude)
return data
With this code you can also to add to your query "filter" or "join" and this work!
query = session.query(User)
def query_to_dict(query):
def _create_dict(r):
return {c.get('name'): getattr(r, c.get('name')) for c in query.column_descriptions}
return [_create_dict(r) for r in query]
For the sake of everyone and myself, here is how I use it:
def run_sql(conn_String):
output_connection = engine.create_engine(conn_string, poolclass=NullPool).connect()
rows = output_connection.execute('select * from db1.t1').fetchall()
return [dict(row) for row in rows]
As OP stated, calling the dict initializer raises an exception with the message "User" object is not iterable. So the real question is how to make a SQLAlchemy Model iterable?
We'll have to implement the special methods __iter__ and __next__, but if we inherit directly from the declarative_base model, we would still run into the undesirable "_sa_instance_state" key. What's worse, is we would have to loop through __dict__.keys() for every call to __next__ because the keys() method returns a View -- an iterable that is not indexed. This would increase the time complexity by a factor of N, where N is the number of keys in __dict__. Generating the dict would cost O(N^2). We can do better.
We can implement our own Base class that implements the required special methods and stores a list of of the column names that can be accessed by index, reducing the time complexity of generating the dict to O(N). This has the added benefit that we can define the logic once and inherit from our Base class anytime we want our model class to be iterable.
class IterableBase(declarative_base()):
__abstract__ = True
def _init_keys(self):
self._keys = [c.name for c in self.__table__.columns]
self._dict = {c.name: getattr(self, c.name) for c in self.__table__.columns}
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self._init_keys()
def __setattr__(self, name, value):
super().__setattr__(name, value)
if name not in ('_dict', '_keys', '_n') and '_dict' in self.__dict__:
self._dict[name] = value
def __iter__(self):
self._n = 0
return self
def __next__(self):
if self._n >= len(self._keys):
raise StopIteration
self._n += 1
key = self._keys[self._n-1]
return (key, self._dict[key])
Now the User class can inherit directly from our IterableBase class.
class User(IterableBase):
__tablename__ = 'users'
id = Column(Integer, primary_key=True)
name = Column(String)
You can confirm that calling the dict function with a User instance as an argument returns the desired dictionary, sans "_sa_instance_state". You may have noticed the __setattr__ method that was declared in the IterableBase class. This ensures the _dict is updated when attributes are mutated or set after initialization.
def main():
user1 = User('Bob')
print(dict(user1))
# outputs {'id': None, 'name': 'Bob'}
user1.id = 42
print(dict(user1))
# outputs {'id': 42, 'name': 'Bob'}
if __name__ == '__main__':
main()
After querying the database using following SQLAlchemy code:
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
SQLALCHEMY_DATABASE_URL = 'sqlite:///./examples/sql_app.db'
engine = create_engine(SQLALCHEMY_DATABASE_URL, echo=True)
query = sqlalchemy.select(TABLE)
result = engine.execute(query).fetchall()
You can use this one-liner:
query_dict = [record._mapping for record in results]
I have a variation on Marco Mariani's answer, expressed as a decorator. The main difference is that it'll handle lists of entities, as well as safely ignoring some other types of return values (which is very useful when writing tests using mocks):
#decorator
def to_dict(f, *args, **kwargs):
result = f(*args, **kwargs)
if is_iterable(result) and not is_dict(result):
return map(asdict, result)
return asdict(result)
def asdict(obj):
return dict((col.name, getattr(obj, col.name))
for col in class_mapper(obj.__class__).mapped_table.c)
def is_dict(obj):
return isinstance(obj, dict)
def is_iterable(obj):
return True if getattr(obj, '__iter__', False) else False
To complete #Anurag Uniyal 's answer, here is a method that will recursively follow relationships:
from sqlalchemy.inspection import inspect
def to_dict(obj, with_relationships=True):
d = {}
for column in obj.__table__.columns:
if with_relationships and len(column.foreign_keys) > 0:
# Skip foreign keys
continue
d[column.name] = getattr(obj, column.name)
if with_relationships:
for relationship in inspect(type(obj)).relationships:
val = getattr(obj, relationship.key)
d[relationship.key] = to_dict(val) if val else None
return d
class User(Base):
__tablename__ = 'users'
id = Column(Integer, primary_key=True)
first_name = Column(TEXT)
address_id = Column(Integer, ForeignKey('addresses.id')
address = relationship('Address')
class Address(Base):
__tablename__ = 'addresses'
id = Column(Integer, primary_key=True)
city = Column(TEXT)
user = User(first_name='Nathan', address=Address(city='Lyon'))
# Add and commit user to session to create ids
to_dict(user)
# {'id': 1, 'first_name': 'Nathan', 'address': {'city': 'Lyon'}}
to_dict(user, with_relationship=False)
# {'id': 1, 'first_name': 'Nathan', 'address_id': 1}
We can get a list of object in dict:
def queryset_to_dict(query_result):
query_columns = query_result[0].keys()
res = [list(ele) for ele in query_result]
dict_list = [dict(zip(query_columns, l)) for l in res]
return dict_list
query_result = db.session.query(LanguageMaster).all()
dictvalue=queryset_to_dict(query_result)
from copy import copy
def to_record(row):
record = copy(row.__dict__)
del record["_sa_instance_state"]
return record
If not using copy, you might run into errors.
I want to create a new type of field for django models that is basically a ListOfStrings. So in your model code you would have the following:
models.py:
from django.db import models
class ListOfStringsField(???):
???
class myDjangoModelClass():
myName = models.CharField(max_length=64)
myFriends = ListOfStringsField() #
other.py:
myclass = myDjangoModelClass()
myclass.myName = "bob"
myclass.myFriends = ["me", "myself", "and I"]
myclass.save()
id = myclass.id
loadedmyclass = myDjangoModelClass.objects.filter(id__exact=id)
myFriendsList = loadedclass.myFriends
# myFriendsList is a list and should equal ["me", "myself", "and I"]
How would you go about writing this field type, with the following stipulations?
We don't want to do create a field which just crams all the strings together and separates them with a token in one field like this. It is a good solution in some cases, but we want to keep the string data normalized so tools other than django can query the data.
The field should automatically create any secondary tables needed to store the string data.
The secondary table should ideally have only one copy of each unique string. This is optional, but would be nice to have.
Looking in the Django code it looks like I would want to do something similar to what ForeignKey is doing, but the documentation is sparse.
This leads to the following questions:
Can this be done?
Has it been done (and if so where)?
Is there any documentation on Django about how to extend and override their model classes, specifically their relationship classes? I have not seen a lot of documentation on that aspect of their code, but there is this.
This is comes from this question.
There's some very good documentation on creating custom fields here.
However, I think you're overthinking this. It sounds like you actually just want a standard foreign key, but with the additional ability to retrieve all the elements as a single list. So the easiest thing would be to just use a ForeignKey, and define a get_myfield_as_list method on the model:
class Friends(model.Model):
name = models.CharField(max_length=100)
my_items = models.ForeignKey(MyModel)
class MyModel(models.Model):
...
def get_my_friends_as_list(self):
return ', '.join(self.friends_set.values_list('name', flat=True))
Now calling get_my_friends_as_list() on an instance of MyModel will return you a list of strings, as required.
What you have described sounds to me really similar to the tags.
So, why not using django tagging?
It works like a charm, you can install it independently from your application and its API is quite easy to use.
I also think you're going about this the wrong way. Trying to make a Django field create an ancillary database table is almost certainly the wrong approach. It would be very difficult to do, and would likely confuse third party developers if you are trying to make your solution generally useful.
If you're trying to store a denormalized blob of data in a single column, I'd take an approach similar to the one you linked to, serializing the Python data structure and storing it in a TextField. If you want tools other than Django to be able to operate on the data then you can serialize to JSON (or some other format that has wide language support):
from django.db import models
from django.utils import simplejson
class JSONDataField(models.TextField):
__metaclass__ = models.SubfieldBase
def to_python(self, value):
if value is None:
return None
if not isinstance(value, basestring):
return value
return simplejson.loads(value)
def get_db_prep_save(self, value):
if value is None:
return None
return simplejson.dumps(value)
If you just want a django Manager-like descriptor that lets you operate on a list of strings associated with a model then you can manually create a join table and use a descriptor to manage the relationship. It's not exactly what you need, but this code should get you started.
Thanks for all those that answered. Even if I didn't use your answer directly the examples and links got me going in the right direction.
I am not sure if this is production ready, but it appears to be working in all my tests so far.
class ListValueDescriptor(object):
def __init__(self, lvd_parent, lvd_model_name, lvd_value_type, lvd_unique, **kwargs):
"""
This descriptor object acts like a django field, but it will accept
a list of values, instead a single value.
For example:
# define our model
class Person(models.Model):
name = models.CharField(max_length=120)
friends = ListValueDescriptor("Person", "Friend", "CharField", True, max_length=120)
# Later in the code we can do this
p = Person("John")
p.save() # we have to have an id
p.friends = ["Jerry", "Jimmy", "Jamail"]
...
p = Person.objects.get(name="John")
friends = p.friends
# and now friends is a list.
lvd_parent - The name of our parent class
lvd_model_name - The name of our new model
lvd_value_type - The value type of the value in our new model
This has to be the name of one of the valid django
model field types such as 'CharField', 'FloatField',
or a valid custom field name.
lvd_unique - Set this to true if you want the values in the list to
be unique in the table they are stored in. For
example if you are storing a list of strings and
the strings are always "foo", "bar", and "baz", your
data table would only have those three strings listed in
it in the database.
kwargs - These are passed to the value field.
"""
self.related_set_name = lvd_model_name.lower() + "_set"
self.model_name = lvd_model_name
self.parent = lvd_parent
self.unique = lvd_unique
# only set this to true if they have not already set it.
# this helps speed up the searchs when unique is true.
kwargs['db_index'] = kwargs.get('db_index', True)
filter = ["lvd_parent", "lvd_model_name", "lvd_value_type", "lvd_unique"]
evalStr = """class %s (models.Model):\n""" % (self.model_name)
evalStr += """ value = models.%s(""" % (lvd_value_type)
evalStr += self._params_from_kwargs(filter, **kwargs)
evalStr += ")\n"
if self.unique:
evalStr += """ parent = models.ManyToManyField('%s')\n""" % (self.parent)
else:
evalStr += """ parent = models.ForeignKey('%s')\n""" % (self.parent)
evalStr += "\n"
evalStr += """self.innerClass = %s\n""" % (self.model_name)
print evalStr
exec (evalStr) # build the inner class
def __get__(self, instance, owner):
value_set = instance.__getattribute__(self.related_set_name)
l = []
for x in value_set.all():
l.append(x.value)
return l
def __set__(self, instance, values):
value_set = instance.__getattribute__(self.related_set_name)
for x in values:
value_set.add(self._get_or_create_value(x))
def __delete__(self, instance):
pass # I should probably try and do something here.
def _get_or_create_value(self, x):
if self.unique:
# Try and find an existing value
try:
return self.innerClass.objects.get(value=x)
except django.core.exceptions.ObjectDoesNotExist:
pass
v = self.innerClass(value=x)
v.save() # we have to save to create the id.
return v
def _params_from_kwargs(self, filter, **kwargs):
"""Given a dictionary of arguments, build a string which
represents it as a parameter list, and filter out any
keywords in filter."""
params = ""
for key in kwargs:
if key not in filter:
value = kwargs[key]
params += "%s=%s, " % (key, value.__repr__())
return params[:-2] # chop off the last ', '
class Person(models.Model):
name = models.CharField(max_length=120)
friends = ListValueDescriptor("Person", "Friend", "CharField", True, max_length=120)
Ultimately I think this would still be better if it were pushed deeper into the django code and worked more like the ManyToManyField or the ForeignKey.
I think what you want is a custom model field.