How to nest marshmallow schema using data from same source? - python

I want to group contact_type and contact_value into a nested contact object in the output for the following model:
class Account(Base):
    """ORM model for the ``accounts`` table.

    The contact information is stored flat, as two separate string columns.
    """

    __tablename__ = 'accounts'

    id = sa.Column(sa.Integer, primary_key=True)

    # The two halves of a single contact entry.
    contact_type = sa.Column(sa.String)
    contact_value = sa.Column(sa.String)
This result is expected:
{
"id": 1,
"contact": {
"type": "phone",
"value": "1234567"
}
}
What is the best way to implement this?

ma = Marshmallow()


class AccountContactSchema(ma.Schema):
    """Shape of the nested ``contact`` object: a type/value pair."""

    type = ma.Str()
    value = ma.Str()


class AccountSchema(ma.Schema):
    """Account output: the id plus a nested ``contact`` object."""

    id = ma.Int()
    contact = ma.Nested(AccountContactSchema)


# Module-level schema instance.
account_schema = AccountSchema()

I don't know if this is the best way, but you can do this:
from marshmallow import fields
def _contact_as_dict(account):
    """Build the nested ``contact`` mapping from the flat account columns."""
    return {'type': account.contact_type, 'value': account.contact_value}


class AccountSchema(ma.Schema):
    """Serializes an account, grouping its contact columns under ``contact``."""

    id = ma.Int()
    # Computed at dump time from the object being serialized.
    contact = fields.Function(_contact_as_dict)
and use:
>>> acc = Account(id=1, contact_type="phone", contact_value="1234567")
>>> acc_dict, errors = AccountSchema().dump(acc)
>>> print acc_dict
{u'contact': {'type': 'phone', 'value': '1234567'}, u'id': 1}
Or you can shape the dictionary differently, for example keyed by the contact type:
def _contact_by_type(account):
    """Return a one-entry mapping of the contact type to the contact value."""
    return {account.contact_type: account.contact_value}


class AccountSchema(ma.Schema):
    """Serializes an account with ``contact`` keyed by the contact type."""

    id = ma.Int()
    contact = fields.Function(_contact_by_type)
This will result in:
{u'contact': {'phone': '1234567'}, u'id': 1}
Take a look at Custom Fields

Related

Failing to parse complex JSON using Python and Marshmallow

I am using Marshmallow to create a mapper for a JSON file. Following are the details:
My JSON File:
{
"version": "3.0",
"name": "A1",
"request": {
"startdate": "26022022",
"enddate": "26022022",
"records": 1000
},
"ranking": {
"90": {
"name": "N1",
"class1": "C1"
},
"98": {
"name": "N2",
"class1": "C2"
},
"86": {
"name": "N3",
"class1": "C3"
}
}
}
My mapper class:
class RequestMapper(Schema):
    """Schema for the ``request`` object of the JSON document."""

    # The dates are declared as plain strings; no date parsing is done here.
    startdate = fields.String()
    enddate = fields.String()
    records = fields.Int()
class Ranking(Schema):
    """Schema for a single ranking entry (``name`` and ``class1``)."""

    name = fields.String()
    class1 = fields.String()
class RankingMapper(Schema):
    """Schema for the ``ranking`` object, declared with a single ``rank`` field."""

    # NOTE(review): ``dataKey`` is passed through exactly as posted in the
    # question; whether marshmallow honours it should be confirmed.
    rank = fields.Nested(Ranking(), dataKey=fields.Int)
class SampleSchema(Schema):
    """Top-level schema: ``name`` plus the nested ``request`` and ``ranking``."""

    name = fields.Str()

    # Nested objects are declared with schema instances.
    request = fields.Nested(RequestMapper())
    ranking = fields.Nested(RankingMapper())
Code to call Mapper:
# Serialize the loaded data through the schema and pretty-print the result.
print("\n\nOutput using mapper")
serialized = mapper.SampleSchema().dump(data)
pprint(serialized, indent=3)
print("\n\n")
Following is the output:
Output using mapper
{ 'name': 'A1',
'ranking': {},
'request': {'enddate': '26022022', 'records': 1000, 'startdate': '26022022'}}
I am not getting any data for ranking because its keys (90, 98, 86, ...) are dynamic, and I am not sure how to create a mapper for dynamic keys.
Any inputs will be helpful.
Thank you
When nesting schemas, pass the schema class itself, not an instance of it:
class RankingMapper(Schema):
    """Schema for an object that has a literal ``rank`` key.

    ``SampleSchema`` does not use it: a fixed field name cannot match keys
    such as "90" or "98" that vary from document to document. It is kept so
    existing references to ``RankingMapper`` keep working.
    """

    # No ``dataKey`` argument is passed: marshmallow's option is spelled
    # ``data_key`` and takes the external key as a string, not a field class.
    rank = fields.Nested(Ranking)


class SampleSchema(Schema):
    """Top-level schema: ``name``, the ``request`` object and the rankings."""

    name = fields.Str()
    request = fields.Nested(RequestMapper)

    # The keys under "ranking" are dynamic, so describe it as a mapping from
    # string keys to ``Ranking`` objects instead of a schema with fixed fields.
    ranking = fields.Dict(keys=fields.Str(), values=fields.Nested(Ranking))

SQLAlchemy - eager load for many to many secondary relation not working as expected

In our system, we have entities Item and Store, and they are related to the Stock entity.
An item can be stocked in multiple stores, and also multiple items can be stocked in one store, so a simple many to many relation.
However, when describing this relation with a secondary reference:
# Many-to-many link to Store through the ``stock`` table; the backref also
# adds an ``items`` collection on Store.
stores = relationship('Store', secondary='stock', backref='items')
SQLAlchemy loads all stocks for the related Store, not just those that are related to the referenced item.
eg. when we specify a relation that generates following sql:
SELECT item.id AS item_id, store.id AS store_id, stock.id AS stock_id, stock.store_id AS stock_store_id, stock.item_id AS stock_item_id
FROM item
LEFT OUTER JOIN (stock AS stock_1 JOIN store ON store.id = stock_1.store_id) ON item.id = stock_1.item_id
LEFT OUTER JOIN stock ON store.id = stock.store_id AND stock.item_id = item.id
WHERE stock.item_id = item.id
That returns following data:
item_id, store_id, stock_id, stock_store_id, stock_item_id,
1, 1, 1, 1, 1
2, 1, 2, 1, 2
1, 2, 3, 2, 1
2, 2, 4, 2, 2
The actual loaded data is following:
items = [{
id: 1,
stores: [
{
id: 1,
stocks: [
{ id: 1, item_id: 1 },
{ id: 2, item_id: 2 } <- should not be loaded items[0].id != 2
]
},
{
id: 2,
stocks: [
{ id: 3, item_id: 1 },
{ id: 4, item_id: 2 } <- should not be loaded items[0].id != 2
]
}
]
},
{
id: 2,
stores: [
{
id: 1,
stocks: [
{ id: 2, item_id: 2 },
{ id: 1, item_id: 1 } <- should not be loaded items[1].id != 1
]
},
{
id: 2,
stocks: [
{ id: 4, item_id: 2 },
{ id: 3, item_id: 1 } <- should not be loaded items[1].id != 1
]
}
]
}]
For reference, take a look at the declaration of the entities and their relationships, as well as the query object:
# Declarative base class that the ORM models below inherit from.
Base = declarative_base()
class Item(Base):
    """An item, mapped to the ``item`` table."""

    __tablename__ = 'item'

    id = Column(Integer, primary_key=True)

    # Many-to-many to Store through the ``stock`` table; the backref also
    # adds an ``items`` collection on Store.
    stores = relationship('Store', secondary='stock', backref='items')
class Store(Base):
    """A store, mapped to the ``store`` table."""

    __tablename__ = 'store'

    id = Column(Integer, primary_key=True)
class Stock(Base):
    """A stock row linking one item to one store (the ``stock`` table)."""

    __tablename__ = 'stock'

    id = Column(Integer, primary_key=True)

    # Both foreign keys are mandatory.
    store_id = Column(Integer, ForeignKey(Store.id), nullable=False)
    item_id = Column(Integer, ForeignKey(Item.id), nullable=False)

    # Each relationship also adds a ``stocks`` collection on the other side.
    item = relationship(Item, backref='stocks')
    store = relationship(Store, backref='stocks')
# Join condition for the explicit Stock join: the stock row must belong to
# both the joined store and the item being selected.
stock_join_condition = and_(Store.id == Stock.store_id, Stock.item_id == Item.id)

# Populate Item.stores and, below it, Store.stocks from the joined rows.
eager_path = contains_eager(Item.stores).contains_eager(Store.stocks)

items = (
    session.query(Item)
    .outerjoin(Item.stores, (Stock, stock_join_condition))
    .filter(Stock.item_id == Item.id)
    .options(eager_path)
    .all()
)
That's because the stores with the same id are the same Store instance.
It's probably better to do explicit filtering when serializing/displaying the results.
That said, it's possible to override Item's __getattribute__ to intercept Item.stores to return _ItemStore wrappers, that only return stocks with the same item_id as the parent Item.id.
class Item(Base):
    # ...

    class _ItemStore:
        """View of a Store that only exposes the stocks of one item."""

        def __init__(self, store, item_id):
            self.id = store.id
            self._item_id = item_id
            self._store = store

        @property
        def stocks(self):
            """Stocks of the wrapped store whose ``item_id`` matches the owning item."""
            return [stock for stock in self._store.stocks if stock.item_id == self._item_id]

    def __getattribute__(self, item):
        """Return the attribute named ``item``, wrapping ``stores`` per item.

        Every attribute is looked up normally; only ``stores`` is replaced by
        a list of ``_ItemStore`` wrappers bound to this item's id, so that
        ``store.stocks`` is filtered to this item.
        """
        value = super().__getattribute__(item)
        if item == 'stores':
            value = [self._ItemStore(store, self.id) for store in value]
        return value
Adding a simple cache so that item.stores == item.stores:
def __getattribute__(self, item):
    """Return the attribute named ``item``; ``stores`` yields cached wrappers.

    Wrappers are kept in a per-instance dict stored as ``_stores`` and keyed
    by ``id(store)``, so repeated access returns the same wrapper objects.
    """
    value = super().__getattribute__(item)
    if item != 'stores':
        return value

    wrappers = getattr(self, '_stores', None)
    if wrappers is None:
        # First access on this instance: create the cache.
        wrappers = self._stores = {}
    owner_id = self.id
    wrapper_cls = self._ItemStore
    return [
        wrappers.setdefault(id(store), wrapper_cls(store, owner_id))
        for store in value
    ]

Validated data fields differs from data fields Django Rest Nested serializers

I was playing around with serializers in Django and wanted to change the names of the fields in my response. When I realized my changes had not been taken into account, I did some digging and saw that my validated_data differs from my data.
My goal here is to give a python object to a serializer which has different fields than the name I want to return so I used the 'source=' argument to my field.
Note that changing the name of the python object's field is not an option.
Here's the python object:
class Flow(object):
    """Plain Python object holding a flow's name, uid and data type."""

    def __init__(self, name=None, flow_uid=None, data_type=None, parent=None, container=None):
        """Store the given values on the instance.

        The object carries more attributes than the serializer reads:
        ``parent`` and ``container`` are kept on the private ``_parent`` and
        ``_container`` attributes. All arguments default to ``None``.
        """
        # Keep the values that were passed in rather than discarding them.
        self._parent = parent
        self._container = container
        self.name = name
        self.data_type = data_type
        self.flow_uid = flow_uid
And the following serializers (I am using a nested representation)
serializers.py
from rest_framework.fields import CharField, IntegerField, ListField, JSONField
from rest_framework.serializers import Serializer
class OutputSerializer(Serializer):
    """Serializer for an output: ``uid``, ``name`` and ``description``."""

    uid = CharField(max_length=36)
    name = CharField(max_length=100)
    description = CharField(max_length=100)
class FlowSerializer(Serializer):
    """Serializer for a ``Flow`` that exposes ``flow_uid`` under the name ``uid``."""

    uid = CharField(source='flow_uid', max_length=36)  # I want 'uid' in my response not 'flow_uid'
    name = CharField(max_length=100)
    data_type = CharField(max_length=100)

    class Meta:
        # The DRF sentinel is spelled '__all__' (two underscores each side).
        # NOTE(review): Meta.fields is presumably only read by ModelSerializer,
        # not by a plain Serializer; confirm before relying on it.
        fields = '__all__'

    def to_representation(self, instance):
        """Return the default representation of ``instance`` unchanged."""
        representation = super().to_representation(instance)
        # Here representation = OrderedDict([('uid', 'uid_value'), ('name', 'name_value'), ('data_type', 'data_value')])
        return representation
class FlowOutputSerializer(OutputSerializer):
    """Output serializer extended with a nested list of flows (``columns``)."""

    columns = FlowSerializer(many=True)
viewsets.py
class AddTransformationViewSet(ViewSet):
    """Handle available "actions" for BrickModel operations"""

    def list(self, request, parent_lookup_analyses: str):
        """Build a dummy output with two flows, validate it and return it."""
        first_flow = Flow(name="name1", flow_uid='flow_uid_value1', data_type='str')
        second_flow = Flow(name="name2", flow_uid='flow_uid_value2', data_type='str')

        # Serialized flows, second one first.
        columns = [
            FlowSerializer(second_flow).data,  # Debug: {'uid': 'flow_uid_value2', 'name': 'name2', 'data_type': 'str'}
            FlowSerializer(first_flow).data,  # Debug: {'uid': 'flow_uid_value1', 'name': 'name1', 'data_type': 'str'}
        ]
        dummy_col = {
            "name": "output_name",
            "description": "output_description",
            "uid": "output_uid",
            "columns": columns,
        }
        # Debug dummy_col: {'name': 'output_name', 'description': 'output_description', 'uid': 'output_uid', 'columns': [{'uid': 'flow_uid_value2', 'name': 'name2', 'data_type': 'str'}, {'uid': 'flow_uid_value1', 'name': 'name1', 'data_type': 'str'}]}

        dummy_serializer: FlowOutputSerializer = FlowOutputSerializer(data=dummy_col)
        dummy_serializer.is_valid(raise_exception=True)
        # Debug dummy_serializer.data: {'uid': 'output_uid', 'name': 'output_name', 'description': 'output_description', 'columns': [OrderedDict([('uid', 'flow_uid_value2'), ('name', 'name2'), ('data_type', 'str')]), OrderedDict([('uid', 'flow_uid_value1'), ('name', 'name1'), ('data_type', 'str')])]}
        # Debug dummy_serializer.validated_data: OrderedDict([('uid', 'output_uid'), ('name', 'output_name'), ('description', 'output_description'), ('columns', [OrderedDict([('flow_uid', 'flow_uid_value2'), ('name', 'name2'), ('data_type', 'str')]), OrderedDict([('flow_uid', 'flow_uid_value1'), ('name', 'name1'), ('data_type', 'str')])])])

        # The response body is validated_data, which (per the debug output
        # above) is keyed by 'flow_uid' rather than 'uid'.
        payload = dummy_serializer.validated_data
        return Response(data=payload, status=status.HTTP_201_CREATED)
Expected_response:
{
...
"columns": [
{
"uid": "flow_uid_value2",
"name": "name2",
"data_type": "str"
},
{
"uid": "flow_uid_value1",
"name": "name1",
"data_type": "str"
}
]
}
What I get (I want 'flow_uid' to be 'uid'):
{
...
"columns": [
{
"flow_uid": "flow_uid_value2",
"name": "name2",
"data_type": "str"
},
{
"flow_uid": "flow_uid_value1",
"name": "name1",
"data_type": "str"
}
]
}
Is there any particular danger in using .data in this case rather than .validated_data?
What is the cause of this behavior?
Is there any particular danger in using .data in this case rather than .validated_data? What is the cause of this behavior?
serializer.validated_data is meant to be used with the Python object. Therefore it will expose flow_uid because of the custom source value.
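serializer.data is the serialized (outgoing) representation, keyed by the serializer's field names, which is why it exposes uid. After save() has been called, it is the serialized form of the saved instance.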
Therefore you should always be using serializer.data in your responses and keep serializer.validated_data in any code that interacts with models or internal project code:
Response(data=dummy_serializer.data, status=status.HTTP_201_CREATED)

How to join nested schema into one JSON object with Flask Marshmallow

I have a problem joining two schemas into one nested JSON object. This API returns JSON built from a join query over 3 related tables. After running the query, I use two Marshmallow schemas, as referenced from here. But the response is not as expected; here is the response:
{
"message": "success",
"device": [
{
"device_name": "Kamar Tidur Utama"
}
],
"sensor": [
{
"value": 23.3683,
"sensor_name": "Temperature"
},
{
"value": 0.0,
"sensor_name": "Motion"
},
{
"value": 90.12,
"sensor_name": "Humidity"
},
{
"value": 15.8667,
"sensor_name": "Current 1"
},
{
"value": 15.0333,
"sensor_name": "Current 2"
}
]
}
What I want is for the "sensor" list to be nested inside each "device" entry, since each device has many sensors, like this:
{
"message": "success",
"device": [
{
"device_name": "Kamar Tidur Utama",
"sensor": [
{
"value": 23.3683,
"sensor_name": "Temperature"
},
{
"value": 0.0,
"sensor_name": "Motion"
},
{
"value": 90.12,
"sensor_name": "Humidity"
},
{
"value": 15.8667,
"sensor_name": "Current 1"
},
{
"value": 15.0333,
"sensor_name": "Current 2"
}
]
}
]
}
These are my three related models:
Device.py (a method that do query for API)
@classmethod
def get_device_sensor(cls):
    """Return devices and sensor readings as two separate top-level lists.

    Returns:
        A JSON response with ``message``, ``device`` and ``sensor`` keys, or
        a ``({'message': ...}, 404)`` tuple when no device row was found.
    """
    device_sensor_schema = DeviceSensorSchema(many=True)
    sensor_value_schema = SensorValueSchema(many=True)

    # Devices that have sensor data, with the name exposed as ``device_name``.
    device = Device.query.join(SensorData, Device.id == SensorData.device_id)\
        .add_columns(Device.name.label('device_name'))
    print(device)

    # Readings joined to their sensor, labelled to match SensorValueSchema.
    sensor = Sensor.query.join(SensorData, Sensor.id == SensorData.sensor_id)\
        .add_columns(Sensor.name.label('sensor_name'), SensorData.value.label('value'))

    res_device = device_sensor_schema.dump(device, many=True)
    res_sensor = sensor_value_schema.dump(sensor, many=True)

    # Test the serialized rows, not the query: a Query object is always
    # truthy, which made the 404 branch unreachable.
    # NOTE(review): assumes dump() returns a plain list (marshmallow 3).
    if res_device:
        return jsonify({'message': 'success', 'device': res_device, 'sensor': res_sensor})
    return {'message': 'Table device is empty'}, 404
Device.py (two Marshmallow schemas)
class SensorValueSchema(ma.Schema):
    """One sensor reading: the sensor's name and the measured value."""

    class Meta:
        # Schema options are read from ``Meta``; a bare class-level
        # ``ordered = True`` is not treated as an option.
        ordered = True

    sensor_name = fields.String()
    value = fields.Float()
class DeviceSensorSchema(Schema):
    """A device name together with a list of its sensor readings."""

    device_name = fields.String()

    # Each list element is serialized with SensorValueSchema.
    sensors = fields.List(fields.Nested(SensorValueSchema))
Device.py (model class)
class Device(db.Model):
    """ORM model for the ``devices`` table."""

    __tablename__ = "devices"

    # Columns
    id = db.Column(db.Integer, primary_key=True)
    user_id = db.Column(db.Integer, db.ForeignKey('users.id'))
    name = db.Column(db.String(255))
    serial_number = db.Column(db.String(255), unique=True)
    used_relay = db.Column(db.Integer, default=0)

    # Timestamps: creation time defaults to the database's now();
    # the update time is set on UPDATE.
    created_at = db.Column(db.DateTime(timezone=True), server_default=db.func.now())
    updated_at = db.Column(db.DateTime(timezone=True), onupdate=db.func.now())

    # Relationships: all dynamic, each adding a ``device`` backref.
    switches = db.relationship('Switch', backref='device', lazy='dynamic')
    power_usages = db.relationship('PowerUsage', backref='device', lazy='dynamic')
    power_expenses = db.relationship('PowerExpense', backref='device', lazy='dynamic')
    behaviors_dataset = db.relationship('BehaviorDataset', backref='device', lazy='dynamic')
    usage_histories = db.relationship('UsageHistory', backref='device', lazy='dynamic')
    sensors_data = db.relationship('SensorData', backref='device', lazy='dynamic')
    notifications = db.relationship('Notification', backref='device', lazy='dynamic')
    device_serial_number = db.relationship('DeviceSerialNumber', backref='device', lazy='dynamic')
Sensor.py (model class)
class Sensor(db.Model):
    """ORM model for the ``sensors`` table."""

    __tablename__ = "sensors"

    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String(120))

    # Timestamps: creation time defaults to the database's now();
    # the update time is set on UPDATE.
    created_at = db.Column(db.DateTime(timezone=True), server_default=db.func.now())
    updated_at = db.Column(db.DateTime(timezone=True), onupdate=db.func.now())

    # Readings for this sensor; adds a ``sensor`` backref on SensorData.
    sensors_data = db.relationship('SensorData', backref='sensor', lazy='dynamic')
SensorData.py (model class)
class SensorData(db.Model):
    """ORM model for the ``sensors_data`` table: one value per device and sensor."""

    __tablename__ = "sensors_data"

    id = db.Column(db.Integer, primary_key=True)

    # Links to the device and the sensor that the value belongs to.
    device_id = db.Column(db.Integer, db.ForeignKey('devices.id'))
    sensor_id = db.Column(db.Integer, db.ForeignKey('sensors.id'))

    value = db.Column(db.Float)

    # Timestamps: creation time defaults to the database's now();
    # the update time is set on UPDATE.
    created_at = db.Column(db.DateTime(timezone=True), server_default=db.func.now())
    updated_at = db.Column(db.DateTime(timezone=True), onupdate=db.func.now())
You should assign the sensor list to each entry in the device list. A crude way of achieving the desired result:
@classmethod
def get_device_sensor(cls):
    """Return every device that has sensor data, with its readings nested.

    Returns:
        A JSON response ``{'message': 'success', 'device': [...]}`` in which
        each entry carries ``device_name`` and a ``sensors`` list, or a
        ``({'message': ...}, 404)`` tuple when no device has sensor data.
    """
    device_sensor_schema = DeviceSensorSchema(many=True)

    # The join yields one row per reading; DISTINCT collapses that back to
    # one row per device.
    devices = Device.query.join(SensorData, Device.id == SensorData.device_id).distinct()

    entries = []
    for device in devices:
        # Readings of this device only, labelled to match SensorValueSchema.
        # This is one query per device, which is fine for small tables.
        readings = Sensor.query.join(SensorData, Sensor.id == SensorData.sensor_id)\
            .filter(SensorData.device_id == device.id)\
            .add_columns(Sensor.name.label('sensor_name'), SensorData.value.label('value'))
        entries.append({'device_name': device.name, 'sensors': readings.all()})

    res_device = device_sensor_schema.dump(entries, many=True)

    # NOTE(review): assumes dump() returns a plain list (marshmallow 3).
    if res_device:
        return jsonify({'message': 'success', 'device': res_device})
    return {'message': 'Table device is empty'}, 404
A better approach would be to make use of the mapped SQLAlchemy relationship, in this case Device.sensors_data.

Grouping object properties into new dictionary within Marshmallow schema

I am trying to serialize an object with Marshmallow in such a way that "related" properties are grouped together into a single dictionary that does not exist on the original object. My code:
from marshmallow import Schema, fields, pprint
import json
class StatsSchema(Schema):
    """The two numeric "stats" properties: ``population`` and ``rating``."""

    population = fields.Int()
    rating = fields.Int()
class AnimalSchema(Schema):
    """Animal output: name, feature list and a nested ``stats`` object."""

    name = fields.Str()
    features = fields.List(fields.Str())

    # Declared as a nested object read from a ``stats`` key on the source.
    stats = fields.Nested(StatsSchema)
# Source object: the stats values sit at the top level, not under "stats".
dog = {
    'name': 'dog',
    'features': ['tongue', 'wet nose'],
    'population': 200,
    'rating': 10,
}

animal_schema = AnimalSchema()

# dump() is unpacked as a (data, errors) pair.
data, errors = animal_schema.dump(dog)
rendered = json.dumps(data, indent=2)
print(rendered)
Actual result:
{
"features": [
"tongue",
"wet nose"
],
"name": "dog"
}
Desired result:
{
"features": [
"tongue",
"wet nose"
],
"name": "dog",
"stats": {"population": 200, "rating": 10}
}
I understand that the "stats" key is missing from the output because it is not on the original object, but I am not sure how to specify that Marshmallow should create the new "stats" key as a new dictionary using the object.
I found one possible way to create the inner dictionary. Not sure if it is the only/best method:
class AnimalSchema(Schema):
    """Animal output whose ``stats`` value is computed by a schema method."""

    name = fields.Str()
    features = fields.List(fields.Str())
    stats = fields.Method('get_stats')

    def get_stats(self, post):
        """Dump ``post`` through ``StatsSchema`` and return the resulting data."""
        # dump() is unpacked as a (data, errors) pair; the errors are unused.
        stats_data, _errors = StatsSchema().dump(post)
        return stats_data
This is discussed in https://github.com/marshmallow-code/marshmallow/issues/940.
You could do that
class AnimalSchema(Schema):
    """Animal output with a dump-only nested ``stats`` object."""

    name = fields.Str()
    features = fields.List(fields.Str())

    # Read from the ``stats`` attribute of the object being dumped.
    stats = fields.Nested(StatsSchema, dump_only=True)
class Animal:
    [...]  # rest of the class elided; it must provide ``population`` and ``rating``

    @property
    def stats(self):
        """Group ``population`` and ``rating`` into a single dictionary."""
        return {'population': self.population, 'rating': self.rating}

Categories

Resources