Flask-Restplus: how to model string or object? - python

In Flask-Restplus, I need to model an attribute whose value may be either a list of strings or a list of objects.
That is, it can look like this:
{
    'my_attribute': [
        'value1',
        'value2'
    ]
}
or it can look like the following:
{
    'my_attribute': [
        {
            'name': 'value1',
            'foo': 'something'
        },
        {
            'name': 'value2',
            'foo': 'something else'
        }
    ]
}
How should I model that in Flask-Restplus’ api.model?

I've just figured this out myself. In short, create a custom field class that emits its own JSON schema. The schema in turn uses the oneOf keyword to specify that each element is either a string or an object.
from flask_restplus import fields

element_object = api.model('Element_Object', {
    'name': fields.String(),
    'foo': fields.String()
})

class StringOrObjectElement(fields.Nested):
    __schema_type__ = ['string', 'object']

    def output(self, key, obj):
        # Bare strings map to the 'name' key; other keys get a default.
        if isinstance(obj, str):
            if key == 'name':
                return obj
            else:
                return 'default_value'
        return super().output(key, obj)

    def schema(self):
        # Replace the generated 'type'/'$ref' pair with a oneOf schema.
        schema_dict = super().schema()
        schema_dict.pop('type')
        nested_ref = schema_dict.pop('$ref')
        schema_dict['oneOf'] = [
            {
                'type': 'string'
            },
            {
                '$ref': nested_ref
            }
        ]
        return schema_dict

root_object = api.model('Root_Object', {
    # Note: the class is used directly, not as fields.StringOrObjectElement
    'my_attribute': fields.List(StringOrObjectElement(element_object))
})
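For completeness, a minimal usage sketch (assuming the api object from above; marshal is part of Flask-Restplus' public API, though I have not verified this against every version):

from flask_restplus import marshal

# Objects pass through the nested model; bare strings are mapped to 'name'
# by the custom output() above.
print(marshal({'my_attribute': ['value1', 'value2']}, root_object))
print(marshal({'my_attribute': [{'name': 'value1', 'foo': 'something'}]}, root_object))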

Related

How to convert string to key value in python

I have a Django application and want to convert a comma-separated string field into key-value pairs and add them to a JSON data block.
class MyClass1(models.Model):
    keywords = models.TextField(_('Keywords'), null=True, blank=True)
Example of list:
blue,shirt,s,summer,for women
The JSON data in my code:
data = {
    "name": self.name,
    "type": self.type,
    ...
    "keywords": []
}
I want to split the comma-separated string in self.keywords and append the parts to the keywords field in my JSON, but as an array of objects like this:
{
    "name": keyword,
},
I do the split with the split() function, but I don't know how to build the key-value pairs as an array and append them to keywords.
Expected output:
data = {
    "name": "Name of item",
    "type": "Type of item",
    ...
    "keywords": [
        {
            "name": "blue"
        },
        {
            "name": "shirt"
        },
        ...
    ]
}
You can work with .split():
data = {
    'name': self.name,
    'type': self.type,
    # …
    'keywords': [{'name': n} for n in self.keywords.split(',')],
}
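Since keywords is nullable (null=True), a small hedged refinement guards against None and stray whitespace:

# Sketch: tolerate a NULL column, trim whitespace, and skip empty entries.
raw = self.keywords or ''
data['keywords'] = [{'name': n.strip()} for n in raw.split(',') if n.strip()]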
It might however be better to work with a custom field. You can define such a field with:
from django.db import models

class ListAsCharField(models.Field):
    def __init__(self, separator=',', *args, **kwargs):
        self.separator = separator
        super().__init__(*args, **kwargs)

    def get_db_prep_value(self, value, connection, prepared=False):
        # Join only list-like values; leave strings and None untouched.
        if not isinstance(value, (str, type(None))):
            value = self.separator.join(str(x) for x in value)
        return super().get_db_prep_value(value, connection, prepared)

    def from_db_value(self, value, expression, connection):
        if isinstance(value, str):
            return value.split(self.separator)

    def to_python(self, value):
        if isinstance(value, str):
            value = value.split(self.separator)
        return value
Then you can use this field in the model to automatically do the wrapping/unwrapping from a list:
class MyClass1(models.Model):
    keywords = ListAsCharField(
        max_length=256, verbose_name=_('Keywords'), null=True, blank=True
    )
Then you can process this with:
data = {
    'name': self.name,
    'type': self.type,
    # …
    'keywords': [{'name': n} for n in self.keywords],
}
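A hypothetical usage sketch (assumptions: the custom field is given a concrete column type, for example by subclassing models.CharField, migrations have been run, and a production version would also implement deconstruct() so the separator argument can be serialized):

obj = MyClass1(keywords=['blue', 'shirt', 's', 'summer', 'for women'])
obj.save()             # stored as "blue,shirt,s,summer,for women"
obj.refresh_from_db()
print(obj.keywords)    # back to ['blue', 'shirt', 's', 'summer', 'for women']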

Validated data fields differ from data fields in Django REST Framework nested serializers

So I was playing around with serializers in Django and wanted to change the names of the fields in my response, when I realized my changes had not been taken into account. I did some digging and saw that my validated_data differs from my data.
My goal here is to give the serializer a Python object whose field names differ from the ones I want to return, so I used the source= argument on my field.
Note that changing the name of the python object's field is not an option.
Here's the python object:
class Flow(object):
    """Just a regular python object"""

    def __init__(self, name=None, flow_uid=None, data_type=None, parent=None, container=None):
        """This has more fields than the serializer is expecting"""
        self._parent = None
        self._container = None
        self.name = name
        self.data_type = data_type
        self.flow_uid = flow_uid
And the following serializers (I am using a nested representation)
serializers.py
from rest_framework.fields import CharField, IntegerField, ListField, JSONField
from rest_framework.serializers import Serializer

class OutputSerializer(Serializer):
    uid = CharField(max_length=36)
    name = CharField(max_length=100)
    description = CharField(max_length=100)

class FlowSerializer(Serializer):
    uid = CharField(source='flow_uid', max_length=36)  # I want 'uid' in my response, not 'flow_uid'
    name = CharField(max_length=100)
    data_type = CharField(max_length=100)

    class Meta:
        fields = '__all__'

    def to_representation(self, instance):
        instance = super(FlowSerializer, self).to_representation(instance)
        # Here instance = OrderedDict([('uid', 'uid_value'), ('name', 'name_value'), ('data_type', 'data_value')])
        return instance

class FlowOutputSerializer(OutputSerializer):
    columns = FlowSerializer(many=True)
viewsets.py
class AddTransformationViewSet(ViewSet):
    """Handle available "actions" for BrickModel operations"""

    def list(self, request, parent_lookup_analyses: str):
        """The method I call for this test"""
        flow1 = Flow(name="name1", flow_uid='flow_uid_value1', data_type='str')
        flow2 = Flow(name="name2", flow_uid='flow_uid_value2', data_type='str')
        flow1_ser = FlowSerializer(flow1)
        flow2_ser = FlowSerializer(flow2)
        dummy_col = {
            "name": "output_name",
            "description": "output_description",
            "uid": "output_uid",
            "columns": [
                flow2_ser.data,  # Debug: {'uid': 'flow_uid_value2', 'name': 'name2', 'data_type': 'str'}
                flow1_ser.data   # Debug: {'uid': 'flow_uid_value1', 'name': 'name1', 'data_type': 'str'}
            ]
        }
        # Debug dummy_col: {'name': 'output_name', 'description': 'output_description', 'uid': 'output_uid', 'columns': [{'uid': 'flow_uid_value2', 'name': 'name2', 'data_type': 'str'}, {'uid': 'flow_uid_value1', 'name': 'name1', 'data_type': 'str'}]}
        dummy_serializer: FlowOutputSerializer = FlowOutputSerializer(data=dummy_col)
        dummy_serializer.is_valid(raise_exception=True)
        # Debug dummy_serializer.data: {'uid': 'output_uid', 'name': 'output_name', 'description': 'output_description', 'columns': [OrderedDict([('uid', 'flow_uid_value2'), ('name', 'name2'), ('data_type', 'str')]), OrderedDict([('uid', 'flow_uid_value1'), ('name', 'name1'), ('data_type', 'str')])]}
        # Debug dummy_serializer.validated_data: OrderedDict([('uid', 'output_uid'), ('name', 'output_name'), ('description', 'output_description'), ('columns', [OrderedDict([('flow_uid', 'flow_uid_value2'), ('name', 'name2'), ('data_type', 'str')]), OrderedDict([('flow_uid', 'flow_uid_value1'), ('name', 'name1'), ('data_type', 'str')])])])
        return Response(data=dummy_serializer.validated_data, status=status.HTTP_201_CREATED)
Expected response:
{
    ...
    "columns": [
        {
            "uid": "flow_uid_value2",
            "name": "name2",
            "data_type": "str"
        },
        {
            "uid": "flow_uid_value1",
            "name": "name1",
            "data_type": "str"
        }
    ]
}
What I get (I want 'flow_uid' to be 'uid'):
{
    ...
    "columns": [
        {
            "flow_uid": "flow_uid_value2",
            "name": "name2",
            "data_type": "str"
        },
        {
            "flow_uid": "flow_uid_value1",
            "name": "name1",
            "data_type": "str"
        }
    ]
}
Is there any particular danger in using .data in this case rather than .validated_data?
What is the cause of this behavior?
Is there any particular danger in using .data in this case rather than .validated_data? What is the cause of this behavior?
serializer.validated_data is meant to be used with the Python object, so it exposes flow_uid because of the custom source value.
serializer.data is the serialized result, i.e. what should be rendered in the response (after save() has been called, it reflects the saved instance).
Therefore you should always use serializer.data in your responses, and keep serializer.validated_data for code that interacts with models or other internal project code:
Response(data=dummy_serializer.data, status=status.HTTP_201_CREATED)
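A minimal illustration of the source= mapping (a sketch with a hypothetical ExampleSerializer; same mechanics as FlowSerializer above):

class ExampleSerializer(Serializer):
    uid = CharField(source='flow_uid', max_length=36)

s = ExampleSerializer(data={'uid': 'abc'})
s.is_valid()
print(s.validated_data)  # {'flow_uid': 'abc'} -> internal (source) names
print(s.data)            # {'uid': 'abc'}      -> external (response) names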

Save a dict whose key holds a list of dicts to JSON, with each dict stored on one line

I have a similar question to this previous question. However, my dictionary has a structure like the following
data_dict = {
    'refresh_count': 1,
    'fetch_date': '10-10-2019',
    'modified_date': '',
    'data': [
        {'date': '10-10-2019', 'title': 'Hello1'},
        {'date': '11-10-2019', 'title': 'Hello2'}
    ]
}
I would like to store it as JSON so that each dictionary in data stays on its own line. Something like:
{
    'refresh_count': 1,
    'fetch_date': '10-10-2019',
    'modified_date': '',
    'data': [
        {'date': '10-10-2019', 'title': 'Hello1'},
        {'date': '11-10-2019', 'title': 'Hello2'}
    ]
}
I cannot achieve this by simply using json.dumps (or dump), nor with the previous solution.
json.dumps(data_dict, indent=2)
>> {
  "refresh_count": 1,
  "fetch_date": "10-10-2019",
  "modified_date": "",
  "data": [
    {
      "date": "10-10-2019",
      "title": "Hello1"
    },
    {
      "date": "11-10-2019",
      "title": "Hello2"
    }
  ]
}
This is quite a hack, but you can implement a custom JSON encoder that will do what you want (see Custom JSON Encoder in Python With Precomputed Literal JSON). For any object that you do not want indented, wrap it in the NoIndent class. The custom JSON encoder looks for this type in its default() method and returns a unique placeholder string (__N__), storing the unindented JSON in self._literal. Later, in the call to encode(), these placeholder strings are replaced with the unindented JSON.
Note that you need to choose a placeholder format that cannot possibly appear in the encoded data, to avoid replacing something unintentionally.
import json

class NoIndent:
    def __init__(self, o):
        self.o = o

class MyEncoder(json.JSONEncoder):
    def __init__(self, *args, **kwargs):
        super(MyEncoder, self).__init__(*args, **kwargs)
        self._literal = []

    def default(self, o):
        if isinstance(o, NoIndent):
            i = len(self._literal)
            self._literal.append(json.dumps(o.o))
            return '__%d__' % i
        else:
            return super(MyEncoder, self).default(o)

    def encode(self, o):
        s = super(MyEncoder, self).encode(o)
        for i, literal in enumerate(self._literal):
            s = s.replace('"__%d__"' % i, literal)
        return s

data_dict = {
    'refresh_count': 1,
    'fetch_date': '10-10-2019',
    'modified_date': '',
    'data': [
        NoIndent({'date': '10-10-2019', 'title': 'Hello1'}),
        NoIndent({'date': '11-10-2019', 'title': 'Hello2'}),
    ]
}

s = json.dumps(data_dict, indent=2, cls=MyEncoder)
print(s)
Intermediate representation returned by super(MyEncoder, self).encode(o):
{
  "fetch_date": "10-10-2019",
  "refresh_count": 1,
  "data": [
    "__0__",
    "__1__"
  ],
  "modified_date": ""
}
Final output:
{
  "fetch_date": "10-10-2019",
  "refresh_count": 1,
  "data": [
    {"date": "10-10-2019", "title": "Hello1"},
    {"date": "11-10-2019", "title": "Hello2"}
  ],
  "modified_date": ""
}
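If you are worried about placeholder collisions, one possible hardening (my own sketch, not part of the original answer) is to embed a random UUID in the placeholder so it cannot realistically occur in the data:

import json
import uuid

class NoIndent:
    def __init__(self, o):
        self.o = o

class SafeEncoder(json.JSONEncoder):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._token = uuid.uuid4().hex  # effectively collision-proof marker
        self._literal = []

    def default(self, o):
        if isinstance(o, NoIndent):
            self._literal.append(json.dumps(o.o))
            return '__%s_%d__' % (self._token, len(self._literal) - 1)
        return super().default(o)

    def encode(self, o):
        s = super().encode(o)
        for i, literal in enumerate(self._literal):
            s = s.replace('"__%s_%d__"' % (self._token, i), literal)
        return s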

How to validate structure (or schema) of dictionary in Python?

I have a dictionary with config info:
my_conf = {
    'version': 1,
    'info': {
        'conf_one': 2.5,
        'conf_two': 'foo',
        'conf_three': False,
        'optional_conf': 'bar'
    }
}
I want to check if the dictionary follows the structure I need.
I'm looking for something like this:
conf_structure = {
    'version': int,
    'info': {
        'conf_one': float,
        'conf_two': str,
        'conf_three': bool
    }
}

is_ok = check_structure(conf_structure, my_conf)
Is there an existing solution to this problem, or any library that could make implementing check_structure easier?
You may use schema (PyPI link)
schema is a library for validating Python data structures, such as those obtained from config-files, forms, external services or command-line parsing, converted from JSON/YAML (or something else) to Python data-types.
from schema import Schema, And, Use, Optional, SchemaError

def check(conf_schema, conf):
    try:
        conf_schema.validate(conf)
        return True
    except SchemaError:
        return False

conf_schema = Schema({
    'version': And(Use(int)),
    'info': {
        'conf_one': And(Use(float)),
        'conf_two': And(Use(str)),
        'conf_three': And(Use(bool)),
        Optional('optional_conf'): And(Use(str))
    }
})

conf = {
    'version': 1,
    'info': {
        'conf_one': 2.5,
        'conf_two': 'foo',
        'conf_three': False,
        'optional_conf': 'bar'
    }
}

print(check(conf_schema, conf))
Without using libraries, you could also define a simple recursive function like this:
def check_structure(struct, conf):
    if isinstance(struct, dict) and isinstance(conf, dict):
        # struct is a dict of types or other dicts
        return all(k in conf and check_structure(struct[k], conf[k]) for k in struct)
    if isinstance(struct, list) and isinstance(conf, list):
        # struct is a list in the form [type or dict]
        return all(check_structure(struct[0], c) for c in conf)
    elif isinstance(struct, type):
        # struct is the expected type of conf
        return isinstance(conf, struct)
    else:
        # struct is neither a dict, nor a list, nor a type
        return False
This assumes that the config can have keys that are not in your structure, as in your example.
Update: New version also supports lists, e.g. like 'foo': [{'bar': int}]
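Usage with the structures from the question (assuming the check_structure function above):

print(check_structure(conf_structure, my_conf))           # True
print(check_structure({'version': str}, {'version': 1}))  # False: wrong type
print(check_structure({'foo': [{'bar': int}]}, {'foo': [{'bar': 1}, {'bar': 2}]}))  # True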
Advice for the future: use Pydantic!
Pydantic enforces type hints at runtime and provides user-friendly errors when data is invalid. Define how the data should look in pure, canonical Python; validate it with pydantic, as simple as that:
from pydantic import BaseModel

class Info(BaseModel):
    conf_one: float
    conf_two: str
    conf_three: bool

    class Config:
        extra = 'forbid'

class ConfStructure(BaseModel):
    version: int
    info: Info
If validation fails pydantic will raise an error with a breakdown of what was wrong:
my_conf_wrong = {
    'version': 1,
    'info': {
        'conf_one': 2.5,
        'conf_two': 'foo',
        'conf_three': False,
        'optional_conf': 'bar'
    }
}

my_conf_right = {
    'version': 10,
    'info': {
        'conf_one': 14.5,
        'conf_two': 'something',
        'conf_three': False
    }
}

model = ConfStructure(**my_conf_right)
print(model.dict())
# {'version': 10, 'info': {'conf_one': 14.5, 'conf_two': 'something', 'conf_three': False}}

res = ConfStructure(**my_conf_wrong)
# pydantic.error_wrappers.ValidationError: 1 validation error for ConfStructure
# info -> optional_conf
#   extra fields not permitted (type=value_error.extra)
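Note that the code above uses the Pydantic v1 API. On Pydantic v2 the same models might look like this (a sketch; v2 moved per-model config into model_config and renamed .dict() to .model_dump()):

from pydantic import BaseModel, ConfigDict

class Info(BaseModel):
    model_config = ConfigDict(extra='forbid')

    conf_one: float
    conf_two: str
    conf_three: bool

class ConfStructure(BaseModel):
    version: int
    info: Info

print(ConfStructure(**my_conf_right).model_dump())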
You can build the structure using recursion:
def get_type(value):
    if isinstance(value, dict):
        return {key: get_type(value[key]) for key in value}
    else:
        return str(type(value))
And then compare required structure with your dictionary:
get_type(current_conf) == get_type(required_conf)
Example:
required_conf = {
    'version': 1,
    'info': {
        'conf_one': 2.5,
        'conf_two': 'foo',
        'conf_three': False,
        'optional_conf': 'bar'
    }
}

get_type(required_conf)
# Python 2 output; on Python 3 the reprs read "<class 'str'>" etc.
{'info': {'conf_two': "<type 'str'>", 'conf_one': "<type 'float'>", 'optional_conf': "<type 'str'>", 'conf_three': "<type 'bool'>"}, 'version': "<type 'int'>"}
Looks like the dict-schema-validator package does exactly what you need:
Here is a simple schema representing a Customer:
{
    "_id": "ObjectId",
    "created": "date",
    "is_active": "bool",
    "fullname": "string",
    "age": ["int", "null"],
    "contact": {
        "phone": "string",
        "email": "string"
    },
    "cards": [{
        "type": "string",
        "expires": "date"
    }]
}
Validation:
from datetime import datetime
import json

from dict_schema_validator import validator

with open('models/customer.json', 'r') as j:
    schema = json.loads(j.read())

customer = {
    "_id": 123,
    "created": datetime.now(),
    "is_active": True,
    "fullname": "Jorge York",
    "age": 32,
    "contact": {
        "phone": "559-940-1435",
        "email": "york@example.com",
        "skype": "j.york123"
    },
    "cards": [
        {"type": "visa", "expires": "12/2029"},
        {"type": "visa"},
    ]
}

errors = validator.validate(schema, customer)
for err in errors:
    print(err['msg'])
Output:
[*] "_id" has wrong type. Expected: "ObjectId", found: "int"
[+] Extra field: "contact.skype" having type: "str"
[*] "cards[0].expires" has wrong type. Expected: "date", found: "str"
[-] Missing field: "cards[1].expires"
You can also use the dataclasses_json library. Here is how I would normally do it:
from dataclasses import dataclass
from dataclasses_json import dataclass_json, Undefined
from dataclasses_json.undefined import UndefinedParameterError
from typing import Optional

#### define schema #######
@dataclass_json(undefined=Undefined.RAISE)
@dataclass
class Info:
    conf_one: float
    # conf_two: str
    conf_three: bool
    optional_conf: Optional[str]

@dataclass_json
@dataclass
class ConfStructure:
    version: int
    info: Info

####### test for compliance ####
try:
    ConfStructure.from_dict(my_conf).to_dict()
except KeyError as e:
    print("there's a missing parameter")
except UndefinedParameterError as e:
    print('extra parameters')
You can use dictify from https://pypi.org/project/dictify/.
Read docs here https://dictify.readthedocs.io/en/latest/index.html
This is how it can be done.
from dictify import Field, Model

class Info(Model):
    conf_one = Field(required=True).instance(float)
    conf_two = Field(required=True).instance(str)
    conf_three = Field(required=True).instance(bool)
    optional_conf = Field().instance(str)

class MyConf(Model):
    version = Field(required=True).instance(int)
    info = Field().model(Info)

my_conf = MyConf()  # Invalid without required fields

# Valid
my_conf = MyConf({
    'version': 1,
    'info': {
        'conf_one': 2.5,
        'conf_two': 'foo',
        'conf_three': False,
        'optional_conf': 'bar'
    }
})

my_conf['info']['conf_one'] = 'hi'  # Invalid, won't be assigned
There is a standard for validating JSON files called JSON Schema.
Validators have been implemented in many languages, including Python. Read the documentation for more details. In the following example I will use the Python package jsonschema (docs), which I am familiar with.
Given the config data
my_conf = {
    'version': 1,
    'info': {
        'conf_one': 2.5,
        'conf_two': 'foo',
        'conf_three': False,
        'optional_conf': 'bar',
    },
}
and the corresponding config schema
conf_structure = {
    'type': 'object',
    'properties': {
        'version': {'type': 'integer'},
        'info': {
            'type': 'object',
            'properties': {
                'conf_one': {'type': 'number'},
                'conf_two': {'type': 'string'},
                'conf_three': {'type': 'boolean'},
                'optional_conf': {'type': 'string'},
            },
            'required': ['conf_one', 'conf_two', 'conf_three'],
        },
    },
}
the actual code to validate this data is then as simple as this:
import jsonschema
jsonschema.validate(my_conf, schema=conf_structure)
A big advantage of this approach is that you can store both data and schema as JSON-formatted files.
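For instance (a sketch; the file name is hypothetical), the schema could live next to your code as a JSON file and be loaded at validation time:

import json

import jsonschema

with open('conf_schema.json') as f:
    conf_structure = json.load(f)

jsonschema.validate(my_conf, schema=conf_structure)  # raises ValidationError on mismatch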
@tobias_k beat me to it (both in time and quality, probably), but here is another recursive function for the task that might be a bit easier for you (and me) to follow:
def check_dict(my_dict, check_against):
    for k, v in check_against.items():
        if isinstance(v, dict):
            # Recurse, but keep checking the remaining keys rather than
            # returning on the first nested dict.
            if not check_dict(my_dict[k], v):
                return False
        elif not isinstance(my_dict[k], v):
            return False
    return True
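Usage on the question's data (note this version raises KeyError when a required key is missing; you could catch that and treat it as False):

print(check_dict(my_conf, conf_structure))         # True
print(check_dict({'version': 1}, conf_structure))  # KeyError: 'info'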
The nature of dictionaries in Python, as long as they are not exported as JSON, is that their key order does not matter; keys are simply looked up to return values (hence a dictionary).
In either case, these functions should provide what you're looking for, for the level of nesting present in the samples you provided.
# comparing key views directly
# (note: in Python 3, dict key views compare like sets, so order is ignored)
def check_structure(conf_structure, my_conf):
    if my_conf.keys() != conf_structure.keys():
        return False
    for key in my_conf.keys():
        if type(my_conf[key]) == dict:
            if my_conf[key].keys() != conf_structure[key].keys():
                return False
    return True

# comparing sorted key lists instead of key views
def check_structure(conf_structure, my_conf):
    if sorted(my_conf.keys()) != sorted(conf_structure.keys()):
        return False
    for key in my_conf.keys():
        if type(my_conf[key]) == dict:
            if sorted(my_conf[key].keys()) != sorted(conf_structure[key].keys()):
                return False
    return True
This solution would obviously need to be changed if the level of nesting were greater (i.e. it is configured to assess the structural similarity of dictionaries whose values may themselves be dictionaries, but not dictionaries where values of those nested dictionaries are also dictionaries).

Return parent value from dict of dicts if child key doesn't exist

In my dictionary below, I want to be able to pull the value from my subkey* attributes. However, if a subkey* does not exist, I want to automatically return the parent's value.
d = {
    'key1': {
        'value': "parent-key1",
        'subkey1': {
            'value': "child1"
        },
        'subkey2': {
            'value': "child2"
        }
    },
    'key2': {
        'value': "parent-key2",
        'subkey1': {
            'value': "child3"
        },
        'subkey2': {
            'value': "child4"
        }
    }
}
My function stub looks like this:
def get_values(my_dict_value):
    try:
        return my_dict_value
    except KeyError:
        # How do I find the parent value of my_dict_value?
My anticipated results are:
>>> get_values(d['key1']['subkey1']['value'])
child1
>>> get_values(d['key1']['subkey3']['value'])
parent-key1
How do I find the parent value in my dictionary?
Here is another way of solving it for a nested dictionary, using the get method:
>>> d = {
...     'key1': {
...         'value': "parent-key1",
...         'subkey1': {
...             'value': "child1"
...         },
...         'subkey2': {
...             'value': "child2"
...         }
...     },
...     'key2': {
...         'value': "parent-key2",
...         'subkey1': {
...             'value': "child3"
...         },
...         'subkey2': {
...             'value': "child4"
...         }
...     }
... }
>>>
>>> def get_value(key, subkey):
...     dkey = d.get(key)
...     return dkey.get(subkey, {}).get('value', dkey.get('value'))
...
>>> print(get_value("key1", "subkey1"))
child1
>>> print(get_value("key2", "subkey2"))
child4
>>> print(get_value("key2", "subkey3"))
parent-key2
>>>
As SethMMorton mentions, your current approach will throw a KeyError before execution ever reaches the function.
You can get around this by passing two values to your function - the parent key and the child key. I dropped the third value because your example shows that both lookups use the key 'value', so the function can handle that automatically.
def get_values(key, subkey):
    try:
        return d[key][subkey]['value']
    except KeyError:
        try:
            return d[key]['value']
        except KeyError:
            return "Not found"
This utilizes nested exceptions, which will get very messy if you go much deeper than this. However, the outer try block attempts to return parent/child/value. If that doesn't exist, it attempts to return parent/value. If that doesn't exist either, it returns Not found.
You can call it like this:
print(get_values("key1", "subkey1"))
print(get_values("key1", "subkey3"))
print(get_values("key4", "subkey1"))
This outputs:
child1
parent-key1
Not found
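If you need the same fallback behaviour at arbitrary depth, here is one possible generalization (my own sketch, not from either answer above): walk the key path and remember the deepest 'value' seen so far.

def get_values(d, *keys):
    # Sketch: follow keys into the nested dict; if a key is missing,
    # fall back to the deepest 'value' encountered on the way down.
    node = d
    fallback = None
    for key in keys:
        if isinstance(node, dict) and key in node:
            node = node[key]
            if isinstance(node, dict) and 'value' in node:
                fallback = node['value']
        else:
            return fallback if fallback is not None else "Not found"
    return node['value'] if isinstance(node, dict) else node

print(get_values(d, 'key1', 'subkey1'))  # child1
print(get_values(d, 'key1', 'subkey3'))  # parent-key1
print(get_values(d, 'key4', 'subkey1'))  # Not found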
