Convert JSON to an object with only required fields - python

Is there a way to convert json in to an object with only required fields, such that extra fields are ignored, and if the required fields do not exits throw an error?
If an object's field matches exactly with json fields, we could use something like this:
import json
class Person:
def __init__(self, name, age):
self.name = name
self.age = age
test_json = '{"name": "user", "age":"50"}'
test_dict = json.loads(test_json)
test_obj = Person(**test_dict)
However, I would want the code to silently ignore extra fields in json for example:
test_json = '{"name": "user", "age":"50", "hobby":"swimming"}'
And if required fields are missing, throw an error
test_json = '{"name": "user", "hobby":"swimming"}'
I know you can add checks in when initializing the obj from the dictionary. But there are many fields and the json can come from different places thus formatting could change, so I wonder if there is a library could help achieve the above?

In order to get the extra fields in the object, you can use keyworded arguments (kwargs). For instance, this code can take any number of arguments (larger than 2 since the name and age must be there).
class Person:
def __init__(self, name, age, **kwargs):
self.name = name
self.age = age
print(kwargs)
You can tinker around with this and see if you can get it to fully work as desired.

this code allows you to get only dictionary keys that match the names of your class attributes using the inspect module:
import inspect
import json
class Person:
def __init__(self, name, age):
self.name = name
self.age = age
getting the required field in your class initialization, this would recognize that you will need a name and age variables in your class __init__ method:
argspec = inspect.getfullargspec(Person.__init__)
required = argspec.args
if argspec.defaults:
required = required[:-len(argspec.defaults)]
required.remove('self')
keeping only the names that match object attributes:
test_json = '{"name": "user", "age":"50", "foo": "bar", "bar": "baz"}'
test_dict = json.loads(test_json)
test_dict = {k:v for k, v in test_dict.items() if k in required}
initializing the object:
test_obj = Person(**test_dict)

You can you Pydantic and define your class like in the example bellow:
import json
from pydantic import BaseModel
class Person(BaseModel):
name: str
age: int
# Ignore the extra field
test_json_extra_field = '{"name": "user", "age":"50", "hobby":"swimming"}'
person_01 = Person(**json.loads(test_json_extra_field))
print(person_01)
# throw error because age is not in the json
test_json_no_required_field = '{"name": "user", "hobby":"swimming"}'
person_02 = Person(**json.loads(test_json_no_required_field))
print(person_02)
Pydantic BaseModel will ignore the extra field in test_json_extra_field and throw an error in test_json_no_required_field because age is not in the json info.

Related

Pydantic validations for extra fields that not defined in schema

I am using pydantic for schema validations and I would like to throw an error when any extra field is added to a schema that isn't defined.
from typing import Literal, Union
from pydantic import BaseModel, Field, ValidationError
class Cat(BaseModel):
pet_type: Literal['cat']
meows: int
class Dog(BaseModel):
pet_type: Literal['dog']
barks: float
class Lizard(BaseModel):
pet_type: Literal['reptile', 'lizard']
scales: bool
class Model(BaseModel):
pet: Union[Cat, Dog, Lizard] = Field(..., discriminator='pet_type')
n: int
print(Model(pet={'pet_type': 'dog', 'barks': 3.14, 'eats': 'biscuit'}, n=1))
""" try:
Model(pet={'pet_type': 'dog'}, n=1)
except ValidationError as e:
print(e) """
In the above code, I have added the eats field which is not defined. The pydantic validations are applied and the extra values that I defined are removed in response. I wanna throw an error saying eats is not allowed for Dog or something like that. Is there any way to achieve that?
And is there any chance that we can provide the input directly instead of the pet object?
print(Model({'pet_type': 'dog', 'barks': 3.14, 'eats': 'biscuit', n=1})). I tried without descriminator but those specific validations are missing related to pet_type. Can someone guide me how to achive either one of that?
You can use the extra field in the Config class to forbid extra attributes during model initialisation (by default, additional attributes will be ignored).
For example:
from pydantic import BaseModel, Extra
class Pet(BaseModel):
name: str
class Config:
extra = Extra.forbid
data = {
"name": "some name",
"some_extra_field": "some value",
}
my_pet = Pet.parse_obj(data) # <- effectively the same as Pet(**pet_data)
will raise a VaidationError:
ValidationError: 1 validation error for Pet
some_extra_field
extra fields not permitted (type=value_error.extra)
Works as well when the model is "nested", e.g.:
class PetModel(BaseModel):
my_pet: Pet
n: int
pet_data = {
"my_pet": {"name": "Some Name", "invalid_field": "some value"},
"n": 5,
}
pet_model = PetModel.parse_obj(pet_data)
# Effectively the same as
# pet_model = PetModel(my_pet={"name": "Some Name", "invalid_field": "some value"}, n=5)
will raise:
ValidationError: 1 validation error for PetModel
my_pet -> invalid_field
extra fields not permitted (type=value_error.extra)
Pydantic is made to validate your input with the schema. In your case, you want to remove one of its validation feature.
I think you should create a new class that inherit from BaseModel
class ModifiedBaseModel(BaseModel):
def __init__(__pydantic_self__, **data: Any) -> None:
registered, not_registered = __pydantic_self__.filter_data(data)
super().__init__(**registered)
for k, v in not_registered.items():
__pydantic_self__.__dict__[k] = v
#classmethod
def filter_data(cls, data):
registered_attr = {}
not_registered_attr = {}
annots = cls.__annotations__
for k, v in data.items():
if k in annots:
registered_attr[k] = v
else:
not_registered_attr[k] = v
return registered_attr, not_registered_attr
then create your validation classes
class Cat(ModifiedBaseModel):
pet_type: Literal['cat']
meows: int
now you can create a new Cat without worries about undefined attribute. Like this
my_cat = Cat(pet_type='cat', meows=3, name='blacky', age=3)
2nd question, to put the input directly from dict you can use double asterisk **
Dog(**my_dog_data_in_dict)
or
Dog(**{'pet_type': 'dog', 'barks': 3.14, 'eats': 'biscuit', n=1})

Generating Python Class from JSON Schema

I am trying to generate a new class in Python starting from a JSON Schema previously defined and created. Then I would like to use the autogenerated class to read a JSON file.
My problem is that I manage to create the class from the schema using "python_jsonschema_objects" or "marshmallow_jsonschema" but then when I create an object belonging to that class python do not suggest the elements inside of that class. (I would like to type object.name, and I would like "name" to be suggested by python because it understands that name is a property of object).
Moreover the class that these tools create is in the first case an "abc.class" and in the second "class 'marshmallow.schema.GeneratedSchema'".
I leave a code example here:
from marshmallow import Schema, fields, post_load
from marshmallow_jsonschema import JSONSchema
from pprint import pprint
import json
class User(object):
def __init__(self, name, age):
self.name = name
self.age = age
def __repr__(self):
return f'I am {self.name} and my age is {self.age}'
class UserSchema(Schema):
name = fields.String()
age = fields.Integer()
#post_load
def make(self, data):
return User(data)
schema = UserSchema()
json_schema = JSONSchema()
print(json_schema.dump(schema))
with open(abs_path + "schema_test_file.json" , 'w') as outfile:
json.dump(json_schema.dump(schema), outfile)
with open(abs_path + "schema_test_file.json" ) as json_file:
data = json.load(json_file)
schema = UserSchema().from_dict(data) **class 'marshmallow.schema.GeneratedSchema'**
user = schema()
user.name = "Marco" **I would like here that python suggest name and age as properties of schema**
user.age = 14
I hope I have been clear enough.

What's Python JSON serializable?

I have two ways to represent Python object by json.dumps()
First:
person = {
"name": "John",
"age": 30,
"city": "New York"
}
Second:
class Person:
def _init_(self, name, age, city):
self.name = name
self.age = age
self.city = city
person = Person("John", 30, "New York")
Then I tried p1 = json.dumps(person), the second way would say it's not JSON serializable.
So basically for Python, json.dumps only work for built-in object like dict object?
If you are asking about vanilla Python, serialization could be done this way:
import json
class Person:
def __init__(self, name, age, city):
self.name = name
self.age = age
self.city = city
def to_json(self):
return json.dumps(self, default=lambda o: o.__dict__)
person = Person("John", 30, "New York")
print(person.to_json())
So we're just converting an object to a dict using __dict__ attribute.
But if you need something more sophisticated, you might need DRF (Django REST Framework) or pydantic. An example how it could be done with DRF:
from rest_framework import serializers
from rest_framework.serializers import Serializer
class Person:
def __init__(self, name, age, city):
self.name = name
self.age = age
self.city = city
class PersonSerializer(Serializer):
name = serializers.CharField()
age = serializers.IntegerField()
city = serializers.CharField()
def create(self, validated_data):
return Person(**validated_data)
person = Person("John", 30, "New York")
print(PersonSerializer(person).data)
This way you have a much better control over it. See docs.
Yes, the json module only knows how to serialize certain built-in types. An easy way to work with classes that have "fields" is the dataclasses module.
Example:
from dataclasses import dataclass, asdict
import json
#dataclass
class Person:
name: str
age: int
city: str
person = Person("John", 30, "New York")
print(json.dumps(asdict(person)))
The asdict() function converts the dataclass instance into a dict which json can serialize.
With Python 3.6+ you can use dataclasses in Python along with the asdict helper function to convert a dataclass instance to a Python dict object.
Note: For 3.6, you'll need a backport for the dataclasses module, as it only became a builtin to Python starting in 3.7.
import json
from dataclasses import dataclass, asdict
#dataclass
class Person:
name: str
age: int
city: str
person = Person("John", 30, "New York")
person_dict = asdict(person)
json_string = json.dumps(person_dict, indent=2)
print(json_string)
Out:
{
"name": "John",
"age": 30,
"city": "New York"
}
If you have a more complex use case, or end up with needing to (de)serialize a nested dataclass model, I'd check out an external library like the dataclass-wizard that supports such a use case in particular.

How to assign Python Dict Keys to corresponding Python Object Attributes

Suppose I have a python class like:
class User:
name = None
id = None
dob = None
def __init__(self, id):
self.id = id
Now I am doing something like this:
userObj = User(id=12) # suppose I don't have values for name and dob yet
## some code here and this code gives me name and dob data in dictionary, suppose a function call
user = get_user_data() # this returns the dictionary like {'name': 'John', 'dob': '1992-07-12'}
Now, the way to assign data to user object is userObj.name = user['name'] and userObj.dob = user['dob']. Suppose, User has 100 attributes. I will have to explicitly assign these attributes. Is there an efficient way in Python which I can use to assign the values from a dictionary to the corresponding attributes in the object? Like, name key in the dictionary is assigned to the name attribute in the object.
1. Modify the Class definition
class User():
def __init__(self, id):
self.data = {"id":id}
userObj = User(id=12)
2. Update the dict()
user = {"name":"Frank", "dob":"Whatever"} # Get the remaining data from elsewhere
userObj.data.update(user) # Update the dict in your userObj
print(userObj.data)
Here you go !
Instead of mapping a dict to the variable keys. You can use setattr to set variables in an object.
class User:
name = None
id = None
dob = None
def __init__(self, id):
self.id = id
def map_dict(self, user_info):
for k, v in user_info.items():
setattr(self, k, v)
Then for boiler code to use it.
userObj = User(id=12)
user_dict = {
'name': 'Bob',
'dob': '11-20-1993',
'something': 'blah'
}
userObj.map_dict(user_dict)
First, there is no need to predeclare properties in python.
class Foo:
bar: int # This actually creates a class member, not an instance member
...
If you want to add values to a class instance just use setattr()
d = {
'prop1': 'value1',
'prop2': 'value2',
'prop2': 'value2'
}
x = Foo()
for prop in d.keys():
setattr(x, prop, d[prop])
class User(dict):
def __init__(self, *args, **kwargs):
super(User, self).__init__(*args, **kwargs)
self.__dict__ = self
and then just get your dictionary and do:
userObj = User(dictionary)
EDIT:
user the function setattr() then
[setattr(userObj, key, item) for key,item in dict.items()]
In Case you REALLY need to
This solution is for the case, other solutions dont work for you and you cannot change your class.
Issue
In case you cannot modify your class in any way and you have a dictionary, that contains the information you want to put in your object, you can first get the custom members of your class by using the inspect module:
import inspect
import numpy as np
members = inspect.getmembers(User)
Extract your custom attributes from all members by:
allowed = ["__" not in a[0] for a in members]
and use numpy list comprehention for the extraction itself:
members = np.array(members)["__" not in a[0] for a in members]
Modify the user
So lets say you have the following user and dict and you want to change the users attributes to the values in the dictionary (behaviour for creating a new user is the same)
user = User(1)
dic = {"name":"test", "id": 2, "dob" : "any"}
then you simply use setattr():
for m in members:
setattr(user, m[0], dic[m[0]])
For sure there are better solutins, but this might come in handy in case other things dont work for you
Update
This solution uses the attribute definitions based on your class you use. So in case the dictionary has missing values, this solution might be helpful. Else Rashids solution will work well for you too

Flask Marshmallow JSON fields

I have defined a POST call would that needs data:
{
"one" : "hello",
"two" : "world",
"three" : {
"ab": "123",
"cd": false
}
}
For this, I am able to define one and two, but unsure what is the right was to define three. How can I specify a JSON field in Marshmallow? I am able to define basic fields such as:
from marshmallow import Schema, post_load, fields
class Foo(object):
def __init__(self, one, two=None):
self.one = one
self.two = two
class MySchema(Schema):
one = fields.String(required=True)
two = fields.String()
#post_load
def create_foo(self, data, **kwargs):
return Foo(**data)
How do I define three in MySchema? Should I:
simply put it as a string and do manipulation to load it as a json using json.loads()/json.dumps()? Or is there a way to define it properly?
define it as a fields.Dict?
can I define a separate Schema for this field
should I extend field.Field?
I am looking at https://marshmallow.readthedocs.io/en/3.0/api_reference.html, though still not sure. A JSON sub-field or a nested JSON seems like a common use-case, yet I am not able to find anything relevant on this.
This can be done with nested schemas: https://marshmallow.readthedocs.io/en/3.0/nesting.html
Your schema would look something like:
class MySchema(Schema):
one = fields.String(required=True)
two = fields.String()
three = fields.Nested(ThreeSchema)
class ThreeSchema(Schema):
ab = fields.String()
cd = fields.Boolean()
You can create your own field
import json
from marshmallow import fields
class JSON(fields.Field):
def _deserialize(self, value, attr, data, **kwargs):
if value:
try:
return json.loads(value)
except ValueError:
return None
return None
...
from marshmallow import fields, Schema
from schemas.base import JSON
class ObjectSchema(Schema):
id = fields.Integer()
data = JSON()
If you want to support arbitrary nested values in the field, rather than defining a schema for them, you can use:
fields.Dict() (to accept an arbitrary Python dict, or, equivalently, an arbitrary JSON object), or
fields.Raw() (for arbitrary Python objects, or, equivalently, arbitrary JSON values)
An example script you can run that uses both of the above, based on the example in the question:
import json
from marshmallow import Schema, fields, post_load
class Foo(object):
def __init__(self, one, two=None, three=None, four=None):
self.one = one
self.two = two
self.three = three
self.four = four
class MySchema(Schema):
one = fields.String(required=True)
two = fields.String()
three = fields.Dict()
four = fields.Raw()
#post_load
def create_foo(self, data, **kwargs):
return Foo(**data)
post_data = json.loads(
"""{
"one" : "hello",
"two" : "world",
"three" : {
"ab": "123",
"cd": false
},
"four" : 567
}"""
)
foo = MySchema().load(post_data)
print(foo.one)
print(foo.two)
print(foo.three)
print(foo.four)

Categories

Resources