How to validate a few fields with Pydantic in a huge nested JSON? - python

I have dict-like object, like:
# Sample payload: only the nested address name is of interest; the real
# feed carries many more keys before and after 'json_data_feed'.
data = {
    # A lot of data here
    'json_data_feed': {
        'address': {
            'name': 'home_sweet_home',
        },
    },
    # A lot of data here
}
And I want to create a Pydantic model with only a few fields. I'm trying to do this:
class OfferById(pydantic.BaseModel):
    """Asker's attempt: expose the nested address name as ``short_address``."""

    short_address: str = pydantic.Field(..., alias='name')

    # NOTE(review): a validator bound to a single field is only handed that
    # field's own input value, not the whole payload. The input has no
    # top-level 'name' key, which matches the "field required" error
    # reported for `name`.
    @pydantic.validator('short_address', pre=True)
    def validate_short_address(cls, value):
        return value['json_data_feed']['address']
And it fails with exception:
Some = OfferById(**data)
File "pydantic/main.py", line 406, in pydantic.main.BaseModel.__init__
pydantic.error_wrappers.ValidationError: 1 validation error for OfferById
name
field required (type=value_error.missing)
Is there any solution for this?

You can achieve this by means of a root validator. For example:
from pydantic import BaseModel, Field, root_validator


class OfferById(BaseModel):
    """Model that lifts one deeply nested value out of a large payload."""

    short_address: str = Field(..., alias='name')

    @root_validator(pre=True)
    def validate_short_address(cls, values):
        """Copy the nested address name to the top-level key the field reads.

        Declared with ``pre=True`` and given the whole input mapping, so it
        can reach into ``json_data_feed`` and publish the value under the
        field's alias, ``'name'``.
        """
        values['name'] = values['json_data_feed']['address']['name']
        return values


print(OfferById(**data))

Related

with Pydantic, how can i create my own ValidationError reason

it seems impossible to set a regex constraint with a __root__ field like this one:
class Cars(BaseModel):
__root__: Dict[str, CarData]
so, i've resorted to doing it at the endpoint:
@app.post("/cars")
async def get_cars(cars: Cars = Body(...)):
    """Accept a ``Cars`` payload whose top-level keys must all be digit strings.

    Returns:
        'wrong type' when any key contains a non-digit character,
        otherwise 'works'.
    """
    # Cars is a custom-root model, so the parsed mapping is available on
    # __root__; serialising to JSON and parsing it back is unnecessary.
    # fullmatch (unlike '^\d+$' with search) also rejects a trailing newline.
    if any(re.fullmatch(r'\d+', key) is None for key in cars.__root__):
        return 'wrong type'
    return 'works'
This works well, in that I get 'wrong type' returned if I don't use a digit in the request body.
But I'd like to return something similar to what pydantic returns, but with a custom message:
{
"detail": [
{
"loc": [
"body",
"__root__",
],
"msg": "hey there, you can only use digits!",
"type": "type_error.???"
}
]
}
You can pass your own error string by using raise ValidationError("Wrong data type").
Hope it helps.
if it helps anyone, here is how i validated a dynamic field:
class Cars(BaseModel):
    """Custom-root model mapping car ids to ``CarData``; ids must be digit strings."""

    __root__: Dict[str, CarData]

    @pydantic.root_validator(pre=True)
    @classmethod
    def car_id_is_digit(cls, fields):
        """Reject the payload if any car id is not made up solely of digits.

        Raises:
            ValueError: when at least one car id contains a non-digit.
        """
        # The car ids are the keys of the first value handed to the
        # validator; an empty input simply yields no ids to check.
        car_ids = next(iter(fields.values()), {})
        if any(re.search(r'^\d+$', car_id) is None for car_id in car_ids):
            raise ValueError("car_id must be a string that is a digit.")
        return fields
since a regular field validator requires a field name as an argument, i used the root_validator which validates all fields - and does not require that argument.
all this, because __root__ cannot be referenced in the regular field validator, it seems.
however, this means you can only have __root__ fields - and they will all be under the same validation rules... not sure how to add more fields with this.

Pydantic validations for extra fields that not defined in schema

I am using pydantic for schema validations and I would like to throw an error when any extra field is added to a schema that isn't defined.
from typing import Literal, Union
from pydantic import BaseModel, Field, ValidationError
class Cat(BaseModel):
pet_type: Literal['cat']
meows: int
class Dog(BaseModel):
pet_type: Literal['dog']
barks: float
class Lizard(BaseModel):
pet_type: Literal['reptile', 'lizard']
scales: bool
class Model(BaseModel):
pet: Union[Cat, Dog, Lizard] = Field(..., discriminator='pet_type')
n: int
print(Model(pet={'pet_type': 'dog', 'barks': 3.14, 'eats': 'biscuit'}, n=1))
""" try:
Model(pet={'pet_type': 'dog'}, n=1)
except ValidationError as e:
print(e) """
In the above code, I have added the eats field, which is not defined in the schema. The pydantic validations are applied and the extra values that I passed are removed from the result. I want to throw an error saying that eats is not allowed for Dog, or something like that. Is there any way to achieve that?
And is there any chance that we can provide the input directly instead of the pet object?
print(Model({'pet_type': 'dog', 'barks': 3.14, 'eats': 'biscuit', n=1})). I tried without the discriminator, but then the validations specific to pet_type are missing. Can someone guide me on how to achieve either one of these?
You can use the extra field in the Config class to forbid extra attributes during model initialisation (by default, additional attributes will be ignored).
For example:
from pydantic import BaseModel, Extra


class Pet(BaseModel):
    """Pet model that rejects any key not declared as a field."""

    name: str

    class Config:
        # Forbid undeclared attributes instead of silently ignoring them.
        extra = Extra.forbid


data = {
    "name": "some name",
    "some_extra_field": "some value",
}

my_pet = Pet.parse_obj(data)  # <- effectively the same as Pet(**data)
will raise a ValidationError:
ValidationError: 1 validation error for Pet
some_extra_field
extra fields not permitted (type=value_error.extra)
Works as well when the model is "nested", e.g.:
class PetModel(BaseModel):
my_pet: Pet
n: int
pet_data = {
"my_pet": {"name": "Some Name", "invalid_field": "some value"},
"n": 5,
}
pet_model = PetModel.parse_obj(pet_data)
# Effectively the same as
# pet_model = PetModel(my_pet={"name": "Some Name", "invalid_field": "some value"}, n=5)
will raise:
ValidationError: 1 validation error for PetModel
my_pet -> invalid_field
extra fields not permitted (type=value_error.extra)
Pydantic is made to validate your input against the schema. In your case, you want to remove one of its validation features.
I think you should create a new class that inherits from BaseModel:
from typing import Any


class ModifiedBaseModel(BaseModel):
    """BaseModel variant that keeps undeclared keyword arguments as plain attributes.

    Declared fields are passed to the normal ``BaseModel`` constructor;
    every other keyword is stored on the instance as-is.
    """

    def __init__(__pydantic_self__, **data: Any) -> None:
        registered, not_registered = __pydantic_self__.filter_data(data)
        super().__init__(**registered)
        # Written straight into __dict__ — presumably because regular
        # attribute assignment is refused for names that are not fields.
        for k, v in not_registered.items():
            __pydantic_self__.__dict__[k] = v

    @classmethod
    def filter_data(cls, data):
        """Split ``data`` into (declared-field items, all remaining items)."""
        # __fields__ also covers fields inherited from parent models, whereas
        # cls.__annotations__ lists only names annotated on this very class.
        declared = cls.__fields__
        registered_attr = {k: v for k, v in data.items() if k in declared}
        not_registered_attr = {k: v for k, v in data.items() if k not in declared}
        return registered_attr, not_registered_attr
then create your validation classes
class Cat(ModifiedBaseModel):
pet_type: Literal['cat']
meows: int
now you can create a new Cat without worries about undefined attribute. Like this
my_cat = Cat(pet_type='cat', meows=3, name='blacky', age=3)
2nd question, to put the input directly from dict you can use double asterisk **
Dog(**my_dog_data_in_dict)
or
Dog(**{'pet_type': 'dog', 'barks': 3.14, 'eats': 'biscuit', n=1})

Python Marshmallow Field can be two different types

I want to specify a marshmallow schema. For one of my fields, I want it to be validated, but it can be EITHER a string or a list of strings. I have tried the Raw field type; however, that allows everything through. Is there a way to validate just the two types that I want?
Something like,
value = fields.Str() or fields.List()
I had the same issue today, and I came up with this solution:
class ValueField(fields.Field):
    """Field that accepts either a string or a list of strings."""

    def _deserialize(self, value, attr, data, **kwargs):
        """Return ``value`` unchanged when it is a str or a list of str.

        Raises:
            ValidationError: for any other type, or a list holding a
                non-string element.
        """
        if isinstance(value, str):
            return value
        # Checking only isinstance(value, list) would let [1, None] through.
        if isinstance(value, list) and all(isinstance(item, str) for item in value):
            return value
        raise ValidationError('Field should be str or list')


class Foo(Schema):
    value = ValueField()
    other_field = fields.Integer()
You can create a custom field and overload the _deserialize method so that it validates if the code isinstance of desired types.
I hope it'll work for you.
foo.load({'value': 'asdf', 'other_field': 1})
>>> {'other_field': 1, 'value': 'asdf'}
foo.load({'value': ['asdf'], 'other_field': 1})
>>> {'other_field': 1, 'value': ['asdf']}
foo.load({'value': 1, 'other_field': 1})
Traceback (most recent call last):
File "<input>", line 1, in <module>
File "/Users/webinterpret/Envs/gl-gs-onboarding-api/lib/python3.7/site-packages/marshmallow/schema.py", line 723, in load
data, many=many, partial=partial, unknown=unknown, postprocess=True
File "/Users/webinterpret/Envs/gl-gs-onboarding-api/lib/python3.7/site-packages/marshmallow/schema.py", line 904, in _do_load
raise exc
marshmallow.exceptions.ValidationError: {'value': ['Field should be str or list']}
solution for Mapping(s), similar to the above:
from typing import Any, List, Mapping, Optional

from marshmallow import Schema, fields
from marshmallow.exceptions import ValidationError


class UnionField(fields.Field):
    """Field that deserializes multi-type input data to app-level objects."""

    def __init__(self, val_types: List[fields.Field], **kwargs):
        """Store the candidate fields; they are tried in the order given.

        Parameters
        ----------
        val_types : {List[fields.Field]}
            Field instances, any one of which may accept the value.
        **kwargs
            Forwarded unchanged to ``fields.Field.__init__``.
        """
        self.valid_types = val_types
        super().__init__(**kwargs)

    def _deserialize(
        self,
        value: Any,
        attr: Optional[str] = None,
        data: Optional[Mapping[str, Any]] = None,
        **kwargs
    ):
        """
        Deserialize multi-type input data with the first candidate field
        that accepts it.

        Parameters
        ----------
        value : {Any}
            The value to be deserialized.

        Keyword Parameters
        ----------
        attr : {str} [Optional]
            The attribute/key in data to be deserialized. (default: {None})
        data : {Optional[Mapping[str, Any]]}
            The raw input data passed to the Schema.load. (default: {None})

        Raises
        ----------
        ValidationError : Exception
            Raised when every candidate field rejects the value; carries the
            list of messages collected from each attempt.
        """
        errors = []
        # Try each field passed in via val_types, first success wins.
        for field in self.valid_types:
            try:
                return field.deserialize(value, attr, data, **kwargs)
            except ValidationError as error:
                # Remember why this candidate failed and move on.
                errors.append(error.messages)
        raise ValidationError(errors)
Use:
class SampleSchema(Schema):
ex_attr = fields.Dict(keys=fields.Str(), values=UnionField([fields.Str(), fields.Number()]))
Credit: Anna K
The marshmallow-oneofschema project has a nice solution here.
https://github.com/marshmallow-code/marshmallow-oneofschema
From their sample code:
import marshmallow
import marshmallow.fields
from marshmallow_oneofschema import OneOfSchema
class Foo:
    def __init__(self, foo):
        self.foo = foo


class Bar:
    def __init__(self, bar):
        self.bar = bar


class FooSchema(marshmallow.Schema):
    foo = marshmallow.fields.String(required=True)

    # The post_load hook turns the loaded dict into a Foo instance.
    @marshmallow.post_load
    def make_foo(self, data, **kwargs):
        return Foo(**data)


class BarSchema(marshmallow.Schema):
    bar = marshmallow.fields.Integer(required=True)

    # The post_load hook turns the loaded dict into a Bar instance.
    @marshmallow.post_load
    def make_bar(self, data, **kwargs):
        return Bar(**data)


class MyUberSchema(OneOfSchema):
    """Dispatches to FooSchema or BarSchema by type name."""

    type_schemas = {"foo": FooSchema, "bar": BarSchema}

    def get_obj_type(self, obj):
        """Return the ``type_schemas`` key matching ``obj``'s class."""
        if isinstance(obj, Foo):
            return "foo"
        elif isinstance(obj, Bar):
            return "bar"
        else:
            raise Exception("Unknown object type: {}".format(obj.__class__.__name__))


MyUberSchema().dump([Foo(foo="hello"), Bar(bar=123)], many=True)
# => [{'type': 'foo', 'foo': 'hello'}, {'type': 'bar', 'bar': 123}]
MyUberSchema().load(
    [{"type": "foo", "foo": "hello"}, {"type": "bar", "bar": 123}], many=True
)
# => [Foo('hello'), Bar(123)]
Solution
Based on @bwl1289's answer.
In addition, this custom field is inspired by from typing import Union.
# encoding: utf-8
"""
Marshmallow fields
------------------
Extension on the already available marshmallow fields
"""
from marshmallow import ValidationError, fields
class UnionField(fields.Field):
    """Field that accepts a value only if it is an instance of one of ``types``."""

    def __init__(self, types: list = None, *args, **kwargs) -> None:
        """Initialise the base field and remember the accepted types.

        Args:
            types: Python types the raw value may be an instance of.
            *args, **kwargs: Forwarded to ``fields.Field.__init__``.

        Raises:
            AttributeError: if ``types`` is missing or empty.
        """
        super().__init__(*args, **kwargs)
        if not types:
            raise AttributeError('No types provided on union field')
        # Copy so later changes to the caller's list do not affect the field.
        self.types = list(types)

    def _deserialize(self, value, attr, data, **kwargs):
        """Return ``value`` unchanged if it matches one of ``self.types``.

        Raises:
            ValidationError: when the value matches none of the types.
        """
        # isinstance-based: subclasses match too (e.g. bool counts as int).
        if isinstance(value, tuple(self.types)):
            return value
        raise ValidationError(
            f'Field should be any of the following types: [{", ".join(str(i) for i in self.types)}]'
        )
__init__(self, types)
New parameter "types", which accepts a list of built-in Python types alongside the default parameters of a marshmallow field.
super().__init__() initializes the base marshmallow field with the remaining arguments.
If this "types" parameter is empty, an AttributeError is raised.
_deserialize()
Checks whether the current value is an instance of one of the self.types provided in __init__.
Raises ValidationError with a formatted error message based on self.types.
Example
# encoding: utf-8
"""
Example
-------
Example for utilization
"""
from marshmallow import Schema
class AllTypes(Schema):
"""
Example schema
"""
some_field = UnionField(
types=[str, int, float, dict, list, bool, set, tuple],
metadata={
"description": "Multiple types.",
},
)
UnitTest
# encoding: utf-8
"""
Test custom marshmallow fields
"""
from marshmallow import Schema, ValidationError
import pytest
def test_union_field():
    """UnionField accepts listed types, rejects others, and requires ``types``."""

    class MultiType(Schema):
        test = UnionField(
            types=[str, int],
            metadata={
                "description": "String and Integer.",
            },
        )

    class AllTypes(Schema):
        test = UnionField(
            types=[str, int, float, dict, list, bool, set, tuple],
            metadata={
                "description": "Multiple types",
            },
        )

    # An empty ``types`` list must be refused when the field is constructed.
    with pytest.raises(AttributeError):
        class NoTypes(Schema):
            test = UnionField(
                types=[],
                metadata={
                    "description": "No Type.",
                },
            )

    m = MultiType()
    m.load({'test': 'test'})
    m.load({'test': 123})
    # bool is a subclass of int, so an isinstance-based [str, int] union
    # accepts False rather than rejecting it.
    m.load({'test': False})

    rejected_values = [
        123.123,
        {'test': 'test'},
        ['test', 'test'],
        {1, 2, 3, 4},
        (1, 1, 2, 3, 4),
    ]
    # One pytest.raises per value: inside a single block, everything after
    # the first raising statement would never execute.
    for value in rejected_values:
        with pytest.raises(ValidationError):
            m.load({'test': value})

    a = AllTypes()
    accepted_values = [
        'test',
        123,
        123.123,
        {'test': 'test'},
        ['test', 'test'],
        False,
        {1, 2, 3, 4},
        (1, 1, 2, 3, 4),
    ]
    for value in accepted_values:
        a.load({'test': value})

'Missing data' when try to load data with data_key using marshmallow

I am trying to use marshmallow 2.18.0 on Python 3.7 for validating data. I am expecting the JSON {'name': 'foo', 'emailAddress': 'x@x.org'} and load it with this schema:
class FooLoad(Schema):
name = fields.Str()
email = fields.Email(data_key='emailAddress', required=True)
I expect that data_key on load will return something like {'name': 'foo', 'email': 'x@x.org'}, but I get an error in the errors field:
schema_load = FooLoad()
after_load = schema_load.load({'name': 'foo', 'emailAddress': 'x@x.org'})
after_load.errors  # return {'email': ['Missing data for required field.']}
But according to the example from the marshmallow docs (the one with devDependencies) or the GitHub issue, after_load must contain data like {'name': 'foo', 'email': 'x@x.org'}.
I want to deserialize incoming data whose key names differ from the schema attribute names (specifying the expected key in data_key), but I get errors when I try. How can I deserialize input data with names that differ from the schema attribute names and are declared in the data_key argument of those attributes?
data_key was introduced in marshmallow 3.
See changelog entry:
Backwards-incompatible: Add data_key parameter to fields for specifying the key in the input and output data dict. This parameter replaces both load_from and dump_to (#717).
and associated pull-request.
When using marshmallow 2, you must use load_from/dump_to:
class FooLoad(Schema):
name = fields.Str()
email = fields.Email(load_from='emailAddress', dump_to='emailAddress', required=True)
You're using marshmallow 2 but reading the docs for marshmallow 3.
Note that marshmallow 3 contains a bunch of improvements and is in RC state, so if you're starting a project, you could go for marshmallow 3 and save yourself some transition work in the future.
I was experiencing the same phenomenon, trying to parse an API response. It turned out though I needed to drill 1 level deeper into the response, earlier than I was doing.
The response was:
{
"meta": {
"status": 200,
"message": null
},
"response": {
"ownerId": "…",
"otherData": […]
}
}
Then I was calling:
MySchema().load(response.json())
…
class MySchema(Schema):
    owner_id = fields.String(data_key='ownerId')
    # … (remaining fields elided in the original post)

    class Meta:
        unknown = INCLUDE

    @post_load
    def load_my_object(self, data, **kwargs):
        # This is the version described as problematic: the 'response'
        # envelope is unwrapped only here, inside the post_load hook.
        inner = data.get('response', data)
        return MyObject(**inner)
But really, it should have been:
inner = data.get('response', data)
return MySchema().load(inner)
…
class MySchema(Schema):
    owner_id = fields.String(data_key='ownerId')
    # … (remaining fields elided in the original post)

    class Meta:
        unknown = INCLUDE

    @post_load
    def load_my_object(self, data, **kwargs):
        # Build the object directly from the data handed to the hook.
        return MyObject(**data)

Using Python to Map Keys and Data Type In kwargs

I have a class which takes only kwargs. The argument kwargs['content'] contains a user and theme keys:
> content = "{'user': 1, 'theme':'red'}"
> kwargs['content'] = content
> m = Message(**kwargs)
I need a flexible way to model what is in kwargs['content'] and check...
some keys in kwargs['content'] are always present.
always make sure the values are not null and match the type.
This is what I have tried, but I feel like there is a better way.
class Message(object):
    """Message built from keyword arguments; ``content`` must mention every model key."""

    def __init__(self, *args, **kwargs):
        """Check that each key of the model appears in ``kwargs['content']``.

        Raises:
            KeyError: if ``content`` is not supplied.
            ValidationError: if a model key is absent from ``content``.
        """
        for field in self._hash_model():
            # NOTE(review): when content is a string this is a substring
            # test, not a key lookup — confirm the intended input type.
            if field not in kwargs['content']:
                # '{}' is the str.format placeholder; the previous '%' was
                # never substituted, so the field name was lost.
                raise ValidationError('Missing {} field'.format(field))
        # check type
        # turn to json

    def _hash_model(self):
        """
        My model of values that need to be in content
        """
        model = {
            'user': int,
            'theme': str
        }
        return model
I'm using Python 3.4.
See if the following works for you. Change the __init__ to
def __init__(self, *args, **kwargs):
    """Validate ``kwargs['content']`` against the key/type model.

    Every key returned by ``_hash_model()`` must be present with a
    non-null value of exactly the mapped type; otherwise ``SomeException``
    is raised.
    """
    # NOTE(review): json.loads needs valid JSON (double-quoted strings); a
    # single-quoted, repr-style string would be rejected — confirm the
    # producer's format.
    content = json.loads(kwargs["content"])  # assuming kwargs has "content" key
    for key, expected_type in self._hash_model().items():
        field = content.get(key)  # returns None if absent
        # Compare with None explicitly so valid falsy values (0, "") pass,
        # and match the exact type so a bool is not accepted as an int.
        if field is None or type(field) is not expected_type:
            raise SomeException
Factor out the validation logic to a method if you see fit. Comment if it does not work.

Categories

Resources