Pydantic validations for extra fields that are not defined in schema - python

I am using pydantic for schema validations and I would like to throw an error when any extra field is added to a schema that isn't defined.
from typing import Literal, Union
from pydantic import BaseModel, Field, ValidationError
class Cat(BaseModel):
    # Tag field: only the literal 'cat' is accepted.
    pet_type: Literal['cat']
    meows: int
class Dog(BaseModel):
    # Tag field: only the literal 'dog' is accepted.
    pet_type: Literal['dog']
    barks: float
class Lizard(BaseModel):
    # Tag field: either 'reptile' or 'lizard' selects this schema.
    pet_type: Literal['reptile', 'lizard']
    scales: bool
class Model(BaseModel):
    # `pet_type` is the discriminator: its value decides which of the three
    # pet schemas is used to validate `pet`.
    pet: Union[Cat, Dog, Lizard] = Field(..., discriminator='pet_type')
    n: int
# 'eats' is not a field declared on Dog.
print(Model(pet={'pet_type': 'dog', 'barks': 3.14, 'eats': 'biscuit'}, n=1))
# Disabled example: kept as a bare string literal, so it never runs.
""" try:
Model(pet={'pet_type': 'dog'}, n=1)
except ValidationError as e:
print(e) """
In the above code, I have added the eats field which is not defined. The pydantic validations are applied and the extra values that I defined are removed in response. I wanna throw an error saying eats is not allowed for Dog or something like that. Is there any way to achieve that?
And is there any chance that we can provide the input directly instead of the pet object?
print(Model({'pet_type': 'dog', 'barks': 3.14, 'eats': 'biscuit', n=1})). I tried without the discriminator, but then the validations specific to pet_type are missing. Can someone guide me on how to achieve either of these?

You can use the extra field in the Config class to forbid extra attributes during model initialisation (by default, additional attributes will be ignored).
For example:
from pydantic import BaseModel, Extra
class Pet(BaseModel):
    name: str

    class Config:
        # Reject keys that are not declared fields instead of ignoring them.
        extra = Extra.forbid
# Payload with one declared field and one undeclared key.
data = {
"name": "some name",
"some_extra_field": "some value",
}
my_pet = Pet.parse_obj(data) # <- effectively the same as Pet(**data)
will raise a ValidationError:
ValidationError: 1 validation error for Pet
some_extra_field
extra fields not permitted (type=value_error.extra)
Works as well when the model is "nested", e.g.:
class PetModel(BaseModel):
    # Nested model: the inner dict is validated against Pet.
    my_pet: Pet
    n: int
# Nested payload: the inner "my_pet" dict carries a key Pet does not declare.
pet_data = {
"my_pet": {"name": "Some Name", "invalid_field": "some value"},
"n": 5,
}
pet_model = PetModel.parse_obj(pet_data)
# Effectively the same as
# pet_model = PetModel(my_pet={"name": "Some Name", "invalid_field": "some value"}, n=5)
will raise:
ValidationError: 1 validation error for PetModel
my_pet -> invalid_field
extra fields not permitted (type=value_error.extra)

Pydantic is made to validate your input with the schema. In your case, you want to remove one of its validation features.
I think you should create a new class that inherits from BaseModel
from typing import Any


class ModifiedBaseModel(BaseModel):
    """BaseModel variant that keeps undeclared keyword arguments as attributes.

    Declared fields go through the normal pydantic validation; every other
    keyword argument is stored on the instance unvalidated.
    """

    def __init__(__pydantic_self__, **data: Any) -> None:
        registered, not_registered = __pydantic_self__.filter_data(data)
        # Only the declared fields are handed to pydantic for validation.
        super().__init__(**registered)
        # Undeclared keys are written straight into the instance dict.
        for k, v in not_registered.items():
            __pydantic_self__.__dict__[k] = v

    @classmethod
    def filter_data(cls, data):
        """Split *data* into (declared-field items, undeclared items)."""
        # __fields__ also lists fields inherited from parent models, which the
        # class's own __annotations__ does not.
        declared = cls.__fields__
        registered_attr = {}
        not_registered_attr = {}
        for k, v in data.items():
            target = registered_attr if k in declared else not_registered_attr
            target[k] = v
        return registered_attr, not_registered_attr
then create your validation classes
class Cat(ModifiedBaseModel):
    # Declared (validated) fields; any other keyword is handled by the base class.
    pet_type: Literal['cat']
    meows: int
now you can create a new Cat without worries about undefined attribute. Like this
my_cat = Cat(pet_type='cat', meows=3, name='blacky', age=3)
2nd question, to put the input directly from dict you can use double asterisk **
Dog(**my_dog_data_in_dict)
or
# Every entry of a dict literal must be a 'key': value pair.
Dog(**{'pet_type': 'dog', 'barks': 3.14, 'eats': 'biscuit', 'n': 1})

Related

with Pydantic, how can i create my own ValidationError reason

it seems impossible to set a regex constraint with a __root__ field like this one:
class Cars(BaseModel):
    # Custom root type: the whole body is a mapping of string key -> CarData.
    __root__: Dict[str, CarData]
so, i've resorted to doing it at the endpoint:
@app.post("/cars")
async def get_cars(cars: Cars = Body(...)):
    # Returns 'wrong type' if any top-level key is not all digits, else 'works'.
    car_ids = json.loads(cars.json())
    # Plain early return instead of raising and immediately catching an
    # exception purely for local control flow.
    if any(re.search(r'^\d+$', car_id) is None for car_id in car_ids):
        return 'wrong type'
    return 'works'
this works well in that i get wrong type returned if i dont use a digit in the request body.
but i'd like to return something similar to what pydantic returns but with a custom message:
{
"detail": [
{
"loc": [
"body",
"__root__",
],
"msg": "hey there, you can only use digits!",
"type": "type_error.???"
}
]
}
You can pass your own error string by using raise ValidationError("Wrong data type").
Hope it helps.
if it helps anyone, here is how i validated a dynamic field:
class Cars(BaseModel):
    __root__: Dict[str, CarData]

    @pydantic.root_validator(pre=True)
    @classmethod
    def car_id_is_digit(cls, fields):
        """Reject the input unless every car_id key consists only of digits.

        Raises:
            ValueError: if any key fails the digit check.
        """
        # The first value of the raw input is the car_id -> data mapping.
        car_ids = list(list(fields.values())[0].keys())
        if any(re.search(r'^\d+$', car_id) is None for car_id in car_ids):
            raise ValueError("car_id must be a string that is a digit.")
        return fields
since a regular field validator requires a field name as an argument, i used the root_validator which validates all fields - and does not require that argument.
all this, because __root__ cannot be referenced in the regular field validator, it seems.
however, this means you can only have __root__ fields - and they will all be under the same validation rules...not sure how to add more fields with this.

pydantic basemodel "field" for validation purposes only

Consider the following code illustrating use of the pydantic BaseModel with validation:
from pydantic import BaseModel, validator
class User(BaseModel, frozen=True):
    id_key: int
    user_id: int

    @validator('user_id')
    def id_check(cls, v, values):
        # `values` holds the previously validated fields, here id_key.
        if v > 2 * values['id_key'] + 1:
            raise ValueError('id check failed.')
        return v
user_dict = {'user_id': 10, 'id_key': 60}
u = User(**user_dict)
Now, in my application, I don't really want id_key to be a regular, accessible field in model instances like u--its sole purpose is for validating user_id. For my example, is there a way to have access to id_key for validation purposes but not have it be a standard field?
The values argument you have in your id_check function being the internal dict of attributes already validated for your instance, if you need to have id_key only at instantiation time for checking and not after that, you could simply remove it from values.
from pydantic import BaseModel, validator
class User(BaseModel, frozen=True):
    id_key: int
    user_id: int

    @validator('user_id')
    def id_check(cls, v, values):
        if v > 2 * values['id_key'] + 1:
            raise ValueError('id check failed.')
        # id_key was only needed for the check above; drop it from the
        # validated values so it does not end up on the instance.
        values.pop('id_key')
        return v
user_dict = {'user_id': 10, 'id_key': 60}
u = User(**user_dict)
print(u)
# output:
# user_id=10
There is one additional improvement I'd like to suggest for your code: in its present state, as pydantic runs the validations of all the fields before returning the validation errors, if you pass something completely invalid for id_key like "abc" for example, or omit it, it won't be added to values, and the validation of user_id will crash with KeyError: 'id_key', swallowing all the rest of the validation process and returning no sensible message.
user_dict = {'user_id': 10, 'id_key': 'abc'}
u = User(**user_dict)
# output:
# KeyError: 'id_key'
This is not very explicit, and might cause issues with your application if you expect a pydantic ValidationError. You might want to check that id_key is indeed present in values and raise the error cleanly if not.
from pydantic import BaseModel, validator
class User(BaseModel, frozen=True):
    id_key: int
    user_id: int

    @validator('user_id')
    def id_check(cls, v, values):
        # id_key is absent from `values` when its own validation failed;
        # report that as a normal validation error instead of a KeyError.
        if 'id_key' not in values or v > 2 * values['id_key'] + 1:
            raise ValueError('id check failed.')
        values.pop('id_key')
        return v
user_dict = {'user_id': 10, 'id_key': 'abc'}
u = User(**user_dict)
# output:
# pydantic.error_wrappers.ValidationError: 2 validation errors for User
# id_key
# value is not a valid integer (type=type_error.integer)
# user_id
# id check failed.(type=value_error)

Pydantic validate subfields on assignment

I'm trying to make sure one of my objects used is always in a correct state. For this I should validate not only on creation but also on assignment, and also on the sub field assignments. Here is a basic example:
from typing import Optional
from pydantic import BaseModel, root_validator
class SubModel(BaseModel):
    class Config:
        # Validate on attribute assignment too, not only on creation.
        validate_assignment = True

    min: Optional[int]
    max: Optional[int]
class TestModel(BaseModel):
    class Config:
        validate_assignment = True

    field_1: Optional[SubModel]

    # Named `check_min_max` rather than `validate` so it does not replace the
    # `validate` classmethod inherited from BaseModel.
    @root_validator
    def check_min_max(cls, values):
        """Raise ValueError when field_1 has both bounds set and min > max."""
        field = values.get("field_1")
        # Compare against None explicitly so that a bound of 0 is still checked.
        if field is not None and field.min is not None and field.max is not None:
            if field.min > field.max:
                raise ValueError("error")
        return values
If I now call
model = TestModel(field_1=SubModel(min=2, max=1))
or
model = TestModel()
# Assign on the model instance; a bare `field_1 = ...` only creates an
# unrelated local variable.
model.field_1 = SubModel(min=2, max=1)
the validation is triggered and the ValueError is raised, which is fine.
But if I do the following
model = TestModel()
model.field_1 = SubModel()
# These assignments go through SubModel only, not through TestModel.
model.field_1.min = 2
model.field_1.max = 1
no validation is triggered.
I know that I could do the validation on SubModel level but in my case (which is a little bit more complex than the basic code shows) I don't want every object of type SubModel to have min <= max but only the one field used in TestModel. Therefore moving the validator to the SubModel is no option for me.
Does anyone have an idea on how to trigger the validator of TestModel when assigning min and max on field_1?
Thank you in advance!

Partial update in FastAPI

I want to implement a put or patch request in FastAPI that supports partial update. The official documentation is really confusing and I can't figure out how to do the request. (I don't know that items is in the documentation since my data will be passed with request's body, not a hard-coded dict).
class QuestionSchema(BaseModel):
    # All fields are required (`...`), so a body missing any of them is rejected.
    title: str = Field(..., min_length=3, max_length=50)
    answer_true: str = Field(..., min_length=3, max_length=50)
    answer_false: List[str] = Field(..., min_length=3, max_length=50)
    category_id: int
class QuestionDB(QuestionSchema):
    # Stored representation: the request schema plus the database id.
    id: int
async def put(id: int, payload: QuestionSchema):
    # Updates the row with this id from the payload and returns the result of
    # executing the UPDATE ... RETURNING id query.
    # A pydantic model is not a mapping, so it cannot be unpacked with `**`
    # directly; convert it to a plain dict of column values first.
    query = (
        questions
        .update()
        .where(id == questions.c.id)
        .values(**payload.dict())
        .returning(questions.c.id)
    )
    return await database.execute(query=query)
@router.put("/{id}/", response_model=QuestionDB)
async def update_question(payload: QuestionSchema, id: int = Path(..., gt=0),):
    # Responds 404 when no stored question has this id.
    question = await crud.get(id)
    if not question:
        raise HTTPException(status_code=404, detail="question not found")
    # NOTE(review): assumes `crud.get` returns a mapping of the stored columns
    # that QuestionSchema can be built from — confirm.
    stored_item_model = QuestionSchema(**question)
    # Only the fields the client actually sent overwrite the stored values.
    # NOTE: QuestionSchema declares every field as required, so a genuinely
    # partial body needs a payload schema whose fields are optional.
    update_data = payload.dict(exclude_unset=True)
    updated_item = stored_item_model.copy(update=update_data)
    # NOTE(review): assumes `crud.put(id, payload)` persists the model — confirm.
    await crud.put(id, updated_item)
    return {"id": id, **updated_item.dict()}
How can I complete my code to get a successful partial update here?
Posting this here for googlers who are looking for an intuitive solution for creating Optional Versions of their pydantic Models without code duplication.
Let's say we have a User model, and we would like to allow for PATCH requests to update the User. But we need to create a schema that tells FastApi what to expect in the content body, and specifically that all the fields are Optional (Since that's the nature of PATCH requests). We can do so without redefining all the fields
from pydantic import BaseModel
from typing import Optional
# Creating our Base User Model
class UserBase(BaseModel):
    # Fields shared by every user schema.
    username: str
    email: str
# And a Model that will be used to create an User
class UserCreate(UserBase):
    # Adds the password on top of the inherited username/email.
    password: str
Code Duplication ❌
class UserOptional(UserCreate):
    # Every field is re-declared by hand as Optional.
    username: Optional[str]
    email: Optional[str]
    password: Optional[str]
One Liner ✅
# Now we can make a UserOptional class that will tell FastApi that all the fields are optional.
# Doing it this way cuts down on the duplication of fields
class UserOptional(UserCreate):
    # UserCreate.__annotations__ only lists the fields declared directly on
    # UserCreate (password); username/email come from UserBase, so collect the
    # annotations of every class in the MRO, parents first.
    __annotations__ = {
        name: Optional[hint]
        for klass in reversed(UserCreate.__mro__)
        for name, hint in getattr(klass, '__annotations__', {}).items()
    }
NOTE: Even if one of the fields on the Model is already Optional, it won't make a difference due to the nature of Optional being typing.Union[type passed to Optional, None] in the background.
i.e typing.Union[str, None] == typing.Optional[str]
You can even make it into a function if you're going to be using it more than once:
def convert_to_optional(schema):
    """Return the annotations of *schema* with every type wrapped in Optional.

    Annotations declared on parent classes are included too: a class's own
    ``__annotations__`` does not reliably list inherited fields, so the whole
    MRO is walked from the most distant ancestor down, letting subclasses
    override their parents.
    """
    merged = {}
    for klass in reversed(schema.__mro__):
        # `object` and classes without annotations contribute nothing.
        merged.update(getattr(klass, '__annotations__', {}))
    return {k: Optional[v] for k, v in merged.items()}
class UserOptional(UserCreate):
    # Replace the field annotations with their Optional counterparts.
    __annotations__ = convert_to_optional(UserCreate)
I got this answer on the FastAPI's Github issues.
You could make the fields Optional on the base class and create a new QuestionCreate model that extends the QuestionSchema. As an example:
from typing import Optional
class Question(BaseModel):
    title: Optional[str] = None  # title is optional on the base schema
    ...
class QuestionCreate(Question):
    title: str  # Now title is required
The cookiecutter template here provides some good insight too.
I created a library (pydantic-partial) just for that, converting all the fields in the normal DTO model to being optional. See https://medium.com/@david.danier/how-to-handle-patch-requests-with-fastapi-c9a47ac51f04 for a code example and more detailed explanation.
https://github.com/team23/pydantic-partial/
Based on the answer of @cdraper, I made a partial model factory:
from typing import Mapping, Any, List, Type
from pydantic import BaseModel
def model_annotations_with_parents(model: Type[BaseModel]) -> Mapping[str, Any]:
    """Collect the annotations of *model* and of its BaseModel ancestors.

    Direct bases are visited in reverse declaration order and recursively,
    and *model*'s own annotations are applied last so they take precedence.
    """
    parent_models: List[Type[BaseModel]] = [
        parent_model for parent_model in model.__bases__
        if (
            issubclass(parent_model, BaseModel)
            and hasattr(parent_model, '__annotations__')
        )
    ]
    # Plain dict: the result is built up with .update(), which the read-only
    # Mapping interface does not provide.
    annotations: dict = {}
    for parent_model in reversed(parent_models):
        annotations.update(model_annotations_with_parents(parent_model))
    annotations.update(model.__annotations__)
    return annotations
def partial_model_factory(model: Type[BaseModel], prefix: str = "Partial", name: str = None) -> Type[BaseModel]:
    """Create a subclass of *model* whose annotations are all Optional.

    Args:
        model: the model class to derive from.
        prefix: prepended to ``model.__name__`` when *name* is not given.
        name: explicit name for the new class.
    """
    # Imported locally: the imports that accompany this snippet do not
    # include Optional.
    from typing import Optional

    if not name:
        name = f"{prefix}{model.__name__}"
    return type(
        name, (model,),
        dict(
            __module__=model.__module__,
            __annotations__={
                k: Optional[v]
                for k, v in model_annotations_with_parents(model).items()
            }
        )
    )
def partial_model(cls: Type[BaseModel]) -> Type[BaseModel]:
    """Class-decorator form of partial_model_factory; keeps the class name."""
    return partial_model_factory(cls, name=cls.__name__)
Can be used with the function partial_model_factory:
PartialQuestionSchema = partial_model_factory(QuestionSchema)
Or with decorator partial_model:
@partial_model
class PartialQuestionSchema(QuestionSchema):
    pass

Flask Marshmallow JSON fields

I have defined a POST call that needs data:
{
"one" : "hello",
"two" : "world",
"three" : {
"ab": "123",
"cd": false
}
}
For this, I am able to define one and two, but unsure what is the right way to define three. How can I specify a JSON field in Marshmallow? I am able to define basic fields such as:
from marshmallow import Schema, post_load, fields
class Foo:
    """Plain value object holding `one` and an optional `two`."""

    def __init__(self, one, two=None):
        self.one = one
        self.two = two
class MySchema(Schema):
    one = fields.String(required=True)
    two = fields.String()

    @post_load
    def create_foo(self, data, **kwargs):
        # Build a Foo from the loaded data.
        return Foo(**data)
How do I define three in MySchema? Should I:
simply put it as a string and do manipulation to load it as a json using json.loads()/json.dumps()? Or is there a way to define it properly?
define it as a fields.Dict?
can I define a separate Schema for this field
should I extend fields.Field?
I am looking at https://marshmallow.readthedocs.io/en/3.0/api_reference.html, though still not sure. A JSON sub-field or a nested JSON seems like a common use-case, yet I am not able to find anything relevant on this.
This can be done with nested schemas: https://marshmallow.readthedocs.io/en/3.0/nesting.html
Your schema would look something like:
class MySchema(Schema):
    one = fields.String(required=True)
    two = fields.String()
    # Referenced by class name because ThreeSchema is defined further down;
    # using the class object here would raise NameError.
    three = fields.Nested("ThreeSchema")
class ThreeSchema(Schema):
    # Shape of the nested "three" object.
    ab = fields.String()
    cd = fields.Boolean()
You can create your own field
import json
from marshmallow import fields
class JSON(fields.Field):
    """Field that decodes a JSON string into the corresponding Python value."""

    def _deserialize(self, value, attr, data, **kwargs):
        # Falsy input (None, "") has nothing to decode.
        if not value:
            return None
        try:
            return json.loads(value)
        except (TypeError, ValueError):
            # Invalid JSON (ValueError) and non-string input (TypeError) both
            # yield None.
            return None
...
from marshmallow import fields, Schema
from schemas.base import JSON
class ObjectSchema(Schema):
    id = fields.Integer()
    # Custom field: the incoming value is a JSON-encoded string.
    data = JSON()
If you want to support arbitrary nested values in the field, rather than defining a schema for them, you can use:
fields.Dict() (to accept an arbitrary Python dict, or, equivalently, an arbitrary JSON object), or
fields.Raw() (for arbitrary Python objects, or, equivalently, arbitrary JSON values)
An example script you can run that uses both of the above, based on the example in the question:
import json
from marshmallow import Schema, fields, post_load
class Foo:
    """Plain value object; every attribute except `one` defaults to None."""

    def __init__(self, one, two=None, three=None, four=None):
        self.one = one
        self.two = two
        self.three = three
        self.four = four
class MySchema(Schema):
    one = fields.String(required=True)
    two = fields.String()
    # Arbitrary JSON object.
    three = fields.Dict()
    # Arbitrary JSON value of any type.
    four = fields.Raw()

    @post_load
    def create_foo(self, data, **kwargs):
        # Build a Foo from the loaded data.
        return Foo(**data)
# Sample request body, decoded from JSON text into Python values.
post_data = json.loads(
"""{
"one" : "hello",
"two" : "world",
"three" : {
"ab": "123",
"cd": false
},
"four" : 567
}"""
)
# Load the decoded body through the schema, then print each attribute.
foo = MySchema().load(post_data)
print(foo.one)
print(foo.two)
print(foo.three)
print(foo.four)

Categories

Resources