I created a couple of dataclasses (similar to Go's structs) and I want to put my response/data inside of the dataclass. I haven't been able to find whether or not the json package supports this out of the box.
Dataclasses
from dataclasses import dataclass
from typing import List
@dataclass
class TakerPays:
    """Currency amount found under the ``TakerPays`` key of an offer."""

    currency: str
    issuer: str
    value: str


@dataclass
class Offers:
    """One entry of the ``offers`` list in the result."""

    account: str
    book_directory: str
    book_node: str
    flags: int
    ledger_entry_type: str
    owner_node: str
    previous_tx_id: str
    previous_tx_lgr_seq: int
    sequence: int
    taker_gets: str
    taker_pays: TakerPays
    index: str
    owner_funds: str
    quality: str


@dataclass
class Warnings:
    """One entry of the ``warnings`` list in the result."""

    id: int
    message: str


@dataclass
class Result:
    """Payload stored under the ``result`` key of the response."""

    ledger_hash: str
    ledger_index: int
    offers: List[Offers]
    validated: bool
    warnings: List[Warnings]


@dataclass
class Response:
    """Top-level object the JSON document should be loaded into."""

    id: int
    result: Result
    status: str
    type: str
Preview of the json that needs to be put into the Response dataclass
{
"id": 4,
"result": {
"ledger_hash": "5848C7DB5024EC3B532AC2F93BA8086A3D6281D3C0746BFE62E7E3CF4853F663",
"ledger_index": 68379996,
"offers": [
{
"Account": "rPbMHxs7vy5t6e19tYfqG7XJ6Fog8EPZLk",
"BookDirectory": "DFA3B6DDAB58C7E8E5D944E736DA4B7046C30E4F460FD9DE4E1D157637A1048F",
"BookNode": "0",
"Flags": 0,
"LedgerEntryType": "Offer",
"OwnerNode": "0",
"PreviousTxnID": "72B8928E31DF89223C7ADE0030685289BAD772C72DF23DDFFB92FF7B48BAC622",
"PreviousTxnLgrSeq": 68379985,
"Sequence": 386826,
"TakerGets": "789784836",
"TakerPays": {
"currency": "USD",
"issuer": "rvYAfWj5gh67oV6fW32ZzP3Aw4Eubs59B",
"value": "646.5472316"
},
"index": "82F565EDEF8661D7D9C92A75E2F0F5DBF2BAAFAE96A5A5A768AD76B933016031",
"owner_funds": "4587408572",
"quality": "0.0000008186371808232591"
}
],
"validated": true,
"warnings": [
{
"id": 1004,
"message": "This is a reporting server. The default behavior of a reporting server is to only return validated data. If you are looking for not yet validated data, include \"ledger_index : current\" in your request, which will cause this server to forward the request to a p2p node. If the forward is successful the response will include \"forwarded\" : \"true\""
}
]
},
"status": "success",
"type": "response"
}
I've not been able to find support for putting the JSON into the dataclass, similar to Go's json.Unmarshal, and I'm curious about the best way to do this.
Unfortunately the builtin modules in Python such as json don't support de-serializing JSON into a nested dataclass model as in this case.
If you're on board with using third-party libraries, a solid option is to leverage the dataclass-wizard library for this task, as shown below; one advantage that it offers - which really helps in this particular example - is auto key casing transforms, such as from "camelCase" and "TitleCase".
Note that if you prefer not to subclass from any Mixin class, you can opt to use the helper functions fromdict and asdict to convert data from/to Python dict objects instead.
from dataclasses import dataclass
from typing import List
from dataclass_wizard import JSONWizard
@dataclass
class TakerPays:
    """Currency amount found under the ``TakerPays`` key of an offer."""

    currency: str
    issuer: str
    value: str


@dataclass
class Offers:
    """One entry of the ``offers`` list; JSON keys arrive in TitleCase."""

    account: str
    book_directory: str
    book_node: str
    flags: int
    ledger_entry_type: str
    owner_node: str
    previous_txn_id: str
    previous_txn_lgr_seq: int
    sequence: int
    taker_gets: str
    taker_pays: TakerPays
    index: str
    owner_funds: str
    quality: str


@dataclass
class Warnings:
    """One entry of the ``warnings`` list in the result."""

    id: int
    message: str


@dataclass
class Result:
    """Payload stored under the ``result`` key of the response."""

    ledger_hash: str
    ledger_index: int
    offers: List[Offers]
    validated: bool
    warnings: List[Warnings]


@dataclass
class Response(JSONWizard):
    """Top-level model; the JSONWizard mixin supplies ``from_json``."""

    id: int
    result: Result
    status: str
    type: str
def main():
    """Load the sample JSON document into ``Response`` and print its repr."""
    # Raw string so the \" escapes reach the JSON parser untouched.
    string = r"""
{
"id": 4,
"result": {
"ledger_hash": "5848C7DB5024EC3B532AC2F93BA8086A3D6281D3C0746BFE62E7E3CF4853F663",
"ledger_index": 68379996,
"offers": [
{
"Account": "rPbMHxs7vy5t6e19tYfqG7XJ6Fog8EPZLk",
"BookDirectory": "DFA3B6DDAB58C7E8E5D944E736DA4B7046C30E4F460FD9DE4E1D157637A1048F",
"BookNode": "0",
"Flags": 0,
"LedgerEntryType": "Offer",
"OwnerNode": "0",
"PreviousTxnID": "72B8928E31DF89223C7ADE0030685289BAD772C72DF23DDFFB92FF7B48BAC622",
"PreviousTxnLgrSeq": 68379985,
"Sequence": 386826,
"TakerGets": "789784836",
"TakerPays": {
"currency": "USD",
"issuer": "rvYAfWj5gh67oV6fW32ZzP3Aw4Eubs59B",
"value": "646.5472316"
},
"index": "82F565EDEF8661D7D9C92A75E2F0F5DBF2BAAFAE96A5A5A768AD76B933016031",
"owner_funds": "4587408572",
"quality": "0.0000008186371808232591"
}
],
"validated": true,
"warnings": [
{
"id": 1004,
"message": "This is a reporting server. The default behavior of a reporting server is to only return validated data. If you are looking for not yet validated data, include \"ledger_index : current\" in your request, which will cause this server to forward the request to a p2p node. If the forward is successful the response will include \"forwarded\" : \"true\""
}
]
},
"status": "success",
"type": "response"
}
"""
    r = Response.from_json(string)
    print(repr(r))


if __name__ == '__main__':
    main()
Output:
Response(id=4, result=Result(ledger_hash='5848C7DB5024EC3B532AC2F93BA8086A3D6281D3C0746BFE62E7E3CF4853F663', ledger_index=68379996, offers=[Offers(account='rPbMHxs7vy5t6e19tYfqG7XJ6Fog8EPZLk', book_directory='DFA3B6DDAB58C7E8E5D944E736DA4B7046C30E4F460FD9DE4E1D157637A1048F', book_node='0', flags=0, ledger_entry_type='Offer', owner_node='0', previous_txn_id='72B8928E31DF89223C7ADE0030685289BAD772C72DF23DDFFB92FF7B48BAC622', previous_txn_lgr_seq=68379985, sequence=386826, taker_gets='789784836', taker_pays=TakerPays(currency='USD', issuer='rvYAfWj5gh67oV6fW32ZzP3Aw4Eubs59B', value='646.5472316'), index='82F565EDEF8661D7D9C92A75E2F0F5DBF2BAAFAE96A5A5A768AD76B933016031', owner_funds='4587408572', quality='0.0000008186371808232591')], validated=True, warnings=[Warnings(id=1004, message='This is a reporting server. The default behavior of a reporting server is to only return validated data. If you are looking for not yet validated data, include "ledger_index : current" in your request, which will cause this server to forward the request to a p2p node. If the forward is successful the response will include "forwarded" : "true"')]), status='success', type='response')
NB: I noted that two fields in the Offers dataclass have slightly different names than the fields in the JSON object. For example, the field previous_tx_id is associated with the key PreviousTxnID in the JSON object.
Assuming this was intentional, you could easily work around this by defining a field alias mapping, as shown below:
from dataclass_wizard import json_key
# Note: In Python 3.9+ you can import this from `typing` instead
from typing_extensions import Annotated
@dataclass
class Offers:
    ...
    # Bind each field to the JSON key whose name differs from the field name.
    previous_tx_id: Annotated[str, json_key('PreviousTxnID')]
    previous_tx_lgr_seq: Annotated[int, json_key('PreviousTxnLgrSeq')]
    ...
Related
From a similar question, the goal is to create a model like this Typescript interface:
interface ExpandedModel {
fixed: number;
[key: string]: OtherModel;
}
However the OtherModel needs to be validated, so simply using:
class ExpandedModel(BaseModel):
    fixed: int

    class Config:
        # Accept keys that are not declared as fields on the model.
        extra = "allow"
Won't be enough. I tried root (pydantic docs):
class VariableKeysModel(BaseModel):
    # Custom root type: the whole model is a mapping of name -> OtherModel.
    __root__: Dict[str, OtherModel]
But doing something like:
class ExpandedModel(VariableKeysModel):
    fixed: int
Is not possible due to:
ValueError: root cannot be mixed with other fields
Would something like @root_validator (example from another answer) be helpful in this case?
Thankfully, Python is not TypeScript. As mentioned in the comments here as well, an object is generally not a dictionary and dynamic attributes are considered bad form in almost all cases.
You can of course still set attributes dynamically, but they will for example never be recognized by a static type checker like Mypy or your IDE. This means you will not get auto-suggestions for those dynamic fields. Only attributes that are statically defined within the namespace of the class are considered members of that class.
That being said, you can abuse the extra config option to allow arbitrary fields to be dynamically added to the model, while at the same time enforcing all corresponding values to be of a specific type via a root_validator.
from typing import Any
from pydantic import BaseModel, root_validator
class Foo(BaseModel):
    a: int


class Bar(BaseModel):
    b: str

    @root_validator
    def validate_foo(cls, values: dict[str, Any]) -> dict[str, Any]:
        # Every key that is not a declared field must parse as a Foo.
        for name, value in values.items():
            if name in cls.__fields__:
                continue  # ignore statically defined fields here
            values[name] = Foo.parse_obj(value)
        return values

    class Config:
        # Needed so that undeclared keys are kept and reach the validator.
        extra = "allow"
Demo:
if __name__ == "__main__":
    from pydantic import ValidationError

    # Valid: both extra values can be parsed as Foo.
    bar = Bar.parse_obj({
        "b": "xyz",
        "foo1": {"a": 1},
        "foo2": Foo(a=2),
    })
    print(bar.json(indent=4))

    # Invalid: the extra value has the wrong type for Foo.a.
    try:
        Bar.parse_obj({
            "b": "xyz",
            "foo": {"a": "string"},
        })
    except ValidationError as err:
        print(err.json(indent=4))

    # Invalid: the extra value lacks the required Foo.a field.
    try:
        Bar.parse_obj({
            "b": "xyz",
            "foo": {"not_a_foo_field": 1},
        })
    except ValidationError as err:
        print(err.json(indent=4))
Output:
{
"b": "xyz",
"foo2": {
"a": 2
},
"foo1": {
"a": 1
}
}
[
{
"loc": [
"__root__",
"a"
],
"msg": "value is not a valid integer",
"type": "type_error.integer"
}
]
[
{
"loc": [
"__root__",
"a"
],
"msg": "field required",
"type": "value_error.missing"
}
]
A better approach IMO is to just put the dynamic name-object-pairs into a dictionary. For example, you could define a separate field foos: dict[str, Foo] on the Bar model and get automatic validation out of the box that way.
Or you ditch the outer base model altogether for that specific case and just handle the data as a native dictionary with Foo values and parse them all via the Foo model.
I've been working with FastAPI for some time, it's a great framework.
However, real-life scenarios can be surprising; sometimes a non-standard approach is necessary. There's one case I'd like to ask for your help with.
There's a strange external requirement that a model response should be formatted as stated in example:
Desired behavior:
GET /object/1
{status: ‘success’, data: {object: {id:‘1’, category: ‘test’ …}}}
GET /objects
{status: ‘success’, data: {objects: [...]}}
Current behavior:
GET /object/1 would respond:
{id: 1,field1:"content",... }
GET /objects/ would send a List of Object e.g.,:
{
[
{id: 1,field1:"content",... },
{id: 1,field1:"content",... },
...
]
}
You can substitute 'object' by any class, it's just for description purposes.
How to write a generic response model that will suit those reqs?
I know I can produce response model that would contain status:str and (depending on class) data structure e.g ticket:Ticket or tickets:List[Ticket].
The point is there's a number of classes so I hope there's a more pythonic way to do it.
Thanks for help.
Generic model with static field name
A generic model is a model where one field (or multiple) are annotated with a type variable. Thus the type of that field is unspecified by default and must be specified explicitly during subclassing and/or initialization. But that field is still just an attribute and an attribute must have a name. A fixed name.
To go from your example, say that is your model:
{
"status": "...",
"data": {
"object": {...} # type variable
}
}
Then we could define that model as generic in terms of the type of its object attribute.
This can be done using Pydantic's GenericModel like this:
from typing import Generic, TypeVar
from pydantic import BaseModel
from pydantic.generics import GenericModel
M = TypeVar("M", bound=BaseModel)
# Inner "data" payloads: the field name is fixed, its type is the variable M.
class GenericSingleObject(GenericModel, Generic[M]):
    object: M


class GenericMultipleObjects(GenericModel, Generic[M]):
    objects: list[M]


# Outer envelopes: a shared status plus a payload generic in the same M.
class BaseGenericResponse(GenericModel):
    status: str


class GenericSingleResponse(BaseGenericResponse, Generic[M]):
    data: GenericSingleObject[M]


class GenericMultipleResponse(BaseGenericResponse, Generic[M]):
    data: GenericMultipleObjects[M]


# Concrete example models used to specify the generic ones.
class Foo(BaseModel):
    a: str
    b: int


class Bar(BaseModel):
    x: float
As you can see, GenericSingleObject reflects the generic type we want for data, whereas GenericSingleResponse is generic in terms of the type parameter M of GenericSingleObject, which is the type of its data attribute.
If we now want to use one of our generic response models, we would need to specify it with a type argument (a concrete model) first, e.g. GenericSingleResponse[Foo].
FastAPI deals with this just fine and can generate the correct OpenAPI documentation. The JSON schema for GenericSingleResponse[Foo] looks like this:
{
"title": "GenericSingleResponse[Foo]",
"type": "object",
"properties": {
"status": {
"title": "Status",
"type": "string"
},
"data": {
"$ref": "#/definitions/GenericSingleObject_Foo_"
}
},
"required": [
"status",
"data"
],
"definitions": {
"Foo": {
"title": "Foo",
"type": "object",
"properties": {
"a": {
"title": "A",
"type": "string"
},
"b": {
"title": "B",
"type": "integer"
}
},
"required": [
"a",
"b"
]
},
"GenericSingleObject_Foo_": {
"title": "GenericSingleObject[Foo]",
"type": "object",
"properties": {
"object": {
"$ref": "#/definitions/Foo"
}
},
"required": [
"object"
]
}
}
}
To demonstrate it with FastAPI:
from fastapi import FastAPI
app = FastAPI()
@app.get("/foo/", response_model=GenericSingleResponse[Foo])
async def get_one_foo() -> dict[str, object]:
    # Plain dict shaped like GenericSingleResponse[Foo], the declared response_model.
    return {"status": "foo", "data": {"object": {"a": "spam", "b": 123}}}
Sending a request to that route returns the following:
{
"status": "foo",
"data": {
"object": {
"a": "spam",
"b": 123
}
}
}
Dynamically created model
If you actually want the attribute name to also be different every time, that is obviously no longer possible with static type annotations. In that case we would have to resort to actually creating the model type dynamically via pydantic.create_model.
In that case there is really no point in genericity anymore because type safety is out of the window anyway, at least for the data model. We still have the option to define a GenericResponse model, which we can specify via our dynamically generated models, but this will make every static type checker mad, since we'll be using variables for types. Still, it might make for otherwise concise code.
We just need to define an algorithm for deriving the model parameters:
from typing import Any, Generic, Optional, TypeVar
from pydantic import BaseModel, create_model
from pydantic.generics import GenericModel
M = TypeVar("M", bound=BaseModel)
def create_data_model(
    model: type[BaseModel],
    plural: bool = False,
    custom_plural_name: Optional[str] = None,
    **kwargs: Any,
) -> type[BaseModel]:
    """Build a wrapper model with one field named after ``model``.

    Singular: ``Single<Name>`` with a field ``<name>`` of type ``model``.
    Plural: ``Multiple<Name>`` with a field ``custom_plural_name`` (or
    ``<name>s`` when none is given) of type ``list[model]``.
    Additional ``kwargs`` are forwarded to ``pydantic.create_model``.
    """
    data_field_name = model.__name__.lower()
    if plural:
        model_name = f"Multiple{model.__name__}"
        if custom_plural_name:
            data_field_name = custom_plural_name
        else:
            data_field_name += "s"
        kwargs[data_field_name] = (list[model], ...)  # type: ignore[valid-type]
    else:
        model_name = f"Single{model.__name__}"
        kwargs[data_field_name] = (model, ...)
    return create_model(model_name, **kwargs)
class GenericResponse(GenericModel, Generic[M]):
    status: str
    data: M
Using the same Foo and Bar examples as before:
class Foo(BaseModel):
    a: str
    b: int


class Bar(BaseModel):
    x: float
SingleFoo = create_data_model(Foo)
MultipleBar = create_data_model(Bar, plural=True)
This also works as expected with FastAPI including the automatically generated schemas/documentations:
from fastapi import FastAPI
app = FastAPI()
@app.get("/foo/", response_model=GenericResponse[SingleFoo])  # type: ignore[valid-type]
async def get_one_foo() -> dict[str, object]:
    # The "foo" key is the field name derived from the Foo model.
    return {"status": "foo", "data": {"foo": {"a": "spam", "b": 123}}}
@app.get("/bars/", response_model=GenericResponse[MultipleBar])  # type: ignore[valid-type]
async def get_multiple_bars() -> dict[str, object]:
    # The "bars" key is the pluralised field name derived from the Bar model.
    return {"status": "bars", "data": {"bars": [{"x": 3.14}, {"x": 0}]}}
Output is essentially the same as with the first approach.
You'll have to see, which one works better for you. I find the second option very strange because of the dynamic key/field name. But maybe that is what you need for some reason.
In python 3, how can I deserialize an object structure from json?
Example json:
{ 'name': 'foo',
'some_object': { 'field1': 'bar', 'field2' : '0' },
'some_list_of_objects': [
{ 'field1': 'bar1', 'field2' : '1' },
{ 'field1': 'bar2', 'field2' : '2' },
{ 'field1': 'bar3', 'field2' : '3' },
]
}
Here's my python code:
import json
class A:
    name: str
    some_object: B
    some_list_of_objects: list(C)

    def __init__(self, file_name):
        # Replaces the instance dict with the parsed JSON; nested values stay plain dicts/lists.
        with open(file_name, "r") as json_file:
            self.__dict__ = json.load(json_file)


class B:
    field1: int
    field2: str


class C:
    field1: int
    field2: str
How to force some_object to be of type B and some_list_of_objects to be of type list of C?
As you're using Python 3, I would suggest using dataclasses to model your classes. This should improve your overall code quality and also eliminate the need to explicitly declare an __init__ constructor method for your class, for example.
If you're on board with using a third-party library, I'd suggest looking into an efficient JSON serialization library like the dataclass-wizard that performs implicit type conversion - for example, string to annotated int as below. Note that I'm using StringIO here, which is a file-like object containing a JSON string to de-serialize into a nested class model.
Note: the following approach should work in Python 3.7+.
from __future__ import annotations
from dataclasses import dataclass
from io import StringIO
from dataclass_wizard import JSONWizard
json_data = StringIO("""
{ "name": "foo",
"some_object": { "field1": "bar", "field2" : "0" },
"some_list_of_objects": [
{ "field1": "bar1", "field2" : "1" },
{ "field1": "bar2", "field2" : "2" },
{ "field1": "bar3", "field2" : "3" }
]
}
""")
@dataclass
class A(JSONWizard):
    name: str
    # B and C are declared below; this relies on lazily evaluated annotations.
    some_object: B
    some_list_of_objects: list[C]


@dataclass
class B:
    field1: str
    field2: int


@dataclass
class C:
    field1: str
    field2: int
a = A.from_json(json_data.read())
print(f'{a!r}') # alternatively: print(repr(a))
Output
A(name='foo', some_object=B(field1='bar', field2=0), some_list_of_objects=[C(field1='bar1', field2=1), C(field1='bar2', field2=2), C(field1='bar3', field2=3)])
Loading from a JSON file
As per the suggestions in this post, I would discourage overriding the constructor method to pass the name of a JSON file to load the data from. Instead, I would suggest creating a helper class method as below, that can be invoked like A.from_json_file('file.json') if desired.
@classmethod
def from_json_file(cls, file_name: str):
    """Deserialize json file contents into an A object."""
    # Alternate constructor: parse the file, then hand the dict to from_dict.
    with open(file_name, 'r') as json_file:
        return cls.from_dict(json.load(json_file))
Suggestions
Note that variable annotations (or annotations in general) are subscripted using square brackets [] rather than parentheses as appears in the original version above.
some_list_of_objects: list(C)
In the above solution, I've instead changed that to:
some_list_of_objects: list[C]
This works because subscripting the standard collection types was introduced by PEP 585 (Python 3.9). However, the from __future__ import annotations statement, available since Python 3.7, effectively converts all annotations to forward-declared string values, so that new-style annotations that would normally only work in Python 3.9+ (such as list[C]) or Python 3.10+ (such as X | Y) can also be used in Python 3.7+.
One other change I made, was in regards to swapping out the order of declared class annotations. For example, note the below:
class B:
field1: int
field2: str
However, note the corresponding field in the JSON data, that would be deserialized to a B object:
'some_object': { 'field1': 'bar', 'field2' : '0' },
In the above implementation, I've swapped out the field annotations in such cases, so class B for instance is declared as:
class B:
field1: str
field2: int
Setup:
# Pydantic Models
class TMDB_Category(BaseModel):
    # The aliases are the key names used by the incoming data.
    name: str = Field(alias="strCategory")
    description: str = Field(alias="strCategoryDescription")


class TMDB_GetCategoriesResponse(BaseModel):
    categories: list[TMDB_Category]
@router.get(path="category", response_model=TMDB_GetCategoriesResponse)
async def get_all_categories():
    # Fetch the upstream payload and parse it into the response model.
    async with httpx.AsyncClient() as client:
        response = await client.get(Endpoint.GET_CATEGORIES)
        return TMDB_GetCategoriesResponse.parse_obj(response.json())
Problem:
Alias is being used when creating a response, and I want to avoid it. I only need this alias to correctly map the incoming data but when returning a response, I want to use actual field names.
Actual response:
{
"categories": [
{
"strCategory": "Beef",
"strCategoryDescription": "Beef is ..."
},
{
"strCategory": "Chicken",
"strCategoryDescription": "Chicken is ..."
}
]
}
Expected response:
{
"categories": [
{
"name": "Beef",
"description": "Beef is ..."
},
{
"name": "Chicken",
"description": "Chicken is ..."
}
]
}
Switch aliases and field names and use the allow_population_by_field_name model config option:
class TMDB_Category(BaseModel):
    # Field names match the incoming keys; aliases are the names to return.
    strCategory: str = Field(alias="name")
    strCategoryDescription: str = Field(alias="description")

    class Config:
        # Lets input be parsed by field name as well as by alias.
        allow_population_by_field_name = True
Let the aliases configure the names of the fields that you want to return, but enable allow_population_by_field_name to be able to parse data that uses different names for the fields.
An alternate option (which likely won't be as popular) is to use a de-serialization library other than pydantic. For example, the Dataclass Wizard library is one which supports this particular use case. If you need the same round-trip behavior that Field(alias=...) provides, you can pass all=True to the json_field function. Note that with such a library, you do lose out on the ability to perform complete type validation, which is arguably one of pydantic's greatest strengths; however, it does perform type conversion in a similar fashion to pydantic. There are also a few reasons why I feel that validation is not as important, which I list below.
Reasons why I would argue that data validation is a nice to have
feature in general:
If you're building and passing in the input yourself, you can most likely trust that you know what you are doing, and are passing in the correct data types.
If you're getting the input from another API, then assuming that API has decent docs, you can just grab an example response from their documentation, and use that to model your class structure. You generally don't need any validation if an API documents its response structure clearly.
Data validation takes time, so it can slow down the process slightly, compared to if you just perform type conversion and catch any errors that might occur, without validating the input type beforehand.
So to demonstrate that, here's a simple example for the above use case using the dataclass-wizard library (which relies on the usage of dataclasses instead of pydantic models):
from dataclasses import dataclass
from dataclass_wizard import JSONWizard, json_field
@dataclass
class TMDB_Category:
    # json_field binds each attribute to the key name used by the input.
    name: str = json_field('strCategory')
    description: str = json_field('strCategoryDescription')


@dataclass
class TMDB_GetCategoriesResponse(JSONWizard):
    categories: list[TMDB_Category]
And the code to run that, would look like this:
input_dict = {
"categories": [
{
"strCategory": "Beef",
"strCategoryDescription": "Beef is ..."
},
{
"strCategory": "Chicken",
"strCategoryDescription": "Chicken is ..."
}
]
}
c = TMDB_GetCategoriesResponse.from_dict(input_dict)
print(repr(c))
# TMDB_GetCategoriesResponse(categories=[TMDB_Category(name='Beef', description='Beef is ...'), TMDB_Category(name='Chicken', description='Chicken is ...')])
print(c.to_dict())
# {'categories': [{'name': 'Beef', 'description': 'Beef is ...'}, {'name': 'Chicken', 'description': 'Chicken is ...'}]}
Measuring Performance
If anyone is curious, I've set up a quick benchmark test to compare deserialization and serialization times with pydantic vs. just dataclasses:
from dataclasses import dataclass
from timeit import timeit
from pydantic import BaseModel, Field
from dataclass_wizard import JSONWizard, json_field
# Pydantic Models
class Pydantic_TMDB_Category(BaseModel):
    name: str = Field(alias="strCategory")
    description: str = Field(alias="strCategoryDescription")


class Pydantic_TMDB_GetCategoriesResponse(BaseModel):
    categories: list[Pydantic_TMDB_Category]
# Dataclasses
@dataclass
class TMDB_Category:
    # all=True is the round-trip option the accompanying text refers to.
    name: str = json_field('strCategory', all=True)
    description: str = json_field('strCategoryDescription', all=True)


@dataclass
class TMDB_GetCategoriesResponse(JSONWizard):
    categories: list[TMDB_Category]
# Input dict which contains sufficient data for testing (100 categories)
input_dict = {
"categories": [
{
"strCategory": f"Beef {i * 2}",
"strCategoryDescription": "Beef is ..." * i
}
for i in range(100)
]
}
n = 10_000
print('=== LOAD (deserialize)')
print('dataclass-wizard: ',
timeit('c = TMDB_GetCategoriesResponse.from_dict(input_dict)',
globals=globals(), number=n))
print('pydantic: ',
timeit('c = Pydantic_TMDB_GetCategoriesResponse.parse_obj(input_dict)',
globals=globals(), number=n))
c = TMDB_GetCategoriesResponse.from_dict(input_dict)
pydantic_c = Pydantic_TMDB_GetCategoriesResponse.parse_obj(input_dict)
print('=== DUMP (serialize)')
print('dataclass-wizard: ',
timeit('c.to_dict()',
globals=globals(), number=n))
print('pydantic: ',
timeit('pydantic_c.dict()',
globals=globals(), number=n))
And the benchmark results (tested on Mac OS Big Sur, Python 3.9.0):
=== LOAD (deserialize)
dataclass-wizard: 1.742989194
pydantic: 5.31538175
=== DUMP (serialize)
dataclass-wizard: 2.300118940
pydantic: 5.582638598
In their docs, pydantic claims to be the fastest library in general, but it's rather straightforward to prove otherwise. As you can see, for the above dataset pydantic is roughly 2–3x slower in both the deserialization and serialization process. It’s worth noting that pydantic is already quite fast, though.
Disclaimer: I am the creator (and maintainer) of said library.
maybe you could use this approach
from pydantic import BaseModel, Field
class TMDB_Category(BaseModel):
    name: str = Field(alias="strCategory")
    description: str = Field(alias="strCategoryDescription")
data = {
"strCategory": "Beef",
"strCategoryDescription": "Beef is ..."
}
obj = TMDB_Category.parse_obj(data)
# {'name': 'Beef', 'description': 'Beef is ...'}
print(obj.dict())
I was trying to do something similar (migrate a field pattern to a list of patterns while gracefully handling old versions of the data). The best solution I could find was to do the field mapping in the __init__ method. In the terms of OP, this would be like:
class TMDB_Category(BaseModel):
    name: str
    description: str

    def __init__(self, **data):
        # Rename the alternative key names to the field names before validation.
        if "strCategory" in data:
            data["name"] = data.pop("strCategory")
        if "strCategoryDescription" in data:
            data["description"] = data.pop("strCategoryDescription")
        super().__init__(**data)
Then we have:
>>> TMDB_Category(strCategory="name", strCategoryDescription="description").json()
'{"name": "name", "description": "description"}'
If you need to use field aliases to do this but still use the name/description fields in your code, one option is to alter Hernán Alarcón's solution to use properties:
class TMDB_Category(BaseModel):
    strCategory: str = Field(alias="name")
    strCategoryDescription: str = Field(alias="description")

    class Config:
        allow_population_by_field_name = True

    # Properties expose the fields under the shorter names for use in code.
    @property
    def name(self):
        return self.strCategory

    @name.setter
    def name(self, value):
        self.strCategory = value

    @property
    def description(self):
        return self.strCategoryDescription

    @description.setter
    def description(self, value):
        self.strCategoryDescription = value
That's still a bit awkward, since the repr uses the "alias" names:
>>> TMDB_Category(name="name", description="description")
TMDB_Category(strCategory='name', strCategoryDescription='description')
Use the by_alias option when exporting the model, e.g. item.dict(by_alias=False).
from fastapi import FastAPI, Path, Query
from pydantic import BaseModel, Field
app = FastAPI()
class Item(BaseModel):
    # Required field, populated from the "keck" key of the request body.
    name: str = Field(..., alias="keck")
@app.post("/item")
async def read_items(
    item: Item,
):
    # by_alias=False exports using the field name ("name"), not the alias.
    return item.dict(by_alias=False)
Given the request:
{
"keck": "string"
}
this will return
{
"name": "string"
}
I would like pydantic to choose the model to use for parsing the input dependent on the input value. Is this possible?
MVCE
I have a pydantic model which looks similar to this one:
from typing import List, Literal
from pydantic import BaseModel
class Animal(BaseModel):
    name: str
    type: Literal["mamal", "bird"]


class Bird(Animal):
    max_eggs: int


class Mamal(Animal):
    max_offspring: int


class Config(BaseModel):
    animals: List[Animal]
cfg = Config.parse_obj(
{
"animals": [
{"name": "eagle", "type": "bird", "max_eggs": 3},
{"name": "Human", "type": "mamal", "max_offspring": 3},
]
}
)
print(cfg.json(indent=4))
gives
{
"animals": [
{
"name": "eagle",
"type": "bird"
<-- missing max_eggs, as "Animal" was used instead of Bird
},
{
"name": "Human",
"type": "mamal"
<-- missing max_offspring, as "Animal" was used instead of Mamal
}
]
}
I know that I could set Config.extra="allow" in Animal, but that is not what I want. I would like pydantic to see that a dictionary with 'type': 'mamal' should use the Mamal model to parse.
Is this possible?
You could add concrete literals to every child class to differentiate and put them in Union from more to less specific order. Like so:
class Animal(BaseModel):
    name: str
    type: str


# Each subclass narrows `type` to a single literal so inputs can be told apart.
class Bird(Animal):
    type: Literal["bird"]
    max_eggs: int


class Mamal(Animal):
    type: Literal["mamal"]
    max_offspring: int
class Config(BaseModel):
animals: List[Union[Bird, Mamal, Animal]] # From more specific to less