dataclasses.asdict() not working as expected - python

I'm using dataclass and asdict from dataclasses, and I find that asdict doesn't work as I would expect when I introduce inheritance.
I use dataclasses to help me create dictionaries from classes so I can pass them into django.shortcuts.render.
from dataclasses import dataclass
from dataclasses import asdict
@dataclass
class Base:
name: str
class Test(Base):
def __init__(self, age, *args, **kwargs):
self.age = age
super(Test, self).__init__(*args, **kwargs)
test = Test(age=20, name="john doe")
print(asdict(test))
I would expect the output to be
{"age": 20, "name": "john doe"}
But what I get is only the keyword-value from the base-class
{"name": "john doe"}

The correct implementation for inheritance of a dataclass is covered in the docs:
@dataclass
class Base:
name: str
@dataclass
class Child(Base):
age: int
Without this, the __dataclass_fields__ attribute in the child class, which asdict uses to determine what should be in the dictionary, doesn't know about all of the fields you care about; it only has the inherited version:
>>> Test.__dataclass_fields__
{'name': Field(...)}
>>> Test.__dataclass_fields__ is Base.__dataclass_fields__
True
>>> Child.__dataclass_fields__
{'name': Field(...), 'age': Field(...)}
>>> Child.__dataclass_fields__ is Base.__dataclass_fields__
False
Also note you can simplify the imports to:
from dataclasses import asdict, dataclass

Related

Serialize JSON from Python dataclass that has Decimal [duplicate]

Starting with Python 3.7, there is something called a dataclass:
from dataclasses import dataclass
@dataclass
class Foo:
x: str
However, the following fails:
>>> import json
>>> foo = Foo(x="bar")
>>> json.dumps(foo)
TypeError: Object of type Foo is not JSON serializable
How can I make json.dumps() encode instances of Foo into json objects?
Much like you can add support to the JSON encoder for datetime objects or Decimals, you can also provide a custom encoder subclass to serialize dataclasses:
import dataclasses, json
class EnhancedJSONEncoder(json.JSONEncoder):
def default(self, o):
if dataclasses.is_dataclass(o):
return dataclasses.asdict(o)
return super().default(o)
json.dumps(foo, cls=EnhancedJSONEncoder)
Can't you just use the dataclasses.asdict() function to convert the dataclass
to a dict? Something like:
>>> @dataclass
... class Foo:
... a: int
... b: int
...
>>> x = Foo(1,2)
>>> json.dumps(dataclasses.asdict(x))
'{"a": 1, "b": 2}'
Ways of getting JSONified dataclass instance
There are a couple of options to accomplish that goal; selecting one requires analyzing which approach suits your needs best:
Standard library: dataclasses.asdict
import dataclasses
import json
@dataclasses.dataclass
class Foo:
x: str
foo = Foo(x='1')
json_foo = json.dumps(dataclasses.asdict(foo)) # '{"x": "1"}'
Parsing it back to a dataclass instance isn't trivial, so you may want to visit this answer: https://stackoverflow.com/a/53498623/2067976
Marshmallow Dataclass
from dataclasses import field
from marshmallow_dataclass import dataclass
@dataclass
class Foo:
x: int = field(metadata={"required": True})
foo = Foo(x='1') # Foo(x='1')
json_foo = foo.Schema().dumps(foo) # '{"x": "1"}'
# Back to class instance.
Foo.Schema().loads(json_foo) # Foo(x=1)
As a bonus for marshmallow_dataclass you may use validation on the field itself, that validation will be used when someone deserialize the object from json using that schema.
Dataclasses Json
from dataclasses import dataclass
from dataclasses_json import dataclass_json
@dataclass_json
@dataclass
class Foo:
x: int
foo = Foo(x='1')
json_foo = foo.to_json() # Foo(x='1')
# Back to class instance
Foo.from_json(json_foo) # Foo(x='1')
Also, in addition to that, notice that marshmallow dataclass did type conversion for you, whereas dataclasses-json (ver.: 0.5.1) ignores that.
Write Custom Encoder
Follow accepted miracle2k answer and reuse custom json encoder.
If you are ok with using a library for that, you can use dataclasses-json. Here is an example:
from dataclasses import dataclass
from dataclasses_json import dataclass_json
@dataclass_json
@dataclass
class Foo:
x: str
foo = Foo(x="some-string")
foo_json = foo.to_json()
It also supports embedded dataclasses - if your dataclass has a field typed as another dataclass - if all dataclasses involved have the @dataclass_json decorator.
You can also implement the asdict and json.dumps method within the class. In this case it wouldn't be necessary to import json.dumps into other parts of your project:
from typing import List
from dataclasses import dataclass, asdict, field
from json import dumps
@dataclass
class TestDataClass:
"""
Data Class for TestDataClass
"""
id: int
name: str
tested: bool = False
test_list: List[str] = field(default_factory=list)
@property
def __dict__(self):
"""
get a python dictionary
"""
return asdict(self)
@property
def json(self):
"""
get the json formatted string
"""
return dumps(self.__dict__)
test_object_1 = TestDataClass(id=1, name="Hi")
print(test_object_1.__dict__)
print(test_object_1.json)
Output:
{'id': 1, 'name': 'Hi', 'tested': False, 'test_list': []}
{"id": 1, "name": "Hi", "tested": false, "test_list": []}
You can also create a parent class to inherit the methods:
from typing import List
from dataclasses import dataclass, asdict, field
from json import dumps
@dataclass
class SuperTestDataClass:
@property
def __dict__(self):
"""
get a python dictionary
"""
return asdict(self)
@property
def json(self):
"""
get the json formatted string
"""
return dumps(self.__dict__)
@dataclass
class TestDataClass(SuperTestDataClass):
"""
Data Class for TestDataClass
"""
id: int
name: str
tested: bool = False
test_list: List[str] = field(default_factory=list)
test_object_1 = TestDataClass(id=1, name="Hi")
print(test_object_1.__dict__)
print(test_object_1.json)
I'd suggest creating a parent class for your dataclasses with a to_json() method:
import json
from dataclasses import dataclass, asdict
@dataclass
class Dataclass:
def to_json(self) -> str:
return json.dumps(asdict(self))
@dataclass
class YourDataclass(Dataclass):
a: int
b: int
x = YourDataclass(a=1, b=2)
x.to_json() # '{"a": 1, "b": 2}'
This is especially useful if you have other functionality to add to all your dataclasses.
The simplest way to encode dataclass and SimpleNamespace objects is to provide the default function to json.dumps() that gets called for objects that can't be otherwise serialized, and return the object __dict__:
json.dumps(foo, default=lambda o: o.__dict__)
dataclass-wizard is a modern option that can work for you. It supports complex types such as date and time, most generics from the typing module, and also a nested dataclass structure.
The "new style" annotations introduced in PEPs 585 and 604 can be ported back to Python 3.7 via a __future__ import as shown below.
from __future__ import annotations # This can be removed in Python 3.10
from dataclasses import dataclass, field
from dataclass_wizard import JSONWizard
@dataclass
class MyClass(JSONWizard):
my_str: str | None
is_active_tuple: tuple[bool, ...]
list_of_int: list[int] = field(default_factory=list)
string = """
{
"my_str": 20,
"ListOfInt": ["1", "2", 3],
"isActiveTuple": ["true", false, 1]
}
"""
instance = MyClass.from_json(string)
print(repr(instance))
# MyClass(my_str='20', is_active_tuple=(True, False, True), list_of_int=[1, 2, 3])
print(instance.to_json())
# '{"myStr": "20", "isActiveTuple": [true, false, true], "listOfInt": [1, 2, 3]}'
# True
assert instance == MyClass.from_json(instance.to_json())
You can install the Dataclass Wizard with pip:
$ pip install dataclass-wizard
A bit of background info:
For serialization, it uses a slightly modified (a bit more efficient) implementation of dataclasses.asdict. When de-serializing JSON to a dataclass instance, the first time it iterates over the dataclass fields and generates a parser for each annotated type, which makes it more efficient when the de-serialization process is run multiple times.
Disclaimer: I am the creator (and maintainer) of this library.
pydantic
With pydantic models you get a dataclasses-like experience and full support for dict and Json conversions (and much more).
Python 3.9 and above:
from typing import Optional
from pydantic import BaseModel, parse_obj_as, parse_raw_as
class Foo(BaseModel):
count: int
size: Optional[float] = None
f1 = Foo(count=10)
print(f1.dict()) # Parse to dict
# > {'count': 10, 'size': None}
f2 = Foo.parse_obj({"count": 20}) # Load from dict
print(f2.json()) # Parse to json
# > {"count": 20, "size": null}
More options:
f3 = Foo.parse_raw('{"count": 30}') # Load from json string
f4 = Foo.parse_file("path/to/data.json") # Load from json file
f_list1 = parse_obj_as(list[Foo], [{"count": 110}, {"count": 120}]) # Load from list of dicts
print(f_list1)
# > [Foo(count=110, size=None), Foo(count=120, size=None)]
f_list2 = parse_raw_as(list[Foo], '[{"count": 130}, {"count": 140}]') # Load from list in json string
print(f_list2)
# > [Foo(count=130, size=None), Foo(count=140, size=None)]
Complex hierarchical data structures
class Bar(BaseModel):
apple = "x"
banana = "y"
class Spam(BaseModel):
foo: Foo
bars: list[Bar]
m = Spam(foo={"count": 4}, bars=[{"apple": "x1"}, {"apple": "x2"}])
print(m)
# > foo=Foo(count=4, size=None) bars=[Bar(apple='x1', banana='y'), Bar(apple='x2', banana='y')]
print(m.dict())
"""
{
'foo': {'count': 4, 'size': None},
'bars': [
{'apple': 'x1', 'banana': 'y'},
{'apple': 'x2', 'banana': 'y'},
],
}
"""
Pydantic supports many standard types (like datetime) and special commonly used types (like EmailStr and HttpUrl):
from datetime import datetime
from pydantic import HttpUrl
class User(BaseModel):
name = "John Doe"
signup_ts: datetime = None
url: HttpUrl = None
u1 = User(signup_ts="2017-07-14 00:00:00")
print(u1)
# > signup_ts=datetime.datetime(2017, 7, 14, 0, 0) url=None name='John Doe'
u2 = User(url="http://example.com")
print(u2)
# > signup_ts=None url=HttpUrl('http://example.com', ) name='John Doe'
u3 = User(url="ht://example.com")
"""
ValidationError: 1 validation error for User
url
URL scheme not permitted (type=value_error.url.scheme; allowed_schemes={'http', 'https'})
"""
If you really need to use json.dumps, write a Custom Encoder:
import json
class EnhancedJSONEncoder(json.JSONEncoder):
def default(self, o):
if isinstance(o, BaseModel):
return o.dict()
return super().default(o)
json.dumps([{"foo": f2}], cls=EnhancedJSONEncoder)
# > '[{"foo": {"count": 20, "size": null}}]'
A dataclass providing json formating method
import json
from dataclasses import dataclass
@dataclass
class Foo:
x: str
def to_json(self):
return json.dumps(self.__dict__)
Foo("bar").to_json()
>>> '{"x": "bar"}'
A much simpler answer can be found on Reddit using dictionary unpacking
>>> from dataclasses import dataclass
>>> @dataclass
... class MyData:
... prop1: int
... prop2: str
... prop3: int
...
>>> d = {'prop1': 5, 'prop2': 'hi', 'prop3': 100}
>>> my_data = MyData(**d)
>>> my_data
MyData(prop1=5, prop2='hi', prop3=100)
Okay so here is what I did when I was in similar situation.
Create a custom dictionary factory that converts nested data classes into dictionary.
def myfactory(data):
return dict(x for x in data if x[1] is not None)
If foo is your @dataclass, then simply provide your dictionary factory to use the "myfactory()" method:
fooDict = asdict(foo, dict_factory=myfactory)
Convert fooDict to json
fooJson = json.dumps(fooDict)
This should work !!

Nested dataclass initialization

I have a JSON object that reads:
j = {"id": 1, "label": "x"}
I have two types:
class BaseModel:
def __init__(self, uuid):
self.uuid = uuid
class Entity(BaseModel):
def __init__(self, id, label):
super().__init__(id)
self.name = label
Note how id is stored as uuid in the BaseModel.
I can load Entity from the JSON object as:
entity = Entity(**j)
I want to re-write my model leveraging dataclass:
@dataclass
class BaseModel:
uuid = str
@dataclass
class Entity:
name = str
Since my JSON object does not have the uuid, entity = Entity(**j) on the dataclass-based model will throw the following error:
TypeError: __init__() got an unexpected keyword argument 'id'
The "ugly" solutions I can think of:
Rename id to uuid in JSON before initialization:
j["uuid"] = j.pop("id")
Define both id and uuid:
@dataclass
class BaseModel:
uuid = str
@dataclass
class Entity:
id = str
name = str
# either use:
uuid = id
# or use this method
def __post_init__(self):
super().uuid = id
Is there any cleaner solution for this kind of object initialization in the dataclass realm?
might be ruining the idea of removing the original __init__ but how about writing a function to initialize the data class?
def init_entity(j):
j["uuid"] = j.pop("id")
return Entity(**j)
and in your code entity = init_entity(j)
I think the answer here might be to define a classmethod that acts as an alternative constructor to the dataclass.
from dataclasses import dataclass
from typing import TypeVar, Any
@dataclass
class BaseModel:
uuid: str
E = TypeVar('E', bound='Entity')
@dataclass
class Entity(BaseModel):
name: str
@classmethod
def from_json(cls: type[E], **kwargs: Any) -> E:
return cls(kwargs['id'], kwargs['label'])
(For the from_json type annotation, you'll need to use typing.Type[E] instead of type[E] if you're on python <= 3.8.)
Note that you need to use colons for your type-annotations within the main body of a dataclass, rather than the = operator, as you were doing.
Example usage in the interactive REPL:
>>> my_json_dict = {'id': 1, 'label': 'x'}
>>> Entity.from_json(**my_json_dict)
Entity(uuid=1, name='x')
It's again questionable how much boilerplate code this saves, however. If you find yourself doing this much work to replicate the behaviour of a non-dataclass class, it's often better just to use a non-dataclass class. Dataclasses are not the perfect solution to every problem, nor do they try to be.
Simplest solution seems to be to use an efficient JSON serialization library that supports key remappings. There are actually tons of them that support this, but dataclass-wizard is one example of a (newer) library that supports this particular use case.
Here's an approach using an alias to dataclasses.field() which should be IDE friendly enough:
from dataclasses import dataclass
from dataclass_wizard import json_field, fromdict, asdict
@dataclass
class BaseModel:
uuid: int = json_field('id', all=True)
@dataclass
class Entity(BaseModel):
name: str = json_field('label', all=True)
j = {"id": 1, "label": "x"}
# De-serialize the dictionary object into an `Entity` instance.
e = fromdict(Entity, j)
repr(e)
# Entity(uuid=1, name='x')
# Assert we get the same object when serializing the instance back to a
# JSON-serializable dict.
assert asdict(e) == j

Json serialization of nested dataclasses

I would need to take the question about json serialization of @dataclass from Make the Python json encoder support Python's new dataclasses a bit further: consider when they are in a nested structure.
Consider:
import json
from attr import dataclass
from dataclasses_json import dataclass_json
@dataclass
@dataclass_json
class Prod:
id: int
name: str
price: float
prods = [Prod(1,'A',25.3),Prod(2,'B',79.95)]
pjson = json.dumps(prods)
That gives us:
TypeError: Object of type Prod is not JSON serializable
Note the above does incorporate one of the answers, https://stackoverflow.com/a/59688140/1056563. It claims to support the nested case via the dataclass_json decorator. Apparently that does not actually work.
I also tried another of the answers https://stackoverflow.com/a/51286749/1056563 :
class EnhancedJSONEncoder(json.JSONEncoder):
def default(s, o):
if dataclasses.is_dataclass(o):
return dataclasses.asdict(o)
return super().default(o)
And I created a helper method for it:
def jdump(s,foo):
return json.dumps(foo, cls=s.c.EnhancedJSONEncoder)
But using that method also did not effect the (error) result. Any further tips?
You can use a pydantic library. From the example in documentation
from pydantic import BaseModel
class BarModel(BaseModel):
whatever: int
class FooBarModel(BaseModel):
banana: float
foo: str
bar: BarModel
m = FooBarModel(banana=3.14, foo='hello', bar={'whatever': 123})
# returns a dictionary:
print(m.dict())
"""
{
'banana': 3.14,
'foo': 'hello',
'bar': {'whatever': 123},
}
"""
print(m.dict(include={'foo', 'bar'}))
#> {'foo': 'hello', 'bar': {'whatever': 123}}
print(m.dict(exclude={'foo', 'bar'}))
#> {'banana': 3.14}
This is actually not a direct answer but more of a reasonable workaround for cases where mutability is not needed (or desirable). The typing based NamedTuple looks and feels quite similar and is probably the inspiration behind the dataclass. If serialization were needed it is likely presently the best alternative.
from typing import NamedTuple
class Prod(NamedTuple):
id: str
name: str
price: str
I made that as a drop-in replacement for the dataclass based Prod class and it works.
import json
from dataclasses import dataclass, asdict
@dataclass
class Prod:
id: int
name: str
price: float
prods = [asdict(Prod(1, 'A', 25.3)), asdict(Prod(2, 'B', 79.95))]
pjson = json.dumps(prods)
print(pjson)
# [{"id": 1, "name": "A", "price": 25.3}, {"id": 2, "name": "B", "price": 79.95}]

Wrong type hints when the attribute name is the same as the class name

from typing import get_type_hints, Optional
from dataclasses import dataclass
@dataclass
class Wife:
name: str = ''
@dataclass
class Husband:
name: str = ''
wife: Optional[Wife] = None
@dataclass
class HusbandNew:
name: str = ''
Wife: Optional[Wife] = None
get_type_hints(Husband)
>>> {'name': str, 'wife': typing.Union[__main__.Wife, NoneType]}
get_type_hints(HusbandNew)
>>> {'name': str, 'Wife': NoneType}
I'm using Python 3.7.4. The only difference between HusbandNew and Husband is the wife/Wife attribute (Wife is also the class name). Why do they produce different type hints?
In python, we usually use PascalCase for class names and snake_case for attribute names. However, I often deal with external APIs which return JSON. I copy that JSON to https://app.quicktype.io/ to generate dataclasses with type annotations. It produces class names same as attribute names.

Make the Python json encoder support Python's new dataclasses

Starting with Python 3.7, there is something called a dataclass:
from dataclasses import dataclass
@dataclass
class Foo:
x: str
However, the following fails:
>>> import json
>>> foo = Foo(x="bar")
>>> json.dumps(foo)
TypeError: Object of type Foo is not JSON serializable
How can I make json.dumps() encode instances of Foo into json objects?
Much like you can add support to the JSON encoder for datetime objects or Decimals, you can also provide a custom encoder subclass to serialize dataclasses:
import dataclasses, json
class EnhancedJSONEncoder(json.JSONEncoder):
def default(self, o):
if dataclasses.is_dataclass(o):
return dataclasses.asdict(o)
return super().default(o)
json.dumps(foo, cls=EnhancedJSONEncoder)
Can't you just use the dataclasses.asdict() function to convert the dataclass
to a dict? Something like:
>>> @dataclass
... class Foo:
... a: int
... b: int
...
>>> x = Foo(1,2)
>>> json.dumps(dataclasses.asdict(x))
'{"a": 1, "b": 2}'
Ways of getting JSONified dataclass instance
There are a couple of options to accomplish that goal; selecting one requires analyzing which approach suits your needs best:
Standard library: dataclasses.asdict
import dataclasses
import json
@dataclasses.dataclass
class Foo:
x: str
foo = Foo(x='1')
json_foo = json.dumps(dataclasses.asdict(foo)) # '{"x": "1"}'
Parsing it back to a dataclass instance isn't trivial, so you may want to visit this answer: https://stackoverflow.com/a/53498623/2067976
Marshmallow Dataclass
from dataclasses import field
from marshmallow_dataclass import dataclass
@dataclass
class Foo:
x: int = field(metadata={"required": True})
foo = Foo(x='1') # Foo(x='1')
json_foo = foo.Schema().dumps(foo) # '{"x": "1"}'
# Back to class instance.
Foo.Schema().loads(json_foo) # Foo(x=1)
As a bonus for marshmallow_dataclass you may use validation on the field itself, that validation will be used when someone deserialize the object from json using that schema.
Dataclasses Json
from dataclasses import dataclass
from dataclasses_json import dataclass_json
@dataclass_json
@dataclass
class Foo:
x: int
foo = Foo(x='1')
json_foo = foo.to_json() # Foo(x='1')
# Back to class instance
Foo.from_json(json_foo) # Foo(x='1')
Also, in addition to that, notice that marshmallow dataclass did type conversion for you, whereas dataclasses-json (ver.: 0.5.1) ignores that.
Write Custom Encoder
Follow accepted miracle2k answer and reuse custom json encoder.
If you are ok with using a library for that, you can use dataclasses-json. Here is an example:
from dataclasses import dataclass
from dataclasses_json import dataclass_json
@dataclass_json
@dataclass
class Foo:
x: str
foo = Foo(x="some-string")
foo_json = foo.to_json()
It also supports embedded dataclasses - if your dataclass has a field typed as another dataclass - if all dataclasses involved have the @dataclass_json decorator.
You can also implement the asdict and json.dumps method within the class. In this case it wouldn't be necessary to import json.dumps into other parts of your project:
from typing import List
from dataclasses import dataclass, asdict, field
from json import dumps
@dataclass
class TestDataClass:
"""
Data Class for TestDataClass
"""
id: int
name: str
tested: bool = False
test_list: List[str] = field(default_factory=list)
@property
def __dict__(self):
"""
get a python dictionary
"""
return asdict(self)
@property
def json(self):
"""
get the json formatted string
"""
return dumps(self.__dict__)
test_object_1 = TestDataClass(id=1, name="Hi")
print(test_object_1.__dict__)
print(test_object_1.json)
Output:
{'id': 1, 'name': 'Hi', 'tested': False, 'test_list': []}
{"id": 1, "name": "Hi", "tested": false, "test_list": []}
You can also create a parent class to inherit the methods:
from typing import List
from dataclasses import dataclass, asdict, field
from json import dumps
@dataclass
class SuperTestDataClass:
@property
def __dict__(self):
"""
get a python dictionary
"""
return asdict(self)
@property
def json(self):
"""
get the json formatted string
"""
return dumps(self.__dict__)
@dataclass
class TestDataClass(SuperTestDataClass):
"""
Data Class for TestDataClass
"""
id: int
name: str
tested: bool = False
test_list: List[str] = field(default_factory=list)
test_object_1 = TestDataClass(id=1, name="Hi")
print(test_object_1.__dict__)
print(test_object_1.json)
I'd suggest creating a parent class for your dataclasses with a to_json() method:
import json
from dataclasses import dataclass, asdict
@dataclass
class Dataclass:
def to_json(self) -> str:
return json.dumps(asdict(self))
@dataclass
class YourDataclass(Dataclass):
a: int
b: int
x = YourDataclass(a=1, b=2)
x.to_json() # '{"a": 1, "b": 2}'
This is especially useful if you have other functionality to add to all your dataclasses.
The simplest way to encode dataclass and SimpleNamespace objects is to provide the default function to json.dumps() that gets called for objects that can't be otherwise serialized, and return the object __dict__:
json.dumps(foo, default=lambda o: o.__dict__)
dataclass-wizard is a modern option that can work for you. It supports complex types such as date and time, most generics from the typing module, and also a nested dataclass structure.
The "new style" annotations introduced in PEPs 585 and 604 can be ported back to Python 3.7 via a __future__ import as shown below.
from __future__ import annotations # This can be removed in Python 3.10
from dataclasses import dataclass, field
from dataclass_wizard import JSONWizard
@dataclass
class MyClass(JSONWizard):
my_str: str | None
is_active_tuple: tuple[bool, ...]
list_of_int: list[int] = field(default_factory=list)
string = """
{
"my_str": 20,
"ListOfInt": ["1", "2", 3],
"isActiveTuple": ["true", false, 1]
}
"""
instance = MyClass.from_json(string)
print(repr(instance))
# MyClass(my_str='20', is_active_tuple=(True, False, True), list_of_int=[1, 2, 3])
print(instance.to_json())
# '{"myStr": "20", "isActiveTuple": [true, false, true], "listOfInt": [1, 2, 3]}'
# True
assert instance == MyClass.from_json(instance.to_json())
You can install the Dataclass Wizard with pip:
$ pip install dataclass-wizard
A bit of background info:
For serialization, it uses a slightly modified (a bit more efficient) implementation of dataclasses.asdict. When de-serializing JSON to a dataclass instance, the first time it iterates over the dataclass fields and generates a parser for each annotated type, which makes it more efficient when the de-serialization process is run multiple times.
Disclaimer: I am the creator (and maintainer) of this library.
pydantic
With pydantic models you get a dataclasses-like experience and full support for dict and Json conversions (and much more).
Python 3.9 and above:
from typing import Optional
from pydantic import BaseModel, parse_obj_as, parse_raw_as
class Foo(BaseModel):
count: int
size: Optional[float] = None
f1 = Foo(count=10)
print(f1.dict()) # Parse to dict
# > {'count': 10, 'size': None}
f2 = Foo.parse_obj({"count": 20}) # Load from dict
print(f2.json()) # Parse to json
# > {"count": 20, "size": null}
More options:
f3 = Foo.parse_raw('{"count": 30}') # Load from json string
f4 = Foo.parse_file("path/to/data.json") # Load from json file
f_list1 = parse_obj_as(list[Foo], [{"count": 110}, {"count": 120}]) # Load from list of dicts
print(f_list1)
# > [Foo(count=110, size=None), Foo(count=120, size=None)]
f_list2 = parse_raw_as(list[Foo], '[{"count": 130}, {"count": 140}]') # Load from list in json string
print(f_list2)
# > [Foo(count=130, size=None), Foo(count=140, size=None)]
Complex hierarchical data structures
class Bar(BaseModel):
apple = "x"
banana = "y"
class Spam(BaseModel):
foo: Foo
bars: list[Bar]
m = Spam(foo={"count": 4}, bars=[{"apple": "x1"}, {"apple": "x2"}])
print(m)
# > foo=Foo(count=4, size=None) bars=[Bar(apple='x1', banana='y'), Bar(apple='x2', banana='y')]
print(m.dict())
"""
{
'foo': {'count': 4, 'size': None},
'bars': [
{'apple': 'x1', 'banana': 'y'},
{'apple': 'x2', 'banana': 'y'},
],
}
"""
Pydantic supports many standard types (like datetime) and special commonly used types (like EmailStr and HttpUrl):
from datetime import datetime
from pydantic import HttpUrl
class User(BaseModel):
name = "John Doe"
signup_ts: datetime = None
url: HttpUrl = None
u1 = User(signup_ts="2017-07-14 00:00:00")
print(u1)
# > signup_ts=datetime.datetime(2017, 7, 14, 0, 0) url=None name='John Doe'
u2 = User(url="http://example.com")
print(u2)
# > signup_ts=None url=HttpUrl('http://example.com', ) name='John Doe'
u3 = User(url="ht://example.com")
"""
ValidationError: 1 validation error for User
url
URL scheme not permitted (type=value_error.url.scheme; allowed_schemes={'http', 'https'})
"""
If you really need to use json.dumps, write a Custom Encoder:
import json
class EnhancedJSONEncoder(json.JSONEncoder):
def default(self, o):
if isinstance(o, BaseModel):
return o.dict()
return super().default(o)
json.dumps([{"foo": f2}], cls=EnhancedJSONEncoder)
# > '[{"foo": {"count": 20, "size": null}}]'
A dataclass providing json formating method
import json
from dataclasses import dataclass
@dataclass
class Foo:
x: str
def to_json(self):
return json.dumps(self.__dict__)
Foo("bar").to_json()
>>> '{"x": "bar"}'
A much simpler answer can be found on Reddit using dictionary unpacking
>>> from dataclasses import dataclass
>>> @dataclass
... class MyData:
... prop1: int
... prop2: str
... prop3: int
...
>>> d = {'prop1': 5, 'prop2': 'hi', 'prop3': 100}
>>> my_data = MyData(**d)
>>> my_data
MyData(prop1=5, prop2='hi', prop3=100)
Okay so here is what I did when I was in similar situation.
Create a custom dictionary factory that converts nested data classes into dictionary.
def myfactory(data):
return dict(x for x in data if x[1] is not None)
If foo is your @dataclass, then simply provide your dictionary factory to use the "myfactory()" method:
fooDict = asdict(foo, dict_factory=myfactory)
Convert fooDict to json
fooJson = json.dumps(fooDict)
This should work !!

Categories

Resources