Python - generating parent/child dict structure

I have this method:
@staticmethod
def get_blocks():
    """Public method that can be extended to add new blocks.
    First item is the most parent. Last item is the most child.
    Returns:
        blocks (list)
    """
    return ['header', 'body', 'footer']
As the docstring describes, this method can be extended to return any kind of blocks in a particular order.
So I want to build a mapping that indicates which block is the parent/child of which (caring only about the "nearest" parent/child).
def _get_blocks_mapping(blocks):
    mp = {'parent': {}, 'child': {}}
    if not blocks:
        return mp
    mp['parent'][blocks[0]] = None
    mp['child'][blocks[-1]] = None
    blocks_len = len(blocks)
    if blocks_len > 1:
        mp['parent'][blocks[-1]] = blocks[-2]
        mp['child'][blocks[0]] = blocks[1]
    for i in range(1, len(blocks) - 1):
        mp['parent'][blocks[i]] = blocks[i - 1]
        mp['child'][blocks[i]] = blocks[i + 1]
    return mp
So the result, with the three blocks from the get_blocks method, is this:
{
    'parent': {
        'header': None,
        'body': 'header',
        'footer': 'body',
    },
    'child': {
        'header': 'body',
        'body': 'footer',
        'footer': None
    }
}
Well, it works, but it feels kind of hacky to me. So maybe someone could suggest a better way to create such a mapping? (Or maybe there is an established way of creating parent/child mappings, using a different structure than the one I intend to use?)

You want to loop over the list in pairs, giving you the natural parent-child relationships:
mp = {'parent': {}, 'child': {}}
if blocks:
    mp['parent'][blocks[0]] = mp['child'][blocks[-1]] = None
    for parent, child in zip(blocks, blocks[1:]):
        mp['parent'][child] = parent
        mp['child'][parent] = child
zip() here pairs up each block with the next one in the list.
Demo:
>>> blocks = ['header', 'body', 'footer']
>>> mp = {'parent': {}, 'child': {}}
>>> if blocks:
...     mp['parent'][blocks[0]] = mp['child'][blocks[-1]] = None
...     for parent, child in zip(blocks, blocks[1:]):
...         mp['parent'][child] = parent
...         mp['child'][parent] = child
...
>>> from pprint import pprint
>>> pprint(mp)
{'child': {'body': 'footer', 'footer': None, 'header': 'body'},
 'parent': {'body': 'header', 'footer': 'body', 'header': None}}
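As an aside, on Python 3.10 and newer the same pairing can be written with itertools.pairwise, which avoids building the blocks[1:] slice copy:
from itertools import pairwise

mp = {'parent': {}, 'child': {}}
if blocks:
    mp['parent'][blocks[0]] = mp['child'][blocks[-1]] = None
    for parent, child in pairwise(blocks):  # same pairs as zip(blocks, blocks[1:])
        mp['parent'][child] = parent
        mp['child'][parent] = child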


With format='multipart' in the test client, nested dict data is ignored or removed

I have a nested serializer containing an ImageField in the nested serializer. The serializers are:
class FloorPlanLocationSerializer(serializers.ModelSerializer):
    class Meta:
        model = FloorPlan
        fields = (
            'floor',
            'image',
        )
        extra_kwargs = {'floor': {'required': False}, 'image': {'required': False}}

class LocationSerializer(FilterSerializerByOrgManaged, serializers.ModelSerializer):
    floorplan = FloorPlanLocationSerializer(required=False, allow_null=True)

    class Meta:
        model = Location
        fields = (
            'id',
            'organization',
            'name',
            'type',
            'is_mobile',
            'address',
            'geometry',
            'created',
            'modified',
            'floorplan',
        )
        read_only_fields = ('created', 'modified')

    def to_representation(self, instance):
        request = self.context['request']
        data = super().to_representation(instance)
        floorplans = instance.floorplan_set.all().order_by('-modified')
        floorplan_list = []
        for floorplan in floorplans:
            dict_ = {
                'floor': floorplan.floor,
                'image': request.build_absolute_uri(floorplan.image.url),
            }
            floorplan_list.append(dict_)
        data['floorplan'] = floorplan_list
        return data

    def create(self, validated_data):
        floorplan_data = None
        if validated_data.get('floorplan'):
            floorplan_data = validated_data.pop('floorplan')
        instance = self.instance or self.Meta.model(**validated_data)
        with transaction.atomic():
            instance.full_clean()
            instance.save()
        if floorplan_data:
            floorplan_data['location'] = instance
            floorplan_data['organization'] = instance.organization
            with transaction.atomic():
                fl = FloorPlan.objects.create(**floorplan_data)
                fl.full_clean()
                fl.save()
        return instance
The above serializer works fine with the DRF browsable page, but when I send the data with the test client in multipart format, the nested data gets removed while sending the POST request. This is how I wrote the test:
def test_create_location_with_floorplan_api(self):
    path = reverse('geo_api:list_location')
    coords = json.loads(Point(2, 23).geojson)
    image = Image.new("RGB", (100, 100))
    with tempfile.NamedTemporaryFile(suffix=".png", mode="w+b") as tmp_file:
        image.save(tmp_file, format="png")
        tmp_file.seek(0)
        byio = BytesIO(tmp_file.read())
        inm_file = InMemoryUploadedFile(
            file=byio,
            field_name="avatar",
            name="testImage.png",
            content_type="image/png",
            size=byio.getbuffer().nbytes,
            charset=None,
        )
        data = {
            'organization': self._get_org().pk,
            'name': 'test-location',
            'type': 'indoor',
            'is_mobile': False,
            'address': 'Via del Corso, Roma, Italia',
            'geometry': {'Type': 'Point', 'coordinates': [12.32, 43.222]},
            'floorplan': {
                'floor': 12,
                'image': inm_file,
            },
        }
        with self.assertNumQueries(6):
            response = self.client.post(path, data, format='multipart')
        self.assertEqual(response.status_code, 201)
The data doesn't arrive in the same format as I sent it, i.e., when I inspect the data in the to_internal_value method, this is what I receive:
<QueryDict: {'organization': ['f6c406e5-0602-44a7-9160-ec109ac29f4c'], 'name': ['test-location'], 'type': ['indoor'], 'is_mobile': ['False'], 'address': ['Via del Corso, Roma, Italia'], 'geometry': ['type', 'coordinates'], 'floorplan': ['floor', 'image']}>
The values of type, coordinates, and floorplan are not present in it.
How can I write a proper test for the above case?
If you want to post form data, you need to flatten everything the same way a browser would. Maybe this gist will help: flatten_dict_for_form_data. It's quite old and could use some cleanup, but it still works.
This recursively flattens a dict, which you can then send to the test client (or to live services):
def flatten_dict_for_formdata(input_dict, sep="[{i}]"):
    def __flatten(value, prefix, result_dict, previous=None):
        if isinstance(value, dict):
            if previous == "dict":
                prefix += "."
            for key, v in value.items():
                __flatten(v, prefix + key, result_dict, "dict")
        elif isinstance(value, (list, tuple)):
            for i, v in enumerate(value):
                __flatten(v, prefix + sep.format(i=i), result_dict)
        else:
            result_dict[prefix] = value
        return result_dict
    return __flatten(input_dict, '', {})
>>> flatten_dict_for_formdata({
...     "name": "Test",
...     "location": {"lat": 1, "lng": 2},
...     "sizes": ["S", "M", "XL"]
... })
{
    "name": "Test",
    "location.lat": 1,
    "location.lng": 2,
    "sizes[0]": "S",
    "sizes[1]": "M",
    "sizes[2]": "XL"
}
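Applied to the payload from the test above, a sketch (reusing path, inm_file and self.client from that test; the file object passes through untouched, since it is neither a dict nor a list):
data = flatten_dict_for_formdata({
    'organization': self._get_org().pk,
    'name': 'test-location',
    'type': 'indoor',
    'is_mobile': False,
    'address': 'Via del Corso, Roma, Italia',
    'geometry': {'type': 'Point', 'coordinates': [12.32, 43.222]},
    'floorplan': {'floor': 12, 'image': inm_file},
})
# Produces flat keys such as 'geometry.type', 'geometry.coordinates[0]',
# 'floorplan.floor' and 'floorplan.image', which multipart encoding can carry.
response = self.client.post(path, data, format='multipart')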

Recursively create same class during instantiation

I have a JSON config that I want to create a dict from. Because JSON configs are recursive, any time I see a JSON value that is an array I want to recurse into it. However, this is not doing what I want it to do.
class FieldHandler():
    formfields = {}

    def __init__(self, fields):
        for field in fields:
            options = self.get_options(field)
            f = getattr(self, "create_field_for_" + field['type'])(field, options)
            self.formfields[field['name']] = f

    def get_options(self, field):
        options = {}
        options['label'] = field['name']
        options['help_text'] = field.get("help_text", None)
        options['required'] = bool(field.get("required", 0))
        return options

    def create_field_for_string(self, field, options):
        options['max_length'] = int(field.get("max_length", "20"))
        return django.forms.CharField(**options)

    def create_field_for_int(self, field, options):
        options['max_value'] = int(field.get("max_value", "999999999"))
        options['min_value'] = int(field.get("min_value", "-999999999"))
        return django.forms.IntegerField(**options)

    def create_field_for_array(self, field, options):
        fh = FieldHandler(field['elements'])
        return fh
and instantiating:
fh = FieldHandler([
    {'type': 'string', 'name': 'position'},
    {'type': 'array', 'name': 'calendar', 'elements': [
        {'type': 'string', 'name': 'country'},
        {'type': 'string', 'name': 'url'},
    ]},
    {'type': 'int', 'name': 'maxSize'},
])
I expect to get a dict like so:
{
    'position': <django.forms.fields.CharField object at 0x10b57af50>,
    'calendar': <__main__.FieldHandler instance at 0x10b57c680>,
    'maxSize': <django.forms.fields.IntegerField object at 0x10b58e050>,
}
Where calendar itself is expected to be:
{
    'url': <django.forms.fields.CharField object at 0x10b58e150>,
    'country': <django.forms.fields.CharField object at 0x10b58e0d0>
}
Instead I get:
{
    'url': <django.forms.fields.CharField object at 0x10b58e150>,
    'position': <django.forms.fields.CharField object at 0x10b57af50>,
    'calendar': <__main__.FieldHandler instance at 0x10b57c680>,
    'maxSize': <django.forms.fields.IntegerField object at 0x10b58e050>,
    'country': <django.forms.fields.CharField object at 0x10b58e0d0>
}
What am I doing wrong? Why are the position and country parameters being set on my global FieldHandler?
formfields is a class attribute that is shared among all instances. Make it an instance attribute instead:
class FieldHandler():
    def __init__(self, fields):
        self.formfields = {}
        # ...
Now all FieldHandler instances have their own formfields, with only the "inner" calendar handler holding the country and url fields (and not position, assuming that was a typo).
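The underlying pitfall in isolation, independent of the form machinery: a mutable class attribute is a single object shared by every instance, whereas an attribute assigned in __init__ is created fresh for each instance.
class Shared:
    items = {}  # class attribute: one dict shared by every instance

class PerInstance:
    def __init__(self):
        self.items = {}  # instance attribute: a fresh dict per instance

a, b = Shared(), Shared()
a.items['x'] = 1
print(b.items)  # {'x': 1} -- b sees a's write

c, d = PerInstance(), PerInstance()
c.items['x'] = 1
print(d.items)  # {} -- independent dicts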

python-marshmallow: deserializing nested schema with only one exposed key

I am trying to serialize a list of nested objects as scalar values by taking only one field from the nested item. Instead of [{key: value}, ...] I want to receive [value1, value2, ...].
Code:
from marshmallow import *

class MySchema(Schema):
    key = fields.String(required=True)

class ParentSchema(Schema):
    items = fields.Nested(MySchema, only='key', many=True)
Given the above schemas, I want to serialize some data:
>>> data = {'items': [{'key': 1}, {'key': 2}, {'key': 3}]}
>>> result, errors = ParentSchema().dump(data)
>>> result
{'items': ['1', '2', '3']}
This works as expected, giving me the list of scalar values. However, when trying to deserialize the data using the schemas above, the data is suddenly invalid:
>>> data, errors = ParentSchema().load(result)
>>> data
{'items': [{}, {}, {}]}
>>> errors
{'items': {0: {}, '_schema': ['Invalid input type.', 'Invalid input type.', 'Invalid input type.'], 1: {}, 2: {}}}
Is there any configuration option I am missing or is this simply not possible?
For anyone stumbling across the same issue, this is the workaround I am using currently:
class MySchema(Schema):
    key = fields.String(required=True)

    def load(self, data, *args):
        data = [
            {'key': item} if isinstance(item, str) else item
            for item in data
        ]
        return super().load(data, *args)

class ParentSchema(Schema):
    items = fields.Nested(MySchema, only='key', many=True)
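With that override in place, loading the dumped scalar list should round-trip (a sketch against marshmallow 2.x, where load returns a (data, errors) pair as in the question; exact behavior may vary by version):
>>> data, errors = ParentSchema().load({'items': ['1', '2', '3']})
>>> data
{'items': [{'key': '1'}, {'key': '2'}, {'key': '3'}]}
>>> errors
{}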

Targets don't match node IDs in networkx json file

I have a network I want to output to a JSON file. However, when I output it, node targets get converted to numbers and no longer match the node ids, which are strings.
For example:
G = nx.DiGraph(data)
G.edges()
results in:
[(22, 'str1'),
 (22, 'str2'),
 (22, 'str3')]
in Python. This is correct.
But in the output, when I write out the data like so...
json.dump(json_graph.node_link_data(G), f,
          indent=4, sort_keys=True, separators=(',', ':'))
the ids for the three target nodes 'str1', 'str2', and 'str3' come out correctly:
{
    "id": "str1"
},
{
    "id": "str2"
},
{
    "id": "str3"
}
The targets of node 22 have been turned into numbers:
{
    "source": 22,
    "target": 972
},
{
    "source": 22,
    "target": 1261
},
{
    "source": 22,
    "target": 1259
}
This happens for all nodes that have string ids.
Why is this, and how can I prevent it?
The desired result is that either the "target" fields keep the string ids, or the string ids become numeric in a way that matches the targets.
Why is this
It's a feature. Not all graph libraries accept strings as identifiers, but all that I know of accept integers.
how can I prevent it?
Replace the ids with node names using the nodes map:
>>> import networkx as nx
>>> import pprint
>>> g = nx.DiGraph()
>>> g.add_edge(1, 'foo')
>>> g.add_edge(2, 'bar')
>>> g.add_edge('foo', 'bar')
>>> res = nx.node_link_data(g)
>>> pprint.pprint(res)
{'directed': True,
 'graph': {},
 'links': [{'source': 0, 'target': 3},
           {'source': 1, 'target': 2},
           {'source': 3, 'target': 2}],
 'multigraph': False,
 'nodes': [{'name': 1}, {'name': 2}, {'name': 'bar'}, {'name': 'foo'}]}
>>> res['links'] = [
...     {
...         'source': res['nodes'][link['source']]['name'],
...         'target': res['nodes'][link['target']]['name'],
...     }
...     for link in res['links']]
>>> pprint.pprint(res)
{'directed': True,
 'graph': {},
 'links': [{'source': 1, 'target': 'foo'},
           {'source': 2, 'target': 'bar'},
           {'source': 'foo', 'target': 'bar'}],
 'multigraph': False,
 'nodes': [{'name': 1}, {'name': 2}, {'name': 'bar'}, {'name': 'foo'}]}
To make the output conform to the d3 template linked in the node_link_data documentation, you can make a couple of simple changes to the node_link_data function. Just run the function below and use it instead. All I changed was to trim some of the outputs that are unnecessary for the template, and to store the graph label instead of an index. The index the original function used for source and target is created inside the function, so it isn't something you can extract from the graph itself; if you want to be certain that your node labels correspond to your links, it's safest to modify node_link_data.
The D3 template this creates data for is here.
Note that if you use the below data without adding a node or link attribute, you will need to delete the following lines from the d3 template:
.attr("stroke-width", function(d) { return Math.sqrt(d.value); })
and
.attr("fill", function(d) { return color(d.group); })
Modified function:
from itertools import chain, count
import json
import networkx as nx
from networkx.utils import make_str
__author__ = """Aric Hagberg <hagberg@lanl.gov>"""
_attrs = dict(id='id', source='source', target='target', key='key')

def node_link_data(G, attrs=_attrs):
    """Return data in node-link format that is suitable for JSON serialization
    and use in Javascript documents.
    """
    multigraph = G.is_multigraph()
    id_ = attrs['id']
    source = attrs['source']
    target = attrs['target']
    # Allow 'key' to be omitted from attrs if the graph is not a multigraph.
    key = None if not multigraph else attrs['key']
    if len(set([source, target, key])) < 3:
        raise nx.NetworkXError('Attribute names are not unique.')
    mapping = dict(zip(G, count()))
    data = {}
    data['nodes'] = [dict(chain(G.node[n].items(), [(id_, n)])) for n in G]
    if multigraph:
        data['links'] = [
            dict(chain(d.items(),
                       [(source, u), (target, v), (key, k)]))
            for u, v, k, d in G.edges_iter(keys=True, data=True)]
    else:
        data['links'] = [
            dict(chain(d.items(),
                       [(source, u), (target, v)]))
            for u, v, d in G.edges_iter(data=True)]
    return data
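With the modified function, links carry the actual node labels (a quick sketch; note it relies on the networkx 1.x API used above, e.g. G.node and G.edges_iter, and node/link order may differ):
>>> g = nx.DiGraph()
>>> g.add_edge(22, 'str1')
>>> node_link_data(g)
{'nodes': [{'id': 22}, {'id': 'str1'}], 'links': [{'source': 22, 'target': 'str1'}]}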

Flatten a nested dict structure into a dataset

For some post-processing, I need to flatten a structure like this
{'foo': {
    'cat': {'name': 'Hodor', 'age': 7},
    'dog': {'name': 'Mordor', 'age': 5}},
 'bar': {'rat': {'name': 'Izidor', 'age': 3}}
}
into this dataset:
[{'foobar': 'foo', 'animal': 'dog', 'name': 'Mordor', 'age': 5},
 {'foobar': 'foo', 'animal': 'cat', 'name': 'Hodor', 'age': 7},
 {'foobar': 'bar', 'animal': 'rat', 'name': 'Izidor', 'age': 3}]
So I wrote this function:
def flatten(data, primary_keys):
    out = []
    keys = copy.copy(primary_keys)
    keys.reverse()

    def visit(node, primary_values, prim):
        if len(prim):
            p = prim.pop()
            for key, child in node.iteritems():
                primary_values[p] = key
                visit(child, primary_values, copy.copy(prim))
        else:
            new = copy.copy(node)
            new.update(primary_values)
            out.append(new)

    visit(data, {}, keys)
    return out

out = flatten(a, ['foo', 'bar'])
I was not really satisfied, because I have to use copy.copy to protect my inputs. Obviously, when using flatten one does not want the inputs to be altered.
Then I thought about an alternative that uses more global variables (at least global to flatten) and uses an index instead of directly passing primary_keys to visit. However, this does not really help me get rid of the ugly initial copy:
keys = copy.copy(primary_keys)
keys.reverse()
So here is my final version:
def flatten(data, keys):
    data = copy.copy(data)
    keys = copy.copy(keys)
    keys.reverse()
    out = []
    values = {}

    def visit(node, id):
        if id:
            id -= 1
            for key, child in node.iteritems():
                values[keys[id]] = key
                visit(child, id)
        else:
            node.update(values)
            out.append(node)

    visit(data, len(keys))
    return out
Is there a better implementation (that can avoid the use of copy.copy)?
Edit: modified to account for variable dictionary depth.
By using the merge function from my previous answer (below), you can avoid calling update which modifies the caller. There is then no need to copy the dictionary first.
def flatten(data, keys):
    out = []
    values = {}

    def visit(node, id):
        if id:
            id -= 1
            for key, child in node.items():
                values[keys[id]] = key
                visit(child, id)
        else:
            out.append(merge(node, values))  # use merge instead of update

    visit(data, len(keys))
    return out
One thing I don't understand is why you need to protect the keys input. I don't see them being modified anywhere.
Previous answer
How about list comprehension?
def merge(d1, d2):
    return dict(list(d1.items()) + list(d2.items()))

[[merge({'foobar': key, 'animal': sub_key}, sub_sub_dict)
  for sub_key, sub_sub_dict in sub_dict.items()]
 for key, sub_dict in a.items()]
The tricky part was merging the dictionaries without using update (which returns None).
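Note that the comprehension yields one inner list per top-level key ([[...], [...]] rather than the flat dataset asked for); if you want the flat list, chain the inner lists together:
from itertools import chain

flat = list(chain.from_iterable(
    [merge({'foobar': key, 'animal': sub_key}, sub_sub_dict)
     for sub_key, sub_sub_dict in sub_dict.items()]
    for key, sub_dict in a.items()))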
