Sorting nested dictionaries using its second key - python

I am trying to sort a nested dictionary using its second key where my dictionary looks like:
my_dictionary = {
"char": {
"3": {
"genman": [
"motion"
]
}
},
"fast": {
"2": {
"empty": []
}
},
"EMPT": {
"0": {}
},
"veh": {
"1": {
"tankers": [
"varA",
"varB"
]
}
}
}
And my expected output will be:
my_dictionary = {
"EMPT": {
"0": {}
},
"veh": {
"1": {
"tankers": [
"varA",
"varB"
]
}
},
"fast": {
"2": {
"empty": []
}
},
"char": {
"3": {
"genman": [
"motion"
]
}
}
}
Tried using the following code:
new_dict = {}
for k, v in my_dictionary.items():
for s in sorted(my_dictionary.itervalues()):
if not s.keys()[0]:
new_val = my_dictionary[k].get(s.keys()[0])
my_dictionary[s.keys()[0]] = new_val
my_dictionary.update(new_dict)
It fails badly, and I am getting the same result as my initial dictionary.

This works:
sorted(my_dictionary.items(), key=lambda x: list(x[1].keys())[0])
Returns:
[('EMPT', {'0': {}}),
('veh', {'1': {'tankers': ['varA', 'varB']}}),
('fast', {'2': {'empty': []}}),
('char', {'3': {'genman': ['motion']}})]
Sorted receives a list of key-value pairs, we sort using the result of lambda x: list(x[1].keys())[0] which takes a list of the keys in the inner dict, then grabs the first key (need to do this because dict_keys directly is not indexable).
Edit: the result is a list of key, value pairs but it can be fed into an OrderedDict to use it as a dict.

Actually, a plain dict has no guaranteed order (before Python 3.7, where insertion order became part of the language); however, you can use OrderedDict instead.
from collections import OrderedDict
my_dictionary = {
"char": {
"3": {
"genman": [
"motion"
]
}
},
"fast": {
"2": {
"empty": []
}
},
"EMPT": {
"0": {}
},
"veh": {
"1": {
"tankers": [
"varA",
"varB"
]
}
}
}
# Build (first_inner_key, outer_key) tuples; sorting tuples orders by the inner
# key first and falls back to the outer key on ties.
# NOTE: the inner keys here are strings, so the order is lexicographic
# ("10" sorts before "2").
s = sorted((list(v.keys())[0], k) for k, v in my_dictionary.items())
# Re-insert the outer entries in sorted order; OrderedDict keeps that order.
new_dic = OrderedDict([(k,my_dictionary[k]) for _, k in s])

Related

Replace all the keys in nested dictionary and merge duplicate keys in python

I have a nested dictionary that represents parent-child relationships. For example:
{
"45273425f5abc05b->s":
{
"12864f455e7c86bb->s": {
"12864f455e7c86bbexternal_call->c": {}
}
},
"c69aead72fcd6ec1->d":
{
"8ade76728bdddf27->d": {
"8ade76728bdddf27external_call->i": {}
},
"b29f07de47c5841f->d": {
"107bec1baede1bff->l": {
"e14ebabea4785c3f->l": {
"e14ebabea4785c3fexternal_call->r": {}
},
"e36b35daa794bd50->l": {
"e36b35daa794bd50external_call->a": {}
}
},
"b29f07de47c5841fexternal_call->l": {}
},
"1906ef2c2897ac01->d": {
"1906ef2c2897ac01external_call->e": {}
}
}
}
I want to do two things with this dictionary. Firstly I want to remove everything before and including "->" i.e I want to update the keys. Secondly, after renaming there will be duplicate values in the nested dictionary. for example the second element in the dictionary. If there are two keys with the same name I want to merge them into one. So, the result will look like the following:
{
"s":
{
"s": {
"c"
}
},
"d":
{
"d": {
"i",
"l": {
"l": {
"r",
"a"
}
},
"e"
}
}
}
How can I achieve this? I have written this code so far.
def alter_dict(nested_dict):
new_dict = {}
for k, v in nested_dict.items():
if isinstance(v, dict):
v = alter_dict(v)
new_key = k.split("->")[1]
new_dict[new_key] = v
return new_dict
It works for a simple one like the first element but doesn't work for the second one. It loses some information. The purpose of this is to create a graph with the dictionary.
You can use recursion:
import json
from collections import defaultdict
def merge(d):
    """Strip key prefixes and merge sibling dicts whose stripped keys collide.

    Each key is reduced to the text after its last ``"->"``. The children of
    all keys that reduce to the same name are collected and merged
    recursively, so duplicate names at one level collapse into one entry.

    Args:
        d: An iterable of dicts whose values are themselves dicts of the
            same shape (empty dicts mark leaves).

    Returns:
        A plain dict mapping each stripped key to its merged children.
    """
    grouped = defaultdict(list)
    for mapping in d:
        for full_key, children in mapping.items():
            # split(...)[-1] leaves keys without a "->" separator unchanged.
            grouped[full_key.split('->')[-1]].append(children)
    return {name: merge(children) for name, children in grouped.items()}
data = {'45273425f5abc05b->s': {'12864f455e7c86bb->s': {'12864f455e7c86bbexternal_call->c': {}}}, 'c69aead72fcd6ec1->d': {'8ade76728bdddf27->d': {'8ade76728bdddf27external_call->i': {}}, 'b29f07de47c5841f->d': {'107bec1baede1bff->l': {'e14ebabea4785c3f->l': {'e14ebabea4785c3fexternal_call->r': {}}, 'e36b35daa794bd50->l': {'e36b35daa794bd50external_call->a': {}}}, 'b29f07de47c5841fexternal_call->l': {}}, '1906ef2c2897ac01->d': {'1906ef2c2897ac01external_call->e': {}}}}
print(json.dumps(merge([data]), indent=4))
Output:
{
"s": {
"s": {
"c": {}
}
},
"d": {
"d": {
"i": {},
"l": {
"l": {
"r": {},
"a": {}
}
},
"e": {}
}
}
}

how to format json dumps

I got json dumps like this:
"aaa": {
"bbb": {
"ccc": {
"ddd": "string1",
"eee": "string2"
}
},
"kkk": "string3"
}
And I'd like to format it this way: enclose every key-value pair (separated by :) with {} and then replace : with ,.
I know that I can use re.sub() to replace string patterns, but regular expression does not work with overlapping patterns, so I can match, for example, "ddd": "string1" but not "ccc": {...} at the same time.
For the above json string, I'd like to get:
{"aaa", {
{"bbb", {
{"ccc", {
{"ddd", "string1"},
{"eee", "string2"}
}}
}},
{"kkk", "string3"}
}}
Here's a hack which converts everything to lists and then changes square brackets to curly ones. If your strings might contain square brackets that'll be a problem.
import json
inp = """
{
"aaa": {
"bbb": {
"ccc": {
"ddd": "string1",
"eee": "string2"
}
},
"kkk": "string3"
}
}
"""
inp = json.loads(inp)
def items(d):
    """Recursively replace every dict with a list of ``(key, value)`` pairs.

    Lists are descended into as well, so a dict nested inside a list is
    converted too instead of being left as a dict. Any other value is
    returned unchanged.

    Args:
        d: A value as produced by ``json.loads`` (dict, list or scalar).

    Returns:
        The same structure with each dict turned into a list of 2-tuples,
        in the dict's iteration order.
    """
    if isinstance(d, dict):
        return [(k, items(v)) for k, v in d.items()]
    if isinstance(d, list):
        # Without this branch, dicts inside lists would survive and be
        # dumped with "key": value colons.
        return [items(v) for v in d]
    return d
inp = items(inp)
print(json.dumps(inp, indent=2).replace("[", "{").replace("]", "}"))
Output:
{
{
"aaa",
{
{
"bbb",
{
{
"ccc",
{
{
"ddd",
"string1"
},
{
"eee",
"string2"
}
}
}
}
},
{
"kkk",
"string3"
}
}
}
}
Note that you are treating dictionary keys as ordered when they aren't, so I made it more explicit with lists.
If it were me, I wouldn't dump to JSON in the first place, I'd serialize the native python data structure straight to C++ initializer list syntax:
myobj = {
"aaa": [
{ "bbb": {
"ccc": [
{"ddd": "string1"},
{"eee": "string2"}
]
}},
{ "kkk": "string3" }
]
}
def pyToCpp(value, key=None):
    """Render a nested Python structure as C++ initializer-list text.

    Args:
        value: A dict, list or scalar. Scalars are formatted with ``str()``
            and wrapped in double quotes (no escaping is performed).
        key: When given, the result is the pair ``{ "key", <value> }``.

    Returns:
        A string. A dict with exactly one entry renders as a bare pair;
        a dict with zero or several entries renders as a braced,
        comma-separated sequence of pairs, the same way a list does.
    """
    # Compare against None so falsy keys such as "" or 0 still form a pair.
    if key is not None:
        return '{{ "{}", {} }}'.format(key, pyToCpp(value))
    if isinstance(value, dict):
        pairs = [pyToCpp(v, k) for k, v in value.items()]
        if len(pairs) == 1:
            return pairs[0]
        # Emit every entry; returning from inside the loop would keep only
        # the first one and yield None for an empty dict.
        return '{{ {} }}'.format(", ".join(pairs))
    if isinstance(value, list):
        return '{{ {} }}'.format(", ".join(pyToCpp(v) for v in value))
    return '"{}"'.format(value)
y = pyToCpp(myobj)
print(y)
Output:
{ "aaa", { { "bbb", { "ccc", { { "ddd", "string1" }, { "eee", "string2" } } } }, { "kkk", "string3" } } }
Run it here: https://repl.it/repls/OddFrontUsers

Want to get acces inner element of json with loop

I want to loop over the JSON below and collect the inner "alias" values of both dims and metrics, appending them to two separate Python lists, dimsList and metricsList.
json_obj =
{
"dataset":"246",
"dims":{
"Location":{
"alias":"Location",
"format":""
}
},
"metrics":{
"ToTal_Dwell":[
{
"agg":"sum",
"format":"",
"alias":"ToTal_Dwell"
}
]
},
"filters":"",
"limit":"10"
}
expecting result to be like dimsList = ['Location'] and metricsList = ['ToTal_Dwell']
you can recursively iterate using .items(). every time you see an inner dict you make a recursive call, and an inner list causes a call per inner dict in the list.
try this:
json_obj = {
"dataset": "246",
"dims": {
"Location": {
"alias": "Location",
"format": ""
}
},
"metrics": {
"ToTal_Dwell": [
{
"agg": "sum",
"format": "",
"alias": "ToTal_Dwell"
}
]
},
"filters": "",
"limit": "10"
}
def extract_inner_values(d, key):
    """Collect every value stored under ``key`` anywhere inside ``d``.

    Dicts and lists are searched recursively, including lists nested in
    lists. Scalars are ignored, so a list that mixes dicts with strings or
    numbers no longer raises ``AttributeError``.

    Args:
        d: The structure to search (normally a dict from ``json.loads``).
        key: The dictionary key whose values should be collected.

    Returns:
        A list of matching values in traversal order (empty if none).
    """
    results = []
    if isinstance(d, dict):
        for k, v in d.items():
            if k == key:
                results.append(v)
            # Descend regardless of a match: the value itself may contain
            # further occurrences of the key.
            results.extend(extract_inner_values(v, key))
    elif isinstance(d, list):
        for item in d:
            results.extend(extract_inner_values(item, key))
    return results
dimsList = extract_inner_values(json_obj["dims"], "alias")
metricsList = extract_inner_values(json_obj["metrics"], "alias")
print(dimsList)
print(metricsList)
Output:
['Location']
['ToTal_Dwell']

Converting all lists in dict to dict by considering list index as key in python

I am trying to convert all Lists inside dict in dict by considering list index as its key.
Sample Input :
{
"checksum": "c540fcd985bf88c87e48c2bfa1df5498",
"data": {
"sampleMetrics": {
"name": "DNA Library QC Metrics",
"passQualityControl": true,
"metrics": [{
"name": "CONTAMINATION_SCORE",
"value": 1302,
"LSL": 0,
"USL": 3106,
"UOM": "NA"
}]
}
}
}
Expected output :
{
"checksum": "c540fcd985bf88c87e48c2bfa1df5498",
"data": {
"sampleMetrics": {
"name": "DNA Library QC Metrics",
"passQualityControl": true,
"metrics": {
"0": {
"name": "CONTAMINATION_SCORE"
},
"1": {
"value": 1302
},
"2": {
"LSL": 0
},
"3": {
"USL": 3106
},
"4": {
"UOM": "NA"
}
}
}
}
}
Trial :
def list_to_dict_by_index(lst):
print {str(k): str(v) for k, v in enumerate(lst)}
list_to_dict_by_index([ {"d1" : 1}, {"d2" : 2} ])
But this is working for simple list. How can I do the same for all lists in dict?
(No matter wherever list is there in dict.)
List may contain another list:
ex:
sample input2:
"metrics": [{
"name": ["CONTAMINATION_SCORE", "TOTAL_SCORE"],
"value": 1302,
"LSL": 0,
"USL": 3106,
"UOM": "NA"
}]
sample output2:
"metrics" : {
"0": {
"name": {
"0": "CONTAMINATION_SCORE",
"1": "TOTAL_SCORE"
}
},
"1": {
"value": 1302
},
"2": {
"LSL": 0
},
"3": {
"USL": 3106
},
"4": {
"UOM": "NA"
}
}
dic = {
"checksum": "c540fcd985bf88c87e48c2bfa1df5498",
"data": {
"sampleMetrics": {
"name": "DNA Library QC Metrics",
"passQualityControl": True,
"metrics": [{
"name": "CONTAMINATION_SCORE",
"value": 1302,
"LSL": 0,
"USL": 3106,
"UOM": "NA"
}]
}
}
}
dic2 = dic['data']['sampleMetrics']['metrics']
dic3 = {}
for i in dic2:
    for j in i:
        # Number entries continuously across all dicts in the list; restarting
        # at 0 for each dict would overwrite entries from earlier dicts.
        dic3[len(dic3)] = {j: i[j]}
dic['data']['sampleMetrics']['metrics'] = dic3
print(dic)
"""
output
{
'checksum': 'c540fcd985bf88c87e48c2bfa1df5498',
'data': {
'sampleMetrics': {
'name': 'DNA Library QC Metrics',
'passQualityControl': True,
'metrics': {
0: {
'name': 'CONTAMINATION_SCORE'
},
1: {
'value': 1302
},
2: {
'LSL': 0
},
3: {
'USL': 3106
},
4: {
'UOM': 'NA'
}
}
}
}
}
"""
Your second sample input/output contains components that are consistent with the question title, namely, the transformation of lists into dictionaries with list indices as keys:
# input
"name": ["CONTAMINATION_SCORE", "TOTAL_SCORE"]
# output
"name": {
"0": "CONTAMINATION_SCORE",
"1": "TOTAL_SCORE"
}
However both sample input/output contain lists of dictionaries, which are expected to be transformed in a different manner, i.e. into a dictionary of dictionaries with keys as the enumerable indices of the dictionary's entries.
# input
"metrics": [{
...
"USL": 3106,
"UOM": "NA"
}]
# output
"metrics" : {
...
"3": {
"USL": 3106
},
"4": {
"UOM": "NA"
}
}
This is a lot of words that attempt to articulate essentially the following two cases:
{[{'foo': 'bar'}]} => {'0': {'foo': 'bar'}}
{'foo': ['bar']} => {'foo': {'0': 'bar'}}
This may be a source of failure for you. Additionally, your attempt at solution only iterates over the top-most level of the dictionary. You must recursively traverse the dictionary if you want to affect entries at arbitrary levels, i.e. you want something of the form:
from collections import abc


def update(d):
    """Skeleton for visiting every leaf of a nested mapping in place.

    Nested mappings are traversed recursively. Leaf values are currently
    written back unchanged; put the per-leaf transformation where marked.

    Args:
        d: A mutable mapping, possibly containing nested mappings.

    Returns:
        The same object ``d``, after traversal.
    """
    # Iterate over a copy so assigning into d cannot invalidate the iterator.
    for k, v in d.copy().items():
        if isinstance(v, abc.Mapping):
            d[k] = update(v)
        else:
            # Placeholder: replace `v` with the transformed leaf value.
            d[k] = v
    return d
Use iteritems instead of items if you're using python 2 rather than python 3. Also, copy is necessary so that the iterator is not invalidated when the dictionary is mutated.
You can work in an enumerative loop like you initially used to get a working solution. Careful to add recursive calls to affect all levels of the dictionary. Collectively this might look something like the following:
from collections import abc
def list_of_dict_to_dict(d):
    """Split a mapping into one single-entry dict per item, keyed by position.

    Args:
        d: The single mapping taken from a one-element list of mappings.

    Returns:
        ``{0: {k0: v0}, 1: {k1: v1}, ...}`` with each value converted
        recursively. Keys are integer positions.
    """
    dd = {}
    for i, (key, val) in enumerate(d.items()):
        if isinstance(val, abc.Mapping):
            val = transform_dict(val)
        elif isinstance(val, list):
            val = list_to_dict(val)
        dd[i] = {key: val}
    return dd


def list_to_dict(l):
    """Convert a list into a dict keyed by integer list index.

    Mapping elements are transformed recursively, and so are nested lists.
    """
    d = {}
    for i, val in enumerate(l):
        if isinstance(val, abc.Mapping):
            d[i] = transform_dict(val)
        elif isinstance(val, list):
            d[i] = list_to_dict(val)
        else:
            d[i] = val
    return d


def transform_dict(d):
    """Replace every list inside ``d`` with an index-keyed dict, in place.

    A list holding exactly one mapping is treated specially: that mapping
    is exploded into one single-entry dict per item (see
    ``list_of_dict_to_dict``). Every other list, including an empty one,
    goes through ``list_to_dict``.

    Args:
        d: A mutable mapping; it is modified and also returned.

    Returns:
        The same object ``d``.
    """
    # Snapshot the items so assigning into d cannot disturb the iteration.
    for k, v in list(d.items()):
        if isinstance(v, list):
            # Check the length first: v[0] on an empty list raises IndexError.
            if len(v) == 1 and isinstance(v[0], abc.Mapping):
                d[k] = list_of_dict_to_dict(v[0])
            else:
                d[k] = list_to_dict(v)
        elif isinstance(v, abc.Mapping):
            d[k] = transform_dict(v)
    return d
This assumes the list of dictionaries case always contains a single dictionary. It isn't clear what you expect in other cases.
What you are asking is clear, but your first example does not match the rule "Converting all lists in dict to dict by considering list index as key". The metrics key is mapped to a list with one element, and that element is a dictionary: [{...}]. Hence, your expected output is:
...
"metrics": {
"0": {
"name": "CONTAMINATION_SCORE",
"value": 1302,
"LSL": 0,
"USL": 3106,
"UOM": "NA"
}
}
...
If this is what you want, you just have to use a DFS:
def list_to_dict_by_key(json_value):
    """Return a copy of ``json_value`` with every list turned into a dict.

    The traversal is depth-first: each list becomes a dict keyed by the
    string form of the element index, each dict keeps its keys, and both
    have their values converted recursively. Other values pass through.

    Args:
        json_value: A dict, list or scalar (e.g. from ``json.loads``).

    Returns:
        A new structure; the input is not modified.
    """
    if isinstance(json_value, list):
        return {str(i): list_to_dict_by_key(v) for i, v in enumerate(json_value)}
    if isinstance(json_value, dict):
        return {k: list_to_dict_by_key(v) for k, v in json_value.items()}
    return json_value
The lists are replaced by dictionaries. The values of the dictionaries are processed.
>>> list_to_dict_by_key(sample1)
{'checksum': 'c540fcd985bf88c87e48c2bfa1df5498', 'data': {'sampleMetrics': {'name': 'DNA Library QC Metrics', 'passQualityControl': True, 'metrics': {'0': {'name': 'CONTAMINATION_SCORE', 'value': 1302, 'LSL': 0, 'USL': 3106, 'UOM': 'NA'}}}}}
>>> list_to_dict_by_key(sample2)
{'checksum': 'c540fcd985bf88c87e48c2bfa1df5498', 'data': {'sampleMetrics': {'name': 'DNA Library QC Metrics', 'passQualityControl': True, 'metrics': {'0': {'name': {'0': 'CONTAMINATION_SCORE', '1': 'TOTAL_SCORE'}, 'value': 1302, 'LSL': 0, 'USL': 3106, 'UOM': 'NA'}}}}}
EDIT: sample1 is your first Sample Input, and sample2 is almost the same: "name": ["CONTAMINATION_SCORE", "TOTAL_SCORE"] replaces "name": "CONTAMINATION_SCORE"

Add the sub dictonary element in list in python

I am trying to add my sub dictionary element in list. It is giving me type error.
Here is dictionary and my code:
{
"key1": "value1",
"key2": {
"skey1": "svalue2",
"skey2": {
"sskey1": [{
"url": "value",
"sid": "511"
},
{
"url": "value",
"sid": "522"
},
{
"url": "value",
"sid": "533"
}]
}
}
}
I want to add the sid into the list like [511,522,533]:
here is my code:
rsId=[]
for i in op['key2']['skey2']['sskey1']:
for k,v in i.items():
if k=='sid':
rsId.append(v)
D = {
    "key1": "value1",
    "key2": {
        "skey1": "svalue2",
        "skey2": {
            "sskey1": [
                {
                    "url": "value",
                    "sid": "511"
                },
                {
                    "url": "value",
                    "sid": "522"
                },
                {
                    "url": "value",
                    "sid": "533"
                }
            ]
        }
    }
}
# Walk the list under key2 -> skey2 -> sskey1 and keep each entry's "sid".
res = []
for i in D['key2']['skey2']['sskey1']:
    res.append(i['sid'])
# Function-call form of print works on both Python 2 and Python 3.
print(res)
Result:
['511', '522', '533']
or a one line code:
res = [i['sid'] for i in D['key2']['skey2']['sskey1']]
You can use a list comprehension:
rsId = [v for item in op['key2']['skey2']['sskey1'] for k, v in item.items() if k == 'sid']
You can try with one line something like this:
print(list(map(lambda x:x['sid'],data['key2']['skey2']['sskey1'])))
output:
['511', '522', '533']
If you want value in int then:
print(list(map(lambda x:int(x['sid']),data['key2']['skey2']['sskey1'])))
output:
[511, 522, 533]
when data is:
data = {
"key1":"value1",
"key2":{
"skey1":"svalue2",
"skey2":{
"sskey1":[{
"url":"value",
"sid":"511"
},
{
"url":"value",
"sid":"522"
},
{
"url":"value",
"sid":"533"
} ]
}
}
}
Get the int as output
The type error is probably caused by the list items being strings. Converting each one to a number with int() solves the problem.
The only change to your code is in the last line of code.
op = {
"key1": "value1",
"key2": {
"skey1": "svalue2",
"skey2": {
"sskey1": [{
"url": "value",
"sid": "511"
},
{
"url": "value",
"sid": "522"
},
{
"url": "value",
"sid": "533"
}]
}
}
}
rsId = []
for i in op['key2']['skey2']['sskey1']:
for k, v in i.items():
if k == 'sid':
rsId.append(int(v)) # put the int here
output
>>> rsId
[511, 522, 533]
Another approach: checking every key that has a dictionary as value
op = {
    "key1": "value1",
    "key2": {
        "skey1": "svalue2",
        "skey2": {
            "sskey1": [
                {
                    "url": "value",
                    "sid": "511"
                },
                {
                    "url": "value",
                    "sid": "522"
                },
                {
                    "url": "value",
                    "sid": "533"
                }
            ]
        }
    }
}
l = []
for sub1 in op.values():  # searching in the main dictionary
    if not isinstance(sub1, dict):  # only descend into dict values (sub1)
        continue
    for sub2 in sub1.values():  # for every value of sub1
        if not isinstance(sub2, dict):  # only descend into dict values (sub2)
            continue
        for entries in sub2.values():  # for each value of subdict 2
            # Skip anything that is not a list so scalars cannot crash the loop.
            if not isinstance(entries, list):
                continue
            for entry in entries:  # for every item of the list
                if isinstance(entry, dict) and 'sid' in entry:
                    l.append(int(entry['sid']))  # append the value as an int
print(l)
output
[511, 522, 533]

Categories

Resources