aggregating values in dictionary - python

I have a deep dictionary like this:
# Sample input: top-level id -> alphanumeric key -> city -> account fields.
# (The original literal was missing its final closing brace.)
myDict = {
    '123456': {
        '348adbd39r': {
            'LONDON': {
                'c_name': 'abc',
                'acct': '84720',
            },
            'PARIS': {
                'c_name': 'xyz',
                'acct': '73642',
            },
        },
        '2862aef3': {
            'NYC': {
                'c_name': 'hhdls3',
                'acct': '92742',
            },
        },
        '82gfg24': {
            'NYC': {
                'c_name': 'hquer',
                'acct': '34567',
            },
            'PARIS': {
                'c_name': 'ljad',
                'acct': '93742',
            },
        },
    },
}
I want to 'aggregate' it based on the city names. The output should look like below:
# Expected aggregation: city -> field -> list of every value seen for that city.
outDict = {
    'LONDON': {
        'c_name': ['abc'],
        'acct': ['84720'],
    },
    'PARIS': {
        'c_name': ['xyz', 'ljad'],
        'acct': ['73642', '93742'],
    },
    'NYC': {
        'c_name': ['hhdls3', 'hquer'],
        # The NYC accounts in the input are '92742' and '34567'
        # ('73642' belongs to PARIS).
        'acct': ['92742', '34567'],
    },
}
This is what I did:
# Asker's attempt (Python 2: uses dict.iteritems). Builds one {field: []}
# template, stores it under each city name, then walks myDict and appends every
# field value to the list found under aggregated_dict[city][field].
cust_fields = ['c_name', 'acct']
field_dict = {field: [] for field in cust_fields}
aggregated_dict = {}
city_names = ['LONDON', 'PARIS', 'NYC']
for city in city_names:
# Each city key is bound to the single field_dict object created above.
aggregated_dict[city] = field_dict
# Nesting walked below: id -> alphanumeric key -> city -> {field: value}.
for id, an_dict in myDict.iteritems():
for alphaNum, city_dict in an_dict.iteritems():
for city, acct_dict in city_dict.iteritems():
for field, val in acct_dict.iteritems():
aggregated_dict[city][field].append(val)
But, the above is updating the field-values for all the cities...rather than just the particular city it is working on. Not sure where the logic is wrong. Any help is appreciated (either correct where my mistake is or any new logic...).
Thanks!

The problem is that, in your loop over city_names, you assign field_dict itself as the value of aggregated_dict[city], so every city ends up referring to the same dictionary object. When you append through any one of those references (for any city), the change is visible through all of them.
An easy fix for this is to change
for city in city_names:
aggregated_dict[city] = field_dict
To:
for city in city_names:
aggregated_dict[city] = {field: [] for field in cust_fields}
I would also look at collections.defaultdict for this type of aggregation.
from collections import defaultdict

# city name -> field name -> list of values. Missing levels are created on
# first access, so no per-city initialisation is needed.
collected = defaultdict(lambda: defaultdict(list))

# Walk every top-level id rather than only '123456', so the aggregation keeps
# working when myDict holds more than one id.
for id_records in myDict.values():
    for city_records in id_records.values():
        for city_name, records in city_records.items():
            for record_name, record_value in records.items():
                collected[city_name][record_name].append(record_value)

for city_name, records in collected.items():
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(city_name)
    print(dict(records))

from collections import defaultdict

# city name -> field name -> list of values collected for that city.
# The original snippet used final_dict without ever creating it.
final_dict = {}

# Nesting walked below: id -> alphanumeric key -> city -> {field: value}.
for val in myDict.values():
    for val1 in val.values():
        for key2, val2 in val1.items():
            # Reuse the city's accumulator if it exists, otherwise create it.
            d = final_dict.setdefault(key2, defaultdict(list))
            for k, v in val2.items():
                d[k].append(v)

You can use recursion:
from collections import defaultdict

# Module-level accumulator filled by aggregate():
# city name -> {'c_name': [...], 'acct': [...]}.
d1 = defaultdict(dict)


def aggregate(d):
    """Recursively collect the per-city records found anywhere inside *d*.

    Every key equal to 'LONDON', 'PARIS' or 'NYC' is treated as a city record;
    its 'c_name' and 'acct' values are appended to the matching lists in the
    module-level ``d1``. Any other key is descended into when its value is a
    dict. Values are appended as plain items, so the lists stay flat
    (``['xyz', 'ljad']``, not ``['xyz', ['ljad']]``).

    Raises:
        KeyError: if a city record lacks 'c_name' or 'acct'.
    """
    for a, b in d.items():
        if a in ('LONDON', 'PARIS', 'NYC'):
            city = d1[a]  # the defaultdict creates the per-city dict on first use
            city.setdefault('c_name', []).append(b['c_name'])
            city.setdefault('acct', []).append(b['acct'])
        elif isinstance(b, dict):
            # Only dicts can contain further city records; skip scalar leaves.
            aggregate(b)
aggregate(myDict)
print(dict(d1))
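Output (once each name is appended as a plain string, i.e. d1[a]['c_name'].append(b['c_name']) rather than append([b['c_name']]), so the lists stay flat):
{'PARIS': {'acct': ['73642', '93742'], 'c_name': ['xyz', 'ljad']}, 'NYC': {'acct': ['92742', '34567'], 'c_name': ['hhdls3', 'hquer']}, 'LONDON': {'acct': ['84720'], 'c_name': ['abc']}}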

Related

String array to nested dictionary Python

I'm a beginner in Python and I want to do the following:
I have a list of strings and I want to build a nested dictionary that uses the strings as keys, like this:
Transform this:
['Users', 'ID', 'Age']
Into this:
{
'Users': {
'ID': {
'Age': None
}
}
}
You could do this like so:
def tranform_list_to_dict(lst):
    """Nest the items of *lst* as dict keys, the first item outermost.

    The last item maps to ``None``; an empty *lst* yields ``{}``.
    """
    nested = {}
    # Build from the innermost key outwards.
    for key in reversed(lst):
        inner = nested.copy() if nested else None
        nested = {key: inner}
    return nested
my_list = ['Users', 'ID', 'Age']
print(tranform_list_to_dict(my_list))
Which will produce:
{
"Users": {
"ID": {
"Age": None
}
}
}
you may do this
list1 = ['User', 'ID', 'Age']
def transform(alist):
    """Nest the items of *alist* as dict keys, the first item outermost.

    ``['User', 'ID', 'Age']`` becomes ``{'User': {'ID': {'Age': None}}}``.
    An empty *alist* yields ``{}``.
    """
    if not alist:
        return {}
    dic = None
    # Wrap from the innermost key outwards. Each key is used exactly once;
    # the earlier index-based loop reused alist[0] and dropped the last key.
    for key in reversed(alist):
        dic = {key: dic}
    return dic
print(transform(list1))

Python dictionary to dictionary mapping

I'm looking for a recursive, dynamic function or library that remaps a (nested) dictionary's keys according to a mapping of specific keys.
Example
my_data = {
'name': 'Test',
'primary_address': {
'full_address': 'Address test',
},
'other_field': 'Other field'
}
mapping_keys = {
'name': 'full_name',
'primary_address__full_address': 'primary_address__address'
}
Expected result
{
'full_name': 'Test',
'primary_address': {
'address': 'Address test',
}
}
What I tried, but it doesn't work with recursion
def mapping_data(json_data, mapping_keys):
    """Rename the top-level keys of each dict in *json_data*.

    Returns one dict per input dict, holding only the keys present in
    *mapping_keys*, stored under their mapped names. Nested dicts are not
    descended into.
    """
    return [
        {mapping_keys[key]: data[key] for key in data if key in mapping_keys}
        for data in json_data
    ]
from functools import reduce
import operator
my_data = {
'name': 'Test',
'primary_address': {
'full_address': 'Address test',
},
'other_field': 'Other field'
}
mapping_keys = {
'name': 'full_name',
'primary_address__full_address': 'primary_address__address'
}
def get_item_by_path(data, path):
    """Return the value reached by indexing *data* with each key of *path* in turn.

    An empty *path* returns *data* itself.
    """
    node = data
    for key in path:
        node = node[key]
    return node
def set_item_by_path(data, path, value):
    """Store *value* in *data* at the nested location named by *path*.

    Missing intermediate dicts are created one level at a time, so entries that
    already exist next to the new one are kept. (Rebuilding the whole branch
    from ``path[0]`` on a KeyError discarded them.)

    Raises:
        IndexError: if *path* is empty.
    """
    node = data
    for key in path[:-1]:
        try:
            node = node[key]
        except KeyError:
            # Create only the missing level and keep walking.
            node[key] = {}
            node = node[key]
    node[path[-1]] = value
def mapKeys(data, mapping_keys):
    """Build a new dict from *data* with keys renamed per *mapping_keys*.

    Both the keys and the values of *mapping_keys* are '__'-separated paths:
    the key is where to read in *data*, the value is where to write in the
    result. Entries of *data* that are not named in *mapping_keys* are omitted.
    """
    result = {}
    for oldkey, newkey in mapping_keys.items():
        # Read from the `data` argument, not a module-level dict, so the
        # function works for whatever input the caller passes.
        val = get_item_by_path(data, oldkey.split('__'))
        set_item_by_path(result, newkey.split('__'), val)
    return result
newDict = mapKeys(my_data, mapping_keys)
def mapping_data(json_data, mapping_keys, prefix=''):
    """Recursively rename the keys of *json_data* per *mapping_keys*.

    A key's lookup path is its ancestors' keys joined with '__'. When that path
    is in *mapping_keys*, the key is replaced by the last '__' segment of the
    mapped name. Non-dict values are kept only when their path is mapped; dict
    values are always kept and processed recursively.
    """
    result = {}
    for name, value in json_data.items():
        path = (prefix + '__' + name) if prefix else name
        is_mapped = path in mapping_keys
        if is_mapped:
            name = mapping_keys[path].rsplit('__', 1)[-1]
        if isinstance(value, dict):
            result[name] = mapping_data(value, mapping_keys, path)
        elif is_mapped:
            result[name] = value
    return result
res = mapping_data(my_data, mapping_keys)
print (res)
Output
{
"full_name": "Test",
"primary_address": {
"address": "Address test"
}
}

Remove keys from a nested dict (Python keys)

I'm pretty new in Python, thanks in advance for your help.
I built the following code (I tried the below, I used a dictionary within a dictionary).
The idea is to keep only the entries whose hair.color matches the requested value and remove the others. In this example (asking for "brown"): keep Michael and remove everyone else.
Code:
# Asker's original attempt: walks the module-level `people` dict and compares
# each entry's "hair.color" with the requested value.
def answers(hair_questions):
try:
for i in people:
if people[i]["hair.color"]==hair_questions:
print(people[i])
else:
# Removes the non-matching entry from `people`.
# NOTE(review): this appears to run inside the `for i in people` loop, i.e.
# while `people` is still being iterated - confirm against the original indentation.
del people[i]
return people[i]
# Bare except: any exception raised in the try body ends up here.
except:
print("Doesn´t exist")
answers("brown")
On People:
people={
"Anne":
{
"gender":"female",
"skin.color":"white",
"hair.color":"blonde",
"hair.shape":"curly"
}
,
"Michael":
{
"citizenship":"africa",
"gender":"male",
"hair.color":"brown",
"hair.shape":"curly"
}
,
"Ashley":
{
"gender":"female",
"citizenship":"american",
"hair.color":"blonde",
"hair.shape":"curly "
}
}
The code only checks the first key: when the condition fails for it (its hair colour is blonde, i.e. people[i]["hair.color"] != "brown"), that one key is deleted, and then the code gets "stuck".
My current output:
"people"=
"Michael":
{
"citizenship":"africa",
"gender":"male",
"hair.color":"brown",
"hair.shape":"curly"
}
,
"Ashley":
{
"gender":"female",
"citizenship":"american",
"hair.color":"blonde",
"hair.shape":"curly "
}
Instead, I wanted:
"people"=
"Michael":
{
"citizenship":"africa",
"gender":"male",
"hair.color":"brown",
"hair.shape":"curly"
}
I want an output, for this case, (only) Michael.
You can't delete keys from a dictionary while iterating over it in a for loop:
people={
"Anne":
{
"gender":"female",
"skin.color":"white",
"hair.color":"blonde",
"hair.shape":"curly"
},
"Michael":
{
"citizenship":"africa",
"gender":"male",
"hair.color":"brown",
"hair.shape":"curly"
},
"Ashley":
{
"gender":"female",
"citizenship":"american",
"hair.color":"blonde",
"hair.shape":"curly "
}
}
def answers(hair_questions):
    """Return a new dict with the entries of ``people`` whose hair colour matches.

    *hair_questions* may be a single colour (``"brown"``) or a collection of
    colours (``["brown", "black"]``). ``people`` itself is left unchanged.
    """
    # Wrap a bare string so that `in` tests whole-value membership; with a str
    # on the right-hand side `in` is a substring test.
    if isinstance(hair_questions, str):
        wanted = (hair_questions,)
    else:
        wanted = hair_questions
    return {
        name: person
        for name, person in people.items()
        if person["hair.color"] in wanted
    }
print(answers("brown"))
OR
def answers(hair_questions):
    """Delete from ``people`` every entry whose hair colour does not match.

    *hair_questions* may be a single colour (``"brown"``) or a collection of
    colours. ``people`` is modified in place; nothing is returned.
    """
    # Wrap a bare string so that `in` tests whole-value membership; with a str
    # on the right-hand side `in` is a substring test.
    if isinstance(hair_questions, str):
        wanted = (hair_questions,)
    else:
        wanted = hair_questions
    # Collect the names first and delete afterwards: a dict must not change
    # size while it is being iterated.
    unwanted = [
        name for name, person in people.items()
        if person["hair.color"] not in wanted
    ]
    for name in unwanted:
        del people[name]
answers("brown")
print(people)
O/P:
{'Michael': {'citizenship': 'africa', 'gender': 'male', 'hair.color': 'brown', 'hair.shape': 'curly'}}
You can use a dict comprehension:
brown = {key:value for key,value in people.items() if people[key]["hair.color"] != "blonde"}
print (brown)
which is equivalent to:
brown= {}
for key,value in people.items():
if people[key]["hair.color"] != "blonde":
brown[key] = value
print (brown)
output:
{'Michael': {'citizenship': 'africa', 'gender': 'male', 'hair.color': 'brown', 'hair.shape': 'curly'}}

Get json object with value with python for loop

When I use:
for reports in raw_data:
for names in reports["names"]:
report_name = json.dumps(names).strip('"')
report_names.append(report_name)
I get the key/object name: 'report1', ...
When I use:
for reports in raw_data:
for names in reports["names"].values():
report_name = json.dumps(names).strip('"')
report_names.append(report_name)
I get the value of the object: 'name1', ...
How do I get the key and the value together, for example: 'report1': 'name1', ...
The json:
[
{
"names": {
"report1": "name1",
"report2": "name2"
}
},
{
"names": {
"report3": "name3",
"report4": "name4"
}
}
]
You need to loop over each dictionary in the object, then extract each key: value pair from items():
data = [
{
"names": {
"report1": "name1",
"report2": "name2"
}
},
{
"names": {
"report3": "name3",
"report4": "name4"
}
}
]
for d in data:
for k, v in d["names"].items():
print(k, v)
Result:
report1 name1
report2 name2
report3 name3
report4 name4
Or if you can just print out the tuple pairs:
for d in data:
for pair in d["names"].items():
print(pair)
# ('report1', 'name1')
# ('report2', 'name2')
# ('report3', 'name3')
# ('report4', 'name4')
If you want all of the pairs in a list, use a list comprehension:
[pair for d in data for pair in d["names"].items()]
# [('report1', 'name1'), ('report2', 'name2'), ('report3', 'name3'), ('report4', 'name4')]
Try something like this:
import json

# Load the JSON document: a list of objects, each with a "names" mapping.
with open(r'jsonfile.json', 'r') as f:
    qe = json.load(f)

# Each element of the list is a dict, so comparing the element itself with
# 'name1' can never match. Look inside its "names" mapping instead and print
# the key together with the value.
for item in qe:
    for report, name in item["names"].items():
        if name == 'name1':
            print(report, name)

Update key without affecting the key's values within a nested dictionary

I am trying to update a key while retaining its values within nested dictionaries.
While I have found a method to do so, I had to create new dictionaries to make it work. Could anyone offer better insight into the approach I have taken?
# page -> current integer key -> menu data.
init_dict = {
    'pageA': {
        0: {
            'menuA': [
                'a01',
                'a02',
            ],
        },
    },
    'pageB': {
        1: {
            'menuB': [
                'b10',
            ],
        },
    },
}

# page -> the integer key it should have afterwards.
changed = {'pageB': 0, 'pageA': 1}

# Step 1: drop the integer level, keeping the (last) menu data of each page.
condense_dict = {}
for page, by_index in init_dict.items():
    for menus in by_index.values():
        condense_dict[page] = menus

# Step 2: re-wrap each page's menu data under its new integer key.
new_dict = {
    page: {changed.get(page): condense_dict.get(page)}
    for page in condense_dict
}
My expected output is as follows:
{
'pageA' : {
1 : {
'menuA' : [
'a01',
'a02'
]
}
},
'pageB' : {
0 : {
'menuB' : [
'b10'
]
}
}
}
You can pop the presumably only key from the sub-dict and assign it to the new key for each entry in changed:
# For each page, move its (presumably only) entry from the old key to the new one.
for page, new_key in changed.items():
    sub_dict = init_dict[page]
    old_key = next(iter(sub_dict))
    # pop() runs before the assignment, so this also works when old_key == new_key.
    sub_dict[new_key] = sub_dict.pop(old_key)
init_dict becomes:
{'pageA': {1: {'menuA': ['a01', 'a02']}}, 'pageB': {0: {'menuB': ['b10']}}}
Using the .pop() method this can be done similar to this (although I'm sure you could rewrite it better)
init_dict = {
'pageA': {
0: {
'menuA' : [
'a01',
'a02'
]
}
},
'pageB': {
1: {
'menuB': [
'b10'
]
}
}
}
print(init_dict)
thing = init_dict.pop('pageA')
sub_thing = thing.pop(0)
redone = {1: sub_thing}
init_dict.update({'pageA': redone})
print(init_dict)
{'pageA': {0: {'menuA': ['a01', 'a02']}}, 'pageB': {1: {'menuB': ['b10']}}}
{'pageA': {1: {'menuA': ['a01', 'a02']}}, 'pageB': {1: {'menuB': ['b10']}}}
You can see it's the same data as we start with, but we changed 0 to 1
Here I use .pop() and change it inplace. With the same init_dict as you:
# old integer key -> new integer key.
change_to = {1: 0, 0: 1}

for k, v in init_dict.items():
    # Compute the renamed mapping first, then swap the contents in place.
    # Popping and re-inserting keys while looping over v.keys() changes the
    # dict during iteration, and sequential swaps (0 -> 1, then 1 -> 0) could
    # overwrite one value with another.
    renamed = {
        change_to.get(old_key, old_key): value
        for old_key, value in v.items()
    }
    v.clear()
    v.update(renamed)

Categories

Resources