I have a dict list of dicts.
dict_list = [
{"nameClient": "client.name"},
{"emailClient": "client.email"},
{"Document.typeDocument": "client.type_document"},
{"Document.numberDocument": "client.number_document"},
{"Document.docOne.number": "client.docone_number"},
{"Document.docOne.type": "client.docone_type"},
{"Document.docTwo.number": "client.doctwo_number"},
{"Document.docOne.extra.number": "client.docone_extra_number"},
]
I want to create a dict based on key of this dicts and get value from my class based on value of this dicts.
Client Class and Values initialized:
class Client:
def __init__(self, data):
self.name = data['name']
self.email = data['email']
self.type_document = data['type_document']
self.number_document = data['number_document']
self.docone_number = data['docone_number']
self.docone_type = data['docone_type']
self.doctwo_number = data['doctwo_number']
self.docone_extra_number = data['docone_extra_number']
# this will be passed on build_final_dict(FINAL_DICT, i, k, v, client)
client1 = Client({
"name": "Stack",
"email": "xxxxx#xxxx.com",
"type_document": "TPS",
"number_document": "22222222",
"docone_number": "11111111",
"docone_type": "docone type",
"doctwo_number": "doc two number",
"docone_extra_number": "doc extra number",
})
So I started passing to a function the key/value
FINAL_DICT = {}
for i in dict_list:
for k, v in i.items():
build_final_dict(FINAL_DICT, i, k, v, client)
My build_final_dict()
def build_final_dict(FINAL_DICT, i, k, v, client):
if '.' not in k:
FINAL_DICT[k] = getattr(client, v.replace('client.', ''))
else:
subdict = k.split('.')[0] # ex documento
subdict_key = i.items()[0][0]
subdict_key = subdict_key.replace('%s.' % subdict, '')
subdict_value = i.items()[0][1]
subdict_value = subdict_value.replace('client.', '')
if subdict not in FINAL_DICT:
FINAL_DICT[subdict] = dict()
result_value = getattr(client, subdict_value)
if '.' not in subdict_key:
FINAL_DICT[subdict][subdict_key] = result_value
else:
new_subkey = subdict_key.split('.')[0]
new_subvalue = subdict_key.split('.')[1]
if new_subkey not in FINAL_DICT[subdict]:
FINAL_DICT[subdict][new_subkey] = dict()
build_final_dict(FINAL_DICT[subdict][new_subkey], i, new_subvalue, v, client)
Actual Result:
{
"emailClient":"xxxxx#xxxx.com",
"nameClient":"Stack",
"Document":{
"typeDocument":"TPS",
"numberDocument":"22222222",
"docTwo":{
"number":"doc two number"
},
"docOne":{
"type":"docone type",
"extra":"doc extra number", // should continue creating dict within dict...
"number":"11111111"
}
}
}
Most of the dictionary is right. But the "extra" dictionary that I put inside a dictionary (third sub level) did not create a new dictionary and put the final value.
There is the possibility of having infinite sub dictionaries, I need my script to be prepared for that.
Result I expected (based on Client class Data):
{
"emailClient":"xxxxx#xxxx.com",
"nameClient":"Stack",
"Document":{
"typeDocument":"TPS",
"numberDocument":"22222222",
"docTwo":{
"number":"doc two number"
},
"docOne":{
"type":"docone type",
"extra": { "number": "doc extra number" }, // dict, not string
"number":"11111111"
}
}
}
I think, this will help you in transforming the initial JSON to the structure you need. This is in accordance to your original requirement of
I want to create a dict based on key of this dicts and get value from
my class based on value of this dicts.
I would try this in a split approach, to create nested dicts for every dict in your list (if they have nesting in keys), and then merge them together as single unit.
See an example approach here:
from collections.abc import MutableMapping
from functools import reduce
def merge(d1, d2):
# Merge 2 dictionaries -deep.
for k, v in d1.items():
if k in d2:
if all(isinstance(e, MutableMapping) for e in (v, d2[k])):
d2[k] = merge(v, d2[k])
md = d1.copy()
md.update(d2)
return md
def explode_to_dict(key, value):
# Create nested dicts based on the key structure
if "." in key:
p, c = key.rsplit(".", 1)
return explode_to_dict(p, {c: value})
else:
return {key: value}
if __name__ == '__main__':
dict_list = [
{"nameClient": "client.name"},
{"emailClient": "client.email"},
{"Document.typeDocument": "client.type_document"},
{"Document.numberDocument": "client.number_document"},
{"Document.docOne.number": "client.docone_number"},
{"Document.docOne.type": "client.docone_type"},
{"Document.docTwo.number": "client.doctwo_number"},
{"Document.docOne.extra.number": "client.docone_extra_number"},
]
result_dict = {}
result_dict = reduce(merge, [explode_to_dict(*d.popitem()) for d in dict_list])
print(json.dumps(result_dict))
This provides a structure like below(expected):
{
"nameClient": "client.name",
"emailClient": "client.email",
"Document": {
"typeDocument": "client.type_document",
"numberDocument": "client.number_document",
"docOne": {
"number": "client.docone_number",
"type": "client.docone_type",
"extra": {
"number": "client.docone_extra_number"
}
},
"docTwo": {
"number": "client.doctwo_number"
}
}
}
I suppose, you can proceed with your class manipulation from here!
Related
I have created a var that is equal to t.json. The JSON file is a follows:
{
"groups": {
"customerduy": {
"nonprod": {
"name": "customerduynonprod",
"id": "529646781943",
"owner": "cloudops#coerce.com",
"manager_email": ""
},
"prod": {
"name": "phishing_duyaccountprod",
"id": "241683454720",
"owner": "cloudops#coerce.com",
"manager_email": ""
}
},
"customerduyprod": {
"nonprod": {
"name": "phishing_duyaccountnonprod",
"id": "638968214142",
"owner": "cloudops#coerce.com",
"manager_email": ""
}
},
"ciasuppliergenius": {
"prod": {
"name": "ciasuppliergeniusprod",
"id": "220753788760",
"owner": "cia_developers#coerce.com",
"manager_email": "jarks#coerce.com"
}
}
}
}
my goal was to pars this JSON file and get value for "owner" and output it to a new var. Example below:
t.json = group_map
group_id_aws = group(
group.upper(),
"accounts",
template,
owner = group_map['groups']['prod'],
manager_description = "Groups for teams to access their product accounts.",
The error I keep getting is: KeyError: 'prod'
Owner occurs 4 times, so here is how to get all of them.
import json
# read the json
with open("C:\\test\\test.json") as f:
data = json.load(f)
# get all 4 occurances
owner_1 = data['groups']['customerduy']['nonprod']['owner']
owner_2 = data['groups']['customerduy']['prod']['owner']
owner_3 = data['groups']['customerduyprod']['nonprod']['owner']
owner_4 = data['groups']['ciasuppliergenius']['prod']['owner']
# print results
print(owner_1)
print(owner_2)
print(owner_3)
print(owner_4)
the result:
cloudops#coerce.com
cloudops#coerce.com
cloudops#coerce.com
cia_developers#coerce.com
You get a key error since the key 'prod' is not in 'groups'
What you have is
group_map['groups']['customerduy']['prod']
group_map['groups']['ciasuppliergenius']['prod']
So you will have to extract the 'owner' from each element in the tree:
def s(d,t):
for k,v in d.items():
if t == k:
yield v
try:
for i in s(v,t):
yield i
except:
pass
print(','.join(s(j,'owner')))
If your JSON is loaded in variable data, you can use a recursive function
that deals with the two containers types (dict and list) that can occur
in a JSON file, recursively:
def find_all_values_for_key(d, key, result):
if isinstance(d, dict):
if key in d:
result.append(d[key])
return
for k, v in d.items():
find_all_values_for_key(v, key, result)
elif isinstance(d, list):
for elem in d:
find_all_values_for_key(elem, key, result)
owners = []
find_all_values_for_key(data, 'owner', owners)
print(f'{owners=}')
which gives:
owners=['cloudops#coerce.com', 'cloudops#coerce.com', 'cloudops#coerce.com', 'cia_developers#coerce.com']
This way you don't have to bother with the names of intermediate keys, or in general the structure of your JSON file.
You don't have any lists in your example, but it is trivial to recurse through
them to any dict with an owner key that might "lurk" somewhere nested
under a a list element, so it is better to deal with potential future changes
to the JSON.
I have a json file for which I want to remove the $oid and $date and replace the keys like in the example below:
import json
def key_replacer(dictionary):
new_dictionary = {}
for k,v in dictionary.items():
if k in ('$oid', '$date'):
return v
if isinstance(v, dict):
v = key_replacer(v)
new_dictionary[k] = v
return new_dictionary
data = """{
"_id": {
"$oid": "5e7511c45cb29ef48b8cfcff"
},
"description": "some text",
"startDate": {
"$date": "5e7511c45cb29ef48b8cfcff"
},
"completionDate": {
"$date": "2021-01-05T14:59:58.046Z"
},
"videos":[{"$oid":"5ecf6cc19ad2a4dfea993fed"}]
}"""
info = json.loads(data)
refined = key_replacer(info)
new_data = json.dumps(refined)
print(new_data)
Output: {"_id": "5e7511c45cb29ef48b8cfcff", "description": "some text", "startDate": "5e7511c45cb29ef48b8cfcff", "completionDate": "2021-01-05T14:59:58.046Z", "videos": [{"$oid": "5ecf6cc19ad2a4dfea993fed"}]}. It works the way I want until "videos". How could I remove the $ sign for the "videos" part and replace the key like it happens in the other cases? It doesn't get into the contents of the list and I assume this is the cause.
Your original function doesn't account for the case where the dictionary value is a list. By accommodating for that, you will get the desired result
def key_replacer(dictionary):
new_dictionary = {}
for k, v in dictionary.items():
if k in ('$oid', '$date'):
return v
elif isinstance(v, dict):
v = key_replacer(v)
elif isinstance(v, list):
tmp = []
for itm in v:
tmp.append(key_replacer(itm))
v = tmp
new_dictionary[k] = v
return new_dictionary
NB: If the items in the list is not dictionaries the code will break, so be mindful of that
i've got a dot delimited string which I need to convert to Json. This is an example with different types of strings:
my.dictionary.value -> value
my.dictionary.list[0].value -> value
my.dictionary.list[1].value.list[0].value -> value
I have no problems converting the first type of string using a recursive approach:
def put(d, keys, item):
if "." in keys:
key, rest = keys.split(".", 1)
if key not in d:
d[key] = {}
put(d[key], rest, item)
else:
d[keys] = item
But i'm struggling to find a solution for the lists. Is there a library that provides out of the box string to json conversion? Thank you for your time.
AFAIK, there isn't any modules that would do this
Here is a sample code to converted a series of dotted strings into json format. You just have create a new list when you see the pattern [n] in the string that would be used as a key.
import re
import json
def parse_dotted_strlines(strlines):
res= {}
for line in strlines.splitlines():
parse_dotted_str(line, res)
return res
def parse_dotted_str(s, res):
if '.' in s:
key, rest = s.split('.', 1)
# Check if key represents a list
match = re.search(r'(.*)\[(\d)\]$', key)
if match:
# List
key, index = match.groups()
val = res.get(key, {}) or []
assert type(val) == list, f'Cannot set key {key} as of type list as i
t was earlier marked as {type(val)}'
while len(val) <= int(index):
val.append({})
val[index] = parse_dotted_str(rest, {})
res[key] = val
else:
# Dict
res[key] = parse_dotted_str(rest, res.get(key, {}))
elif '->' in s:
key, val = s.split('->')
res[key.strip()] = val.strip()
return res
Sample input and output
lines = """
my.dictionary.value -> value
my.dictionary.list[0].value -> value
my.dictionary.list[1].value.list[0].value -> value
"""
res = parse_dotted_strlines(lines)
print (json.dumps(res, indent=4))
{
"my": {
"dictionary": {
"value": "value",
"list": [
{
"value": "value"
},
{
"value": {
"list": [
{
"value": "value"
}
]
}
}
]
}
}
}
the json package is what you need
import json
mydict = """{
"str1": "str",
"list1": ["list1_str1", "list1_str2"],
"list2": ["list2_str1", "list2_str2", ["list2_str11", "list_str12"]]
}"""
json.loads(mydict)
>> {'str1': 'str',
'list1': ['list1_str1', 'list1_str2'],
'list2': ['list2_str1', 'list2_str2', ['list2_str11', 'list_str12']]}
I'm looking to flatten a JSON hierarchy of unknown structure to a dictionary, capturing the full key hierarchy in the dictionary result to uniquely identify it.
So far I am able to print the key:value pair for the all the parent/child nodes recursively but I am having trouble:
(1) figuring out how to pass the parent hierarchy keys for a recursive (child) execution and then reset it when it exits the child key.
(2) writing to a single dictionary result - when I define the dictionary within the recursive function, I end up creating multiple dictionaries ... Do I need to wrap this function in a master function to avoid this?
Thanks!
# flatten/enumerate example I'm using
with open('_json\\file.json') as f:
data = json.load(f)
def parse_json_response(content):
if len (content.keys()) > 1 :
for key, value in content.items():
if type(value) is dict:
parse_json_response(value)
else:
print('{}:{}'.format(key,value))
else:
print(value)
if __name__ == '__main__':
parse_json_response(data)
# current result as print
id = 12345
firstName = John
lastName = Smith
DOB = 1980-01-01
phone = 123
line1 = Unit 4
line2 = 3 Main st
# desired result to dictionary {}
id = 12345
fields.firstName = John
fields.lastName = Smith
fields.DOB = 1980-01-01
fields.phone = 123
fields.address.residential.line1 = Unit 4
fields.address.residential.line2 = 3 Main st
You can create the flattened dictionary (rather than just print values), by keeping track of the parent and recursing in the correct spot. That might look something like:
d = {
"ID": "12345",
"fields": {
"firstName": "John",
"lastName": "Smith",
"DOB": "1980-01-01",
"phoneLand": "610292659333",
"address": {
"residential": {
"line1": "Unit 4",
"line2": "3 Main st"
}
}
}
}
def flattenDict(d, parent=None):
ret = {}
for k, v in d.items():
if parent:
k = f'{parent}.{k}'
if isinstance(v, dict):
ret.update(flattenDict(v, k))
else:
ret[k] = v
return ret
flat = flattenDict(d)
flat will be:
{'ID': '12345',
'fields.firstName': 'John',
'fields.lastName': 'Smith',
'fields.DOB': '1980-01-01',
'fields.phoneLand': '610292659333',
'fields.address.residential.line1': 'Unit 4',
'fields.address.residential.line2': '3 Main st'}
You can also arrange the output to be a generator that yields tuples. You can then pass this to dict() for the same result:
def flattenDict(d):
for k, v in d.items():
if isinstance(v, dict):
yield from ((f'{k}.{kk}', v) for kk, v in flattenDict(v))
else:
yield (k, v)
dict(flattenDict(d))
Try this below:
test = {
"ID": "12345",
"fields": {
"firstName": "John",
"lastName": "Smith",
"DOB": "1980-01-01",
"phoneLand": "610292659333",
"address": {
"residential": {
"line1": "Unit 4",
"line2": "3 Main st"
}
}
}
}
def func(d, parent=""):
for key, value in d.items():
if isinstance(value, dict):
func(value, parent=parent+key+".")
else:
print(f"{parent+key} = {value}")
func(test)
Result:
ID = 12345
fields.firstName = John
fields.lastName = Smith
fields.DOB = 1980-01-01
fields.phoneLand = 610292659333
fields.address.residential.line1 = Unit 4
fields.address.residential.line2 = 3 Main st
I have one json file and i need to list all "selftext" elements of all data.
Any example of it ?
data example
{ "data": [
{
"selftext": "hello there",
"textex": true,
},
If you want to be able to find a key from an arbitrary json at an arbitrary level, you should use recursion:
def findkey(data, key, resul = None):
if resul is None: resul=[] # initialize an empty list for the results
if isinstance(data, list): # walk down into lists
for d in data:
findkey(d, key, resul)
elif isinstance(data, dict): # dict processing
for k,v in data.items():
if (k == key) and isinstance(v, str): # the expected key and a string value?
resul.append(v)
elif isinstance(v, list) or isinstance(v, dict):
findkey(v, key, resul) # recurse if value is a list or a dict
return resul
Example:
>>> data = { "data": [
{
"selftext": "hello there",
"textex": True,
},
]}
>>> findkey(data, 'selftext')
['hello there']