I have already tried numerous approaches, but unfortunately do not come to any useful result. I have the following problem: I have a very deep and arbitrary nested dictionary.
d = {
"aaa":{
"bbb":"xyz",
"ccc":{
"description":"xyz",
"data":"abc"
},
"description":"xyz"
},
"xxx":{
"description":"xyz",
"bbb":{
"ccc":{
"ddd":{
"description":"xyz"
},
"aaa":{
"description":{
"hhh": "xyz"
}
},
"zzz":{
"description":"xyz"
}
}
}
},
"lll":{
"description":"xyz",
"bbb":{
"ccc":{
"hhh":{
"description":"xyz"
},
"ooo":{
"description":"xyz",
"aaa":{
"ddd":{
"description":"xyz"
}
},
"zzz":{
"ddd":{
"description":"xyz"
}
}
},
"zzz":{
"description":"xyz"
}
}
}
}
}
Now I want to search all levels of the dictionary and check if the keys "aaa" & "zzz" occur in this level. If this is the case, I want to output the keys with the respective values in a list with tuples.
[('aaa: {'ddd':{'description':'xyz'}', 'zzz: {'description':'xyz'}'),
('aaa:{'ddd':{'description':'xyz'}}', 'zzz:{'description':'xyz'})
]
I know that with
d.keys()
I can print all the keys of a single level.
I know that with this function I can go through all keys and values in the dictionary
def recursive_items(dictionary):
    """Yield every ``(key, value)`` pair whose value is not a dict.

    Dict values are not yielded themselves; they are descended into, so the
    result is a flat stream of leaf pairs from every nesting level.  The key
    of a nested dict is therefore never reported.

    Args:
        dictionary: The (possibly nested) dict to walk.

    Yields:
        ``(key, value)`` tuples in dict iteration order, depth first.
    """
    for key, value in dictionary.items():
        # isinstance also covers dict subclasses such as OrderedDict.
        if isinstance(value, dict):
            yield from recursive_items(value)
        else:
            yield (key, value)
However, I am having trouble linking the two and putting the output into a list of tuples.
Try (d is dictionary from your question):
def find(d, keys=("aaa", "zzz")):
    """Yield the requested key/value pairs from every dict that has all keys.

    The structure is walked depth first.  Each dict that contains *every*
    entry of ``keys`` produces one tuple of ``(key, value)`` pairs, ordered
    like ``keys``.  The walk then continues into that dict's values, so
    matches nested inside a match are reported as well.

    Args:
        d: The object to search; anything other than a dict or list is
            ignored.
        keys: The keys that must all be present on the same level.

    Yields:
        One tuple of ``(key, value)`` pairs per matching dict.
    """
    if isinstance(d, dict):
        if all(k in d for k in keys):
            yield tuple((k, d[k]) for k in keys)
        for v in d.values():
            yield from find(v, keys)
    elif isinstance(d, list):
        for v in d:
            yield from find(v, keys)
print(list(find(d)))
Prints:
[
(
("aaa", {"description": {"hhh": "xyz"}}),
("zzz", {"description": "xyz"})),
(
("aaa", {"ddd": {"description": "xyz"}}),
("zzz", {"ddd": {"description": "xyz"}}),
),
]
I have created a variable that holds the contents of t.json. The JSON file is as follows:
{
"groups": {
"customerduy": {
"nonprod": {
"name": "customerduynonprod",
"id": "529646781943",
"owner": "cloudops#coerce.com",
"manager_email": ""
},
"prod": {
"name": "phishing_duyaccountprod",
"id": "241683454720",
"owner": "cloudops#coerce.com",
"manager_email": ""
}
},
"customerduyprod": {
"nonprod": {
"name": "phishing_duyaccountnonprod",
"id": "638968214142",
"owner": "cloudops#coerce.com",
"manager_email": ""
}
},
"ciasuppliergenius": {
"prod": {
"name": "ciasuppliergeniusprod",
"id": "220753788760",
"owner": "cia_developers#coerce.com",
"manager_email": "jarks#coerce.com"
}
}
}
}
My goal is to parse this JSON file, get the value of "owner", and assign it to a new variable. Example below:
t.json = group_map
group_id_aws = group(
group.upper(),
"accounts",
template,
owner = group_map['groups']['prod'],
manager_description = "Groups for teams to access their product accounts.",
The error I keep getting is: KeyError: 'prod'
Owner occurs 4 times, so here is how to get all of them.
import json
# read the json (JSON text is UTF-8, so do not rely on the platform default)
with open("C:\\test\\test.json", encoding="utf-8") as f:
    data = json.load(f)
# get all 4 occurrences; each one sits at groups -> <group> -> <environment>
owner_1 = data['groups']['customerduy']['nonprod']['owner']
owner_2 = data['groups']['customerduy']['prod']['owner']
owner_3 = data['groups']['customerduyprod']['nonprod']['owner']
owner_4 = data['groups']['ciasuppliergenius']['prod']['owner']
# print results
print(owner_1)
print(owner_2)
print(owner_3)
print(owner_4)
the result:
cloudops#coerce.com
cloudops#coerce.com
cloudops#coerce.com
cia_developers#coerce.com
You get a key error since the key 'prod' is not in 'groups'
What you have is
group_map['groups']['customerduy']['prod']
group_map['groups']['ciasuppliergenius']['prod']
So you will have to extract the 'owner' from each element in the tree:
def s(d, t):
    """Yield every value stored under key ``t`` at any depth of mapping ``d``.

    A matching value is yielded first; if that value is itself a mapping it
    is searched too.  Only mapping values are descended into; lists and
    scalars are skipped.

    Args:
        d: The mapping to search (must provide ``.items()``).
        t: The key to look for.

    Yields:
        The value of each occurrence of ``t``, depth first.
    """
    from collections.abc import Mapping

    for k, v in d.items():
        if t == k:
            yield v
        # An explicit type check replaces the former bare ``except: pass``,
        # which also swallowed GeneratorExit/KeyboardInterrupt raised at the
        # ``yield`` and could make ``close()`` fail with RuntimeError.
        if isinstance(v, Mapping):
            yield from s(v, t)
print(','.join(s(j,'owner')))
If your JSON is loaded in variable data, you can use a recursive function
that deals with the two containers types (dict and list) that can occur
in a JSON file, recursively:
def find_all_values_for_key(d, key, result):
    """Collect the values stored under ``key`` anywhere in a JSON-like tree.

    Dicts and lists are traversed recursively; every other type is ignored.
    When a dict contains ``key`` its value is recorded and that dict is not
    searched any further, so occurrences nested below a dict that already
    matched are not reported.

    Args:
        d: The object to search (typically the result of ``json.load``).
        key: The dict key to look for.
        result: A list that the found values are appended to, in place.

    Returns:
        None; the findings are delivered through ``result``.
    """
    if isinstance(d, dict):
        if key in d:
            result.append(d[key])
            return  # do not descend below a dict that already matched
        for v in d.values():
            find_all_values_for_key(v, key, result)
    elif isinstance(d, list):
        for elem in d:
            find_all_values_for_key(elem, key, result)
owners = []
find_all_values_for_key(data, 'owner', owners)
print(f'{owners=}')
which gives:
owners=['cloudops#coerce.com', 'cloudops#coerce.com', 'cloudops#coerce.com', 'cia_developers#coerce.com']
This way you don't have to bother with the names of intermediate keys, or in general the structure of your JSON file.
You don't have any lists in your example, but it is trivial to recurse through
them to reach any dict with an owner key that might "lurk" somewhere nested
under a list element, so it is better to handle lists now and be prepared for
future changes to the JSON.
I am trying to create a complex object based on metadata I have. It is an array of attributes which I am iterating and trying to create a dict. For example below is the array:
[
"itemUniqueId",
"itemDescription",
"manufacturerInfo[0].manufacturer.value",
"manufacturerInfo[0].manufacturerPartNumber",
"attributes.noun.value",
"attributes.modifier.value",
"attributes.entityAttributes[0].attributeName",
"attributes.entityAttributes[0].attributeValue",
"attributes.entityAttributes[0].attributeUOM",
"attributes.entityAttributes[1].attributeName",
"attributes.entityAttributes[1].attributeValue",
"attributes.entityAttributes[1].attributeUOM",
]
This array should give an output as below:
{
"itemUniqueId": "",
"itemDescription": "",
"manufacturerInfo": [
{
"manufacturer": {
"value": ""
},
"manufacturerPartNumber": ""
}
],
"attributes": {
"noun": {
"value": ""
},
"modifier": {
"value": ""
},
"entityAttributes": [
{
"attributeName": "",
"attributeValue": "",
"attributeUOM": ""
},
{
"attributeName": "",
"attributeValue": "",
"attributeUOM": ""
}
]
}
}
I have written this logic but unable to get the desired output. It should work on both object and array given the metadata.
source_json = [
"itemUniqueId",
"itemDescription",
"manufacturerInfo[0].manufacturer.value",
"manufacturerInfo[0].manufacturerPartNumber",
"attributes.noun.value",
"attributes.modifier.value",
"attributes.entityAttributes[0].attributeName",
"attributes.entityAttributes[0].attributeValue",
"attributes.entityAttributes[0].attributeUOM",
"attributes.entityAttributes[1].attributeName",
"attributes.entityAttributes[1].attributeValue",
"attributes.entityAttributes[1].attributeUOM",
]
for row in source_json:
propertyNames = row.split('.')
temp = ''
parent = {}
parentArr = []
parentObj = {}
# if len(propertyNames) > 1:
arrLength = len(propertyNames)
for i, (current) in enumerate(zip(propertyNames)):
if i == 0:
if '[' in current:
parent[current]=parentArr
else:
parent[current] = parentObj
temp = current
if i > 0 and i < arrLength - 1:
if '[' in current:
parent[current] = parentArr
else:
parent[current] = parentObj
temp = current
if i == arrLength - 1:
if '[' in current:
parent[current] = parentArr
else:
parent[current] = parentObj
temp = current
# temp[prev][current] = ""
# finalMapping[target] = target
print(parent)
There's a similar question at Convert Dot notation string into nested Python object with Dictionaries and arrays where the accepted answer works for this question, but has unused code paths (e.g. isInArray) and caters to unconventional conversions expected by that question:
❓ "arrOne[0]": "1,2,3" → "arrOne": ["1", "2", "3"] instead of
✅ "arrOne[0]": "1,2,3" → "arrOne": ["1,2,3"] or
✅ "arrOne[0]": "1", "arrOne[1]": "2", "arrOne[2]": "3" → "arrOne": ["1", "2", "3"]
Here's a refined implementation of the branch function:
def branch(tree, path, value):
    """Store ``value`` in ``tree`` at the location described by ``path``.

    ``path`` is a dot-notation string already split on ``.``.  A segment such
    as ``"items[2]"`` addresses index 2 of the list stored under ``"items"``;
    any other segment addresses a plain dict key.  Missing intermediate
    dicts and lists are created on the way down.

    Args:
        tree: The dict to update in place.
        path: Non-empty sequence of path segments.
        value: The value to store at the final segment.

    Returns:
        ``tree`` itself, so that the recursive calls can be assigned back.
    """
    import re  # function-scope import keeps this snippet self-contained

    key = path[0]
    array_index_match = re.search(r'\[([0-9]+)\]', key)
    if array_index_match:
        # The capture group already holds the bare digits.
        array_index = int(array_index_match.group(1))
        key = key.replace(array_index_match.group(0), '')
        # Prepare the array at the key
        items = tree.setdefault(key, [])
        # Pad with empty objects up to the requested index so that sparse or
        # out-of-order indices no longer raise IndexError.
        while len(items) <= array_index:
            items.append({})
        # Replace the object at the array index
        if len(path) == 1:
            items[array_index] = value
        else:
            items[array_index] = branch(items[array_index], path[1:], value)
    elif len(path) == 1:
        tree[key] = value
    else:
        # Prepare the object at the key, then descend into it
        tree[key] = branch(tree.setdefault(key, {}), path[1:], value)
    return tree
Usage:
# Placeholder stored at every leaf of the generated structure.
VALUE = ''


def create_dict(attributes):
    """Build a nested dict/list structure from dot-notation attribute paths.

    Each path is split on ``.`` and handed to ``branch``, which creates the
    intermediate containers and stores ``VALUE`` at the leaf.

    Args:
        attributes: Iterable of path strings such as ``"a.b[0].c"``.

    Returns:
        The assembled dict.
    """
    d = {}
    for path_str in attributes:
        branch(d, path_str.split('.'), VALUE)
    return d
source_json = [
"itemUniqueId",
"itemDescription",
"manufacturerInfo[0].manufacturer.value",
"manufacturerInfo[0].manufacturerPartNumber",
"attributes.noun.value",
"attributes.modifier.value",
"attributes.entityAttributes[0].attributeName",
"attributes.entityAttributes[0].attributeValue",
"attributes.entityAttributes[0].attributeUOM",
"attributes.entityAttributes[1].attributeName",
"attributes.entityAttributes[1].attributeValue",
"attributes.entityAttributes[1].attributeUOM",
]
assert create_dict(source_json) == {
"itemUniqueId": "",
"itemDescription": "",
"manufacturerInfo": [
{
"manufacturer": {
"value": ""
},
"manufacturerPartNumber": ""
}
],
"attributes": {
"noun": {
"value": ""
},
"modifier": {
"value": ""
},
"entityAttributes": [
{
"attributeName": "",
"attributeValue": "",
"attributeUOM": ""
},
{
"attributeName": "",
"attributeValue": "",
"attributeUOM": ""
}
]
}
}
First we iterate over the whole list and store each third-level attribute; after that we convert this intermediate structure into the desired output:
from typing import Dict, List
source_json = [
"attributes.entityAttributes[0].attributeName",
"attributes.entityAttributes[0].attributeValue",
"attributes.entityAttributes[0].attributeUOM",
"attributes.entityAttributes[1].attributeName",
"attributes.entityAttributes[1].attributeValue",
"attributes.entityAttributes[1].attributeUOM",
"attributes.entityAttributes[2].attributeName"
]
def accumulate(source: List) -> Dict:
    """Group third-level attribute names by root and ``(entity, index)``.

    Every path is expected to have the form ``root.entity[index].attribute``.
    The result maps ``root`` to a dict keyed by ``(entity, index)`` tuples
    (``index`` stays a string), whose values map each attribute name to an
    empty string.

    Args:
        source: Path strings with three dot-separated segments.

    Returns:
        The intermediate grouping described above.

    Raises:
        IndexError: If a path has fewer than three segments.
    """
    accumulator = {}
    for v in source:
        vs = v.split(".")
        root_attribute = vs[0]
        # Split "entity[index]" at the last "[" and drop the closing "]".
        i = vs[1].rfind('[')
        k = (vs[1][:i], vs[1][i+1:-1])
        entities = accumulator.setdefault(root_attribute, {})
        entities.setdefault(k, {})[vs[2]] = ""
    return accumulator
def get_result(accumulated: Dict) -> Dict:
    """Turn an ``(entity, index)``-keyed grouping into lists per entity.

    For every root key the ``(entity, index)`` entries are collected into a
    list stored under ``entity``.  Only entries holding exactly three
    attributes are kept; incomplete ones are dropped.  The list order follows
    the iteration order of the input, not the numeric index.

    Args:
        accumulated: Mapping of root -> ``{(entity, index): attributes}``.

    Returns:
        Mapping of root -> ``{entity: [attributes, ...]}``.
    """
    result = {}
    for k, v in accumulated.items():
        result[k] = {}
        for (entity, _idx), v1 in v.items():
            # The list is created even when every entry ends up filtered out.
            entries = result[k].setdefault(entity, [])
            if len(v1) == 3:
                entries.append(v1)
    return result
print(get_result(accumulate(source_json)))
The output will be:
{
'attributes':
{
'entityAttributes':
[
{
'attributeName': '',
'attributeValue': '',
'attributeUOM': ''
},
{'attributeName': '',
'attributeValue': '',
'attributeUOM': ''
}
]
}
}
In accumulate function we store 3rd level attributes in Dict with (entityAttributes, 0) ... (entityAttributes, 2) keys.
In get_result function we convert Dict with (entityAttributes, 0) ... (entityAttributes, 2) keys to Dict from string to List.
How about something like this:
import re
import json
source_json = [
"attributes.entityAttributes[0].attributeName",
"attributes.entityAttributes[0].attributeValue",
"attributes.entityAttributes[0].attributeUOM",
"attributes.entityAttributes[1].attributeName",
"attributes.entityAttributes[1].attributeValue",
"attributes.entityAttributes[1].attributeUOM",
"attributes.entityAttributes[2].attributeName"
]
def to_object(source_json):
    """Build a nested dict from dot/bracket attribute paths.

    Array indices are kept as string keys of a dict (``"0"``, ``"1"``, ...)
    rather than being turned into list positions.  Every leaf is an empty
    string.

    Args:
        source_json: Iterable of path strings such as ``"a.b[0].c"``.

    Returns:
        The assembled nested dict.
    """
    def add_attribute(target, attribute_list):
        # Create/descend one level per path part; the last part is the leaf.
        head, tail = attribute_list[0], attribute_list[1:]
        if tail:
            add_attribute(target.setdefault(head, {}), tail)
        else:
            target[head] = ''

    target = {}
    for row in source_json:
        # re.split leaves an empty token when the row starts or ends with a
        # delimiter (e.g. "arr[0]"); drop those so no '' key is created.
        parts = [part for part in re.split(r'[\.\[\]]+', row) if part]
        if parts:
            add_attribute(target, parts)
    return target
print(json.dumps(to_object(source_json), indent=4))
Note that this will not do exactly what you requested: it stores the array as an object with keys '0' ... '2' rather than as a list. This makes it easier to implement and also more stable. What would you expect when the input list is missing the entries for entityAttributes[0]? Should the list include an empty element, or something different? In any case, you save space by not including that element, which only works if you store the array as an object.
None of the answers provided so far strike me as very intuitive. Here's one way
to tackle the problem with three easy-to-understand functions.
Normalize inputs. First we need a function to normalize the inputs strings. Instead of rules-bearing strings like
'foo[0].bar' – where one must understand that integers
in square brackets imply a list – we want a simple tuple
of keys like ('foo', 0, 'bar').
def attribute_to_keys(a):
    """Convert a path like ``'foo[0].bar'`` into ``('foo', 0, 'bar')``.

    Brackets are rewritten to dots, the string is split on ``.``, and every
    all-digit part becomes an ``int`` (marking a list position).

    Args:
        a: The attribute path string.

    Returns:
        A tuple of ``str`` and ``int`` keys.
    """
    return tuple(
        int(k) if k.isdigit() else k
        for k in a.replace('[', '.').replace(']', '').split('.')
    )
Build a uniform data structure. Second, we need a function to assemble a data structure consisting of dicts
of dicts of dicts ... all the way down.
def assemble_data(attributes):
    """Assemble all attribute paths into one structure and convert it.

    Each path is normalized with ``attribute_to_keys`` and threaded through
    nested dicts (one level per key, created on demand).  The resulting
    dict-of-dicts is then passed to ``convert``.

    Args:
        attributes: Iterable of attribute path strings.

    Returns:
        Whatever ``convert`` returns for the assembled dict.
    """
    data = {}
    for a in attributes:
        d = data
        for k in attribute_to_keys(a):
            # Step into the child dict for this key, creating it if needed.
            d = d.setdefault(k, {})
    return convert(data)
def convert(d):
    """Return ``d`` unchanged (placeholder until the real conversion exists)."""
    # Just a placeholder for now.
    return d
Convert the uniform data. Third, we need to implement a real version of the placeholder. Specifically, we
need it to recursively convert the uniform data structure into our ultimate
goal having (a) empty strings at leaf nodes, and (b) lists rather than dicts
whenever the dict keys are all integers. Note that this even fills in empty
list positions with an empty string (a contingency not covered in your problem
description; adjust as needed if you want a different behavior).
def convert(d):
    """Recursively convert a uniform dict-of-dicts into its final shape.

    * An empty (or missing) node becomes the empty string ``''``.
    * A dict whose keys are all integers becomes a list; positions without a
      key are filled with ``''``.
    * Any other dict keeps its keys, with each value converted in turn.

    Args:
        d: A nested dict, or ``None`` for a missing list position.

    Returns:
        ``''``, a list, or a dict as described above.
    """
    if not d:
        return ''
    elif all(isinstance(k, int) for k in d):
        # d.get(i) is None for gaps, which the first branch maps to ''.
        return [convert(d.get(i)) for i in range(max(d) + 1)]
    else:
        return {k: convert(v) for k, v in d.items()}
You can use a custom builder class which implements __getattr__ and __getitem__ to gradually build the underlying object. This building can then be triggered by using eval on each of the attribute strings (note: eval is not safe for input from untrusted sources).
The following is an example implementation:
class Builder:
    """Lazily builds a nested dict/list structure from attribute/index access.

    Accessing ``builder.name`` turns the node into a dict and returns the
    child node stored under ``name``; accessing ``builder[i]`` turns it into
    a list and returns the child at position ``i``.  ``convert()`` then
    renders the whole tree as plain dicts, lists and ``''`` leaves.
    """

    def __init__(self):
        # None until the first access decides whether this node is a dict
        # (attribute access) or a list (index access).
        self.obj = None

    def __getattr__(self, key):
        """Return the child node for ``key``, creating it on first access."""
        # Dunder probes (copy, pickle, hasattr checks, ...) must not create
        # entries or be answered with a Builder instance.
        if key.startswith('__') and key.endswith('__'):
            raise AttributeError(key)
        if self.obj is None:
            self.obj = {}
        return self.obj.setdefault(key, Builder())

    def __getitem__(self, index):
        """Return the child node at ``index``, growing the list as needed."""
        if self.obj is None:
            self.obj = []
        # Append just enough fresh nodes so that ``index`` becomes valid.
        self.obj.extend(Builder() for _ in range(index + 1 - len(self.obj)))
        return self.obj[index]

    def convert(self):
        """Render this node and its descendants as plain Python objects.

        Returns:
            ``''`` for an untouched node, otherwise a list or dict of the
            converted children.

        Raises:
            AssertionError: If ``obj`` holds an unexpected type.
        """
        if self.obj is None:
            return ''
        elif isinstance(self.obj, list):
            return [v.convert() for v in self.obj]
        elif isinstance(self.obj, dict):
            return {k: v.convert() for k, v in self.obj.items()}
        else:
            # Raised explicitly so the check survives ``python -O``.
            raise AssertionError(f'unexpected node type: {type(self.obj).__name__}')
attributes = [
'itemUniqueId',
'itemDescription',
'manufacturerInfo[0].manufacturer.value',
'manufacturerInfo[0].manufacturerPartNumber',
'attributes.noun.value',
'attributes.modifier.value',
'attributes.entityAttributes[0].attributeName',
'attributes.entityAttributes[0].attributeValue',
'attributes.entityAttributes[0].attributeUOM',
'attributes.entityAttributes[1].attributeName',
'attributes.entityAttributes[1].attributeValue',
'attributes.entityAttributes[1].attributeUOM',
]
builder = Builder()
for attr in attributes:
eval(f'builder.{attr}')
result = builder.convert()
import json
print(json.dumps(result, indent=4))
which gives the following output:
{
"itemUniqueId": "",
"itemDescription": "",
"manufacturerInfo": [
{
"manufacturer": {
"value": ""
},
"manufacturerPartNumber": ""
}
],
"attributes": {
"noun": {
"value": ""
},
"modifier": {
"value": ""
},
"entityAttributes": [
{
"attributeName": "",
"attributeValue": "",
"attributeUOM": ""
},
{
"attributeName": "",
"attributeValue": "",
"attributeUOM": ""
}
]
}
}
I am trying to create a JSON file from two JSON files. Here I am reading the key value pairs from input.json and searching the matches in the secondary.json file and finally dumping the output to a new json file.
In the output of test.py I am expecting
{'tire1': {'source': ['test1', 'test2', 'test3']},
'tire6': {'source': ['test10', 'test21', 'test33']}}
instead of
{'tire1': {'source': ['test10', 'test21', 'test33']},
'tire6': {'source': ['test10', 'test21', 'test33']}}
But do not know what's wrong.
test.py
import json
import re
def findkeysvalues(inputDict, key):
    """Yield every value stored under ``key`` anywhere in a JSON-like tree.

    Lists are searched element by element.  For a dict, the value under
    ``key`` (if present) is yielded first and then all of the dict's values
    are searched as well, so nested occurrences are found too.

    Args:
        inputDict: A dict, a list, or any other object (which yields nothing).
        key: The dict key to look for.

    Yields:
        Each matching value, depth first.
    """
    if isinstance(inputDict, list):
        for i in inputDict:
            yield from findkeysvalues(i, key)
    if isinstance(inputDict, dict):
        if key in inputDict:
            yield inputDict[key]
        for j in inputDict.values():
            yield from findkeysvalues(j, key)
def process_JSON_value(jsonFileInput, parentInputKey, key):
    """Collect the values of ``key`` found below one top-level key of a file.

    The JSON file is loaded, reduced to the single top-level entry named
    ``parentInputKey`` (nothing, if that entry does not exist), and then
    searched recursively with ``findkeysvalues``.

    Args:
        jsonFileInput: Path of the JSON file to read.
        parentInputKey: Top-level key whose subtree is searched.
        key: The key whose values are collected.

    Returns:
        A list of all values found; empty when nothing matches.
    """
    with open(jsonFileInput) as jsonFile:
        data = json.load(jsonFile)
    # Keep only the requested top-level entry.
    Dict = {i: data[i] for i in data if i == parentInputKey}
    return list(findkeysvalues(Dict, key))
def createRulesJSON():
with open("input.json") as jsonFile:
data = json.load(jsonFile)
Dict = { }
rules_items_source = list(findkeysvalues(data, "source"))
for p in data:
Dict[p] = { }
for i in rules_items_source:
x = re.findall("\w+", i[0])
sourceItems = process_JSON_value("secondary.json", x[0], "compname")
Dict[p]['source'] = sourceItems
print(Dict)
createRulesJSON()
input.json
{
"tire1": {
"source": [ "{{ 'TEX' | YYYYYYY | join }}" ],
"dest": [ "{{ Microservice.host }}" ],
"port": "555"
},
"tire6": {
"source": [ "{{ 'REP' | LLLLLL | join }}" ],
"dest": [ "{{ Microservice.host2 }}" ],
"port": "555"
}
}
secondary.json
{
"client": {
"name": "anyname"
},
"PEP": {
"tire2": {
"tire3": {
"compname": "test1"
},
"tire4": {
"compname": "test2"
},
"tire5": {
"compname": "test3"
}
}
},
"REP": {
"tire2": {
"cmpname": "vendor1",
"tire3": {
"compname": "test10"
},
"tire4": {
"compname": "test21"
},
"tire5": {
"compname": "test33"
}
}
},
"Microservice": {
"host": "ttttttttttttttttttttt",
"host2": "GGGGGGGGGGGGGGGGGGGGGGGG"
}
}
Two issues:
Your nested loops in createRulesJSON create a Cartesian product on data. The first loop gets all keys from the data, and the nested loop extracts the three-letter codes from all of the data. So you will combine one key with a code that was extracted from the other key's data. There is no attempt to keep these two pieces of information associated, yet that is what you need.
To fix that, change this:
rules_items_source = list(findkeysvalues(data, "source"))
for p in data:
To:
for p in data:
rules_items_source = list(findkeysvalues(data[p], "source"))
From the expected output it seems that you want to map the code "TEX" (in the first file) to the code "PEP" (in the second file). There is nothing that maps these two codes to each other.
To fix that, I will just assume that you'll correct in one of your files the code to match the other code.
I'm trying to create a Python function that converts lists (arrays of objects, in ELK terms) to dictionaries. I found a sample Ruby function that does this and I'm trying to port it to Python for my own use, but I'm having a hard time getting the expected output. The output will be inserted back into Elasticsearch.
Ruby Function - I found in Internet
# Walks +h+ and, for each Array value it encounters, stores under the same key
# a Hash whose keys are the element indexes as strings ("0", "1", ...).
# Hash and Array values are then walked recursively; +h+ is modified in place.
# NOTE(review): the recursion descends into the original Array (value), not
# into value_hash; this presumably relies on both sharing the same element
# objects. Arrays nested directly inside arrays look unsupported - confirm.
def arrays_to_hash(h)
h.each do |k,v|
# If v is nil, an array is being iterated and the value is k.
# If v is not nil, a hash is being iterated and the value is v.
value = v || k
if value.is_a?(Array)
# "value" is replaced with "value_hash" later.
value_hash = {}
value.each_with_index do |v, i|
value_hash[i.to_s] = v
end
h[k] = value_hash
end
if value.is_a?(Hash) || value.is_a?(Array)
arrays_to_hash(value)
end
end
end
Python function (my attempt) - looking at the output, I can see that the first list inside the dictionary gets converted, but the list nested inside it is still present.
def array_path(my_dict):
for k,v in my_dict.items():
if isinstance(v,list):
print (len(v))
for i, item in enumerate(v):
my_dict2[str(i)] = item
my_dict[k] = my_dict2
elif isinstance(v,dict):
array_path(v)
else:
my_dict[k] = v
Input
{
"foo": "bar",
"test": {
"steps": [
{
"response_time": "100"
},
{
"response_time": "101",
"more_nested": [
{
"hello": "world"
},
{
"hello2": "world2"
}
]
}
]
}
}
**
Expected Output
**
{
"foo": "bar",
"test": {
"steps": {
"0": {
"response_time": "100"
},
"1": {
"response_time": "101",
"more_nested": {
"0": {
"hello": "world"
},
"1": {
"hello2": "world2"
}
}
}
}
}
}
Current O/P
{'0': {'response_time': '100'},
'1': {'more_nested': [{'hello': 'world'}, {'hello2': 'world2'}],
'response_time': '101'}}
The original script stopped descending once it reached a list, so a list of dicts was never processed further. The version below recurses into both dicts and lists:
def array_path(my_dict):
    """Recursively replace every list with a dict keyed by string indexes.

    A list ``[a, b]`` becomes ``{"0": a, "1": b}`` with each element converted
    in turn.  Dicts are updated in place (their values are replaced by the
    converted values) and returned; any other object is returned unchanged.

    Args:
        my_dict: A dict, a list, or a scalar from a JSON-like structure.

    Returns:
        The converted object; for a dict this is the same, mutated instance.
    """
    if isinstance(my_dict, dict):
        # Re-assigning existing keys does not resize the dict, so doing it
        # while iterating over items() is safe.
        for k, v in my_dict.items():
            my_dict[k] = array_path(v)
    elif isinstance(my_dict, list):
        return {str(i): array_path(item) for i, item in enumerate(my_dict)}
    return my_dict