I am trying to create a complex object based on metadata I have. It is an array of attribute paths which I am iterating over to build a dict. For example, below is the array:
[
"itemUniqueId",
"itemDescription",
"manufacturerInfo[0].manufacturer.value",
"manufacturerInfo[0].manufacturerPartNumber",
"attributes.noun.value",
"attributes.modifier.value",
"attributes.entityAttributes[0].attributeName",
"attributes.entityAttributes[0].attributeValue",
"attributes.entityAttributes[0].attributeUOM",
"attributes.entityAttributes[1].attributeName",
"attributes.entityAttributes[1].attributeValue",
"attributes.entityAttributes[1].attributeUOM",
]
This array should give an output as below:
{
"itemUniqueId": "",
"itemDescription": "",
"manufacturerInfo": [
{
"manufacturer": {
"value": ""
},
"manufacturerPartNumber": ""
}
],
"attributes": {
"noun": {
"value": ""
},
"modifier": {
"value": ""
},
"entityAttributes": [
{
"attributeName": "",
"attributeValue": "",
"attributeUOM": ""
},
{
"attributeName": "",
"attributeValue": "",
"attributeUOM": ""
}
]
}
}
I have written this logic but am unable to get the desired output. It should work for both objects and arrays, given the metadata.
source_json = [
"itemUniqueId",
"itemDescription",
"manufacturerInfo[0].manufacturer.value",
"manufacturerInfo[0].manufacturerPartNumber",
"attributes.noun.value",
"attributes.modifier.value",
"attributes.entityAttributes[0].attributeName",
"attributes.entityAttributes[0].attributeValue",
"attributes.entityAttributes[0].attributeUOM",
"attributes.entityAttributes[1].attributeName",
"attributes.entityAttributes[1].attributeValue",
"attributes.entityAttributes[1].attributeUOM",
]
for row in source_json:
    propertyNames = row.split('.')
    temp = ''
    parent = {}
    parentArr = []
    parentObj = {}
    # if len(propertyNames) > 1:
    arrLength = len(propertyNames)
    for i, (current) in enumerate(zip(propertyNames)):
        if i == 0:
            if '[' in current:
                parent[current] = parentArr
            else:
                parent[current] = parentObj
            temp = current
        if i > 0 and i < arrLength - 1:
            if '[' in current:
                parent[current] = parentArr
            else:
                parent[current] = parentObj
            temp = current
        if i == arrLength - 1:
            if '[' in current:
                parent[current] = parentArr
            else:
                parent[current] = parentObj
            temp = current
            # temp[prev][current] = ""
            # finalMapping[target] = target
    print(parent)
There's a similar question at Convert Dot notation string into nested Python object with Dictionaries and arrays where the accepted answer works for this question, but has unused code paths (e.g. isInArray) and caters to unconventional conversions expected by that question:
❓ "arrOne[0]": "1,2,3" → "arrOne": ["1", "2", "3"] instead of
✅ "arrOne[0]": "1,2,3" → "arrOne": ["1,2,3"] or
✅ "arrOne[0]": "1", "arrOne[1]": "2", "arrOne[2]": "3" → "arrOne": ["1", "2", "3"]
Here's a refined implementation of the branch function:
import re

def branch(tree, path, value):
    key = path[0]
    array_index_match = re.search(r'\[([0-9]+)\]', key)
    if array_index_match:
        # Get the array index, and remove the match from the key
        array_index = int(array_index_match[0].replace('[', '').replace(']', ''))
        key = key.replace(array_index_match[0], '')
        # Prepare the array at the key
        if key not in tree:
            tree[key] = []
        # Prepare the object at the array index
        if array_index == len(tree[key]):
            tree[key].append({})
        # Replace the object at the array index
        tree[key][array_index] = value if len(path) == 1 else branch(tree[key][array_index], path[1:], value)
    else:
        # Prepare the object at the key
        if key not in tree:
            tree[key] = {}
        # Replace the object at the key
        tree[key] = value if len(path) == 1 else branch(tree[key], path[1:], value)
    return tree
Usage:
VALUE = ''
def create_dict(attributes):
    d = {}
    for path_str in attributes:
        branch(d, path_str.split('.'), VALUE)
    return d
source_json = [
"itemUniqueId",
"itemDescription",
"manufacturerInfo[0].manufacturer.value",
"manufacturerInfo[0].manufacturerPartNumber",
"attributes.noun.value",
"attributes.modifier.value",
"attributes.entityAttributes[0].attributeName",
"attributes.entityAttributes[0].attributeValue",
"attributes.entityAttributes[0].attributeUOM",
"attributes.entityAttributes[1].attributeName",
"attributes.entityAttributes[1].attributeValue",
"attributes.entityAttributes[1].attributeUOM",
]
assert create_dict(source_json) == {
"itemUniqueId": "",
"itemDescription": "",
"manufacturerInfo": [
{
"manufacturer": {
"value": ""
},
"manufacturerPartNumber": ""
}
],
"attributes": {
"noun": {
"value": ""
},
"modifier": {
"value": ""
},
"entityAttributes": [
{
"attributeName": "",
"attributeValue": "",
"attributeUOM": ""
},
{
"attributeName": "",
"attributeValue": "",
"attributeUOM": ""
}
]
}
}
First we iterate over the whole list and store each third-level attribute; after that we can transform this structure into the desired output:
from typing import Dict, List
source_json = [
"attributes.entityAttributes[0].attributeName",
"attributes.entityAttributes[0].attributeValue",
"attributes.entityAttributes[0].attributeUOM",
"attributes.entityAttributes[1].attributeName",
"attributes.entityAttributes[1].attributeValue",
"attributes.entityAttributes[1].attributeUOM",
"attributes.entityAttributes[2].attributeName"
]
def accumulate(source: List) -> Dict:
    accumulator = {}
    for v in source:
        vs = v.split(".")
        root_attribute = vs[0]
        if not root_attribute in accumulator:
            accumulator[root_attribute] = {}
        i = vs[1].rfind('[')
        k = (vs[1][:i], vs[1][i+1:-1])
        if not k in accumulator[root_attribute]:
            accumulator[root_attribute][k] = {}
        accumulator[root_attribute][k][vs[2]] = ""
    return accumulator

def get_result(accumulated: Dict) -> Dict:
    result = {}
    for k, v in accumulated.items():
        result[k] = {}
        for (entity, idx), v1 in v.items():
            if not entity in result[k]:
                result[k][entity] = []
            if len(v1) == 3:
                result[k][entity].append(v1)
    return result
print(get_result(accumulate(source_json)))
The output will be:
{
    'attributes': {
        'entityAttributes': [
            {
                'attributeName': '',
                'attributeValue': '',
                'attributeUOM': ''
            },
            {
                'attributeName': '',
                'attributeValue': '',
                'attributeUOM': ''
            }
        ]
    }
}
In the accumulate function we store the third-level attributes in a dict keyed by (entityAttributes, 0) ... (entityAttributes, 2) tuples.
In the get_result function we convert that tuple-keyed dict into a dict mapping each name to a list.
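For illustration, this is the intermediate dict that accumulate returns for the sample input (the indices are kept as strings; the incomplete entityAttributes[2] entry is later dropped by get_result because it has fewer than three attributes):

{
    'attributes': {
        ('entityAttributes', '0'): {'attributeName': '', 'attributeValue': '', 'attributeUOM': ''},
        ('entityAttributes', '1'): {'attributeName': '', 'attributeValue': '', 'attributeUOM': ''},
        ('entityAttributes', '2'): {'attributeName': ''}
    }
}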
How about something like this:
import re
import json
source_json = [
"attributes.entityAttributes[0].attributeName",
"attributes.entityAttributes[0].attributeValue",
"attributes.entityAttributes[0].attributeUOM",
"attributes.entityAttributes[1].attributeName",
"attributes.entityAttributes[1].attributeValue",
"attributes.entityAttributes[1].attributeUOM",
"attributes.entityAttributes[2].attributeName"
]
def to_object(source_json):
    def add_attribute(target, attribute_list):
        head, tail = attribute_list[0], attribute_list[1:]
        if tail:
            add_attribute(target.setdefault(head, {}), tail)
        else:
            target[head] = ''
    target = {}
    for row in source_json:
        add_attribute(target, re.split(r'[\.\[\]]+', row))
    return target
print(json.dumps(to_object(source_json), indent=4))
Note that this will not do exactly what you requested: it stores the array as an object with keys '0' ... '2' as well. This makes it easier to implement and also more robust. What would you expect if the input list were missing the entityAttributes[0] entries? Should the list contain an empty element, or something else? Either way, you save space by not including that element, which only works if you store the array as an object.
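If you do need real lists in the result, one option is a small post-processing pass, sketched below with a hypothetical digits_to_lists helper, that converts every dict whose keys are all digit strings into a list (missing indices are simply skipped):

def digits_to_lists(node):
    # Recursively convert dicts whose keys are all digit strings into lists.
    if isinstance(node, dict):
        converted = {k: digits_to_lists(v) for k, v in node.items()}
        if converted and all(k.isdigit() for k in converted):
            # Order by numeric index; absent indices are skipped rather than padded.
            return [converted[k] for k in sorted(converted, key=int)]
        return converted
    return node

print(json.dumps(digits_to_lists(to_object(source_json)), indent=4))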
None of the answers provided so far strike me as very intuitive. Here's one way
to tackle the problem with three easy-to-understand functions.
Normalize inputs. First we need a function to normalize the input strings. Instead of rules-bearing strings like 'foo[0].bar' – where one must understand that integers in square brackets imply a list – we want a simple tuple of keys like ('foo', 0, 'bar').
def attribute_to_keys(a):
    return tuple(
        int(k) if k.isdigit() else k
        for k in a.replace('[', '.').replace(']', '').split('.')
    )
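For example, one of the paths from the question normalizes to:

>>> attribute_to_keys('manufacturerInfo[0].manufacturer.value')
('manufacturerInfo', 0, 'manufacturer', 'value')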
Build a uniform data structure. Second, we need a function to assemble a data structure consisting of dicts
of dicts of dicts ... all the way down.
def assemble_data(attributes):
    data = {}
    for a in attributes:
        d = data
        for k in attribute_to_keys(a):
            d = d.setdefault(k, {})
    return convert(data)

def convert(d):
    # Just a placeholder for now.
    return d
Convert the uniform data. Third, we need to implement a real version of the placeholder. Specifically, we need it to recursively convert the uniform data structure into our ultimate goal: (a) empty strings at leaf nodes, and (b) lists rather than dicts whenever the dict keys are all integers. Note that this even fills in empty list positions with an empty string (a contingency not covered in your problem description; adjust as needed if you want a different behavior).
def convert(d):
    if not d:
        return ''
    elif all(isinstance(k, int) for k in d):
        return [convert(d.get(i)) for i in range(max(d) + 1)]
    else:
        return {k: convert(v) for k, v in d.items()}
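Putting the three functions together reproduces the desired structure (a minimal usage sketch; source_json here is the attribute list from the question):

import json

print(json.dumps(assemble_data(source_json), indent=4))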
You can use a custom builder class which implements __getattr__ and __getitem__ to gradually build the underlying object. This building can then be triggered by using eval on each of the attribute strings (note: eval is not safe for input from untrusted sources).
The following is an example implementation:
class Builder:
    def __init__(self):
        self.obj = None

    def __getattr__(self, key):
        if self.obj is None:
            self.obj = {}
        return self.obj.setdefault(key, Builder())

    def __getitem__(self, index):
        if self.obj is None:
            self.obj = []
        self.obj.extend(Builder() for _ in range(index + 1 - len(self.obj)))
        return self.obj[index]

    def convert(self):
        if self.obj is None:
            return ''
        elif isinstance(self.obj, list):
            return [v.convert() for v in self.obj]
        elif isinstance(self.obj, dict):
            return {k: v.convert() for k, v in self.obj.items()}
        else:
            assert False
attributes = [
'itemUniqueId',
'itemDescription',
'manufacturerInfo[0].manufacturer.value',
'manufacturerInfo[0].manufacturerPartNumber',
'attributes.noun.value',
'attributes.modifier.value',
'attributes.entityAttributes[0].attributeName',
'attributes.entityAttributes[0].attributeValue',
'attributes.entityAttributes[0].attributeUOM',
'attributes.entityAttributes[1].attributeName',
'attributes.entityAttributes[1].attributeValue',
'attributes.entityAttributes[1].attributeUOM',
]
builder = Builder()
for attr in attributes:
    eval(f'builder.{attr}')
result = builder.convert()
import json
print(json.dumps(result, indent=4))
which gives the following output:
{
"itemUniqueId": "",
"itemDescription": "",
"manufacturerInfo": [
{
"manufacturer": {
"value": ""
},
"manufacturerPartNumber": ""
}
],
"attributes": {
"noun": {
"value": ""
},
"modifier": {
"value": ""
},
"entityAttributes": [
{
"attributeName": "",
"attributeValue": "",
"attributeUOM": ""
},
{
"attributeName": "",
"attributeValue": "",
"attributeUOM": ""
}
]
}
}
I have created a var that is set to the contents of t.json. The JSON file is as follows:
{
"groups": {
"customerduy": {
"nonprod": {
"name": "customerduynonprod",
"id": "529646781943",
"owner": "cloudops#coerce.com",
"manager_email": ""
},
"prod": {
"name": "phishing_duyaccountprod",
"id": "241683454720",
"owner": "cloudops#coerce.com",
"manager_email": ""
}
},
"customerduyprod": {
"nonprod": {
"name": "phishing_duyaccountnonprod",
"id": "638968214142",
"owner": "cloudops#coerce.com",
"manager_email": ""
}
},
"ciasuppliergenius": {
"prod": {
"name": "ciasuppliergeniusprod",
"id": "220753788760",
"owner": "cia_developers#coerce.com",
"manager_email": "jarks#coerce.com"
}
}
}
}
My goal was to parse this JSON file, get the value for "owner", and output it to a new var. Example below:
t.json = group_map
group_id_aws = group(
    group.upper(),
    "accounts",
    template,
    owner = group_map['groups']['prod'],
    manager_description = "Groups for teams to access their product accounts.",
The error I keep getting is: KeyError: 'prod'
Owner occurs 4 times, so here is how to get all of them.
import json
# read the json
with open("C:\\test\\test.json") as f:
data = json.load(f)
# get all 4 occurances
owner_1 = data['groups']['customerduy']['nonprod']['owner']
owner_2 = data['groups']['customerduy']['prod']['owner']
owner_3 = data['groups']['customerduyprod']['nonprod']['owner']
owner_4 = data['groups']['ciasuppliergenius']['prod']['owner']
# print results
print(owner_1)
print(owner_2)
print(owner_3)
print(owner_4)
the result:
cloudops#coerce.com
cloudops#coerce.com
cloudops#coerce.com
cia_developers#coerce.com
You get a KeyError since the key 'prod' is not directly under 'groups'.
What you have is:
group_map['groups']['customerduy']['prod']
group_map['groups']['ciasuppliergenius']['prod']
So you will have to extract the 'owner' from each element in the tree:
def s(d, t):
    for k, v in d.items():
        if t == k:
            yield v
        try:
            for i in s(v, t):
                yield i
        except:
            pass

print(','.join(s(j, 'owner')))
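Here j is the parsed JSON; a minimal sketch of loading it (assuming the file is named t.json, as in the question):

import json

with open('t.json') as f:
    j = json.load(f)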
If your JSON is loaded in a variable data, you can use a recursive function that deals with the two container types (dict and list) that can occur in a JSON file:
def find_all_values_for_key(d, key, result):
    if isinstance(d, dict):
        if key in d:
            result.append(d[key])
            return
        for k, v in d.items():
            find_all_values_for_key(v, key, result)
    elif isinstance(d, list):
        for elem in d:
            find_all_values_for_key(elem, key, result)

owners = []
find_all_values_for_key(data, 'owner', owners)
print(f'{owners=}')
which gives:
owners=['cloudops#coerce.com', 'cloudops#coerce.com', 'cloudops#coerce.com', 'cia_developers#coerce.com']
This way you don't have to bother with the names of intermediate keys, or in general the structure of your JSON file.
You don't have any lists in your example, but it is trivial to recurse through them to any dict with an owner key that might "lurk" somewhere nested under a list element, so it is better to deal with potential future changes to the JSON.
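For instance, with a hypothetical structure that nests the groups inside a list, the same call still collects every owner:

data = {
    "groups": [
        {"prod": {"owner": "cloudops#coerce.com"}},
        {"nonprod": {"owner": "cia_developers#coerce.com"}},
    ]
}
owners = []
find_all_values_for_key(data, 'owner', owners)
print(owners)  # ['cloudops#coerce.com', 'cia_developers#coerce.com']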
I am looking at building lists of lists within a dictionary from an Excel spreadsheet.
My spreadsheet looks like this:
source_item_id | target_item_id | find_sting | replace_sting
source_id1     | target_id1     | abcd1      | efgh1
source_id1     | target_id1     | ijkl1      | mnop1
source_id1     | target_id2     | abcd2      | efgh2
source_id1     | target_id2     | ijkl2      | mnop2
source_id2     | target_id3     | qrst       | uvwx
source_id2     | target_id3     | yzab       | cdef
source_id2     | target_id4     | ghij       | klmn
source_id2     | target_id4     | opqr       | stuv
My output dictionary should look like this:
{
"source_id1": [{
"target_id1": [{
"find_string": "abcd1",
"replace_string": "efgh1"
},
{
"find_string": "ijkl1",
"replace_string": "mnop1"
}]
},
{
"target_id2": [{
"find_string": "abcd2",
"replace_string": "efgh2"
},
{
"find_string": "ijkl2",
"replace_string": "mnop2"
}]
}],
"source_id2": [{
"target_id3": [{
"find_string": "qrst",
"replace_string": "uvwx"
},
{
"find_string": "yzab",
"replace_string": "cdef"
}]
},
{
"target_id4": [{
"find_string": "ghij",
"replace_string": "klmn"
},
{
"find_string": "opqr",
"replace_string": "stuv"
}]
}]
}
With the following code I only get the last values in each of the lists:
import xlrd

xls_path = r"C:\data\ItemContent.xlsx"
book = xlrd.open_workbook(xls_path)
sheet_find_replace = book.sheet_by_index(1)
find_replace_dict = dict()
for line in range(1, sheet_find_replace.nrows):
    source_item_id = sheet_find_replace.cell(line, 0).value
    target_item_id = sheet_find_replace.cell(line, 1).value
    find_string = sheet_find_replace.cell(line, 2).value
    replace_sting = sheet_find_replace.cell(line, 3).value
    find_replace_list = [{"find_string": find_string, "replace_sting": replace_sting}]
    find_replace_dict[source_item_id] = [target_item_id]
    find_replace_dict[source_item_id].append(find_replace_list)
print(find_replace_dict)
--> result
{
"source_id1": ["target_id2", [{
"find_string": "ijkl2",
"replace_sting": "mnop2"
}
]],
"source_id2": ["target_id4", [{
"find_string": "opqr",
"replace_sting": "stuv"
}
]]
}
Your problem is rather complicated by the fact that you have a list of single-key dictionaries as the value of your source ids, but you can follow a pattern of parsing each line for the relevant items and then using those to decide where to append, or alternatively to create new lists:
from typing import List, Tuple

def process_line(line) -> Tuple[str, str, dict]:
    source_item_id = sheet_find_replace.cell(line, 0).value
    target_item_id = sheet_find_replace.cell(line, 1).value
    find_string = sheet_find_replace.cell(line, 2).value
    replace_string = sheet_find_replace.cell(line, 3).value
    return source_item_id, target_item_id, {
        "find_string": find_string,
        "replace_string": replace_string
    }

def find_target(target: str, ls: List[dict]) -> int:
    # Find the index of the target id in the list
    for i in range(len(ls)):
        if ls[i].get(target):
            return i
    return -1  # Or some other marker

import xlrd

xls_path = r"C:\data\ItemContent.xlsx"
book = xlrd.open_workbook(xls_path)
sheet_find_replace = book.sheet_by_index(1)
result_dict = dict()
for line in range(1, sheet_find_replace.nrows):
    source, target, replacer = process_line(line)
    # You can check here that the above three are correct
    source_list = result_dict.get(source, [])  # Leverage the default value of the get function
    target_idx = find_target(target, source_list)
    target_dict = source_list[target_idx] if target_idx >= 0 else {}
    replace_list = target_dict.get(target, [])
    replace_list.append(replacer)
    target_dict[target] = replace_list
    if target_idx >= 0:
        source_list[target_idx] = target_dict
    else:
        source_list.append(target_dict)
    result_dict[source] = source_list
print(result_dict)
I would note that if source_id pointed to a dictionary rather than a list, this could be radically simplified, since we wouldn't need to search through the list for a potentially already-existing list item and then awkwardly replace or append as needed. If you can change this constraint (remember, you can always convert a dictionary to a list downstream), I might consider doing that.
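As a sketch of that simplification (assuming the result becomes a dict of dicts keyed by source and target ids, and reusing the process_line helper above):

result_dict = {}
for line in range(1, sheet_find_replace.nrows):
    source, target, replacer = process_line(line)
    # setdefault creates the nested dict and list the first time each id is seen
    result_dict.setdefault(source, {}).setdefault(target, []).append(replacer)
print(result_dict)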
I am trying to update transaction ID from the following json:
{
"locationId": "5115",
"transactions": [
{
"transactionId": "1603804404-5650",
"source": "WEB"
} ]
I have written the following code for this, but it does not update the transaction id; instead it inserts a new transaction id at the end of the block:
try:
    session = requests.Session()
    with open("sales.json", "r") as read_file:
        payload = json.load(read_file)
    payload["transactionId"] = random.randint(0, 5)
    with open("sales.json", "w") as read_file:
        json.dump(payload, read_file)
Output:-
{
"locationId": "5115",
"transactions": [
{
"transactionId": "1603804404-5650",
"source": "WEB"
} ]
}
'transactionId': 1
}
Expected Output:
{
"locationId": "5115",
"transactions": [
{
"transactionId": "1",
"source": "WEB"
} ]
This would do it, but only in your specific case:
payload["transactions"][0]["transactionId"] = xxx
There should be error handling for cases where the "transactions" key is not in the dict, there are no records, or there is more than one.
Also, you will need to assign str(your_random_number) rather than the int if you want the value stored as a string, as the desired output suggests.
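For example:

payload["transactions"][0]["transactionId"] = str(random.randint(0, 5))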
If you just want to find the transactionId key and you don't know exactly where it may exist, you can do:
from collections.abc import Mapping
def update_key(key, new_value, jsondict):
    new_dict = {}
    for k, v in jsondict.items():
        if isinstance(v, Mapping):
            # Recursively traverse if value is a dict
            new_dict[k] = update_key(key, new_value, v)
        elif isinstance(v, list):
            # Traverse through all values of the list
            # Recursively traverse if an element is a dict
            new_dict[k] = [update_key(key, new_value, innerv) if isinstance(innerv, Mapping) else innerv for innerv in v]
        elif k == key:
            # This is the key to replace with the new value
            new_dict[k] = new_value
        else:
            # Just a regular value, assign to the new dict
            new_dict[k] = v
    return new_dict
Given a dict-
{
"locationId": "5115",
"transactions": [
{
"transactionId": "1603804404-5650",
"source": "WEB"
} ]
}
You can do-
>>> update_key('transactionId', 5, d)
{'locationId': '5115', 'transactions': [{'transactionId': 5, 'source': 'WEB'}]}
Yes, because transactionId is inside the transactions node. So your code should be:
payload["transactions"][0]["transactionId"] = random.randint(0, 5)
I've got a dot-delimited string which I need to convert to JSON. This is an example with different types of strings:
my.dictionary.value -> value
my.dictionary.list[0].value -> value
my.dictionary.list[1].value.list[0].value -> value
I have no problems converting the first type of string using a recursive approach:
def put(d, keys, item):
    if "." in keys:
        key, rest = keys.split(".", 1)
        if key not in d:
            d[key] = {}
        put(d[key], rest, item)
    else:
        d[keys] = item
But I'm struggling to find a solution for the lists. Is there a library that provides out-of-the-box string-to-JSON conversion? Thank you for your time.
AFAIK, there isn't any module that would do this.
Here is sample code to convert a series of dotted strings into JSON format. You just have to create a new list when you see the pattern [n] in a segment that would otherwise be used as a key.
import re
import json

def parse_dotted_strlines(strlines):
    res = {}
    for line in strlines.splitlines():
        parse_dotted_str(line, res)
    return res

def parse_dotted_str(s, res):
    if '.' in s:
        key, rest = s.split('.', 1)
        # Check if key represents a list
        match = re.search(r'(.*)\[(\d)\]$', key)
        if match:
            # List
            key, index = match.groups()
            val = res.get(key, {}) or []
            assert type(val) == list, f'Cannot set key {key} as of type list as it was earlier marked as {type(val)}'
            while len(val) <= int(index):
                val.append({})
            val[int(index)] = parse_dotted_str(rest, {})
            res[key] = val
        else:
            # Dict
            res[key] = parse_dotted_str(rest, res.get(key, {}))
    elif '->' in s:
        key, val = s.split('->')
        res[key.strip()] = val.strip()
    return res
Sample input and output
lines = """
my.dictionary.value -> value
my.dictionary.list[0].value -> value
my.dictionary.list[1].value.list[0].value -> value
"""
res = parse_dotted_strlines(lines)
print (json.dumps(res, indent=4))
{
"my": {
"dictionary": {
"value": "value",
"list": [
{
"value": "value"
},
{
"value": {
"list": [
{
"value": "value"
}
]
}
}
]
}
}
}
the json package is what you need
import json
mydict = """{
"str1": "str",
"list1": ["list1_str1", "list1_str2"],
"list2": ["list2_str1", "list2_str2", ["list2_str11", "list_str12"]]
}"""
json.loads(mydict)
>> {'str1': 'str',
'list1': ['list1_str1', 'list1_str2'],
'list2': ['list2_str1', 'list2_str2', ['list2_str11', 'list_str12']]}
I'm trying to create a Python function to convert lists (objects of arrays in ELK terms) to dictionaries. I found a sample Ruby function which does that, and I'm trying to convert it to a Python function for my usage. I'm having a hard time getting the output right. The output will be inserted back into Elasticsearch.
Ruby function - found on the Internet:
def arrays_to_hash(h)
  h.each do |k,v|
    # If v is nil, an array is being iterated and the value is k.
    # If v is not nil, a hash is being iterated and the value is v.
    value = v || k
    if value.is_a?(Array)
      # "value" is replaced with "value_hash" later.
      value_hash = {}
      value.each_with_index do |v, i|
        value_hash[i.to_s] = v
      end
      h[k] = value_hash
    end
    if value.is_a?(Hash) || value.is_a?(Array)
      arrays_to_hash(value)
    end
  end
end
Python function I'm trying - looking at the output I can see the first list inside the dictionary is converted, but the nested list inside it is still present:
def array_path(my_dict):
    for k, v in my_dict.items():
        if isinstance(v, list):
            print(len(v))
            for i, item in enumerate(v):
                my_dict2[str(i)] = item
            my_dict[k] = my_dict2
        elif isinstance(v, dict):
            array_path(v)
        else:
            my_dict[k] = v
Input
{
"foo": "bar",
"test": {
"steps": [
{
"response_time": "100"
},
{
"response_time": "101",
"more_nested": [
{
"hello": "world"
},
{
"hello2": "world2"
}
]
}
]
}
}
Expected Output
{
"foo": "bar",
"test": {
"steps": {
"0": {
"response_time": "100"
},
"1": {
"response_time": "101",
"more_nested": {
"0": {
"hello": "world"
},
"1": {
"hello2": "world2"
}
}
}
}
}
}
Current output:
{'0': {'response_time': '100'},
'1': {'more_nested': [{'hello': 'world'}, {'hello2': 'world2'}],
'response_time': '101'}}
The original script stopped its type check at lists and did not handle a list of dicts; this version looks OK now:
def array_path(my_dict):
    if type(my_dict) is dict:
        for k, v in my_dict.items():
            my_dict[k] = array_path(v)
    elif type(my_dict) is list:
        return {str(i): array_path(item) for i, item in enumerate(my_dict)}
    return my_dict
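A quick usage sketch with the input from the question (assuming it is loaded into a variable named doc):

import json

doc = {
    "foo": "bar",
    "test": {
        "steps": [
            {"response_time": "100"},
            {
                "response_time": "101",
                "more_nested": [{"hello": "world"}, {"hello2": "world2"}]
            }
        ]
    }
}
print(json.dumps(array_path(doc), indent=4))  # matches the expected output above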