i've got a dot delimited string which I need to convert to Json. This is an example with different types of strings:
my.dictionary.value -> value
my.dictionary.list[0].value -> value
my.dictionary.list[1].value.list[0].value -> value
I have no problems converting the first type of string using a recursive approach:
def put(d, keys, item):
if "." in keys:
key, rest = keys.split(".", 1)
if key not in d:
d[key] = {}
put(d[key], rest, item)
else:
d[keys] = item
But i'm struggling to find a solution for the lists. Is there a library that provides out of the box string to json conversion? Thank you for your time.
AFAIK, there isn't any modules that would do this
Here is a sample code to converted a series of dotted strings into json format. You just have create a new list when you see the pattern [n] in the string that would be used as a key.
import re
import json
def parse_dotted_strlines(strlines):
res= {}
for line in strlines.splitlines():
parse_dotted_str(line, res)
return res
def parse_dotted_str(s, res):
if '.' in s:
key, rest = s.split('.', 1)
# Check if key represents a list
match = re.search(r'(.*)\[(\d)\]$', key)
if match:
# List
key, index = match.groups()
val = res.get(key, {}) or []
assert type(val) == list, f'Cannot set key {key} as of type list as i
t was earlier marked as {type(val)}'
while len(val) <= int(index):
val.append({})
val[index] = parse_dotted_str(rest, {})
res[key] = val
else:
# Dict
res[key] = parse_dotted_str(rest, res.get(key, {}))
elif '->' in s:
key, val = s.split('->')
res[key.strip()] = val.strip()
return res
Sample input and output
lines = """
my.dictionary.value -> value
my.dictionary.list[0].value -> value
my.dictionary.list[1].value.list[0].value -> value
"""
res = parse_dotted_strlines(lines)
print (json.dumps(res, indent=4))
{
"my": {
"dictionary": {
"value": "value",
"list": [
{
"value": "value"
},
{
"value": {
"list": [
{
"value": "value"
}
]
}
}
]
}
}
}
the json package is what you need
import json
mydict = """{
"str1": "str",
"list1": ["list1_str1", "list1_str2"],
"list2": ["list2_str1", "list2_str2", ["list2_str11", "list_str12"]]
}"""
json.loads(mydict)
>> {'str1': 'str',
'list1': ['list1_str1', 'list1_str2'],
'list2': ['list2_str1', 'list2_str2', ['list2_str11', 'list_str12']]}
Related
I have a dict list of dicts.
dict_list = [
{"nameClient": "client.name"},
{"emailClient": "client.email"},
{"Document.typeDocument": "client.type_document"},
{"Document.numberDocument": "client.number_document"},
{"Document.docOne.number": "client.docone_number"},
{"Document.docOne.type": "client.docone_type"},
{"Document.docTwo.number": "client.doctwo_number"},
{"Document.docOne.extra.number": "client.docone_extra_number"},
]
I want to create a dict based on key of this dicts and get value from my class based on value of this dicts.
Client Class and Values initialized:
class Client:
def __init__(self, data):
self.name = data['name']
self.email = data['email']
self.type_document = data['type_document']
self.number_document = data['number_document']
self.docone_number = data['docone_number']
self.docone_type = data['docone_type']
self.doctwo_number = data['doctwo_number']
self.docone_extra_number = data['docone_extra_number']
# this will be passed on build_final_dict(FINAL_DICT, i, k, v, client)
client1 = Client({
"name": "Stack",
"email": "xxxxx#xxxx.com",
"type_document": "TPS",
"number_document": "22222222",
"docone_number": "11111111",
"docone_type": "docone type",
"doctwo_number": "doc two number",
"docone_extra_number": "doc extra number",
})
So I started passing to a function the key/value
FINAL_DICT = {}
for i in dict_list:
for k, v in i.items():
build_final_dict(FINAL_DICT, i, k, v, client)
My build_final_dict()
def build_final_dict(FINAL_DICT, i, k, v, client):
if '.' not in k:
FINAL_DICT[k] = getattr(client, v.replace('client.', ''))
else:
subdict = k.split('.')[0] # ex documento
subdict_key = i.items()[0][0]
subdict_key = subdict_key.replace('%s.' % subdict, '')
subdict_value = i.items()[0][1]
subdict_value = subdict_value.replace('client.', '')
if subdict not in FINAL_DICT:
FINAL_DICT[subdict] = dict()
result_value = getattr(client, subdict_value)
if '.' not in subdict_key:
FINAL_DICT[subdict][subdict_key] = result_value
else:
new_subkey = subdict_key.split('.')[0]
new_subvalue = subdict_key.split('.')[1]
if new_subkey not in FINAL_DICT[subdict]:
FINAL_DICT[subdict][new_subkey] = dict()
build_final_dict(FINAL_DICT[subdict][new_subkey], i, new_subvalue, v, client)
Actual Result:
{
"emailClient":"xxxxx#xxxx.com",
"nameClient":"Stack",
"Document":{
"typeDocument":"TPS",
"numberDocument":"22222222",
"docTwo":{
"number":"doc two number"
},
"docOne":{
"type":"docone type",
"extra":"doc extra number", // should continue creating dict within dict...
"number":"11111111"
}
}
}
Most of the dictionary is right. But the "extra" dictionary that I put inside a dictionary (third sub level) did not create a new dictionary and put the final value.
There is the possibility of having infinite sub dictionaries, I need my script to be prepared for that.
Result I expected (based on Client class Data):
{
"emailClient":"xxxxx#xxxx.com",
"nameClient":"Stack",
"Document":{
"typeDocument":"TPS",
"numberDocument":"22222222",
"docTwo":{
"number":"doc two number"
},
"docOne":{
"type":"docone type",
"extra": { "number": "doc extra number" }, // dict, not string
"number":"11111111"
}
}
}
I think, this will help you in transforming the initial JSON to the structure you need. This is in accordance to your original requirement of
I want to create a dict based on key of this dicts and get value from
my class based on value of this dicts.
I would try this in a split approach, to create nested dicts for every dict in your list (if they have nesting in keys), and then merge them together as single unit.
See an example approach here:
from collections.abc import MutableMapping
from functools import reduce
def merge(d1, d2):
# Merge 2 dictionaries -deep.
for k, v in d1.items():
if k in d2:
if all(isinstance(e, MutableMapping) for e in (v, d2[k])):
d2[k] = merge(v, d2[k])
md = d1.copy()
md.update(d2)
return md
def explode_to_dict(key, value):
# Create nested dicts based on the key structure
if "." in key:
p, c = key.rsplit(".", 1)
return explode_to_dict(p, {c: value})
else:
return {key: value}
if __name__ == '__main__':
dict_list = [
{"nameClient": "client.name"},
{"emailClient": "client.email"},
{"Document.typeDocument": "client.type_document"},
{"Document.numberDocument": "client.number_document"},
{"Document.docOne.number": "client.docone_number"},
{"Document.docOne.type": "client.docone_type"},
{"Document.docTwo.number": "client.doctwo_number"},
{"Document.docOne.extra.number": "client.docone_extra_number"},
]
result_dict = {}
result_dict = reduce(merge, [explode_to_dict(*d.popitem()) for d in dict_list])
print(json.dumps(result_dict))
This provides a structure like below(expected):
{
"nameClient": "client.name",
"emailClient": "client.email",
"Document": {
"typeDocument": "client.type_document",
"numberDocument": "client.number_document",
"docOne": {
"number": "client.docone_number",
"type": "client.docone_type",
"extra": {
"number": "client.docone_extra_number"
}
},
"docTwo": {
"number": "client.doctwo_number"
}
}
}
I suppose, you can proceed with your class manipulation from here!
I am trying to create a complex object based on metadata I have. It is an array of attributes which I am iterating and trying to create a dict. For example below is the array:
[
"itemUniqueId",
"itemDescription",
"manufacturerInfo[0].manufacturer.value",
"manufacturerInfo[0].manufacturerPartNumber",
"attributes.noun.value",
"attributes.modifier.value",
"attributes.entityAttributes[0].attributeName",
"attributes.entityAttributes[0].attributeValue",
"attributes.entityAttributes[0].attributeUOM",
"attributes.entityAttributes[1].attributeName",
"attributes.entityAttributes[1].attributeValue",
"attributes.entityAttributes[1].attributeUOM",
]
This array should give an output as below:
{
"itemUniqueId": "",
"itemDescription": "",
"manufacturerInfo": [
{
"manufacturer": {
"value": ""
},
"manufacturerPartNumber": ""
}
],
"attributes": {
"noun": {
"value": ""
},
"modifier": {
"value": ""
},
"entityAttributes": [
{
"attributeName": "",
"attributeValue": "",
"attributeUOM": ""
},
{
"attributeName": "",
"attributeValue": "",
"attributeUOM": ""
}
]
}
}
I have written this logic but unable to get the desired output. It should work on both object and array given the metadata.
source_json = [
"itemUniqueId",
"itemDescription",
"manufacturerInfo[0].manufacturer.value",
"manufacturerInfo[0].manufacturerPartNumber",
"attributes.noun.value",
"attributes.modifier.value",
"attributes.entityAttributes[0].attributeName",
"attributes.entityAttributes[0].attributeValue",
"attributes.entityAttributes[0].attributeUOM",
"attributes.entityAttributes[1].attributeName",
"attributes.entityAttributes[1].attributeValue",
"attributes.entityAttributes[1].attributeUOM",
]
for row in source_json:
propertyNames = row.split('.')
temp = ''
parent = {}
parentArr = []
parentObj = {}
# if len(propertyNames) > 1:
arrLength = len(propertyNames)
for i, (current) in enumerate(zip(propertyNames)):
if i == 0:
if '[' in current:
parent[current]=parentArr
else:
parent[current] = parentObj
temp = current
if i > 0 and i < arrLength - 1:
if '[' in current:
parent[current] = parentArr
else:
parent[current] = parentObj
temp = current
if i == arrLength - 1:
if '[' in current:
parent[current] = parentArr
else:
parent[current] = parentObj
temp = current
# temp[prev][current] = ""
# finalMapping[target] = target
print(parent)
There's a similar question at Convert Dot notation string into nested Python object with Dictionaries and arrays where the accepted answer works for this question, but has unused code paths (e.g. isInArray) and caters to unconventional conversions expected by that question:
❓ "arrOne[0]": "1,2,3" → "arrOne": ["1", "2", "3"] instead of
✅ "arrOne[0]": "1,2,3" → "arrOne": ["1,2,3"] or
✅ "arrOne[0]": "1", "arrOne[1]": "2", "arrOne[2]": "3" → "arrOne": ["1", "2", "3"]
Here's a refined implementation of the branch function:
def branch(tree, path, value):
key = path[0]
array_index_match = re.search(r'\[([0-9]+)\]', key)
if array_index_match:
# Get the array index, and remove the match from the key
array_index = int(array_index_match[0].replace('[', '').replace(']', ''))
key = key.replace(array_index_match[0], '')
# Prepare the array at the key
if key not in tree:
tree[key] = []
# Prepare the object at the array index
if array_index == len(tree[key]):
tree[key].append({})
# Replace the object at the array index
tree[key][array_index] = value if len(path) == 1 else branch(tree[key][array_index], path[1:], value)
else:
# Prepare the object at the key
if key not in tree:
tree[key] = {}
# Replace the object at the key
tree[key] = value if len(path) == 1 else branch(tree[key], path[1:], value)
return tree
Usage:
VALUE = ''
def create_dict(attributes):
d = {}
for path_str in attributes:
branch(d, path_str.split('.'), VALUE)
return d
source_json = [
"itemUniqueId",
"itemDescription",
"manufacturerInfo[0].manufacturer.value",
"manufacturerInfo[0].manufacturerPartNumber",
"attributes.noun.value",
"attributes.modifier.value",
"attributes.entityAttributes[0].attributeName",
"attributes.entityAttributes[0].attributeValue",
"attributes.entityAttributes[0].attributeUOM",
"attributes.entityAttributes[1].attributeName",
"attributes.entityAttributes[1].attributeValue",
"attributes.entityAttributes[1].attributeUOM",
]
assert create_dict(source_json) == {
"itemUniqueId": "",
"itemDescription": "",
"manufacturerInfo": [
{
"manufacturer": {
"value": ""
},
"manufacturerPartNumber": ""
}
],
"attributes": {
"noun": {
"value": ""
},
"modifier": {
"value": ""
},
"entityAttributes": [
{
"attributeName": "",
"attributeValue": "",
"attributeUOM": ""
},
{
"attributeName": "",
"attributeValue": "",
"attributeUOM": ""
}
]
}
}
First we should iterate over whole list and store each 3rd attributes, after that we could change this struct to our desired output:
from typing import Dict, List
source_json = [
"attributes.entityAttributes[0].attributeName",
"attributes.entityAttributes[0].attributeValue",
"attributes.entityAttributes[0].attributeUOM",
"attributes.entityAttributes[1].attributeName",
"attributes.entityAttributes[1].attributeValue",
"attributes.entityAttributes[1].attributeUOM",
"attributes.entityAttributes[2].attributeName"
]
def accumulate(source: List) -> Dict:
accumulator = {}
for v in source:
vs = v.split(".")
root_attribute = vs[0]
if not root_attribute in accumulator:
accumulator[root_attribute] = {}
i = vs[1].rfind('[')
k = (vs[1][:i], vs[1][i+1:-1])
if not k in accumulator[root_attribute]:
accumulator[root_attribute][k] = {}
accumulator[root_attribute][k][vs[2]] = ""
return accumulator
def get_result(accumulated: Dict) -> Dict:
result = {}
for k, v in accumulated.items():
result[k] = {}
for (entity, idx), v1 in v.items():
if not entity in result[k]:
result[k][entity] = []
if len(v1) == 3:
result[k][entity].append(v1)
return result
print(get_result(accumulate(source_json)))
The output will be:
{
'attributes':
{
'entityAttributes':
[
{
'attributeName': '',
'attributeValue': '',
'attributeUOM': ''
},
{'attributeName': '',
'attributeValue': '',
'attributeUOM': ''
}
]
}
}
In accumulate function we store 3rd level attributes in Dict with (entityAttributes, 0) ... (entityAttributes, 2) keys.
In get_result function we convert Dict with (entityAttributes, 0) ... (entityAttributes, 2) keys to Dict from string to List.
How about something like this:
import re
import json
source_json = [
"attributes.entityAttributes[0].attributeName",
"attributes.entityAttributes[0].attributeValue",
"attributes.entityAttributes[0].attributeUOM",
"attributes.entityAttributes[1].attributeName",
"attributes.entityAttributes[1].attributeValue",
"attributes.entityAttributes[1].attributeUOM",
"attributes.entityAttributes[2].attributeName"
]
def to_object(source_json):
def add_attribute(target, attribute_list):
head, tail = attribute_list[0], attribute_list[1:]
if tail:
add_attribute(target.setdefault(head,{}), tail)
else:
target[head] = ''
target = {}
for row in source_json:
add_attribute(target, re.split(r'[\.\[\]]+',row))
return target
print(json.dumps(to_object(source_json), indent=4))
Note that this will not exactly do what you requested. It interprets stores the array also as an object with keys '0' ... '2'. This makes it easier to implement and also more stable. What would you expect, when the input list missed the entries with entityAttributes[0]. Should the list include an empty element or something different. Anyway you save space by not including this element, which works only if you store the array in an object.
None of the answers provided so far strike me as very intuitive. Here's one way
to tackle the problem with three easy-to-understand functions.
Normalize inputs. First we need a function to normalize the inputs strings. Instead of rules-bearing strings like
'foo[0].bar' – where one must understand that integers
in square brackets imply a list – we want a simple tuple
of keys like ('foo', 0, 'bar').
def attribute_to_keys(a):
return tuple(
int(k) if k.isdigit() else k
for k in a.replace('[', '.').replace(']', '').split('.')
)
Build a uniform data structure. Second, we need a function to assemble a data structure consisting of dicts
of dicts of dicts ... all the way down.
def assemble_data(attributes):
data = {}
for a in attributes:
d = data
for k in attribute_to_keys(a):
d = d.setdefault(k, {})
return convert(data)
def convert(d):
# Just a placeholder for now.
return d
Convert the uniform data. Third, we need to implement a real version of the placeholder. Specifically, we
need it to recursively convert the uniform data structure into our ultimate
goal having (a) empty strings at leaf nodes, and (b) lists rather than dicts
whenever the dict keys are all integers. Note that this even fills in empty
list positions with an empty string (a contingency not covered in your problem
description; adjust as needed if you want a different behavior).
def convert(d):
if not d:
return ''
elif all(isinstance(k, int) for k in d):
return [convert(d.get(i)) for i in range(max(d) + 1)]
else:
return {k : convert(v) for k, v in d.items()}
You can use a custom builder class which implements __getattr__ and __getitem__ to gradually build the underlying object. This building can then be triggered by using eval on each of the attribute strings (note: eval is not safe for input from untrusted sources).
The following is an example implementation:
class Builder:
def __init__(self):
self.obj = None
def __getattr__(self, key):
if self.obj is None:
self.obj = {}
return self.obj.setdefault(key, Builder())
def __getitem__(self, index):
if self.obj is None:
self.obj = []
self.obj.extend(Builder() for _ in range(index+1-len(self.obj)))
return self.obj[index]
def convert(self):
if self.obj is None:
return ''
elif isinstance(self.obj, list):
return [v.convert() for v in self.obj]
elif isinstance(self.obj, dict):
return {k: v.convert() for k,v in self.obj.items()}
else:
assert False
attributes = [
'itemUniqueId',
'itemDescription',
'manufacturerInfo[0].manufacturer.value',
'manufacturerInfo[0].manufacturerPartNumber',
'attributes.noun.value',
'attributes.modifier.value',
'attributes.entityAttributes[0].attributeName',
'attributes.entityAttributes[0].attributeValue',
'attributes.entityAttributes[0].attributeUOM',
'attributes.entityAttributes[1].attributeName',
'attributes.entityAttributes[1].attributeValue',
'attributes.entityAttributes[1].attributeUOM',
]
builder = Builder()
for attr in attributes:
eval(f'builder.{attr}')
result = builder.convert()
import json
print(json.dumps(result, indent=4))
which gives the following output:
{
"itemUniqueId": "",
"itemDescription": "",
"manufacturerInfo": [
{
"manufacturer": {
"value": ""
},
"manufacturerPartNumber": ""
}
],
"attributes": {
"noun": {
"value": ""
},
"modifier": {
"value": ""
},
"entityAttributes": [
{
"attributeName": "",
"attributeValue": "",
"attributeUOM": ""
},
{
"attributeName": "",
"attributeValue": "",
"attributeUOM": ""
}
]
}
}
I have a json file for which I want to remove the $oid and $date and replace the keys like in the example below:
import json
def key_replacer(dictionary):
new_dictionary = {}
for k,v in dictionary.items():
if k in ('$oid', '$date'):
return v
if isinstance(v, dict):
v = key_replacer(v)
new_dictionary[k] = v
return new_dictionary
data = """{
"_id": {
"$oid": "5e7511c45cb29ef48b8cfcff"
},
"description": "some text",
"startDate": {
"$date": "5e7511c45cb29ef48b8cfcff"
},
"completionDate": {
"$date": "2021-01-05T14:59:58.046Z"
},
"videos":[{"$oid":"5ecf6cc19ad2a4dfea993fed"}]
}"""
info = json.loads(data)
refined = key_replacer(info)
new_data = json.dumps(refined)
print(new_data)
Output: {"_id": "5e7511c45cb29ef48b8cfcff", "description": "some text", "startDate": "5e7511c45cb29ef48b8cfcff", "completionDate": "2021-01-05T14:59:58.046Z", "videos": [{"$oid": "5ecf6cc19ad2a4dfea993fed"}]}. It works the way I want until "videos". How could I remove the $ sign for the "videos" part and replace the key like it happens in the other cases? It doesn't get into the contents of the list and I assume this is the cause.
Your original function doesn't account for the case where the dictionary value is a list. By accommodating for that, you will get the desired result
def key_replacer(dictionary):
new_dictionary = {}
for k, v in dictionary.items():
if k in ('$oid', '$date'):
return v
elif isinstance(v, dict):
v = key_replacer(v)
elif isinstance(v, list):
tmp = []
for itm in v:
tmp.append(key_replacer(itm))
v = tmp
new_dictionary[k] = v
return new_dictionary
NB: If the items in the list is not dictionaries the code will break, so be mindful of that
I have one json file and i need to list all "selftext" elements of all data.
Any example of it ?
data example
{ "data": [
{
"selftext": "hello there",
"textex": true,
},
If you want to be able to find a key from an arbitrary json at an arbitrary level, you should use recursion:
def findkey(data, key, resul = None):
if resul is None: resul=[] # initialize an empty list for the results
if isinstance(data, list): # walk down into lists
for d in data:
findkey(d, key, resul)
elif isinstance(data, dict): # dict processing
for k,v in data.items():
if (k == key) and isinstance(v, str): # the expected key and a string value?
resul.append(v)
elif isinstance(v, list) or isinstance(v, dict):
findkey(v, key, resul) # recurse if value is a list or a dict
return resul
Example:
>>> data = { "data": [
{
"selftext": "hello there",
"textex": True,
},
]}
>>> findkey(data, 'selftext')
['hello there']
json = '{
"app": {
"Garden": {
"Flowers": {
"Red flower": "Rose",
"White Flower": "Jasmine",
"Yellow Flower": "Marigold"
}
},
"Fruits": {
"Yellow fruit": "Mango",
"Green fruit": "Guava",
"White Flower": "groovy"
},
"Trees": {
"label": {
"Yellow fruit": "Pumpkin",
"White Flower": "Bogan"
}
}
}'
Here is my json string, which keeps on changing frquently so the keys position within the dictionary is not same everytime, i need to
search for a key and print it corresponding value, Since the json string changes everytime I have written an recursive function(See below) to search
for key in the new json string and print the value. However now the situation is we have same key multiple times with diff values, how can
i get the complete path of the key so it would be easier to understand which key value it is, for example the result should be like this:
app.Garden.Flowers.white Flower = Jasmine
app.Fruits.White Flower = groovy
app.Trees.label.White Flower = Bogan
My code so far:
import json
with open('data.json') as data_file:
j = json.load(data_file)
# j=json.loads(a)
def find(element, JSON):
if element in JSON:
print JSON[element].encode('utf-8')
for key in JSON:
if isinstance(JSON[key], dict):
find(element, JSON[key])
find(element to search,j)
You could add a string parameter that keeps track of the current JSON path. Something like the following could work:
def find(element, JSON, path, all_paths):
if element in JSON:
path = path + element + ' = ' + JSON[element].encode('utf-8')
print path
all_paths.append(path)
for key in JSON:
if isinstance(JSON[key], dict):
find(element, JSON[key],path + key + '.',all_paths)
You would call it like this:
all_paths = []
find(element_to_search,j,'',all_paths)
def getDictValueFromPath(listKeys, jsonData):
"""
>>> mydict = {
'a': {
'b': {
'c': '1'
}
}
}
>>> mykeys = ['a', 'b']
>>> getDictValueFromPath(mykeys, mydict)
{'c': '1'}
"""
localData = jsonData.copy()
for k in listKeys:
try:
localData = localData[k]
except:
return None
return localData
gist
The following code snippet will give a list of paths that are accessible in the JSON. It's following the convention of JSON paths where [] signifies that the path is a list.
def get_paths(source):
paths = []
if isinstance(source, collections.abc.MutableMapping):
for k, v in source.items():
if k not in paths:
paths.append(k)
for x in get_paths(v):
if k + '.' + x not in paths:
paths.append(k + '.' + x)
elif isinstance(source, collections.abc.Sequence) and not isinstance(source, str):
for x in source:
for y in get_paths(x):
if '[].' + y not in paths:
paths.append('[].' + y)
return paths
Here is a modified version of Brian's answer that supports lists and returns the result:
def find(element, JSON, path='', all_paths=None):
all_paths = [] if all_paths is None else all_paths
if isinstance(JSON, dict):
for key, value in JSON.items():
find(element, value, '{}["{}"]'.format(path, key), all_paths)
elif isinstance(JSON, list):
for index, value in enumerate(JSON):
find(element, value, '{}[{}]'.format(path, index), all_paths)
else:
if JSON == element:
all_paths.append(path)
return all_paths
Usage:
find(JSON, element)