Replacing keys when dict value is a list - python

I have a json file for which I want to remove the $oid and $date and replace the keys like in the example below:
import json
def key_replacer(dictionary):
new_dictionary = {}
for k,v in dictionary.items():
if k in ('$oid', '$date'):
return v
if isinstance(v, dict):
v = key_replacer(v)
new_dictionary[k] = v
return new_dictionary
data = """{
"_id": {
"$oid": "5e7511c45cb29ef48b8cfcff"
},
"description": "some text",
"startDate": {
"$date": "5e7511c45cb29ef48b8cfcff"
},
"completionDate": {
"$date": "2021-01-05T14:59:58.046Z"
},
"videos":[{"$oid":"5ecf6cc19ad2a4dfea993fed"}]
}"""
info = json.loads(data)
refined = key_replacer(info)
new_data = json.dumps(refined)
print(new_data)
Output: {"_id": "5e7511c45cb29ef48b8cfcff", "description": "some text", "startDate": "5e7511c45cb29ef48b8cfcff", "completionDate": "2021-01-05T14:59:58.046Z", "videos": [{"$oid": "5ecf6cc19ad2a4dfea993fed"}]}. It works the way I want until "videos". How could I remove the $ sign for the "videos" part and replace the key like it happens in the other cases? It doesn't get into the contents of the list and I assume this is the cause.

Your original function doesn't account for the case where the dictionary value is a list. By accommodating for that, you will get the desired result
def key_replacer(dictionary):
new_dictionary = {}
for k, v in dictionary.items():
if k in ('$oid', '$date'):
return v
elif isinstance(v, dict):
v = key_replacer(v)
elif isinstance(v, list):
tmp = []
for itm in v:
tmp.append(key_replacer(itm))
v = tmp
new_dictionary[k] = v
return new_dictionary
NB: If the items in the list is not dictionaries the code will break, so be mindful of that

Related

How create dict inside dict by string using dot

I have a dict list of dicts.
dict_list = [
{"nameClient": "client.name"},
{"emailClient": "client.email"},
{"Document.typeDocument": "client.type_document"},
{"Document.numberDocument": "client.number_document"},
{"Document.docOne.number": "client.docone_number"},
{"Document.docOne.type": "client.docone_type"},
{"Document.docTwo.number": "client.doctwo_number"},
{"Document.docOne.extra.number": "client.docone_extra_number"},
]
I want to create a dict based on key of this dicts and get value from my class based on value of this dicts.
Client Class and Values initialized:
class Client:
def __init__(self, data):
self.name = data['name']
self.email = data['email']
self.type_document = data['type_document']
self.number_document = data['number_document']
self.docone_number = data['docone_number']
self.docone_type = data['docone_type']
self.doctwo_number = data['doctwo_number']
self.docone_extra_number = data['docone_extra_number']
# this will be passed on build_final_dict(FINAL_DICT, i, k, v, client)
client1 = Client({
"name": "Stack",
"email": "xxxxx#xxxx.com",
"type_document": "TPS",
"number_document": "22222222",
"docone_number": "11111111",
"docone_type": "docone type",
"doctwo_number": "doc two number",
"docone_extra_number": "doc extra number",
})
So I started passing to a function the key/value
FINAL_DICT = {}
for i in dict_list:
for k, v in i.items():
build_final_dict(FINAL_DICT, i, k, v, client)
My build_final_dict()
def build_final_dict(FINAL_DICT, i, k, v, client):
if '.' not in k:
FINAL_DICT[k] = getattr(client, v.replace('client.', ''))
else:
subdict = k.split('.')[0] # ex documento
subdict_key = i.items()[0][0]
subdict_key = subdict_key.replace('%s.' % subdict, '')
subdict_value = i.items()[0][1]
subdict_value = subdict_value.replace('client.', '')
if subdict not in FINAL_DICT:
FINAL_DICT[subdict] = dict()
result_value = getattr(client, subdict_value)
if '.' not in subdict_key:
FINAL_DICT[subdict][subdict_key] = result_value
else:
new_subkey = subdict_key.split('.')[0]
new_subvalue = subdict_key.split('.')[1]
if new_subkey not in FINAL_DICT[subdict]:
FINAL_DICT[subdict][new_subkey] = dict()
build_final_dict(FINAL_DICT[subdict][new_subkey], i, new_subvalue, v, client)
Actual Result:
{
"emailClient":"xxxxx#xxxx.com",
"nameClient":"Stack",
"Document":{
"typeDocument":"TPS",
"numberDocument":"22222222",
"docTwo":{
"number":"doc two number"
},
"docOne":{
"type":"docone type",
"extra":"doc extra number", // should continue creating dict within dict...
"number":"11111111"
}
}
}
Most of the dictionary is right. But the "extra" dictionary that I put inside a dictionary (third sub level) did not create a new dictionary and put the final value.
There is the possibility of having infinite sub dictionaries, I need my script to be prepared for that.
Result I expected (based on Client class Data):
{
"emailClient":"xxxxx#xxxx.com",
"nameClient":"Stack",
"Document":{
"typeDocument":"TPS",
"numberDocument":"22222222",
"docTwo":{
"number":"doc two number"
},
"docOne":{
"type":"docone type",
"extra": { "number": "doc extra number" }, // dict, not string
"number":"11111111"
}
}
}
I think, this will help you in transforming the initial JSON to the structure you need. This is in accordance to your original requirement of
I want to create a dict based on key of this dicts and get value from
my class based on value of this dicts.
I would try this in a split approach, to create nested dicts for every dict in your list (if they have nesting in keys), and then merge them together as single unit.
See an example approach here:
from collections.abc import MutableMapping
from functools import reduce
def merge(d1, d2):
# Merge 2 dictionaries -deep.
for k, v in d1.items():
if k in d2:
if all(isinstance(e, MutableMapping) for e in (v, d2[k])):
d2[k] = merge(v, d2[k])
md = d1.copy()
md.update(d2)
return md
def explode_to_dict(key, value):
# Create nested dicts based on the key structure
if "." in key:
p, c = key.rsplit(".", 1)
return explode_to_dict(p, {c: value})
else:
return {key: value}
if __name__ == '__main__':
dict_list = [
{"nameClient": "client.name"},
{"emailClient": "client.email"},
{"Document.typeDocument": "client.type_document"},
{"Document.numberDocument": "client.number_document"},
{"Document.docOne.number": "client.docone_number"},
{"Document.docOne.type": "client.docone_type"},
{"Document.docTwo.number": "client.doctwo_number"},
{"Document.docOne.extra.number": "client.docone_extra_number"},
]
result_dict = {}
result_dict = reduce(merge, [explode_to_dict(*d.popitem()) for d in dict_list])
print(json.dumps(result_dict))
This provides a structure like below(expected):
{
"nameClient": "client.name",
"emailClient": "client.email",
"Document": {
"typeDocument": "client.type_document",
"numberDocument": "client.number_document",
"docOne": {
"number": "client.docone_number",
"type": "client.docone_type",
"extra": {
"number": "client.docone_extra_number"
}
},
"docTwo": {
"number": "client.doctwo_number"
}
}
}
I suppose, you can proceed with your class manipulation from here!

Having an issue parsing through this json in python

I have created a var that is equal to t.json. The JSON file is a follows:
{
"groups": {
"customerduy": {
"nonprod": {
"name": "customerduynonprod",
"id": "529646781943",
"owner": "cloudops#coerce.com",
"manager_email": ""
},
"prod": {
"name": "phishing_duyaccountprod",
"id": "241683454720",
"owner": "cloudops#coerce.com",
"manager_email": ""
}
},
"customerduyprod": {
"nonprod": {
"name": "phishing_duyaccountnonprod",
"id": "638968214142",
"owner": "cloudops#coerce.com",
"manager_email": ""
}
},
"ciasuppliergenius": {
"prod": {
"name": "ciasuppliergeniusprod",
"id": "220753788760",
"owner": "cia_developers#coerce.com",
"manager_email": "jarks#coerce.com"
}
}
}
}
my goal was to pars this JSON file and get value for "owner" and output it to a new var. Example below:
t.json = group_map
group_id_aws = group(
group.upper(),
"accounts",
template,
owner = group_map['groups']['prod'],
manager_description = "Groups for teams to access their product accounts.",
The error I keep getting is: KeyError: 'prod'
Owner occurs 4 times, so here is how to get all of them.
import json
# read the json
with open("C:\\test\\test.json") as f:
data = json.load(f)
# get all 4 occurances
owner_1 = data['groups']['customerduy']['nonprod']['owner']
owner_2 = data['groups']['customerduy']['prod']['owner']
owner_3 = data['groups']['customerduyprod']['nonprod']['owner']
owner_4 = data['groups']['ciasuppliergenius']['prod']['owner']
# print results
print(owner_1)
print(owner_2)
print(owner_3)
print(owner_4)
the result:
cloudops#coerce.com
cloudops#coerce.com
cloudops#coerce.com
cia_developers#coerce.com
You get a key error since the key 'prod' is not in 'groups'
What you have is
group_map['groups']['customerduy']['prod']
group_map['groups']['ciasuppliergenius']['prod']
So you will have to extract the 'owner' from each element in the tree:
def s(d,t):
for k,v in d.items():
if t == k:
yield v
try:
for i in s(v,t):
yield i
except:
pass
print(','.join(s(j,'owner')))
If your JSON is loaded in variable data, you can use a recursive function
that deals with the two containers types (dict and list) that can occur
in a JSON file, recursively:
def find_all_values_for_key(d, key, result):
if isinstance(d, dict):
if key in d:
result.append(d[key])
return
for k, v in d.items():
find_all_values_for_key(v, key, result)
elif isinstance(d, list):
for elem in d:
find_all_values_for_key(elem, key, result)
owners = []
find_all_values_for_key(data, 'owner', owners)
print(f'{owners=}')
which gives:
owners=['cloudops#coerce.com', 'cloudops#coerce.com', 'cloudops#coerce.com', 'cia_developers#coerce.com']
This way you don't have to bother with the names of intermediate keys, or in general the structure of your JSON file.
You don't have any lists in your example, but it is trivial to recurse through
them to any dict with an owner key that might "lurk" somewhere nested
under a a list element, so it is better to deal with potential future changes
to the JSON.

Convert dot delimited string to json Python

i've got a dot delimited string which I need to convert to Json. This is an example with different types of strings:
my.dictionary.value -> value
my.dictionary.list[0].value -> value
my.dictionary.list[1].value.list[0].value -> value
I have no problems converting the first type of string using a recursive approach:
def put(d, keys, item):
if "." in keys:
key, rest = keys.split(".", 1)
if key not in d:
d[key] = {}
put(d[key], rest, item)
else:
d[keys] = item
But i'm struggling to find a solution for the lists. Is there a library that provides out of the box string to json conversion? Thank you for your time.
AFAIK, there isn't any modules that would do this
Here is a sample code to converted a series of dotted strings into json format. You just have create a new list when you see the pattern [n] in the string that would be used as a key.
import re
import json
def parse_dotted_strlines(strlines):
res= {}
for line in strlines.splitlines():
parse_dotted_str(line, res)
return res
def parse_dotted_str(s, res):
if '.' in s:
key, rest = s.split('.', 1)
# Check if key represents a list
match = re.search(r'(.*)\[(\d)\]$', key)
if match:
# List
key, index = match.groups()
val = res.get(key, {}) or []
assert type(val) == list, f'Cannot set key {key} as of type list as i
t was earlier marked as {type(val)}'
while len(val) <= int(index):
val.append({})
val[index] = parse_dotted_str(rest, {})
res[key] = val
else:
# Dict
res[key] = parse_dotted_str(rest, res.get(key, {}))
elif '->' in s:
key, val = s.split('->')
res[key.strip()] = val.strip()
return res
Sample input and output
lines = """
my.dictionary.value -> value
my.dictionary.list[0].value -> value
my.dictionary.list[1].value.list[0].value -> value
"""
res = parse_dotted_strlines(lines)
print (json.dumps(res, indent=4))
{
"my": {
"dictionary": {
"value": "value",
"list": [
{
"value": "value"
},
{
"value": {
"list": [
{
"value": "value"
}
]
}
}
]
}
}
}
the json package is what you need
import json
mydict = """{
"str1": "str",
"list1": ["list1_str1", "list1_str2"],
"list2": ["list2_str1", "list2_str2", ["list2_str11", "list_str12"]]
}"""
json.loads(mydict)
>> {'str1': 'str',
'list1': ['list1_str1', 'list1_str2'],
'list2': ['list2_str1', 'list2_str2', ['list2_str11', 'list_str12']]}

Is there any JSON tag filtering example?

I have one json file and i need to list all "selftext" elements of all data.
Any example of it ?
data example
{ "data": [
{
"selftext": "hello there",
"textex": true,
},
If you want to be able to find a key from an arbitrary json at an arbitrary level, you should use recursion:
def findkey(data, key, resul = None):
if resul is None: resul=[] # initialize an empty list for the results
if isinstance(data, list): # walk down into lists
for d in data:
findkey(d, key, resul)
elif isinstance(data, dict): # dict processing
for k,v in data.items():
if (k == key) and isinstance(v, str): # the expected key and a string value?
resul.append(v)
elif isinstance(v, list) or isinstance(v, dict):
findkey(v, key, resul) # recurse if value is a list or a dict
return resul
Example:
>>> data = { "data": [
{
"selftext": "hello there",
"textex": True,
},
]}
>>> findkey(data, 'selftext')
['hello there']

How to generate unique name for each JSON value

I have following JSON, returned from a REST service, where I want to generate a unique names for each value by combining parent keys. For example. name+phone+address+city+name , name+phone+address+city+population+skilled+male and so on.
{
"name": "name",
"phone": "343444444",
"address": {
"lat": 23.444,
"lng": 34.3322,
"city":{
"name": "city name",
"population": {
"skilled": {
"male": 2,
"female": 4
},
"uneducated": {
"male": 20,
"femail": 4
}
}
}
},
"email": "email",
"education": "phd"
}
I want to combine all key names starting from the parent of the JSON tree.
Here is what I am doing
class TestJson
def walk_through(self, json_object):
for k, v in json_object.items():
self.x_path = self.x_path + k
if type(v) is dict:
self.walk_through(v)
else:
print(self.x_path)
self.x_path = ""
This code is printing keys but only starting from the current parent node. I want to combine all keys up to root of the json.
If you ignore the name and phone keys, since they are not ancestors of city name or skilled male and the order of keys is not guaranteed, you can recursively build a flattened dict.
def walk_through(json_object):
d = {}
for k, v in json_object.items():
if isinstance(v, dict):
v = walk_through(v)
for vk, vv in v.items():
d["%s+%s" % (k, vk)] = vv
else:
d[k] = v
return d
print(json.dumps(walk_through(json_object), indent=2))
This prints:
{
"address+city+population+skilled+male": 2,
"name": "name",
"address+lng": 34.3322,
"address+city+name": "city name",
"address+lat": 23.444,
"address+city+population+uneducated+male": 20,
"phone": "343444444",
"address+city+population+uneducated+femail": 4,
"education": "phd",
"email": "email",
"address+city+population+skilled+female": 4
}
Note: this ignores lists an will not find dicts inside them.
If you want to print all keys of your python dict you can do the following:
def print_keys(d):
for key, value in d.iteritems():
print key,
if isinstance(value, dict):
print_keys(value)

Categories

Resources