Want to access inner elements of JSON with a loop

Want to access inner elements of JSON with a loop - python

I want to loop over the JSON below and collect the inner alias values of both dims and metrics, appending them to two separate Python lists, dimsList and metricsList.
json_obj =
{
"dataset":"246",
"dims":{
"Location":{
"alias":"Location",
"format":""
}
},
"metrics":{
"ToTal_Dwell":[
{
"agg":"sum",
"format":"",
"alias":"ToTal_Dwell"
}
]
},
"filters":"",
"limit":"10"
}
I expect the result to be dimsList = ['Location'] and metricsList = ['ToTal_Dwell'].

You can iterate recursively using .items(): every time you see an inner dict, make a recursive call; for an inner list, make one recursive call per dict in the list.
try this:
json_obj = {
"dataset": "246",
"dims": {
"Location": {
"alias": "Location",
"format": ""
}
},
"metrics": {
"ToTal_Dwell": [
{
"agg": "sum",
"format": "",
"alias": "ToTal_Dwell"
}
]
},
"filters": "",
"limit": "10"
}
def extract_inner_values(d, key):
    """Collect every value stored under ``key`` anywhere inside ``d``.

    ``d`` is walked recursively: dict values that are themselves dicts or
    lists are searched, and every element of a list is searched in turn.
    Scalars (strings, numbers, ``None``) found along the way are ignored,
    so lists mixing dicts with plain values do not raise.

    Args:
        d: A dict or list built from nested dicts, lists and scalars.
        key: The dictionary key to look for.

    Returns:
        A list of the matching values, in traversal order. When a matching
        value is itself a dict or list it is both returned and searched.
    """
    results = []
    if isinstance(d, dict):
        for k, v in d.items():
            if k == key:
                results.append(v)
            if isinstance(v, (dict, list)):
                results.extend(extract_inner_values(v, key))
    elif isinstance(d, list):
        # Recursing on each element lets non-dict elements fall through
        # to the empty result instead of failing on ``.items()``.
        for item in d:
            results.extend(extract_inner_values(item, key))
    return results
dimsList = extract_inner_values(json_obj["dims"], "alias")
metricsList = extract_inner_values(json_obj["metrics"], "alias")
print(dimsList)
print(metricsList)
Output:
['Location']
['ToTal_Dwell']

Related

Get value in json file with key while ignoring path

Let's say I have this type of json file:
{
"body": [
{
"content": "abcd"
},
{
"content": "test"
},
{
"paragraph": {
"content": "ok"
}
}
]
}
I want to get every value stored under "content", but I can't simply go through every item in body and use the key "content", because the key isn't at the same path in every item.

You can try recursion:
dct = {
"body": [
{"content": "abcd"},
{"content": "test"},
{"paragraph": {"content": "ok"}},
]
}
def get_content(o):
    """Yield every value stored under a ``"content"`` key, at any depth.

    Dicts and lists are walked recursively. A value found under
    ``"content"`` is yielded as-is and is not searched any further; any
    other dict value is searched in turn. Objects that are neither dicts
    nor lists yield nothing.

    Args:
        o: A nested structure of dicts, lists and scalars.

    Yields:
        The ``"content"`` values in traversal order.
    """
    if isinstance(o, dict):
        for k, v in o.items():
            if k == "content":
                yield v
            else:
                yield from get_content(v)
    elif isinstance(o, list):
        for v in o:
            yield from get_content(v)
print(list(get_content(dct)))
Prints:
['abcd', 'test', 'ok']

How to convert nested json to csv with multiple different names?

I've been trying to convert a nested json file to csv. Here is a small example of the json file.
json_data =
{"labels":
{
"longfilename01:png": {
"events": {
"-N8V6uUR__vvB0qv1lPb": {
"t": "2022-08-02T19:54:23.608Z",
"user": "bmEhwNCZT9Wiftgvsopb7vBjO9o1"
}
},
"questions": {
"would-you": {
"-N8V6uUR__vvB0qv1lPb": {
"answer": "no",
"format": 1
}
}
}
},
"longfilename02:png": {
"events": {
"-N8ILnaH-1ylwp2LGvtP": {
"t": "2022-07-31T08:24:23.698Z",
"user": "Qf7C5cXQkXfQanxKPR0rsKW4QzE2"
}
},
"questions": {
"would-you": {
"-N8ILnaH-1ylwp2LGvtP": {
"answer": "yes",
"format": 1
}
}
}
}
I've tried multiple ways to get this output:
Labels
Event
User
Time
Answer
Long filename 01
-N8V6uUR__vvB0qv1lPb
bmEhwNCZT9Wiftgvsopb7vBjO9o1
2022-08-02T19:54:23.608Z
no
Long filename 02
-N8ILnaH-1ylwp2LGvtP
Qf7C5cXQkXfQanxKPR0rsKW4QzE2
2022-07-31T08:24:23.698Z
yes
If I normalise with:
f= open('after_labels.json')
data = json.load(f)
df = pd.json_normalize(data)
Or try to flatten the file with multiple functions such as:
def flatten_json(json):
    """Flatten a nested dict into a single-level dict.

    Each leaf value is stored under a key made of the path to it, joined
    with ``'__'``. List elements contribute their index (as a string) to
    the path. Empty dicts and empty lists contain no leaves and therefore
    produce no entry.

    Args:
        json: The top-level dict to flatten. The parameter name is kept
            for backward compatibility; inside this function it shadows
            any module-level ``json`` name.

    Returns:
        A new dict mapping ``'__'``-joined paths to leaf values.
    """
    def process_value(keys, value, flattened):
        # ``keys`` is the path accumulated so far; a fresh list is built
        # for every child so sibling branches never share a path.
        if isinstance(value, dict):
            for key, child in value.items():
                process_value(keys + [key], child, flattened)
        elif isinstance(value, list):
            for idx, v in enumerate(value):
                process_value(keys + [str(idx)], v, flattened)
        else:
            flattened['__'.join(keys)] = value

    flattened = {}
    for key, value in json.items():
        process_value([key], value, flattened)
    return flattened
df = flatten_json(data)
or
from copy import deepcopy
import pandas
def cross_join(left, right):
    """Return the cross product of two lists of row dicts.

    Every row of ``left`` is combined with every row of ``right``; on a
    key clash the value from the right row wins. Each produced row is a
    deep copy, so it shares no mutable state with the inputs.

    Args:
        left: List of dict rows.
        right: List of dict rows.

    Returns:
        A new list of merged rows. When ``right`` is empty, ``left``
        itself is returned unchanged (the same list object).
    """
    new_rows = [] if right else left
    for left_row in left:
        for right_row in right:
            # Merge first, then copy once: same result as copying the left
            # row, adding the right values and copying again.
            new_rows.append(deepcopy({**left_row, **right_row}))
    return new_rows
def flatten_list(data):
    """Yield the non-list elements of ``data``, descending into nested lists.

    Args:
        data: An iterable whose elements may themselves be lists, nested
            to any depth.

    Yields:
        Each element that is not a list, in left-to-right order.
    """
    for elem in data:
        if isinstance(elem, list):
            yield from flatten_list(elem)
        else:
            yield elem
def json_to_dataframe(data_in):
    """Convert a nested JSON-like structure into a ``pandas.DataFrame``.

    The structure is first turned into a list of flat row dicts whose keys
    are dotted paths (``'outer.inner'``), then handed to
    ``pandas.DataFrame``.

    Args:
        data_in: Nested dicts, lists and scalars.

    Returns:
        A DataFrame built from the flattened rows.
    """
    def flatten_json(data, prev_heading=''):
        if isinstance(data, dict):
            # A dict combines its children: start from one empty row and
            # cross-join it with the rows produced by each value.
            rows = [{}]
            for key, value in data.items():
                rows = cross_join(rows, flatten_json(value, prev_heading + '.' + key))
        elif isinstance(data, list):
            # A list stacks its items: each item contributes its own rows
            # under the same heading.
            rows = []
            for item in data:
                rows.extend(flatten_list(flatten_json(item, prev_heading)))
        else:
            # Scalar leaf: a single one-column row; [1:] drops the leading
            # '.' that the dict branch prepends to every heading.
            rows = [{prev_heading[1:]: data}]
        return rows

    return pandas.DataFrame(flatten_json(data_in))
df = json_to_dataframe(data)
print(df)
It gives me 292 columns and I suspect this is because of the long unique filenames.
I can't change the JSON file before processing. Otherwise the simple solution would be to store the name as a value ("filename": "longfilename01:png"), so that the keys would all be consistent and I wouldn't have this problem.
I would be grateful for any other clever ideas on how to solve this.

Try:
json_data = {
"labels": {
"longfilename01:png": {
"events": {
"-N8V6uUR__vvB0qv1lPb": {
"t": "2022-08-02T19:54:23.608Z",
"user": "bmEhwNCZT9Wiftgvsopb7vBjO9o1",
}
},
"questions": {
"would-you": {
"-N8V6uUR__vvB0qv1lPb": {"answer": "no", "format": 1}
}
},
},
"longfilename02:png": {
"events": {
"-N8ILnaH-1ylwp2LGvtP": {
"t": "2022-07-31T08:24:23.698Z",
"user": "Qf7C5cXQkXfQanxKPR0rsKW4QzE2",
}
},
"questions": {
"would-you": {
"-N8ILnaH-1ylwp2LGvtP": {"answer": "yes", "format": 1}
}
},
},
}
}
df = pd.DataFrame(
[
{
"Labels": k,
"Event": list(v["events"])[0],
"User": list(v["events"].values())[0]["user"],
"Time": list(v["events"].values())[0]["t"],
"Answer": list(list(v["questions"].values())[0].values())[0][
"answer"
],
}
for k, v in json_data["labels"].items()
]
)
print(df)
Prints:
Labels Event User Time Answer
0 longfilename01:png -N8V6uUR__vvB0qv1lPb bmEhwNCZT9Wiftgvsopb7vBjO9o1 2022-08-02T19:54:23.608Z no
1 longfilename02:png -N8ILnaH-1ylwp2LGvtP Qf7C5cXQkXfQanxKPR0rsKW4QzE2 2022-07-31T08:24:23.698Z yes

Dictionary replacing values using second dictionary (but searching the value rather than the whole value)

I am trying to replace a value in a dict with another value in a dict. This has been done plenty on here from my searches but I think I have a unique case as I need to search the value for a value rather than replace the whole value.
I have tried this so far and it partially works:
input_data = [
{
"name": "system1",
"other_param": "%param1%",
"secret_text": "%secret%",
"options": {
"conditions": "[\"f--updated_at,geq,\\\"%max_date%\\\"\",\"f--status,eq,\\\"Void\\\"\"]"
}
}
]
variables = {
'%secret%': "abc",
'%param1%': "text_param",
'%max_date%': '2018-01-01'
}
def process_variables(dict_obj, dict_vars):
for k, v in dict_obj.items():
for var, val in dict_vars.items():
dict_obj[k] = str(v).replace(var, val)
return dict_obj
for x in input_data:
print(process_variables(x, variables))
The desired result is:
{
"name": "system1",
"other_param": "text_param",
"secret_text": "abc",
"options": {
"conditions": "[\"f--updated_at,geq,\\\"2018-01-01\\\"\",\"f--status,eq,\\\"Void\\\"\"]"
}
}
Actual result:
{
"name":"system1",
"other_param":"%param1%",
"secret_text":"%secret%",
"options":"{\\'conditions\\': \\'[\"f--updated_at,geq,\\\\\"2018-01-01\\\\\"\",\"f--status,eq,\\\\\"Void\\\\\"\"]\\'}"
}
As you can see it seems to flatten the dict down and not replace the top level values.
Is there a better way to achieve this? I can alter the JSON input if needed but I need to do some sort of variable substitution as I have hundreds of inputs with these common values I plan to iterate over.

You could loop over input_data recursively and replace each value separately. Looping recursively has the benefit that you don't need to care how deeply input_data is nested:
def traverse_dict(dct, replacements):
    """Substitute placeholders in every string of a nested structure.

    Lists and dicts are walked recursively and updated in place; every
    string value has each key of ``replacements`` replaced by its mapped
    value. Values that are not strings, lists or dicts are returned
    unchanged.

    Args:
        dct: A list, dict, string or other scalar (despite the name, any
            of these is accepted).
        replacements: Mapping of placeholder text to replacement text.

    Returns:
        The processed object: the same list/dict object that was passed
        in (mutated in place), a new string, or the untouched scalar.
    """
    if isinstance(dct, list):
        for i, item in enumerate(dct):
            # Pass the caller's mapping down, not a module-level global.
            dct[i] = traverse_dict(item, replacements)
    elif isinstance(dct, dict):
        # Only values are reassigned, so the key set is stable while
        # iterating.
        for k, v in dct.items():
            dct[k] = traverse_dict(v, replacements)
    elif isinstance(dct, str):
        # all string values in the JSON end up here
        for placeholder, value in replacements.items():
            dct = dct.replace(placeholder, value)
    return dct
result = traverse_dict(input_data, variables)
print(result)
Output:
[
{
"name": "system1",
"options": {
"conditions": "[\"f--updated_at,geq,\\\"2018-01-01\\\"\",\"f--status,eq,\\\"Void\\\"\"]"
},
"other_param": "text_param",
"secret_text": "abc"
}
]
Making input_data more complex still replaces correctly:
input_data = [
{
"name": "system1",
"other_param": "%param1%",
"secret_text": "%secret%",
"options": {
"conditions": "[\"f--updated_at,geq,\\\"%max_date%\\\"\",\"f--status,eq,\\\"Void\\\"\"]",
"foo": [
{
"bar": {
"baz": "%secret%",
}
}
]
}
}
]
Output:
[
{
"name": "system1",
"options": {
"conditions": "[\"f--updated_at,geq,\\\"2018-01-01\\\"\",\"f--status,eq,\\\"Void\\\"\"]",
"foo": [
{
"bar": {
"baz": "abc"
}
}
]
},
"other_param": "text_param",
"secret_text": "abc"
}
]

I was able to achieve the result treating the input data as a string and using ast (Abstract Syntax Trees):
import ast

# Turn the whole structure into its Python literal text, substitute every
# placeholder textually, then parse the text back into Python objects.
# NOTE: the replacement text is inserted into the literal verbatim, so a
# value containing a quote or backslash can produce text that
# ast.literal_eval rejects or parses differently.
data = str(input_data)
for k, v in variables.items():
    data = data.replace(k, v)
data = ast.literal_eval(data)
print(type(data))
print(data)
Output:
<class 'list'>
[{'name': 'system1', 'other_param': 'text_param', 'secret_text': 'abc', 'options': {'conditions': '["f--updated_at,geq,\\"2018-01-01\\"","f--status,eq,\\"Void\\""]'}}]
I can't vouch for this being the best way, but it simplifies the process. Worth a try?

Sorting nested dictionaries using its second key

I am trying to sort a nested dictionary using its second key where my dictionary looks like:
my_dictionary = {
"char": {
"3": {
"genman": [
"motion"
]
}
},
"fast": {
"2": {
"empty": []
}
},
"EMPT": {
"0": {}
},
"veh": {
"1": {
"tankers": [
"varA",
"varB"
]
}
}
}
And my expected output will be:
my_dictionary = {
"EMPT": {
"0": {}
},
"veh": {
"1": {
"tankers": [
"varA",
"varB"
]
}
},
"fast": {
"2": {
"empty": []
}
},
"char": {
"3": {
"genman": [
"motion"
]
}
}
}
Tried using the following code:
new_dict = {}
for k, v in my_dictionary.items():
for s in sorted(my_dictionary.itervalues()):
if not s.keys()[0]:
new_val = my_dictionary[k].get(s.keys()[0])
my_dictionary[s.keys()[0]] = new_val
my_dictionary.update(new_dict)
It fails badly, and I am getting the same result as my initial dictionary.

This works:
sorted(my_dictionary.items(), key=lambda x: list(x[1].keys())[0])
Returns:
[('EMPT', {'0': {}}),
('veh', {'1': {'tankers': ['varA', 'varB']}}),
('fast', {'2': {'empty': []}}),
('char', {'3': {'genman': ['motion']}})]
Sorted receives a list of key-value pairs, we sort using the result of lambda x: list(x[1].keys())[0] which takes a list of the keys in the inner dict, then grabs the first key (need to do this because dict_keys directly is not indexable).
Edit: the result is a list of key, value pairs but it can be fed into an OrderedDict to use it as a dict.

Actually, a plain dict has no guaranteed order in older Python versions (before 3.7); however, you can use OrderedDict instead.
from collections import OrderedDict
my_dictionary = {
"char": {
"3": {
"genman": [
"motion"
]
}
},
"fast": {
"2": {
"empty": []
}
},
"EMPT": {
"0": {}
},
"veh": {
"1": {
"tankers": [
"varA",
"varB"
]
}
}
}
s = sorted((list(v.keys())[0], k) for k, v in my_dictionary.items())
new_dic = OrderedDict([(k,my_dictionary[k]) for _, k in s])

Add the sub-dictionary elements to a list in Python

I am trying to add elements of my sub-dictionary to a list, but it is giving me a type error.
Here is dictionary and my code:
{
"key1": "value1",
"key2": {
"skey1": "svalue2",
"skey2": {
"sskey1": [{
"url": "value",
"sid": "511"
},
{
"url": "value",
"sid": "522"
},
{
"url": "value",
"sid": "533"
}]
}
}
}
I want to add the sid into the list like [511,522,533]:
here is my code:
rsId=[]
for i in op['key2']['skey2']['sskey1']:
for k,v in i.items():
if k=='sid':
rsId.append(v)

# Sample data: the records of interest are the dicts in the list stored at
# D['key2']['skey2']['sskey1'].
D = {
    "key1": "value1",
    "key2": {
        "skey1": "svalue2",
        "skey2": {
            "sskey1": [
                {
                    "url": "value",
                    "sid": "511"
                },
                {
                    "url": "value",
                    "sid": "522"
                },
                {
                    "url": "value",
                    "sid": "533"
                }
            ]
        }
    }
}
# Collect the 'sid' of every record; the values stay strings.
res = []
for i in D['key2']['skey2']['sskey1']:
    res.append(i['sid'])
# print() is a function in Python 3; the statement form `print res` is a
# SyntaxError there.
print(res)
Result:
['511', '522', '533']
or a one line code:
res = [i['sid'] for i in D['key2']['skey2']['sskey1']]

You can use a list comprehension:
rsId = [v for item in op['key2']['skey2']['sskey1'] for k, v in item.items() if k == 'sid']

You can try with one line something like this:
print(list(map(lambda x:x['sid'],data['key2']['skey2']['sskey1'])))
output:
['511', '522', '533']
If you want value in int then:
print(list(map(lambda x:int(x['sid']),data['key2']['skey2']['sskey1'])))
output:
[511, 522, 533]
when data is:
data = {
"key1":"value1",
"key2":{
"skey1":"svalue2",
"skey2":{
"sskey1":[{
"url":"value",
"sid":"511"
},
{
"url":"value",
"sid":"522"
},
{
"url":"value",
"sid":"533"
} ]
}
}
}

Get the int as output
The type error is probably due to the fact that each item you append to the list is a string. Converting it to a number with int() should solve your problem.
The only change to your code is in the last line of code.
op = {
"key1": "value1",
"key2": {
"skey1": "svalue2",
"skey2": {
"sskey1": [{
"url": "value",
"sid": "511"
},
{
"url": "value",
"sid": "522"
},
{
"url": "value",
"sid": "533"
}]
}
}
}
rsId = []
for i in op['key2']['skey2']['sskey1']:
for k, v in i.items():
if k == 'sid':
rsId.append(int(v)) # put the int here
output
>>> rsId
[511, 522, 533]
Another approach: checking every key that has a dictionary as value
# Sample data: three levels of dict nesting, with a list of record dicts at
# the innermost level.
op = {
    "key1": "value1",
    "key2": {
        "skey1": "svalue2",
        "skey2": {
            "sskey1": [
                {
                    "url": "value",
                    "sid": "511"
                },
                {
                    "url": "value",
                    "sid": "522"
                },
                {
                    "url": "value",
                    "sid": "533"
                }
            ]
        }
    }
}
# Walk two levels of dict-valued keys without hard-coding their names, then
# read 'sid' from every record found below them.
# NOTE: every value inside a second-level dict is assumed to be a list of
# dicts, as in the sample data above.
l = []
for k in op:  # searching in the main dictionary
    if isinstance(op[k], dict):  # if the value contains a dict (sub1)
        for k2 in op[k]:  # for every key
            if isinstance(op[k][k2], dict):  # if the value is a dict (sub2)
                for k3 in op[k][k2]:  # for each key of subdict 2
                    for i in op[k][k2][k3]:  # for every item of the list
                        for k4 in i:  # for each key in the item (a dict)
                            if k4 == 'sid':  # if the key is 'sid'
                                l.append(int(i[k4]))  # append the value
print(l)
output
[511, 522, 533]

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Want to access inner elements of JSON with a loop - python

Related

Get value in json file with key while ignoring path

How to convert nested json to csv with multiple different names?

Dictionary replacing values using second dictionary (but searching the value rather than the whole value)

Sorting nested dictionaries using its second key

Add the sub-dictionary elements to a list in Python

Categories

Resources