Let's say I have this type of json file:
{
"body": [
{
"content": "abcd"
},
{
"content": "test"
},
{
"paragraph": {
"content": "ok"
}
}
]
}
I want to get every value of "content", but I can't simply loop through every item in body and use the key "content", because it doesn't have the same path for every item.
You can try recursion:
dct = {
    "body": [
        {"content": "abcd"},
        {"content": "test"},
        {"paragraph": {"content": "ok"}},
    ]
}

def get_content(o):
    if isinstance(o, dict):
        for k, v in o.items():
            if k == "content":
                yield v
            else:
                yield from get_content(v)
    elif isinstance(o, list):
        for v in o:
            yield from get_content(v)

print(list(get_content(dct)))
Prints:
['abcd', 'test', 'ok']
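If the data lives in a JSON file rather than a Python dict, the same generator works after loading it. A minimal sketch, assuming a hypothetical file named data.json:

import json

# Load the document from disk (the file name is only an example)
with open("data.json", encoding="utf-8") as fh:
    doc = json.load(fh)

# Reuse the recursive generator from above
print(list(get_content(doc)))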
I've been trying to convert a nested json file to csv. Here is a small example of the json file.
json_data = {
    "labels": {
        "longfilename01:png": {
            "events": {
                "-N8V6uUR__vvB0qv1lPb": {
                    "t": "2022-08-02T19:54:23.608Z",
                    "user": "bmEhwNCZT9Wiftgvsopb7vBjO9o1"
                }
            },
            "questions": {
                "would-you": {
                    "-N8V6uUR__vvB0qv1lPb": {
                        "answer": "no",
                        "format": 1
                    }
                }
            }
        },
        "longfilename02:png": {
            "events": {
                "-N8ILnaH-1ylwp2LGvtP": {
                    "t": "2022-07-31T08:24:23.698Z",
                    "user": "Qf7C5cXQkXfQanxKPR0rsKW4QzE2"
                }
            },
            "questions": {
                "would-you": {
                    "-N8ILnaH-1ylwp2LGvtP": {
                        "answer": "yes",
                        "format": 1
                    }
                }
            }
        }
    }
}
I've tried multiple ways to get this output:
Labels            Event                 User                          Time                      Answer
Long filename 01  -N8V6uUR__vvB0qv1lPb  bmEhwNCZT9Wiftgvsopb7vBjO9o1  2022-08-02T19:54:23.608Z  no
Long filename 02  -N8ILnaH-1ylwp2LGvtP  bmEhwNCZT9Wiftgvsopb7vBjO9o1  2022-07-31T08:24:23.698Z  yes
If I normalise with:
import json
import pandas as pd

f = open('after_labels.json')
data = json.load(f)
df = pd.json_normalize(data)
Or try to flatten the file with multiple functions such as:
def flatten_json(json):
    def process_value(keys, value, flattened):
        if isinstance(value, dict):
            for key in value.keys():
                process_value(keys + [key], value[key], flattened)
        elif isinstance(value, list):
            for idx, v in enumerate(value):
                process_value(keys + [str(idx)], v, flattened)
        else:
            flattened['__'.join(keys)] = value

    flattened = {}
    for key in json.keys():
        process_value([key], json[key], flattened)
    return flattened
df = flatten_json(data)
or
from copy import deepcopy
import pandas
def cross_join(left, right):
    new_rows = [] if right else left
    for left_row in left:
        for right_row in right:
            temp_row = deepcopy(left_row)
            for key, value in right_row.items():
                temp_row[key] = value
            new_rows.append(deepcopy(temp_row))
    return new_rows

def flatten_list(data):
    for elem in data:
        if isinstance(elem, list):
            yield from flatten_list(elem)
        else:
            yield elem

def json_to_dataframe(data_in):
    def flatten_json(data, prev_heading=''):
        if isinstance(data, dict):
            rows = [{}]
            for key, value in data.items():
                rows = cross_join(rows, flatten_json(value, prev_heading + '.' + key))
        elif isinstance(data, list):
            rows = []
            for item in data:
                rows.extend(flatten_list(flatten_json(item, prev_heading)))
        else:
            rows = [{prev_heading[1:]: data}]
        return rows

    return pandas.DataFrame(flatten_json(data_in))
df = json_to_dataframe(data)
print(df)
It gives me 292 columns and I suspect this is because of the long unique filenames.
I can't change the JSON file before processing, even though the simple solution would be to turn each key into a value such as "filename": "longfilename01:png", so that every record had a consistent structure and I wouldn't have this problem.
I would be grateful for any other clever ideas on how to solve this.
Try:
json_data = {
"labels": {
"longfilename01:png": {
"events": {
"-N8V6uUR__vvB0qv1lPb": {
"t": "2022-08-02T19:54:23.608Z",
"user": "bmEhwNCZT9Wiftgvsopb7vBjO9o1",
}
},
"questions": {
"would-you": {
"-N8V6uUR__vvB0qv1lPb": {"answer": "no", "format": 1}
}
},
},
"longfilename02:png": {
"events": {
"-N8ILnaH-1ylwp2LGvtP": {
"t": "2022-07-31T08:24:23.698Z",
"user": "Qf7C5cXQkXfQanxKPR0rsKW4QzE2",
}
},
"questions": {
"would-you": {
"-N8ILnaH-1ylwp2LGvtP": {"answer": "yes", "format": 1}
}
},
},
}
}
df = pd.DataFrame(
    [
        {
            "Labels": k,
            "Event": list(v["events"])[0],
            "User": list(v["events"].values())[0]["user"],
            "Time": list(v["events"].values())[0]["t"],
            "Answer": list(list(v["questions"].values())[0].values())[0]["answer"],
        }
        for k, v in json_data["labels"].items()
    ]
)
print(df)
Prints:
Labels Event User Time Answer
0 longfilename01:png -N8V6uUR__vvB0qv1lPb bmEhwNCZT9Wiftgvsopb7vBjO9o1 2022-08-02T19:54:23.608Z no
1 longfilename02:png -N8ILnaH-1ylwp2LGvtP Qf7C5cXQkXfQanxKPR0rsKW4QzE2 2022-07-31T08:24:23.698Z yes
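This relies on every file having exactly one event and one answer; the hard-coded [0] indexing silently drops anything beyond the first. A hedged variant, assuming you would want one row per event if there were several, could iterate instead:

import pandas as pd

rows = []
for filename, v in json_data["labels"].items():
    # map event id -> answer across all questions
    answers = {
        event_id: details["answer"]
        for question in v["questions"].values()
        for event_id, details in question.items()
    }
    for event_id, event in v["events"].items():
        rows.append(
            {
                "Labels": filename,
                "Event": event_id,
                "User": event["user"],
                "Time": event["t"],
                "Answer": answers.get(event_id),  # None if no answer recorded
            }
        )

df = pd.DataFrame(rows)
print(df)

For the sample data this produces the same two rows as above.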
I am trying to replace a value in a dict with a value from another dict. This has been covered plenty on here from my searches, but I think I have a unique case: I need to search within the value for a placeholder rather than replace the whole value.
I have tried this so far and it partially works:
input_data = [
{
"name": "system1",
"other_param": "%param1%",
"secret_text": "%secret%",
"options": {
"conditions": "[\"f--updated_at,geq,\\\"%max_date%\\\"\",\"f--status,eq,\\\"Void\\\"\"]"
}
}
]
variables = {
'%secret%': "abc",
'%param1%': "text_param",
'%max_date%': '2018-01-01'
}
def process_variables(dict_obj, dict_vars):
    for k, v in dict_obj.items():
        for var, val in dict_vars.items():
            dict_obj[k] = str(v).replace(var, val)
    return dict_obj

for x in input_data:
    print(process_variables(x, variables))
The desired result is:
{
"name": "system1",
"other_param": "text_param",
"secret_text": "abc",
"options": {
"conditions": "[\"f--updated_at,geq,\\\"2018-01-01\\\"\",\"f--status,eq,\\\"Void\\\"\"]"
}
}
Actual result:
{
"name":"system1",
"other_param":"%param1%",
"secret_text":"%secret%",
"options":"{\\'conditions\\': \\'[\"f--updated_at,geq,\\\\\"2018-01-01\\\\\"\",\"f--status,eq,\\\\\"Void\\\\\"\"]\\'}"
}
As you can see it seems to flatten the dict down and not replace the top level values.
Is there a better way to achieve this? I can alter the JSON input if needed but I need to do some sort of variable substitution as I have hundreds of inputs with these common values I plan to iterate over.
You could loop over input_data recursively and replace each value separately. Looping recursively has the benefit that you don't need to care how deeply input_data is nested:
def traverse_dict(dct, replacements):
    if isinstance(dct, list):
        for i, item in enumerate(dct):
            dct[i] = traverse_dict(item, replacements)
    elif isinstance(dct, dict):
        for k, v in dct.items():
            dct[k] = traverse_dict(v, replacements)
    else:
        # all scalar values in the JSON end up here
        if isinstance(dct, str):
            for repl in replacements:
                if repl in dct:
                    dct = dct.replace(repl, replacements[repl])
        return dct
    return dct
result = traverse_dict(input_data, variables)
print(result)
Output:
[
{
"name": "system1",
"options": {
"conditions": "[\"f--updated_at,geq,\\\"2018-01-01\\\"\",\"f--status,eq,\\\"Void\\\"\"]"
},
"other_param": "text_param",
"secret_text": "abc"
}
]
Making input_data more complex still replaces correctly:
input_data = [
{
"name": "system1",
"other_param": "%param1%",
"secret_text": "%secret%",
"options": {
"conditions": "[\"f--updated_at,geq,\\\"%max_date%\\\"\",\"f--status,eq,\\\"Void\\\"\"]",
"foo": [
{
"bar": {
"baz": "%secret%",
}
}
]
}
}
]
Output:
[
{
"name": "system1",
"options": {
"conditions": "[\"f--updated_at,geq,\\\"2018-01-01\\\"\",\"f--status,eq,\\\"Void\\\"\"]",
"foo": [
{
"bar": {
"baz": "abc"
}
}
]
},
"other_param": "text_param",
"secret_text": "abc"
}
]
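Note that traverse_dict modifies input_data in place, since list elements and dict values are reassigned as it walks them. If the original placeholders should be preserved, a small sketch (assuming you are free to copy the data first):

from copy import deepcopy

# Substitute on a copy so input_data keeps its %placeholders%
result = traverse_dict(deepcopy(input_data), variables)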
I was able to achieve the result by treating the input data as a string and using ast (Abstract Syntax Trees):
import ast

data = str(input_data)
for k, v in variables.items():
    data = data.replace(k, v)

data = ast.literal_eval(data)
print(type(data))
print(data)
Output:
<class 'list'>
[{'name': 'system1', 'other_param': 'text_param', 'secret_text': 'abc', 'options': {'conditions': '["f--updated_at,geq,\\"2018-01-01\\"","f--status,eq,\\"Void\\""]'}}]
Can't vouch for this being the best way, but it simplifies the process. Worth a try?
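A related variant, not part of the answer above but a sketch assuming input_data only ever contains JSON-compatible values, is to round-trip through json.dumps/json.loads instead of str()/ast.literal_eval, so you are not relying on Python's repr formatting:

import json

# Serialise to a JSON string, substitute the placeholders, then parse back
text = json.dumps(input_data)
for placeholder, value in variables.items():
    text = text.replace(placeholder, value)
result = json.loads(text)
print(result)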
I have a json in the following format.
json_tree = {
"Gardens": {
"Seaside": {
"#loc": "porch",
"#myID": "1.2.3",
"Tid": "1",
"InfoList": {
"status": {
"#default": "0",
"#myID": "26"
},
"count": {
"#default": "0",
"#myID": "1"
}
},
"BackYard": {
"#myID": "75",
"Tid": "2",
"InfoList": {
"status": {
"#default": "6",
"#myID": "32"
},
"count": {
"#default": "0",
"#myID": "2"
}
}
}
}
}
}
I want to be able to return the "#loc" value when I search for the key "Seaside" or "BackYard". I want it to be generic, as the key could be any string.
Currently, I have the following method, and it only returns the "#loc" when I search for "Seaside" and None when I search for "BackYard". Note that "BackYard" has the same "#loc" as "Seaside", as it is nested inside it.
I am not sure what is missing in this code.
My implementation:
def getLoc(json_tree, key):
    for k1, v1 in json_tree.items():
        for k, v in v1.items():
            if '#loc' in v and v['#loc'] is not None and str(k) == key:
                return v['#loc']
The output should be the same "#loc" ("porch") for both of the following calls:
getLoc(json_tree, "Seaside")
getLoc(json_tree, "BackYard")
You can use recursion with a generator:
def get_vals(d, _key, _score=None):
    # _score carries the closest '#loc' seen so far on the path down
    for a, b in d.items():
        if _key == a:
            yield b.get('#loc', _score)
        if isinstance(b, dict):
            yield from get_vals(b, _key, b.get('#loc', _score))
        elif isinstance(b, list):
            for i in b:
                yield from get_vals(i, _key, _score)
print(list(get_vals(json_tree, 'Seaside')))
print(list(get_vals(json_tree, 'BackYard')))
Output:
['porch']
['porch']
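Because get_vals is a generator, you can also grab just the first match (or a default) without building the whole list; a small usage sketch:

# next() stops at the first key match; None if nothing matches
print(next(get_vals(json_tree, 'BackYard'), None))  # porch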
I want to loop over the JSON and collect the inner "alias" values of both dims and metrics into separate Python lists, dimsList and metricsList.
json_obj =
{
"dataset":"246",
"dims":{
"Location":{
"alias":"Location",
"format":""
}
},
"metrics":{
"ToTal_Dwell":[
{
"agg":"sum",
"format":"",
"alias":"ToTal_Dwell"
}
]
},
"filters":"",
"limit":"10"
}
I'm expecting the result to be dimsList = ['Location'] and metricsList = ['ToTal_Dwell'].
You can recursively iterate using .items(). Every time you see an inner dict you make a recursive call, and an inner list triggers a call per inner dict in the list.
try this:
json_obj = {
"dataset": "246",
"dims": {
"Location": {
"alias": "Location",
"format": ""
}
},
"metrics": {
"ToTal_Dwell": [
{
"agg": "sum",
"format": "",
"alias": "ToTal_Dwell"
}
]
},
"filters": "",
"limit": "10"
}
def extract_inner_values(d, key):
    results = []
    for k, v in d.items():
        if k == key:
            results.append(v)
        if isinstance(v, dict):
            results.extend(extract_inner_values(v, key))
        if isinstance(v, list):
            for inner_d in v:
                results.extend(extract_inner_values(inner_d, key))
    return results
dimsList = extract_inner_values(json_obj["dims"], "alias")
metricsList = extract_inner_values(json_obj["metrics"], "alias")
print(dimsList)
print(metricsList)
Output:
['Location']
['ToTal_Dwell']
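The same helper also works on the whole document at once if you don't care which section an alias came from; a small usage example under that assumption:

# Searching the entire object collects the aliases from both sections
print(extract_inner_values(json_obj, "alias"))  # ['Location', 'ToTal_Dwell']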
I am trying to add a sub-dictionary element to a list, but it is giving me a TypeError.
Here is the dictionary and my code:
{
"key1": "value1",
"key2": {
"skey1": "svalue2",
"skey2": {
"sskey1": [{
"url": "value",
"sid": "511"
},
{
"url": "value",
"sid": "522"
},
{
"url": "value",
"sid": "533"
}]
}
}
}
I want to add the sid values to a list, like [511, 522, 533].
Here is my code:
rsId = []
for i in op['key2']['skey2']['sskey1']:
    for k, v in i.items():
        if k == 'sid':
            rsId.append(v)
D = {
"key1":"value1",
"key2":{
"skey1":"svalue2",
"skey2":{
"sskey1":[{
"url":"value",
"sid":"511"
},
{
"url":"value",
"sid":"522"
},
{
"url":"value",
"sid":"533"
} ]
}
}
}
res = []
for i in D['key2']['skey2']['sskey1']:
    res.append(i['sid'])
print(res)
Result:
['511', '522', '533']
or as a one-liner:
res = [i['sid'] for i in D['key2']['skey2']['sskey1']]
You can use a list comprehension:
rsId = [v for item in op['key2']['skey2']['sskey1'] for k, v in item.items() if k == 'sid']
You can try a one-liner like this:
print(list(map(lambda x:x['sid'],data['key2']['skey2']['sskey1'])))
output:
['511', '522', '533']
If you want the values as ints:
print(list(map(lambda x:int(x['sid']),data['key2']['skey2']['sskey1'])))
output:
[511, 522, 533]
when data is:
data = {
"key1":"value1",
"key2":{
"skey1":"svalue2",
"skey2":{
"sskey1":[{
"url":"value",
"sid":"511"
},
{
"url":"value",
"sid":"522"
},
{
"url":"value",
"sid":"533"
} ]
}
}
}
This gives you the ints as output.
The TypeError is probably due to the fact that the list items are strings. Converting each one to a number with int() solves your problem.
The only change to your code is in the last line.
op = {
"key1": "value1",
"key2": {
"skey1": "svalue2",
"skey2": {
"sskey1": [{
"url": "value",
"sid": "511"
},
{
"url": "value",
"sid": "522"
},
{
"url": "value",
"sid": "533"
}]
}
}
}
rsId = []
for i in op['key2']['skey2']['sskey1']:
    for k, v in i.items():
        if k == 'sid':
            rsId.append(int(v))  # put the int here
output
>>> rsId
[511, 522, 533]
Another approach: checking every key that has a dictionary as value
op = {
"key1": "value1",
"key2": {
"skey1": "svalue2",
"skey2": {
"sskey1": [
{
"url": "value",
"sid": "511"
},
{
"url": "value",
"sid": "522"
},
{
"url": "value",
"sid": "533"
}
]
}
}
}
l = []
for k in op:                             # searching in the main dictionary
    if type(op[k]) is dict:              # if the value contains a dict (sub1)
        for k2 in op[k]:                 # for every key
            if type(op[k][k2]) is dict:  # if the value is a dict (sub2)
                for k3 in op[k][k2]:     # for each key of subdict 2
                    for i in op[k][k2][k3]:       # for every item of the list
                        for k4 in i:              # for each key in the item (a dict)
                            if k4 == 'sid':       # if the key is 'sid'
                                l.append(int(i[k4]))  # append the value
print(l)
output
[511, 522, 533]
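If the nesting ever gets deeper or shallower, the hand-written loops above stop matching the structure. A generic recursive sketch in the same spirit (a hypothetical helper, not part of the answer above) collects every 'sid' at any depth:

def collect_sids(obj):
    """Recursively gather every 'sid' value, converted to int, from nested dicts/lists."""
    sids = []
    if isinstance(obj, dict):
        for key, value in obj.items():
            if key == 'sid':
                sids.append(int(value))
            else:
                sids.extend(collect_sids(value))
    elif isinstance(obj, list):
        for item in obj:
            sids.extend(collect_sids(item))
    return sids

print(collect_sids(op))  # [511, 522, 533]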