Decide JSON data python parsing flat - python

it's been a while since I've been stuck on a subject to which I can't find the desired solution.
Example: I have a json given like this:
{
"SECTION": {
"ID": 1,
"COMMENT" : "foo bar ",
"STRUCTURE" : {
"LIEN" : [
{
"from": "2020-01-01",
"to": "2020-01-03"
},
{
"from": "2020-01-04",
"to": "2999-01-07"
}
]
},
"CONTEXTE":{
"NATURE": {
"text": "lorem smdlk fjq lsjdf mqjsh dflkq hs dfhkq g"
}
}
}
}
I would like to have output, for example this:
{
"SECTION.ID": 1,
"SECTION.COMMENT": "foo bar ",
"SECTION.STRUCTURE.LIEN.from": "2020-01-01",
"SECTION.STRUCTURE.LIEN.to": "2020-01-03",
"SECTION.CONTEXTE.NATURE.text": "lorem smdlk fjq lsjdf mqjsh dflkq hs dfhkq g"
}
{
"SECTION.ID": 1,
"SECTION.COMMENT": "foo bar ",
"SECTION.STRUCTURE.LIEN.from": "2020-01-04",
"SECTION.STRUCTURE.LIEN.to": "2999-01-07",
"SECTION.CONTEXTE.NATURE.text": "lorem smdlk fjq lsjdf mqjsh dflkq hs dfhkq g"
}
Does anyone have any idea how I can do this in python? Thank you so much

I suggest you use the json Python module to convert the JSON object to a Python object. Then you can use recursion. If you are using Python 3.5 or later, the following code could be a good starting point:
import json
def flatten_helper(prefix, list_of_dict):
res = []
for i in list_of_dict:
res_dict={}
for k, v in i.items():
res_dict['.'.join([prefix,k])]=v
res.append(res_dict)
return res
def flatten(x):
if isinstance(x, list):
res = []
for ele in x:
res = res + flatten(ele)
return res
else:
res = [{}]
for k, v in x.items():
if (isinstance(v, dict) or isinstance(v,list)):
new_res = []
tempo = flatten(v)
for r in res:
for t in tempo:
new_res.append({**r, **t})
res = flatten_helper(k,new_res)
else:
for i, val in enumerate(res):
res[i][k]=v
return res
jsonobj = '{"SECTION": {"ID": 1, "COMMENT" : "foo bar ", "STRUCTURE" : { "LIEN" : [{"from": "2020-01-01", "to": "2020-01-03"}, {"from": "2020-01-04", "to": "2999-01-07" }]}, "CONTEXTE":{"NATURE": {"text": "lorem smdlk fjq lsjdf mqjsh dflkq hs dfhkq g"}}}}'
pyobj = json.loads(jsonobj)
res = flatten(pyobj)

Sounds like a classic case for recursion; aggregate path until you reach a "simple" value then write the pair "aggregated path"."key" : "value"

Related

Create complex object in Python based on property names in dot notation

I am trying to create a complex object based on metadata I have. It is an array of attributes which I am iterating and trying to create a dict. For example below is the array:
[
"itemUniqueId",
"itemDescription",
"manufacturerInfo[0].manufacturer.value",
"manufacturerInfo[0].manufacturerPartNumber",
"attributes.noun.value",
"attributes.modifier.value",
"attributes.entityAttributes[0].attributeName",
"attributes.entityAttributes[0].attributeValue",
"attributes.entityAttributes[0].attributeUOM",
"attributes.entityAttributes[1].attributeName",
"attributes.entityAttributes[1].attributeValue",
"attributes.entityAttributes[1].attributeUOM",
]
This array should give an output as below:
{
"itemUniqueId": "",
"itemDescription": "",
"manufacturerInfo": [
{
"manufacturer": {
"value": ""
},
"manufacturerPartNumber": ""
}
],
"attributes": {
"noun": {
"value": ""
},
"modifier": {
"value": ""
},
"entityAttributes": [
{
"attributeName": "",
"attributeValue": "",
"attributeUOM": ""
},
{
"attributeName": "",
"attributeValue": "",
"attributeUOM": ""
}
]
}
}
I have written this logic but unable to get the desired output. It should work on both object and array given the metadata.
source_json = [
"itemUniqueId",
"itemDescription",
"manufacturerInfo[0].manufacturer.value",
"manufacturerInfo[0].manufacturerPartNumber",
"attributes.noun.value",
"attributes.modifier.value",
"attributes.entityAttributes[0].attributeName",
"attributes.entityAttributes[0].attributeValue",
"attributes.entityAttributes[0].attributeUOM",
"attributes.entityAttributes[1].attributeName",
"attributes.entityAttributes[1].attributeValue",
"attributes.entityAttributes[1].attributeUOM",
]
for row in source_json:
propertyNames = row.split('.')
temp = ''
parent = {}
parentArr = []
parentObj = {}
# if len(propertyNames) > 1:
arrLength = len(propertyNames)
for i, (current) in enumerate(zip(propertyNames)):
if i == 0:
if '[' in current:
parent[current]=parentArr
else:
parent[current] = parentObj
temp = current
if i > 0 and i < arrLength - 1:
if '[' in current:
parent[current] = parentArr
else:
parent[current] = parentObj
temp = current
if i == arrLength - 1:
if '[' in current:
parent[current] = parentArr
else:
parent[current] = parentObj
temp = current
# temp[prev][current] = ""
# finalMapping[target] = target
print(parent)
There's a similar question at Convert Dot notation string into nested Python object with Dictionaries and arrays where the accepted answer works for this question, but has unused code paths (e.g. isInArray) and caters to unconventional conversions expected by that question:
❓ "arrOne[0]": "1,2,3" → "arrOne": ["1", "2", "3"] instead of
✅ "arrOne[0]": "1,2,3" → "arrOne": ["1,2,3"] or
✅ "arrOne[0]": "1", "arrOne[1]": "2", "arrOne[2]": "3" → "arrOne": ["1", "2", "3"]
Here's a refined implementation of the branch function:
def branch(tree, path, value):
key = path[0]
array_index_match = re.search(r'\[([0-9]+)\]', key)
if array_index_match:
# Get the array index, and remove the match from the key
array_index = int(array_index_match[0].replace('[', '').replace(']', ''))
key = key.replace(array_index_match[0], '')
# Prepare the array at the key
if key not in tree:
tree[key] = []
# Prepare the object at the array index
if array_index == len(tree[key]):
tree[key].append({})
# Replace the object at the array index
tree[key][array_index] = value if len(path) == 1 else branch(tree[key][array_index], path[1:], value)
else:
# Prepare the object at the key
if key not in tree:
tree[key] = {}
# Replace the object at the key
tree[key] = value if len(path) == 1 else branch(tree[key], path[1:], value)
return tree
Usage:
VALUE = ''
def create_dict(attributes):
d = {}
for path_str in attributes:
branch(d, path_str.split('.'), VALUE)
return d
source_json = [
"itemUniqueId",
"itemDescription",
"manufacturerInfo[0].manufacturer.value",
"manufacturerInfo[0].manufacturerPartNumber",
"attributes.noun.value",
"attributes.modifier.value",
"attributes.entityAttributes[0].attributeName",
"attributes.entityAttributes[0].attributeValue",
"attributes.entityAttributes[0].attributeUOM",
"attributes.entityAttributes[1].attributeName",
"attributes.entityAttributes[1].attributeValue",
"attributes.entityAttributes[1].attributeUOM",
]
assert create_dict(source_json) == {
"itemUniqueId": "",
"itemDescription": "",
"manufacturerInfo": [
{
"manufacturer": {
"value": ""
},
"manufacturerPartNumber": ""
}
],
"attributes": {
"noun": {
"value": ""
},
"modifier": {
"value": ""
},
"entityAttributes": [
{
"attributeName": "",
"attributeValue": "",
"attributeUOM": ""
},
{
"attributeName": "",
"attributeValue": "",
"attributeUOM": ""
}
]
}
}
First we should iterate over whole list and store each 3rd attributes, after that we could change this struct to our desired output:
from typing import Dict, List
source_json = [
"attributes.entityAttributes[0].attributeName",
"attributes.entityAttributes[0].attributeValue",
"attributes.entityAttributes[0].attributeUOM",
"attributes.entityAttributes[1].attributeName",
"attributes.entityAttributes[1].attributeValue",
"attributes.entityAttributes[1].attributeUOM",
"attributes.entityAttributes[2].attributeName"
]
def accumulate(source: List) -> Dict:
accumulator = {}
for v in source:
vs = v.split(".")
root_attribute = vs[0]
if not root_attribute in accumulator:
accumulator[root_attribute] = {}
i = vs[1].rfind('[')
k = (vs[1][:i], vs[1][i+1:-1])
if not k in accumulator[root_attribute]:
accumulator[root_attribute][k] = {}
accumulator[root_attribute][k][vs[2]] = ""
return accumulator
def get_result(accumulated: Dict) -> Dict:
result = {}
for k, v in accumulated.items():
result[k] = {}
for (entity, idx), v1 in v.items():
if not entity in result[k]:
result[k][entity] = []
if len(v1) == 3:
result[k][entity].append(v1)
return result
print(get_result(accumulate(source_json)))
The output will be:
{
'attributes':
{
'entityAttributes':
[
{
'attributeName': '',
'attributeValue': '',
'attributeUOM': ''
},
{'attributeName': '',
'attributeValue': '',
'attributeUOM': ''
}
]
}
}
In accumulate function we store 3rd level attributes in Dict with (entityAttributes, 0) ... (entityAttributes, 2) keys.
In get_result function we convert Dict with (entityAttributes, 0) ... (entityAttributes, 2) keys to Dict from string to List.
How about something like this:
import re
import json
source_json = [
"attributes.entityAttributes[0].attributeName",
"attributes.entityAttributes[0].attributeValue",
"attributes.entityAttributes[0].attributeUOM",
"attributes.entityAttributes[1].attributeName",
"attributes.entityAttributes[1].attributeValue",
"attributes.entityAttributes[1].attributeUOM",
"attributes.entityAttributes[2].attributeName"
]
def to_object(source_json):
def add_attribute(target, attribute_list):
head, tail = attribute_list[0], attribute_list[1:]
if tail:
add_attribute(target.setdefault(head,{}), tail)
else:
target[head] = ''
target = {}
for row in source_json:
add_attribute(target, re.split(r'[\.\[\]]+',row))
return target
print(json.dumps(to_object(source_json), indent=4))
Note that this will not exactly do what you requested. It interprets stores the array also as an object with keys '0' ... '2'. This makes it easier to implement and also more stable. What would you expect, when the input list missed the entries with entityAttributes[0]. Should the list include an empty element or something different. Anyway you save space by not including this element, which works only if you store the array in an object.
None of the answers provided so far strike me as very intuitive. Here's one way
to tackle the problem with three easy-to-understand functions.
Normalize inputs. First we need a function to normalize the inputs strings. Instead of rules-bearing strings like
'foo[0].bar' – where one must understand that integers
in square brackets imply a list – we want a simple tuple
of keys like ('foo', 0, 'bar').
def attribute_to_keys(a):
return tuple(
int(k) if k.isdigit() else k
for k in a.replace('[', '.').replace(']', '').split('.')
)
Build a uniform data structure. Second, we need a function to assemble a data structure consisting of dicts
of dicts of dicts ... all the way down.
def assemble_data(attributes):
data = {}
for a in attributes:
d = data
for k in attribute_to_keys(a):
d = d.setdefault(k, {})
return convert(data)
def convert(d):
# Just a placeholder for now.
return d
Convert the uniform data. Third, we need to implement a real version of the placeholder. Specifically, we
need it to recursively convert the uniform data structure into our ultimate
goal having (a) empty strings at leaf nodes, and (b) lists rather than dicts
whenever the dict keys are all integers. Note that this even fills in empty
list positions with an empty string (a contingency not covered in your problem
description; adjust as needed if you want a different behavior).
def convert(d):
if not d:
return ''
elif all(isinstance(k, int) for k in d):
return [convert(d.get(i)) for i in range(max(d) + 1)]
else:
return {k : convert(v) for k, v in d.items()}
You can use a custom builder class which implements __getattr__ and __getitem__ to gradually build the underlying object. This building can then be triggered by using eval on each of the attribute strings (note: eval is not safe for input from untrusted sources).
The following is an example implementation:
class Builder:
def __init__(self):
self.obj = None
def __getattr__(self, key):
if self.obj is None:
self.obj = {}
return self.obj.setdefault(key, Builder())
def __getitem__(self, index):
if self.obj is None:
self.obj = []
self.obj.extend(Builder() for _ in range(index+1-len(self.obj)))
return self.obj[index]
def convert(self):
if self.obj is None:
return ''
elif isinstance(self.obj, list):
return [v.convert() for v in self.obj]
elif isinstance(self.obj, dict):
return {k: v.convert() for k,v in self.obj.items()}
else:
assert False
attributes = [
'itemUniqueId',
'itemDescription',
'manufacturerInfo[0].manufacturer.value',
'manufacturerInfo[0].manufacturerPartNumber',
'attributes.noun.value',
'attributes.modifier.value',
'attributes.entityAttributes[0].attributeName',
'attributes.entityAttributes[0].attributeValue',
'attributes.entityAttributes[0].attributeUOM',
'attributes.entityAttributes[1].attributeName',
'attributes.entityAttributes[1].attributeValue',
'attributes.entityAttributes[1].attributeUOM',
]
builder = Builder()
for attr in attributes:
eval(f'builder.{attr}')
result = builder.convert()
import json
print(json.dumps(result, indent=4))
which gives the following output:
{
"itemUniqueId": "",
"itemDescription": "",
"manufacturerInfo": [
{
"manufacturer": {
"value": ""
},
"manufacturerPartNumber": ""
}
],
"attributes": {
"noun": {
"value": ""
},
"modifier": {
"value": ""
},
"entityAttributes": [
{
"attributeName": "",
"attributeValue": "",
"attributeUOM": ""
},
{
"attributeName": "",
"attributeValue": "",
"attributeUOM": ""
}
]
}
}

remove unused dictonnary and list symbol in python

I have been trying to reformat some json data from a python query. I want the slug of my data to become the key of a dictionary. I did it but I have a list and a dictionary in extra. How can I remove them? I would like to have this result:
{
"corps-connecte" :{
"id": 9666888,
"title": "Corps connect\u00e9",
"slug": "corps-connecte",
},
"portal-thanos" : {
"id": 9666888,
"title": "Portal thanos",
"slug": "portal-thanos",
},...
}
But actually i have this :
[
{
"corps-connecte" :{
"id": 9666888,
"title": "Corps connect\u00e9",
"slug": "corps-connecte",
},
},
{
"portal-thanos" : {
"id": 9666888,
"title": "Portal thanos",
"slug": "portal-thanos",
}
}...
]
Here is how I did to get the data above, maybe there is an easier way that I can reformat my query correctly?
def artist_artworks(self, artist_id):
artist = self.artist(artist_id)
artworks = []
with ThreadPool(self.threads) as pool:
for artwork in pool.imap(self.artwork, artist["projects"]):
keyList = [artwork["slug"]]
valueList = [artwork]
artworks.append(dict(list(zip(keyList, valueList))))
continue
break
return artworks
def save_artists_json(self, artist):
result = self.save_artist(artist)
json_formatted_str = json.dumps(result)
return json_formatted_str
Thanks
I believe your mistake is to have artworks be a list and append to it, instead of having it as a dict and updating it with the new artworks:
def artist_artworks(self, artist_id):
artist = self.artist(artist_id)
artworks = {} # <- Initialize as dictionary
with ThreadPool(self.threads) as pool:
for artwork in pool.imap(self.artwork, artist["projects"]):
keyList = [artwork["slug"]]
valueList = [artwork]
artworks.update(dict(list(zip(keyList, valueList)))) # <- update items
continue
break
return artworks
def save_artists_json(self, artist):
result = self.save_artist(artist)
json_formatted_str = json.dumps(result)
return json_formatted_str
You could just do:
from pprint import pprint
x = [
{
"corps-connecte" :{
"id": 9666888,
"title": "Corps connect\u00e9",
"slug": "corps-connecte",
},
},
{
"portal-thanos" : {
"id": 9666888,
"title": "Portal thanos",
"slug": "portal-thanos",
}
}
]
y = {k: v for d in x for k, v in d.items()}
pprint(y)
{'corps-connecte': {'id': 9666888,
'slug': 'corps-connecte',
'title': 'Corps connecté'},
'portal-thanos': {'id': 9666888,
'slug': 'portal-thanos',
'title': 'Portal thanos'}}
alternate one-liner solution (but likely more inefficient):
{next(iter(e)): e[next(iter(e))] for e in L}
an optimized version using walrus := operator in Python 3.8:
{(k := next(iter(e))): e[k] for e in L}
also maybe more efficient version:
dict(e.popitem() for e in L)
more optimized version of above, as suggested in comments:
dict(map(dict.popitem, L))
Timing the different options (run on Mac OS Big Sur, venv with Python 3.9.0)
from pprint import pprint
from timeit import timeit
from copy import deepcopy
x = [
{
"corps-connecte" :{
"id": 9666888,
"title": "Corps connect\u00e9",
"slug": "corps-connecte",
},
},
{
"portal-thanos" : {
"id": 9666888,
"title": "Portal thanos",
"slug": "portal-thanos",
}
}
]
# 0.947
print('Items: ',
timeit('L = [z.copy() for z in x]; {k: v for e in L for k, v in e.items()}',
globals=globals()))
# 0.827
print('Next -> Iter: ',
timeit('L = [z.copy() for z in x]; {(k := next(iter(e))): e[k] for e in L}',
globals=globals()))
# 0.912
print('PopItem: ',
timeit('L = [z.copy() for z in x]; dict(e.popitem() for e in L)',
globals=globals()))
# 0.734
print('Map -> PopItem: ',
timeit('L = [z.copy() for z in x]; dict(map(dict.popitem, L))',
globals=globals()))

How to convert from lists to json in python?

The required output of my nested loops is json, how to get there?
The input list structure looks like list = [[name, version, id],[name, version, id], ...]
list_1 = [
['mipl-abnd','v1.0.2','eelp234'],
['mipl-avfd','v1.1.5','32fv423'],
['mipl-awsd','v9.0.2','234eelp'],
['mipl-tgfd','v3.0.0','124fdge'],
['mipl-hrdss','v1.0.2','543rfd3'],
['mipl-oldss','v1.0.2','eelp234']]
list_2 = [
['mipl-abnd','v1.0.2','eelp234'],
['mipl-avfd','v1.1.6','3254323'],
['mipl-awsd','v9.0.2','234eelp'],
['mipl-tgfd','v3.0.0','124fdge'],
['mipl-hrdss','v1.0.2','543rfd3'],
['mipl-newss','v1.0.2','eelp234']]
This is the code I used to get a final list:
def get_difference(l1,l2):
l1 = get_ordered_list(file1.read())
l2 = get_ordered_list(file2.read())
d1 = {k:[v1,v2] for k,v1,v2 in l1}
d2 = {k:[v1,v2] for k,v1,v2 in l2}
result = []
for k,v in d2.items():
if k in d1:
v1 = d1[k]
if v1[0] != v[0]:
result.append({k,v1[0],v[0], v1[1],v[1]})
else:
result.append({k,'new',v[0],'new', v[1]})
for k,v in d1.items():
if k not in d2:
result.append({k,v[0],'deprecated', v[1], 'deprecated'})
res_json = json.dumps(result)
return res_json
Current Output :
result = [['mipl-avfd', 'v1.1.5', 'v1.1.6','32fv423', '3254323'], ['mipl-oldss','v1.0.2', 'deprecated','eelp234', 'deprecated'], ['mipl-newss', 'new','v1.0.2','new', 'eelp234']]
Required Output(I want to write it to an easily readable JSON which can be later made into a table) :
{diff = {"name" : "mipl-avfd",
"old-version" : "v1.1.5",
"new-version" : "v1.1.6",
"old-id" : "32fv423",
"new-id" : "3254323"
},
{"name" : "mipl-oldss",
"old-version" : "v1.0.2",
"new-version" : "deprecated",
"old-id" : "eelp234",
"new-id" : "deprecated"
},
{"name" : "mipl-newss",
"old-version" : "new",
"new-version" : "v1.0.2",
"old-id" : "eelp234",
"new-id" : "new"
}
}
I hope I understand your question right. You have "old" list_1 and "new" list_2 and you want to construct flat list how the versions change (I assume, in list_1 you have old versions):
import json
from itertools import groupby
list_1 = [
['mipl-abnd','v1.0.2','eelp234'],
['mipl-avfd','v1.1.5','32fv423'],
['mipl-awsd','v9.0.2','234eelp'],
['mipl-tgfd','v3.0.0','124fdge'],
['mipl-hrdss','v1.0.2','543rfd3'],
['mipl-oldss','v1.0.2','eelp234']]
list_2 = [
['mipl-abnd','v1.0.2','eelp234'],
['mipl-avfd','v1.1.6','3254323'],
['mipl-awsd','v9.0.2','234eelp'],
['mipl-tgfd','v3.0.0','124fdge'],
['mipl-hrdss','v1.0.2','543rfd3'],
['mipl-newss','v1.0.2','eelp234']]
s = sorted(list_1 + list_2, key=lambda k: k[0])
out = []
for v, g in groupby(s, lambda k: k[0]):
g = list(g)
if len(g) == 2:
out.append({
'name': v,
'old-version': g[0][1],
'new-version': g[1][1],
'old-id': g[0][2],
'new-id': g[1][2],
})
else:
if g[0] in list_1:
out.append({
'name': v,
'old-version': g[0][1],
'new-version': 'deprecated',
'old-id': g[0][2],
'new-id': 'deprecated',
})
else:
out.append({
'name': v,
'old-version': 'new',
'new-version': g[0][1],
'old-id': 'new',
'new-id': g[0][2],
})
print(json.dumps(out, indent=4))
Prints:
[
{
"name": "mipl-abnd",
"old-version": "v1.0.2",
"new-version": "v1.0.2",
"old-id": "eelp234",
"new-id": "eelp234"
},
{
"name": "mipl-avfd",
"old-version": "v1.1.5",
"new-version": "v1.1.6",
"old-id": "32fv423",
"new-id": "3254323"
},
{
"name": "mipl-awsd",
"old-version": "v9.0.2",
"new-version": "v9.0.2",
"old-id": "234eelp",
"new-id": "234eelp"
},
{
"name": "mipl-hrdss",
"old-version": "v1.0.2",
"new-version": "v1.0.2",
"old-id": "543rfd3",
"new-id": "543rfd3"
},
{
"name": "mipl-newss",
"old-version": "new",
"new-version": "v1.0.2",
"old-id": "new",
"new-id": "eelp234"
},
{
"name": "mipl-oldss",
"old-version": "v1.0.2",
"new-version": "deprecated",
"old-id": "eelp234",
"new-id": "deprecated"
},
{
"name": "mipl-tgfd",
"old-version": "v3.0.0",
"new-version": "v3.0.0",
"old-id": "124fdge",
"new-id": "124fdge"
}
]
What you said is a json is not a valid json. Also, json is a string - you want a dict structure. You don't have to dump it into a json string.
Why do you give l1 and l2 as arguments to the function when you overwrite them in the first lines?
file1 and file2 are not defined in the function. Also, for reading files you should use with to properly close the file.
First, you need to declare the keys (labels) somewhere:
keys = ["name", "old-version", "old-id", "new-id"]
Then, instead of appending a list, you append a dict.
Thankfully, dicts can be easily made from lists of tuples - and we can merge keys and your current lists into lists of tuples easily, e.g.:
dict(zip(keys, [k,v1[0],v[0], v1[1],v[1]]))
So it now looks like this:
for k,v in d2.items():
if k in d1:
v1 = d1[k]
if v1[0] != v[0]:
result.append(dict(zip(keys, [k,v1[0],v[0], v1[1],v[1]])))
else:
result.append(dict(zip(keys, [k,'new',v[0],'new', v[1]])))
for k,v in d1.items():
if k not in d2:
result.append(dict(zip(keys, [k,v[0],'deprecated', v[1], 'deprecated'])))

Remove duplicate values in different Json Lists python

I know that there are a lot of questions about duplicates but I can't find a solution suitable for me.
I have a json structure like this:
{
"test": [
{
"name2": [
"Tik",
"eev",
"asdv",
"asdfa",
"sadf",
"Nick"
]
},
{
"name2": [
"Tik",
"eev",
"123",
"r45",
"676",
"121"
]
}
]
}
I want to keep the first value and remove all the other duplicates.
Expected Result
{
"test": [
{
"name2": [
"Tik",
"eev",
"asdv",
"asdfa",
"sadf",
"Nick"
]
},
{
"name2": [
"123",
"r45",
"676",
"121"
]
}
]
}
I tried using a tmp to check for duplicates but it didn't seem to work. Also I can't find a way to make it json again.
import json
with open('myjson') as access_json:
read_data = json.load(access_json)
tmp = []
tmp2 = []
def get_synonyms():
ingredients_access = read_data['test']
for x in ingredients_access:
for j in x['name2']:
tmp.append(j)
if j in tmp:
tmp2.append(j)
get_synonyms()
print(len(tmp))
print(len(tmp2))
You can use recursion:
def filter_d(d):
seen = set()
def inner(_d):
if isinstance(_d, dict):
return {a:inner(b) if isinstance(b, (dict, list)) else b for a, b in _d.items()}
_r = []
for i in _d:
if isinstance(i, (dict, list)):
_r.append(inner(i))
elif i not in seen:
_r.append(i)
seen.add(i)
return _r
return inner(d)
import json
print(json.dumps(filter_d(data), indent=4))
Output:
{
"test": [
{
"name2": [
"Tik",
"eev",
"asdv",
"asdfa",
"sadf",
"Nick"
]
},
{
"name2": [
"123",
"r45",
"676",
"121"
]
}
]
}
You are first adding everything to tmp and then to tmp2 because every value was added to tmp before.
I changed the function a little bit to work for your specific test example:
def get_synonyms():
test_list = []
ingredients_access = read_data['test']
used_values =[]
for x in ingredients_access:
inner_tmp = []
for j in x['name2']:
if j not in used_values:
inner_tmp.append(j)
used_values.append(j)
test_list.append({'name2':inner_tmp})
return {'test': test_list}
result = get_synonyms()
print(result)
Output:
{'test': [{'name2': ['Tik', 'eev', 'asdv', 'asdfa', 'sadf', 'Nick']}, {'name2': ['123', 'r45', '676', '121']}]}
Here's a little hackish answer:
d = {'test': [{'name2': ['Tik', 'eev', 'asdv', 'asdfa', 'sadf', 'Nick']},
{'name2': ['Tik', 'eev', '123', 'r45', '676', '121']}]}
s = set()
for l in d['test']:
l['name2'] = [(v, s.add(v))[0] for v in l['name2'] if v not in s]
Output:
{'test': [{'name2': ['Tik', 'eev', 'asdv', 'asdfa', 'sadf', 'Nick']},
{'name2': ['123', 'r45', '676', '121']}]}
This uses a set to track the unique values, and add unique values to set while returning the value back to the list.

Flatten nested List

Not sure if I'm asking this in the right place or if I'm understanding this correctly. I need change the dictionary so that the occurrence field is added on to the availability key. For example: "Availability.Occurrence": "Daily"
BEFORE
test_data = {
"testDate": "2018-11-19 21:00:00",
"testMessage": "This is a test message",
"testStatus": "Warning",
"Results": [
{
"Availability": {
"Occurence": "Daily"
},
"OldestRefreshDate": "2018-11-15 15:40:57 EST",
"TableName": "test"
}
],
"TaskId": "CheckSourceRefreshDates"
}
AFTER
test_data = {
"testDate": "2018-11-19 21:00:00",
"testMessage": "This is a test message",
"testStatus": "Warning",
"Results": [
{
"Availability.Occurrence": "Daily",
"OldestRefreshDate": "2018-11-15 15:40:57 EST",
"TableName": "test"
}
],
"TaskId": "CheckSourceRefreshDates"
}
I am new to Python just trying to figure all this out. Any help is appreciated.
Heres the function that I tried using, however it only flattens the entire dictionary.
def flatten_json(y):
out = {}
def flatten(x, name=''):
if type(x) is dict:
for a in x:
flatten(x[a], name + a + '.')
elif type(x) is list:
i = 0
for a in x:
flatten(a, name + '.')
i += 1
else:
out[name[:-1]] = x
flatten(y)
return out

Categories

Resources