How to convert from lists to json in python? - python

The required output of my nested loops is json, how to get there?
The input list structure looks like list = [[name, version, id],[name, version, id], ...]
list_1 = [
['mipl-abnd','v1.0.2','eelp234'],
['mipl-avfd','v1.1.5','32fv423'],
['mipl-awsd','v9.0.2','234eelp'],
['mipl-tgfd','v3.0.0','124fdge'],
['mipl-hrdss','v1.0.2','543rfd3'],
['mipl-oldss','v1.0.2','eelp234']]
list_2 = [
['mipl-abnd','v1.0.2','eelp234'],
['mipl-avfd','v1.1.6','3254323'],
['mipl-awsd','v9.0.2','234eelp'],
['mipl-tgfd','v3.0.0','124fdge'],
['mipl-hrdss','v1.0.2','543rfd3'],
['mipl-newss','v1.0.2','eelp234']]
This is the code I used to get a final list:
def get_difference(l1, l2):
    """Return a JSON array describing how packages differ between two lists.

    Args:
        l1: The old state, a list of ``[name, version, id]`` triples.
        l2: The new state, in the same format.

    Returns:
        A JSON string holding a list of objects with the keys ``name``,
        ``old-version``, ``new-version``, ``old-id`` and ``new-id``. Packages
        whose version is unchanged are omitted; packages only in ``l2`` are
        marked ``new`` and packages only in ``l1`` are marked ``deprecated``.
    """
    import json  # local import keeps the snippet self-contained

    keys = ("name", "old-version", "new-version", "old-id", "new-id")
    # Use the arguments as given instead of re-reading undefined file handles.
    d1 = {k: (v1, v2) for k, v1, v2 in l1}
    d2 = {k: (v1, v2) for k, v1, v2 in l2}

    # Dicts (not sets) keep the field order, allow repeated values and are
    # JSON serializable.
    result = []
    for k, (version, ident) in d2.items():
        if k in d1:
            old_version, old_ident = d1[k]
            if old_version != version:
                row = (k, old_version, version, old_ident, ident)
                result.append(dict(zip(keys, row)))
        else:
            result.append(dict(zip(keys, (k, 'new', version, 'new', ident))))
    for k, (version, ident) in d1.items():
        if k not in d2:
            row = (k, version, 'deprecated', ident, 'deprecated')
            result.append(dict(zip(keys, row)))
    return json.dumps(result)
Current Output :
result = [['mipl-avfd', 'v1.1.5', 'v1.1.6','32fv423', '3254323'], ['mipl-oldss','v1.0.2', 'deprecated','eelp234', 'deprecated'], ['mipl-newss', 'new','v1.0.2','new', 'eelp234']]
Required Output(I want to write it to an easily readable JSON which can be later made into a table) :
{diff = {"name" : "mipl-avfd",
"old-version" : "v1.1.5",
"new-version" : "v1.1.6",
"old-id" : "32fv423",
"new-id" : "3254323"
},
{"name" : "mipl-oldss",
"old-version" : "v1.0.2",
"new-version" : "deprecated",
"old-id" : "eelp234",
"new-id" : "deprecated"
},
{"name" : "mipl-newss",
"old-version" : "new",
"new-version" : "v1.0.2",
"old-id" : "eelp234",
"new-id" : "new"
}
}

I hope I understand your question correctly. You have an "old" list_1 and a "new" list_2, and you want to build a flat list describing how the versions changed (I assume list_1 holds the old versions):
import json
from itertools import groupby
list_1 = [
    ['mipl-abnd', 'v1.0.2', 'eelp234'],
    ['mipl-avfd', 'v1.1.5', '32fv423'],
    ['mipl-awsd', 'v9.0.2', '234eelp'],
    ['mipl-tgfd', 'v3.0.0', '124fdge'],
    ['mipl-hrdss', 'v1.0.2', '543rfd3'],
    ['mipl-oldss', 'v1.0.2', 'eelp234'],
]
list_2 = [
    ['mipl-abnd', 'v1.0.2', 'eelp234'],
    ['mipl-avfd', 'v1.1.6', '3254323'],
    ['mipl-awsd', 'v9.0.2', '234eelp'],
    ['mipl-tgfd', 'v3.0.0', '124fdge'],
    ['mipl-hrdss', 'v1.0.2', '543rfd3'],
    ['mipl-newss', 'v1.0.2', 'eelp234'],
]

# Names present in the old list; a set makes the "deprecated or new?" check
# a constant-time lookup instead of a scan over list_1 for every group.
old_names = {row[0] for row in list_1}

# sorted() is stable and list_1 comes first, so inside each group the old
# entry (if any) precedes the new one. groupby needs the input sorted by the
# same key it groups on.
s = sorted(list_1 + list_2, key=lambda k: k[0])

out = []
for v, g in groupby(s, lambda k: k[0]):
    g = list(g)
    if len(g) == 2:
        # Present in both lists: g[0] is the old entry, g[1] the new one.
        out.append({
            'name': v,
            'old-version': g[0][1],
            'new-version': g[1][1],
            'old-id': g[0][2],
            'new-id': g[1][2],
        })
    elif v in old_names:
        # Only in the old list.
        out.append({
            'name': v,
            'old-version': g[0][1],
            'new-version': 'deprecated',
            'old-id': g[0][2],
            'new-id': 'deprecated',
        })
    else:
        # Only in the new list.
        out.append({
            'name': v,
            'old-version': 'new',
            'new-version': g[0][1],
            'old-id': 'new',
            'new-id': g[0][2],
        })
print(json.dumps(out, indent=4))
Prints:
[
{
"name": "mipl-abnd",
"old-version": "v1.0.2",
"new-version": "v1.0.2",
"old-id": "eelp234",
"new-id": "eelp234"
},
{
"name": "mipl-avfd",
"old-version": "v1.1.5",
"new-version": "v1.1.6",
"old-id": "32fv423",
"new-id": "3254323"
},
{
"name": "mipl-awsd",
"old-version": "v9.0.2",
"new-version": "v9.0.2",
"old-id": "234eelp",
"new-id": "234eelp"
},
{
"name": "mipl-hrdss",
"old-version": "v1.0.2",
"new-version": "v1.0.2",
"old-id": "543rfd3",
"new-id": "543rfd3"
},
{
"name": "mipl-newss",
"old-version": "new",
"new-version": "v1.0.2",
"old-id": "new",
"new-id": "eelp234"
},
{
"name": "mipl-oldss",
"old-version": "v1.0.2",
"new-version": "deprecated",
"old-id": "eelp234",
"new-id": "deprecated"
},
{
"name": "mipl-tgfd",
"old-version": "v3.0.0",
"new-version": "v3.0.0",
"old-id": "124fdge",
"new-id": "124fdge"
}
]

What you call JSON is not valid JSON. Also, JSON is a string format - what you want is a dict structure, and you don't have to dump it into a JSON string.
Why do you give l1 and l2 as arguments to the function when you overwrite them in the first lines?
file1 and file2 are not defined in the function. Also, for reading files you should use with to properly close the file.
First, you need to declare the keys (labels) somewhere:
# One label per value in each result row, in the same order as the values:
# name, old version, new version, old id, new id. zip() stops at the shorter
# input, so a missing label would silently drop a value and shift the rest.
keys = ["name", "old-version", "new-version", "old-id", "new-id"]
Then, instead of appending a list, you append a dict.
Thankfully, dicts can be easily made from lists of tuples - and we can merge keys and your current lists into lists of tuples easily, e.g.:
dict(zip(keys, [k,v1[0],v[0], v1[1],v[1]]))
So it now looks like this:
# Walk the new entries: report version changes and newly added packages.
for k, v in d2.items():
    if k in d1:
        v1 = d1[k]
        if v1[0] != v[0]:
            result.append(dict(zip(keys, [k, v1[0], v[0], v1[1], v[1]])))
    else:
        # Not in the old mapping at all, so it is a new package.
        result.append(dict(zip(keys, [k, 'new', v[0], 'new', v[1]])))
# Anything left only in the old mapping has been removed.
for k, v in d1.items():
    if k not in d2:
        result.append(dict(zip(keys, [k, v[0], 'deprecated', v[1], 'deprecated'])))

Related

Count number of objects in list of dictionary where a key's value is more than 1

Given a list of dictionaries:
data = {
"data": [
{
"categoryOptionCombo": {
"id": "A"
},
"dataElement": {
"id": "123"
}
},
{
"categoryOptionCombo": {
"id": "B"
},
"dataElement": {
"id": "123"
}
},
{
"categoryOptionCombo": {
"id": "C"
},
"dataElement": {
"id": "456"
}
}
]
}
I would like to display the dataElement where the count of distinct categoryOptionCombo is larger than 1.
e.g. the result of the function would be an iterable of IDs:
[123]
because the dataElement with id 123 has two different categoryOptionCombos.
# Map each dataElement id to the set of distinct categoryOptionCombo ids
# seen with it.
tracker = {}
for d in data['data']:
    data_element = d['dataElement']['id']
    coc = d['categoryOptionCombo']['id']
    # setdefault creates the empty set the first time an element is seen.
    tracker.setdefault(data_element, set()).add(coc)
too_many = [key for key, value in tracker.items() if len(value) > 1]
How can I iterate the list of dictionaries preferably with a comprehension? This solution above is not pythonic.
One approach:
import collections
# dataElement id -> set of distinct categoryOptionCombo ids
counts = collections.defaultdict(set)
for d in data["data"]:
    counts[d["dataElement"]["id"]].add(d["categoryOptionCombo"]["id"])
# Keep only the elements associated with more than one distinct combo.
res = [k for k, v in counts.items() if len(v) > 1]
print(res)
Output
['123']
This approach creates a dictionary mapping dataElements to the different types of categoryOptionCombo:
defaultdict(<class 'set'>, {'123': {'B', 'A'}, '456': {'C'}})
Almost a one-liner:
import collections

# Deduplicate (dataElement, categoryOptionCombo) pairs first, so a combo that
# is repeated for the same element is counted once rather than once per row.
pairs = {(d['dataElement']['id'], d['categoryOptionCombo']['id']) for d in data['data']}
counts = collections.Counter(element for element, _ in pairs)
print( counts )
Output:
Counter({'123': 2, '456': 1})
No need for sets, you can just remember each data element's first coc or mark it as having 'multiple'.
# dataElement id -> its first categoryOptionCombo id, or the marker
# 'multiple' once a different combo has been seen for that element.
tracker = {}
for d in data['data']:
    data_element = d['dataElement']['id']
    coc = d['categoryOptionCombo']['id']
    # setdefault stores coc on first sight and otherwise returns what is
    # already stored, so a mismatch means a second distinct combo.
    if tracker.setdefault(data_element, coc) != coc:
        tracker[data_element] = 'multiple'
too_many = [key for key, value in tracker.items() if value == 'multiple']
(If the string 'multiple' can be a coc id, then use multiple = object() and compare with is).

Remove unneeded dictionary and list wrappers in Python

I have been trying to reformat some json data from a python query. I want the slug of my data to become the key of a dictionary. I did it but I have a list and a dictionary in extra. How can I remove them? I would like to have this result:
{
"corps-connecte" :{
"id": 9666888,
"title": "Corps connect\u00e9",
"slug": "corps-connecte",
},
"portal-thanos" : {
"id": 9666888,
"title": "Portal thanos",
"slug": "portal-thanos",
},...
}
But actually i have this :
[
{
"corps-connecte" :{
"id": 9666888,
"title": "Corps connect\u00e9",
"slug": "corps-connecte",
},
},
{
"portal-thanos" : {
"id": 9666888,
"title": "Portal thanos",
"slug": "portal-thanos",
}
}...
]
Here is how I did to get the data above, maybe there is an easier way that I can reformat my query correctly?
def artist_artworks(self, artist_id):
artist = self.artist(artist_id)
artworks = []
with ThreadPool(self.threads) as pool:
for artwork in pool.imap(self.artwork, artist["projects"]):
keyList = [artwork["slug"]]
valueList = [artwork]
artworks.append(dict(list(zip(keyList, valueList))))
continue
break
return artworks
def save_artists_json(self, artist):
result = self.save_artist(artist)
json_formatted_str = json.dumps(result)
return json_formatted_str
Thanks
I believe your mistake is to have artworks be a list and append to it, instead of having it as a dict and updating it with the new artworks:
def artist_artworks(self, artist_id):
    """Return the artist's artworks as one dict keyed by artwork slug.

    Looks the artist up with ``self.artist``, maps ``self.artwork`` over the
    artist's ``"projects"`` using a pool of ``self.threads`` workers, and
    stores every returned artwork under its ``"slug"``. An artwork whose slug
    repeats an earlier one replaces the earlier entry.
    """
    artist = self.artist(artist_id)
    artworks = {}  # <- Initialize as dictionary
    with ThreadPool(self.threads) as pool:
        for artwork in pool.imap(self.artwork, artist["projects"]):
            artworks[artwork["slug"]] = artwork  # <- add under its slug
    return artworks
def save_artists_json(self, artist):
    """Return the result of ``self.save_artist(artist)`` as a JSON string."""
    result = self.save_artist(artist)
    return json.dumps(result)
You could just do:
from pprint import pprint
# A list of single-key wrappers, each mapping a slug to its record.
x = [
    {
        "corps-connecte": {
            "id": 9666888,
            "title": "Corps connect\u00e9",
            "slug": "corps-connecte",
        },
    },
    {
        "portal-thanos": {
            "id": 9666888,
            "title": "Portal thanos",
            "slug": "portal-thanos",
        },
    },
]

# Merge every wrapper into one flat dict; later keys overwrite earlier ones.
y = {}
for wrapper in x:
    y.update(wrapper)
pprint(y)
{'corps-connecte': {'id': 9666888,
'slug': 'corps-connecte',
'title': 'Corps connecté'},
'portal-thanos': {'id': 9666888,
'slug': 'portal-thanos',
'title': 'Portal thanos'}}
alternate one-liner solution (but likely more inefficient):
{next(iter(e)): e[next(iter(e))] for e in L}
an optimized version using walrus := operator in Python 3.8:
{(k := next(iter(e))): e[k] for e in L}
also a possibly more efficient version (note that popitem() removes the item it returns, so this empties the dicts in L):
dict(e.popitem() for e in L)
more optimized version of above, as suggested in comments:
dict(map(dict.popitem, L))
Timing the different options (run on Mac OS Big Sur, venv with Python 3.9.0)
from pprint import pprint
from timeit import timeit
from copy import deepcopy
x = [
    {
        "corps-connecte": {
            "id": 9666888,
            "title": "Corps connect\u00e9",
            "slug": "corps-connecte",
        },
    },
    {
        "portal-thanos": {
            "id": 9666888,
            "title": "Portal thanos",
            "slug": "portal-thanos",
        },
    },
]

# Every statement first copies the wrappers into L, because the popitem
# variants consume the dicts they read. The number above each entry is the
# timing recorded in the original run.
benchmarks = [
    # 0.947
    ('Items: ',
     'L = [z.copy() for z in x]; {k: v for e in L for k, v in e.items()}'),
    # 0.827
    ('Next -> Iter: ',
     'L = [z.copy() for z in x]; {(k := next(iter(e))): e[k] for e in L}'),
    # 0.912
    ('PopItem: ',
     'L = [z.copy() for z in x]; dict(e.popitem() for e in L)'),
    # 0.734
    ('Map -> PopItem: ',
     'L = [z.copy() for z in x]; dict(map(dict.popitem, L))'),
]
for label, statement in benchmarks:
    print(label, timeit(statement, globals=globals()))

Remove duplicate values in different Json Lists python

I know that there are a lot of questions about duplicates but I can't find a solution suitable for me.
I have a json structure like this:
{
"test": [
{
"name2": [
"Tik",
"eev",
"asdv",
"asdfa",
"sadf",
"Nick"
]
},
{
"name2": [
"Tik",
"eev",
"123",
"r45",
"676",
"121"
]
}
]
}
I want to keep the first value and remove all the other duplicates.
Expected Result
{
"test": [
{
"name2": [
"Tik",
"eev",
"asdv",
"asdfa",
"sadf",
"Nick"
]
},
{
"name2": [
"123",
"r45",
"676",
"121"
]
}
]
}
I tried using a tmp to check for duplicates but it didn't seem to work. Also I can't find a way to make it json again.
import json
with open('myjson') as access_json:
read_data = json.load(access_json)
tmp = []
tmp2 = []
def get_synonyms():
ingredients_access = read_data['test']
for x in ingredients_access:
for j in x['name2']:
tmp.append(j)
if j in tmp:
tmp2.append(j)
get_synonyms()
print(len(tmp))
print(len(tmp2))
You can use recursion:
def filter_d(d):
    """Return a copy of *d* with repeated scalar list items removed.

    Walks nested dicts and lists. The first time a scalar value appears as a
    list item anywhere in the structure it is kept; every later occurrence,
    in the same list or another one, is dropped. Scalar values stored
    directly under a dict key are left untouched. The input is not modified.
    """
    seen = set()  # list items already emitted, shared across all lists

    def inner(node):
        if isinstance(node, dict):
            return {
                key: inner(value) if isinstance(value, (dict, list)) else value
                for key, value in node.items()
            }
        kept = []
        for item in node:
            if isinstance(item, (dict, list)):
                kept.append(inner(item))
            elif item not in seen:
                kept.append(item)
                seen.add(item)
        return kept

    return inner(d)
import json
print(json.dumps(filter_d(data), indent=4))
Output:
{
"test": [
{
"name2": [
"Tik",
"eev",
"asdv",
"asdfa",
"sadf",
"Nick"
]
},
{
"name2": [
"123",
"r45",
"676",
"121"
]
}
]
}
You are first adding everything to tmp and then to tmp2 because every value was added to tmp before.
I changed the function a little bit to work for your specific test example:
def get_synonyms():
    """Rebuild ``read_data`` with each name kept only where it first appears.

    Reads the module-level ``read_data`` and returns a new
    ``{'test': [{'name2': [...]}, ...]}`` structure; ``read_data`` itself is
    not modified.
    """
    used_values = set()  # set membership is O(1); a list would rescan itself
    test_list = []
    for x in read_data['test']:
        inner_tmp = []
        for j in x['name2']:
            if j not in used_values:
                inner_tmp.append(j)
                used_values.add(j)
        test_list.append({'name2': inner_tmp})
    return {'test': test_list}
result = get_synonyms()
print(result)
Output:
{'test': [{'name2': ['Tik', 'eev', 'asdv', 'asdfa', 'sadf', 'Nick']}, {'name2': ['123', 'r45', '676', '121']}]}
Here's a little hackish answer:
d = {'test': [{'name2': ['Tik', 'eev', 'asdv', 'asdfa', 'sadf', 'Nick']},
              {'name2': ['Tik', 'eev', '123', 'r45', '676', '121']}]}
s = set()
for entry in d['test']:
    # (v, s.add(v))[0] records v in the set and evaluates to v itself; the
    # "if v not in s" filter runs first, so only unseen values get that far.
    entry['name2'] = [(v, s.add(v))[0] for v in entry['name2'] if v not in s]
Output:
{'test': [{'name2': ['Tik', 'eev', 'asdv', 'asdfa', 'sadf', 'Nick']},
{'name2': ['123', 'r45', '676', '121']}]}
This uses a set to track the unique values, and add unique values to set while returning the value back to the list.

Get json object with value with python for loop

When I use:
for reports in raw_data:
for names in reports["names"]:
report_name = json.dumps(names).strip('"')
report_names.append(report_name)
I get the key/object name: 'report1', ...
When I use:
for reports in raw_data:
for names in reports["names"].values():
report_name = json.dumps(names).strip('"')
report_names.append(report_name)
I get the value of the object: 'name1', ...
How do I get the object name and its value together, for example: 'report1': 'name1', ...
The json:
[
{
"names": {
"report1": "name1",
"report2": "name2"
}
},
{
"names": {
"report3": "name3",
"report4": "name4"
}
}
]
You need to loop over each dictionary in the object, then extract each key: value pair from items():
data = [
{
"names": {
"report1": "name1",
"report2": "name2"
}
},
{
"names": {
"report3": "name3",
"report4": "name4"
}
}
]
# Visit every object in the list, then every key/value pair of its "names".
for d in data:
    for k, v in d["names"].items():
        print(k, v)
Result:
report1 name1
report2 name2
report3 name3
report4 name4
Or if you can just print out the tuple pairs:
# items() yields (key, value) tuples, which can be printed as they are.
for d in data:
    for pair in d["names"].items():
        print(pair)
# ('report1', 'name1')
# ('report2', 'name2')
# ('report3', 'name3')
# ('report4', 'name4')
If you want all of the pairs in a list, use a list comprehension:
[pair for d in data for pair in d["names"].items()]
# [('report1', 'name1'), ('report2', 'name2'), ('report3', 'name3'), ('report4', 'name4')]
Try something like this:
import json
with open(r'jsonfile.json', 'r') as f:
    qe = json.load(f)
# Each item is an object holding a "names" mapping, as in the JSON from the
# question. Comparing the item itself to a string never matches, so walk the
# mapping's key/value pairs instead.
for item in qe:
    for key, value in item['names'].items():
        print(f'{key!r}: {value!r}')

Need help on converting Ruby function to Python function

I'm trying to create a Python function to convert lists (arrays of objects, in ELK terms) to dictionaries. I found a sample Ruby function which does that and I'm trying to convert it to a Python function for my use. I'm having a hard time getting the expected output. The output will be inserted back into Elasticsearch.
Ruby Function - I found in Internet
# Recursively walks h and, for each key k whose value is an Array, replaces
# that value in place with a Hash keyed by the element indices as strings
# ("0", "1", ...). Nested Hashes and Arrays are visited as well.
def arrays_to_hash(h)
h.each do |k,v|
# If v is nil, an array is being iterated and the value is k.
# If v is not nil, a hash is being iterated and the value is v.
value = v || k
if value.is_a?(Array)
# "value" is replaced with "value_hash" later.
value_hash = {}
value.each_with_index do |v, i|
value_hash[i.to_s] = v
end
h[k] = value_hash
end
# Recurse into the original container (value, not value_hash) so that
# anything nested inside it is converted too.
if value.is_a?(Hash) || value.is_a?(Array)
arrays_to_hash(value)
end
end
end
Python Function - I'm trying - Upon seeing the O/P i can see the first list inside the dictionary is getting converted but the nested list inside that is still present
def array_path(my_dict):
for k,v in my_dict.items():
if isinstance(v,list):
print (len(v))
for i, item in enumerate(v):
my_dict2[str(i)] = item
my_dict[k] = my_dict2
elif isinstance(v,dict):
array_path(v)
else:
my_dict[k] = v
Input
{
"foo": "bar",
"test": {
"steps": [
{
"response_time": "100"
},
{
"response_time": "101",
"more_nested": [
{
"hello": "world"
},
{
"hello2": "world2"
}
]
}
]
}
}
**
Expected Output
**
{
"foo": "bar",
"test": {
"steps": {
"0": {
"response_time": "100"
},
"1": {
"response_time": "101",
"more_nested": {
"0": {
"hello": "world"
},
"1": {
"hello2": "world2"
}
}
}
}
}
}
Current O/P
{'0': {'response_time': '100'},
'1': {'more_nested': [{'hello': 'world'}, {'hello2': 'world2'}],
'response_time': '101'}}
The original script only converted a list found directly under a dict key and had no handling for lists of dicts nested inside it. This version recurses into both dicts and lists:
def array_path(my_dict):
    """Replace every list in a nested structure by an index-keyed dict.

    A list becomes ``{"0": first, "1": second, ...}`` with its items converted
    recursively. A dict is updated in place (its values are converted) and
    returned. Any other value is returned unchanged.
    """
    if isinstance(my_dict, dict):
        # Iterate over a snapshot because the values are reassigned as we go.
        for k, v in list(my_dict.items()):
            my_dict[k] = array_path(v)
    elif isinstance(my_dict, list):
        return {str(i): array_path(item) for i, item in enumerate(my_dict)}
    return my_dict

Categories

Resources