How to create a priority list of dictionary - python

list is below
preference dictionary is below
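If all the keys and values except type are the same, the records are duplicates of one another.
For each such group, compare the type values and find the one that ranks highest in the preference dictionary.
The output is a list of dictionaries that keeps only the highest-ranked type for each group.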
list_ = [
{
"id": "11",
"name": "son",
"email": "n#network.com",
"type": "Owner"
},
{
"id": "11",
"name": "son",
"email": "n#network.com",
"type": "Manager"
},
{
"id": "21",
"name": "abc",
"email": "abc#network.com",
"type": "Employ"
},
{
"id": "21",
"name": "abc",
"email": "abc#network.com",
"type": "Manager"
}
]
A preference dictionary = {'Owner': 1, 'Manager':2, 'employ':3, 'HR': 4 }
My expected output dictionary below
[{'id': '11', 'name': 'son', 'email': 'n#network.com', 'type': 'Owner'},
{'id':'21','name': 'abc','email': 'abc#network.com','type': 'Manager'}]
# Asker's attempt: collect the records whose type is a known key.
# NOTE(review): `priority` is not defined in this snippet - presumably the preference dictionary above; confirm.
new_list = []
for each in list_:
if each['type'] in priority.keys():
# NOTE(review): new_list holds whole records (dicts), so this membership test on the id string never finds an existing entry.
if each['id'] not in new_list:
new_list.append(each)

You can simply do src.sort(key = lambda x : preference[x["type"]]) and your list will be sorted.

This solution groups all the elements by id, and sorts the groups according to the preference (so that the Owner is first and HR is last) and then just picks the first from each group:
from collections import defaultdict
src = [
    {
        "id": "11",
        "name": "son",
        "email": "n#network.com",
        "type": "Owner",
    },
    {
        "id": "11",
        "name": "son",
        "email": "n#network.com",
        "type": "Manager",
    },
    {
        "id": "21",
        "name": "abc",
        "email": "abc#network.com",
        "type": "Employ",
    },
    {
        "id": "21",
        "name": "abc",
        "email": "abc#network.com",
        "type": "Manager",
    },
]
# Lower number means higher preference.
preference = {'Owner': 1, 'Manager': 2, 'Employ': 3, 'HR': 4}

# Group all the records by id; dicts keep insertion order, so the groups
# come out in the order their ids were first seen.
d = defaultdict(list)
for item in src:
    d[item['id']].append(item)

# Select the most-preferred record of each group. min() returns the first
# record with the lowest rank, which is what sorting and taking element 0
# produced, but without reordering the groups in place.
result = [min(group, key=lambda x: preference[x['type']]) for group in d.values()]
print(result)
Output:
[{'id': '11', 'name': 'son', 'email': 'n#network.com', 'type': 'Owner'}, {'id': '21', 'name': 'abc', 'email': 'abc#network.com', 'type': 'Manager'}]

You could create a priority queue:
from queue import PriorityQueue
# The keys must match the records' "type" values exactly ('Employ', not
# 'employ'); otherwise priority[elem['type']] raises KeyError.
priority = {'Owner': 1, 'Manager': 2, 'Employ': 3, 'HR': 4}
q = PriorityQueue()
for elem in list_:
    p = priority[elem['type']]
    # id(elem) breaks ties between equal priorities, so the queue never has
    # to compare the dicts themselves.
    q.put((p, id(elem), elem))
Or you could also sort a list based on the type with:
# Ascending rank puts the most-preferred type (rank 1) first.
priority_list = sorted(list_, key=lambda x: priority[x['type']])

Well here's my shot!
It isn't beautiful but it seems to work.
list_ = [
{
"id": "11",
"name": "son",
"email": "n#network.com",
"type": "Owner"
},
{
"id": "11",
"name": "son",
"email": "n#network.com",
"type": "Manager"
},
{
"id": "21",
"name": "abc",
"email": "abc#network.com",
"type": "Employ"
},
{
"id": "21",
"name": "abc",
"email": "abc#network.com",
"type": "Manager"
}
]
# Per-role counters that are incremented while scanning list_.
new = dict( Owner = 0, Manager = 0, Employ = 0, HR = 0 )
for a in list_ :
type_ = a[ 'type' ]
if type_ == 'Owner':
new[ 'Owner' ] += 1
if type_ == 'Manager':
new[ 'Manager' ] += 1
# NOTE(review): a 'Manager' record also bumps the 'Employ' counter here - confirm this is intended.
if type_ in [ 'Employ', 'Manager' ]:
new[ 'Employ' ] += 1
# NOTE(review): indentation was lost in this listing, so it is unclear whether 'HR' is bumped for every record or only inside the branch above.
new[ 'HR' ] += 1
print( new )

Related

How to compare two list of dictionary and update value from another [closed]

Closed. This question needs details or clarity. It is not currently accepting answers.
Want to improve this question? Add details and clarify the problem by editing this post.
Closed last year.
Improve this question
list_1 = [
{ "id": "11", "name": "son", "email": "n#network.com", "type": "Owner" },
{ "id": "11", "name": "son", "email": "n#network.com", "type": "Manager" },
{ "id": "21", "name": "abc", "email": "abc#network.com", "type": "Employ" },
{ "id": "21", "name": "abc", "email": "abc#network.com", "type": "Manager" },
{ "id": "11", "name": "son", "email": "n#network.com", "type": "Manager" },
{ "id": "100", "name": "last", "email": "last#network.com", "type": "Manager" }
]
list_2 = [
{ "id": "11", "name": "son", "email": "n#network.com", "type": "Manager" },
{ "id": "11", "name": "son", "email": "n#network.com", "type": "Manager" },
{ "id": "21", "name": "abc", "email": "abc#network.com", "type": "Employ" },
{ "id": "52", "name": "abcded", "email": "abcded#network.com", "type": "Manager" }
]
A preference dictionary {'Owner': 1, 'Manager':2, 'employ':3, 'HR': 4 }
There are two list of dictionaries
list_1 is the primary_dictionary, list_2 is the secondary_dictionary
I need to update the role ('type') of a record in list_1 whenever its 'email' is also present in list_2, choosing the role according to the preference dictionary.
In the end, each record in the output should carry the 'type' that ranks highest in the preference dictionary.
If any emails match I want to update list one at all places with that email to contain a new role under'type'. for example if 123#gmail.com was in a dictionary within list2 then I want to change every item in list1 that contains 123#gmail.com as the email to have a role that is determined by a selection from the preference dictionary.
Expected output
[
{'id': '11', 'name': 'son', 'email': 'n#network.com', 'type': 'Owner'},
{'id': '21', 'name': 'abc', 'email': 'abc#network.com', 'type': 'Manager'},
{"id": "100", "name": "last", "email": "last#network.com", "type": "Manager"}
]
Code is below
for each1 in list_1:
for each2 in list_2:
if each1['email'] == each2['email']:
# Update the list_1 dictionary with respect to preference
Given the following inputs:
list_1 = [
{ "id": "11", "name": "son", "email": "n#network.com", "type": "Owner" },
{ "id": "11", "name": "son", "email": "n#network.com", "type": "Manager" },
{ "id": "21", "name": "abc", "email": "abc#network.com", "type": "Employ" },
{ "id": "21", "name": "abc", "email": "abc#network.com", "type": "Manager" },
{ "id": "11", "name": "son", "email": "n#network.com", "type": "Manager" },
{ "id": "100", "name": "last", "email": "last#network.com", "type": "Manager" }
]
list_2 = [
{ "id": "11", "name": "son", "email": "n#network.com", "type": "Manager" },
{ "id": "11", "name": "son", "email": "n#network.com", "type": "Manager" },
{ "id": "21", "name": "abc", "email": "abc#network.com", "type": "Employ" },
{ "id": "52", "name": "abcded", "email": "abcded#network.com", "type": "Manager" }
]
preference = {'Owner': 1, 'Manager':2, 'Employ':3, 'HR': 4 }
I would start by reshaping list_2 into something more useable. This will get rid of the duplicates leaving us just the "best" type for each email:
# Map each email in list_2 to its best (lowest-ranked number) type.
list_2_lookup = {}
for entry in list_2:
    email = entry["email"]
    role = entry["type"]
    rank = preference[role]
    # The first role seen for an email is kept until a strictly better one appears.
    if email not in list_2_lookup or rank < preference[list_2_lookup[email]]:
        list_2_lookup[email] = role
Then we can iterate over the items in the first list and use the lookup we just created. Note, this is a little more convoluted than might be needed as it is not clear from your question and your expected result what items from list_1 should actually appear in the output. I have tried to match your stated output.
# One output record per email, carrying the best type found in either list.
result = {}
for item in list_1:
    key = item["email"]
    # Store a copy so that upgrading the type below does not silently
    # rewrite the dicts that live in list_1.
    best = result.setdefault(key, dict(item))
    if preference[best["type"]] > preference[item["type"]]:
        best["type"] = item["type"]
    other_type = list_2_lookup.get(key)
    # Emails absent from list_2 rank as 99, which never beats a real preference.
    if preference[best["type"]] > preference.get(other_type, 99):
        best["type"] = other_type
At this point we can:
print(list(result.values()))
Giving us:
[
{'id': '11', 'name': 'son', 'email': 'n#network.com', 'type': 'Owner'},
{'id': '21', 'name': 'abc', 'email': 'abc#network.com', 'type': 'Manager'},
{'id': '100', 'name': 'last', 'email': 'last#network.com', 'type': 'Manager'}
]
Here is code that will do the following:
If any emails match, update list one at all places with that email to contain a new role under 'type'. For example, if 123#gmail.com was in a dictionary within list2, then it will change every item in list1 that contains 123#gmail.com as the email to have a role that is determined by a selection from the preference dictionary.
list_1 = [
{ "id": "11", "name": "son", "email": "n#network.com", "type": "Owner" },
{ "id": "11", "name": "son", "email": "n#network.com", "type": "Manager" },
{ "id": "21", "name": "abc", "email": "abc#network.com", "type": "Employ" },
{ "id": "21", "name": "abc", "email": "abc#network.com", "type": "Manager" },
{ "id": "11", "name": "son", "email": "n#network.com", "type": "Manager" },
{ "id": "100", "name": "last", "email": "last#network.com", "type": "Manager" }
]
list_2 = [
{ "id": "11", "name": "son", "email": "n#network.com", "type": "Manager" },
{ "id": "11", "name": "son", "email": "n#network.com", "type": "Manager" },
{ "id": "21", "name": "abc", "email": "abc#network.com", "type": "Employ" },
{ "id": "52", "name": "abcded", "email": "abcded#network.com", "type": "Manager" }
]
# Roles by rank, spelled exactly like the "type" values used in the lists
# ('Owner' / 'Employ'), so updated records stay comparable with the rest.
pref = {1: 'Owner', 2: 'Manager', 3: 'Employ', 4: 'HR'}
choice = 1
for each1 in list_1:
    for each2 in list_2:
        if each1['email'] == each2['email']:
            print(each1['type'])
            print(pref[choice])
            # each1 is the dict stored in list_1, so this updates list_1 in
            # place; no separate index counter is needed.
            each1['type'] = pref[choice]
print(list_1)

duplicates in a JSON file based on two attributes

I have a JSON file and that is a nested JSON. I would like to remove duplicates based on two keys.
JSON example:
"books": [
{
"id": "1",
"story": {
"title": "Lonely lion"
},
"description": [
{
"release": false,
"author": [
{
"name": "John",
"main": 1
},
{
"name": "Jeroge",
"main": 0
},
{
"name": "Peter",
"main": 0
}
]
}
]
},
{
"id": "2",
"story": {
"title": "Lonely lion"
},
"description": [
{
"release": false,
"author": [
{
"name": "Jeroge",
"main": 1
},
{
"name": "Peter",
"main": 0
},
{
"name": "John",
"main": 0
}
]
}
]
},
{
"id": "3",
"story": {
"title": "Lonely lion"
},
"description": [
{
"release": false,
"author": [
{
"name": "John",
"main": 1
},
{
"name": "Jeroge",
"main": 0
}
]
}
]
}
]
Here I try to match the title and the author names. For example, id 1 and id 2 are duplicates, because the title is the same and the author names are also the same (the order of the authors doesn't matter, and the main attribute does not need to be considered). So, in the output JSON, only one of id 1 or id 2 will remain, together with id 3. In the final output I need two files.
Output_JSON:
"books": [
{
"id": "1",
"story": {
"title": "Lonely lion"
},
"description": [
{
"release": false,
"author": [
{
"name": "John",
"main": 1
},
{
"name": "Jeroge",
"main": 0
},
{
"name": "Peter",
"main": 0
}
]
}
]
},
{
"id": "3",
"story": {
"title": "Lonely lion"
},
"description": [
{
"release": false,
"author": [
{
"name": "John",
"main": 1
},
{
"name": "Jeroge",
"main": 0
}
]
}
]
}
]
duplicatedID.csv:
1-2
The following method I tried but it is not giving correct results:
# Asker's attempt: build [title, author names...] for each book and compare it with the lists collected so far.
# NOTE(review): `list` and `id` below shadow Python builtins.
list= []
duplicate_Id = []
for data in (json_data['books'])[:]:
elements= []
id = data['id']
title = data['story']['title']
elements.append(title)
# Only the first entry of "description" is inspected for authors.
for i in (data['description'][0]['author']):
name = (i['name'])
elements.append(name)
# NOTE(review): within the lines shown, `elements` is appended to `list` only while `list` is still empty - confirm whether later books were meant to be recorded too.
if not list:
list.append(elements)
else:
for j in list:
# Comparing as sets ignores the order of the title/author entries.
if set(elements) == set(j):
duplicate_Id.append(id)
elements = []
The general idea is to:
Get the groups identified by some function that collects duplicates.
Then return the first entry of each group, ensuring no duplicates.
Define the key function as the sorted list of author names, because the list of authors is by definition the unique key but may appear in any order.
import json
from itertools import groupby
j = json.load(books)
def transform(books):
    """Return the first book of every distinct author set, in input order.

    Two books are duplicates when ``getAuthors`` returns the same names for
    both. Books are bucketed in a dict rather than with
    ``itertools.groupby``, because ``groupby`` only merges *adjacent* items
    and would miss duplicates that are separated by other books.

    Args:
        books: Iterable of book dicts shaped like the JSON sample.

    Returns:
        A list holding one book per author set (the first one encountered).
    """
    first_seen = {}
    for book in books:
        # tuple() makes the sorted name list hashable so it can be a dict key.
        first_seen.setdefault(tuple(getAuthors(book)), book)
    return list(first_seen.values())


def getAuthors(book):
    """Return the sorted author names of the book's first description entry."""
    authors = book['description'][0]['author']
    return sorted(author['name'] for author in authors)
print(transform(j['books']))
If we wanted to get the duplicates, then we do the same computation, but return any sublist with length > 1 as this is by our definition duplicated data.
def transform(books):
    """Return the groups of books that share the same author set.

    Only groups with more than one member are returned. Books are bucketed
    in a dict rather than with ``itertools.groupby``, because ``groupby``
    only merges *adjacent* items and would miss duplicates that are
    separated by other books.

    Args:
        books: Iterable of book dicts shaped like the JSON sample.

    Returns:
        A list of lists; each inner list holds the books of one duplicated
        author set, in input order.
    """
    groups = {}
    for book in books:
        # tuple() makes the sorted name list hashable so it can be a dict key.
        groups.setdefault(tuple(getAuthors(book)), []).append(book)
    return [group for group in groups.values() if len(group) > 1]
Where j['books'] is the JSON you gave enclosed in an object.

Recursive list inside dict? - python

I have started learning Python not long ago, and I'm trying to do recursive function for my list. This is the function:
# Asker's original search: walks a list of nodes looking for one whose 'Name' and 'Value' match the arguments.
def get_employee(j, field, value):
res = j
for x in res:
if x['Name'] == field and x['Value'] == value:
# Returns the whole list that contains the matching node, not just the node itself.
return res
elif "Properties" not in x:
if x is not None:
continue
elif "Properties" in x:
# NOTE: returning here ends the loop at the first node that has "Properties", so later siblings are never searched.
return get_employee(x['Properties'], field, value)
And this is my JSON:
[{
"Value": "Sales",
"Name": "Department",
"Properties": [{
"Value": "US",
"Name": "Country",
"Properties": [{
"Value": "Employee",
"Name": "EmployeeType",
"Properties": [{
"Value": "Manya Bishter",
"Name": "EmployeeFullName",
"Properties": [{
"Value": 1111,
"Name": "EmployeeID"
},
{
"Value": "Manya",
"Name": "EmployeeFirstName"
},
{
"Value": "Bishter",
"Name": "EmployeeLastName"
}
]
},
{
"Value": "Michael Ort",
"Name": "EmployeeFullName",
"Properties": [{
"Value": 1112,
"Name": "EmployeeID"
},
{
"Value": "Michael",
"Name": "EmployeeFirstName"
},
{
"Value": "Ort",
"Name": "EmployeeLastName"
}
]
}
]
},
{
"Value": "Manager",
"Name": "EmployeeType",
"Properties": [{
"Value": "Nick Fair",
"Name": "EmployeeFullName",
"Properties": [{
"Value": 1113,
"Name": "EmployeeID"
},
{
"Value": "Nick",
"Name": "EmployeeFirstName"
},
{
"Value": "Fair",
"Name": "EmployeeLastName"
}
]
}]
}
]
}]
},
{
"Value": "Marketing",
"Name": "Department",
"Properties": [{
"Value": "US",
"Name": "Country",
"Properties": [{
"Value": "Employee",
"Name": "EmployeeType",
"Properties": [{
"Value": "Tamta Hiresh",
"Name": "EmployeeFullName",
"Properties": [{
"Value": 1121,
"Name": "EmployeeID"
},
{
"Value": "Tamta",
"Name": "EmployeeFirstName"
},
{
"Value": "Hiresh",
"Name": "EmployeeLastName"
}
]
}]
}]
}]
}
]
The function works only for Manya, but for no one else.
For example, if I do this:
print(get_employee(myjson, "EmployeeFirstName", "Manya"))
It will print:
[{'Value': 1111, 'Name': 'EmployeeID'}, {'Value': 'Manya', 'Name': 'EmployeeFirstName'}, {'Value': 'Bishter', 'Name': 'EmployeeLastName'}]
But for others (like Nick), it will return None.
Can you please help?
Thanks!
Here is the code:
def get_employee(data, field, value):
    """Collect every sibling list that contains a node matching field/value.

    The tree is a list of dicts with ``Name`` and ``Value`` keys and an
    optional ``Properties`` list of the same shape. Each time a node has
    ``Name == field`` and ``Value == value``, the list that holds the node
    (the node plus its siblings) is added to the result. A matching node's
    own ``Properties`` are not searched.

    Args:
        data: Top-level list of nodes.
        field: Value to compare against each node's ``Name``.
        value: Value to compare against each node's ``Value``.

    Returns:
        A list of sibling lists, one per match; empty when nothing matches.
    """
    result = []

    def recursor(nodes):
        # Keep looping after recursing so every sibling is visited; returning
        # from inside the loop is what made the original single-pass search
        # stop at the first branch.
        for node in nodes:
            if node['Name'] == field and node['Value'] == value:
                result.append(nodes)
            elif "Properties" in node:
                recursor(node['Properties'])

    recursor(data)
    return result
The problem with your recursive function is that, once it hits `return get_employee(x['Properties'], field, value)`, it exits the outer loop `for x in res:`. Thus it never runs on the next item in your list.

find parent nodes in a tree

I created a JSON example and I would like to input an id and get that item and all parents.
Though my code does work, it doesn't feel pythonic, there must be a better way.
a_list = [
{
"name": {"en": "Canada"},
"id": "CAN",
"children": [
{
"name": {"en": "British Columbia"},
"id": "01",
"children": [
{"name": {"en": "Vancouver"}, "id": "10", "children": []},
{"name": {"en": "Victoria"}, "id": "11", "children": []}
]
},
{
"name": {"en": "Nova Scotia"},
"id": "02",
"children": [
{"name": {"en": "Halifax"}, "id": "13", "children": []}
]
}
],
"name": {"en": "USA"},
"id": "US",
"children": [
{
"name": {"en": "Virgina"},
"id": "VA",
"children": [
{"name": {"en": "Richmond"}, "id": "20", "children": []},
{"name": {"en": "Norfolk"}, "id": "21", "children": []}
]
},
{
"name": {"en": "Maryland"},
"id": "MD",
"children": [
{"name": {"en": "Baltimore"}, "id": "23", "children": []},
{"name": {"en": "Gaithersburg"}, "id": "24", "children": []}
]
}
]
}
]
so I created this function
def find_item_and_parents_in_list(value, items):
    """Return the item whose ``id`` equals *value*, followed by its ancestors.

    The result is ordered from the matching item up to its top-level
    ancestor. The search stops at the first match in depth-first order
    instead of scanning the remaining items and overwriting the result.

    Args:
        value: The ``id`` to look for.
        items: List of dicts with an ``id`` key and an optional ``children``
            list of the same shape.

    Returns:
        ``[match, parent, grandparent, ...]``, or an empty list when no item
        has the requested id.
    """
    for item in items:
        if item['id'] == value:
            return [item]
        # Nodes without a "children" key are treated as leaves.
        path = find_item_and_parents_in_list(value, item.get('children', []))
        if path:
            path.append(item)
            return path
    return []
if I use it, i.e
# Call the function under its defined name, find_item_and_parents_in_list.
items = find_item_and_parents_in_list("23", a_list)
for item in items:
    print(item['id'])
it returns
23
MD
US
but can my find_item_and_parents_in_list be written in a better way? using yield for example
You can use recursion with a generator:
def get_paths(d, _id, c=None):
    """Yield the id path from the root node *d* down to each node with id *_id*.

    Args:
        d: Node dict with an ``id`` key and an optional ``children`` list of
            nodes of the same shape.
        _id: The id to search for.
        c: Ids of the ancestors already walked; callers normally omit it.

    Yields:
        Lists of ids, root first and the matching id last.
    """
    # A fresh list per call instead of a shared mutable default argument.
    path = [] if c is None else c
    if d['id'] == _id:
        yield path + [_id]
    # Recurse lazily; nodes without a "children" key are treated as leaves.
    for child in d.get('children', []):
        yield from get_paths(child, _id, path + [d['id']])
print(list(get_paths(a_list[0], '23'))[0])
Output:
['US', 'MD', '23']

Flatten nested JSON arrays with inherits properties in Python

I have a big json/dictionary with different levels of nested json arrays, I would like to flatten it, and also capture the relationship of the structure,
Part of my json looks like:
{
"name": "root",
"type": "all",
"children": [
{
"name": "properties",
"type": "feature",
"children": [
{
"name": "print",
"type": "feature",
"children": [
{
"name": "graphic print",
"type": "feature",
"inherits": true
},
{
"name": "striped print",
"type": "feature",
"inherits": true,
"children": [
{
"name": "pinstriped",
"type": "feature",
"inherits": true
},
{
"name": "light stripe",
"type": "feature",
"inherits": true
},
{
"name": "wide stripe",
"type": "feature",
"inherits": true
}
]
}
]
}
]
},
{
"name": "colours",
"type": "colour",
"children": [
{
"name": "main colours",
"type": "colour",
"children": [
{
"name": "black",
"type": "colour",
"children": [
{
"name": "light black",
"type": "colour",
"inherits": true
},
{
"name": "blue black",
"type": "colour",
"inherits": true
}
]
},
{
"name": "red",
"type": "colour",
"children": [
{
"name": "bright red",
"type": "colour",
"inherits": true
},
{
"name": "light red",
"type": "colour"
}
]
}
]
}
]
},
{
"name": "genders",
"type": "gender",
"children": [
{
"name": "female",
"type": "gender"
},
{
"name": "male",
"type": "gender"
}
]
}
]
}
The depth of nests is not all the same. I
- want all the nodes (values of "name")
- also want all its parents if the node has the "inherits" key set to true.
Something like:
But if there are better ideas on how to store this data, will be happy to accept as well!
Many Thanks!
I think this should do what you need:
def parse_dict_of_dict(_dict, _parent='', ret_dict=None):
    """Map every node name in the tree to its inherited parent chain.

    A node whose ``inherits`` flag is truthy is mapped to the
    comma-separated chain of parent names handed down to it; every other
    node is mapped to ``None``. A node that inherits passes its own name
    plus its chain on to its children; a node that does not passes only its
    own name.

    Args:
        _dict: Node dict with a ``name`` key and optional ``children`` (list
            of nodes) and ``inherits`` keys.
        _parent: Parent chain handed down by the caller.
        ret_dict: Dict to fill in. A new one is created when omitted, so
            results no longer leak from one top-level call into the next.

    Returns:
        The filled-in ``ret_dict``.
    """
    if ret_dict is None:
        ret_dict = {}
    _name = _dict["name"]
    _children = _dict.get('children', None)
    _inherit = _dict.get('inherits', False)
    if isinstance(_children, list):
        # An inheriting node extends the chain; otherwise the chain restarts here.
        _chain = _name + ', ' + _parent if _inherit else _name
        for _child in _children:
            parse_dict_of_dict(_child, _chain, ret_dict)
    ret_dict[_name] = _parent.strip(' ').strip(',') if _inherit else None
    return ret_dict
Can you elaborate more on your output?
OR you can use this function to flatten a nested JSON to a simple JSON.
def parse_dict_of_dict(_dict, _str=''):
    """Flatten a nested dict/list structure into a single-level dict.

    Keys of nested dicts are joined with ``_``; list items add their index
    (``key_0``, ``key_1``, ...). Scalar values outside lists are converted
    with ``str()``; scalar list items are stored unchanged.

    Args:
        _dict: The (possibly nested) dict to flatten; keys must be strings.
        _str: Key prefix accumulated from the enclosing levels.

    Returns:
        A new flat dict.
    """
    ret_dict = {}
    # dict.items() replaces the Python 2-only iteritems().
    for k, v in _dict.items():
        if isinstance(v, dict):
            ret_dict.update(parse_dict_of_dict(v, _str=_str + k + '_'))
        elif isinstance(v, list):
            for index, item in enumerate(v):
                if isinstance(item, dict):
                    ret_dict.update(parse_dict_of_dict(item, _str=_str + k + '_%d_' % index))
                else:
                    # Carry the parent prefix like every other branch does;
                    # without it, lists nested under different parents
                    # overwrite each other's keys.
                    ret_dict[_str + k + '_%d' % index] = item
        else:
            # The Python 2 ``unicode.encode`` fallback is gone: str() accepts
            # any text value on Python 3.
            ret_dict[_str + k] = str(v)
    return ret_dict

Categories

Resources