Find all occurrence of keys in nested python dict - python

I have a dictionary like this:
a = {'compatibility': {'schema': ['attribute_variables/evar44',
'event42',
'container_visitors'],
'status': 'valid',
'supported_features': ['function_and',
'function_attr',
'function_container',
'function_event',
'function_event-exists',
'function_streq'],
'supported_products': ['o', 'data_warehouse', 'discover'],
'supported_schema': ['warehouse', 'n'],
'validator_version': '1.1.11'},
'definition': {'container': {'context': 'visitors',
'func': 'container',
'pred': {'func': 'and',
'preds': [{'description': 'e42',
'evt': {'func': 'event', 'name': 'metrics/event42'},
'func': 'event-exists'},
{'description': 'v44',
'func': 'streq',
'str': '544',
'val': {'func': 'attr', 'name': 'variables/evar44'}}]}},
'func': 'segment',
'version': [1, 0, 0]},
'description': '',
'id': 's2165c30c946ebceb',
'modified': '12',
'name': 'Apop',
'owner': {'id': 84699, 'login': 'max', 'name': 'Max'},
'reportSuiteName': 'App',
'rsid': 'test',
'siteTitle': 'App',
'tags': []}
I would like to extract the values of every key "description", "func", and "str"/"num" and return these values in one DataFrame of these dict.
I tried it with this code, but I wasn´t able to get every value und struggeld to put the values in one DataFrame.
def findkeys(node, kv):
if isinstance(node, list):
for i in node:
for x in findkeys(i, kv):
yield x
elif isinstance(node, dict):
if kv in node:
yield node[kv]
for j in node.values():
for x in findkeys(j, kv):
yield x
For my example the output I would like to have:
pd.DataFrame(np.array([['e42', 'event', 'NaN'], ['v44', 'streq', '544']]),
columns=['description', 'funk', 'str/num'])

The code below collect the values of the "interesting" keys into a dict.
from collections import defaultdict
a = {'compatibility': {'schema': ['attribute_variables/evar44',
'event42',
'container_visitors'],
'status': 'valid',
'supported_features': ['function_and',
'function_attr',
'function_container',
'function_event',
'function_event-exists',
'function_streq'],
'supported_products': ['o', 'data_warehouse', 'discover'],
'supported_schema': ['warehouse', 'n'],
'validator_version': '1.1.11'},
'definition': {'container': {'context': 'visitors',
'func': 'container',
'pred': {'func': 'and',
'preds': [{'description': 'e42',
'evt': {'func': 'event', 'name': 'metrics/event42'},
'func': 'event-exists'},
{'description': 'v44',
'func': 'streq',
'str': '544',
'val': {'func': 'attr', 'name': 'variables/evar44'}}]}},
'func': 'segment',
'version': [1, 0, 0]},
'description': '',
'id': 's2165c30c946ebceb',
'modified': '12',
'name': 'Apop',
'owner': {'id': 84699, 'login': 'max', 'name': 'Max'},
'reportSuiteName': 'App',
'rsid': 'test',
'siteTitle': 'App',
'tags': []}
def walk_dict(d, interesting_keys, result, depth=0):
for k, v in sorted(d.items(), key=lambda x: x[0]):
if isinstance(v, dict):
walk_dict(v, interesting_keys, result, depth + 1)
elif isinstance(v,list):
for entry in v:
if isinstance(entry, dict):
walk_dict(entry, interesting_keys, result, depth + 1)
else:
if k in interesting_keys:
result[k].append(v)
result = defaultdict(list)
walk_dict(a, ["description", "func", "str", "num"], result)
print(result)
output
defaultdict(<class 'list'>, {'func': ['container', 'and', 'event', 'event-exists', 'streq', 'attr', 'segment'], 'description': ['e42', 'v44', ''], 'str': ['544']})

Related

python according to the same value combining dictionary

i have a list of dict like this
[
{'id': 'A123',
'feature': {'name': 'jack', 'age' : '18' },
'create_time': '2022-5-17 10:29:47',
'is_fast': False},
{'id': 'A123',
'feature': {'gender': 'male'},
'create_time': '2022-5-17 10:29:47',
'is_fast': False},
{'id': 'A123',
'habit': {'name': 'read'},
'create_time': '2022-5-15 10:29:45',
'is_fast': False},
{'id': 'A456',
'feature': {'name': 'rose'},
'create_time': '2022-4-15 10:29:45',
'is_fast': False},
{'id': 'A456',
'habit': {'name': 'sport'},
'create_time': '2022-3-15 10:29:45',
'is_fast': False}
]
But I want to merge the same "id" values ​​together using something function
The desired output is as follows
[
{'id': 'A123',
'feature': {'name': 'jack', 'age' : '18' ,'gender': 'male'},
'habit': {'name': 'read'},
'create_time': '2022-5-19 10:29:47', #Get the latest time based on the same id
'is_fast': False},
{'id': 'A456',
'feature': {'name': 'rose'},
'habit': {'name': 'sport'},
'create_time': '2022-4-15 10:29:45',
'is_fast': False},
]
How can I merge the same "id" values ​​according to these dictionaries..
This should get you started... I put some inline notes to explain what the code is doing. You still need to implement a date time comparison.
def merge_dicts(lst):
final = {} # results
for row in lst: # iterate through list
if row['id'] not in final: # if current item id hasnt been seen
final[row['id']] = row # assign it to results with id as the key
else:
record = final[row['id']] # otherwise compare to data already stored
for k,v in row.items(): #iterate through dictionary items
if k not in record: # if key not in results
record[k] = v # add the key and value
continue
if record[k] == v: continue # if they are already equal move on
if isinstance(v, dict): # if its a dictionary
record[k].update(v) # update the dictionary
else: # must be date time sequence so do some datetime comparison
"""Do some date comparison and assign correct date"""
return [v for k,v in final.items()] # convert to list
print(merge_dicts(lst))
output:
[
{
'id': 'A123',
'feature': {'name': 'jack', 'age': '18', 'gender': 'male'},
'create_time': '2022-5-17 10:29:47',
'is_fast': False,
'habit': {'name': 'read'}
},
{
'id': 'A456',
'feature': {'name': 'rose'},
'create_time': '2022-4-15 10:29:45',
'is_fast': False,
'habit': {'name': 'sport'}
}
]
You can use the dict.setdefault method to initialize sub-dicts under keys that don't already exist to avoid cluttering up your code with conditional statements that test the existence of keys:
merged = {}
for d in lst:
s = merged.setdefault(d['id'], d)
for k, v in d.items():
if isinstance(v, dict):
s.setdefault(k, v).update(v)
elif v > s[k]: # the dates/times in the input follow alphabetical order
s[k] = v # later dates/times takes precedence
print(list(merged.values()))
Demo: https://replit.com/#blhsing/BlandCarelessPolygons#main.py

Merge Lists in Dictionary and remove duplicates

I only found questions where people wanted to merge lists into dictionaries or merge dictionaries but not merge lists that are already in a dictionary
Lets say I have a Dictionary having following structure
myDict= {
'key1': [{'description': 'some description', 'listwithstrings': ['somestring1'], 'number': '1'}, {'listwithstrings': ['somestring1', 'somestring2'], 'description': 'some other description', 'number': '1'}],
'key2': [{'listwithstrings': ['somestring4'], 'description': "some different description, 'number': '2'}, {'number': '2', 'listwithstrings': ['somestring5'], 'description': 'some different description'}],
'key3': [{'number': '3', 'listwithstrings': ['somestring7', 'somestring8'], 'description': 'only one entry'}]
}
now I want to merge the entries in the dictionary from each key for itself and remove the duplicates. I don't know for each key whether it has multiple entries (it can have more than two, too) or just one, so I can't use the key as a condition like number==1
Resulting in
myCleanedDict= {
'key1': [{'description': ['some description', 'some other description'], 'listwithstrings': ['somestring1', 'somestring2'], 'number': '1'}],
'key2': [{'listwithstrings': ['somestring4', 'somestring5'], 'description': 'some different description', 'number': '2'}],
'key3': [{'number': '3', 'listwithstrings': ['somestring7', 'somestring8'], 'description': 'only one entry'}]
}
myDict = {
'key1': [
{
'description': 'some description',
'listwithstrings': ['somestring1'],
'number': '1'
},
{
'listwithstrings': ['somestring1', 'somestring2'],
'description': 'some other description',
'number': '1'
}
],
'key2': [
{
'listwithstrings': ['somestring4'],
'description': 'some different description',
'number': '2'
},
{
'number': '2',
'listwithstrings': ['somestring5'],
'description': 'some different description'
}
],
'key3': [
{
'number': '3',
'listwithstrings': ['somestring7', 'somestring8'],
'description': 'only one entry'
}
]
}
newDict = {}
for k, v in myDict.items():
if len(v) == 0: continue
target = v[0]
for k in target:
if not isinstance(target[k], list):
target[k] = [target[k]]
for i in range(1, len(v)):
for k, v in v[i].items():
if isinstance(v, list):
target[k] += v
else:
target[k].append(v)
target[k] = list(set(target[k]))
for k in target:
if len(target[k]) == 1:
target[k] = target[k][0]
newDict[k] = [target]
print(newDict)

Why only last element is printing while using function [closed]

Closed. This question needs to be more focused. It is not currently accepting answers.
Want to improve this question? Update the question so it focuses on one problem only by editing this post.
Closed 2 years ago.
Improve this question
I need to get the value for business' name and append it to a list.
I need to get the value policies and append to a list after checking parent.
if parent is Marketing name has to added to level1.
if parent is Advertising name has to added to level2.
if some place Business is [] I need to pass None instead of Null List
Also need to check key exists or not for some keys there is a chance of missing policies, business
dictionary is below
If in the list contains same elements example 'Business':['Customer', Customer] then only one element has to take
searchtest = [
{'_index': 'newtest',
'_type': '_doc',
'_id': '100',
'_score': 1.0,
'_source': {'id': '100',
'name': 'A',
'policies': [
{
'id': '332',
'name': 'Second division',
'parent': 'Marketing'},
{'id': '3323', 'name':
'First division',
'parent': 'Marketing'}
]
}
},
{'_index': 'newtest',
'_type': '_doc',
'_id': '101',
'_score': 1.0,
'_source': {
'id': '101',
'name': 'B',
'Business': [{'id': '9'}, {'id': '10', 'name': 'Customer'}],
'policies': [{'id': '332', 'name': 'Second division', 'parent': 'Marketing'}, {'id': '3323', 'name': 'First division', 'parent': 'Advertising'}]}}]`
Code is below
def business(searchtest):
for el in searchtest:
Business_List = []
if 'Business' in el['_source']:
for j in el['_source']['Business']:
if 'name' in j:
Business_List.append(j['name'])
else:
Business_List.extend([])
return Business_List
def policy(searchtest):
for el in searchtest:
level1= []
if 'policies' in el['_source']:
for j in el['_source']['policies']:
if 'parent' in j:
if 'Marketing' in j['parent'] :
level1.append(j['name'])
else:
level1.extend([])
level2= []
if 'policies' in el['_source']:
for j in el['_source']['policies']:
if 'parent' in j:
if 'Advertising' in j['parent']:
level2.append(j['name'])
else:
level2.extend([])
return [level1, level2]
def data_product(searchtest):
resp = []
for el in searchtest:
d = {
'id' : el['_source']['id'],
'name' : el['_source']['name'],
'Business' : business(searchtest),
'level1' : policy(searchtest)[0],
'level2' : policy(searchtest)[1]
}
resp.append(d)
return resp
if __name__ == "__main__":
import pprint
pp = pprint.PrettyPrinter(4)
pp.pprint(data_product(searchtest))
My output
[ { 'Business': [],
'id': '101',
'level1': ['Second division'],
'level2': ['First division'],
'name': 'B'}]
Expected out
[ { 'Business': [],
'id': '100',
'level1': ['Second division','First division'],
'level2': [],
'name': 'A'},
{ 'Business': ['Customer'],
'id': '101',
'level1': ['Second division'],
'level2': ['First division'],
'name': 'B'}]
if resp.append(d) is put inside the loop then only one id is repeating?
my whole code with change
searchtest = [{'_index': 'newtest',
'_type': '_doc',
'_id': '100',
'_score': 1.0,
'_source': {'id': '100',
'name': 'A',
'policies': [{'id': '332',
'name': 'Second division',
'parent': 'Marketing'},
{'id': '3323', 'name': 'First division', 'parent': 'Marketing'}]}},
{'_index': 'newtest',
'_type': '_doc',
'_id': '101',
'_score': 1.0,
'_source': {'id': '101',
'name': 'B',
'Business': [{'id': '9'}, {'id': '10', 'name': 'Customer'}],
'policies': [{'id': '332',
'name': 'Second division',
'parent': 'Marketing'},
{'id': '3323', 'name': 'First division', 'parent': 'Advertising'}]}}]
def business(el):
Business_List = []
# for el in searchtest:
if 'Business' in el['_source']:
for j in el['_source']['Business']:
if 'name' in j:
Business_List.append(j['name'])
else:
Business_List.extend([])
return Business_List
def policy(searchtest):
for el in searchtest:
level1 = []
if 'policies' in el['_source']:
for j in el['_source']['policies']:
if 'parent' in j:
if 'Marketing' in j['parent']:
level1 .append(j['name'])
else:
level1 .extend([])
level2 = []
if 'policies' in el['_source']:
for j in el['_source']['policies']:
if 'parent' in j:
if 'Advertising' in j['parent']:
level2.append(j['name'])
else:
level2.extend([])
return [level1, level1 ]
def data_product(searchtest):
resp = []
for el in searchtest:
d = {
'id': el['_source']['id'],
'name': el['_source']['name'],
'Business': business(el),
'level1': policy(searchtest)[0],
'level2': policy(searchtest)[1]
}
resp.append(d)
return resp
if __name__ == "__main__":
import pprint
pp = pprint.PrettyPrinter(4)
pp.pprint(data_product(searchtest))
output:
[ { 'Business': [],
'id': '100',
'level1': ['Second division'],
'level2': ['First division'],
'name': 'A'},
{ 'Business': ['Customer'],
'id': '101',
'level1': ['Second division'],
'level2': ['First division'],
'name': 'B'}]

add key value in nested dictionary

datainput = {'thissong-fav-user:type1-chan-44-John': [{'Song': 'Rock',
'Type': 'Hard',
'Price': '10'}],
'thissong-fav-user:type1-chan-45-kelly-md': [{'Song': 'Rock',
'Type': 'Soft',
'Price': '5'}]}
Outputrequired:
{'thissong-fav-user:type1-chan-44-John': [{key:'Song',Value:'Rock'},
{key:'Type', Value:'Hard'},
{Key: 'Price', Value:'10'}],
'thissong-fav-user:type1-chan-45-kelly-md': [{key:'Song',Value:'Rock'},
{key:'Type', Value:'Soft'},
{Key: 'Price', Value:'5'}]}
I started with below, which gives me an inner nested pattern not sure how I can get the desired output.
temps = [{'Key': key, 'Value': value} for (key, value) in datainput.items()]
Here is how:
datainput = {'thissong-fav-user:type1-chan-44-John': [{'Song': 'Rock',
'Type': 'Hard',
'Price': '10'}],
'thissong-fav-user:type1-chan-45-kelly-md': [{'Song': 'Rock',
'Type': 'Soft',
'Price': '5'}]}
temps = {k:[{'Key':a, 'Value':b}
for a,b in v[0].items()]
for k,v in datainput.items()}
print(datainput)
Output:
{'thissong-fav-user:type1-chan-44-John': [{'Key': 'Song', 'Value': 'Rock'},
{'Key': 'Type', 'Value': 'Hard'},
{'Key': 'Price', 'Value': '10'}],
'thissong-fav-user:type1-chan-45-kelly-md': [{'Key': 'Song', 'Value': 'Rock'},
{'Key': 'Type', 'Value': 'Soft'},
{'Key': 'Price', 'Value': '5'}]}
I believe the way of having taken the input is fine but in order to get the desired output, you got to take the inputs initially, then key-value pair and finally iterate.
datainput = {'thissong-fav-user:type1-chan-44-John': [{'Song': 'Rock',
'Type': 'Hard',
'Price': '10'}],
'thissong-fav-user:type1-chan-45-kelly-md': [{'Song': 'Rock',
'Type': 'Soft',
'Price': '5'}]}
datainput = {k:[{'Key':a, 'Value':b} for a,b in v[0].items()] for k,v in datainput.items()}
print(datainput)
Most probably, you'll get the desired output in this fashion.

python - dictionary - update text values of keys - setting an priority (max principle)

I have the following strings as values for a dictionary key:
["low", "middle", "high", "very high"]
These are the options for the dicionary item key 'priority', a sample dict element is:
{'name': 'service', 'priority': value}
My task is to collect a list of dictionaries with the keys, all differ in the key value 'priority'.
my_list = [{'name': 'service', 'priority': 'low'}, {'name': 'service', 'priority': 'high'}]
In the end a final dictionary item should exist, that has the highest priority value. It should work like the maximum principle. In this case {'name': 'service', 'priority': 'high'} would be the result.
The problem is that the value is a string, not an integer.
Thanks for all ideas to get it work.
Here is the approach with itertools module usage:
# Step 0: prepare data
score = ["low", "middle", "high", "very high"]
my_list = [{'name': 'service', 'priority': 'low', 'label1':'text'}, {'name': 'service', 'priority': 'middle', 'label2':'text'}, {'name': 'service_b', 'priority': 'middle'}, {'name': 'service_b', 'priority': 'very high'}]
my_list # to just show source data in list
Out[1]:
[{'name': 'service', 'priority': 'low', 'label1': 'text'},
{'name': 'service', 'priority': 'middle', 'label2': 'text'},
{'name': 'service_b', 'priority': 'middle'},
{'name': 'service_b', 'priority': 'very high'}]
# Step 0.5: convert bytes-string (if it is) to string
# my_list = [{k:(lambda x: (x.decode() if type(x) == bytes else x))(v) for k,v in i.items()} for i in my_list ]
# Step 1: reorganize "score"-list on most useful way - to dict
score_dic = {i[0]:i[1] for i in list(zip(score, range(len(score))))}
score_dic
Out[2]:
{'low': 0, 'middle': 1, 'high': 2, 'very high': 3}
# Step 2: get result
import itertools
[max(list(g), key = lambda b: score_dic[b['priority']]) for k,g in itertools.groupby(my_list, lambda x:x['name'])]
Out[3]:
[{'name': 'service', 'priority': 'middle', 'label2': 'text'},
{'name': 'service_b', 'priority': 'very high'}]
Is this what you want?
priorities = ["low", "middle", "high", "very high"]
items = [{'name': 'service', 'priority': 'high'}, {'name': 'service2', 'priority': 'high'}, {'name': 'service', 'priority': 'very high'}, {'name': 'service2', 'priority': 'very high'}]
max_priority = max(items, key=lambda item: priorities.index(item['priority']))['priority']
max_items = [item for item in items if item['priority'] == max_priority]
print(max_items)
Output:
[{'name': 'service', 'priority': 'very high'}, {'name': 'service2', 'priority': 'very high'}]

Categories

Resources