How to add data to dictionary from an array conditionally - python

I have dictionaries in a list that already have some data and I want to add a vin number to each brand in these dictionaries.
my_brand_dict = [
{"key": {"Brand": "Tesla", "Date": "20203"}, "Total": 56},
{"key": {"Brand": "Tesla", "Date": "20207"}, "Total": 88},
{"key": {"Brand": "Audi", "Date": "202014"}, "Total": 79},
{"key": {"Brand": "Mercedes", "Date": "20201"}, "Total": 49},
]
my_vins = ["f60a0a", "#2019c0", "#a81b1b", "#468650", "#21248a", "#ff7a00"]
When Brand is Tesla add '#468650'
When Brand is Mercedes add '#2019c0'
When Brand is Toyota add '#21248a'
When Brand is Audi add '#ff7a00'
My expected output:
my_brand_dict = [
{"key": {"Brand": "Tesla", "Date": "20203"}, "Total": 56, "my_vin": "#468650"},
{"key": {"Brand": "Toyota", "Date": "20207"}, "Total": 88, "my_vin": "#21248a"},
{"key": {"Brand": "Audi", "Date": "202014"}, "Total": 79, "my_vin": "#ff7a00"},
{"key": {"Brand": "Mercedes", "Date": "20201"}, "Total": 49, "my_vin": "#2019c0"},
]
Couldn't find anything that matches what I want to achieve
Conditionally add values to dictionary

I would suggest using a dictionary instead of a list for your my_vins so that it maps brands to vins. This way you can easily get corresponding vin.
my_brand_dict = [
{"key": {"Brand": "Tesla", "Date": "20203"}, "Total": 56},
{"key": {"Brand": "Tesla", "Date": "20207"}, "Total": 88},
{"key": {"Brand": "Audi", "Date": "202014"}, "Total": 79},
{"key": {"Brand": "Mercedes", "Date": "20201"}, "Total": 49},
]
my_vins = {
"Mercedes": "#2019c0",
"Tesla": "#468650",
"Toyota": "#21248a",
"Audi": "#ff7a00",
}
for d in my_brand_dict:
brand = d["key"]["Brand"]
vin = my_vins[brand]
d["my_vin"] = vin
print(my_brand_dict)
Then take care of what should happen if a brand doesn't have a vin, You can raise exception or assign a default value.

You can define a dict base Brand & my_vins. Then use the defined dict and change value in-place in the my_brand_dict like the below.
my_vins_dct = {'Tesla' : '#468650',
'Mercedes' : '#2019c0',
'Toyota' : '#21248a',
'Audi' : '#ff7a00'}
my_brand_dict = [
{"key": {"Brand": "Tesla", "Date": "20203"}, "Total": 56},
{"key": {"Brand": "Tesla", "Date": "20207"}, "Total": 88},
{"key": {"Brand": "Audi", "Date": "202014"}, "Total": 79},
{"key": {"Brand": "Mercedes", "Date": "20201"}, "Total": 49},
{"key": {"Brand": "xxxx", "Date": "20201"}, "Total": 49},
]
for dct in my_brand_dict:
# First approach try/except and 'continue'
try :
dct['my_vin'] = my_vins_dct[dct['key']['Brand']]
except KeyError:
continue
# Second approach for adding 'Not Found'
# dct['my_vin'] = my_vins_dct.get(dct['key']['Brand'], 'Brand Not Found')
print(my_brand_dict)
Output:
[
{'key': {'Brand': 'Tesla', 'Date': '20203'}, 'Total': 56, 'my_vin': '#468650'},
{'key': {'Brand': 'Tesla', 'Date': '20207'}, 'Total': 88, 'my_vin': '#468650'},
{'key': {'Brand': 'Audi', 'Date': '202014'}, 'Total': 79, 'my_vin': '#ff7a00'},
{'key': {'Brand': 'Mercedes', 'Date': '20201'}, 'Total': 49, 'my_vin': '#2019c0'},
{'key': {'Brand': 'xxxx', 'Date': '20201'}, 'Total': 49}
]
# Output Second approach
# [
# {'key': {'Brand': 'Tesla', 'Date': '20203'}, 'Total': 56, 'my_vin': '#468650'},
# {'key': {'Brand': 'Tesla', 'Date': '20207'}, 'Total': 88, 'my_vin': '#468650'},
# {'key': {'Brand': 'Audi', 'Date': '202014'}, 'Total': 79, 'my_vin': '#ff7a00'},
# {'key': {'Brand': 'Mercedes', 'Date': '20201'}, 'Total': 49, 'my_vin': '#2019c0'},
# {'key': {'Brand': 'xxxx', 'Date': '20201'}, 'Total': 49, 'my_vin': 'Brand Not Found'}
# ]

my_brand_dict = [{'key': {'Brand': 'Tesla', 'Date': '20203'}, 'Total': 56}, {'key': {'Brand': 'Tesla', 'Date': '20207'}, 'Total': 88},
{'key': {'Brand': 'Audi', 'Date': '202014'}, 'Total': 79}, {'key': {'Brand': 'Mercedes', 'Date': '20201'}, 'Total': 49}]
my_vins = ['f60a0a', '#2019c0', '#a81b1b', '#468650', '#21248a', '#ff7a00']
# When Brand Tesla add '#468650'
# When Brand Mercedes add '#2019c0'
# When Brand Toyota add '#21248a'
# When Brand Audi add '#ff7a00'
for item in my_brand_dict:
if item['key']['Brand'] == 'Tesla':
item['my_vin'] = '#468650'
elif item['key']['Brand'] == 'Mercedes':
item['my_vin'] = '#2019c0'
elif item['key']['Brand'] == 'Toyota':
item['my_vin'] = '#21248a'
elif item['key']['Brand'] == 'Audi':
item['my_vin'] = '#ff7a00'
print(my_brand_dict)
This code works for me

Related

How to convert list of multiple jsons into individual jsons using pyspark

I have a list of jsons as mentioned below
[
{
"files": 0,
"data": [
{"name": "RFC", "value": "XXXXXXX", "attId": 01},
{"name": "NOMBRE", "value": "JOSE", "attId": 02},
{"name": "APELLIDO PATERNO", "value": "MONTIEL", "attId": 03},
{"name": "APELLIDO MATERNO", "value": "MENDOZA", "attId": 04},
{"name": "FECHA NACIMIENTO", "value": "1989-02-04", "attId": 05}
],
"dirId": 1,
"docId": 4,
"structure": {
"name": "personales",
"folioId": 22
}
},
{
"files": 0,
"data": [
{"name": "CALLE", "value": "AMOR", "attId": 06},
{"name": "No. EXTERIOR", "value": "4", "attId": 07},
{"name": "No. INTERIOR", "value": "2", "attId": 08},
{"name": "C.P.", "value": "55060", "attId": 09},
{"name": "ENTIDAD", "value": "ESTADO DE MEXICO", "attId": 10},
{"name": "MUNICIPIO", "value": "ECATEPEC", "attId": 11},
{"name": "COLONIA", "value": "INDUSTRIAL", "attId": 12}
],
"dirId": 1,
"docId": 4,
"structure": {
"name": "direccion",
"folioId": 22
}
}
]
I need to convert this list of jsons into separate individual jsons and execute them separately.
how to achieve this using pyspark or python?
Something like this, just one loop should work;
PN: There are few leading zeros in your input raw data...
# Assuming given list in your question is stored in lst
for item in lst:
print(item,"\n")
# item here will behave as individual dict/json
# Do your coding here as required
# Output for items;
{'files': 0, 'data': [{'name': 'RFC', 'value': 'XXXXXXX', 'attId': '01'}, {'name': 'NOMBRE', 'value': 'JOSE', 'attId': '02'}, {'name': 'APELLIDO PATERNO', 'value': 'MONTIEL', 'attId': '03'}, {'name': 'APELLIDO MATERNO', 'value': 'MENDOZA', 'attId': '04'}, {'name': 'FECHA NACIMIENTO', 'value': '1989-02-04', 'attId': '05'}], 'dirId': 1, 'docId': 4, 'structure': {'name': 'personales', 'folioId': 22}}
{'files': 0, 'data': [{'name': 'CALLE', 'value': 'AMOR', 'attId': '06'}, {'name': 'No. EXTERIOR', 'value': '4', 'attId': '07'}, {'name': 'No. INTERIOR', 'value': '2', 'attId': '08'}, {'name': 'C.P.', 'value': '55060', 'attId': '09'}, {'name': 'ENTIDAD', 'value': 'ESTADO DE MEXICO', 'attId': '10'}, {'name': 'MUNICIPIO', 'value': 'ECATEPEC', 'attId': '11'}, {'name': 'COLONIA', 'value': 'INDUSTRIAL', 'attId': '12'}], 'dirId': 1, 'docId': 4, 'structure': {'name': 'direccion', 'folioId': 22}}

Remove duplicates from list of dictionaries created using groupby itertools in Python

I want to remove some duplicates in my merged dictionary.
My data:
mongo_data = [{
'url': 'https://goodreads.com/',
'variables': [{'key': 'Harry Potter', 'value': '10.0'},
{'key': 'Discovery of Witches', 'value': '8.5'},],
'vendor': 'Fantasy'
},{
'url': 'https://goodreads.com/',
'variables': [{'key': 'Hunger Games', 'value': '10.0'},
{'key': 'Maze Runner', 'value': '5.5'},],
'vendor': 'Dystopia'
},{
'url': 'https://kindle.com/',
'variables': [{'key': 'Divergent', 'value': '9.0'},
{'key': 'Lord of the Rings', 'value': '9.0'},],
'vendor': 'Fantasy'
},{
'url': 'https://kindle.com/',
'variables': [{'key': 'The Handmaids Tale', 'value': '10.0'},
{'key': 'Divergent', 'value': '9.0'},],
'vendor': 'Fantasy'
}]
My code:
for key, group in groupby(mongo_data, key=lambda chunk: chunk['url']):
search = {"url": key, "results": []}
for vendor, group2 in groupby(group, key=lambda chunk2: chunk2['vendor']):
result = {
"genre": vendor,
"data": [{'key': key['key'], 'value': key['value']}
for result2 in group2
for key in result2["variables"]],
}
search["results"].append(result)
searches.append(search)
My result:
[
{
"url": "https://goodreads.com/",
"results": [
{
"genre": "Fantasy",
"data": [
{
"key": "Harry Potter",
"value": "10.0"
},
{
"key": "Discovery of Witches",
"value": "8.5"
}
]
},
{
"genre": "Dystopia",
"data": [
{
"key": "Hunger Games",
"value": "10.0"
},
{
"key": "Maze Runner",
"value": "5.5"
}
]
}
]
},
{
"url": "https://kindle.com/",
"results": [
{
"genre": "Fantasy",
"data": [
{
"key": "Divergent",
"value": "9.0"
},
{
"key": "Lord of the Rings",
"value": "9.0"
},
{
"key": "The Handmaids Tale",
"value": "10.0"
},
{
"key": "Divergent",
"value": "9.0"
}
]
}
}
]
}
]
I do not want any duplicates in my structure. I am not sure on how to take them out. My expected result can be seen below.
Expected result:
[
{
"url": "https://goodreads.com/",
"results": [
{
"genre": "Fantasy",
"data": [
{
"key": "Harry Potter",
"value": "10.0"
},
{
"key": "Discovery of Witches",
"value": "8.5"
}
]
},
{
"genre": "Dystopia",
"data": [
{
"key": "Hunger Games",
"value": "10.0"
},
{
"key": "Maze Runner",
"value": "5.5"
}
]
}
]
},
{
"url": "https://kindle.com/",
"results": [
{
"genre": "Fantasy",
"data": [
{
"key": "Divergent",
"value": "9.0"
},
{
"key": "Lord of the Rings",
"value": "9.0"
},
{
"key": "The Handmaids Tale",
"value": "10.0"
}
]
}
}
]
}
]
Divergent is getting repeated in the last list of dictionaries. When I merged my dictionaries even the duplicates inside https://kindle.com/-->Fantasy got merged into one. Is there a way for me to remove the duplicate dictionary?
I want the https://kindle.com/ part to look like:
{
"url": "https://kindle.com/",
"results": [
{
"genre": "Fantasy",
"data": [
{
"key": "Divergent",
"value": "9.0"
},
{
"key": "Lord of the Rings",
"value": "9.0"
},
{
"key": "The Handmaids Tale",
"value": "10.0"
}
]
}
}
]
}
You can try convert those dict to a set of tuple first and then convert back to a list of dict later:
mongo_data = [{
'url': 'https://goodreads.com/',
'variables': [{'key': 'Harry Potter', 'value': '10.0'},
{'key': 'Discovery of Witches', 'value': '8.5'},],
'vendor': 'Fantasy'
},{
'url': 'https://goodreads.com/',
'variables': [{'key': 'Hunger Games', 'value': '10.0'},
{'key': 'Maze Runner', 'value': '5.5'},],
'vendor': 'Dystopia'
},{
'url': 'https://kindle.com/',
'variables': [{'key': 'Divergent', 'value': '9.0'},
{'key': 'Lord of the Rings', 'value': '9.0'},],
'vendor': 'Fantasy'
},{
'url': 'https://kindle.com/',
'variables': [{'key': 'The Handmaids Tale', 'value': '10.0'},
{'key': 'Divergent', 'value': '9.0'},],
'vendor': 'Fantasy'
}]
from itertools import groupby
searches = []
for key, group in groupby(mongo_data, key=lambda chunk: chunk['url']):
search = {"url": key, "results": []}
for vendor, group2 in groupby(group, key=lambda chunk2: chunk2['vendor']):
result = {
"genre": vendor,
"data": set((key['key'], key['value'])
for result2 in group2
for key in result2["variables"]),
}
result['data'] = [{"key": tup[0], "value": tup[1]} for tup in result['data']]
search["results"].append(result)
searches.append(search)
searches
Output:
[{'results': [{'data': [{'key': 'Harry Potter', 'value': '10.0'},
{'key': 'Discovery of Witches', 'value': '8.5'}],
'genre': 'Fantasy'},
{'data': [{'key': 'Maze Runner', 'value': '5.5'},
{'key': 'Hunger Games', 'value': '10.0'}],
'genre': 'Dystopia'}],
'url': 'https://goodreads.com/'},
{'results': [{'data': [{'key': 'The Handmaids Tale', 'value': '10.0'},
{'key': 'Lord of the Rings', 'value': '9.0'},
{'key': 'Divergent', 'value': '9.0'}],
'genre': 'Fantasy'}],
'url': 'https://kindle.com/'}]

Merge value item and relevant infos from its "parent"

I am trying to merge the parent elements with each value item.
The JSON code has the following format:
[
{"id": "1",
"name": "a",
"values": [
{"ts": 111,
"speed": 12
},
{"ts": 112,
"speed": 8
},
]},
{"id": "2",
"name": "b",
"values": [
{"ts": 113,
"speed": 10
},
{"ts": 114,
"speed": 7
},
]}
In the end, the results should look as follows:
[{"id": "1", "name": "a", "ts": 111, "speed": 12},
{"id": "1", "name": "a", "ts": 112, "speed": 8},
{"id": "2", "name": "b", "ts": 113, "speed": 10},
{"id": "2", "name": "b", "ts": 114, "speed": 7}]
My idea was to use two loops. One that loops through all entries and one that loops through "values".
for entry in data:
for value in entry["values"]:
# a = entry without "values"
# a.update(value)
# print(a)
However, here I have the following problem. How can I get all the values of my entries except "values". I tried to delete "values" from a, however, this resulted in KeyError: 'values'
Furthermore, I am not sure if this is actually a good solution to my problem.
I am using python version 3.6.3.
Thanks a lot in advance for any suggestions!
You can build a new list with a nested comprehension to pull out any values you need:
newList = [{'id': d['id'],'name': d['name'], **v} for d in l for v in d['values']]
newList will be:
[{'id': '1', 'name': 'a', 'ts': 111, 'speed': 12},
{'id': '1', 'name': 'a', 'ts': 112, 'speed': 8},
{'id': '2', 'name': 'b', 'ts': 113, 'speed': 10},
{'id': '2', 'name': 'b', 'ts': 114, 'speed': 7}]

Group python list of dictionaries by values

Given I have a list of dictionaries I want to create new list of lists grouping the dictionaries by the values of "price":
dicts = [
{ "name": "item1", "price": 10 },
{ "name": "item2", "price": 5 },
{ "name": "item3", "price": 10 },
{ "name": "item4", "price": 12 },
{ "name": "item5", "price": 12 },
{ "name": "item6", "price": 5 }
]
Should create:
grouped_dicts = [
[{ "name": "item1", "price": 10 },
{ "name": "item3", "price": 10 }],
[{ "name": "item2", "price": 5 },
{ "name": "item6", "price": 5 }],
[{ "name": "item4", "price": 12 },
{ "name": "item5", "price": 12 }]
]
Is there a nice way of doing this?
Thanks
As an alternative to the answer above, here is how you do it without an additional import:
d = {}
for item in dicts:
d.setdefault(item['price'], []).append(item)
list(d.values())
Out:
[[{'name': 'item1', 'price': 10}, {'name': 'item3', 'price': 10}],
[{'name': 'item2', 'price': 5}, {'name': 'item6', 'price': 5}],
[{'name': 'item4', 'price': 12}, {'name': 'item5', 'price': 12}]]
You could use deaultdict
from collections import defaultdict
d=defaultdict(list)
for item in dicts:
d[list(item.values())[-1]].append(item)
Output:
defaultdict(list,
{10: [{'name': 'item1', 'price': 10},
{'name': 'item3', 'price': 10}],
5: [{'name': 'item2', 'price': 5}, {'name': 'item6', 'price': 5}],
12: [{'name': 'item4', 'price': 12},
{'name': 'item5', 'price': 12}]})
If you just need the list then just extract the values of defaultdict
list(d.values())
Output:
[[{'name': 'item1', 'price': 10}, {'name': 'item3', 'price': 10}],
[{'name': 'item2', 'price': 5}, {'name': 'item6', 'price': 5}],
[{'name': 'item4', 'price': 12}, {'name': 'item5', 'price': 12}]]

Python, reorganize array of dicts

To be honest, it's too easy for me to make in JS or Perl, but i've completely stuck with that in Python because of coplexed tools for dealing with dicts/lists. So, what i need:
i have an array of dicts:
[
{"id": 1, "name": "Res1", "type": "resource", "k_name": "Ind1_1", "k_id": 4},
{"id": 1, "name": "Res1", "type": "resource", "k_name": "Ind1_2", "k_id": 5},
{"id": 1, "name": "Res1", "type": "resource", "k_name": "Ind1_3", "k_id": 6},
{"id": 2, "name": "Res2", "type": "service", "k_name": "Ind2_1", "k_id": 7},
{"id": 2, "name": "Res2", "type": "service", "k_name": "Ind2_2", "k_id": 8},
{"id": 2, "name": "Res2", "type": "service", "k_name": "Ind2_3", "k_id": 9},
{"id": 2, "name": "Res2", "type": "service", "k_name": "Ind2_4", "k_id": 10},
{"id": 3, "name": "Res3", "type": "service", "k_name": "Ind3_1", "k_id": 11},
{"id": 3, "name": "Res3", "type": "service", "k_name": "Ind3_2", "k_id": 12},
{"id": 3, "name": "Res3", "type": "service", "k_name": "Ind3_3", "k_id": 13},
{"id": 3, "name": "Res3", "type": "service", "k_name": "Ind3_4", "k_id": 14}
]
and i need to make that:
[
{
"id": 1,
"name": "Res1",
"type": "resource",
"indicators": [
{"name": "Ind1_1","id": 4},
{"name": "Ind1_2","id": 5},
{"name": "Ind1_3","id": 6}
]
},
{
"id": 2,
"name": "Res2",
"type": "service",
"indicators": [
{"name": "Ind2_1","id": 7},
{"name": "Ind2_2","id": 8},
{"name": "Ind2_3","id": 9},
{"name": "Ind2_4","id": 10}
]
},
{
"id": 3,
"name": "Res3",
"type": "service",
"indicators": [
{"name": "Ind3_1","id": 11},
{"name": "Ind3_2","id": 12},
{"name": "Ind3_3","id": 13},
{"name": "Ind3_4","id": 14}
]
}
]
Can you help me with that?
itertools to the rescue:
import itertools
# Assuming your original list is `l`
# if it does not come in order, you need to do this line first, and will probably be less efficient.
l = sorted(l, key=lambda x:(x["id"], x["name"], x["type"]))
d = []
for k, g in itertools.groupby(l, lambda x: (x["id"], x["name"], x["type"])):
d.append({i:v for i, v in zip(["id", "name", "type"], k)})
d[-1]["indicator"] = [{y.split('_')[1]:e[y] for y in ["k_id", "k_name"]} for e in list(g)]
d becomes:
[{'id': 1,
'indicator': [{'id': 4, 'name': 'Ind1_1'},
{'id': 5, 'name': 'Ind1_2'},
{'id': 6, 'name': 'Ind1_3'}],
'name': 'Res1',
'type': 'resource'},
{'id': 2,
'indicator': [{'id': 7, 'name': 'Ind2_1'},
{'id': 8, 'name': 'Ind2_2'},
{'id': 9, 'name': 'Ind2_3'},
{'id': 10, 'name': 'Ind2_4'}],
'name': 'Res2',
'type': 'service'},
{'id': 3,
'indicator': [{'id': 11, 'name': 'Ind3_1'},
{'id': 12, 'name': 'Ind3_2'},
{'id': 13, 'name': 'Ind3_3'},
{'id': 14, 'name': 'Ind3_4'}],
'name': 'Res3',
'type': 'service'}]
You can use a mapping dict to map ids to corresponding sub-lists, so that as you iterate through the list (named l in this example), you can append a new entry to the output list if the id is not found in the mapping, or append the entry to the existing sub-list if id is found in the mapping:
mapping = {}
output = []
for d in l:
i = {'name': d.pop('k_name'), 'id': d.pop('k_id')}
if d['id'] in mapping:
mapping[d['id']].append(i)
else:
output.append({**d, 'indicators': [i]})
mapping[d['id']] = output[-1]['indicators']
output becomes:
[{'id': 1, 'name': 'Res1', 'type': 'resource', 'indicators': [{'name': 'Ind1_1', 'id': 4}, {'name': 'Ind1_2', 'id': 5}, {'name': 'Ind1_3', 'id': 6}]}, {'id': 2, 'name': 'Res2', 'type': 'service', 'indicators': [{'name': 'Ind2_1', 'id': 7}, {'name': 'Ind2_2', 'id': 8}, {'name': 'Ind2_3', 'id': 9}, {'name': 'Ind2_4', 'id': 10}]}, {'id': 3, 'name': 'Res3', 'type': 'service', 'indicators': [{'name': 'Ind3_1', 'id': 11}, {'name': 'Ind3_2', 'id': 12}, {'name': 'Ind3_3', 'id': 13}, {'name': 'Ind3_4', 'id': 14}]}]

Categories

Resources