Python groupby/convert join table to triple nested dictionary

Python groupby/convert join table to triple nested dictionary - python

From a SQL stored procedure that performs a join on 3 tables I get the data below.
data = [
{"so_number": "ABC", "po_status": "OPEN", "item_id": 0, "part_number": "XTZ", "ticket_id": 10, "ticket_month": "JUNE"},
{"so_number": "ABC", "po_status": "OPEN", "item_id": 0, "part_number": "XTZ", "ticket_id": 11, "ticket_month": "JUNE"},
{"so_number": "ABC", "po_status": "OPEN", "item_id": 1, "part_number": "XTY", "ticket_id": 12, "ticket_month": "JUNE"},
{"so_number": "DEF", "po_status": "OPEN", "item_id": 3, "part_number": "XTU", "ticket_id": 13, "ticket_month": "JUNE"},
{"so_number": "DEF", "po_status": "OPEN", "item_id": 3, "part_number": "XTU", "ticket_id": 14, "ticket_month": "JUNE"},
{"so_number": "DEF", "po_status": "OPEN", "item_id": 3, "part_number": "XTU", "ticket_id": 15, "ticket_month": "JUNE"}]
I would like to group the data on so_number and item_id to return a list of dicts like below.
[
{
"so_number ": "ABC",
"po_status": "OPEN",
"line_items": [
{
"item_id": 0,
"part_number": "XTZ",
"tickets": [
{
"ticket_id": 10,
"ticket_month": "JUNE"
},
{
"ticket_id": 11,
"ticket_month": "JUNE"
}
]
},
{
"item_id": 1,
"part_number": "XTY",
"tickets": [
{
"ticket_id": 12,
"ticket_month": "JUNE"
}
]
}
]
},
{
"so_number ": "DEF",
"po_status": "OPEN",
"line_items": [
{
"item_id": 3,
"part_number": "XTU",
"tickets": [
{
"ticket_id": 13,
"ticket_month": "JUNE"
},
{
"ticket_id": 14,
"ticket_month": "JUNE"
},
{
"ticket_id": 15,
"ticket_month": "JUNE"
}
]
}
]
}
]
I wanted to know if there was an efficient way of doing this. I am open to using pandas as well.
I thought about accessing the 3 sql tables through a loop and creating this list of dicts but it will probably not be best practice or efficient.

Given the nested structure, you could use groupby in loops:
import pandas as pd
import json

# Flat rows as returned by the three-table join: one row per ticket, with the
# order-level and item-level columns repeated on every row.
data = [
    {"so_number": "ABC", "po_status": "OPEN", "item_id": 0, "part_number": "XTZ", "ticket_id": 10, "ticket_month": "JUNE"},
    {"so_number": "ABC", "po_status": "OPEN", "item_id": 0, "part_number": "XTZ", "ticket_id": 11, "ticket_month": "JUNE"},
    {"so_number": "ABC", "po_status": "OPEN", "item_id": 1, "part_number": "XTY", "ticket_id": 12, "ticket_month": "JUNE"},
    {"so_number": "DEF", "po_status": "OPEN", "item_id": 3, "part_number": "XTU", "ticket_id": 13, "ticket_month": "JUNE"},
    {"so_number": "DEF", "po_status": "OPEN", "item_id": 3, "part_number": "XTU", "ticket_id": 14, "ticket_month": "JUNE"},
    {"so_number": "DEF", "po_status": "OPEN", "item_id": 3, "part_number": "XTU", "ticket_id": 15, "ticket_month": "JUNE"}]
df = pd.DataFrame(data)

res = []
# Level 1: one dict per (so_number, po_status) pair.
for (so, po), dfg1 in df.groupby(["so_number", "po_status"]):
    d1 = {
        # NOTE: the key keeps the trailing space used in the requested output.
        "so_number ": so,
        "po_status": po,
        "line_items": [],
    }
    # Level 2: one dict per (item_id, part_number) pair inside this order.
    for (iid, pnb), dfg2 in dfg1.groupby(["item_id", "part_number"]):
        d2 = {
            "item_id": iid,
            "part_number": pnb,
            # Level 3: the remaining ticket columns, one record per row.
            "tickets": dfg2[["ticket_id", "ticket_month"]].to_dict(orient="records"),
        }
        d1["line_items"].append(d2)
    res.append(d1)

# default=int is the fallback for values json cannot encode natively
# (e.g. group keys that come back as NumPy integers).
print(json.dumps(res, indent=2, default=int))
Output:
[
{
"so_number ": "ABC",
"po_status": "OPEN",
"line_items": [
{
"item_id": 0,
"part_number": "XTZ",
"tickets": [
{
"ticket_id": 10,
"ticket_month": "JUNE"
},
{
"ticket_id": 11,
"ticket_month": "JUNE"
}
]
},
{
"item_id": 1,
"part_number": "XTY",
"tickets": [
{
"ticket_id": 12,
"ticket_month": "JUNE"
}
]
}
]
},
{
"so_number ": "DEF",
"po_status": "OPEN",
"line_items": [
{
"item_id": 3,
"part_number": "XTU",
"tickets": [
{
"ticket_id": 13,
"ticket_month": "JUNE"
},
{
"ticket_id": 14,
"ticket_month": "JUNE"
},
{
"ticket_id": 15,
"ticket_month": "JUNE"
}
]
}
]
}
]
Edit following your comment: you will still have to define the grouping keys. But you can do it only once and keep all other keys at the last level:
res = []
lvl1 = ["so_number", "po_status"]   # columns that identify an order
lvl2 = ["item_id", "part_number"]   # columns that identify a line item
for val1, dfg1 in df.groupby(lvl1):
    # Pair each level-1 column name with its group value.
    d1 = dict(zip(lvl1, val1))
    d1["line_items"] = []
    for val2, dfg2 in dfg1.groupby(lvl2):
        d2 = dict(zip(lvl2, val2))
        # Every column that is not a grouping key lands in the ticket records,
        # so new columns are picked up without touching this code.
        d2["tickets"] = dfg2.drop(columns=lvl1 + lvl2).to_dict(orient="records")
        d1["line_items"].append(d2)
    res.append(d1)

Related

Convert sql join data into list of dictionaries on certain same key

From a sql stored proc that performs a join on two tables I get the data below.
[
{"service_order_number": "ABC", "vendor_id": 0, "recipient_id": 0, "item_id": 0, "part_number": "string", "part_description": "string"},
{"service_order_number": "ABC", "vendor_id": 0, "recipient_id": 0, "item_id": 1, "part_number": "string", "part_description": "string"},
{"service_order_number": "DEF", "vendor_id": 0, "recipient_id": 0, "item_id": 2, "part_number": "string", "part_description": "string"},
{"service_order_number": "DEF", "vendor_id": 0, "recipient_id": 0, "item_id": 3, "part_number": "string", "part_description": "string"}
]
What would be the best way to convert this data into the below format? Is it possible on the python side? Or is there something other than a join I can perform to get data back in this format?
[{
"service_order_number": "ABC",
"vendor_id": 0,
"recipient_id": 0,
items: [
{
"item_id": 0,
"part_number": "string",
"part_description": "string",
},
{
"item_id": 1,
"part_number": "string",
"part_description": "string",
}
]
},
{"service_order_number": "DEF",
"vendor_id": 0,
"recipient_id": 0,
items: [
{
"item_id": 2,
"part_number": "string",
"part_description": "string",
},
{
"item_id": 3,
"part_number": "string",
"part_description": "string",
}
]
}]

Here is a possible solution using defaultdict()
from collections import defaultdict
# Bucket the joined rows by their service order number.
grouped_data = defaultdict(list)
for item in data:
    grouped_data[item['service_order_number']].append(item)

# One output dict per order: the order-level fields are read from the first
# row of the bucket, the item-level fields are collected from every row.
res = [
    {
        'service_order_number': k,
        'vendor_id': v[0]['vendor_id'],
        'recipient_id': v[0]['recipient_id'],
        'items': [
            {
                'item_id': item['item_id'],
                'part_number': item['part_number'],
                'part_description': item['part_description'],
            }
            for item in v
        ],
    }
    for k, v in grouped_data.items()
]
print(res)

If you don't need the original data afterwards, you can use dict.pop to extract the common keys to group over and populate a dictionary in a loop. Note that this code destroys the original data: you'll only have res in the end.
res = {}
keys = ['service_order_number', 'vendor_id', 'recipient_id']
for d in my_data:
    # pop() removes the grouping keys from the row itself, so my_data is
    # modified in place; what is left in d are the item-level fields.
    vals = tuple(d.pop(k) for k in keys)
    res.setdefault(vals, {}).update(dict(zip(keys, vals)))
    # "items" key-value pairs are further nested inside 'items' key
    res[vals].setdefault('items', []).append(d)
# Drop the tuple keys; only the grouped dicts are needed.
res = list(res.values())
which outputs
[{'service_order_number': 'ABC',
'vendor_id': 0,
'recipient_id': 0,
'items': [{'item_id': 0, 'part_number': 'string', 'part_description': 'string'},
{'item_id': 1, 'part_number': 'string', 'part_description': 'string'}]},
{'service_order_number': 'DEF',
'vendor_id': 0,
'recipient_id': 0,
'items': [{'item_id': 2, 'part_number': 'string', 'part_description': 'string'},
{'item_id': 3, 'part_number': 'string', 'part_description': 'string'}]}]

Python groupby/convert a triple join table to nested dictionary

From a SQL stored procedure that performs a join on 3 tables I get the data below.
data = [
{"service_order_number": "ABC", "item_id": 0, "ticket_id": 10},
{"service_order_number": "ABC", "item_id": 0, "ticket_id": 11},
{"service_order_number": "ABC", "item_id": 1, "ticket_id": 12},
{"service_order_number": "DEF", "item_id": 3, "ticket_id": 13},
{"service_order_number": "DEF", "item_id": 3, "ticket_id": 14},
{"service_order_number": "DEF", "item_id": 3, "ticket_id": 15}]
I would like to group the data on service_order_number and item_id to return a list of dicts like below.
[
{
"service_order_number": "ABC",
"line_items": [
{
"item_id": 0,
"tickets": [
{
"ticket_id": 10
},
{
"ticket_id": 11
}
]
},
{
"item_id": 1,
"tickets": [
{
"ticket_id": 12
}
]
}
]
},
{
"service_order_number": "DEF",
"line_items": [
{
"item_id": 3,
"tickets": [
{
"ticket_id": 13
},
{
"ticket_id": 14
},
{
"ticket_id": 15
}
]
}
]
}
]
The hierarchy would be service_order_number > item_id > ticket_id
Is there an easy way to convert this data into my desired structure?

Here is a possible solution using defaultdict()
import json
from collections import defaultdict
# Two-level index: service_order_number -> item_id -> list of ticket dicts.
my_dict = defaultdict(lambda: defaultdict(list))
for item in data:
    tickets_for_item = my_dict[item['service_order_number']][item['item_id']]
    tickets_for_item.append({'ticket_id': item['ticket_id']})

# Unfold the index into the requested list-of-dicts structure.
res = [
    {
        'service_order_number': service_order_number,
        'line_items': [
            {'item_id': item_id, 'tickets': tickets}
            for item_id, tickets in item_group.items()
        ],
    }
    for service_order_number, item_group in my_dict.items()
]
print(json.dumps(res, indent=1, default=int))
[
{
"service_order_number": "ABC",
"line_items": [
{
"item_id": 0,
"tickets": [
{
"ticket_id": 10
},
{
"ticket_id": 11
}
]
},
{
"item_id": 1,
"tickets": [
{
"ticket_id": 12
}
]
}
]
},
{
"service_order_number": "DEF",
"line_items": [
{
"item_id": 3,
"tickets": [
{
"ticket_id": 13
},
{
"ticket_id": 14
},
{
"ticket_id": 15
}
]
}
]
}
]

Get Object inside JSON Object

I am pretty new to JSON and need to get an Object inside a list of JSON Objects.
This is my data structure and code so far:
{
"nhits": 15,
"parameters": {
"dataset": "100073",
"timezone": "UTC",
"q": "timestamp:[2021-02-21T23:00:00Z TO 2021-03-08T22:59:59Z]",
"rows": 10,
"start": 0,
"sort": [
"timestamp"
],
"format": "json",
"facet": [
"timestamp"
]
},
"records": [
{
"datasetid": "100073",
"recordid": "a1252522b7820edd98eb464811953d0f6ba56458",
"fields": {
"week": 10,
"ncumul_conf": 9971,
"current_quarantined": 506,
"timestamp": "2021-03-08T09:30:00+00:00",
"source": "https://www.gesundheit.bs.ch",
"ncumul_released": 9627,
"ndiff_conf": 4,
"current_quarantined_total": 623,
"current_hosp_resident": 13,
"ncumul_deceased": 192,
"current_isolated": 152,
"current_hosp": 19,
"ndiff_released": 10,
"current_hosp_non_resident": 6,
"current_quarantined_riskareatravel": 117,
"time": "10:30",
"date": "2021-03-08",
"ndiff_deceased": 0,
"current_icu": 5,
"abbreviation_canton_and_fl": "BS"
},
"record_timestamp": "2021-03-08T21:01:15.004000+00:00"
},
{
"datasetid": "100073",
"recordid": "c1a9f3fd45008ef3c140e446303ab3c2906166e0",
"fields": {
"week": 9,
"ncumul_conf": 9967,
"current_quarantined": 468,
"timestamp": "2021-03-07T11:40:00+00:00",
"source": "https://www.gesundheit.bs.ch",
"ncumul_released": 9617,
"ndiff_conf": 13,
"current_quarantined_total": 646,
"current_hosp_resident": 14,
"ncumul_deceased": 192,
"current_isolated": 158,
"current_hosp": 20,
"ndiff_released": 16,
"current_hosp_non_resident": 6,
"current_quarantined_riskareatravel": 178,
"time": "12:40",
"date": "2021-03-07",
"ndiff_deceased": 0,
"current_icu": 5,
"abbreviation_canton_and_fl": "BS"
},
"record_timestamp": "2021-03-08T21:01:15.004000+00:00"
},
{
"datasetid": "100073",
"recordid": "3668aa9ae4f9cf73890ad8c7f13efef7246cc461",
"fields": {
"week": 9,
"ncumul_conf": 9954,
"current_quarantined": 417,
"timestamp": "2021-03-06T11:20:00+00:00",
"source": "https://www.gesundheit.bs.ch",
"ncumul_released": 9601,
"ndiff_conf": 22,
"current_quarantined_total": 602,
"current_hosp_resident": 13,
"ncumul_deceased": 192,
"current_isolated": 161,
"current_hosp": 19,
"ndiff_released": 23,
"current_hosp_non_resident": 6,
"current_quarantined_riskareatravel": 185,
"time": "12:20",
"date": "2021-03-06",
"ndiff_deceased": 0,
"current_icu": 5,
"abbreviation_canton_and_fl": "BS"
},
"record_timestamp": "2021-03-08T21:01:15.004000+00:00"
},
{
"datasetid": "100073",
"recordid": "96a2bfde464cb4664ae8b16723960a7141800e56",
"fields": {
"week": 9,
"ncumul_conf": 9932,
"current_quarantined": 345,
"timestamp": "2021-03-05T09:50:00+00:00",
"source": "https://www.gesundheit.bs.ch",
"ncumul_released": 9578,
"ndiff_conf": 25,
"current_quarantined_total": 550,
"current_hosp_resident": 12,
"ncumul_deceased": 192,
"current_isolated": 162,
"current_hosp": 20,
"ndiff_released": 14,
"current_hosp_non_resident": 8,
"current_quarantined_riskareatravel": 205,
"time": "10:50",
"date": "2021-03-05",
"ndiff_deceased": 0,
"current_icu": 6,
"abbreviation_canton_and_fl": "BS"
},
"record_timestamp": "2021-03-08T21:01:15.004000+00:00"
},
{
"datasetid": "100073",
"recordid": "37a9b2c6a896a7dff362b27b671c71b83f467ccd",
"fields": {
"week": 9,
"ncumul_conf": 9907,
"current_quarantined": 253,
"timestamp": "2021-03-04T09:40:00+00:00",
"source": "https://www.gesundheit.bs.ch",
"ncumul_released": 9564,
"ndiff_conf": 27,
"current_quarantined_total": 481,
"current_hosp_resident": 13,
"ncumul_deceased": 192,
"current_isolated": 151,
"current_hosp": 21,
"ndiff_released": 23,
"current_hosp_non_resident": 8,
"current_quarantined_riskareatravel": 228,
"time": "10:40",
"date": "2021-03-04",
"ndiff_deceased": 0,
"current_icu": 6,
"abbreviation_canton_and_fl": "BS"
},
"record_timestamp": "2021-03-08T21:01:15.004000+00:00"
},
{
"datasetid": "100073",
"recordid": "c7933687391ff92436f1a75503648ce9430e0baa",
"fields": {
"week": 9,
"ncumul_conf": 9880,
"current_quarantined": 241,
"timestamp": "2021-03-03T10:50:00+00:00",
"source": "https://www.gesundheit.bs.ch",
"ncumul_released": 9541,
"ndiff_conf": 13,
"current_quarantined_total": 467,
"current_hosp_resident": 15,
"ncumul_deceased": 192,
"current_isolated": 147,
"current_hosp": 23,
"ndiff_released": 15,
"current_hosp_non_resident": 8,
"current_quarantined_riskareatravel": 226,
"time": "11:50",
"date": "2021-03-03",
"ndiff_deceased": 0,
"current_icu": 7,
"abbreviation_canton_and_fl": "BS"
},
"record_timestamp": "2021-03-08T21:01:15.004000+00:00"
},
{
"datasetid": "100073",
"recordid": "dd830a16c7f18e6cc2d5f8b03f5a75437d1331d3",
"fields": {
"week": 9,
"ncumul_conf": 9867,
"current_quarantined": 197,
"timestamp": "2021-03-02T09:40:00+00:00",
"source": "https://www.gesundheit.bs.ch",
"ncumul_released": 9526,
"ndiff_conf": 28,
"current_quarantined_total": 419,
"current_hosp_resident": 15,
"ncumul_deceased": 192,
"current_isolated": 149,
"current_hosp": 22,
"ndiff_released": 27,
"current_hosp_non_resident": 7,
"current_quarantined_riskareatravel": 222,
"time": "10:40",
"date": "2021-03-02",
"ndiff_deceased": 0,
"current_icu": 7,
"abbreviation_canton_and_fl": "BS"
},
"record_timestamp": "2021-03-08T21:01:15.004000+00:00"
},
{
"datasetid": "100073",
"recordid": "4de6410562c2e0329a9395f8e7687ed098f788b6",
"fields": {
"week": 9,
"ncumul_conf": 9839,
"current_quarantined": 159,
"timestamp": "2021-03-01T09:40:00+00:00",
"source": "https://www.gesundheit.bs.ch",
"ncumul_released": 9499,
"ndiff_conf": -14,
"current_quarantined_total": 365,
"current_hosp_resident": 15,
"ncumul_deceased": 192,
"current_isolated": 148,
"current_hosp": 21,
"ndiff_released": -4,
"current_hosp_non_resident": 6,
"current_quarantined_riskareatravel": 206,
"time": "10:40",
"date": "2021-03-01",
"ndiff_deceased": 0,
"current_icu": 7,
"abbreviation_canton_and_fl": "BS"
},
"record_timestamp": "2021-03-08T21:01:15.004000+00:00"
},
{
"datasetid": "100073",
"recordid": "90006046ef1f6627c4c742520e37c99c04eb2db3",
"fields": {
"week": 8,
"ncumul_conf": 9853,
"current_quarantined": 167,
"timestamp": "2021-02-28T08:00:00+00:00",
"source": "https://www.gesundheit.bs.ch",
"ncumul_released": 9503,
"ndiff_conf": 13,
"current_quarantined_total": 358,
"current_hosp_resident": 10,
"ncumul_deceased": 192,
"current_isolated": 158,
"current_hosp": 16,
"ndiff_released": 14,
"current_hosp_non_resident": 6,
"current_quarantined_riskareatravel": 191,
"time": "09:00",
"date": "2021-02-28",
"ndiff_deceased": 0,
"current_icu": 7,
"abbreviation_canton_and_fl": "BS"
},
"record_timestamp": "2021-03-08T21:01:15.004000+00:00"
},
{
"datasetid": "100073",
"recordid": "41c0f47f811b68f3ca393546e202b1b698e741c1",
"fields": {
"week": 8,
"ncumul_conf": 9840,
"current_quarantined": 177,
"timestamp": "2021-02-27T09:30:00+00:00",
"source": "https://www.gesundheit.bs.ch",
"ncumul_released": 9489,
"ndiff_conf": 21,
"current_quarantined_total": 359,
"current_hosp_resident": 10,
"ncumul_deceased": 192,
"current_isolated": 159,
"current_hosp": 16,
"ndiff_released": 14,
"current_hosp_non_resident": 6,
"current_quarantined_riskareatravel": 182,
"time": "10:30",
"date": "2021-02-27",
"ndiff_deceased": 0,
"current_icu": 7,
"abbreviation_canton_and_fl": "BS"
},
"record_timestamp": "2021-03-08T21:01:15.004000+00:00"
}
],
"facet_groups": [
{
"facets": [
{
"count": 15,
"path": "2021",
"state": "displayed",
"name": "2021"
}
],
"name": "timestamp"
}
]
}
To get the data in the "records" list I use:
import csv
from urllib.request import urlopen
import json
url = 'https://data.bs.ch/api/records/1.0/search/?dataset=100073&q=timestamp%3A%5B2021-02-21T23%3A00%3A00Z+TO+2021-03-08T22%3A59%3A59Z%5D&sort=timestamp&facet=timestamp'
ddict = {}
def getDataFromBS():
    """Download the JSON document at ``url`` and pick out its "records" list.

    NOTE: nothing is returned, so ``records`` is discarded when the function
    ends and the caller receives ``None``.
    """
    # The context manager closes the HTTP response once it has been read.
    with urlopen(url) as json_url:
        data = json.loads(json_url.read())
    records = data['records']
getDataFromBS()
My problem now is that I need to get the data inside the "fields" object, but I don't know how to extract it. Can anyone help me? Even if it is just a hint.
Every help will be much appreciated.

You need to return something from your function. The records field is a list of dictionaries. You can iterate over them and pull out the fields object
def getDataFromBS():
    """Download the JSON document at ``url`` and return its "fields" objects.

    Returns:
        A list with one entry per element of ``data['records']``: the value
        stored under that record's "fields" key, or ``None`` if the record
        has no such key.
    """
    # The context manager closes the HTTP response once it has been read.
    with urlopen(url) as json_url:
        data = json.loads(json_url.read())
    records = data['records']
    fields = [r.get('fields') for r in records]
    return fields

You could directly do data['records'][0]['fields'] after data = getDataFromBS()

Looks like you have the answer already, but here is another alternative if you just want to return one field. You could add a loop and iterate over the others.
import json
# Load the whole document, then print the "fields" object of the second
# record (index 1). An explicit encoding avoids depending on the platform
# default when reading the file.
with open('test.json', encoding='utf-8') as json_file:
    data = json.load(json_file)
    print(data['records'][1]['fields'])

Get names of keys in objectpath

How would I get the names of the keys, for example [800, 801] (the key names are unknown) with objectpath.
It is easy in jmespath: keys(#).
"groups": {
"800": {
"short_name": "22",
"oname": "11",
"group": 8,
"title": "SS",
"name": "33",
"onames": [""],
"alt_name": False,
"waytype": 1,
"multiple": 1,
"primary": 1
},
"801": {
"short_name": "ss",
"oname": "zz",
"group": 8,
"title": "ss",
"name": "bbb",
"onames": [""],
"alt_name": False,
"waytype": 1,
"multiple": 1,
"primary": 0
},

Assume your object is assigned to a variable called name:
const name = { "groups": {
"800": {
"short_name": "22",
"oname": "11",
"group": 8,
"title": "SS",
"name": "33",
"onames": [""],
"alt_name": false,
"waytype": 1,
"multiple": 1,
"primary": 1
},
"801": {
"short_name": "ss",
"oname": "zz",
"group": 8,
"title": "ss",
"name": "bbb",
"onames": [""],
"alt_name": false,
"waytype": 1,
"multiple": 1,
"primary": 0
} } }
Use a for loop to get the key names:
// Print every key of name.groups (e.g. "800", "801").
for (const groupKey of Object.keys(name.groups)) {
  console.log(groupKey);
}
and to get the value stored under each key:
// Print the object stored under every key of name.groups.
for (const groupKey of Object.keys(name.groups)) {
  console.log(name.groups[groupKey]);
}

How to Convert a list of dicts into nested JSON in python without using pandas DataFrame

I have a list of dicts like this
[
{
"subject_id": 1,
"subject_name": "HR Sector 0",
"id": 1,
"name": "parent2",
"value": 10.6
},
{
"subject_id": 18,
"subject_name": "Test11",
"id": 1,
"name": "parent2",
"value": 12
},
{
"subject_id": 2,
"subject_name": "AG1",
"id": 2,
"name": "Customer Delivery Dpt. 1",
"value": 17
},
{
"subject_id": 3,
"subject_name": "Finance Group 2",
"id": 2,
"name": "Customer Delivery Dpt. 1",
"value": 1.5
},
{
"subject_id": 10,
"subject_name": "test",
"id": 2,
"name": "Customer Delivery Dpt. 1",
"value": 10
},
{
"subject_id": null,
"subject_name": null,
"id": 3,
"name": "Technology Team 2",
"value": null
},
{
"subject_id": 8,
"subject_name": "Group 4",
"id": 5,
"name": "Accounting Group 4",
"value": 10
},
{
"subject_id": null,
"subject_name": null,
"id": 9,
"name": "PG2",
"value": null
}
]
I want to convert it into nested JSON and ignore null values to get below result set
[
{
"id": 1,
"name": "parent2",
"subjects": [
{"subject_id": 1,
"subject_name": "HR Sector 0",
"value": 10.6
},
{"subject_id": 18,
"subject_name": "Test11",
"value": 12
}
]
},
{
"id": 2,
"name": "Customer Delivery Dpt. 1",
"subjects": [
{"subject_id": 2,
"subject_name": "AG1",
"value": 17
},
{"subject_id": 3,
"subject_name": "Finance Group 2",
"value": 1.5
},
{"subject_id": 10,
"subject_name": "test",
"value": 10
}
]
},
{
"id": 3,
"name": "Technology Team 2",
"subjects": []
},
{
"id": 5,
"name": "Accounting Group 4",
"subjects": [
{ "subject_id": 8,
"subject_name": "Group 4",
"value": 10
}
]
},
{
"id": 9,
"name": "PG2",
"subjects": []
}
]

import json
arr = [
{
"subject_id": 1,
"subject_name": "HR Sector 0",
"id": 1,
"name": "parent2",
"value": 10.6
},
{
"subject_id": 18,
"subject_name": "Test11",
"id": 1,
"name": "parent2",
"value": 12
},
{
"subject_id": 2,
"subject_name": "AG1",
"id": 2,
"name": "Customer Delivery Dpt. 1",
"value": 17
},
{
"subject_id": 3,
"subject_name": "Finance Group 2",
"id": 2,
"name": "Customer Delivery Dpt. 1",
"value": 1.5
},
{
"subject_id": 10,
"subject_name": "test",
"id": 2,
"name": "Customer Delivery Dpt. 1",
"value": 10
},
{
"subject_id": None,
"subject_name": None,
"id": 3,
"name": "Technology Team 2",
"value": None
},
{
"subject_id": 8,
"subject_name": "Group 4",
"id": 5,
"name": "Accounting Group 4",
"value": 10
},
{
"subject_id": None,
"subject_name": None,
"id": 9,
"name": "PG2",
"value": None
}
]
def process_arr_to_json(arr):
    """Group flat rows by ``id`` and return the nested result as a JSON string.

    Each distinct ``id`` yields one output object holding a "subjects" list
    plus the row's non-null "id" and "name". Every other non-null field of a
    row is collected into one subject dict, which is appended to "subjects"
    only when it is non-empty. Groups appear in first-seen order.

    Args:
        arr: iterable of dicts, each containing at least an "id" key.

    Returns:
        The grouped structure serialized with ``json.dumps``. The structure
        is also printed before returning.
    """
    groups = []
    position_by_id = {}  # id -> index of its group inside ``groups``
    for item in arr:
        item_id = item["id"]
        if item_id not in position_by_id:
            position_by_id[item_id] = len(groups)
            groups.append({"subjects": []})
        group = groups[position_by_id[item_id]]

        # Build the subject for this row on its own, so a row whose subject
        # fields are all null cannot shift the position of later subjects.
        subject = {}
        for key, value in item.items():
            if value is None:
                continue
            if key in ("id", "name"):
                group[key] = value
            else:
                subject[key] = value
        if subject:
            group["subjects"].append(subject)
    print(groups)
    return json.dumps(groups)


if __name__ == "__main__":
    process_arr_to_json(arr)
my solution

Please see code below to form the merged results
import json
def process_items(items):
    """Group flat rows by ``id`` into nested dicts with a "subjects" list.

    Args:
        items: iterable of dicts with "id" and "name" keys and optional
            "subject_id", "value" and "subject_name" keys.

    Returns:
        A dict mapping each id to ``{"id", "name", "subjects"}``. "subjects"
        holds one dict per row with the subject fields that are not ``None``.
        Rows whose subject fields are all ``None`` add no subject. The name
        stored for a group is the one from the first row seen for that id.
    """
    results = {}
    for item in items:
        # setdefault keeps the existing group, so subjects collected from
        # earlier rows with the same id are not thrown away.
        group = results.setdefault(
            item['id'],
            {'id': item['id'], 'name': item['name'], 'subjects': []},
        )
        # Only None counts as missing; falsy values such as 0 are kept.
        to_append = {
            k: item[k]
            for k in ('subject_id', 'value', 'subject_name')
            if item.get(k) is not None
        }
        if to_append:
            group['subjects'].append(to_append)
    return results
items = [
{
"subject_id": 1,
"subject_name": "HR Sector 0",
"id": 1,
"name": "parent2",
"value": 10.6
},
{
"subject_id": 18,
"subject_name": "Test11",
"id": 1,
"name": "parent2",
"value": 12
},
{
"subject_id": 2,
"subject_name": "AG1",
"id": 2,
"name": "Customer Delivery Dpt. 1",
"value": 17
},
{
"subject_id": 3,
"subject_name": "Finance Group 2",
"id": 2,
"name": "Customer Delivery Dpt. 1",
"value": 1.5
},
{
"subject_id": 10,
"subject_name": "test",
"id": 2,
"name": "Customer Delivery Dpt. 1",
"value": 10
},
{
"subject_id": None,
"subject_name": None,
"id": 3,
"name": "Technology Team 2",
"value": None
},
{
"subject_id": 8,
"subject_name": "Group 4",
"id": 5,
"name": "Accounting Group 4",
"value": 10
},
{
"subject_id": None,
"subject_name": None,
"id": 9,
"name": "PG2",
"value": None
}
]
result = process_items(items)
# On Python 3 a dict view is not JSON serializable, so build a list from the
# grouped values first; list(...) behaves the same on Python 2.
json.dumps(list(result.values()))

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Python groupby/convert join table to triple nested dictionary - python

Related

Convert sql join data into list of dictionaries on certain same key

Python groupby/convert a triple join table to nested dictionary

Get Object inside JSON Object

Get names of keys in objectpath

How to Convert a list of dicts into nested JSON in python without using pandas DataFrame

Categories

Resources