Export Nested JSON to CSV using Python

I have the following JSON script, which I got from Xero.
It is a nested JSON script, and I'm trying to create a flat table and then export it to CSV.
I have written the Python code below, but I'm struggling to flatten the nested JSON.
Initially I get the data from Xero, and I use json.dumps so as to serialise the datetimes. The JSON export displayed here comes from the Postman software. When I get the JSON using Python, the date format is the following: 'UpdatedDateUTC': datetime.datetime(2018, 10, 24, 12, 53, 55, 930000). So I use json.dumps so as to serialise it.
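For reference, a minimal sketch of that serialisation step (the value is the one shown above; default=str simply makes json.dumps fall back to str() for anything it cannot encode natively):
import json
import datetime

# default=str falls back to str() for objects json.dumps cannot encode,
# such as the datetime values returned by the Xero client
payload = {'UpdatedDateUTC': datetime.datetime(2018, 10, 24, 12, 53, 55, 930000)}
print(json.dumps(payload, default=str))
# {"UpdatedDateUTC": "2018-10-24 12:53:55.930000"}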
When I produce the first export:
df = pd.read_json(b_str)
df.to_csv(path+'invoices.csv')
The CSV file looks like this:
The next step is to flatten the Contact and CreditNotes columns and make them part of the main table. So instead of the Contact column there will be 8 new columns: ContactID, ContactNumber, Name, Addresses, Phones, ContactGroups, ContactPersons, HasValidationErrors. A similar process applies to the CreditNotes column.
I'm trying to replicate the methodology in this link, but with no luck. I get an export which looks like this: the contacts_with_id dataframe is shown on multiple rows rather than multiple columns. I can't figure out what I am doing wrong.
I have also used the flatten_json function, but with no luck either.
I don't really need to make this particular methodology work. I just want to find a way to export the nested JSON to a readable CSV file.
Python Code:
from xero import Xero
from xero.auth import PrivateCredentials

with open("E:\\privatekey.pem") as keyfile:
    rsa_key = keyfile.read()
credentials = PrivateCredentials('BHK1ZBEKIL4WM0BLKLIOT65PSIA43N', rsa_key)
xero = Xero(credentials)

import json
import pandas as pd
from pandas.io.json import json_normalize  # package for flattening json in pandas df

# The following is a list
a_list = xero.invoices.all()
# The following is a string. Serialised datetime
b_str = json.dumps(a_list, default=str)

path = 'E:\\MyDrive\\Python Workspaces\\'
df = pd.read_json(b_str)
df.to_csv(path + 'invoices.csv')

# ********************* FLATTEN JSON *****************
dd = json.loads(b_str)
contacts_with_id = pd.io.json.json_normalize(dd, record_path='Contact', meta='InvoiceID',
                                             record_prefix='Contact.')
df_final = pd.merge(contacts_with_id, df, how='inner', on='InvoiceID')
df_final.to_csv(path + 'invoices_final.csv')
JSON script below:
{
"Id": "568d1686-7c53-4f22-a93f-754589a246a7",
"Status": "OK",
"ProviderName": "Rest API",
"DateTimeUTC": "/Date(1552234854959)/",
"Invoices": [
{
"Type": "ACCPAY",
"InvoiceID": "8289ab9d-2134-4601-8622-e7fdae4b6d89",
"InvoiceNumber": "10522",
"Reference": "10522",
"Payments": [],
"CreditNotes": [],
"Prepayments": [],
"Overpayments": [],
"AmountDue": 102,
"AmountPaid": 0,
"AmountCredited": 0,
"CurrencyRate": 1,
"HasErrors": false,
"IsDiscounted": false,
"HasAttachments": false,
"Contact": {
"ContactID": "d1dba397-0f0b-4819-a6ce-2839b7be5008",
"ContactNumber": "c03bbcb5-fb0b-4f46-83f0-8687f754488b",
"Name": "Micro",
"Addresses": [],
"Phones": [],
"ContactGroups": [],
"ContactPersons": [],
"HasValidationErrors": false
},
"DateString": "2017-02-06T00:00:00",
"Date": "/Date(1486339200000+0000)/",
"DueDateString": "2017-03-08T00:00:00",
"DueDate": "/Date(1488931200000+0000)/",
"Status": "AUTHORISED",
"LineAmountTypes": "Exclusive",
"LineItems": [],
"SubTotal": 85,
"TotalTax": 17,
"Total": 102,
"UpdatedDateUTC": "/Date(1529940362110+0000)/",
"CurrencyCode": "GBP"
},
{
"Type": "ACCREC",
"InvoiceID": "9e37150f-88a5-4213-a085-b30c5e01c2bf",
"InvoiceNumber": "(13)",
"Reference": "",
"Payments": [],
"CreditNotes": [
{
"CreditNoteID": "3c5c7dec-534a-46e0-ad1b-f0f69822cfd5",
"CreditNoteNumber": "(12)",
"ID": "3c5c7dec-534a-46e0-ad1b-f0f69822cfd5",
"AppliedAmount": 1200,
"DateString": "2011-05-04T00:00:00",
"Date": "/Date(1304467200000+0000)/",
"LineItems": [],
"Total": 7800
},
{
"CreditNoteID": "af38e37f-4ba3-4208-a193-a32b418c2bbc",
"CreditNoteNumber": "(14)",
"ID": "af38e37f-4ba3-4208-a193-a32b418c2bbc",
"AppliedAmount": 2600,
"DateString": "2011-05-04T00:00:00",
"Date": "/Date(1304467200000+0000)/",
"LineItems": [],
"Total": 2600
}
],
"Prepayments": [],
"Overpayments": [],
"AmountDue": 0,
"AmountPaid": 0,
"AmountCredited": 3800,
"CurrencyRate": 1,
"HasErrors": false,
"IsDiscounted": false,
"HasAttachments": false,
"Contact": {
"ContactID": "58164bd6-5225-4f30-ad89-35140db5b624",
"ContactNumber": "d0b420b8-4a58-40d1-9717-8525edda7658",
"Name": "FSales (1)",
"Addresses": [],
"Phones": [],
"ContactGroups": [],
"ContactPersons": [],
"HasValidationErrors": false
},
"DateString": "2011-05-04T00:00:00",
"Date": "/Date(1304467200000+0000)/",
"DueDateString": "2011-06-03T00:00:00",
"DueDate": "/Date(1307059200000+0000)/",
"Status": "PAID",
"LineAmountTypes": "Exclusive",
"LineItems": [],
"SubTotal": 3166.67,
"TotalTax": 633.33,
"Total": 3800,
"UpdatedDateUTC": "/Date(1529943661150+0000)/",
"CurrencyCode": "GBP",
"FullyPaidOnDate": "/Date(1304467200000+0000)/"
},
{
"Type": "ACCPAY",
"InvoiceID": "1ddea7ec-a0d5-457a-a8fd-cfcdc2099d51",
"InvoiceNumber": "01596057543",
"Reference": "",
"Payments": [
{
"PaymentID": "fd639da3-c009-47df-a4bf-98ccd5c68e43",
"Date": "/Date(1551657600000+0000)/",
"Amount": 173.86,
"Reference": "",
"CurrencyRate": 1,
"HasAccount": false,
"HasValidationErrors": false
}
],
"CreditNotes": [],
"Prepayments": [],
"Overpayments": [],
"AmountDue": 0,
"AmountPaid": 173.86,
"AmountCredited": 0,
"CurrencyRate": 1,
"HasErrors": false,
"IsDiscounted": false,
"HasAttachments": true,
"Contact": {
"ContactID": "309afb74-0a3b-4d68-85e8-2259ca5acd13",
"ContactNumber": "91eef1f0-5fe6-45d7-b739-1ab5352a5523",
"Name": "Company AAA",
"Addresses": [],
"Phones": [],
"ContactGroups": [],
"ContactPersons": [],
"HasValidationErrors": false
},
"DateString": "2019-02-23T00:00:00",
"Date": "/Date(1550880000000+0000)/",
"DueDateString": "2019-03-21T00:00:00",
"DueDate": "/Date(1553126400000+0000)/",
"Status": "PAID",
"LineAmountTypes": "Exclusive",
"LineItems": [],
"SubTotal": 144.88,
"TotalTax": 28.98,
"Total": 173.86,
"UpdatedDateUTC": "/Date(1551777481907+0000)/",
"CurrencyCode": "GBP",
"FullyPaidOnDate": "/Date(1551657600000+0000)/"
},
{
"Type": "ACCPAY",
"InvoiceID": "ba5ff3b1-1058-4645-80da-5475c23da949",
"InvoiceNumber": "Q0603",
"Reference": "",
"Payments": [],
"CreditNotes": [],
"Prepayments": [],
"Overpayments": [],
"AmountDue": 213.24,
"AmountPaid": 0,
"AmountCredited": 0,
"CurrencyRate": 1,
"HasErrors": false,
"IsDiscounted": false,
"HasAttachments": true,
"Contact": {
"ContactID": "f0473b41-da92-4397-9d2c-741812f2475c",
"ContactNumber": "1f124969-de8d-40b8-8140-d4997511b0dc",
"Name": "BTelcom",
"Addresses": [],
"Phones": [],
"ContactGroups": [],
"ContactPersons": [],
"HasValidationErrors": false
},
"DateString": "2019-03-05T00:00:00",
"Date": "/Date(1551744000000+0000)/",
"DueDateString": "2019-03-21T00:00:00",
"DueDate": "/Date(1553126400000+0000)/",
"Status": "SUBMITTED",
"LineAmountTypes": "Exclusive",
"LineItems": [],
"SubTotal": 177.7,
"TotalTax": 35.54,
"Total": 213.24,
"UpdatedDateUTC": "/Date(1552068778417+0000)/",
"CurrencyCode": "GBP"
}
]
}

I've had to do something like this before:
Basically, flatten out the entire nested JSON, then iterate through the flattened columns (whose names follow a pattern that encodes which row each value belongs to) to construct the new rows of the table.
There are 4 invoices, and this creates 4 rows (for each of the invoices). Hopefully this is what you are looking for.
NOTE Where you might run into some issues:
One thing to consider when flattening a JSON file with nested lists is that the lists may have different lengths. Any single row having a value for a given column forces that column to exist, even if it is null for every other row. In the Payments key, for instance, each list entry carries 7 elements. So if some ID has 8 payments (as opposed to all the others having only 1), the flat file has to grow 56 additional columns to store them all in separate columns.
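A tiny illustration of that blow-up, using made-up data (the InvoiceID/Payments rows here are hypothetical, not from the Xero export):
import pandas as pd

# invoice B has two payments, invoice A has one, so flattening widthwise
# forces a second set of Payments columns that is NaN for A
mini = [
    {"InvoiceID": "A", "Payments": [{"Amount": 1.0}]},
    {"InvoiceID": "B", "Payments": [{"Amount": 2.0}, {"Amount": 3.0}]},
]
wide = pd.DataFrame([
    {"InvoiceID": r["InvoiceID"],
     **{f"Payments{i}Amount": p["Amount"] for i, p in enumerate(r["Payments"])}}
    for r in mini
])
print(wide)
#   InvoiceID  Payments0Amount  Payments1Amount
# 0         A              1.0              NaN
# 1         B              2.0              3.0
The full example follows: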
jsonStr = '''{
"Id": "568d1686-7c53-4f22-a93f-754589a246a7",
"Status": "OK",
"ProviderName": "Rest API",
"DateTimeUTC": "/Date(1552234854959)/",
"Invoices": [
{
"Type": "ACCPAY",
"InvoiceID": "8289ab9d-2134-4601-8622-e7fdae4b6d89",
"InvoiceNumber": "10522",
"Reference": "10522",
"Payments": [],
"CreditNotes": [],
"Prepayments": [],
"Overpayments": [],
"AmountDue": 102,
"AmountPaid": 0,
"AmountCredited": 0,
"CurrencyRate": 1,
"HasErrors": false,
"IsDiscounted": false,
"HasAttachments": false,
"Contact": {
"ContactID": "d1dba397-0f0b-4819-a6ce-2839b7be5008",
"ContactNumber": "c03bbcb5-fb0b-4f46-83f0-8687f754488b",
"Name": "Micro",
"Addresses": [],
"Phones": [],
"ContactGroups": [],
"ContactPersons": [],
"HasValidationErrors": false
},
"DateString": "2017-02-06T00:00:00",
"Date": "/Date(1486339200000+0000)/",
"DueDateString": "2017-03-08T00:00:00",
"DueDate": "/Date(1488931200000+0000)/",
"Status": "AUTHORISED",
"LineAmountTypes": "Exclusive",
"LineItems": [],
"SubTotal": 85,
"TotalTax": 17,
"Total": 102,
"UpdatedDateUTC": "/Date(1529940362110+0000)/",
"CurrencyCode": "GBP"
},
{
"Type": "ACCREC",
"InvoiceID": "9e37150f-88a5-4213-a085-b30c5e01c2bf",
"InvoiceNumber": "(13)",
"Reference": "",
"Payments": [],
"CreditNotes": [
{
"CreditNoteID": "3c5c7dec-534a-46e0-ad1b-f0f69822cfd5",
"CreditNoteNumber": "(12)",
"ID": "3c5c7dec-534a-46e0-ad1b-f0f69822cfd5",
"AppliedAmount": 1200,
"DateString": "2011-05-04T00:00:00",
"Date": "/Date(1304467200000+0000)/",
"LineItems": [],
"Total": 7800
},
{
"CreditNoteID": "af38e37f-4ba3-4208-a193-a32b418c2bbc",
"CreditNoteNumber": "(14)",
"ID": "af38e37f-4ba3-4208-a193-a32b418c2bbc",
"AppliedAmount": 2600,
"DateString": "2011-05-04T00:00:00",
"Date": "/Date(1304467200000+0000)/",
"LineItems": [],
"Total": 2600
}
],
"Prepayments": [],
"Overpayments": [],
"AmountDue": 0,
"AmountPaid": 0,
"AmountCredited": 3800,
"CurrencyRate": 1,
"HasErrors": false,
"IsDiscounted": false,
"HasAttachments": false,
"Contact": {
"ContactID": "58164bd6-5225-4f30-ad89-35140db5b624",
"ContactNumber": "d0b420b8-4a58-40d1-9717-8525edda7658",
"Name": "FSales (1)",
"Addresses": [],
"Phones": [],
"ContactGroups": [],
"ContactPersons": [],
"HasValidationErrors": false
},
"DateString": "2011-05-04T00:00:00",
"Date": "/Date(1304467200000+0000)/",
"DueDateString": "2011-06-03T00:00:00",
"DueDate": "/Date(1307059200000+0000)/",
"Status": "PAID",
"LineAmountTypes": "Exclusive",
"LineItems": [],
"SubTotal": 3166.67,
"TotalTax": 633.33,
"Total": 3800,
"UpdatedDateUTC": "/Date(1529943661150+0000)/",
"CurrencyCode": "GBP",
"FullyPaidOnDate": "/Date(1304467200000+0000)/"
},
{
"Type": "ACCPAY",
"InvoiceID": "1ddea7ec-a0d5-457a-a8fd-cfcdc2099d51",
"InvoiceNumber": "01596057543",
"Reference": "",
"Payments": [
{
"PaymentID": "fd639da3-c009-47df-a4bf-98ccd5c68e43",
"Date": "/Date(1551657600000+0000)/",
"Amount": 173.86,
"Reference": "",
"CurrencyRate": 1,
"HasAccount": false,
"HasValidationErrors": false
}
],
"CreditNotes": [],
"Prepayments": [],
"Overpayments": [],
"AmountDue": 0,
"AmountPaid": 173.86,
"AmountCredited": 0,
"CurrencyRate": 1,
"HasErrors": false,
"IsDiscounted": false,
"HasAttachments": true,
"Contact": {
"ContactID": "309afb74-0a3b-4d68-85e8-2259ca5acd13",
"ContactNumber": "91eef1f0-5fe6-45d7-b739-1ab5352a5523",
"Name": "Company AAA",
"Addresses": [],
"Phones": [],
"ContactGroups": [],
"ContactPersons": [],
"HasValidationErrors": false
},
"DateString": "2019-02-23T00:00:00",
"Date": "/Date(1550880000000+0000)/",
"DueDateString": "2019-03-21T00:00:00",
"DueDate": "/Date(1553126400000+0000)/",
"Status": "PAID",
"LineAmountTypes": "Exclusive",
"LineItems": [],
"SubTotal": 144.88,
"TotalTax": 28.98,
"Total": 173.86,
"UpdatedDateUTC": "/Date(1551777481907+0000)/",
"CurrencyCode": "GBP",
"FullyPaidOnDate": "/Date(1551657600000+0000)/"
},
{
"Type": "ACCPAY",
"InvoiceID": "ba5ff3b1-1058-4645-80da-5475c23da949",
"InvoiceNumber": "Q0603",
"Reference": "",
"Payments": [],
"CreditNotes": [],
"Prepayments": [],
"Overpayments": [],
"AmountDue": 213.24,
"AmountPaid": 0,
"AmountCredited": 0,
"CurrencyRate": 1,
"HasErrors": false,
"IsDiscounted": false,
"HasAttachments": true,
"Contact": {
"ContactID": "f0473b41-da92-4397-9d2c-741812f2475c",
"ContactNumber": "1f124969-de8d-40b8-8140-d4997511b0dc",
"Name": "BTelcom",
"Addresses": [],
"Phones": [],
"ContactGroups": [],
"ContactPersons": [],
"HasValidationErrors": false
},
"DateString": "2019-03-05T00:00:00",
"Date": "/Date(1551744000000+0000)/",
"DueDateString": "2019-03-21T00:00:00",
"DueDate": "/Date(1553126400000+0000)/",
"Status": "SUBMITTED",
"LineAmountTypes": "Exclusive",
"LineItems": [],
"SubTotal": 177.7,
"TotalTax": 35.54,
"Total": 213.24,
"UpdatedDateUTC": "/Date(1552068778417+0000)/",
"CurrencyCode": "GBP"
}
]
}'''
import json
import pandas as pd
import re

def flatten_json(y):
    """Flatten nested dicts/lists into a single dict with underscore-joined keys."""
    out = {}
    def flatten(x, name=''):
        if type(x) is dict:
            for a in x:
                flatten(x[a], name + a + '_')
        elif type(x) is list:
            i = 0
            for a in x:
                flatten(a, name + str(i) + '_')
                i += 1
        else:
            out[name[:-1]] = x
    flatten(y)
    return out

jsonObj = json.loads(jsonStr)
flat = flatten_json(jsonObj)

results = pd.DataFrame()
special_cols = []
columns_list = list(flat.keys())
for item in columns_list:
    try:
        # keys look like 'Invoices_0_Type'; the first number is the row index
        row_idx = re.findall(r'_(\d+)_', item)[0]
    except IndexError:
        # top-level keys (Id, Status, ...) carry no row index
        special_cols.append(item)
        continue
    column = re.findall(r'_\d+_(.*)', item)[0]
    column = column.replace('_', '')
    row_idx = int(row_idx)
    value = flat[item]
    results.loc[row_idx, column] = value

# broadcast the top-level keys to every row
for item in special_cols:
    results[item] = flat[item]
Output:
print(results.to_string())
Type InvoiceID InvoiceNumber Reference AmountDue AmountPaid AmountCredited CurrencyRate HasErrors IsDiscounted HasAttachments ContactContactID ContactContactNumber ContactName ContactHasValidationErrors DateString Date DueDateString DueDate Status LineAmountTypes SubTotal TotalTax Total UpdatedDateUTC CurrencyCode CreditNotes0CreditNoteID CreditNotes0CreditNoteNumber CreditNotes0ID CreditNotes0AppliedAmount CreditNotes0DateString CreditNotes0Date CreditNotes0Total CreditNotes1CreditNoteID CreditNotes1CreditNoteNumber CreditNotes1ID CreditNotes1AppliedAmount CreditNotes1DateString CreditNotes1Date CreditNotes1Total FullyPaidOnDate Payments0PaymentID Payments0Date Payments0Amount Payments0Reference Payments0CurrencyRate Payments0HasAccount Payments0HasValidationErrors Id ProviderName DateTimeUTC
0 ACCPAY 8289ab9d-2134-4601-8622-e7fdae4b6d89 10522 10522 102.00 0.00 0.0 1.0 False False False d1dba397-0f0b-4819-a6ce-2839b7be5008 c03bbcb5-fb0b-4f46-83f0-8687f754488b Micro False 2017-02-06T00:00:00 /Date(1486339200000+0000)/ 2017-03-08T00:00:00 /Date(1488931200000+0000)/ OK Exclusive 85.00 17.00 102.00 /Date(1529940362110+0000)/ GBP NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 568d1686-7c53-4f22-a93f-754589a246a7 Rest API /Date(1552234854959)/
1 ACCREC 9e37150f-88a5-4213-a085-b30c5e01c2bf (13) 0.00 0.00 3800.0 1.0 False False False 58164bd6-5225-4f30-ad89-35140db5b624 d0b420b8-4a58-40d1-9717-8525edda7658 FSales (1) False 2011-05-04T00:00:00 /Date(1304467200000+0000)/ 2011-06-03T00:00:00 /Date(1307059200000+0000)/ OK Exclusive 3166.67 633.33 3800.00 /Date(1529943661150+0000)/ GBP 3c5c7dec-534a-46e0-ad1b-f0f69822cfd5 (12) 3c5c7dec-534a-46e0-ad1b-f0f69822cfd5 1200.0 2011-05-04T00:00:00 /Date(1304467200000+0000)/ 7800.0 af38e37f-4ba3-4208-a193-a32b418c2bbc (14) af38e37f-4ba3-4208-a193-a32b418c2bbc 2600.0 2011-05-04T00:00:00 /Date(1304467200000+0000)/ 2600.0 /Date(1304467200000+0000)/ NaN NaN NaN NaN NaN NaN NaN 568d1686-7c53-4f22-a93f-754589a246a7 Rest API /Date(1552234854959)/
2 ACCPAY 1ddea7ec-a0d5-457a-a8fd-cfcdc2099d51 01596057543 0.00 173.86 0.0 1.0 False False True 309afb74-0a3b-4d68-85e8-2259ca5acd13 91eef1f0-5fe6-45d7-b739-1ab5352a5523 Company AAA False 2019-02-23T00:00:00 /Date(1550880000000+0000)/ 2019-03-21T00:00:00 /Date(1553126400000+0000)/ OK Exclusive 144.88 28.98 173.86 /Date(1551777481907+0000)/ GBP NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN /Date(1551657600000+0000)/ fd639da3-c009-47df-a4bf-98ccd5c68e43 /Date(1551657600000+0000)/ 173.86 1.0 False False 568d1686-7c53-4f22-a93f-754589a246a7 Rest API /Date(1552234854959)/
3 ACCPAY ba5ff3b1-1058-4645-80da-5475c23da949 Q0603 213.24 0.00 0.0 1.0 False False True f0473b41-da92-4397-9d2c-741812f2475c 1f124969-de8d-40b8-8140-d4997511b0dc BTelcom False 2019-03-05T00:00:00 /Date(1551744000000+0000)/ 2019-03-21T00:00:00 /Date(1553126400000+0000)/ OK Exclusive 177.70 35.54 213.24 /Date(1552068778417+0000)/ GBP NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 568d1686-7c53-4f22-a93f-754589a246a7 Rest API /Date(1552234854959)/
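As an aside (my addition, not part of the original answer): the record_path='Contact' attempt in the question fails because Contact is a single dict rather than a list of records. A shorter route, assuming pandas >= 1.0 (older versions expose the same function as pandas.io.json.json_normalize), is to let json_normalize expand the nested dicts on its own:
import json
import pandas as pd

data = json.loads(jsonStr)
# json_normalize expands nested dicts, so Contact becomes Contact.ContactID,
# Contact.Name, ... as separate columns in a single call
df = pd.json_normalize(data['Invoices'])
df.to_csv('invoices_flat.csv', index=False)
Note that list-valued fields such as Payments and CreditNotes stay as Python lists inside their cells; spreading those out still needs record_path or a flattening pass like the one above.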

Related

Get fields from a JSON file with Python

I have this JSON file loaded in Python with json.loads('myfile.json'):
[
{
"cart": {
"items": {
"3154ba405e5c5a22bbdf9bf1": {
"item": {
"_id": "3154ba405e5c5a22bbdf9bf1",
"title": "Drink alla cannella",
"price": 5.65,
"__v": 0
},
"qty": 1,
"price": 5.65
}
},
"totalQty": 1,
"totalPrice": 5.65
}
},
{
"cart": {
"items": {
"6214ba405e4c5a31bbdf9ad7": {
"item": {
"_id": "6214ba405e4c5a31bbdf9ad7",
"title": "Drink alla menta",
"price": 5.65,
"__v": 0
},
"qty": 2,
"price": 11.3
}
},
"totalQty": 2,
"totalPrice": 11.3
}
}
]
How can I access both the totalQty and totalPrice fields at the same time and sum them?
How can I access both title fields to print them?
Let's assume that you have the JSON data available as a string then:
jdata = '''
[
{
"cart": {
"items": {
"3154ba405e5c5a22bbdf9bf1": {
"item": {
"_id": "3154ba405e5c5a22bbdf9bf1",
"title": "Drink alla cannella",
"price": 5.65,
"__v": 0
},
"qty": 1,
"price": 5.65
}
},
"totalQty": 1,
"totalPrice": 5.65
}
},
{
"cart": {
"items": {
"6214ba405e4c5a31bbdf9ad7": {
"item": {
"_id": "6214ba405e4c5a31bbdf9ad7",
"title": "Drink alla menta",
"price": 5.65,
"__v": 0
},
"qty": 2,
"price": 11.3
}
},
"totalQty": 2,
"totalPrice": 11.3
}
}
]
'''
import json

totalQty = 0
totalPrice = 0
for d in json.loads(jdata):
    c = d['cart']
    totalQty += c['totalQty']
    totalPrice += c['totalPrice']
    for sd in c['items'].values():
        print(sd['item']['title'])
print(f'{totalQty:d}', f'{totalPrice:.2f}')
Output:
Drink alla cannella
Drink alla menta
3 16.95
Note:
I suspect that what you really want to do is multiply those two values
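For example, summing qty times unit price (a sketch of that suspicion, not part of the original answer):
# sum of qty * unit price across both carts
total = sum(sd['qty'] * sd['item']['price']
            for d in json.loads(jdata)
            for sd in d['cart']['items'].values())
print(f'{total:.2f}')  # 1 * 5.65 + 2 * 5.65 = 16.95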

How to aggregate data with date range?

Hello, I have the following problem: whenever I aggregate data, the aggregations, and to be more exact the date_histogram, are always different. The histogram starts at a pretty much random date.
I am using elasticpy, and my query looks like this before executing. Note that I am using Python datetime objects to get "real" results; I had some problems with other formats.
{
"query": {
"bool": {
"filter": [
{
"range": {
"original_date": {
"gte": datetime.datetime(2020, 2, 13, 0, 0),
"lte": datetime.datetime(2020, 2, 15, 23, 0),
}
}
}
],
"must": [
{
"query_string": {
"query": "whatever string"
}
}
],
}
},
"aggs": {
"docs_histogram": {
"date_histogram": {
"field": "original_date",
"interval": "hour",
"time_zone": "EET",
},
... (other aggs)
},
},
}
The date histogram should cover this range: 2020-02-13 00:00:00 - 2020-02-15 23:00:00. But look at the output's start and end: it starts one day later and ends the same day at 18:00.
"buckets": [
{
"key_as_string": "2020-02-14T00:00:00.000+02:00",
"key": 1581631200000,
"doc_count": 1,
"source_name": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{"key": "WhateverKey", "doc_count": 1}],
},
},
...
{
"key_as_string": "2020-02-14T18:00:00.000+02:00",
"key": 1581696000000,
"doc_count": 1,
"source_name": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{"key": "WhateverKey2", "doc_count": 1}],
},
},
]
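No answer is recorded here, but one likely cause (an assumption worth checking against the Elasticsearch docs): date_histogram only emits buckets between the first and last matching documents unless min_doc_count is set to 0 and the range is pinned with extended_bounds, e.g.:
"aggs": {
    "docs_histogram": {
        "date_histogram": {
            "field": "original_date",
            "interval": "hour",
            "time_zone": "EET",
            "min_doc_count": 0,
            "extended_bounds": {
                "min": datetime.datetime(2020, 2, 13, 0, 0),
                "max": datetime.datetime(2020, 2, 15, 23, 0),
            },
        },
        ... (other aggs)
    },
}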

Merge 2 lists and remove duplicates in Python

I have 2 lists, looking like:
temp_data:
{
"id": 1,
"name": "test (replaced)",
"code": "test",
"last_update": "2020-01-01",
"online": false,
"data": {
"temperature": [
{
"date": "2019-12-17",
"value": 23.652905748126333
},
...
]}
hum_data:
{
"id": 1,
"name": "test (replaced)",
"code": "test",
"last_update": "2020-01-01",
"online": false,
"data": {
"humidity": [
{
"date": "2019-12-17",
"value": 23.652905748126333
},
...
]}
I need to merge the 2 lists into 1 without duplicating data. What is the easiest / most efficient way? After merging, I want something like this:
{
"id": 1,
"name": "test",
"code": "test",
"last_update": "2020-01-01",
"online": false,
"data": {
"temperature": [
{
"date": "2019-12-17",
"value": 23.652905748126333
},
...
],
"humidity": [
{
"date": "2019-12-17",
"value": 23.652905748126333
},
...
Thanks for helping.
If your lists hum_data and temp_data are not sorted, first sort them and then concatenate the dictionaries pair-wise.
# to make comparisons for sorting
compare_function = lambda value: value['id']

# sort the lists first to make the later concatenation easier
temp_data.sort(key=compare_function)
hum_data.sort(key=compare_function)

combined_data = temp_data.copy()
# concatenate the dictionaries using the update function
for hum_row, combined_row in zip(hum_data, combined_data):
    combined_row['data'].update(hum_row['data'])

# combined hum_data and temp_data
combined_data
If the lists are already sorted, then you just need to concatenate dictionary by dictionary.
combined_data = temp_data.copy()
# concatenate the dictionaries using the update function
for hum_row, combined_row in zip(hum_data, combined_data):
    combined_row['data'].update(hum_row['data'])

# combined hum_data and temp_data
combined_data
With that code I got the following result:
[
{
'id': 1,
'name': 'test (replaced)',
'code': 'test',
'last_update': '2020-01-01',
'online': False,
'data': {
'temperature': [{'date': '2019-12-17', 'value': 1}],
'humidity': [{'date': '2019-12-17', 'value': 1}]}
},
{
'id': 2,
'name': 'test (replaced)',
'code': 'test',
'last_update': '2020-01-01',
'online': False,
'data': {
'temperature': [{'date': '2019-12-17', 'value': 2}],
'humidity': [{'date': '2019-12-17', 'value': 2}]}
}
]
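One caveat (my addition, not part of the original answer): zip pairs the two lists positionally, so the above assumes both lists contain exactly the same ids. If they might differ, merging through a dict keyed by id is safer:
# merge by id rather than by position; ids present in only one list
# are kept as they are
merged = {row['id']: row for row in temp_data}
for row in hum_data:
    merged.setdefault(row['id'], row)['data'].update(row['data'])
combined_data = list(merged.values())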

Get names of keys in objectpath

How would I get the names of the keys, for example [800, 801] (the key names are unknown), with objectpath?
It is easy in jmespath: keys(#).
"groups": {
"800": {
"short_name": "22",
"oname": "11",
"group": 8,
"title": "SS",
"name": "33",
"onames": [""],
"alt_name": False,
"waytype": 1,
"multiple": 1,
"primary": 1
},
"801": {
"short_name": "ss",
"oname": "zz",
"group": 8,
"title": "ss",
"name": "bbb",
"onames": [""],
"alt_name": False,
"waytype": 1,
"multiple": 1,
"primary": 0
},
Let's say your object is assigned to the name variable:
const name = {
  "groups": {
    "800": {
      "short_name": "22",
      "oname": "11",
      "group": 8,
      "title": "SS",
      "name": "33",
      "onames": [""],
      "alt_name": false,
      "waytype": 1,
      "multiple": 1,
      "primary": 1
    },
    "801": {
      "short_name": "ss",
      "oname": "zz",
      "group": 8,
      "title": "ss",
      "name": "bbb",
      "onames": [""],
      "alt_name": false,
      "waytype": 1,
      "multiple": 1,
      "primary": 0
    }
  }
}
Use a for loop to get the key names:
for (var num in name.groups) {
    console.log(num);
}
and to get the values of the keys:
for (var num in name.groups) {
    console.log(name.groups[num]);
}
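The snippet above is JavaScript. Since the question mentions objectpath, which is a Python library, a plain-Python equivalent (my assumption of what was wanted; no library is needed for this) would be:
data = {"groups": {"800": {"short_name": "22"}, "801": {"short_name": "ss"}}}  # trimmed
print(list(data["groups"].keys()))  # ['800', '801']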

How to Convert a list of dicts into nested JSON in python without using pandas DataFrame

I have a list of dicts like this
[
{
"subject_id": 1,
"subject_name": "HR Sector 0",
"id": 1,
"name": "parent2",
"value": 10.6
},
{
"subject_id": 18,
"subject_name": "Test11",
"id": 1,
"name": "parent2",
"value": 12
},
{
"subject_id": 2,
"subject_name": "AG1",
"id": 2,
"name": "Customer Delivery Dpt. 1",
"value": 17
},
{
"subject_id": 3,
"subject_name": "Finance Group 2",
"id": 2,
"name": "Customer Delivery Dpt. 1",
"value": 1.5
},
{
"subject_id": 10,
"subject_name": "test",
"id": 2,
"name": "Customer Delivery Dpt. 1",
"value": 10
},
{
"subject_id": null,
"subject_name": null,
"id": 3,
"name": "Technology Team 2",
"value": null
},
{
"subject_id": 8,
"subject_name": "Group 4",
"id": 5,
"name": "Accounting Group 4",
"value": 10
},
{
"subject_id": null,
"subject_name": null,
"id": 9,
"name": "PG2",
"value": null
}
]
I want to convert it into nested JSON, ignoring null values, to get the result set below:
[
    {
        "id": 1,
        "name": "parent2",
        "subjects": [
            {
                "subject_id": 1,
                "subject_name": "HR Sector 0",
                "value": 10.6
            },
            {
                "subject_id": 18,
                "subject_name": "Test11",
                "value": 12
            }
        ]
    },
    {
        "id": 2,
        "name": "Customer Delivery Dpt. 1",
        "subjects": [
            {
                "subject_id": 2,
                "subject_name": "AG1",
                "value": 17
            },
            {
                "subject_id": 3,
                "subject_name": "Finance Group 2",
                "value": 1.5
            },
            {
                "subject_id": 10,
                "subject_name": "test",
                "value": 10
            }
        ]
    },
    {
        "id": 3,
        "name": "Technology Team 2",
        "subjects": []
    },
    {
        "id": 5,
        "name": "Accounting Group 4",
        "subjects": [
            {
                "subject_id": 8,
                "subject_name": "Group 4",
                "value": 10
            }
        ]
    },
    {
        "id": 9,
        "name": "PG2",
        "subjects": []
    }
]
import json
arr = [
{
"subject_id": 1,
"subject_name": "HR Sector 0",
"id": 1,
"name": "parent2",
"value": 10.6
},
{
"subject_id": 18,
"subject_name": "Test11",
"id": 1,
"name": "parent2",
"value": 12
},
{
"subject_id": 2,
"subject_name": "AG1",
"id": 2,
"name": "Customer Delivery Dpt. 1",
"value": 17
},
{
"subject_id": 3,
"subject_name": "Finance Group 2",
"id": 2,
"name": "Customer Delivery Dpt. 1",
"value": 1.5
},
{
"subject_id": 10,
"subject_name": "test",
"id": 2,
"name": "Customer Delivery Dpt. 1",
"value": 10
},
{
"subject_id": None,
"subject_name": None,
"id": 3,
"name": "Technology Team 2",
"value": None
},
{
"subject_id": 8,
"subject_name": "Group 4",
"id": 5,
"name": "Accounting Group 4",
"value": 10
},
{
"subject_id": None,
"subject_name": None,
"id": 9,
"name": "PG2",
"value": None
}
]
def process_arr_to_json(arr):
    newArr = []
    addedIds = {}
    for item in arr:
        if addedIds.get(item["id"]) is None:
            formatted_item = {"subjects": []}
            newArr.append(formatted_item)
            # track the current subject index and the item's position in newArr
            addedIds[item["id"]] = {"idx": 0, "pos": len(newArr) - 1}
        else:
            formatted_item = newArr[addedIds[item["id"]]["pos"]]
            addedIds[item["id"]]["idx"] += 1
        for k, v in item.items():
            if v is not None:
                if k == "id" or k == "name":
                    formatted_item[k] = v
                else:
                    if len(formatted_item["subjects"]) <= addedIds[item["id"]]["idx"]:
                        formatted_item["subjects"].append({k: v})
                    else:
                        formatted_item["subjects"][addedIds[item["id"]]["idx"]][k] = v
    print(newArr)
    return json.dumps(newArr)

if __name__ == "__main__":
    process_arr_to_json(arr)
That is my solution.
Please see the code below to form the merged results:
import json
def process_items(items):
    results = {}
    for item in items:
        # create the parent record only on the first occurrence of this id;
        # re-assigning it on every row would throw away subjects collected earlier
        if item['id'] not in results:
            results[item['id']] = {
                'id': item['id'],
                'name': item['name'],
            }
        to_append = {}
        for k in ['subject_id', 'value', 'subject_name']:
            if item.get(k):
                to_append[k] = item[k]
        results[item['id']].setdefault('subjects', [])
        if to_append:
            results[item['id']]['subjects'].append(to_append)
    return results
items = [
{
"subject_id": 1,
"subject_name": "HR Sector 0",
"id": 1,
"name": "parent2",
"value": 10.6
},
{
"subject_id": 18,
"subject_name": "Test11",
"id": 1,
"name": "parent2",
"value": 12
},
{
"subject_id": 2,
"subject_name": "AG1",
"id": 2,
"name": "Customer Delivery Dpt. 1",
"value": 17
},
{
"subject_id": 3,
"subject_name": "Finance Group 2",
"id": 2,
"name": "Customer Delivery Dpt. 1",
"value": 1.5
},
{
"subject_id": 10,
"subject_name": "test",
"id": 2,
"name": "Customer Delivery Dpt. 1",
"value": 10
},
{
"subject_id": None,
"subject_name": None,
"id": 3,
"name": "Technology Team 2",
"value": None
},
{
"subject_id": 8,
"subject_name": "Group 4",
"id": 5,
"name": "Accounting Group 4",
"value": 10
},
{
"subject_id": None,
"subject_name": None,
"id": 9,
"name": "PG2",
"value": None
}
]
result = process_items(items)
json.dumps(list(result.values()))  # list() is required on Python 3, where dict.values() is a view
