Unnest Facebook Ads JSON with Python - python

I have some JSON data like:
[
{
"adset_id": "23851149362570451",
"reach": "862",
"clicks": "1",
"actions": [
{
"action_type": "post_reaction",
"value": "1"
},
{
"action_type": "post_engagement",
"value": "1"
},
{
"action_type": "page_engagement",
"value": "1"
}
],
"date_start": "2022-10-06"
}
]
In the actual data, this array would contain around 30 or 40 objects; I have shown just one for reference.
How can I flatten everything inside the "actions" array, so that it looks like this instead?
[
{
"adset_id": "23851149362570451",
"reach": "862",
"clicks": "1",
"post_reaction" : "1",
"post_engagement" : "1",
"page_engagement" : "1",
"date_start": "2022-10-06"
}
]

I quickly wrote this. Modify it as per how you ingest the data:
import json


def flatten_actions(records):
    """Return new dicts with each record's "actions" list flattened.

    Every ``{"action_type": name, "value": v}`` entry of a record's
    "actions" list becomes a top-level ``name: v`` pair, and the "actions"
    key itself is dropped.  The remaining keys keep their original order
    and come before the flattened action keys.

    Args:
        records: iterable of dicts as loaded from the API response.

    Returns:
        A list of new dicts; the input records are left untouched.
    """
    flattened = []
    for record in records:
        # A record with no "actions" key (or a null one) simply has nothing
        # to flatten; indexing record['actions'] directly would raise
        # KeyError for such a record.
        action_values = {
            action['action_type']: action['value']
            for action in record.get('actions') or []
        }
        # Copy instead of pop() so the caller's data is not mutated.
        base = {key: value for key, value in record.items() if key != 'actions'}
        flattened.append({**base, **action_values})
    return flattened


if __name__ == '__main__':
    # Modify this part as per how you ingest the data.
    with open('data.json', encoding='utf-8') as f:
        api_data_dict = json.load(f)
    final_result = flatten_actions(api_data_dict)
    print(final_result)
data.json contents:
[{
"adset_id": "23851149362570451",
"reach": "862",
"clicks": "1",
"actions": [
{
"action_type": "post_reaction",
"value": "1"
},
{
"action_type": "post_engagement",
"value": "1"
},
{
"action_type": "page_engagement",
"value": "1"
}
],
"date_start": "2022-10-06"
},
{
"adset_id": "234543535643543",
"reach": "862",
"clicks": "1",
"actions": [
{
"action_type": "post_reaction",
"value": "2"
},
{
"action_type": "post_engagement",
"value": "2"
},
{
"action_type": "page_engagement",
"value": "2"
}
],
"date_start": "2022-10-06"
}]
Result:
[
{
"adset_id":"23851149362570451",
"reach":"862",
"clicks":"1",
"date_start":"2022-10-06",
"post_reaction":"1",
"post_engagement":"1",
"page_engagement":"1"
},
{
"adset_id":"234543535643543",
"reach":"862",
"clicks":"1",
"date_start":"2022-10-06",
"post_reaction":"2",
"post_engagement":"2",
"page_engagement":"2"
}
]

Related

Best way to parse a JSON to store in SQL database (SQL stored procedure/Python)

I have a table of overly complex JSON documents that I'm trying to convert to tabular format to store in a SQL database. I'm pulling the JSON from the QuickBooks Online API, and the format is messy, to say the least (we're talking JSON nested seven levels deep for some parts of it).
The format resembles the code snippet down below. Currently I am using a bunch of OpenJSON's + Cross applys to dig down to the innermost ColData then work my way up but it looks like some of the ColData's get skipped over doing that.
Are there any better ways, using either Python (since I pull the JSON initially in Python before sending the JSON to a SQL database to parse) or SQL to convert it to tabular format besides manually trying to use OpenJSON with Cross applys?
The goal is to get all of the ColData's into a SQL table...
Thanks!
{
"Header": {
"ReportName": "BalanceSheet",
"Option": [
{
"Name": "AccountingStandard",
"Value": "GAAP"
},
{
"Name": "NoReportData",
"Value": "false"
}
],
"DateMacro": "this calendar year-to-date",
"ReportBasis": "Accrual",
"StartPeriod": "2016-01-01",
"Currency": "USD",
"EndPeriod": "2016-10-31",
"Time": "2016-10-31T09:42:21-07:00",
"SummarizeColumnsBy": "Total"
},
"Rows": {
"Row": [
{
"Header": {
"ColData": [
{
"value": "ASSETS"
},
{
"value": ""
}
]
},
"Rows": {
"Row": [
{
"Header": {
"ColData": [
{
"value": "Current Assets"
},
{
"value": ""
}
]
},
"Rows": {
"Row": [
{
"Header": {
"ColData": [
{
"value": "Bank Accounts"
},
{
"value": ""
}
]
},
"Rows": {
"Row": [
{
"ColData": [
{
"id": "35",
"value": "Checking"
},
{
"value": "1350.55"
}
],
"type": "Data"
},
{
"ColData": [
{
"id": "36",
"value": "Savings"
},
{
"value": "800.00"
}
],
"type": "Data"
}
]
},
"type": "Section",
"group": "BankAccounts",
"Summary": {
"ColData": [
{
"value": "Total Bank Accounts"
},
{
"value": "2150.55"
}
]
}
},
{
"Header": {
"ColData": [
{
"value": "Accounts Receivable"
},
{
"value": ""
}
]
},
"Rows": {
"Row": [
{
"ColData": [
{
"id": "84",
"value": "Accounts Receivable (A/R)"
},
{
"value": "6383.12"
}
],
"type": "Data"
}
]
},
"type": "Section",
"group": "AR",
"Summary": {
"ColData": [
{
"value": "Total Accounts Receivable"
},
{
"value": "6383.12"
}
]
}
},
{
"Header": {
"ColData": [
{
"value": "Other current assets"
},
{
"value": ""
}
]
},
"Rows": {
"Row": [
{
"ColData": [
{
"id": "81",
"value": "Inventory Asset"
},
{
"value": "596.25"
}
],
"type": "Data"
},
{
"ColData": [
{
"id": "4",
"value": "Undeposited Funds"
},
{
"value": "2117.52"
}
],
"type": "Data"
}
]
},
"type": "Section",
"group": "OtherCurrentAssets",
"Summary": {
"ColData": [
{
"value": "Total Other current assets"
},
{
"value": "2713.77"
}
]
}
}
]
},
"type": "Section",
"group": "CurrentAssets",
"Summary": {
"ColData": [
{
"value": "Total Current Assets"
},
{
"value": "11247.44"
}
]
}
},
{
"Header": {
"ColData": [
{
"value": "Fixed Assets"
},
{
"value": ""
}
]
},
"Rows": {
"Row": [
{
"Header": {
"ColData": [
{
"id": "37",
"value": "Truck"
},
{
"value": ""
}
]
},
"Rows": {
"Row": [
{
"ColData": [
{
"id": "38",
"value": "Original Cost"
},
{
"value": "13495.00"
}
],
"type": "Data"
}
]
},
"type": "Section",
"Summary": {
"ColData": [
{
"value": "Total Truck"
},
{
"value": "13495.00"
}
]
}
}
]
},
"type": "Section",
"group": "FixedAssets",
"Summary": {
"ColData": [
{
"value": "Total Fixed Assets"
},
{
"value": "13495.00"
}
]
}
}
]
},
"type": "Section",
"group": "TotalAssets",
"Summary": {
"ColData": [
{
"value": "TOTAL ASSETS"
},
{
"value": "24742.44"
}
]
}
},
{
"Header": {
"ColData": [
{
"value": "LIABILITIES AND EQUITY"
},
{
"value": ""
}
]
},
"Rows": {
"Row": [
{
"Header": {
"ColData": [
{
"value": "Liabilities"
},
{
"value": ""
}
]
},
"Rows": {
"Row": [
{
"Header": {
"ColData": [
{
"value": "Current Liabilities"
},
{
"value": ""
}
]
},
"Rows": {
"Row": [
{
"Header": {
"ColData": [
{
"value": "Accounts Payable"
},
{
"value": ""
}
]
},
"Rows": {
"Row": [
{
"ColData": [
{
"id": "33",
"value": "Accounts Payable (A/P)"
},
{
"value": "1984.17"
}
],
"type": "Data"
}
]
},
"type": "Section",
"group": "AP",
"Summary": {
"ColData": [
{
"value": "Total Accounts Payable"
},
{
"value": "1984.17"
}
]
}
},
{
"Header": {
"ColData": [
{
"value": "Credit Cards"
},
{
"value": ""
}
]
},
"Rows": {
"Row": [
{
"ColData": [
{
"id": "41",
"value": "Mastercard"
},
{
"value": "157.72"
}
],
"type": "Data"
}
]
},
"type": "Section",
"group": "CreditCards",
"Summary": {
"ColData": [
{
"value": "Total Credit Cards"
},
{
"value": "157.72"
}
]
}
},
{
"Header": {
"ColData": [
{
"value": "Other Current Liabilities"
},
{
"value": ""
}
]
},
"Rows": {
"Row": [
{
"ColData": [
{
"id": "89",
"value": "Arizona Dept. of Revenue Payable"
},
{
"value": "4.55"
}
],
"type": "Data"
},
{
"ColData": [
{
"id": "90",
"value": "Board of Equalization Payable"
},
{
"value": "401.98"
}
],
"type": "Data"
},
{
"ColData": [
{
"id": "43",
"value": "Loan Payable"
},
{
"value": "4000.00"
}
],
"type": "Data"
}
]
},
"type": "Section",
"group": "OtherCurrentLiabilities",
"Summary": {
"ColData": [
{
"value": "Total Other Current Liabilities"
},
{
"value": "4406.53"
}
]
}
}
]
},
"type": "Section",
"group": "CurrentLiabilities",
"Summary": {
"ColData": [
{
"value": "Total Current Liabilities"
},
{
"value": "6548.42"
}
]
}
},
{
"Header": {
"ColData": [
{
"value": "Long-Term Liabilities"
},
{
"value": ""
}
]
},
"Rows": {
"Row": [
{
"ColData": [
{
"id": "44",
"value": "Notes Payable"
},
{
"value": "25000.00"
}
],
"type": "Data"
}
]
},
"type": "Section",
"group": "LongTermLiabilities",
"Summary": {
"ColData": [
{
"value": "Total Long-Term Liabilities"
},
{
"value": "25000.00"
}
]
}
}
]
},
"type": "Section",
"group": "Liabilities",
"Summary": {
"ColData": [
{
"value": "Total Liabilities"
},
{
"value": "31548.42"
}
]
}
},
{
"Header": {
"ColData": [
{
"value": "Equity"
},
{
"value": ""
}
]
},
"Rows": {
"Row": [
{
"ColData": [
{
"id": "34",
"value": "Opening Balance Equity"
},
{
"value": "-9337.50"
}
],
"type": "Data"
},
{
"ColData": [
{
"id": "2",
"value": "Retained Earnings"
},
{
"value": "91.25"
}
],
"type": "Data"
},
{
"ColData": [
{
"value": "Net Income"
},
{
"value": "2440.27"
}
],
"type": "Data",
"group": "NetIncome"
}
]
},
"type": "Section",
"group": "Equity",
"Summary": {
"ColData": [
{
"value": "Total Equity"
},
{
"value": "-6805.98"
}
]
}
}
]
},
"type": "Section",
"group": "TotalLiabilitiesAndEquity",
"Summary": {
"ColData": [
{
"value": "TOTAL LIABILITIES AND EQUITY"
},
{
"value": "24742.44"
}
]
}
}
]
},
"Columns": {
"Column": [
{
"ColType": "Account",
"ColTitle": "",
"MetaData": [
{
"Name": "ColKey",
"Value": "account"
}
]
},
{
"ColType": "Money",
"ColTitle": "Total",
"MetaData": [
{
"Name": "ColKey",
"Value": "total"
}
]
}
]
}
}
Here is what I tried to get the ColData (unsuccessfully I might add), I think it might be a little too contrived to do in SQL but I'm not sure if I should continue trying this way or if there's a better way in Python:
-- Load one raw report document into a local variable.
-- T-SQL local variables must be prefixed with @ (a leading # names a
-- temporary object), so the variable is @json here.
declare @json nvarchar(max)
SELECT @json = json FROM QboApiRawJSONData WHERE ID = 2
--Outer layer of JSON breaks into 3 parts - header, columns, rows
-- Each OPENJSON ... WITH step below extracts only the "Rows" object or the
-- "Row" array of the previous layer and hands it to the next CROSS APPLY.
-- NOTE(review): a row that has no nested "Rows" at some depth (e.g. a
-- "type": "Data" row) presumably yields NULL there and is then dropped by
-- the following CROSS APPLY, which would explain ColData being skipped --
-- confirm against the data.
SELECT * FROM OPENJSON(@json)
WITH
(
    Rows nvarchar(max) AS JSON
) as MainLayer
CROSS APPLY OPENJSON (MainLayer.Rows)
WITH
(
    Row nvarchar(max) AS JSON
) as SecondaryLayer
CROSS APPLY OPENJSON (SecondaryLayer.Row)
WITH
(
    Rows nvarchar(max) AS JSON
) As ThirdLayer
CROSS APPLY OPENJSON (ThirdLayer.Rows)
WITH
(
    Row nvarchar(max) AS JSON
) as FourthLayer
CROSS APPLY OPENJSON (FourthLayer.Row)
WITH
(
    Rows nvarchar(max) AS JSON
) as FifthLayer
CROSS APPLY OPENJSON (FifthLayer.Rows)
WITH
(
    Row nvarchar(max) AS JSON
) as SixthLayer
CROSS APPLY OPENJSON (SixthLayer.Row)
WITH
(
    Rows nvarchar(max) AS JSON
) as SeventhLayer
CROSS APPLY OPENJSON (SeventhLayer.Rows)
WITH
(
    Row nvarchar(max) AS JSON
) as EighthLayer
CROSS APPLY OPENJSON (EighthLayer.Row)
WITH
(
    Rows nvarchar(max) AS JSON
) as LayerNine
---Things get funky here
CROSS APPLY OPENJSON (LayerNine.Rows)
WITH
(
    Row nvarchar(max) AS JSON
) as LayerTen
CROSS APPLY OPENJSON (LayerTen.Row)
WITH
(
    Rows nvarchar(max) AS JSON
) as LayerEleven
CROSS APPLY OPENJSON (LayerEleven.Rows)
WITH
(
    Row nvarchar(max) AS JSON
) as LayerTwelve
--21 items in last col
There is JSON support for sql-server:
https://learn.microsoft.com/en-us/sql/relational-databases/json/json-data-sql-server?view=sql-server-ver15
There is a JSON storage method here: https://learn.microsoft.com/en-us/sql/relational-databases/json/store-json-documents-in-sql-tables?view=sql-server-ver15. Although it will be fairly complex here I recommend storing the JSON data as a Logs table and then following the tutorial above to see if that solves your issue.
Use Quickbooks API to get the JSON, then refer to this guide:
https://learn.microsoft.com/en-us/sql/relational-databases/json/json-data-sql-server?view=sql-server-ver15
and this guide:
https://learn.microsoft.com/en-us/sql/relational-databases/json/convert-json-data-to-rows-and-columns-with-openjson-sql-server?view=sql-server-ver15
You can also consider setting something like AWS Lambda or Google Cloud Functions if you need something more automated.

Transforming nested JSON with pyjq

I'm trying to transform the JSON from this:
{
"meta": {
"ver": "3.0"
},
"cols": [
{
"name": "val"
}
],
"rows": [
"cols": [
{
"name": "ts"
},
{
"name": "v0"
},
{
"name": "v1"
},
{
"name": "v2"
},
{
"name": "v3"
},
{
"ts": {
"_kind": "dateTime",
"val": "2021-07-07T00:10:00-07:00",
"tz": "Los_Angeles"
},
"v3": {
"_kind": "number",
"val": 6167699.5,
"unit": "kWh"
}
},
{
"ts": {
"_kind": "dateTime",
"val": "2021-07-07T00:15:00-07:00",
"tz": "Los_Angeles"
},
"v0": {
"_kind": "number",
"val": 808926.0625,
"unit": "m\\u00b3"
},
"v1": {
"_kind": "number",
"val": 112999.3046875,
"unit": "m\\u00b3"
},
"v2": {
"_kind": "number",
"val": 8823498,
"unit": "kWh"
}
}
]
}
to a more simplified form using the pyjq module:
{
"data": {
"v0": [
[
"first timestamp",
val
],
[
"second timestamp",
val
]
],
"v1": [
[
"first timestamp",
val
],
[
"second timestamp",
val
]
]
}
}
I got started with the pyjq module; however, I'm unsure how to place two values (one str, one float) inside the [] separated by a comma. Here's my code (it returns an error, as expected).
# Load file.json and try to reshape it with a single jq program via pyjq.
import json
import pyjq
with open('file.json') as f:
data = json.load(f)
# NOTE(review): the jq program below opens three "[" after meter_id_1 but
# closes only two, so the brackets are unbalanced as written.
# NOTE(review): it also joins .ts.val (a string in the sample document) and
# .v0.val (a number in the sample document) with "+"; presumably jq rejects
# adding a string to a number -- confirm.
transformed = pyjq.all('{data: { meter_id_1: [[[.rows[].val.rows[].ts.val + "," + .rows[].val.rows[].v0.val]]}}', data)
Thanks in advance.

Python Script to convert multiple json files in to single csv

{
"type": "Data",
"version": "1.0",
"box": {
"identifier": "abcdef",
"serial": "12345678"
},
"payload": {
"Type": "EL",
"Version": "1",
"Result": "Successful",
"Reference": null,
"Box": {
"Identifier": "abcdef",
"Serial": "12345678"
},
"Configuration": {
"EL": "1"
},
"vent": [
{
"ventType": "Arm",
"Timestamp": "2020-03-18T12:17:04+10:00",
"Parameters": [
{
"Name": "Arm",
"Value": "LT"
},
{
"Name": "Status",
"Value": "LD"
}
]
},
{
"ventType": "Arm",
"Timestamp": "2020-03-18T12:17:24+10:00",
"Parameters": [
{
"Name": "Arm",
"Value": "LT"
},
{
"Name": "Status",
"Value": "LD"
}
]
},
{
"EventType": "TimeUpdateCompleted",
"Timestamp": "2020-03-18T02:23:21.2979668Z",
"Parameters": [
{
"Name": "ActualAdjustment",
"Value": "PT0S"
},
{
"Name": "CorrectionOffset",
"Value": "PT0S"
},
{
"Name": "Latency",
"Value": "PT0.2423996S"
}
]
}
]
}
}
If you're looking to transfer information from a JSON file to a CSV, then you can use the following code to read in a JSON file into a dictionary in Python:
import json

# Read the whole JSON document into a Python object (a dict for the sample
# shown above). The encoding is passed explicitly because open() otherwise
# falls back to the platform's locale encoding, which can mis-decode
# non-ASCII JSON text.
with open('data.txt', encoding='utf-8') as json_file:
    data_dict = json.load(json_file)
You could then convert this dictionary into a list with either data_dict.items() or data_dict.values().
Then you just need to write this list to a CSV file which you can easily do by just looping through the list.

Python built JSON with mixed types

Actually I build Json object starting from a python object.
My starting JSON is:
# Response envelope; every value is a plain JSON-serialisable object.
responseMsgObject = {'Version': 1,
                     'Id': 'xc23',
                     'Local': "US",  # the comma after this entry was missing (SyntaxError)
                     'Type': "Test",
                     'Message': "Message body"}
# sort_keys=False keeps the keys in the insertion order used above.
responseMsgJson = json.dumps(responseMsgObject, sort_keys=False)
Everything works, but now I need to put the JSON below into the "Message" field.
{
"DepID": "001",
"Assets": [
{
"Type": "xyz",
"Text": [
"abc",
"def"
],
"Metadata": {
"V": "1",
"Req": true,
"Other": "othervalue"
},
"Check": "refdw321"
},
{
"Type": "jkl",
"Text": [
"ghi"
],
"Metadata": {
"V": "6"
},
"Check": "345ghsdan"
}
]
}
I built many other json (but simpler) but I'm in trouble with this json.
Thanks for the help.
Try replacing true with True (Python's boolean literal); this works fine for me:
import json

# The nested payload is built from ordinary Python dicts and lists; note the
# Python literal True where the JSON text has true.
_first_asset = {
    "Type": "xyz",
    "Text": ["abc", "def"],
    "Metadata": {"V": "1", "Req": True, "Other": "othervalue"},
    "Check": "refdw321",
}
_second_asset = {
    "Type": "jkl",
    "Text": ["ghi"],
    "Metadata": {"V": "6"},
    "Check": "345ghsdan4",
}
_message_body = {"DepID": "001", "Assets": [_first_asset, _second_asset]}

# Envelope keys are given in the order they should appear in the output.
responseMsgObject = dict(
    Version=1,
    Id='xc23',
    Local="US",
    Type="Test",
    Message=_message_body,
)

# json.dumps serialises the nested structure in one go.
responseMsgJson = json.dumps(responseMsgObject, sort_keys=False)
print("responseMsgJson", responseMsgJson)
DEMO

Json to CSV using python and blender 2.74

I have a project in which i have to convert a json file into a CSV file.
The Json sample :
{
"P_Portfolio Group": {
"depth": 1,
"dataType": "PortfolioOverview",
"levelId": "P_Portfolio Group",
"path": [
{
"label": "Portfolio Group",
"levelId": "P_Portfolio Group"
}
],
"label": "Portfolio Group",
"header": [
{
"id": "Label",
"label": "Security name",
"type": "text",
"contentType": "text"
},
{
"id": "SecurityValue",
"label": "MioCHF",
"type": "text",
"contentType": "number"
},
{
"id": "SecurityValuePct",
"label": "%",
"type": "text",
"contentType": "pct"
}
],
"data": [
{
"dataValues": [
{
"value": "Client1",
"type": "text"
},
{
"value": 2068.73,
"type": "number"
},
{
"value": 14.0584,
"type": "pct"
}
]
},
{
"dataValues": [
{
"value": "Client2",
"type": "text"
},
{
"value": 1511.9,
"type": "number"
},
{
"value": 10.2744,
"type": "pct"
}
]
},
{
"dataValues": [
{
"value": "Client3",
"type": "text"
},
{
"value": 1354.74,
"type": "number"
},
{
"value": 9.2064,
"type": "pct"
}
]
},
{
"dataValues": [
{
"value": "Client4",
"type": "text"
},
{
"value": 1225.78,
"type": "number"
},
{
"value": 8.33,
"type": "pct"
}
]
}
],
"summary": [
{
"value": "Total",
"type": "text"
},
{
"value": 11954.07,
"type": "number"
},
{
"value": 81.236,
"type": "pct"
}
]
}
}
And I want to obtain something like:
Client1,2068.73,14.0584
Client2,1511.9,10.2744
Client3,871.15,5.92
Client4,11954.07,81.236
Can you please give me a hint.
# Attempt to write the "value" entries of the loaded JSON out as CSV rows.
import csv
import json
# NOTE(review): the path literals are not raw strings, so the "\t" in
# "\test" is read as a TAB character, and in Python 3 "\U" starts a
# unicode escape and is rejected at compile time.
with open("C:\Users\SVC\Desktop\test.json") as file:
x = json.load(file)
# "wb+" opens the output in binary mode and the handle is never closed.
# NOTE(review): binary mode presumably only suits the Python 2 csv module;
# confirm which interpreter runs this.
f = csv.writer(open("C:\Users\SVC\Desktop\test.csv", "wb+"))
# Iterating the loaded object rebinds x to each of its top-level keys (the
# sample document is an object, so these are strings), which means the
# subscripts on the next line are applied to a str rather than to the
# document.
for x in x:
# In the sample document "data" is a list of objects and "dataValues" is a
# list as well, so they cannot be indexed with string keys like this.
f.writerow(x["P_Portfolio Group"]["data"]["dataValues"]["value"])
but it doesn't work.
Can you please give me a hint.
import csv
import json

# Raw strings keep the backslashes of the Windows paths literal: in a normal
# string "\t" would become a TAB and "\U" is an invalid escape in Python 3.
with open(r'C:\Users\SVC\Desktop\test.json') as json_file:
    portfolio_group = json.load(json_file)

# newline='' lets the csv module control line endings itself; without it
# extra blank lines can appear between rows on Windows.
with open(r'C:\Users\SVC\Desktop\test.csv', 'w', newline='') as csv_file:
    csv_obj = csv.writer(csv_file)
    # One CSV row per "data" entry, made of the "value" of each of its
    # "dataValues" cells, in order.
    for data in portfolio_group['P_Portfolio Group']['data']:
        csv_obj.writerow([d['value'] for d in data['dataValues']])
This results in the following C:\Users\SVC\Desktop\test.csv content:
Client1,2068.73,14.0584
Client2,1511.9,10.2744
Client3,1354.74,9.2064
Client4,1225.78,8.33
Use the pandas library:
import json

import pandas as pd

# The input is JSON, not CSV, so it is parsed with json.load rather than
# pd.read_csv. The raw string keeps the Windows backslashes literal.
with open(r"C:\Users\SVC\Desktop\test.json", encoding="utf-8") as json_file:
    portfolio_group = json.load(json_file)

# One row per "data" entry, holding the "value" of each "dataValues" cell.
rows = [
    [cell["value"] for cell in entry["dataValues"]]
    for entry in portfolio_group["P_Portfolio Group"]["data"]
]

# header=False / index=False so only the values themselves are written.
pd.DataFrame(rows).to_csv('test.csv', header=False, index=False)
done

Categories

Resources