JSON to CSV using Python and Blender 2.74 - python

I have a project in which I have to convert a JSON file into a CSV file.
The JSON sample:
{
"P_Portfolio Group": {
"depth": 1,
"dataType": "PortfolioOverview",
"levelId": "P_Portfolio Group",
"path": [
{
"label": "Portfolio Group",
"levelId": "P_Portfolio Group"
}
],
"label": "Portfolio Group",
"header": [
{
"id": "Label",
"label": "Security name",
"type": "text",
"contentType": "text"
},
{
"id": "SecurityValue",
"label": "MioCHF",
"type": "text",
"contentType": "number"
},
{
"id": "SecurityValuePct",
"label": "%",
"type": "text",
"contentType": "pct"
}
],
"data": [
{
"dataValues": [
{
"value": "Client1",
"type": "text"
},
{
"value": 2068.73,
"type": "number"
},
{
"value": 14.0584,
"type": "pct"
}
]
},
{
"dataValues": [
{
"value": "Client2",
"type": "text"
},
{
"value": 1511.9,
"type": "number"
},
{
"value": 10.2744,
"type": "pct"
}
]
},
{
"dataValues": [
{
"value": "Client3",
"type": "text"
},
{
"value": 1354.74,
"type": "number"
},
{
"value": 9.2064,
"type": "pct"
}
]
},
{
"dataValues": [
{
"value": "Client4",
"type": "text"
},
{
"value": 1225.78,
"type": "number"
},
{
"value": 8.33,
"type": "pct"
}
]
}
],
"summary": [
{
"value": "Total",
"type": "text"
},
{
"value": 11954.07,
"type": "number"
},
{
"value": 81.236,
"type": "pct"
}
]
}
}
And I want to obtain something like:
Client1,2068.73,14.0584
Client2,1511.9,10.2744
Client3,1354.74,9.2064
Client4,1225.78,8.33
My attempt so far:
import csv
import json
with open("C:\Users\SVC\Desktop\test.json") as file:
    x = json.load(file)
f = csv.writer(open("C:\Users\SVC\Desktop\test.csv", "wb+"))
for x in x:
    f.writerow(x["P_Portfolio Group"]["data"]["dataValues"]["value"])
but it doesn't work.
Can you please give me a hint.

import csv
import json

with open(r'C:\Users\SVC\Desktop\test.json') as json_file:
    portfolio_group = json.load(json_file)

with open(r'C:\Users\SVC\Desktop\test.csv', 'w', newline='') as csv_file:
    csv_obj = csv.writer(csv_file)
    for data in portfolio_group['P_Portfolio Group']['data']:
        csv_obj.writerow([d['value'] for d in data['dataValues']])
This results in the following C:\Users\SVC\Desktop\test.csv content:
Client1,2068.73,14.0584
Client2,1511.9,10.2744
Client3,1354.74,9.2064
Client4,1225.78,8.33
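If you also want the column labels and the Total row, they're available under the header and summary keys of the same JSON. A small extension of the above (a sketch assuming the same file layout):
import csv
import json

with open(r'C:\Users\SVC\Desktop\test.json') as json_file:
    group = json.load(json_file)['P_Portfolio Group']

with open(r'C:\Users\SVC\Desktop\test.csv', 'w', newline='') as csv_file:
    writer = csv.writer(csv_file)
    writer.writerow([h['label'] for h in group['header']])  # Security name, MioCHF, %
    for data in group['data']:
        writer.writerow([d['value'] for d in data['dataValues']])
    writer.writerow([s['value'] for s in group['summary']])  # Total,11954.07,81.236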

Use the pandas library. Note that pd.read_csv can't parse JSON, and pd.read_json can't flatten this nested layout directly, so pull the rows out first:
import json
import pandas as pd

with open(r'C:\Users\SVC\Desktop\test.json') as f:
    raw = json.load(f)
rows = [[d['value'] for d in e['dataValues']] for e in raw['P_Portfolio Group']['data']]
pd.DataFrame(rows).to_csv('test.csv', index=False, header=False)

Related

Modify the value of a field of a specific nested object (by its index) depending on a condition

I would like to modify the value of a field at a specific index of a nested type, depending on another value of the same nested object or on a field outside of the nested object.
As an example, here is the current mapping of my index feed:
{
"feed": {
"mappings": {
"properties": {
"attacks_ids": {
"type": "keyword"
},
"created_by": {
"type": "keyword"
},
"date": {
"type": "date"
},
"groups_related": {
"type": "keyword"
},
"indicators": {
"type": "nested",
"properties": {
"date": {
"type": "date"
},
"description": {
"type": "text"
},
"role": {
"type": "keyword"
},
"type": {
"type": "keyword"
},
"value": {
"type": "keyword"
}
}
},
"malware_families": {
"type": "keyword"
},
"published": {
"type": "boolean"
},
"references": {
"type": "keyword"
},
"tags": {
"type": "keyword"
},
"targeted_countries": {
"type": "keyword"
},
"title": {
"type": "text"
},
"tlp": {
"type": "keyword"
}
}
}
}
}
Take the following document as example:
{
"took": 194,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1,
"hits": [
{
"_index": "feed",
"_type": "_doc",
"_id": "W3CS7IABovFpcGfZjfyu",
"_score": 1,
"_source": {
"title": "Test",
"date": "2022-05-22T16:21:09.159711",
"created_by": "finch",
"tlp": "white",
"published": true,
"references": [
"test",
"test"
],
"tags": [
"tag1",
"tag2"
],
"targeted_countries": [
"Italy",
"Germany"
],
"malware_families": [
"family1",
"family2"
],
"groups_related": [
"group1",
"griup2"
],
"attacks_ids": [
""
],
"indicators": [
{
"value": "testest",
"description": "This is a test",
"type": "sha256",
"role": "file",
"date": "2022-05-22T16:21:09.159560"
},
{
"value": "testest2",
"description": "This is a test 2",
"type": "ipv4",
"role": "c2",
"date": "2022-05-22T16:21:09.159699"
}
]
}
}
]
}
}
I would like to make this update: indicators[0].value = 'changed'
if _id == 'W3CS7IABovFpcGfZjfyu'
or if title == 'some_title'
or if indicators[0].role == 'c2'
I already tried with a script, but it seems I can't manage to get it to work. I hope the explanation is clear; ask any questions if not, thank you.
Edit 1:
I managed to make it work; however, it needs the _id. I'm still looking for a way to do it without that.
My partial solution:
update = Pulse.get(id="XHCz7IABovFpcGfZWfz9") #Pulse is my document
update.update(script="for (indicator in ctx._source.indicators) {if (indicator.value=='changed2') {indicator.value='changed3'}}")
# Modify depending on the value of a field inside the same nested object
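For what it's worth, the _update_by_query API can apply the same Painless script without knowing the _id, selecting documents by title or by a nested match on indicators.role instead. A sketch with elasticsearch-dsl (the host, index, and exact match clause are assumptions):
from elasticsearch_dsl import UpdateByQuery, connections

connections.create_connection(hosts=['localhost:9200'])  # assumed cluster address

ubq = (
    UpdateByQuery(index='feed')
    # Select by title; a nested query on indicators.role would work here too.
    .query('match', title='some_title')
    .script(
        source="for (indicator in ctx._source.indicators) {"
               " if (indicator.role == 'c2') { indicator.value = 'changed' } }",
        lang='painless',
    )
)
response = ubq.execute()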

Best way to parse a JSON to store in SQL database (SQL stored procedure/Python)

I have a table of overly complex JSON files I'm trying to convert to tabular format to store in a SQL database. I'm pulling the JSON from the QuickBooks Online API, and the format is messy to say the least (we're talking 7x-nested JSON for some parts of it).
The format resembles the snippet below. Currently I am using a bunch of OPENJSON calls + CROSS APPLYs to dig down to the innermost ColData and then work my way up, but it looks like some of the ColData entries get skipped over that way.
Are there any better ways, using either Python (since I pull the JSON in Python before sending it to the SQL database to parse) or SQL, to convert it to tabular format besides manually chaining OPENJSON with CROSS APPLYs?
The goal is to get all of the ColData entries into a SQL table.
Thanks!
{
"Header": {
"ReportName": "BalanceSheet",
"Option": [
{
"Name": "AccountingStandard",
"Value": "GAAP"
},
{
"Name": "NoReportData",
"Value": "false"
}
],
"DateMacro": "this calendar year-to-date",
"ReportBasis": "Accrual",
"StartPeriod": "2016-01-01",
"Currency": "USD",
"EndPeriod": "2016-10-31",
"Time": "2016-10-31T09:42:21-07:00",
"SummarizeColumnsBy": "Total"
},
"Rows": {
"Row": [
{
"Header": {
"ColData": [
{
"value": "ASSETS"
},
{
"value": ""
}
]
},
"Rows": {
"Row": [
{
"Header": {
"ColData": [
{
"value": "Current Assets"
},
{
"value": ""
}
]
},
"Rows": {
"Row": [
{
"Header": {
"ColData": [
{
"value": "Bank Accounts"
},
{
"value": ""
}
]
},
"Rows": {
"Row": [
{
"ColData": [
{
"id": "35",
"value": "Checking"
},
{
"value": "1350.55"
}
],
"type": "Data"
},
{
"ColData": [
{
"id": "36",
"value": "Savings"
},
{
"value": "800.00"
}
],
"type": "Data"
}
]
},
"type": "Section",
"group": "BankAccounts",
"Summary": {
"ColData": [
{
"value": "Total Bank Accounts"
},
{
"value": "2150.55"
}
]
}
},
{
"Header": {
"ColData": [
{
"value": "Accounts Receivable"
},
{
"value": ""
}
]
},
"Rows": {
"Row": [
{
"ColData": [
{
"id": "84",
"value": "Accounts Receivable (A/R)"
},
{
"value": "6383.12"
}
],
"type": "Data"
}
]
},
"type": "Section",
"group": "AR",
"Summary": {
"ColData": [
{
"value": "Total Accounts Receivable"
},
{
"value": "6383.12"
}
]
}
},
{
"Header": {
"ColData": [
{
"value": "Other current assets"
},
{
"value": ""
}
]
},
"Rows": {
"Row": [
{
"ColData": [
{
"id": "81",
"value": "Inventory Asset"
},
{
"value": "596.25"
}
],
"type": "Data"
},
{
"ColData": [
{
"id": "4",
"value": "Undeposited Funds"
},
{
"value": "2117.52"
}
],
"type": "Data"
}
]
},
"type": "Section",
"group": "OtherCurrentAssets",
"Summary": {
"ColData": [
{
"value": "Total Other current assets"
},
{
"value": "2713.77"
}
]
}
}
]
},
"type": "Section",
"group": "CurrentAssets",
"Summary": {
"ColData": [
{
"value": "Total Current Assets"
},
{
"value": "11247.44"
}
]
}
},
{
"Header": {
"ColData": [
{
"value": "Fixed Assets"
},
{
"value": ""
}
]
},
"Rows": {
"Row": [
{
"Header": {
"ColData": [
{
"id": "37",
"value": "Truck"
},
{
"value": ""
}
]
},
"Rows": {
"Row": [
{
"ColData": [
{
"id": "38",
"value": "Original Cost"
},
{
"value": "13495.00"
}
],
"type": "Data"
}
]
},
"type": "Section",
"Summary": {
"ColData": [
{
"value": "Total Truck"
},
{
"value": "13495.00"
}
]
}
}
]
},
"type": "Section",
"group": "FixedAssets",
"Summary": {
"ColData": [
{
"value": "Total Fixed Assets"
},
{
"value": "13495.00"
}
]
}
}
]
},
"type": "Section",
"group": "TotalAssets",
"Summary": {
"ColData": [
{
"value": "TOTAL ASSETS"
},
{
"value": "24742.44"
}
]
}
},
{
"Header": {
"ColData": [
{
"value": "LIABILITIES AND EQUITY"
},
{
"value": ""
}
]
},
"Rows": {
"Row": [
{
"Header": {
"ColData": [
{
"value": "Liabilities"
},
{
"value": ""
}
]
},
"Rows": {
"Row": [
{
"Header": {
"ColData": [
{
"value": "Current Liabilities"
},
{
"value": ""
}
]
},
"Rows": {
"Row": [
{
"Header": {
"ColData": [
{
"value": "Accounts Payable"
},
{
"value": ""
}
]
},
"Rows": {
"Row": [
{
"ColData": [
{
"id": "33",
"value": "Accounts Payable (A/P)"
},
{
"value": "1984.17"
}
],
"type": "Data"
}
]
},
"type": "Section",
"group": "AP",
"Summary": {
"ColData": [
{
"value": "Total Accounts Payable"
},
{
"value": "1984.17"
}
]
}
},
{
"Header": {
"ColData": [
{
"value": "Credit Cards"
},
{
"value": ""
}
]
},
"Rows": {
"Row": [
{
"ColData": [
{
"id": "41",
"value": "Mastercard"
},
{
"value": "157.72"
}
],
"type": "Data"
}
]
},
"type": "Section",
"group": "CreditCards",
"Summary": {
"ColData": [
{
"value": "Total Credit Cards"
},
{
"value": "157.72"
}
]
}
},
{
"Header": {
"ColData": [
{
"value": "Other Current Liabilities"
},
{
"value": ""
}
]
},
"Rows": {
"Row": [
{
"ColData": [
{
"id": "89",
"value": "Arizona Dept. of Revenue Payable"
},
{
"value": "4.55"
}
],
"type": "Data"
},
{
"ColData": [
{
"id": "90",
"value": "Board of Equalization Payable"
},
{
"value": "401.98"
}
],
"type": "Data"
},
{
"ColData": [
{
"id": "43",
"value": "Loan Payable"
},
{
"value": "4000.00"
}
],
"type": "Data"
}
]
},
"type": "Section",
"group": "OtherCurrentLiabilities",
"Summary": {
"ColData": [
{
"value": "Total Other Current Liabilities"
},
{
"value": "4406.53"
}
]
}
}
]
},
"type": "Section",
"group": "CurrentLiabilities",
"Summary": {
"ColData": [
{
"value": "Total Current Liabilities"
},
{
"value": "6548.42"
}
]
}
},
{
"Header": {
"ColData": [
{
"value": "Long-Term Liabilities"
},
{
"value": ""
}
]
},
"Rows": {
"Row": [
{
"ColData": [
{
"id": "44",
"value": "Notes Payable"
},
{
"value": "25000.00"
}
],
"type": "Data"
}
]
},
"type": "Section",
"group": "LongTermLiabilities",
"Summary": {
"ColData": [
{
"value": "Total Long-Term Liabilities"
},
{
"value": "25000.00"
}
]
}
}
]
},
"type": "Section",
"group": "Liabilities",
"Summary": {
"ColData": [
{
"value": "Total Liabilities"
},
{
"value": "31548.42"
}
]
}
},
{
"Header": {
"ColData": [
{
"value": "Equity"
},
{
"value": ""
}
]
},
"Rows": {
"Row": [
{
"ColData": [
{
"id": "34",
"value": "Opening Balance Equity"
},
{
"value": "-9337.50"
}
],
"type": "Data"
},
{
"ColData": [
{
"id": "2",
"value": "Retained Earnings"
},
{
"value": "91.25"
}
],
"type": "Data"
},
{
"ColData": [
{
"value": "Net Income"
},
{
"value": "2440.27"
}
],
"type": "Data",
"group": "NetIncome"
}
]
},
"type": "Section",
"group": "Equity",
"Summary": {
"ColData": [
{
"value": "Total Equity"
},
{
"value": "-6805.98"
}
]
}
}
]
},
"type": "Section",
"group": "TotalLiabilitiesAndEquity",
"Summary": {
"ColData": [
{
"value": "TOTAL LIABILITIES AND EQUITY"
},
{
"value": "24742.44"
}
]
}
}
]
},
"Columns": {
"Column": [
{
"ColType": "Account",
"ColTitle": "",
"MetaData": [
{
"Name": "ColKey",
"Value": "account"
}
]
},
{
"ColType": "Money",
"ColTitle": "Total",
"MetaData": [
{
"Name": "ColKey",
"Value": "total"
}
]
}
]
}
}
Here is what I tried to get the ColData (unsuccessfully, I might add). I think it might be a little too contrived to do in SQL, but I'm not sure whether I should continue trying this way or whether there's a better way in Python:
declare @json nvarchar(max)
SELECT @json = json FROM QboApiRawJSONData WHERE ID = 2
--Outer layer of JSON breaks into 3 parts - header, columns, rows
SELECT * FROM OPENJSON(@json)
WITH
(
Rows nvarchar(max) AS JSON
) as MainLayer
CROSS APPLY OPENJSON (MainLayer.Rows)
WITH
(
Row nvarchar(max) AS JSON
) as SecondaryLayer
CROSS APPLY OPENJSON (SecondaryLayer.Row)
WITH
(
Rows nvarchar(max) AS JSON
) As ThirdLayer
CROSS APPLY OPENJSON (ThirdLayer.Rows)
WITH
(
Row nvarchar(max) AS JSON
) as FourthLayer
CROSS APPLY OPENJSON (FourthLayer.Row)
WITH
(
Rows nvarchar(max) AS JSON
) as FifthLayer
CROSS APPLY OPENJSON (FifthLayer.Rows)
WITH
(
Row nvarchar(max) AS JSON
) as SixthLayer
CROSS APPLY OPENJSON (SixthLayer.Row)
WITH
(
Rows nvarchar(max) AS JSON
) as SeventhLayer
CROSS APPLY OPENJSON (SeventhLayer.Rows)
WITH
(
Row nvarchar(max) AS JSON
) as EighthLayer
CROSS APPLY OPENJSON (EighthLayer.Row)
WITH
(
Rows nvarchar(max) AS JSON
) as LayerNine
---Things get funky here
CROSS APPLY OPENJSON (LayerNine.Rows)
WITH
(
Row nvarchar(max) AS JSON
) as LayerTen
CROSS APPLY OPENJSON (LayerTen.Row)
WITH
(
Rows nvarchar(max) AS JSON
) as LayerEleven
CROSS APPLY OPENJSON (LayerEleven.Rows)
WITH
(
Row nvarchar(max) AS JSON
) as LayerTwelve
--21 items in last col
There is JSON support for SQL Server:
https://learn.microsoft.com/en-us/sql/relational-databases/json/json-data-sql-server?view=sql-server-ver15
There is a JSON storage method here: https://learn.microsoft.com/en-us/sql/relational-databases/json/store-json-documents-in-sql-tables?view=sql-server-ver15. Although it will be fairly complex, I recommend storing the JSON data in a logs table and then following the tutorial above to see if that solves your issue.
Use the QuickBooks API to get the JSON, then refer to this guide:
https://learn.microsoft.com/en-us/sql/relational-databases/json/json-data-sql-server?view=sql-server-ver15
and this guide:
https://learn.microsoft.com/en-us/sql/relational-databases/json/convert-json-data-to-rows-and-columns-with-openjson-sql-server?view=sql-server-ver15
You can also consider setting up something like AWS Lambda or Google Cloud Functions if you need something more automated.
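If the fixed stack of CROSS APPLY layers keeps missing ColData at some depths, a short recursive walk in Python handles arbitrary nesting instead. A sketch (the file name and print output are placeholders; in practice the rows would feed an INSERT or a DataFrame):
import json

def collect_coldata(node, out):
    # Depth-first walk: grab every ColData list wherever it appears
    # (section Headers, Data rows, and Summaries alike).
    if isinstance(node, dict):
        if "ColData" in node:
            out.append([col.get("value", "") for col in node["ColData"]])
        for value in node.values():
            collect_coldata(value, out)
    elif isinstance(node, list):
        for item in node:
            collect_coldata(item, out)

with open("balance_sheet.json") as f:  # assumed local copy of the report
    report = json.load(f)

rows = []
collect_coldata(report["Rows"], rows)
for row in rows:
    print(row)  # e.g. ['Checking', '1350.55']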

Transforming nested JSON with pyjq

I'm trying to transform the JSON from this:
{
"meta": {
"ver": "3.0"
},
"cols": [
{
"name": "val"
}
],
"rows": [
"cols": [
{
"name": "ts"
},
{
"name": "v0"
},
{
"name": "v1"
},
{
"name": "v2"
},
{
"name": "v3"
},
{
"ts": {
"_kind": "dateTime",
"val": "2021-07-07T00:10:00-07:00",
"tz": "Los_Angeles"
},
"v3": {
"_kind": "number",
"val": 6167699.5,
"unit": "kWh"
}
},
{
"ts": {
"_kind": "dateTime",
"val": "2021-07-07T00:15:00-07:00",
"tz": "Los_Angeles"
},
"v0": {
"_kind": "number",
"val": 808926.0625,
"unit": "m\\u00b3"
},
"v1": {
"_kind": "number",
"val": 112999.3046875,
"unit": "m\\u00b3"
},
"v2": {
"_kind": "number",
"val": 8823498,
"unit": "kWh"
}
}
]
}
to a simpler form using the pyjq module:
{
"data": {
"v0": [
[
"first timestamp",
val
],
[
"second timestamp",
val
]
],
"v1": [
[
"first timestamp",
val
],
[
"second timestamp",
val
]
]
}
}
I got started with the pyjq module; however, I'm unsure how to place two values (one str, one float) within the brackets, separated by a comma. Here's my code (it returns an error, as expected):
import json
import pyjq

with open('file.json') as f:
    data = json.load(f)

transformed = pyjq.all('{data: { meter_id_1: [[[.rows[].val.rows[].ts.val + "," + .rows[].val.rows[].v0.val]]}}', data)
Thanks in advance.
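In the meantime, the same pairing is easy to express in plain Python rather than jq, which can at least validate the expected output shape. A sketch, assuming the rows[].val.rows[] layout that the filter above references:
import json
from collections import defaultdict

with open('file.json') as f:
    data = json.load(f)

pairs = defaultdict(list)
for outer in data['rows']:
    for row in outer['val']['rows']:
        ts = row['ts']['val']
        for key, cell in row.items():
            if key != 'ts':  # v0, v1, v2, v3, ...
                pairs[key].append([ts, cell['val']])

print(json.dumps({'data': pairs}, indent=2))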

Python script to convert multiple JSON files into a single CSV

{
"type": "Data",
"version": "1.0",
"box": {
"identifier": "abcdef",
"serial": "12345678"
},
"payload": {
"Type": "EL",
"Version": "1",
"Result": "Successful",
"Reference": null,
"Box": {
"Identifier": "abcdef",
"Serial": "12345678"
},
"Configuration": {
"EL": "1"
},
"vent": [
{
"ventType": "Arm",
"Timestamp": "2020-03-18T12:17:04+10:00",
"Parameters": [
{
"Name": "Arm",
"Value": "LT"
},
{
"Name": "Status",
"Value": "LD"
}
]
},
{
"ventType": "Arm",
"Timestamp": "2020-03-18T12:17:24+10:00",
"Parameters": [
{
"Name": "Arm",
"Value": "LT"
},
{
"Name": "Status",
"Value": "LD"
}
]
},
{
"EventType": "TimeUpdateCompleted",
"Timestamp": "2020-03-18T02:23:21.2979668Z",
"Parameters": [
{
"Name": "ActualAdjustment",
"Value": "PT0S"
},
{
"Name": "CorrectionOffset",
"Value": "PT0S"
},
{
"Name": "Latency",
"Value": "PT0.2423996S"
}
]
}
]
}
}
If you're looking to transfer information from a JSON file to a CSV, you can use the following code to read a JSON file into a dictionary in Python:
import json
with open('data.txt') as json_file:
    data_dict = json.load(json_file)
You could then convert this dictionary into a list with either data_dict.items() or data_dict.values().
Then you just need to write this list to a CSV file, which you can do by looping through it.
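For the multiple-files-to-one-CSV case in the title, here is a sketch along those lines (the glob pattern and column layout are assumptions; the loop flattens each event parameter to one CSV row):
import csv
import glob
import json

with open('events.csv', 'w', newline='') as out:
    writer = csv.writer(out)
    writer.writerow(['file', 'EventType', 'Timestamp', 'Name', 'Value'])
    for path in glob.glob('*.json'):  # assumed location of the JSON files
        with open(path) as f:
            doc = json.load(f)
        for event in doc.get('payload', {}).get('Event', []):
            # One CSV row per parameter, tagged with its event and source file
            for param in event.get('Parameters', []):
                writer.writerow([path, event.get('EventType', ''),
                                 event.get('Timestamp', ''),
                                 param.get('Name', ''), param.get('Value', '')])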

BigQuery - use_avro_logical_types doesn't work in a Python script

My question is similar to this one: BigQuery use_avro_logical_types ignored in Python script. However, I have already updated the Google libraries I use, without success, so I would like to understand what is happening in my case. Below is the part of the script that ingests the Avro file. Note: the issue occurs in more than one Avro-to-BigQuery ingestion pipeline.
EDIT: The suggested solution of changing the schema type didn't work; it gave another error.
Schema in BQ: (screenshot omitted)
Data in BQ: (screenshot omitted)
import csv
import base64
import json
import io
import avro.schema
import avro.io
from avro.datafile import DataFileReader, DataFileWriter
import math
import os
import gcloud
from gcloud import storage
from google.cloud import bigquery
from oauth2client.client import GoogleCredentials
from datetime import datetime, timedelta, date
import numpy as np
try:
    script_path = os.path.dirname(os.path.abspath(__file__)) + "/"
except:
    script_path = "C:\\Users\\me\\key.json"
#Bigquery Credentials and settings
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = script_path
folder = str((datetime.now() - timedelta(days=1)).strftime('%Y-%m-%d'))
data_folder = str((datetime.now() - timedelta(days=1)).strftime('%Y%m%d'))
bucket_name = 'gs://bucket/*.csv'
dataset = 'dataset'
tabela = 'table_ids'
new_file = 'C:\\Users\\me\\register_' + data_folder + '.avro'
file_schema = 'C:\\Users\\me\\schema.avsc'
new_filename = 'register_' + data_folder + '.avro'
bq1 = bigquery.Client()
#Delete IDs
query1 = """DELETE FROM dataset.table_ids WHERE ID IS NOT NULL"""
query_job1 = bq1.query(query1)
def insert_bigquery(target_uri, dataset_id, table_id):
    bigquery_client = bigquery.Client()
    dataset_ref = bigquery_client.dataset(dataset_id)
    job_config = bigquery.LoadJobConfig()
    job_config.schema = [
        bigquery.SchemaField('id','STRING',mode='REQUIRED')
    ]
    job_config.source_format = bigquery.SourceFormat.CSV
    job_config.field_delimiter = ";"
    uri = target_uri
    load_job = bigquery_client.load_table_from_uri(
        uri,
        dataset_ref.table(table_id),
        job_config=job_config
    )
    print('Starting job {}'.format(load_job.job_id))
    load_job.result()
    print('Job finished.')
insert_bigquery(bucket_name, dataset, tabela)
def get_data_from_bigquery():
    """query bigquery to get data to import to PSQL"""
    bq = bigquery.Client()
    #Fetch IDs
    query = """SELECT id FROM dataset.table_ids"""
    query_job = bq.query(query)
    data = query_job.result()
    rows = list(data)
    return rows
a = get_data_from_bigquery()
length = len(a)
line_count = 0
schema = avro.schema.Parse(open(file_schema, "rb").read()) # need to know the schema to write. According to 1.8.2 of Apache Avro
writer = DataFileWriter(open(new_file, "wb"), avro.io.DatumWriter(), schema)
for row in range(length):
    bytes = base64.b64decode(str(a[row][0]))
    bytes = bytes[5:]
    buf = io.BytesIO(bytes)
    decoder = avro.io.BinaryDecoder(buf)
    rec_reader = avro.io.DatumReader(avro.schema.Parse(open(file_schema).read()))
    out = rec_reader.read(decoder)
    writer.append(out)
writer.close()
def upload_blob(bucket_name, source_file_name, destination_blob_name):
    storage_client = storage.Client()
    bucket = storage_client.get_bucket(bucket_name)
    blob = bucket.blob("insert/" + destination_blob_name)
    blob.upload_from_filename(source_file_name)
    print('File {} uploaded to {}'.format(
        source_file_name,
        destination_blob_name
    ))
upload_blob('bucket', new_file, new_filename)
def insert_bigquery_avro(target_uri, dataset_id, table_id):
    bigquery_client = bigquery.Client()
    dataset_ref = bigquery_client.dataset(dataset_id)
    job_config = bigquery.LoadJobConfig()
    job_config.autodetect = True
    job_config.source_format = bigquery.SourceFormat.AVRO
    job_config.write_disposition = bigquery.WriteDisposition.WRITE_APPEND
    job_config.use_avro_logical_types = True
    time_partitioning = bigquery.table.TimePartitioning()
    job_config.time_partitioning = time_partitioning
    uri = target_uri
    load_job = bigquery_client.load_table_from_uri(
        uri,
        dataset_ref.table(table_id),
        job_config=job_config
    )
    print('Starting job {}'.format(load_job.job_id))
    load_job.result()
    print('Job finished.')
Avro Schema:
"fields": [
{
"name": "id",
"type": {
"type": "string",
"avro.java.string": "String"
},
"doc": "the payment id"
},
{
"name": "merchant",
"type": {
"type": "string",
"avro.java.string": "String"
},
"doc": "the merchant who owns the payment"
},
{
"name": "date",
"type": {
"type": "long",
"logicalType": "timestamp-millis"
},
"doc": "the date where the transaction happend"
},
{
"name": "amount",
"type": {
"type": "record",
"name": "amount",
"fields": [
{
"name": "amount",
"type": [
"null",
{
"type": "bytes",
"logicalType": "decimal",
"precision": 5,
"scale": 5
}
],
"doc": "the original currency amount",
"default": null
},
{
"name": "foreignAmount",
"type": [
"null",
{
"type": "bytes",
"logicalType": "decimal",
"precision": 5,
"scale": 5
}
],
"doc": "the foreign amount for the payment",
"default": null
},
{
"name": "code",
"type": {
"type": "string",
"avro.java.string": "String"
},
"doc": "the destination currency code"
}
],
"default": null
}
},
{
"name": "exchange_rate",
"type": {
"type": "record",
"name": "code",
"fields": [
{
"name": "currency_code",
"type": [
"null",
{
"type": "string",
"avro.java.string": "String"
}
],
"doc": "the exchange rate currency code",
"default": null
},
{
"name": "rate",
"type": [
"null",
{
"type": "bytes",
"logicalType": "decimal",
"precision": 5,
"scale": 5
}
],
"default": null
},
{
"name": "online_rate",
"type": [
"null",
{
"type": "bytes",
"logicalType": "decimal",
"precision": 5,
"scale": 5
}
],
"default": null
}
]
},
"doc": "The transaction exchange rate"
},
{
"name": "consumer",
"type": {
"type": "record",
"name": "Consumer",
"fields": [
{
"name": "name",
"type": [
"null",
{
"type": "string",
"avro.java.string": "String"
}
],
"doc": "the consumer's name",
"default": null
},
{
"name": "email",
"type": [
"null",
{
"type": "string",
"avro.java.string": "String"
}
],
"doc": "the consumer's email address",
"default": null
},
{
"name": "external_id",
"type": [
"null",
{
"type": "string",
"avro.java.string": "String"
}
],
"doc": "the consumer's external id when needed",
"default": null
},
{
"name": "national_id",
"type": {
"type": "string",
"avro.java.string": "String"
},
"doc": "the national id"
},
{
"name": "phone",
"type": {
"type": "string",
"avro.java.string": "String"
},
"doc": "the consumer's phone number",
"default": ""
}
]
}
},
{
"name": "soft_descriptor",
"type": [
"null",
{
"type": "string",
"avro.java.string": "String"
}
],
"doc": "the description as it will be shown at the customer's invoice",
"default": null
},
{
"name": "merchant_contract",
"type": {
"type": "enum",
"name": "merchant_contract_type",
"symbols": [
"PAY",
"BANK"
]
},
"default": "PAY"
},
{
"name": "type",
"type": {
"type": "enum",
"name": "payment_type",
"symbols": [
"INITIAL",
"CREDIT_CARD",
"DEBIT_CARD",
"ONLINE_DEBIT",
"BANK_SLIP",
"DIGITAL_WALLET",
"ELECTRONIC_BANK_TRANSFER"
]
},
"default": "INITIAL"
},
{
"name": "card",
"type": {
"type": "record",
"name": "card",
"fields": [
{
"name": "type",
"type": [
"null",
{
"type": "enum",
"name": "card_type",
"symbols": [
"CARD",
"TOKEN"
]
}
],
"default": null
},
{
"name": "mask_number",
"type": [
"null",
{
"type": "string",
"avro.java.string": "String"
}
],
"default": null
},
{
"name": "card_holder",
"type": [
"null",
{
"type": "string",
"avro.java.string": "String"
}
],
"default": null
},
{
"name": "brand",
"type": [
"null",
{
"type": "string",
"avro.java.string": "String"
}
],
"default": null
}
]
}
},
{
"name": "confirm",
"type": "boolean",
"doc": "indicates whether is self confirmed",
"default": false
},
{
"name": "installments",
"type": "int",
"doc": "Number of installments for the payment",
"default": 1
},
{
"name": "due_date",
"type": [
"null",
{
"type": "int",
"logicalType": "date"
}
],
"default": null
},
{
"name": "correlation_id",
"type": [
"null",
{
"type": "string",
"avro.java.string": "String"
}
],
"doc": "the external customer correlationid",
"default": null
},
{
"name": "billing",
"type": {
"type": "record",
"name": "Billing",
"fields": [
{
"name": "national_id",
"type": [
"null",
{
"type": "string",
"avro.java.string": "String"
}
],
"doc": "billing info",
"default": null
},
{
"name": "name",
"type": [
"null",
{
"type": "string",
"avro.java.string": "String"
}
],
"doc": "the consumer address name",
"default": null
}
]
}
},
{
"name": "status",
"type": {
"type": "enum",
"name": "payment_status",
"symbols": [
"INITIAL",
"CONSUMER",
"AUTHORIZED",
"WAITING_CONFIRMATION",
"CANCELED",
"WAITING_CLEARING",
"CLEARED",
"TRANSFERENCE",
"DECLINED_BY_ISSUER",
"DECLINED_BY_BUSINESS_RULES",
"CONFIRMED",
"WAITING_CANCELING",
"WAITING_CONSUMER",
"TRANSFER_REQUESTED"
],
"default": "INITIAL"
}
},
{
"name": "metadata",
"type": {
"type": "map",
"values": {
"type": "string",
"avro.java.string": "String"
},
"avro.java.string": "String"
}
},
{
"name": "events",
"type": {
"type": "array",
"items": {
"type": "record",
"name": "event",
"fields": [
{
"name": "id",
"type": {
"type": "string",
"avro.java.string": "String"
},
"default": "0"
},
{
"name": "type",
"type": {
"type": "enum",
"name": "event_type",
"symbols": [
"AUTHORIZATION",
"AUTHENTICATION",
"CONFIRMATION",
"CANCELATION",
"CHECKOUT_CREATION",
"SETTLEMENT",
"TRANSFER_VALIDATION",
"TRANSFER_SCHEDULE",
"TRANSFERRED"
]
}
},
{
"name": "gateway",
"type": [
"null",
{
"type": "string",
"avro.java.string": "String"
}
],
"default": null
},
{
"name": "breadcrumb_id",
"type": [
"null",
{
"type": "string",
"avro.java.string": "String"
}
],
"default": null
},
{
"name": "request_time",
"type": {
"type": "long",
"logicalType": "timestamp-millis"
},
"doc": "the moment where this request was received by the platform"
},
{
"name": "response_time",
"type": {
"type": "long",
"logicalType": "timestamp-millis"
},
"doc": "the moment where this request was returned by the platform"
},
{
"name": "status",
"type": {
"type": "enum",
"name": "event_status",
"symbols": [
"SUCCESS",
"DENIED",
"ERROR",
"TIMEOUT",
"PENDING"
]
}
},
{
"name": "actor",
"type": {
"type": "enum",
"name": "actor",
"symbols": [
"AQ",
"GTW",
"CONCIL"
],
"default": "GTW"
},
"default": "GTW"
},
{
"name": "amount",
"type": [
"null",
{
"type": "bytes",
"logicalType": "decimal",
"precision": 5,
"scale": 5
}
],
"doc": "the original currency amount",
"default": null
},
{
"name": "foreign_amount",
"type": [
"null",
{
"type": "bytes",
"logicalType": "decimal",
"precision": 5,
"scale": 5
}
],
"doc": "the foreign amount for the payment",
"default": null
},
{
"name": "error",
"type": {
"type": "record",
"name": "Error",
"fields": [
{
"name": "code",
"type": [
"null",
{
"type": "string",
"avro.java.string": "String"
}
],
"default": null
},
{
"name": "message",
"type": [
"null",
{
"type": "string",
"avro.java.string": "String"
}
],
"default": null
}
]
}
},
{
"name": "message",
"type": [
"null",
{
"type": "string",
"avro.java.string": "String"
}
],
"default": null
},
{
"name": "fee_amount",
"type": [
"null",
{
"type": "bytes",
"logicalType": "decimal",
"precision": 5,
"scale": 5
}
],
"doc": "the fee amount",
"default": null
},
{
"name": "net_amount",
"type": [
"null",
{
"type": "bytes",
"logicalType": "decimal",
"precision": 5,
"scale": 5
}
],
"doc": "the net amount",
"default": null
},
{
"name": "metadata",
"type": {
"type": "map",
"values": {
"type": "string",
"avro.java.string": "String"
},
"avro.java.string": "String"
}
},
{
"name": "internal_metadata",
"type": {
"type": "map",
"values": {
"type": "string",
"avro.java.string": "String"
},
"avro.java.string": "String"
}
}
]
}
}
},
{
"name": "bank_account",
"type": [
"null",
{
"type": "record",
"name": "bank_account",
"fields": [
{
"name": "name",
"type": {
"type": "string",
"avro.java.string": "String"
},
"doc": "the bank name",
"default": ""
},
{
"name": "code",
"type": {
"type": "string",
"avro.java.string": "String"
},
"doc": "the bank code",
"default": ""
},
{
"name": "agency",
"type": {
"type": "string",
"avro.java.string": "String"
},
"doc": "the bank agency",
"default": ""
},
{
"name": "account",
"type": {
"type": "string",
"avro.java.string": "String"
},
"doc": "the bank account",
"default": ""
},
{
"name": "document_number",
"type": {
"type": "string",
"avro.java.string": "String"
},
"doc": "the bank document number (CNPJ)",
"default": ""
}
]
}
],
"doc": "The bank account values",
"default": null
}
]
Try setting this as the "type" for all timestamp columns in your Avro schema:
"type": ["null", {"type": "long", "logicalType": "timestamp-millis"}]
