Extract Specific Data in Nested Dictionary-List and write onto Json List - python

I have nested data that need to extract some data from it. Due to in-depth level of the nested data..I'm not able to extract all the data require. For this questions I just focus on how to get all the data requires for object node.
So far I have extract key 'id' value only...in the nested data got some other key of 'tp-id', 'ip' and 'mac' that I need to get the values also...but I still not able to get it....
This is the raw data (source data to collect the data)
{
"network-topology": {
"topology": [
{
"node": [
{
"opendaylight-topology-inventory:inventory-node-ref": "/opendaylight-inventory:nodes/opendaylight-inventory:node[opendaylight-inventory:id='openflow:1']",
"node-id": "openflow:1",
"termination-point": [
{
"opendaylight-topology-inventory:inventory-node-connector-ref": "/opendaylight-inventory:nodes/opendaylight-inventory:node[opendaylight-inventory:id='openflow:1']/opendaylight-inventory:node-connector[opendaylight-inventory:id='openflow:1:2']",
"tp-id": "openflow:1:2"
},
{
"opendaylight-topology-inventory:inventory-node-connector-ref": "/opendaylight-inventory:nodes/opendaylight-inventory:node[opendaylight-inventory:id='openflow:1']/opendaylight-inventory:node-connector[opendaylight-inventory:id='openflow:1:1']",
"tp-id": "openflow:1:1"
},
{
"opendaylight-topology-inventory:inventory-node-connector-ref": "/opendaylight-inventory:nodes/opendaylight-inventory:node[opendaylight-inventory:id='openflow:1']/opendaylight-inventory:node-connector[opendaylight-inventory:id='openflow:1:LOCAL']",
"tp-id": "openflow:1:LOCAL"
}
]
},
{
"host-tracker-service:addresses": [
{
"first-seen": 1562655393902,
"ip": "10.0.0.1",
"mac": "00:00:00:00:00:01",
"id": 6,
"last-seen": 1562655393902
}
],
"host-tracker-service:id": "00:00:00:00:00:01",
"host-tracker-service:attachment-points": [
{
"active": true,
"corresponding-tp": "host:00:00:00:00:00:01",
"tp-id": "openflow:1:1"
}
],
"node-id": "host:00:00:00:00:00:01",
"termination-point": [
{
"tp-id": "host:00:00:00:00:00:01"
}
]
},
{
"opendaylight-topology-inventory:inventory-node-ref": "/opendaylight-inventory:nodes/opendaylight-inventory:node[opendaylight-inventory:id='openflow:2']",
"node-id": "openflow:2",
"termination-point": [
{
"opendaylight-topology-inventory:inventory-node-connector-ref": "/opendaylight-inventory:nodes/opendaylight-inventory:node[opendaylight-inventory:id='openflow:2']/opendaylight-inventory:node-connector[opendaylight-inventory:id='openflow:2:LOCAL']",
"tp-id": "openflow:2:LOCAL"
},
{
"opendaylight-topology-inventory:inventory-node-connector-ref": "/opendaylight-inventory:nodes/opendaylight-inventory:node[opendaylight-inventory:id='openflow:2']/opendaylight-inventory:node-connector[opendaylight-inventory:id='openflow:2:1']",
"tp-id": "openflow:2:1"
},
{
"opendaylight-topology-inventory:inventory-node-connector-ref": "/opendaylight-inventory:nodes/opendaylight-inventory:node[opendaylight-inventory:id='openflow:2']/opendaylight-inventory:node-connector[opendaylight-inventory:id='openflow:2:2']",
"tp-id": "openflow:2:2"
}
]
},
{
"host-tracker-service:addresses": [
{
"first-seen": 1562655393906,
"ip": "10.0.0.2",
"mac": "00:00:00:00:00:02",
"id": 7,
"last-seen": 1562655393906
}
],
"host-tracker-service:id": "00:00:00:00:00:02",
"host-tracker-service:attachment-points": [
{
"active": true,
"corresponding-tp": "host:00:00:00:00:00:02",
"tp-id": "openflow:2:1"
}
],
"node-id": "host:00:00:00:00:00:02",
"termination-point": [
{
"tp-id": "host:00:00:00:00:00:02"
}
]
}
],
"link": [
{
"link-id": "host:00:00:00:00:00:01/openflow:1:1",
"destination": {
"dest-node": "openflow:1",
"dest-tp": "openflow:1:1"
},
"source": {
"source-tp": "host:00:00:00:00:00:01",
"source-node": "host:00:00:00:00:00:01"
}
},
{
"link-id": "openflow:2:1/host:00:00:00:00:00:02",
"destination": {
"dest-node": "host:00:00:00:00:00:02",
"dest-tp": "host:00:00:00:00:00:02"
},
"source": {
"source-tp": "openflow:2:1",
"source-node": "openflow:2"
}
},
{
"link-id": "openflow:1:2",
"destination": {
"dest-node": "openflow:2",
"dest-tp": "openflow:2:2"
},
"source": {
"source-tp": "openflow:1:2",
"source-node": "openflow:1"
}
},
{
"link-id": "openflow:2:2",
"destination": {
"dest-node": "openflow:1",
"dest-tp": "openflow:1:2"
},
"source": {
"source-tp": "openflow:2:2",
"source-node": "openflow:2"
}
},
{
"link-id": "openflow:1:1/host:00:00:00:00:00:01",
"destination": {
"dest-node": "host:00:00:00:00:00:01",
"dest-tp": "host:00:00:00:00:00:01"
},
"source": {
"source-tp": "openflow:1:1",
"source-node": "openflow:1"
}
},
{
"link-id": "host:00:00:00:00:00:02/openflow:2:1",
"destination": {
"dest-node": "openflow:2",
"dest-tp": "openflow:2:1"
},
"source": {
"source-tp": "host:00:00:00:00:00:02",
"source-node": "host:00:00:00:00:00:02"
}
}
],
"topology-id": "flow:1"
}
]
}
}
From here I want to have Node - id, tpid, ip and mac... and so far only got id.
Please find below the code I used to get the id value.. I tried to get tpid for example using for loop but i stuck also....
#Extract nodes info
node_list = []
for nodes in restData["network-topology"]["topology"][0]["node"]:
node = {}
node['id'] = nodes['node-id']
#for port in nodes['termination-point']:
#node['tpid'] = port['tp-id']
node_list.append(node)
nodes_list1 = [dd['id'] for dd in node_list]
print nodes_list1
#Add nodes_list1 info onto networkx
graph = nx.Graph()
graph.add_nodes_from(nodes_list1)
#Write onto json file
d = json_graph.node_link_data(graph)
with open('mytopo.json', 'w') as fc:
json.dump(d, fc, indent=4)
This is the output of mytopo.json
{
"directed": false,
"graph": {},
"nodes": [
{
"id": "host:00:00:00:00:00:02"
},
{
"id": "openflow:1"
},
{
"id": "openflow:2"
},
{
"id": "host:00:00:00:00:00:01"
}
],
"links": [],
"multigraph": false
}
I would like to have tpid, ip and mac to be in the json file...I expect to have mytopo.json as follows
{
"directed": false,
"graph": {
},
"nodes": [
{
"id": "host:00:00:00:00:00:01",
"tpid": "host:00:00:00:00:00:01",
"ip": "10.0.0.1",
"mac": "00:00:00:00:00:01"
},
{
"id": "openflow:1",
"tpid": [
"openflow:1:2",
"openflow:1:1",
"openflow:1:LOCAL"
]
},
{
"id": "openflow:2",
"tpid": [
"openflow:2:LOCAL",
"openflow:2:1",
"openflow:2:2"
]
},
{
"id": "host:00:00:00:00:00:02",
"tpid": "host:00:00:00:00:00:02",
"ip": "10.0.0.2",
"mac": "00:00:00:00:00:02"
}
],
"links": [
],
"multigraph": false
}
Appreciate your experts help and guideline for me to proceed further. Thank you.
I update for loop as per input #Tim Seed but it only return last tpid value. Based on the source raw data, tpid value of openflow1 and openflow2 have 3 values while host:00:00:00:00:00:01 and host:00:00:00:00:00:02 only have 1 value of tpid... so it should return something like below
nodes": [
{
"id": "host:00:00:00:00:00:01",
"tpid": "host:00:00:00:00:00:01",
"ip": "10.0.0.1",
"mac": "00:00:00:00:00:01"
},
{
"id": "openflow:1",
"tpid": [
"openflow:1:2",
"openflow:1:1",
"openflow:1:LOCAL"
]
},
{
"id": "openflow:2",
"tpid": [
"openflow:2:LOCAL",
"openflow:2:1",
"openflow:2:2"
]
},
{
"id": "host:00:00:00:00:00:02",
"tpid": "host:00:00:00:00:00:02",
"ip": "10.0.0.2",
"mac": "00:00:00:00:00:02"
}
],
when i print node['tpid']... it will show all the tpid as follows
openflow:1:2
openflow:1:1
openflow:1:LOCAL
host:00:00:00:00:00:01
openflow:2:LOCAL
openflow:2:1
openflow:2:2
host:00:00:00:00:00:02
but it just only add 1 tpid each onto node_list...
nodes_list1 = [(dd['id'], dd['tpid']) for dd in node_list]
Thanks

Try this ... and expand on it
node_list = []
for nodes in restData["network-topology"]["topology"]:
for sub in nodes["node"]:
node = {}
node['id'] = sub['node-id']
for port in sub['termination-point']:
node['tpid'] = port['tp-id']
node_list.append(node)
It should output
[{'id': 'openflow:1', 'tpid': 'openflow:1:LOCAL'},
{'id': 'host:00:00:00:00:00:01', 'tpid': 'host:00:00:00:00:00:01'},
{'id': 'openflow:2', 'tpid': 'openflow:2:2'},
{'id': 'host:00:00:00:00:00:02', 'tpid': 'host:00:00:00:00:00:02'}]

Related

Best way to parse a JSON to store in SQL database (SQL stored procedure/Python)

I have a table of overly complex JSON files I'm trying to convert to tabular format to store in a SQL database. I'm pulling the JSONs from the quickbooks online API, and the format is messy to say the least.. (We're talking 7x nested JSONs for some bits of it..
The format resembles the code snippet down below. Currently I am using a bunch of OpenJSON's + Cross applys to dig down to the innermost ColData then work my way up but it looks like some of the ColData's get skipped over doing that.
Are there any better ways, using either Python (since I pull the JSON initially in Python before sending the JSON to a SQL database to parse) or SQL to convert it to tabular format besides manually trying to use OpenJSON with Cross applys?
The goal is to get all of the ColData's into a SQL table...
Thanks!
{
"Header": {
"ReportName": "BalanceSheet",
"Option": [
{
"Name": "AccountingStandard",
"Value": "GAAP"
},
{
"Name": "NoReportData",
"Value": "false"
}
],
"DateMacro": "this calendar year-to-date",
"ReportBasis": "Accrual",
"StartPeriod": "2016-01-01",
"Currency": "USD",
"EndPeriod": "2016-10-31",
"Time": "2016-10-31T09:42:21-07:00",
"SummarizeColumnsBy": "Total"
},
"Rows": {
"Row": [
{
"Header": {
"ColData": [
{
"value": "ASSETS"
},
{
"value": ""
}
]
},
"Rows": {
"Row": [
{
"Header": {
"ColData": [
{
"value": "Current Assets"
},
{
"value": ""
}
]
},
"Rows": {
"Row": [
{
"Header": {
"ColData": [
{
"value": "Bank Accounts"
},
{
"value": ""
}
]
},
"Rows": {
"Row": [
{
"ColData": [
{
"id": "35",
"value": "Checking"
},
{
"value": "1350.55"
}
],
"type": "Data"
},
{
"ColData": [
{
"id": "36",
"value": "Savings"
},
{
"value": "800.00"
}
],
"type": "Data"
}
]
},
"type": "Section",
"group": "BankAccounts",
"Summary": {
"ColData": [
{
"value": "Total Bank Accounts"
},
{
"value": "2150.55"
}
]
}
},
{
"Header": {
"ColData": [
{
"value": "Accounts Receivable"
},
{
"value": ""
}
]
},
"Rows": {
"Row": [
{
"ColData": [
{
"id": "84",
"value": "Accounts Receivable (A/R)"
},
{
"value": "6383.12"
}
],
"type": "Data"
}
]
},
"type": "Section",
"group": "AR",
"Summary": {
"ColData": [
{
"value": "Total Accounts Receivable"
},
{
"value": "6383.12"
}
]
}
},
{
"Header": {
"ColData": [
{
"value": "Other current assets"
},
{
"value": ""
}
]
},
"Rows": {
"Row": [
{
"ColData": [
{
"id": "81",
"value": "Inventory Asset"
},
{
"value": "596.25"
}
],
"type": "Data"
},
{
"ColData": [
{
"id": "4",
"value": "Undeposited Funds"
},
{
"value": "2117.52"
}
],
"type": "Data"
}
]
},
"type": "Section",
"group": "OtherCurrentAssets",
"Summary": {
"ColData": [
{
"value": "Total Other current assets"
},
{
"value": "2713.77"
}
]
}
}
]
},
"type": "Section",
"group": "CurrentAssets",
"Summary": {
"ColData": [
{
"value": "Total Current Assets"
},
{
"value": "11247.44"
}
]
}
},
{
"Header": {
"ColData": [
{
"value": "Fixed Assets"
},
{
"value": ""
}
]
},
"Rows": {
"Row": [
{
"Header": {
"ColData": [
{
"id": "37",
"value": "Truck"
},
{
"value": ""
}
]
},
"Rows": {
"Row": [
{
"ColData": [
{
"id": "38",
"value": "Original Cost"
},
{
"value": "13495.00"
}
],
"type": "Data"
}
]
},
"type": "Section",
"Summary": {
"ColData": [
{
"value": "Total Truck"
},
{
"value": "13495.00"
}
]
}
}
]
},
"type": "Section",
"group": "FixedAssets",
"Summary": {
"ColData": [
{
"value": "Total Fixed Assets"
},
{
"value": "13495.00"
}
]
}
}
]
},
"type": "Section",
"group": "TotalAssets",
"Summary": {
"ColData": [
{
"value": "TOTAL ASSETS"
},
{
"value": "24742.44"
}
]
}
},
{
"Header": {
"ColData": [
{
"value": "LIABILITIES AND EQUITY"
},
{
"value": ""
}
]
},
"Rows": {
"Row": [
{
"Header": {
"ColData": [
{
"value": "Liabilities"
},
{
"value": ""
}
]
},
"Rows": {
"Row": [
{
"Header": {
"ColData": [
{
"value": "Current Liabilities"
},
{
"value": ""
}
]
},
"Rows": {
"Row": [
{
"Header": {
"ColData": [
{
"value": "Accounts Payable"
},
{
"value": ""
}
]
},
"Rows": {
"Row": [
{
"ColData": [
{
"id": "33",
"value": "Accounts Payable (A/P)"
},
{
"value": "1984.17"
}
],
"type": "Data"
}
]
},
"type": "Section",
"group": "AP",
"Summary": {
"ColData": [
{
"value": "Total Accounts Payable"
},
{
"value": "1984.17"
}
]
}
},
{
"Header": {
"ColData": [
{
"value": "Credit Cards"
},
{
"value": ""
}
]
},
"Rows": {
"Row": [
{
"ColData": [
{
"id": "41",
"value": "Mastercard"
},
{
"value": "157.72"
}
],
"type": "Data"
}
]
},
"type": "Section",
"group": "CreditCards",
"Summary": {
"ColData": [
{
"value": "Total Credit Cards"
},
{
"value": "157.72"
}
]
}
},
{
"Header": {
"ColData": [
{
"value": "Other Current Liabilities"
},
{
"value": ""
}
]
},
"Rows": {
"Row": [
{
"ColData": [
{
"id": "89",
"value": "Arizona Dept. of Revenue Payable"
},
{
"value": "4.55"
}
],
"type": "Data"
},
{
"ColData": [
{
"id": "90",
"value": "Board of Equalization Payable"
},
{
"value": "401.98"
}
],
"type": "Data"
},
{
"ColData": [
{
"id": "43",
"value": "Loan Payable"
},
{
"value": "4000.00"
}
],
"type": "Data"
}
]
},
"type": "Section",
"group": "OtherCurrentLiabilities",
"Summary": {
"ColData": [
{
"value": "Total Other Current Liabilities"
},
{
"value": "4406.53"
}
]
}
}
]
},
"type": "Section",
"group": "CurrentLiabilities",
"Summary": {
"ColData": [
{
"value": "Total Current Liabilities"
},
{
"value": "6548.42"
}
]
}
},
{
"Header": {
"ColData": [
{
"value": "Long-Term Liabilities"
},
{
"value": ""
}
]
},
"Rows": {
"Row": [
{
"ColData": [
{
"id": "44",
"value": "Notes Payable"
},
{
"value": "25000.00"
}
],
"type": "Data"
}
]
},
"type": "Section",
"group": "LongTermLiabilities",
"Summary": {
"ColData": [
{
"value": "Total Long-Term Liabilities"
},
{
"value": "25000.00"
}
]
}
}
]
},
"type": "Section",
"group": "Liabilities",
"Summary": {
"ColData": [
{
"value": "Total Liabilities"
},
{
"value": "31548.42"
}
]
}
},
{
"Header": {
"ColData": [
{
"value": "Equity"
},
{
"value": ""
}
]
},
"Rows": {
"Row": [
{
"ColData": [
{
"id": "34",
"value": "Opening Balance Equity"
},
{
"value": "-9337.50"
}
],
"type": "Data"
},
{
"ColData": [
{
"id": "2",
"value": "Retained Earnings"
},
{
"value": "91.25"
}
],
"type": "Data"
},
{
"ColData": [
{
"value": "Net Income"
},
{
"value": "2440.27"
}
],
"type": "Data",
"group": "NetIncome"
}
]
},
"type": "Section",
"group": "Equity",
"Summary": {
"ColData": [
{
"value": "Total Equity"
},
{
"value": "-6805.98"
}
]
}
}
]
},
"type": "Section",
"group": "TotalLiabilitiesAndEquity",
"Summary": {
"ColData": [
{
"value": "TOTAL LIABILITIES AND EQUITY"
},
{
"value": "24742.44"
}
]
}
}
]
},
"Columns": {
"Column": [
{
"ColType": "Account",
"ColTitle": "",
"MetaData": [
{
"Name": "ColKey",
"Value": "account"
}
]
},
{
"ColType": "Money",
"ColTitle": "Total",
"MetaData": [
{
"Name": "ColKey",
"Value": "total"
}
]
}
]
}
}
Here is what I tried to get the ColData (unsuccessfully I might add), I think it might be a little too contrived to do in SQL but I'm not sure if I should continue trying this way or if there's a better way in Python:
declare #json nvarchar(max)
SELECT #json = json FROM QboApiRawJSONData WHERE ID = 2
--Outer layer of JSON breaks into 3 parts - header, columns, rows
SELECT * FROM OPENJSON(#json)
WITH
(
Rows nvarchar(max) AS JSON
) as MainLayer
CROSS APPLY OPENJSON (MainLayer.Rows)
WITH
(
Row nvarchar(max) AS JSON
) as SecondaryLayer
CROSS APPLY OPENJSON (SecondaryLayer.Row)
WITH
(
Rows nvarchar(max) AS JSON
) As ThirdLayer
CROSS APPLY OPENJSON (ThirdLayer.Rows)
WITH
(
Row nvarchar(max) AS JSON
) as FourthLayer
CROSS APPLY OPENJSON (FourthLayer.Row)
WITH
(
Rows nvarchar(max) AS JSON
) as FifthLayer
CROSS APPLY OPENJSON (FifthLayer.Rows)
WITH
(
Row nvarchar(max) AS JSON
) as SixthLayer
CROSS APPLY OPENJSON (SixthLayer.Row)
WITH
(
Rows nvarchar(max) AS JSON
) as SeventhLayer
CROSS APPLY OPENJSON (SeventhLayer.Rows)
WITH
(
Row nvarchar(max) AS JSON
) as EighthLayer
CROSS APPLY OPENJSON (EighthLayer.Row)
WITH
(
Rows nvarchar(max) AS JSON
) as LayerNine
---Things get funky here
CROSS APPLY OPENJSON (LayerNine.Rows)
WITH
(
Row nvarchar(max) AS JSON
) as LayerTen
CROSS APPLY OPENJSON (LayerTen.Row)
WITH
(
Rows nvarchar(max) AS JSON
) as LayerEleven
CROSS APPLY OPENJSON (LayerEleven.Rows)
WITH
(
Row nvarchar(max) AS JSON
) as LayerTwelve
--21 items in last col
There is JSON support for sql-server:
https://learn.microsoft.com/en-us/sql/relational-databases/json/json-data-sql-server?view=sql-server-ver15
There is a JSON storage method here: https://learn.microsoft.com/en-us/sql/relational-databases/json/store-json-documents-in-sql-tables?view=sql-server-ver15. Although it will be fairly complex here I recommend storing the JSON data as a Logs table and then following the tutorial above to see if that solves your issue.
Use Quickbooks API to get the JSON, then refer to this guide:
https://learn.microsoft.com/en-us/sql/relational-databases/json/json-data-sql-server?view=sql-server-ver15
and this guide:
https://learn.microsoft.com/en-us/sql/relational-databases/json/convert-json-data-to-rows-and-columns-with-openjson-sql-server?view=sql-server-ver15
You can also consider setting something like AWS Lambda or Google Cloud Functions if you need something more automated.

Merge 2 json files with jsonmerge

I want to merge many JSON files with the same nested structure, using jsonmerge, but have been unsuccessful so far. For example, I want to merge base and head:
base = {
"data": [
{
"author_id": "id1",
"id": "1"
},
{
"author_id": "id2",
"id": "2"
}
],
"includes": {
"users": [
{
"id": "user1",
"name": "user1"
},
{
"id": "user2",
"name": "user2"
}
]
}
}
head = {
"data": [
{
"author_id": "id3",
"id": "3"
},
{
"author_id": "id4",
"id": "4"
}
],
"includes": {
"users": [
{
"id": "user3",
"name": "user3"
},
{
"id": "user4",
"name": "user4"
}
]
}
}
The resulting JSON should be:
final_result = {
"data": [
{
"author_id": "id1",
"id": "1"
},
{
"author_id": "id2",
"id": "2"
},
{
"author_id": "id3",
"id": "3"
},
{
"author_id": "id4",
"id": "4"
}
],
"includes": {
"users": [
{
"id": "user1",
"name": "user1"
},
{
"id": "user2",
"name": "user2"
},
{
"id": "user3",
"name": "user3"
},
{
"id": "user4",
"name": "user4"
}
]
}
}
However, I've only managed to merge correctly the data fields, while for users it doesn't seem to work. This is my code:
from jsonmerge import merge
from jsonmerge import Merger
schema = { "properties": {
"data": {
"mergeStrategy": "append"
},
"includes": {
"users": {
"mergeStrategy": "append"
}
}
}
}
merger = Merger(schema)
result = merger.merge(base, head)
The end result is:
{'data': [{'author_id': 'id1', 'id': '1'},
{'author_id': 'id2', 'id': '2'},
{'author_id': 'id3', 'id': '3'},
{'author_id': 'id4', 'id': '4'}],
'includes': {'users': [{'id': 'user3', 'name': 'user3'},
{'id': 'user4', 'name': 'user4'}]}}
The issue is with the definition of the schema, but I do not know if it is possible to do it like that with jsonmerge. Any help is appreciated!
Thank you!
It is based on jsonschema. So when you have an object within an object (e.g. "users" within "includes") then you'll need to tell jsonschema it is dealing with another object like so:
schema = {
"properties": {
"data": {
"mergeStrategy": "append"
},
"includes": {
"type": "object",
"properties": {
"users": {
"mergeStrategy": "append"
}
}
}
}
}
Note that this also happens for your top-level objects, hence you have "properties" argument on the highest level.

Google Assistant API V2 userstorage

I'm attempting to develop a Google Action with the Dialogflow v2 API
My function saves a value to userstorage as follows
def save_value(value):
res = {
"fulfillmentText": "Set value to {}".format(int(value)),
"payload": {
"google": {
"userStorage": str(value)
}
}
}
print ("Saved value")
response = jsonify(res)
return response
And I get the following back from testing in Dialogflow
{
"fulfillmentText": "Set value to 36237269",
"payload": {
"google": {
"userStorage": "36237269"
}
}
}
This works for the duration of session, I am able to use this in later intents via
value = request_json['originalRequest']['data']['user']['userStorage']
However, the data is only stored for one session - if I invoke my action again, there is nothing saved.
Is this the correct way of using userstorage? Has anyone successfully used it with Python?
Failed "response"
{
"responseMetadata": {
"status": {
"code": 10,
"message": "Failed to parse Dialogflow response into AppResponse because of empty speech response",
"details": [
{
"#type": "type.googleapis.com/google.protobuf.Value",
"value": "{\"id\":\"816605a7-f7e0-4d37-a490-c84ff63fb7dd\",\"timestamp\":\"2018-11-08T17:18:49.422Z\",\"lang\":\"en-us\",\"result\":{},\"alternateResult\":{},\"status\":{\"code\":206,\"errorType\":\"partial_content\",\"errorDetails\":\"Webhook call failed. Error: 500 Internal Server Error\"},\"sessionId\":\"ABwppHHai3qsY2WPZWezmh9Q_bUF45aD51GbQ81sUDF7iSrRLA2m8KFgZ1ZYavnCv3fAckW1tcoJdydZTXQY5Nw\"}"
}
]
}
}
}
Working "response"
{
"conversationToken": "[]",
"finalResponse": {
"richResponse": {
"items": [
{
"simpleResponse": {
"textToSpeech": "Text"
}
}
]
}
},
"responseMetadata": {
"status": {
"message": "Success (200)"
},
"queryMatchInfo": {
"queryMatched": true,
"intent": "047ad9d9-0180-47f9-88bd-e5ffc8936c08"
}
},
"userStorage": "36237269"
}
Working "Request"
{
"user": {
"userId": "ABwppHE5H0FKrXKk8PjJyzZJ12OSMQcjxuT2NnfPAgLvai0UsfWEoYE8R_L8qLQdqY29sOnsZhQhE5G4XVVXiGs",
"locale": "en-US",
"lastSeen": "2018-11-08T17:18:16Z",
"userStorage": "36237269"
},
"conversation": {
"conversationId": "ABwppHE6BwK2zIBKxHA8hc9uBGumVgKbbNGHhRVFz7O6yrxxa1WJ_xtKNqhesj3EwNCVlestM-bF6tDWzZhqUXE",
"type": "ACTIVE",
"conversationToken": "[]"
},
"inputs": [
{
"intent": "actions.intent.TEXT",
"rawInputs": [
{
"inputType": "KEYBOARD",
"query": "when is the next bus"
}
],
"arguments": [
{
"name": "text",
"rawText": "when is the next bus",
"textValue": "when is the next bus"
}
]
}
],
"surface": {
"capabilities": [
{
"name": "actions.capability.MEDIA_RESPONSE_AUDIO"
},
{
"name": "actions.capability.WEB_BROWSER"
},
{
"name": "actions.capability.AUDIO_OUTPUT"
},
{
"name": "actions.capability.SCREEN_OUTPUT"
}
]
},
"isInSandbox": true,
"availableSurfaces": [
{
"capabilities": [
{
"name": "actions.capability.WEB_BROWSER"
},
{
"name": "actions.capability.AUDIO_OUTPUT"
},
{
"name": "actions.capability.SCREEN_OUTPUT"
}
]
}
],
"requestType": "SIMULATOR"
}
Failed "request"
{
"user": {
"userId": "ABwppHE5H0FKrXKk8PjJyzZJ12OSMQcjxuT2NnfPAgLvai0UsfWEoYE8R_L8qLQdqY29sOnsZhQhE5G4XVVXiGs",
"locale": "en-US",
"lastSeen": "2018-11-08T17:18:41Z"
},
"conversation": {
"conversationId": "ABwppHHai3qsY2WPZWezmh9Q_bUF45aD51GbQ81sUDF7iSrRLA2m8KFgZ1ZYavnCv3fAckW1tcoJdydZTXQY5Nw",
"type": "ACTIVE",
"conversationToken": "[]"
},
"inputs": [
{
"intent": "actions.intent.TEXT",
"rawInputs": [
{
"inputType": "KEYBOARD",
"query": "when is the next bus"
}
],
"arguments": [
{
"name": "text",
"rawText": "when is the next bus",
"textValue": "when is the next bus"
}
]
}
],
"surface": {
"capabilities": [
{
"name": "actions.capability.WEB_BROWSER"
},
{
"name": "actions.capability.MEDIA_RESPONSE_AUDIO"
},
{
"name": "actions.capability.SCREEN_OUTPUT"
},
{
"name": "actions.capability.AUDIO_OUTPUT"
}
]
},
"isInSandbox": true,
"availableSurfaces": [
{
"capabilities": [
{
"name": "actions.capability.WEB_BROWSER"
},
{
"name": "actions.capability.SCREEN_OUTPUT"
},
{
"name": "actions.capability.AUDIO_OUTPUT"
}
]
}
],
"requestType": "SIMULATOR"
}

Obtain records based on matching key value pairs and comparing date in Python

I have a following collection in MongoDB:
{
"_id" : ObjectId("5bbc86e5c16a27f1e1bd39f8"),
"name" : "swetha",
"nameId" : 123,
"source" : "Blore",
"sourceId" : 10,
"LastUpdate" : "10-Oct-2018"
}
{
"_id" : ObjectId("5bbc86e5c16a27f1e1bd39f9"),
"name" : "swetha",
"nameId" : 123,
"source" : "Mlore",
"sourceId" : "11",
"LastUpdate" : "11-Oct-2018"
}
{
"_id" : ObjectId("5bbc86e5c16a27f1e1bd39fa"),
"name" : "swathi",
"nameId" : 124,
"source" : "Mlore",
"sourceId" : "11",
"LastUpdate" : "9-Oct-2018"
}
I am a beginner to Python and want to compare the 'LastUpdate' between the above records based on matching 'name' or 'nameId' and want to push the record with latest date to another collection. E.g. name:'Swetha' is same in first two records. So compare 'LastUpdate' between them and output the record with latest date.
I have written following code to read data records from MongoDB and to print. I didn't understand how to compare records within a same key and compare their timestamp though I referred few resources on Google.
import json
import pandas as pd
from pymongo import MongoClient
try:
client = MongoClient()
print("Connected successfully!!!")
except:
print("Could not connect to MongoDB")
# database
db = client.conn
collection = db.contactReg
df = collection.find()
for row in df:
print(row)
Links that are ref
Is there a better way to compare dictionary values
https://gis.stackexchange.com/questions/87276/how-to-compare-values-from-a-column-in-attribute-table-with-values-in-dictionary
Comparing two dictionaries and printing key value pair in python and few more.
I think what you need is an aggregation. This might look big but once you get the hang out of mongo aggregations you'll get comfortable.
df = collection.aggregate([
{
"$project": {
"_id": 0,
"name": 1,
"nameId": 1,
"source": 1,
"sourceId": 1,
"LastUpdate": 1,
"LastUpdateArray": {
"$split": [
"$LastUpdate",
"-"
]
}
}
},
{
"$project": {
"name": 1,
"nameId": 1,
"source": 1,
"sourceId": 1,
"LastUpdate": 1,
"LastUpdateArray": 1,
"LastUpdateMonth": {
"$arrayElemAt": [
"$LastUpdateArray",
1
]
}
}
},
{
"$project": {
"name": 1,
"nameId": 1,
"source": 1,
"sourceId": 1,
"LastUpdate": 1,
"Year": {
"$arrayElemAt": [
"$LastUpdateArray",
2
]
},
"Date": {
"$arrayElemAt": [
"$LastUpdateArray",
0
]
},
"Month": {
"$switch": {
"branches": [
{
"case": {
"$eq": [
"$LastUpdateMonth",
"Jan"
]
},
"then": "01"
},
{
"case": {
"$eq": [
"$LastUpdateMonth",
"Feb"
]
},
"then": "02"
},
{
"case": {
"$eq": [
"$LastUpdateMonth",
"Mar"
]
},
"then": "03"
},
{
"case": {
"$eq": [
"$LastUpdateMonth",
"Apr"
]
},
"then": "04"
},
{
"case": {
"$eq": [
"$LastUpdateMonth",
"May"
]
},
"then": "05"
},
{
"case": {
"$eq": [
"$LastUpdateMonth",
"Jun"
]
},
"then": "06"
},
{
"case": {
"$eq": [
"$LastUpdateMonth",
"Jul"
]
},
"then": "07"
},
{
"case": {
"$eq": [
"$LastUpdateMonth",
"Aug"
]
},
"then": "08"
},
{
"case": {
"$eq": [
"$LastUpdateMonth",
"Sep"
]
},
"then": "09"
},
{
"case": {
"$eq": [
"$LastUpdateMonth",
"Oct"
]
},
"then": "10"
},
{
"case": {
"$eq": [
"$LastUpdateMonth",
"Nov"
]
},
"then": "11"
},
{
"case": {
"$eq": [
"$LastUpdateMonth",
"Dec"
]
},
"then": "12"
}
],
"default": "01"
}
}
}
},
{
"$project": {
"name": 1,
"nameId": 1,
"source": 1,
"sourceId": 1,
"LastUpdate": 1,
"Year": 1,
"Date": 1,
"Month": 1,
"DateString": {
"$concat": [
"$Year",
"-",
"$Month",
"-",
"$Date"
]
}
}
},
{
"$project": {
"name": 1,
"nameId": 1,
"source": 1,
"sourceId": 1,
"LastUpdate": 1,
"Date": {
"$dateFromString": {
"dateString": "$DateString"
}
}
}
},
{
"$sort": {
"Date": -1
}
},
{
"$group": {
"_id": "$name",
"name": {
"$first": "$name"
},
"nameId": {
"$first": "$nameId"
},
"source": {
"$first": "$source"
},
"sourceId": {
"$first": "$sourceId"
},
"LastUpdate": {
"$first": "$LastUpdate"
},
"Date": {
"$first": "$Date"
}
}
},
{
"$project": {
"name": 1,
"nameId": 1,
"source": 1,
"sourceId": 1,
"LastUpdate": 1
}
}
])
The first 5 steps of aggregation, I tried to convert it into a date and then sort descending by date. In group by I grouped with name and took the first that comes with that name.
Hope this helps.
I'm assuming what you need is duplicate records and I'm taking the first one that comes. Reference : https://stackoverflow.com/a/26985011/7630071
df = collection.aggregate([
{
"$group": {
"_id": "$name",
"count": {
"$sum": 1
},
"data": {
"$push": {
"nameId": "$nameId",
"source": "$source",
"sourceId": "$sourceId",
"LastUpdate": "$LastUpdate"
}
}
}
},
{
"$match": {
"_id": {
"$ne": null
},
"count": {
"$gt": 1
}
}
}
])

Mongo Cursor not returnning a cursor but an object

test_cursor = db.command({
"aggregate": "New_layout",
"pipeline": [
{ "$match": { "$and": [
{ "FIRST_DATE": { "$gte": new_date } },
{ "CHAIN_ID": { "$ne": "" } }
] } },
{ "$unwind": { "path": "$ENTERS", "includeArrayIndex": "Date" } },
{ "$project": {
"_id": 0,
"SITE_ID": "$SITE_ID",
"CHAIN_ID": "$CHAIN_ID",
"SEGMENT_ID": "$SEGMENT_ID",
"ZIP": "$ZIP",
"ZIP3": "$ZIP3",
"MARKET_ID": "$MARKET_ID",
"REGION": "$REGION",
"MALL_CODE": "$MALL_CODE",
"MALL_AREA": "$MALL_AREA",
"MALL_NAME": "$MALL_NAME",
"FIRST_DATE": "$FIRST_DATE",
"MARKET_AREA": "$MARKET_AREA",
"REGION_AREA": "$REGION_AREA",
"ZIP_AREA": "$ZIP_AREA",
"ZIP3_AREA": "$ZIP3_AREA",
"DATE": "$Date",
"ENTERS": "$ENTERS"
} }
],
"allowDiskUse": bool(1),
"cursor": {}
})
asd=list(test_cursor)
The contents of the cursor are as below :-
[u'cursor', u'ok', u'waitedMS'] .
However with an $out statement, the output collection has the expected contents.
I am running pymongo v3.2.2 and mongo 3.2. I was told this problem is experienced with v3.0 or lesser, but this is something I am not able to figure out
You should use aggregate() instead of command().
test_cursor = db.New_layout.aggregate([
{ "$match": { "$and": [
{ "FIRST_DATE": { "$gte": new_date } },
{ "CHAIN_ID": { "$ne": "" } }
] } },
{ "$unwind": { "path": "$ENTERS", "includeArrayIndex": "Date" } },
{ "$project": {
"_id": 0,
"SITE_ID": "$SITE_ID",
"CHAIN_ID": "$CHAIN_ID",
"SEGMENT_ID": "$SEGMENT_ID",
"ZIP": "$ZIP",
"ZIP3": "$ZIP3",
"MARKET_ID": "$MARKET_ID",
"REGION": "$REGION",
"MALL_CODE": "$MALL_CODE",
"MALL_AREA": "$MALL_AREA",
"MALL_NAME": "$MALL_NAME",
"FIRST_DATE": "$FIRST_DATE",
"MARKET_AREA": "$MARKET_AREA",
"REGION_AREA": "$REGION_AREA",
"ZIP_AREA": "$ZIP_AREA",
"ZIP3_AREA": "$ZIP3_AREA",
"DATE": "$Date",
"ENTERS": "$ENTERS"
} }
],
allowDiskUse=True)

Categories

Resources