Merging two json files using python - python

I'm new to python, I want to merge two JSON files
there should not be any duplicate:
if the values and name are same then I will add both the keys and maintain a single record, otherwise, I will keep the record
File 1:
[ {
"key": 1,
"name": "test",
"value": "NY"
},
{
"key": 1,
"name": "test",
"value": "CA"
},
{
"key": 1,
"name": "test",
"value": "MA"
},
{
"key": 1,
"name": "test",
"value": "MA"
}
]
File 2:
[ {
"key": 1,
"name": "test",
"value": "NJ"
},
{
"key": 1,
"name": "test",
"value": "CA"
},
{
"key": 1,
"name": "test",
"value": "TX"
},
{
"key": 1,
"name": "test",
"value": "MA"
}
]
and the merged file output should be:
[
{
"key": 1,
"name": "test",
"value": "NY"
},
{
"key": 3,
"name": "test",
"value": "MA"
},
{
"key": 1,
"name": "test",
"value": "NJ"
},
{
"key": 2,
"name": "test",
"value": "CA"
},
{
"key": 1,
"name": "test",
"value": "TX"
}
]
order of the record does not matter.
I have tried several approaches, like merging the files and then iterating over then, parsing both files separately but I'm facing issues, being new to python.

This should help.
# -*- coding: utf-8 -*-
f1 = [ {
"key": 1,
"value": "NY"
},
{
"key": 1,
"value": "CA"
},
{
"key": 1,
"value": "MA"
}
]
f2 = [ {
"key": 1,
"value": "NJ"
},
{
"key": 1,
"value": "CA"
},
{
"key": 1,
"value": "TX"
}
]
check = [i["value"] for i in f1] #check list to see if the value already exist in f1.
for i in f2:
if i['value'] not in check:
f1.append(i)
print(f1)
Output:
[{'value': 'NY', 'key': 1}, {'value': 'CA', 'key': 1}, {'value': 'MA', 'key': 1}, {'value': 'NJ', 'key': 1}, {'value': 'TX', 'key': 1}]

Related

Merge 2 json files with jsonmerge

I want to merge many JSON files with the same nested structure, using jsonmerge, but have been unsuccessful so far. For example, I want to merge base and head:
base = {
"data": [
{
"author_id": "id1",
"id": "1"
},
{
"author_id": "id2",
"id": "2"
}
],
"includes": {
"users": [
{
"id": "user1",
"name": "user1"
},
{
"id": "user2",
"name": "user2"
}
]
}
}
head = {
"data": [
{
"author_id": "id3",
"id": "3"
},
{
"author_id": "id4",
"id": "4"
}
],
"includes": {
"users": [
{
"id": "user3",
"name": "user3"
},
{
"id": "user4",
"name": "user4"
}
]
}
}
The resulting JSON should be:
final_result = {
"data": [
{
"author_id": "id1",
"id": "1"
},
{
"author_id": "id2",
"id": "2"
},
{
"author_id": "id3",
"id": "3"
},
{
"author_id": "id4",
"id": "4"
}
],
"includes": {
"users": [
{
"id": "user1",
"name": "user1"
},
{
"id": "user2",
"name": "user2"
},
{
"id": "user3",
"name": "user3"
},
{
"id": "user4",
"name": "user4"
}
]
}
}
However, I've only managed to merge correctly the data fields, while for users it doesn't seem to work. This is my code:
from jsonmerge import merge
from jsonmerge import Merger
schema = { "properties": {
"data": {
"mergeStrategy": "append"
},
"includes": {
"users": {
"mergeStrategy": "append"
}
}
}
}
merger = Merger(schema)
result = merger.merge(base, head)
The end result is:
{'data': [{'author_id': 'id1', 'id': '1'},
{'author_id': 'id2', 'id': '2'},
{'author_id': 'id3', 'id': '3'},
{'author_id': 'id4', 'id': '4'}],
'includes': {'users': [{'id': 'user3', 'name': 'user3'},
{'id': 'user4', 'name': 'user4'}]}}
The issue is with the definition of the schema, but I do not know if it is possible to do it like that with jsonmerge. Any help is appreciated!
Thank you!
It is based on jsonschema. So when you have an object within an object (e.g. "users" within "includes") then you'll need to tell jsonschema it is dealing with another object like so:
schema = {
"properties": {
"data": {
"mergeStrategy": "append"
},
"includes": {
"type": "object",
"properties": {
"users": {
"mergeStrategy": "append"
}
}
}
}
}
Note that this also happens for your top-level objects, hence you have "properties" argument on the highest level.

JSON ID extraction from Array in Python

I wrote a script to pull data from Verizon's connectivity management API with the following. Below is the section that requests the line information based on the search item, in this case, the SIM or iccid. I did not include the previous parts because they are just to connect to the API and get credentials.
header = {
'accept': 'application/json',
'VZ-M2M-Token': session_token,
'Authorization': 'Bearer' + bearer_token,
'Content-Type': 'application/json',
}
data = '{ "deviceId": { "id": ' + SIM +', "kind": "ICCID" }}'
response = requests.post('https://thingspace.verizon.com/api/m2m/v1/devices/actions/list', headers=header, data=data)
And the response I get is a JSON Array which looks like
{
"hasMoreData": false,
"devices": [
{
"accountName": "123456789-00001",
"billingCycleEndDate": "2020-10-31T20:00:00-04:00",
"carrierInformations": [
{
"carrierName": "Verizon Wireless",
"servicePlan": "3rrrrx48wwwwrjgjtyjtyjtyjtyj",
"state": "active"
}
],
"connected": true,
"createdAt": "2016-11-04T11:06:28-04:00",
"deviceIds": [
{
"id": "5256694405",
"kind": "mdn"
},
{
"id": "3114949302094150",
"kind": "imsi"
},
{
"id": "35922505468230",
"kind": "imei"
},
{
"id": "891480000054957290575",
"kind": "iccId"
},
{
"id": "15256694405",
"kind": "msisdn"
},
{
"id": "5256694405",
"kind": "min"
}
],
"extendedAttributes": [
{
"key": "PrimaryPlaceOfUseTitle"
},
{
"key": "PrimaryPlaceOfUseFirstName",
"value": "5256694405",
},
{
"key": "PrimaryPlaceOfUseMiddleName"
},
{
"key": "PrimaryPlaceOfUseLastName",
"value": "ESN"
},
{
"key": "PrimaryPlaceOfUseSuffix"
},
{
"key": "PrimaryPlaceOfUseAddressLine1"
},
{
"key": "PrimaryPlaceOfUseAddressLine2"
},
{
"key": "PrimaryPlaceOfUseCity"
},
{
"key": "PrimaryPlaceOfUseState"
},
{
"key": "PrimaryPlaceOfUseCountry"
},
{
"key": "PrimaryPlaceOfUseZipCode"
},
{
"key": "PrimaryPlaceOfUseZipCode4"
},
{
"key": "PrimaryPlaceOfUseCBRPhone"
},
{
"key": "PrimaryPlaceOfUseCBRPhoneType"
},
{
"key": "PrimaryPlaceOfUseEmailAddress"
},
{
"key": "AccountNumber",
"value": "5256694405-00001"
},
{
"key": "SmsrOid"
},
{
"key": "ProfileStatus"
},
{
"key": "PromoCodes",
"value": ""
},
{
"key": "PromotionStartDate",
"value": ""
},
{
"key": "PromotionScheduledEndDate",
"value": ""
},
{
"key": "LeadId",
"value": ""
},
{
"key": "CustomerName",
"value": ""
},
{
"key": "CustomerAddressLine1",
"value": ""
},
{
"key": "CustomerAddressLine2",
"value": ""
},
{
"key": "CustomerAddressCity",
"value": ""
},
{
"key": "CustomerAddressState",
"value": ""
},
{
"key": "CustomerAddressZipCode",
"value": ""
},
{
"key": "ServiceZipCode",
"value": ""
},
{
"key": "SkuNumber",
"value": "VZW080000460053"
},
{
"key": "CostCenterCode"
},
{
"key": "PreIMEI",
"value": "3592254564568445"
},
{
"key": "PreSKU",
"value": "VZW080000100037"
},
{
"key": "SIMOTADate",
"value": "4/30/2020 1:22:18 PM"
},
{
"key": "RoamingStatus",
"value": "NotRoaming"
},
{
"key": "LastRoamingStatusUpdate",
"value": "9/24/2020 5:40:26 PM"
}
],
"groupNames": [
"Default: 0220433754-00001"
],
"ipAddress": "100.100.100.100",
"lastActivationBy": "User Verizon",
"lastActivationDate": "2016-11-04T11:06:28-04:00",
"lastConnectionDate": "2020-09-24T13:40:26-04:00"
}
]
}
I added a part to my script to pull the mdn, iccid and the imei from the array with the code that is below.
def puller(line_json):
line_data = json.loads(line_json)
mdn = (line_data['devices'][0]['deviceIds'][0]['id'])
iccid = (line_data['devices'][0]['deviceIds'][3]['id'])
imei = (line_data['devices'][0]['deviceIds'][2]['id'])
print('phone = ' ,mdn)
print('SIM = ' , iccid)
print('IMEI = ' , imei)
I tested this code and it works the way it should with one test ID. I then proceeded to test with another test ID and I learned that the array structure is not always the same. That second JSON array is below. I am wondering is there a better way to find the specific values that I want, but in the way that I am not specifically telling the script where in the structure the item will be as I did above.
{
"hasMoreData": false,
"devices": [
{
"accountName": "02234234234-00001",
"billingCycleEndDate": "2020-10-31T20:00:00-04:00",
"carrierInformations": [
{
"carrierName": "Verizon Wireless",
"servicePlan": "37776xdsfewsfwe576193",
"state": "active"
}
],
"connected": true,
"createdAt": "2016-05-24T15:55:06-04:00",
"deviceIds": [
{
"id": "0945437676404",
"kind": "esn"
},
{
"id": "1234565799",
"kind": "mdn"
},
{
"id": "31148454545458767",
"kind": "imsi"
},
{
"id": "01426786678211",
"kind": "imei"
},
{
"id": "89148000006456456454",
"kind": "iccId"
},
{
"id": "1234565799",
"kind": "min"
}
],
"extendedAttributes": [
{
"key": "PrimaryPlaceOfUseTitle"
},
{
"key": "PrimaryPlaceOfUseFirstName",
"value": "096114564506772"
},
{
"key": "PrimaryPlaceOfUseMiddleName"
},
{
"key": "PrimaryPlaceOfUseLastName",
"value": "096546454806772"
},
{
"key": "PrimaryPlaceOfUseSuffix"
},
{
"key": "PrimaryPlaceOfUseAddressLine1"
},
{
"key": "PrimaryPlaceOfUseAddressLine2"
},
{
"key": "PrimaryPlaceOfUseCity"
},
{
"key": "PrimaryPlaceOfUseState"
},
{
"key": "PrimaryPlaceOfUseCountry"
},
{
"key": "PrimaryPlaceOfUseZipCode"
},
{
"key": "PrimaryPlaceOfUseZipCode4"
},
{
"key": "PrimaryPlaceOfUseCBRPhone"
},
{
"key": "PrimaryPlaceOfUseCBRPhoneType"
},
{
"key": "PrimaryPlaceOfUseEmailAddress"
},
{
"key": "AccountNumber",
"value": "02242342354-00001"
},
{
"key": "SmsrOid"
},
{
"key": "ProfileStatus"
},
{
"key": "PromoCodes",
"value": ""
},
{
"key": "PromotionStartDate",
"value": ""
},
{
"key": "PromotionScheduledEndDate",
"value": ""
},
{
"key": "LeadId",
"value": ""
},
{
"key": "CustomerName",
"value": ""
},
{
"key": "CustomerAddressLine1",
"value": ""
},
{
"key": "CustomerAddressLine2",
"value": ""
},
{
"key": "CustomerAddressCity",
"value": ""
},
{
"key": "CustomerAddressState",
"value": ""
},
{
"key": "CustomerAddressZipCode",
"value": ""
},
{
"key": "ServiceZipCode",
"value": ""
},
{
"key": "SkuNumber",
"value": "VZW12000364343005"
},
{
"key": "CostCenterCode"
},
{
"key": "PreIMEI"
},
{
"key": "PreSKU",
"value": "VZW12000334340005"
},
{
"key": "SIMOTADate",
"value": "3/13/2020 10:52:07 AM"
},
{
"key": "RoamingStatus",
"value": "NotRoaming"
},
{
"key": "LastRoamingStatusUpdate",
"value": "10/20/2020 6:14:20 PM"
}
],
"groupNames": [
"Default: 02342343754-00001"
],
"ipAddress": "101.101.101.101",
"lastActivationBy": "User Verizon",
"lastActivationDate": "2016-05-24T15:55:16-04:00",
"lastConnectionDate": "2020-10-20T14:14:20-04:00"
}
]
}
I tried to use this block of code from some research I did to find the value that I was looking for; in this case, the mdn. Problem I have is that the response returns a blank set of brackets with no information, so I know there is something I probably did wrong.
def json_extract(obj, kind):
"""Recursively fetch values from nested JSON."""
arr = []
def extract(obj, arr, kind):
"""Recursively search for values of key in JSON tree."""
if isinstance(obj, dict):
for k, v in obj.items():
if isinstance(v, (dict, list)):
extract(v, arr, kind)
elif k == kind:
arr.append(v)
elif isinstance(obj, list):
for item in obj:
extract(item, arr, kind)
return arr
values = extract(obj, arr, kind)
return values
names = json_extract(response , 'mdn')
print(names)
I understood that you are trying to find, mdn, iccid and imei'id from the json object above,so, instead of recursion and the complicated coding that you have done there, it is easier to use python's inbuilt libraries to help you out:
You can use the next function for your purpose:
# load your json data
line_data = json.loads(data)
# narrow your focus on the array in question
device_ids = line_data['devices'][0]['deviceIds']
# This gets the first item's id attribute from the list that matches the condition, and returns None if no item matches.
mdn = next((x['id'] for x in device_ids if x['kind'] == "mdn"), None)
iccid = next((x['id'] for x in device_ids if x['kind'] == "iccid"), None)
imei = next((x['id'] for x in device_ids if x['kind'] == "imei"), None)
You will need to handle the None if it was unable to find such element in the array.
Reference : Find object in list that has attribute equal to some value (that meets any condition)

Search for key in Nested Json in Python

I have below Json.
My requirement is to search this Json and get 'id' value if 'name' is equal to 'Latisha Chase'
[
{
"_id": "5d3121cd001453772160a791",
"friends": [
{
"id": 6,
"name": "Mcknight Tran"
},
{
"id": 7,
"name": "Helena Bowers"
},
{
"id": 8,
"name": "Dorsey Ayala"
}
]
},
{
"_id": "5d3121cd838efa513e7dda96",
"friends": [ {
"friends": [
{
"id": 90,
"name": "w Stark"
},
{
"id": 91,
"name": "w Jacobs"
},
{
"id": 93,
"name": "w Garner"
}
]},
{
"id": 10,
"name": "Amalia Stark"
},
{
"id": 11,
"name": "Myra Jacobs"
},
{
"id": 12,
"name": "Norton Garner"
}
]
}
]
This is sample code that I have. Could anyone help me with this.?
I tried recursive codes online but didn't work with my example here.
Update:
Its not necessary that 'friends' will have single depth. it can have friends inside friends. ex: friends [{ friends[ {}]}]
A more general approach using recursion:
def recursive_function(name, l):
if isinstance(l,list):
for i in l:
recursive_function(name, i)
elif isinstance(l,dict):
if l.get("name") == name:
print (l.get("id"))
for v in l.values():
if isinstance(v, list) or isinstance(v, dict):
recursive_function(name, v)
recursive_function("Latisha Chase",json_obj)
Result:
3
Try this
j = [{
"_id": "5d3121cd001453772160a791",
"friends": [{
"id": 6,
"name": "Mcknight Tran"
},
{
"id": 7,
"name": "Helena Bowers"
},
{
"id": 8,
"name": "Dorsey Ayala"
}
]
},
{
"_id": "5d3121cded44d8ba6ad96b78",
"friends": [{
"id": 2,
"name": "June Gilbert"
},
{
"id": 3,
"name": "Latisha Chase"
},
{
"id": 4,
"name": "Franco Carlson"
}
]
},
{
"_id": "5d3121cd838efa513e7dda96",
"friends": [{
"id": 10,
"name": "Amalia Stark"
},
{
"id": 11,
"name": "Myra Jacobs"
},
{
"id": 12,
"name": "Norton Garner"
}
]
}
]
for x in j:
for y in x.get('friends'):
if y.get('name') == 'Latisha Chase':
print y.get('id')

Read a JSON file and select node like in Pandas dataframe

I need to read a JSON config file like the example below and change some of its values with a querying structure like in Pandas.
Ex:
[
{
"_id": "5d1f5d0289725ba2c32695ac",
"index": 0,
"guid": "d1a8c2e2-1011-4db2-97a8-b68777c2d18b",
"isActive": false,
"name": {
"first": "Barnett",
"last": "Obrien"
},
"latitude": "-76.327744",
"longitude": "-131.003501",
"friends": [
{
"friend_id": 0,
"name": "Burnett Burke"
},
{
"friend_id": 1,
"name": "Lawrence Hunt"
},
{
"friend_id": 2,
"name": "Nola Benjamin"
}
]
},
{
"_id": "5d1f5d023ef4523b5e326ae2",
"index": 1,
"guid": "6b0ad8a7-2b10-4892-9b91-fc7445038aca",
"isActive": true,
"name": {
"first": "Valerie",
"last": "Preston"
},
"latitude": "27.995886",
"longitude": "170.930419",
"friends": [
{
"friend_id": 0,
"name": "Gretchen Hobbs"
},
{
"friend_id": 1,
"name": "Irene Fox"
},
{
"friend_id": 2,
"name": "Porter King"
}
]
}
]
Then I wanted to change the value for the friend_id == 1 and object with guid == 6b0ad8a7-2b10-4892-9b91-fc7445038aca from Irene Fox to something else.
With Pandas I can have something like this:
valerie = dataframe['guid'] == '6b0ad8a7-2b10-4892-9b91-fc7445038aca'
friend1 = dataframe['friend_1'] == 1
dataframe[valerie & friend1]['name'] = 'Karen Smith'
How can I achieve this without having to add Pandas dependency?
With simple loop:
import json
import sys
data = json.load(open('input.json'))
for d in data:
if d["guid"] == "6b0ad8a7-2b10-4892-9b91-fc7445038aca":
for f in d["friends"]:
if f["friend_id"] == 1:
f["name"] = "Karen Smith"
# break <- uncomment if only one match is implied
# replace sys.stdout with output file pointer
json.dump(data, sys.stdout, indent=4)
you may also break the outer for loop if items/dicts have unique guids.
The output (for demonstration):
[
{
"_id": "5d1f5d0289725ba2c32695ac",
"index": 0,
"guid": "d1a8c2e2-1011-4db2-97a8-b68777c2d18b",
"isActive": false,
"name": {
"first": "Barnett",
"last": "Obrien"
},
"latitude": "-76.327744",
"longitude": "-131.003501",
"friends": [
{
"friend_id": 0,
"name": "Burnett Burke"
},
{
"friend_id": 1,
"name": "Lawrence Hunt"
},
{
"friend_id": 2,
"name": "Nola Benjamin"
}
]
},
{
"_id": "5d1f5d023ef4523b5e326ae2",
"index": 1,
"guid": "6b0ad8a7-2b10-4892-9b91-fc7445038aca",
"isActive": true,
"name": {
"first": "Valerie",
"last": "Preston"
},
"latitude": "27.995886",
"longitude": "170.930419",
"friends": [
{
"friend_id": 0,
"name": "Gretchen Hobbs"
},
{
"friend_id": 1,
"name": "Karen Smith"
},
{
"friend_id": 2,
"name": "Porter King"
}
]
}
]

Python built JSON with mixed types

Actually I build Json object starting from a python object.
My starting JSON is:
responseMsgObject = {'Version': 1,
'Id': 'xc23',
'Local': "US"
'Type': "Test",
'Message' : "Message body" }
responseMsgJson = json.dumps(responseMsgObject, sort_keys=False )
Every things works but now I need to put the JSON below into "Message" field.
{
"DepID": "001",
"Assets": [
{
"Type": "xyz",
"Text": [
"abc",
"def"
],
"Metadata": {
"V": "1",
"Req": true,
"Other": "othervalue"
},
"Check": "refdw321"
},
{
"Type": "jkl",
"Text": [
"ghi"
],
"Metadata": {
"V": "6"
},
"Check": "345ghsdan"
}
]
}
I built many other json (but simpler) but I'm in trouble with this json.
Thanks for the help.
try to replace true with True works fine for me
import json
responseMsgObject = {
'Version': 1,
'Id': 'xc23',
'Local': "US",
'Type': "Test",
'Message': {
"DepID": "001",
"Assets": [{
"Type": "xyz",
"Text": [
"abc",
"def"
],
"Metadata": {
"V": "1",
"Req": True,
"Other": "othervalue"
},
"Check": "refdw321"
}, {
"Type": "jkl",
"Text": [
"ghi"
],
"Metadata": {
"V": "6"
},
"Check": "345ghsdan4"
}]
}
}
responseMsgJson = json.dumps(responseMsgObject, sort_keys=False )
print("responseMsgJson", responseMsgJson)
DEMO

Categories

Resources