I have a large JSON file that needs to be parsed into a pandas DataFrame. Here is an example of the data structure:
{
"genericTags": [
{
"appliesTo": [
"program",
"email_batch",
"nurture",
"event",
"webinar"
],
"name": "Language",
"type": null,
"values": [
{
"value": "Chinese",
"hidden": false,
"deleted": false
},
{
"value": "English",
"hidden": false,
"deleted": false
},
{
"value": "French",
"hidden": false,
"deleted": false
},
{
"value": "Japanese",
"hidden": false,
"deleted": false
},
{
"value": "Portuguese",
"hidden": false,
"deleted": false
},
{
"value": "Spanish",
"hidden": false,
"deleted": false
}
],
"description": null
},
{
"appliesTo": [
"program",
"email_batch",
"nurture",
"webinar",
"event"
],
"name": "Product",
"type": null,
"values": [
{
"value": "P1",
"hidden": false,
"deleted": false
},
{
"value": "P2",
"hidden": false,
"deleted": false
},
{
"value": "P3",
"hidden": false,
"deleted": false
},
{
"value": "P4",
"hidden": false,
"deleted": false
},
{
"value": "P5",
"hidden": false,
"deleted": false
},
{
"value": "P6",
"hidden": false,
"deleted": false
},
{
"value": "P7",
"hidden": false,
"deleted": false
},
{
"value": "P8",
"hidden": false,
"deleted": false
},
{
"value": "P9",
"hidden": false,
"deleted": false
},
{
"value": "P10",
"hidden": false,
"deleted": false
},
{
"value": "P11",
"hidden": false,
"deleted": false
},
{
"value": "P12",
"hidden": false,
"deleted": false
},
{
"value": "Multiple",
"hidden": false,
"deleted": false
},
{
"value": "None",
"hidden": false,
"deleted": false
}
],
"description": null
},
{
"appliesTo": [
"program",
"email_batch",
"nurture",
"event",
"webinar"
],
"name": "Region",
"type": null,
"values": [
{
"value": "Americas",
"hidden": false,
"deleted": false
},
{
"value": "APJ",
"hidden": false,
"deleted": false
},
{
"value": "EMEA",
"hidden": false,
"deleted": false
},
{
"value": "WW",
"hidden": false,
"deleted": false
}
],
"description": null
}
],
"channelTags": [
{
"value": "Advertising",
"progression": {
"name": "Advertising",
"type": "program",
"description": null,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": null,
"typeId": 1,
"order": 0,
"success": false,
"hidden": false,
"system": true,
"isEventCapSet": null,
"isUsed": false
},
{
"name": "Member",
"description": null,
"typeId": null,
"order": 10,
"success": false,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
},
{
"name": "Engaged",
"description": null,
"typeId": null,
"order": 20,
"success": true,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
}
]
},
"hidden": false,
"deleted": false
},
{
"value": "Association",
"progression": {
"name": "Association",
"type": "program",
"description": null,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": null,
"typeId": 1,
"order": 0,
"success": false,
"hidden": false,
"system": true,
"isEventCapSet": null,
"isUsed": false
},
{
"name": "Member",
"description": null,
"typeId": null,
"order": 10,
"success": false,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
},
{
"name": "Engaged",
"description": null,
"typeId": null,
"order": 20,
"success": true,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
}
]
},
"hidden": false,
"deleted": false
},
{
"value": "Contact Request",
"progression": {
"name": "Contact Request",
"type": "program",
"description": null,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": null,
"typeId": 1,
"order": 0,
"success": false,
"hidden": false,
"system": true,
"isEventCapSet": null,
"isUsed": false
},
{
"name": "Member",
"description": null,
"typeId": null,
"order": 10,
"success": false,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
},
{
"name": "Sent Email",
"description": null,
"typeId": null,
"order": 20,
"success": false,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
},
{
"name": "Engaged",
"description": null,
"typeId": null,
"order": 30,
"success": true,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
}
]
},
"hidden": false,
"deleted": false
},
{
"value": "Content",
"progression": {
"name": "Content",
"type": "program",
"description": null,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": null,
"typeId": 1,
"order": 0,
"success": false,
"hidden": false,
"system": true,
"isEventCapSet": null,
"isUsed": false
},
{
"name": "Member",
"description": null,
"typeId": null,
"order": 10,
"success": false,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
},
{
"name": "Engaged",
"description": null,
"typeId": null,
"order": 20,
"success": true,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
}
]
},
"hidden": false,
"deleted": false
},
{
"value": "Content Syndication",
"progression": {
"name": "Content Syndication",
"type": "program",
"description": null,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": null,
"typeId": 1,
"order": 0,
"success": false,
"hidden": false,
"system": true,
"isEventCapSet": null,
"isUsed": false
},
{
"name": "Member",
"description": null,
"typeId": null,
"order": 10,
"success": false,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
},
{
"name": "Engaged",
"description": null,
"typeId": null,
"order": 20,
"success": true,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
}
]
},
"hidden": false,
"deleted": false
},
{
"value": "Direct Mail",
"progression": {
"name": "Direct Mail",
"type": "program",
"description": null,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": null,
"typeId": 1,
"order": 0,
"success": false,
"hidden": false,
"system": true,
"isEventCapSet": null,
"isUsed": false
},
{
"name": "Member",
"description": null,
"typeId": null,
"order": 10,
"success": false,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
},
{
"name": "Engaged",
"description": null,
"typeId": null,
"order": 20,
"success": true,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
}
]
},
"hidden": false,
"deleted": false
},
{
"value": "Display",
"progression": {
"name": "Display",
"type": "program",
"description": null,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": null,
"typeId": 1,
"order": 0,
"success": false,
"hidden": false,
"system": true,
"isEventCapSet": null,
"isUsed": false
},
{
"name": "Member",
"description": null,
"typeId": null,
"order": 10,
"success": false,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
},
{
"name": "Engaged",
"description": null,
"typeId": null,
"order": 20,
"success": true,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
}
]
},
"hidden": false,
"deleted": false
},
{
"value": "Email",
"progression": {
"name": "Email",
"type": "program",
"description": null,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": null,
"typeId": 1,
"order": 0,
"success": false,
"hidden": false,
"system": true,
"isEventCapSet": null,
"isUsed": true
},
{
"name": "Member",
"description": null,
"typeId": null,
"order": 10,
"success": false,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
},
{
"name": "Engaged",
"description": null,
"typeId": null,
"order": 20,
"success": true,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
}
]
},
"hidden": false,
"deleted": false
},
{
"value": "Email Send",
"progression": {
"name": "Email Send",
"type": "email_batch",
"description": null,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": null,
"typeId": 1,
"order": 0,
"success": false,
"hidden": false,
"system": true,
"isEventCapSet": null,
"isUsed": true
},
{
"name": "Member",
"description": null,
"typeId": null,
"order": 10,
"success": false,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
},
{
"name": "Engaged",
"description": null,
"typeId": null,
"order": 20,
"success": true,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
}
]
},
"hidden": false,
"deleted": false
},
{
"value": "Event",
"progression": {
"name": "Event",
"type": "event",
"description": null,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": null,
"typeId": 1,
"order": 0,
"success": false,
"hidden": false,
"system": true,
"isEventCapSet": false,
"isUsed": false
},
{
"name": "Invited",
"description": null,
"typeId": null,
"order": 10,
"success": false,
"hidden": false,
"system": false,
"isEventCapSet": false,
"isUsed": true
},
{
"name": "Registered",
"type": "Registered",
"typeName": "registered",
"description": null,
"typeId": 5,
"order": 20,
"success": false,
"hidden": false,
"system": false,
"isEventCapSet": false,
"isUsed": true
},
{
"name": "Waitlisted",
"type": "Waitlisted",
"typeName": "wait_listed",
"description": null,
"typeId": 4,
"order": 20,
"success": false,
"hidden": false,
"system": false,
"isEventCapSet": false,
"isUsed": false
},
{
"name": "No Show",
"description": null,
"typeId": null,
"order": 30,
"success": false,
"hidden": false,
"system": false,
"isEventCapSet": false,
"isUsed": true
},
{
"name": "Attended",
"type": "Attended",
"typeName": "attended",
"description": null,
"typeId": 7,
"order": 40,
"success": true,
"hidden": false,
"system": false,
"isEventCapSet": false,
"isUsed": true
},
{
"name": "Attended Session",
"description": null,
"typeId": null,
"order": 50,
"success": true,
"hidden": false,
"system": false,
"isEventCapSet": false,
"isUsed": true
}
]
},
"hidden": false,
"deleted": false
},
{
"value": "Intent Signal",
"progression": {
"name": "Intent Signal",
"type": "program",
"description": null,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": null,
"typeId": 1,
"order": 0,
"success": false,
"hidden": false,
"system": true,
"isEventCapSet": null,
"isUsed": false
},
{
"name": "Member",
"description": null,
"typeId": null,
"order": 10,
"success": false,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
},
{
"name": "Engaged",
"description": null,
"typeId": null,
"order": 20,
"success": true,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
}
]
},
"hidden": false,
"deleted": false
},
{
"value": "Interactive Chat",
"progression": {
"name": "Interactive Chat",
"type": "program",
"description": null,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": null,
"typeId": 1,
"order": 0,
"success": false,
"hidden": false,
"system": true,
"isEventCapSet": null,
"isUsed": false
},
{
"name": "Member",
"description": null,
"typeId": null,
"order": 10,
"success": false,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
},
{
"name": "Engaged",
"description": null,
"typeId": null,
"order": 20,
"success": true,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
}
]
},
"hidden": false,
"deleted": false
},
{
"value": "List Import",
"progression": {
"name": "List Import",
"type": "program",
"description": null,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": null,
"typeId": 1,
"order": 0,
"success": false,
"hidden": false,
"system": true,
"isEventCapSet": null,
"isUsed": false
},
{
"name": "Member",
"description": null,
"typeId": null,
"order": 10,
"success": false,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
},
{
"name": "Engaged",
"description": null,
"typeId": null,
"order": 20,
"success": true,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
}
]
},
"hidden": false,
"deleted": false
},
{
"value": "Nurture",
"progression": {
"name": "Nurture",
"type": "nurture",
"description": null,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": null,
"typeId": 1,
"order": 0,
"success": false,
"hidden": false,
"system": true,
"isEventCapSet": null,
"isUsed": true
},
{
"name": "Member",
"description": null,
"typeId": null,
"order": 10,
"success": false,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
},
{
"name": "Engaged",
"description": null,
"typeId": null,
"order": 20,
"success": true,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
},
{
"name": "Converted",
"description": null,
"typeId": null,
"order": 30,
"success": true,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": false
}
]
},
"hidden": false,
"deleted": false
},
{
"value": "Nurture Cast",
"progression": {
"name": "Nurture Cast",
"type": "program",
"description": null,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": null,
"typeId": 1,
"order": 0,
"success": false,
"hidden": false,
"system": true,
"isEventCapSet": null,
"isUsed": true
},
{
"name": "Member",
"description": null,
"typeId": null,
"order": 10,
"success": false,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
},
{
"name": "Excluded",
"description": null,
"typeId": null,
"order": 20,
"success": false,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
},
{
"name": "Converted",
"description": null,
"typeId": null,
"order": 30,
"success": true,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
},
{
"name": "Engaged",
"description": null,
"typeId": null,
"order": 30,
"success": true,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
}
]
},
"hidden": false,
"deleted": false
},
{
"value": "Operational",
"progression": {
"name": "Operational",
"type": "program",
"description": null,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": null,
"typeId": 1,
"order": 0,
"success": false,
"hidden": false,
"system": true,
"isEventCapSet": null,
"isUsed": false
},
{
"name": "Member",
"description": null,
"typeId": null,
"order": 10,
"success": false,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
},
{
"name": "Engaged",
"description": null,
"typeId": null,
"order": 20,
"success": true,
"hidden": true,
"system": false,
"isEventCapSet": null,
"isUsed": true
}
]
},
"hidden": false,
"deleted": false
}
]
}
What I specifically need is the `channelTags` array, which I have currently broken out like this:
import pandas as pd
import json
# Open the JSON file and parse it into plain Python objects.
# The encoding is given explicitly because JSON interchange files are UTF-8,
# while open() would otherwise fall back to the platform default.
with open('file_name.json', encoding='utf-8') as json_file:
    data = json.load(json_file)

# Build a DataFrame from the channelTags array: one row per channel tag.
# The nested "progression" object stays as a dict in a single column.
df_channel = pd.DataFrame.from_dict(data['channelTags'])
This results in the following data structure:
| value | hidden | deleted | progression.name | progression.type | progression.description | progression.steps |
| --- | --- | --- | --- | --- | --- | --- |
| Advertising | False | False | Advertising | program | null | [{'name': 'Not in Program', 'type': 'Not in Program', 'typeName': 'not_in_program', 'description': None, 'typeId': 1, 'order': 0, 'success': False, 'hidden': False, 'system': True, 'isEventCapSet': None, 'isUsed': False}, {'name': 'Member', 'description': None, 'typeId': None, 'order': 10, 'success': False, 'hidden': False, 'system': False, 'isEventCapSet': None, 'isUsed': True}, {'name': 'Engaged', 'description': None, 'typeId': None, 'order': 20, 'success': True, 'hidden': False, 'system': False, 'isEventCapSet': None, 'isUsed': True}] |
| Trade Show | False | False | Trade Show | event | null | [{'name': 'Not in Program', 'type': 'Not in Program', 'typeName': 'not_in_program', 'description': None, 'typeId': 1, 'order': 0, 'success': False, 'hidden': False, 'system': True, 'isEventCapSet': False, 'isUsed': False}, {'name': 'Invited', 'description': None, 'typeId': None, 'order': 10, 'success': False, 'hidden': False, 'system': False, 'isEventCapSet': False, 'isUsed': True}, {'name': 'Registered', 'type': 'Registered', 'typeName': 'registered', 'description': None, 'typeId': 5, 'order': 20, 'success': False, 'hidden': False, 'system': False, 'isEventCapSet': False, 'isUsed': True}, {'name': 'Waitlisted', 'type': 'Waitlisted', 'typeName': 'wait_listed', 'description': None, 'typeId': 4, 'order': 20, 'success': False, 'hidden': False, 'system': False, 'isEventCapSet': False, 'isUsed': False}, {'name': 'No Show', 'description': None, 'typeId': None, 'order': 30, 'success': False, 'hidden': False, 'system': False, 'isEventCapSet': False, 'isUsed': False}, {'name': 'Attended Show', 'type': 'Attended', 'typeName': 'attended', 'description': None, 'typeId': 7, 'order': 40, 'success': False, 'hidden': False, 'system': False, 'isEventCapSet': False, 'isUsed': True}, {'name': 'Visited Booth', 'description': None, 'typeId': None, 'order': 50, 'success': True, 'hidden': False, 'system': False, 'isEventCapSet': False, 'isUsed': True}, {'name': 'Attended Hosted Event', 'description': None, 'typeId': None, 'order': 60, 'success': True, 'hidden': False, 'system': False, 'isEventCapSet': False, 'isUsed': True}] |
I still need to go one level further down and parse out `progression.steps`. I have partly accomplished this with the following:
# Flatten every entry of each progression's "steps" list into its own row.
# No `meta` fields are requested, so the resulting rows carry no column
# identifying the channel they came from.
df_normal = pd.json_normalize(
    df_channel['progression'],
    record_path='steps',
    errors='ignore',
)
This code results in the following...
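| name | type | typeName | description | typeId | order | success | hidden | system | isEventCapSet | isUsed |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| Not in Program | Not in Program | Not in Program | null | 1 | 0 | FALSE | FALSE | TRUE | null | FALSE |
| Member | null | null | null | null | 10 | FALSE | FALSE | TRUE | null | FALSE |
| Engaged | null | null | null | null | 20 | FALSE | FALSE | TRUE | null | FALSE |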
What I am missing is the ability to combine these two DataFrames, as I do not have a key field. The end result should be the `df_channel` DataFrame and the `df_normal` DataFrame combined with a left join, so that each step row is attached to its `df_channel['value']` entry.
What am I missing here?
Starting from a sample dictionary `data` as below:
data = {
"genericTags": [
{
"appliesTo": [
"program",
"email_batch",
"nurture",
"event",
"webinar"
],
"name": "Language",
"type": None,
"values": [
{
"value": "Chinese",
"hidden": False,
"deleted": False
},
{
"value": "English",
"hidden": False,
"deleted": False
},
{
"value": "French",
"hidden": False,
"deleted": False
},
{
"value": "Japanese",
"hidden": False,
"deleted": False
},
{
"value": "Portuguese",
"hidden": False,
"deleted": False
},
{
"value": "Spanish",
"hidden": False,
"deleted": False
}
],
"description": None
},
{
"appliesTo": [
"program",
"email_batch",
"nurture",
"webinar",
"event"
],
"name": "Product",
"type": None,
"values": [
{
"value": "P1",
"hidden": False,
"deleted": False
},
{
"value": "P2",
"hidden": False,
"deleted": False
},
{
"value": "P3",
"hidden": False,
"deleted": False
},
{
"value": "P4",
"hidden": False,
"deleted": False
},
{
"value": "P5",
"hidden": False,
"deleted": False
},
{
"value": "P6",
"hidden": False,
"deleted": False
},
{
"value": "P7",
"hidden": False,
"deleted": False
},
{
"value": "P8",
"hidden": False,
"deleted": False
},
{
"value": "P9",
"hidden": False,
"deleted": False
},
{
"value": "P10",
"hidden": False,
"deleted": False
},
{
"value": "P11",
"hidden": False,
"deleted": False
},
{
"value": "P12",
"hidden": False,
"deleted": False
},
{
"value": "Multiple",
"hidden": False,
"deleted": False
},
{
"value": "None",
"hidden": False,
"deleted": False
}
],
"description": None
},
{
"appliesTo": [
"program",
"email_batch",
"nurture",
"event",
"webinar"
],
"name": "Region",
"type": None,
"values": [
{
"value": "Americas",
"hidden": False,
"deleted": False
},
{
"value": "APJ",
"hidden": False,
"deleted": False
},
{
"value": "EMEA",
"hidden": False,
"deleted": False
},
{
"value": "WW",
"hidden": False,
"deleted": False
}
],
"description": None
}
],
"channelTags": [
{
"value": "Advertising",
"progression": {
"name": "Advertising",
"type": "program",
"description": None,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": None,
"typeId": 1,
"order": 0,
"success": False,
"hidden": False,
"system": True,
"isEventCapSet": None,
"isUsed": False
},
{
"name": "Member",
"description": None,
"typeId": None,
"order": 10,
"success": False,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
},
{
"name": "Engaged",
"description": None,
"typeId": None,
"order": 20,
"success": True,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
}
]
},
"hidden": False,
"deleted": False
},
{
"value": "Association",
"progression": {
"name": "Association",
"type": "program",
"description": None,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": None,
"typeId": 1,
"order": 0,
"success": False,
"hidden": False,
"system": True,
"isEventCapSet": None,
"isUsed": False
},
{
"name": "Member",
"description": None,
"typeId": None,
"order": 10,
"success": False,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
},
{
"name": "Engaged",
"description": None,
"typeId": None,
"order": 20,
"success": True,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
}
]
},
"hidden": False,
"deleted": False
},
{
"value": "Contact Request",
"progression": {
"name": "Contact Request",
"type": "program",
"description": None,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": None,
"typeId": 1,
"order": 0,
"success": False,
"hidden": False,
"system": True,
"isEventCapSet": None,
"isUsed": False
},
{
"name": "Member",
"description": None,
"typeId": None,
"order": 10,
"success": False,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
},
{
"name": "Sent Email",
"description": None,
"typeId": None,
"order": 20,
"success": False,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
},
{
"name": "Engaged",
"description": None,
"typeId": None,
"order": 30,
"success": True,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
}
]
},
"hidden": False,
"deleted": False
},
{
"value": "Content",
"progression": {
"name": "Content",
"type": "program",
"description": None,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": None,
"typeId": 1,
"order": 0,
"success": False,
"hidden": False,
"system": True,
"isEventCapSet": None,
"isUsed": False
},
{
"name": "Member",
"description": None,
"typeId": None,
"order": 10,
"success": False,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
},
{
"name": "Engaged",
"description": None,
"typeId": None,
"order": 20,
"success": True,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
}
]
},
"hidden": False,
"deleted": False
},
{
"value": "Content Syndication",
"progression": {
"name": "Content Syndication",
"type": "program",
"description": None,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": None,
"typeId": 1,
"order": 0,
"success": False,
"hidden": False,
"system": True,
"isEventCapSet": None,
"isUsed": False
},
{
"name": "Member",
"description": None,
"typeId": None,
"order": 10,
"success": False,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
},
{
"name": "Engaged",
"description": None,
"typeId": None,
"order": 20,
"success": True,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
}
]
},
"hidden": False,
"deleted": False
},
{
"value": "Direct Mail",
"progression": {
"name": "Direct Mail",
"type": "program",
"description": None,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": None,
"typeId": 1,
"order": 0,
"success": False,
"hidden": False,
"system": True,
"isEventCapSet": None,
"isUsed": False
},
{
"name": "Member",
"description": None,
"typeId": None,
"order": 10,
"success": False,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
},
{
"name": "Engaged",
"description": None,
"typeId": None,
"order": 20,
"success": True,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
}
]
},
"hidden": False,
"deleted": False
},
{
"value": "Display",
"progression": {
"name": "Display",
"type": "program",
"description": None,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": None,
"typeId": 1,
"order": 0,
"success": False,
"hidden": False,
"system": True,
"isEventCapSet": None,
"isUsed": False
},
{
"name": "Member",
"description": None,
"typeId": None,
"order": 10,
"success": False,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
},
{
"name": "Engaged",
"description": None,
"typeId": None,
"order": 20,
"success": True,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
}
]
},
"hidden": False,
"deleted": False
},
{
"value": "Email",
"progression": {
"name": "Email",
"type": "program",
"description": None,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": None,
"typeId": 1,
"order": 0,
"success": False,
"hidden": False,
"system": True,
"isEventCapSet": None,
"isUsed": True
},
{
"name": "Member",
"description": None,
"typeId": None,
"order": 10,
"success": False,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
},
{
"name": "Engaged",
"description": None,
"typeId": None,
"order": 20,
"success": True,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
}
]
},
"hidden": False,
"deleted": False
},
{
"value": "Email Send",
"progression": {
"name": "Email Send",
"type": "email_batch",
"description": None,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": None,
"typeId": 1,
"order": 0,
"success": False,
"hidden": False,
"system": True,
"isEventCapSet": None,
"isUsed": True
},
{
"name": "Member",
"description": None,
"typeId": None,
"order": 10,
"success": False,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
},
{
"name": "Engaged",
"description": None,
"typeId": None,
"order": 20,
"success": True,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
}
]
},
"hidden": False,
"deleted": False
},
{
"value": "Event",
"progression": {
"name": "Event",
"type": "event",
"description": None,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": None,
"typeId": 1,
"order": 0,
"success": False,
"hidden": False,
"system": True,
"isEventCapSet": False,
"isUsed": False
},
{
"name": "Invited",
"description": None,
"typeId": None,
"order": 10,
"success": False,
"hidden": False,
"system": False,
"isEventCapSet": False,
"isUsed": True
},
{
"name": "Registered",
"type": "Registered",
"typeName": "registered",
"description": None,
"typeId": 5,
"order": 20,
"success": False,
"hidden": False,
"system": False,
"isEventCapSet": False,
"isUsed": True
},
{
"name": "Waitlisted",
"type": "Waitlisted",
"typeName": "wait_listed",
"description": None,
"typeId": 4,
"order": 20,
"success": False,
"hidden": False,
"system": False,
"isEventCapSet": False,
"isUsed": False
},
{
"name": "No Show",
"description": None,
"typeId": None,
"order": 30,
"success": False,
"hidden": False,
"system": False,
"isEventCapSet": False,
"isUsed": True
},
{
"name": "Attended",
"type": "Attended",
"typeName": "attended",
"description": None,
"typeId": 7,
"order": 40,
"success": True,
"hidden": False,
"system": False,
"isEventCapSet": False,
"isUsed": True
},
{
"name": "Attended Session",
"description": None,
"typeId": None,
"order": 50,
"success": True,
"hidden": False,
"system": False,
"isEventCapSet": False,
"isUsed": True
}
]
},
"hidden": False,
"deleted": False
},
{
"value": "Intent Signal",
"progression": {
"name": "Intent Signal",
"type": "program",
"description": None,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": None,
"typeId": 1,
"order": 0,
"success": False,
"hidden": False,
"system": True,
"isEventCapSet": None,
"isUsed": False
},
{
"name": "Member",
"description": None,
"typeId": None,
"order": 10,
"success": False,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
},
{
"name": "Engaged",
"description": None,
"typeId": None,
"order": 20,
"success": True,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
}
]
},
"hidden": False,
"deleted": False
},
{
"value": "Interactive Chat",
"progression": {
"name": "Interactive Chat",
"type": "program",
"description": None,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": None,
"typeId": 1,
"order": 0,
"success": False,
"hidden": False,
"system": True,
"isEventCapSet": None,
"isUsed": False
},
{
"name": "Member",
"description": None,
"typeId": None,
"order": 10,
"success": False,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
},
{
"name": "Engaged",
"description": None,
"typeId": None,
"order": 20,
"success": True,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
}
]
},
"hidden": False,
"deleted": False
},
{
"value": "List Import",
"progression": {
"name": "List Import",
"type": "program",
"description": None,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": None,
"typeId": 1,
"order": 0,
"success": False,
"hidden": False,
"system": True,
"isEventCapSet": None,
"isUsed": False
},
{
"name": "Member",
"description": None,
"typeId": None,
"order": 10,
"success": False,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
},
{
"name": "Engaged",
"description": None,
"typeId": None,
"order": 20,
"success": True,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
}
]
},
"hidden": False,
"deleted": False
},
{
"value": "Nurture",
"progression": {
"name": "Nurture",
"type": "nurture",
"description": None,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": None,
"typeId": 1,
"order": 0,
"success": False,
"hidden": False,
"system": True,
"isEventCapSet": None,
"isUsed": True
},
{
"name": "Member",
"description": None,
"typeId": None,
"order": 10,
"success": False,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
},
{
"name": "Engaged",
"description": None,
"typeId": None,
"order": 20,
"success": True,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
},
{
"name": "Converted",
"description": None,
"typeId": None,
"order": 30,
"success": True,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": False
}
]
},
"hidden": False,
"deleted": False
},
{
"value": "Nurture Cast",
"progression": {
"name": "Nurture Cast",
"type": "program",
"description": None,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": None,
"typeId": 1,
"order": 0,
"success": False,
"hidden": False,
"system": True,
"isEventCapSet": None,
"isUsed": True
},
{
"name": "Member",
"description": None,
"typeId": None,
"order": 10,
"success": False,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
},
{
"name": "Excluded",
"description": None,
"typeId": None,
"order": 20,
"success": False,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
},
{
"name": "Converted",
"description": None,
"typeId": None,
"order": 30,
"success": True,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
},
{
"name": "Engaged",
"description": None,
"typeId": None,
"order": 30,
"success": True,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
}
]
},
"hidden": False,
"deleted": False
},
{
"value": "Operational",
"progression": {
"name": "Operational",
"type": "program",
"description": None,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": None,
"typeId": 1,
"order": 0,
"success": False,
"hidden": False,
"system": True,
"isEventCapSet": None,
"isUsed": False
},
{
"name": "Member",
"description": None,
"typeId": None,
"order": 10,
"success": False,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
},
{
"name": "Engaged",
"description": None,
"typeId": None,
"order": 20,
"success": True,
"hidden": True,
"system": False,
"isEventCapSet": None,
"isUsed": True
}
]
},
"hidden": False,
"deleted": False
}
]
}
Try the following:
# Flatten each channel tag into one row; the nested "progression" object
# becomes dotted columns such as "progression.name" and "progression.steps".
df = pd.json_normalize(data['channelTags'])

# Stack every channel's list of step dicts into a single frame.
# keys=df.index labels each step with the row label of the channel it came
# from; dropping the inner per-channel counter leaves that label as the index.
steps = pd.concat(
    df['progression.steps'].apply(pd.DataFrame).tolist(),
    keys=df.index,
).droplevel(1)

# Prefix the step columns so the step-level "hidden" stays distinct from the
# channel-level "hidden".
steps = steps.add_prefix('step.')

# Join on the index. Without explicit keys, merge() would fall back to the
# columns the two frames share (only "hidden"), pairing each channel with
# every step that has the same flag instead of with its own steps.
df = df.merge(steps, how='left', left_index=True, right_index=True)
import json

# All data: one column per top-level key (genericTags and channelTags).
# pd.DataFrame(data) raises "All arrays must be of the same length" when the
# top-level lists differ in length, so each list is wrapped in a Series and
# the shorter one is padded with NaN. Rows pair the two lists by position only.
df = pd.DataFrame({key: pd.Series(tags) for key, tags in data.items()})

# Round-trip through JSON to get one plain dict per row. Padded cells come
# back as None; they are replaced with {} so json_normalize does not emit a
# bare "genericTags"/"channelTags" column for them.
to_json = [
    {key: ({} if tag is None else tag) for key, tag in record.items()}
    for record in json.loads(df.to_json(orient="records"))
]
df_v1 = pd.json_normalize(to_json)

# One row per entry of genericTags.values, then spread each entry's keys into
# columns. Rows with no list explode to NaN rather than a dict, so they
# contribute an empty record instead of breaking the DataFrame constructor.
mask = df_v1.explode('genericTags.values', ignore_index=True)
value_records = [
    cell if isinstance(cell, dict) else {}
    for cell in mask.pop('genericTags.values')
]
final = mask.join(pd.DataFrame(value_records, index=mask.index))

# For the second step: suffix the existing columns, because the step records
# reuse column names that are already present (e.g. "hidden").
final.columns = final.columns + '_'

# Same explode-and-spread for the channel progression steps.
final_mask = final.explode('channelTags.progression.steps_', ignore_index=True)
step_records = [
    cell if isinstance(cell, dict) else {}
    for cell in final_mask.pop('channelTags.progression.steps_')
]
final = final_mask.join(pd.DataFrame(step_records, index=final_mask.index))
I'm trying to write a Python script that merges two JSON files. For example:
First file: test.json
[
{
"user_id": "273631610",
"item_id": "0829482",
"rating": "7"
},
{
"user_id": "40688800",
"item_id": "1343092",
"rating": "8"
},
{
"user_id": "395680865",
"item_id": "1453405",
"rating": "8"
}
]
Second file: test_userz.json
[
{"contributors": null, "truncated": false, "text": "", "in_reply_to_status_id": null, "id": 421040281296052225, "favorite_count": 0, "source": "IMDb Movies & TV on iOS", "retweeted": false, "coordinates": null, "entities": {"symbols": [], "user_mentions": [], "hashtags": [{"indices": [23, 28], "text": "IMDb"}], "urls": [{"url": "5E1fLOxB", "indices": [29, 51], "expanded_url": "http://www.imdb.com/title/tt0829482", "display_url": "imdb.com/title/tt0829482"}]}, "in_reply_to_screen_name": null, "id_str": "421040281296052225", "retweet_count": 0, "in_reply_to_user_id": null, "favorited": false, "user": {"follow_request_sent": false, "profile_use_background_image": true, "id": 273631610, "verified": false, "profile_text_color": "333333", "profile_image_url_https": "https://pbs.twimg.com/profile_images/378800000723050633/0fadb894b240dd426eb7b850dd8378d6_normal.jpeg", "profile_sidebar_fill_color": "DDEEF6", "is_translator": false, "geo_enabled": true, "entities": {"description": {"urls": []}}, "followers_count": 330, "protected": false, "location": "southend on sea", "default_profile_image": false, "id_str": "273631610", "lang": "en", "utc_offset": null, "statuses_count": 897, "description": "", "friends_count": 973, "profile_link_color": "0084B4", "profile_image_url": "http://pbs.twimg.com/profile_images/378800000723050633/0fadb894b240dd426eb7b850dd8378d6_normal.jpeg", "notifications": false, "profile_background_image_url_https": "https://abs.twimg.com/images/themes/theme1/bg.png", "profile_background_color": "C0DEED", "profile_background_image_url": "http://abs.twimg.com/images/themes/theme1/bg.png", "name": "Ben Morris", "is_translation_enabled": false, "profile_background_tile": false, "favourites_count": 12, "screen_name": "benyere", "url": null, "created_at": "Mon Mar 28 21:31:52 +0000 2011", "contributors_enabled": false, "time_zone": null, "profile_sidebar_border_color": "C0DEED", "default_profile": true, "following": false, "listed_count": 0}, "geo": null, 
"in_reply_to_user_id_str": null, "possibly_sensitive": false, "lang": "en", "created_at": "Wed Jan 08 22:06:40 +0000 2014", "in_reply_to_status_id_str": null, "place": null},
{"contributors": null, "truncated": false, "text": "", "in_reply_to_status_id": null, "id": 421040870931320833, "favorite_count": 0, "source": "iOS", "retweeted": false, "coordinates": null, "entities": {"symbols": [], "user_mentions": [], "hashtags": [{"indices": [31, 36], "text": "IMDb"}], "urls": [{"url": "dTFeexXDk", "indices": [37, 59], "expanded_url": "http://www.imdb.com/title/tt1343092", "display_url": "imdb.com/title/tt1343092"}]}, "in_reply_to_screen_name": null, "id_str": "421040870931320833", "retweet_count": 0, "in_reply_to_user_id": null, "favorited": false, "user": {"follow_request_sent": false, "profile_use_background_image": false, "id": 40688800, "verified": false, "profile_text_color": "333333", "profile_image_url_https": "https://pbs.twimg.com/profile_images/378800000807375392/2ee9bbee70e09ac097f8415794877e7c_normal.jpeg", "profile_sidebar_fill_color": "FFFFFF", "is_translator": false, "geo_enabled": true, "entities": {"description": {"urls": []}}, "followers_count": 230, "protected": false, "location": "Alexandria", "default_profile_image": false, "id_str": "40688800", "lang": "en", "utc_offset": 7200, "statuses_count": 8914, "description": "#TedxAlexandriaU head. resp of #emshi_fe_7artak in #alex Day dreamer. #medstudent by accident. 
I speak sarcasm.", "friends_count": 722, "profile_link_color": "0099CC", "profile_image_url": "http://pbs.twimg.com/profile_images/378800000807375392/2ee9bbee70e09ac097f8415794877e7c_normal.jpeg", "notifications": false, "profile_background_image_url_https": "https://si0.twimg.com/profile_background_images/778453690/27087117198cdf1d18bb86a36140de62.jpeg", "profile_background_color": "FFF04D", "profile_banner_url": "https://pbs.twimg.com/profile_banners/40688800/1378766815", "profile_background_image_url": "http://a0.twimg.com/profile_background_images/778453690/27087117198cdf1d18bb86a36140de62.jpeg", "name": "NOUR", "is_translation_enabled": false, "profile_background_tile": true, "favourites_count": 324, "screen_name": "Nour_Ayman", "url": null, "created_at": "Sun May 17 16:38:59 +0000 2009", "contributors_enabled": false, "time_zone": "Cairo", "profile_sidebar_border_color": "FFFFFF", "default_profile": false, "following": false, "listed_count": 2}, "geo": null, "in_reply_to_user_id_str": null, "possibly_sensitive": false, "lang": "en", "created_at": "Wed Jan 08 22:09:00 +0000 2014", "in_reply_to_status_id_str": null, "place": null},
{"contributors": null, "truncated": false, "text": "", "in_reply_to_status_id": null, "id": 421041098287755264, "favorite_count": 0, "source": "iOS", "retweeted": false, "coordinates": null, "entities": {"symbols": [], "user_mentions": [], "hashtags": [{"indices": [34, 39], "text": "IMDb"}], "urls": [{"url": "I2HqieyDnn", "indices": [40, 62], "expanded_url": "http://www.imdb.com/title/tt1453405", "display_url": "imdb.com/title/tt1453405"}]}, "in_reply_to_screen_name": null, "id_str": "421041098287755264", "retweet_count": 0, "in_reply_to_user_id": null, "favorited": false, "user": {"follow_request_sent": false, "profile_use_background_image": true, "id": 395680865, "verified": false, "profile_text_color": "333333", "profile_image_url_https": "https://pbs.twimg.com/profile_images/3130997221/4a981c8594f3c9ec127542ae40bac4ce_normal.jpeg", "profile_sidebar_fill_color": "DDEEF6", "is_translator": false, "geo_enabled": false, "entities": {"description": {"urls": []}}, "followers_count": 9, "protected": false, "location": "Denmark", "default_profile_image": false, "id_str": "395680865", "lang": "en", "utc_offset": 3600, "statuses_count": 1557, "description": "On basketball, film and other really cool things.", "friends_count": 27, "profile_link_color": "0084B4", "profile_image_url": "http://pbs.twimg.com/profile_images/3130997221/4a981c8594f3c9ec127542ae40bac4ce_normal.jpeg", "notifications": false, "profile_background_image_url_https": "https://abs.twimg.com/images/themes/theme1/bg.png", "profile_background_color": "C0DEED", "profile_background_image_url": "http://abs.twimg.com/images/themes/theme1/bg.png", "name": "Kristian Meller", "is_translation_enabled": false, "profile_background_tile": false, "favourites_count": 5, "screen_name": "kristianmeller", "url": null, "created_at": "Sat Oct 22 02:18:30 +0000 2011", "contributors_enabled": false, "time_zone": "Berlin", "profile_sidebar_border_color": "C0DEED", "default_profile": true, "following": false, "listed_count": 
0}, "geo": null, "in_reply_to_user_id_str": null, "possibly_sensitive": false, "lang": "en", "created_at": "Wed Jan 08 22:09:55 +0000 2014", "in_reply_to_status_id_str": null, "place": null}
]
I would like to merge these into a single array like this:
[
{"user_id":"273631610","item_id":"0829482","rating":"7","contributors":null,"truncated":false,"text":"","in_reply_to_status_id":null,"id":421040281296052225,"favorite_count":0,"source":"<a href=\"http:\/\/itunes.apple.com\/us\/app\/imdb-movies-tv\/id342792525?mt=8&uo=4\" rel=\"nofollow\">IMDb Movies & TV on iOS<\/a>","retweeted":false,"coordinates":null,"entities":{"symbols":[],"user_mentions":[],"hashtags":[{"indices":[23,28],"text":"IMDb"}],"urls":[{"url":"/bk5E1fLOxB","indices":[29,51],"expanded_url":"http:\/\/www.imdb.com\/title\/tt0829482","display_url":"imdb.com\/title\/tt0829482"}]},"in_reply_to_screen_name":null,"id_str":"421040281296052225","retweet_count":0,"in_reply_to_user_id":null,"favorited":false,"user":{"follow_request_sent":false,"profile_use_background_image":true,"id":273631610,"verified":false,"profile_text_color":"333333","profile_image_url_https":"https:\/\/pbs.twimg.com\/profile_images\/378800000723050633\/0fadb894b240dd426eb7b850dd8378d6_normal.jpeg","profile_sidebar_fill_color":"DDEEF6","is_translator":false,"geo_enabled":true,"entities":{"description":{"urls":[]}},"followers_count":330,"protected":false,"location":"southend on sea","default_profile_image":false,"id_str":"273631610","lang":"en","utc_offset":null,"statuses_count":897,"description":"","friends_count":973,"profile_link_color":"0084B4","profile_image_url":"http:\/\/pbs.twimg.com\/profile_images\/378800000723050633\/0fadb894b240dd426eb7b850dd8378d6_normal.jpeg","notifications":false,"profile_background_image_url_https":"https:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png","profile_background_color":"C0DEED","profile_background_image_url":"http:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png","name":"Ben Morris","is_translation_enabled":false,"profile_background_tile":false,"favourites_count":12,"screen_name":"benyere","url":null,"created_at":"Mon Mar 28 21:31:52 +0000 
2011","contributors_enabled":false,"time_zone":null,"profile_sidebar_border_color":"C0DEED","default_profile":true,"following":false,"listed_count":0},"geo":null,"in_reply_to_user_id_str":null,"possibly_sensitive":false,"lang":"en","created_at":"Wed Jan 08 22:06:40 +0000 2014","in_reply_to_status_id_str":null,"place":null},
{"user_id":"40688800","item_id":"1343092","rating":"8","contributors":null,"truncated":false,"text":"","in_reply_to_status_id":null,"id":421040281296052225,"favorite_count":0,"source":"<a href=\"http:\/\/itunes.apple.com\/us\/app\/imdb-movies-tv\/id342792525?mt=8&uo=4\" rel=\"nofollow\">IMDb Movies & TV on iOS<\/a>","retweeted":false,"coordinates":null,"entities":{"symbols":[],"user_mentions":[],"hashtags":[{"indices":[23,28],"text":"IMDb"}],"urls":[{"url":"/bk5E1fLOxB","indices":[29,51],"expanded_url":"http:\/\/www.imdb.com\/title\/tt0829482","display_url":"imdb.com\/title\/tt0829482"}]},"in_reply_to_screen_name":null,"id_str":"421040281296052225","retweet_count":0,"in_reply_to_user_id":null,"favorited":false,"user":{"follow_request_sent":false,"profile_use_background_image":true,"id":273631610,"verified":false,"profile_text_color":"333333","profile_image_url_https":"https:\/\/pbs.twimg.com\/profile_images\/378800000723050633\/0fadb894b240dd426eb7b850dd8378d6_normal.jpeg","profile_sidebar_fill_color":"DDEEF6","is_translator":false,"geo_enabled":true,"entities":{"description":{"urls":[]}},"followers_count":330,"protected":false,"location":"southend on sea","default_profile_image":false,"id_str":"273631610","lang":"en","utc_offset":null,"statuses_count":897,"description":"","friends_count":973,"profile_link_color":"0084B4","profile_image_url":"http:\/\/pbs.twimg.com\/profile_images\/378800000723050633\/0fadb894b240dd426eb7b850dd8378d6_normal.jpeg","notifications":false,"profile_background_image_url_https":"https:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png","profile_background_color":"C0DEED","profile_background_image_url":"http:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png","name":"Ben Morris","is_translation_enabled":false,"profile_background_tile":false,"favourites_count":12,"screen_name":"benyere","url":null,"created_at":"Mon Mar 28 21:31:52 +0000 
2011","contributors_enabled":false,"time_zone":null,"profile_sidebar_border_color":"C0DEED","default_profile":true,"following":false,"listed_count":0},"geo":null,"in_reply_to_user_id_str":null,"possibly_sensitive":false,"lang":"en","created_at":"Wed Jan 08 22:06:40 +0000 2014","in_reply_to_status_id_str":null,"place":null},
{"user_id":"3956800865","item_id":"1453405","rating":"8","contributors":null,"truncated":false,"text":"","in_reply_to_status_id":null,"id":421040281296052225,"favorite_count":0,"source":"<a href=\"http:\/\/itunes.apple.com\/us\/app\/imdb-movies-tv\/id342792525?mt=8&uo=4\" rel=\"nofollow\">IMDb Movies & TV on iOS<\/a>","retweeted":false,"coordinates":null,"entities":{"symbols":[],"user_mentions":[],"hashtags":[{"indices":[23,28],"text":"IMDb"}],"urls":[{"url":"\/bk5E1fLOxB","indices":[29,51],"expanded_url":"http:\/\/www.imdb.com\/title\/tt0829482","display_url":"imdb.com\/title\/tt0829482"}]},"in_reply_to_screen_name":null,"id_str":"421040281296052225","retweet_count":0,"in_reply_to_user_id":null,"favorited":false,"user":{"follow_request_sent":false,"profile_use_background_image":true,"id":273631610,"verified":false,"profile_text_color":"333333","profile_image_url_https":"https:\/\/pbs.twimg.com\/profile_images\/378800000723050633\/0fadb894b240dd426eb7b850dd8378d6_normal.jpeg","profile_sidebar_fill_color":"DDEEF6","is_translator":false,"geo_enabled":true,"entities":{"description":{"urls":[]}},"followers_count":330,"protected":false,"location":"southend on sea","default_profile_image":false,"id_str":"273631610","lang":"en","utc_offset":null,"statuses_count":897,"description":"","friends_count":973,"profile_link_color":"0084B4","profile_image_url":"http:\/\/pbs.twimg.com\/profile_images\/378800000723050633\/0fadb894b240dd426eb7b850dd8378d6_normal.jpeg","notifications":false,"profile_background_image_url_https":"https:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png","profile_background_color":"C0DEED","profile_background_image_url":"http:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png","name":"Ben Morris","is_translation_enabled":false,"profile_background_tile":false,"favourites_count":12,"screen_name":"benyere","url":null,"created_at":"Mon Mar 28 21:31:52 +0000 
2011","contributors_enabled":false,"time_zone":null,"profile_sidebar_border_color":"C0DEED","default_profile":true,"following":false,"listed_count":0},"geo":null,"in_reply_to_user_id_str":null,"possibly_sensitive":false,"lang":"en","created_at":"Wed Jan 08 22:06:40 +0000 2014","in_reply_to_status_id_str":null,"place":null}
]
To do that, I wrote the following code:
import json
with open("test.json") as fin1:  # first input: the ratings array
data1 = json.load(fin1)
with open("test_userz.json") as fin2:  # second input: the tweets array
data2 = json.load(fin2)
data1.update(data2)  # fails: data1 is a list (the file's top level is a JSON array), and lists have no update()
with open("merged.json", "w") as fout:
json.dump(data1, fout)
But at runtime I see this error:
Traceback (most recent call last):
File "json/merge2jsons.py", line 7, in <module>
data1.update(data2)
AttributeError: 'list' object has no attribute 'update'
How can I do this merge?
The error occurs because data1, once loaded from your file, is a list. You are calling the dictionary method update on it, and lists do not have that method.
To help provide an example of your error, look at this:
>>> [].update('stuff')
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
AttributeError: 'list' object has no attribute 'update'
I tried calling update on my list. There is no update method on the list. If I do this for a dictionary, however:
>>> d = {'a': 1}
>>> d.update({'b': 2})
>>> d
{'b': 2, 'a': 1}
As you can see, the update method exists, and works fine.
So, what you want to do is iterate over one of the lists of dictionaries and update the matching dictionary in the other list as you go. Since you are confident that your data is in the same order and of equal length, this can be done simply with:
import json

# Load both JSON arrays; each is expected to be a list of dicts in matching order.
with open('f1.txt') as f1, open('f2.txt') as f2:
    first_list = json.load(f1)
    second_list = json.load(f2)

# Merge by position: copy every key of first_list[i] into second_list[i].
# On a duplicate key the value from first_list wins. second_list is modified
# in place and holds the merged result afterwards.
for i, v in enumerate(first_list):
    second_list[i].update(v)
What is happening here is that enumerate gives you an incrementing count alongside each item, which serves as the index for accessing the matching dictionary in the other list. With that index, you simply update each dictionary in second_list with the dictionary you get from the corresponding iteration over first_list.
To merge dictionaries that are at the same index in two lists:
# Merge the two lists position by position: each output dict holds the keys of
# the data1 entry plus the keys of the data2 entry, and on a duplicate key the
# data2 value wins. zip() stops at the shorter list.
merged = [{**first, **second} for first, second in zip(data1, data2)]
I am new to Elasticsearch and am attempting to do some data analysis of Twitter data by importing it into Elasticsearch and running Kibana on it. I'm getting stuck when importing Twitter data into Elasticsearch. Any help is appreciated!
Here's a sample working program that produces the error.
import json
from elasticsearch import Elasticsearch
es = Elasticsearch()
data = json.loads(open("data.json").read())  # parsed tweet; still carries the top-level "_id" key from data.json
es.index(index='tweets5', doc_type='tweets', id=data['id'], body=data)  # this call raises the RequestError (400) shown below
Here's the error:
Traceback (most recent call last):
File "elasticsearch_import_test.py", line 5, in <module>
es.index(index='tweets5', doc_type='tweets', id=data['id'], body=data)
File "/usr/local/lib/python2.7/site-packages/elasticsearch/client/utils.py", line 69, in _wrapped
return func(*args, params=params, **kwargs)
File "/usr/local/lib/python2.7/site-packages/elasticsearch/client/__init__.py", line 279, in index
_make_path(index, doc_type, id), params=params, body=body)
File "/usr/local/lib/python2.7/site-packages/elasticsearch/transport.py", line 329, in perform_request
status, headers, data = connection.perform_request(method, url, params, body, ignore=ignore, timeout=timeout)
File "/usr/local/lib/python2.7/site-packages/elasticsearch/connection/http_urllib3.py", line 109, in perform_request
self._raise_error(response.status, raw_data)
File "/usr/local/lib/python2.7/site-packages/elasticsearch/connection/base.py", line 108, in _raise_error
raise HTTP_EXCEPTIONS.get(status_code, TransportError)(status_code, error_message, additional_info)
elasticsearch.exceptions.RequestError: TransportError(400, u'illegal_argument_exception', u'[Raza][127.0.0.1:9300][indices:data/write/index[p]]')
Here's an example Twitter JSON file (data.json)
{
"_id": {
"$oid": "570597358c68d71c16b3b722"
},
"contributors": null,
"coordinates": null,
"created_at": "Wed Apr 06 23:09:41 +0000 2016",
"entities": {
"hashtags": [
{
"indices": [
68,
72
],
"text": "dnd"
},
{
"indices": [
73,
79
],
"text": "Nat20"
},
{
"indices": [
80,
93
],
"text": "CriticalRole"
},
{
"indices": [
94,
103
],
"text": "d20babes"
}
],
"media": [
{
"display_url": "pic.twitter.com/YQoxEuEAXV",
"expanded_url": "http://twitter.com/Zenttsilverwing/status/715953298076012545/photo/1",
"id": 715953292849754112,
"id_str": "715953292849754112",
"indices": [
104,
127
],
"media_url": "http://pbs.twimg.com/media/Ce-TugAUsAASZht.jpg",
"media_url_https": "https://pbs.twimg.com/media/Ce-TugAUsAASZht.jpg",
"sizes": {
"large": {
"h": 768,
"resize": "fit",
"w": 1024
},
"medium": {
"h": 450,
"resize": "fit",
"w": 600
},
"small": {
"h": 255,
"resize": "fit",
"w": 340
},
"thumb": {
"h": 150,
"resize": "crop",
"w": 150
}
},
"source_status_id": 715953298076012545,
"source_status_id_str": "715953298076012545",
"source_user_id": 2375847847,
"source_user_id_str": "2375847847",
"type": "photo",
"url": "https://shortened.url/YQoxEuEAXV"
}
],
"symbols": [],
"urls": [
{
"display_url": "darkcastlecollectibles.com",
"expanded_url": "http://www.darkcastlecollectibles.com/",
"indices": [
44,
67
],
"url": "https://shortened.url/SJgFTE0o8h"
}
],
"user_mentions": [
{
"id": 2375847847,
"id_str": "2375847847",
"indices": [
3,
19
],
"name": "Zack Chini",
"screen_name": "Zenttsilverwing"
}
]
},
"extended_entities": {
"media": [
{
"display_url": "pic.twitter.com/YQoxEuEAXV",
"expanded_url": "http://twitter.com/Zenttsilverwing/status/715953298076012545/photo/1",
"id": 715953292849754112,
"id_str": "715953292849754112",
"indices": [
104,
127
],
"media_url": "http://pbs.twimg.com/media/Ce-TugAUsAASZht.jpg",
"media_url_https": "https://pbs.twimg.com/media/Ce-TugAUsAASZht.jpg",
"sizes": {
"large": {
"h": 768,
"resize": "fit",
"w": 1024
},
"medium": {
"h": 450,
"resize": "fit",
"w": 600
},
"small": {
"h": 255,
"resize": "fit",
"w": 340
},
"thumb": {
"h": 150,
"resize": "crop",
"w": 150
}
},
"source_status_id": 715953298076012545,
"source_status_id_str": "715953298076012545",
"source_user_id": 2375847847,
"source_user_id_str": "2375847847",
"type": "photo",
"url": "https://shortened.url/YQoxEuEAXV"
},
{
"display_url": "pic.twitter.com/YQoxEuEAXV",
"expanded_url": "http://twitter.com/Zenttsilverwing/status/715953298076012545/photo/1",
"id": 715953295727009793,
"id_str": "715953295727009793",
"indices": [
104,
127
],
"media_url": "http://pbs.twimg.com/media/Ce-TuquUIAEsVn9.jpg",
"media_url_https": "https://pbs.twimg.com/media/Ce-TuquUIAEsVn9.jpg",
"sizes": {
"large": {
"h": 768,
"resize": "fit",
"w": 1024
},
"medium": {
"h": 450,
"resize": "fit",
"w": 600
},
"small": {
"h": 255,
"resize": "fit",
"w": 340
},
"thumb": {
"h": 150,
"resize": "crop",
"w": 150
}
},
"source_status_id": 715953298076012545,
"source_status_id_str": "715953298076012545",
"source_user_id": 2375847847,
"source_user_id_str": "2375847847",
"type": "photo",
"url": "https://shortened.url/YQoxEuEAXV"
}
]
},
"favorite_count": 0,
"favorited": false,
"filter_level": "low",
"geo": null,
"id": 717851801417031680,
"id_str": "717851801417031680",
"in_reply_to_screen_name": null,
"in_reply_to_status_id": null,
"in_reply_to_status_id_str": null,
"in_reply_to_user_id": null,
"in_reply_to_user_id_str": null,
"is_quote_status": false,
"lang": "en",
"place": null,
"possibly_sensitive": false,
"retweet_count": 0,
"retweeted": false,
"retweeted_status": {
"contributors": null,
"coordinates": null,
"created_at": "Fri Apr 01 17:25:42 +0000 2016",
"entities": {
"hashtags": [
{
"indices": [
47,
51
],
"text": "dnd"
},
{
"indices": [
52,
58
],
"text": "Nat20"
},
{
"indices": [
59,
72
],
"text": "CriticalRole"
},
{
"indices": [
73,
82
],
"text": "d20babes"
}
],
"media": [
{
"display_url": "pic.twitter.com/YQoxEuEAXV",
"expanded_url": "http://twitter.com/Zenttsilverwing/status/715953298076012545/photo/1",
"id": 715953292849754112,
"id_str": "715953292849754112",
"indices": [
83,
106
],
"media_url": "http://pbs.twimg.com/media/Ce-TugAUsAASZht.jpg",
"media_url_https": "https://pbs.twimg.com/media/Ce-TugAUsAASZht.jpg",
"sizes": {
"large": {
"h": 768,
"resize": "fit",
"w": 1024
},
"medium": {
"h": 450,
"resize": "fit",
"w": 600
},
"small": {
"h": 255,
"resize": "fit",
"w": 340
},
"thumb": {
"h": 150,
"resize": "crop",
"w": 150
}
},
"type": "photo",
"url": "https://shortened.url/YQoxEuEAXV"
}
],
"symbols": [],
"urls": [
{
"display_url": "darkcastlecollectibles.com",
"expanded_url": "http://www.darkcastlecollectibles.com/",
"indices": [
23,
46
],
"url": "https://shortened.url/SJgFTE0o8h"
}
],
"user_mentions": []
},
"extended_entities": {
"media": [
{
"display_url": "pic.twitter.com/YQoxEuEAXV",
"expanded_url": "http://twitter.com/Zenttsilverwing/status/715953298076012545/photo/1",
"id": 715953292849754112,
"id_str": "715953292849754112",
"indices": [
83,
106
],
"media_url": "http://pbs.twimg.com/media/Ce-TugAUsAASZht.jpg",
"media_url_https": "https://pbs.twimg.com/media/Ce-TugAUsAASZht.jpg",
"sizes": {
"large": {
"h": 768,
"resize": "fit",
"w": 1024
},
"medium": {
"h": 450,
"resize": "fit",
"w": 600
},
"small": {
"h": 255,
"resize": "fit",
"w": 340
},
"thumb": {
"h": 150,
"resize": "crop",
"w": 150
}
},
"type": "photo",
"url": "https://shortened.url/YQoxEuEAXV"
},
{
"display_url": "pic.twitter.com/YQoxEuEAXV",
"expanded_url": "http://twitter.com/Zenttsilverwing/status/715953298076012545/photo/1",
"id": 715953295727009793,
"id_str": "715953295727009793",
"indices": [
83,
106
],
"media_url": "http://pbs.twimg.com/media/Ce-TuquUIAEsVn9.jpg",
"media_url_https": "https://pbs.twimg.com/media/Ce-TuquUIAEsVn9.jpg",
"sizes": {
"large": {
"h": 768,
"resize": "fit",
"w": 1024
},
"medium": {
"h": 450,
"resize": "fit",
"w": 600
},
"small": {
"h": 255,
"resize": "fit",
"w": 340
},
"thumb": {
"h": 150,
"resize": "crop",
"w": 150
}
},
"type": "photo",
"url": "https://shortened.url/YQoxEuEAXV"
}
]
},
"favorite_count": 5,
"favorited": false,
"filter_level": "low",
"geo": null,
"id": 715953298076012545,
"id_str": "715953298076012545",
"in_reply_to_screen_name": null,
"in_reply_to_status_id": null,
"in_reply_to_status_id_str": null,
"in_reply_to_user_id": null,
"in_reply_to_user_id_str": null,
"is_quote_status": false,
"lang": "en",
"place": null,
"possibly_sensitive": false,
"retweet_count": 1,
"retweeted": false,
"source": "Twitter Web Client",
"text": "coins came in!! Thanks https://shortened.url/SJgFTE0o8h #dnd #Nat20 #CriticalRole #d20babes https://shortened.url/YQoxEuEAXV",
"truncated": false,
"user": {
"contributors_enabled": false,
"created_at": "Thu Mar 06 19:59:14 +0000 2014",
"default_profile": true,
"default_profile_image": false,
"description": "DM Geek Critter Con-man. I am here to like your art ^.^",
"favourites_count": 4990,
"follow_request_sent": null,
"followers_count": 57,
"following": null,
"friends_count": 183,
"geo_enabled": false,
"id": 2375847847,
"id_str": "2375847847",
"is_translator": false,
"lang": "en",
"listed_count": 7,
"location": "Flower Mound, TX",
"name": "Zack Chini",
"notifications": null,
"profile_background_color": "C0DEED",
"profile_background_image_url": "http://abs.twimg.com/images/themes/theme1/bg.png",
"profile_background_image_url_https": "https://abs.twimg.com/images/themes/theme1/bg.png",
"profile_background_tile": false,
"profile_banner_url": "https://pbs.twimg.com/profile_banners/2375847847/1430928759",
"profile_image_url": "http://pbs.twimg.com/profile_images/708816622358663168/mNF4Ysr5_normal.jpg",
"profile_image_url_https": "https://pbs.twimg.com/profile_images/708816622358663168/mNF4Ysr5_normal.jpg",
"profile_link_color": "0084B4",
"profile_sidebar_border_color": "C0DEED",
"profile_sidebar_fill_color": "DDEEF6",
"profile_text_color": "333333",
"profile_use_background_image": true,
"protected": false,
"screen_name": "Zenttsilverwing",
"statuses_count": 551,
"time_zone": null,
"url": null,
"utc_offset": null,
"verified": false
}
},
"source": "Twitter Web Client",
"text": "RT #Zenttsilverwing: coins came in!! Thanks https://shortened.url/SJgFTE0o8h #dnd #Nat20 #CriticalRole #d20babes https://shortened.url/YQoxEuEAXV",
"timestamp_ms": "1459984181156",
"truncated": false,
"user": {
"contributors_enabled": false,
"created_at": "Tue Feb 10 04:31:18 +0000 2009",
"default_profile": false,
"default_profile_image": false,
"description": "I use Twitter to primarily retweet Critter artwork of Critical Role and their own creations. I maintain a list of all the Critter artists I've come across.",
"favourites_count": 17586,
"follow_request_sent": null,
"followers_count": 318,
"following": null,
"friends_count": 651,
"geo_enabled": true,
"id": 20491914,
"id_str": "20491914",
"is_translator": false,
"lang": "en",
"listed_count": 33,
"location": "SanDiego, CA",
"name": "UnknownOutrider",
"notifications": null,
"profile_background_color": "EDECE9",
"profile_background_image_url": "http://abs.twimg.com/images/themes/theme3/bg.gif",
"profile_background_image_url_https": "https://abs.twimg.com/images/themes/theme3/bg.gif",
"profile_background_tile": false,
"profile_image_url": "http://pbs.twimg.com/profile_images/224346493/cartoon_dragon_tattoo_designs_normal.jpg",
"profile_image_url_https": "https://pbs.twimg.com/profile_images/224346493/cartoon_dragon_tattoo_designs_normal.jpg",
"profile_link_color": "088253",
"profile_sidebar_border_color": "D3D2CF",
"profile_sidebar_fill_color": "E3E2DE",
"profile_text_color": "634047",
"profile_use_background_image": true,
"protected": false,
"screen_name": "UnknownOutrider",
"statuses_count": 12760,
"time_zone": "Pacific Time (US & Canada)",
"url": null,
"utc_offset": -25200,
"verified": false
}
}
The reason this doesn't work is that you are trying to index a document that contains a field named _id, which already exists as a built-in field. So delete that field or rename it:
import json
from elasticsearch import Elasticsearch
es = Elasticsearch()

# Read the tweet inside a context manager so the file handle is closed
# as soon as parsing finishes.
with open("data.json") as fh:
    data = json.load(fh)

# The document body must not contain a field named _id, so drop it before
# indexing. pop() with a default also tolerates documents that have no _id.
# Alternative: keep the value under another name, e.g.
#   data['id_'] = data.pop('_id')
data.pop('_id', None)

es.index(index='tweets5', doc_type='tweets', id=data['id'], body=data)