I have a large json file that needs to be parsed into a dataframe. Here is an example of the data structure...
{
"genericTags": [
{
"appliesTo": [
"program",
"email_batch",
"nurture",
"event",
"webinar"
],
"name": "Language",
"type": null,
"values": [
{
"value": "Chinese",
"hidden": false,
"deleted": false
},
{
"value": "English",
"hidden": false,
"deleted": false
},
{
"value": "French",
"hidden": false,
"deleted": false
},
{
"value": "Japanese",
"hidden": false,
"deleted": false
},
{
"value": "Portuguese",
"hidden": false,
"deleted": false
},
{
"value": "Spanish",
"hidden": false,
"deleted": false
}
],
"description": null
},
{
"appliesTo": [
"program",
"email_batch",
"nurture",
"webinar",
"event"
],
"name": "Product",
"type": null,
"values": [
{
"value": "P1",
"hidden": false,
"deleted": false
},
{
"value": "P2",
"hidden": false,
"deleted": false
},
{
"value": "P3",
"hidden": false,
"deleted": false
},
{
"value": "P4",
"hidden": false,
"deleted": false
},
{
"value": "P5",
"hidden": false,
"deleted": false
},
{
"value": "P6",
"hidden": false,
"deleted": false
},
{
"value": "P7",
"hidden": false,
"deleted": false
},
{
"value": "P8",
"hidden": false,
"deleted": false
},
{
"value": "P9",
"hidden": false,
"deleted": false
},
{
"value": "P10",
"hidden": false,
"deleted": false
},
{
"value": "P11",
"hidden": false,
"deleted": false
},
{
"value": "P12",
"hidden": false,
"deleted": false
},
{
"value": "Multiple",
"hidden": false,
"deleted": false
},
{
"value": "None",
"hidden": false,
"deleted": false
}
],
"description": null
},
{
"appliesTo": [
"program",
"email_batch",
"nurture",
"event",
"webinar"
],
"name": "Region",
"type": null,
"values": [
{
"value": "Americas",
"hidden": false,
"deleted": false
},
{
"value": "APJ",
"hidden": false,
"deleted": false
},
{
"value": "EMEA",
"hidden": false,
"deleted": false
},
{
"value": "WW",
"hidden": false,
"deleted": false
}
],
"description": null
}
],
"channelTags": [
{
"value": "Advertising",
"progression": {
"name": "Advertising",
"type": "program",
"description": null,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": null,
"typeId": 1,
"order": 0,
"success": false,
"hidden": false,
"system": true,
"isEventCapSet": null,
"isUsed": false
},
{
"name": "Member",
"description": null,
"typeId": null,
"order": 10,
"success": false,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
},
{
"name": "Engaged",
"description": null,
"typeId": null,
"order": 20,
"success": true,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
}
]
},
"hidden": false,
"deleted": false
},
{
"value": "Association",
"progression": {
"name": "Association",
"type": "program",
"description": null,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": null,
"typeId": 1,
"order": 0,
"success": false,
"hidden": false,
"system": true,
"isEventCapSet": null,
"isUsed": false
},
{
"name": "Member",
"description": null,
"typeId": null,
"order": 10,
"success": false,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
},
{
"name": "Engaged",
"description": null,
"typeId": null,
"order": 20,
"success": true,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
}
]
},
"hidden": false,
"deleted": false
},
{
"value": "Contact Request",
"progression": {
"name": "Contact Request",
"type": "program",
"description": null,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": null,
"typeId": 1,
"order": 0,
"success": false,
"hidden": false,
"system": true,
"isEventCapSet": null,
"isUsed": false
},
{
"name": "Member",
"description": null,
"typeId": null,
"order": 10,
"success": false,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
},
{
"name": "Sent Email",
"description": null,
"typeId": null,
"order": 20,
"success": false,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
},
{
"name": "Engaged",
"description": null,
"typeId": null,
"order": 30,
"success": true,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
}
]
},
"hidden": false,
"deleted": false
},
{
"value": "Content",
"progression": {
"name": "Content",
"type": "program",
"description": null,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": null,
"typeId": 1,
"order": 0,
"success": false,
"hidden": false,
"system": true,
"isEventCapSet": null,
"isUsed": false
},
{
"name": "Member",
"description": null,
"typeId": null,
"order": 10,
"success": false,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
},
{
"name": "Engaged",
"description": null,
"typeId": null,
"order": 20,
"success": true,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
}
]
},
"hidden": false,
"deleted": false
},
{
"value": "Content Syndication",
"progression": {
"name": "Content Syndication",
"type": "program",
"description": null,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": null,
"typeId": 1,
"order": 0,
"success": false,
"hidden": false,
"system": true,
"isEventCapSet": null,
"isUsed": false
},
{
"name": "Member",
"description": null,
"typeId": null,
"order": 10,
"success": false,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
},
{
"name": "Engaged",
"description": null,
"typeId": null,
"order": 20,
"success": true,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
}
]
},
"hidden": false,
"deleted": false
},
{
"value": "Direct Mail",
"progression": {
"name": "Direct Mail",
"type": "program",
"description": null,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": null,
"typeId": 1,
"order": 0,
"success": false,
"hidden": false,
"system": true,
"isEventCapSet": null,
"isUsed": false
},
{
"name": "Member",
"description": null,
"typeId": null,
"order": 10,
"success": false,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
},
{
"name": "Engaged",
"description": null,
"typeId": null,
"order": 20,
"success": true,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
}
]
},
"hidden": false,
"deleted": false
},
{
"value": "Display",
"progression": {
"name": "Display",
"type": "program",
"description": null,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": null,
"typeId": 1,
"order": 0,
"success": false,
"hidden": false,
"system": true,
"isEventCapSet": null,
"isUsed": false
},
{
"name": "Member",
"description": null,
"typeId": null,
"order": 10,
"success": false,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
},
{
"name": "Engaged",
"description": null,
"typeId": null,
"order": 20,
"success": true,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
}
]
},
"hidden": false,
"deleted": false
},
{
"value": "Email",
"progression": {
"name": "Email",
"type": "program",
"description": null,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": null,
"typeId": 1,
"order": 0,
"success": false,
"hidden": false,
"system": true,
"isEventCapSet": null,
"isUsed": true
},
{
"name": "Member",
"description": null,
"typeId": null,
"order": 10,
"success": false,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
},
{
"name": "Engaged",
"description": null,
"typeId": null,
"order": 20,
"success": true,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
}
]
},
"hidden": false,
"deleted": false
},
{
"value": "Email Send",
"progression": {
"name": "Email Send",
"type": "email_batch",
"description": null,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": null,
"typeId": 1,
"order": 0,
"success": false,
"hidden": false,
"system": true,
"isEventCapSet": null,
"isUsed": true
},
{
"name": "Member",
"description": null,
"typeId": null,
"order": 10,
"success": false,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
},
{
"name": "Engaged",
"description": null,
"typeId": null,
"order": 20,
"success": true,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
}
]
},
"hidden": false,
"deleted": false
},
{
"value": "Event",
"progression": {
"name": "Event",
"type": "event",
"description": null,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": null,
"typeId": 1,
"order": 0,
"success": false,
"hidden": false,
"system": true,
"isEventCapSet": false,
"isUsed": false
},
{
"name": "Invited",
"description": null,
"typeId": null,
"order": 10,
"success": false,
"hidden": false,
"system": false,
"isEventCapSet": false,
"isUsed": true
},
{
"name": "Registered",
"type": "Registered",
"typeName": "registered",
"description": null,
"typeId": 5,
"order": 20,
"success": false,
"hidden": false,
"system": false,
"isEventCapSet": false,
"isUsed": true
},
{
"name": "Waitlisted",
"type": "Waitlisted",
"typeName": "wait_listed",
"description": null,
"typeId": 4,
"order": 20,
"success": false,
"hidden": false,
"system": false,
"isEventCapSet": false,
"isUsed": false
},
{
"name": "No Show",
"description": null,
"typeId": null,
"order": 30,
"success": false,
"hidden": false,
"system": false,
"isEventCapSet": false,
"isUsed": true
},
{
"name": "Attended",
"type": "Attended",
"typeName": "attended",
"description": null,
"typeId": 7,
"order": 40,
"success": true,
"hidden": false,
"system": false,
"isEventCapSet": false,
"isUsed": true
},
{
"name": "Attended Session",
"description": null,
"typeId": null,
"order": 50,
"success": true,
"hidden": false,
"system": false,
"isEventCapSet": false,
"isUsed": true
}
]
},
"hidden": false,
"deleted": false
},
{
"value": "Intent Signal",
"progression": {
"name": "Intent Signal",
"type": "program",
"description": null,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": null,
"typeId": 1,
"order": 0,
"success": false,
"hidden": false,
"system": true,
"isEventCapSet": null,
"isUsed": false
},
{
"name": "Member",
"description": null,
"typeId": null,
"order": 10,
"success": false,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
},
{
"name": "Engaged",
"description": null,
"typeId": null,
"order": 20,
"success": true,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
}
]
},
"hidden": false,
"deleted": false
},
{
"value": "Interactive Chat",
"progression": {
"name": "Interactive Chat",
"type": "program",
"description": null,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": null,
"typeId": 1,
"order": 0,
"success": false,
"hidden": false,
"system": true,
"isEventCapSet": null,
"isUsed": false
},
{
"name": "Member",
"description": null,
"typeId": null,
"order": 10,
"success": false,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
},
{
"name": "Engaged",
"description": null,
"typeId": null,
"order": 20,
"success": true,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
}
]
},
"hidden": false,
"deleted": false
},
{
"value": "List Import",
"progression": {
"name": "List Import",
"type": "program",
"description": null,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": null,
"typeId": 1,
"order": 0,
"success": false,
"hidden": false,
"system": true,
"isEventCapSet": null,
"isUsed": false
},
{
"name": "Member",
"description": null,
"typeId": null,
"order": 10,
"success": false,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
},
{
"name": "Engaged",
"description": null,
"typeId": null,
"order": 20,
"success": true,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
}
]
},
"hidden": false,
"deleted": false
},
{
"value": "Nurture",
"progression": {
"name": "Nurture",
"type": "nurture",
"description": null,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": null,
"typeId": 1,
"order": 0,
"success": false,
"hidden": false,
"system": true,
"isEventCapSet": null,
"isUsed": true
},
{
"name": "Member",
"description": null,
"typeId": null,
"order": 10,
"success": false,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
},
{
"name": "Engaged",
"description": null,
"typeId": null,
"order": 20,
"success": true,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
},
{
"name": "Converted",
"description": null,
"typeId": null,
"order": 30,
"success": true,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": false
}
]
},
"hidden": false,
"deleted": false
},
{
"value": "Nurture Cast",
"progression": {
"name": "Nurture Cast",
"type": "program",
"description": null,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": null,
"typeId": 1,
"order": 0,
"success": false,
"hidden": false,
"system": true,
"isEventCapSet": null,
"isUsed": true
},
{
"name": "Member",
"description": null,
"typeId": null,
"order": 10,
"success": false,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
},
{
"name": "Excluded",
"description": null,
"typeId": null,
"order": 20,
"success": false,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
},
{
"name": "Converted",
"description": null,
"typeId": null,
"order": 30,
"success": true,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
},
{
"name": "Engaged",
"description": null,
"typeId": null,
"order": 30,
"success": true,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
}
]
},
"hidden": false,
"deleted": false
},
{
"value": "Operational",
"progression": {
"name": "Operational",
"type": "program",
"description": null,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": null,
"typeId": 1,
"order": 0,
"success": false,
"hidden": false,
"system": true,
"isEventCapSet": null,
"isUsed": false
},
{
"name": "Member",
"description": null,
"typeId": null,
"order": 10,
"success": false,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
},
{
"name": "Engaged",
"description": null,
"typeId": null,
"order": 20,
"success": true,
"hidden": true,
"system": false,
"isEventCapSet": null,
"isUsed": true
}
]
},
"hidden": false,
"deleted": false
}
]
}
What I need is specifically the channelTags array, which I have currently broken out like this...
import pandas as pd
import json
# Load the exported tag definitions from disk. JSON is UTF-8 by specification,
# so the encoding is stated explicitly instead of relying on the locale default.
with open('file_name.json', encoding='utf-8') as json_file:
    data = json.load(json_file)

# Build one row per entry of the channelTags array. The list of records is
# passed straight to the DataFrame constructor (DataFrame.from_dict is meant
# for dict input). Each entry's nested "progression" object stays as a dict in
# a single 'progression' column.
df_channel = pd.DataFrame(data['channelTags'])
This is resulting in the following data structure...
value
hidden
deleted
progression.name
progression.type
progression.description
progression.steps
Advertising
False
False
Advertising
program
null
[{'name': 'Not in Program', 'type': 'Not in Program', 'typeName': 'not_in_program', 'description': None, 'typeId': 1, 'order': 0, 'success': False, 'hidden': False, 'system': True, 'isEventCapSet': None, 'isUsed': False}, {'name': 'Member', 'description': None, 'typeId': None, 'order': 10, 'success': False, 'hidden': False, 'system': False, 'isEventCapSet': None, 'isUsed': True}, {'name': 'Engaged', 'description': None, 'typeId': None, 'order': 20, 'success': True, 'hidden': False, 'system': False, 'isEventCapSet': None, 'isUsed': True}]
Trade Show
False
False
Trade Show
event
null
[{'name': 'Not in Program', 'type': 'Not in Program', 'typeName': 'not_in_program', 'description': None, 'typeId': 1, 'order': 0, 'success': False, 'hidden': False, 'system': True, 'isEventCapSet': False, 'isUsed': False}, {'name': 'Invited', 'description': None, 'typeId': None, 'order': 10, 'success': False, 'hidden': False, 'system': False, 'isEventCapSet': False, 'isUsed': True}, {'name': 'Registered', 'type': 'Registered', 'typeName': 'registered', 'description': None, 'typeId': 5, 'order': 20, 'success': False, 'hidden': False, 'system': False, 'isEventCapSet': False, 'isUsed': True}, {'name': 'Waitlisted', 'type': 'Waitlisted', 'typeName': 'wait_listed', 'description': None, 'typeId': 4, 'order': 20, 'success': False, 'hidden': False, 'system': False, 'isEventCapSet': False, 'isUsed': False}, {'name': 'No Show', 'description': None, 'typeId': None, 'order': 30, 'success': False, 'hidden': False, 'system': False, 'isEventCapSet': False, 'isUsed': False}, {'name': 'Attended Show', 'type': 'Attended', 'typeName': 'attended', 'description': None, 'typeId': 7, 'order': 40, 'success': False, 'hidden': False, 'system': False, 'isEventCapSet': False, 'isUsed': True}, {'name': 'Visited Booth', 'description': None, 'typeId': None, 'order': 50, 'success': True, 'hidden': False, 'system': False, 'isEventCapSet': False, 'isUsed': True}, {'name': 'Attended Hosted Event', 'description': None, 'typeId': None, 'order': 60, 'success': True, 'hidden': False, 'system': False, 'isEventCapSet': False, 'isUsed': True}]
I still need to be able to go one step further down to parse out the progression.steps. I have accomplished this somewhat by using the following...
# Expand every progression's "steps" list into one row per step. No `meta`
# fields are requested, so the rows carry only the step's own keys.
df_normal = pd.json_normalize(
    df_channel['progression'],
    record_path='steps',
    errors='ignore',
)
This code results in the following...
name
type
typeName
description
typeId
order
success
hidden
system
isEventCapSet
isUsed
Not in Program
Not in Program
Not in Program
null
1
0
FALSE
FALSE
TRUE
null
FALSE
Member
null
null
null
null
10
FALSE
FALSE
TRUE
null
FALSE
Engaged
null
null
null
null
20
FALSE
FALSE
TRUE
null
FALSE
What I am missing is the ability to combine these two dataframes, as I do not have a key field. The end result should be the df_channel dataframe and the df_normal dataframe combined with a left join on the df_channel['value'] entry.
What am I missing here?
Starting from a sample dictionary `data` as below:
data = {
"genericTags": [
{
"appliesTo": [
"program",
"email_batch",
"nurture",
"event",
"webinar"
],
"name": "Language",
"type": None,
"values": [
{
"value": "Chinese",
"hidden": False,
"deleted": False
},
{
"value": "English",
"hidden": False,
"deleted": False
},
{
"value": "French",
"hidden": False,
"deleted": False
},
{
"value": "Japanese",
"hidden": False,
"deleted": False
},
{
"value": "Portuguese",
"hidden": False,
"deleted": False
},
{
"value": "Spanish",
"hidden": False,
"deleted": False
}
],
"description": None
},
{
"appliesTo": [
"program",
"email_batch",
"nurture",
"webinar",
"event"
],
"name": "Product",
"type": None,
"values": [
{
"value": "P1",
"hidden": False,
"deleted": False
},
{
"value": "P2",
"hidden": False,
"deleted": False
},
{
"value": "P3",
"hidden": False,
"deleted": False
},
{
"value": "P4",
"hidden": False,
"deleted": False
},
{
"value": "P5",
"hidden": False,
"deleted": False
},
{
"value": "P6",
"hidden": False,
"deleted": False
},
{
"value": "P7",
"hidden": False,
"deleted": False
},
{
"value": "P8",
"hidden": False,
"deleted": False
},
{
"value": "P9",
"hidden": False,
"deleted": False
},
{
"value": "P10",
"hidden": False,
"deleted": False
},
{
"value": "P11",
"hidden": False,
"deleted": False
},
{
"value": "P12",
"hidden": False,
"deleted": False
},
{
"value": "Multiple",
"hidden": False,
"deleted": False
},
{
"value": "None",
"hidden": False,
"deleted": False
}
],
"description": None
},
{
"appliesTo": [
"program",
"email_batch",
"nurture",
"event",
"webinar"
],
"name": "Region",
"type": None,
"values": [
{
"value": "Americas",
"hidden": False,
"deleted": False
},
{
"value": "APJ",
"hidden": False,
"deleted": False
},
{
"value": "EMEA",
"hidden": False,
"deleted": False
},
{
"value": "WW",
"hidden": False,
"deleted": False
}
],
"description": None
}
],
"channelTags": [
{
"value": "Advertising",
"progression": {
"name": "Advertising",
"type": "program",
"description": None,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": None,
"typeId": 1,
"order": 0,
"success": False,
"hidden": False,
"system": True,
"isEventCapSet": None,
"isUsed": False
},
{
"name": "Member",
"description": None,
"typeId": None,
"order": 10,
"success": False,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
},
{
"name": "Engaged",
"description": None,
"typeId": None,
"order": 20,
"success": True,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
}
]
},
"hidden": False,
"deleted": False
},
{
"value": "Association",
"progression": {
"name": "Association",
"type": "program",
"description": None,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": None,
"typeId": 1,
"order": 0,
"success": False,
"hidden": False,
"system": True,
"isEventCapSet": None,
"isUsed": False
},
{
"name": "Member",
"description": None,
"typeId": None,
"order": 10,
"success": False,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
},
{
"name": "Engaged",
"description": None,
"typeId": None,
"order": 20,
"success": True,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
}
]
},
"hidden": False,
"deleted": False
},
{
"value": "Contact Request",
"progression": {
"name": "Contact Request",
"type": "program",
"description": None,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": None,
"typeId": 1,
"order": 0,
"success": False,
"hidden": False,
"system": True,
"isEventCapSet": None,
"isUsed": False
},
{
"name": "Member",
"description": None,
"typeId": None,
"order": 10,
"success": False,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
},
{
"name": "Sent Email",
"description": None,
"typeId": None,
"order": 20,
"success": False,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
},
{
"name": "Engaged",
"description": None,
"typeId": None,
"order": 30,
"success": True,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
}
]
},
"hidden": False,
"deleted": False
},
{
"value": "Content",
"progression": {
"name": "Content",
"type": "program",
"description": None,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": None,
"typeId": 1,
"order": 0,
"success": False,
"hidden": False,
"system": True,
"isEventCapSet": None,
"isUsed": False
},
{
"name": "Member",
"description": None,
"typeId": None,
"order": 10,
"success": False,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
},
{
"name": "Engaged",
"description": None,
"typeId": None,
"order": 20,
"success": True,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
}
]
},
"hidden": False,
"deleted": False
},
{
"value": "Content Syndication",
"progression": {
"name": "Content Syndication",
"type": "program",
"description": None,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": None,
"typeId": 1,
"order": 0,
"success": False,
"hidden": False,
"system": True,
"isEventCapSet": None,
"isUsed": False
},
{
"name": "Member",
"description": None,
"typeId": None,
"order": 10,
"success": False,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
},
{
"name": "Engaged",
"description": None,
"typeId": None,
"order": 20,
"success": True,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
}
]
},
"hidden": False,
"deleted": False
},
{
"value": "Direct Mail",
"progression": {
"name": "Direct Mail",
"type": "program",
"description": None,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": None,
"typeId": 1,
"order": 0,
"success": False,
"hidden": False,
"system": True,
"isEventCapSet": None,
"isUsed": False
},
{
"name": "Member",
"description": None,
"typeId": None,
"order": 10,
"success": False,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
},
{
"name": "Engaged",
"description": None,
"typeId": None,
"order": 20,
"success": True,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
}
]
},
"hidden": False,
"deleted": False
},
{
"value": "Display",
"progression": {
"name": "Display",
"type": "program",
"description": None,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": None,
"typeId": 1,
"order": 0,
"success": False,
"hidden": False,
"system": True,
"isEventCapSet": None,
"isUsed": False
},
{
"name": "Member",
"description": None,
"typeId": None,
"order": 10,
"success": False,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
},
{
"name": "Engaged",
"description": None,
"typeId": None,
"order": 20,
"success": True,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
}
]
},
"hidden": False,
"deleted": False
},
{
"value": "Email",
"progression": {
"name": "Email",
"type": "program",
"description": None,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": None,
"typeId": 1,
"order": 0,
"success": False,
"hidden": False,
"system": True,
"isEventCapSet": None,
"isUsed": True
},
{
"name": "Member",
"description": None,
"typeId": None,
"order": 10,
"success": False,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
},
{
"name": "Engaged",
"description": None,
"typeId": None,
"order": 20,
"success": True,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
}
]
},
"hidden": False,
"deleted": False
},
{
"value": "Email Send",
"progression": {
"name": "Email Send",
"type": "email_batch",
"description": None,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": None,
"typeId": 1,
"order": 0,
"success": False,
"hidden": False,
"system": True,
"isEventCapSet": None,
"isUsed": True
},
{
"name": "Member",
"description": None,
"typeId": None,
"order": 10,
"success": False,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
},
{
"name": "Engaged",
"description": None,
"typeId": None,
"order": 20,
"success": True,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
}
]
},
"hidden": False,
"deleted": False
},
{
"value": "Event",
"progression": {
"name": "Event",
"type": "event",
"description": None,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": None,
"typeId": 1,
"order": 0,
"success": False,
"hidden": False,
"system": True,
"isEventCapSet": False,
"isUsed": False
},
{
"name": "Invited",
"description": None,
"typeId": None,
"order": 10,
"success": False,
"hidden": False,
"system": False,
"isEventCapSet": False,
"isUsed": True
},
{
"name": "Registered",
"type": "Registered",
"typeName": "registered",
"description": None,
"typeId": 5,
"order": 20,
"success": False,
"hidden": False,
"system": False,
"isEventCapSet": False,
"isUsed": True
},
{
"name": "Waitlisted",
"type": "Waitlisted",
"typeName": "wait_listed",
"description": None,
"typeId": 4,
"order": 20,
"success": False,
"hidden": False,
"system": False,
"isEventCapSet": False,
"isUsed": False
},
{
"name": "No Show",
"description": None,
"typeId": None,
"order": 30,
"success": False,
"hidden": False,
"system": False,
"isEventCapSet": False,
"isUsed": True
},
{
"name": "Attended",
"type": "Attended",
"typeName": "attended",
"description": None,
"typeId": 7,
"order": 40,
"success": True,
"hidden": False,
"system": False,
"isEventCapSet": False,
"isUsed": True
},
{
"name": "Attended Session",
"description": None,
"typeId": None,
"order": 50,
"success": True,
"hidden": False,
"system": False,
"isEventCapSet": False,
"isUsed": True
}
]
},
"hidden": False,
"deleted": False
},
{
"value": "Intent Signal",
"progression": {
"name": "Intent Signal",
"type": "program",
"description": None,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": None,
"typeId": 1,
"order": 0,
"success": False,
"hidden": False,
"system": True,
"isEventCapSet": None,
"isUsed": False
},
{
"name": "Member",
"description": None,
"typeId": None,
"order": 10,
"success": False,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
},
{
"name": "Engaged",
"description": None,
"typeId": None,
"order": 20,
"success": True,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
}
]
},
"hidden": False,
"deleted": False
},
{
"value": "Interactive Chat",
"progression": {
"name": "Interactive Chat",
"type": "program",
"description": None,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": None,
"typeId": 1,
"order": 0,
"success": False,
"hidden": False,
"system": True,
"isEventCapSet": None,
"isUsed": False
},
{
"name": "Member",
"description": None,
"typeId": None,
"order": 10,
"success": False,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
},
{
"name": "Engaged",
"description": None,
"typeId": None,
"order": 20,
"success": True,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
}
]
},
"hidden": False,
"deleted": False
},
{
"value": "List Import",
"progression": {
"name": "List Import",
"type": "program",
"description": None,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": None,
"typeId": 1,
"order": 0,
"success": False,
"hidden": False,
"system": True,
"isEventCapSet": None,
"isUsed": False
},
{
"name": "Member",
"description": None,
"typeId": None,
"order": 10,
"success": False,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
},
{
"name": "Engaged",
"description": None,
"typeId": None,
"order": 20,
"success": True,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
}
]
},
"hidden": False,
"deleted": False
},
{
"value": "Nurture",
"progression": {
"name": "Nurture",
"type": "nurture",
"description": None,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": None,
"typeId": 1,
"order": 0,
"success": False,
"hidden": False,
"system": True,
"isEventCapSet": None,
"isUsed": True
},
{
"name": "Member",
"description": None,
"typeId": None,
"order": 10,
"success": False,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
},
{
"name": "Engaged",
"description": None,
"typeId": None,
"order": 20,
"success": True,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
},
{
"name": "Converted",
"description": None,
"typeId": None,
"order": 30,
"success": True,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": False
}
]
},
"hidden": False,
"deleted": False
},
{
"value": "Nurture Cast",
"progression": {
"name": "Nurture Cast",
"type": "program",
"description": None,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": None,
"typeId": 1,
"order": 0,
"success": False,
"hidden": False,
"system": True,
"isEventCapSet": None,
"isUsed": True
},
{
"name": "Member",
"description": None,
"typeId": None,
"order": 10,
"success": False,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
},
{
"name": "Excluded",
"description": None,
"typeId": None,
"order": 20,
"success": False,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
},
{
"name": "Converted",
"description": None,
"typeId": None,
"order": 30,
"success": True,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
},
{
"name": "Engaged",
"description": None,
"typeId": None,
"order": 30,
"success": True,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
}
]
},
"hidden": False,
"deleted": False
},
{
"value": "Operational",
"progression": {
"name": "Operational",
"type": "program",
"description": None,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": None,
"typeId": 1,
"order": 0,
"success": False,
"hidden": False,
"system": True,
"isEventCapSet": None,
"isUsed": False
},
{
"name": "Member",
"description": None,
"typeId": None,
"order": 10,
"success": False,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
},
{
"name": "Engaged",
"description": None,
"typeId": None,
"order": 20,
"success": True,
"hidden": True,
"system": False,
"isEventCapSet": None,
"isUsed": True
}
]
},
"hidden": False,
"deleted": False
}
]
}
Try this:
# Flatten each channel tag into one row; the nested "progression" keys become
# dotted columns such as 'progression.name' and 'progression.steps'.
df = pd.json_normalize(data['channelTags'])

# Turn every channel's list of step dicts into its own DataFrame and stack
# them. `keys=df.index` labels each step row with the row label of the channel
# it came from; dropping the innermost level leaves that label as the index.
steps = pd.concat(
    [pd.DataFrame(step_list) for step_list in df.pop('progression.steps')],
    keys=df.index,
).droplevel(-1)

# Left-join the steps onto their parent channel by index. A plain
# `df.merge(steps, how='left')` would join on the only shared column name
# ('hidden') and pair every channel with steps from all channels. The 'step.'
# prefix keeps the step-level 'hidden' apart from the channel-level one.
# The raw 'progression.steps' list column was popped above because its content
# now lives in the 'step.*' columns.
df = df.join(steps.add_prefix('step.'), how='left').reset_index(drop=True)
# Alternative approach: load the whole payload, flatten it, then explode the
# two nested lists one after the other.
# NOTE(review): pd.DataFrame needs every list in the dict to have the same
# length; the sample `data` above has 3 genericTags and 16 channelTags, so
# confirm this line runs on the real payload.
df=pd.DataFrame(data) #all data. there are two columns: genericTags and channelTags
import json
# Round-trip through JSON records so json_normalize can flatten the nested
# objects into dotted column names such as 'genericTags.values'.
to_json = json.loads(df.to_json(orient="records"))
df_v1 = pd.json_normalize(to_json)
# One row per element of each 'genericTags.values' list.
mask = df_v1.explode('genericTags.values', ignore_index=True)
# Remove the exploded column and join its expansion (one column per key of
# each element) back onto the remaining columns by index.
final=mask.join(pd.DataFrame([*mask.pop('genericTags.values')], index=mask.index))
final.columns=final.columns + '_' #for second step. because there are columns with the same name.
# Same explode-then-join for the progression steps; the column is addressed by
# its name with the '_' suffix appended on the line above.
final_mask = final.explode('channelTags.progression.steps_', ignore_index=True)
final=final_mask.join(pd.DataFrame([*final_mask.pop('channelTags.progression.steps_')], index=final_mask.index))
I'm trying to search the JSON returned by an API to get names and IDs. Some objects in the JSON do not have a "name" key. Is there any way to set a default? Or is there any other way to delete the JSON objects that have no name?
Code:
import requests
from pprint import pprint

# Courses endpoint of the Canvas REST API; the bearer token authenticates the call.
url = "https://<SCHOOL CANVAS PAGE>.instructure.com/api/v1/courses"
headers = {"Authorization": "Bearer <AUTHORIZATION KEY>"}

# requests has no default timeout, so set one to avoid hanging forever on an
# unresponsive server; raise_for_status() turns an HTTP error response into an
# exception instead of pretty-printing the error payload as if it were data.
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()
r = response.json()
pprint(r)

# for k in r:
#     print(f'Name: {k["name"]}')
# --> If no name it returns "print(f'Name: {k["name"]}') | KeyError: 'name'"
# for k in r:
#     print(f'ID: {k["id"]}')
Api Output:
[
{
"id": 3941,
"root_account_id": 1,
"account_id": 9,
"enrollment_term_id": 93,
"name": "7th Period",
"uuid": "8IGbnEvvHWK9beNGlI1uwGWLjv3R1hEIjExkY1kc",
"start_at": "2019-08-07T17:55:35Z",
"grading_standard_id": null,
"is_public": null,
"created_at": "2019-07-30T21:48:19Z",
"course_code": "Social Studies Advanced 08 - Fickley",
"default_view": "modules",
"license": null,
"grade_passback_setting": null,
"end_at": null,
"public_syllabus": false,
"public_syllabus_to_auth": false,
"storage_quota_mb": 500,
"is_public_to_auth_users": false,
"homeroom_course": false,
"course_color": null,
"friendly_name": null,
"apply_assignment_group_weights": false,
"calendar": {
"ics": "https://clevelandschools.instructure.com/feeds/calendars/course_8IGbnEvvHWK9beNGlI1uwGWLjv3R1hEIjExkY1kc.ics"
},
"time_zone": "America/New_York",
"original_name": "Social Studies Advanced 08 - Fickley",
"blueprint": false,
"template": false,
"enrollments": [
{
"type": "student",
"role": "StudentEnrollment",
"role_id": 3,
"user_id": 1546,
"enrollment_state": "active",
"limit_privileges_to_course_section": false
}
],
"hide_final_grades": false,
"workflow_state": "available",
"restrict_enrollments_to_course_dates": false,
"overridden_course_visibility": ""
},
{
"id": 10397,
"root_account_id": 1,
"account_id": 10,
"name": "Algebra 2 H - Scurlock",
"enrollment_term_id": 356,
"uuid": "W6nOG0uwQGCbbgnIzHdJxT0fxp99wZUhLQxe6SFk",
"start_at": "2021-07-22T13:32:44Z",
"grading_standard_id": null,
"is_public": null,
"created_at": "2021-07-19T16:05:23Z",
"course_code": "Algebra 2 H - Scurlock",
"default_view": "modules",
"license": null,
"grade_passback_setting": null,
"end_at": null,
"public_syllabus": false,
"public_syllabus_to_auth": false,
"storage_quota_mb": 500,
"is_public_to_auth_users": false,
"homeroom_course": false,
"course_color": null,
"friendly_name": null,
"apply_assignment_group_weights": false,
"calendar": {
"ics": "https://clevelandschools.instructure.com/feeds/calendars/course_W6nOG0uwQGCbbgnIzHdJxT0fxp99wZUhLQxe6SFk.ics"
},
"time_zone": "America/New_York",
"blueprint": false,
"template": false,
"enrollments": [
{
"type": "student",
"role": "StudentEnrollment",
"role_id": 3,
"user_id": 1546,
"enrollment_state": "active",
"limit_privileges_to_course_section": false
}
],
"hide_final_grades": false,
"workflow_state": "available",
"restrict_enrollments_to_course_dates": false
},
{
"id": 5844,
"root_account_id": 1,
"account_id": 10,
"name": "Chemistry 1 H - Blackburn-sem2",
"enrollment_term_id": 221,
"uuid": "pNc50PmQxqa1G42hzUFLjgR3JaS4TOGgJ9i0nOko",
"start_at": "2021-01-04T18:03:47Z",
"grading_standard_id": null,
"is_public": false,
"created_at": "2020-07-22T17:33:31Z",
"course_code": "Chemistry 1 H - Blackburn",
"default_view": "modules",
"license": "private",
"grade_passback_setting": null,
"end_at": null,
"public_syllabus": false,
"public_syllabus_to_auth": false,
"storage_quota_mb": 500,
"is_public_to_auth_users": false,
"homeroom_course": false,
"course_color": null,
"friendly_name": null,
"apply_assignment_group_weights": false,
"calendar": {
"ics": "https://clevelandschools.instructure.com/feeds/calendars/course_pNc50PmQxqa1G42hzUFLjgR3JaS4TOGgJ9i0nOko.ics"
},
"time_zone": "America/New_York",
"blueprint": false,
"template": false,
"enrollments": [
{
"type": "student",
"role": "StudentEnrollment",
"role_id": 3,
"user_id": 1546,
"enrollment_state": "active",
"limit_privileges_to_course_section": false
}
],
"hide_final_grades": false,
"workflow_state": "available",
"restrict_enrollments_to_course_dates": false,
"overridden_course_visibility": ""
},
{
"id": 10420,
"root_account_id": 1,
"account_id": 10,
"name": "CLIMB 10th Grade - Vacheresse",
"enrollment_term_id": 352,
"uuid": "rHFv0zDQQ0MqaLh5ls9Ty2NB65zcL7ZGO8sYhuHk",
"start_at": "2021-08-02T15:36:49Z",
"grading_standard_id": null,
"is_public": null,
"created_at": "2021-07-19T16:05:24Z",
"course_code": "CLIMB 10th Grade - Vacheresse",
"default_view": "feed",
"license": null,
"grade_passback_setting": null,
"end_at": null,
"public_syllabus": false,
"public_syllabus_to_auth": false,
"storage_quota_mb": 500,
"is_public_to_auth_users": false,
"homeroom_course": false,
"course_color": null,
"friendly_name": null,
"apply_assignment_group_weights": false,
"calendar": {
"ics": "https://clevelandschools.instructure.com/feeds/calendars/course_rHFv0zDQQ0MqaLh5ls9Ty2NB65zcL7ZGO8sYhuHk.ics"
},
"time_zone": "America/New_York",
"blueprint": false,
"template": false,
"enrollments": [
{
"type": "student",
"role": "StudentEnrollment",
"role_id": 3,
"user_id": 1546,
"enrollment_state": "active",
"limit_privileges_to_course_section": false
}
],
"hide_final_grades": false,
"workflow_state": "available",
"restrict_enrollments_to_course_dates": false
},
{
"id": 5378,
"access_restricted_by_date": true
},
{
"id": 5588,
"root_account_id": 1,
"account_id": 10,
"name": "English 1 H - Semester 2 Colbaugh",
"enrollment_term_id": 221,
"uuid": "2KbshUnWT0lFqekusEtVhh5LBLhldQN4OqdoKN5E",
"start_at": "2021-01-04T12:41:16Z",
"grading_standard_id": null,
"is_public": false,
"created_at": "2020-07-22T17:33:22Z",
"course_code": "English 1 H-S2 - Colbaugh",
"default_view": "modules",
"license": "private",
"grade_passback_setting": null,
"end_at": null,
"public_syllabus": false,
"public_syllabus_to_auth": false,
"storage_quota_mb": 500,
"is_public_to_auth_users": false,
"homeroom_course": false,
"course_color": null,
"friendly_name": null,
"apply_assignment_group_weights": false,
"calendar": {
"ics": "https://clevelandschools.instructure.com/feeds/calendars/course_2KbshUnWT0lFqekusEtVhh5LBLhldQN4OqdoKN5E.ics"
},
"time_zone": "America/New_York",
"blueprint": false,
"template": false,
"enrollments": [
{
"type": "student",
"role": "StudentEnrollment",
"role_id": 3,
"user_id": 1546,
"enrollment_state": "active",
"limit_privileges_to_course_section": false
}
],
"hide_final_grades": false,
"workflow_state": "available",
"restrict_enrollments_to_course_dates": false,
"overridden_course_visibility": ""
},
{
"id": 10525,
"root_account_id": 1,
"account_id": 10,
"name": "English 2 H - Poulakis",
"enrollment_term_id": 353,
"uuid": "U2BRAMor6PFOP2h8LUMdlnVFbtMbSFkHBVGZgS2w",
"start_at": "2021-08-02T18:52:26Z",
"grading_standard_id": null,
"is_public": false,
"created_at": "2021-07-19T16:05:27Z",
"course_code": "English 2 H - Poulakis",
"default_view": "modules",
"license": "private",
"grade_passback_setting": null,
"end_at": null,
"public_syllabus": false,
"public_syllabus_to_auth": false,
"storage_quota_mb": 500,
"is_public_to_auth_users": false,
"homeroom_course": false,
"course_color": null,
"friendly_name": null,
"apply_assignment_group_weights": false,
"calendar": {
"ics": "https://clevelandschools.instructure.com/feeds/calendars/course_U2BRAMor6PFOP2h8LUMdlnVFbtMbSFkHBVGZgS2w.ics"
},
"time_zone": "America/New_York",
"blueprint": false,
"template": false,
"enrollments": [
{
"type": "student",
"role": "StudentEnrollment",
"role_id": 3,
"user_id": 1546,
"enrollment_state": "active",
"limit_privileges_to_course_section": false
}
],
"hide_final_grades": false,
"workflow_state": "available",
"restrict_enrollments_to_course_dates": false,
"overridden_course_visibility": ""
},
{
"id": 10708,
"root_account_id": 1,
"account_id": 10,
"name": "Fall World History",
"enrollment_term_id": 353,
"uuid": "yGqeV55bAwYP9rQyjmGGE3QO1dYm6c2pGCT9eUQC",
"start_at": "2021-08-03T12:07:47Z",
"grading_standard_id": null,
"is_public": false,
"created_at": "2021-07-19T16:05:32Z",
"course_code": "World History and Geography - LaMagna",
"default_view": "modules",
"license": "private",
"grade_passback_setting": "nightly_sync",
"end_at": null,
"public_syllabus": false,
"public_syllabus_to_auth": false,
"storage_quota_mb": 500,
"is_public_to_auth_users": false,
"homeroom_course": false,
"course_color": null,
"friendly_name": null,
"apply_assignment_group_weights": false,
"calendar": {
"ics": "https://clevelandschools.instructure.com/feeds/calendars/course_yGqeV55bAwYP9rQyjmGGE3QO1dYm6c2pGCT9eUQC.ics"
},
"time_zone": "America/New_York",
"blueprint": false,
"template": false,
"enrollments": [
{
"type": "student",
"role": "StudentEnrollment",
"role_id": 3,
"user_id": 1546,
"enrollment_state": "active",
"limit_privileges_to_course_section": false
},
{
"type": "student",
"role": "StudentEnrollment",
"role_id": 3,
"user_id": 1546,
"enrollment_state": "active",
"limit_privileges_to_course_section": false
}
],
"hide_final_grades": false,
"workflow_state": "available",
"restrict_enrollments_to_course_dates": false,
"overridden_course_visibility": ""
},
{
"id": 5480,
"root_account_id": 1,
"account_id": 10,
"name": "Geometry H - Storey",
"enrollment_term_id": 224,
"uuid": "0hUvYrHEV8wGWFbtqejTMVDMIQtew6lVLZBKl3py",
"start_at": "2020-08-11T14:06:28Z",
"grading_standard_id": null,
"is_public": null,
"created_at": "2020-07-22T17:33:20Z",
"course_code": "Geometry H - Storey",
"default_view": "modules",
"license": null,
"grade_passback_setting": null,
"end_at": null,
"public_syllabus": false,
"public_syllabus_to_auth": false,
"storage_quota_mb": 500,
"is_public_to_auth_users": false,
"homeroom_course": false,
"course_color": null,
"friendly_name": null,
"apply_assignment_group_weights": false,
"calendar": {
"ics": "https://clevelandschools.instructure.com/feeds/calendars/course_0hUvYrHEV8wGWFbtqejTMVDMIQtew6lVLZBKl3py.ics"
},
"time_zone": "America/New_York",
"blueprint": false,
"template": false,
"enrollments": [
{
"type": "student",
"role": "StudentEnrollment",
"role_id": 3,
"user_id": 1546,
"enrollment_state": "active",
"limit_privileges_to_course_section": false
}
],
"hide_final_grades": false,
"workflow_state": "available",
"restrict_enrollments_to_course_dates": false
},
{
"id": 5347,
"root_account_id": 1,
"account_id": 10,
"name": "Human Geography AP - Brose",
"enrollment_term_id": 219,
"uuid": "BNUgjVCnf3SEaa5SEn14RdNXKJAy0RcSjoKbvO7o",
"start_at": "2020-08-10T01:58:54Z",
"grading_standard_id": null,
"is_public": null,
"created_at": "2020-07-22T17:33:18Z",
"course_code": "Human Geography AP - Brose",
"default_view": "modules",
"license": null,
"grade_passback_setting": null,
"end_at": null,
"public_syllabus": false,
"public_syllabus_to_auth": false,
"storage_quota_mb": 500,
"is_public_to_auth_users": false,
"homeroom_course": false,
"course_color": null,
"friendly_name": null,
"apply_assignment_group_weights": false,
"calendar": {
"ics": "https://clevelandschools.instructure.com/feeds/calendars/course_BNUgjVCnf3SEaa5SEn14RdNXKJAy0RcSjoKbvO7o.ics"
},
"time_zone": "America/New_York",
"blueprint": false,
"template": false,
"enrollments": [
{
"type": "student",
"role": "StudentEnrollment",
"role_id": 3,
"user_id": 1546,
"enrollment_state": "active",
"limit_privileges_to_course_section": false
}
],
"hide_final_grades": false,
"workflow_state": "available",
"restrict_enrollments_to_course_dates": false,
"overridden_course_visibility": ""
}
]
The object with ID 5378 has no "name" key, so I get the error:
KeyError: 'name'
Use .get() to specify a default value for when the key doesn't exist:
print(f'Name: {k.get("name","<na>")}')
Standalone example:
import json

# Two sample records; the first one deliberately has no "name" key.
text = '''\
[
{
"id": 5378,
"access_restricted_by_date": true
},
{
"id": 9999,
"name": "whatever",
"access_restricted_by_date": true
}
]
'''
data = json.loads(text)
for item in data:
    # dict.get() returns the fallback '-na-' instead of raising KeyError
    # when the record has no "name" key.
    print(item['id'], item.get('name', '-na-'))
Output:
5378 -na-
9999 whatever
I want to split the incidents by their "incidentType" value in Python. It always has one of these 5 values: period, injuryTime, goal, card and substitution.
Json File
{
"incidents": [
{
"text": "FT",
"homeScore": 2,
"awayScore": 1,
"isLive": false,
"time": 90,
"addedTime": 999,
"incidentType": "period"
},
{
"length": 4,
"time": 90,
"addedTime": 0,
"incidentType": "injuryTime"
},
{
"homeScore": 2,
"awayScore": 1,
"player": {
"name": "Mostafa Mohamed",
"firstName": "",
"lastName": "",
"slug": "mostafa-mohamed",
"shortName": "M. Mohamed",
"position": "F",
"userCount": 3949,
"id": 873551
},
"id": 141786584,
"time": 89,
"isHome": true,
"incidentClass": "penalty",
"incidentType": "goal"
},
{
"player": {
"name": "Duško Tošić",
"slug": "dusko-tosic",
"shortName": "D. Tošić",
"position": "D",
"userCount": 215,
"id": 14557
},
"playerName": "Duško Tošić",
"reason": "Foul",
"id": 119728583,
"time": 85,
"isHome": false,
"incidentClass": "yellow",
"incidentType": "card"
},
{
"playerIn": {
"name": "Younès Belhanda",
"slug": "younes-belhanda",
"shortName": "Y. Belhanda",
"position": "M",
"userCount": 2165,
"id": 72999
},
"playerOut": {
"name": "Martin Linnes",
"slug": "martin-linnes",
"shortName": "M. Linnes",
"position": "D",
"userCount": 339,
"id": 109569
},
"id": 120059400,
"time": 82,
"isHome": true,
"incidentType": "substitution"
},
{
"player": {
"name": "Kevin Varga",
"slug": "kevin-varga",
"shortName": "K. Varga",
"position": "M",
"userCount": 274,
"id": 602730
},
"playerName": "Kevin Varga",
"reason": "Foul",
"id": 119728582,
"time": 82,
"isHome": false,
"incidentClass": "yellow",
"incidentType": "card"
},
{
"playerIn": {
"name": "DeAndre Yedlin",
"slug": "deandre-yedlin",
"shortName": "D. Yedlin",
"position": "D",
"userCount": 702,
"id": 314040
},
"playerOut": {
"name": "Muhammed Kerem Aktürkoğlu",
"firstName": "",
"lastName": "",
"slug": "muhammed-kerem-akturkoglu",
"shortName": "M. K. Aktürkoğlu",
"position": "F",
"userCount": 281,
"id": 903324
},
"id": 120059399,
"time": 77,
"isHome": true,
"incidentType": "substitution"
},
{
"playerIn": {
"name": "Ryan Donk",
"slug": "ryan-donk",
"shortName": "R. Donk",
"position": "D",
"userCount": 489,
"id": 14900
},
"playerOut": {
"name": "Ryan Babel",
"slug": "ryan-babel",
"shortName": "R. Babel",
"position": "F",
"userCount": 1577,
"id": 1876
},
"id": 120059397,
"time": 72,
"isHome": true,
"incidentType": "substitution"
},
{
"playerIn": {
"name": "Emre Akbaba",
"slug": "emre-akbaba",
"shortName": "E. Akbaba",
"position": "M",
"userCount": 604,
"id": 343527
},
"playerOut": {
"name": "Gedson Fernandes",
"slug": "fernandes-gedson",
"shortName": "G. Fernandes",
"position": "M",
"userCount": 3030,
"id": 862055
},
"id": 120059396,
"time": 71,
"isHome": true,
"incidentType": "substitution"
},
{
"playerIn": {
"name": "Henry Onyekuru",
"slug": "henry-onyekuru",
"shortName": "H. Onyekuru",
"position": "M",
"userCount": 1474,
"id": 809220
},
"playerOut": {
"name": "Emre Kılınç",
"slug": "emre-kilinc",
"shortName": "E. Kılınç",
"position": "M",
"userCount": 526,
"id": 202032
},
"id": 120059398,
"time": 71,
"isHome": true,
"incidentType": "substitution"
},
{
"player": {
"name": "Haris Hajradinović",
"slug": "haris-hajradinovic",
"shortName": "H. Hajradinović",
"position": "M",
"userCount": 357,
"id": 254979
},
"playerName": "Haris Hajradinović",
"reason": "Foul",
"id": 119728581,
"time": 71,
"isHome": false,
"incidentClass": "yellow",
"incidentType": "card"
},
{
"homeScore": 1,
"awayScore": 1,
"player": {
"name": "Isaac Kiese Thelin",
"slug": "isaac-kiese-thelin",
"shortName": "I. K. Thelin",
"position": "F",
"userCount": 386,
"id": 178743
},
"assist1": {
"name": "Haris Hajradinović",
"slug": "haris-hajradinovic",
"shortName": "H. Hajradinović",
"position": "M",
"userCount": 357,
"id": 254979
},
"id": 141786585,
"time": 51,
"isHome": false,
"incidentClass": "regular",
"incidentType": "goal"
},
{
"playerIn": {
"name": "Kevin Varga",
"slug": "kevin-varga",
"shortName": "K. Varga",
"position": "M",
"userCount": 274,
"id": 602730
},
"playerOut": {
"name": "Gilbert Koomson",
"slug": "gilbert-koomson",
"shortName": "G. Koomson",
"position": "F",
"userCount": 76,
"id": 341107
},
"id": 120059401,
"time": 46,
"isHome": false,
"incidentType": "substitution"
},
{
"text": "HT",
"homeScore": 1,
"awayScore": 0,
"isLive": false,
"time": 45,
"addedTime": 999,
"incidentType": "period"
},
{
"player": {
"name": "Isaac Kiese Thelin",
"slug": "isaac-kiese-thelin",
"shortName": "I. K. Thelin",
"position": "F",
"userCount": 386,
"id": 178743
},
"playerName": "Isaac Kiese Thelin",
"reason": "Foul",
"id": 119728580,
"time": 15,
"isHome": false,
"incidentClass": "yellow",
"incidentType": "card"
},
{
"homeScore": 1,
"awayScore": 0,
"player": {
"name": "Muhammed Kerem Aktürkoğlu",
"firstName": "",
"lastName": "",
"slug": "muhammed-kerem-akturkoglu",
"shortName": "M. K. Aktürkoğlu",
"position": "F",
"userCount": 281,
"id": 903324
},
"id": 141786583,
"time": 9,
"isHome": true,
"incidentClass": "regular",
"incidentType": "goal"
}
]
}
ABC = {
"incidents": [
{
"text": "FT",
"homeScore": 2,
"awayScore": 1,
"isLive": False,
"time": 90,
"addedTime": 999,
"incidentType": "period"
},
{
"length": 4,
"time": 90,
"addedTime": 0,
"incidentType": "injuryTime"
},
{
"homeScore": 2,
"awayScore": 1,
"player": {
"name": "Mostafa Mohamed",
"firstName": "",
"lastName": "",
"slug": "mostafa-mohamed",
"shortName": "M. Mohamed",
"position": "F",
"userCount": 3949,
"id": 873551
},
"id": 141786584,
"time": 89,
"isHome": True,
"incidentClass": "penalty",
"incidentType": "goal"
},
{
"player": {
"name": "Duško Tošić",
"slug": "dusko-tosic",
"shortName": "D. Tošić",
"position": "D",
"userCount": 215,
"id": 14557
},
"playerName": "Duško Tošić",
"reason": "Foul",
"id": 119728583,
"time": 85,
"isHome": False,
"incidentClass": "yellow",
"incidentType": "card"
},
{
"playerIn": {
"name": "Younès Belhanda",
"slug": "younes-belhanda",
"shortName": "Y. Belhanda",
"position": "M",
"userCount": 2165,
"id": 72999
},
"playerOut": {
"name": "Martin Linnes",
"slug": "martin-linnes",
"shortName": "M. Linnes",
"position": "D",
"userCount": 339,
"id": 109569
},
"id": 120059400,
"time": 82,
"isHome": True,
"incidentType": "substitution"
},
{
"player": {
"name": "Kevin Varga",
"slug": "kevin-varga",
"shortName": "K. Varga",
"position": "M",
"userCount": 274,
"id": 602730
},
"playerName": "Kevin Varga",
"reason": "Foul",
"id": 119728582,
"time": 82,
"isHome": False,
"incidentClass": "yellow",
"incidentType": "card"
},
{
"playerIn": {
"name": "DeAndre Yedlin",
"slug": "deandre-yedlin",
"shortName": "D. Yedlin",
"position": "D",
"userCount": 702,
"id": 314040
},
"playerOut": {
"name": "Muhammed Kerem Aktürkoğlu",
"firstName": "",
"lastName": "",
"slug": "muhammed-kerem-akturkoglu",
"shortName": "M. K. Aktürkoğlu",
"position": "F",
"userCount": 281,
"id": 903324
},
"id": 120059399,
"time": 77,
"isHome": True,
"incidentType": "substitution"
},
{
"playerIn": {
"name": "Ryan Donk",
"slug": "ryan-donk",
"shortName": "R. Donk",
"position": "D",
"userCount": 489,
"id": 14900
},
"playerOut": {
"name": "Ryan Babel",
"slug": "ryan-babel",
"shortName": "R. Babel",
"position": "F",
"userCount": 1577,
"id": 1876
},
"id": 120059397,
"time": 72,
"isHome": True,
"incidentType": "substitution"
},
{
"playerIn": {
"name": "Emre Akbaba",
"slug": "emre-akbaba",
"shortName": "E. Akbaba",
"position": "M",
"userCount": 604,
"id": 343527
},
"playerOut": {
"name": "Gedson Fernandes",
"slug": "fernandes-gedson",
"shortName": "G. Fernandes",
"position": "M",
"userCount": 3030,
"id": 862055
},
"id": 120059396,
"time": 71,
"isHome": True,
"incidentType": "substitution"
},
{
"playerIn": {
"name": "Henry Onyekuru",
"slug": "henry-onyekuru",
"shortName": "H. Onyekuru",
"position": "M",
"userCount": 1474,
"id": 809220
},
"playerOut": {
"name": "Emre Kılınç",
"slug": "emre-kilinc",
"shortName": "E. Kılınç",
"position": "M",
"userCount": 526,
"id": 202032
},
"id": 120059398,
"time": 71,
"isHome": True,
"incidentType": "substitution"
},
{
"player": {
"name": "Haris Hajradinović",
"slug": "haris-hajradinovic",
"shortName": "H. Hajradinović",
"position": "M",
"userCount": 357,
"id": 254979
},
"playerName": "Haris Hajradinović",
"reason": "Foul",
"id": 119728581,
"time": 71,
"isHome": False,
"incidentClass": "yellow",
"incidentType": "card"
},
{
"homeScore": 1,
"awayScore": 1,
"player": {
"name": "Isaac Kiese Thelin",
"slug": "isaac-kiese-thelin",
"shortName": "I. K. Thelin",
"position": "F",
"userCount": 386,
"id": 178743
},
"assist1": {
"name": "Haris Hajradinović",
"slug": "haris-hajradinovic",
"shortName": "H. Hajradinović",
"position": "M",
"userCount": 357,
"id": 254979
},
"id": 141786585,
"time": 51,
"isHome": False,
"incidentClass": "regular",
"incidentType": "goal"
},
{
"playerIn": {
"name": "Kevin Varga",
"slug": "kevin-varga",
"shortName": "K. Varga",
"position": "M",
"userCount": 274,
"id": 602730
},
"playerOut": {
"name": "Gilbert Koomson",
"slug": "gilbert-koomson",
"shortName": "G. Koomson",
"position": "F",
"userCount": 76,
"id": 341107
},
"id": 120059401,
"time": 46,
"isHome": False,
"incidentType": "substitution"
},
{
"text": "HT",
"homeScore": 1,
"awayScore": 0,
"isLive": False,
"time": 45,
"addedTime": 999,
"incidentType": "period"
},
{
"player": {
"name": "Isaac Kiese Thelin",
"slug": "isaac-kiese-thelin",
"shortName": "I. K. Thelin",
"position": "F",
"userCount": 386,
"id": 178743
},
"playerName": "Isaac Kiese Thelin",
"reason": "Foul",
"id": 119728580,
"time": 15,
"isHome": False,
"incidentClass": "yellow",
"incidentType": "card"
},
{
"homeScore": 1,
"awayScore": 0,
"player": {
"name": "Muhammed Kerem Aktürkoğlu",
"firstName": "",
"lastName": "",
"slug": "muhammed-kerem-akturkoglu",
"shortName": "M. K. Aktürkoğlu",
"position": "F",
"userCount": 281,
"id": 903324
},
"id": 141786583,
"time": 9,
"isHome": True,
"incidentClass": "regular",
"incidentType": "goal"
}
]
}
First, create a dictionary to hold every distinct incidentType. Then iterate through the incidents and check whether each incidentType already exists in the dictionary. If it exists, append the incident to its list; if not, create a new key: value pair.
# Group the incidents by "incidentType": each distinct type maps to the list of
# incident dicts of that type, in the order they appear in ABC["incidents"].
result = {}
for js in ABC["incidents"]:
    icdType = js["incidentType"]
    # setdefault() creates the empty list the first time a type is seen and
    # returns the existing list afterwards, so one append covers both cases.
    result.setdefault(icdType, []).append(js)

# Show each group on its own line, followed by a blank line.
for key, val in result.items():
    print(key, ":", val, "\n")
I am new to Elasticsearch and am attempting to do some data analysis of Twitter data by importing it into Elasticsearch and running Kibana on it. I'm getting stuck when importing Twitter data into Elasticsearch. Any help is appreciated!
Here's a sample working program that produces the error.
import json
from elasticsearch import Elasticsearch

es = Elasticsearch()
# Read the tweet inside a context manager so the file handle is closed as soon
# as parsing is done, rather than being left open by open(...).read().
with open("data.json") as f:
    data = json.load(f)
# Index the whole tweet, using the tweet's own "id" as the document id.
es.index(index='tweets5', doc_type='tweets', id=data['id'], body=data)
Here's the error:
Traceback (most recent call last):
File "elasticsearch_import_test.py", line 5, in <module>
es.index(index='tweets5', doc_type='tweets', id=data['id'], body=data)
File "/usr/local/lib/python2.7/site-packages/elasticsearch/client/utils.py", line 69, in _wrapped
return func(*args, params=params, **kwargs)
File "/usr/local/lib/python2.7/site-packages/elasticsearch/client/__init__.py", line 279, in index
_make_path(index, doc_type, id), params=params, body=body)
File "/usr/local/lib/python2.7/site-packages/elasticsearch/transport.py", line 329, in perform_request
status, headers, data = connection.perform_request(method, url, params, body, ignore=ignore, timeout=timeout)
File "/usr/local/lib/python2.7/site-packages/elasticsearch/connection/http_urllib3.py", line 109, in perform_request
self._raise_error(response.status, raw_data)
File "/usr/local/lib/python2.7/site-packages/elasticsearch/connection/base.py", line 108, in _raise_error
raise HTTP_EXCEPTIONS.get(status_code, TransportError)(status_code, error_message, additional_info)
elasticsearch.exceptions.RequestError: TransportError(400, u'illegal_argument_exception', u'[Raza][127.0.0.1:9300][indices:data/write/index[p]]')
Here's an example Twitter JSON file (data.json)
{
"_id": {
"$oid": "570597358c68d71c16b3b722"
},
"contributors": null,
"coordinates": null,
"created_at": "Wed Apr 06 23:09:41 +0000 2016",
"entities": {
"hashtags": [
{
"indices": [
68,
72
],
"text": "dnd"
},
{
"indices": [
73,
79
],
"text": "Nat20"
},
{
"indices": [
80,
93
],
"text": "CriticalRole"
},
{
"indices": [
94,
103
],
"text": "d20babes"
}
],
"media": [
{
"display_url": "pic.twitter.com/YQoxEuEAXV",
"expanded_url": "http://twitter.com/Zenttsilverwing/status/715953298076012545/photo/1",
"id": 715953292849754112,
"id_str": "715953292849754112",
"indices": [
104,
127
],
"media_url": "http://pbs.twimg.com/media/Ce-TugAUsAASZht.jpg",
"media_url_https": "https://pbs.twimg.com/media/Ce-TugAUsAASZht.jpg",
"sizes": {
"large": {
"h": 768,
"resize": "fit",
"w": 1024
},
"medium": {
"h": 450,
"resize": "fit",
"w": 600
},
"small": {
"h": 255,
"resize": "fit",
"w": 340
},
"thumb": {
"h": 150,
"resize": "crop",
"w": 150
}
},
"source_status_id": 715953298076012545,
"source_status_id_str": "715953298076012545",
"source_user_id": 2375847847,
"source_user_id_str": "2375847847",
"type": "photo",
"url": "https://shortened.url/YQoxEuEAXV"
}
],
"symbols": [],
"urls": [
{
"display_url": "darkcastlecollectibles.com",
"expanded_url": "http://www.darkcastlecollectibles.com/",
"indices": [
44,
67
],
"url": "https://shortened.url/SJgFTE0o8h"
}
],
"user_mentions": [
{
"id": 2375847847,
"id_str": "2375847847",
"indices": [
3,
19
],
"name": "Zack Chini",
"screen_name": "Zenttsilverwing"
}
]
},
"extended_entities": {
"media": [
{
"display_url": "pic.twitter.com/YQoxEuEAXV",
"expanded_url": "http://twitter.com/Zenttsilverwing/status/715953298076012545/photo/1",
"id": 715953292849754112,
"id_str": "715953292849754112",
"indices": [
104,
127
],
"media_url": "http://pbs.twimg.com/media/Ce-TugAUsAASZht.jpg",
"media_url_https": "https://pbs.twimg.com/media/Ce-TugAUsAASZht.jpg",
"sizes": {
"large": {
"h": 768,
"resize": "fit",
"w": 1024
},
"medium": {
"h": 450,
"resize": "fit",
"w": 600
},
"small": {
"h": 255,
"resize": "fit",
"w": 340
},
"thumb": {
"h": 150,
"resize": "crop",
"w": 150
}
},
"source_status_id": 715953298076012545,
"source_status_id_str": "715953298076012545",
"source_user_id": 2375847847,
"source_user_id_str": "2375847847",
"type": "photo",
"url": "https://shortened.url/YQoxEuEAXV"
},
{
"display_url": "pic.twitter.com/YQoxEuEAXV",
"expanded_url": "http://twitter.com/Zenttsilverwing/status/715953298076012545/photo/1",
"id": 715953295727009793,
"id_str": "715953295727009793",
"indices": [
104,
127
],
"media_url": "http://pbs.twimg.com/media/Ce-TuquUIAEsVn9.jpg",
"media_url_https": "https://pbs.twimg.com/media/Ce-TuquUIAEsVn9.jpg",
"sizes": {
"large": {
"h": 768,
"resize": "fit",
"w": 1024
},
"medium": {
"h": 450,
"resize": "fit",
"w": 600
},
"small": {
"h": 255,
"resize": "fit",
"w": 340
},
"thumb": {
"h": 150,
"resize": "crop",
"w": 150
}
},
"source_status_id": 715953298076012545,
"source_status_id_str": "715953298076012545",
"source_user_id": 2375847847,
"source_user_id_str": "2375847847",
"type": "photo",
"url": "https://shortened.url/YQoxEuEAXV"
}
]
},
"favorite_count": 0,
"favorited": false,
"filter_level": "low",
"geo": null,
"id": 717851801417031680,
"id_str": "717851801417031680",
"in_reply_to_screen_name": null,
"in_reply_to_status_id": null,
"in_reply_to_status_id_str": null,
"in_reply_to_user_id": null,
"in_reply_to_user_id_str": null,
"is_quote_status": false,
"lang": "en",
"place": null,
"possibly_sensitive": false,
"retweet_count": 0,
"retweeted": false,
"retweeted_status": {
"contributors": null,
"coordinates": null,
"created_at": "Fri Apr 01 17:25:42 +0000 2016",
"entities": {
"hashtags": [
{
"indices": [
47,
51
],
"text": "dnd"
},
{
"indices": [
52,
58
],
"text": "Nat20"
},
{
"indices": [
59,
72
],
"text": "CriticalRole"
},
{
"indices": [
73,
82
],
"text": "d20babes"
}
],
"media": [
{
"display_url": "pic.twitter.com/YQoxEuEAXV",
"expanded_url": "http://twitter.com/Zenttsilverwing/status/715953298076012545/photo/1",
"id": 715953292849754112,
"id_str": "715953292849754112",
"indices": [
83,
106
],
"media_url": "http://pbs.twimg.com/media/Ce-TugAUsAASZht.jpg",
"media_url_https": "https://pbs.twimg.com/media/Ce-TugAUsAASZht.jpg",
"sizes": {
"large": {
"h": 768,
"resize": "fit",
"w": 1024
},
"medium": {
"h": 450,
"resize": "fit",
"w": 600
},
"small": {
"h": 255,
"resize": "fit",
"w": 340
},
"thumb": {
"h": 150,
"resize": "crop",
"w": 150
}
},
"type": "photo",
"url": "https://shortened.url/YQoxEuEAXV"
}
],
"symbols": [],
"urls": [
{
"display_url": "darkcastlecollectibles.com",
"expanded_url": "http://www.darkcastlecollectibles.com/",
"indices": [
23,
46
],
"url": "https://shortened.url/SJgFTE0o8h"
}
],
"user_mentions": []
},
"extended_entities": {
"media": [
{
"display_url": "pic.twitter.com/YQoxEuEAXV",
"expanded_url": "http://twitter.com/Zenttsilverwing/status/715953298076012545/photo/1",
"id": 715953292849754112,
"id_str": "715953292849754112",
"indices": [
83,
106
],
"media_url": "http://pbs.twimg.com/media/Ce-TugAUsAASZht.jpg",
"media_url_https": "https://pbs.twimg.com/media/Ce-TugAUsAASZht.jpg",
"sizes": {
"large": {
"h": 768,
"resize": "fit",
"w": 1024
},
"medium": {
"h": 450,
"resize": "fit",
"w": 600
},
"small": {
"h": 255,
"resize": "fit",
"w": 340
},
"thumb": {
"h": 150,
"resize": "crop",
"w": 150
}
},
"type": "photo",
"url": "https://shortened.url/YQoxEuEAXV"
},
{
"display_url": "pic.twitter.com/YQoxEuEAXV",
"expanded_url": "http://twitter.com/Zenttsilverwing/status/715953298076012545/photo/1",
"id": 715953295727009793,
"id_str": "715953295727009793",
"indices": [
83,
106
],
"media_url": "http://pbs.twimg.com/media/Ce-TuquUIAEsVn9.jpg",
"media_url_https": "https://pbs.twimg.com/media/Ce-TuquUIAEsVn9.jpg",
"sizes": {
"large": {
"h": 768,
"resize": "fit",
"w": 1024
},
"medium": {
"h": 450,
"resize": "fit",
"w": 600
},
"small": {
"h": 255,
"resize": "fit",
"w": 340
},
"thumb": {
"h": 150,
"resize": "crop",
"w": 150
}
},
"type": "photo",
"url": "https://shortened.url/YQoxEuEAXV"
}
]
},
"favorite_count": 5,
"favorited": false,
"filter_level": "low",
"geo": null,
"id": 715953298076012545,
"id_str": "715953298076012545",
"in_reply_to_screen_name": null,
"in_reply_to_status_id": null,
"in_reply_to_status_id_str": null,
"in_reply_to_user_id": null,
"in_reply_to_user_id_str": null,
"is_quote_status": false,
"lang": "en",
"place": null,
"possibly_sensitive": false,
"retweet_count": 1,
"retweeted": false,
"source": "Twitter Web Client",
"text": "coins came in!! Thanks https://shortened.url/SJgFTE0o8h #dnd #Nat20 #CriticalRole #d20babes https://shortened.url/YQoxEuEAXV",
"truncated": false,
"user": {
"contributors_enabled": false,
"created_at": "Thu Mar 06 19:59:14 +0000 2014",
"default_profile": true,
"default_profile_image": false,
"description": "DM Geek Critter Con-man. I am here to like your art ^.^",
"favourites_count": 4990,
"follow_request_sent": null,
"followers_count": 57,
"following": null,
"friends_count": 183,
"geo_enabled": false,
"id": 2375847847,
"id_str": "2375847847",
"is_translator": false,
"lang": "en",
"listed_count": 7,
"location": "Flower Mound, TX",
"name": "Zack Chini",
"notifications": null,
"profile_background_color": "C0DEED",
"profile_background_image_url": "http://abs.twimg.com/images/themes/theme1/bg.png",
"profile_background_image_url_https": "https://abs.twimg.com/images/themes/theme1/bg.png",
"profile_background_tile": false,
"profile_banner_url": "https://pbs.twimg.com/profile_banners/2375847847/1430928759",
"profile_image_url": "http://pbs.twimg.com/profile_images/708816622358663168/mNF4Ysr5_normal.jpg",
"profile_image_url_https": "https://pbs.twimg.com/profile_images/708816622358663168/mNF4Ysr5_normal.jpg",
"profile_link_color": "0084B4",
"profile_sidebar_border_color": "C0DEED",
"profile_sidebar_fill_color": "DDEEF6",
"profile_text_color": "333333",
"profile_use_background_image": true,
"protected": false,
"screen_name": "Zenttsilverwing",
"statuses_count": 551,
"time_zone": null,
"url": null,
"utc_offset": null,
"verified": false
}
},
"source": "Twitter Web Client",
"text": "RT #Zenttsilverwing: coins came in!! Thanks https://shortened.url/SJgFTE0o8h #dnd #Nat20 #CriticalRole #d20babes https://shortened.url/YQoxEuEAXV",
"timestamp_ms": "1459984181156",
"truncated": false,
"user": {
"contributors_enabled": false,
"created_at": "Tue Feb 10 04:31:18 +0000 2009",
"default_profile": false,
"default_profile_image": false,
"description": "I use Twitter to primarily retweet Critter artwork of Critical Role and their own creations. I maintain a list of all the Critter artists I've come across.",
"favourites_count": 17586,
"follow_request_sent": null,
"followers_count": 318,
"following": null,
"friends_count": 651,
"geo_enabled": true,
"id": 20491914,
"id_str": "20491914",
"is_translator": false,
"lang": "en",
"listed_count": 33,
"location": "SanDiego, CA",
"name": "UnknownOutrider",
"notifications": null,
"profile_background_color": "EDECE9",
"profile_background_image_url": "http://abs.twimg.com/images/themes/theme3/bg.gif",
"profile_background_image_url_https": "https://abs.twimg.com/images/themes/theme3/bg.gif",
"profile_background_tile": false,
"profile_image_url": "http://pbs.twimg.com/profile_images/224346493/cartoon_dragon_tattoo_designs_normal.jpg",
"profile_image_url_https": "https://pbs.twimg.com/profile_images/224346493/cartoon_dragon_tattoo_designs_normal.jpg",
"profile_link_color": "088253",
"profile_sidebar_border_color": "D3D2CF",
"profile_sidebar_fill_color": "E3E2DE",
"profile_text_color": "634047",
"profile_use_background_image": true,
"protected": false,
"screen_name": "UnknownOutrider",
"statuses_count": 12760,
"time_zone": "Pacific Time (US & Canada)",
"url": null,
"utc_offset": -25200,
"verified": false
}
}
The reason this doesn't work is that you are trying to index a document containing a field named _id, which already exists as a built-in metadata field. So delete that field or rename it:
import json
from elasticsearch import Elasticsearch

es = Elasticsearch()
# Read the tweet inside a context manager so the file handle is closed as soon
# as parsing is done.
with open("data.json") as f:
    data = json.load(f)
# The document body must not carry its own "_id" field, so drop it before
# indexing.  pop() with a default also tolerates documents that never had one,
# where `del data['_id']` would raise KeyError.
# data['id_'] = data['_id'] <= You can change _id as id_
data.pop('_id', None)
es.index(index='tweets5', doc_type='tweets', id=data['id'], body=data)