Dataframe large JSON (Nested of Nested) - python

I need to convert a JSON object into the following DataFrame (df3):
My code is:
import pandas as pd
# Load the tweets file as newline-delimited JSON records (lines=True).
df =pd.read_json('fetched_tweets.json', orient='records', lines=True)
# df is already a DataFrame, so this just builds another DataFrame from it.
df2 = pd.DataFrame(df)
# Only the 'entities' column (a Series) is handed to from_dict here.
# NOTE(review): the sample data shows each 'entities' value is a nested object,
# so this presumably does not expand it into separate columns — confirm.
df3 = pd.DataFrame.from_dict(df2['entities'])
I also tried:
# .str.lower() goes through the string accessor; the NaN output shown below
# suggests the 'entities' values are not strings — TODO confirm.
y = df3['entities'].str.lower()
It gave me:
0 NaN
1 NaN
2 NaN
3 NaN
4 NaN
etc ..
I also tried this answer by implementing the below code:
import json
import pandas as pd
# Read 'test.json' as newline-delimited JSON records.
df =pd.read_json('test.json', orient='records', lines=True)
# Apply json.loads to every 'entities' value, build a DataFrame from the
# results, and join it onto the 'created_at' column by index.
# NOTE(review): json.loads expects str/bytes input; if read_json has already
# parsed 'entities' into dicts this call would raise TypeError — confirm
# against the actual data.
df6=df[['created_at']].join(pd.DataFrame(df['entities'].
apply(json.loads).values.tolist()))
print (df6)
It did not work...
I have spent a lot of time on this task and I am not really a Python expert. Please help!
JSON Data Sample:
[{"created_at": "Wed Dec 19 22:22:50 +0000 2018", "id": 1075516836656136194, "id_str": "1075516836656136194", "text": "#Yahoo Well your about 16 months late on this bombshell: \nhttps://t.co", "display_text_range": [7, 81], "source": "Twitter Web Client", "truncated": false, "in_reply_to_status_id": 1075512357621850112, "in_reply_to_status_id_str": "1075512357621850112", "in_reply_to_user_id": 19380829, "in_reply_to_user_id_str": "19380829", "in_reply_to_screen_name": "Yahoo", "user": {"id": 1075515640302186497, "id_str": "1075515640302186497", "name": "Dave Wasilla", "screen_name": "DaveWasilla", "location": null, "url": null, "description": null, "translator_type": "none", "protected": false, "verified": false, "followers_count": 0, "friends_count": 0, "listed_count": 0, "favourites_count": 0, "statuses_count": 1, "created_at": "Wed Dec 19 22:18:05 +0000 2018", "utc_offset": null, "time_zone": null, "geo_enabled": false, "lang": "en", "contributors_enabled": false, "is_translator": false, "profile_background_color": "F5F8FA", "profile_background_image_url": "", "profile_background_image_url_https": "", "profile_background_tile": false, "profile_link_color": "1DA1F2", "profile_sidebar_border_color": "C0DEED", "profile_sidebar_fill_color": "DDEEF6", "profile_text_color": "333333", "profile_use_background_image": true, "profile_image_url": "http://abs.twimg.com/sticky/default_profile_images/default_profile_normal.png", "profile_image_url_https": "https://abs.twimg.com/sticky/default_profile_images/default_profile_normal.png", "default_profile": true, "default_profile_image": false, "following": null, "follow_request_sent": null, "notifications": null}, "geo": null, "coordinates": null, "place": null, "contributors": null, "quoted_status_id": 1075238373927895040, "quoted_status_id_str": "1075238373927895040", "quoted_status": {"created_at": "Wed Dec 19 03:56:19 +0000 2018", "id": 1075238373927895040, "id_str": "1075238373927895040", "text": "#mkraju \"Turns out?\" 
Donald Trump Junior testified his father signed this letter of intent back in September of 201\u2026 https://t.co", "display_text_range": [8, 140], "source": "Twitter Web Client", "truncated": true, "in_reply_to_status_id": 1075236204654850048, "in_reply_to_status_id_str": "1075236204654850048", "in_reply_to_user_id": 39155029, "in_reply_to_user_id_str": "39155029", "in_reply_to_screen_name": "mkraju", "user": {"id": 18005085, "id_str": "18005085", "name": "Mollie", "screen_name": "MZHemingway", "location": "Senior Editor, #FDRLST", "url": "http://thefederalist.com", "description": "I don't share your groupthink and it's unlikely I will be bullied into sharing it. Have a great day.", "translator_type": "none", "protected": false, "verified": true, "followers_count": 215017, "friends_count": 1512, "listed_count": 2724, "favourites_count": 11096, "statuses_count": 88844, "created_at": "Tue Dec 09 22:44:12 +0000 2008", "utc_offset": null, "time_zone": null, "geo_enabled": false, "lang": "en", "contributors_enabled": false, "is_translator": false, "profile_background_color": "000000", "profile_background_image_url": "http://abs.twimg.com/images/themes/theme17/bg.gif", "profile_background_image_url_https": "https://abs.twimg.com/images/themes/theme17/bg.gif", "profile_background_tile": false, "profile_link_color": "19CF86", "profile_sidebar_border_color": "000000", "profile_sidebar_fill_color": "000000", "profile_text_color": "000000", "profile_use_background_image": false, "profile_image_url": "http://pbs.twimg.com/profile_images/648645685244510208/HD4ui7lh_normal.jpg", "profile_image_url_https": "https://pbs.twimg.com/profile_images/648645685244510208/HD4ui7lh_normal.jpg", "profile_banner_url": "https://pbs.twimg.com/profile_banners/18005085/1541047835", "default_profile": false, "default_profile_image": false, "following": null, "follow_request_sent": null, "notifications": null}, "geo": null, "coordinates": null, "place": null, "contributors": null, 
"is_quote_status": false, "extended_tweet": {"full_text": "#mkraju \"Turns out?\" Donald Trump Junior testified his father signed this letter of intent back in September of 2017. Link here (https://t.co) Relevant portion of testimony shown. https://t.co", "display_text_range": [8, 190], "entities": {"hashtags": [], "urls": [{"url": "https://t.co", "expanded_url": "https://www.judiciary.senate.gov/imo/media/doc/Trump%20Jr%20Transcript_redacted.pdf", "display_url": "judiciary.senate.gov/imo/media/doc/\u2026", "indices": [129, 152]}], "user_mentions": [{"screen_name": "mkraju", "name": "Manu Raju", "id": 39155029, "id_str": "39155029", "indices": [0, 7]}], "symbols": [], "media": [{"id": 1075238230591791105, "id_str": "1075238230591791105", "indices": [191, 214], "media_url": "http://pbs.twimg.com/media/DuwDeU9X4AEFRbt.jpg", "media_url_https": "https://pbs.twimg.com/media/DuwDeU9X4AEFRbt.jpg", "url": "https://t.co", "display_url": "pic.twitter.com/VLBpb7YkEr", "expanded_url": "https://twitter.com/MZHemingway/status/1075238373927895040/photo/1", "type": "photo", "sizes": {"small": {"w": 371, "h": 680, "resize": "fit"}, "thumb": {"w": 150, "h": 150, "resize": "crop"}, "large": {"w": 532, "h": 975, "resize": "fit"}, "medium": {"w": 532, "h": 975, "resize": "fit"}}}]}, "extended_entities": {"media": [{"id": 1075238230591791105, "id_str": "1075238230591791105", "indices": [191, 214], "media_url": "http://pbs.twimg.com/media/DuwDeU9X4AEFRbt.jpg", "media_url_https": "https://pbs.twimg.com/media/DuwDeU9X4AEFRbt.jpg", "url": "https://t.co", "display_url": "pic.twitter.com/VLBpb7YkEr", "expanded_url": "https://twitter.com/MZHemingway/status/1075238373927895040/photo/1", "type": "photo", "sizes": {"small": {"w": 371, "h": 680, "resize": "fit"}, "thumb": {"w": 150, "h": 150, "resize": "crop"}, "large": {"w": 532, "h": 975, "resize": "fit"}, "medium": {"w": 532, "h": 975, "resize": "fit"}}}]}}, "quote_count": 62, "reply_count": 73, "retweet_count": 203, "favorite_count": 487, 
"entities": {"hashtags": [], "urls": [{"url": "https://t.co", "expanded_url": "https://twitter.com/i/web/status/1075238373927895040", "display_url": "twitter.com/i/web/status/1\u2026", "indices": [117, 140]}], "user_mentions": [{"screen_name": "mkraju", "name": "Manu Raju", "id": 39155029, "id_str": "39155029", "indices": [0, 7]}], "symbols": []}, "favorited": false, "retweeted": false, "possibly_sensitive": false, "filter_level": "low", "lang": "en"}, "quoted_status_permalink": {"url": "https://t.co", "expanded": "https://twitter.com/MZHemingway/status/1075238373927895040", "display": "twitter.com/MZHemingway/st\u2026"}, "is_quote_status": true, "quote_count": 0, "reply_count": 0, "retweet_count": 0, "favorite_count": 0, "entities": {"hashtags": [], "urls": [{"url": "https://t.co", "expanded_url": "https://twitter.com/MZHemingway/status/1075238373927895040", "display_url": "twitter.com/MZHemingway/st\u2026", "indices": [58, 81]}], "user_mentions": [{"screen_name": "Yahoo", "name": "Yahoo", "id": 19380829, "id_str": "19380829", "indices": [0, 6]}], "symbols": []}, "favorited": false, "retweeted": false, "possibly_sensitive": false, "filter_level": "low", "lang": "en", "timestamp_ms": "1545258170652"}, {another JSON object},{another object}]
**EDIT**
I was able to divide the entities string using this code:
import json
import pandas as pd

p = 'test.json'
# Parse the whole file into Python objects; the with-block closes the file.
with open(p, 'r') as f:
    doc = json.loads(f.read())
# doc[0] selects only the first record, so the resulting frame holds just
# that record's entries from entities['urls'].
df = pd.DataFrame.from_dict(doc[0]['entities']['urls'])
But it gives me only the first row; I need to get all rows. (I have to specify an index, though, or it raises an error.)

Can you try this:
import json

import pandas as pd

# Parse the JSON file into Python objects; the with-block closes the file.
with open('json/b.json') as json_data:
    d = json.load(json_data)

# pd.json_normalize flattens nested dicts into dotted column names. It is the
# supported replacement for pandas.io.json.json_normalize, which has been
# deprecated since pandas 1.0.
df3 = pd.json_normalize(data=d)

Because I have nested-of-nested values, I was able to fix it this way:
import json

import pandas as pd

cols = ['entities', 'urls', 'url', 'expanded_url', 'display_url']


def build_url_frame(tweets):
    """Return a DataFrame with one row per entry in each tweet's entities['urls'].

    Args:
        tweets: iterable of dicts, each with an 'entities' dict that holds a
            'urls' list of dicts with 'url', 'expanded_url' and 'display_url'.

    Returns:
        DataFrame with the columns listed in ``cols``: the tweet's whole
        'entities' value, the 1-based position of the URL inside that tweet,
        and the three URL fields.

    Raises:
        KeyError: if a tweet or URL entry lacks one of the expected keys.
    """
    rows = []
    for tweet in tweets:
        entities = tweet['entities']
        # enumerate gives the true position even when two URL dicts compare
        # equal (list.index would return the first match for both).
        for position, url_info in enumerate(entities['urls'], start=1):
            # Pick the fields by name instead of slicing values(), so the
            # result does not depend on the key order of the JSON object.
            rows.append([
                entities,
                position,
                url_info['url'],
                url_info['expanded_url'],
                url_info['display_url'],
            ])
    return pd.DataFrame(rows, columns=cols)


if __name__ == '__main__':
    with open('test.json') as data_file:
        data = json.load(data_file)
    df = build_url_frame(data)
And this is how it became:
This answer is my reference

import json

import pandas as pd

cols = ['entities', 'urls', 'url', 'expanded_url', 'display_url']


def url_rows_frame(records):
    """Return a DataFrame with one row per URL found in each record's entities.

    The 'entities' value may be an already-parsed dict or a JSON-encoded
    string; a string is decoded first. If the decoded object itself wraps the
    payload under an 'entities' key, that inner object is used.

    Args:
        records: iterable of dicts, each with an 'entities' value as above.

    Returns:
        DataFrame with the columns listed in ``cols``: the record's original
        'entities' value, the 1-based position of the URL inside that record,
        and the 'url', 'expanded_url' and 'display_url' fields.

    Raises:
        KeyError: if an expected key is missing.
        json.JSONDecodeError: if a string 'entities' value is not valid JSON.
    """
    rows = []
    for record in records:
        data_id = record['entities']
        # json.loads only accepts text/bytes; calling it on a dict raises
        # TypeError, so decode only when the value really is encoded.
        if isinstance(data_id, (str, bytes, bytearray)):
            data_idj = json.loads(data_id)
        else:
            data_idj = data_id
        criteria = data_idj.get('entities', data_idj)['urls']
        # enumerate yields the true position even for duplicate URL dicts.
        for position, d in enumerate(criteria, start=1):
            rows.append([
                data_id,
                position,
                d['url'],
                d['expanded_url'],
                d['display_url'],
            ])
    return pd.DataFrame(rows, columns=cols)


if __name__ == '__main__':
    with open('test.json') as data_file:
        data = json.load(data_file)
    df = url_rows_frame(data)

Related

How can I use json_normalize to get down to multiple levels?

I have a large json file that needs to be parsed into a dataframe. Here is an example of the data structure...
{
"genericTags": [
{
"appliesTo": [
"program",
"email_batch",
"nurture",
"event",
"webinar"
],
"name": "Language",
"type": null,
"values": [
{
"value": "Chinese",
"hidden": false,
"deleted": false
},
{
"value": "English",
"hidden": false,
"deleted": false
},
{
"value": "French",
"hidden": false,
"deleted": false
},
{
"value": "Japanese",
"hidden": false,
"deleted": false
},
{
"value": "Portuguese",
"hidden": false,
"deleted": false
},
{
"value": "Spanish",
"hidden": false,
"deleted": false
}
],
"description": null
},
{
"appliesTo": [
"program",
"email_batch",
"nurture",
"webinar",
"event"
],
"name": "Product",
"type": null,
"values": [
{
"value": "P1",
"hidden": false,
"deleted": false
},
{
"value": "P2",
"hidden": false,
"deleted": false
},
{
"value": "P3",
"hidden": false,
"deleted": false
},
{
"value": "P4",
"hidden": false,
"deleted": false
},
{
"value": "P5",
"hidden": false,
"deleted": false
},
{
"value": "P6",
"hidden": false,
"deleted": false
},
{
"value": "P7",
"hidden": false,
"deleted": false
},
{
"value": "P8",
"hidden": false,
"deleted": false
},
{
"value": "P9",
"hidden": false,
"deleted": false
},
{
"value": "P10",
"hidden": false,
"deleted": false
},
{
"value": "P11",
"hidden": false,
"deleted": false
},
{
"value": "P12",
"hidden": false,
"deleted": false
},
{
"value": "Multiple",
"hidden": false,
"deleted": false
},
{
"value": "None",
"hidden": false,
"deleted": false
}
],
"description": null
},
{
"appliesTo": [
"program",
"email_batch",
"nurture",
"event",
"webinar"
],
"name": "Region",
"type": null,
"values": [
{
"value": "Americas",
"hidden": false,
"deleted": false
},
{
"value": "APJ",
"hidden": false,
"deleted": false
},
{
"value": "EMEA",
"hidden": false,
"deleted": false
},
{
"value": "WW",
"hidden": false,
"deleted": false
}
],
"description": null
}
],
"channelTags": [
{
"value": "Advertising",
"progression": {
"name": "Advertising",
"type": "program",
"description": null,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": null,
"typeId": 1,
"order": 0,
"success": false,
"hidden": false,
"system": true,
"isEventCapSet": null,
"isUsed": false
},
{
"name": "Member",
"description": null,
"typeId": null,
"order": 10,
"success": false,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
},
{
"name": "Engaged",
"description": null,
"typeId": null,
"order": 20,
"success": true,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
}
]
},
"hidden": false,
"deleted": false
},
{
"value": "Association",
"progression": {
"name": "Association",
"type": "program",
"description": null,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": null,
"typeId": 1,
"order": 0,
"success": false,
"hidden": false,
"system": true,
"isEventCapSet": null,
"isUsed": false
},
{
"name": "Member",
"description": null,
"typeId": null,
"order": 10,
"success": false,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
},
{
"name": "Engaged",
"description": null,
"typeId": null,
"order": 20,
"success": true,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
}
]
},
"hidden": false,
"deleted": false
},
{
"value": "Contact Request",
"progression": {
"name": "Contact Request",
"type": "program",
"description": null,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": null,
"typeId": 1,
"order": 0,
"success": false,
"hidden": false,
"system": true,
"isEventCapSet": null,
"isUsed": false
},
{
"name": "Member",
"description": null,
"typeId": null,
"order": 10,
"success": false,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
},
{
"name": "Sent Email",
"description": null,
"typeId": null,
"order": 20,
"success": false,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
},
{
"name": "Engaged",
"description": null,
"typeId": null,
"order": 30,
"success": true,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
}
]
},
"hidden": false,
"deleted": false
},
{
"value": "Content",
"progression": {
"name": "Content",
"type": "program",
"description": null,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": null,
"typeId": 1,
"order": 0,
"success": false,
"hidden": false,
"system": true,
"isEventCapSet": null,
"isUsed": false
},
{
"name": "Member",
"description": null,
"typeId": null,
"order": 10,
"success": false,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
},
{
"name": "Engaged",
"description": null,
"typeId": null,
"order": 20,
"success": true,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
}
]
},
"hidden": false,
"deleted": false
},
{
"value": "Content Syndication",
"progression": {
"name": "Content Syndication",
"type": "program",
"description": null,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": null,
"typeId": 1,
"order": 0,
"success": false,
"hidden": false,
"system": true,
"isEventCapSet": null,
"isUsed": false
},
{
"name": "Member",
"description": null,
"typeId": null,
"order": 10,
"success": false,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
},
{
"name": "Engaged",
"description": null,
"typeId": null,
"order": 20,
"success": true,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
}
]
},
"hidden": false,
"deleted": false
},
{
"value": "Direct Mail",
"progression": {
"name": "Direct Mail",
"type": "program",
"description": null,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": null,
"typeId": 1,
"order": 0,
"success": false,
"hidden": false,
"system": true,
"isEventCapSet": null,
"isUsed": false
},
{
"name": "Member",
"description": null,
"typeId": null,
"order": 10,
"success": false,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
},
{
"name": "Engaged",
"description": null,
"typeId": null,
"order": 20,
"success": true,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
}
]
},
"hidden": false,
"deleted": false
},
{
"value": "Display",
"progression": {
"name": "Display",
"type": "program",
"description": null,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": null,
"typeId": 1,
"order": 0,
"success": false,
"hidden": false,
"system": true,
"isEventCapSet": null,
"isUsed": false
},
{
"name": "Member",
"description": null,
"typeId": null,
"order": 10,
"success": false,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
},
{
"name": "Engaged",
"description": null,
"typeId": null,
"order": 20,
"success": true,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
}
]
},
"hidden": false,
"deleted": false
},
{
"value": "Email",
"progression": {
"name": "Email",
"type": "program",
"description": null,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": null,
"typeId": 1,
"order": 0,
"success": false,
"hidden": false,
"system": true,
"isEventCapSet": null,
"isUsed": true
},
{
"name": "Member",
"description": null,
"typeId": null,
"order": 10,
"success": false,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
},
{
"name": "Engaged",
"description": null,
"typeId": null,
"order": 20,
"success": true,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
}
]
},
"hidden": false,
"deleted": false
},
{
"value": "Email Send",
"progression": {
"name": "Email Send",
"type": "email_batch",
"description": null,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": null,
"typeId": 1,
"order": 0,
"success": false,
"hidden": false,
"system": true,
"isEventCapSet": null,
"isUsed": true
},
{
"name": "Member",
"description": null,
"typeId": null,
"order": 10,
"success": false,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
},
{
"name": "Engaged",
"description": null,
"typeId": null,
"order": 20,
"success": true,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
}
]
},
"hidden": false,
"deleted": false
},
{
"value": "Event",
"progression": {
"name": "Event",
"type": "event",
"description": null,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": null,
"typeId": 1,
"order": 0,
"success": false,
"hidden": false,
"system": true,
"isEventCapSet": false,
"isUsed": false
},
{
"name": "Invited",
"description": null,
"typeId": null,
"order": 10,
"success": false,
"hidden": false,
"system": false,
"isEventCapSet": false,
"isUsed": true
},
{
"name": "Registered",
"type": "Registered",
"typeName": "registered",
"description": null,
"typeId": 5,
"order": 20,
"success": false,
"hidden": false,
"system": false,
"isEventCapSet": false,
"isUsed": true
},
{
"name": "Waitlisted",
"type": "Waitlisted",
"typeName": "wait_listed",
"description": null,
"typeId": 4,
"order": 20,
"success": false,
"hidden": false,
"system": false,
"isEventCapSet": false,
"isUsed": false
},
{
"name": "No Show",
"description": null,
"typeId": null,
"order": 30,
"success": false,
"hidden": false,
"system": false,
"isEventCapSet": false,
"isUsed": true
},
{
"name": "Attended",
"type": "Attended",
"typeName": "attended",
"description": null,
"typeId": 7,
"order": 40,
"success": true,
"hidden": false,
"system": false,
"isEventCapSet": false,
"isUsed": true
},
{
"name": "Attended Session",
"description": null,
"typeId": null,
"order": 50,
"success": true,
"hidden": false,
"system": false,
"isEventCapSet": false,
"isUsed": true
}
]
},
"hidden": false,
"deleted": false
},
{
"value": "Intent Signal",
"progression": {
"name": "Intent Signal",
"type": "program",
"description": null,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": null,
"typeId": 1,
"order": 0,
"success": false,
"hidden": false,
"system": true,
"isEventCapSet": null,
"isUsed": false
},
{
"name": "Member",
"description": null,
"typeId": null,
"order": 10,
"success": false,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
},
{
"name": "Engaged",
"description": null,
"typeId": null,
"order": 20,
"success": true,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
}
]
},
"hidden": false,
"deleted": false
},
{
"value": "Interactive Chat",
"progression": {
"name": "Interactive Chat",
"type": "program",
"description": null,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": null,
"typeId": 1,
"order": 0,
"success": false,
"hidden": false,
"system": true,
"isEventCapSet": null,
"isUsed": false
},
{
"name": "Member",
"description": null,
"typeId": null,
"order": 10,
"success": false,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
},
{
"name": "Engaged",
"description": null,
"typeId": null,
"order": 20,
"success": true,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
}
]
},
"hidden": false,
"deleted": false
},
{
"value": "List Import",
"progression": {
"name": "List Import",
"type": "program",
"description": null,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": null,
"typeId": 1,
"order": 0,
"success": false,
"hidden": false,
"system": true,
"isEventCapSet": null,
"isUsed": false
},
{
"name": "Member",
"description": null,
"typeId": null,
"order": 10,
"success": false,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
},
{
"name": "Engaged",
"description": null,
"typeId": null,
"order": 20,
"success": true,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
}
]
},
"hidden": false,
"deleted": false
},
{
"value": "Nurture",
"progression": {
"name": "Nurture",
"type": "nurture",
"description": null,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": null,
"typeId": 1,
"order": 0,
"success": false,
"hidden": false,
"system": true,
"isEventCapSet": null,
"isUsed": true
},
{
"name": "Member",
"description": null,
"typeId": null,
"order": 10,
"success": false,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
},
{
"name": "Engaged",
"description": null,
"typeId": null,
"order": 20,
"success": true,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
},
{
"name": "Converted",
"description": null,
"typeId": null,
"order": 30,
"success": true,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": false
}
]
},
"hidden": false,
"deleted": false
},
{
"value": "Nurture Cast",
"progression": {
"name": "Nurture Cast",
"type": "program",
"description": null,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": null,
"typeId": 1,
"order": 0,
"success": false,
"hidden": false,
"system": true,
"isEventCapSet": null,
"isUsed": true
},
{
"name": "Member",
"description": null,
"typeId": null,
"order": 10,
"success": false,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
},
{
"name": "Excluded",
"description": null,
"typeId": null,
"order": 20,
"success": false,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
},
{
"name": "Converted",
"description": null,
"typeId": null,
"order": 30,
"success": true,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
},
{
"name": "Engaged",
"description": null,
"typeId": null,
"order": 30,
"success": true,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
}
]
},
"hidden": false,
"deleted": false
},
{
"value": "Operational",
"progression": {
"name": "Operational",
"type": "program",
"description": null,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": null,
"typeId": 1,
"order": 0,
"success": false,
"hidden": false,
"system": true,
"isEventCapSet": null,
"isUsed": false
},
{
"name": "Member",
"description": null,
"typeId": null,
"order": 10,
"success": false,
"hidden": false,
"system": false,
"isEventCapSet": null,
"isUsed": true
},
{
"name": "Engaged",
"description": null,
"typeId": null,
"order": 20,
"success": true,
"hidden": true,
"system": false,
"isEventCapSet": null,
"isUsed": true
}
]
},
"hidden": false,
"deleted": false
}
]
}
What I need is specifically the channelTags array, which I have currently broken out like this...
import json

import pandas as pd

# open json file; the with-block closes it once parsing is done
with open('file_name.json') as json_file:
    data = json.load(json_file)

# turn json into df and get channelTags array
# (one row per element of data['channelTags'])
df_channel = pd.DataFrame.from_dict(data['channelTags'])
This is resulting in the following data structure...
value
hidden
deleted
progression.name
progression.type
progression.description
progression.steps
Advertising
False
False
Advertising
program
null
[{'name': 'Not in Program', 'type': 'Not in Program', 'typeName': 'not_in_program', 'description': None, 'typeId': 1, 'order': 0, 'success': False, 'hidden': False, 'system': True, 'isEventCapSet': None, 'isUsed': False}, {'name': 'Member', 'description': None, 'typeId': None, 'order': 10, 'success': False, 'hidden': False, 'system': False, 'isEventCapSet': None, 'isUsed': True}, {'name': 'Engaged', 'description': None, 'typeId': None, 'order': 20, 'success': True, 'hidden': False, 'system': False, 'isEventCapSet': None, 'isUsed': True}]
Trade Show
False
False
Trade Show
event
null
[{'name': 'Not in Program', 'type': 'Not in Program', 'typeName': 'not_in_program', 'description': None, 'typeId': 1, 'order': 0, 'success': False, 'hidden': False, 'system': True, 'isEventCapSet': False, 'isUsed': False}, {'name': 'Invited', 'description': None, 'typeId': None, 'order': 10, 'success': False, 'hidden': False, 'system': False, 'isEventCapSet': False, 'isUsed': True}, {'name': 'Registered', 'type': 'Registered', 'typeName': 'registered', 'description': None, 'typeId': 5, 'order': 20, 'success': False, 'hidden': False, 'system': False, 'isEventCapSet': False, 'isUsed': True}, {'name': 'Waitlisted', 'type': 'Waitlisted', 'typeName': 'wait_listed', 'description': None, 'typeId': 4, 'order': 20, 'success': False, 'hidden': False, 'system': False, 'isEventCapSet': False, 'isUsed': False}, {'name': 'No Show', 'description': None, 'typeId': None, 'order': 30, 'success': False, 'hidden': False, 'system': False, 'isEventCapSet': False, 'isUsed': False}, {'name': 'Attended Show', 'type': 'Attended', 'typeName': 'attended', 'description': None, 'typeId': 7, 'order': 40, 'success': False, 'hidden': False, 'system': False, 'isEventCapSet': False, 'isUsed': True}, {'name': 'Visited Booth', 'description': None, 'typeId': None, 'order': 50, 'success': True, 'hidden': False, 'system': False, 'isEventCapSet': False, 'isUsed': True}, {'name': 'Attended Hosted Event', 'description': None, 'typeId': None, 'order': 60, 'success': True, 'hidden': False, 'system': False, 'isEventCapSet': False, 'isUsed': True}]
I still need to be able to go one step further down to parse out the progression.steps. I have accomplished this somewhat by using the following...
# Flatten every 'steps' list found under 'progression' into one row per step.
# No meta fields are requested, so the rows carry no column identifying the
# channel tag they came from.
df_normal = pd.json_normalize(df_channel['progression'], 'steps', errors='ignore')
This code results in the following...
name
type
typeName
description
typeId
order
success
hidden
system
isEventCapSet
IsUsed
Not in Program
Not in Program
Not in Program
null
1
0
FALSE
FALSE
TRUE
null
FALSE
Member
null
null
null
null
10
FALSE
FALSE
TRUE
null
FALSE
Engaged
null
null
null
null
20
FALSE
FALSE
TRUE
null
FALSE
What I am missing is the ability to combine these two dataframes, as I do not have a key field. The end result should be the df_channel dataframe and the df_normal dataframe combined with a left join on the df_channel['value'] entry.
What am I missing here?
Starting from a sample dictionary `data` as below:
data = {
"genericTags": [
{
"appliesTo": [
"program",
"email_batch",
"nurture",
"event",
"webinar"
],
"name": "Language",
"type": None,
"values": [
{
"value": "Chinese",
"hidden": False,
"deleted": False
},
{
"value": "English",
"hidden": False,
"deleted": False
},
{
"value": "French",
"hidden": False,
"deleted": False
},
{
"value": "Japanese",
"hidden": False,
"deleted": False
},
{
"value": "Portuguese",
"hidden": False,
"deleted": False
},
{
"value": "Spanish",
"hidden": False,
"deleted": False
}
],
"description": None
},
{
"appliesTo": [
"program",
"email_batch",
"nurture",
"webinar",
"event"
],
"name": "Product",
"type": None,
"values": [
{
"value": "P1",
"hidden": False,
"deleted": False
},
{
"value": "P2",
"hidden": False,
"deleted": False
},
{
"value": "P3",
"hidden": False,
"deleted": False
},
{
"value": "P4",
"hidden": False,
"deleted": False
},
{
"value": "P5",
"hidden": False,
"deleted": False
},
{
"value": "P6",
"hidden": False,
"deleted": False
},
{
"value": "P7",
"hidden": False,
"deleted": False
},
{
"value": "P8",
"hidden": False,
"deleted": False
},
{
"value": "P9",
"hidden": False,
"deleted": False
},
{
"value": "P10",
"hidden": False,
"deleted": False
},
{
"value": "P11",
"hidden": False,
"deleted": False
},
{
"value": "P12",
"hidden": False,
"deleted": False
},
{
"value": "Multiple",
"hidden": False,
"deleted": False
},
{
"value": "None",
"hidden": False,
"deleted": False
}
],
"description": None
},
{
"appliesTo": [
"program",
"email_batch",
"nurture",
"event",
"webinar"
],
"name": "Region",
"type": None,
"values": [
{
"value": "Americas",
"hidden": False,
"deleted": False
},
{
"value": "APJ",
"hidden": False,
"deleted": False
},
{
"value": "EMEA",
"hidden": False,
"deleted": False
},
{
"value": "WW",
"hidden": False,
"deleted": False
}
],
"description": None
}
],
"channelTags": [
{
"value": "Advertising",
"progression": {
"name": "Advertising",
"type": "program",
"description": None,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": None,
"typeId": 1,
"order": 0,
"success": False,
"hidden": False,
"system": True,
"isEventCapSet": None,
"isUsed": False
},
{
"name": "Member",
"description": None,
"typeId": None,
"order": 10,
"success": False,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
},
{
"name": "Engaged",
"description": None,
"typeId": None,
"order": 20,
"success": True,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
}
]
},
"hidden": False,
"deleted": False
},
{
"value": "Association",
"progression": {
"name": "Association",
"type": "program",
"description": None,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": None,
"typeId": 1,
"order": 0,
"success": False,
"hidden": False,
"system": True,
"isEventCapSet": None,
"isUsed": False
},
{
"name": "Member",
"description": None,
"typeId": None,
"order": 10,
"success": False,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
},
{
"name": "Engaged",
"description": None,
"typeId": None,
"order": 20,
"success": True,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
}
]
},
"hidden": False,
"deleted": False
},
{
"value": "Contact Request",
"progression": {
"name": "Contact Request",
"type": "program",
"description": None,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": None,
"typeId": 1,
"order": 0,
"success": False,
"hidden": False,
"system": True,
"isEventCapSet": None,
"isUsed": False
},
{
"name": "Member",
"description": None,
"typeId": None,
"order": 10,
"success": False,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
},
{
"name": "Sent Email",
"description": None,
"typeId": None,
"order": 20,
"success": False,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
},
{
"name": "Engaged",
"description": None,
"typeId": None,
"order": 30,
"success": True,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
}
]
},
"hidden": False,
"deleted": False
},
{
"value": "Content",
"progression": {
"name": "Content",
"type": "program",
"description": None,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": None,
"typeId": 1,
"order": 0,
"success": False,
"hidden": False,
"system": True,
"isEventCapSet": None,
"isUsed": False
},
{
"name": "Member",
"description": None,
"typeId": None,
"order": 10,
"success": False,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
},
{
"name": "Engaged",
"description": None,
"typeId": None,
"order": 20,
"success": True,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
}
]
},
"hidden": False,
"deleted": False
},
{
"value": "Content Syndication",
"progression": {
"name": "Content Syndication",
"type": "program",
"description": None,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": None,
"typeId": 1,
"order": 0,
"success": False,
"hidden": False,
"system": True,
"isEventCapSet": None,
"isUsed": False
},
{
"name": "Member",
"description": None,
"typeId": None,
"order": 10,
"success": False,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
},
{
"name": "Engaged",
"description": None,
"typeId": None,
"order": 20,
"success": True,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
}
]
},
"hidden": False,
"deleted": False
},
{
"value": "Direct Mail",
"progression": {
"name": "Direct Mail",
"type": "program",
"description": None,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": None,
"typeId": 1,
"order": 0,
"success": False,
"hidden": False,
"system": True,
"isEventCapSet": None,
"isUsed": False
},
{
"name": "Member",
"description": None,
"typeId": None,
"order": 10,
"success": False,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
},
{
"name": "Engaged",
"description": None,
"typeId": None,
"order": 20,
"success": True,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
}
]
},
"hidden": False,
"deleted": False
},
{
"value": "Display",
"progression": {
"name": "Display",
"type": "program",
"description": None,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": None,
"typeId": 1,
"order": 0,
"success": False,
"hidden": False,
"system": True,
"isEventCapSet": None,
"isUsed": False
},
{
"name": "Member",
"description": None,
"typeId": None,
"order": 10,
"success": False,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
},
{
"name": "Engaged",
"description": None,
"typeId": None,
"order": 20,
"success": True,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
}
]
},
"hidden": False,
"deleted": False
},
{
"value": "Email",
"progression": {
"name": "Email",
"type": "program",
"description": None,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": None,
"typeId": 1,
"order": 0,
"success": False,
"hidden": False,
"system": True,
"isEventCapSet": None,
"isUsed": True
},
{
"name": "Member",
"description": None,
"typeId": None,
"order": 10,
"success": False,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
},
{
"name": "Engaged",
"description": None,
"typeId": None,
"order": 20,
"success": True,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
}
]
},
"hidden": False,
"deleted": False
},
{
"value": "Email Send",
"progression": {
"name": "Email Send",
"type": "email_batch",
"description": None,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": None,
"typeId": 1,
"order": 0,
"success": False,
"hidden": False,
"system": True,
"isEventCapSet": None,
"isUsed": True
},
{
"name": "Member",
"description": None,
"typeId": None,
"order": 10,
"success": False,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
},
{
"name": "Engaged",
"description": None,
"typeId": None,
"order": 20,
"success": True,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
}
]
},
"hidden": False,
"deleted": False
},
{
"value": "Event",
"progression": {
"name": "Event",
"type": "event",
"description": None,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": None,
"typeId": 1,
"order": 0,
"success": False,
"hidden": False,
"system": True,
"isEventCapSet": False,
"isUsed": False
},
{
"name": "Invited",
"description": None,
"typeId": None,
"order": 10,
"success": False,
"hidden": False,
"system": False,
"isEventCapSet": False,
"isUsed": True
},
{
"name": "Registered",
"type": "Registered",
"typeName": "registered",
"description": None,
"typeId": 5,
"order": 20,
"success": False,
"hidden": False,
"system": False,
"isEventCapSet": False,
"isUsed": True
},
{
"name": "Waitlisted",
"type": "Waitlisted",
"typeName": "wait_listed",
"description": None,
"typeId": 4,
"order": 20,
"success": False,
"hidden": False,
"system": False,
"isEventCapSet": False,
"isUsed": False
},
{
"name": "No Show",
"description": None,
"typeId": None,
"order": 30,
"success": False,
"hidden": False,
"system": False,
"isEventCapSet": False,
"isUsed": True
},
{
"name": "Attended",
"type": "Attended",
"typeName": "attended",
"description": None,
"typeId": 7,
"order": 40,
"success": True,
"hidden": False,
"system": False,
"isEventCapSet": False,
"isUsed": True
},
{
"name": "Attended Session",
"description": None,
"typeId": None,
"order": 50,
"success": True,
"hidden": False,
"system": False,
"isEventCapSet": False,
"isUsed": True
}
]
},
"hidden": False,
"deleted": False
},
{
"value": "Intent Signal",
"progression": {
"name": "Intent Signal",
"type": "program",
"description": None,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": None,
"typeId": 1,
"order": 0,
"success": False,
"hidden": False,
"system": True,
"isEventCapSet": None,
"isUsed": False
},
{
"name": "Member",
"description": None,
"typeId": None,
"order": 10,
"success": False,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
},
{
"name": "Engaged",
"description": None,
"typeId": None,
"order": 20,
"success": True,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
}
]
},
"hidden": False,
"deleted": False
},
{
"value": "Interactive Chat",
"progression": {
"name": "Interactive Chat",
"type": "program",
"description": None,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": None,
"typeId": 1,
"order": 0,
"success": False,
"hidden": False,
"system": True,
"isEventCapSet": None,
"isUsed": False
},
{
"name": "Member",
"description": None,
"typeId": None,
"order": 10,
"success": False,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
},
{
"name": "Engaged",
"description": None,
"typeId": None,
"order": 20,
"success": True,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
}
]
},
"hidden": False,
"deleted": False
},
{
"value": "List Import",
"progression": {
"name": "List Import",
"type": "program",
"description": None,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": None,
"typeId": 1,
"order": 0,
"success": False,
"hidden": False,
"system": True,
"isEventCapSet": None,
"isUsed": False
},
{
"name": "Member",
"description": None,
"typeId": None,
"order": 10,
"success": False,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
},
{
"name": "Engaged",
"description": None,
"typeId": None,
"order": 20,
"success": True,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
}
]
},
"hidden": False,
"deleted": False
},
{
"value": "Nurture",
"progression": {
"name": "Nurture",
"type": "nurture",
"description": None,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": None,
"typeId": 1,
"order": 0,
"success": False,
"hidden": False,
"system": True,
"isEventCapSet": None,
"isUsed": True
},
{
"name": "Member",
"description": None,
"typeId": None,
"order": 10,
"success": False,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
},
{
"name": "Engaged",
"description": None,
"typeId": None,
"order": 20,
"success": True,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
},
{
"name": "Converted",
"description": None,
"typeId": None,
"order": 30,
"success": True,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": False
}
]
},
"hidden": False,
"deleted": False
},
{
"value": "Nurture Cast",
"progression": {
"name": "Nurture Cast",
"type": "program",
"description": None,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": None,
"typeId": 1,
"order": 0,
"success": False,
"hidden": False,
"system": True,
"isEventCapSet": None,
"isUsed": True
},
{
"name": "Member",
"description": None,
"typeId": None,
"order": 10,
"success": False,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
},
{
"name": "Excluded",
"description": None,
"typeId": None,
"order": 20,
"success": False,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
},
{
"name": "Converted",
"description": None,
"typeId": None,
"order": 30,
"success": True,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
},
{
"name": "Engaged",
"description": None,
"typeId": None,
"order": 30,
"success": True,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
}
]
},
"hidden": False,
"deleted": False
},
{
"value": "Operational",
"progression": {
"name": "Operational",
"type": "program",
"description": None,
"steps": [
{
"name": "Not in Program",
"type": "Not in Program",
"typeName": "not_in_program",
"description": None,
"typeId": 1,
"order": 0,
"success": False,
"hidden": False,
"system": True,
"isEventCapSet": None,
"isUsed": False
},
{
"name": "Member",
"description": None,
"typeId": None,
"order": 10,
"success": False,
"hidden": False,
"system": False,
"isEventCapSet": None,
"isUsed": True
},
{
"name": "Engaged",
"description": None,
"typeId": None,
"order": 20,
"success": True,
"hidden": True,
"system": False,
"isEventCapSet": None,
"isUsed": True
}
]
},
"hidden": False,
"deleted": False
}
]
}
Try this:
# Flatten every record under data['channelTags'] into one row. The nested
# "progression" dict becomes dotted columns such as 'progression.steps', which
# still holds a list per row (it is turned into DataFrames on the next line).
df = pd.json_normalize(data['channelTags'])
# Convert each row's step list into its own DataFrame, stack them with the
# parent row index as the outer key, and left-merge the result onto df.
# NOTE(review): no `on=` is passed, so merge() joins on whichever column names
# the two frames have in common -- confirm that this is the intended join key.
df = df.merge(pd.concat(df['progression.steps'].apply(pd.DataFrame).tolist(), keys=df.index), how='left')
# Alternative approach: start from the whole payload rather than a single key.
df=pd.DataFrame(data) #all data. there are two columns: genericTags and channelTags
import json
# Round-trip through JSON text so every row comes back as a plain dict that
# json_normalize can flatten into dotted column names.
to_json = json.loads(df.to_json(orient="records"))
df_v1 = pd.json_normalize(to_json)
# Stage 1: one output row per element of each 'genericTags.values' list.
mask = df_v1.explode('genericTags.values', ignore_index=True)
# pop() removes the exploded column from mask; its elements (presumably dicts --
# verify against the data) are spread into columns and joined back by index.
final=mask.join(pd.DataFrame([*mask.pop('genericTags.values')], index=mask.index))
final.columns=final.columns + '_' #for second step. because there are columns with the same name.
# Stage 2: same explode-then-expand on the steps lists; the column name carries
# the '_' suffix that was appended to every column just above.
final_mask = final.explode('channelTags.progression.steps_', ignore_index=True)
final=final_mask.join(pd.DataFrame([*final_mask.pop('channelTags.progression.steps_')], index=final_mask.index))

Set Default Name for JSON Key in Python

I'm trying to search a JSON response from an API and get the names and IDs from it. Some objects inside the JSON do not have a "name" key. Is there any way to set a default? Or is there any other way to remove the JSON objects that have no name?
Code:
import requests
from pprint import pprint
# Courses endpoint of the API; the host part and the bearer token are placeholders.
url = "https://<SCHOOL CANVAS PAGE>.instructure.com/api/v1/courses"
headers = {"Authorization": "Bearer <AUTHORIZATION KEY>"}
# Send the authenticated GET request and decode the JSON body in one step.
r = requests.get(url, headers=headers).json()
pprint(r)
# The loops below are left commented out because the first one fails on
# entries that lack a "name" key.
# for k in r:
# print(f'Name: {k["name"]}')
# --> If no name it returns "print(f'Name: {k["name"]}') | KeyError: 'name'"
# for k in r:
# print(f'ID: {k["id"]}')
Api Output:
[
{
"id": 3941,
"root_account_id": 1,
"account_id": 9,
"enrollment_term_id": 93,
"name": "7th Period",
"uuid": "8IGbnEvvHWK9beNGlI1uwGWLjv3R1hEIjExkY1kc",
"start_at": "2019-08-07T17:55:35Z",
"grading_standard_id": null,
"is_public": null,
"created_at": "2019-07-30T21:48:19Z",
"course_code": "Social Studies Advanced 08 - Fickley",
"default_view": "modules",
"license": null,
"grade_passback_setting": null,
"end_at": null,
"public_syllabus": false,
"public_syllabus_to_auth": false,
"storage_quota_mb": 500,
"is_public_to_auth_users": false,
"homeroom_course": false,
"course_color": null,
"friendly_name": null,
"apply_assignment_group_weights": false,
"calendar": {
"ics": "https://clevelandschools.instructure.com/feeds/calendars/course_8IGbnEvvHWK9beNGlI1uwGWLjv3R1hEIjExkY1kc.ics"
},
"time_zone": "America/New_York",
"original_name": "Social Studies Advanced 08 - Fickley",
"blueprint": false,
"template": false,
"enrollments": [
{
"type": "student",
"role": "StudentEnrollment",
"role_id": 3,
"user_id": 1546,
"enrollment_state": "active",
"limit_privileges_to_course_section": false
}
],
"hide_final_grades": false,
"workflow_state": "available",
"restrict_enrollments_to_course_dates": false,
"overridden_course_visibility": ""
},
{
"id": 10397,
"root_account_id": 1,
"account_id": 10,
"name": "Algebra 2 H - Scurlock",
"enrollment_term_id": 356,
"uuid": "W6nOG0uwQGCbbgnIzHdJxT0fxp99wZUhLQxe6SFk",
"start_at": "2021-07-22T13:32:44Z",
"grading_standard_id": null,
"is_public": null,
"created_at": "2021-07-19T16:05:23Z",
"course_code": "Algebra 2 H - Scurlock",
"default_view": "modules",
"license": null,
"grade_passback_setting": null,
"end_at": null,
"public_syllabus": false,
"public_syllabus_to_auth": false,
"storage_quota_mb": 500,
"is_public_to_auth_users": false,
"homeroom_course": false,
"course_color": null,
"friendly_name": null,
"apply_assignment_group_weights": false,
"calendar": {
"ics": "https://clevelandschools.instructure.com/feeds/calendars/course_W6nOG0uwQGCbbgnIzHdJxT0fxp99wZUhLQxe6SFk.ics"
},
"time_zone": "America/New_York",
"blueprint": false,
"template": false,
"enrollments": [
{
"type": "student",
"role": "StudentEnrollment",
"role_id": 3,
"user_id": 1546,
"enrollment_state": "active",
"limit_privileges_to_course_section": false
}
],
"hide_final_grades": false,
"workflow_state": "available",
"restrict_enrollments_to_course_dates": false
},
{
"id": 5844,
"root_account_id": 1,
"account_id": 10,
"name": "Chemistry 1 H - Blackburn-sem2",
"enrollment_term_id": 221,
"uuid": "pNc50PmQxqa1G42hzUFLjgR3JaS4TOGgJ9i0nOko",
"start_at": "2021-01-04T18:03:47Z",
"grading_standard_id": null,
"is_public": false,
"created_at": "2020-07-22T17:33:31Z",
"course_code": "Chemistry 1 H - Blackburn",
"default_view": "modules",
"license": "private",
"grade_passback_setting": null,
"end_at": null,
"public_syllabus": false,
"public_syllabus_to_auth": false,
"storage_quota_mb": 500,
"is_public_to_auth_users": false,
"homeroom_course": false,
"course_color": null,
"friendly_name": null,
"apply_assignment_group_weights": false,
"calendar": {
"ics": "https://clevelandschools.instructure.com/feeds/calendars/course_pNc50PmQxqa1G42hzUFLjgR3JaS4TOGgJ9i0nOko.ics"
},
"time_zone": "America/New_York",
"blueprint": false,
"template": false,
"enrollments": [
{
"type": "student",
"role": "StudentEnrollment",
"role_id": 3,
"user_id": 1546,
"enrollment_state": "active",
"limit_privileges_to_course_section": false
}
],
"hide_final_grades": false,
"workflow_state": "available",
"restrict_enrollments_to_course_dates": false,
"overridden_course_visibility": ""
},
{
"id": 10420,
"root_account_id": 1,
"account_id": 10,
"name": "CLIMB 10th Grade - Vacheresse",
"enrollment_term_id": 352,
"uuid": "rHFv0zDQQ0MqaLh5ls9Ty2NB65zcL7ZGO8sYhuHk",
"start_at": "2021-08-02T15:36:49Z",
"grading_standard_id": null,
"is_public": null,
"created_at": "2021-07-19T16:05:24Z",
"course_code": "CLIMB 10th Grade - Vacheresse",
"default_view": "feed",
"license": null,
"grade_passback_setting": null,
"end_at": null,
"public_syllabus": false,
"public_syllabus_to_auth": false,
"storage_quota_mb": 500,
"is_public_to_auth_users": false,
"homeroom_course": false,
"course_color": null,
"friendly_name": null,
"apply_assignment_group_weights": false,
"calendar": {
"ics": "https://clevelandschools.instructure.com/feeds/calendars/course_rHFv0zDQQ0MqaLh5ls9Ty2NB65zcL7ZGO8sYhuHk.ics"
},
"time_zone": "America/New_York",
"blueprint": false,
"template": false,
"enrollments": [
{
"type": "student",
"role": "StudentEnrollment",
"role_id": 3,
"user_id": 1546,
"enrollment_state": "active",
"limit_privileges_to_course_section": false
}
],
"hide_final_grades": false,
"workflow_state": "available",
"restrict_enrollments_to_course_dates": false
},
{
"id": 5378,
"access_restricted_by_date": true
},
{
"id": 5588,
"root_account_id": 1,
"account_id": 10,
"name": "English 1 H - Semester 2 Colbaugh",
"enrollment_term_id": 221,
"uuid": "2KbshUnWT0lFqekusEtVhh5LBLhldQN4OqdoKN5E",
"start_at": "2021-01-04T12:41:16Z",
"grading_standard_id": null,
"is_public": false,
"created_at": "2020-07-22T17:33:22Z",
"course_code": "English 1 H-S2 - Colbaugh",
"default_view": "modules",
"license": "private",
"grade_passback_setting": null,
"end_at": null,
"public_syllabus": false,
"public_syllabus_to_auth": false,
"storage_quota_mb": 500,
"is_public_to_auth_users": false,
"homeroom_course": false,
"course_color": null,
"friendly_name": null,
"apply_assignment_group_weights": false,
"calendar": {
"ics": "https://clevelandschools.instructure.com/feeds/calendars/course_2KbshUnWT0lFqekusEtVhh5LBLhldQN4OqdoKN5E.ics"
},
"time_zone": "America/New_York",
"blueprint": false,
"template": false,
"enrollments": [
{
"type": "student",
"role": "StudentEnrollment",
"role_id": 3,
"user_id": 1546,
"enrollment_state": "active",
"limit_privileges_to_course_section": false
}
],
"hide_final_grades": false,
"workflow_state": "available",
"restrict_enrollments_to_course_dates": false,
"overridden_course_visibility": ""
},
{
"id": 10525,
"root_account_id": 1,
"account_id": 10,
"name": "English 2 H - Poulakis",
"enrollment_term_id": 353,
"uuid": "U2BRAMor6PFOP2h8LUMdlnVFbtMbSFkHBVGZgS2w",
"start_at": "2021-08-02T18:52:26Z",
"grading_standard_id": null,
"is_public": false,
"created_at": "2021-07-19T16:05:27Z",
"course_code": "English 2 H - Poulakis",
"default_view": "modules",
"license": "private",
"grade_passback_setting": null,
"end_at": null,
"public_syllabus": false,
"public_syllabus_to_auth": false,
"storage_quota_mb": 500,
"is_public_to_auth_users": false,
"homeroom_course": false,
"course_color": null,
"friendly_name": null,
"apply_assignment_group_weights": false,
"calendar": {
"ics": "https://clevelandschools.instructure.com/feeds/calendars/course_U2BRAMor6PFOP2h8LUMdlnVFbtMbSFkHBVGZgS2w.ics"
},
"time_zone": "America/New_York",
"blueprint": false,
"template": false,
"enrollments": [
{
"type": "student",
"role": "StudentEnrollment",
"role_id": 3,
"user_id": 1546,
"enrollment_state": "active",
"limit_privileges_to_course_section": false
}
],
"hide_final_grades": false,
"workflow_state": "available",
"restrict_enrollments_to_course_dates": false,
"overridden_course_visibility": ""
},
{
"id": 10708,
"root_account_id": 1,
"account_id": 10,
"name": "Fall World History",
"enrollment_term_id": 353,
"uuid": "yGqeV55bAwYP9rQyjmGGE3QO1dYm6c2pGCT9eUQC",
"start_at": "2021-08-03T12:07:47Z",
"grading_standard_id": null,
"is_public": false,
"created_at": "2021-07-19T16:05:32Z",
"course_code": "World History and Geography - LaMagna",
"default_view": "modules",
"license": "private",
"grade_passback_setting": "nightly_sync",
"end_at": null,
"public_syllabus": false,
"public_syllabus_to_auth": false,
"storage_quota_mb": 500,
"is_public_to_auth_users": false,
"homeroom_course": false,
"course_color": null,
"friendly_name": null,
"apply_assignment_group_weights": false,
"calendar": {
"ics": "https://clevelandschools.instructure.com/feeds/calendars/course_yGqeV55bAwYP9rQyjmGGE3QO1dYm6c2pGCT9eUQC.ics"
},
"time_zone": "America/New_York",
"blueprint": false,
"template": false,
"enrollments": [
{
"type": "student",
"role": "StudentEnrollment",
"role_id": 3,
"user_id": 1546,
"enrollment_state": "active",
"limit_privileges_to_course_section": false
},
{
"type": "student",
"role": "StudentEnrollment",
"role_id": 3,
"user_id": 1546,
"enrollment_state": "active",
"limit_privileges_to_course_section": false
}
],
"hide_final_grades": false,
"workflow_state": "available",
"restrict_enrollments_to_course_dates": false,
"overridden_course_visibility": ""
},
{
"id": 5480,
"root_account_id": 1,
"account_id": 10,
"name": "Geometry H - Storey",
"enrollment_term_id": 224,
"uuid": "0hUvYrHEV8wGWFbtqejTMVDMIQtew6lVLZBKl3py",
"start_at": "2020-08-11T14:06:28Z",
"grading_standard_id": null,
"is_public": null,
"created_at": "2020-07-22T17:33:20Z",
"course_code": "Geometry H - Storey",
"default_view": "modules",
"license": null,
"grade_passback_setting": null,
"end_at": null,
"public_syllabus": false,
"public_syllabus_to_auth": false,
"storage_quota_mb": 500,
"is_public_to_auth_users": false,
"homeroom_course": false,
"course_color": null,
"friendly_name": null,
"apply_assignment_group_weights": false,
"calendar": {
"ics": "https://clevelandschools.instructure.com/feeds/calendars/course_0hUvYrHEV8wGWFbtqejTMVDMIQtew6lVLZBKl3py.ics"
},
"time_zone": "America/New_York",
"blueprint": false,
"template": false,
"enrollments": [
{
"type": "student",
"role": "StudentEnrollment",
"role_id": 3,
"user_id": 1546,
"enrollment_state": "active",
"limit_privileges_to_course_section": false
}
],
"hide_final_grades": false,
"workflow_state": "available",
"restrict_enrollments_to_course_dates": false
},
{
"id": 5347,
"root_account_id": 1,
"account_id": 10,
"name": "Human Geography AP - Brose",
"enrollment_term_id": 219,
"uuid": "BNUgjVCnf3SEaa5SEn14RdNXKJAy0RcSjoKbvO7o",
"start_at": "2020-08-10T01:58:54Z",
"grading_standard_id": null,
"is_public": null,
"created_at": "2020-07-22T17:33:18Z",
"course_code": "Human Geography AP - Brose",
"default_view": "modules",
"license": null,
"grade_passback_setting": null,
"end_at": null,
"public_syllabus": false,
"public_syllabus_to_auth": false,
"storage_quota_mb": 500,
"is_public_to_auth_users": false,
"homeroom_course": false,
"course_color": null,
"friendly_name": null,
"apply_assignment_group_weights": false,
"calendar": {
"ics": "https://clevelandschools.instructure.com/feeds/calendars/course_BNUgjVCnf3SEaa5SEn14RdNXKJAy0RcSjoKbvO7o.ics"
},
"time_zone": "America/New_York",
"blueprint": false,
"template": false,
"enrollments": [
{
"type": "student",
"role": "StudentEnrollment",
"role_id": 3,
"user_id": 1546,
"enrollment_state": "active",
"limit_privileges_to_course_section": false
}
],
"hide_final_grades": false,
"workflow_state": "available",
"restrict_enrollments_to_course_dates": false,
"overridden_course_visibility": ""
}
]
The entry with ID 5378 has no "name" key, so I get the error:
KeyError: 'name'
Use .get() to specify a default if it doesn't exist:
print(f'Name: {k.get("name","<na>")}')
Standalone example:
import json

# Two sample records: the first has no "name" key, the second has one.
text = '''\
[
{
"id": 5378,
"access_restricted_by_date": true
},
{
"id": 9999,
"name": "whatever",
"access_restricted_by_date": true
}
]
'''
data = json.loads(text)
for item in data:
    # dict.get() returns the fallback instead of raising KeyError when the
    # "name" key is absent from a record.
    print(item['id'], item.get('name', '-na-'))
Output:
5378 -na-
9999 whatever

Split Json Data by certain string values (Python)

I want to split the incidents by their "incidentType" value in Python. It always has one of these 5 values: period, injuryTime, goal, card and substitution.
Json File
{
"incidents": [
{
"text": "FT",
"homeScore": 2,
"awayScore": 1,
"isLive": false,
"time": 90,
"addedTime": 999,
"incidentType": "period"
},
{
"length": 4,
"time": 90,
"addedTime": 0,
"incidentType": "injuryTime"
},
{
"homeScore": 2,
"awayScore": 1,
"player": {
"name": "Mostafa Mohamed",
"firstName": "",
"lastName": "",
"slug": "mostafa-mohamed",
"shortName": "M. Mohamed",
"position": "F",
"userCount": 3949,
"id": 873551
},
"id": 141786584,
"time": 89,
"isHome": true,
"incidentClass": "penalty",
"incidentType": "goal"
},
{
"player": {
"name": "Duško Tošić",
"slug": "dusko-tosic",
"shortName": "D. Tošić",
"position": "D",
"userCount": 215,
"id": 14557
},
"playerName": "Duško Tošić",
"reason": "Foul",
"id": 119728583,
"time": 85,
"isHome": false,
"incidentClass": "yellow",
"incidentType": "card"
},
{
"playerIn": {
"name": "Younès Belhanda",
"slug": "younes-belhanda",
"shortName": "Y. Belhanda",
"position": "M",
"userCount": 2165,
"id": 72999
},
"playerOut": {
"name": "Martin Linnes",
"slug": "martin-linnes",
"shortName": "M. Linnes",
"position": "D",
"userCount": 339,
"id": 109569
},
"id": 120059400,
"time": 82,
"isHome": true,
"incidentType": "substitution"
},
{
"player": {
"name": "Kevin Varga",
"slug": "kevin-varga",
"shortName": "K. Varga",
"position": "M",
"userCount": 274,
"id": 602730
},
"playerName": "Kevin Varga",
"reason": "Foul",
"id": 119728582,
"time": 82,
"isHome": false,
"incidentClass": "yellow",
"incidentType": "card"
},
{
"playerIn": {
"name": "DeAndre Yedlin",
"slug": "deandre-yedlin",
"shortName": "D. Yedlin",
"position": "D",
"userCount": 702,
"id": 314040
},
"playerOut": {
"name": "Muhammed Kerem Aktürkoğlu",
"firstName": "",
"lastName": "",
"slug": "muhammed-kerem-akturkoglu",
"shortName": "M. K. Aktürkoğlu",
"position": "F",
"userCount": 281,
"id": 903324
},
"id": 120059399,
"time": 77,
"isHome": true,
"incidentType": "substitution"
},
{
"playerIn": {
"name": "Ryan Donk",
"slug": "ryan-donk",
"shortName": "R. Donk",
"position": "D",
"userCount": 489,
"id": 14900
},
"playerOut": {
"name": "Ryan Babel",
"slug": "ryan-babel",
"shortName": "R. Babel",
"position": "F",
"userCount": 1577,
"id": 1876
},
"id": 120059397,
"time": 72,
"isHome": true,
"incidentType": "substitution"
},
{
"playerIn": {
"name": "Emre Akbaba",
"slug": "emre-akbaba",
"shortName": "E. Akbaba",
"position": "M",
"userCount": 604,
"id": 343527
},
"playerOut": {
"name": "Gedson Fernandes",
"slug": "fernandes-gedson",
"shortName": "G. Fernandes",
"position": "M",
"userCount": 3030,
"id": 862055
},
"id": 120059396,
"time": 71,
"isHome": true,
"incidentType": "substitution"
},
{
"playerIn": {
"name": "Henry Onyekuru",
"slug": "henry-onyekuru",
"shortName": "H. Onyekuru",
"position": "M",
"userCount": 1474,
"id": 809220
},
"playerOut": {
"name": "Emre Kılınç",
"slug": "emre-kilinc",
"shortName": "E. Kılınç",
"position": "M",
"userCount": 526,
"id": 202032
},
"id": 120059398,
"time": 71,
"isHome": true,
"incidentType": "substitution"
},
{
"player": {
"name": "Haris Hajradinović",
"slug": "haris-hajradinovic",
"shortName": "H. Hajradinović",
"position": "M",
"userCount": 357,
"id": 254979
},
"playerName": "Haris Hajradinović",
"reason": "Foul",
"id": 119728581,
"time": 71,
"isHome": false,
"incidentClass": "yellow",
"incidentType": "card"
},
{
"homeScore": 1,
"awayScore": 1,
"player": {
"name": "Isaac Kiese Thelin",
"slug": "isaac-kiese-thelin",
"shortName": "I. K. Thelin",
"position": "F",
"userCount": 386,
"id": 178743
},
"assist1": {
"name": "Haris Hajradinović",
"slug": "haris-hajradinovic",
"shortName": "H. Hajradinović",
"position": "M",
"userCount": 357,
"id": 254979
},
"id": 141786585,
"time": 51,
"isHome": false,
"incidentClass": "regular",
"incidentType": "goal"
},
{
"playerIn": {
"name": "Kevin Varga",
"slug": "kevin-varga",
"shortName": "K. Varga",
"position": "M",
"userCount": 274,
"id": 602730
},
"playerOut": {
"name": "Gilbert Koomson",
"slug": "gilbert-koomson",
"shortName": "G. Koomson",
"position": "F",
"userCount": 76,
"id": 341107
},
"id": 120059401,
"time": 46,
"isHome": false,
"incidentType": "substitution"
},
{
"text": "HT",
"homeScore": 1,
"awayScore": 0,
"isLive": false,
"time": 45,
"addedTime": 999,
"incidentType": "period"
},
{
"player": {
"name": "Isaac Kiese Thelin",
"slug": "isaac-kiese-thelin",
"shortName": "I. K. Thelin",
"position": "F",
"userCount": 386,
"id": 178743
},
"playerName": "Isaac Kiese Thelin",
"reason": "Foul",
"id": 119728580,
"time": 15,
"isHome": false,
"incidentClass": "yellow",
"incidentType": "card"
},
{
"homeScore": 1,
"awayScore": 0,
"player": {
"name": "Muhammed Kerem Aktürkoğlu",
"firstName": "",
"lastName": "",
"slug": "muhammed-kerem-akturkoglu",
"shortName": "M. K. Aktürkoğlu",
"position": "F",
"userCount": 281,
"id": 903324
},
"id": 141786583,
"time": 9,
"isHome": true,
"incidentClass": "regular",
"incidentType": "goal"
}
]
}
ABC = {
"incidents": [
{
"text": "FT",
"homeScore": 2,
"awayScore": 1,
"isLive": False,
"time": 90,
"addedTime": 999,
"incidentType": "period"
},
{
"length": 4,
"time": 90,
"addedTime": 0,
"incidentType": "injuryTime"
},
{
"homeScore": 2,
"awayScore": 1,
"player": {
"name": "Mostafa Mohamed",
"firstName": "",
"lastName": "",
"slug": "mostafa-mohamed",
"shortName": "M. Mohamed",
"position": "F",
"userCount": 3949,
"id": 873551
},
"id": 141786584,
"time": 89,
"isHome": True,
"incidentClass": "penalty",
"incidentType": "goal"
},
{
"player": {
"name": "Duško Tošić",
"slug": "dusko-tosic",
"shortName": "D. Tošić",
"position": "D",
"userCount": 215,
"id": 14557
},
"playerName": "Duško Tošić",
"reason": "Foul",
"id": 119728583,
"time": 85,
"isHome": False,
"incidentClass": "yellow",
"incidentType": "card"
},
{
"playerIn": {
"name": "Younès Belhanda",
"slug": "younes-belhanda",
"shortName": "Y. Belhanda",
"position": "M",
"userCount": 2165,
"id": 72999
},
"playerOut": {
"name": "Martin Linnes",
"slug": "martin-linnes",
"shortName": "M. Linnes",
"position": "D",
"userCount": 339,
"id": 109569
},
"id": 120059400,
"time": 82,
"isHome": True,
"incidentType": "substitution"
},
{
"player": {
"name": "Kevin Varga",
"slug": "kevin-varga",
"shortName": "K. Varga",
"position": "M",
"userCount": 274,
"id": 602730
},
"playerName": "Kevin Varga",
"reason": "Foul",
"id": 119728582,
"time": 82,
"isHome": False,
"incidentClass": "yellow",
"incidentType": "card"
},
{
"playerIn": {
"name": "DeAndre Yedlin",
"slug": "deandre-yedlin",
"shortName": "D. Yedlin",
"position": "D",
"userCount": 702,
"id": 314040
},
"playerOut": {
"name": "Muhammed Kerem Aktürkoğlu",
"firstName": "",
"lastName": "",
"slug": "muhammed-kerem-akturkoglu",
"shortName": "M. K. Aktürkoğlu",
"position": "F",
"userCount": 281,
"id": 903324
},
"id": 120059399,
"time": 77,
"isHome": True,
"incidentType": "substitution"
},
{
"playerIn": {
"name": "Ryan Donk",
"slug": "ryan-donk",
"shortName": "R. Donk",
"position": "D",
"userCount": 489,
"id": 14900
},
"playerOut": {
"name": "Ryan Babel",
"slug": "ryan-babel",
"shortName": "R. Babel",
"position": "F",
"userCount": 1577,
"id": 1876
},
"id": 120059397,
"time": 72,
"isHome": True,
"incidentType": "substitution"
},
{
"playerIn": {
"name": "Emre Akbaba",
"slug": "emre-akbaba",
"shortName": "E. Akbaba",
"position": "M",
"userCount": 604,
"id": 343527
},
"playerOut": {
"name": "Gedson Fernandes",
"slug": "fernandes-gedson",
"shortName": "G. Fernandes",
"position": "M",
"userCount": 3030,
"id": 862055
},
"id": 120059396,
"time": 71,
"isHome": True,
"incidentType": "substitution"
},
{
"playerIn": {
"name": "Henry Onyekuru",
"slug": "henry-onyekuru",
"shortName": "H. Onyekuru",
"position": "M",
"userCount": 1474,
"id": 809220
},
"playerOut": {
"name": "Emre Kılınç",
"slug": "emre-kilinc",
"shortName": "E. Kılınç",
"position": "M",
"userCount": 526,
"id": 202032
},
"id": 120059398,
"time": 71,
"isHome": True,
"incidentType": "substitution"
},
{
"player": {
"name": "Haris Hajradinović",
"slug": "haris-hajradinovic",
"shortName": "H. Hajradinović",
"position": "M",
"userCount": 357,
"id": 254979
},
"playerName": "Haris Hajradinović",
"reason": "Foul",
"id": 119728581,
"time": 71,
"isHome": False,
"incidentClass": "yellow",
"incidentType": "card"
},
{
"homeScore": 1,
"awayScore": 1,
"player": {
"name": "Isaac Kiese Thelin",
"slug": "isaac-kiese-thelin",
"shortName": "I. K. Thelin",
"position": "F",
"userCount": 386,
"id": 178743
},
"assist1": {
"name": "Haris Hajradinović",
"slug": "haris-hajradinovic",
"shortName": "H. Hajradinović",
"position": "M",
"userCount": 357,
"id": 254979
},
"id": 141786585,
"time": 51,
"isHome": False,
"incidentClass": "regular",
"incidentType": "goal"
},
{
"playerIn": {
"name": "Kevin Varga",
"slug": "kevin-varga",
"shortName": "K. Varga",
"position": "M",
"userCount": 274,
"id": 602730
},
"playerOut": {
"name": "Gilbert Koomson",
"slug": "gilbert-koomson",
"shortName": "G. Koomson",
"position": "F",
"userCount": 76,
"id": 341107
},
"id": 120059401,
"time": 46,
"isHome": False,
"incidentType": "substitution"
},
{
"text": "HT",
"homeScore": 1,
"awayScore": 0,
"isLive": False,
"time": 45,
"addedTime": 999,
"incidentType": "period"
},
{
"player": {
"name": "Isaac Kiese Thelin",
"slug": "isaac-kiese-thelin",
"shortName": "I. K. Thelin",
"position": "F",
"userCount": 386,
"id": 178743
},
"playerName": "Isaac Kiese Thelin",
"reason": "Foul",
"id": 119728580,
"time": 15,
"isHome": False,
"incidentClass": "yellow",
"incidentType": "card"
},
{
"homeScore": 1,
"awayScore": 0,
"player": {
"name": "Muhammed Kerem Aktürkoğlu",
"firstName": "",
"lastName": "",
"slug": "muhammed-kerem-akturkoglu",
"shortName": "M. K. Aktürkoğlu",
"position": "F",
"userCount": 281,
"id": 903324
},
"id": 141786583,
"time": 9,
"isHome": True,
"incidentClass": "regular",
"incidentType": "goal"
}
]
}
First, create a dictionary keyed by each distinct incidentType. Then iterate through the incidents and check whether each incident's incidentType already exists in the dictionary. If it does, append the incident to that key's list; if not, create a new key: value pair whose value is a list containing that incident.
# Group the incidents by their "incidentType": every key of `result` maps to
# the list of incidents of that type, in their original order.
result = {}
for incident in ABC["incidents"]:
    # setdefault() hands back the list already stored for this type, creating
    # an empty one first when the type has not been seen yet.
    result.setdefault(incident["incidentType"], []).append(incident)

# Print each incident type followed by the incidents collected for it.
for incident_type, incidents in result.items():
    print(incident_type, ":", incidents, "\n")

How to merge two json file in Python?

I'm trying to write a Python script that merges two JSON files, for example:
First file: test.json
[
{
"user_id": "273631610",
"item_id": "0829482",
"rating": "7"
},
{
"user_id": "40688800",
"item_id": "1343092",
"rating": "8"
},
{
"user_id": "395680865",
"item_id": "1453405",
"rating": "8"
}
]
Second file: test_userz.json
[
{"contributors": null, "truncated": false, "text": "", "in_reply_to_status_id": null, "id": 421040281296052225, "favorite_count": 0, "source": "IMDb Movies & TV on iOS", "retweeted": false, "coordinates": null, "entities": {"symbols": [], "user_mentions": [], "hashtags": [{"indices": [23, 28], "text": "IMDb"}], "urls": [{"url": "5E1fLOxB", "indices": [29, 51], "expanded_url": "http://www.imdb.com/title/tt0829482", "display_url": "imdb.com/title/tt0829482"}]}, "in_reply_to_screen_name": null, "id_str": "421040281296052225", "retweet_count": 0, "in_reply_to_user_id": null, "favorited": false, "user": {"follow_request_sent": false, "profile_use_background_image": true, "id": 273631610, "verified": false, "profile_text_color": "333333", "profile_image_url_https": "https://pbs.twimg.com/profile_images/378800000723050633/0fadb894b240dd426eb7b850dd8378d6_normal.jpeg", "profile_sidebar_fill_color": "DDEEF6", "is_translator": false, "geo_enabled": true, "entities": {"description": {"urls": []}}, "followers_count": 330, "protected": false, "location": "southend on sea", "default_profile_image": false, "id_str": "273631610", "lang": "en", "utc_offset": null, "statuses_count": 897, "description": "", "friends_count": 973, "profile_link_color": "0084B4", "profile_image_url": "http://pbs.twimg.com/profile_images/378800000723050633/0fadb894b240dd426eb7b850dd8378d6_normal.jpeg", "notifications": false, "profile_background_image_url_https": "https://abs.twimg.com/images/themes/theme1/bg.png", "profile_background_color": "C0DEED", "profile_background_image_url": "http://abs.twimg.com/images/themes/theme1/bg.png", "name": "Ben Morris", "is_translation_enabled": false, "profile_background_tile": false, "favourites_count": 12, "screen_name": "benyere", "url": null, "created_at": "Mon Mar 28 21:31:52 +0000 2011", "contributors_enabled": false, "time_zone": null, "profile_sidebar_border_color": "C0DEED", "default_profile": true, "following": false, "listed_count": 0}, "geo": null, 
"in_reply_to_user_id_str": null, "possibly_sensitive": false, "lang": "en", "created_at": "Wed Jan 08 22:06:40 +0000 2014", "in_reply_to_status_id_str": null, "place": null},
{"contributors": null, "truncated": false, "text": "", "in_reply_to_status_id": null, "id": 421040870931320833, "favorite_count": 0, "source": "iOS", "retweeted": false, "coordinates": null, "entities": {"symbols": [], "user_mentions": [], "hashtags": [{"indices": [31, 36], "text": "IMDb"}], "urls": [{"url": "dTFeexXDk", "indices": [37, 59], "expanded_url": "http://www.imdb.com/title/tt1343092", "display_url": "imdb.com/title/tt1343092"}]}, "in_reply_to_screen_name": null, "id_str": "421040870931320833", "retweet_count": 0, "in_reply_to_user_id": null, "favorited": false, "user": {"follow_request_sent": false, "profile_use_background_image": false, "id": 40688800, "verified": false, "profile_text_color": "333333", "profile_image_url_https": "https://pbs.twimg.com/profile_images/378800000807375392/2ee9bbee70e09ac097f8415794877e7c_normal.jpeg", "profile_sidebar_fill_color": "FFFFFF", "is_translator": false, "geo_enabled": true, "entities": {"description": {"urls": []}}, "followers_count": 230, "protected": false, "location": "Alexandria", "default_profile_image": false, "id_str": "40688800", "lang": "en", "utc_offset": 7200, "statuses_count": 8914, "description": "#TedxAlexandriaU head. resp of #emshi_fe_7artak in #alex Day dreamer. #medstudent by accident. 
I speak sarcasm.", "friends_count": 722, "profile_link_color": "0099CC", "profile_image_url": "http://pbs.twimg.com/profile_images/378800000807375392/2ee9bbee70e09ac097f8415794877e7c_normal.jpeg", "notifications": false, "profile_background_image_url_https": "https://si0.twimg.com/profile_background_images/778453690/27087117198cdf1d18bb86a36140de62.jpeg", "profile_background_color": "FFF04D", "profile_banner_url": "https://pbs.twimg.com/profile_banners/40688800/1378766815", "profile_background_image_url": "http://a0.twimg.com/profile_background_images/778453690/27087117198cdf1d18bb86a36140de62.jpeg", "name": "NOUR", "is_translation_enabled": false, "profile_background_tile": true, "favourites_count": 324, "screen_name": "Nour_Ayman", "url": null, "created_at": "Sun May 17 16:38:59 +0000 2009", "contributors_enabled": false, "time_zone": "Cairo", "profile_sidebar_border_color": "FFFFFF", "default_profile": false, "following": false, "listed_count": 2}, "geo": null, "in_reply_to_user_id_str": null, "possibly_sensitive": false, "lang": "en", "created_at": "Wed Jan 08 22:09:00 +0000 2014", "in_reply_to_status_id_str": null, "place": null},
{"contributors": null, "truncated": false, "text": "", "in_reply_to_status_id": null, "id": 421041098287755264, "favorite_count": 0, "source": "iOS", "retweeted": false, "coordinates": null, "entities": {"symbols": [], "user_mentions": [], "hashtags": [{"indices": [34, 39], "text": "IMDb"}], "urls": [{"url": "I2HqieyDnn", "indices": [40, 62], "expanded_url": "http://www.imdb.com/title/tt1453405", "display_url": "imdb.com/title/tt1453405"}]}, "in_reply_to_screen_name": null, "id_str": "421041098287755264", "retweet_count": 0, "in_reply_to_user_id": null, "favorited": false, "user": {"follow_request_sent": false, "profile_use_background_image": true, "id": 395680865, "verified": false, "profile_text_color": "333333", "profile_image_url_https": "https://pbs.twimg.com/profile_images/3130997221/4a981c8594f3c9ec127542ae40bac4ce_normal.jpeg", "profile_sidebar_fill_color": "DDEEF6", "is_translator": false, "geo_enabled": false, "entities": {"description": {"urls": []}}, "followers_count": 9, "protected": false, "location": "Denmark", "default_profile_image": false, "id_str": "395680865", "lang": "en", "utc_offset": 3600, "statuses_count": 1557, "description": "On basketball, film and other really cool things.", "friends_count": 27, "profile_link_color": "0084B4", "profile_image_url": "http://pbs.twimg.com/profile_images/3130997221/4a981c8594f3c9ec127542ae40bac4ce_normal.jpeg", "notifications": false, "profile_background_image_url_https": "https://abs.twimg.com/images/themes/theme1/bg.png", "profile_background_color": "C0DEED", "profile_background_image_url": "http://abs.twimg.com/images/themes/theme1/bg.png", "name": "Kristian Meller", "is_translation_enabled": false, "profile_background_tile": false, "favourites_count": 5, "screen_name": "kristianmeller", "url": null, "created_at": "Sat Oct 22 02:18:30 +0000 2011", "contributors_enabled": false, "time_zone": "Berlin", "profile_sidebar_border_color": "C0DEED", "default_profile": true, "following": false, "listed_count": 
0}, "geo": null, "in_reply_to_user_id_str": null, "possibly_sensitive": false, "lang": "en", "created_at": "Wed Jan 08 22:09:55 +0000 2014", "in_reply_to_status_id_str": null, "place": null}
]
I would like to merge these into a single array like this:
[
{"user_id":"273631610","item_id":"0829482","rating":"7","contributors":null,"truncated":false,"text":"","in_reply_to_status_id":null,"id":421040281296052225,"favorite_count":0,"source":"<a href=\"http:\/\/itunes.apple.com\/us\/app\/imdb-movies-tv\/id342792525?mt=8&uo=4\" rel=\"nofollow\">IMDb Movies & TV on iOS<\/a>","retweeted":false,"coordinates":null,"entities":{"symbols":[],"user_mentions":[],"hashtags":[{"indices":[23,28],"text":"IMDb"}],"urls":[{"url":"/bk5E1fLOxB","indices":[29,51],"expanded_url":"http:\/\/www.imdb.com\/title\/tt0829482","display_url":"imdb.com\/title\/tt0829482"}]},"in_reply_to_screen_name":null,"id_str":"421040281296052225","retweet_count":0,"in_reply_to_user_id":null,"favorited":false,"user":{"follow_request_sent":false,"profile_use_background_image":true,"id":273631610,"verified":false,"profile_text_color":"333333","profile_image_url_https":"https:\/\/pbs.twimg.com\/profile_images\/378800000723050633\/0fadb894b240dd426eb7b850dd8378d6_normal.jpeg","profile_sidebar_fill_color":"DDEEF6","is_translator":false,"geo_enabled":true,"entities":{"description":{"urls":[]}},"followers_count":330,"protected":false,"location":"southend on sea","default_profile_image":false,"id_str":"273631610","lang":"en","utc_offset":null,"statuses_count":897,"description":"","friends_count":973,"profile_link_color":"0084B4","profile_image_url":"http:\/\/pbs.twimg.com\/profile_images\/378800000723050633\/0fadb894b240dd426eb7b850dd8378d6_normal.jpeg","notifications":false,"profile_background_image_url_https":"https:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png","profile_background_color":"C0DEED","profile_background_image_url":"http:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png","name":"Ben Morris","is_translation_enabled":false,"profile_background_tile":false,"favourites_count":12,"screen_name":"benyere","url":null,"created_at":"Mon Mar 28 21:31:52 +0000 
2011","contributors_enabled":false,"time_zone":null,"profile_sidebar_border_color":"C0DEED","default_profile":true,"following":false,"listed_count":0},"geo":null,"in_reply_to_user_id_str":null,"possibly_sensitive":false,"lang":"en","created_at":"Wed Jan 08 22:06:40 +0000 2014","in_reply_to_status_id_str":null,"place":null},
{"user_id":"40688800","item_id":"1343092","rating":"8","contributors":null,"truncated":false,"text":"","in_reply_to_status_id":null,"id":421040281296052225,"favorite_count":0,"source":"<a href=\"http:\/\/itunes.apple.com\/us\/app\/imdb-movies-tv\/id342792525?mt=8&uo=4\" rel=\"nofollow\">IMDb Movies & TV on iOS<\/a>","retweeted":false,"coordinates":null,"entities":{"symbols":[],"user_mentions":[],"hashtags":[{"indices":[23,28],"text":"IMDb"}],"urls":[{"url":"/bk5E1fLOxB","indices":[29,51],"expanded_url":"http:\/\/www.imdb.com\/title\/tt0829482","display_url":"imdb.com\/title\/tt0829482"}]},"in_reply_to_screen_name":null,"id_str":"421040281296052225","retweet_count":0,"in_reply_to_user_id":null,"favorited":false,"user":{"follow_request_sent":false,"profile_use_background_image":true,"id":273631610,"verified":false,"profile_text_color":"333333","profile_image_url_https":"https:\/\/pbs.twimg.com\/profile_images\/378800000723050633\/0fadb894b240dd426eb7b850dd8378d6_normal.jpeg","profile_sidebar_fill_color":"DDEEF6","is_translator":false,"geo_enabled":true,"entities":{"description":{"urls":[]}},"followers_count":330,"protected":false,"location":"southend on sea","default_profile_image":false,"id_str":"273631610","lang":"en","utc_offset":null,"statuses_count":897,"description":"","friends_count":973,"profile_link_color":"0084B4","profile_image_url":"http:\/\/pbs.twimg.com\/profile_images\/378800000723050633\/0fadb894b240dd426eb7b850dd8378d6_normal.jpeg","notifications":false,"profile_background_image_url_https":"https:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png","profile_background_color":"C0DEED","profile_background_image_url":"http:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png","name":"Ben Morris","is_translation_enabled":false,"profile_background_tile":false,"favourites_count":12,"screen_name":"benyere","url":null,"created_at":"Mon Mar 28 21:31:52 +0000 
2011","contributors_enabled":false,"time_zone":null,"profile_sidebar_border_color":"C0DEED","default_profile":true,"following":false,"listed_count":0},"geo":null,"in_reply_to_user_id_str":null,"possibly_sensitive":false,"lang":"en","created_at":"Wed Jan 08 22:06:40 +0000 2014","in_reply_to_status_id_str":null,"place":null},
{"user_id":"3956800865","item_id":"1453405","rating":"8","contributors":null,"truncated":false,"text":"","in_reply_to_status_id":null,"id":421040281296052225,"favorite_count":0,"source":"<a href=\"http:\/\/itunes.apple.com\/us\/app\/imdb-movies-tv\/id342792525?mt=8&uo=4\" rel=\"nofollow\">IMDb Movies & TV on iOS<\/a>","retweeted":false,"coordinates":null,"entities":{"symbols":[],"user_mentions":[],"hashtags":[{"indices":[23,28],"text":"IMDb"}],"urls":[{"url":"\/bk5E1fLOxB","indices":[29,51],"expanded_url":"http:\/\/www.imdb.com\/title\/tt0829482","display_url":"imdb.com\/title\/tt0829482"}]},"in_reply_to_screen_name":null,"id_str":"421040281296052225","retweet_count":0,"in_reply_to_user_id":null,"favorited":false,"user":{"follow_request_sent":false,"profile_use_background_image":true,"id":273631610,"verified":false,"profile_text_color":"333333","profile_image_url_https":"https:\/\/pbs.twimg.com\/profile_images\/378800000723050633\/0fadb894b240dd426eb7b850dd8378d6_normal.jpeg","profile_sidebar_fill_color":"DDEEF6","is_translator":false,"geo_enabled":true,"entities":{"description":{"urls":[]}},"followers_count":330,"protected":false,"location":"southend on sea","default_profile_image":false,"id_str":"273631610","lang":"en","utc_offset":null,"statuses_count":897,"description":"","friends_count":973,"profile_link_color":"0084B4","profile_image_url":"http:\/\/pbs.twimg.com\/profile_images\/378800000723050633\/0fadb894b240dd426eb7b850dd8378d6_normal.jpeg","notifications":false,"profile_background_image_url_https":"https:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png","profile_background_color":"C0DEED","profile_background_image_url":"http:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png","name":"Ben Morris","is_translation_enabled":false,"profile_background_tile":false,"favourites_count":12,"screen_name":"benyere","url":null,"created_at":"Mon Mar 28 21:31:52 +0000 
2011","contributors_enabled":false,"time_zone":null,"profile_sidebar_border_color":"C0DEED","default_profile":true,"following":false,"listed_count":0},"geo":null,"in_reply_to_user_id_str":null,"possibly_sensitive":false,"lang":"en","created_at":"Wed Jan 08 22:06:40 +0000 2014","in_reply_to_status_id_str":null,"place":null}
]
To do that I did the following code:
# Attempt to merge the records of test.json and test_userz.json into merged.json.
import json
with open("test.json") as fin1:
data1 = json.load(fin1)
with open("test_userz.json") as fin2:
data2 = json.load(fin2)
data1.update(data2)  # fails: both files hold a top-level JSON array, so data1 is a list, and list has no update()
with open("merged.json", "w") as fout:
json.dump(data1, fout)
but at runtime I see this error:
Traceback (most recent call last):
File "json/merge2jsons.py", line 7, in <module>
data1.update(data2)
AttributeError: 'list' object has no attribute 'update'
How can I do this merge?
The error occurs because data1, once loaded from your file, is a list. You are calling update on it, but update is a dictionary method that lists do not have.
To help provide an example of your error, look at this:
>>> [].update('stuff')
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
AttributeError: 'list' object has no attribute 'update'
I tried calling update on my list. There is no update method on the list. If I do this for a dictionary, however:
>>> d = {'a': 1}
>>> d.update({'b': 2})
>>> d
{'b': 2, 'a': 1}
As you can see, the update method exists, and works fine.
So, what you want to do is iterate over one of the lists of dictionaries and update the matching dictionary in the other list along the way. Since you are confident that your data is sequential and of equal length, this can be done simply with:
# Load both JSON arrays; they are expected to line up index-for-index.
with open('f1.txt') as f1, open('f2.txt') as f2:
    first_list = json.load(f1)
    second_list = json.load(f2)

# Fold each record of first_list into the record at the same position in
# second_list (in place); on duplicate keys the first_list value wins.
for position, record in enumerate(first_list):
    second_list[position].update(record)
What is happening here is that enumerate gives you an incrementing count alongside each item, which serves as the index for accessing the corresponding dictionary in the other list. With that index, you simply update each dictionary in second_list with the dictionary you get from the same iteration of first_list.
To merge dictionaries that are at the same index in two lists:
# Pair the records position by position (zip stops at the shorter list) and
# build a new dict for each pair; on duplicate keys the data2 value wins.
merged = [
    {**left, **right}
    for left, right in zip(data1, data2)
]

Illegal_argument_exception when importing Twitter into Elasticsearch

I am new to Elasticsearch and am attempting to do some data analysis of Twitter data by importing it into Elasticsearch and running Kibana on it. I'm getting stuck when importing Twitter data into Elasticsearch. Any help is appreciated!
Here's a sample working program that produces the error.
import json
from elasticsearch import Elasticsearch
es = Elasticsearch()
data = json.loads(open("data.json").read())  # parses the whole tweet document, including its top-level "_id" field
es.index(index='tweets5', doc_type='tweets', id=data['id'], body=data)  # this call is rejected with HTTP 400 (illegal_argument_exception)
Here's the error:
Traceback (most recent call last):
File "elasticsearch_import_test.py", line 5, in <module>
es.index(index='tweets5', doc_type='tweets', id=data['id'], body=data)
File "/usr/local/lib/python2.7/site-packages/elasticsearch/client/utils.py", line 69, in _wrapped
return func(*args, params=params, **kwargs)
File "/usr/local/lib/python2.7/site-packages/elasticsearch/client/__init__.py", line 279, in index
_make_path(index, doc_type, id), params=params, body=body)
File "/usr/local/lib/python2.7/site-packages/elasticsearch/transport.py", line 329, in perform_request
status, headers, data = connection.perform_request(method, url, params, body, ignore=ignore, timeout=timeout)
File "/usr/local/lib/python2.7/site-packages/elasticsearch/connection/http_urllib3.py", line 109, in perform_request
self._raise_error(response.status, raw_data)
File "/usr/local/lib/python2.7/site-packages/elasticsearch/connection/base.py", line 108, in _raise_error
raise HTTP_EXCEPTIONS.get(status_code, TransportError)(status_code, error_message, additional_info)
elasticsearch.exceptions.RequestError: TransportError(400, u'illegal_argument_exception', u'[Raza][127.0.0.1:9300][indices:data/write/index[p]]')
Here's an example Twitter JSON file (data.json)
{
"_id": {
"$oid": "570597358c68d71c16b3b722"
},
"contributors": null,
"coordinates": null,
"created_at": "Wed Apr 06 23:09:41 +0000 2016",
"entities": {
"hashtags": [
{
"indices": [
68,
72
],
"text": "dnd"
},
{
"indices": [
73,
79
],
"text": "Nat20"
},
{
"indices": [
80,
93
],
"text": "CriticalRole"
},
{
"indices": [
94,
103
],
"text": "d20babes"
}
],
"media": [
{
"display_url": "pic.twitter.com/YQoxEuEAXV",
"expanded_url": "http://twitter.com/Zenttsilverwing/status/715953298076012545/photo/1",
"id": 715953292849754112,
"id_str": "715953292849754112",
"indices": [
104,
127
],
"media_url": "http://pbs.twimg.com/media/Ce-TugAUsAASZht.jpg",
"media_url_https": "https://pbs.twimg.com/media/Ce-TugAUsAASZht.jpg",
"sizes": {
"large": {
"h": 768,
"resize": "fit",
"w": 1024
},
"medium": {
"h": 450,
"resize": "fit",
"w": 600
},
"small": {
"h": 255,
"resize": "fit",
"w": 340
},
"thumb": {
"h": 150,
"resize": "crop",
"w": 150
}
},
"source_status_id": 715953298076012545,
"source_status_id_str": "715953298076012545",
"source_user_id": 2375847847,
"source_user_id_str": "2375847847",
"type": "photo",
"url": "https://shortened.url/YQoxEuEAXV"
}
],
"symbols": [],
"urls": [
{
"display_url": "darkcastlecollectibles.com",
"expanded_url": "http://www.darkcastlecollectibles.com/",
"indices": [
44,
67
],
"url": "https://shortened.url/SJgFTE0o8h"
}
],
"user_mentions": [
{
"id": 2375847847,
"id_str": "2375847847",
"indices": [
3,
19
],
"name": "Zack Chini",
"screen_name": "Zenttsilverwing"
}
]
},
"extended_entities": {
"media": [
{
"display_url": "pic.twitter.com/YQoxEuEAXV",
"expanded_url": "http://twitter.com/Zenttsilverwing/status/715953298076012545/photo/1",
"id": 715953292849754112,
"id_str": "715953292849754112",
"indices": [
104,
127
],
"media_url": "http://pbs.twimg.com/media/Ce-TugAUsAASZht.jpg",
"media_url_https": "https://pbs.twimg.com/media/Ce-TugAUsAASZht.jpg",
"sizes": {
"large": {
"h": 768,
"resize": "fit",
"w": 1024
},
"medium": {
"h": 450,
"resize": "fit",
"w": 600
},
"small": {
"h": 255,
"resize": "fit",
"w": 340
},
"thumb": {
"h": 150,
"resize": "crop",
"w": 150
}
},
"source_status_id": 715953298076012545,
"source_status_id_str": "715953298076012545",
"source_user_id": 2375847847,
"source_user_id_str": "2375847847",
"type": "photo",
"url": "https://shortened.url/YQoxEuEAXV"
},
{
"display_url": "pic.twitter.com/YQoxEuEAXV",
"expanded_url": "http://twitter.com/Zenttsilverwing/status/715953298076012545/photo/1",
"id": 715953295727009793,
"id_str": "715953295727009793",
"indices": [
104,
127
],
"media_url": "http://pbs.twimg.com/media/Ce-TuquUIAEsVn9.jpg",
"media_url_https": "https://pbs.twimg.com/media/Ce-TuquUIAEsVn9.jpg",
"sizes": {
"large": {
"h": 768,
"resize": "fit",
"w": 1024
},
"medium": {
"h": 450,
"resize": "fit",
"w": 600
},
"small": {
"h": 255,
"resize": "fit",
"w": 340
},
"thumb": {
"h": 150,
"resize": "crop",
"w": 150
}
},
"source_status_id": 715953298076012545,
"source_status_id_str": "715953298076012545",
"source_user_id": 2375847847,
"source_user_id_str": "2375847847",
"type": "photo",
"url": "https://shortened.url/YQoxEuEAXV"
}
]
},
"favorite_count": 0,
"favorited": false,
"filter_level": "low",
"geo": null,
"id": 717851801417031680,
"id_str": "717851801417031680",
"in_reply_to_screen_name": null,
"in_reply_to_status_id": null,
"in_reply_to_status_id_str": null,
"in_reply_to_user_id": null,
"in_reply_to_user_id_str": null,
"is_quote_status": false,
"lang": "en",
"place": null,
"possibly_sensitive": false,
"retweet_count": 0,
"retweeted": false,
"retweeted_status": {
"contributors": null,
"coordinates": null,
"created_at": "Fri Apr 01 17:25:42 +0000 2016",
"entities": {
"hashtags": [
{
"indices": [
47,
51
],
"text": "dnd"
},
{
"indices": [
52,
58
],
"text": "Nat20"
},
{
"indices": [
59,
72
],
"text": "CriticalRole"
},
{
"indices": [
73,
82
],
"text": "d20babes"
}
],
"media": [
{
"display_url": "pic.twitter.com/YQoxEuEAXV",
"expanded_url": "http://twitter.com/Zenttsilverwing/status/715953298076012545/photo/1",
"id": 715953292849754112,
"id_str": "715953292849754112",
"indices": [
83,
106
],
"media_url": "http://pbs.twimg.com/media/Ce-TugAUsAASZht.jpg",
"media_url_https": "https://pbs.twimg.com/media/Ce-TugAUsAASZht.jpg",
"sizes": {
"large": {
"h": 768,
"resize": "fit",
"w": 1024
},
"medium": {
"h": 450,
"resize": "fit",
"w": 600
},
"small": {
"h": 255,
"resize": "fit",
"w": 340
},
"thumb": {
"h": 150,
"resize": "crop",
"w": 150
}
},
"type": "photo",
"url": "https://shortened.url/YQoxEuEAXV"
}
],
"symbols": [],
"urls": [
{
"display_url": "darkcastlecollectibles.com",
"expanded_url": "http://www.darkcastlecollectibles.com/",
"indices": [
23,
46
],
"url": "https://shortened.url/SJgFTE0o8h"
}
],
"user_mentions": []
},
"extended_entities": {
"media": [
{
"display_url": "pic.twitter.com/YQoxEuEAXV",
"expanded_url": "http://twitter.com/Zenttsilverwing/status/715953298076012545/photo/1",
"id": 715953292849754112,
"id_str": "715953292849754112",
"indices": [
83,
106
],
"media_url": "http://pbs.twimg.com/media/Ce-TugAUsAASZht.jpg",
"media_url_https": "https://pbs.twimg.com/media/Ce-TugAUsAASZht.jpg",
"sizes": {
"large": {
"h": 768,
"resize": "fit",
"w": 1024
},
"medium": {
"h": 450,
"resize": "fit",
"w": 600
},
"small": {
"h": 255,
"resize": "fit",
"w": 340
},
"thumb": {
"h": 150,
"resize": "crop",
"w": 150
}
},
"type": "photo",
"url": "https://shortened.url/YQoxEuEAXV"
},
{
"display_url": "pic.twitter.com/YQoxEuEAXV",
"expanded_url": "http://twitter.com/Zenttsilverwing/status/715953298076012545/photo/1",
"id": 715953295727009793,
"id_str": "715953295727009793",
"indices": [
83,
106
],
"media_url": "http://pbs.twimg.com/media/Ce-TuquUIAEsVn9.jpg",
"media_url_https": "https://pbs.twimg.com/media/Ce-TuquUIAEsVn9.jpg",
"sizes": {
"large": {
"h": 768,
"resize": "fit",
"w": 1024
},
"medium": {
"h": 450,
"resize": "fit",
"w": 600
},
"small": {
"h": 255,
"resize": "fit",
"w": 340
},
"thumb": {
"h": 150,
"resize": "crop",
"w": 150
}
},
"type": "photo",
"url": "https://shortened.url/YQoxEuEAXV"
}
]
},
"favorite_count": 5,
"favorited": false,
"filter_level": "low",
"geo": null,
"id": 715953298076012545,
"id_str": "715953298076012545",
"in_reply_to_screen_name": null,
"in_reply_to_status_id": null,
"in_reply_to_status_id_str": null,
"in_reply_to_user_id": null,
"in_reply_to_user_id_str": null,
"is_quote_status": false,
"lang": "en",
"place": null,
"possibly_sensitive": false,
"retweet_count": 1,
"retweeted": false,
"source": "Twitter Web Client",
"text": "coins came in!! Thanks https://shortened.url/SJgFTE0o8h #dnd #Nat20 #CriticalRole #d20babes https://shortened.url/YQoxEuEAXV",
"truncated": false,
"user": {
"contributors_enabled": false,
"created_at": "Thu Mar 06 19:59:14 +0000 2014",
"default_profile": true,
"default_profile_image": false,
"description": "DM Geek Critter Con-man. I am here to like your art ^.^",
"favourites_count": 4990,
"follow_request_sent": null,
"followers_count": 57,
"following": null,
"friends_count": 183,
"geo_enabled": false,
"id": 2375847847,
"id_str": "2375847847",
"is_translator": false,
"lang": "en",
"listed_count": 7,
"location": "Flower Mound, TX",
"name": "Zack Chini",
"notifications": null,
"profile_background_color": "C0DEED",
"profile_background_image_url": "http://abs.twimg.com/images/themes/theme1/bg.png",
"profile_background_image_url_https": "https://abs.twimg.com/images/themes/theme1/bg.png",
"profile_background_tile": false,
"profile_banner_url": "https://pbs.twimg.com/profile_banners/2375847847/1430928759",
"profile_image_url": "http://pbs.twimg.com/profile_images/708816622358663168/mNF4Ysr5_normal.jpg",
"profile_image_url_https": "https://pbs.twimg.com/profile_images/708816622358663168/mNF4Ysr5_normal.jpg",
"profile_link_color": "0084B4",
"profile_sidebar_border_color": "C0DEED",
"profile_sidebar_fill_color": "DDEEF6",
"profile_text_color": "333333",
"profile_use_background_image": true,
"protected": false,
"screen_name": "Zenttsilverwing",
"statuses_count": 551,
"time_zone": null,
"url": null,
"utc_offset": null,
"verified": false
}
},
"source": "Twitter Web Client",
"text": "RT #Zenttsilverwing: coins came in!! Thanks https://shortened.url/SJgFTE0o8h #dnd #Nat20 #CriticalRole #d20babes https://shortened.url/YQoxEuEAXV",
"timestamp_ms": "1459984181156",
"truncated": false,
"user": {
"contributors_enabled": false,
"created_at": "Tue Feb 10 04:31:18 +0000 2009",
"default_profile": false,
"default_profile_image": false,
"description": "I use Twitter to primarily retweet Critter artwork of Critical Role and their own creations. I maintain a list of all the Critter artists I've come across.",
"favourites_count": 17586,
"follow_request_sent": null,
"followers_count": 318,
"following": null,
"friends_count": 651,
"geo_enabled": true,
"id": 20491914,
"id_str": "20491914",
"is_translator": false,
"lang": "en",
"listed_count": 33,
"location": "SanDiego, CA",
"name": "UnknownOutrider",
"notifications": null,
"profile_background_color": "EDECE9",
"profile_background_image_url": "http://abs.twimg.com/images/themes/theme3/bg.gif",
"profile_background_image_url_https": "https://abs.twimg.com/images/themes/theme3/bg.gif",
"profile_background_tile": false,
"profile_image_url": "http://pbs.twimg.com/profile_images/224346493/cartoon_dragon_tattoo_designs_normal.jpg",
"profile_image_url_https": "https://pbs.twimg.com/profile_images/224346493/cartoon_dragon_tattoo_designs_normal.jpg",
"profile_link_color": "088253",
"profile_sidebar_border_color": "D3D2CF",
"profile_sidebar_fill_color": "E3E2DE",
"profile_text_color": "634047",
"profile_use_background_image": true,
"protected": false,
"screen_name": "UnknownOutrider",
"statuses_count": 12760,
"time_zone": "Pacific Time (US & Canada)",
"url": null,
"utc_offset": -25200,
"verified": false
}
}
The reason this doesn't work is that you are trying to index a document containing a field named _id, which already exists as a built-in field in Elasticsearch. So either delete that field or rename it:
import json
from elasticsearch import Elasticsearch
es = Elasticsearch()

# Read the tweet inside a context manager so the file handle is closed
# promptly instead of being left for the garbage collector.
with open("data.json") as tweet_file:
    data = json.load(tweet_file)

# "_id" is an Elasticsearch metadata field and is rejected inside a document
# body, so drop it (pop() with a default is a no-op when the key is absent).
# To keep the value instead, move it to a differently named key:
#     data['id_'] = data.pop('_id')
data.pop('_id', None)

es.index(index='tweets5', doc_type='tweets', id=data['id'], body=data)

Categories

Resources