Split JSON data by certain string values (Python)

I want to split the incidents by their "incidentType" values in Python. There are always 5 of these values: period, injuryTime, goal, card, and substitution.
JSON file:
{
"incidents": [
{
"text": "FT",
"homeScore": 2,
"awayScore": 1,
"isLive": false,
"time": 90,
"addedTime": 999,
"incidentType": "period"
},
{
"length": 4,
"time": 90,
"addedTime": 0,
"incidentType": "injuryTime"
},
{
"homeScore": 2,
"awayScore": 1,
"player": {
"name": "Mostafa Mohamed",
"firstName": "",
"lastName": "",
"slug": "mostafa-mohamed",
"shortName": "M. Mohamed",
"position": "F",
"userCount": 3949,
"id": 873551
},
"id": 141786584,
"time": 89,
"isHome": true,
"incidentClass": "penalty",
"incidentType": "goal"
},
{
"player": {
"name": "Duško Tošić",
"slug": "dusko-tosic",
"shortName": "D. Tošić",
"position": "D",
"userCount": 215,
"id": 14557
},
"playerName": "Duško Tošić",
"reason": "Foul",
"id": 119728583,
"time": 85,
"isHome": false,
"incidentClass": "yellow",
"incidentType": "card"
},
{
"playerIn": {
"name": "Younès Belhanda",
"slug": "younes-belhanda",
"shortName": "Y. Belhanda",
"position": "M",
"userCount": 2165,
"id": 72999
},
"playerOut": {
"name": "Martin Linnes",
"slug": "martin-linnes",
"shortName": "M. Linnes",
"position": "D",
"userCount": 339,
"id": 109569
},
"id": 120059400,
"time": 82,
"isHome": true,
"incidentType": "substitution"
},
{
"player": {
"name": "Kevin Varga",
"slug": "kevin-varga",
"shortName": "K. Varga",
"position": "M",
"userCount": 274,
"id": 602730
},
"playerName": "Kevin Varga",
"reason": "Foul",
"id": 119728582,
"time": 82,
"isHome": false,
"incidentClass": "yellow",
"incidentType": "card"
},
{
"playerIn": {
"name": "DeAndre Yedlin",
"slug": "deandre-yedlin",
"shortName": "D. Yedlin",
"position": "D",
"userCount": 702,
"id": 314040
},
"playerOut": {
"name": "Muhammed Kerem Aktürkoğlu",
"firstName": "",
"lastName": "",
"slug": "muhammed-kerem-akturkoglu",
"shortName": "M. K. Aktürkoğlu",
"position": "F",
"userCount": 281,
"id": 903324
},
"id": 120059399,
"time": 77,
"isHome": true,
"incidentType": "substitution"
},
{
"playerIn": {
"name": "Ryan Donk",
"slug": "ryan-donk",
"shortName": "R. Donk",
"position": "D",
"userCount": 489,
"id": 14900
},
"playerOut": {
"name": "Ryan Babel",
"slug": "ryan-babel",
"shortName": "R. Babel",
"position": "F",
"userCount": 1577,
"id": 1876
},
"id": 120059397,
"time": 72,
"isHome": true,
"incidentType": "substitution"
},
{
"playerIn": {
"name": "Emre Akbaba",
"slug": "emre-akbaba",
"shortName": "E. Akbaba",
"position": "M",
"userCount": 604,
"id": 343527
},
"playerOut": {
"name": "Gedson Fernandes",
"slug": "fernandes-gedson",
"shortName": "G. Fernandes",
"position": "M",
"userCount": 3030,
"id": 862055
},
"id": 120059396,
"time": 71,
"isHome": true,
"incidentType": "substitution"
},
{
"playerIn": {
"name": "Henry Onyekuru",
"slug": "henry-onyekuru",
"shortName": "H. Onyekuru",
"position": "M",
"userCount": 1474,
"id": 809220
},
"playerOut": {
"name": "Emre Kılınç",
"slug": "emre-kilinc",
"shortName": "E. Kılınç",
"position": "M",
"userCount": 526,
"id": 202032
},
"id": 120059398,
"time": 71,
"isHome": true,
"incidentType": "substitution"
},
{
"player": {
"name": "Haris Hajradinović",
"slug": "haris-hajradinovic",
"shortName": "H. Hajradinović",
"position": "M",
"userCount": 357,
"id": 254979
},
"playerName": "Haris Hajradinović",
"reason": "Foul",
"id": 119728581,
"time": 71,
"isHome": false,
"incidentClass": "yellow",
"incidentType": "card"
},
{
"homeScore": 1,
"awayScore": 1,
"player": {
"name": "Isaac Kiese Thelin",
"slug": "isaac-kiese-thelin",
"shortName": "I. K. Thelin",
"position": "F",
"userCount": 386,
"id": 178743
},
"assist1": {
"name": "Haris Hajradinović",
"slug": "haris-hajradinovic",
"shortName": "H. Hajradinović",
"position": "M",
"userCount": 357,
"id": 254979
},
"id": 141786585,
"time": 51,
"isHome": false,
"incidentClass": "regular",
"incidentType": "goal"
},
{
"playerIn": {
"name": "Kevin Varga",
"slug": "kevin-varga",
"shortName": "K. Varga",
"position": "M",
"userCount": 274,
"id": 602730
},
"playerOut": {
"name": "Gilbert Koomson",
"slug": "gilbert-koomson",
"shortName": "G. Koomson",
"position": "F",
"userCount": 76,
"id": 341107
},
"id": 120059401,
"time": 46,
"isHome": false,
"incidentType": "substitution"
},
{
"text": "HT",
"homeScore": 1,
"awayScore": 0,
"isLive": false,
"time": 45,
"addedTime": 999,
"incidentType": "period"
},
{
"player": {
"name": "Isaac Kiese Thelin",
"slug": "isaac-kiese-thelin",
"shortName": "I. K. Thelin",
"position": "F",
"userCount": 386,
"id": 178743
},
"playerName": "Isaac Kiese Thelin",
"reason": "Foul",
"id": 119728580,
"time": 15,
"isHome": false,
"incidentClass": "yellow",
"incidentType": "card"
},
{
"homeScore": 1,
"awayScore": 0,
"player": {
"name": "Muhammed Kerem Aktürkoğlu",
"firstName": "",
"lastName": "",
"slug": "muhammed-kerem-akturkoglu",
"shortName": "M. K. Aktürkoğlu",
"position": "F",
"userCount": 281,
"id": 903324
},
"id": 141786583,
"time": 9,
"isHome": true,
"incidentClass": "regular",
"incidentType": "goal"
}
]
}

ABC = { ... }  # the JSON above, pasted in as a Python dict (false/true written as False/True)
First, create a dictionary to hold all distinct incidentType values. Then iterate through the incidents and check whether each incidentType already exists in the dictionary: if it does, append the incident to its list; if not, create a new key/value pair.
result = {}
for js in ABC["incidents"]:
    icdType = js["incidentType"]
    if icdType in result:
        result[icdType].append(js)
    else:
        result[icdType] = [js]

for key, val in result.items():
    print(key, ":", val, "\n")
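The same grouping can be written a bit more compactly with collections.defaultdict, which removes the membership check (a minimal sketch, using the same ABC dict as above):
from collections import defaultdict

result = defaultdict(list)
for incident in ABC["incidents"]:
    # Missing keys are created with an empty list automatically.
    result[incident["incidentType"]].append(incident)

for key, val in result.items():
    print(key, ":", val, "\n")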

Related

How to map the dictionary values to another dictionary

I have a dictionary, shown below:
{
"aggregations": {
"A": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{ "key": "ADL", "doc_count": 1 },
{ "key": "SDD", "doc_count": 1 },
{ "key": "JJD", "doc_count": 1 }
]
},
"B": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{ "key": "ABC", "doc_count": 1 },
{ "key": "CDE", "doc_count": 1 },
{ "key": "FGH", "doc_count": 1 }
]
},
"C": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{ "key": "XYX", "doc_count": 1 },
{ "key": "NXS", "doc_count": 1 }
]
}
}
}
aggregations keys will become aggregationfilters.fieldName
aggregations.buckets.key will become aggregationfilters.values.title
aggregationfilters.values.paragraph is null every time
aggregations.buckets.doc_count will become aggregationfilters.values.count
Basically, I need to extract the aggregations keys and bucket values and put them into a different dictionary.
I need to write a general code structure to do that.
I cannot do this with .pop() (renaming keys on) the dictionary.
My expected output:
{
"aggregationfilters": [
{
"name": "ABC",
"fieldName": "A",
"values": [
{ "title": "ADL", "paragraph": null, "count": 1 },
{ "title": "SDD", "paragraph": null, "count": 1 },
{ "title": "JJD", "paragraph": null, "count": 1 }
]
}, {
"name": "CDE",
"fieldName": "B",
"values": [
{ "title": "ABC", "paragraph": null, "count": 1 },
{ "title": "CDE", "paragraph": null, "count": 1 },
{ "title": "FGH", "paragraph": null, "count": 1 }
]
}, {
"name": "FGH",
"fieldName": "C",
"values": [
{ "title": "XYX", "paragraph": null, "count": 1 },
{ "title": "NXS", "paragraph": null, "count": 1 }
]
}
]
}
Well, this works, but even with my best effort this still doesn't look that clean.
import json

source = {
    "aggregations": {
        "A": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
                {"key": "ADL", "doc_count": 1},
                {"key": "SDD", "doc_count": 1},
                {"key": "JJD", "doc_count": 1},
            ],
        },
        "B": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
                {"key": "ABC", "doc_count": 1},
                {"key": "CDE", "doc_count": 1},
                {"key": "FGH", "doc_count": 1},
            ],
        },
        "C": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [{"key": "XYX", "doc_count": 1}, {"key": "NXS", "doc_count": 1}],
        },
    }
}

convert_map = {
    "buckets": "values",
    "doc_count": "count",
    "key": "title",
}
remove_map = {"sum_other_doc_count", "doc_count_error_upper_bound"}
add_map = {"name": "Changed VAL_", "fieldName": "VAL_"}


def converting_generator(
    source_: dict, convert_map_: dict, remove_map_: set, add_map_: dict
):
    working_dict = {k: v for k, v in source_.items()}
    variable_identifier = "VAL_"
    for key, inner_dic in working_dict.items():
        inner_dic: dict
        # Drop the unwanted keys.
        for rm_key in remove_map_:
            try:
                inner_dic.pop(rm_key)
            except KeyError:
                pass
        # Add the new keys, substituting the aggregation name for "VAL_".
        for add_key, add_val in add_map_.items():
            inner_dic[add_key] = add_val.replace(variable_identifier, key)
        # Rename keys by round-tripping through a JSON string.
        dumped = json.dumps(inner_dic, indent=2)
        for original, target in convert_map_.items():
            dumped = dumped.replace(original, target)
        yield json.loads(dumped)


converted = {
    "aggregation_filters": list(
        converting_generator(source["aggregations"], convert_map, remove_map, add_map)
    )
}

for inner_dict in converted["aggregation_filters"]:
    for even_inner_dict in inner_dict["values"]:
        even_inner_dict["paragraph"] = None

print(json.dumps(converted, indent=2))
Output:
{
"aggregation_filters": [
{
"values": [
{
"title": "ADL",
"count": 1,
"paragraph": null
},
{
"title": "SDD",
"count": 1,
"paragraph": null
},
{
"title": "JJD",
"count": 1,
"paragraph": null
}
],
"name": "Changed A",
"fieldName": "A"
},
{
"values": [
{
"title": "ABC",
"count": 1,
"paragraph": null
},
{
"title": "CDE",
"count": 1,
"paragraph": null
},
{
"title": "FGH",
"count": 1,
"paragraph": null
}
],
"name": "Changed B",
"fieldName": "B"
},
{
"values": [
{
"title": "XYX",
"count": 1,
"paragraph": null
},
{
"title": "NXS",
"count": 1,
"paragraph": null
}
],
"name": "Changed C",
"fieldName": "C"
}
]
}
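Note that the string-replacement round-trip is fragile (it would also rewrite "key" or "buckets" if they appeared inside values). A more direct construction might look like this (a sketch, assuming the same source dict and the same "Changed" naming convention as above):
def convert(source):
    return {
        "aggregation_filters": [
            {
                "name": f"Changed {field}",
                "fieldName": field,
                "values": [
                    {"title": b["key"], "count": b["doc_count"], "paragraph": None}
                    for b in agg["buckets"]
                ],
            }
            for field, agg in source["aggregations"].items()
        ]
    }

print(json.dumps(convert(source), indent=2))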
Always show your code; it would be nice if it's working code, to show that you've put at least that much effort into your problem. I don't mind here, as this feels like puzzle solving, but others may not.

Get names of keys in objectpath

How would I get the names of the keys, for example [800, 801] (the key names are unknown), with objectpath?
It is easy in jmespath: keys(#).
"groups": {
"800": {
"short_name": "22",
"oname": "11",
"group": 8,
"title": "SS",
"name": "33",
"onames": [""],
"alt_name": False,
"waytype": 1,
"multiple": 1,
"primary": 1
},
"801": {
"short_name": "ss",
"oname": "zz",
"group": 8,
"title": "ss",
"name": "bbb",
"onames": [""],
"alt_name": False,
"waytype": 1,
"multiple": 1,
"primary": 0
}
}
Assume your object is assigned to a variable named name:
const name = { "groups": {
"800": {
"short_name": "22",
"oname": "11",
"group": 8,
"title": "SS",
"name": "33",
"onames": [""],
"alt_name": false,
"waytype": 1,
"multiple": 1,
"primary": 1
},
"801": {
"short_name": "ss",
"oname": "zz",
"group": 8,
"title": "ss",
"name": "bbb",
"onames": [""],
"alt_name": false,
"waytype": 1,
"multiple": 1,
"primary": 0
} } }
Use a for...in loop to get the key names:
for (var num in name.groups) {
  console.log(num);
}
and to get the values of each key:
for (var num in name.groups) {
  console.log(name.groups[num]);
}
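Since objectpath is usually used from Python, it may also help to note that once the JSON is parsed into a Python dict, the key names are available without any query language (a minimal sketch; obj stands for the parsed object):
obj = {"groups": {"800": {"short_name": "22"}, "801": {"short_name": "ss"}}}

# Key names, e.g. ['800', '801']
print(list(obj["groups"].keys()))

# Key name together with the corresponding sub-dict
for key, value in obj["groups"].items():
    print(key, value)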

remove duplicates from json [duplicate]

This question already has answers here:
Removing Duplicates From Dictionary
(11 answers)
Closed 3 years ago.
I have the following JSON:
{
"FileResults": [
{
"FileName": "gtg.0.wav",
"FileUrl": null,
"Results": [
{
"Status": "Success",
"ChannelNumber": null,
"SpeakerId": null,
"Offset": 90200000,
"Duration": 25600000,
"NBest": [
{
"Confidence": 0.9415368,
"Lexical": "",
"ITN": "",
"MaskedITN": "",
"Display": ".",
"Sentiment": null,
"Words": [
{
"Word": "ask",
"Offset": 944400000,
"Duration": 3500000
},
{
"Word": "everybody",
"Offset": 94000000,
"Duration": 4400000
},
{
"Word": "to",
"Offset": 98400000,
"Duration": 1200000
},
{
"Word": "please",
"Offset": 99600000,
"Duration": 3000000
},
{
"Word": "take",
"Offset": 102600000,
"Duration": 2400000
},
{
"Word": "their",
"Offset": 105000000,
"Duration": 2400000
},
{
"Word": "seats",
"Offset": 107400000,
"Duration": 8200000
}
]
}
]
},
{
"Status": "Success",
"ChannelNumber": null,
"SpeakerId": null,
"Offset": 90200000,
"Duration": 25600000,
"NBest": [
{
"Confidence": 0.9415368,
"Lexical": "",
"ITN": "",
"MaskedITN": "",
"Display": ".",
"Sentiment": null,
"Words": [
{
"Word": "ask",
"Offset": 90500000,
"Duration": 3500000
},
{
"Word": "everybody",
"Offset": 94000000,
"Duration": 4400000
},
{
"Word": "to",
"Offset": 98400000,
"Duration": 1200000
},
{
"Word": "please",
"Offset": 99600000,
"Duration": 3000000
},
{
"Word": "take",
"Offset": 102600000,
"Duration": 2400000
},
{
"Word": "their",
"Offset": 105000000,
"Duration": 2400000
},
{
"Word": "seats",
"Offset": 107400000,
"Duration": 8200000
}
]
}
]
},
{
"Status": "Success",
"ChannelNumber": null,
"SpeakerId": null,
"Offset": 169400000,
"Duration": 157500000,
"NBest": [
{
"Confidence": 0.944001734,
"Lexical": "",
"ITN": "",
"MaskedITN": "",
"Display": "",
"Sentiment": null,
"Words": [
{
"Word": "welcome",
"Offset": 169700000,
"Duration": 4500000
},
{
"Word": "to",
"Offset": 174200000,
"Duration": 2600000
},
{
"Word": "the",
"Offset": 176800000,
"Duration": 8600000
},
{
"Word": "scheduled",
"Offset": 186500000,
"Duration": 7900000
},
{
"Word": "special",
"Offset": 194400000,
"Duration": 6000000
},
{
"Word": "budget",
"Offset": 200400000,
"Duration": 4400000
},
{
"Word": "hearings",
"Offset": 204800000,
"Duration": 6400000
},
{
"Word": "meeting",
"Offset": 211400000,
"Duration": 4800000
},
{
"Word": "of",
"Offset": 216200000,
"Duration": 1600000
},
{
"Word": "the",
"Offset": 217800000,
"Duration": 1300000
},
{
"Word": "los",
"Offset": 219100000,
"Duration": 2300000
},
{
"Word": "lm",
"Offset": 221400000,
"Duration": 3600000
},
{
"Word": "mk",
"Offset": 225000000,
"Duration": 5500000
},
{
"Word": "board",
"Offset": 231800000,
"Duration": 4600000
},
{
"Word": "of",
"Offset": 236400000,
"Duration": 1000000
},
{
"Word": "supervisors",
"Offset": 237400000,
"Duration": 9200000
},
{
"Word": "seems",
"Offset": 246600000,
"Duration": 3000000
},
{
"Word": "like",
"Offset": 249600000,
"Duration": 2400000
},
{
"Word": "we",
"Offset": 252000000,
"Duration": 1400000
},
{
"Word": "were",
"Offset": 253400000,
"Duration": 1600000
},
{
"Word": "just",
"Offset": 255000000,
"Duration": 3400000
},
{
"Word": "here",
"Offset": 258400000,
"Duration": 5500000
},
{
"Word": "but",
"Offset": 270200000,
"Duration": 4000000
},
{
"Word": "no",
"Offset": 274200000,
"Duration": 3000000
},
{
"Word": "it's",
"Offset": 277200000,
"Duration": 1600000
},
{
"Word": "wednesday",
"Offset": 278800000,
"Duration": 6700000
},
{
"Word": "may",
"Offset": 288600000,
"Duration": 3800000
},
{
"Word": "sixteenth",
"Offset": 292400000,
"Duration": 8800000
},
{
"Word": "full",
"Offset": 307200000,
"Duration": 4600000
},
{
"Word": "complement",
"Offset": 311800000,
"Duration": 6600000
},
{
"Word": "not",
"Offset": 318400000,
"Duration": 3000000
},
{
"Word": "quite",
"Offset": 321400000,
"Duration": 5300000
}
]
}
]
}
]
}
]
}
I would like to remove duplicates from the JSON only.
For instance, "Word": "ask" appears twice; I would like to retain the first occurrence and remove the second.
{
"Word": "welcome",
"Offset": 169700000,
"Duration": 4500000
},
I have tried various dedup techniques but nothing has helped.
Here is my sample code:
import json

with open('example1.json') as json_data:
    obj = json.load(json_data)
#attr = lambda x: x['hdfs:batchprocessing'][0]['application']['app_id']+x['hdfs:batchprocessing'][0]['application']['app_id']
el_set = set()
el_list = []
for el in obj:
    if str(el) not in el_set:
        el_set.add(str(el))
        el_list.append(el)
open("updated_structure.json", "w").write(
    json.dumps(el_list, sort_keys=True, indent=4, separators=(',', ': '))
)
JSON without any duplicate values for "Word"
Here is a solution ('data' is the data structure from the post). The code removes duplicate words from 'data'.
import copy
import pprint
data = { ... }  # the JSON from the question, pasted in as a Python dict (null → None, false → False)
words_set = set()
for entry in data['FileResults']:
    for result in entry['Results']:
        for nbsets_dict in result['NBest']:
            clone = copy.deepcopy(nbsets_dict['Words'])
            tmp = []
            for idx, words in enumerate(nbsets_dict['Words']):
                if words['Word'] in words_set:
                    print('About to remove entry: ' + words['Word'])
                    tmp.append(idx)
                else:
                    words_set.add(words['Word'])
            for idx in sorted(tmp, reverse=True):
                del clone[idx]
            nbsets_dict['Words'] = clone

pprint.pprint(data)
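The same first-occurrence-wins rule can be expressed without the index bookkeeping (a sketch; like the loop above, it tracks seen words globally and mutates data in place):
seen = set()

def first_occurrences(words):
    # Keep only the first occurrence of each 'Word' value.
    kept = []
    for w in words:
        if w['Word'] not in seen:
            seen.add(w['Word'])
            kept.append(w)
    return kept

for entry in data['FileResults']:
    for result in entry['Results']:
        for nbest in result['NBest']:
            nbest['Words'] = first_occurrences(nbest['Words'])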

How to convert a list of dicts into nested JSON in Python without using pandas DataFrame

I have a list of dicts like this
[
{
"subject_id": 1,
"subject_name": "HR Sector 0",
"id": 1,
"name": "parent2",
"value": 10.6
},
{
"subject_id": 18,
"subject_name": "Test11",
"id": 1,
"name": "parent2",
"value": 12
},
{
"subject_id": 2,
"subject_name": "AG1",
"id": 2,
"name": "Customer Delivery Dpt. 1",
"value": 17
},
{
"subject_id": 3,
"subject_name": "Finance Group 2",
"id": 2,
"name": "Customer Delivery Dpt. 1",
"value": 1.5
},
{
"subject_id": 10,
"subject_name": "test",
"id": 2,
"name": "Customer Delivery Dpt. 1",
"value": 10
},
{
"subject_id": null,
"subject_name": null,
"id": 3,
"name": "Technology Team 2",
"value": null
},
{
"subject_id": 8,
"subject_name": "Group 4",
"id": 5,
"name": "Accounting Group 4",
"value": 10
},
{
"subject_id": null,
"subject_name": null,
"id": 9,
"name": "PG2",
"value": null
}
]
I want to convert it into nested JSON, ignoring null values, to get the result set below:
[
{
"id": 1,
"name": "parent2",
"subjects": [
{"subject_id": 1,
"subject_name": "HR Sector 0",
"value": 10.6
},
{"subject_id": 18,
"subject_name": "Test11",
"value": 12
}
]
},
{
"id": 2,
"name": "Customer Delivery Dpt. 1",
"subjects": [
{"subject_id": 2,
"subject_name": "AG1",
"value": 17
},
{"subject_id": 3,
"subject_name": "Finance Group 2",
"value": 1.5
},
{"subject_id": 10,
"subject_name": "test",
"value": 10
}
]
},
{
"id": 3,
"name": "Technology Team 2",
"subjects": []
},
{
"id": 5,
"name": "Accounting Group 4",
"subjects": [
{ "subject_id": 8,
"subject_name": "Group 4",
"value": 10
}
]
},
{
"id": 9,
"name": "PG2",
"subjects": []
}
]
import json
arr = [ ... ]  # the list of dicts from the question (null → None)
def process_arr_to_json(arr):
    newArr = []
    addedIds = {}
    for item in arr:
        if addedIds.get(item["id"]) is None:
            formatted_item = {"subjects": []}
            newArr.append(formatted_item)
            # Track the subject index and the position in newArr for this id.
            addedIds[item["id"]] = {"idx": 0, "pos": len(newArr) - 1}
        else:
            formatted_item = newArr[addedIds[item["id"]]["pos"]]
            addedIds[item["id"]]["idx"] += 1
        for k, v in item.items():
            if v is not None:
                if k == "id" or k == "name":
                    formatted_item[k] = v
                else:
                    if len(formatted_item["subjects"]) <= addedIds[item["id"]]["idx"]:
                        formatted_item["subjects"].append({k: v})
                    else:
                        formatted_item["subjects"][addedIds[item["id"]]["idx"]][k] = v
    print(newArr)
    return json.dumps(newArr)


if __name__ == "__main__":
    process_arr_to_json(arr)
My solution: please see the code below, which forms the merged results.
import json

def process_items(items):
    results = {}
    for item in items:
        # Create the parent record only once per id, so that subjects
        # collected from earlier rows are not overwritten.
        if item['id'] not in results:
            results[item['id']] = {
                'id': item['id'],
                'name': item['name'],
            }
        to_append = {}
        for k in ['subject_id', 'value', 'subject_name']:
            if item.get(k):
                to_append[k] = item[k]
        results[item['id']].setdefault('subjects', [])
        if to_append:
            results[item['id']]['subjects'].append(to_append)
    return results
items = [ ... ]  # the list of dicts from the question (null → None)
result = process_items(items)
json.dumps(result.values()) # For python 3: json.dumps(list(results.values()))
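For comparison, the same grouping can be done with itertools.groupby after sorting by id (a sketch, assuming the same items list; rows with a null subject_id produce an empty subjects list):
import json
from itertools import groupby

def group_items(items):
    grouped = []
    for gid, rows in groupby(sorted(items, key=lambda r: r['id']), key=lambda r: r['id']):
        rows = list(rows)
        grouped.append({
            'id': gid,
            'name': rows[0]['name'],
            'subjects': [
                {'subject_id': r['subject_id'],
                 'subject_name': r['subject_name'],
                 'value': r['value']}
                for r in rows
                if r['subject_id'] is not None
            ],
        })
    return grouped

print(json.dumps(group_items(items), indent=2))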

Illegal_argument_exception when importing Twitter into Elasticsearch

I am new to Elasticsearch and am attempting to do some data analysis of Twitter data by importing it into Elasticsearch and running Kibana on it. I'm getting stuck when importing Twitter data into Elasticsearch. Any help is appreciated!
Here's a minimal sample program that reproduces the error.
import json
from elasticsearch import Elasticsearch
es = Elasticsearch()
data = json.loads(open("data.json").read())
es.index(index='tweets5', doc_type='tweets', id=data['id'], body=data)
Here's the error:
Traceback (most recent call last):
File "elasticsearch_import_test.py", line 5, in <module>
es.index(index='tweets5', doc_type='tweets', id=data['id'], body=data)
File "/usr/local/lib/python2.7/site-packages/elasticsearch/client/utils.py", line 69, in _wrapped
return func(*args, params=params, **kwargs)
File "/usr/local/lib/python2.7/site-packages/elasticsearch/client/__init__.py", line 279, in index
_make_path(index, doc_type, id), params=params, body=body)
File "/usr/local/lib/python2.7/site-packages/elasticsearch/transport.py", line 329, in perform_request
status, headers, data = connection.perform_request(method, url, params, body, ignore=ignore, timeout=timeout)
File "/usr/local/lib/python2.7/site-packages/elasticsearch/connection/http_urllib3.py", line 109, in perform_request
self._raise_error(response.status, raw_data)
File "/usr/local/lib/python2.7/site-packages/elasticsearch/connection/base.py", line 108, in _raise_error
raise HTTP_EXCEPTIONS.get(status_code, TransportError)(status_code, error_message, additional_info)
elasticsearch.exceptions.RequestError: TransportError(400, u'illegal_argument_exception', u'[Raza][127.0.0.1:9300][indices:data/write/index[p]]')
Here's an example Twitter JSON file (data.json)
{
"_id": {
"$oid": "570597358c68d71c16b3b722"
},
"contributors": null,
"coordinates": null,
"created_at": "Wed Apr 06 23:09:41 +0000 2016",
"entities": {
"hashtags": [
{
"indices": [
68,
72
],
"text": "dnd"
},
{
"indices": [
73,
79
],
"text": "Nat20"
},
{
"indices": [
80,
93
],
"text": "CriticalRole"
},
{
"indices": [
94,
103
],
"text": "d20babes"
}
],
"media": [
{
"display_url": "pic.twitter.com/YQoxEuEAXV",
"expanded_url": "http://twitter.com/Zenttsilverwing/status/715953298076012545/photo/1",
"id": 715953292849754112,
"id_str": "715953292849754112",
"indices": [
104,
127
],
"media_url": "http://pbs.twimg.com/media/Ce-TugAUsAASZht.jpg",
"media_url_https": "https://pbs.twimg.com/media/Ce-TugAUsAASZht.jpg",
"sizes": {
"large": {
"h": 768,
"resize": "fit",
"w": 1024
},
"medium": {
"h": 450,
"resize": "fit",
"w": 600
},
"small": {
"h": 255,
"resize": "fit",
"w": 340
},
"thumb": {
"h": 150,
"resize": "crop",
"w": 150
}
},
"source_status_id": 715953298076012545,
"source_status_id_str": "715953298076012545",
"source_user_id": 2375847847,
"source_user_id_str": "2375847847",
"type": "photo",
"url": "https://shortened.url/YQoxEuEAXV"
}
],
"symbols": [],
"urls": [
{
"display_url": "darkcastlecollectibles.com",
"expanded_url": "http://www.darkcastlecollectibles.com/",
"indices": [
44,
67
],
"url": "https://shortened.url/SJgFTE0o8h"
}
],
"user_mentions": [
{
"id": 2375847847,
"id_str": "2375847847",
"indices": [
3,
19
],
"name": "Zack Chini",
"screen_name": "Zenttsilverwing"
}
]
},
"extended_entities": {
"media": [
{
"display_url": "pic.twitter.com/YQoxEuEAXV",
"expanded_url": "http://twitter.com/Zenttsilverwing/status/715953298076012545/photo/1",
"id": 715953292849754112,
"id_str": "715953292849754112",
"indices": [
104,
127
],
"media_url": "http://pbs.twimg.com/media/Ce-TugAUsAASZht.jpg",
"media_url_https": "https://pbs.twimg.com/media/Ce-TugAUsAASZht.jpg",
"sizes": {
"large": {
"h": 768,
"resize": "fit",
"w": 1024
},
"medium": {
"h": 450,
"resize": "fit",
"w": 600
},
"small": {
"h": 255,
"resize": "fit",
"w": 340
},
"thumb": {
"h": 150,
"resize": "crop",
"w": 150
}
},
"source_status_id": 715953298076012545,
"source_status_id_str": "715953298076012545",
"source_user_id": 2375847847,
"source_user_id_str": "2375847847",
"type": "photo",
"url": "https://shortened.url/YQoxEuEAXV"
},
{
"display_url": "pic.twitter.com/YQoxEuEAXV",
"expanded_url": "http://twitter.com/Zenttsilverwing/status/715953298076012545/photo/1",
"id": 715953295727009793,
"id_str": "715953295727009793",
"indices": [
104,
127
],
"media_url": "http://pbs.twimg.com/media/Ce-TuquUIAEsVn9.jpg",
"media_url_https": "https://pbs.twimg.com/media/Ce-TuquUIAEsVn9.jpg",
"sizes": {
"large": {
"h": 768,
"resize": "fit",
"w": 1024
},
"medium": {
"h": 450,
"resize": "fit",
"w": 600
},
"small": {
"h": 255,
"resize": "fit",
"w": 340
},
"thumb": {
"h": 150,
"resize": "crop",
"w": 150
}
},
"source_status_id": 715953298076012545,
"source_status_id_str": "715953298076012545",
"source_user_id": 2375847847,
"source_user_id_str": "2375847847",
"type": "photo",
"url": "https://shortened.url/YQoxEuEAXV"
}
]
},
"favorite_count": 0,
"favorited": false,
"filter_level": "low",
"geo": null,
"id": 717851801417031680,
"id_str": "717851801417031680",
"in_reply_to_screen_name": null,
"in_reply_to_status_id": null,
"in_reply_to_status_id_str": null,
"in_reply_to_user_id": null,
"in_reply_to_user_id_str": null,
"is_quote_status": false,
"lang": "en",
"place": null,
"possibly_sensitive": false,
"retweet_count": 0,
"retweeted": false,
"retweeted_status": {
"contributors": null,
"coordinates": null,
"created_at": "Fri Apr 01 17:25:42 +0000 2016",
"entities": {
"hashtags": [
{
"indices": [
47,
51
],
"text": "dnd"
},
{
"indices": [
52,
58
],
"text": "Nat20"
},
{
"indices": [
59,
72
],
"text": "CriticalRole"
},
{
"indices": [
73,
82
],
"text": "d20babes"
}
],
"media": [
{
"display_url": "pic.twitter.com/YQoxEuEAXV",
"expanded_url": "http://twitter.com/Zenttsilverwing/status/715953298076012545/photo/1",
"id": 715953292849754112,
"id_str": "715953292849754112",
"indices": [
83,
106
],
"media_url": "http://pbs.twimg.com/media/Ce-TugAUsAASZht.jpg",
"media_url_https": "https://pbs.twimg.com/media/Ce-TugAUsAASZht.jpg",
"sizes": {
"large": {
"h": 768,
"resize": "fit",
"w": 1024
},
"medium": {
"h": 450,
"resize": "fit",
"w": 600
},
"small": {
"h": 255,
"resize": "fit",
"w": 340
},
"thumb": {
"h": 150,
"resize": "crop",
"w": 150
}
},
"type": "photo",
"url": "https://shortened.url/YQoxEuEAXV"
}
],
"symbols": [],
"urls": [
{
"display_url": "darkcastlecollectibles.com",
"expanded_url": "http://www.darkcastlecollectibles.com/",
"indices": [
23,
46
],
"url": "https://shortened.url/SJgFTE0o8h"
}
],
"user_mentions": []
},
"extended_entities": {
"media": [
{
"display_url": "pic.twitter.com/YQoxEuEAXV",
"expanded_url": "http://twitter.com/Zenttsilverwing/status/715953298076012545/photo/1",
"id": 715953292849754112,
"id_str": "715953292849754112",
"indices": [
83,
106
],
"media_url": "http://pbs.twimg.com/media/Ce-TugAUsAASZht.jpg",
"media_url_https": "https://pbs.twimg.com/media/Ce-TugAUsAASZht.jpg",
"sizes": {
"large": {
"h": 768,
"resize": "fit",
"w": 1024
},
"medium": {
"h": 450,
"resize": "fit",
"w": 600
},
"small": {
"h": 255,
"resize": "fit",
"w": 340
},
"thumb": {
"h": 150,
"resize": "crop",
"w": 150
}
},
"type": "photo",
"url": "https://shortened.url/YQoxEuEAXV"
},
{
"display_url": "pic.twitter.com/YQoxEuEAXV",
"expanded_url": "http://twitter.com/Zenttsilverwing/status/715953298076012545/photo/1",
"id": 715953295727009793,
"id_str": "715953295727009793",
"indices": [
83,
106
],
"media_url": "http://pbs.twimg.com/media/Ce-TuquUIAEsVn9.jpg",
"media_url_https": "https://pbs.twimg.com/media/Ce-TuquUIAEsVn9.jpg",
"sizes": {
"large": {
"h": 768,
"resize": "fit",
"w": 1024
},
"medium": {
"h": 450,
"resize": "fit",
"w": 600
},
"small": {
"h": 255,
"resize": "fit",
"w": 340
},
"thumb": {
"h": 150,
"resize": "crop",
"w": 150
}
},
"type": "photo",
"url": "https://shortened.url/YQoxEuEAXV"
}
]
},
"favorite_count": 5,
"favorited": false,
"filter_level": "low",
"geo": null,
"id": 715953298076012545,
"id_str": "715953298076012545",
"in_reply_to_screen_name": null,
"in_reply_to_status_id": null,
"in_reply_to_status_id_str": null,
"in_reply_to_user_id": null,
"in_reply_to_user_id_str": null,
"is_quote_status": false,
"lang": "en",
"place": null,
"possibly_sensitive": false,
"retweet_count": 1,
"retweeted": false,
"source": "Twitter Web Client",
"text": "coins came in!! Thanks https://shortened.url/SJgFTE0o8h #dnd #Nat20 #CriticalRole #d20babes https://shortened.url/YQoxEuEAXV",
"truncated": false,
"user": {
"contributors_enabled": false,
"created_at": "Thu Mar 06 19:59:14 +0000 2014",
"default_profile": true,
"default_profile_image": false,
"description": "DM Geek Critter Con-man. I am here to like your art ^.^",
"favourites_count": 4990,
"follow_request_sent": null,
"followers_count": 57,
"following": null,
"friends_count": 183,
"geo_enabled": false,
"id": 2375847847,
"id_str": "2375847847",
"is_translator": false,
"lang": "en",
"listed_count": 7,
"location": "Flower Mound, TX",
"name": "Zack Chini",
"notifications": null,
"profile_background_color": "C0DEED",
"profile_background_image_url": "http://abs.twimg.com/images/themes/theme1/bg.png",
"profile_background_image_url_https": "https://abs.twimg.com/images/themes/theme1/bg.png",
"profile_background_tile": false,
"profile_banner_url": "https://pbs.twimg.com/profile_banners/2375847847/1430928759",
"profile_image_url": "http://pbs.twimg.com/profile_images/708816622358663168/mNF4Ysr5_normal.jpg",
"profile_image_url_https": "https://pbs.twimg.com/profile_images/708816622358663168/mNF4Ysr5_normal.jpg",
"profile_link_color": "0084B4",
"profile_sidebar_border_color": "C0DEED",
"profile_sidebar_fill_color": "DDEEF6",
"profile_text_color": "333333",
"profile_use_background_image": true,
"protected": false,
"screen_name": "Zenttsilverwing",
"statuses_count": 551,
"time_zone": null,
"url": null,
"utc_offset": null,
"verified": false
}
},
"source": "Twitter Web Client",
"text": "RT #Zenttsilverwing: coins came in!! Thanks https://shortened.url/SJgFTE0o8h #dnd #Nat20 #CriticalRole #d20babes https://shortened.url/YQoxEuEAXV",
"timestamp_ms": "1459984181156",
"truncated": false,
"user": {
"contributors_enabled": false,
"created_at": "Tue Feb 10 04:31:18 +0000 2009",
"default_profile": false,
"default_profile_image": false,
"description": "I use Twitter to primarily retweet Critter artwork of Critical Role and their own creations. I maintain a list of all the Critter artists I've come across.",
"favourites_count": 17586,
"follow_request_sent": null,
"followers_count": 318,
"following": null,
"friends_count": 651,
"geo_enabled": true,
"id": 20491914,
"id_str": "20491914",
"is_translator": false,
"lang": "en",
"listed_count": 33,
"location": "SanDiego, CA",
"name": "UnknownOutrider",
"notifications": null,
"profile_background_color": "EDECE9",
"profile_background_image_url": "http://abs.twimg.com/images/themes/theme3/bg.gif",
"profile_background_image_url_https": "https://abs.twimg.com/images/themes/theme3/bg.gif",
"profile_background_tile": false,
"profile_image_url": "http://pbs.twimg.com/profile_images/224346493/cartoon_dragon_tattoo_designs_normal.jpg",
"profile_image_url_https": "https://pbs.twimg.com/profile_images/224346493/cartoon_dragon_tattoo_designs_normal.jpg",
"profile_link_color": "088253",
"profile_sidebar_border_color": "D3D2CF",
"profile_sidebar_fill_color": "E3E2DE",
"profile_text_color": "634047",
"profile_use_background_image": true,
"protected": false,
"screen_name": "UnknownOutrider",
"statuses_count": 12760,
"time_zone": "Pacific Time (US & Canada)",
"url": null,
"utc_offset": -25200,
"verified": false
}
}
The reason this doesn't work is that you are trying to index a document with a field named _id, which already exists as a built-in metadata field. So delete that field or change its name:
import json
from elasticsearch import Elasticsearch
es = Elasticsearch()
data = json.loads(open("data.json").read())
# data['id_'] = data['_id']  # alternatively, rename _id to id_
del data['_id']
es.index(index='tweets5', doc_type='tweets', id=data['id'], body=data)
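A slightly more defensive variant (a sketch; same data.json and index parameters as above) uses dict.pop with a default, so it also works when _id is absent:
import json
from elasticsearch import Elasticsearch

es = Elasticsearch()
data = json.loads(open("data.json").read())
data.pop("_id", None)  # drop the reserved field if present
es.index(index='tweets5', doc_type='tweets', id=data['id'], body=data)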
