Python: change json outcome into data.frame

I got a JSON object after connecting via Python to the F.A.C.E API and analyzing a face image.
I am a newbie in Python and my question is:
how do I transform the JSON object into a data.frame and then into .xls/.csv?
print ('Response : ', json_resp.text)
#Console print:
Response : {
"error_code": 0,
"description": "",
"img_size": { "w": 650, "h": 488 },
"people": [
{
"age": 22,
"gender": 84,
"mood": 29,
"position": { "x": 190, "y": 161, "w": 259, "h": 259 },
"rotation": { "yaw": -3, "pitch": 3, "roll": -1 },
"landmarks": { "lefteye": { "x": 371, "y": 233 }, "righteye": { "x": 266, "y": 236 }, "maskpoints": [ { "x": 371, "y": 233 }, { "x": 266, "y": 236 }, { "x": 203, "y": 234 }, { "x": 206, "y": 261 }, { "x": 212, "y": 287 }, { "x": 220, "y": 313 }, { "x": 233, "y": 338 }, { "x": 250, "y": 357 }, { "x": 273, "y": 373 }, { "x": 296, "y": 388 }, { "x": 321, "y": 394 }, { "x": 346, "y": 390 }, { "x": 371, "y": 377 }, { "x": 396, "y": 362 }, { "x": 416, "y": 341 }, { "x": 430, "y": 315 }, { "x": 437, "y": 287 }, { "x": 444, "y": 258 }, { "x": 448, "y": 227 }, { "x": 215, "y": 221 }, { "x": 226, "y": 203 }, { "x": 247, "y": 195 }, { "x": 269, "y": 198 }, { "x": 291, "y": 204 }, { "x": 336, "y": 201 }, { "x": 360, "y": 193 }, { "x": 385, "y": 190 }, { "x": 408, "y": 198 }, { "x": 423, "y": 216 }, { "x": 314, "y": 231 }, { "x": 314, "y": 247 }, { "x": 314, "y": 263 }, { "x": 314, "y": 279 }, { "x": 294, "y": 297 }, { "x": 304, "y": 300 }, { "x": 315, "y": 302 }, { "x": 327, "y": 299 }, { "x": 338, "y": 297 }, { "x": 242, "y": 236 }, { "x": 254, "y": 226 }, { "x": 271, "y": 226 }, { "x": 284, "y": 240 }, { "x": 270, "y": 245 }, { "x": 253, "y": 244 }, { "x": 349, "y": 238 }, { "x": 362, "y": 224 }, { "x": 379, "y": 224 }, { "x": 393, "y": 234 }, { "x": 381, "y": 242 }, { "x": 363, "y": 242 }, { "x": 281, "y": 332 }, { "x": 294, "y": 327 }, { "x": 306, "y": 322 }, { "x": 315, "y": 325 }, { "x": 325, "y": 323 }, { "x": 340, "y": 328 }, { "x": 357, "y": 335 }, { "x": 341, "y": 347 }, { "x": 327, "y": 354 }, { "x": 317, "y": 354 }, { "x": 306, "y": 353 }, { "x": 294, "y": 347 }, { "x": 289, "y": 333 }, { "x": 306, "y": 331 }, { "x": 316, "y": 332 }, { "x": 325, "y": 331 }, { "x": 349, "y": 334 }, { "x": 326, "y": 339 }, { "x": 316, "y": 340 }, { "x": 306, "y": 339 } ] },
"clothingcolors": [ ],
"ethnicity": { "african": 83, "asian": 0, "caucasian": 12, "hispanic": 3 },
"emotions": { "happiness": 1, "surprise": 5, "anger": 2, "disgust": 2, "fear": 1, "sadness": 11 }
}
]
}
I would like to obtain a data frame like:
error_code, age, gender, mood, (...), emotions.sadness
0 0, 0, 84, 29, (...), 11
and then save it as .xls or .csv in "C://Users".

You could use the methods pandas.read_json and DataFrame.to_csv:
import pandas
df = pandas.read_json(json_resp.text, typ='frame')
df.to_csv("path_to_your_file.csv")
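Note that read_json may choke on this particular response because of the nested "people" list (see the follow-up question further down). A sketch that flattens it into the dotted columns you describe, assuming pandas >= 1.0 (where json_normalize is a top-level function) and that json_resp holds the response above; the output path is just an example:
import json
import pandas as pd

resp = json.loads(json_resp.text)
# flatten each entry of "people" into dotted columns such as emotions.sadness,
# carrying the top-level scalar fields along via meta=
df = pd.json_normalize(resp, record_path='people',
                       meta=['error_code', 'description'])
df.to_csv(r"C:\Users\face_api_output.csv", index=False)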

Related

Converting JSON coordinates to numpy array

I would like to convert a JSON file back into a PNG image or a NumPy array.
The JSON file consists of a list of coordinates and other metadata. As an example, it would look like this:
"firstEditDate": "2019-12-02T19:05:45.393Z",
"lastEditDate": "2020-06-30T13:21:33.371Z",
"folder": "/Pictures/poly",
"objects": [
{
"classIndex": 5,
"layer": 0,
"polygon": [
{
"x": 0,
"y": 0
},
{
"x": 1699.7291626931146,
"y": 0
},
{
"x": 1699.7291626931146,
"y": 1066.87392714095
},
{
"x": 0,
"y": 1066.87392714095
}
]
},
{
"classIndex": 2,
"layer": 0,
"polygon": [
{
"x": 844.2300556586271,
"y": 711.8243676199173
},
{
"x": 851.156462585034,
"y": 740.5194820293175
},
{
"x": 854.1249226963513,
"y": 744.477428844407
},
{
"x": 854.1249226963513,
"y": 747.4458889557243
},
(coordinates should be rounded to the nearest integers prior to creating the array or image)
The dimensions of the array/picture should be 1727 x 971.
Is there a function in Python that can convert the file either into an array holding the classIndex values, or into a picture where each classIndex is assigned a specific color?
Here is a solution:
import matplotlib.pyplot as plt
import numpy as np
import mahotas.polygon as mp
json_dict = {
"firstEditDate": "2019-12-02T19:05:45.393Z",
"lastEditDate": "2020-06-30T13:21:33.371Z",
"folder": "/Pictures/poly",
"objects": [{
"classIndex": 1,
"layer": 0,
"polygon": [
{"x": 170, "y": 674},
{"x": 70, "y": 674},
{"x": 70, "y": 1120},
{"x": 870, "y": 1120},
{"x": 870, "y": 674},
{"x": 770, "y": 674},
{"x": 770, "y": 1020},
{"x": 170, "y": 1020},
],
}, {
"classIndex": 2,
"layer": 0,
"polygon": [
{"x": 220, "y": 870},
{"x": 220, "y": 970},
{"x": 720, "y": 970},
{"x": 720, "y": 870},
]
}, {
"classIndex": 3,
"layer": 0,
"polygon": [
{"x": 250, "y": 615},
{"x": 225, "y": 710},
{"x": 705, "y": 840},
{"x": 730, "y": 745},
]
}, {
"classIndex": 4,
"layer": 0,
"polygon": [
{"x": 350, "y": 380},
{"x": 300, "y": 465},
{"x": 730, "y": 710},
{"x": 780, "y": 630},
]
}, {
"classIndex": 5,
"layer": 0,
"polygon": [
{"x": 505, "y": 180},
{"x": 435, "y": 250},
{"x": 790, "y": 605},
{"x": 855, "y": 535},
]
}, {
"classIndex": 6,
"layer": 0,
"polygon": [
{"x": 700, "y": 30},
{"x": 615, "y": 80},
{"x": 870, "y": 515},
{"x": 950, "y": 465},
]
}]
}
canvas = np.zeros((1000, 1150))
for obj in json_dict["objects"]:
    # round the float coordinates to the nearest integer pixel
    pts = [(round(p["x"]), round(p["y"])) for p in obj["polygon"]]
    # fill the polygon on the canvas, using the classIndex as the pixel value
    mp.fill_polygon(pts, canvas, obj["classIndex"])
plt.imshow(canvas.transpose())
plt.colorbar()
plt.show()
Output: (a plot of the canvas with each polygon filled by its classIndex value, plus a colorbar)
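If you need the result as an actual image file rather than a plot window, a sketch that reuses the canvas from above (the filename and colormap are example choices; vmax=6 matches the largest classIndex in the example):
# write the class array out as a colored PNG, one color per classIndex
plt.imsave("polygons.png", canvas.transpose(), cmap="viridis", vmin=0, vmax=6)
# or keep the raw class indices for later processing
np.save("classes.npy", canvas.transpose().astype(np.uint8))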

Adding values to a nested dictionary in python

I have the following dictionary
d1 = {
"Completely Agree": {
"child": {
"Male": {
"child": {
"Greater than 54": {
"child": {},
"value": 4,
"label": "Greater than 54"
},
"Between 35 to 39": {
"child": {},
"value": 1,
"label": "Between 35 to 39"
}
},
"value": 5,
"label": "Male"
}
},
"value": 5,
"label": "Completely Agree"
},
"Somewhat Agree": {
"child": {
"Male": {
"child": {
"Greater than 54": {
"child": {},
"value": 1,
"label": "Greater than 54"
},
"Between 45 to 49": {
"child": {},
"value": 2,
"label": "Between 45 to 49"
},
"Between 25 to 29": {
"child": {},
"value": 1,
"label": "Between 25 to 29"
},
"Between 35 to 39": {
"child": {},
"value": 1,
"label": "Between 35 to 39"
},
"Between 50 to 54": {
"child": {},
"value": 3,
"label": "Between 50 to 54"
},
"Between 40 to 44": {
"child": {},
"value": 1,
"label": "Between 40 to 44"
}
},
"value": 9,
"label": "Male"
},
"Female": {
"child": {
"Between 25 to 29": {
"child": {},
"value": 2,
"label": "Between 25 to 29"
},
"Between 30 to 34": {
"child": {},
"value": 1,
"label": "Between 30 to 34"
},
"Greater than 54": {
"child": {},
"value": 1,
"label": "Greater than 54"
}
},
"value": 4,
"label": "Female"
}
},
"value": 13,
"label": "Somewhat Agree"
},
"Neither Agree nor Disagree": {
"child": {
"Male": {
"child": {
"Between 25 to 29": {
"child": {},
"value": 1,
"label": "Between 25 to 29"
},
"Between 35 to 39": {
"child": {},
"value": 1,
"label": "Between 35 to 39"
},
"Between 30 to 34": {
"child": {},
"value": 1,
"label": "Between 30 to 34"
},
"Between 45 to 49": {
"child": {},
"value": 1,
"label": "Between 45 to 49"
},
"Between 50 to 54": {
"child": {},
"value": 1,
"label": "Between 50 to 54"
}
},
"value": 5,
"label": "Male"
},
"Female": {
"child": {
"Less than 25": {
"child": {},
"value": 1,
"label": "Less than 25"
}
},
"value": 1,
"label": "Female"
}
},
"value": 6,
"label": "Neither Agree nor Disagree"
}
}
I want to insert another key, let's say 'data_recs', at the same level wherever child is an empty dictionary {}. So the result should be:
d1 = {
"Completely Agree": {
"child": {
"Male": {
"child": {
"Greater than 54": {
"child": {},
"value": 4,
"label": "Greater than 54",
"data_recs": [1,2,3,4]
},
"Between 35 to 39": {
"child": {},
"value": 1,
"label": "Between 35 to 39",
"data_recs": [1,2,3,4]
}
},
"value": 5,
"label": "Male"
}
},
"value": 5,
"label": "Completely Agree"
},
"Somewhat Agree": {
"child": {
"Male": {
"child": {
"Greater than 54": {
"child": {},
"value": 1,
"label": "Greater than 54",
"data_recs": [1,2,3,4]
},
"Between 45 to 49": {
"child": {},
"value": 2,
"label": "Between 45 to 49"
},
"Between 25 to 29": {
"child": {},
"value": 1,
"label": "Between 25 to 29",
"data_recs": [1,2,3,4]
},
"Between 35 to 39": {
"child": {},
"value": 1,
"label": "Between 35 to 39",
"data_recs": [1,2,3,4]
},
"Between 50 to 54": {
"child": {},
"value": 3,
"label": "Between 50 to 54",
"data_recs": [1,2,3,4]
},
"Between 40 to 44": {
"child": {},
"value": 1,
"label": "Between 40 to 44",
"data_recs": [1,2,3,4]
}
},
"value": 9,
"label": "Male"
},
"Female": {
"child": {
"Between 25 to 29": {
"child": {},
"value": 2,
"label": "Between 25 to 29",
"data_recs": [1,2,3,4]
},
"Between 30 to 34": {
"child": {},
"value": 1,
"label": "Between 30 to 34",
"data_recs": [1,2,3,4]
},
"Greater than 54": {
"child": {},
"value": 1,
"label": "Greater than 54",
"data_recs": [1,2,3,4]
}
},
"value": 4,
"label": "Female"
}
},
"value": 13,
"label": "Somewhat Agree"
}
}
The dictionary can be nested to any depth. I have written the following code to implement this, but I think I am missing something here.
def parse_master_dict(data, recs_map):
    for k, v in data.items():
        print k, v
        if v.get('child', None):
            child = v['child']
            if not child:
                print "here", k
                v['data_recs'] = recs_map.get(k, [])
            else:
                # if child can have further children
                parse_master_dict(child, recs_map)
Please advise.
Your if v.get('child', None): statement is preventing you from proceeding to update the dict when the child dict is empty since the condition would be evaluated as False. Remove the if statement and your code should work:
def parse_master_dict(data, recs_map):
    for k, v in data.items():
        child = v['child']
        if not child:
            v['data_recs'] = recs_map.get(k, [])
        else:
            parse_master_dict(child, recs_map)
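For completeness, a quick usage sketch, assuming recs_map is a dict keyed by the leaf labels (the [1, 2, 3, 4] lists are just the placeholder values from the question):
recs_map = {
    "Greater than 54": [1, 2, 3, 4],
    "Between 35 to 39": [1, 2, 3, 4],
    # ... one entry per leaf label
}
# mutates d1 in place, adding 'data_recs' wherever 'child' is empty
parse_master_dict(d1, recs_map)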

Python: Json string to csv via Pandas --> ValueError: Mixing dicts with non-Series may lead to ambiguous ordering

Hello, I have run into a problem converting a JSON string to a data.frame.
print (json_resp.text)
{
"error_code": 0,
"description": "",
"img_size": { "w": 650, "h": 488 },
"people": [
{
"age": 22,
"gender": 84,
"mood": 29,
"position": { "x": 190, "y": 161, "w": 259, "h": 259 },
"rotation": { "yaw": -3, "pitch": 3, "roll": -1 },
"landmarks": { "lefteye": { "x": 371, "y": 233 }, "righteye": { "x": 266, "y": 236 }, "maskpoints": [ { "x": 371, "y": 233 }, { "x": 266, "y": 236 }, { "x": 203, "y": 234 }, { "x": 206, "y": 261 }, { "x": 212, "y": 287 }, { "x": 220, "y": 313 }, { "x": 233, "y": 338 }, { "x": 250, "y": 357 }, { "x": 273, "y": 373 }, { "x": 296, "y": 388 }, { "x": 321, "y": 394 }, { "x": 346, "y": 390 }, { "x": 371, "y": 377 }, { "x": 396, "y": 362 }, { "x": 416, "y": 341 }, { "x": 430, "y": 315 }, { "x": 437, "y": 287 }, { "x": 444, "y": 258 }, { "x": 448, "y": 227 }, { "x": 215, "y": 221 }, { "x": 226, "y": 203 }, { "x": 247, "y": 195 }, { "x": 269, "y": 198 }, { "x": 291, "y": 204 }, { "x": 336, "y": 201 }, { "x": 360, "y": 193 }, { "x": 385, "y": 190 }, { "x": 408, "y": 198 }, { "x": 423, "y": 216 }, { "x": 314, "y": 231 }, { "x": 314, "y": 247 }, { "x": 314, "y": 263 }, { "x": 314, "y": 279 }, { "x": 294, "y": 297 }, { "x": 304, "y": 300 }, { "x": 315, "y": 302 }, { "x": 327, "y": 299 }, { "x": 338, "y": 297 }, { "x": 242, "y": 236 }, { "x": 254, "y": 226 }, { "x": 271, "y": 226 }, { "x": 284, "y": 240 }, { "x": 270, "y": 245 }, { "x": 253, "y": 244 }, { "x": 349, "y": 238 }, { "x": 362, "y": 224 }, { "x": 379, "y": 224 }, { "x": 393, "y": 234 }, { "x": 381, "y": 242 }, { "x": 363, "y": 242 }, { "x": 281, "y": 332 }, { "x": 294, "y": 327 }, { "x": 306, "y": 322 }, { "x": 315, "y": 325 }, { "x": 325, "y": 323 }, { "x": 340, "y": 328 }, { "x": 357, "y": 335 }, { "x": 341, "y": 347 }, { "x": 327, "y": 354 }, { "x": 317, "y": 354 }, { "x": 306, "y": 353 }, { "x": 294, "y": 347 }, { "x": 289, "y": 333 }, { "x": 306, "y": 331 }, { "x": 316, "y": 332 }, { "x": 325, "y": 331 }, { "x": 349, "y": 334 }, { "x": 326, "y": 339 }, { "x": 316, "y": 340 }, { "x": 306, "y": 339 } ] },
"clothingcolors": [ ],
"ethnicity": { "african": 83, "asian": 0, "caucasian": 12, "hispanic": 3 },
"emotions": { "happiness": 1, "surprise": 5, "anger": 2, "disgust": 2, "fear": 1, "sadness": 11 }
}
]
}
However, when I try to convert the JSON string to a data.frame, I get:
import pandas as pd
df_json = pd.read_json(json_resp.text, typ='frame')
print (df_json)
Traceback (most recent call last):
File "<input>", line 1, in <module>
File "C:\Users\uzytkownik\PycharmProjects\Face API\venv\lib\site-packages\pandas\io\json\json.py", line 427, in read_json
result = json_reader.read()
File "C:\Users\uzytkownik\PycharmProjects\Face API\venv\lib\site-packages\pandas\io\json\json.py", line 537, in read
obj = self._get_object_parser(self.data)
File "C:\Users\uzytkownik\PycharmProjects\Face API\venv\lib\site-packages\pandas\io\json\json.py", line 556, in _get_object_parser
obj = FrameParser(json, **kwargs).parse()
File "C:\Users\uzytkownik\PycharmProjects\Face API\venv\lib\site-packages\pandas\io\json\json.py", line 652, in parse
self._parse_no_numpy()
File "C:\Users\uzytkownik\PycharmProjects\Face API\venv\lib\site-packages\pandas\io\json\json.py", line 871, in _parse_no_numpy
loads(json, precise_float=self.precise_float), dtype=None)
File "C:\Users\uzytkownik\PycharmProjects\Face API\venv\lib\site-packages\pandas\core\frame.py", line 392, in __init__
mgr = init_dict(data, index, columns, dtype=dtype)
File "C:\Users\uzytkownik\PycharmProjects\Face API\venv\lib\site-packages\pandas\core\internals\construction.py", line 212, in init_dict
return arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype)
File "C:\Users\uzytkownik\PycharmProjects\Face API\venv\lib\site-packages\pandas\core\internals\construction.py", line 51, in arrays_to_mgr
index = extract_index(arrays)
File "C:\Users\uzytkownik\PycharmProjects\Face API\venv\lib\site-packages\pandas\core\internals\construction.py", line 320, in extract_index
raise ValueError('Mixing dicts with non-Series may lead to '
ValueError: Mixing dicts with non-Series may lead to ambiguous ordering.
What should I change in the code to obtain a simple data.frame?
json_normalize is what you want here. However, there are nested lists within the JSON, which means it only normalizes/flattens out to the first level.
I think the issue comes in with landmarks.maskpoints, as that creates 70 rows with 2 columns, x and y. So trying to create a single row out of something that contains 70 rows is a problem.
You can see what I mean if you start unwrapping/flattening it little by little. Essentially, to flatten you want to normalize each part and then merge them all together into a single row at the end, but you can see what the issue is with the maskpoints.
jsonStr = '''
{
"error_code": 0,
"description": "",
"img_size": { "w": 650, "h": 488 },
"people": [
{
"age": 22,
"gender": 84,
"mood": 29,
"position": { "x": 190, "y": 161, "w": 259, "h": 259 },
"rotation": { "yaw": -3, "pitch": 3, "roll": -1 },
"landmarks": { "lefteye": { "x": 371, "y": 233 }, "righteye": { "x": 266, "y": 236 }, "maskpoints": [ { "x": 371, "y": 233 }, { "x": 266, "y": 236 }, { "x": 203, "y": 234 }, { "x": 206, "y": 261 }, { "x": 212, "y": 287 }, { "x": 220, "y": 313 }, { "x": 233, "y": 338 }, { "x": 250, "y": 357 }, { "x": 273, "y": 373 }, { "x": 296, "y": 388 }, { "x": 321, "y": 394 }, { "x": 346, "y": 390 }, { "x": 371, "y": 377 }, { "x": 396, "y": 362 }, { "x": 416, "y": 341 }, { "x": 430, "y": 315 }, { "x": 437, "y": 287 }, { "x": 444, "y": 258 }, { "x": 448, "y": 227 }, { "x": 215, "y": 221 }, { "x": 226, "y": 203 }, { "x": 247, "y": 195 }, { "x": 269, "y": 198 }, { "x": 291, "y": 204 }, { "x": 336, "y": 201 }, { "x": 360, "y": 193 }, { "x": 385, "y": 190 }, { "x": 408, "y": 198 }, { "x": 423, "y": 216 }, { "x": 314, "y": 231 }, { "x": 314, "y": 247 }, { "x": 314, "y": 263 }, { "x": 314, "y": 279 }, { "x": 294, "y": 297 }, { "x": 304, "y": 300 }, { "x": 315, "y": 302 }, { "x": 327, "y": 299 }, { "x": 338, "y": 297 }, { "x": 242, "y": 236 }, { "x": 254, "y": 226 }, { "x": 271, "y": 226 }, { "x": 284, "y": 240 }, { "x": 270, "y": 245 }, { "x": 253, "y": 244 }, { "x": 349, "y": 238 }, { "x": 362, "y": 224 }, { "x": 379, "y": 224 }, { "x": 393, "y": 234 }, { "x": 381, "y": 242 }, { "x": 363, "y": 242 }, { "x": 281, "y": 332 }, { "x": 294, "y": 327 }, { "x": 306, "y": 322 }, { "x": 315, "y": 325 }, { "x": 325, "y": 323 }, { "x": 340, "y": 328 }, { "x": 357, "y": 335 }, { "x": 341, "y": 347 }, { "x": 327, "y": 354 }, { "x": 317, "y": 354 }, { "x": 306, "y": 353 }, { "x": 294, "y": 347 }, { "x": 289, "y": 333 }, { "x": 306, "y": 331 }, { "x": 316, "y": 332 }, { "x": 325, "y": 331 }, { "x": 349, "y": 334 }, { "x": 326, "y": 339 }, { "x": 316, "y": 340 }, { "x": 306, "y": 339 } ] },
"clothingcolors": [ ],
"ethnicity": { "african": 83, "asian": 0, "caucasian": 12, "hispanic": 3 },
"emotions": { "happiness": 1, "surprise": 5, "anger": 2, "disgust": 2, "fear": 1, "sadness": 11 }
}
]
}'''
import json
from pandas.io.json import json_normalize
jsonObj = json.loads(jsonStr)
# flatten at 1st level. But still nested lists/dictionaries in column `people`
df_a = json_normalize(jsonObj)
# so flatten out people, and you'll see clothingcolors still has a list and landmarks too
df_people = json_normalize(jsonObj['people'])
df_clothingcolors = json_normalize(jsonObj['people'][0]['clothingcolors'])
df_landmarks = json_normalize(jsonObj['people'][0]['landmarks'])
# the landmarks column still need to flatten maskpoints...but maskpoints produces 70 rows, and there's your issue
df_maskpoints = json_normalize(jsonObj['people'][0]['landmarks']['maskpoints'])
So if you look at the shape of these:
print (df_a.shape)
(1, 5)
print (df_people.shape)
(1, 26)
print (df_clothingcolors.shape)
(0, 0)
print (df_landmarks.shape)
(1, 5)
print (df_maskpoints.shape)
(70, 2)
...you see the maskpoints shape is 70 rows.
BUT, I found a blog post describing the following flatten_json helper useful. Essentially it unwraps all those nested lists and dicts so that you end up with 1 big flat table.
# jsonStr is the same JSON string as in the first snippet above
import json
from pandas.io.json import json_normalize

def flatten_json(y):
    out = {}

    def flatten(x, name=''):
        if type(x) is dict:
            for a in x:
                flatten(x[a], name + a + '_')
        elif type(x) is list:
            i = 0
            for a in x:
                flatten(a, name + str(i) + '_')
                i += 1
        else:
            out[name[:-1]] = x

    flatten(y)
    return out

jsonObj = json.loads(jsonStr)
flat = flatten_json(jsonObj)
df = json_normalize(flat)
The output will be your 1 row, with 168 columns.
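From there, writing the single flattened row out to a CSV is one more line (the path is just an example):
df.to_csv(r"C:\Users\flattened.csv", index=False)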

New to Python and working on it to mess with JSON files

So I'm new to Python, and I was wondering how I could modify (adding or subtracting) each individual "x" in one go.
"Position": {
"7,-5": {
"id": "58",
"y": -5,
"x": 7
},
"2,-4": {
"id": "183",
"y": -4,
"x": 2
},
"-4,-1": {
"id": "190",
"y": -1,
"x": -4
}
I tried doing
import json
with open('position.txt', 'r+') as f:
    position_data = json.load(f)
position_data['Position']['x'] = +1
TypeError: list indices must be integers or slices, not str
This is what I want
"Position": {
"7,-5": {
"id": "58",
"y": -5,
"x": 8
},
"2,-4": {
"id": "183",
"y": -4,
"x": 3
},
"-4,-1": {
"id": "190",
"y": -1,
"x": -3
}
I'm not sure how to go on from here. Please advise.
You could do something like this:
for key in position_data['Position'].keys():
    position_data['Position'][key]['x'] += 1
or, more simply, iterate over the values directly:
for value in position_data['Position'].values():
    value['x'] += 1
Use itervalues in Python 2 for better efficiency.
Demo (since I got downvoted without explanation):
from pprint import pprint
position_data = {
"Position": {
"7,-5": {
"id": "58",
"y": -5,
"x": 7
},
"2,-4": {
"id": "183",
"y": -4,
"x": 2
},
"-4,-1": {
"id": "190",
"y": -1,
"x": -4
}
}
}
pprint(position_data)
for value in position_data['Position'].values():
    value['x'] += 1
pprint(position_data)
Output:
{'Position': {'-4,-1': {'id': '190', 'x': -4, 'y': -1},
              '2,-4': {'id': '183', 'x': 2, 'y': -4},
              '7,-5': {'id': '58', 'x': 7, 'y': -5}}}
{'Position': {'-4,-1': {'id': '190', 'x': -3, 'y': -1},
              '2,-4': {'id': '183', 'x': 3, 'y': -4},
              '7,-5': {'id': '58', 'x': 8, 'y': -5}}}
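If you also want to persist the change back to position.txt, a sketch (the seek/truncate pair makes sure the rewritten JSON fully replaces the old contents):
import json

with open('position.txt', 'r+') as f:
    position_data = json.load(f)
    for value in position_data['Position'].values():
        value['x'] += 1
    f.seek(0)        # rewind to the start of the file
    json.dump(position_data, f, indent=4)
    f.truncate()     # drop leftover bytes if the new JSON is shorter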

Illegal_argument_exception when importing Twitter into Elasticsearch

I am new to Elasticsearch and am attempting to do some data analysis of Twitter data by importing it into Elasticsearch and running Kibana on it. I'm getting stuck when importing Twitter data into Elasticsearch. Any help is appreciated!
Here's a minimal program that reproduces the error.
import json
from elasticsearch import Elasticsearch
es = Elasticsearch()
data = json.loads(open("data.json").read())
es.index(index='tweets5', doc_type='tweets', id=data['id'], body=data)
Here's the error:
Traceback (most recent call last):
File "elasticsearch_import_test.py", line 5, in <module>
es.index(index='tweets5', doc_type='tweets', id=data['id'], body=data)
File "/usr/local/lib/python2.7/site-packages/elasticsearch/client/utils.py", line 69, in _wrapped
return func(*args, params=params, **kwargs)
File "/usr/local/lib/python2.7/site-packages/elasticsearch/client/__init__.py", line 279, in index
_make_path(index, doc_type, id), params=params, body=body)
File "/usr/local/lib/python2.7/site-packages/elasticsearch/transport.py", line 329, in perform_request
status, headers, data = connection.perform_request(method, url, params, body, ignore=ignore, timeout=timeout)
File "/usr/local/lib/python2.7/site-packages/elasticsearch/connection/http_urllib3.py", line 109, in perform_request
self._raise_error(response.status, raw_data)
File "/usr/local/lib/python2.7/site-packages/elasticsearch/connection/base.py", line 108, in _raise_error
raise HTTP_EXCEPTIONS.get(status_code, TransportError)(status_code, error_message, additional_info)
elasticsearch.exceptions.RequestError: TransportError(400, u'illegal_argument_exception', u'[Raza][127.0.0.1:9300][indices:data/write/index[p]]')
Here's an example Twitter JSON file (data.json):
{
"_id": {
"$oid": "570597358c68d71c16b3b722"
},
"contributors": null,
"coordinates": null,
"created_at": "Wed Apr 06 23:09:41 +0000 2016",
"entities": {
"hashtags": [
{
"indices": [
68,
72
],
"text": "dnd"
},
{
"indices": [
73,
79
],
"text": "Nat20"
},
{
"indices": [
80,
93
],
"text": "CriticalRole"
},
{
"indices": [
94,
103
],
"text": "d20babes"
}
],
"media": [
{
"display_url": "pic.twitter.com/YQoxEuEAXV",
"expanded_url": "http://twitter.com/Zenttsilverwing/status/715953298076012545/photo/1",
"id": 715953292849754112,
"id_str": "715953292849754112",
"indices": [
104,
127
],
"media_url": "http://pbs.twimg.com/media/Ce-TugAUsAASZht.jpg",
"media_url_https": "https://pbs.twimg.com/media/Ce-TugAUsAASZht.jpg",
"sizes": {
"large": {
"h": 768,
"resize": "fit",
"w": 1024
},
"medium": {
"h": 450,
"resize": "fit",
"w": 600
},
"small": {
"h": 255,
"resize": "fit",
"w": 340
},
"thumb": {
"h": 150,
"resize": "crop",
"w": 150
}
},
"source_status_id": 715953298076012545,
"source_status_id_str": "715953298076012545",
"source_user_id": 2375847847,
"source_user_id_str": "2375847847",
"type": "photo",
"url": "https://shortened.url/YQoxEuEAXV"
}
],
"symbols": [],
"urls": [
{
"display_url": "darkcastlecollectibles.com",
"expanded_url": "http://www.darkcastlecollectibles.com/",
"indices": [
44,
67
],
"url": "https://shortened.url/SJgFTE0o8h"
}
],
"user_mentions": [
{
"id": 2375847847,
"id_str": "2375847847",
"indices": [
3,
19
],
"name": "Zack Chini",
"screen_name": "Zenttsilverwing"
}
]
},
"extended_entities": {
"media": [
{
"display_url": "pic.twitter.com/YQoxEuEAXV",
"expanded_url": "http://twitter.com/Zenttsilverwing/status/715953298076012545/photo/1",
"id": 715953292849754112,
"id_str": "715953292849754112",
"indices": [
104,
127
],
"media_url": "http://pbs.twimg.com/media/Ce-TugAUsAASZht.jpg",
"media_url_https": "https://pbs.twimg.com/media/Ce-TugAUsAASZht.jpg",
"sizes": {
"large": {
"h": 768,
"resize": "fit",
"w": 1024
},
"medium": {
"h": 450,
"resize": "fit",
"w": 600
},
"small": {
"h": 255,
"resize": "fit",
"w": 340
},
"thumb": {
"h": 150,
"resize": "crop",
"w": 150
}
},
"source_status_id": 715953298076012545,
"source_status_id_str": "715953298076012545",
"source_user_id": 2375847847,
"source_user_id_str": "2375847847",
"type": "photo",
"url": "https://shortened.url/YQoxEuEAXV"
},
{
"display_url": "pic.twitter.com/YQoxEuEAXV",
"expanded_url": "http://twitter.com/Zenttsilverwing/status/715953298076012545/photo/1",
"id": 715953295727009793,
"id_str": "715953295727009793",
"indices": [
104,
127
],
"media_url": "http://pbs.twimg.com/media/Ce-TuquUIAEsVn9.jpg",
"media_url_https": "https://pbs.twimg.com/media/Ce-TuquUIAEsVn9.jpg",
"sizes": {
"large": {
"h": 768,
"resize": "fit",
"w": 1024
},
"medium": {
"h": 450,
"resize": "fit",
"w": 600
},
"small": {
"h": 255,
"resize": "fit",
"w": 340
},
"thumb": {
"h": 150,
"resize": "crop",
"w": 150
}
},
"source_status_id": 715953298076012545,
"source_status_id_str": "715953298076012545",
"source_user_id": 2375847847,
"source_user_id_str": "2375847847",
"type": "photo",
"url": "https://shortened.url/YQoxEuEAXV"
}
]
},
"favorite_count": 0,
"favorited": false,
"filter_level": "low",
"geo": null,
"id": 717851801417031680,
"id_str": "717851801417031680",
"in_reply_to_screen_name": null,
"in_reply_to_status_id": null,
"in_reply_to_status_id_str": null,
"in_reply_to_user_id": null,
"in_reply_to_user_id_str": null,
"is_quote_status": false,
"lang": "en",
"place": null,
"possibly_sensitive": false,
"retweet_count": 0,
"retweeted": false,
"retweeted_status": {
"contributors": null,
"coordinates": null,
"created_at": "Fri Apr 01 17:25:42 +0000 2016",
"entities": {
"hashtags": [
{
"indices": [
47,
51
],
"text": "dnd"
},
{
"indices": [
52,
58
],
"text": "Nat20"
},
{
"indices": [
59,
72
],
"text": "CriticalRole"
},
{
"indices": [
73,
82
],
"text": "d20babes"
}
],
"media": [
{
"display_url": "pic.twitter.com/YQoxEuEAXV",
"expanded_url": "http://twitter.com/Zenttsilverwing/status/715953298076012545/photo/1",
"id": 715953292849754112,
"id_str": "715953292849754112",
"indices": [
83,
106
],
"media_url": "http://pbs.twimg.com/media/Ce-TugAUsAASZht.jpg",
"media_url_https": "https://pbs.twimg.com/media/Ce-TugAUsAASZht.jpg",
"sizes": {
"large": {
"h": 768,
"resize": "fit",
"w": 1024
},
"medium": {
"h": 450,
"resize": "fit",
"w": 600
},
"small": {
"h": 255,
"resize": "fit",
"w": 340
},
"thumb": {
"h": 150,
"resize": "crop",
"w": 150
}
},
"type": "photo",
"url": "https://shortened.url/YQoxEuEAXV"
}
],
"symbols": [],
"urls": [
{
"display_url": "darkcastlecollectibles.com",
"expanded_url": "http://www.darkcastlecollectibles.com/",
"indices": [
23,
46
],
"url": "https://shortened.url/SJgFTE0o8h"
}
],
"user_mentions": []
},
"extended_entities": {
"media": [
{
"display_url": "pic.twitter.com/YQoxEuEAXV",
"expanded_url": "http://twitter.com/Zenttsilverwing/status/715953298076012545/photo/1",
"id": 715953292849754112,
"id_str": "715953292849754112",
"indices": [
83,
106
],
"media_url": "http://pbs.twimg.com/media/Ce-TugAUsAASZht.jpg",
"media_url_https": "https://pbs.twimg.com/media/Ce-TugAUsAASZht.jpg",
"sizes": {
"large": {
"h": 768,
"resize": "fit",
"w": 1024
},
"medium": {
"h": 450,
"resize": "fit",
"w": 600
},
"small": {
"h": 255,
"resize": "fit",
"w": 340
},
"thumb": {
"h": 150,
"resize": "crop",
"w": 150
}
},
"type": "photo",
"url": "https://shortened.url/YQoxEuEAXV"
},
{
"display_url": "pic.twitter.com/YQoxEuEAXV",
"expanded_url": "http://twitter.com/Zenttsilverwing/status/715953298076012545/photo/1",
"id": 715953295727009793,
"id_str": "715953295727009793",
"indices": [
83,
106
],
"media_url": "http://pbs.twimg.com/media/Ce-TuquUIAEsVn9.jpg",
"media_url_https": "https://pbs.twimg.com/media/Ce-TuquUIAEsVn9.jpg",
"sizes": {
"large": {
"h": 768,
"resize": "fit",
"w": 1024
},
"medium": {
"h": 450,
"resize": "fit",
"w": 600
},
"small": {
"h": 255,
"resize": "fit",
"w": 340
},
"thumb": {
"h": 150,
"resize": "crop",
"w": 150
}
},
"type": "photo",
"url": "https://shortened.url/YQoxEuEAXV"
}
]
},
"favorite_count": 5,
"favorited": false,
"filter_level": "low",
"geo": null,
"id": 715953298076012545,
"id_str": "715953298076012545",
"in_reply_to_screen_name": null,
"in_reply_to_status_id": null,
"in_reply_to_status_id_str": null,
"in_reply_to_user_id": null,
"in_reply_to_user_id_str": null,
"is_quote_status": false,
"lang": "en",
"place": null,
"possibly_sensitive": false,
"retweet_count": 1,
"retweeted": false,
"source": "Twitter Web Client",
"text": "coins came in!! Thanks https://shortened.url/SJgFTE0o8h #dnd #Nat20 #CriticalRole #d20babes https://shortened.url/YQoxEuEAXV",
"truncated": false,
"user": {
"contributors_enabled": false,
"created_at": "Thu Mar 06 19:59:14 +0000 2014",
"default_profile": true,
"default_profile_image": false,
"description": "DM Geek Critter Con-man. I am here to like your art ^.^",
"favourites_count": 4990,
"follow_request_sent": null,
"followers_count": 57,
"following": null,
"friends_count": 183,
"geo_enabled": false,
"id": 2375847847,
"id_str": "2375847847",
"is_translator": false,
"lang": "en",
"listed_count": 7,
"location": "Flower Mound, TX",
"name": "Zack Chini",
"notifications": null,
"profile_background_color": "C0DEED",
"profile_background_image_url": "http://abs.twimg.com/images/themes/theme1/bg.png",
"profile_background_image_url_https": "https://abs.twimg.com/images/themes/theme1/bg.png",
"profile_background_tile": false,
"profile_banner_url": "https://pbs.twimg.com/profile_banners/2375847847/1430928759",
"profile_image_url": "http://pbs.twimg.com/profile_images/708816622358663168/mNF4Ysr5_normal.jpg",
"profile_image_url_https": "https://pbs.twimg.com/profile_images/708816622358663168/mNF4Ysr5_normal.jpg",
"profile_link_color": "0084B4",
"profile_sidebar_border_color": "C0DEED",
"profile_sidebar_fill_color": "DDEEF6",
"profile_text_color": "333333",
"profile_use_background_image": true,
"protected": false,
"screen_name": "Zenttsilverwing",
"statuses_count": 551,
"time_zone": null,
"url": null,
"utc_offset": null,
"verified": false
}
},
"source": "Twitter Web Client",
"text": "RT #Zenttsilverwing: coins came in!! Thanks https://shortened.url/SJgFTE0o8h #dnd #Nat20 #CriticalRole #d20babes https://shortened.url/YQoxEuEAXV",
"timestamp_ms": "1459984181156",
"truncated": false,
"user": {
"contributors_enabled": false,
"created_at": "Tue Feb 10 04:31:18 +0000 2009",
"default_profile": false,
"default_profile_image": false,
"description": "I use Twitter to primarily retweet Critter artwork of Critical Role and their own creations. I maintain a list of all the Critter artists I've come across.",
"favourites_count": 17586,
"follow_request_sent": null,
"followers_count": 318,
"following": null,
"friends_count": 651,
"geo_enabled": true,
"id": 20491914,
"id_str": "20491914",
"is_translator": false,
"lang": "en",
"listed_count": 33,
"location": "SanDiego, CA",
"name": "UnknownOutrider",
"notifications": null,
"profile_background_color": "EDECE9",
"profile_background_image_url": "http://abs.twimg.com/images/themes/theme3/bg.gif",
"profile_background_image_url_https": "https://abs.twimg.com/images/themes/theme3/bg.gif",
"profile_background_tile": false,
"profile_image_url": "http://pbs.twimg.com/profile_images/224346493/cartoon_dragon_tattoo_designs_normal.jpg",
"profile_image_url_https": "https://pbs.twimg.com/profile_images/224346493/cartoon_dragon_tattoo_designs_normal.jpg",
"profile_link_color": "088253",
"profile_sidebar_border_color": "D3D2CF",
"profile_sidebar_fill_color": "E3E2DE",
"profile_text_color": "634047",
"profile_use_background_image": true,
"protected": false,
"screen_name": "UnknownOutrider",
"statuses_count": 12760,
"time_zone": "Pacific Time (US & Canada)",
"url": null,
"utc_offset": -25200,
"verified": false
}
}
That doesn't work because you are trying to index a document with a field named _id, which already exists as a reserved metadata field in Elasticsearch. So delete that field, or rename it:
import json
from elasticsearch import Elasticsearch

es = Elasticsearch()
data = json.loads(open("data.json").read())
# data['id_'] = data['_id']  # <- alternatively, rename _id to id_ instead of deleting it
del data['_id']
es.index(index='tweets5', doc_type='tweets', id=data['id'], body=data)
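If you have more than a handful of tweets, the same fix works with the bulk helper; a sketch, assuming a hypothetical tweets.jsonl file with one tweet JSON object per line:
from elasticsearch import Elasticsearch, helpers
import json

es = Elasticsearch()

def actions(path):
    with open(path) as f:
        for line in f:
            tweet = json.loads(line)
            tweet.pop('_id', None)  # drop MongoDB's _id before indexing
            yield {'_index': 'tweets5', '_type': 'tweets',
                   '_id': tweet['id'], '_source': tweet}

helpers.bulk(es, actions('tweets.jsonl'))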
