I have a JSONL file with the content shown below.
How can I read the file in Python, change the number after each "label" key to a random 0 or 1, and save the converted file?
{"idx": 0, "passage": {"questions": [{"idx": 0, "answers": [{"idx": 0, "label": 0}, {"idx": 1, "label": 0}, {"idx": 2, "label": 0}, {"idx": 3, "label": 0}]}, {"idx": 1, "answers": [{"idx": 4, "label": 0}, {"idx": 5, "label": 0}, {"idx": 6, "label": 0}, {"idx": 7, "label": 0}]}, {"idx": 2, "answers": [{"idx": 8, "label": 1}, {"idx": 9, "label": 0}, {"idx": 10, "label": 0}, {"idx": 11, "label": 0}]}, {"idx": 3, "answers": [{"idx": 12, "label": 0}, {"idx": 13, "label": 0}, {"idx": 14, "label": 0}, {"idx": 15, "label": 0}]}, {"idx": 4, "answers": [{"idx": 16, "label": 0}, {"idx": 17, "label": 0}, {"idx": 18, "label": 0}, {"idx": 19, "label": 0}, {"idx": 20, "label": 0}]}, {"idx": 5, "answers": [{"idx": 21, "label": 0}, {"idx": 22, "label": 0}, {"idx": 23, "label": 0}, {"idx": 24, "label": 0}, {"idx": 25, "label": 0}]}, {"idx": 6, "answers": [{"idx": 26, "label": 0}, {"idx": 27, "label": 0}, {"idx": 28, "label": 0}, {"idx": 29, "label": 0}, {"idx": 30, "label": 0}]}, {"idx": 7, "answers": [{"idx": 31, "label": 0}, {"idx": 32, "label": 0}, {"idx": 33, "label": 0}, {"idx": 34, "label": 0}, {"idx": 35, "label": 0}]}, {"idx": 8, "answers": [{"idx": 36, "label": 0}, {"idx": 37, "label": 0}, {"idx": 38, "label": 0}, {"idx": 39, "label": 0}, {"idx": 40, "label": 0}]}, {"idx": 9, "answers": [{"idx": 41, "label": 0}, {"idx": 42, "label": 0}, {"idx": 43, "label": 0}, {"idx": 44, "label": 0}, {"idx": 45, "label": 0}]}]}}
{"idx": 1, "passage": {"questions": [{"idx": 10, "answers": [{"idx": 46, "label": 0}, {"idx": 47, "label": 0}, {"idx": 48, "label": 0}, {"idx": 49, "label": 0}, {"idx": 50, "label": 0}]}, {"idx": 11, "answers": [{"idx": 51, "label": 0}, {"idx": 52, "label": 0}, {"idx": 53, "label": 0}, {"idx": 54, "label": 0}, {"idx": 55, "label": 0}]}]}}
{"idx": 2, "passage": {"questions": [{"idx": 12, "answers": [{"idx": 56, "label": 0}, {"idx": 57, "label": 0}, {"idx": 58, "label": 0}, {"idx": 59, "label": 0}, {"idx": 60, "label": 0}]}, {"idx": 13, "answers": [{"idx": 61, "label": 0}, {"idx": 62, "label": 0}, {"idx": 63, "label": 0}, {"idx": 64, "label": 0}, {"idx": 65, "label": 0}]}, {"idx": 14, "answers": [{"idx": 66, "label": 0}, {"idx": 67, "label": 0}, {"idx": 68, "label": 0}, {"idx": 69, "label": 0}, {"idx": 70, "label": 0}]}, {"idx": 15, "answers": [{"idx": 71, "label": 0}, {"idx": 72, "label": 0}, {"idx": 73, "label": 0}, {"idx": 74, "label": 0}, {"idx": 75, "label": 0}]}, {"idx": 16, "answers": [{"idx": 76, "label": 0}, {"idx": 77, "label": 0}, {"idx": 78, "label": 0}, {"idx": 79, "label": 0}, {"idx": 80, "label": 0}]}, {"idx": 17, "answers": [{"idx": 81, "label": 0}, {"idx": 82, "label": 0}, {"idx": 83, "label": 0}, {"idx": 84, "label": 0}, {"idx": 85, "label": 0}]}, {"idx": 18, "answers": [{"idx": 86, "label": 0}, {"idx": 87, "label": 0}, {"idx": 88, "label": 0}, {"idx": 89, "label": 0}, {"idx": 90, "label": 0}]}, {"idx": 19, "answers": [{"idx": 91, "label": 0}, {"idx": 92, "label": 0}, {"idx": 93, "label": 0}, {"idx": 94, "label": 0}, {"idx": 95, "label": 0}]}]}}
{"idx": 3, "passage": {"questions": [{"idx": 20, "answers": [{"idx": 96, "label": 0}, {"idx": 97, "label": 0}, {"idx": 98, "label": 0}, {"idx": 99, "label": 0}]}, {"idx": 21, "answers": [{"idx": 100, "label": 0}, {"idx": 101, "label": 0}, {"idx": 102, "label": 0}, {"idx": 103, "label": 0}]}, {"idx": 22, "answers": [{"idx": 104, "label": 0}, {"idx": 105, "label": 0}, {"idx": 106, "label": 0}, {"idx": 107, "label": 0}]}, {"idx": 23, "answers": [{"idx": 108, "label": 0}, {"idx": 109, "label": 0}, {"idx": 110, "label": 0}, {"idx": 111, "label": 0}]}, {"idx": 24, "answers": [{"idx": 112, "label": 0}, {"idx": 113, "label": 0}, {"idx": 114, "label": 0}, {"idx": 115, "label": 0}]}, {"idx": 25, "answers": [{"idx": 116, "label": 0}, {"idx": 117, "label": 0}, {"idx": 118, "label": 0}, {"idx": 119, "label": 0}]}, {"idx": 26, "answers": [{"idx": 120, "label": 0}, {"idx": 121, "label": 0}, {"idx": 122, "label": 0}, {"idx": 123, "label": 0}]}, {"idx": 27, "answers": [{"idx": 124, "label": 0}, {"idx": 125, "label": 0}, {"idx": 126, "label": 0}, {"idx": 127, "label": 0}]}]}}
{"idx": 4, "passage": {"questions": [{"idx": 28, "answers": [{"idx": 128, "label": 1}, {"idx": 129, "label": 1}, {"idx": 130, "label": 1}, {"idx": 131, "label": 1}, {"idx": 132, "label": 1}]}, {"idx": 29, "answers": [{"idx": 133, "label": 0}, {"idx": 134, "label": 1}, {"idx": 135, "label": 1}, {"idx": 136, "label": 0}, {"idx": 137, "label": 1}]}, {"idx": 30, "answers": [{"idx": 138, "label": 0}, {"idx": 139, "label": 0}, {"idx": 140, "label": 1}, {"idx": 141, "label": 0}, {"idx": 142, "label": 0}]}, {"idx": 31, "answers": [{"idx": 143, "label": 0}, {"idx": 144, "label": 0}, {"idx": 145, "label": 0}, {"idx": 146, "label": 0}, {"idx": 147, "label": 0}]}, {"idx": 32, "answers": [{"idx": 148, "label": 0}, {"idx": 149, "label": 0}, {"idx": 150, "label": 0}, {"idx": 151, "label": 0}, {"idx": 152, "label": 0}]}, {"idx": 33, "answers": [{"idx": 153, "label": 0}, {"idx": 154, "label": 1}, {"idx": 155, "label": 1}, {"idx": 156, "label": 1}, {"idx": 157, "label": 1}]}, {"idx": 34, "answers": [{"idx": 158, "label": 0}, {"idx": 159, "label": 0}, {"idx": 160, "label": 0}, {"idx": 161, "label": 0}, {"idx": 162, "label": 0}]}, {"idx": 35, "answers": [{"idx": 163, "label": 0}, {"idx": 164, "label": 1}, {"idx": 165, "label": 1}, {"idx": 166, "label": 0}, {"idx": 167, "label": 0}]}, {"idx": 36, "answers": [{"idx": 168, "label": 0}, {"idx": 169, "label": 0}, {"idx": 170, "label": 1}, {"idx": 171, "label": 0}, {"idx": 172, "label": 0}]}, {"idx": 37, "answers": [{"idx": 173, "label": 1}, {"idx": 174, "label": 0}, {"idx": 175, "label": 0}, {"idx": 176, "label": 0}, {"idx": 177, "label": 0}]}, {"idx": 38, "answers": [{"idx": 178, "label": 0}, {"idx": 179, "label": 1}, {"idx": 180, "label": 1}, {"idx": 181, "label": 0}, {"idx": 182, "label": 1}]}, {"idx": 39, "answers": [{"idx": 183, "label": 1}, {"idx": 184, "label": 1}, {"idx": 185, "label": 1}, {"idx": 186, "label": 0}, {"idx": 187, "label": 0}]}, {"idx": 40, "answers": [{"idx": 188, "label": 0}, {"idx": 189, "label": 0}, {"idx": 190, 
"label": 1}, {"idx": 191, "label": 0}, {"idx": 192, "label": 0}]}, {"idx": 41, "answers": [{"idx": 193, "label": 0}, {"idx": 194, "label": 0}, {"idx": 195, "label": 0}, {"idx": 196, "label": 0}, {"idx": 197, "label": 0}]}]}}
{"idx": 5, "passage": {"questions": [{"idx": 42, "answers": [{"idx": 198, "label": 0}, {"idx": 199, "label": 0}]}, {"idx": 43, "answers": [{"idx": 200, "label": 1}, {"idx": 201, "label": 0}]}, {"idx": 44, "answers": [{"idx": 202, "label": 0}, {"idx": 203, "label": 0}, {"idx": 204, "label": 0}, {"idx": 205, "label": 0}]}, {"idx": 45, "answers": [{"idx": 206, "label": 0}, {"idx": 207, "label": 0}, {"idx": 208, "label": 0}, {"idx": 209, "label": 0}]}, {"idx": 46, "answers": [{"idx": 210, "label": 0}, {"idx": 211, "label": 0}, {"idx": 212, "label": 0}, {"idx": 213, "label": 0}]}, {"idx": 47, "answers": [{"idx": 214, "label": 0}, {"idx": 215, "label": 0}, {"idx": 216, "label": 0}, {"idx": 217, "label": 0}]}, {"idx": 48, "answers": [{"idx": 218, "label": 0}, {"idx": 219, "label": 0}, {"idx": 220, "label": 0}, {"idx": 221, "label": 0}, {"idx": 222, "label": 0}]}, {"idx": 49, "answers": [{"idx": 223, "label": 1}, {"idx": 224, "label": 0}, {"idx": 225, "label": 0}, {"idx": 226, "label": 0}, {"idx": 227, "label": 0}]}, {"idx": 50, "answers": [{"idx": 228, "label": 1}, {"idx": 229, "label": 0}, {"idx": 230, "label": 0}, {"idx": 231, "label": 0}, {"idx": 232, "label": 0}]}, {"idx": 51, "answers": [{"idx": 233, "label": 0}, {"idx": 234, "label": 0}, {"idx": 235, "label": 0}, {"idx": 236, "label": 0}, {"idx": 237, "label": 0}]}, {"idx": 52, "answers": [{"idx": 238, "label": 1}, {"idx": 239, "label": 0}, {"idx": 240, "label": 0}, {"idx": 241, "label": 0}, {"idx": 242, "label": 0}]}, {"idx": 53, "answers": [{"idx": 243, "label": 0}, {"idx": 244, "label": 0}, {"idx": 245, "label": 0}, {"idx": 246, "label": 1}, {"idx": 247, "label": 1}]}, {"idx": 54, "answers": [{"idx": 248, "label": 1}, {"idx": 249, "label": 1}, {"idx": 250, "label": 1}, {"idx": 251, "label": 1}, {"idx": 252, "label": 0}]}, {"idx": 55, "answers": [{"idx": 253, "label": 0}, {"idx": 254, "label": 0}, {"idx": 255, "label": 0}, {"idx": 256, "label": 0}, {"idx": 257, "label": 0}]}]}}
{"idx": 6, "passage": {"questions": [{"idx": 56, "answers": [{"idx": 258, "label": 1}, {"idx": 259, "label": 0}, {"idx": 260, "label": 1}, {"idx": 261, "label": 0}]}, {"idx": 57, "answers": [{"idx": 262, "label": 1}, {"idx": 263, "label": 1}, {"idx": 264, "label": 1}]}, {"idx": 58, "answers": [{"idx": 265, "label": 1}, {"idx": 266, "label": 1}, {"idx": 267, "label": 1}, {"idx": 268, "label": 1}]}, {"idx": 59, "answers": [{"idx": 269, "label": 1}, {"idx": 270, "label": 1}, {"idx": 271, "label": 1}, {"idx": 272, "label": 1}]}, {"idx": 60, "answers": [{"idx": 273, "label": 1}, {"idx": 274, "label": 1}, {"idx": 275, "label": 1}, {"idx": 276, "label": 1}]}, {"idx": 61, "answers": [{"idx": 277, "label": 1}, {"idx": 278, "label": 1}, {"idx": 279, "label": 1}, {"idx": 280, "label": 1}]}]}}
{"idx": 7, "passage": {"questions": [{"idx": 62, "answers": [{"idx": 281, "label": 0}, {"idx": 282, "label": 1}, {"idx": 283, "label": 1}, {"idx": 284, "label": 1}, {"idx": 285, "label": 0}]}, {"idx": 63, "answers": [{"idx": 286, "label": 0}, {"idx": 287, "label": 0}, {"idx": 288, "label": 0}, {"idx": 289, "label": 0}, {"idx": 290, "label": 1}]}, {"idx": 64, "answers": [{"idx": 291, "label": 0}, {"idx": 292, "label": 0}, {"idx": 293, "label": 0}, {"idx": 294, "label": 0}, {"idx": 295, "label": 0}]}, {"idx": 65, "answers": [{"idx": 296, "label": 1}, {"idx": 297, "label": 1}, {"idx": 298, "label": 1}, {"idx": 299, "label": 1}, {"idx": 300, "label": 1}]}, {"idx": 66, "answers": [{"idx": 301, "label": 1}, {"idx": 302, "label": 0}, {"idx": 303, "label": 1}, {"idx": 304, "label": 0}, {"idx": 305, "label": 1}]}, {"idx": 67, "answers": [{"idx": 306, "label": 0}, {"idx": 307, "label": 0}, {"idx": 308, "label": 0}, {"idx": 309, "label": 1}, {"idx": 310, "label": 1}]}, {"idx": 68, "answers": [{"idx": 311, "label": 0}, {"idx": 312, "label": 0}, {"idx": 313, "label": 0}, {"idx": 314, "label": 1}, {"idx": 315, "label": 0}]}, {"idx": 69, "answers": [{"idx": 316, "label": 1}, {"idx": 317, "label": 1}, {"idx": 318, "label": 1}, {"idx": 319, "label": 1}, {"idx": 320, "label": 1}]}]}}
{"idx": 8, "passage": {"questions": [{"idx": 70, "answers": [{"idx": 321, "label": 0}, {"idx": 322, "label": 0}, {"idx": 323, "label": 0}, {"idx": 324, "label": 0}]}, {"idx": 71, "answers": [{"idx": 325, "label": 1}, {"idx": 326, "label": 0}, {"idx": 327, "label": 0}, {"idx": 328, "label": 0}]}, {"idx": 72, "answers": [{"idx": 329, "label": 0}, {"idx": 330, "label": 0}, {"idx": 331, "label": 0}, {"idx": 332, "label": 0}, {"idx": 333, "label": 0}]}, {"idx": 73, "answers": [{"idx": 334, "label": 0}, {"idx": 335, "label": 0}, {"idx": 336, "label": 0}, {"idx": 337, "label": 0}, {"idx": 338, "label": 0}]}, {"idx": 74, "answers": [{"idx": 339, "label": 0}, {"idx": 340, "label": 0}, {"idx": 341, "label": 0}, {"idx": 342, "label": 1}, {"idx": 343, "label": 1}]}, {"idx": 75, "answers": [{"idx": 344, "label": 1}, {"idx": 345, "label": 1}, {"idx": 346, "label": 0}, {"idx": 347, "label": 0}, {"idx": 348, "label": 0}]}, {"idx": 76, "answers": [{"idx": 349, "label": 0}, {"idx": 350, "label": 1}, {"idx": 351, "label": 0}, {"idx": 352, "label": 0}, {"idx": 353, "label": 0}]}, {"idx": 77, "answers": [{"idx": 354, "label": 0}, {"idx": 355, "label": 0}, {"idx": 356, "label": 0}, {"idx": 357, "label": 1}, {"idx": 358, "label": 0}]}, {"idx": 78, "answers": [{"idx": 359, "label": 0}, {"idx": 360, "label": 1}, {"idx": 361, "label": 0}, {"idx": 362, "label": 0}, {"idx": 363, "label": 0}]}, {"idx": 79, "answers": [{"idx": 364, "label": 0}, {"idx": 365, "label": 0}, {"idx": 366, "label": 0}, {"idx": 367, "label": 0}, {"idx": 368, "label": 0}]}, {"idx": 80, "answers": [{"idx": 369, "label": 0}, {"idx": 370, "label": 0}, {"idx": 371, "label": 0}, {"idx": 372, "label": 0}, {"idx": 373, "label": 0}]}, {"idx": 81, "answers": [{"idx": 374, "label": 0}, {"idx": 375, "label": 0}, {"idx": 376, "label": 0}, {"idx": 377, "label": 0}, {"idx": 378, "label": 0}]}]}}
{"idx": 9, "passage": {"questions": [{"idx": 82, "answers": [{"idx": 379, "label": 0}, {"idx": 380, "label": 0}, {"idx": 381, "label": 0}, {"idx": 382, "label": 0}]}, {"idx": 83, "answers": [{"idx": 383, "label": 0}, {"idx": 384, "label": 1}, {"idx": 385, "label": 0}, {"idx": 386, "label": 0}]}, {"idx": 84, "answers": [{"idx": 387, "label": 0}, {"idx": 388, "label": 0}, {"idx": 389, "label": 0}, {"idx": 390, "label": 0}]}, {"idx": 85, "answers": [{"idx": 391, "label": 1}, {"idx": 392, "label": 0}, {"idx": 393, "label": 1}, {"idx": 394, "label": 1}]}, {"idx": 86, "answers": [{"idx": 395, "label": 0}, {"idx": 396, "label": 0}, {"idx": 397, "label": 0}, {"idx": 398, "label": 0}]}, {"idx": 87, "answers": [{"idx": 399, "label": 0}, {"idx": 400, "label": 0}, {"idx": 401, "label": 0}, {"idx": 402, "label": 1}]}, {"idx": 88, "answers": [{"idx": 403, "label": 0}, {"idx": 404, "label": 0}, {"idx": 405, "label": 1}, {"idx": 406, "label": 0}]}, {"idx": 89, "answers": [{"idx": 407, "label": 0}, {"idx": 408, "label": 0}, {"idx": 409, "label": 0}, {"idx": 410, "label": 1}]}]}}
import json
import random
# Read every record of the JSON Lines file into a list (one decoded object per line).
# Blank lines (for example a trailing empty line at the end of the file) are skipped,
# because json.loads() raises JSONDecodeError on an empty string.
with open('test.jsonl', encoding='utf8') as f:
    data = [json.loads(line) for line in f if line.strip()]

# Walk record -> passage -> questions -> answers and overwrite every label
# with an independently drawn 0 or 1 (random.randint includes both endpoints).
for item in data:
    for q in item['passage']['questions']:
        for a in q['answers']:
            a['label'] = random.randint(0, 1)

# Write the records to a new file, one JSON document per line, so the input
# file is left untouched if anything fails part-way through.
with open('test2.jsonl', 'w', encoding='utf8') as f:
    for item in data:
        json.dump(item, f)
        f.write('\n')  # record separator: exactly one record per line
You could write back to the same file, but it is safer to write to a new file and only delete the original and rename the new file once it has been written successfully.
I hope this different, fast approach will work:
import json
import numpy as np
import random
def find(original, sub):
    """Return the index just past every occurrence of ``sub`` in ``original``.

    Each returned position is ``start + len(sub)``, i.e. the index of the first
    character that follows the match. Overlapping occurrences are all reported,
    in ascending order.

    Args:
        original: The string to search in.
        sub: The marker string to look for.

    Returns:
        A list of integer positions; empty when ``sub`` does not occur.
    """
    # Derive the offset from the marker itself instead of a fixed number, so
    # the result stays correct for markers of any length.
    width = len(sub)
    return [i + width for i in range(len(original)) if original.startswith(sub, i)]
def split_into_parts(number, n=2):
    """Split ``number`` into ``n`` integer parts that are as equal as possible.

    The parts always add up to ``number``. When ``number`` is not divisible by
    ``n`` the larger parts come last, so with the default ``n=2`` an odd number
    yields ``[half, half + 1]`` and an even number yields ``[half, half]``.

    Args:
        number: The integer total to split.
        n: How many parts to produce (default 2).

    Returns:
        A list of ``n`` integers summing to ``number``.

    Raises:
        ValueError: If ``n`` is negative.
        ZeroDivisionError: If ``n`` is 0.
    """
    if n < 0:
        raise ValueError("n must be a positive number of parts")
    # Pure integer arithmetic: no float division, so large values stay exact.
    base, remainder = divmod(number, n)
    # `remainder` of the parts receive one extra unit; they are placed last.
    return [base] * (n - remainder) + [base + 1] * remainder
def get_ranodom_list(num):
    """Build a shuffled list of ``num`` labels made of 0s and 1s.

    The counts of 0s and 1s come from ``split_into_parts(num, 2)``: the first
    part is the number of 0s and the second the number of 1s. The list is then
    shuffled in place, so the values are in random order.

    Args:
        num: Total number of labels to generate.

    Returns:
        A list of ``num`` integers, each 0 or 1.
    """
    zero_count, one_count = split_into_parts(num, 2)
    labels = [0] * zero_count
    labels.extend([1] * one_count)
    random.shuffle(labels)
    return labels
d={"idx": 0, "passage": {"questions": [{"idx": 0, "answers": [{"idx": 0, "label": 0}, {"idx": 1, "label": 0}, {"idx": 2, "label": 0}, {"idx": 3, "label": 0}]}, {"idx": 1, "answers": [{"idx": 4, "label": 0}, {"idx": 5, "label": 0}, {"idx": 6, "label": 0}, {"idx": 7, "label": 0}]}, {"idx": 2, "answers": [{"idx": 8, "label": 1}, {"idx": 9, "label": 0}, {"idx": 10, "label": 0}, {"idx": 11, "label": 0}]}, {"idx": 3, "answers": [{"idx": 12, "label": 0}, {"idx": 13, "label": 0}, {"idx": 14, "label": 0}, {"idx": 15, "label": 0}]}, {"idx": 4, "answers": [{"idx": 16, "label": 0}, {"idx": 17, "label": 0}, {"idx": 18, "label": 0}, {"idx": 19, "label": 0}, {"idx": 20, "label": 0}]}, {"idx": 5, "answers": [{"idx": 21, "label": 0}, {"idx": 22, "label": 0}, {"idx": 23, "label": 0}, {"idx": 24, "label": 0}, {"idx": 25, "label": 0}]}, {"idx": 6, "answers": [{"idx": 26, "label": 0}, {"idx": 27, "label": 0}, {"idx": 28, "label": 0}, {"idx": 29, "label": 0}, {"idx": 30, "label": 0}]}, {"idx": 7, "answers": [{"idx": 31, "label": 0}, {"idx": 32, "label": 0}, {"idx": 33, "label": 0}, {"idx": 34, "label": 0}, {"idx": 35, "label": 0}]}, {"idx": 8, "answers": [{"idx": 36, "label": 0}, {"idx": 37, "label": 0}, {"idx": 38, "label": 0}, {"idx": 39, "label": 0}, {"idx": 40, "label": 0}]}, {"idx": 9, "answers": [{"idx": 41, "label": 0}, {"idx": 42, "label": 0}, {"idx": 43, "label": 0}, {"idx": 44, "label": 0}, {"idx": 45, "label": 0}]}]}}
# Serialize the record; the labels are patched in the text, not in the dict.
original = json.dumps(d)
# Index of the character right after each '"label": ' marker, i.e. the label value.
result = find(original, '"label": ')
num = len(result)
# One 0/1 value per label occurrence, already in random order.
zo_list = get_ranodom_list(num)
temp = list(original)
# Visit only the label positions instead of testing every character index for
# membership in `result`. Exactly one character is replaced per label, which
# relies on every label value being a single character (0 or 1 here).
for position, value in zip(result, zo_list):
    temp[position] = str(value)
res = ''.join(temp)
print(res)
Hello, I ran into a problem converting a JSON string to a data frame.
print (json_resp.text)
{
"error_code": 0,
"description": "",
"img_size": { "w": 650, "h": 488 },
"people": [
{
"age": 22,
"gender": 84,
"mood": 29,
"position": { "x": 190, "y": 161, "w": 259, "h": 259 },
"rotation": { "yaw": -3, "pitch": 3, "roll": -1 },
"landmarks": { "lefteye": { "x": 371, "y": 233 }, "righteye": { "x": 266, "y": 236 }, "maskpoints": [ { "x": 371, "y": 233 }, { "x": 266, "y": 236 }, { "x": 203, "y": 234 }, { "x": 206, "y": 261 }, { "x": 212, "y": 287 }, { "x": 220, "y": 313 }, { "x": 233, "y": 338 }, { "x": 250, "y": 357 }, { "x": 273, "y": 373 }, { "x": 296, "y": 388 }, { "x": 321, "y": 394 }, { "x": 346, "y": 390 }, { "x": 371, "y": 377 }, { "x": 396, "y": 362 }, { "x": 416, "y": 341 }, { "x": 430, "y": 315 }, { "x": 437, "y": 287 }, { "x": 444, "y": 258 }, { "x": 448, "y": 227 }, { "x": 215, "y": 221 }, { "x": 226, "y": 203 }, { "x": 247, "y": 195 }, { "x": 269, "y": 198 }, { "x": 291, "y": 204 }, { "x": 336, "y": 201 }, { "x": 360, "y": 193 }, { "x": 385, "y": 190 }, { "x": 408, "y": 198 }, { "x": 423, "y": 216 }, { "x": 314, "y": 231 }, { "x": 314, "y": 247 }, { "x": 314, "y": 263 }, { "x": 314, "y": 279 }, { "x": 294, "y": 297 }, { "x": 304, "y": 300 }, { "x": 315, "y": 302 }, { "x": 327, "y": 299 }, { "x": 338, "y": 297 }, { "x": 242, "y": 236 }, { "x": 254, "y": 226 }, { "x": 271, "y": 226 }, { "x": 284, "y": 240 }, { "x": 270, "y": 245 }, { "x": 253, "y": 244 }, { "x": 349, "y": 238 }, { "x": 362, "y": 224 }, { "x": 379, "y": 224 }, { "x": 393, "y": 234 }, { "x": 381, "y": 242 }, { "x": 363, "y": 242 }, { "x": 281, "y": 332 }, { "x": 294, "y": 327 }, { "x": 306, "y": 322 }, { "x": 315, "y": 325 }, { "x": 325, "y": 323 }, { "x": 340, "y": 328 }, { "x": 357, "y": 335 }, { "x": 341, "y": 347 }, { "x": 327, "y": 354 }, { "x": 317, "y": 354 }, { "x": 306, "y": 353 }, { "x": 294, "y": 347 }, { "x": 289, "y": 333 }, { "x": 306, "y": 331 }, { "x": 316, "y": 332 }, { "x": 325, "y": 331 }, { "x": 349, "y": 334 }, { "x": 326, "y": 339 }, { "x": 316, "y": 340 }, { "x": 306, "y": 339 } ] },
"clothingcolors": [ ],
"ethnicity": { "african": 83, "asian": 0, "caucasian": 12, "hispanic": 3 },
"emotions": { "happiness": 1, "surprise": 5, "anger": 2, "disgust": 2, "fear": 1, "sadness": 11 }
}
]
}
However, when I try to convert the JSON string to a data frame, I get:
import pandas as pd
# Try to parse the raw response text straight into a DataFrame.
# With the nested payload printed above, this call raises the ValueError
# shown in the traceback that follows.
df_json = pd.read_json(json_resp.text, typ='frame')
print (df_json)
Traceback (most recent call last):
File "<input>", line 1, in <module>
File "C:\Users\uzytkownik\PycharmProjects\Face API\venv\lib\site-packages\pandas\io\json\json.py", line 427, in read_json
result = json_reader.read()
File "C:\Users\uzytkownik\PycharmProjects\Face API\venv\lib\site-packages\pandas\io\json\json.py", line 537, in read
obj = self._get_object_parser(self.data)
File "C:\Users\uzytkownik\PycharmProjects\Face API\venv\lib\site-packages\pandas\io\json\json.py", line 556, in _get_object_parser
obj = FrameParser(json, **kwargs).parse()
File "C:\Users\uzytkownik\PycharmProjects\Face API\venv\lib\site-packages\pandas\io\json\json.py", line 652, in parse
self._parse_no_numpy()
File "C:\Users\uzytkownik\PycharmProjects\Face API\venv\lib\site-packages\pandas\io\json\json.py", line 871, in _parse_no_numpy
loads(json, precise_float=self.precise_float), dtype=None)
File "C:\Users\uzytkownik\PycharmProjects\Face API\venv\lib\site-packages\pandas\core\frame.py", line 392, in __init__
mgr = init_dict(data, index, columns, dtype=dtype)
File "C:\Users\uzytkownik\PycharmProjects\Face API\venv\lib\site-packages\pandas\core\internals\construction.py", line 212, in init_dict
return arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype)
File "C:\Users\uzytkownik\PycharmProjects\Face API\venv\lib\site-packages\pandas\core\internals\construction.py", line 51, in arrays_to_mgr
index = extract_index(arrays)
File "C:\Users\uzytkownik\PycharmProjects\Face API\venv\lib\site-packages\pandas\core\internals\construction.py", line 320, in extract_index
raise ValueError('Mixing dicts with non-Series may lead to '
ValueError: Mixing dicts with non-Series may lead to ambiguous ordering.
What should I change in the code to obtain a simple data frame?
json_normalize is what you want to use. However, the data contains nested lists, which means it only normalizes/flattens the first level.
I think the issue comes from landmarks.maskpoints, as that creates 70 rows with 2 columns, x and y. So trying to create a single row WITH something that contains 70 rows could be an issue.
You can sort of see what I mean if you just start trying to unwrap/flatten it little by little. Essentially to flatten, you want to normalize each part, then at the end merge them all together into a single row, but you can see what the issue is with the maskpoints.
jsonStr = '''
{
"error_code": 0,
"description": "",
"img_size": { "w": 650, "h": 488 },
"people": [
{
"age": 22,
"gender": 84,
"mood": 29,
"position": { "x": 190, "y": 161, "w": 259, "h": 259 },
"rotation": { "yaw": -3, "pitch": 3, "roll": -1 },
"landmarks": { "lefteye": { "x": 371, "y": 233 }, "righteye": { "x": 266, "y": 236 }, "maskpoints": [ { "x": 371, "y": 233 }, { "x": 266, "y": 236 }, { "x": 203, "y": 234 }, { "x": 206, "y": 261 }, { "x": 212, "y": 287 }, { "x": 220, "y": 313 }, { "x": 233, "y": 338 }, { "x": 250, "y": 357 }, { "x": 273, "y": 373 }, { "x": 296, "y": 388 }, { "x": 321, "y": 394 }, { "x": 346, "y": 390 }, { "x": 371, "y": 377 }, { "x": 396, "y": 362 }, { "x": 416, "y": 341 }, { "x": 430, "y": 315 }, { "x": 437, "y": 287 }, { "x": 444, "y": 258 }, { "x": 448, "y": 227 }, { "x": 215, "y": 221 }, { "x": 226, "y": 203 }, { "x": 247, "y": 195 }, { "x": 269, "y": 198 }, { "x": 291, "y": 204 }, { "x": 336, "y": 201 }, { "x": 360, "y": 193 }, { "x": 385, "y": 190 }, { "x": 408, "y": 198 }, { "x": 423, "y": 216 }, { "x": 314, "y": 231 }, { "x": 314, "y": 247 }, { "x": 314, "y": 263 }, { "x": 314, "y": 279 }, { "x": 294, "y": 297 }, { "x": 304, "y": 300 }, { "x": 315, "y": 302 }, { "x": 327, "y": 299 }, { "x": 338, "y": 297 }, { "x": 242, "y": 236 }, { "x": 254, "y": 226 }, { "x": 271, "y": 226 }, { "x": 284, "y": 240 }, { "x": 270, "y": 245 }, { "x": 253, "y": 244 }, { "x": 349, "y": 238 }, { "x": 362, "y": 224 }, { "x": 379, "y": 224 }, { "x": 393, "y": 234 }, { "x": 381, "y": 242 }, { "x": 363, "y": 242 }, { "x": 281, "y": 332 }, { "x": 294, "y": 327 }, { "x": 306, "y": 322 }, { "x": 315, "y": 325 }, { "x": 325, "y": 323 }, { "x": 340, "y": 328 }, { "x": 357, "y": 335 }, { "x": 341, "y": 347 }, { "x": 327, "y": 354 }, { "x": 317, "y": 354 }, { "x": 306, "y": 353 }, { "x": 294, "y": 347 }, { "x": 289, "y": 333 }, { "x": 306, "y": 331 }, { "x": 316, "y": 332 }, { "x": 325, "y": 331 }, { "x": 349, "y": 334 }, { "x": 326, "y": 339 }, { "x": 316, "y": 340 }, { "x": 306, "y": 339 } ] },
"clothingcolors": [ ],
"ethnicity": { "african": 83, "asian": 0, "caucasian": 12, "hispanic": 3 },
"emotions": { "happiness": 1, "surprise": 5, "anger": 2, "disgust": 2, "fear": 1, "sadness": 11 }
}
]
}'''
import json

# json_normalize is part of the top-level pandas namespace since pandas 1.0;
# importing it from pandas.io.json is deprecated, so that location is only
# used as a fallback for older releases.
try:
    from pandas import json_normalize
except ImportError:  # pandas < 1.0
    from pandas.io.json import json_normalize

jsonObj = json.loads(jsonStr)

# Flatten the top level only. Nested dicts become dotted columns, but the
# `people` column still holds a list of nested dictionaries.
df_a = json_normalize(jsonObj)

# Flatten `people`: `clothingcolors` and `landmarks.maskpoints` still hold lists.
df_people = json_normalize(jsonObj['people'])
df_clothingcolors = json_normalize(jsonObj['people'][0]['clothingcolors'])
df_landmarks = json_normalize(jsonObj['people'][0]['landmarks'])

# `maskpoints` is a list of 70 {x, y} points, so normalizing it yields 70 rows.
# That is why it cannot simply be merged into the single row built above.
df_maskpoints = json_normalize(jsonObj['people'][0]['landmarks']['maskpoints'])
So if you look at the shape of these:
print (df_a.shape)
(1, 5)
print (df_people.shape)
(1, 26)
print (df_clothingcolors.shape)
(0, 0)
print (df_landmarks.shape)
(1, 5)
print (df_maskpoints.shape)
(70, 2)
...you see the maskpoints shape is 70 rows.
BUT,
I found this blog useful. Essentially it unwraps all those nested lists so that you end up with 1 big flat table.
jsonStr = '''
{
"error_code": 0,
"description": "",
"img_size": { "w": 650, "h": 488 },
"people": [
{
"age": 22,
"gender": 84,
"mood": 29,
"position": { "x": 190, "y": 161, "w": 259, "h": 259 },
"rotation": { "yaw": -3, "pitch": 3, "roll": -1 },
"landmarks": { "lefteye": { "x": 371, "y": 233 }, "righteye": { "x": 266, "y": 236 }, "maskpoints": [ { "x": 371, "y": 233 }, { "x": 266, "y": 236 }, { "x": 203, "y": 234 }, { "x": 206, "y": 261 }, { "x": 212, "y": 287 }, { "x": 220, "y": 313 }, { "x": 233, "y": 338 }, { "x": 250, "y": 357 }, { "x": 273, "y": 373 }, { "x": 296, "y": 388 }, { "x": 321, "y": 394 }, { "x": 346, "y": 390 }, { "x": 371, "y": 377 }, { "x": 396, "y": 362 }, { "x": 416, "y": 341 }, { "x": 430, "y": 315 }, { "x": 437, "y": 287 }, { "x": 444, "y": 258 }, { "x": 448, "y": 227 }, { "x": 215, "y": 221 }, { "x": 226, "y": 203 }, { "x": 247, "y": 195 }, { "x": 269, "y": 198 }, { "x": 291, "y": 204 }, { "x": 336, "y": 201 }, { "x": 360, "y": 193 }, { "x": 385, "y": 190 }, { "x": 408, "y": 198 }, { "x": 423, "y": 216 }, { "x": 314, "y": 231 }, { "x": 314, "y": 247 }, { "x": 314, "y": 263 }, { "x": 314, "y": 279 }, { "x": 294, "y": 297 }, { "x": 304, "y": 300 }, { "x": 315, "y": 302 }, { "x": 327, "y": 299 }, { "x": 338, "y": 297 }, { "x": 242, "y": 236 }, { "x": 254, "y": 226 }, { "x": 271, "y": 226 }, { "x": 284, "y": 240 }, { "x": 270, "y": 245 }, { "x": 253, "y": 244 }, { "x": 349, "y": 238 }, { "x": 362, "y": 224 }, { "x": 379, "y": 224 }, { "x": 393, "y": 234 }, { "x": 381, "y": 242 }, { "x": 363, "y": 242 }, { "x": 281, "y": 332 }, { "x": 294, "y": 327 }, { "x": 306, "y": 322 }, { "x": 315, "y": 325 }, { "x": 325, "y": 323 }, { "x": 340, "y": 328 }, { "x": 357, "y": 335 }, { "x": 341, "y": 347 }, { "x": 327, "y": 354 }, { "x": 317, "y": 354 }, { "x": 306, "y": 353 }, { "x": 294, "y": 347 }, { "x": 289, "y": 333 }, { "x": 306, "y": 331 }, { "x": 316, "y": 332 }, { "x": 325, "y": 331 }, { "x": 349, "y": 334 }, { "x": 326, "y": 339 }, { "x": 316, "y": 340 }, { "x": 306, "y": 339 } ] },
"clothingcolors": [ ],
"ethnicity": { "african": 83, "asian": 0, "caucasian": 12, "hispanic": 3 },
"emotions": { "happiness": 1, "surprise": 5, "anger": 2, "disgust": 2, "fear": 1, "sadness": 11 }
}
]
}'''
import json

# json_normalize is part of the top-level pandas namespace since pandas 1.0;
# the pandas.io.json location is deprecated and kept only as a fallback.
try:
    from pandas import json_normalize
except ImportError:  # pandas < 1.0
    from pandas.io.json import json_normalize
def flatten_json(y, sep='_'):
    """Flatten nested dicts and lists into a single-level dict.

    Every leaf value is stored under a key built from the path leading to it:
    dict keys and list indices joined with ``sep``. For example
    ``{"a": {"b": [1, 2]}}`` becomes ``{"a_b_0": 1, "a_b_1": 2}``.

    Empty dicts and empty lists contain no leaves and therefore add no keys.
    A top-level value that is neither a dict nor a list is stored under the
    empty key ``''``. Distinct paths that join to the same string (such as
    ``{"a_b": 1, "a": {"b": 2}}``) overwrite each other.

    Args:
        y: The decoded JSON value to flatten.
        sep: Separator placed between path components (default ``'_'``).

    Returns:
        A new dict mapping each joined path to its leaf value.
    """
    out = {}

    def flatten(x, name=''):
        # `name` is the path so far, always ending in `sep` unless it is empty.
        if isinstance(x, dict):
            for key, value in x.items():
                # str() so that non-string keys do not break the concatenation.
                flatten(value, name + str(key) + sep)
        elif isinstance(x, list):
            for index, value in enumerate(x):
                flatten(value, name + str(index) + sep)
        else:
            # Leaf: drop the trailing separator. Slicing by length (instead of
            # a negative index) also works when `sep` is the empty string.
            out[name[:len(name) - len(sep)]] = x

    flatten(y)
    return out
# Decode the JSON text into Python objects.
jsonObj = json.loads(jsonStr)
# Collapse all nested dicts and lists into one single-level dict.
flat = flatten_json(jsonObj)
# A single flat dict becomes a one-row DataFrame, one column per key.
df = json_normalize(flat)
The output will be your 1 row, with 168 columns.
I got a JSON object after connecting to the F.A.C.E API via Python and analysing a face image.
I am a newbie in Python and my question is:
How do I transform the JSON object into a data frame and then into an .xls/.csv file?
print ('Response : ', json_resp.text)
#Console print:
Response : {
"error_code": 0,
"description": "",
"img_size": { "w": 650, "h": 488 },
"people": [
{
"age": 22,
"gender": 84,
"mood": 29,
"position": { "x": 190, "y": 161, "w": 259, "h": 259 },
"rotation": { "yaw": -3, "pitch": 3, "roll": -1 },
"landmarks": { "lefteye": { "x": 371, "y": 233 }, "righteye": { "x": 266, "y": 236 }, "maskpoints": [ { "x": 371, "y": 233 }, { "x": 266, "y": 236 }, { "x": 203, "y": 234 }, { "x": 206, "y": 261 }, { "x": 212, "y": 287 }, { "x": 220, "y": 313 }, { "x": 233, "y": 338 }, { "x": 250, "y": 357 }, { "x": 273, "y": 373 }, { "x": 296, "y": 388 }, { "x": 321, "y": 394 }, { "x": 346, "y": 390 }, { "x": 371, "y": 377 }, { "x": 396, "y": 362 }, { "x": 416, "y": 341 }, { "x": 430, "y": 315 }, { "x": 437, "y": 287 }, { "x": 444, "y": 258 }, { "x": 448, "y": 227 }, { "x": 215, "y": 221 }, { "x": 226, "y": 203 }, { "x": 247, "y": 195 }, { "x": 269, "y": 198 }, { "x": 291, "y": 204 }, { "x": 336, "y": 201 }, { "x": 360, "y": 193 }, { "x": 385, "y": 190 }, { "x": 408, "y": 198 }, { "x": 423, "y": 216 }, { "x": 314, "y": 231 }, { "x": 314, "y": 247 }, { "x": 314, "y": 263 }, { "x": 314, "y": 279 }, { "x": 294, "y": 297 }, { "x": 304, "y": 300 }, { "x": 315, "y": 302 }, { "x": 327, "y": 299 }, { "x": 338, "y": 297 }, { "x": 242, "y": 236 }, { "x": 254, "y": 226 }, { "x": 271, "y": 226 }, { "x": 284, "y": 240 }, { "x": 270, "y": 245 }, { "x": 253, "y": 244 }, { "x": 349, "y": 238 }, { "x": 362, "y": 224 }, { "x": 379, "y": 224 }, { "x": 393, "y": 234 }, { "x": 381, "y": 242 }, { "x": 363, "y": 242 }, { "x": 281, "y": 332 }, { "x": 294, "y": 327 }, { "x": 306, "y": 322 }, { "x": 315, "y": 325 }, { "x": 325, "y": 323 }, { "x": 340, "y": 328 }, { "x": 357, "y": 335 }, { "x": 341, "y": 347 }, { "x": 327, "y": 354 }, { "x": 317, "y": 354 }, { "x": 306, "y": 353 }, { "x": 294, "y": 347 }, { "x": 289, "y": 333 }, { "x": 306, "y": 331 }, { "x": 316, "y": 332 }, { "x": 325, "y": 331 }, { "x": 349, "y": 334 }, { "x": 326, "y": 339 }, { "x": 316, "y": 340 }, { "x": 306, "y": 339 } ] },
"clothingcolors": [ ],
"ethnicity": { "african": 83, "asian": 0, "caucasian": 12, "hispanic": 3 },
"emotions": { "happiness": 1, "surprise": 5, "anger": 2, "disgust": 2, "fear": 1, "sadness": 11 }
}
]
}
I would like to obtain data frame like:
error_code, age, gender, mood, (...), emotions.sadness
0 0, 22, 84, 29, (...), 11
and then
.xls or .csv in "C://Users"
You could use the methods pandas.read_json and DataFrame.to_csv:
import pandas
from io import StringIO

# The response body is read from `json_resp.text`. It is wrapped in StringIO so
# read_json receives a file-like object; passing a literal JSON string directly
# is deprecated in recent pandas releases.
# NOTE(review): for a deeply nested payload read_json can raise
# "ValueError: Mixing dicts with non-Series may lead to ambiguous ordering";
# in that case flatten the data first (for example with json_normalize).
df = pandas.read_json(StringIO(json_resp.text), typ='frame')
# Write the frame to a CSV file at the given path.
df.to_csv("path_to_your_file.csv")