Reading a Json response recursively with python - python

I'm trying to print all the "keys, values" from a JSON response without knowing the key names (that is, without using syntax such as json['example']). I'm doing this with a recursive function that uses iteritems(), but I'm having some problems:
This is the Json response that I'm trying to read:
{"servers": [{"id": "a059eccb-d929-43b2-8db3-b32b6201d60f", "links": [{"href": "http://192.168.100.142:8774/v2/2ad1fc162c254e59bea043560b7f73cb/servers/a059eccb-d929-43b2-8db3-b32b6201d60f", "rel": "self"}, {"href": "http://192.168.100.142:8774/2ad1fc162c254e59bea043560b7f73cb/servers/a059eccb-d929-43b2-8db3-b32b6201d60f", "rel": "bookmark"}], "name": "birk"}]}
This is the function that I'm using:
def format_main_response(self, json_string):
print "json: " + json_string
content = json.loads(str(json_string))
for key, value in content.iteritems():
print key
if type(value) == type(['']):
strg = str(json.dumps(value))
strg = strg.strip('[]')
self.format_main_response(strg)
else:
print value
I'm using the strip function to take out all the '[ ]' from my JSON string. If I don't do that, I get an error when trying to load it with the 'json.loads()' function:
Traceback (most recent call last):
File "main.py", line 135, in <module>
formatter.format_main_response(nova_API.list_servers())
File "/home/python/jsonformatter.py", line 51, in format_main_response
self.format_main_response(strg, mod)
File "/home/python/jsonformatter.py", line 51, in format_main_response
self.format_main_response(strg, mod)
File "/home/python/jsonformatter.py", line 31, in format_main_response
for key, value in content.iteritems():
AttributeError: 'list' object has no attribute 'iteritems'
My problem is that in some point the json that should be printed looks like this, without the '[ ]':
{"href": "http://192.168.100.142:8774/v2/2ad1fc162c254e59bea043560b7f73cb/servers/a059eccb-d929-43b2-8db3-b32b6201d60f", "rel": "self"}, {"href": "http://192.168.100.142:8774/2ad1fc162c254e59bea043560b7f73cb/servers/a059eccb-d929-43b2-8db3-b32b6201d60f", "rel": "bookmark"}
When the function tries to find the 'key,value' from this json, I got this error:
Traceback (most recent call last): File "main.py", line 135, in <module>
formatter.format_main_response(nova_API.list_servers())
File "/home/python/jsonformatter.py", line 34, in format_main_response
self.format_main_response(strg)
File "/home/python/jsonformatter.py", line 34, in format_main_response
self.format_main_response(strg)
File "/home/python/jsonformatter.py", line 28, in format_main_response
content = json.loads(str(json_string))
File "/usr/lib/python2.7/json/__init__.py", line 326, in loads
return _default_decoder.decode(s)
File "/usr/lib/python2.7/json/decoder.py", line 369, in decode
raise ValueError(errmsg("Extra data", s, end, len(s)))
ValueError: Extra data: line 1 column 135 - line 1 column 273 (char 135 - 273)
What should I do in this case? Or any other way to get the same result?

Use that:
def format_main_response(json_string):
    """Print every key and value found in a JSON document, recursively.

    The raw text is echoed with a "json: " prefix, then each key is printed
    on its own line followed by its value. Nested objects (directly under a
    key or inside a list) are re-serialized and handled by a recursive call,
    so each of them is echoed with its own "json: " line as well.

    Args:
        json_string: text holding one valid JSON document.

    Raises:
        ValueError: if json_string is not valid JSON.
    """
    print("json: " + json_string)
    # Parse the text as-is: wrapping it in str() breaks non-ASCII input on
    # Python 2 and is a no-op on Python 3.
    content = json.loads(json_string)
    if isinstance(content, dict):
        # .items() and print(...) work on both Python 2 and Python 3.
        for key, value in content.items():
            print(key)
            _print_value(value)
    else:
        # A top-level list or scalar has no keys of its own to print.
        _print_value(content)


def _print_value(value):
    """Print a scalar, or descend into a nested object or list."""
    if isinstance(value, dict):
        format_main_response(json.dumps(value))
    elif isinstance(value, list):
        # List items may be objects, further lists or plain scalars.
        for sub_value in value:
            _print_value(sub_value)
    else:
        print(value)
That's the result:
~$ python test_pdb.py
json: {"servers": [{"id": "a059eccb-d929-43b2-8db3-b32b6201d60f", "links": [{"href": "http://192.168.100.142:8774/v2/2ad1fc162c254e59bea043560b7f73cb/servers/a059eccb-d929-43b2-8db3-b32b6201d60f", "rel": "self"}, {"href": "http://192.168.100.142:8774/2ad1fc162c254e59bea043560b7f73cb/servers/a059eccb-d929-43b2-8db3-b32b6201d60f", "rel": "bookmark"}], "name": "birk"}]}
servers
json: {"id": "a059eccb-d929-43b2-8db3-b32b6201d60f", "links": [{"href": "http://192.168.100.142:8774/v2/2ad1fc162c254e59bea043560b7f73cb/servers/a059eccb-d929-43b2-8db3-b32b6201d60f", "rel": "self"}, {"href": "http://192.168.100.142:8774/2ad1fc162c254e59bea043560b7f73cb/servers/a059eccb-d929-43b2-8db3-b32b6201d60f", "rel": "bookmark"}], "name": "birk"}
id
a059eccb-d929-43b2-8db3-b32b6201d60f
links
json: {"href": "http://192.168.100.142:8774/v2/2ad1fc162c254e59bea043560b7f73cb/servers/a059eccb-d929-43b2-8db3-b32b6201d60f", "rel": "self"}
href
http://192.168.100.142:8774/v2/2ad1fc162c254e59bea043560b7f73cb/servers/a059eccb-d929-43b2-8db3-b32b6201d60f
rel
self
json: {"href": "http://192.168.100.142:8774/2ad1fc162c254e59bea043560b7f73cb/servers/a059eccb-d929-43b2-8db3-b32b6201d60f", "rel": "bookmark"}
href
http://192.168.100.142:8774/2ad1fc162c254e59bea043560b7f73cb/servers/a059eccb-d929-43b2-8db3-b32b6201d60f
rel
bookmark
name
birk

How about:
# Despite its name, jsonStr is a tuple of two already-parsed dicts, not a
# JSON string; json.dumps serializes the tuple as a JSON array.
jsonStr = (
    {"href": "http://192.168.100.142:8774/v2/2ad1fc162c254e59bea043560b7f73cb/servers/a059eccb-d929-43b2-8db3-b32b6201d60f", "rel": "self"},
    {"href": "http://192.168.100.142:8774/2ad1fc162c254e59bea043560b7f73cb/servers/a059eccb-d929-43b2-8db3-b32b6201d60f", "rel": "bookmark"},
)
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(json.dumps(jsonStr, sort_keys=True, indent=2, separators=(',', ': ')))
This should give you the format you want

Code below recursively traverses the json response and prints the key,value pairs:
Trick is to load json response only once in the main and then recursively traverse the response:
def parse_json_response(content):
    """Recursively print every key/value pair of already-parsed JSON data.

    Each key is printed as "key : <key>". A scalar value is printed as
    "Value : <value>"; a nested object or list is traversed instead of being
    printed, so its own keys and scalars appear on the following lines.

    Args:
        content: the result of json.loads() - a dict, a list or a scalar.
    """
    if isinstance(content, dict):
        # No minimum-size check: a single-key object such as
        # {"servers": [...]} has to be traversed too.
        for key, value in content.items():
            print("key : %s" % key)
            if isinstance(value, (dict, list)):
                parse_json_response(value)
            else:
                print("Value : %s" % (value,))
    elif isinstance(content, list):
        # Lists carry no keys; just descend into every element.
        for item in content:
            parse_json_response(item)
    else:
        # A bare scalar, e.g. a list element such as 1 or "abc".
        print("Value : %s" % (content,))


if __name__ == '__main__':
    # `response` stands for the raw JSON text; it is parsed only once here
    # and the parsed structure is then walked recursively.
    content = json.loads(response)
    parse_json_response(content)
Hope it helps.

Related

Json count unique values

I have this json file:
[
{
"#timestamp": "",
"userID": "",
"destinationUserName": "",
"message": ": 12,050",
"name": "Purge Client Events"
},
{
"#timestamp": "",
"userID": "",
"destinationUserName": "",
"message": "",
"name": ""
},
{
"#timestamp": "",
"userID": "",
"destinationUserName": "",
"message": "",
"name": ""
},
{
"#timestamp": "",
"userID": "",
"name": "",
"sourceUserName": "",
"deviceAction": ""
}
]
I am looking for a solution in which I can loop over the whole file, count the unique values for UserID, and print that count.
I found different solutions, but none of them worked for me and I am completely stuck.
So far this is my code, its just a formatter that convert the file into a json format.
After that I tried to check the length of the file and loop over it appending unique elements.
import json
to_queue = []
def structure_json():
with open("file.json", "r+") as f:
old = f.read()
f.seek(0) # rewind
# save to the old string after replace
new = old.replace('}{', '},{')
f.write(new)
tmps = '[' + str(new) + ']'
json_string = json.loads(tmps)
for key in json_string:
to_queue.append(key)
f.close
with open('update_2.json', 'w') as file:
json.dump(json_string, file, indent=2)
size=len(file["UserID"])
uniqueNames = [];
for i in range(0,size,1):
if(file["UserID"] not in uniqueNames):
uniqueNames.append(file["UserID"]);
print(uniqueNames)
structure_json()
print(to_queue)
But I get the following error:
Traceback (most recent call last):
File "format.py", line 24, in <module>
structure_json()
File "format.py", line 17, in structure_json
size=len(file["UserID"])
TypeError: '_io.TextIOWrapper' object is not subscriptable
Please any help will be much appreciated. Thank you so much for any help, and if you need any more info just let me know
Open the file and load its content. Then you can iterate over the list of dicts and create a set of all values for the key userID. Note that if the key is missing from any item, get() yields None for it, which is counted as one extra unique value (+1).
import json
# Load the whole file in one go; json.load parses the open file object.
with open('your_file.json') as f:
    data = json.load(f)
# A set keeps each userID only once. Items without the key contribute None,
# because .get() returns None for a missing key.
users = {item.get('userID') for item in data}
print(len(users))
print(users)

Read complex json file in python

Actual Json is:
{
"title1": {
"titleID": "1234",
"titlename": "a-b-c",
},
"title2": [
{
"block": "0.0.0.0/26",
"abc_id": "abc-0123",
"tags": [{ "key": "Name", "value": "abc-name"},
{ "key": "env", "value": "dev"}]
},
{
"block": "1.2.0.0/26",
"abc_id": "abc-4567"
},
{
"block": "0.0.0.0/26",
"abc_id": "abc-8999",
"tags": [{ "key": "Name", "value": "xyz-name"}]
},
{
"block": "0.0.0.0/26",
"abc_id": "abc-7766",
"tags": [{ "app": "Name", "value": "web-app"}]
}
]
}
My Code is
with open('/tmp/temp.json') as access_json:
read_content = json.load(access_json)
for key1, value1 in read_content.items():
if key1 == "title1":
title_id = value1['titleID']
if key1 == "title2":
title2_access = read_content['title2']
for title2_data in title2_access:
for key2, value2 in title2_data.items():
if key2 == "abc_id":
abc_id = value2
if key2 == "tags":
tags_access = read_content['tags']
for tags_data in tags_access:
for key3, value3 in tags_data.items():
if key3 == "Name":
abc_name = value3
and the error is:
Traceback (most recent call last):
File "/tmp/runscript.py", line 123, in <module>
runpy.run_path(temp_file_path, run_name='__main__')
File "/usr/local/lib/python3.6/runpy.py", line 263, in run_path
pkg_name=pkg_name, script_name=fname)
File "/usr/local/lib/python3.6/runpy.py", line 96, in _run_module_code
mod_name, mod_spec, pkg_name, script_name)
File "/usr/local/lib/python3.6/runpy.py", line 85, in _run_code
exec(code, run_globals)
File "/tmp/glue-python-scripts-lw031e0z/tsf_dev.py", line 160, in <module>
KeyError: 'tags'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/tmp/runscript.py", line 142, in <module>
raise e_type(e_value).with_traceback(new_stack)
File "/tmp/glue-python-scripts-lw031e0z/tsf_dev.py", line 160, in <module>
KeyError: KeyError('tags',)
Reason: not all the items in the title2 list contain "tags". So, if there is no 'tags' entry, or no tag whose key is "Name", then abc_name should be ''.
i need the list of lists
(titleID, abc_id, abc_name).
Expected output :
['1234','abc-0123','abc-name']
['1234','abc-4567','']
['1234','abc-8999','xyz-name']
['1234','abc-7766','']
There is a dictionary of "title2",
and it contains abc_id and few items contains "tags" as well.
If there is no tags, then the abc-name should be ''.
If there is no Key: "name", then the abc-name should be ''.
If there is tags and the key: "name" in the dict, then the abc-name should be the value present in the title2[tags][value: ""] where title2[tags][key is "name"]
You have too many if statements and for-loops to properly handle your code. Use the default option for the dictionary get method to handle the cases where the data doesn't exist like shown below.
# titleID is shared by every output row; fall back to '' when it is absent.
title_id = read_content.get('title1', {}).get('titleID', '')
for block in read_content['title2']:
    id_ = block.get('abc_id', '')
    # Take the value of the first tag whose key is "Name". Blocks without
    # "tags", or without such a tag, get ''.
    name = next(
        (tag.get('value', '')
         for tag in block.get('tags', [])
         if tag.get('key') == 'Name'),
        '',
    )
    # Emit exactly one row per block, after all of its tags were examined;
    # printing inside a per-tag loop would give one row per tag instead.
    vals = [title_id, id_, name]
    print(vals)
# Output:
['1234', 'abc-0123', 'abc-name']
['1234', 'abc-4567', '']
['1234', 'abc-8999', 'xyz-name']
['1234', 'abc-7766', '']

how to open json file in python

I am stuck here again... I have a file named "data.json" and I want to open it with python but I am getting errors.
import json
>>> data=json.load(open("data.json"))
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "C:\Users\Angel\AppData\Local\Programs\Python\Python38-32\lib\json\__init__.py", line 293, in load
return loads(fp.read(),
File "C:\Users\Angel\AppData\Local\Programs\Python\Python38-32\lib\json\__init__.py", line 357, in loads
return _default_decoder.decode(s)
File "C:\Users\Angel\AppData\Local\Programs\Python\Python38-32\lib\json\decoder.py", line 340,
in decode
raise JSONDecodeError("Extra data", s, end)
json.decoder.JSONDecodeError: Extra data: line 2 column 1 (char 4912995)
>>>
According to Python JSON documentation
If the data being deserialized is not a valid JSON document, a JSONDecodeError will be raised.
Not knowing the content of your file, it is hard to say what is wrong, but I would suspect that text in your file is not a valid JSON object, or more likely (according to "Extra data" search, answered here) the file "data.json" includes more than one JSON object.
For example, using your code:
This file works correctly
{ "name":"John", "age":30, "car":null }
but this one
{ "name":"John", "age":30, "car":null }
{ "name":"John", "age":30, "car":null }
throws the same errors
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "C:\Users\a\AppData\Local\Programs\Python\Python37-32\lib\json\__init__.py",
line 296, in load
parse_constant=parse_constant, object_pairs_hook=object_pairs_hook, **kw)
File "C:\Users\a\AppData\Local\Programs\Python\Python37-32\lib\json\__init__.py",
line 348, in loads
return _default_decoder.decode(s)
File "C:\Users\a\AppData\Local\Programs\Python\Python37-32\lib\json\decoder.py",
line 340, in decode
raise JSONDecodeError("Extra data", s, end)
json.decoder.JSONDecodeError: Extra data: line 6 column 1 (char 55)
If the file contains two or more records, you have to either reformat it as shown below or load it record by record.
You need to reformat your json to contain an array like below:
{
"foo" : [
{"name": "XYZ", "address": "54.7168,94.0215", "country_of_residence": "PQR", "countries": "LMN;PQRST", "date": "28-AUG-2008", "type": null},
{"name": "OLMS", "address": null, "country_of_residence": null, "countries": "Not identified;No", "date": "23-FEB-2017", "type": null}
]
}

Expected object or Value while read the .json file in Python

I am trying to read the .json file in python.
Here is my python code:
import pandas as pd
df_idf = pd.read_json('/home/lazzydevs/Data/datajs.json',lines = True)
print("Schema:\n\n",df_idf.dtypes)
print("Number of questions,columns=",df_idf.shape)
I checked my json file also it's also valid file.
Here is my .json file:
[{
"id": "4821394",
"title": "Serializing a private struct - Can it be done?",
"body": "\u003cp\u003eI have a public class that contains a private struct. The struct contains properties (mostly string) that I want to serialize. When I attempt to serialize the struct and stream it to disk, using XmlSerializer, I get an error saying only public types can be serialized. I don't need, and don't want, this struct to be public. Is there a way I can serialize it and keep it private?\u003c/p\u003e",
"answer_count": "1",
"comment_count": "0",
"creation_date": "2011-01-27 20:19:13.563 UTC",
"last_activity_date": "2011-01-27 20:21:37.59 UTC",
"last_editor_display_name": "",
"owner_display_name": "",
"owner_user_id": "163534",
"post_type_id": "1",
"score": "0",
"tags": "c#|serialization|xml-serialization",
"view_count": "296"
},{
"id": "3367882",
"title": "How do I prevent floated-right content from overlapping main content?",
"body": "\u003cp\u003eI have the following HTML:\u003c/p\u003e\n\n\u003cpre\u003e\u003ccode\u003e\u0026lt;td class='a'\u0026gt;\n \u0026lt;img src='/images/some_icon.png' alt='Some Icon' /\u0026gt;\n \u0026lt;span\u0026gt;Some content that's waaaaaaaaay too long to fit in the allotted space, but which can get cut off.\u0026lt;/span\u0026gt;\n\u0026lt;/td\u0026gt;\n\u003c/code\u003e\u003c/pre\u003e\n\n\u003cp\u003eIt should display as follows:\u003c/p\u003e\n\n\u003cpre\u003e\u003ccode\u003e[Some content that's wa [ICON]]\n\u003c/code\u003e\u003c/pre\u003e\n\n\u003cp\u003eI have the following CSS:\u003c/p\u003e\n\n\u003cpre\u003e\u003ccode\u003etd.a span {\n overflow: hidden;\n white-space: nowrap;\n z-index: 1;\n}\n\ntd.a img {\n display: block;\n float: right;\n z-index: 2;\n}\n\u003c/code\u003e\u003c/pre\u003e\n\n\u003cp\u003eWhen I resize the browser to cut off the text, it cuts off at the edge of the \u003ccode\u003e\u0026lt;td\u0026gt;\u003c/code\u003e rather than before the \u003ccode\u003e\u0026lt;img\u0026gt;\u003c/code\u003e, which leaves the \u003ccode\u003e\u0026lt;img\u0026gt;\u003c/code\u003e overlapping the \u003ccode\u003e\u0026lt;span\u0026gt;\u003c/code\u003e content. I've tried various \u003ccode\u003epadding\u003c/code\u003e and \u003ccode\u003emargin\u003c/code\u003es, but nothing seemed to work. Is this possible?\u003c/p\u003e\n\n\u003cp\u003eNB: It's \u003cem\u003every\u003c/em\u003e difficult to add a \u003ccode\u003e\u0026lt;td\u0026gt;\u003c/code\u003e that just contains the \u003ccode\u003e\u0026lt;img\u0026gt;\u003c/code\u003e here. If it were easy, I'd just do that :)\u003c/p\u003e",
"accepted_answer_id": "3367943",
"answer_count": "2",
"comment_count": "2",
"creation_date": "2010-07-30 00:01:50.9 UTC",
"favorite_count": "0",
"last_activity_date": "2012-05-10 14:16:05.143 UTC",
"last_edit_date": "2012-05-10 14:16:05.143 UTC",
"last_editor_display_name": "",
"last_editor_user_id": "44390",
"owner_display_name": "",
"owner_user_id": "1190",
"post_type_id": "1",
"score": "2",
"tags": "css|overflow|css-float|crop",
"view_count": "4121"
}]
Now i am trying to read the json file in python but every time it's showing error:
Traceback (most recent call last):
File "/home/lazzydevs/Desktop/tfstack.py", line 4, in <module>
df_idf = pd.read_json('/home/lazzydevs/Data/datajs.json',lines = True)
File "/home/lazzydevs/.local/lib/python3.7/site-packages/pandas/io/json/_json.py", line 592, in read_json
result = json_reader.read()
File "/home/lazzydevs/.local/lib/python3.7/site-packages/pandas/io/json/_json.py", line 715, in read
obj = self._get_object_parser(self._combine_lines(data.split("\n")))
File "/home/lazzydevs/.local/lib/python3.7/site-packages/pandas/io/json/_json.py", line 739, in _get_object_parser
obj = FrameParser(json, **kwargs).parse()
File "/home/lazzydevs/.local/lib/python3.7/site-packages/pandas/io/json/_json.py", line 849, in parse
self._parse_no_numpy()
File "/home/lazzydevs/.local/lib/python3.7/site-packages/pandas/io/json/_json.py", line 1093, in _parse_no_numpy
loads(json, precise_float=self.precise_float), dtype=None
ValueError: Expected object or value
I checked so many posts but not working...i don't know what is the problem.
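The following piece of code seems to work on my machine. The only change is dropping lines=True: that option makes pandas parse every line of the file as a separate JSON document, but your file is a single JSON array spread over many lines.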
import pandas as pd
# No lines=True here: the file holds one JSON array, not one JSON document
# per line, so it must be parsed as a whole.
df_idf = pd.read_json('/home/lazzydevs/Data/datajs.json')
# Show the inferred dtype of every column, then the (rows, columns) shape.
print("Schema:\n\n",df_idf.dtypes)
print("Number of questions,columns=",df_idf.shape)

Reading and parsing a json file

I'm using the following python script to read and parse a json file
import json
with open('testdata.json', 'r') as raw_data:
content = json.load(raw_data)
print(content)
that has data like:
{"grp":"1"; "total":"10"}
{"event":"run", "timestamp":"2010-01-30 10:00:40", "id": "200", "distance": "5"}
{"event":"walk", "timestamp":"2010-01-31 18:46:00", "id": "200", "disrance": "2"}
I'm getting the error:
Traceback (most recent call last):
File "readdata.py", line 4, in <module>
content = json.load(raw_data)
File "/usr/lib/python2.7/json/__init__.py", line 290, in load **kw)
File "/usr/lib/python2.7/json/__init__.py", line 338, in loads
return _default_decoder.decode(s)
File "/usr/lib/python2.7/json/decoder.py", line 369, in decode
raise ValueError(errmsg("Extra data", s, end, len(s)))
ValueError: Extra data: line 2 column 1 - line 3 column 1 (char 93 - 187)
If I have one row of data it works... 2 or more rows of data I get the error
Can't see anything that is causing this problem
The SO syntax highlighter solved your issue.
"distance': "5"}
^
Change this to double quotes
But there are many other issues. here is a valid version of your json file.
[
{"grp":1, "total":10},
{"event":"run", "timestamp":"2010-01-30 10:00:40", "id": "200", "distance": "5"},
{"event":"walk", "timestamp":"2010-01-31 18:46:00", "id": "200", "disrance": "2"}
]
Note the " around each key, the , between key:value pairs, and the , between elements of the list.
You can validate your JSON using tools like jsonlint.com

Categories

Resources