Related
Alright, I'm stumped. I have googled everything I can think of from nested Dicts, Dicts inside Lists inside Dicts, to JSON referencing and have no idea how to get to this data.
I have this AWS Lambda handler that is reading Slack events and simply reversing someone's message and then spitting it out back to Slack. However, the bot can respond to itself (creating an infinite loop). I thought I had this solved, however, that was for the legacy stuff. I am Python stupid, so how do reference this data?
Data (slack_body_dict print from below):
{'token': 'NgapUeqidaGeTf4ONWkUQQiP', 'team_id': 'T7BD9RY57', 'api_app_id': 'A01LZHA7R9U', 'event': {'client_msg_id': '383aeac2-a436-4bad-8e19-7fa68facf916', 'type': 'message', 'text': 'rip', 'user': 'U7D1RQ9MM', 'ts': '1612727797.024200', 'team': 'T7BD9RY57', 'blocks': [{'type': 'rich_text', 'block_id': 'gA7K', 'elements': [{'type': 'rich_text_section', 'elements': [{'type': 'text', 'text': 'rip'}]}]}], 'channel': 'D01MK0JSNDP', 'event_ts': '1612727797.024200', 'channel_type': 'im'}, 'type': 'event_callback', 'event_id': 'Ev01MN8LJ117', 'event_time': 1612727797, 'authorizations': [{'enterprise_id': None, 'team_id': 'T7BD9RY57', 'user_id': 'U01MW6UK55W', 'is_bot': True, 'is_enterprise_install': False}], 'is_ext_shared_channel': False, 'event_context': '1-message-T7BD9RY57-D01MK0JSNDP'}
There is an 'is_bot' there under 'authorizations' I want to check. I assume this will let the bot stop responding to itself. However, for the life of me, I cannot reference it. It seems to be nested in there.
I have tried the following:
def lambda_handler(api_event, api_context):
print(f"Received event:\n{api_event}\nWith context:\n{api_context}")
# Grab relevant information form the api_event
slack_body_raw = api_event.get('body')
slack_body_dict = json.loads(slack_body_raw)
request_headers = api_event["headers"]
print(f"!!!!!!!!!!!!!!!!!!!!!!!body_dict:\n{slack_body_dict}")
print(f"#######################is_bot:\n{slack_body_dict('is_bot')}")
print(f"#######################is_bot:\n{slack_body_dict("is_bot")}")
print(f"#######################is_bot:\n{slack_body_dict(['is_bot']}")
print(f"#######################is_bot:\n{slack_body_dict(["is_bot"]}")
print(f"#######################is_bot:\n{slack_body_dict['authorizations']['is_bot']}")
As you can see I have absolutely no clue how to get to that variable to tell if it is true or false. Every 'is_bot' print reference results in an error. Can someone tell me how to reference that variable or give me something to google? Appreciate it. Code is below in case it is relevant.
import json
import os
from slack_sdk import WebClient
from slack_sdk.errors import SlackApiError
def is_challenge(slack_event_body: dict) -> bool:
"""Is the event a challenge from slack? If yes return the correct response to slack
Args:
slack_event_body (dict): The slack event JSON
Returns:
returns True if it is a slack challenge event returns False otherwise
"""
if "challenge" in slack_event_body:
LOGGER.info(f"Challenge Data: {slack_event_body['challenge']}")
return True
return False
def lambda_handler(api_event, api_context):
# Grab relevant information form the api_event
slack_body_raw = api_event.get('body')
slack_body_dict = json.loads(slack_body_raw)
request_headers = api_event["headers"]
# This is to appease the slack challenge gods
if is_challenge(slack_body_dict):
challenge_response_body = {
"challenge": slack_body_dict["challenge"]
}
return helpers.form_response(200, challenge_response_body)
# This parses the slack body dict to get the event JSON
slack_event_dict = slack_body_dict["event"]
# Build the slack client.
slack_client = WebClient(token=os.environ['BOT_TOKEN'])
# We need to discriminate between events generated by
# the users, which we want to process and handle,
# and those generated by the bot.
if slack_body_dict['is_bot']: #THIS IS GIVING ME THE ERROR. I WANT TO CHECK IF BOT HERE.
logging.warning("Ignore bot event")
else:
# Get the text of the message the user sent to the bot,
# and reverse it.
text = slack_event_dict["text"]
reversed_text = text[::-1]
# Get the ID of the channel where the message was posted.
channel_id = slack_event_dict["channel"]
try:
response = slack_client.chat_postMessage(
channel=channel_id,
text=reversed_text
)
except SlackApiError as e:
# You will get a SlackApiError if "ok" is False
assert e.response["error"] # str like 'invalid_auth', 'channel_not_found'
The structure of the data is:
{
"authorizations": [
{
"is_bot": true
}
]
}
So you would need to first index "authorizations", then to get the first item 0, and lastly "is_bot".
data["authorizations"][0]["is_bot"]
Alternativly, you could iterate over all the authorizations and check if any (or all) of them are marked as a bot like so:
any(auth["is_bot"] for auth in slack_body_dict["authorizations"])
I have an issue about how to customize OrderedDict format and convert them into a json or dictionary format(but be able to reset the key names and the structure). I have the data below:
result= OrderedDict([('index', 'cfs_fsd_00001'),
('host', 'GIISSP707'),
('source', 'D:\\usrLLSS_SS'),
('_time', '2018-11-02 14:43:30.000 EDT'),
('count', '153')])
...However, I want to change the format like this:
{
"servarname": {
"index": "cfs_fsd_00001",
"host": "GIISSP707"
},
"times": '2018-11-02 14:43:30.000 EDT',
"metricTags": {
"source": 'D:\\ddevel.log'"
},
"metricName": "serverice count",
"metricValue": 153,
"metricType": "count"
}
I will be really appreciate your help. Basically the output I got is pretty flat. But I want to customize the structure. The original structure is
OrderedDict([('index', 'cfs_fsd_00001'),('host', 'GIISSP707').....]).
The output I want to achieve is {"servarname"{"index":"cfs_fsd_00001","host":"GIISSP707"},......
You can simply reference the result dict with the respective keys that you want your target data structure to have:
{
"servarname": {
"index": result['index'],
"host": result['host']
},
"times": result['_time'],
"metricTags": {
"source": result['source']
},
"metricName": "serverice count",
"metricValue": result['count'],
"metricType": "count"
}
No sure how flexible you need for your method. I assume you have a few common keys in your OrderedDict and you want to find the metric there, then reformat them into a new dict. Here is a short function which is implemented in python 3 and I hope it could help.
from collections import OrderedDict
import json
def reformat_ordered_dict(dict_result):
"""Reconstruct the OrderedDict result into specific format
This method assumes that your input OrderedDict has the following common keys: 'index',
'host', 'source', '_time', and a potential metric whcih is subject to change (of course
you can support more metrics with minor tweak of the code). The function also re-map the
keys (for example, mapping '_time' to 'times', pack 'index' and 'source' into 'servarname'
).
:param dict_result: the OrderedDict
:return: the reformated OrderedDict
"""
common_keys = ('index', 'host', 'source', '_time')
assert all(common_key in dict_result for common_key in common_keys), (
'You have to provide all the commen keys!')
# write common keys
reformated = OrderedDict()
reformated["servarname"] = OrderedDict([
("index", dict_result['index']),
("host", dict_result['host'])
])
reformated["times"] = dict_result['_time']
reformated["metricTags"] = {"source": dict_result['source']}
# write metric
metric = None
for key in dict_result.keys():
if key not in common_keys:
metric = key
break
assert metric is not None, 'Cannot find metric in the OrderedDict!'
# don't know where you get this value. But you can customize it if needed
# for exampe if the metric name is needed here
reformated['metricName'] = "serverice count"
reformated['metricValue'] = dict_result[metric]
reformated['metricType'] = metric
return reformated
if __name__ == '__main__':
result= OrderedDict([('index', 'cfs_fsd_00001'),
('host', 'GIISSP707'),
('source', 'D:\\usrLLSS_SS'),
('_time', '2018-11-02 14:43:30.000 EDT'),
('count', '153')])
reformated = reformat_ordered_dict(result)
print(json.dumps(reformated))
I'm working with the Microsoft Azure face API and I want to get only the glasses response.
heres my code:
########### Python 3.6 #############
import http.client, urllib.request, urllib.parse, urllib.error, base64, requests, json
###############################################
#### Update or verify the following values. ###
###############################################
# Replace the subscription_key string value with your valid subscription key.
subscription_key = '(MY SUBSCRIPTION KEY)'
# Replace or verify the region.
#
# You must use the same region in your REST API call as you used to obtain your subscription keys.
# For example, if you obtained your subscription keys from the westus region, replace
# "westcentralus" in the URI below with "westus".
#
# NOTE: Free trial subscription keys are generated in the westcentralus region, so if you are using
# a free trial subscription key, you should not need to change this region.
uri_base = 'https://westcentralus.api.cognitive.microsoft.com'
# Request headers.
headers = {
'Content-Type': 'application/json',
'Ocp-Apim-Subscription-Key': subscription_key,
}
# Request parameters.
params = {
'returnFaceAttributes': 'glasses',
}
# Body. The URL of a JPEG image to analyze.
body = {'url': 'https://upload.wikimedia.org/wikipedia/commons/c/c3/RH_Louise_Lillian_Gish.jpg'}
try:
# Execute the REST API call and get the response.
response = requests.request('POST', uri_base + '/face/v1.0/detect', json=body, data=None, headers= headers, params=params)
print ('Response:')
parsed = json.loads(response.text)
info = (json.dumps(parsed, sort_keys=True, indent=2))
print(info)
except Exception as e:
print('Error:')
print(e)
and it returns a list like this:
[
{
"faceAttributes": {
"glasses": "NoGlasses"
},
"faceId": "0f0a985e-8998-4c01-93b6-8ef4bb565cf6",
"faceRectangle": {
"height": 162,
"left": 177,
"top": 131,
"width": 162
}
}
]
I want just the glasses attribute so it would just return either "Glasses" or "NoGlasses"
Thanks for any help in advance!
I think you're printing the whole response, when really you want to drill down and get elements inside it. Try this:
print(info[0]["faceAttributes"]["glasses"])
I'm not sure how the API works so I don't know what your specified params are actually doing, but this should work on this end.
EDIT: Thank you to #Nuageux for noting that this is indeed an array, and you will have to specify that the first object is the one you want.
I guess that you can get few elements in that list, so you could do this:
info = [
{
"faceAttributes": {
"glasses": "NoGlasses"
},
"faceId": "0f0a985e-8998-4c01-93b6-8ef4bb565cf6",
"faceRectangle": {
"height": 162,
"left": 177,
"top": 131,
"width": 162
}
}
]
for item in info:
print (item["faceAttributes"]["glasses"])
>>> 'NoGlasses'
Did you try:
glasses = parsed[0]['faceAttributes']['glasses']
This looks more like a dictionary than a list. Dictionaries are defined using the { key: value } syntax, and can be referenced by the value for their key. In your code, you have faceAttributes as a key that for value contains another dictionary with a key glasses leading to the last value that you want.
Your info object is a list with one element: a dictionary. So in order to get at the values in that dictionary, you'll need to tell the list where the dictionary is (at the head of the list, so info[0]).
So your reference syntax will be:
#If you want to store it in a variable, like glass_var
glass_var = info[0]["faceAttributes"]["glasses"]
#Or if you want to print it directly
print(info[0]["faceAttributes"]["glasses"])
What's going on here? info[0] is the dictionary containing several keys, including faceAttributes,faceId and faceRectangle. faceRectangle and faceAttributes are both dictionaries in themselves with more keys, which you can reference to get their values.
Your printed tree there is showing all the keys and values of your dictionary, so you can reference any part of your dictionary using the right keys:
print(info["faceId"]) #prints "0f0a985e-8998-4c01-93b6-8ef4bb565cf6"
print(info["faceRectangle"]["left"]) #prints 177
print(info["faceRectangle"]["width"]) #prints 162
If you have multiple entries in your info list, then you'll have multiple dictionaries, and you can get all the outputs as so:
for entry in info: #Note: "entry" is just a variable name,
# this can be any name you want. Every
# iteration of entry is one of the
# dictionaries in info.
print(entry["faceAttributes"]["glasses"])
Edit: I didn't see that info was a list of a dictionary, adapted for that fact.
I'm using Yahoo Placemaker API which gives different structure of json depending on input.
Simple json file looks like this:
{
'document':{
'itemDetails':{
'id'='0'
'prop1':'1',
'prop2':'2'
}
'other':{
'propA':'A',
'propB':'B'
}
}
}
When I want to access itemDetails I simply write json_file['document']['itemDetails'].
But when I get more complicated response, such as
{
'document':{
'1':{
'itemDetails':{
'id'='1'
'prop1':'1',
'prop2':'2'
}
},
'0':{
'itemDetails':{
'id'='0'
'prop1':'1',
'prop2':'2'
},
'2':{
'itemDetails':{
'id'='1'
'prop1':'1',
'prop2':'2'
}
'other':{
'propA':'A',
'propB':'B'
}
}
}
the solution obviously does not work.
I use id, prop1 and prop2 to create objects.
What would be the best approach to automatically access itemDetails in the second case without writing json_file['document']['0']['itemDetails'] ?
If I understand correctly, you want to loop through all of json_file['document']['0']['itemDetails'], json_file['document']['1']['itemDetails'], ...
If that's the case, then:
item_details = {}
for key, value in json_file['document']:
item_details[key] = value['itemDetails']
Or, a one-liner:
item_details = {k: v['itemDetails'] for k, v in json_file['document']}
Then, you would access them as item_details['0'], item_details['1'], ...
Note: You can suppress the single quotes around 0 and 1, by using int(key) or int(k).
Edit:
If you want to access both cases seamlessly (whether there is one result or many), you could check:
if 'itemDetails' in json_file['document']:
item_details = {'0': json_file['document']['itemDetails']}
else:
item_details = {k: v['itemDetails'] for k, v in json_file['document'] if k != 'other'}
Then loop through the item_details dict.
Is there a way to define a XPath type query for nested python dictionaries.
Something like this:
foo = {
'spam':'eggs',
'morefoo': {
'bar':'soap',
'morebar': {'bacon' : 'foobar'}
}
}
print( foo.select("/morefoo/morebar") )
>> {'bacon' : 'foobar'}
I also needed to select nested lists ;)
This can be done easily with #jellybean's solution:
def xpath_get(mydict, path):
elem = mydict
try:
for x in path.strip("/").split("/"):
try:
x = int(x)
elem = elem[x]
except ValueError:
elem = elem.get(x)
except:
pass
return elem
foo = {
'spam':'eggs',
'morefoo': [{
'bar':'soap',
'morebar': {
'bacon' : {
'bla':'balbla'
}
}
},
'bla'
]
}
print xpath_get(foo, "/morefoo/0/morebar/bacon")
[EDIT 2016] This question and the accepted answer are ancient. The newer answers may do the job better than the original answer. However I did not test them so I won't change the accepted answer.
One of the best libraries I've been able to identify, which, in addition, is very actively developed, is an extracted project from boto: JMESPath. It has a very powerful syntax of doing things that would normally take pages of code to express.
Here are some examples:
search('foo | bar', {"foo": {"bar": "baz"}}) -> "baz"
search('foo[*].bar | [0]', {
"foo": [{"bar": ["first1", "second1"]},
{"bar": ["first2", "second2"]}]}) -> ["first1", "second1"]
search('foo | [0]', {"foo": [0, 1, 2]}) -> [0]
There is an easier way to do this now.
http://github.com/akesterson/dpath-python
$ easy_install dpath
>>> dpath.util.search(YOUR_DICTIONARY, "morefoo/morebar")
... done. Or if you don't like getting your results back in a view (merged dictionary that retains the paths), yield them instead:
$ easy_install dpath
>>> for (path, value) in dpath.util.search(YOUR_DICTIONARY, "morefoo/morebar", yielded=True)
... and done. 'value' will hold {'bacon': 'foobar'} in that case.
Not exactly beautiful, but you might use sth like
def xpath_get(mydict, path):
elem = mydict
try:
for x in path.strip("/").split("/"):
elem = elem.get(x)
except:
pass
return elem
This doesn't support xpath stuff like indices, of course ... not to mention the / key trap unutbu indicated.
There is the newer jsonpath-rw library supporting a JSONPATH syntax but for python dictionaries and arrays, as you wished.
So your 1st example becomes:
from jsonpath_rw import parse
print( parse('$.morefoo.morebar').find(foo) )
And the 2nd:
print( parse("$.morefoo[0].morebar.bacon").find(foo) )
PS: An alternative simpler library also supporting dictionaries is python-json-pointer with a more XPath-like syntax.
dict > jmespath
You can use JMESPath which is a query language for JSON, and which has a python implementation.
import jmespath # pip install jmespath
data = {'root': {'section': {'item1': 'value1', 'item2': 'value2'}}}
jmespath.search('root.section.item2', data)
Out[42]: 'value2'
The jmespath query syntax and live examples: http://jmespath.org/tutorial.html
dict > xml > xpath
Another option would be converting your dictionaries to XML using something like dicttoxml and then use regular XPath expressions e.g. via lxml or whatever other library you prefer.
from dicttoxml import dicttoxml # pip install dicttoxml
from lxml import etree # pip install lxml
data = {'root': {'section': {'item1': 'value1', 'item2': 'value2'}}}
xml_data = dicttoxml(data, attr_type=False)
Out[43]: b'<?xml version="1.0" encoding="UTF-8" ?><root><root><section><item1>value1</item1><item2>value2</item2></section></root></root>'
tree = etree.fromstring(xml_data)
tree.xpath('//item2/text()')
Out[44]: ['value2']
Json Pointer
Yet another option is Json Pointer which is an IETF spec that has a python implementation:
https://github.com/stefankoegl/python-json-pointer
From the jsonpointer-python tutorial:
from jsonpointer import resolve_pointer
obj = {"foo": {"anArray": [ {"prop": 44}], "another prop": {"baz": "A string" }}}
resolve_pointer(obj, '') == obj
# True
resolve_pointer(obj, '/foo/another%20prop/baz') == obj['foo']['another prop']['baz']
# True
>>> resolve_pointer(obj, '/foo/anArray/0') == obj['foo']['anArray'][0]
# True
If terseness is your fancy:
def xpath(root, path, sch='/'):
return reduce(lambda acc, nxt: acc[nxt],
[int(x) if x.isdigit() else x for x in path.split(sch)],
root)
Of course, if you only have dicts, then it's simpler:
def xpath(root, path, sch='/'):
return reduce(lambda acc, nxt: acc[nxt],
path.split(sch),
root)
Good luck finding any errors in your path spec tho ;-)
Another alternative (besides that suggested by jellybean) is this:
def querydict(d, q):
keys = q.split('/')
nd = d
for k in keys:
if k == '':
continue
if k in nd:
nd = nd[k]
else:
return None
return nd
foo = {
'spam':'eggs',
'morefoo': {
'bar':'soap',
'morebar': {'bacon' : 'foobar'}
}
}
print querydict(foo, "/morefoo/morebar")
More work would have to be put into how the XPath-like selector would work.
'/' is a valid dictionary key, so how would
foo={'/':{'/':'eggs'},'//':'ham'}
be handled?
foo.select("///")
would be ambiguous.
Is there any reason for you to the query it the way like the XPath pattern? As the commenter to your question suggested, it just a dictionary, so you can access the elements in a nest manner. Also, considering that data is in the form of JSON, you can use simplejson module to load it and access the elements too.
There is this project JSONPATH, which is trying to help people do opposite of what you intend to do (given an XPATH, how to make it easily accessible via python objects), which seems more useful.
def Dict(var, *arg, **kwarg):
""" Return the value of an (imbricated) dictionnary, if all fields exist else return "" unless "default=new_value" specified as end argument
Avoid TypeError: argument of type 'NoneType' is not iterable
Ex: Dict(variable_dict, 'field1', 'field2', default = 0)
"""
for key in arg:
if isinstance(var, dict) and key and key in var: var = var[key]
else: return kwarg['default'] if kwarg and 'default' in kwarg else "" # Allow Dict(var, tvdbid).isdigit() for example
return kwarg['default'] if var in (None, '', 'N/A', 'null') and kwarg and 'default' in kwarg else "" if var in (None, '', 'N/A', 'null') else var
foo = {
'spam':'eggs',
'morefoo': {
'bar':'soap',
'morebar': {'bacon' : 'foobar'}
}
}
print Dict(foo, 'morefoo', 'morebar')
print Dict(foo, 'morefoo', 'morebar', default=None)
Have a SaveDict(value, var, *arg) function that can even append to lists in dict...
I reference form this link..
Following code is for json xpath base parse implemented in python :
import json
import xmltodict
# Parse the json string
class jsonprase(object):
def __init__(self, json_value):
try:
self.json_value = json.loads(json_value)
except Exception :
raise ValueError('must be a json str value')
def find_json_node_by_xpath(self, xpath):
elem = self.json_value
nodes = xpath.strip("/").split("/")
for x in range(len(nodes)):
try:
elem = elem.get(nodes[x])
except AttributeError:
elem = [y.get(nodes[x]) for y in elem]
return elem
def datalength(self, xpath="/"):
return len(self.find_json_node_by_xpath(xpath))
#property
def json_to_xml(self):
try:
root = {"root": self.json_value}
xml = xmltodict.unparse(root, pretty=True)
except ArithmeticError :
pyapilog().error(e)
return xml
Test Json :
{
"responseHeader": {
"zkConnected": true,
"status": 0,
"QTime": 2675,
"params": {
"q": "TxnInitTime:[2021-11-01T00:00:00Z TO 2021-11-30T23:59:59Z] AND Status:6",
"stats": "on",
"stats.facet": "CountryCode",
"rows": "0",
"wt": "json",
"stats.field": "ItemPrice"
}
},
"response": {
"numFound": 15162439,
"start": 0,
"maxScore": 1.8660598,
"docs": []
}
}
Test Code to read the values from above input json.
numFound = jsonprase(ABOVE_INPUT_JSON).find_json_node_by_xpath('/response/numFound')
print(numFound)