I am indexing data into Elasticsearch.
I do not know what "sort" is. I have not put it in the mapping, and it is not in the data that I am indexing either.
Why does it appear?
THIS IS MY CODE
def initialize_mapping(es):
    """Create the target index and register the classification field mapping.

    Args:
        es: Elasticsearch client object; only its ``indices`` API is used.

    The index name and document type come from the module-level
    ``INDEX_NAME`` and ``DOC_TYPE`` names.
    """
    mapping_classification = {
        'properties': {
            '#timestamp': {'type': 'date'},
            'Labels': {'type': 'keyword'},
            'Model': {'type': 'keyword'},
            'Image': {'type': 'keyword'},
            'Time(ms)': {'type': 'short'},
            'Inference': {'type': 'text'},
            'Score': {'type': 'short'},
            'TPU_temp(°C)': {'type': 'short'},
        }
    }
    print("Initializing the mapping ...")
    # The index name is passed by keyword: positional use of these APIs is not
    # accepted by every client version, while ``index=`` always is.
    if not es.indices.exists(index=INDEX_NAME):
        es.indices.create(index=INDEX_NAME)
        # NOTE(review): the snippet's indentation was lost; the mapping is
        # assumed to be registered only for a freshly created index - confirm.
        es.indices.put_mapping(
            body=mapping_classification,
            doc_type=DOC_TYPE,
            index=INDEX_NAME,
        )
def main():
    """Index a single classification result document via the bulk helper.

    Sets up the client and mapping, builds one action from ``maX_group`` and
    sends it with ``helpers.bulk``; the outcome (or the error) is printed.
    """
    es = initialize_elasticsearch()
    initialize_mapping(es)

    def field_at(position):
        # Every entry is split on ":" and the second piece is the value.
        return maX_group[position].split(":")[1]

    document = {
        '_index': INDEX_NAME,
        '_type': DOC_TYPE,
        "#timestamp": datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S"),
        "Labels": field_at(0),
        "Model": field_at(1),
        "Image": field_at(2),
        # Entry 3 of maX_group is not indexed.
        "Time(ms)": field_at(4),
        "Inference": field_at(5),
        "Score": field_at(6),
        "TPU_temp(°C)": field_at(7),
    }
    actions = [document]

    try:
        res = helpers.bulk(client=es, index=INDEX_NAME, actions=actions)
        print("\nhelpers.bulk() RESPONSE:", res)
        print("RESPONSE TYPE:", type(res))
    except Exception as err:
        print("\nhelpers.bulk() ERROR:", err)


if __name__ == "__main__":
    main()
That sort value is not in your document at all. Only what you see in _source is actually your document.
In your other question, you might have created an index-pattern without specifying any #timestamp field, and hence the documents were not sorted in the Discover view and you didn't see any sort value.
Related
I have a JSON file called "hostnames" formatted like below
{
'propertyName': 'www.property1.com',
'propertyVersion': 1,
'etag': 'jbcas6764023nklf78354',
'rules': {
'name': 'default',
'children': [{
'name': 'Route',
'children': [],
'behaviors': [{
'name': 'origin',
'options': {
'originType': 'CUSTOMER',
'hostname': 'www.origin1.com',
and I wanted to get the values of keys "propertyName" and "hostname" and have a new JSON file like below
'properties': [{
'propertyName': 'www.property1.com',
'hostnames': ['www.origin1.com', 'www.origin2.com']
}, {
'propertyName': 'www.property1.com',
'hostnames': ['www.origin1.com', 'www.origin2.com']
}]
my code looks like this
hostnames = result.json()
hostnameslist = [host['hostname'] for host in hostnames['rules']['children']['behaviors']['options']]
print(hostnameslist)
but I'm getting the error
TypeError: list indices must be integers or slices, not str
You are trying to access list elements with a string index ('behaviors').
Try:
hostnames = result.json()
# 'children' and 'behaviors' are lists, so walk them and keep the hostname of
# every behavior named 'origin'.
hostnameslist = [
    behavior['options']['hostname']
    for child in hostnames['rules']['children']
    for behavior in child['behaviors']
    if behavior['name'] == 'origin'
]
properties = [
    {'propertyName': hostnames['propertyName'], 'hostnames': hostnameslist},
]
Making an assumption about how the OP's data might be structured.
Recursive navigation of the dictionary to find all/any values associated with a dictionary key of 'hostname' appears to be well-suited here.
Doing it this way obviates the need for knowledge about the depth of the dictionary or indeed any of the dictionary key names except (obviously) 'hostname'.
Of course, there may be other dictionaries within the "master" dictionary that contain a 'hostname' key. If that's the case then this function may return values that are not needed/wanted.
data = {
'propertyName': 'www.property1.com',
'propertyVersion': 1,
'etag': 'jbcas6764023nklf78354',
'rules': {
'name': 'default',
'children': [{
'name': 'Route',
'children': [],
'behaviors': [{
'name': 'origin',
'options': {
'originType': 'CUSTOMER',
'hostname': 'www.origin1.com'
}
},
{
'name': 'origin',
'options': {
'originType': 'CUSTOMER',
'hostname': 'www.origin2.com'
}
}
]
}
]
}
}
def get_hostnames(d):
    """Return every value stored under a 'hostname' key anywhere inside *d*.

    Dictionaries and lists are searched depth-first in iteration order. A
    dictionary that has a 'hostname' key contributes that value and is not
    searched any deeper; any other kind of object contributes nothing.
    """
    def _walk(node):
        if isinstance(node, dict):
            if 'hostname' in node:
                yield node['hostname']
            else:
                for child in node.values():
                    yield from _walk(child)
        elif isinstance(node, list):
            for child in node:
                yield from _walk(child)

    return list(_walk(d))
result = {'properties': [{'propertyName': data.get('propertyName'), 'hostnames': get_hostnames(data)}]}
print(result)
Output:
{'properties': [{'propertyName': 'www.property1.com', 'hostnames': ['www.origin1.com', 'www.origin2.com']}]}
I am currently writing a scraper that reads from an API that contains a JSON. By doing response.json() it would return a dict where we could easily use the e.g response["object"]to get the value we want as I assume that converts it to a dict. The current mock data looks like this:
data = {
'id': 336461,
'thumbnail': '/images/product/123456?trim&h=80',
'variants': None,
'name': 'Testing',
'data': {
'Videoutgång': {
'Typ av gränssnitt': {
'name': 'Typ av gränssnitt',
'value': 'PCI Test'
}
}
},
'stock': {
'web': 0,
'supplier': None,
'displayCap': '50',
'1': 0,
'orders': {
'CL': {
'ordered': -10,
'status': 1
}
}
}
}
The API sometimes contains "orders -> CL" and sometimes doesn't, so I need to handle both the happy path and the unhappy path, and I am looking for the fastest way to get the data from a dict.
I have currently done something like this:
data = {
'id': 336461,
'thumbnail': '/images/product/123456?trim&h=80',
'variants': None,
'name': 'Testing',
'data': {
'Videoutgång': {
'Typ av gränssnitt': {
'name': 'Typ av gränssnitt',
'value': 'PCI Test'
}
}
},
'stock': {
'web': 0,
'supplier': None,
'displayCap': '50',
'1': 0,
'orders': {
'CL': {
'ordered': -10,
'status': 1
}
}
}
}
if (
"stock" in data
and "orders" in data["stock"]
and "CL" in data["stock"]["orders"]
and "status" in data["stock"]["orders"]["CL"]
and data["stock"]["orders"]["CL"]["status"]
):
print(f'{data["stock"]["orders"]["CL"]["status"]}: {data["stock"]["orders"]["CL"]["ordered"]}')
1: -10
However my question is that I would like to know which is the fastest way to get the data from a dict if it is in the dict?
Lookups are faster in dictionaries because Python implements them using hash tables.
If we explain the difference by Big O concepts, dictionaries have constant time complexity, O(1). This is another approach using .get() method as well:
data = {
'id': 336461,
'thumbnail': '/images/product/123456?trim&h=80',
'variants': None,
'name': 'Testing',
'data': {
'Videoutgång': {
'Typ av gränssnitt': {
'name': 'Typ av gränssnitt',
'value': 'PCI Test'
}
}
},
'stock': {
'web': 0,
'supplier': None,
'displayCap': '50',
'1': 0,
'orders': {
'CL': {
'ordered': -10,
'status': 1
}
}
}
}
# ``or {}`` also covers keys that are present but hold None (as 'variants' and
# 'supplier' do in this payload); a ``.get(key, {})`` default would not apply
# there and the next ``.get`` would raise AttributeError.
cl_order = ((data.get('stock') or {}).get('orders') or {}).get('CL') or {}
# Only print when a status is present and truthy.
if cl_order.get('status'):
    print(f'{cl_order["status"]}: {cl_order["ordered"]}')
Here is a nice writeup on lookups in Python with list and dictionary as example.
I got your point. For this question, since your stock has just 4 values it is hard to say if .get() method will work faster than using a loop or not. If your dictionary would have more items then certainly .get() would have worked much faster but since there are few keys, using loop will not make much difference.
I need to get the 'ids' from this JSON response. The thing is that there are many dictionaries with a list of dictionaries inside. How can I do this? (PS: len(items) is 20, so I need to get the 20 ids in the form of a dictionary.)
{'playlists': {'href': 'https://api.spotify.com/v1/search?query=rewind-The%25&type=playlist&offset=0&limit=20',
'items': [{'collaborative': False,
'description': 'Remember what you listened to in 2010? Rewind and rediscover your favorites.',
'external_urls': {'spotify': 'https://open.spotify.com/playlist/37i9dQZF1DXc6IFF23C9jj'},
'href': 'https://api.spotify.com/v1/playlists/37i9dQZF1DXc6IFF23C9jj',
'id': '37i9dQZF1DXc6IFF23C9jj',
'images': [{'height': None,
'url': 'https://i.scdn.co/image/ab67706f0000000327ba1078080355421d1a49e2',
'width': None}],
'name': 'Rewind - The Sound of 2010',
'owner': {'display_name': 'Spotify',
'external_urls': {'spotify': 'https://open.spotify.com/user/spotify'},
'href': 'https://api.spotify.com/v1/users/spotify',
'id': 'spotify',
'type': 'user',
'uri': 'spotify:user:spotify'},
'primary_color': None,
'public': None,
'snapshot_id': 'MTU5NTUzMTE1OSwwMDAwMDAwMGQ0MWQ4Y2Q5OGYwMGIyMDRlOTgwMDk5OGVjZjg0Mjdl',
'tracks': {'href': 'https://api.spotify.com/v1/playlists/37i9dQZF1DXc6IFF23C9jj/tracks',
'total': 100},
'type': 'playlist',
'uri': 'spotify:playlist:37i9dQZF1DXc6IFF23C9jj'},
Im trying to get it through this:
dict={'id':''}
for playlists in playlist_data['playlists']:
for items in playlists['items']:
for item in items:
for dic in range(len(item)):
for id in dic['id']:
dict.update('id')
print(dict)
I get this error:
TypeError: string indices must be integers
Try something like this:
ids = [item["id"] for item in json_data["playlists"]["items"]]
This is called a list comprehension.
You want to iterate over all of the "items" within the "playlists" key.
You can access that list of items:
json_data["playlists"]["items"]
Then you iterate over each item within items:
for item in json_data["playlists"]["items"]
Then you access the "id" of each item:
item["id"]
You can index an object using the keys of object. I can see there are two places where id is present in an object. To retrieve those two ids and store them in a dictionary format, you can use the following approach -
_json = {
'playlists': {
'href': 'https://api.spotify.com/v1/search?query=rewind-The%25&type=playlist&offset=0&limit=20',
'items': [{
'collaborative': False,
'description': 'Remember what you listened to in 2010? Rewind and rediscover your favorites.',
'external_urls': {
'spotify': 'https://open.spotify.com/playlist/37i9dQZF1DXc6IFF23C9jj'
},
'href': 'https://api.spotify.com/v1/playlists/37i9dQZF1DXc6IFF23C9jj',
'id': '37i9dQZF1DXc6IFF23C9jj',
'images': [{
'height': None,
'url': 'https://i.scdn.co/image/ab67706f0000000327ba1078080355421d1a49e2',
'width': None
}],
'name': 'Rewind - The Sound of 2010',
'owner': {
'display_name': 'Spotify',
'external_urls': {
'spotify': 'https://open.spotify.com/user/spotify'
},
'href': 'https://api.spotify.com/v1/users/spotify',
'id': 'spotify',
'type': 'user',
'uri': 'spotify:user:spotify'
},
'primary_color': None,
'public': None,
'snapshot_id': 'MTU5NTUzMTE1OSwwMDAwMDAwMGQ0MWQ4Y2Q5OGYwMGIyMDRlOTgwMDk5OGVjZjg0Mjdl',
'tracks': {
'href': 'https://api.spotify.com/v1/playlists/37i9dQZF1DXc6IFF23C9jj/tracks',
'total': 100
},
'type': 'playlist',
'uri': 'spotify:playlist:37i9dQZF1DXc6IFF23C9jj'
}, ]
}
}
# A dict comprehension with the constant key 'id' would overwrite the entry on
# every iteration and keep only the last playlist, so gather the ids of every
# playlist (followed by its owner's id) into one list instead.
res_dict = {
    'id': [
        found_id
        for item in _json['playlists']['items']
        for found_id in (item['id'], item['owner']['id'])
    ]
}
print(res_dict)
OUTPUT :
{'id': ['37i9dQZF1DXc6IFF23C9jj', 'spotify']}
If you don't need the second id that's present in the json object, you can just remove it from above res_dict and modify it as -
# Collect the id of every playlist into a list; a dict comprehension with the
# constant key 'id' would keep only the last playlist's id.
res_dict = {'id': [item['id'] for item in _json['playlists']['items']]}
This will only fetch the id present in the items array as key of any element and not any further nested ids (like items[i]->owner->id won't be in the final res as it was in the first case).
I have been trying to do something simple yet something hard for me to solve it!
I have a json object that looks like:
jsonObject = {
'attributes': {
'192': { <--- This can be changed times to times meaning different number
'id': '192',
'code': 'hello',
'label': 'world',
'options': [
{
'id': '211',
'label': '5'
},
{
'id': '1202',
'label': '8.5'
},
{
'id': '54',
'label': '9'
},
{
'id': '1203',
'label': '9.5'
},
{
'id': '58',
'label': '10'
}
]
}
},
'template': '12345',
'basePrice': '51233',
'oldPrice': '51212',
'productId': 'hello',
}
and what I want to do is to get the values from options (To have both id and label saved into a list)
For now I only managed to do:
for att, value in jsonObject.items():
print(f"{att} - {value}"
How can I get the label and id?
You can try the following code:
attr = jsonObject['attributes']
# The attribute key (e.g. '192') changes between responses, so walk the values
# instead of indexing by a fixed key. Iterating also copes with an empty
# 'attributes' mapping and with more than one attribute entry.
options = [
    option
    for attribute in attr.values()
    for option in attribute['options']
]
for option in options:
    print(f"id: {option['id']}, label: {option['label']}")
I have a simple API in which coordinates and a distance are provided, and documents from within that distance are returned. I intend it to return just the id and distance, but the defined schema is being ignored and the whole document is being returned. Any ideas?
item = {'item_title': 'relate',
'datasource': {
'source': 'api',
'filter': {'_type': 'line'},
'aggregation': {'pipeline': [{'$geoNear':{'near':{'type': 'point', 'coordinates': '$coords'},'distanceField': 'distance','maxDistance': '$maxDist','num': 1, 'spherical': 'true'}}]}
},
'schema': {
'_id': {'type': 'string'},
'distance': {'type': 'float'}
},
}
DOMAIN = {"data": item}
and the postman query is:
http://localhost:8090/data?aggregate={"$maxDist": 500, "$coords": [-1.47, 50.93]}
EDIT:
Following Neil's comment I tried this:
item = {'item_title': 'relate',
'schema': {
'uri': {'type': 'string'},
'distance': {'type': 'float'}
},
'datasource': {
'source': 'api',
'filter': {'_type': 'link'},
'aggregation': {'pipeline': [{'$geoNear':{'near':{'type': 'point', 'coordinates': ['$lng', '$lat']},'distanceField': 'distance','maxDistance': '$maxDist','num': 1, 'spherical': 'true'}}]}
}
}
With the following postman request:
http://localhost:8090/data?aggregate={"$maxDist": 500, "$lng": -1.47, "$lat": 50.93}
This is leading to the following error:
geoNear command failed: { ok: 0.0, errmsg: "'near' field must be point", code: 17304, codeName: "Location17304" }